[XFS] Per iclog callback chain lock

Rather than use the icloglock for protecting the iclog completion callback
chain, use a new per-iclog lock so that walking the callback chain doesn't
require holding a global lock.

This reduces contention on the global icloglock during transaction commit and
log I/O completion, because both operations now need to hold that lock less
often.

SGI-PV: 978729
SGI-Modid: xfs-linux-melb:xfs-kern:30770a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
This commit is contained in:
David Chinner 2008-04-10 12:18:39 +10:00 committed by Lachlan McIlroy
parent 2abdb8c881
commit 114d23aae5
2 changed files with 45 additions and 23 deletions

View File

@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
void *iclog_hndl, /* iclog to hang callback off */ void *iclog_hndl, /* iclog to hang callback off */
xfs_log_callback_t *cb) xfs_log_callback_t *cb)
{ {
xlog_t *log = mp->m_log;
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
int abortflg; int abortflg;
cb->cb_next = NULL; spin_lock(&iclog->ic_callback_lock);
spin_lock(&log->l_icloglock);
abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
if (!abortflg) { if (!abortflg) {
ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
*(iclog->ic_callback_tail) = cb; *(iclog->ic_callback_tail) = cb;
iclog->ic_callback_tail = &(cb->cb_next); iclog->ic_callback_tail = &(cb->cb_next);
} }
spin_unlock(&log->l_icloglock); spin_unlock(&iclog->ic_callback_lock);
return abortflg; return abortflg;
} /* xfs_log_notify */ } /* xfs_log_notify */
@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t *mp,
iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_log = log; iclog->ic_log = log;
atomic_set(&iclog->ic_refcnt, 0);
spin_lock_init(&iclog->ic_callback_lock);
iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback_tail = &(iclog->ic_callback);
iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
@ -1987,7 +1987,7 @@ xlog_state_clean_log(xlog_t *log)
if (iclog->ic_state == XLOG_STATE_DIRTY) { if (iclog->ic_state == XLOG_STATE_DIRTY) {
iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_offset = 0; iclog->ic_offset = 0;
iclog->ic_callback = NULL; /* don't need to free */ ASSERT(iclog->ic_callback == NULL);
/* /*
* If the number of ops in this iclog indicate it just * If the number of ops in this iclog indicate it just
* contains the dummy transaction, we can * contains the dummy transaction, we can
@ -2190,37 +2190,40 @@ xlog_state_do_callback(
be64_to_cpu(iclog->ic_header.h_lsn); be64_to_cpu(iclog->ic_header.h_lsn);
spin_unlock(&log->l_grant_lock); spin_unlock(&log->l_grant_lock);
/*
* Keep processing entries in the callback list
* until we come around and it is empty. We
* need to atomically see that the list is
* empty and change the state to DIRTY so that
* we don't miss any more callbacks being added.
*/
spin_lock(&log->l_icloglock);
} else { } else {
spin_unlock(&log->l_icloglock);
ioerrors++; ioerrors++;
} }
cb = iclog->ic_callback;
/*
* Keep processing entries in the callback list until
* we come around and it is empty. We need to
* atomically see that the list is empty and change the
* state to DIRTY so that we don't miss any more
* callbacks being added.
*/
spin_lock(&iclog->ic_callback_lock);
cb = iclog->ic_callback;
while (cb) { while (cb) {
iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback_tail = &(iclog->ic_callback);
iclog->ic_callback = NULL; iclog->ic_callback = NULL;
spin_unlock(&log->l_icloglock); spin_unlock(&iclog->ic_callback_lock);
/* perform callbacks in the order given */ /* perform callbacks in the order given */
for (; cb; cb = cb_next) { for (; cb; cb = cb_next) {
cb_next = cb->cb_next; cb_next = cb->cb_next;
cb->cb_func(cb->cb_arg, aborted); cb->cb_func(cb->cb_arg, aborted);
} }
spin_lock(&log->l_icloglock); spin_lock(&iclog->ic_callback_lock);
cb = iclog->ic_callback; cb = iclog->ic_callback;
} }
loopdidcallbacks++; loopdidcallbacks++;
funcdidcallbacks++; funcdidcallbacks++;
spin_lock(&log->l_icloglock);
ASSERT(iclog->ic_callback == NULL); ASSERT(iclog->ic_callback == NULL);
spin_unlock(&iclog->ic_callback_lock);
if (!(iclog->ic_state & XLOG_STATE_IOERROR)) if (!(iclog->ic_state & XLOG_STATE_IOERROR))
iclog->ic_state = XLOG_STATE_DIRTY; iclog->ic_state = XLOG_STATE_DIRTY;

View File

@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
* - ic_offset is the current number of bytes written to in this iclog. * - ic_offset is the current number of bytes written to in this iclog.
* - ic_refcnt is bumped when someone is writing to the log. * - ic_refcnt is bumped when someone is writing to the log.
* - ic_state is the state of the iclog. * - ic_state is the state of the iclog.
*
* Because of cacheline contention on large machines, we need to separate
* various resources onto different cachelines. To start with, make the
* structure cacheline aligned. The following fields can be contended on
* by independent processes:
*
* - ic_callback_*
* - ic_refcnt
* - fields protected by the global l_icloglock
*
* so we need to ensure that these fields are located in separate cachelines.
* We'll put all the read-only and l_icloglock fields in the first cacheline,
* and move everything else out to subsequent cachelines.
*/ */
typedef struct xlog_iclog_fields { typedef struct xlog_iclog_fields {
sv_t ic_forcesema; sv_t ic_forcesema;
@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
struct xlog_in_core *ic_prev; struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp; struct xfs_buf *ic_bp;
struct log *ic_log; struct log *ic_log;
xfs_log_callback_t *ic_callback;
xfs_log_callback_t **ic_callback_tail;
#ifdef XFS_LOG_TRACE
struct ktrace *ic_trace;
#endif
int ic_size; int ic_size;
int ic_offset; int ic_offset;
atomic_t ic_refcnt;
int ic_bwritecnt; int ic_bwritecnt;
ushort_t ic_state; ushort_t ic_state;
char *ic_datap; /* pointer to iclog data */ char *ic_datap; /* pointer to iclog data */
} xlog_iclog_fields_t; #ifdef XFS_LOG_TRACE
struct ktrace *ic_trace;
#endif
/* Callback structures need their own cacheline */
spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
xfs_log_callback_t *ic_callback;
xfs_log_callback_t **ic_callback_tail;
/* reference counts need their own cacheline */
atomic_t ic_refcnt ____cacheline_aligned_in_smp;
} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
typedef union xlog_in_core2 { typedef union xlog_in_core2 {
xlog_rec_header_t hic_header; xlog_rec_header_t hic_header;
@ -366,6 +384,7 @@ typedef struct xlog_in_core {
#define ic_bp hic_fields.ic_bp #define ic_bp hic_fields.ic_bp
#define ic_log hic_fields.ic_log #define ic_log hic_fields.ic_log
#define ic_callback hic_fields.ic_callback #define ic_callback hic_fields.ic_callback
#define ic_callback_lock hic_fields.ic_callback_lock
#define ic_callback_tail hic_fields.ic_callback_tail #define ic_callback_tail hic_fields.ic_callback_tail
#define ic_trace hic_fields.ic_trace #define ic_trace hic_fields.ic_trace
#define ic_size hic_fields.ic_size #define ic_size hic_fields.ic_size