[XFS] Per iclog callback chain lock
Rather than use the icloglock for protecting the iclog completion callback chain, use a new per-iclog lock so that walking the callback chain doesn't require holding a global lock. This reduces contention on the icloglock during transaction commit and log I/O completion by reducing the number of times we need to hold the global icloglock during these operations. SGI-PV: 978729 SGI-Modid: xfs-linux-melb:xfs-kern:30770a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
This commit is contained in:
parent
2abdb8c881
commit
114d23aae5
@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
|
|||||||
void *iclog_hndl, /* iclog to hang callback off */
|
void *iclog_hndl, /* iclog to hang callback off */
|
||||||
xfs_log_callback_t *cb)
|
xfs_log_callback_t *cb)
|
||||||
{
|
{
|
||||||
xlog_t *log = mp->m_log;
|
|
||||||
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
|
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
|
||||||
int abortflg;
|
int abortflg;
|
||||||
|
|
||||||
cb->cb_next = NULL;
|
spin_lock(&iclog->ic_callback_lock);
|
||||||
spin_lock(&log->l_icloglock);
|
|
||||||
abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
|
abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
|
||||||
if (!abortflg) {
|
if (!abortflg) {
|
||||||
ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
|
ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
|
||||||
@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
|
|||||||
*(iclog->ic_callback_tail) = cb;
|
*(iclog->ic_callback_tail) = cb;
|
||||||
iclog->ic_callback_tail = &(cb->cb_next);
|
iclog->ic_callback_tail = &(cb->cb_next);
|
||||||
}
|
}
|
||||||
spin_unlock(&log->l_icloglock);
|
spin_unlock(&iclog->ic_callback_lock);
|
||||||
return abortflg;
|
return abortflg;
|
||||||
} /* xfs_log_notify */
|
} /* xfs_log_notify */
|
||||||
|
|
||||||
@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t *mp,
|
|||||||
iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
|
iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
|
||||||
iclog->ic_state = XLOG_STATE_ACTIVE;
|
iclog->ic_state = XLOG_STATE_ACTIVE;
|
||||||
iclog->ic_log = log;
|
iclog->ic_log = log;
|
||||||
|
atomic_set(&iclog->ic_refcnt, 0);
|
||||||
|
spin_lock_init(&iclog->ic_callback_lock);
|
||||||
iclog->ic_callback_tail = &(iclog->ic_callback);
|
iclog->ic_callback_tail = &(iclog->ic_callback);
|
||||||
iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
|
iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
|
||||||
|
|
||||||
@ -1987,7 +1987,7 @@ xlog_state_clean_log(xlog_t *log)
|
|||||||
if (iclog->ic_state == XLOG_STATE_DIRTY) {
|
if (iclog->ic_state == XLOG_STATE_DIRTY) {
|
||||||
iclog->ic_state = XLOG_STATE_ACTIVE;
|
iclog->ic_state = XLOG_STATE_ACTIVE;
|
||||||
iclog->ic_offset = 0;
|
iclog->ic_offset = 0;
|
||||||
iclog->ic_callback = NULL; /* don't need to free */
|
ASSERT(iclog->ic_callback == NULL);
|
||||||
/*
|
/*
|
||||||
* If the number of ops in this iclog indicate it just
|
* If the number of ops in this iclog indicate it just
|
||||||
* contains the dummy transaction, we can
|
* contains the dummy transaction, we can
|
||||||
@ -2190,37 +2190,40 @@ xlog_state_do_callback(
|
|||||||
be64_to_cpu(iclog->ic_header.h_lsn);
|
be64_to_cpu(iclog->ic_header.h_lsn);
|
||||||
spin_unlock(&log->l_grant_lock);
|
spin_unlock(&log->l_grant_lock);
|
||||||
|
|
||||||
/*
|
|
||||||
* Keep processing entries in the callback list
|
|
||||||
* until we come around and it is empty. We
|
|
||||||
* need to atomically see that the list is
|
|
||||||
* empty and change the state to DIRTY so that
|
|
||||||
* we don't miss any more callbacks being added.
|
|
||||||
*/
|
|
||||||
spin_lock(&log->l_icloglock);
|
|
||||||
} else {
|
} else {
|
||||||
|
spin_unlock(&log->l_icloglock);
|
||||||
ioerrors++;
|
ioerrors++;
|
||||||
}
|
}
|
||||||
cb = iclog->ic_callback;
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Keep processing entries in the callback list until
|
||||||
|
* we come around and it is empty. We need to
|
||||||
|
* atomically see that the list is empty and change the
|
||||||
|
* state to DIRTY so that we don't miss any more
|
||||||
|
* callbacks being added.
|
||||||
|
*/
|
||||||
|
spin_lock(&iclog->ic_callback_lock);
|
||||||
|
cb = iclog->ic_callback;
|
||||||
while (cb) {
|
while (cb) {
|
||||||
iclog->ic_callback_tail = &(iclog->ic_callback);
|
iclog->ic_callback_tail = &(iclog->ic_callback);
|
||||||
iclog->ic_callback = NULL;
|
iclog->ic_callback = NULL;
|
||||||
spin_unlock(&log->l_icloglock);
|
spin_unlock(&iclog->ic_callback_lock);
|
||||||
|
|
||||||
/* perform callbacks in the order given */
|
/* perform callbacks in the order given */
|
||||||
for (; cb; cb = cb_next) {
|
for (; cb; cb = cb_next) {
|
||||||
cb_next = cb->cb_next;
|
cb_next = cb->cb_next;
|
||||||
cb->cb_func(cb->cb_arg, aborted);
|
cb->cb_func(cb->cb_arg, aborted);
|
||||||
}
|
}
|
||||||
spin_lock(&log->l_icloglock);
|
spin_lock(&iclog->ic_callback_lock);
|
||||||
cb = iclog->ic_callback;
|
cb = iclog->ic_callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
loopdidcallbacks++;
|
loopdidcallbacks++;
|
||||||
funcdidcallbacks++;
|
funcdidcallbacks++;
|
||||||
|
|
||||||
|
spin_lock(&log->l_icloglock);
|
||||||
ASSERT(iclog->ic_callback == NULL);
|
ASSERT(iclog->ic_callback == NULL);
|
||||||
|
spin_unlock(&iclog->ic_callback_lock);
|
||||||
if (!(iclog->ic_state & XLOG_STATE_IOERROR))
|
if (!(iclog->ic_state & XLOG_STATE_IOERROR))
|
||||||
iclog->ic_state = XLOG_STATE_DIRTY;
|
iclog->ic_state = XLOG_STATE_DIRTY;
|
||||||
|
|
||||||
|
@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
|
|||||||
* - ic_offset is the current number of bytes written to in this iclog.
|
* - ic_offset is the current number of bytes written to in this iclog.
|
||||||
* - ic_refcnt is bumped when someone is writing to the log.
|
* - ic_refcnt is bumped when someone is writing to the log.
|
||||||
* - ic_state is the state of the iclog.
|
* - ic_state is the state of the iclog.
|
||||||
|
*
|
||||||
|
* Because of cacheline contention on large machines, we need to separate
|
||||||
|
* various resources onto different cachelines. To start with, make the
|
||||||
|
* structure cacheline aligned. The following fields can be contended on
|
||||||
|
* by independent processes:
|
||||||
|
*
|
||||||
|
* - ic_callback_*
|
||||||
|
* - ic_refcnt
|
||||||
|
* - fields protected by the global l_icloglock
|
||||||
|
*
|
||||||
|
* so we need to ensure that these fields are located in separate cachelines.
|
||||||
|
* We'll put all the read-only and l_icloglock fields in the first cacheline,
|
||||||
|
* and move everything else out to subsequent cachelines.
|
||||||
*/
|
*/
|
||||||
typedef struct xlog_iclog_fields {
|
typedef struct xlog_iclog_fields {
|
||||||
sv_t ic_forcesema;
|
sv_t ic_forcesema;
|
||||||
@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
|
|||||||
struct xlog_in_core *ic_prev;
|
struct xlog_in_core *ic_prev;
|
||||||
struct xfs_buf *ic_bp;
|
struct xfs_buf *ic_bp;
|
||||||
struct log *ic_log;
|
struct log *ic_log;
|
||||||
xfs_log_callback_t *ic_callback;
|
|
||||||
xfs_log_callback_t **ic_callback_tail;
|
|
||||||
#ifdef XFS_LOG_TRACE
|
|
||||||
struct ktrace *ic_trace;
|
|
||||||
#endif
|
|
||||||
int ic_size;
|
int ic_size;
|
||||||
int ic_offset;
|
int ic_offset;
|
||||||
atomic_t ic_refcnt;
|
|
||||||
int ic_bwritecnt;
|
int ic_bwritecnt;
|
||||||
ushort_t ic_state;
|
ushort_t ic_state;
|
||||||
char *ic_datap; /* pointer to iclog data */
|
char *ic_datap; /* pointer to iclog data */
|
||||||
} xlog_iclog_fields_t;
|
#ifdef XFS_LOG_TRACE
|
||||||
|
struct ktrace *ic_trace;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Callback structures need their own cacheline */
|
||||||
|
spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
|
||||||
|
xfs_log_callback_t *ic_callback;
|
||||||
|
xfs_log_callback_t **ic_callback_tail;
|
||||||
|
|
||||||
|
/* reference counts need their own cacheline */
|
||||||
|
atomic_t ic_refcnt ____cacheline_aligned_in_smp;
|
||||||
|
} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
|
||||||
|
|
||||||
typedef union xlog_in_core2 {
|
typedef union xlog_in_core2 {
|
||||||
xlog_rec_header_t hic_header;
|
xlog_rec_header_t hic_header;
|
||||||
@ -366,6 +384,7 @@ typedef struct xlog_in_core {
|
|||||||
#define ic_bp hic_fields.ic_bp
|
#define ic_bp hic_fields.ic_bp
|
||||||
#define ic_log hic_fields.ic_log
|
#define ic_log hic_fields.ic_log
|
||||||
#define ic_callback hic_fields.ic_callback
|
#define ic_callback hic_fields.ic_callback
|
||||||
|
#define ic_callback_lock hic_fields.ic_callback_lock
|
||||||
#define ic_callback_tail hic_fields.ic_callback_tail
|
#define ic_callback_tail hic_fields.ic_callback_tail
|
||||||
#define ic_trace hic_fields.ic_trace
|
#define ic_trace hic_fields.ic_trace
|
||||||
#define ic_size hic_fields.ic_size
|
#define ic_size hic_fields.ic_size
|
||||||
|
Loading…
x
Reference in New Issue
Block a user