jbd2: Change j_state_lock to be a rwlock_t

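Lockstat reports have shown that j_state_lock is a major source of
lock contention, especially on systems with more than 4 CPU cores.
Change it from a plain spinlock to a read/write spinlock (rwlock_t),
so that code paths which only need to read the journal state can take
it with read_lock() and run concurrently, while the remaining paths
take it exclusively with write_lock().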

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Theodore Ts'o 2010-08-03 21:35:12 -04:00
parent a51dca9cd3
commit a931da6ac9
8 changed files with 114 additions and 110 deletions

View File

@ -5066,7 +5066,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
transaction_t *transaction; transaction_t *transaction;
tid_t tid; tid_t tid;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_running_transaction) if (journal->j_running_transaction)
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
else else
@ -5075,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
tid = transaction->t_tid; tid = transaction->t_tid;
else else
tid = journal->j_commit_sequence; tid = journal->j_commit_sequence;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid; ei->i_sync_tid = tid;
ei->i_datasync_tid = tid; ei->i_datasync_tid = tid;
} }

View File

@ -3232,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_min_batch_time = sbi->s_min_batch_time; journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time; journal->j_max_batch_time = sbi->s_max_batch_time;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER)) if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER; journal->j_flags |= JBD2_BARRIER;
else else
@ -3241,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
static journal_t *ext4_get_journal(struct super_block *sb, static journal_t *ext4_get_journal(struct super_block *sb,

View File

@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
void __jbd2_log_wait_for_space(journal_t *journal) void __jbd2_log_wait_for_space(journal_t *journal)
{ {
int nblocks, space_left; int nblocks, space_left;
assert_spin_locked(&journal->j_state_lock); /* assert_spin_locked(&journal->j_state_lock); */
nblocks = jbd_space_needed(journal); nblocks = jbd_space_needed(journal);
while (__jbd2_log_space_left(journal) < nblocks) { while (__jbd2_log_space_left(journal) < nblocks) {
if (journal->j_flags & JBD2_ABORT) if (journal->j_flags & JBD2_ABORT)
return; return;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock(&journal->j_checkpoint_mutex);
/* /*
@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
* filesystem, so abort the journal and leave a stack * filesystem, so abort the journal and leave a stack
* trace for forensic evidence. * trace for forensic evidence.
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
nblocks = jbd_space_needed(journal); nblocks = jbd_space_needed(journal);
space_left = __jbd2_log_space_left(journal); space_left = __jbd2_log_space_left(journal);
@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
if (journal->j_committing_transaction) if (journal->j_committing_transaction)
tid = journal->j_committing_transaction->t_tid; tid = journal->j_committing_transaction->t_tid;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
if (chkpt) { if (chkpt) {
jbd2_log_do_checkpoint(journal); jbd2_log_do_checkpoint(journal);
} else if (jbd2_cleanup_journal_tail(journal) == 0) { } else if (jbd2_cleanup_journal_tail(journal) == 0) {
@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
WARN_ON(1); WARN_ON(1);
jbd2_journal_abort(journal, 0); jbd2_journal_abort(journal, 0);
} }
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} else { } else {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} }
@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* next transaction ID we will write, and where it will * next transaction ID we will write, and where it will
* start. */ * start. */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
transaction = journal->j_checkpoint_transactions; transaction = journal->j_checkpoint_transactions;
if (transaction) { if (transaction) {
@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* If the oldest pinned transaction is at the tail of the log /* If the oldest pinned transaction is at the tail of the log
already then there's not much we can do right now. */ already then there's not much we can do right now. */
if (journal->j_tail_sequence == first_tid) { if (journal->j_tail_sequence == first_tid) {
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 1; return 1;
} }
@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
journal->j_free += freed; journal->j_free += freed;
journal->j_tail_sequence = first_tid; journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr; journal->j_tail = blocknr;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
/* /*
* If there is an external journal, we need to make sure that * If there is an external journal, we need to make sure that

View File

@ -152,9 +152,9 @@ static int journal_submit_commit_record(journal_t *journal,
printk(KERN_WARNING printk(KERN_WARNING
"JBD2: Disabling barriers on %s, " "JBD2: Disabling barriers on %s, "
"not supported by device\n", journal->j_devname); "not supported by device\n", journal->j_devname);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER; journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
/* And try again, without the barrier */ /* And try again, without the barrier */
lock_buffer(bh); lock_buffer(bh);
@ -182,9 +182,9 @@ retry:
printk(KERN_WARNING printk(KERN_WARNING
"JBD2: %s: disabling barries on %s - not supported " "JBD2: %s: disabling barries on %s - not supported "
"by device\n", __func__, journal->j_devname); "by device\n", __func__, journal->j_devname);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER; journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
lock_buffer(bh); lock_buffer(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(1, "JBD: starting commit of transaction %d\n", jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid); commit_transaction->t_tid);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED; commit_transaction->t_state = T_LOCKED;
/* /*
@ -424,9 +424,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
if (atomic_read(&commit_transaction->t_updates)) { if (atomic_read(&commit_transaction->t_updates)) {
spin_unlock(&commit_transaction->t_handle_lock); spin_unlock(&commit_transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&commit_transaction->t_handle_lock); spin_lock(&commit_transaction->t_handle_lock);
} }
finish_wait(&journal->j_wait_updates, &wait); finish_wait(&journal->j_wait_updates, &wait);
@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_time = ktime_get(); start_time = ktime_get();
commit_transaction->t_log_start = journal->j_head; commit_transaction->t_log_start = journal->j_head;
wake_up(&journal->j_wait_transaction_locked); wake_up(&journal->j_wait_transaction_locked);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd_debug (3, "JBD: commit phase 2\n"); jbd_debug (3, "JBD: commit phase 2\n");
@ -519,9 +519,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* transaction! Now comes the tricky part: we need to write out * transaction! Now comes the tricky part: we need to write out
* metadata. Loop over the transaction's entire buffer list: * metadata. Loop over the transaction's entire buffer list:
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_COMMIT; commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
trace_jbd2_commit_logging(journal, commit_transaction); trace_jbd2_commit_logging(journal, commit_transaction);
stats.run.rs_logging = jiffies; stats.run.rs_logging = jiffies;
@ -978,7 +978,7 @@ restart_loop:
* __jbd2_journal_drop_transaction(). Otherwise we could race with * __jbd2_journal_drop_transaction(). Otherwise we could race with
* other checkpointing code processing the transaction... * other checkpointing code processing the transaction...
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
/* /*
* Now recheck if some buffers did not get attached to the transaction * Now recheck if some buffers did not get attached to the transaction
@ -986,7 +986,7 @@ restart_loop:
*/ */
if (commit_transaction->t_forget) { if (commit_transaction->t_forget) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
goto restart_loop; goto restart_loop;
} }
@ -1038,7 +1038,7 @@ restart_loop:
journal->j_average_commit_time*3) / 4; journal->j_average_commit_time*3) / 4;
else else
journal->j_average_commit_time = commit_time; journal->j_average_commit_time = commit_time;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
if (commit_transaction->t_checkpoint_list == NULL && if (commit_transaction->t_checkpoint_list == NULL &&
commit_transaction->t_checkpoint_io_list == NULL) { commit_transaction->t_checkpoint_io_list == NULL) {

View File

@ -142,7 +142,7 @@ static int kjournald2(void *arg)
/* /*
* And now, wait forever for commit wakeup events. * And now, wait forever for commit wakeup events.
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
loop: loop:
if (journal->j_flags & JBD2_UNMOUNT) if (journal->j_flags & JBD2_UNMOUNT)
@ -153,10 +153,10 @@ loop:
if (journal->j_commit_sequence != journal->j_commit_request) { if (journal->j_commit_sequence != journal->j_commit_request) {
jbd_debug(1, "OK, requests differ\n"); jbd_debug(1, "OK, requests differ\n");
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer); del_timer_sync(&journal->j_commit_timer);
jbd2_journal_commit_transaction(journal); jbd2_journal_commit_transaction(journal);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
goto loop; goto loop;
} }
@ -168,9 +168,9 @@ loop:
* be already stopped. * be already stopped.
*/ */
jbd_debug(1, "Now suspending kjournald2\n"); jbd_debug(1, "Now suspending kjournald2\n");
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
refrigerator(); refrigerator();
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} else { } else {
/* /*
* We assume on resume that commits are already there, * We assume on resume that commits are already there,
@ -190,9 +190,9 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT) if (journal->j_flags & JBD2_UNMOUNT)
should_sleep = 0; should_sleep = 0;
if (should_sleep) { if (should_sleep) {
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} }
finish_wait(&journal->j_wait_commit, &wait); finish_wait(&journal->j_wait_commit, &wait);
} }
@ -210,7 +210,7 @@ loop:
goto loop; goto loop;
end_loop: end_loop:
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer); del_timer_sync(&journal->j_commit_timer);
journal->j_task = NULL; journal->j_task = NULL;
wake_up(&journal->j_wait_done_commit); wake_up(&journal->j_wait_done_commit);
@ -233,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
static void journal_kill_thread(journal_t *journal) static void journal_kill_thread(journal_t *journal)
{ {
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_UNMOUNT; journal->j_flags |= JBD2_UNMOUNT;
while (journal->j_task) { while (journal->j_task) {
wake_up(&journal->j_wait_commit); wake_up(&journal->j_wait_commit);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_done_commit, journal->j_task == NULL); wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} }
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* /*
@ -452,7 +452,7 @@ int __jbd2_log_space_left(journal_t *journal)
{ {
int left = journal->j_free; int left = journal->j_free;
assert_spin_locked(&journal->j_state_lock); /* assert_spin_locked(&journal->j_state_lock); */
/* /*
* Be pessimistic here about the number of those free blocks which * Be pessimistic here about the number of those free blocks which
@ -497,9 +497,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{ {
int ret; int ret;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
ret = __jbd2_log_start_commit(journal, tid); ret = __jbd2_log_start_commit(journal, tid);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} }
@ -518,7 +518,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
transaction_t *transaction = NULL; transaction_t *transaction = NULL;
tid_t tid; tid_t tid;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) { if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
@ -526,12 +526,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
transaction = journal->j_committing_transaction; transaction = journal->j_committing_transaction;
if (!transaction) { if (!transaction) {
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
return 0; /* Nothing to retry */ return 0; /* Nothing to retry */
} }
tid = transaction->t_tid; tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid); jbd2_log_wait_commit(journal, tid);
return 1; return 1;
} }
@ -545,7 +545,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{ {
int ret = 0; int ret = 0;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (journal->j_running_transaction) { if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid; tid_t tid = journal->j_running_transaction->t_tid;
@ -564,7 +564,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
*ptid = journal->j_committing_transaction->t_tid; *ptid = journal->j_committing_transaction->t_tid;
ret = 1; ret = 1;
} }
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} }
@ -576,26 +576,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{ {
int err = 0; int err = 0;
read_lock(&journal->j_state_lock);
#ifdef CONFIG_JBD2_DEBUG #ifdef CONFIG_JBD2_DEBUG
spin_lock(&journal->j_state_lock);
if (!tid_geq(journal->j_commit_request, tid)) { if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG printk(KERN_EMERG
"%s: error: j_commit_request=%d, tid=%d\n", "%s: error: j_commit_request=%d, tid=%d\n",
__func__, journal->j_commit_request, tid); __func__, journal->j_commit_request, tid);
} }
spin_unlock(&journal->j_state_lock);
#endif #endif
spin_lock(&journal->j_state_lock);
while (tid_gt(tid, journal->j_commit_sequence)) { while (tid_gt(tid, journal->j_commit_sequence)) {
jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
tid, journal->j_commit_sequence); tid, journal->j_commit_sequence);
wake_up(&journal->j_wait_commit); wake_up(&journal->j_wait_commit);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_done_commit, wait_event(journal->j_wait_done_commit,
!tid_gt(tid, journal->j_commit_sequence)); !tid_gt(tid, journal->j_commit_sequence));
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
} }
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
if (unlikely(is_journal_aborted(journal))) { if (unlikely(is_journal_aborted(journal))) {
printk(KERN_EMERG "journal commit I/O error\n"); printk(KERN_EMERG "journal commit I/O error\n");
@ -612,7 +610,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
{ {
unsigned long blocknr; unsigned long blocknr;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
J_ASSERT(journal->j_free > 1); J_ASSERT(journal->j_free > 1);
blocknr = journal->j_head; blocknr = journal->j_head;
@ -620,7 +618,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
journal->j_free--; journal->j_free--;
if (journal->j_head == journal->j_last) if (journal->j_head == journal->j_last)
journal->j_head = journal->j_first; journal->j_head = journal->j_first;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return jbd2_journal_bmap(journal, blocknr, retp); return jbd2_journal_bmap(journal, blocknr, retp);
} }
@ -840,7 +838,7 @@ static journal_t * journal_init_common (void)
mutex_init(&journal->j_checkpoint_mutex); mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_revoke_lock);
spin_lock_init(&journal->j_list_lock); spin_lock_init(&journal->j_list_lock);
spin_lock_init(&journal->j_state_lock); rwlock_init(&journal->j_state_lock);
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
journal->j_min_batch_time = 0; journal->j_min_batch_time = 0;
@ -1106,14 +1104,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
} }
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence, journal->j_errno); journal->j_tail, journal->j_tail_sequence, journal->j_errno);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(journal->j_tail); sb->s_start = cpu_to_be32(journal->j_tail);
sb->s_errno = cpu_to_be32(journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
BUFFER_TRACE(bh, "marking dirty"); BUFFER_TRACE(bh, "marking dirty");
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
@ -1134,12 +1132,12 @@ out:
* any future commit will have to be careful to update the * any future commit will have to be careful to update the
* superblock again to re-record the true start of the log. */ * superblock again to re-record the true start of the log. */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (sb->s_start) if (sb->s_start)
journal->j_flags &= ~JBD2_FLUSHED; journal->j_flags &= ~JBD2_FLUSHED;
else else
journal->j_flags |= JBD2_FLUSHED; journal->j_flags |= JBD2_FLUSHED;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* /*
@ -1551,7 +1549,7 @@ int jbd2_journal_flush(journal_t *journal)
transaction_t *transaction = NULL; transaction_t *transaction = NULL;
unsigned long old_tail; unsigned long old_tail;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
/* Force everything buffered to the log... */ /* Force everything buffered to the log... */
if (journal->j_running_transaction) { if (journal->j_running_transaction) {
@ -1564,10 +1562,10 @@ int jbd2_journal_flush(journal_t *journal)
if (transaction) { if (transaction) {
tid_t tid = transaction->t_tid; tid_t tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid); jbd2_log_wait_commit(journal, tid);
} else { } else {
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* ...and flush everything in the log out to disk. */ /* ...and flush everything in the log out to disk. */
@ -1591,12 +1589,12 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future * the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current * commits of data to the journal will restore the current
* s_start value. */ * s_start value. */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
old_tail = journal->j_tail; old_tail = journal->j_tail;
journal->j_tail = 0; journal->j_tail = 0;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_journal_update_superblock(journal, 1); jbd2_journal_update_superblock(journal, 1);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_tail = old_tail; journal->j_tail = old_tail;
J_ASSERT(!journal->j_running_transaction); J_ASSERT(!journal->j_running_transaction);
@ -1604,7 +1602,7 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(!journal->j_checkpoint_transactions); J_ASSERT(!journal->j_checkpoint_transactions);
J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 0; return 0;
} }
@ -1668,12 +1666,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
printk(KERN_ERR "Aborting journal on device %s.\n", printk(KERN_ERR "Aborting journal on device %s.\n",
journal->j_devname); journal->j_devname);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_ABORT; journal->j_flags |= JBD2_ABORT;
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
if (transaction) if (transaction)
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* Soft abort: record the abort error status in the journal superblock, /* Soft abort: record the abort error status in the journal superblock,
@ -1758,12 +1756,12 @@ int jbd2_journal_errno(journal_t *journal)
{ {
int err; int err;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT) if (journal->j_flags & JBD2_ABORT)
err = -EROFS; err = -EROFS;
else else
err = journal->j_errno; err = journal->j_errno;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
return err; return err;
} }
@ -1778,12 +1776,12 @@ int jbd2_journal_clear_err(journal_t *journal)
{ {
int err = 0; int err = 0;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT) if (journal->j_flags & JBD2_ABORT)
err = -EROFS; err = -EROFS;
else else
journal->j_errno = 0; journal->j_errno = 0;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return err; return err;
} }
@ -1796,10 +1794,10 @@ int jbd2_journal_clear_err(journal_t *journal)
*/ */
void jbd2_journal_ack_err(journal_t *journal) void jbd2_journal_ack_err(journal_t *journal)
{ {
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (journal->j_errno) if (journal->j_errno)
journal->j_flags |= JBD2_ACK_ERR; journal->j_flags |= JBD2_ACK_ERR;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
int jbd2_journal_blocks_per_page(struct inode *inode) int jbd2_journal_blocks_per_page(struct inode *inode)

View File

@ -124,37 +124,39 @@ alloc_transaction:
jbd_debug(3, "New handle %p going live.\n", handle); jbd_debug(3, "New handle %p going live.\n", handle);
repeat:
/* /*
* We need to hold j_state_lock until t_updates has been incremented, * We need to hold j_state_lock until t_updates has been incremented,
* for proper journal barrier handling * for proper journal barrier handling
*/ */
spin_lock(&journal->j_state_lock); repeat:
repeat_locked: read_lock(&journal->j_state_lock);
if (is_journal_aborted(journal) || if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
kfree(new_transaction); kfree(new_transaction);
return -EROFS; return -EROFS;
} }
/* Wait on the journal's transaction barrier if necessary */ /* Wait on the journal's transaction barrier if necessary */
if (journal->j_barrier_count) { if (journal->j_barrier_count) {
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_transaction_locked, wait_event(journal->j_wait_transaction_locked,
journal->j_barrier_count == 0); journal->j_barrier_count == 0);
goto repeat; goto repeat;
} }
if (!journal->j_running_transaction) { if (!journal->j_running_transaction) {
if (!new_transaction) { read_unlock(&journal->j_state_lock);
spin_unlock(&journal->j_state_lock); if (!new_transaction)
goto alloc_transaction; goto alloc_transaction;
} write_lock(&journal->j_state_lock);
if (!journal->j_running_transaction) {
jbd2_get_transaction(journal, new_transaction); jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL; new_transaction = NULL;
} }
write_unlock(&journal->j_state_lock);
goto repeat;
}
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
@ -167,7 +169,7 @@ repeat_locked:
prepare_to_wait(&journal->j_wait_transaction_locked, prepare_to_wait(&journal->j_wait_transaction_locked,
&wait, TASK_UNINTERRUPTIBLE); &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait); finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat; goto repeat;
@ -194,7 +196,7 @@ repeat_locked:
prepare_to_wait(&journal->j_wait_transaction_locked, &wait, prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait); finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat; goto repeat;
@ -228,8 +230,12 @@ repeat_locked:
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
read_unlock(&journal->j_state_lock);
write_lock(&journal->j_state_lock);
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
__jbd2_log_wait_for_space(journal); __jbd2_log_wait_for_space(journal);
goto repeat_locked; write_unlock(&journal->j_state_lock);
goto repeat;
} }
/* OK, account for the buffers that this operation expects to /* OK, account for the buffers that this operation expects to
@ -250,7 +256,7 @@ repeat_locked:
atomic_read(&transaction->t_outstanding_credits), atomic_read(&transaction->t_outstanding_credits),
__jbd2_log_space_left(journal)); __jbd2_log_space_left(journal));
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map); lock_map_acquire(&handle->h_lockdep_map);
kfree(new_transaction); kfree(new_transaction);
@ -362,7 +368,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
result = 1; result = 1;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
/* Don't extend a locked-down transaction! */ /* Don't extend a locked-down transaction! */
if (handle->h_transaction->t_state != T_RUNNING) { if (handle->h_transaction->t_state != T_RUNNING) {
@ -394,7 +400,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
unlock: unlock:
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
error_out: error_out:
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
out: out:
return result; return result;
} }
@ -432,7 +438,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
J_ASSERT(atomic_read(&transaction->t_updates) > 0); J_ASSERT(atomic_read(&transaction->t_updates) > 0);
J_ASSERT(journal_current_handle() == handle); J_ASSERT(journal_current_handle() == handle);
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock); spin_lock(&transaction->t_handle_lock);
atomic_sub(handle->h_buffer_credits, atomic_sub(handle->h_buffer_credits,
&transaction->t_outstanding_credits); &transaction->t_outstanding_credits);
@ -442,7 +448,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
jbd_debug(2, "restarting handle %p\n", handle); jbd_debug(2, "restarting handle %p\n", handle);
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
lock_map_release(&handle->h_lockdep_map); lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks; handle->h_buffer_credits = nblocks;
@ -472,7 +478,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
{ {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
++journal->j_barrier_count; ++journal->j_barrier_count;
/* Wait until there are no running updates */ /* Wait until there are no running updates */
@ -490,12 +496,12 @@ void jbd2_journal_lock_updates(journal_t *journal)
prepare_to_wait(&journal->j_wait_updates, &wait, prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_updates, &wait); finish_wait(&journal->j_wait_updates, &wait);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} }
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
/* /*
* We have now established a barrier against other normal updates, but * We have now established a barrier against other normal updates, but
@ -519,9 +525,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
J_ASSERT(journal->j_barrier_count != 0); J_ASSERT(journal->j_barrier_count != 0);
mutex_unlock(&journal->j_barrier); mutex_unlock(&journal->j_barrier);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
--journal->j_barrier_count; --journal->j_barrier_count;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_transaction_locked); wake_up(&journal->j_wait_transaction_locked);
} }
@ -1314,9 +1320,9 @@ int jbd2_journal_stop(handle_t *handle)
journal->j_last_sync_writer = pid; journal->j_last_sync_writer = pid;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
commit_time = journal->j_average_commit_time; commit_time = journal->j_average_commit_time;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
trans_time = ktime_to_ns(ktime_sub(ktime_get(), trans_time = ktime_to_ns(ktime_sub(ktime_get(),
transaction->t_start_time)); transaction->t_start_time));
@ -1748,7 +1754,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
goto zap_buffer_unlocked; goto zap_buffer_unlocked;
/* OK, we have data buffer in journaled mode */ /* OK, we have data buffer in journaled mode */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
@ -1801,7 +1807,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} else { } else {
/* There is no currently-running transaction. So the /* There is no currently-running transaction. So the
@ -1815,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} else { } else {
/* The orphan record's transaction has /* The orphan record's transaction has
@ -1839,7 +1845,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 0; return 0;
} else { } else {
/* Good, the buffer belongs to the running transaction. /* Good, the buffer belongs to the running transaction.
@ -1858,7 +1864,7 @@ zap_buffer:
zap_buffer_no_jh: zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
zap_buffer_unlocked: zap_buffer_unlocked:
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh)); J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@ -2165,9 +2171,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
/* Locks are here just to force reading of recent values, it is /* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started * enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */ * a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction; commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
inode_trans = jinode->i_transaction; inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);

View File

@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
if (osb->osb_commit_interval) if (osb->osb_commit_interval)
commit_interval = osb->osb_commit_interval; commit_interval = osb->osb_commit_interval;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_commit_interval = commit_interval; journal->j_commit_interval = commit_interval;
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
journal->j_flags |= JBD2_BARRIER; journal->j_flags |= JBD2_BARRIER;
else else
journal->j_flags &= ~JBD2_BARRIER; journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)

View File

@ -764,7 +764,7 @@ struct journal_s
/* /*
* Protect the various scalars in the journal * Protect the various scalars in the journal
*/ */
spinlock_t j_state_lock; rwlock_t j_state_lock;
/* /*
* Number of processes waiting to create a barrier lock [j_state_lock] * Number of processes waiting to create a barrier lock [j_state_lock]