// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/checkpoint.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1999 Red Hat Software --- All Rights Reserved
 *
 * Checkpoint routines for the generic filesystem journaling code.
 * Part of the ext2fs journaling system.
 *
 * Checkpointing is the process of ensuring that a section of the log is
 * committed fully to disk, so that that portion of the log can be
 * reused.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <trace/events/jbd2.h>

/*
 * Unlink a buffer from a transaction checkpoint list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	jh->b_cpnext->b_cpprev = jh->b_cpprev;
	jh->b_cpprev->b_cpnext = jh->b_cpnext;
	if (transaction->t_checkpoint_list == jh) {
		transaction->t_checkpoint_list = jh->b_cpnext;
		if (transaction->t_checkpoint_list == jh)
			transaction->t_checkpoint_list = NULL;
	}
}
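
/*
 * Note: checkpoint buffers hang off their transaction in a circular
 * doubly-linked list, so a one-element list has
 * jh->b_cpnext == jh->b_cpprev == jh.  That is why, after unlinking,
 * a list head that still points back at jh means jh was the only
 * element and the head must be cleared to NULL.
 */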

/*
 * __jbd2_log_wait_for_space: wait until there is space in the journal.
 *
 * Called under j_state_lock *only*.  It will be unlocked if we have to wait
 * for a checkpoint to free up some space in the log.
 */
void __jbd2_log_wait_for_space(journal_t *journal)
__acquires(&journal->j_state_lock)
__releases(&journal->j_state_lock)
{
	int nblocks, space_left;
	/* assert_spin_locked(&journal->j_state_lock); */

	nblocks = journal->j_max_transaction_buffers;
	while (jbd2_log_space_left(journal) < nblocks) {
		write_unlock(&journal->j_state_lock);
		mutex_lock_io(&journal->j_checkpoint_mutex);

		/*
		 * Test again, another process may have checkpointed while we
		 * were waiting for the checkpoint lock. If there are no
		 * transactions ready to be checkpointed, try to recover
		 * journal space by calling cleanup_journal_tail(), and if
		 * that doesn't work, by waiting for the currently committing
		 * transaction to complete.  If there is absolutely no way
		 * to make progress, this is either a BUG or corrupted
		 * filesystem, so abort the journal and leave a stack
		 * trace for forensic evidence.
		 */
		write_lock(&journal->j_state_lock);
		if (journal->j_flags & JBD2_ABORT) {
			mutex_unlock(&journal->j_checkpoint_mutex);
			return;
		}
		spin_lock(&journal->j_list_lock);
		space_left = jbd2_log_space_left(journal);
		if (space_left < nblocks) {
			int chkpt = journal->j_checkpoint_transactions != NULL;
			tid_t tid = 0;

			if (journal->j_committing_transaction)
				tid = journal->j_committing_transaction->t_tid;
			spin_unlock(&journal->j_list_lock);
			write_unlock(&journal->j_state_lock);
			if (chkpt) {
				jbd2_log_do_checkpoint(journal);
			} else if (jbd2_cleanup_journal_tail(journal) == 0) {
				/* We were able to recover space; yay! */
				;
			} else if (tid) {
				/*
				 * jbd2_journal_commit_transaction() may want
				 * to take the checkpoint_mutex if JBD2_FLUSHED
				 * is set.  So we need to temporarily drop it.
				 */
				mutex_unlock(&journal->j_checkpoint_mutex);
				jbd2_log_wait_commit(journal, tid);
				write_lock(&journal->j_state_lock);
				continue;
			} else {
				printk(KERN_ERR "%s: needed %d blocks and "
				       "only had %d space available\n",
				       __func__, nblocks, space_left);
				printk(KERN_ERR "%s: no way to get more "
				       "journal space in %s\n", __func__,
				       journal->j_devname);
				WARN_ON(1);
				jbd2_journal_abort(journal, -EIO);
			}
			write_lock(&journal->j_state_lock);
		} else {
			spin_unlock(&journal->j_list_lock);
		}
		mutex_unlock(&journal->j_checkpoint_mutex);
	}
}
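
/*
 * Illustrative sketch of a caller (start_this_handle() in transaction.c
 * follows roughly this pattern; details simplified here):
 *
 *	write_lock(&journal->j_state_lock);
 *	if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers)
 *		__jbd2_log_wait_for_space(journal);
 *	// j_state_lock is held again on return, but it may have been
 *	// dropped and retaken, so any cached journal state is stale.
 *	write_unlock(&journal->j_state_lock);
 */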

static void
__flush_batch(journal_t *journal, int *batch_count)
{
	int i;
	struct blk_plug plug;

	blk_start_plug(&plug);
	for (i = 0; i < *batch_count; i++)
		write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
	blk_finish_plug(&plug);

	for (i = 0; i < *batch_count; i++) {
		struct buffer_head *bh = journal->j_chkpt_bhs[i];

		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		journal->j_chkpt_bhs[i] = NULL;
	}
	*batch_count = 0;
}
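
/*
 * Note on the design: the blk_plug around the submission loop lets the
 * block layer merge the batched writes into larger requests before they
 * are issued, which is the point of collecting up to JBD2_NR_BATCH
 * buffers in j_chkpt_bhs[] instead of submitting each buffer as soon as
 * it is found.
 */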

/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 */
int jbd2_log_do_checkpoint(journal_t *journal)
{
	struct journal_head	*jh;
	struct buffer_head	*bh;
	transaction_t		*transaction;
	tid_t			this_tid;
	int			result, batch_count = 0;

	jbd2_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = jbd2_cleanup_journal_tail(journal);
	trace_jbd2_checkpoint(journal, result);
	jbd2_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks.  Take one transaction
	 * and write it.
	 */
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	if (transaction->t_chp_stats.cs_chp_time == 0)
		transaction->t_chp_stats.cs_chp_time = jiffies;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		goto out;

	/* checkpoint all of the transaction's buffers */
	while (transaction->t_checkpoint_list) {
		jh = transaction->t_checkpoint_list;
		bh = jh2bh(jh);

		if (jh->b_transaction != NULL) {
			transaction_t *t = jh->b_transaction;
			tid_t tid = t->t_tid;

			transaction->t_chp_stats.cs_forced_to_close++;
			spin_unlock(&journal->j_list_lock);
			if (unlikely(journal->j_flags & JBD2_UNMOUNT))
				/*
				 * The journal thread is dead; so
				 * starting and waiting for a commit
				 * to finish will cause us to wait for
				 * a _very_ long time.
				 */
				printk(KERN_ERR
				       "JBD2: %s: Waiting for Godot: block %llu\n",
				       journal->j_devname, (unsigned long long) bh->b_blocknr);

			if (batch_count)
				__flush_batch(journal, &batch_count);
			jbd2_log_start_commit(journal, tid);
			/*
			 * jbd2_journal_commit_transaction() may want
			 * to take the checkpoint_mutex if JBD2_FLUSHED
			 * is set, jbd2_update_log_tail() called by
			 * jbd2_journal_commit_transaction() may also take
			 * checkpoint_mutex.  So we need to temporarily
			 * drop it.
			 */
			mutex_unlock(&journal->j_checkpoint_mutex);
			jbd2_log_wait_commit(journal, tid);
			mutex_lock_io(&journal->j_checkpoint_mutex);
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		if (!trylock_buffer(bh)) {
			/*
			 * The buffer is locked, it may be writing back, or
			 * flushing out in the last couple of cycles, or
			 * re-adding into a new transaction, need to check
			 * it again until it's unlocked.
			 */
			get_bh(bh);
			spin_unlock(&journal->j_list_lock);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			goto retry;
		} else if (!buffer_dirty(bh)) {
			unlock_buffer(bh);
			BUFFER_TRACE(bh, "remove from checkpoint");
			/*
			 * If the transaction was released or the checkpoint
			 * list was empty, we're done.
			 */
			if (__jbd2_journal_remove_checkpoint(jh) ||
			    !transaction->t_checkpoint_list)
				goto out;
		} else {
			unlock_buffer(bh);
			/*
			 * We are about to write the buffer, it could be
			 * raced by some other transaction shrink or buffer
			 * re-log logic once we release the j_list_lock,
			 * leave it on the checkpoint list and check status
			 * again to make sure it's clean.
			 */
			BUFFER_TRACE(bh, "queue");
			get_bh(bh);
			J_ASSERT_BH(bh, !buffer_jwrite(bh));
			journal->j_chkpt_bhs[batch_count++] = bh;
			transaction->t_chp_stats.cs_written++;
			transaction->t_checkpoint_list = jh->b_cpnext;
		}

		if ((batch_count == JBD2_NR_BATCH) ||
		    need_resched() || spin_needbreak(&journal->j_list_lock) ||
		    jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
			goto unlock_and_flush;
	}

	if (batch_count) {
		unlock_and_flush:
			spin_unlock(&journal->j_list_lock);
		retry:
			if (batch_count)
				__flush_batch(journal, &batch_count);
			spin_lock(&journal->j_list_lock);
			goto restart;
	}
out:
	spin_unlock(&journal->j_list_lock);
	result = jbd2_cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}
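
/*
 * Summary of the loop above: a buffer at the head of the checkpoint list
 * is handled in one of three ways -- still owned by a committing
 * transaction (kick and wait for that commit, then restart), already
 * clean (remove it from the checkpoint list right away), or dirty
 * (queue it in j_chkpt_bhs[] and flush the batch).  Every sleep drops
 * j_list_lock, which is why the code re-validates the transaction via
 * "goto restart" afterwards.
 */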

/*
 * Check the list of checkpoint transactions for the journal to see if
 * we have already got rid of any since the last update of the log tail
 * in the journal superblock.  If so, we can instantly roll the
 * superblock forward to remove those transactions from the log.
 *
 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
 *
 * Called with the journal lock held.
 *
 * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts.  Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
 * even in abort state, but we must not update the super block if
 * checkpointing may have failed.  Otherwise, we would lose some metadata
 * buffers which should be written-back to the filesystem.
 */
int jbd2_cleanup_journal_tail(journal_t *journal)
{
	tid_t		first_tid;
	unsigned long	blocknr;

	if (is_journal_aborted(journal))
		return -EIO;

	if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
		return 1;
	J_ASSERT(blocknr != 0);

	/*
	 * We need to make sure that any blocks that were recently written out
	 * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
	 * we drop the transactions from the journal. It's unlikely this will
	 * be necessary, especially with an appropriately sized journal, but we
	 * need this to guarantee correctness.  Fortunately
	 * jbd2_cleanup_journal_tail() doesn't get called all that often.
	 */
	if (journal->j_flags & JBD2_BARRIER)
		blkdev_issue_flush(journal->j_fs_dev);

	return __jbd2_update_log_tail(journal, first_tid, blocknr);
}
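
/*
 * The ordering above is what makes the tail update safe: first make the
 * checkpointed data durable (the cache flush), and only then let
 * __jbd2_update_log_tail() advance the on-disk tail past those
 * transactions.  The reverse order could reuse journal space whose
 * contents still sit only in the disk's volatile write cache.
 */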
/* Checkpoint list management */

enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};

/*
 * journal_shrink_one_cp_list
 *
 * Find all the written-back checkpoint buffers in the given list
 * and try to release them. If the whole transaction is released, set
 * the 'released' parameter. Return the number of released checkpointed
 * buffers.
 *
 * Called with j_list_lock held.
 */
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
						enum shrink_type type,
						bool *released)
{
	struct journal_head *last_jh;
	struct journal_head *next_jh = jh;
	unsigned long nr_freed = 0;
	int ret;

	*released = false;
	if (!jh)
		return 0;

	last_jh = jh->b_cpprev;
	do {
		jh = next_jh;
		next_jh = jh->b_cpnext;
		if (type == SHRINK_DESTROY) {
			ret = __jbd2_journal_remove_checkpoint(jh);
		} else {
			ret = jbd2_journal_try_remove_checkpoint(jh);
			if (ret < 0) {
				if (type == SHRINK_BUSY_SKIP)
					continue;
				break;
			}
		}

		nr_freed++;
		if (ret) {
			*released = true;
			break;
		}

		if (need_resched())
			break;
	} while (jh != last_jh);

	return nr_freed;
}
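
/*
 * The three shrink types differ only in how a busy buffer is handled:
 * SHRINK_DESTROY removes every buffer unconditionally (journal
 * teardown), SHRINK_BUSY_STOP stops scanning at the first busy buffer
 * (best-effort cleanup), and SHRINK_BUSY_SKIP steps over busy buffers
 * and keeps going (the shrinker path, which wants to scan its quota of
 * buffers regardless).
 */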

/*
 * jbd2_journal_shrink_checkpoint_list
 *
 * Find 'nr_to_scan' written-back checkpoint buffers in the journal
 * and try to release them. Return the number of released checkpointed
 * buffers.
 *
 * Called with j_list_lock held.
 */
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
						  unsigned long *nr_to_scan)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	bool __maybe_unused released;
	tid_t first_tid = 0, last_tid = 0, next_tid = 0;
	tid_t tid = 0;
	unsigned long nr_freed = 0;
	unsigned long freed;

again:
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions) {
		spin_unlock(&journal->j_list_lock);
		goto out;
	}

	/*
	 * Get next shrink transaction, resume previous scan or start
	 * over again. If some others do checkpoint and drop transaction
	 * from the checkpoint list, we ignore saved j_shrink_transaction
	 * and start over unconditionally.
	 */
	if (journal->j_shrink_transaction)
		transaction = journal->j_shrink_transaction;
	else
		transaction = journal->j_checkpoint_transactions;

	if (!first_tid)
		first_tid = transaction->t_tid;
	last_transaction = journal->j_checkpoint_transactions->t_cpprev;
	next_transaction = transaction;
	last_tid = last_transaction->t_tid;
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		tid = transaction->t_tid;

		freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
						   SHRINK_BUSY_SKIP, &released);
		nr_freed += freed;
		(*nr_to_scan) -= min(*nr_to_scan, freed);
		if (*nr_to_scan == 0)
			break;
		if (need_resched() || spin_needbreak(&journal->j_list_lock))
			break;
	} while (transaction != last_transaction);

	if (transaction != last_transaction) {
		journal->j_shrink_transaction = next_transaction;
		next_tid = next_transaction->t_tid;
	} else {
		journal->j_shrink_transaction = NULL;
		next_tid = 0;
	}

	spin_unlock(&journal->j_list_lock);
	cond_resched();

	if (*nr_to_scan && next_tid)
		goto again;
out:
	trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
					  nr_freed, next_tid);
	return nr_freed;
}
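
/*
 * j_shrink_transaction acts as a cursor: successive shrinker calls
 * resume where the previous scan stopped instead of rescanning from the
 * head.  __jbd2_journal_drop_transaction() clears it whenever a
 * transaction is dropped from the checkpoint list, so a stale cursor is
 * never followed.
 */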

/*
 * journal_clean_checkpoint_list
 *
 * Find all the written-back checkpoint buffers in the journal and release them.
 * If 'destroy' is set, release all buffers unconditionally.
 *
 * Called with j_list_lock held.
 */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	enum shrink_type type;
	bool released;

	transaction = journal->j_checkpoint_transactions;
	if (!transaction)
		return;

	type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
	last_transaction = transaction->t_cpprev;
	next_transaction = transaction;
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		journal_shrink_one_cp_list(transaction->t_checkpoint_list,
					   type, &released);
		/*
		 * This function only frees up some memory if possible so we
		 * don't have an obligation to finish processing. Bail out if
		 * preemption requested:
		 */
		if (need_resched())
			return;
		/*
		 * Stop scanning if we couldn't free the transaction. This
		 * avoids pointless scanning of transactions which still
		 * weren't checkpointed.
		 */
		if (!released)
			return;
	} while (transaction != last_transaction);
}

/*
 * Remove buffers from all checkpoint lists as journal is aborted and we just
 * need to free memory
 */
void jbd2_journal_destroy_checkpoint(journal_t *journal)
{
	/*
	 * We loop because __jbd2_journal_clean_checkpoint_list() may abort
	 * early due to a need of rescheduling.
	 */
	while (1) {
		spin_lock(&journal->j_list_lock);
		if (!journal->j_checkpoint_transactions) {
			spin_unlock(&journal->j_list_lock);
			break;
		}
		__jbd2_journal_clean_checkpoint_list(journal, true);
		spin_unlock(&journal->j_list_lock);
		cond_resched();
	}
}

/*
 * journal_remove_checkpoint: called after a buffer has been committed
 * to disk (either by being write-back flushed to disk, or being
 * committed to the log).
 *
 * We cannot safely clean a transaction out of the log until all of the
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk.  To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
 * lists until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise.
 * The function can free jh and bh.
 *
 * This function is called with j_list_lock held.
 */
int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
	struct transaction_chp_stats_s *stats;
	transaction_t *transaction;
	journal_t *journal;

	JBUFFER_TRACE(jh, "entry");

	transaction = jh->b_cp_transaction;
	if (!transaction) {
		JBUFFER_TRACE(jh, "not on transaction");
		return 0;
	}
	journal = transaction->t_journal;

	JBUFFER_TRACE(jh, "removing from transaction");

	__buffer_unlink(jh);
	jh->b_cp_transaction = NULL;
	percpu_counter_dec(&journal->j_checkpoint_jh_count);
	jbd2_journal_put_journal_head(jh);

	/* Is this transaction empty? */
	if (transaction->t_checkpoint_list)
		return 0;

	/*
	 * There is one special case to worry about: if we have just pulled the
	 * buffer off a running or committing transaction's checkpoint list,
	 * then even if the checkpoint list is empty, the transaction obviously
	 * cannot be dropped!
	 *
	 * The locking here around t_state is a bit sleazy.
	 * See the comment at the end of jbd2_journal_commit_transaction().
	 */
	if (transaction->t_state != T_FINISHED)
		return 0;

	/*
	 * OK, that was the last buffer for the transaction, we can now
	 * safely remove this transaction from the log.
	 */
	stats = &transaction->t_chp_stats;
	if (stats->cs_chp_time)
		stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
						    jiffies);
	trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
				    transaction->t_tid, stats);

	__jbd2_journal_drop_transaction(journal, transaction);
	jbd2_journal_free_transaction(transaction);
	return 1;
}

/*
 * Check the checkpoint buffer and try to remove it from the checkpoint
 * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
 * it frees the transaction, 0 otherwise.
 *
 * This function is called with j_list_lock held.
 */
int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	if (jh->b_transaction)
		return -EBUSY;
	if (!trylock_buffer(bh))
		return -EBUSY;
	if (buffer_dirty(bh)) {
		unlock_buffer(bh);
		return -EBUSY;
	}
	unlock_buffer(bh);

	/*
	 * Buffer is clean and the IO has finished (we held the buffer
	 * lock) so the checkpoint is done. We can safely remove the
	 * buffer from this transaction.
	 */
	JBUFFER_TRACE(jh, "remove from checkpoint list");
	return __jbd2_journal_remove_checkpoint(jh);
}
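
/*
 * Illustrative use of the return convention (a sketch, mirroring what
 * journal_shrink_one_cp_list() does above):
 *
 *	ret = jbd2_journal_try_remove_checkpoint(jh);
 *	if (ret < 0)		// -EBUSY: still referenced, locked or dirty
 *		skip_or_stop();
 *	else if (ret)		// 1: buffer removed, transaction freed too
 *		stop_using(transaction);
 *	else			// 0: buffer removed, transaction remains
 *		continue_scan();
 *
 * skip_or_stop(), stop_using() and continue_scan() are placeholders,
 * not real helpers.
 */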

/*
 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 * list so that we know when it is safe to clean the transaction out of
 * the log.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
				      transaction_t *transaction)
{
	JBUFFER_TRACE(jh, "entry");
	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

	/* Get reference for checkpointing transaction */
	jbd2_journal_grab_journal_head(jh2bh(jh));
	jh->b_cp_transaction = transaction;

	if (!transaction->t_checkpoint_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_list;
		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_list = jh;
	percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count);
}

/*
 * We've finished with this transaction structure: adios...
 *
 * The transaction must have no links except for the checkpoint by this
 * point.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
	assert_spin_locked(&journal->j_list_lock);

	journal->j_shrink_transaction = NULL;
	if (transaction->t_cpnext) {
		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions =
				transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions = NULL;
	}

	J_ASSERT(transaction->t_state == T_FINISHED);
	J_ASSERT(transaction->t_buffers == NULL);
	J_ASSERT(transaction->t_forget == NULL);
	J_ASSERT(transaction->t_shadow_list == NULL);
	J_ASSERT(transaction->t_checkpoint_list == NULL);
	J_ASSERT(atomic_read(&transaction->t_updates) == 0);
	J_ASSERT(journal->j_committing_transaction != transaction);
	J_ASSERT(journal->j_running_transaction != transaction);

	trace_jbd2_drop_transaction(journal, transaction);

	jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}