2006-10-11 12:20:57 +04:00
/*
2006-10-11 12:20:59 +04:00
* linux / fs / jbd2 / journal . c
2006-10-11 12:20:57 +04:00
*
* Written by Stephen C . Tweedie < sct @ redhat . com > , 1998
*
* Copyright 1998 Red Hat corp - - - All Rights Reserved
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License , version 2 , or at your
* option , any later version , incorporated herein by reference .
*
* Generic filesystem journal - writing code ; part of the ext2fs
* journaling system .
*
* This file manages journals : areas of disk reserved for logging
* transactional updates . This includes the kernel journaling thread
* which is responsible for scheduling updates to the log .
*
* We do not actually manage the physical storage of the journal in this
* file : that is left to a per - journal policy function , which allows us
* to store the journal within a filesystem - specified area for ext2
* journaling ( ext2 can use a reserved inode for storing the log ) .
*/
# include <linux/module.h>
# include <linux/time.h>
# include <linux/fs.h>
2006-10-11 12:20:59 +04:00
# include <linux/jbd2.h>
2006-10-11 12:20:57 +04:00
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/mm.h>
2006-12-07 07:34:23 +03:00
# include <linux/freezer.h>
2006-10-11 12:20:57 +04:00
# include <linux/pagemap.h>
# include <linux/kthread.h>
# include <linux/poison.h>
# include <linux/proc_fs.h>
2008-01-29 07:58:27 +03:00
# include <linux/seq_file.h>
2009-01-12 06:34:01 +03:00
# include <linux/math64.h>
2009-06-17 19:47:48 +04:00
# include <linux/hash.h>
2009-12-07 18:36:20 +03:00
# include <linux/log2.h>
# include <linux/vmalloc.h>
2010-07-27 19:56:05 +04:00
# include <linux/backing-dev.h>
2010-10-28 05:25:12 +04:00
# include <linux/bitops.h>
2010-12-17 18:44:16 +03:00
# include <linux/ratelimit.h>
2009-06-17 19:47:48 +04:00
# define CREATE_TRACE_POINTS
# include <trace/events/jbd2.h>
2006-10-11 12:20:57 +04:00
# include <asm/uaccess.h>
# include <asm/page.h>
2013-02-10 01:29:20 +04:00
/*
 * Debug verbosity level, built only when CONFIG_JBD2_DEBUG is set.  The
 * variable is exported and also exposed as the "jbd2_debug" module
 * parameter with mode 0644.
 */
#ifdef CONFIG_JBD2_DEBUG
ushort jbd2_journal_enable_debug __read_mostly;
EXPORT_SYMBOL(jbd2_journal_enable_debug);

module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
MODULE_PARM_DESC(jbd2_debug, " Debugging level for jbd2 ");
#endif
2006-10-11 12:20:59 +04:00
/*
 * Symbols exported by this file.
 *
 * The former "#if 0" export of journal_sync_buffer was dead code (it was
 * never compiled) and has been dropped.
 */
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
EXPORT_SYMBOL(jbd2_journal_lock_updates);
EXPORT_SYMBOL(jbd2_journal_unlock_updates);
EXPORT_SYMBOL(jbd2_journal_get_write_access);
EXPORT_SYMBOL(jbd2_journal_get_create_access);
EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_forget);
EXPORT_SYMBOL(jbd2_journal_flush);
EXPORT_SYMBOL(jbd2_journal_revoke);

EXPORT_SYMBOL(jbd2_journal_init_dev);
EXPORT_SYMBOL(jbd2_journal_init_inode);
EXPORT_SYMBOL(jbd2_journal_check_used_features);
EXPORT_SYMBOL(jbd2_journal_check_available_features);
EXPORT_SYMBOL(jbd2_journal_set_features);
EXPORT_SYMBOL(jbd2_journal_load);
EXPORT_SYMBOL(jbd2_journal_destroy);
EXPORT_SYMBOL(jbd2_journal_abort);
EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err);
EXPORT_SYMBOL(jbd2_journal_clear_err);
EXPORT_SYMBOL(jbd2_log_wait_commit);
EXPORT_SYMBOL(jbd2_log_start_commit);
EXPORT_SYMBOL(jbd2_journal_start_commit);
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
EXPORT_SYMBOL(jbd2_journal_wipe);
EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
EXPORT_SYMBOL(jbd2_journal_invalidatepage);
EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
EXPORT_SYMBOL(jbd2_journal_force_commit);
EXPORT_SYMBOL(jbd2_journal_file_inode);
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
EXPORT_SYMBOL(jbd2_inode_cache);
2006-10-11 12:20:57 +04:00
static void __journal_abort_soft ( journal_t * journal , int errno ) ;
2009-12-07 18:36:20 +03:00
static int jbd2_journal_create_slab ( size_t slab_size ) ;
2006-10-11 12:20:57 +04:00
jbd2: use a single printk for jbd_debug()
Since the jbd_debug() is implemented with two separate printk()
calls, it can lead to corrupted and misleading debug output like
the following (see lines marked with "*"):
[ 290.339362] (fs/jbd2/journal.c, 203): kjournald2: kjournald2 wakes
[ 290.339365] (fs/jbd2/journal.c, 155): kjournald2: commit_sequence=42103, commit_request=42104
[ 290.339369] (fs/jbd2/journal.c, 158): kjournald2: OK, requests differ
[* 290.339376] (fs/jbd2/journal.c, 648): jbd2_log_wait_commit:
[* 290.339379] (fs/jbd2/commit.c, 370): jbd2_journal_commit_transaction: JBD2: want 42104, j_commit_sequence=42103
[* 290.339382] JBD2: starting commit of transaction 42104
[ 290.339410] (fs/jbd2/revoke.c, 566): jbd2_journal_write_revoke_records: Wrote 0 revoke records
[ 290.376555] (fs/jbd2/commit.c, 1088): jbd2_journal_commit_transaction: JBD2: commit 42104 complete, head 42079
i.e. the debug output from log_wait_commit and journal_commit_transaction
have become interleaved. The output should have been:
(fs/jbd2/journal.c, 648): jbd2_log_wait_commit: JBD2: want 42104, j_commit_sequence=42103
(fs/jbd2/commit.c, 370): jbd2_journal_commit_transaction: JBD2: starting commit of transaction 42104
It is expected that this is not easy to replicate -- I was only able
to cause it on preempt-rt kernels, and even then only under heavy
I/O load.
Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Suggested-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
2013-06-13 07:04:04 +04:00
#ifdef CONFIG_JBD2_DEBUG
/*
 * Emit one debug message using a single printk() call.
 *
 * @level: verbosity of this message; the message is dropped when @level
 *         is greater than jbd2_journal_enable_debug
 * @file:  first string printed in the message prefix
 * @func:  second string printed in the message prefix
 * @line:  number printed in the message prefix
 * @fmt:   printf-style format of the message body, followed by its
 *         arguments
 *
 * The caller's format and argument list are wrapped in a struct va_format
 * and expanded through the %pV specifier, so prefix and body go out in
 * the same printk().
 */
void __jbd2_debug(int level, const char *file, const char *func,
		  unsigned int line, const char *fmt, ...)
{
	va_list ap;
	struct va_format msg;

	if (level <= jbd2_journal_enable_debug) {
		va_start(ap, fmt);
		msg.fmt = fmt;
		msg.va = &ap;
		printk(KERN_DEBUG " %s: (%s, %u): %pV \n ",
		       file, func, line, &msg);
		va_end(ap);
	}
}
EXPORT_SYMBOL(__jbd2_debug);
#endif
2012-05-27 15:48:56 +04:00
/* Checksumming functions */
2014-02-18 05:49:04 +04:00
/*
 * Validate the checksum type stored in the journal superblock.
 *
 * Returns 1 when jbd2_journal_has_csum_v2or3() is false for the journal;
 * otherwise returns nonzero only if s_checksum_type is
 * JBD2_CRC32C_CHKSUM.
 */
static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
{
	if (jbd2_journal_has_csum_v2or3(j))
		return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;

	return 1;
}
2013-08-28 22:59:58 +04:00
/*
 * Compute the checksum of the journal superblock.
 *
 * The s_checksum field is zeroed while the checksum is calculated over
 * sizeof(journal_superblock_t) bytes and is restored afterwards, so the
 * superblock is left unchanged on return.  The result is returned in
 * big-endian form.
 */
static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{
	__be32 saved = sb->s_checksum;
	__u32 crc;

	sb->s_checksum = 0;
	crc = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
	sb->s_checksum = saved;

	return cpu_to_be32(crc);
}
2014-02-18 05:49:04 +04:00
/*
 * Check the checksum stored in the journal superblock.
 *
 * Returns 1 when jbd2_journal_has_csum_v2or3() is false for the journal;
 * otherwise returns nonzero only if the stored s_checksum equals the
 * freshly computed one.
 */
static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
{
	if (jbd2_journal_has_csum_v2or3(j))
		return sb->s_checksum == jbd2_superblock_csum(j, sb);

	return 1;
}
2014-02-18 05:49:04 +04:00
/*
 * Store a freshly computed checksum in the journal superblock.  Nothing
 * is written when jbd2_journal_has_csum_v2or3() is false for the journal.
 */
static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
{
	if (jbd2_journal_has_csum_v2or3(j))
		sb->s_checksum = jbd2_superblock_csum(j, sb);
}
2006-10-11 12:20:57 +04:00
/*
 * Commit timer callback: @__data carries a task_struct pointer, and that
 * task is woken up.
 */
static void commit_timeout(unsigned long __data)
{
	wake_up_process((struct task_struct *)__data);
}
/*
2006-10-11 12:20:59 +04:00
* kjournald2 : The main thread function used to manage a logging device
2006-10-11 12:20:57 +04:00
* journal .
*
* This kernel thread is responsible for two things :
*
* 1 ) COMMIT : Every so often we need to commit the current state of the
* filesystem to disk . The journal thread is responsible for writing
* all of the metadata buffers to disk .
*
* 2 ) CHECKPOINT : We cannot reuse a used section of the log file until all
* of the data in that part of the log has been rewritten elsewhere on
* the disk . Flushing these old buffers to reclaim space in the log is
* known as checkpointing , and this thread is responsible for that job .
*/
2006-10-11 12:20:59 +04:00
static int kjournald2 ( void * arg )
2006-10-11 12:20:57 +04:00
{
journal_t * journal = arg ;
transaction_t * transaction ;
/*
* Set up an interval timer which can be used to trigger a commit wakeup
* after the commit interval expires
*/
setup_timer ( & journal - > j_commit_timer , commit_timeout ,
( unsigned long ) current ) ;
2012-02-03 12:59:41 +04:00
set_freezable ( ) ;
2006-10-11 12:20:57 +04:00
/* Record that the journal thread is running */
journal - > j_task = current ;
wake_up ( & journal - > j_wait_done_commit ) ;
/*
* And now , wait forever for commit wakeup events .
*/
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
loop :
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_UNMOUNT )
2006-10-11 12:20:57 +04:00
goto end_loop ;
jbd_debug ( 1 , " commit_sequence=%d, commit_request=%d \n " ,
journal - > j_commit_sequence , journal - > j_commit_request ) ;
if ( journal - > j_commit_sequence ! = journal - > j_commit_request ) {
jbd_debug ( 1 , " OK, requests differ \n " ) ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
del_timer_sync ( & journal - > j_commit_timer ) ;
2006-10-11 12:20:59 +04:00
jbd2_journal_commit_transaction ( journal ) ;
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
goto loop ;
}
wake_up ( & journal - > j_wait_done_commit ) ;
if ( freezing ( current ) ) {
/*
* The simpler the better . Flushing journal isn ' t a
* good idea , because that depends on threads that may
* be already stopped .
*/
2006-10-11 12:20:59 +04:00
jbd_debug ( 1 , " Now suspending kjournald2 \n " ) ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2011-11-22 00:32:22 +04:00
try_to_freeze ( ) ;
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
} else {
/*
* We assume on resume that commits are already there ,
* so we don ' t sleep
*/
DEFINE_WAIT ( wait ) ;
int should_sleep = 1 ;
prepare_to_wait ( & journal - > j_wait_commit , & wait ,
TASK_INTERRUPTIBLE ) ;
if ( journal - > j_commit_sequence ! = journal - > j_commit_request )
should_sleep = 0 ;
transaction = journal - > j_running_transaction ;
if ( transaction & & time_after_eq ( jiffies ,
transaction - > t_expires ) )
should_sleep = 0 ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_UNMOUNT )
2006-10-11 12:20:57 +04:00
should_sleep = 0 ;
if ( should_sleep ) {
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
schedule ( ) ;
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
}
finish_wait ( & journal - > j_wait_commit , & wait ) ;
}
2006-10-11 12:20:59 +04:00
jbd_debug ( 1 , " kjournald2 wakes \n " ) ;
2006-10-11 12:20:57 +04:00
/*
* Were we woken up by a commit wakeup event ?
*/
transaction = journal - > j_running_transaction ;
if ( transaction & & time_after_eq ( jiffies , transaction - > t_expires ) ) {
journal - > j_commit_request = transaction - > t_tid ;
jbd_debug ( 1 , " woke because of timeout \n " ) ;
}
goto loop ;
end_loop :
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
del_timer_sync ( & journal - > j_commit_timer ) ;
journal - > j_task = NULL ;
wake_up ( & journal - > j_wait_done_commit ) ;
jbd_debug ( 1 , " Journal thread exiting. \n " ) ;
return 0 ;
}
2007-05-08 11:30:42 +04:00
/*
 * Start the kjournald2 thread for @journal and wait until it has
 * recorded itself in journal->j_task.
 *
 * Returns 0 on success, or the error encoded in kthread_run()'s return
 * value when the thread could not be created.
 */
static int jbd2_journal_start_thread(journal_t *journal)
{
	struct task_struct *thread;

	thread = kthread_run(kjournald2, journal, " jbd2/%s ",
			     journal->j_devname);
	if (IS_ERR(thread))
		return PTR_ERR(thread);

	wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
	return 0;
}
/*
 * Ask the journal thread to exit and wait until it has done so.
 *
 * JBD2_UNMOUNT is set under j_state_lock; the thread is then woken via
 * j_wait_commit and we wait on j_wait_done_commit until j_task is NULL.
 */
static void journal_kill_thread(journal_t *journal)
{
	write_lock(&journal->j_state_lock);
	journal->j_flags |= JBD2_UNMOUNT;

	for (;;) {
		if (!journal->j_task)
			break;

		/* Drop the lock while sleeping so the thread can progress. */
		write_unlock(&journal->j_state_lock);
		wake_up(&journal->j_wait_commit);
		wait_event(journal->j_wait_done_commit,
			   journal->j_task == NULL);
		write_lock(&journal->j_state_lock);
	}

	write_unlock(&journal->j_state_lock);
}
/*
2006-10-11 12:20:59 +04:00
* jbd2_journal_write_metadata_buffer : write a metadata buffer to the journal .
2006-10-11 12:20:57 +04:00
*
* Writes a metadata buffer to a given disk block . The actual IO is not
* performed but a new buffer_head is constructed which labels the data
* to be written with the correct destination disk block .
*
* Any magic - number escaping which needs to be done will cause a
* copy - out here . If the buffer happens to start with the
2006-10-11 12:20:59 +04:00
* JBD2_MAGIC_NUMBER , then we can ' t write it to the log directly : the
2006-10-11 12:20:57 +04:00
* magic number is only written to the log for descripter blocks . In
* this case , we copy the data and replace the first word with 0 , and we
* return a result code which indicates that this buffer needs to be
* marked as an escaped buffer in the corresponding log descriptor
* block . The missing word can then be restored when the block is read
* during recovery .
*
* If the source buffer has already been modified by a new transaction
* since we took the last commit snapshot , we use the frozen copy of
2013-06-04 20:01:45 +04:00
* that data for IO . If we end up using the existing buffer_head ' s data
* for the write , then we have to make sure nobody modifies it while the
* IO is in progress . do_get_write_access ( ) handles this .
2006-10-11 12:20:57 +04:00
*
2013-06-04 20:01:45 +04:00
* The function returns a pointer to the buffer_head to be used for IO .
*
2006-10-11 12:20:57 +04:00
*
* Return value :
* < 0 : Error
* > = 0 : Finished OK
*
* On success :
* Bit 0 set = = escape performed on the data
* Bit 1 set = = buffer copy - out performed ( kfree the data after IO )
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_write_metadata_buffer ( transaction_t * transaction ,
2006-10-11 12:20:57 +04:00
struct journal_head * jh_in ,
2013-06-04 20:01:45 +04:00
struct buffer_head * * bh_out ,
sector_t blocknr )
2006-10-11 12:20:57 +04:00
{
int need_copy_out = 0 ;
int done_copy_out = 0 ;
int do_escape = 0 ;
char * mapped_data ;
struct buffer_head * new_bh ;
struct page * new_page ;
unsigned int new_offset ;
struct buffer_head * bh_in = jh2bh ( jh_in ) ;
2009-07-14 01:55:35 +04:00
journal_t * journal = transaction - > t_journal ;
2006-10-11 12:20:57 +04:00
/*
* The buffer really shouldn ' t be locked : only the current committing
* transaction is allowed to write it , so nobody else is allowed
* to do any IO .
*
* akpm : except if we ' re journalling data , and write ( ) output is
* also part of a shared mapping , and another thread has
* decided to launch a writepage ( ) against this buffer .
*/
J_ASSERT_BH ( bh_in , buffer_jbddirty ( bh_in ) ) ;
2010-07-27 19:56:05 +04:00
retry_alloc :
new_bh = alloc_buffer_head ( GFP_NOFS ) ;
if ( ! new_bh ) {
/*
* Failure is not an option , but __GFP_NOFAIL is going
* away ; so we retry ourselves here .
*/
congestion_wait ( BLK_RW_ASYNC , HZ / 50 ) ;
goto retry_alloc ;
}
2009-07-14 01:55:35 +04:00
/* keep subsequent assertions sane */
atomic_set ( & new_bh - > b_count , 1 ) ;
2006-10-11 12:20:57 +04:00
2013-06-04 20:01:45 +04:00
jbd_lock_bh_state ( bh_in ) ;
repeat :
2006-10-11 12:20:57 +04:00
/*
* If a new transaction has already done a buffer copy - out , then
* we use that version of the data for the commit .
*/
if ( jh_in - > b_frozen_data ) {
done_copy_out = 1 ;
new_page = virt_to_page ( jh_in - > b_frozen_data ) ;
new_offset = offset_in_page ( jh_in - > b_frozen_data ) ;
} else {
new_page = jh2bh ( jh_in ) - > b_page ;
new_offset = offset_in_page ( jh2bh ( jh_in ) - > b_data ) ;
}
2011-11-25 19:14:31 +04:00
mapped_data = kmap_atomic ( new_page ) ;
2008-09-12 02:35:47 +04:00
/*
2010-07-14 09:56:33 +04:00
* Fire data frozen trigger if data already wasn ' t frozen . Do this
* before checking for escaping , as the trigger may modify the magic
* offset . If a copy - out happens afterwards , it will have the correct
* data in the buffer .
2008-09-12 02:35:47 +04:00
*/
2010-07-14 09:56:33 +04:00
if ( ! done_copy_out )
jbd2_buffer_frozen_trigger ( jh_in , mapped_data + new_offset ,
jh_in - > b_triggers ) ;
2008-09-12 02:35:47 +04:00
2006-10-11 12:20:57 +04:00
/*
* Check for escaping
*/
if ( * ( ( __be32 * ) ( mapped_data + new_offset ) ) = =
2006-10-11 12:20:59 +04:00
cpu_to_be32 ( JBD2_MAGIC_NUMBER ) ) {
2006-10-11 12:20:57 +04:00
need_copy_out = 1 ;
do_escape = 1 ;
}
2011-11-25 19:14:31 +04:00
kunmap_atomic ( mapped_data ) ;
2006-10-11 12:20:57 +04:00
/*
* Do we need to do a data copy ?
*/
if ( need_copy_out & & ! done_copy_out ) {
char * tmp ;
jbd_unlock_bh_state ( bh_in ) ;
2007-10-17 02:38:25 +04:00
tmp = jbd2_alloc ( bh_in - > b_size , GFP_NOFS ) ;
2009-12-01 17:04:42 +03:00
if ( ! tmp ) {
2013-06-04 20:01:45 +04:00
brelse ( new_bh ) ;
2009-12-01 17:04:42 +03:00
return - ENOMEM ;
}
2006-10-11 12:20:57 +04:00
jbd_lock_bh_state ( bh_in ) ;
if ( jh_in - > b_frozen_data ) {
2007-10-17 02:38:25 +04:00
jbd2_free ( tmp , bh_in - > b_size ) ;
2006-10-11 12:20:57 +04:00
goto repeat ;
}
jh_in - > b_frozen_data = tmp ;
2011-11-25 19:14:31 +04:00
mapped_data = kmap_atomic ( new_page ) ;
2013-06-04 20:01:45 +04:00
memcpy ( tmp , mapped_data + new_offset , bh_in - > b_size ) ;
2011-11-25 19:14:31 +04:00
kunmap_atomic ( mapped_data ) ;
2006-10-11 12:20:57 +04:00
new_page = virt_to_page ( tmp ) ;
new_offset = offset_in_page ( tmp ) ;
done_copy_out = 1 ;
2008-09-12 02:35:47 +04:00
/*
* This isn ' t strictly necessary , as we ' re using frozen
* data for the escaping , but it keeps consistency with
* b_frozen_data usage .
*/
jh_in - > b_frozen_triggers = jh_in - > b_triggers ;
2006-10-11 12:20:57 +04:00
}
/*
* Did we need to do an escaping ? Now we ' ve done all the
* copying , we can finally do so .
*/
if ( do_escape ) {
2011-11-25 19:14:31 +04:00
mapped_data = kmap_atomic ( new_page ) ;
2006-10-11 12:20:57 +04:00
* ( ( unsigned int * ) ( mapped_data + new_offset ) ) = 0 ;
2011-11-25 19:14:31 +04:00
kunmap_atomic ( mapped_data ) ;
2006-10-11 12:20:57 +04:00
}
set_bh_page ( new_bh , new_page , new_offset ) ;
2013-06-04 20:01:45 +04:00
new_bh - > b_size = bh_in - > b_size ;
new_bh - > b_bdev = journal - > j_dev ;
2006-10-11 12:20:57 +04:00
new_bh - > b_blocknr = blocknr ;
2013-06-04 20:08:56 +04:00
new_bh - > b_private = bh_in ;
2006-10-11 12:20:57 +04:00
set_buffer_mapped ( new_bh ) ;
set_buffer_dirty ( new_bh ) ;
2013-06-04 20:01:45 +04:00
* bh_out = new_bh ;
2006-10-11 12:20:57 +04:00
/*
* The to - be - written buffer needs to get moved to the io queue ,
* and the original buffer whose contents we are shadowing or
* copying is moved to the transaction ' s shadow queue .
*/
JBUFFER_TRACE ( jh_in , " file as BJ_Shadow " ) ;
2009-07-14 01:55:35 +04:00
spin_lock ( & journal - > j_list_lock ) ;
__jbd2_journal_file_buffer ( jh_in , transaction , BJ_Shadow ) ;
spin_unlock ( & journal - > j_list_lock ) ;
2013-06-04 20:08:56 +04:00
set_buffer_shadow ( bh_in ) ;
2009-07-14 01:55:35 +04:00
jbd_unlock_bh_state ( bh_in ) ;
2006-10-11 12:20:57 +04:00
return do_escape | ( done_copy_out < < 1 ) ;
}
/*
* Allocation code for the journal file . Manage the space left in the
* journal , so that we can begin checkpointing when appropriate .
*/
/*
2011-02-12 16:18:24 +03:00
* Called with j_state_lock locked for writing .
* Returns true if a transaction commit was started .
2006-10-11 12:20:57 +04:00
*/
2006-10-11 12:20:59 +04:00
int __jbd2_log_start_commit ( journal_t * journal , tid_t target )
2006-10-11 12:20:57 +04:00
{
2013-01-30 09:39:28 +04:00
/* Return if the txn has already requested to be committed */
if ( journal - > j_commit_request = = target )
return 0 ;
2006-10-11 12:20:57 +04:00
/*
2011-05-02 02:16:26 +04:00
* The only transaction we can possibly wait upon is the
* currently running transaction ( if it exists ) . Otherwise ,
* the target tid must be an old one .
2006-10-11 12:20:57 +04:00
*/
2011-05-02 02:16:26 +04:00
if ( journal - > j_running_transaction & &
journal - > j_running_transaction - > t_tid = = target ) {
2006-10-11 12:20:57 +04:00
/*
2010-10-16 17:19:14 +04:00
* We want a new commit : OK , mark the request and wakeup the
2006-10-11 12:20:57 +04:00
* commit thread . We do _not_ do the commit ourselves .
*/
journal - > j_commit_request = target ;
2011-11-02 03:09:18 +04:00
jbd_debug ( 1 , " JBD2: requesting commit %d/%d \n " ,
2006-10-11 12:20:57 +04:00
journal - > j_commit_request ,
journal - > j_commit_sequence ) ;
2013-02-07 07:30:23 +04:00
journal - > j_running_transaction - > t_requested = jiffies ;
2006-10-11 12:20:57 +04:00
wake_up ( & journal - > j_wait_commit ) ;
return 1 ;
2011-05-02 02:16:26 +04:00
} else if ( ! tid_geq ( journal - > j_commit_request , target ) )
/* This should never happen, but if it does, preserve
the evidence before kjournald goes into a loop and
increments j_commit_sequence beyond all recognition . */
2011-11-02 03:09:18 +04:00
WARN_ONCE ( 1 , " JBD2: bad log_start_commit: %u %u %u %u \n " ,
2011-05-09 03:37:54 +04:00
journal - > j_commit_request ,
journal - > j_commit_sequence ,
target , journal - > j_running_transaction ?
journal - > j_running_transaction - > t_tid : 0 ) ;
2006-10-11 12:20:57 +04:00
return 0 ;
}
2006-10-11 12:20:59 +04:00
/*
 * Locked wrapper around __jbd2_log_start_commit(): takes j_state_lock
 * for writing around the call and passes its return value through.
 */
int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{
	int started;

	write_lock(&journal->j_state_lock);
	started = __jbd2_log_start_commit(journal, tid);
	write_unlock(&journal->j_state_lock);

	return started;
}
/*
2013-06-13 06:24:07 +04:00
* Force and wait any uncommitted transactions . We can only force the running
* transaction if we don ' t have an active handle , otherwise , we will deadlock .
* Returns : < 0 in case of error ,
* 0 if nothing to commit ,
* 1 if transaction was successfully committed .
2006-10-11 12:20:57 +04:00
*/
2013-06-13 06:24:07 +04:00
static int __jbd2_journal_force_commit ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
transaction_t * transaction = NULL ;
tid_t tid ;
2013-06-13 06:24:07 +04:00
int need_to_start = 0 , ret = 0 ;
2006-10-11 12:20:57 +04:00
2010-08-04 05:35:12 +04:00
read_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_running_transaction & & ! current - > journal_info ) {
transaction = journal - > j_running_transaction ;
2011-02-12 16:18:24 +03:00
if ( ! tid_geq ( journal - > j_commit_request , transaction - > t_tid ) )
need_to_start = 1 ;
2006-10-11 12:20:57 +04:00
} else if ( journal - > j_committing_transaction )
transaction = journal - > j_committing_transaction ;
if ( ! transaction ) {
2013-06-13 06:24:07 +04:00
/* Nothing to commit */
2010-08-04 05:35:12 +04:00
read_unlock ( & journal - > j_state_lock ) ;
2013-06-13 06:24:07 +04:00
return 0 ;
2006-10-11 12:20:57 +04:00
}
tid = transaction - > t_tid ;
2010-08-04 05:35:12 +04:00
read_unlock ( & journal - > j_state_lock ) ;
2011-02-12 16:18:24 +03:00
if ( need_to_start )
jbd2_log_start_commit ( journal , tid ) ;
2013-06-13 06:24:07 +04:00
ret = jbd2_log_wait_commit ( journal , tid ) ;
if ( ! ret )
ret = 1 ;
return ret ;
}
/**
 * jbd2_journal_force_commit_nested() - force a commit if not in a transaction
 * @journal: journal to force
 *
 * Force and wait upon a commit if the calling process is not within
 * transaction.  This is used for forcing out undo-protected data which
 * contains bitmaps, when the fs is running out of space.
 *
 * Returns true if progress was made.
 */
int jbd2_journal_force_commit_nested(journal_t *journal)
{
	return __jbd2_journal_force_commit(journal) > 0;
}
/**
 * jbd2_journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * Caller wants an unconditional commit.  We can only force the running
 * transaction if we don't have an active handle, otherwise, we will
 * deadlock; the absence of a handle is asserted here.
 *
 * Returns 0 when a transaction was committed or there was nothing to
 * commit, or the negative error from __jbd2_journal_force_commit().
 */
int jbd2_journal_force_commit(journal_t *journal)
{
	int rc;

	J_ASSERT(!current->journal_info);

	rc = __jbd2_journal_force_commit(journal);
	return rc > 0 ? 0 : rc;
}
/*
* Start a commit of the current running transaction ( if any ) . Returns true
2009-02-10 19:27:46 +03:00
* if a transaction is going to be committed ( or is currently already
* committing ) , and fills its tid in at * ptid
2006-10-11 12:20:57 +04:00
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_start_commit ( journal_t * journal , tid_t * ptid )
2006-10-11 12:20:57 +04:00
{
int ret = 0 ;
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_running_transaction ) {
tid_t tid = journal - > j_running_transaction - > t_tid ;
2009-02-10 19:27:46 +03:00
__jbd2_log_start_commit ( journal , tid ) ;
/* There's a running transaction and we've just made sure
* it ' s commit has been scheduled . */
if ( ptid )
2006-10-11 12:20:57 +04:00
* ptid = tid ;
2009-02-10 19:27:46 +03:00
ret = 1 ;
} else if ( journal - > j_committing_transaction ) {
2006-10-11 12:20:57 +04:00
/*
2012-07-25 19:12:07 +04:00
* If commit has been started , then we have to wait for
* completion of that transaction .
2006-10-11 12:20:57 +04:00
*/
2009-02-10 19:27:46 +03:00
if ( ptid )
* ptid = journal - > j_committing_transaction - > t_tid ;
2006-10-11 12:20:57 +04:00
ret = 1 ;
}
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
return ret ;
}
2011-05-24 19:59:18 +04:00
/*
 * Return 1 if a given transaction has not yet sent barrier request
 * connected with a transaction commit.  If 0 is returned, transaction
 * may or may not have sent the barrier.  Used to avoid sending barrier
 * twice in common cases.
 */
int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
{
	transaction_t *committing;
	int pending = 0;

	if (!(journal->j_flags & JBD2_BARRIER))
		return 0;

	read_lock(&journal->j_state_lock);

	/* Transaction already committed? */
	if (tid_geq(journal->j_commit_sequence, tid))
		goto unlock;

	committing = journal->j_committing_transaction;
	if (!committing || committing->t_tid != tid) {
		pending = 1;
		goto unlock;
	}

	/*
	 * Transaction is being committed: has it already proceeded to
	 * submitting a flush to the fs partition?
	 */
	if (journal->j_fs_dev != journal->j_dev)
		pending = committing->t_need_data_flush &&
			  committing->t_state < T_COMMIT_DFLUSH;
	else
		pending = committing->t_state < T_COMMIT_JFLUSH;

unlock:
	read_unlock(&journal->j_state_lock);
	return pending;
}
EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
2006-10-11 12:20:57 +04:00
/*
* Wait for a specified commit to complete .
* The caller may not hold the journal lock .
*/
2006-10-11 12:20:59 +04:00
int jbd2_log_wait_commit ( journal_t * journal , tid_t tid )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
2010-08-04 05:35:12 +04:00
read_lock ( & journal - > j_state_lock ) ;
2007-07-18 16:57:06 +04:00
# ifdef CONFIG_JBD2_DEBUG
2006-10-11 12:20:57 +04:00
if ( ! tid_geq ( journal - > j_commit_request , tid ) ) {
2013-12-09 06:13:59 +04:00
printk ( KERN_ERR
2006-10-11 12:20:57 +04:00
" %s: error: j_commit_request=%d, tid=%d \n " ,
2008-04-17 18:38:59 +04:00
__func__ , journal - > j_commit_request , tid ) ;
2006-10-11 12:20:57 +04:00
}
# endif
while ( tid_gt ( tid , journal - > j_commit_sequence ) ) {
2011-11-02 03:09:18 +04:00
jbd_debug ( 1 , " JBD2: want %d, j_commit_sequence=%d \n " ,
2006-10-11 12:20:57 +04:00
tid , journal - > j_commit_sequence ) ;
2010-08-04 05:35:12 +04:00
read_unlock ( & journal - > j_state_lock ) ;
2014-03-09 04:11:36 +04:00
wake_up ( & journal - > j_wait_commit ) ;
2006-10-11 12:20:57 +04:00
wait_event ( journal - > j_wait_done_commit ,
! tid_gt ( tid , journal - > j_commit_sequence ) ) ;
2010-08-04 05:35:12 +04:00
read_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
}
2010-08-04 05:35:12 +04:00
read_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
2013-12-09 06:13:59 +04:00
if ( unlikely ( is_journal_aborted ( journal ) ) )
2006-10-11 12:20:57 +04:00
err = - EIO ;
return err ;
}
ext4/jbd2: don't wait (forever) for stale tid caused by wraparound
In the case where an inode has a very stale transaction id (tid) in
i_datasync_tid or i_sync_tid, it's possible that after a very large
(2**31) number of transactions, that the tid number space might wrap,
causing tid_geq()'s calculations to fail.
Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified
by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily",
attempted to fix this problem, but it only avoided kjournald spinning
forever by fixing the logic in jbd2_log_start_commit().
Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c
that might call jbd2_log_start_commit() with a stale tid, those
functions will subsequently call jbd2_log_wait_commit() with the same
stale tid, and then wait for a very long time. To fix this, we
replace the calls to jbd2_log_start_commit() and
jbd2_log_wait_commit() with a call to a new function,
jbd2_complete_transaction(), which will correctly handle stale tid's.
As a bonus, jbd2_complete_transaction() will avoid locking
j_state_lock for writing unless a commit needs to be started. This
should have a small (but probably not measurable) improvement for
ext4's scalability.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Reported-by: George Barnett <gbarnett@atlassian.com>
Cc: stable@vger.kernel.org
2013-04-04 06:02:52 +04:00
/*
 * When this function returns the transaction corresponding to tid
 * will be completed.  If the transaction is currently running, start
 * committing that transaction before waiting for it to complete.  If
 * the transaction id is stale, it is by definition already completed,
 * so just return SUCCESS.
 */
int jbd2_complete_transaction(journal_t *journal, tid_t tid)
{
	transaction_t *running;
	transaction_t *committing;

	read_lock(&journal->j_state_lock);
	running = journal->j_running_transaction;
	if (running && running->t_tid == tid) {
		if (journal->j_commit_request != tid) {
			/* transaction not yet started, so request it */
			read_unlock(&journal->j_state_lock);
			jbd2_log_start_commit(journal, tid);
			return jbd2_log_wait_commit(journal, tid);
		}
	} else {
		committing = journal->j_committing_transaction;
		if (!(committing && committing->t_tid == tid)) {
			/* Neither running nor committing: nothing to wait for. */
			read_unlock(&journal->j_state_lock);
			return 0;
		}
	}
	read_unlock(&journal->j_state_lock);

	return jbd2_log_wait_commit(journal, tid);
}
EXPORT_SYMBOL(jbd2_complete_transaction);
2006-10-11 12:20:57 +04:00
/*
* Log buffer allocation routines :
*/
2006-10-11 12:21:13 +04:00
/*
 * Reserve the next block of the log: take the block at the current head,
 * advance the head (wrapping from j_last back to j_first) and account for
 * the consumed block in j_free, all under j_state_lock.  The logical block
 * is then translated by jbd2_journal_bmap(), whose result is returned and
 * which stores the mapped block number in *retp.
 */
int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
{
	unsigned long log_block;

	write_lock(&journal->j_state_lock);
	J_ASSERT(journal->j_free > 1);

	log_block = journal->j_head++;
	journal->j_free--;
	if (journal->j_head == journal->j_last)
		journal->j_head = journal->j_first;
	write_unlock(&journal->j_state_lock);

	return jbd2_journal_bmap(journal, log_block, retp);
}
/*
 * Conversion of logical to physical block numbers for the journal.
 *
 * On external journals the journal blocks are identity-mapped, so
 * this is a no-op.  If needed, we can use j_blk_offset - everything is
 * ready.
 *
 * For a journal that lives in an inode (j_inode set), the logical block is
 * translated with bmap().  On success 0 is returned and the physical block
 * number is stored in *retp.  If bmap() returns 0, the failure is logged,
 * __journal_abort_soft() is called with -EIO and -EIO is returned; *retp is
 * not written in that case.
 */
2006-10-11 12:20:59 +04:00
int jbd2_journal_bmap ( journal_t * journal , unsigned long blocknr ,
2006-10-11 12:21:13 +04:00
unsigned long long * retp )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
2006-10-11 12:21:13 +04:00
unsigned long long ret ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_inode ) {
/* Inode-backed journal: ask the filesystem for the mapping. */
ret = bmap ( journal - > j_inode , blocknr ) ;
if ( ret )
* retp = ret ;
else {
/* A result of 0 is treated as "no block mapped". */
printk ( KERN_ALERT " %s: journal block not found "
" at offset %lu on %s \n " ,
2008-09-16 22:36:17 +04:00
__func__ , blocknr , journal - > j_devname ) ;
2006-10-11 12:20:57 +04:00
err = - EIO ;
__journal_abort_soft ( journal , err ) ;
}
} else {
/* No inode: logical and physical block numbers coincide. */
* retp = blocknr ; /* +journal->j_blk_offset */
}
return err ;
}
/*
 * We play buffer_head aliasing tricks to write data/metadata blocks to
 * the journal without copying their contents, but for journal
 * descriptor blocks we do need to generate bona fide buffers.
 *
 * After the caller of jbd2_journal_get_descriptor_buffer() has finished
 * modifying the buffer's contents they really should run
 * flush_dcache_page(bh->b_page).  But we don't bother doing that, so there
 * will be coherency problems with mmaps of blockdevs which hold live
 * JBD-controlled filesystems.
 *
 * Returns a zero-filled buffer marked uptodate for the next log block, or
 * NULL if the next log block could not be obtained or __getblk() failed.
 */
2013-06-04 20:06:01 +04:00
struct buffer_head * jbd2_journal_get_descriptor_buffer ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
struct buffer_head * bh ;
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ;
2006-10-11 12:20:57 +04:00
int err ;
2006-10-11 12:20:59 +04:00
/* Consume the next block of the log and get its mapped block number. */
err = jbd2_journal_next_log_block ( journal , & blocknr ) ;
2006-10-11 12:20:57 +04:00
if ( err )
return NULL ;
bh = __getblk ( journal - > j_dev , blocknr , journal - > j_blocksize ) ;
2009-01-06 22:53:35 +03:00
if ( ! bh )
return NULL ;
2006-10-11 12:20:57 +04:00
/* Clear the block under the buffer lock; no read from disk is issued here. */
lock_buffer ( bh ) ;
memset ( bh - > b_data , 0 , journal - > j_blocksize ) ;
set_buffer_uptodate ( bh ) ;
unlock_buffer ( bh ) ;
BUFFER_TRACE ( bh , " return this buffer " ) ;
2013-06-04 20:06:01 +04:00
return bh ;
2006-10-11 12:20:57 +04:00
}
2012-03-14 06:22:54 +04:00
/*
 * Report the tid of the oldest transaction still in the journal and the
 * journal block at which that transaction starts.
 *
 * The oldest transaction is the first checkpoint transaction if there is
 * one, otherwise the committing transaction.  If neither exists, the
 * reported block is the current journal head and the tid is that of the
 * running transaction or, when the journal is completely idle, the next
 * transaction sequence number that will be used.
 *
 * The return value is 0 if the journal tail cannot be pushed any further,
 * 1 if it can.
 */
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
			      unsigned long *block)
{
	transaction_t *oldest;
	int can_advance;

	read_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);

	oldest = journal->j_checkpoint_transactions;
	if (oldest == NULL)
		oldest = journal->j_committing_transaction;

	if (oldest != NULL) {
		/* Something is already in the log: report where it begins. */
		*tid = oldest->t_tid;
		*block = oldest->t_log_start;
	} else {
		transaction_t *running = journal->j_running_transaction;

		/* Nothing logged yet: the tail coincides with the head. */
		if (running != NULL)
			*tid = running->t_tid;
		else
			*tid = journal->j_transaction_sequence;
		*block = journal->j_head;
	}

	can_advance = tid_gt(*tid, journal->j_tail_sequence);

	spin_unlock(&journal->j_list_lock);
	read_unlock(&journal->j_state_lock);

	return can_advance;
}
/*
 * Update information in journal structure and in on disk journal superblock
 * about log tail.  This function does not check whether information passed in
 * really pushes log tail further.  It's responsibility of the caller to make
 * sure provided log tail information is valid (e.g. by holding
 * j_checkpoint_mutex all the time between computing log tail and calling this
 * function as is the case with jbd2_cleanup_journal_tail()).
 *
 * The on-disk superblock is written first; only afterwards are j_free,
 * j_tail_sequence and j_tail updated under j_state_lock.
 *
 * Requires j_checkpoint_mutex
 */
void __jbd2_update_log_tail ( journal_t * journal , tid_t tid , unsigned long block )
{
unsigned long freed ;
BUG_ON ( ! mutex_is_locked ( & journal - > j_checkpoint_mutex ) ) ;
/*
 * We cannot afford for write to remain in drive's caches since as
 * soon as we update j_tail, next transaction can start reusing journal
 * space and if we lose sb update during power failure we'd replay
 * old transaction with possibly newly overwritten data.
 */
jbd2_journal_update_sb_log_tail ( journal , tid , block , WRITE_FUA ) ;
write_lock ( & journal - > j_state_lock ) ;
/*
 * Number of blocks released by moving the tail from j_tail to block.
 * If the new tail is numerically below the old one the tail has wrapped,
 * so the size of the log area (j_last - j_first) is added back.
 */
freed = block - journal - > j_tail ;
if ( block < journal - > j_tail )
freed + = journal - > j_last - journal - > j_first ;
trace_jbd2_update_log_tail ( journal , tid , block , freed ) ;
jbd_debug ( 1 ,
" Cleaning journal tail from %d to %d (offset %lu), "
" freeing %lu \n " ,
journal - > j_tail_sequence , tid , block , freed ) ;
journal - > j_free + = freed ;
journal - > j_tail_sequence = tid ;
journal - > j_tail = block ;
write_unlock ( & journal - > j_state_lock ) ;
}
2012-03-14 06:45:38 +04:00
/*
 * This is a variation of __jbd2_update_log_tail() which takes
 * j_checkpoint_mutex itself and only performs the update when the supplied
 * tid is newer than the current j_tail_sequence.  So it is safe against
 * races with other threads updating the log tail.
 */
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
{
	mutex_lock(&journal->j_checkpoint_mutex);
	if (tid_gt(tid, journal->j_tail_sequence)) {
		/* The tail really moves forward: record it. */
		__jbd2_update_log_tail(journal, tid, block);
	}
	mutex_unlock(&journal->j_checkpoint_mutex);
}
2008-01-29 07:58:27 +03:00
/*
 * Per-open state of the journal statistics proc file.  It is allocated in
 * jbd2_seq_info_open(), stored in the seq_file's private pointer and freed
 * in jbd2_seq_info_release().
 */
struct jbd2_stats_proc_session {
/* Journal whose statistics are being shown. */
journal_t * journal ;
/* Private copy of journal->j_stats taken when the file was opened. */
struct transaction_stats_s * stats ;
/* NOTE(review): start and max are not referenced in the code visible here. */
int start ;
int max ;
} ;
/*
 * seq_file start callback.  The file consists of a single record, which is
 * represented by SEQ_START_TOKEN at position 0; any other position yields
 * NULL.
 */
static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos != 0)
		return NULL;

	return SEQ_START_TOKEN;
}
/*
 * seq_file next callback.  There is only one record, so iteration always
 * ends here (NULL is returned).
 *
 * The position index is still advanced: the seq_file interface expects
 * next() to move *pos forward even when it reports the end of the sequence.
 * Leaving it at 0 would let a later jbd2_seq_info_start() call hand out
 * SEQ_START_TOKEN again for the same position.
 */
static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return NULL;
}
/*
 * seq_file show callback: print the statistics snapshot held in the
 * jbd2_stats_proc_session attached to seq->private.  Counters and timings
 * in stats->run are reported as averages over ts_tid (the request delay is
 * averaged over ts_requested instead).  Always returns 0.
 */
static int jbd2_seq_info_show ( struct seq_file * seq , void * v )
{
struct jbd2_stats_proc_session * s = seq - > private ;
/* Only the single start record produces output. */
if ( v ! = SEQ_START_TOKEN )
return 0 ;
2013-02-07 07:30:23 +04:00
seq_printf ( seq , " %lu transactions (%lu requested), "
" each up to %u blocks \n " ,
s - > stats - > ts_tid , s - > stats - > ts_requested ,
s - > journal - > j_max_transaction_buffers ) ;
2008-01-29 07:58:27 +03:00
/* ts_tid is the divisor for the averages below; stop here if it is zero. */
if ( s - > stats - > ts_tid = = 0 )
return 0 ;
seq_printf ( seq , " average: \n %ums waiting for transaction \n " ,
2009-09-30 08:32:06 +04:00
jiffies_to_msecs ( s - > stats - > run . rs_wait / s - > stats - > ts_tid ) ) ;
2013-02-07 07:30:23 +04:00
/* The request delay is averaged over ts_requested; guard that divisor too. */
seq_printf ( seq , " %ums request delay \n " ,
( s - > stats - > ts_requested = = 0 ) ? 0 :
jiffies_to_msecs ( s - > stats - > run . rs_request_delay /
s - > stats - > ts_requested ) ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %ums running transaction \n " ,
2009-09-30 08:32:06 +04:00
jiffies_to_msecs ( s - > stats - > run . rs_running / s - > stats - > ts_tid ) ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %ums transaction was being locked \n " ,
2009-09-30 08:32:06 +04:00
jiffies_to_msecs ( s - > stats - > run . rs_locked / s - > stats - > ts_tid ) ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %ums flushing data (in ordered mode) \n " ,
2009-09-30 08:32:06 +04:00
jiffies_to_msecs ( s - > stats - > run . rs_flushing / s - > stats - > ts_tid ) ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %ums logging transaction \n " ,
2009-09-30 08:32:06 +04:00
jiffies_to_msecs ( s - > stats - > run . rs_logging / s - > stats - > ts_tid ) ) ;
2009-01-12 06:34:01 +03:00
/* Unlike the values above this one comes from the live journal, not the snapshot. */
seq_printf ( seq , " %lluus average transaction commit time \n " ,
div_u64 ( s - > journal - > j_average_commit_time , 1000 ) ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %lu handles per transaction \n " ,
2009-09-30 08:32:06 +04:00
s - > stats - > run . rs_handle_count / s - > stats - > ts_tid ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %lu blocks per transaction \n " ,
2009-09-30 08:32:06 +04:00
s - > stats - > run . rs_blocks / s - > stats - > ts_tid ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %lu logged blocks per transaction \n " ,
2009-09-30 08:32:06 +04:00
s - > stats - > run . rs_blocks_logged / s - > stats - > ts_tid ) ;
2008-01-29 07:58:27 +03:00
return 0 ;
}
/* seq_file stop callback: the start callback acquires nothing, so this is a no-op. */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
	/* nothing to release */
}
2009-09-23 03:43:43 +04:00
static const struct seq_operations jbd2_seq_info_ops = {
2008-01-29 07:58:27 +03:00
. start = jbd2_seq_info_start ,
. next = jbd2_seq_info_next ,
. stop = jbd2_seq_info_stop ,
. show = jbd2_seq_info_show ,
} ;
static int jbd2_seq_info_open ( struct inode * inode , struct file * file )
{
2013-04-01 02:16:14 +04:00
journal_t * journal = PDE_DATA ( inode ) ;
2008-01-29 07:58:27 +03:00
struct jbd2_stats_proc_session * s ;
int rc , size ;
s = kmalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( s = = NULL )
return - ENOMEM ;
size = sizeof ( struct transaction_stats_s ) ;
s - > stats = kmalloc ( size , GFP_KERNEL ) ;
if ( s - > stats = = NULL ) {
kfree ( s ) ;
return - ENOMEM ;
}
spin_lock ( & journal - > j_history_lock ) ;
memcpy ( s - > stats , & journal - > j_stats , size ) ;
s - > journal = journal ;
spin_unlock ( & journal - > j_history_lock ) ;
rc = seq_open ( file , & jbd2_seq_info_ops ) ;
if ( rc = = 0 ) {
struct seq_file * m = file - > private_data ;
m - > private = s ;
} else {
kfree ( s - > stats ) ;
kfree ( s ) ;
}
return rc ;
}
static int jbd2_seq_info_release ( struct inode * inode , struct file * file )
{
struct seq_file * seq = file - > private_data ;
struct jbd2_stats_proc_session * s = seq - > private ;
kfree ( s - > stats ) ;
kfree ( s ) ;
return seq_release ( inode , file ) ;
}
2009-10-02 02:43:56 +04:00
static const struct file_operations jbd2_seq_info_fops = {
2008-01-29 07:58:27 +03:00
. owner = THIS_MODULE ,
. open = jbd2_seq_info_open ,
. read = seq_read ,
. llseek = seq_lseek ,
. release = jbd2_seq_info_release ,
} ;
/*
 * Parent proc directory under which jbd2_stats_proc_init() creates one
 * directory per journal.  It is assigned outside the code visible here.
 */
static struct proc_dir_entry * proc_jbd2_stats ;
/*
 * Create the proc directory for this journal (named after j_devname, below
 * proc_jbd2_stats) and, if that succeeded, the statistics file inside it.
 * Failures are not reported to the caller; j_proc_entry is simply left NULL
 * when the directory cannot be created.
 */
static void jbd2_stats_proc_init ( journal_t * journal )
{
2008-09-16 22:36:17 +04:00
journal - > j_proc_entry = proc_mkdir ( journal - > j_devname , proc_jbd2_stats ) ;
2008-01-29 07:58:27 +03:00
if ( journal - > j_proc_entry ) {
2008-04-29 12:02:11 +04:00
/* The journal is passed as the entry's data; the open handler reads it back via PDE_DATA(). */
proc_create_data ( " info " , S_IRUGO , journal - > j_proc_entry ,
& jbd2_seq_info_fops , journal ) ;
2008-01-29 07:58:27 +03:00
}
}
/*
 * Undo jbd2_stats_proc_init(): remove the statistics file first, then the
 * journal's directory below proc_jbd2_stats.
 * NOTE(review): j_proc_entry is not checked for NULL here although
 * jbd2_stats_proc_init() may leave it NULL - confirm remove_proc_entry()
 * copes with that.
 */
static void jbd2_stats_proc_exit ( journal_t * journal )
{
remove_proc_entry ( " info " , journal - > j_proc_entry ) ;
2008-09-16 22:36:17 +04:00
remove_proc_entry ( journal - > j_devname , proc_jbd2_stats ) ;
2008-01-29 07:58:27 +03:00
}
2006-10-11 12:20:57 +04:00
/*
* Management for journal control blocks : functions to create and
* destroy journal_t structures , and to initialise and read existing
* journal blocks from disk . */
/* First: create and set up a journal_t object in memory.  We initialise
 * very few fields yet: that has to wait until we have created the
 * journal structures from scratch, or loaded them from disk. */
static journal_t * journal_init_common ( void )
{
journal_t * journal ;
int err ;
2009-12-23 16:05:15 +03:00
journal = kzalloc ( sizeof ( * journal ) , GFP_KERNEL ) ;
2006-10-11 12:20:57 +04:00
if ( ! journal )
2010-12-18 21:39:38 +03:00
return NULL ;
2006-10-11 12:20:57 +04:00
init_waitqueue_head ( & journal - > j_wait_transaction_locked ) ;
init_waitqueue_head ( & journal - > j_wait_done_commit ) ;
init_waitqueue_head ( & journal - > j_wait_commit ) ;
init_waitqueue_head ( & journal - > j_wait_updates ) ;
2013-06-04 20:35:11 +04:00
init_waitqueue_head ( & journal - > j_wait_reserved ) ;
2006-10-11 12:20:57 +04:00
mutex_init ( & journal - > j_barrier ) ;
mutex_init ( & journal - > j_checkpoint_mutex ) ;
spin_lock_init ( & journal - > j_revoke_lock ) ;
spin_lock_init ( & journal - > j_list_lock ) ;
2010-08-04 05:35:12 +04:00
rwlock_init ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
2007-10-17 02:38:25 +04:00
journal - > j_commit_interval = ( HZ * JBD2_DEFAULT_MAX_COMMIT_AGE ) ;
2009-01-04 04:27:38 +03:00
journal - > j_min_batch_time = 0 ;
journal - > j_max_batch_time = 15000 ; /* 15ms */
2013-06-04 20:35:11 +04:00
atomic_set ( & journal - > j_reserved_credits , 0 ) ;
2006-10-11 12:20:57 +04:00
/* The journal is marked for error until we succeed with recovery! */
2006-10-11 12:20:59 +04:00
journal - > j_flags = JBD2_ABORT ;
2006-10-11 12:20:57 +04:00
/* Set up a default-sized revoke table for the new mount. */
2006-10-11 12:20:59 +04:00
err = jbd2_journal_init_revoke ( journal , JOURNAL_REVOKE_DEFAULT_HASH ) ;
2006-10-11 12:20:57 +04:00
if ( err ) {
kfree ( journal ) ;
2010-12-18 21:39:38 +03:00
return NULL ;
2006-10-11 12:20:57 +04:00
}
2008-01-29 07:58:27 +03:00
2009-09-30 08:32:06 +04:00
spin_lock_init ( & journal - > j_history_lock ) ;
2008-01-29 07:58:27 +03:00
2006-10-11 12:20:57 +04:00
return journal ;
}
2006-10-11 12:20:59 +04:00
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
2006-10-11 12:20:57 +04:00
*
* Create a journal structure assigned some fixed set of disk blocks to
* the journal . We don ' t actually touch those disk blocks yet , but we
* need to set up all of the mapping information to tell the journaling
* system where the journal blocks are .
*
*/
/**
2008-04-17 18:38:59 +04:00
* journal_t * jbd2_journal_init_dev ( ) - creates and initialises a journal structure
2006-10-11 12:20:57 +04:00
* @ bdev : Block device on which to create the journal
* @fs_dev: Device which holds the journalled filesystem for this journal.
* @ start : Block nr Start of journal .
* @ len : Length of the journal in blocks .
* @ blocksize : blocksize of journalling device
2008-04-17 18:38:59 +04:00
*
* Returns : a newly created journal_t *
2006-10-11 12:20:57 +04:00
*
2006-10-11 12:20:59 +04:00
* jbd2_journal_init_dev creates a journal which maps a fixed contiguous
2006-10-11 12:20:57 +04:00
* range of blocks on an arbitrary block device .
*
*/
2006-10-11 12:20:59 +04:00
/*
 * Set up a journal_t for a journal occupying len blocks starting at block
 * start of bdev.  Besides recording the geometry this creates the proc
 * entries, allocates the j_wbuf array and obtains (without reading it) the
 * buffer of the journal superblock at block start.
 * Returns the journal, or NULL on any failure.
 */
journal_t * jbd2_journal_init_dev ( struct block_device * bdev ,
2006-10-11 12:20:57 +04:00
struct block_device * fs_dev ,
2006-10-11 12:21:13 +04:00
unsigned long long start , int len , int blocksize )
2006-10-11 12:20:57 +04:00
{
journal_t * journal = journal_init_common ( ) ;
struct buffer_head * bh ;
2008-09-16 22:36:17 +04:00
char * p ;
2006-10-11 12:20:57 +04:00
int n ;
if ( ! journal )
return NULL ;
/* journal descriptor can store up to n blocks -bzzz */
journal - > j_blocksize = blocksize ;
2010-11-18 05:46:26 +03:00
journal - > j_dev = bdev ;
journal - > j_fs_dev = fs_dev ;
journal - > j_blk_offset = start ;
journal - > j_maxlen = len ;
/*
 * j_devname is used as the name of the proc directory, so every path
 * separator in the device name is replaced.
 */
bdevname ( journal - > j_dev , journal - > j_devname ) ;
p = journal - > j_devname ;
while ( ( p = strchr ( p , ' / ' ) ) )
* p = ' ! ' ;
2009-01-06 22:53:35 +03:00
jbd2_stats_proc_init ( journal ) ;
2006-10-11 12:20:57 +04:00
/* One buffer_head pointer per block tag that fits into a journal block. */
n = journal - > j_blocksize / sizeof ( journal_block_tag_t ) ;
journal - > j_wbufsize = n ;
journal - > j_wbuf = kmalloc ( n * sizeof ( struct buffer_head * ) , GFP_KERNEL ) ;
if ( ! journal - > j_wbuf ) {
2011-03-31 05:57:33 +04:00
printk ( KERN_ERR " %s: Can't allocate bhs for commit thread \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2009-01-06 22:53:35 +03:00
goto out_err ;
2006-10-11 12:20:57 +04:00
}
/* The journal superblock is the first block of the journal area. */
bh = __getblk ( journal - > j_dev , start , journal - > j_blocksize ) ;
2009-01-06 22:53:35 +03:00
if ( ! bh ) {
printk ( KERN_ERR
" %s: Cannot get buffer for journal superblock \n " ,
__func__ ) ;
goto out_err ;
}
2006-10-11 12:20:57 +04:00
journal - > j_sb_buffer = bh ;
journal - > j_superblock = ( journal_superblock_t * ) bh - > b_data ;
2009-01-06 22:53:35 +03:00
2006-10-11 12:20:57 +04:00
return journal ;
2009-01-06 22:53:35 +03:00
out_err :
/*
 * j_wbuf is NULL here when its allocation was what failed (the journal
 * was allocated zeroed), and kfree(NULL) is a no-op.
 * NOTE(review): the revoke table set up by journal_init_common() is not
 * torn down on this path - confirm whether that leaks.
 */
2009-11-10 12:13:22 +03:00
kfree ( journal - > j_wbuf ) ;
2009-01-06 22:53:35 +03:00
jbd2_stats_proc_exit ( journal ) ;
kfree ( journal ) ;
return NULL ;
2006-10-11 12:20:57 +04:00
}
/**
2006-10-11 12:20:59 +04:00
* journal_t * jbd2_journal_init_inode () - creates a journal which maps to an inode.
2006-10-11 12:20:57 +04:00
* @ inode : An inode to create the journal in
*
2006-10-11 12:20:59 +04:00
* jbd2_journal_init_inode creates a journal which maps an on - disk inode as
2006-10-11 12:20:57 +04:00
* the journal . The inode must exist already , must support bmap ( ) and
* must have all data blocks preallocated .
*/
2006-10-11 12:20:59 +04:00
/*
 * Set up a journal_t for a journal stored in inode.  The journal device is
 * the inode's superblock device, its length is i_size expressed in
 * filesystem blocks and its block size is that of the filesystem.  The
 * buffer of the journal superblock (logical block 0 of the inode) is
 * obtained but not read.
 * Returns the journal, or NULL on any failure.
 */
journal_t * jbd2_journal_init_inode ( struct inode * inode )
2006-10-11 12:20:57 +04:00
{
struct buffer_head * bh ;
journal_t * journal = journal_init_common ( ) ;
2008-09-16 22:36:17 +04:00
char * p ;
2006-10-11 12:20:57 +04:00
int err ;
int n ;
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ;
2006-10-11 12:20:57 +04:00
if ( ! journal )
return NULL ;
journal - > j_dev = journal - > j_fs_dev = inode - > i_sb - > s_bdev ;
journal - > j_inode = inode ;
2008-09-16 22:36:17 +04:00
/*
 * Build j_devname from the device name (path separators replaced, since
 * it is used as a proc directory name) followed by the inode number.
 */
bdevname ( journal - > j_dev , journal - > j_devname ) ;
p = journal - > j_devname ;
while ( ( p = strchr ( p , ' / ' ) ) )
* p = ' ! ' ;
p = journal - > j_devname + strlen ( journal - > j_devname ) ;
2009-09-29 23:51:30 +04:00
sprintf ( p , " -%lu " , journal - > j_inode - > i_ino ) ;
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 ,
" journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld \n " ,
journal , inode - > i_sb - > s_id , inode - > i_ino ,
( long long ) inode - > i_size ,
inode - > i_sb - > s_blocksize_bits , inode - > i_sb - > s_blocksize ) ;
journal - > j_maxlen = inode - > i_size > > inode - > i_sb - > s_blocksize_bits ;
journal - > j_blocksize = inode - > i_sb - > s_blocksize ;
2008-01-29 07:58:27 +03:00
jbd2_stats_proc_init ( journal ) ;
2006-10-11 12:20:57 +04:00
/* journal descriptor can store up to n blocks -bzzz */
n = journal - > j_blocksize / sizeof ( journal_block_tag_t ) ;
journal - > j_wbufsize = n ;
journal - > j_wbuf = kmalloc ( n * sizeof ( struct buffer_head * ) , GFP_KERNEL ) ;
if ( ! journal - > j_wbuf ) {
2011-03-31 05:57:33 +04:00
printk ( KERN_ERR " %s: Can't allocate bhs for commit thread \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2009-01-06 22:53:35 +03:00
goto out_err ;
2006-10-11 12:20:57 +04:00
}
2006-10-11 12:20:59 +04:00
/* Locate the journal superblock: logical block 0 of the journal inode. */
err = jbd2_journal_bmap ( journal , 0 , & blocknr ) ;
2006-10-11 12:20:57 +04:00
/* If that failed, give up */
if ( err ) {
2011-02-27 07:34:05 +03:00
printk ( KERN_ERR " %s: Cannot locate journal superblock \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2009-01-06 22:53:35 +03:00
goto out_err ;
2006-10-11 12:20:57 +04:00
}
2014-09-05 06:36:35 +04:00
bh = getblk_unmovable ( journal - > j_dev , blocknr , journal - > j_blocksize ) ;
2009-01-06 22:53:35 +03:00
if ( ! bh ) {
printk ( KERN_ERR
" %s: Cannot get buffer for journal superblock \n " ,
__func__ ) ;
goto out_err ;
}
2006-10-11 12:20:57 +04:00
journal - > j_sb_buffer = bh ;
journal - > j_superblock = ( journal_superblock_t * ) bh - > b_data ;
return journal ;
2009-01-06 22:53:35 +03:00
out_err :
/*
 * j_wbuf is NULL here when its allocation was what failed (the journal
 * was allocated zeroed), and kfree(NULL) is a no-op.
 * NOTE(review): the revoke table set up by journal_init_common() is not
 * torn down on this path - confirm whether that leaks.
 */
2009-11-10 12:13:22 +03:00
kfree ( journal - > j_wbuf ) ;
2009-01-06 22:53:35 +03:00
jbd2_stats_proc_exit ( journal ) ;
kfree ( journal ) ;
return NULL ;
2006-10-11 12:20:57 +04:00
}
/*
 * If the journal init or create aborts, drop our reference on the journal
 * superblock buffer and forget it (j_sb_buffer becomes NULL), so that the
 * journal destroy path does not write back a bogus superblock.
 */
static void journal_fail_superblock(journal_t *journal)
{
	brelse(journal->j_sb_buffer);
	journal->j_sb_buffer = NULL;
}
/*
* Given a journal_t structure , initialise the various fields for
* startup of a new journaling session . We use this both when creating
* a journal , and after recovering an old journal to reset it for
* subsequent use .
*/
/*
 * Reset the in-memory log state to "empty log starting at s_first" using
 * the geometry recorded in the journal superblock, then start the journal
 * thread.
 *
 * Returns -EINVAL (after dropping the superblock buffer) if the log area
 * described by the superblock is too small, otherwise the result of
 * jbd2_journal_start_thread().
 */
static int journal_reset ( journal_t * journal )
{
journal_superblock_t * sb = journal - > j_superblock ;
2006-10-11 12:21:13 +04:00
unsigned long long first , last ;
2006-10-11 12:20:57 +04:00
first = be32_to_cpu ( sb - > s_first ) ;
last = be32_to_cpu ( sb - > s_maxlen ) ;
2009-07-17 18:40:01 +04:00
/* Refuse a log area smaller than JBD2_MIN_JOURNAL_BLOCKS. */
if ( first + JBD2_MIN_JOURNAL_BLOCKS > last + 1 ) {
2011-11-02 03:09:18 +04:00
printk ( KERN_ERR " JBD2: Journal too short (blocks %llu-%llu). \n " ,
2009-07-17 18:40:01 +04:00
first , last ) ;
journal_fail_superblock ( journal ) ;
return - EINVAL ;
}
2006-10-11 12:20:57 +04:00
/* Empty log: head and tail both sit at the first usable block. */
journal - > j_first = first ;
journal - > j_last = last ;
journal - > j_head = first ;
journal - > j_tail = first ;
journal - > j_free = last - first ;
/* No commit is outstanding: the last commit is the one before the next tid. */
journal - > j_tail_sequence = journal - > j_transaction_sequence ;
journal - > j_commit_sequence = journal - > j_transaction_sequence - 1 ;
journal - > j_commit_request = journal - > j_commit_sequence ;
journal - > j_max_transaction_buffers = journal - > j_maxlen / 4 ;
/*
 * As a special case, if the on-disk copy is already marked as needing
 * no recovery (s_start == 0), then we can safely defer the superblock
 * update until the next commit by setting JBD2_FLUSHED.  This avoids
 * attempting a write to a potential-readonly device.
 */
2012-03-13 23:41:04 +04:00
if ( sb - > s_start = = 0 ) {
2011-11-02 03:09:18 +04:00
jbd_debug ( 1 , " JBD2: Skipping superblock update on recovered sb "
2006-10-11 12:20:57 +04:00
" (start %ld, seq %d, errno %d) \n " ,
journal - > j_tail , journal - > j_tail_sequence ,
journal - > j_errno ) ;
2012-03-13 23:41:04 +04:00
journal - > j_flags | = JBD2_FLUSHED ;
} else {
2012-03-13 23:43:04 +04:00
/* Lock here to make assertions happy... */
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2012-03-14 06:22:54 +04:00
/*
 * Update log tail information.  We use WRITE_FUA since new
 * transaction will start reusing journal space and so we
 * must make sure information about current log tail is on
 * disk before that.
 */
jbd2_journal_update_sb_log_tail ( journal ,
journal - > j_tail_sequence ,
journal - > j_tail ,
WRITE_FUA ) ;
2012-03-13 23:43:04 +04:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:57 +04:00
}
2012-03-13 23:41:04 +04:00
return jbd2_journal_start_thread ( journal ) ;
}
2006-10-11 12:20:57 +04:00
2012-03-14 06:22:54 +04:00
/*
 * Write the journal superblock buffer to disk with the given write
 * operation and wait for the I/O to finish.  REQ_FUA and REQ_FLUSH are
 * stripped from write_op when the journal does not have JBD2_BARRIER set.
 * A failure is only logged; nothing is returned to the caller.
 */
static void jbd2_write_superblock ( journal_t * journal , int write_op )
2012-03-13 23:41:04 +04:00
{
struct buffer_head * bh = journal - > j_sb_buffer ;
2013-07-01 16:12:38 +04:00
journal_superblock_t * sb = journal - > j_superblock ;
2012-03-14 06:22:54 +04:00
int ret ;
2006-10-11 12:20:57 +04:00
2012-03-14 06:22:54 +04:00
trace_jbd2_write_superblock ( journal , write_op ) ;
if ( ! ( journal - > j_flags & JBD2_BARRIER ) )
write_op & = ~ ( REQ_FUA | REQ_FLUSH ) ;
lock_buffer ( bh ) ;
2008-10-07 05:35:40 +04:00
if ( buffer_write_io_error ( bh ) ) {
/*
 * Oh, dear.  A previous attempt to write the journal
 * superblock failed.  This could happen because the
 * USB device was yanked out.  Or it could happen to
 * be a transient write error and maybe the block will
 * be remapped.  Nothing we can do but to retry the
 * write and hope for the best.
 */
printk ( KERN_ERR " JBD2: previous I/O error detected "
" for journal superblock update for %s. \n " ,
journal - > j_devname ) ;
clear_buffer_write_io_error ( bh ) ;
set_buffer_uptodate ( bh ) ;
}
2013-07-01 16:12:38 +04:00
/* Refresh the superblock checksum right before the buffer is submitted. */
jbd2_superblock_csum_set ( journal , sb ) ;
2012-03-14 06:22:54 +04:00
/* Extra reference for the I/O; presumably dropped by the end_io handler - confirm. */
get_bh ( bh ) ;
bh - > b_end_io = end_buffer_write_sync ;
ret = submit_bh ( write_op , bh ) ;
wait_on_buffer ( bh ) ;
2012-03-13 23:41:04 +04:00
/* An I/O error flagged on the buffer overrides submit_bh()'s result. */
if ( buffer_write_io_error ( bh ) ) {
clear_buffer_write_io_error ( bh ) ;
set_buffer_uptodate ( bh ) ;
2012-03-14 06:22:54 +04:00
ret = - EIO ;
}
if ( ret ) {
printk ( KERN_ERR " JBD2: Error %d detected when updating "
" journal superblock for %s. \n " , ret ,
journal - > j_devname ) ;
2012-03-13 23:41:04 +04:00
}
}
/**
* jbd2_journal_update_sb_log_tail ( ) - Update log tail in journal sb on disk .
* @ journal : The journal to update .
2012-03-14 06:22:54 +04:00
* @ tail_tid : TID of the new transaction at the tail of the log
* @ tail_block : The first block of the transaction at the tail of the log
* @ write_op : With which operation should we write the journal sb
2012-03-13 23:41:04 +04:00
*
* Update a journal ' s superblock information about log tail and write it to
* disk , waiting for the IO to complete .
*/
2012-03-14 06:22:54 +04:00
/*
 * Store the new log tail (tail_tid, tail_block) in the journal superblock,
 * write the superblock with write_op and then clear JBD2_FLUSHED, since the
 * on-disk superblock now describes a log that has a start block.
 * The caller must hold j_checkpoint_mutex (checked with BUG_ON).
 */
void jbd2_journal_update_sb_log_tail ( journal_t * journal , tid_t tail_tid ,
unsigned long tail_block , int write_op )
2012-03-13 23:41:04 +04:00
{
journal_superblock_t * sb = journal - > j_superblock ;
2012-03-13 23:43:04 +04:00
BUG_ON ( ! mutex_is_locked ( & journal - > j_checkpoint_mutex ) ) ;
2012-03-14 06:22:54 +04:00
jbd_debug ( 1 , " JBD2: updating superblock (start %lu, seq %u) \n " ,
tail_block , tail_tid ) ;
2006-10-11 12:20:57 +04:00
2012-03-14 06:22:54 +04:00
/* Superblock fields are stored big-endian. */
sb - > s_sequence = cpu_to_be32 ( tail_tid ) ;
sb - > s_start = cpu_to_be32 ( tail_block ) ;
2006-10-11 12:20:57 +04:00
2012-03-14 06:22:54 +04:00
jbd2_write_superblock ( journal , write_op ) ;
2006-10-11 12:20:57 +04:00
2012-03-13 23:41:04 +04:00
/* Log is no longer empty */
write_lock ( & journal - > j_state_lock ) ;
WARN_ON ( ! sb - > s_sequence ) ;
journal - > j_flags & = ~ JBD2_FLUSHED ;
write_unlock ( & journal - > j_state_lock ) ;
}
2012-02-21 02:53:01 +04:00
2012-03-13 23:41:04 +04:00
/**
* jbd2_mark_journal_empty ( ) - Mark on disk journal as empty .
* @ journal : The journal to update .
*
* Update a journal ' s dynamic superblock fields to show that journal is empty .
* Write updated superblock to disk waiting for IO to complete .
*/
/*
 * Record in the on-disk superblock that the log is empty (s_start = 0,
 * s_sequence = current tail sequence), write it with WRITE_FUA and set
 * JBD2_FLUSHED.  Does nothing if the superblock already has s_start == 0.
 * The caller must hold j_checkpoint_mutex (checked with BUG_ON).
 */
static void jbd2_mark_journal_empty ( journal_t * journal )
{
journal_superblock_t * sb = journal - > j_superblock ;
2008-10-07 05:35:40 +04:00
2012-03-13 23:43:04 +04:00
BUG_ON ( ! mutex_is_locked ( & journal - > j_checkpoint_mutex ) ) ;
2010-08-04 05:35:12 +04:00
read_lock ( & journal - > j_state_lock ) ;
2012-08-19 06:29:40 +04:00
/* Is it already empty? */
if ( sb - > s_start = = 0 ) {
read_unlock ( & journal - > j_state_lock ) ;
return ;
}
2012-03-13 23:41:04 +04:00
jbd_debug ( 1 , " JBD2: Marking journal as empty (seq %d) \n " ,
journal - > j_tail_sequence ) ;
2006-10-11 12:20:57 +04:00
sb - > s_sequence = cpu_to_be32 ( journal - > j_tail_sequence ) ;
2012-03-13 23:41:04 +04:00
sb - > s_start = cpu_to_be32 ( 0 ) ;
2010-08-04 05:35:12 +04:00
read_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
2012-03-14 06:22:54 +04:00
jbd2_write_superblock ( journal , WRITE_FUA ) ;
2006-10-11 12:20:57 +04:00
2012-03-13 23:41:04 +04:00
/* The superblock now describes an empty log: flag the journal as flushed */
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2012-03-13 23:41:04 +04:00
journal - > j_flags | = JBD2_FLUSHED ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
}
2012-03-13 23:41:04 +04:00
/**
* jbd2_journal_update_sb_errno ( ) - Update error in the journal .
* @ journal : The journal to update .
*
* Update a journal ' s errno . Write updated superblock to disk waiting for IO
* to complete .
*/
2012-08-06 03:04:57 +04:00
/*
 * Copy the journal's in-memory j_errno into the superblock (read under
 * j_state_lock) and write the superblock out with WRITE_SYNC.
 */
void jbd2_journal_update_sb_errno ( journal_t * journal )
2012-03-13 23:41:04 +04:00
{
journal_superblock_t * sb = journal - > j_superblock ;
read_lock ( & journal - > j_state_lock ) ;
jbd_debug ( 1 , " JBD2: updating superblock error (errno %d) \n " ,
journal - > j_errno ) ;
sb - > s_errno = cpu_to_be32 ( journal - > j_errno ) ;
read_unlock ( & journal - > j_state_lock ) ;
2012-03-14 06:22:54 +04:00
/* The write happens outside j_state_lock. */
jbd2_write_superblock ( journal , WRITE_SYNC ) ;
2012-03-13 23:41:04 +04:00
}
2012-08-06 03:04:57 +04:00
EXPORT_SYMBOL ( jbd2_journal_update_sb_errno ) ;
2012-03-13 23:41:04 +04:00
2006-10-11 12:20:57 +04:00
/*
* Read the superblock for a given journal , performing initial
* validation of the format .
*/
/*
 * Read the journal superblock buffer if it is not uptodate yet and validate
 * it: magic and block size, format version, journal length, first block,
 * checksum feature combination, checksum type and the superblock checksum.
 * A buffer that passed is marked verified and later calls return 0 at once.
 *
 * Returns 0 on success, -EIO if the block cannot be read, -EINVAL if a
 * validation step fails, or the error from crypto_alloc_shash().  Every
 * failure path ends in journal_fail_superblock(), which drops
 * j_sb_buffer.
 */
static int journal_get_superblock ( journal_t * journal )
{
struct buffer_head * bh ;
journal_superblock_t * sb ;
int err = - EIO ;
bh = journal - > j_sb_buffer ;
J_ASSERT ( bh ! = NULL ) ;
if ( ! buffer_uptodate ( bh ) ) {
ll_rw_block ( READ , 1 , & bh ) ;
wait_on_buffer ( bh ) ;
if ( ! buffer_uptodate ( bh ) ) {
2011-11-02 03:09:18 +04:00
printk ( KERN_ERR
" JBD2: IO error reading journal superblock \n " ) ;
2006-10-11 12:20:57 +04:00
goto out ;
}
}
2012-05-27 15:48:56 +04:00
/* Already validated by an earlier call. */
if ( buffer_verified ( bh ) )
return 0 ;
2006-10-11 12:20:57 +04:00
sb = journal - > j_superblock ;
/* From here on failures are validation errors rather than I/O errors. */
err = - EINVAL ;
2006-10-11 12:20:59 +04:00
if ( sb - > s_header . h_magic ! = cpu_to_be32 ( JBD2_MAGIC_NUMBER ) | |
2006-10-11 12:20:57 +04:00
sb - > s_blocksize ! = cpu_to_be32 ( journal - > j_blocksize ) ) {
2011-11-02 03:09:18 +04:00
printk ( KERN_WARNING " JBD2: no valid journal superblock found \n " ) ;
2006-10-11 12:20:57 +04:00
goto out ;
}
switch ( be32_to_cpu ( sb - > s_header . h_blocktype ) ) {
2006-10-11 12:20:59 +04:00
case JBD2_SUPERBLOCK_V1 :
2006-10-11 12:20:57 +04:00
journal - > j_format_version = 1 ;
break ;
2006-10-11 12:20:59 +04:00
case JBD2_SUPERBLOCK_V2 :
2006-10-11 12:20:57 +04:00
journal - > j_format_version = 2 ;
break ;
default :
2011-11-02 03:09:18 +04:00
printk ( KERN_WARNING " JBD2: unrecognised superblock format ID \n " ) ;
2006-10-11 12:20:57 +04:00
goto out ;
}
/*
 * The superblock may describe a journal shorter than the space we were
 * given (then j_maxlen is reduced), but never a longer one.
 */
if ( be32_to_cpu ( sb - > s_maxlen ) < journal - > j_maxlen )
journal - > j_maxlen = be32_to_cpu ( sb - > s_maxlen ) ;
else if ( be32_to_cpu ( sb - > s_maxlen ) > journal - > j_maxlen ) {
2011-11-02 03:09:18 +04:00
printk ( KERN_WARNING " JBD2: journal file too short \n " ) ;
2006-10-11 12:20:57 +04:00
goto out ;
}
2011-11-02 03:04:59 +04:00
/* The first log block must be non-zero and lie inside the journal. */
if ( be32_to_cpu ( sb - > s_first ) = = 0 | |
be32_to_cpu ( sb - > s_first ) > = journal - > j_maxlen ) {
printk ( KERN_WARNING
" JBD2: Invalid start block of journal: %u \n " ,
be32_to_cpu ( sb - > s_first ) ) ;
goto out ;
}
2014-08-28 02:40:07 +04:00
if ( JBD2_HAS_INCOMPAT_FEATURE ( journal , JBD2_FEATURE_INCOMPAT_CSUM_V2 ) & &
JBD2_HAS_INCOMPAT_FEATURE ( journal , JBD2_FEATURE_INCOMPAT_CSUM_V3 ) ) {
/* Can't have checksum v2 and v3 at the same time! */
printk ( KERN_ERR " JBD2: Can't enable checksumming v2 and v3 "
" at the same time! \n " ) ;
goto out ;
}
2014-09-11 19:38:21 +04:00
if ( jbd2_journal_has_csum_v2or3 ( journal ) & &
JBD2_HAS_COMPAT_FEATURE ( journal , JBD2_FEATURE_COMPAT_CHECKSUM ) ) {
/* Can't have checksum v1 and v2 on at the same time! */
printk ( KERN_ERR " JBD2: Can't enable checksumming v1 and v2/3 "
" at the same time! \n " ) ;
goto out ;
}
2012-05-27 15:48:56 +04:00
if ( ! jbd2_verify_csum_type ( journal , sb ) ) {
2013-12-09 06:14:59 +04:00
printk ( KERN_ERR " JBD2: Unknown checksum type \n " ) ;
2012-05-27 15:48:56 +04:00
goto out ;
}
2012-05-27 15:50:56 +04:00
/* Load the checksum driver */
2014-08-28 02:40:07 +04:00
if ( jbd2_journal_has_csum_v2or3 ( journal ) ) {
2012-05-27 15:50:56 +04:00
journal - > j_chksum_driver = crypto_alloc_shash ( " crc32c " , 0 , 0 ) ;
if ( IS_ERR ( journal - > j_chksum_driver ) ) {
2013-12-09 06:14:59 +04:00
printk ( KERN_ERR " JBD2: Cannot load crc32c driver. \n " ) ;
2012-05-27 15:50:56 +04:00
/* Hand the allocation error to the caller and do not keep an ERR_PTR around. */
err = PTR_ERR ( journal - > j_chksum_driver ) ;
journal - > j_chksum_driver = NULL ;
goto out ;
}
}
2012-05-27 16:08:22 +04:00
/* Check superblock checksum */
if ( ! jbd2_superblock_csum_verify ( journal , sb ) ) {
2013-12-09 06:14:59 +04:00
printk ( KERN_ERR " JBD2: journal checksum error \n " ) ;
2012-05-27 16:08:22 +04:00
goto out ;
}
/* Precompute checksum seed for all metadata */
2014-08-28 02:40:07 +04:00
if ( jbd2_journal_has_csum_v2or3 ( journal ) )
2012-05-27 16:08:22 +04:00
journal - > j_csum_seed = jbd2_chksum ( journal , ~ 0 , sb - > s_uuid ,
sizeof ( sb - > s_uuid ) ) ;
2012-05-27 15:48:56 +04:00
set_buffer_verified ( bh ) ;
2006-10-11 12:20:57 +04:00
return 0 ;
out :
journal_fail_superblock ( journal ) ;
return err ;
}
/*
 * Read the on-disk journal superblock (via journal_get_superblock()) and
 * copy its key fields, converted from big-endian, into the in-memory
 * journal_t.
 *
 * Returns 0 on success, or the error reported by journal_get_superblock().
 */
static int load_superblock(journal_t *journal)
{
	const journal_superblock_t *disk_sb;
	int rc = journal_get_superblock(journal);

	if (rc)
		return rc;

	disk_sb = journal->j_superblock;

	journal->j_tail_sequence = be32_to_cpu(disk_sb->s_sequence);
	journal->j_tail          = be32_to_cpu(disk_sb->s_start);
	journal->j_first         = be32_to_cpu(disk_sb->s_first);
	journal->j_last          = be32_to_cpu(disk_sb->s_maxlen);
	journal->j_errno         = be32_to_cpu(disk_sb->s_errno);

	return 0;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_load ( ) - Read journal from disk .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
*
* Given a journal_t structure which tells us which disk blocks contain
* a journal , read the journal from disk to initialise the in - memory
* structures .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_load ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err ;
journal_superblock_t * sb ;
err = load_superblock ( journal ) ;
if ( err )
return err ;
sb = journal - > j_superblock ;
/* If this is a V2 superblock, then we have to check the
* features flags on it . */
if ( journal - > j_format_version > = 2 ) {
if ( ( sb - > s_feature_ro_compat &
2006-10-11 12:20:59 +04:00
~ cpu_to_be32 ( JBD2_KNOWN_ROCOMPAT_FEATURES ) ) | |
2006-10-11 12:20:57 +04:00
( sb - > s_feature_incompat &
2006-10-11 12:20:59 +04:00
~ cpu_to_be32 ( JBD2_KNOWN_INCOMPAT_FEATURES ) ) ) {
2011-11-02 03:09:18 +04:00
printk ( KERN_WARNING
" JBD2: Unrecognised features on journal \n " ) ;
2006-10-11 12:20:57 +04:00
return - EINVAL ;
}
}
2009-12-07 18:36:20 +03:00
/*
* Create a slab for this blocksize
*/
err = jbd2_journal_create_slab ( be32_to_cpu ( sb - > s_blocksize ) ) ;
if ( err )
return err ;
2006-10-11 12:20:57 +04:00
/* Let the recovery code check whether it needs to recover any
* data from the journal . */
2006-10-11 12:20:59 +04:00
if ( jbd2_journal_recover ( journal ) )
2006-10-11 12:20:57 +04:00
goto recovery_error ;
2009-11-15 23:31:37 +03:00
if ( journal - > j_failed_commit ) {
printk ( KERN_ERR " JBD2: journal transaction %u on %s "
" is corrupt. \n " , journal - > j_failed_commit ,
journal - > j_devname ) ;
return - EIO ;
}
2006-10-11 12:20:57 +04:00
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk . */
if ( journal_reset ( journal ) )
goto recovery_error ;
2006-10-11 12:20:59 +04:00
journal - > j_flags & = ~ JBD2_ABORT ;
journal - > j_flags | = JBD2_LOADED ;
2006-10-11 12:20:57 +04:00
return 0 ;
recovery_error :
2011-11-02 03:09:18 +04:00
printk ( KERN_WARNING " JBD2: recovery failed \n " ) ;
2006-10-11 12:20:57 +04:00
return - EIO ;
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_destroy ( ) - Release a journal_t structure .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
*
* Release a journal_t structure once it is no longer in use by the
* journaled object .
2008-10-11 04:29:13 +04:00
* Return < 0 if we couldn ' t clean up the journal .
2006-10-11 12:20:57 +04:00
*/
2008-10-11 04:29:13 +04:00
int jbd2_journal_destroy ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
2008-10-11 04:29:13 +04:00
int err = 0 ;
2006-10-11 12:20:57 +04:00
/* Wait for the commit thread to wake up and die. */
journal_kill_thread ( journal ) ;
/* Force a final log commit */
if ( journal - > j_running_transaction )
2006-10-11 12:20:59 +04:00
jbd2_journal_commit_transaction ( journal ) ;
2006-10-11 12:20:57 +04:00
/* Force any old transactions to disk */
/* Totally anal locking here... */
spin_lock ( & journal - > j_list_lock ) ;
while ( journal - > j_checkpoint_transactions ! = NULL ) {
spin_unlock ( & journal - > j_list_lock ) ;
2008-11-05 08:09:22 +03:00
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:59 +04:00
jbd2_log_do_checkpoint ( journal ) ;
2008-11-05 08:09:22 +03:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_list_lock ) ;
}
J_ASSERT ( journal - > j_running_transaction = = NULL ) ;
J_ASSERT ( journal - > j_committing_transaction = = NULL ) ;
J_ASSERT ( journal - > j_checkpoint_transactions = = NULL ) ;
spin_unlock ( & journal - > j_list_lock ) ;
if ( journal - > j_sb_buffer ) {
2008-10-11 04:29:13 +04:00
if ( ! is_journal_aborted ( journal ) ) {
2012-03-13 23:43:04 +04:00
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2012-03-13 23:41:04 +04:00
jbd2_mark_journal_empty ( journal ) ;
2012-03-13 23:43:04 +04:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
} else
2008-10-11 04:29:13 +04:00
err = - EIO ;
2006-10-11 12:20:57 +04:00
brelse ( journal - > j_sb_buffer ) ;
}
2008-01-29 07:58:27 +03:00
if ( journal - > j_proc_entry )
jbd2_stats_proc_exit ( journal ) ;
2014-11-26 04:02:37 +03:00
iput ( journal - > j_inode ) ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_revoke )
2006-10-11 12:20:59 +04:00
jbd2_journal_destroy_revoke ( journal ) ;
2012-05-27 15:50:56 +04:00
if ( journal - > j_chksum_driver )
crypto_free_shash ( journal - > j_chksum_driver ) ;
2006-10-11 12:20:57 +04:00
kfree ( journal - > j_wbuf ) ;
kfree ( journal ) ;
2008-10-11 04:29:13 +04:00
return err ;
2006-10-11 12:20:57 +04:00
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_check_used_features ( ) - Check if features specified are used .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to check .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Check whether the journal uses all of a given set of
* features . Return true ( non - zero ) if it does .
* */
2006-10-11 12:20:59 +04:00
int jbd2_journal_check_used_features ( journal_t * journal , unsigned long compat ,
2006-10-11 12:20:57 +04:00
unsigned long ro , unsigned long incompat )
{
journal_superblock_t * sb ;
if ( ! compat & & ! ro & & ! incompat )
return 1 ;
2010-07-23 02:04:16 +04:00
/* Load journal superblock if it is not loaded yet. */
if ( journal - > j_format_version = = 0 & &
journal_get_superblock ( journal ) ! = 0 )
return 0 ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_format_version = = 1 )
return 0 ;
sb = journal - > j_superblock ;
if ( ( ( be32_to_cpu ( sb - > s_feature_compat ) & compat ) = = compat ) & &
( ( be32_to_cpu ( sb - > s_feature_ro_compat ) & ro ) = = ro ) & &
( ( be32_to_cpu ( sb - > s_feature_incompat ) & incompat ) = = incompat ) )
return 1 ;
return 0 ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_check_available_features ( ) - Check feature set in journalling layer
2006-10-11 12:20:57 +04:00
* @ journal : Journal to check .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Check whether the journaling code supports the use of
* all of a given set of features on this journal . Return true
* ( non - zero ) if it can . */
2006-10-11 12:20:59 +04:00
int jbd2_journal_check_available_features ( journal_t * journal , unsigned long compat ,
2006-10-11 12:20:57 +04:00
unsigned long ro , unsigned long incompat )
{
if ( ! compat & & ! ro & & ! incompat )
return 1 ;
/* We can support any known requested features iff the
* superblock is in version 2. Otherwise we fail to support any
* extended sb features . */
if ( journal - > j_format_version ! = 2 )
return 0 ;
2006-10-11 12:20:59 +04:00
if ( ( compat & JBD2_KNOWN_COMPAT_FEATURES ) = = compat & &
( ro & JBD2_KNOWN_ROCOMPAT_FEATURES ) = = ro & &
( incompat & JBD2_KNOWN_INCOMPAT_FEATURES ) = = incompat )
2006-10-11 12:20:57 +04:00
return 1 ;
return 0 ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_set_features ( ) - Mark a given journal feature in the superblock
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Mark a given journal feature as present on the
* superblock . Returns true if the requested features could be set .
*
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_set_features ( journal_t * journal , unsigned long compat ,
2006-10-11 12:20:57 +04:00
unsigned long ro , unsigned long incompat )
{
2012-05-27 15:48:56 +04:00
# define INCOMPAT_FEATURE_ON(f) \
( ( incompat & ( f ) ) & & ! ( sb - > s_feature_incompat & cpu_to_be32 ( f ) ) )
# define COMPAT_FEATURE_ON(f) \
( ( compat & ( f ) ) & & ! ( sb - > s_feature_compat & cpu_to_be32 ( f ) ) )
2006-10-11 12:20:57 +04:00
journal_superblock_t * sb ;
2006-10-11 12:20:59 +04:00
if ( jbd2_journal_check_used_features ( journal , compat , ro , incompat ) )
2006-10-11 12:20:57 +04:00
return 1 ;
2006-10-11 12:20:59 +04:00
if ( ! jbd2_journal_check_available_features ( journal , compat , ro , incompat ) )
2006-10-11 12:20:57 +04:00
return 0 ;
2014-08-28 02:40:07 +04:00
/* If enabling v2 checksums, turn on v3 instead */
if ( incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 ) {
incompat & = ~ JBD2_FEATURE_INCOMPAT_CSUM_V2 ;
incompat | = JBD2_FEATURE_INCOMPAT_CSUM_V3 ;
}
/* Asking for checksumming v3 and v1? Only give them v3. */
if ( incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 & &
2012-05-27 15:48:56 +04:00
compat & JBD2_FEATURE_COMPAT_CHECKSUM )
compat & = ~ JBD2_FEATURE_COMPAT_CHECKSUM ;
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 , " Setting new features 0x%lx/0x%lx/0x%lx \n " ,
compat , ro , incompat ) ;
sb = journal - > j_superblock ;
2014-08-28 02:40:07 +04:00
/* If enabling v3 checksums, update superblock */
if ( INCOMPAT_FEATURE_ON ( JBD2_FEATURE_INCOMPAT_CSUM_V3 ) ) {
2012-05-27 15:48:56 +04:00
sb - > s_checksum_type = JBD2_CRC32C_CHKSUM ;
sb - > s_feature_compat & =
~ cpu_to_be32 ( JBD2_FEATURE_COMPAT_CHECKSUM ) ;
2012-05-27 15:50:56 +04:00
/* Load the checksum driver */
if ( journal - > j_chksum_driver = = NULL ) {
journal - > j_chksum_driver = crypto_alloc_shash ( " crc32c " ,
0 , 0 ) ;
if ( IS_ERR ( journal - > j_chksum_driver ) ) {
2013-12-09 06:14:59 +04:00
printk ( KERN_ERR " JBD2: Cannot load crc32c "
2012-05-27 15:50:56 +04:00
" driver. \n " ) ;
journal - > j_chksum_driver = NULL ;
return 0 ;
}
2012-05-27 16:08:22 +04:00
2014-12-02 03:22:23 +03:00
/* Precompute checksum seed for all metadata */
2012-05-27 16:08:22 +04:00
journal - > j_csum_seed = jbd2_chksum ( journal , ~ 0 ,
sb - > s_uuid ,
sizeof ( sb - > s_uuid ) ) ;
2014-12-02 03:22:23 +03:00
}
2012-05-27 15:48:56 +04:00
}
/* If enabling v1 checksums, downgrade superblock */
if ( COMPAT_FEATURE_ON ( JBD2_FEATURE_COMPAT_CHECKSUM ) )
sb - > s_feature_incompat & =
2014-08-28 02:40:07 +04:00
~ cpu_to_be32 ( JBD2_FEATURE_INCOMPAT_CSUM_V2 |
JBD2_FEATURE_INCOMPAT_CSUM_V3 ) ;
2012-05-27 15:48:56 +04:00
2006-10-11 12:20:57 +04:00
sb - > s_feature_compat | = cpu_to_be32 ( compat ) ;
sb - > s_feature_ro_compat | = cpu_to_be32 ( ro ) ;
sb - > s_feature_incompat | = cpu_to_be32 ( incompat ) ;
return 1 ;
2012-05-27 15:48:56 +04:00
# undef COMPAT_FEATURE_ON
# undef INCOMPAT_FEATURE_ON
2006-10-11 12:20:57 +04:00
}
2008-01-29 07:58:27 +03:00
/**
 * jbd2_journal_clear_features() - Clear a given journal feature in the
 *				   superblock
 * @journal: Journal to act on.
 * @compat: bitmask of compatible features
 * @ro: bitmask of features that force read-only mount
 * @incompat: bitmask of incompatible features
 *
 * Clear the given feature bits in the in-memory journal superblock.
 */
void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
				 unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *jsb = journal->j_superblock;

	jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
		  compat, ro, incompat);

	jsb->s_feature_compat    &= ~cpu_to_be32(compat);
	jsb->s_feature_ro_compat &= ~cpu_to_be32(ro);
	jsb->s_feature_incompat  &= ~cpu_to_be32(incompat);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
2006-10-11 12:20:57 +04:00
/**
 * jbd2_journal_flush() - Flush journal
 * @journal: Journal to act on.
 *
 * Flush all data for a given journal to disk and empty the journal.
 * Filesystems can use this when remounting readonly to ensure that
 * recovery does not need to happen on remount.
 *
 * Return: 0 on success, -EIO if the journal is found to be aborted once
 * the checkpoint loop has finished.  The value returned by
 * jbd2_log_do_checkpoint() is only used to stop the loop; it is not
 * propagated to the caller.
 */
2006-10-11 12:20:59 +04:00
int jbd2_journal_flush ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
transaction_t * transaction = NULL ;
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
/*
 * Force everything buffered to the log: start a commit of the running
 * transaction if there is one, otherwise fall back to the transaction
 * that is already committing.
 */
if ( journal - > j_running_transaction ) {
transaction = journal - > j_running_transaction ;
2006-10-11 12:20:59 +04:00
__jbd2_log_start_commit ( journal , transaction - > t_tid ) ;
2006-10-11 12:20:57 +04:00
} else if ( journal - > j_committing_transaction )
transaction = journal - > j_committing_transaction ;
/*
 * Wait for the log commit to complete.  The tid is sampled while
 * j_state_lock is still held; the lock is dropped before waiting.
 */
if ( transaction ) {
tid_t tid = transaction - > t_tid ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
jbd2_log_wait_commit ( journal , tid ) ;
2006-10-11 12:20:57 +04:00
} else {
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
}
/*
 * ...and flush everything in the log out to disk.  j_list_lock only
 * protects the test of j_checkpoint_transactions; it is released
 * around each checkpoint pass, which runs under j_checkpoint_mutex.
 * The loop stops early once a pass returns non-zero.
 */
spin_lock ( & journal - > j_list_lock ) ;
while ( ! err & & journal - > j_checkpoint_transactions ! = NULL ) {
spin_unlock ( & journal - > j_list_lock ) ;
2008-10-11 04:29:13 +04:00
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:59 +04:00
err = jbd2_log_do_checkpoint ( journal ) ;
2008-10-11 04:29:13 +04:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_list_lock ) ;
}
spin_unlock ( & journal - > j_list_lock ) ;
2008-10-11 04:29:13 +04:00
/* An aborted journal must not be marked empty. */
if ( is_journal_aborted ( journal ) )
return - EIO ;
2012-03-13 23:43:04 +04:00
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:59 +04:00
/* NOTE(review): the return value of jbd2_cleanup_journal_tail() is ignored here. */
jbd2_cleanup_journal_tail ( journal ) ;
2006-10-11 12:20:57 +04:00
/* Finally, mark the journal as really needing no recovery.
 * This sets s_start == 0 in the underlying superblock, which is
 * the magic code for a fully-recovered superblock.  Any future
 * commits of data to the journal will restore the current
 * s_start value. */
2012-03-13 23:41:04 +04:00
jbd2_mark_journal_empty ( journal ) ;
2012-03-13 23:43:04 +04:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
2010-08-04 05:35:12 +04:00
/* Sanity-check, under j_state_lock, that the journal really is empty. */
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
J_ASSERT ( ! journal - > j_running_transaction ) ;
J_ASSERT ( ! journal - > j_committing_transaction ) ;
J_ASSERT ( ! journal - > j_checkpoint_transactions ) ;
J_ASSERT ( journal - > j_head = = journal - > j_tail ) ;
J_ASSERT ( journal - > j_tail_sequence = = journal - > j_transaction_sequence ) ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2008-10-11 04:29:13 +04:00
return 0 ;
2006-10-11 12:20:57 +04:00
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_wipe ( ) - Wipe journal contents
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
* @ write : flag ( see below )
*
* Wipe out all of the contents of a journal , safely . This will produce
* a warning if the journal contains any valid recovery information .
2006-10-11 12:20:59 +04:00
* Must be called between journal_init_ * ( ) and jbd2_journal_load ( ) .
2006-10-11 12:20:57 +04:00
*
* If ' write ' is non - zero , then we wipe out the journal on disk ; otherwise
* we merely suppress recovery .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_wipe ( journal_t * journal , int write )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
2006-10-11 12:20:59 +04:00
J_ASSERT ( ! ( journal - > j_flags & JBD2_LOADED ) ) ;
2006-10-11 12:20:57 +04:00
err = load_superblock ( journal ) ;
if ( err )
return err ;
if ( ! journal - > j_tail )
goto no_recovery ;
2011-11-02 03:09:18 +04:00
printk ( KERN_WARNING " JBD2: %s recovery information on journal \n " ,
2006-10-11 12:20:57 +04:00
write ? " Clearing " : " Ignoring " ) ;
2006-10-11 12:20:59 +04:00
err = jbd2_journal_skip_recovery ( journal ) ;
2012-03-13 23:43:04 +04:00
if ( write ) {
/* Lock to make assertions happy... */
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2012-03-13 23:41:04 +04:00
jbd2_mark_journal_empty ( journal ) ;
2012-03-13 23:43:04 +04:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
}
2006-10-11 12:20:57 +04:00
no_recovery :
return err ;
}
/*
 * Journal abort has very specific semantics, which are described in
 * the comment above jbd2_journal_abort().
 *
 * The two internal functions which provide abort to the jbd layer
 * itself are here.
 */
/*
* Quick version for internal journal use ( doesn ' t lock the journal ) .
* Aborts hard - - - we mark the abort as occurred , but do _nothing_ else ,
* and don ' t attempt to make any other journal updates .
*/
2006-10-11 12:20:59 +04:00
void __jbd2_journal_abort_hard ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
transaction_t * transaction ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_ABORT )
2006-10-11 12:20:57 +04:00
return ;
printk ( KERN_ERR " Aborting journal on device %s. \n " ,
2008-09-16 22:36:17 +04:00
journal - > j_devname ) ;
2006-10-11 12:20:57 +04:00
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
journal - > j_flags | = JBD2_ABORT ;
2006-10-11 12:20:57 +04:00
transaction = journal - > j_running_transaction ;
if ( transaction )
2006-10-11 12:20:59 +04:00
__jbd2_log_start_commit ( journal , transaction - > t_tid ) ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
}
/*
 * Soft abort: record the abort error status in the journal superblock,
 * but don't do any other IO.
 *
 * Nothing happens if the journal is already aborted.  An errno that is
 * already recorded in the journal is not overwritten, and the superblock
 * is only updated when @errno is non-zero.
 */
static void __journal_abort_soft(journal_t *journal, int errno)
{
	if (journal->j_flags & JBD2_ABORT)
		return;

	if (journal->j_errno == 0)
		journal->j_errno = errno;

	__jbd2_journal_abort_hard(journal);

	if (errno != 0)
		jbd2_journal_update_sb_errno(journal);
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_abort ( ) - Shutdown the journal immediately .
2006-10-11 12:20:57 +04:00
* @ journal : the journal to shutdown .
* @ errno : an error number to record in the journal indicating
* the reason for the shutdown .
*
* Perform a complete , immediate shutdown of the ENTIRE
* journal ( not of a single transaction ) . This operation cannot be
* undone without closing and reopening the journal .
*
2006-10-11 12:20:59 +04:00
* The jbd2_journal_abort function is intended to support higher level error
2006-10-11 12:20:57 +04:00
* recovery mechanisms such as the ext2 / ext3 remount - readonly error
* mode .
*
* Journal abort has very specific semantics . Any existing dirty ,
* unjournaled buffers in the main filesystem will still be written to
* disk by bdflush , but the journaling mechanism will be suspended
* immediately and no further transaction commits will be honoured .
*
* Any dirty , journaled buffers will be written back to disk without
* hitting the journal . Atomicity cannot be guaranteed on an aborted
* filesystem , but we _do_ attempt to leave as much data as possible
* behind for fsck to use for cleanup .
*
* Any attempt to get a new transaction handle on a journal which is in
* ABORT state will just result in an - EROFS error return . A
2006-10-11 12:20:59 +04:00
* jbd2_journal_stop on an existing handle will return - EIO if we have
2006-10-11 12:20:57 +04:00
* entered abort state during the update .
*
* Recursive transactions are not disturbed by journal abort until the
2006-10-11 12:20:59 +04:00
* final jbd2_journal_stop , which will receive the - EIO error .
2006-10-11 12:20:57 +04:00
*
2006-10-11 12:20:59 +04:00
* Finally , the jbd2_journal_abort call allows the caller to supply an errno
2006-10-11 12:20:57 +04:00
* which will be recorded ( if possible ) in the journal superblock . This
* allows a client to record failure conditions in the middle of a
* transaction without having to complete the transaction to record the
* failure to disk . ext3_error , for example , now uses this
* functionality .
*
* Errors which originate from within the journaling layer will NOT
* supply an errno ; a null errno implies that absolutely no further
* writes are done to the journal ( unless there are any already in
* progress ) .
*
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_abort ( journal_t * journal , int errno )
2006-10-11 12:20:57 +04:00
{
__journal_abort_soft ( journal , errno ) ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_errno ( ) - returns the journal ' s error state .
2006-10-11 12:20:57 +04:00
* @ journal : journal to examine .
*
2009-06-09 08:06:20 +04:00
* This is the errno number set with jbd2_journal_abort ( ) , the last
2006-10-11 12:20:57 +04:00
* time the journal was mounted - if the journal was stopped
* without calling abort this will be 0.
*
* If the journal has been aborted on this mount time - EROFS will
* be returned .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_errno ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err ;
2010-08-04 05:35:12 +04:00
read_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_ABORT )
2006-10-11 12:20:57 +04:00
err = - EROFS ;
else
err = journal - > j_errno ;
2010-08-04 05:35:12 +04:00
read_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
return err ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_clear_err ( ) - clears the journal ' s error state
2006-10-11 12:20:57 +04:00
* @ journal : journal to act on .
*
2009-06-09 08:06:20 +04:00
* An error must be cleared or acked to take a FS out of readonly
2006-10-11 12:20:57 +04:00
* mode .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_clear_err ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_ABORT )
2006-10-11 12:20:57 +04:00
err = - EROFS ;
else
journal - > j_errno = 0 ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
return err ;
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_ack_err ( ) - Ack journal err .
2006-10-11 12:20:57 +04:00
* @ journal : journal to act on .
*
2009-06-09 08:06:20 +04:00
* An error must be cleared or acked to take a FS out of readonly
2006-10-11 12:20:57 +04:00
* mode .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_ack_err ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
2010-08-04 05:35:12 +04:00
write_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_errno )
2006-10-11 12:20:59 +04:00
journal - > j_flags | = JBD2_ACK_ERR ;
2010-08-04 05:35:12 +04:00
write_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:57 +04:00
}
2006-10-11 12:20:59 +04:00
int jbd2_journal_blocks_per_page ( struct inode * inode )
2006-10-11 12:20:57 +04:00
{
return 1 < < ( PAGE_CACHE_SHIFT - inode - > i_sb - > s_blocksize_bits ) ;
}
2006-10-11 12:21:08 +04:00
/*
 * helper functions to deal with 32 or 64 bit block numbers.
 *
 * Returns the size of one on-disk block tag for @journal: the full v3
 * tag when v3 checksums are enabled; otherwise the base tag, plus a
 * 16-bit field for v2 checksums, minus 32 bits when the journal does
 * not use 64-bit block numbers.
 */
size_t journal_tag_bytes(journal_t *journal)
{
	size_t len;

	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
		return sizeof(journal_block_tag3_t);

	len = sizeof(journal_block_tag_t);

	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
		len += sizeof(__u16);

	if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
		len -= sizeof(__u32);

	return len;
}
2009-12-07 18:36:20 +03:00
/*
 * JBD memory management
 *
 * These functions are used to allocate block-sized chunks of memory
 * used for making copies of buffer_head data.  Very often it will be
 * page-sized chunks of data, but sometimes it will be in
 * sub-page-size chunks.  (For example, 16k pages on Power systems
 * with a 4k block file system.)  For blocks smaller than a page, we
 * use a SLAB allocator.  There are slab caches for each block size,
 * which are allocated at mount time, if necessary, and we only free
 * (all of) the slab caches when/if the jbd2 module is unloaded.  For
 * this reason we don't need a mutex to protect access to jbd2_slab[]
 * when allocating or releasing memory; a mutex is only needed in
 * jbd2_journal_create_slab().
 */
/* Number of distinct block-size slab caches (one per entry below). */
# define JBD2_MAX_SLABS 8
/*
 * Per-block-size caches, created by jbd2_journal_create_slab().
 * Slot i holds objects of size 1 << (i + 10) bytes.
 */
static struct kmem_cache * jbd2_slab [ JBD2_MAX_SLABS ] ;
/* Cache names, indexed the same way as jbd2_slab[]. */
static const char * jbd2_slab_names [ JBD2_MAX_SLABS ] = {
" jbd2_1k " , " jbd2_2k " , " jbd2_4k " , " jbd2_8k " ,
" jbd2_16k " , " jbd2_32k " , " jbd2_64k " , " jbd2_128k "
} ;
/* Destroy every block-size slab cache that was created and clear its slot. */
static void jbd2_journal_destroy_slabs(void)
{
	int slot;

	for (slot = 0; slot < JBD2_MAX_SLABS; slot++) {
		if (!jbd2_slab[slot])
			continue;
		kmem_cache_destroy(jbd2_slab[slot]);
		jbd2_slab[slot] = NULL;
	}
}
/*
 * Make sure a slab cache exists for blocks of @size bytes.
 *
 * Page-sized blocks need no slab.  Sizes below 1k share slot 0 (the 1k
 * cache).  Cache creation is serialised by a function-local mutex.
 *
 * Returns 0 on success (including "already created"), -EINVAL if @size
 * is beyond the largest supported slab, -ENOMEM if the cache could not
 * be created.
 */
static int jbd2_journal_create_slab(size_t size)
{
	static DEFINE_MUTEX(jbd2_slab_create_mutex);
	int slot = order_base_2(size) - 10;
	size_t obj_size;

	if (size == PAGE_SIZE)
		return 0;

	if (slot >= JBD2_MAX_SLABS)
		return -EINVAL;

	if (unlikely(slot < 0))
		slot = 0;

	mutex_lock(&jbd2_slab_create_mutex);
	if (!jbd2_slab[slot]) {
		/* Objects are aligned to their own size. */
		obj_size = 1 << (slot + 10);
		jbd2_slab[slot] = kmem_cache_create(jbd2_slab_names[slot],
						    obj_size, obj_size,
						    0, NULL);
	}
	mutex_unlock(&jbd2_slab_create_mutex);

	if (jbd2_slab[slot])
		return 0;

	printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
	return -ENOMEM;
}
/*
 * Look up the slab cache serving blocks of @size bytes.  BUGs if @size
 * is larger than any supported slab or if the cache was never created.
 */
static struct kmem_cache *get_slab(size_t size)
{
	struct kmem_cache *cache;
	int slot = order_base_2(size) - 10;

	BUG_ON(slot >= JBD2_MAX_SLABS);
	if (unlikely(slot < 0))
		slot = 0;

	cache = jbd2_slab[slot];
	BUG_ON(cache == NULL);
	return cache;
}
/*
 * Allocate a block-sized chunk of memory.
 *
 * @size must be a power of two.  Sub-page sizes come from the matching
 * slab cache, exactly one page and multi-page sizes of order < 3 from
 * the page allocator, anything larger from vmalloc().  __GFP_REPEAT is
 * always added to @flags.
 *
 * Returns the allocation (which may be NULL); BUGs if the result is not
 * aligned to @size.
 */
void *jbd2_alloc(size_t size, gfp_t flags)
{
	void *mem;

	BUG_ON(size & (size - 1)); /* Must be a power of 2 */

	flags |= __GFP_REPEAT;

	if (size < PAGE_SIZE) {
		mem = kmem_cache_alloc(get_slab(size), flags);
	} else if (size == PAGE_SIZE) {
		mem = (void *)__get_free_pages(flags, 0);
	} else {
		int order = get_order(size);

		if (order >= 3)
			mem = vmalloc(size);
		else
			mem = (void *)__get_free_pages(flags, order);
	}

	/* Check alignment; SLUB has gotten this wrong in the past,
	 * and this can lead to user data corruption! */
	BUG_ON(((unsigned long) mem) & (size - 1));

	return mem;
}
/*
 * Release memory obtained from jbd2_alloc().
 *
 * @size must be the size that was passed to jbd2_alloc(); it selects
 * the matching release path: free_pages() for exactly one page and for
 * multi-page sizes of order < 3, vfree() for larger sizes, and the slab
 * cache for sub-page sizes.
 */
void jbd2_free(void *ptr, size_t size)
{
	if (size == PAGE_SIZE) {
		free_pages((unsigned long)ptr, 0);
		return;
	}

	if (size > PAGE_SIZE) {
		int order = get_order(size);

		if (order < 3)
			free_pages((unsigned long)ptr, order);
		else
			vfree(ptr);
		return;
	}

	kmem_cache_free(get_slab(size), ptr);
}
2006-10-11 12:20:57 +04:00
/*
 * Journal_head storage management
 */
2006-12-07 07:33:20 +03:00
/* Cache that all struct journal_head objects are allocated from. */
static struct kmem_cache * jbd2_journal_head_cache ;
2007-07-18 16:57:06 +04:00
# ifdef CONFIG_JBD2_DEBUG
2006-10-11 12:20:57 +04:00
/* Debug-only count of live journal_heads (inc on alloc, dec on free). */
static atomic_t nr_journal_heads = ATOMIC_INIT ( 0 ) ;
# endif
2012-02-21 02:53:03 +04:00
static int jbd2_journal_init_journal_head_cache ( void )
2006-10-11 12:20:57 +04:00
{
int retval ;
2008-03-29 06:07:18 +03:00
J_ASSERT ( jbd2_journal_head_cache = = NULL ) ;
2006-10-11 12:21:00 +04:00
jbd2_journal_head_cache = kmem_cache_create ( " jbd2_journal_head " ,
2006-10-11 12:20:57 +04:00
sizeof ( struct journal_head ) ,
0 , /* offset */
2008-01-29 07:58:27 +03:00
SLAB_TEMPORARY , /* flags */
2007-07-20 05:11:58 +04:00
NULL ) ; /* ctor */
2006-10-11 12:20:57 +04:00
retval = 0 ;
2008-03-29 06:07:18 +03:00
if ( ! jbd2_journal_head_cache ) {
2006-10-11 12:20:57 +04:00
retval = - ENOMEM ;
2011-11-02 03:09:18 +04:00
printk ( KERN_EMERG " JBD2: no memory for journal_head cache \n " ) ;
2006-10-11 12:20:57 +04:00
}
return retval ;
}
2012-02-21 02:53:03 +04:00
static void jbd2_journal_destroy_journal_head_cache ( void )
2006-10-11 12:20:57 +04:00
{
2008-04-17 18:38:59 +04:00
if ( jbd2_journal_head_cache ) {
kmem_cache_destroy ( jbd2_journal_head_cache ) ;
jbd2_journal_head_cache = NULL ;
}
2006-10-11 12:20:57 +04:00
}
/*
* journal_head splicing and dicing
*/
static struct journal_head * journal_alloc_journal_head ( void )
{
struct journal_head * ret ;
2007-07-18 16:57:06 +04:00
# ifdef CONFIG_JBD2_DEBUG
2006-10-11 12:20:57 +04:00
atomic_inc ( & nr_journal_heads ) ;
# endif
2013-05-28 15:27:11 +04:00
ret = kmem_cache_zalloc ( jbd2_journal_head_cache , GFP_NOFS ) ;
2008-03-29 06:07:18 +03:00
if ( ! ret ) {
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 , " out of memory for journal_head \n " ) ;
2010-12-17 18:44:16 +03:00
pr_notice_ratelimited ( " ENOMEM in %s, retrying. \n " , __func__ ) ;
2008-03-29 06:07:18 +03:00
while ( ! ret ) {
2006-10-11 12:20:57 +04:00
yield ( ) ;
2013-05-28 15:27:11 +04:00
ret = kmem_cache_zalloc ( jbd2_journal_head_cache , GFP_NOFS ) ;
2006-10-11 12:20:57 +04:00
}
}
return ret ;
}
static void journal_free_journal_head ( struct journal_head * jh )
{
2007-07-18 16:57:06 +04:00
# ifdef CONFIG_JBD2_DEBUG
2006-10-11 12:20:57 +04:00
atomic_dec ( & nr_journal_heads ) ;
2007-10-17 02:38:25 +04:00
memset ( jh , JBD2_POISON_FREE , sizeof ( * jh ) ) ;
2006-10-11 12:20:57 +04:00
# endif
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_journal_head_cache , jh ) ;
2006-10-11 12:20:57 +04:00
}
/*
* A journal_head is attached to a buffer_head whenever JBD has an
* interest in the buffer .
*
* Whenever a buffer has an attached journal_head , its - > b_state : BH_JBD bit
* is set . This bit is tested in core kernel code where we need to take
* JBD - specific actions . Testing the zeroness of - > b_private is not reliable
* there .
*
* When a buffer has its BH_JBD bit set , its - > b_count is elevated by one .
*
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code , mainly via - > b_count .
*
2011-06-13 23:38:22 +04:00
* A journal_head is detached from its buffer_head when the journal_head ' s
* b_jcount reaches zero . Running transaction ( b_transaction ) and checkpoint
* transaction ( b_cp_transaction ) hold their references to b_jcount .
2006-10-11 12:20:57 +04:00
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction . To protect the
2006-10-11 12:20:59 +04:00
* journal_head in this situation , jbd2_journal_add_journal_head elevates the
2006-10-11 12:20:57 +04:00
* journal_head ' s b_jcount refcount by one . The caller must call
2006-10-11 12:20:59 +04:00
* jbd2_journal_put_journal_head ( ) to undo this .
2006-10-11 12:20:57 +04:00
*
* So the typical usage would be :
*
* ( Attach a journal_head if needed . Increments b_jcount )
2006-10-11 12:20:59 +04:00
* struct journal_head * jh = jbd2_journal_add_journal_head ( bh ) ;
2006-10-11 12:20:57 +04:00
* . . .
2011-06-13 23:38:22 +04:00
* ( Get another reference for transaction )
* jbd2_journal_grab_journal_head ( bh ) ;
2006-10-11 12:20:57 +04:00
* jh - > b_transaction = xxx ;
2011-06-13 23:38:22 +04:00
* ( Put original reference )
2006-10-11 12:20:59 +04:00
* jbd2_journal_put_journal_head ( jh ) ;
2006-10-11 12:20:57 +04:00
*/
/*
 * Give a buffer_head a journal_head.
 *
 * If @bh already has a journal_head, that one is reused; otherwise a
 * freshly allocated one is attached (BH_JBD set, bh->b_private pointed
 * at it, and an extra reference taken on @bh via get_bh()).  In both
 * cases the journal_head's b_jcount is incremented.
 *
 * Returns bh->b_private, read after the journal_head lock has been
 * dropped.
 *
 * May sleep.
 */
2006-10-11 12:20:59 +04:00
struct journal_head * jbd2_journal_add_journal_head ( struct buffer_head * bh )
2006-10-11 12:20:57 +04:00
{
struct journal_head * jh ;
struct journal_head * new_jh = NULL ;
repeat :
2013-05-28 15:27:11 +04:00
/*
 * Unlocked peek: allocate up front, before taking the journal_head
 * lock, only if the buffer does not appear to have a journal_head yet.
 */
if ( ! buffer_jbd ( bh ) )
2006-10-11 12:20:57 +04:00
new_jh = journal_alloc_journal_head ( ) ;
jbd_lock_bh_journal_head ( bh ) ;
if ( buffer_jbd ( bh ) ) {
/* Already attached (possibly by someone else since the peek). */
jh = bh2jh ( bh ) ;
} else {
J_ASSERT_BH ( bh ,
( atomic_read ( & bh - > b_count ) > 0 ) | |
( bh - > b_page & & bh - > b_page - > mapping ) ) ;
/*
 * The unlocked peek saw a journal_head, so nothing was allocated,
 * but it is gone now that we hold the lock: start over.
 */
if ( ! new_jh ) {
jbd_unlock_bh_journal_head ( bh ) ;
goto repeat ;
}
jh = new_jh ;
new_jh = NULL ; /* We consumed it */
set_buffer_jbd ( bh ) ;
bh - > b_private = jh ;
jh - > b_bh = bh ;
get_bh ( bh ) ;
BUFFER_TRACE ( bh , " added journal_head " ) ;
}
jh - > b_jcount + + ;
jbd_unlock_bh_journal_head ( bh ) ;
/* A preallocated journal_head that turned out to be unnecessary. */
if ( new_jh )
journal_free_journal_head ( new_jh ) ;
/* NOTE(review): presumably equal to jh (bh2jh() looks like a b_private accessor) -- confirm. */
return bh - > b_private ;
}
/*
* Grab a ref against this buffer_head ' s journal_head . If it ended up not
* having a journal_head , return NULL
*/
2006-10-11 12:20:59 +04:00
struct journal_head * jbd2_journal_grab_journal_head ( struct buffer_head * bh )
2006-10-11 12:20:57 +04:00
{
struct journal_head * jh = NULL ;
jbd_lock_bh_journal_head ( bh ) ;
if ( buffer_jbd ( bh ) ) {
jh = bh2jh ( bh ) ;
jh - > b_jcount + + ;
}
jbd_unlock_bh_journal_head ( bh ) ;
return jh ;
}
static void __journal_remove_journal_head ( struct buffer_head * bh )
{
struct journal_head * jh = bh2jh ( bh ) ;
J_ASSERT_JH ( jh , jh - > b_jcount > = 0 ) ;
2011-06-13 23:38:22 +04:00
J_ASSERT_JH ( jh , jh - > b_transaction = = NULL ) ;
J_ASSERT_JH ( jh , jh - > b_next_transaction = = NULL ) ;
J_ASSERT_JH ( jh , jh - > b_cp_transaction = = NULL ) ;
J_ASSERT_JH ( jh , jh - > b_jlist = = BJ_None ) ;
J_ASSERT_BH ( bh , buffer_jbd ( bh ) ) ;
J_ASSERT_BH ( bh , jh2bh ( jh ) = = bh ) ;
BUFFER_TRACE ( bh , " remove journal_head " ) ;
if ( jh - > b_frozen_data ) {
printk ( KERN_WARNING " %s: freeing b_frozen_data \n " , __func__ ) ;
jbd2_free ( jh - > b_frozen_data , bh - > b_size ) ;
2006-10-11 12:20:57 +04:00
}
2011-06-13 23:38:22 +04:00
if ( jh - > b_committed_data ) {
printk ( KERN_WARNING " %s: freeing b_committed_data \n " , __func__ ) ;
jbd2_free ( jh - > b_committed_data , bh - > b_size ) ;
}
bh - > b_private = NULL ;
jh - > b_bh = NULL ; /* debug, really */
clear_buffer_jbd ( bh ) ;
journal_free_journal_head ( jh ) ;
2006-10-11 12:20:57 +04:00
}
/*
2011-06-13 23:38:22 +04:00
* Drop a reference on the passed journal_head . If it fell to zero then
2006-10-11 12:20:57 +04:00
* release the journal_head from the buffer_head .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_put_journal_head ( struct journal_head * jh )
2006-10-11 12:20:57 +04:00
{
struct buffer_head * bh = jh2bh ( jh ) ;
jbd_lock_bh_journal_head ( bh ) ;
J_ASSERT_JH ( jh , jh - > b_jcount > 0 ) ;
- - jh - > b_jcount ;
2011-06-13 23:38:22 +04:00
if ( ! jh - > b_jcount ) {
2006-10-11 12:20:57 +04:00
__journal_remove_journal_head ( bh ) ;
2011-06-13 23:38:22 +04:00
jbd_unlock_bh_journal_head ( bh ) ;
2006-10-11 12:20:57 +04:00
__brelse ( bh ) ;
2011-06-13 23:38:22 +04:00
} else
jbd_unlock_bh_journal_head ( bh ) ;
2006-10-11 12:20:57 +04:00
}
2008-07-12 03:27:31 +04:00
/*
* Initialize jbd inode head
*/
void jbd2_journal_init_jbd_inode ( struct jbd2_inode * jinode , struct inode * inode )
{
jinode - > i_transaction = NULL ;
jinode - > i_next_transaction = NULL ;
jinode - > i_vfs_inode = inode ;
jinode - > i_flags = 0 ;
INIT_LIST_HEAD ( & jinode - > i_list ) ;
}
/*
 * Remove @jinode from its transaction's inode list.  Meant to be called
 * before the inode is removed from memory (clear_inode() is a fine place).
 *
 * If commit is currently writing the inode out (__JI_COMMIT_RUNNING is set)
 * this sleeps uninterruptibly until the bit is clear, re-checking under
 * j_list_lock after every wakeup.  A NULL @journal makes this a no-op.
 */
void jbd2_journal_release_jbd_inode(journal_t *journal,
				    struct jbd2_inode *jinode)
{
	if (!journal)
		return;

	spin_lock(&journal->j_list_lock);

	/* Is commit writing out inode - we have to wait */
	while (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
		wait_queue_head_t *wq;
		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);

		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
		/* Queue ourselves before dropping the lock, then sleep. */
		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&journal->j_list_lock);
		schedule();
		finish_wait(wq, &wait.wait);

		/* Retake the lock and look at the bit again. */
		spin_lock(&journal->j_list_lock);
	}

	if (jinode->i_transaction) {
		list_del(&jinode->i_list);
		jinode->i_transaction = NULL;
	}
	spin_unlock(&journal->j_list_lock);
}
2006-10-11 12:20:57 +04:00
2008-01-29 07:58:27 +03:00
#ifdef CONFIG_PROC_FS

/*
 * Name of the jbd2 statistics directory, handed to proc_mkdir() and
 * remove_proc_entry() with a NULL parent.  It has to be the plain path
 * "fs/jbd2": any padding around the '/' would become part of the path
 * components and the directory would not end up under fs.
 */
#define JBD2_STATS_PROC_NAME "fs/jbd2"

/*
 * Create the jbd2 statistics directory at module init.  The result is kept
 * in proc_jbd2_stats; it is NULL if the directory could not be created.
 */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/*
 * Remove the jbd2 statistics directory at module exit, but only if it was
 * created successfully in the first place.
 */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (proc_jbd2_stats)
		remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else

/* Without procfs there is nothing to create or remove. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif
2011-01-10 20:29:43 +03:00
struct kmem_cache * jbd2_handle_cache , * jbd2_inode_cache ;
2006-10-11 12:20:57 +04:00
2012-02-21 02:53:03 +04:00
static int __init jbd2_journal_init_handle_cache ( void )
2006-10-11 12:20:57 +04:00
{
2011-01-10 20:29:43 +03:00
jbd2_handle_cache = KMEM_CACHE ( jbd2_journal_handle , SLAB_TEMPORARY ) ;
2006-10-11 12:20:59 +04:00
if ( jbd2_handle_cache = = NULL ) {
2011-01-10 20:29:43 +03:00
printk ( KERN_EMERG " JBD2: failed to create handle cache \n " ) ;
return - ENOMEM ;
}
jbd2_inode_cache = KMEM_CACHE ( jbd2_inode , 0 ) ;
if ( jbd2_inode_cache = = NULL ) {
printk ( KERN_EMERG " JBD2: failed to create inode cache \n " ) ;
kmem_cache_destroy ( jbd2_handle_cache ) ;
2006-10-11 12:20:57 +04:00
return - ENOMEM ;
}
return 0 ;
}
2006-10-11 12:20:59 +04:00
static void jbd2_journal_destroy_handle_cache ( void )
2006-10-11 12:20:57 +04:00
{
2006-10-11 12:20:59 +04:00
if ( jbd2_handle_cache )
kmem_cache_destroy ( jbd2_handle_cache ) ;
2011-01-10 20:29:43 +03:00
if ( jbd2_inode_cache )
kmem_cache_destroy ( jbd2_inode_cache ) ;
2006-10-11 12:20:57 +04:00
}
/*
 * Module startup and shutdown
 */

/*
 * Create the revoke, journal_head, handle and transaction caches, in that
 * order.  Stops at the first failure and returns its error code; returns 0
 * when all of them were set up.  Nothing is torn down here on failure; that
 * is left to the caller.
 */
static int __init journal_init_caches(void)
{
	int err;

	err = jbd2_journal_init_revoke_caches();
	if (err)
		return err;

	err = jbd2_journal_init_journal_head_cache();
	if (err)
		return err;

	err = jbd2_journal_init_handle_cache();
	if (err)
		return err;

	return jbd2_journal_init_transaction_cache();
}
2006-10-11 12:20:59 +04:00
/*
 * Tear down every cache owned by jbd2: the revoke, journal_head, handle and
 * transaction caches, followed by the jbd2 slabs.
 */
static void jbd2_journal_destroy_caches(void)
{
	jbd2_journal_destroy_revoke_caches();
	jbd2_journal_destroy_journal_head_cache();
	jbd2_journal_destroy_handle_cache();
	jbd2_journal_destroy_transaction_cache();
	jbd2_journal_destroy_slabs();
}
/*
 * Module entry point: set up the caches and, if that worked, the proc
 * statistics directory.  If cache setup fails, whatever was created is
 * destroyed again and the error is returned.
 */
static int __init journal_init(void)
{
	int err;

	/* The on-disk journal superblock must be exactly 1024 bytes. */
	BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);

	err = journal_init_caches();
	if (err) {
		jbd2_journal_destroy_caches();
		return err;
	}

	jbd2_create_jbd_stats_proc_entry();
	return 0;
}
/*
 * Module exit point: remove the proc statistics directory and destroy all
 * caches.  With CONFIG_JBD2_DEBUG, first report a non-zero nr_journal_heads
 * counter as leaked journal_heads.
 */
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD2_DEBUG
	int leaked = atomic_read(&nr_journal_heads);

	if (leaked != 0)
		printk(KERN_ERR " JBD2: leaked %d journal_heads! \n ", leaked);
#endif
	jbd2_remove_jbd_stats_proc_entry();
	jbd2_journal_destroy_caches();
}
/*
 * The license string is compared verbatim against the set of GPL-compatible
 * license names, so it must be exactly "GPL" with no surrounding whitespace;
 * a padded string would not be treated as GPL.
 */
MODULE_LICENSE("GPL");

/* journal_init() runs at module load, journal_exit() at module unload. */
module_init(journal_init);
module_exit(journal_exit);