2006-10-11 12:20:57 +04:00
/*
2006-10-11 12:20:59 +04:00
* linux / fs / jbd2 / journal . c
2006-10-11 12:20:57 +04:00
*
* Written by Stephen C . Tweedie < sct @ redhat . com > , 1998
*
* Copyright 1998 Red Hat corp - - - All Rights Reserved
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License , version 2 , or at your
* option , any later version , incorporated herein by reference .
*
* Generic filesystem journal - writing code ; part of the ext2fs
* journaling system .
*
* This file manages journals : areas of disk reserved for logging
* transactional updates . This includes the kernel journaling thread
* which is responsible for scheduling updates to the log .
*
* We do not actually manage the physical storage of the journal in this
* file : that is left to a per - journal policy function , which allows us
* to store the journal within a filesystem - specified area for ext2
* journaling ( ext2 can use a reserved inode for storing the log ) .
*/
# include <linux/module.h>
# include <linux/time.h>
# include <linux/fs.h>
2006-10-11 12:20:59 +04:00
# include <linux/jbd2.h>
2006-10-11 12:20:57 +04:00
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/mm.h>
2006-12-07 07:34:23 +03:00
# include <linux/freezer.h>
2006-10-11 12:20:57 +04:00
# include <linux/pagemap.h>
# include <linux/kthread.h>
# include <linux/poison.h>
# include <linux/proc_fs.h>
2007-07-18 16:50:18 +04:00
# include <linux/debugfs.h>
2008-01-29 07:58:27 +03:00
# include <linux/seq_file.h>
2009-01-12 06:34:01 +03:00
# include <linux/math64.h>
2009-06-17 19:47:48 +04:00
# include <linux/hash.h>
# define CREATE_TRACE_POINTS
# include <trace/events/jbd2.h>
2006-10-11 12:20:57 +04:00
# include <asm/uaccess.h>
# include <asm/page.h>
2006-10-11 12:20:59 +04:00
EXPORT_SYMBOL ( jbd2_journal_start ) ;
EXPORT_SYMBOL ( jbd2_journal_restart ) ;
EXPORT_SYMBOL ( jbd2_journal_extend ) ;
EXPORT_SYMBOL ( jbd2_journal_stop ) ;
EXPORT_SYMBOL ( jbd2_journal_lock_updates ) ;
EXPORT_SYMBOL ( jbd2_journal_unlock_updates ) ;
EXPORT_SYMBOL ( jbd2_journal_get_write_access ) ;
EXPORT_SYMBOL ( jbd2_journal_get_create_access ) ;
EXPORT_SYMBOL ( jbd2_journal_get_undo_access ) ;
2008-09-12 02:35:47 +04:00
EXPORT_SYMBOL ( jbd2_journal_set_triggers ) ;
2006-10-11 12:20:59 +04:00
EXPORT_SYMBOL ( jbd2_journal_dirty_metadata ) ;
EXPORT_SYMBOL ( jbd2_journal_release_buffer ) ;
EXPORT_SYMBOL ( jbd2_journal_forget ) ;
2006-10-11 12:20:57 +04:00
#if 0
EXPORT_SYMBOL ( journal_sync_buffer ) ;
# endif
2006-10-11 12:20:59 +04:00
EXPORT_SYMBOL ( jbd2_journal_flush ) ;
EXPORT_SYMBOL ( jbd2_journal_revoke ) ;
EXPORT_SYMBOL ( jbd2_journal_init_dev ) ;
EXPORT_SYMBOL ( jbd2_journal_init_inode ) ;
EXPORT_SYMBOL ( jbd2_journal_update_format ) ;
EXPORT_SYMBOL ( jbd2_journal_check_used_features ) ;
EXPORT_SYMBOL ( jbd2_journal_check_available_features ) ;
EXPORT_SYMBOL ( jbd2_journal_set_features ) ;
EXPORT_SYMBOL ( jbd2_journal_load ) ;
EXPORT_SYMBOL ( jbd2_journal_destroy ) ;
EXPORT_SYMBOL ( jbd2_journal_abort ) ;
EXPORT_SYMBOL ( jbd2_journal_errno ) ;
EXPORT_SYMBOL ( jbd2_journal_ack_err ) ;
EXPORT_SYMBOL ( jbd2_journal_clear_err ) ;
EXPORT_SYMBOL ( jbd2_log_wait_commit ) ;
EXPORT_SYMBOL ( jbd2_journal_start_commit ) ;
EXPORT_SYMBOL ( jbd2_journal_force_commit_nested ) ;
EXPORT_SYMBOL ( jbd2_journal_wipe ) ;
EXPORT_SYMBOL ( jbd2_journal_blocks_per_page ) ;
EXPORT_SYMBOL ( jbd2_journal_invalidatepage ) ;
EXPORT_SYMBOL ( jbd2_journal_try_to_free_buffers ) ;
EXPORT_SYMBOL ( jbd2_journal_force_commit ) ;
2008-07-12 03:27:31 +04:00
EXPORT_SYMBOL ( jbd2_journal_file_inode ) ;
EXPORT_SYMBOL ( jbd2_journal_init_jbd_inode ) ;
EXPORT_SYMBOL ( jbd2_journal_release_jbd_inode ) ;
EXPORT_SYMBOL ( jbd2_journal_begin_ordered_truncate ) ;
2006-10-11 12:20:57 +04:00
static int journal_convert_superblock_v1 ( journal_t * , journal_superblock_t * ) ;
static void __journal_abort_soft ( journal_t * journal , int errno ) ;
/*
* Helper function used to manage commit timeouts
*/
static void commit_timeout(unsigned long __data)
{
	/* The timer data word is the task_struct pointer of the thread to wake. */
	wake_up_process((struct task_struct *)__data);
}
/*
2006-10-11 12:20:59 +04:00
* kjournald2 : The main thread function used to manage a logging device
2006-10-11 12:20:57 +04:00
* journal .
*
* This kernel thread is responsible for two things :
*
* 1 ) COMMIT : Every so often we need to commit the current state of the
* filesystem to disk . The journal thread is responsible for writing
* all of the metadata buffers to disk .
*
* 2 ) CHECKPOINT : We cannot reuse a used section of the log file until all
* of the data in that part of the log has been rewritten elsewhere on
* the disk . Flushing these old buffers to reclaim space in the log is
* known as checkpointing , and this thread is responsible for that job .
*/
2006-10-11 12:20:59 +04:00
static int kjournald2 ( void * arg )
2006-10-11 12:20:57 +04:00
{
journal_t * journal = arg ;
transaction_t * transaction ;
/*
* Set up an interval timer which can be used to trigger a commit wakeup
* after the commit interval expires
*/
setup_timer ( & journal - > j_commit_timer , commit_timeout ,
( unsigned long ) current ) ;
/* Record that the journal thread is running */
journal - > j_task = current ;
wake_up ( & journal - > j_wait_done_commit ) ;
2009-01-04 06:56:44 +03:00
printk ( KERN_INFO " kjournald2 starting: pid %d, dev %s, "
" commit interval %ld seconds \n " , current - > pid ,
journal - > j_devname , journal - > j_commit_interval / HZ ) ;
2006-10-11 12:20:57 +04:00
/*
* And now , wait forever for commit wakeup events .
*/
spin_lock ( & journal - > j_state_lock ) ;
loop :
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_UNMOUNT )
2006-10-11 12:20:57 +04:00
goto end_loop ;
jbd_debug ( 1 , " commit_sequence=%d, commit_request=%d \n " ,
journal - > j_commit_sequence , journal - > j_commit_request ) ;
if ( journal - > j_commit_sequence ! = journal - > j_commit_request ) {
jbd_debug ( 1 , " OK, requests differ \n " ) ;
spin_unlock ( & journal - > j_state_lock ) ;
del_timer_sync ( & journal - > j_commit_timer ) ;
2006-10-11 12:20:59 +04:00
jbd2_journal_commit_transaction ( journal ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_state_lock ) ;
goto loop ;
}
wake_up ( & journal - > j_wait_done_commit ) ;
if ( freezing ( current ) ) {
/*
* The simpler the better . Flushing journal isn ' t a
* good idea , because that depends on threads that may
* be already stopped .
*/
2006-10-11 12:20:59 +04:00
jbd_debug ( 1 , " Now suspending kjournald2 \n " ) ;
2006-10-11 12:20:57 +04:00
spin_unlock ( & journal - > j_state_lock ) ;
refrigerator ( ) ;
spin_lock ( & journal - > j_state_lock ) ;
} else {
/*
* We assume on resume that commits are already there ,
* so we don ' t sleep
*/
DEFINE_WAIT ( wait ) ;
int should_sleep = 1 ;
prepare_to_wait ( & journal - > j_wait_commit , & wait ,
TASK_INTERRUPTIBLE ) ;
if ( journal - > j_commit_sequence ! = journal - > j_commit_request )
should_sleep = 0 ;
transaction = journal - > j_running_transaction ;
if ( transaction & & time_after_eq ( jiffies ,
transaction - > t_expires ) )
should_sleep = 0 ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_UNMOUNT )
2006-10-11 12:20:57 +04:00
should_sleep = 0 ;
if ( should_sleep ) {
spin_unlock ( & journal - > j_state_lock ) ;
schedule ( ) ;
spin_lock ( & journal - > j_state_lock ) ;
}
finish_wait ( & journal - > j_wait_commit , & wait ) ;
}
2006-10-11 12:20:59 +04:00
jbd_debug ( 1 , " kjournald2 wakes \n " ) ;
2006-10-11 12:20:57 +04:00
/*
* Were we woken up by a commit wakeup event ?
*/
transaction = journal - > j_running_transaction ;
if ( transaction & & time_after_eq ( jiffies , transaction - > t_expires ) ) {
journal - > j_commit_request = transaction - > t_tid ;
jbd_debug ( 1 , " woke because of timeout \n " ) ;
}
goto loop ;
end_loop :
spin_unlock ( & journal - > j_state_lock ) ;
del_timer_sync ( & journal - > j_commit_timer ) ;
journal - > j_task = NULL ;
wake_up ( & journal - > j_wait_done_commit ) ;
jbd_debug ( 1 , " Journal thread exiting. \n " ) ;
return 0 ;
}
2007-05-08 11:30:42 +04:00
static int jbd2_journal_start_thread(journal_t *journal)
{
	struct task_struct *thread;

	/* Spawn kjournald2 for this journal. */
	thread = kthread_run(kjournald2, journal, " kjournald2 ");
	if (IS_ERR(thread))
		return PTR_ERR(thread);

	/* Block until the new thread has recorded itself in j_task. */
	wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
	return 0;
}
static void journal_kill_thread(journal_t *journal)
{
	spin_lock(&journal->j_state_lock);
	/* Ask the journal thread to leave its main loop. */
	journal->j_flags |= JBD2_UNMOUNT;

	for (;;) {
		if (!journal->j_task)
			break;
		/* Kick the thread, then wait (unlocked) for j_task to clear. */
		wake_up(&journal->j_wait_commit);
		spin_unlock(&journal->j_state_lock);
		wait_event(journal->j_wait_done_commit,
			   journal->j_task == NULL);
		spin_lock(&journal->j_state_lock);
	}
	spin_unlock(&journal->j_state_lock);
}
/*
2006-10-11 12:20:59 +04:00
* jbd2_journal_write_metadata_buffer : write a metadata buffer to the journal .
2006-10-11 12:20:57 +04:00
*
* Writes a metadata buffer to a given disk block . The actual IO is not
* performed but a new buffer_head is constructed which labels the data
* to be written with the correct destination disk block .
*
* Any magic - number escaping which needs to be done will cause a
* copy - out here . If the buffer happens to start with the
2006-10-11 12:20:59 +04:00
* JBD2_MAGIC_NUMBER , then we can ' t write it to the log directly : the
2006-10-11 12:20:57 +04:00
* magic number is only written to the log for descriptor blocks . In
* this case , we copy the data and replace the first word with 0 , and we
* return a result code which indicates that this buffer needs to be
* marked as an escaped buffer in the corresponding log descriptor
* block . The missing word can then be restored when the block is read
* during recovery .
*
* If the source buffer has already been modified by a new transaction
* since we took the last commit snapshot , we use the frozen copy of
* that data for IO . If we end up using the existing buffer_head ' s data
* for the write , then we * have * to lock the buffer to prevent anyone
* else from using and possibly modifying it while the IO is in
* progress .
*
* The function returns a pointer to the buffer_heads to be used for IO .
*
* We assume that the journal has already been locked in this function .
*
* Return value :
* < 0 : Error
* > = 0 : Finished OK
*
* On success :
* Bit 0 set = = escape performed on the data
* Bit 1 set = = buffer copy - out performed ( kfree the data after IO )
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
				       struct journal_head *jh_in,
				       struct journal_head **jh_out,
				       unsigned long long blocknr)
{
	int need_copy_out = 0;
	int done_copy_out = 0;
	int do_escape = 0;
	char *mapped_data;
	struct buffer_head *new_bh;
	struct journal_head *new_jh;
	struct page *new_page;
	unsigned int new_offset;
	struct buffer_head *bh_in = jh2bh(jh_in);
	struct jbd2_buffer_trigger_type *triggers;
	journal_t *journal = transaction->t_journal;

	/*
	 * The buffer really shouldn't be locked: only the current committing
	 * transaction is allowed to write it, so nobody else is allowed
	 * to do any IO.
	 *
	 * akpm: except if we're journalling data, and write() output is
	 * also part of a shared mapping, and another thread has
	 * decided to launch a writepage() against this buffer.
	 */
	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));

	/* Build the buffer_head/journal_head pair that will carry the IO. */
	new_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
	/* keep subsequent assertions sane */
	new_bh->b_state = 0;
	init_buffer(new_bh, NULL, NULL);
	atomic_set(&new_bh->b_count, 1);
	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */

	/*
	 * If a new transaction has already done a buffer copy-out, then
	 * we use that version of the data for the commit.
	 */
	jbd_lock_bh_state(bh_in);
repeat:
	if (jh_in->b_frozen_data) {
		done_copy_out = 1;
		new_page = virt_to_page(jh_in->b_frozen_data);
		new_offset = offset_in_page(jh_in->b_frozen_data);
		triggers = jh_in->b_frozen_triggers;
	} else {
		new_page = jh2bh(jh_in)->b_page;
		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
		triggers = jh_in->b_triggers;
	}

	mapped_data = kmap_atomic(new_page, KM_USER0);
	/*
	 * Fire any commit trigger.  Do this before checking for escaping,
	 * as the trigger may modify the magic offset.  If a copy-out
	 * happens afterwards, it will have the correct data in the buffer.
	 */
	jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
				   triggers);

	/*
	 * Check for escaping: a block whose first word equals the journal
	 * magic number must be copied out and have that word cleared.
	 */
	if (*((__be32 *)(mapped_data + new_offset)) ==
				cpu_to_be32(JBD2_MAGIC_NUMBER)) {
		need_copy_out = 1;
		do_escape = 1;
	}
	kunmap_atomic(mapped_data, KM_USER0);

	/*
	 * Do we need to do a data copy?
	 */
	if (need_copy_out && !done_copy_out) {
		char *tmp;

		/* The bh state lock is dropped around the allocation. */
		jbd_unlock_bh_state(bh_in);
		/*
		 * The copy-out buffer is written to unconditionally below,
		 * so a NULL return would be dereferenced by memcpy().  Use
		 * __GFP_NOFAIL, as alloc_buffer_head() above already does,
		 * so the allocation cannot return NULL.
		 */
		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS | __GFP_NOFAIL);
		jbd_lock_bh_state(bh_in);
		if (jh_in->b_frozen_data) {
			/* Someone froze the data meanwhile: use their copy. */
			jbd2_free(tmp, bh_in->b_size);
			goto repeat;
		}

		jh_in->b_frozen_data = tmp;
		mapped_data = kmap_atomic(new_page, KM_USER0);
		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
		kunmap_atomic(mapped_data, KM_USER0);

		new_page = virt_to_page(tmp);
		new_offset = offset_in_page(tmp);
		done_copy_out = 1;

		/*
		 * This isn't strictly necessary, as we're using frozen
		 * data for the escaping, but it keeps consistency with
		 * b_frozen_data usage.
		 */
		jh_in->b_frozen_triggers = jh_in->b_triggers;
	}

	/*
	 * Did we need to do an escaping?  Now we've done all the
	 * copying, we can finally do so.
	 */
	if (do_escape) {
		mapped_data = kmap_atomic(new_page, KM_USER0);
		*((unsigned int *)(mapped_data + new_offset)) = 0;
		kunmap_atomic(mapped_data, KM_USER0);
	}

	/* Point the new buffer_head at the chosen data and target block. */
	set_bh_page(new_bh, new_page, new_offset);
	new_jh->b_transaction = NULL;
	new_bh->b_size = jh2bh(jh_in)->b_size;
	new_bh->b_bdev = transaction->t_journal->j_dev;
	new_bh->b_blocknr = blocknr;
	set_buffer_mapped(new_bh);
	set_buffer_dirty(new_bh);

	*jh_out = new_jh;

	/*
	 * The to-be-written buffer needs to get moved to the io queue,
	 * and the original buffer whose contents we are shadowing or
	 * copying is moved to the transaction's shadow queue.
	 */
	JBUFFER_TRACE(jh_in, " file as BJ_Shadow ");
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh_in);

	JBUFFER_TRACE(new_jh, " file as BJ_IO ");
	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);

	/* Bit 0: escape performed; bit 1: copy-out (frozen data) in use. */
	return do_escape | (done_copy_out << 1);
}
/*
* Allocation code for the journal file . Manage the space left in the
* journal , so that we can begin checkpointing when appropriate .
*/
/*
2006-10-11 12:20:59 +04:00
* __jbd2_log_space_left : Return the number of free blocks left in the journal .
2006-10-11 12:20:57 +04:00
*
* Called with the journal already locked .
*
* Called under j_state_lock
*/
2006-10-11 12:20:59 +04:00
int __jbd2_log_space_left(journal_t *journal)
{
	int avail;

	assert_spin_locked(&journal->j_state_lock);
	avail = journal->j_free;

	/*
	 * Be pessimistic: hold back a fixed reserve, then a further eighth
	 * of what remains, for log descriptor control blocks.
	 */
#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */

	avail -= MIN_LOG_RESERVED_BLOCKS;
	if (avail <= 0)
		return 0;
	return avail - (avail >> 3);
}
/*
2009-02-10 19:27:46 +03:00
* Called under j_state_lock . Returns true if a transaction commit was started .
2006-10-11 12:20:57 +04:00
*/
2006-10-11 12:20:59 +04:00
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
	/* A commit at least as recent as @target is already requested. */
	if (tid_geq(journal->j_commit_request, target))
		return 0;

	/*
	 * We want a new commit: record the request and wake up the commit
	 * thread.  The commit itself is _not_ performed here.
	 */
	journal->j_commit_request = target;
	jbd_debug(1, " JBD: requesting commit %d/%d \n ",
		  journal->j_commit_request,
		  journal->j_commit_sequence);
	wake_up(&journal->j_wait_commit);
	return 1;
}
2006-10-11 12:20:59 +04:00
int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{
	int started;

	/* Locked wrapper around __jbd2_log_start_commit(). */
	spin_lock(&journal->j_state_lock);
	started = __jbd2_log_start_commit(journal, tid);
	spin_unlock(&journal->j_state_lock);

	return started;
}
/*
* Force and wait upon a commit if the calling process is not within
* transaction . This is used for forcing out undo - protected data which contains
* bitmaps , when the fs is running out of space .
*
* We can only force the running transaction if we don ' t have an active handle ;
* otherwise , we will deadlock .
*
* Returns true if a transaction was started .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_force_commit_nested(journal_t *journal)
{
	transaction_t *target;
	tid_t tid;

	spin_lock(&journal->j_state_lock);
	if (journal->j_running_transaction && !current->journal_info) {
		/* No handle held by this task: request the running commit. */
		target = journal->j_running_transaction;
		__jbd2_log_start_commit(journal, target->t_tid);
	} else {
		/* Otherwise fall back to a commit already in flight, if any. */
		target = journal->j_committing_transaction;
	}

	if (!target) {
		spin_unlock(&journal->j_state_lock);
		return 0;	/* Nothing to retry */
	}

	/* Sample the tid under the lock, then wait with the lock dropped. */
	tid = target->t_tid;
	spin_unlock(&journal->j_state_lock);
	jbd2_log_wait_commit(journal, tid);
	return 1;
}
/*
* Start a commit of the current running transaction ( if any ) . Returns true
2009-02-10 19:27:46 +03:00
* if a transaction is going to be committed ( or is currently already
* committing ) , and fills its tid in at * ptid
2006-10-11 12:20:57 +04:00
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{
	transaction_t *txn;
	int found = 0;

	spin_lock(&journal->j_state_lock);
	txn = journal->j_running_transaction;
	if (txn) {
		/*
		 * There is a running transaction: make sure its commit
		 * has been requested.
		 */
		__jbd2_log_start_commit(journal, txn->t_tid);
	} else {
		/*
		 * No running transaction; a commit that is already under
		 * way (if any) is the one the caller has to wait for.
		 */
		txn = journal->j_committing_transaction;
	}

	if (txn) {
		if (ptid)
			*ptid = txn->t_tid;
		found = 1;
	}
	spin_unlock(&journal->j_state_lock);
	return found;
}
/*
* Wait for a specified commit to complete .
* The caller may not hold the journal lock .
*/
2006-10-11 12:20:59 +04:00
int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{
#ifdef CONFIG_JBD2_DEBUG
	/* Debug builds complain if nobody has requested this commit yet. */
	spin_lock(&journal->j_state_lock);
	if (!tid_geq(journal->j_commit_request, tid)) {
		printk(KERN_EMERG
		       " %s: error: j_commit_request=%d, tid=%d \n ",
		       __func__, journal->j_commit_request, tid);
	}
	spin_unlock(&journal->j_state_lock);
#endif

	spin_lock(&journal->j_state_lock);
	while (tid_gt(tid, journal->j_commit_sequence)) {
		jbd_debug(1, " JBD: want %d, j_commit_sequence=%d \n ",
			  tid, journal->j_commit_sequence);
		/* Nudge the commit thread, then wait with the lock dropped. */
		wake_up(&journal->j_wait_commit);
		spin_unlock(&journal->j_state_lock);
		wait_event(journal->j_wait_done_commit,
			   !tid_gt(tid, journal->j_commit_sequence));
		spin_lock(&journal->j_state_lock);
	}
	spin_unlock(&journal->j_state_lock);

	if (unlikely(is_journal_aborted(journal))) {
		printk(KERN_EMERG " journal commit I/O error \n ");
		return -EIO;
	}
	return 0;
}
/*
* Log buffer allocation routines :
*/
2006-10-11 12:21:13 +04:00
int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
{
	unsigned long log_block;

	spin_lock(&journal->j_state_lock);
	J_ASSERT(journal->j_free > 1);

	/* Take the block at the head and advance, wrapping at j_last. */
	log_block = journal->j_head++;
	journal->j_free--;
	if (journal->j_head == journal->j_last)
		journal->j_head = journal->j_first;
	spin_unlock(&journal->j_state_lock);

	/* Translate the logical log block into a physical block number. */
	return jbd2_journal_bmap(journal, log_block, retp);
}
/*
* Conversion of logical to physical block numbers for the journal
*
* On external journals the journal blocks are identity - mapped , so
* this is a no - op . If needed , we can use j_blk_offset - everything is
* ready .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
		      unsigned long long *retp)
{
	unsigned long long phys;

	/* No journal inode: the block number is used as-is. */
	if (!journal->j_inode) {
		*retp = blocknr;	/* +journal->j_blk_offset */
		return 0;
	}

	phys = bmap(journal->j_inode, blocknr);
	if (phys) {
		*retp = phys;
		return 0;
	}

	/* bmap() returned 0: report it and abort the journal. */
	printk(KERN_ALERT " %s: journal block not found "
	       " at offset %lu on %s \n ",
	       __func__, blocknr, journal->j_devname);
	__journal_abort_soft(journal, -EIO);
	return -EIO;
}
/*
* We play buffer_head aliasing tricks to write data / metadata blocks to
* the journal without copying their contents , but for journal
* descriptor blocks we do need to generate bona fide buffers .
*
2006-10-11 12:20:59 +04:00
* After the caller of jbd2_journal_get_descriptor_buffer ( ) has finished modifying
2006-10-11 12:20:57 +04:00
* the buffer ' s contents they really should run flush_dcache_page ( bh - > b_page ) .
* But we don ' t bother doing that , so there will be coherency problems with
* mmaps of blockdevs which hold live JBD - controlled filesystems .
*/
2006-10-11 12:20:59 +04:00
struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
{
	unsigned long long blocknr;
	struct buffer_head *bh;

	/* Reserve the next log block; give up if it cannot be mapped. */
	if (jbd2_journal_next_log_block(journal, &blocknr))
		return NULL;

	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
	if (!bh)
		return NULL;

	/* Hand back a zero-filled, up-to-date buffer. */
	lock_buffer(bh);
	memset(bh->b_data, 0, journal->j_blocksize);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	BUFFER_TRACE(bh, " return this buffer ");
	return jbd2_journal_add_journal_head(bh);
}
2008-01-29 07:58:27 +03:00
/*
 * Per-open state of the jbd2 /proc statistics files.  It is allocated by
 * the open handlers, stored in seq_file->private and freed on release.
 */
struct jbd2_stats_proc_session {
/* Journal being reported on; only jbd2_seq_info_open() fills this in. */
journal_t * journal ;
/* Private copy, taken at open time, of the statistics to print. */
struct transaction_stats_s * stats ;
/* History file only: index in stats[] where iteration starts (j_history_cur % max). */
int start ;
/* History file only: number of entries in stats[] (copy of j_history_max). */
int max ;
} ;
static void * jbd2_history_skip_empty ( struct jbd2_stats_proc_session * s ,
struct transaction_stats_s * ts ,
int first )
{
if ( ts = = s - > stats + s - > max )
ts = s - > stats ;
if ( ! first & & ts = = s - > stats + s - > start )
return NULL ;
while ( ts - > ts_type = = 0 ) {
ts + + ;
if ( ts = = s - > stats + s - > max )
ts = s - > stats ;
if ( ts = = s - > stats + s - > start )
return NULL ;
}
return ts ;
}
static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos)
{
	struct jbd2_stats_proc_session *s = seq->private;
	struct transaction_stats_s *ts;
	int remaining = *pos;

	/* Position 0 is the header line. */
	if (remaining == 0)
		return SEQ_START_TOKEN;

	/* Position 1 is the first non-empty record from the start slot. */
	ts = jbd2_history_skip_empty(s, s->stats + s->start, 1);
	if (!ts)
		return NULL;

	/* Walk forward one non-empty record per remaining position. */
	for (remaining--; remaining && ts; remaining--)
		ts = jbd2_history_skip_empty(s, ts + 1, 0);

	return ts;
}
static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct jbd2_stats_proc_session *s = seq->private;
	struct transaction_stats_s *ts = v;

	(*pos)++;

	/* After the header comes the first record at the start slot. */
	if (v == SEQ_START_TOKEN)
		return jbd2_history_skip_empty(s, s->stats + s->start, 1);

	/* Otherwise continue with the record following the current one. */
	return jbd2_history_skip_empty(s, ts + 1, 0);
}
static int jbd2_seq_history_show ( struct seq_file * seq , void * v )
{
struct transaction_stats_s * ts = v ;
if ( v = = SEQ_START_TOKEN ) {
seq_printf ( seq , " %-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
" %-5s %-5s %-5s %-5s %-5s \n " , " R/C " , " tid " ,
" wait " , " run " , " lock " , " flush " , " log " , " hndls " ,
" block " , " inlog " , " ctime " , " write " , " drop " ,
" close " ) ;
return 0 ;
}
if ( ts - > ts_type = = JBD2_STATS_RUN )
seq_printf ( seq , " %-4s %-5lu %-5u %-5u %-5u %-5u %-5u "
" %-6lu %-5lu %-5lu \n " , " R " , ts - > ts_tid ,
jiffies_to_msecs ( ts - > u . run . rs_wait ) ,
jiffies_to_msecs ( ts - > u . run . rs_running ) ,
jiffies_to_msecs ( ts - > u . run . rs_locked ) ,
jiffies_to_msecs ( ts - > u . run . rs_flushing ) ,
jiffies_to_msecs ( ts - > u . run . rs_logging ) ,
ts - > u . run . rs_handle_count ,
ts - > u . run . rs_blocks ,
ts - > u . run . rs_blocks_logged ) ;
else if ( ts - > ts_type = = JBD2_STATS_CHECKPOINT )
seq_printf ( seq , " %-4s %-5lu %48s %-5u %-5lu %-5lu %-5lu \n " ,
" C " , ts - > ts_tid , " " ,
jiffies_to_msecs ( ts - > u . chp . cs_chp_time ) ,
ts - > u . chp . cs_written , ts - > u . chp . cs_dropped ,
ts - > u . chp . cs_forced_to_close ) ;
else
J_ASSERT ( 0 ) ;
return 0 ;
}
/*
 * seq_file stop callback for the history file.  Intentionally empty:
 * the start/next callbacks in this file take no lock and allocate
 * nothing, so there is nothing to release here.
 */
static void jbd2_seq_history_stop ( struct seq_file * seq , void * v )
{
}
/*
 * seq_file iterator callbacks for the history file; handed to seq_open()
 * by jbd2_seq_history_open().
 */
static struct seq_operations jbd2_seq_history_ops = {
. start = jbd2_seq_history_start ,
. next = jbd2_seq_history_next ,
. stop = jbd2_seq_history_stop ,
. show = jbd2_seq_history_show ,
} ;
static int jbd2_seq_history_open ( struct inode * inode , struct file * file )
{
journal_t * journal = PDE ( inode ) - > data ;
struct jbd2_stats_proc_session * s ;
int rc , size ;
s = kmalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( s = = NULL )
return - ENOMEM ;
size = sizeof ( struct transaction_stats_s ) * journal - > j_history_max ;
s - > stats = kmalloc ( size , GFP_KERNEL ) ;
if ( s - > stats = = NULL ) {
kfree ( s ) ;
return - ENOMEM ;
}
spin_lock ( & journal - > j_history_lock ) ;
memcpy ( s - > stats , journal - > j_history , size ) ;
s - > max = journal - > j_history_max ;
s - > start = journal - > j_history_cur % s - > max ;
spin_unlock ( & journal - > j_history_lock ) ;
rc = seq_open ( file , & jbd2_seq_history_ops ) ;
if ( rc = = 0 ) {
struct seq_file * m = file - > private_data ;
m - > private = s ;
} else {
kfree ( s - > stats ) ;
kfree ( s ) ;
}
return rc ;
}
static int jbd2_seq_history_release ( struct inode * inode , struct file * file )
{
struct seq_file * seq = file - > private_data ;
struct jbd2_stats_proc_session * s = seq - > private ;
kfree ( s - > stats ) ;
kfree ( s ) ;
return seq_release ( inode , file ) ;
}
/*
 * File operations of the history file: open/release are local, while
 * read and llseek are the generic seq_file helpers.
 */
static struct file_operations jbd2_seq_history_fops = {
. owner = THIS_MODULE ,
. open = jbd2_seq_history_open ,
. read = seq_read ,
. llseek = seq_lseek ,
. release = jbd2_seq_history_release ,
} ;
static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
{
	/* The info file has a single record, available only at position 0. */
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}
static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
{
	/*
	 * There is only one record, so iteration always ends here.  The
	 * position is still advanced, as the seq_file iterator contract
	 * expects of next(); otherwise a later start() at the stored
	 * position would hand out the single record again.
	 */
	(*pos)++;
	return NULL;
}
static int jbd2_seq_info_show ( struct seq_file * seq , void * v )
{
struct jbd2_stats_proc_session * s = seq - > private ;
if ( v ! = SEQ_START_TOKEN )
return 0 ;
seq_printf ( seq , " %lu transaction, each upto %u blocks \n " ,
s - > stats - > ts_tid ,
s - > journal - > j_max_transaction_buffers ) ;
if ( s - > stats - > ts_tid = = 0 )
return 0 ;
seq_printf ( seq , " average: \n %ums waiting for transaction \n " ,
jiffies_to_msecs ( s - > stats - > u . run . rs_wait / s - > stats - > ts_tid ) ) ;
seq_printf ( seq , " %ums running transaction \n " ,
jiffies_to_msecs ( s - > stats - > u . run . rs_running / s - > stats - > ts_tid ) ) ;
seq_printf ( seq , " %ums transaction was being locked \n " ,
jiffies_to_msecs ( s - > stats - > u . run . rs_locked / s - > stats - > ts_tid ) ) ;
seq_printf ( seq , " %ums flushing data (in ordered mode) \n " ,
jiffies_to_msecs ( s - > stats - > u . run . rs_flushing / s - > stats - > ts_tid ) ) ;
seq_printf ( seq , " %ums logging transaction \n " ,
jiffies_to_msecs ( s - > stats - > u . run . rs_logging / s - > stats - > ts_tid ) ) ;
2009-01-12 06:34:01 +03:00
seq_printf ( seq , " %lluus average transaction commit time \n " ,
div_u64 ( s - > journal - > j_average_commit_time , 1000 ) ) ;
2008-01-29 07:58:27 +03:00
seq_printf ( seq , " %lu handles per transaction \n " ,
s - > stats - > u . run . rs_handle_count / s - > stats - > ts_tid ) ;
seq_printf ( seq , " %lu blocks per transaction \n " ,
s - > stats - > u . run . rs_blocks / s - > stats - > ts_tid ) ;
seq_printf ( seq , " %lu logged blocks per transaction \n " ,
s - > stats - > u . run . rs_blocks_logged / s - > stats - > ts_tid ) ;
return 0 ;
}
/*
 * seq_file stop callback for the info file.  Intentionally empty: the
 * start/next callbacks in this file acquire nothing that would need
 * releasing.
 */
static void jbd2_seq_info_stop ( struct seq_file * seq , void * v )
{
}
/*
 * seq_file iterator callbacks for the info file; handed to seq_open()
 * by jbd2_seq_info_open().
 */
static struct seq_operations jbd2_seq_info_ops = {
. start = jbd2_seq_info_start ,
. next = jbd2_seq_info_next ,
. stop = jbd2_seq_info_stop ,
. show = jbd2_seq_info_show ,
} ;
static int jbd2_seq_info_open ( struct inode * inode , struct file * file )
{
journal_t * journal = PDE ( inode ) - > data ;
struct jbd2_stats_proc_session * s ;
int rc , size ;
s = kmalloc ( sizeof ( * s ) , GFP_KERNEL ) ;
if ( s = = NULL )
return - ENOMEM ;
size = sizeof ( struct transaction_stats_s ) ;
s - > stats = kmalloc ( size , GFP_KERNEL ) ;
if ( s - > stats = = NULL ) {
kfree ( s ) ;
return - ENOMEM ;
}
spin_lock ( & journal - > j_history_lock ) ;
memcpy ( s - > stats , & journal - > j_stats , size ) ;
s - > journal = journal ;
spin_unlock ( & journal - > j_history_lock ) ;
rc = seq_open ( file , & jbd2_seq_info_ops ) ;
if ( rc = = 0 ) {
struct seq_file * m = file - > private_data ;
m - > private = s ;
} else {
kfree ( s - > stats ) ;
kfree ( s ) ;
}
return rc ;
}
static int jbd2_seq_info_release ( struct inode * inode , struct file * file )
{
struct seq_file * seq = file - > private_data ;
struct jbd2_stats_proc_session * s = seq - > private ;
kfree ( s - > stats ) ;
kfree ( s ) ;
return seq_release ( inode , file ) ;
}
/*
 * File operations of the info file: open/release are local, while
 * read and llseek are the generic seq_file helpers.
 */
static struct file_operations jbd2_seq_info_fops = {
. owner = THIS_MODULE ,
. open = jbd2_seq_info_open ,
. read = seq_read ,
. llseek = seq_lseek ,
. release = jbd2_seq_info_release ,
} ;
/*
 * Parent proc directory under which each journal's statistics directory
 * is created; journal_init_stats() allocates no history while it is NULL.
 * NOTE(review): it is not assigned in this part of the file - confirm
 * where it is set up.
 */
static struct proc_dir_entry * proc_jbd2_stats ;
/*
 * Create the proc directory for this journal, named after j_devname and
 * placed under proc_jbd2_stats, and remember it in j_proc_entry.  When the
 * directory was created, add the read-only history and info files to it,
 * each carrying the journal as its private data.
 */
static void jbd2_stats_proc_init(journal_t *journal)
{
	struct proc_dir_entry *dir;

	dir = proc_mkdir(journal->j_devname, proc_jbd2_stats);
	journal->j_proc_entry = dir;
	if (!dir)
		return;

	proc_create_data(" history ", S_IRUGO, dir,
			 &jbd2_seq_history_fops, journal);
	proc_create_data(" info ", S_IRUGO, dir,
			 &jbd2_seq_info_fops, journal);
}
/*
 * Remove the proc entries created by jbd2_stats_proc_init(): first the two
 * files inside the journal's directory, then the directory itself.
 */
static void jbd2_stats_proc_exit(journal_t *journal)
{
	struct proc_dir_entry *dir = journal->j_proc_entry;

	remove_proc_entry(" info ", dir);
	remove_proc_entry(" history ", dir);

	remove_proc_entry(journal->j_devname, proc_jbd2_stats);
}
/*
 * Set up the per-journal statistics state.
 *
 * j_history_lock is initialised unconditionally and before any early
 * return: jbd2_seq_info_open() takes that lock whenever the info proc
 * file is opened, and the proc files are created regardless of whether
 * the history allocation below succeeded.
 *
 * The history array (100 transaction_stats_s slots) is only allocated
 * when the jbd2 proc directory exists.  If the allocation fails,
 * j_history_max is reset to 0 and j_history stays NULL.
 */
static void journal_init_stats(journal_t *journal)
{
	int size;

	spin_lock_init(&journal->j_history_lock);

	if (!proc_jbd2_stats)
		return;

	journal->j_history_max = 100;
	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
	journal->j_history = kzalloc(size, GFP_KERNEL);
	if (!journal->j_history)
		journal->j_history_max = 0;
}
2006-10-11 12:20:57 +04:00
/*
* Management for journal control blocks : functions to create and
* destroy journal_t structures , and to initialise and read existing
* journal blocks from disk . */
/* First: create and setup a journal_t object in memory. We initialise
* very few fields yet : that has to wait until we have created the
* journal structures from scratch , or loaded them from disk . */
static journal_t * journal_init_common ( void )
{
journal_t * journal ;
int err ;
2007-10-17 02:38:25 +04:00
journal = kzalloc ( sizeof ( * journal ) , GFP_KERNEL | __GFP_NOFAIL ) ;
2006-10-11 12:20:57 +04:00
if ( ! journal )
goto fail ;
init_waitqueue_head ( & journal - > j_wait_transaction_locked ) ;
init_waitqueue_head ( & journal - > j_wait_logspace ) ;
init_waitqueue_head ( & journal - > j_wait_done_commit ) ;
init_waitqueue_head ( & journal - > j_wait_checkpoint ) ;
init_waitqueue_head ( & journal - > j_wait_commit ) ;
init_waitqueue_head ( & journal - > j_wait_updates ) ;
mutex_init ( & journal - > j_barrier ) ;
mutex_init ( & journal - > j_checkpoint_mutex ) ;
spin_lock_init ( & journal - > j_revoke_lock ) ;
spin_lock_init ( & journal - > j_list_lock ) ;
spin_lock_init ( & journal - > j_state_lock ) ;
2007-10-17 02:38:25 +04:00
journal - > j_commit_interval = ( HZ * JBD2_DEFAULT_MAX_COMMIT_AGE ) ;
2009-01-04 04:27:38 +03:00
journal - > j_min_batch_time = 0 ;
journal - > j_max_batch_time = 15000 ; /* 15ms */
2006-10-11 12:20:57 +04:00
/* The journal is marked for error until we succeed with recovery! */
2006-10-11 12:20:59 +04:00
journal - > j_flags = JBD2_ABORT ;
2006-10-11 12:20:57 +04:00
/* Set up a default-sized revoke table for the new mount. */
2006-10-11 12:20:59 +04:00
err = jbd2_journal_init_revoke ( journal , JOURNAL_REVOKE_DEFAULT_HASH ) ;
2006-10-11 12:20:57 +04:00
if ( err ) {
kfree ( journal ) ;
goto fail ;
}
2008-01-29 07:58:27 +03:00
journal_init_stats ( journal ) ;
2006-10-11 12:20:57 +04:00
return journal ;
fail :
return NULL ;
}
2006-10-11 12:20:59 +04:00
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
2006-10-11 12:20:57 +04:00
*
* Create a journal structure assigned some fixed set of disk blocks to
* the journal . We don ' t actually touch those disk blocks yet , but we
* need to set up all of the mapping information to tell the journaling
* system where the journal blocks are .
*
*/
/**
2008-04-17 18:38:59 +04:00
* journal_t * jbd2_journal_init_dev ( ) - creates and initialises a journal structure
2006-10-11 12:20:57 +04:00
* @ bdev : Block device on which to create the journal
* @ fs_dev : Device which hold journalled filesystem for this journal .
* @ start : Block nr Start of journal .
* @ len : Length of the journal in blocks .
* @ blocksize : blocksize of journalling device
2008-04-17 18:38:59 +04:00
*
* Returns : a newly created journal_t *
2006-10-11 12:20:57 +04:00
*
2006-10-11 12:20:59 +04:00
* jbd2_journal_init_dev creates a journal which maps a fixed contiguous
2006-10-11 12:20:57 +04:00
* range of blocks on an arbitrary block device .
*
*/
2006-10-11 12:20:59 +04:00
journal_t * jbd2_journal_init_dev ( struct block_device * bdev ,
2006-10-11 12:20:57 +04:00
struct block_device * fs_dev ,
2006-10-11 12:21:13 +04:00
unsigned long long start , int len , int blocksize )
2006-10-11 12:20:57 +04:00
{
journal_t * journal = journal_init_common ( ) ;
struct buffer_head * bh ;
2008-09-16 22:36:17 +04:00
char * p ;
2006-10-11 12:20:57 +04:00
int n ;
if ( ! journal )
return NULL ;
/* journal descriptor can store up to n blocks -bzzz */
journal - > j_blocksize = blocksize ;
2009-01-06 22:53:35 +03:00
jbd2_stats_proc_init ( journal ) ;
2006-10-11 12:20:57 +04:00
n = journal - > j_blocksize / sizeof ( journal_block_tag_t ) ;
journal - > j_wbufsize = n ;
journal - > j_wbuf = kmalloc ( n * sizeof ( struct buffer_head * ) , GFP_KERNEL ) ;
if ( ! journal - > j_wbuf ) {
printk ( KERN_ERR " %s: Cant allocate bhs for commit thread \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2009-01-06 22:53:35 +03:00
goto out_err ;
2006-10-11 12:20:57 +04:00
}
journal - > j_dev = bdev ;
journal - > j_fs_dev = fs_dev ;
journal - > j_blk_offset = start ;
journal - > j_maxlen = len ;
2008-09-16 22:36:17 +04:00
bdevname ( journal - > j_dev , journal - > j_devname ) ;
p = journal - > j_devname ;
while ( ( p = strchr ( p , ' / ' ) ) )
* p = ' ! ' ;
2006-10-11 12:20:57 +04:00
bh = __getblk ( journal - > j_dev , start , journal - > j_blocksize ) ;
2009-01-06 22:53:35 +03:00
if ( ! bh ) {
printk ( KERN_ERR
" %s: Cannot get buffer for journal superblock \n " ,
__func__ ) ;
goto out_err ;
}
2006-10-11 12:20:57 +04:00
journal - > j_sb_buffer = bh ;
journal - > j_superblock = ( journal_superblock_t * ) bh - > b_data ;
2009-01-06 22:53:35 +03:00
2006-10-11 12:20:57 +04:00
return journal ;
2009-01-06 22:53:35 +03:00
out_err :
jbd2_stats_proc_exit ( journal ) ;
kfree ( journal ) ;
return NULL ;
2006-10-11 12:20:57 +04:00
}
/**
2006-10-11 12:20:59 +04:00
* journal_t * jbd2_journal_init_inode ( ) - creates a journal which maps to a inode .
2006-10-11 12:20:57 +04:00
* @ inode : An inode to create the journal in
*
2006-10-11 12:20:59 +04:00
* jbd2_journal_init_inode creates a journal which maps an on - disk inode as
2006-10-11 12:20:57 +04:00
* the journal . The inode must exist already , must support bmap ( ) and
* must have all data blocks preallocated .
*/
2006-10-11 12:20:59 +04:00
journal_t * jbd2_journal_init_inode ( struct inode * inode )
2006-10-11 12:20:57 +04:00
{
struct buffer_head * bh ;
journal_t * journal = journal_init_common ( ) ;
2008-09-16 22:36:17 +04:00
char * p ;
2006-10-11 12:20:57 +04:00
int err ;
int n ;
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ;
2006-10-11 12:20:57 +04:00
if ( ! journal )
return NULL ;
journal - > j_dev = journal - > j_fs_dev = inode - > i_sb - > s_bdev ;
journal - > j_inode = inode ;
2008-09-16 22:36:17 +04:00
bdevname ( journal - > j_dev , journal - > j_devname ) ;
p = journal - > j_devname ;
while ( ( p = strchr ( p , ' / ' ) ) )
* p = ' ! ' ;
p = journal - > j_devname + strlen ( journal - > j_devname ) ;
sprintf ( p , " :%lu " , journal - > j_inode - > i_ino ) ;
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 ,
" journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld \n " ,
journal , inode - > i_sb - > s_id , inode - > i_ino ,
( long long ) inode - > i_size ,
inode - > i_sb - > s_blocksize_bits , inode - > i_sb - > s_blocksize ) ;
journal - > j_maxlen = inode - > i_size > > inode - > i_sb - > s_blocksize_bits ;
journal - > j_blocksize = inode - > i_sb - > s_blocksize ;
2008-01-29 07:58:27 +03:00
jbd2_stats_proc_init ( journal ) ;
2006-10-11 12:20:57 +04:00
/* journal descriptor can store up to n blocks -bzzz */
n = journal - > j_blocksize / sizeof ( journal_block_tag_t ) ;
journal - > j_wbufsize = n ;
journal - > j_wbuf = kmalloc ( n * sizeof ( struct buffer_head * ) , GFP_KERNEL ) ;
if ( ! journal - > j_wbuf ) {
printk ( KERN_ERR " %s: Cant allocate bhs for commit thread \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2009-01-06 22:53:35 +03:00
goto out_err ;
2006-10-11 12:20:57 +04:00
}
2006-10-11 12:20:59 +04:00
err = jbd2_journal_bmap ( journal , 0 , & blocknr ) ;
2006-10-11 12:20:57 +04:00
/* If that failed, give up */
if ( err ) {
printk ( KERN_ERR " %s: Cannnot locate journal superblock \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2009-01-06 22:53:35 +03:00
goto out_err ;
2006-10-11 12:20:57 +04:00
}
bh = __getblk ( journal - > j_dev , blocknr , journal - > j_blocksize ) ;
2009-01-06 22:53:35 +03:00
if ( ! bh ) {
printk ( KERN_ERR
" %s: Cannot get buffer for journal superblock \n " ,
__func__ ) ;
goto out_err ;
}
2006-10-11 12:20:57 +04:00
journal - > j_sb_buffer = bh ;
journal - > j_superblock = ( journal_superblock_t * ) bh - > b_data ;
return journal ;
2009-01-06 22:53:35 +03:00
out_err :
jbd2_stats_proc_exit ( journal ) ;
kfree ( journal ) ;
return NULL ;
2006-10-11 12:20:57 +04:00
}
/*
 * Drop the journal's superblock buffer and clear j_sb_buffer.  Used when
 * journal init or create aborts, so that a later journal destroy finds no
 * superblock buffer and does not write back a bogus superblock.
 */
static void journal_fail_superblock(journal_t *journal)
{
	brelse(journal->j_sb_buffer);
	journal->j_sb_buffer = NULL;
}
/*
 * Initialise the in-memory journal fields for the start of a new
 * journaling session.  Used both when creating a journal and after
 * recovering an old one, to reset it for subsequent use.
 *
 * Returns the result of jbd2_journal_start_thread().
 */
static int journal_reset(journal_t *journal)
{
	journal_superblock_t *sb = journal->j_superblock;
	unsigned long long log_first = be32_to_cpu(sb->s_first);
	unsigned long long log_last = be32_to_cpu(sb->s_maxlen);

	/* The log starts out empty: head and tail both sit at the first block. */
	journal->j_first = log_first;
	journal->j_last = log_last;
	journal->j_head = log_first;
	journal->j_tail = log_first;
	journal->j_free = log_last - log_first;

	/* Sequence numbers are derived from the current transaction sequence. */
	journal->j_tail_sequence = journal->j_transaction_sequence;
	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
	journal->j_commit_request = journal->j_commit_sequence;

	journal->j_max_transaction_buffers = journal->j_maxlen / 4;

	/* Add the dynamic fields and write the superblock to disk. */
	jbd2_journal_update_superblock(journal, 1);

	return jbd2_journal_start_thread(journal);
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_update_superblock ( ) - Update journal sb on disk .
2006-10-11 12:20:57 +04:00
* @ journal : The journal to update .
* @ wait : Set to ' 0 ' if you don ' t want to wait for IO completion .
*
* Update a journal ' s dynamic superblock fields and write it to disk ,
* optionally waiting for the IO to complete .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_update_superblock ( journal_t * journal , int wait )
2006-10-11 12:20:57 +04:00
{
journal_superblock_t * sb = journal - > j_superblock ;
struct buffer_head * bh = journal - > j_sb_buffer ;
/*
* As a special case , if the on - disk copy is already marked as needing
* no recovery ( s_start = = 0 ) and there are no outstanding transactions
* in the filesystem , then we can safely defer the superblock update
2006-10-11 12:20:59 +04:00
* until the next commit by setting JBD2_FLUSHED . This avoids
2006-10-11 12:20:57 +04:00
* attempting a write to a potential - readonly device .
*/
if ( sb - > s_start = = 0 & & journal - > j_tail_sequence = =
journal - > j_transaction_sequence ) {
jbd_debug ( 1 , " JBD: Skipping superblock update on recovered sb "
" (start %ld, seq %d, errno %d) \n " ,
journal - > j_tail , journal - > j_tail_sequence ,
journal - > j_errno ) ;
goto out ;
}
2008-10-07 05:35:40 +04:00
if ( buffer_write_io_error ( bh ) ) {
/*
* Oh , dear . A previous attempt to write the journal
* superblock failed . This could happen because the
* USB device was yanked out . Or it could happen to
* be a transient write error and maybe the block will
* be remapped . Nothing we can do but to retry the
* write and hope for the best .
*/
printk ( KERN_ERR " JBD2: previous I/O error detected "
" for journal superblock update for %s. \n " ,
journal - > j_devname ) ;
clear_buffer_write_io_error ( bh ) ;
set_buffer_uptodate ( bh ) ;
}
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_state_lock ) ;
jbd_debug ( 1 , " JBD: updating superblock (start %ld, seq %d, errno %d) \n " ,
journal - > j_tail , journal - > j_tail_sequence , journal - > j_errno ) ;
sb - > s_sequence = cpu_to_be32 ( journal - > j_tail_sequence ) ;
sb - > s_start = cpu_to_be32 ( journal - > j_tail ) ;
sb - > s_errno = cpu_to_be32 ( journal - > j_errno ) ;
spin_unlock ( & journal - > j_state_lock ) ;
BUFFER_TRACE ( bh , " marking dirty " ) ;
mark_buffer_dirty ( bh ) ;
2008-10-07 05:35:40 +04:00
if ( wait ) {
2006-10-11 12:20:57 +04:00
sync_dirty_buffer ( bh ) ;
2008-10-07 05:35:40 +04:00
if ( buffer_write_io_error ( bh ) ) {
printk ( KERN_ERR " JBD2: I/O error detected "
" when updating journal superblock for %s. \n " ,
journal - > j_devname ) ;
clear_buffer_write_io_error ( bh ) ;
set_buffer_uptodate ( bh ) ;
}
} else
2006-10-11 12:20:57 +04:00
ll_rw_block ( SWRITE , 1 , & bh ) ;
out :
/* If we have just flushed the log (by marking s_start==0), then
* any future commit will have to be careful to update the
* superblock again to re - record the true start of the log . */
spin_lock ( & journal - > j_state_lock ) ;
if ( sb - > s_start )
2006-10-11 12:20:59 +04:00
journal - > j_flags & = ~ JBD2_FLUSHED ;
2006-10-11 12:20:57 +04:00
else
2006-10-11 12:20:59 +04:00
journal - > j_flags | = JBD2_FLUSHED ;
2006-10-11 12:20:57 +04:00
spin_unlock ( & journal - > j_state_lock ) ;
}
/*
 * Read the superblock for a given journal and perform the initial
 * validation of its format.
 *
 * Returns 0 on success.  On failure returns -EIO (the buffer could not be
 * read) or -EINVAL (bad magic or block size, unknown superblock type, or
 * the superblock claims more blocks than j_maxlen); in both cases the
 * superblock buffer is released through journal_fail_superblock().
 */
static int journal_get_superblock(journal_t *journal)
{
	struct buffer_head *bh = journal->j_sb_buffer;
	journal_superblock_t *sb;
	int err;

	J_ASSERT(bh != NULL);

	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			printk(KERN_ERR
			       " JBD: IO error reading journal superblock \n ");
			err = -EIO;
			goto fail;
		}
	}

	/* Every failure from here on is a format problem. */
	err = -EINVAL;
	sb = journal->j_superblock;

	if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
		printk(KERN_WARNING " JBD: no valid journal superblock found \n ");
		goto fail;
	}

	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
	case JBD2_SUPERBLOCK_V1:
		journal->j_format_version = 1;
		break;
	case JBD2_SUPERBLOCK_V2:
		journal->j_format_version = 2;
		break;
	default:
		printk(KERN_WARNING " JBD: unrecognised superblock format ID \n ");
		goto fail;
	}

	/*
	 * A superblock describing fewer blocks than we were given shrinks
	 * j_maxlen; one describing more means the journal area is too small.
	 */
	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) {
		journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
	} else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
		printk(KERN_WARNING " JBD: journal file too short \n ");
		goto fail;
	}

	return 0;

fail:
	journal_fail_superblock(journal);
	return err;
}
/*
 * Load the on-disk journal superblock and copy its key fields, converted
 * from big-endian, into the journal_t.
 *
 * Returns 0 on success or the error from journal_get_superblock().
 */
static int load_superblock(journal_t *journal)
{
	journal_superblock_t *sb;
	int err = journal_get_superblock(journal);

	if (err != 0)
		return err;

	sb = journal->j_superblock;

	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
	journal->j_tail = be32_to_cpu(sb->s_start);
	journal->j_first = be32_to_cpu(sb->s_first);
	journal->j_last = be32_to_cpu(sb->s_maxlen);
	journal->j_errno = be32_to_cpu(sb->s_errno);

	return 0;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_load ( ) - Read journal from disk .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
*
* Given a journal_t structure which tells us which disk blocks contain
* a journal , read the journal from disk to initialise the in - memory
* structures .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_load ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err ;
journal_superblock_t * sb ;
err = load_superblock ( journal ) ;
if ( err )
return err ;
sb = journal - > j_superblock ;
/* If this is a V2 superblock, then we have to check the
* features flags on it . */
if ( journal - > j_format_version > = 2 ) {
if ( ( sb - > s_feature_ro_compat &
2006-10-11 12:20:59 +04:00
~ cpu_to_be32 ( JBD2_KNOWN_ROCOMPAT_FEATURES ) ) | |
2006-10-11 12:20:57 +04:00
( sb - > s_feature_incompat &
2006-10-11 12:20:59 +04:00
~ cpu_to_be32 ( JBD2_KNOWN_INCOMPAT_FEATURES ) ) ) {
2006-10-11 12:20:57 +04:00
printk ( KERN_WARNING
" JBD: Unrecognised features on journal \n " ) ;
return - EINVAL ;
}
}
/* Let the recovery code check whether it needs to recover any
* data from the journal . */
2006-10-11 12:20:59 +04:00
if ( jbd2_journal_recover ( journal ) )
2006-10-11 12:20:57 +04:00
goto recovery_error ;
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk . */
if ( journal_reset ( journal ) )
goto recovery_error ;
2006-10-11 12:20:59 +04:00
journal - > j_flags & = ~ JBD2_ABORT ;
journal - > j_flags | = JBD2_LOADED ;
2006-10-11 12:20:57 +04:00
return 0 ;
recovery_error :
printk ( KERN_WARNING " JBD: recovery failed \n " ) ;
return - EIO ;
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_destroy ( ) - Release a journal_t structure .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
*
* Release a journal_t structure once it is no longer in use by the
* journaled object .
2008-10-11 04:29:13 +04:00
* Return < 0 if we couldn ' t clean up the journal .
2006-10-11 12:20:57 +04:00
*/
2008-10-11 04:29:13 +04:00
int jbd2_journal_destroy ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
2008-10-11 04:29:13 +04:00
int err = 0 ;
2006-10-11 12:20:57 +04:00
/* Wait for the commit thread to wake up and die. */
journal_kill_thread ( journal ) ;
/* Force a final log commit */
if ( journal - > j_running_transaction )
2006-10-11 12:20:59 +04:00
jbd2_journal_commit_transaction ( journal ) ;
2006-10-11 12:20:57 +04:00
/* Force any old transactions to disk */
/* Totally anal locking here... */
spin_lock ( & journal - > j_list_lock ) ;
while ( journal - > j_checkpoint_transactions ! = NULL ) {
spin_unlock ( & journal - > j_list_lock ) ;
2008-11-05 08:09:22 +03:00
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:59 +04:00
jbd2_log_do_checkpoint ( journal ) ;
2008-11-05 08:09:22 +03:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_list_lock ) ;
}
J_ASSERT ( journal - > j_running_transaction = = NULL ) ;
J_ASSERT ( journal - > j_committing_transaction = = NULL ) ;
J_ASSERT ( journal - > j_checkpoint_transactions = = NULL ) ;
spin_unlock ( & journal - > j_list_lock ) ;
if ( journal - > j_sb_buffer ) {
2008-10-11 04:29:13 +04:00
if ( ! is_journal_aborted ( journal ) ) {
/* We can now mark the journal as empty. */
journal - > j_tail = 0 ;
journal - > j_tail_sequence =
+ + journal - > j_transaction_sequence ;
jbd2_journal_update_superblock ( journal , 1 ) ;
} else {
err = - EIO ;
}
2006-10-11 12:20:57 +04:00
brelse ( journal - > j_sb_buffer ) ;
}
2008-01-29 07:58:27 +03:00
if ( journal - > j_proc_entry )
jbd2_stats_proc_exit ( journal ) ;
2006-10-11 12:20:57 +04:00
if ( journal - > j_inode )
iput ( journal - > j_inode ) ;
if ( journal - > j_revoke )
2006-10-11 12:20:59 +04:00
jbd2_journal_destroy_revoke ( journal ) ;
2006-10-11 12:20:57 +04:00
kfree ( journal - > j_wbuf ) ;
kfree ( journal ) ;
2008-10-11 04:29:13 +04:00
return err ;
2006-10-11 12:20:57 +04:00
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_check_used_features ( ) - Check if features specified are used .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to check .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Check whether the journal uses all of a given set of
* features . Return true ( non - zero ) if it does .
* */
2006-10-11 12:20:59 +04:00
int jbd2_journal_check_used_features ( journal_t * journal , unsigned long compat ,
2006-10-11 12:20:57 +04:00
unsigned long ro , unsigned long incompat )
{
journal_superblock_t * sb ;
if ( ! compat & & ! ro & & ! incompat )
return 1 ;
if ( journal - > j_format_version = = 1 )
return 0 ;
sb = journal - > j_superblock ;
if ( ( ( be32_to_cpu ( sb - > s_feature_compat ) & compat ) = = compat ) & &
( ( be32_to_cpu ( sb - > s_feature_ro_compat ) & ro ) = = ro ) & &
( ( be32_to_cpu ( sb - > s_feature_incompat ) & incompat ) = = incompat ) )
return 1 ;
return 0 ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_check_available_features ( ) - Check feature set in journalling layer
2006-10-11 12:20:57 +04:00
* @ journal : Journal to check .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Check whether the journaling code supports the use of
* all of a given set of features on this journal . Return true
* ( non - zero ) if it can . */
2006-10-11 12:20:59 +04:00
int jbd2_journal_check_available_features ( journal_t * journal , unsigned long compat ,
2006-10-11 12:20:57 +04:00
unsigned long ro , unsigned long incompat )
{
journal_superblock_t * sb ;
if ( ! compat & & ! ro & & ! incompat )
return 1 ;
sb = journal - > j_superblock ;
/* We can support any known requested features iff the
* superblock is in version 2. Otherwise we fail to support any
* extended sb features . */
if ( journal - > j_format_version ! = 2 )
return 0 ;
2006-10-11 12:20:59 +04:00
if ( ( compat & JBD2_KNOWN_COMPAT_FEATURES ) = = compat & &
( ro & JBD2_KNOWN_ROCOMPAT_FEATURES ) = = ro & &
( incompat & JBD2_KNOWN_INCOMPAT_FEATURES ) = = incompat )
2006-10-11 12:20:57 +04:00
return 1 ;
return 0 ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_set_features ( ) - Mark a given journal feature in the superblock
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Mark a given journal feature as present on the
* superblock . Returns true if the requested features could be set .
*
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_set_features ( journal_t * journal , unsigned long compat ,
2006-10-11 12:20:57 +04:00
unsigned long ro , unsigned long incompat )
{
journal_superblock_t * sb ;
2006-10-11 12:20:59 +04:00
if ( jbd2_journal_check_used_features ( journal , compat , ro , incompat ) )
2006-10-11 12:20:57 +04:00
return 1 ;
2006-10-11 12:20:59 +04:00
if ( ! jbd2_journal_check_available_features ( journal , compat , ro , incompat ) )
2006-10-11 12:20:57 +04:00
return 0 ;
jbd_debug ( 1 , " Setting new features 0x%lx/0x%lx/0x%lx \n " ,
compat , ro , incompat ) ;
sb = journal - > j_superblock ;
sb - > s_feature_compat | = cpu_to_be32 ( compat ) ;
sb - > s_feature_ro_compat | = cpu_to_be32 ( ro ) ;
sb - > s_feature_incompat | = cpu_to_be32 ( incompat ) ;
return 1 ;
}
2008-01-29 07:58:27 +03:00
/*
 * jbd2_journal_clear_features() - Clear a given journal feature in the
 *				    superblock
 * @journal: Journal to act on.
 * @compat: bitmask of compatible features
 * @ro: bitmask of features that force read-only mount
 * @incompat: bitmask of incompatible features
 *
 * Clear the given feature bits in the three feature masks of the
 * in-memory superblock.
 */
void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
				 unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *sb = journal->j_superblock;

	jbd_debug(1, " Clear features 0x%lx/0x%lx/0x%lx \n ",
		  compat, ro, incompat);

	sb->s_feature_compat &= ~cpu_to_be32(compat);
	sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
	sb->s_feature_incompat &= ~cpu_to_be32(incompat);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
2006-10-11 12:20:57 +04:00
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_update_format ( ) - Update on - disk journal structure .
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
*
* Given an initialised but unloaded journal struct , poke about in the
* on - disk structure to update it to the most recent supported version .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_update_format ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
journal_superblock_t * sb ;
int err ;
err = journal_get_superblock ( journal ) ;
if ( err )
return err ;
sb = journal - > j_superblock ;
switch ( be32_to_cpu ( sb - > s_header . h_blocktype ) ) {
2006-10-11 12:20:59 +04:00
case JBD2_SUPERBLOCK_V2 :
2006-10-11 12:20:57 +04:00
return 0 ;
2006-10-11 12:20:59 +04:00
case JBD2_SUPERBLOCK_V1 :
2006-10-11 12:20:57 +04:00
return journal_convert_superblock_v1 ( journal , sb ) ;
default :
break ;
}
return - EINVAL ;
}
/*
 * Convert a version 1 journal superblock to version 2 in place: zero
 * everything from s_feature_compat to the end of the block, set
 * s_nr_users to 1, switch the block type to V2 and write the buffer out
 * synchronously.  Always returns 0.
 */
static int journal_convert_superblock_v1(journal_t *journal,
					 journal_superblock_t *sb)
{
	struct buffer_head *bh = journal->j_sb_buffer;
	int v2_start, blocksize;

	printk(KERN_WARNING
	       " JBD: Converting superblock from version 1 to 2. \n ");

	/* Pre-initialise the new fields to zero. */
	v2_start = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
	blocksize = be32_to_cpu(sb->s_blocksize);
	memset(&sb->s_feature_compat, 0, blocksize - v2_start);

	sb->s_nr_users = cpu_to_be32(1);
	sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
	journal->j_format_version = 2;

	BUFFER_TRACE(bh, " marking dirty ");
	mark_buffer_dirty(bh);
	sync_dirty_buffer(bh);

	return 0;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_flush ( ) - Flush journal
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
*
* Flush all data for a given journal to disk and empty the journal .
* Filesystems can use this when remounting readonly to ensure that
* recovery does not need to happen on remount .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_flush ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
transaction_t * transaction = NULL ;
unsigned long old_tail ;
spin_lock ( & journal - > j_state_lock ) ;
/* Force everything buffered to the log... */
if ( journal - > j_running_transaction ) {
transaction = journal - > j_running_transaction ;
2006-10-11 12:20:59 +04:00
__jbd2_log_start_commit ( journal , transaction - > t_tid ) ;
2006-10-11 12:20:57 +04:00
} else if ( journal - > j_committing_transaction )
transaction = journal - > j_committing_transaction ;
/* Wait for the log commit to complete... */
if ( transaction ) {
tid_t tid = transaction - > t_tid ;
spin_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
jbd2_log_wait_commit ( journal , tid ) ;
2006-10-11 12:20:57 +04:00
} else {
spin_unlock ( & journal - > j_state_lock ) ;
}
/* ...and flush everything in the log out to disk. */
spin_lock ( & journal - > j_list_lock ) ;
while ( ! err & & journal - > j_checkpoint_transactions ! = NULL ) {
spin_unlock ( & journal - > j_list_lock ) ;
2008-10-11 04:29:13 +04:00
mutex_lock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:59 +04:00
err = jbd2_log_do_checkpoint ( journal ) ;
2008-10-11 04:29:13 +04:00
mutex_unlock ( & journal - > j_checkpoint_mutex ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_list_lock ) ;
}
spin_unlock ( & journal - > j_list_lock ) ;
2008-10-11 04:29:13 +04:00
if ( is_journal_aborted ( journal ) )
return - EIO ;
2006-10-11 12:20:59 +04:00
jbd2_cleanup_journal_tail ( journal ) ;
2006-10-11 12:20:57 +04:00
/* Finally, mark the journal as really needing no recovery.
* This sets s_start = = 0 in the underlying superblock , which is
* the magic code for a fully - recovered superblock . Any future
* commits of data to the journal will restore the current
* s_start value . */
spin_lock ( & journal - > j_state_lock ) ;
old_tail = journal - > j_tail ;
journal - > j_tail = 0 ;
spin_unlock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
jbd2_journal_update_superblock ( journal , 1 ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_state_lock ) ;
journal - > j_tail = old_tail ;
J_ASSERT ( ! journal - > j_running_transaction ) ;
J_ASSERT ( ! journal - > j_committing_transaction ) ;
J_ASSERT ( ! journal - > j_checkpoint_transactions ) ;
J_ASSERT ( journal - > j_head = = journal - > j_tail ) ;
J_ASSERT ( journal - > j_tail_sequence = = journal - > j_transaction_sequence ) ;
spin_unlock ( & journal - > j_state_lock ) ;
2008-10-11 04:29:13 +04:00
return 0 ;
2006-10-11 12:20:57 +04:00
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_wipe ( ) - Wipe journal contents
2006-10-11 12:20:57 +04:00
* @ journal : Journal to act on .
* @ write : flag ( see below )
*
* Wipe out all of the contents of a journal , safely . This will produce
* a warning if the journal contains any valid recovery information .
2006-10-11 12:20:59 +04:00
* Must be called between journal_init_ * ( ) and jbd2_journal_load ( ) .
2006-10-11 12:20:57 +04:00
*
* If ' write ' is non - zero , then we wipe out the journal on disk ; otherwise
* we merely suppress recovery .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_wipe ( journal_t * journal , int write )
2006-10-11 12:20:57 +04:00
{
journal_superblock_t * sb ;
int err = 0 ;
2006-10-11 12:20:59 +04:00
J_ASSERT ( ! ( journal - > j_flags & JBD2_LOADED ) ) ;
2006-10-11 12:20:57 +04:00
err = load_superblock ( journal ) ;
if ( err )
return err ;
sb = journal - > j_superblock ;
if ( ! journal - > j_tail )
goto no_recovery ;
printk ( KERN_WARNING " JBD: %s recovery information on journal \n " ,
write ? " Clearing " : " Ignoring " ) ;
2006-10-11 12:20:59 +04:00
err = jbd2_journal_skip_recovery ( journal ) ;
2006-10-11 12:20:57 +04:00
if ( write )
2006-10-11 12:20:59 +04:00
jbd2_journal_update_superblock ( journal , 1 ) ;
2006-10-11 12:20:57 +04:00
no_recovery :
return err ;
}
/*
* Journal abort has very specific semantics , which we describe
* for journal abort .
*
2009-06-09 08:06:20 +04:00
* Two internal functions , which provide abort to the jbd layer
2006-10-11 12:20:57 +04:00
* itself are here .
*/
/*
* Quick version for internal journal use ( doesn ' t lock the journal ) .
* Aborts hard - - - we mark the abort as occurred , but do _nothing_ else ,
* and don ' t attempt to make any other journal updates .
*/
2006-10-11 12:20:59 +04:00
void __jbd2_journal_abort_hard ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
transaction_t * transaction ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_ABORT )
2006-10-11 12:20:57 +04:00
return ;
printk ( KERN_ERR " Aborting journal on device %s. \n " ,
2008-09-16 22:36:17 +04:00
journal - > j_devname ) ;
2006-10-11 12:20:57 +04:00
spin_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
journal - > j_flags | = JBD2_ABORT ;
2006-10-11 12:20:57 +04:00
transaction = journal - > j_running_transaction ;
if ( transaction )
2006-10-11 12:20:59 +04:00
__jbd2_log_start_commit ( journal , transaction - > t_tid ) ;
2006-10-11 12:20:57 +04:00
spin_unlock ( & journal - > j_state_lock ) ;
}
/*
 * Soft abort: record the abort error status in the journal superblock,
 * but don't do any other IO.
 *
 * The first non-zero errno is kept in j_errno.  The superblock is only
 * written when @errno is non-zero.  Does nothing when the journal is
 * already flagged as aborted.
 */
static void __journal_abort_soft(journal_t *journal, int errno)
{
	if (journal->j_flags & JBD2_ABORT)
		return;

	/* Keep an error that was recorded earlier. */
	if (journal->j_errno == 0)
		journal->j_errno = errno;

	__jbd2_journal_abort_hard(journal);

	if (errno != 0)
		jbd2_journal_update_superblock(journal, 1);
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_abort ( ) - Shutdown the journal immediately .
2006-10-11 12:20:57 +04:00
* @ journal : the journal to shutdown .
* @ errno : an error number to record in the journal indicating
* the reason for the shutdown .
*
* Perform a complete , immediate shutdown of the ENTIRE
* journal ( not of a single transaction ) . This operation cannot be
* undone without closing and reopening the journal .
*
2006-10-11 12:20:59 +04:00
* The jbd2_journal_abort function is intended to support higher level error
2006-10-11 12:20:57 +04:00
* recovery mechanisms such as the ext2 / ext3 remount - readonly error
* mode .
*
* Journal abort has very specific semantics . Any existing dirty ,
* unjournaled buffers in the main filesystem will still be written to
* disk by bdflush , but the journaling mechanism will be suspended
* immediately and no further transaction commits will be honoured .
*
* Any dirty , journaled buffers will be written back to disk without
* hitting the journal . Atomicity cannot be guaranteed on an aborted
* filesystem , but we _do_ attempt to leave as much data as possible
* behind for fsck to use for cleanup .
*
* Any attempt to get a new transaction handle on a journal which is in
* ABORT state will just result in an - EROFS error return . A
2006-10-11 12:20:59 +04:00
* jbd2_journal_stop on an existing handle will return - EIO if we have
2006-10-11 12:20:57 +04:00
* entered abort state during the update .
*
* Recursive transactions are not disturbed by journal abort until the
2006-10-11 12:20:59 +04:00
* final jbd2_journal_stop , which will receive the - EIO error .
2006-10-11 12:20:57 +04:00
*
2006-10-11 12:20:59 +04:00
* Finally , the jbd2_journal_abort call allows the caller to supply an errno
2006-10-11 12:20:57 +04:00
* which will be recorded ( if possible ) in the journal superblock . This
* allows a client to record failure conditions in the middle of a
* transaction without having to complete the transaction to record the
* failure to disk . ext3_error , for example , now uses this
* functionality .
*
* Errors which originate from within the journaling layer will NOT
* supply an errno ; a null errno implies that absolutely no further
* writes are done to the journal ( unless there are any already in
* progress ) .
*
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_abort ( journal_t * journal , int errno )
2006-10-11 12:20:57 +04:00
{
__journal_abort_soft ( journal , errno ) ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_errno ( ) - returns the journal ' s error state .
2006-10-11 12:20:57 +04:00
* @ journal : journal to examine .
*
2009-06-09 08:06:20 +04:00
* This is the errno number set with jbd2_journal_abort ( ) , the last
2006-10-11 12:20:57 +04:00
* time the journal was mounted - if the journal was stopped
* without calling abort this will be 0.
*
* If the journal has been aborted on this mount time - EROFS will
* be returned .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_errno ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err ;
spin_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_ABORT )
2006-10-11 12:20:57 +04:00
err = - EROFS ;
else
err = journal - > j_errno ;
spin_unlock ( & journal - > j_state_lock ) ;
return err ;
}
/**
2006-10-11 12:20:59 +04:00
* int jbd2_journal_clear_err ( ) - clears the journal ' s error state
2006-10-11 12:20:57 +04:00
* @ journal : journal to act on .
*
2009-06-09 08:06:20 +04:00
* An error must be cleared or acked to take a FS out of readonly
2006-10-11 12:20:57 +04:00
* mode .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_clear_err ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int err = 0 ;
spin_lock ( & journal - > j_state_lock ) ;
2006-10-11 12:20:59 +04:00
if ( journal - > j_flags & JBD2_ABORT )
2006-10-11 12:20:57 +04:00
err = - EROFS ;
else
journal - > j_errno = 0 ;
spin_unlock ( & journal - > j_state_lock ) ;
return err ;
}
/**
2006-10-11 12:20:59 +04:00
* void jbd2_journal_ack_err ( ) - Ack journal err .
2006-10-11 12:20:57 +04:00
* @ journal : journal to act on .
*
2009-06-09 08:06:20 +04:00
* An error must be cleared or acked to take a FS out of readonly
2006-10-11 12:20:57 +04:00
* mode .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_ack_err ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
spin_lock ( & journal - > j_state_lock ) ;
if ( journal - > j_errno )
2006-10-11 12:20:59 +04:00
journal - > j_flags | = JBD2_ACK_ERR ;
2006-10-11 12:20:57 +04:00
spin_unlock ( & journal - > j_state_lock ) ;
}
2006-10-11 12:20:59 +04:00
int jbd2_journal_blocks_per_page ( struct inode * inode )
2006-10-11 12:20:57 +04:00
{
return 1 < < ( PAGE_CACHE_SHIFT - inode - > i_sb - > s_blocksize_bits ) ;
}
2006-10-11 12:21:08 +04:00
/*
 * helper functions to deal with 32 or 64 bit block numbers.
 *
 * Returns JBD2_TAG_SIZE64 when the journal has the 64BIT incompat
 * feature set, JBD2_TAG_SIZE32 otherwise.
 */
size_t journal_tag_bytes(journal_t *journal)
{
	if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
		return JBD2_TAG_SIZE32;

	return JBD2_TAG_SIZE64;
}
2006-10-11 12:20:57 +04:00
/*
 * Journal_head storage management
 */

/* Slab cache that journal_head structures are allocated from. */
static struct kmem_cache *jbd2_journal_head_cache;

#ifdef CONFIG_JBD2_DEBUG
/* Number of journal_heads currently allocated; reported at module exit. */
static atomic_t nr_journal_heads = ATOMIC_INIT(0);
#endif
2006-10-11 12:20:59 +04:00
static int journal_init_jbd2_journal_head_cache ( void )
2006-10-11 12:20:57 +04:00
{
int retval ;
2008-03-29 06:07:18 +03:00
J_ASSERT ( jbd2_journal_head_cache = = NULL ) ;
2006-10-11 12:21:00 +04:00
jbd2_journal_head_cache = kmem_cache_create ( " jbd2_journal_head " ,
2006-10-11 12:20:57 +04:00
sizeof ( struct journal_head ) ,
0 , /* offset */
2008-01-29 07:58:27 +03:00
SLAB_TEMPORARY , /* flags */
2007-07-20 05:11:58 +04:00
NULL ) ; /* ctor */
2006-10-11 12:20:57 +04:00
retval = 0 ;
2008-03-29 06:07:18 +03:00
if ( ! jbd2_journal_head_cache ) {
2006-10-11 12:20:57 +04:00
retval = - ENOMEM ;
printk ( KERN_EMERG " JBD: no memory for journal_head cache \n " ) ;
}
return retval ;
}
2006-10-11 12:20:59 +04:00
static void jbd2_journal_destroy_jbd2_journal_head_cache ( void )
2006-10-11 12:20:57 +04:00
{
2008-04-17 18:38:59 +04:00
if ( jbd2_journal_head_cache ) {
kmem_cache_destroy ( jbd2_journal_head_cache ) ;
jbd2_journal_head_cache = NULL ;
}
2006-10-11 12:20:57 +04:00
}
/*
* journal_head splicing and dicing
*/
static struct journal_head * journal_alloc_journal_head ( void )
{
struct journal_head * ret ;
static unsigned long last_warning ;
2007-07-18 16:57:06 +04:00
# ifdef CONFIG_JBD2_DEBUG
2006-10-11 12:20:57 +04:00
atomic_inc ( & nr_journal_heads ) ;
# endif
2006-10-11 12:20:59 +04:00
ret = kmem_cache_alloc ( jbd2_journal_head_cache , GFP_NOFS ) ;
2008-03-29 06:07:18 +03:00
if ( ! ret ) {
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 , " out of memory for journal_head \n " ) ;
if ( time_after ( jiffies , last_warning + 5 * HZ ) ) {
printk ( KERN_NOTICE " ENOMEM in %s, retrying. \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2006-10-11 12:20:57 +04:00
last_warning = jiffies ;
}
2008-03-29 06:07:18 +03:00
while ( ! ret ) {
2006-10-11 12:20:57 +04:00
yield ( ) ;
2006-10-11 12:20:59 +04:00
ret = kmem_cache_alloc ( jbd2_journal_head_cache , GFP_NOFS ) ;
2006-10-11 12:20:57 +04:00
}
}
return ret ;
}
static void journal_free_journal_head ( struct journal_head * jh )
{
2007-07-18 16:57:06 +04:00
# ifdef CONFIG_JBD2_DEBUG
2006-10-11 12:20:57 +04:00
atomic_dec ( & nr_journal_heads ) ;
2007-10-17 02:38:25 +04:00
memset ( jh , JBD2_POISON_FREE , sizeof ( * jh ) ) ;
2006-10-11 12:20:57 +04:00
# endif
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_journal_head_cache , jh ) ;
2006-10-11 12:20:57 +04:00
}
/*
* A journal_head is attached to a buffer_head whenever JBD has an
* interest in the buffer .
*
* Whenever a buffer has an attached journal_head , its - > b_state : BH_JBD bit
* is set . This bit is tested in core kernel code where we need to take
* JBD - specific actions . Testing the zeroness of - > b_private is not reliable
* there .
*
* When a buffer has its BH_JBD bit set , its - > b_count is elevated by one .
*
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code , mainly via - > b_count .
*
* A journal_head may be detached from its buffer_head when the journal_head ' s
* b_transaction , b_cp_transaction and b_next_transaction pointers are NULL .
2006-10-11 12:20:59 +04:00
* Various places in JBD call jbd2_journal_remove_journal_head ( ) to indicate that the
2006-10-11 12:20:57 +04:00
* journal_head can be dropped if needed .
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction . To protect the
2006-10-11 12:20:59 +04:00
* journal_head in this situation , jbd2_journal_add_journal_head elevates the
2006-10-11 12:20:57 +04:00
* journal_head ' s b_jcount refcount by one . The caller must call
2006-10-11 12:20:59 +04:00
* jbd2_journal_put_journal_head ( ) to undo this .
2006-10-11 12:20:57 +04:00
*
* So the typical usage would be :
*
* ( Attach a journal_head if needed . Increments b_jcount )
2006-10-11 12:20:59 +04:00
* struct journal_head * jh = jbd2_journal_add_journal_head ( bh ) ;
2006-10-11 12:20:57 +04:00
* . . .
* jh - > b_transaction = xxx ;
2006-10-11 12:20:59 +04:00
* jbd2_journal_put_journal_head ( jh ) ;
2006-10-11 12:20:57 +04:00
*
* Now , the journal_head ' s b_jcount is zero , but it is safe from being released
* because it has a non - zero b_transaction .
*/
/*
* Give a buffer_head a journal_head .
*
* Doesn ' t need the journal lock .
* May sleep .
*/
2006-10-11 12:20:59 +04:00
struct journal_head * jbd2_journal_add_journal_head ( struct buffer_head * bh )
2006-10-11 12:20:57 +04:00
{
struct journal_head * jh ;
struct journal_head * new_jh = NULL ;
repeat :
if ( ! buffer_jbd ( bh ) ) {
new_jh = journal_alloc_journal_head ( ) ;
memset ( new_jh , 0 , sizeof ( * new_jh ) ) ;
}
jbd_lock_bh_journal_head ( bh ) ;
if ( buffer_jbd ( bh ) ) {
jh = bh2jh ( bh ) ;
} else {
J_ASSERT_BH ( bh ,
( atomic_read ( & bh - > b_count ) > 0 ) | |
( bh - > b_page & & bh - > b_page - > mapping ) ) ;
if ( ! new_jh ) {
jbd_unlock_bh_journal_head ( bh ) ;
goto repeat ;
}
jh = new_jh ;
new_jh = NULL ; /* We consumed it */
set_buffer_jbd ( bh ) ;
bh - > b_private = jh ;
jh - > b_bh = bh ;
get_bh ( bh ) ;
BUFFER_TRACE ( bh , " added journal_head " ) ;
}
jh - > b_jcount + + ;
jbd_unlock_bh_journal_head ( bh ) ;
if ( new_jh )
journal_free_journal_head ( new_jh ) ;
return bh - > b_private ;
}
/*
* Grab a ref against this buffer_head ' s journal_head . If it ended up not
* having a journal_head , return NULL
*/
2006-10-11 12:20:59 +04:00
struct journal_head * jbd2_journal_grab_journal_head ( struct buffer_head * bh )
2006-10-11 12:20:57 +04:00
{
struct journal_head * jh = NULL ;
jbd_lock_bh_journal_head ( bh ) ;
if ( buffer_jbd ( bh ) ) {
jh = bh2jh ( bh ) ;
jh - > b_jcount + + ;
}
jbd_unlock_bh_journal_head ( bh ) ;
return jh ;
}
static void __journal_remove_journal_head ( struct buffer_head * bh )
{
struct journal_head * jh = bh2jh ( bh ) ;
J_ASSERT_JH ( jh , jh - > b_jcount > = 0 ) ;
get_bh ( bh ) ;
if ( jh - > b_jcount = = 0 ) {
if ( jh - > b_transaction = = NULL & &
jh - > b_next_transaction = = NULL & &
jh - > b_cp_transaction = = NULL ) {
J_ASSERT_JH ( jh , jh - > b_jlist = = BJ_None ) ;
J_ASSERT_BH ( bh , buffer_jbd ( bh ) ) ;
J_ASSERT_BH ( bh , jh2bh ( jh ) = = bh ) ;
BUFFER_TRACE ( bh , " remove journal_head " ) ;
if ( jh - > b_frozen_data ) {
printk ( KERN_WARNING " %s: freeing "
" b_frozen_data \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2007-10-17 02:38:25 +04:00
jbd2_free ( jh - > b_frozen_data , bh - > b_size ) ;
2006-10-11 12:20:57 +04:00
}
if ( jh - > b_committed_data ) {
printk ( KERN_WARNING " %s: freeing "
" b_committed_data \n " ,
2008-04-17 18:38:59 +04:00
__func__ ) ;
2007-10-17 02:38:25 +04:00
jbd2_free ( jh - > b_committed_data , bh - > b_size ) ;
2006-10-11 12:20:57 +04:00
}
bh - > b_private = NULL ;
jh - > b_bh = NULL ; /* debug, really */
clear_buffer_jbd ( bh ) ;
__brelse ( bh ) ;
journal_free_journal_head ( jh ) ;
} else {
BUFFER_TRACE ( bh , " journal_head was locked " ) ;
}
}
}
/*
 * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a
 * transaction and has a zero b_jcount then remove and release its
 * journal_head.  If we did see that the buffer is not used by any
 * transaction we also "logically" decrement ->b_count.
 *
 * We in fact take an additional increment on ->b_count as a convenience,
 * because the caller usually wants to do additional things with the bh
 * after calling here.
 *
 * The caller of jbd2_journal_remove_journal_head() *must* run
 * __brelse(bh) at some time.  Once the caller has run __brelse(), the
 * buffer is eligible for reaping by try_to_free_buffers().
 */
void jbd2_journal_remove_journal_head(struct buffer_head *bh)
{
	/* The real work happens in the helper, under the journal-head lock. */
	jbd_lock_bh_journal_head(bh);
	__journal_remove_journal_head(bh);
	jbd_unlock_bh_journal_head(bh);
}
/*
* Drop a reference on the passed journal_head . If it fell to zero then try to
* release the journal_head from the buffer_head .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_put_journal_head ( struct journal_head * jh )
2006-10-11 12:20:57 +04:00
{
struct buffer_head * bh = jh2bh ( jh ) ;
jbd_lock_bh_journal_head ( bh ) ;
J_ASSERT_JH ( jh , jh - > b_jcount > 0 ) ;
- - jh - > b_jcount ;
if ( ! jh - > b_jcount & & ! jh - > b_transaction ) {
__journal_remove_journal_head ( bh ) ;
__brelse ( bh ) ;
}
jbd_unlock_bh_journal_head ( bh ) ;
}
2008-07-12 03:27:31 +04:00
/*
* Initialize jbd inode head
*/
void jbd2_journal_init_jbd_inode ( struct jbd2_inode * jinode , struct inode * inode )
{
jinode - > i_transaction = NULL ;
jinode - > i_next_transaction = NULL ;
jinode - > i_vfs_inode = inode ;
jinode - > i_flags = 0 ;
INIT_LIST_HEAD ( & jinode - > i_list ) ;
}
/*
 * Function to be called before we start removing inode from memory (i.e.,
 * clear_inode() is a fine place to be called from).  It removes inode from
 * transaction's lists.
 *
 * Does nothing when @journal is NULL.  While JI_COMMIT_RUNNING is set in
 * jinode->i_flags, the caller sleeps uninterruptibly on that bit and the
 * check is restarted afterwards.
 */
void jbd2_journal_release_jbd_inode(journal_t *journal,
				    struct jbd2_inode *jinode)
{
	if (!journal)
		return;
restart:
	spin_lock(&journal->j_list_lock);
	/* Is commit writing out inode - we have to wait */
	if (jinode->i_flags & JI_COMMIT_RUNNING) {
		wait_queue_head_t *wq;
		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);

		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
		prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		/* Drop the list lock before sleeping, then re-check. */
		spin_unlock(&journal->j_list_lock);
		schedule();
		finish_wait(wq, &wait.wait);
		goto restart;
	}

	/* Unhook the inode from the transaction it is attached to, if any. */
	if (jinode->i_transaction) {
		list_del(&jinode->i_list);
		jinode->i_transaction = NULL;
	}
	spin_unlock(&journal->j_list_lock);
}
2006-10-11 12:20:57 +04:00
/*
2007-07-18 16:50:18 +04:00
* debugfs tunables
2006-10-11 12:20:57 +04:00
*/
2007-10-17 02:38:25 +04:00
# ifdef CONFIG_JBD2_DEBUG
/*
 * Debug tunable, exported for other modules and published through the
 * debugfs file set up by jbd2_create_debugfs_entry().
 */
u8 jbd2_journal_enable_debug __read_mostly;
EXPORT_SYMBOL(jbd2_journal_enable_debug);

/* Name of the debugfs file that holds the tunable. */
#define JBD2_DEBUG_NAME "jbd2-debug"

/* debugfs directory and file handles, kept so they can be removed again. */
static struct dentry *jbd2_debugfs_dir;
static struct dentry *jbd2_debug;
2006-10-11 12:20:57 +04:00
2007-07-18 16:50:18 +04:00
/*
 * Create the jbd2 debugfs directory and, if that worked, the
 * JBD2_DEBUG_NAME file backed by jbd2_journal_enable_debug.
 *
 * The file is a tunable, so besides being world-readable it must be
 * writable by its owner; a read-only file could never change the value.
 */
static void __init jbd2_create_debugfs_entry(void)
{
	jbd2_debugfs_dir = debugfs_create_dir(" jbd2 ", NULL);
	if (jbd2_debugfs_dir)
		jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
					       S_IRUGO | S_IWUSR,
					       jbd2_debugfs_dir,
					       &jbd2_journal_enable_debug);
}
2007-07-18 16:50:18 +04:00
/*
 * Remove the debugfs tunable file first, then the directory that
 * contained it.
 */
static void __exit jbd2_remove_debugfs_entry(void)
{
	debugfs_remove(jbd2_debug);
	debugfs_remove(jbd2_debugfs_dir);
}
2007-07-18 16:50:18 +04:00
# else
2006-10-11 12:20:57 +04:00
2007-07-18 16:50:18 +04:00
static void __init jbd2_create_debugfs_entry ( void )
2006-10-11 12:20:57 +04:00
{
}
2007-07-18 16:50:18 +04:00
static void __exit jbd2_remove_debugfs_entry ( void )
2006-10-11 12:20:57 +04:00
{
}
# endif
2008-01-29 07:58:27 +03:00
# ifdef CONFIG_PROC_FS
/* Name of the procfs directory created for the jbd2 statistics. */
#define JBD2_STATS_PROC_NAME "fs / jbd2"

/*
 * Create the statistics directory in procfs and remember it in
 * proc_jbd2_stats.
 */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}
/*
 * Remove the procfs statistics directory, but only if creating it had
 * succeeded (proc_jbd2_stats is non-NULL).
 */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (!proc_jbd2_stats)
		return;

	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}
# else
/* Without CONFIG_PROC_FS the procfs setup and teardown expand to no-ops. */
# define jbd2_create_jbd_stats_proc_entry() do {} while (0)
# define jbd2_remove_jbd_stats_proc_entry() do {} while (0)
# endif
2006-12-07 07:33:20 +03:00
/* Slab cache for handle_t objects; created by journal_init_handle_cache(). */
struct kmem_cache * jbd2_handle_cache ;
2006-10-11 12:20:57 +04:00
/*
 * Create the slab cache for handle_t.  Returns 0 on success or -ENOMEM
 * if the cache could not be created.
 */
static int __init journal_init_handle_cache(void)
{
	jbd2_handle_cache = kmem_cache_create(" jbd2_journal_handle ",
					      sizeof(handle_t),
					      0,		/* offset */
					      SLAB_TEMPORARY,	/* flags */
					      NULL);		/* ctor */
	if (jbd2_handle_cache != NULL)
		return 0;

	printk(KERN_EMERG " JBD: failed to create handle cache \n ");
	return -ENOMEM;
}
2006-10-11 12:20:59 +04:00
static void jbd2_journal_destroy_handle_cache ( void )
2006-10-11 12:20:57 +04:00
{
2006-10-11 12:20:59 +04:00
if ( jbd2_handle_cache )
kmem_cache_destroy ( jbd2_handle_cache ) ;
2006-10-11 12:20:57 +04:00
}
/*
 * Module startup and shutdown
 */

/*
 * Create the revoke, journal_head and handle caches, in that order,
 * stopping at the first failure.  Returns 0 or the first error; caches
 * created before the failure are left for the caller to destroy.
 */
static int __init journal_init_caches(void)
{
	int ret;

	ret = jbd2_journal_init_revoke_caches();
	if (ret != 0)
		return ret;

	ret = journal_init_jbd2_journal_head_cache();
	if (ret != 0)
		return ret;

	return journal_init_handle_cache();
}
2006-10-11 12:20:59 +04:00
/* Tear down the revoke, journal_head and handle caches. */
static void jbd2_journal_destroy_caches(void)
{
	jbd2_journal_destroy_revoke_caches();
	jbd2_journal_destroy_jbd2_journal_head_cache();
	jbd2_journal_destroy_handle_cache();
}
/*
 * Module init: set up the slab caches and, if that succeeded, the debugfs
 * and procfs entries.  On cache setup failure everything created so far
 * is destroyed and the error is returned.
 */
static int __init journal_init(void)
{
	int ret;

	/* The on-disk journal superblock must be exactly 1024 bytes. */
	BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);

	ret = journal_init_caches();
	if (ret != 0) {
		jbd2_journal_destroy_caches();
		return ret;
	}

	jbd2_create_debugfs_entry();
	jbd2_create_jbd_stats_proc_entry();
	return ret;
}
/*
 * Module exit: with CONFIG_JBD2_DEBUG, complain about journal_heads that
 * were never freed; then remove the debugfs and procfs entries and
 * destroy the slab caches.
 */
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD2_DEBUG
	int leaked = atomic_read(&nr_journal_heads);

	if (leaked != 0)
		printk(KERN_EMERG " JBD: leaked %d journal_heads! \n ", leaked);
#endif
	jbd2_remove_debugfs_entry();
	jbd2_remove_jbd_stats_proc_entry();
	jbd2_journal_destroy_caches();
}
2009-06-17 19:47:48 +04:00
/*
 * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
 * tracing infrastructure to map a dev_t to a device name.
 *
 * The caller should use rcu_read_lock() in order to make sure the
 * device name stays valid until it's done with it.  We use
 * rcu_read_lock() as well to make sure we're safe in case the caller
 * gets sloppy, and because rcu_read_lock() is cheap and can be safely
 * nested.
 */
/* One cached dev_t -> device name mapping. */
struct devname_cache {
/* Must stay the first member: free_devcache() passes this pointer to kfree(). */
struct rcu_head rcu ;
/* Device number this entry describes. */
dev_t device ;
/* Name of the device, filled in by jbd2_dev_to_name(). */
char devname [ BDEVNAME_SIZE ] ;
} ;
/* log2 of the number of slots in the device name cache. */
# define CACHE_SIZE_BITS 6
/* Cache slots, indexed by hash_32(device, CACHE_SIZE_BITS). */
static struct devname_cache * devcache [ 1 < < CACHE_SIZE_BITS ] ;
/* Held by jbd2_dev_to_name() while it replaces a cache slot. */
static DEFINE_SPINLOCK ( devname_cache_lock ) ;
/*
 * RCU callback that frees a replaced cache entry.  @rcu is handed to
 * kfree() as is, which relies on the rcu_head being the first member of
 * struct devname_cache.
 */
static void free_devcache(struct rcu_head *rcu)
{
	kfree(rcu);
}
/*
 * Map @device to a device name through a small hashed cache.
 *
 * On a cache hit the cached name is returned.  On a miss a new entry is
 * allocated and filled in, and then replaces whatever occupied the hash
 * slot; the old entry is freed through call_rcu().  If the allocation
 * fails a fixed non-NULL string is returned instead.
 */
const char *jbd2_dev_to_name(dev_t device)
{
	int i = hash_32(device, CACHE_SIZE_BITS);
	char *ret;
	struct block_device *bd;
	/*
	 * Must be an automatic variable: a static one would be shared by
	 * concurrent callers, which could overwrite each other's
	 * allocation between kmalloc() and taking the lock.
	 */
	struct devname_cache *new_dev;

	rcu_read_lock();
	if (devcache[i] && devcache[i]->device == device) {
		ret = devcache[i]->devname;
		rcu_read_unlock();
		return ret;
	}
	rcu_read_unlock();

	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
	if (!new_dev)
		return " NODEV-ALLOCFAILURE "; /* Something non-NULL */

	/*
	 * Fill the entry in completely before taking the spinlock, so
	 * that bdget()/bdput(), which may block, are not called with the
	 * lock held and the entry is initialised before it is stored in
	 * the cache.
	 */
	new_dev->device = device;
	bd = bdget(device);
	if (bd) {
		bdevname(bd, new_dev->devname);
		bdput(bd);
	} else
		__bdevname(device, new_dev->devname);

	spin_lock(&devname_cache_lock);
	if (devcache[i]) {
		/* Somebody else cached this device while we were unlocked. */
		if (devcache[i]->device == device) {
			kfree(new_dev);
			ret = devcache[i]->devname;
			spin_unlock(&devname_cache_lock);
			return ret;
		}
		/* Evict the other device; readers may still be using it. */
		call_rcu(&devcache[i]->rcu, free_devcache);
	}
	devcache[i] = new_dev;
	ret = new_dev->devname;
	spin_unlock(&devname_cache_lock);
	return ret;
}
EXPORT_SYMBOL(jbd2_dev_to_name);
2006-10-11 12:20:57 +04:00
/* Module licence, and registration of journal_init()/journal_exit() as the module entry points. */
MODULE_LICENSE ( " GPL " ) ;
module_init ( journal_init ) ;
module_exit ( journal_exit ) ;