2005-04-17 02:20:36 +04:00
/*
 * linux/fs/jbd/journal.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Generic filesystem journal-writing code; part of the ext2fs
 * journaling system.
 *
 * This file manages journals: areas of disk reserved for logging
 * transactional updates.  This includes the kernel journaling thread
 * which is responsible for scheduling updates to the log.
 *
 * We do not actually manage the physical storage of the journal in this
 * file: that is left to a per-journal policy function, which allows us
 * to store the journal within a filesystem-specified area for ext2
 * journaling (ext2 can use a reserved inode for storing the log).
 */
# include <linux/module.h>
# include <linux/time.h>
# include <linux/fs.h>
# include <linux/jbd.h>
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/mm.h>
2006-12-07 07:34:23 +03:00
# include <linux/freezer.h>
2005-04-17 02:20:36 +04:00
# include <linux/pagemap.h>
2006-03-25 14:06:53 +03:00
# include <linux/kthread.h>
2006-06-27 13:53:52 +04:00
# include <linux/poison.h>
2006-03-25 14:06:53 +03:00
# include <linux/proc_fs.h>
2007-10-19 10:39:22 +04:00
# include <linux/debugfs.h>
2010-10-04 14:12:13 +04:00
# include <linux/ratelimit.h>
2006-03-25 14:06:53 +03:00
2011-05-23 20:33:02 +04:00
# define CREATE_TRACE_POINTS
# include <trace/events/jbd.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# include <asm/page.h>
EXPORT_SYMBOL ( journal_start ) ;
EXPORT_SYMBOL ( journal_restart ) ;
EXPORT_SYMBOL ( journal_extend ) ;
EXPORT_SYMBOL ( journal_stop ) ;
EXPORT_SYMBOL ( journal_lock_updates ) ;
EXPORT_SYMBOL ( journal_unlock_updates ) ;
EXPORT_SYMBOL ( journal_get_write_access ) ;
EXPORT_SYMBOL ( journal_get_create_access ) ;
EXPORT_SYMBOL ( journal_get_undo_access ) ;
EXPORT_SYMBOL ( journal_dirty_data ) ;
EXPORT_SYMBOL ( journal_dirty_metadata ) ;
EXPORT_SYMBOL ( journal_release_buffer ) ;
EXPORT_SYMBOL ( journal_forget ) ;
#if 0
EXPORT_SYMBOL ( journal_sync_buffer ) ;
# endif
EXPORT_SYMBOL ( journal_flush ) ;
EXPORT_SYMBOL ( journal_revoke ) ;
EXPORT_SYMBOL ( journal_init_dev ) ;
EXPORT_SYMBOL ( journal_init_inode ) ;
EXPORT_SYMBOL ( journal_update_format ) ;
EXPORT_SYMBOL ( journal_check_used_features ) ;
EXPORT_SYMBOL ( journal_check_available_features ) ;
EXPORT_SYMBOL ( journal_set_features ) ;
EXPORT_SYMBOL ( journal_create ) ;
EXPORT_SYMBOL ( journal_load ) ;
EXPORT_SYMBOL ( journal_destroy ) ;
EXPORT_SYMBOL ( journal_abort ) ;
EXPORT_SYMBOL ( journal_errno ) ;
EXPORT_SYMBOL ( journal_ack_err ) ;
EXPORT_SYMBOL ( journal_clear_err ) ;
EXPORT_SYMBOL ( log_wait_commit ) ;
2009-11-12 11:53:50 +03:00
EXPORT_SYMBOL ( log_start_commit ) ;
2005-04-17 02:20:36 +04:00
EXPORT_SYMBOL ( journal_start_commit ) ;
EXPORT_SYMBOL ( journal_force_commit_nested ) ;
EXPORT_SYMBOL ( journal_wipe ) ;
EXPORT_SYMBOL ( journal_blocks_per_page ) ;
EXPORT_SYMBOL ( journal_invalidatepage ) ;
EXPORT_SYMBOL ( journal_try_to_free_buffers ) ;
EXPORT_SYMBOL ( journal_force_commit ) ;
static int journal_convert_superblock_v1 ( journal_t * , journal_superblock_t * ) ;
2005-09-07 02:16:41 +04:00
static void __journal_abort_soft ( journal_t * journal , int errno ) ;
2010-10-04 23:35:05 +04:00
static const char * journal_dev_name ( journal_t * journal , char * buffer ) ;
2005-04-17 02:20:36 +04:00
/*
* Helper function used to manage commit timeouts
*/
static void commit_timeout ( unsigned long __data )
{
struct task_struct * p = ( struct task_struct * ) __data ;
wake_up_process ( p ) ;
}
/*
* kjournald : The main thread function used to manage a logging device
* journal .
*
* This kernel thread is responsible for two things :
*
* 1 ) COMMIT : Every so often we need to commit the current state of the
* filesystem to disk . The journal thread is responsible for writing
* all of the metadata buffers to disk .
*
* 2 ) CHECKPOINT : We cannot reuse a used section of the log file until all
* of the data in that part of the log has been rewritten elsewhere on
* the disk . Flushing these old buffers to reclaim space in the log is
* known as checkpointing , and this thread is responsible for that job .
*/
2005-09-07 02:16:41 +04:00
static int kjournald ( void * arg )
2005-04-17 02:20:36 +04:00
{
2006-03-25 14:06:53 +03:00
journal_t * journal = arg ;
2005-04-17 02:20:36 +04:00
transaction_t * transaction ;
2006-03-25 14:06:53 +03:00
/*
* Set up an interval timer which can be used to trigger a commit wakeup
* after the commit interval expires
*/
setup_timer ( & journal - > j_commit_timer , commit_timeout ,
( unsigned long ) current ) ;
2005-04-17 02:20:36 +04:00
2012-02-03 12:59:41 +04:00
set_freezable ( ) ;
2005-04-17 02:20:36 +04:00
/* Record that the journal thread is running */
journal - > j_task = current ;
wake_up ( & journal - > j_wait_done_commit ) ;
printk ( KERN_INFO " kjournald starting. Commit interval %ld seconds \n " ,
journal - > j_commit_interval / HZ ) ;
/*
* And now , wait forever for commit wakeup events .
*/
spin_lock ( & journal - > j_state_lock ) ;
loop :
if ( journal - > j_flags & JFS_UNMOUNT )
goto end_loop ;
jbd_debug ( 1 , " commit_sequence=%d, commit_request=%d \n " ,
journal - > j_commit_sequence , journal - > j_commit_request ) ;
if ( journal - > j_commit_sequence ! = journal - > j_commit_request ) {
jbd_debug ( 1 , " OK, requests differ \n " ) ;
spin_unlock ( & journal - > j_state_lock ) ;
2006-03-25 14:06:53 +03:00
del_timer_sync ( & journal - > j_commit_timer ) ;
2005-04-17 02:20:36 +04:00
journal_commit_transaction ( journal ) ;
spin_lock ( & journal - > j_state_lock ) ;
goto loop ;
}
wake_up ( & journal - > j_wait_done_commit ) ;
2005-06-25 10:13:50 +04:00
if ( freezing ( current ) ) {
2005-04-17 02:20:36 +04:00
/*
* The simpler the better . Flushing journal isn ' t a
* good idea , because that depends on threads that may
* be already stopped .
*/
jbd_debug ( 1 , " Now suspending kjournald \n " ) ;
spin_unlock ( & journal - > j_state_lock ) ;
2011-11-22 00:32:22 +04:00
try_to_freeze ( ) ;
2005-04-17 02:20:36 +04:00
spin_lock ( & journal - > j_state_lock ) ;
} else {
/*
* We assume on resume that commits are already there ,
* so we don ' t sleep
*/
DEFINE_WAIT ( wait ) ;
int should_sleep = 1 ;
prepare_to_wait ( & journal - > j_wait_commit , & wait ,
TASK_INTERRUPTIBLE ) ;
if ( journal - > j_commit_sequence ! = journal - > j_commit_request )
should_sleep = 0 ;
transaction = journal - > j_running_transaction ;
if ( transaction & & time_after_eq ( jiffies ,
transaction - > t_expires ) )
should_sleep = 0 ;
2005-09-07 02:19:08 +04:00
if ( journal - > j_flags & JFS_UNMOUNT )
2006-09-27 12:49:35 +04:00
should_sleep = 0 ;
2005-04-17 02:20:36 +04:00
if ( should_sleep ) {
spin_unlock ( & journal - > j_state_lock ) ;
schedule ( ) ;
spin_lock ( & journal - > j_state_lock ) ;
}
finish_wait ( & journal - > j_wait_commit , & wait ) ;
}
jbd_debug ( 1 , " kjournald wakes \n " ) ;
/*
* Were we woken up by a commit wakeup event ?
*/
transaction = journal - > j_running_transaction ;
if ( transaction & & time_after_eq ( jiffies , transaction - > t_expires ) ) {
journal - > j_commit_request = transaction - > t_tid ;
jbd_debug ( 1 , " woke because of timeout \n " ) ;
}
goto loop ;
end_loop :
spin_unlock ( & journal - > j_state_lock ) ;
2006-03-25 14:06:53 +03:00
del_timer_sync ( & journal - > j_commit_timer ) ;
2005-04-17 02:20:36 +04:00
journal - > j_task = NULL ;
wake_up ( & journal - > j_wait_done_commit ) ;
jbd_debug ( 1 , " Journal thread exiting. \n " ) ;
return 0 ;
}
2007-05-08 11:30:42 +04:00
static int journal_start_thread ( journal_t * journal )
2005-04-17 02:20:36 +04:00
{
2007-05-08 11:30:42 +04:00
struct task_struct * t ;
t = kthread_run ( kjournald , journal , " kjournald " ) ;
if ( IS_ERR ( t ) )
return PTR_ERR ( t ) ;
2007-10-18 14:07:05 +04:00
wait_event ( journal - > j_wait_done_commit , journal - > j_task ! = NULL ) ;
2007-05-08 11:30:42 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
static void journal_kill_thread ( journal_t * journal )
{
spin_lock ( & journal - > j_state_lock ) ;
journal - > j_flags | = JFS_UNMOUNT ;
while ( journal - > j_task ) {
wake_up ( & journal - > j_wait_commit ) ;
spin_unlock ( & journal - > j_state_lock ) ;
2007-10-18 14:07:05 +04:00
wait_event ( journal - > j_wait_done_commit ,
journal - > j_task = = NULL ) ;
2005-04-17 02:20:36 +04:00
spin_lock ( & journal - > j_state_lock ) ;
}
spin_unlock ( & journal - > j_state_lock ) ;
}
/*
* journal_write_metadata_buffer : write a metadata buffer to the journal .
*
* Writes a metadata buffer to a given disk block . The actual IO is not
* performed but a new buffer_head is constructed which labels the data
* to be written with the correct destination disk block .
*
* Any magic - number escaping which needs to be done will cause a
* copy - out here . If the buffer happens to start with the
* JFS_MAGIC_NUMBER , then we can ' t write it to the log directly : the
* magic number is only written to the log for descripter blocks . In
* this case , we copy the data and replace the first word with 0 , and we
* return a result code which indicates that this buffer needs to be
* marked as an escaped buffer in the corresponding log descriptor
* block . The missing word can then be restored when the block is read
* during recovery .
*
* If the source buffer has already been modified by a new transaction
* since we took the last commit snapshot , we use the frozen copy of
* that data for IO . If we end up using the existing buffer_head ' s data
* for the write , then we * have * to lock the buffer to prevent anyone
* else from using and possibly modifying it while the IO is in
* progress .
*
* The function returns a pointer to the buffer_heads to be used for IO .
*
* We assume that the journal has already been locked in this function .
*
* Return value :
* < 0 : Error
* > = 0 : Finished OK
*
* On success :
* Bit 0 set = = escape performed on the data
* Bit 1 set = = buffer copy - out performed ( kfree the data after IO )
*/
int journal_write_metadata_buffer ( transaction_t * transaction ,
struct journal_head * jh_in ,
struct journal_head * * jh_out ,
2009-08-03 21:21:00 +04:00
unsigned int blocknr )
2005-04-17 02:20:36 +04:00
{
int need_copy_out = 0 ;
int done_copy_out = 0 ;
int do_escape = 0 ;
char * mapped_data ;
struct buffer_head * new_bh ;
struct journal_head * new_jh ;
struct page * new_page ;
unsigned int new_offset ;
struct buffer_head * bh_in = jh2bh ( jh_in ) ;
2009-07-15 23:42:05 +04:00
journal_t * journal = transaction - > t_journal ;
2005-04-17 02:20:36 +04:00
/*
* The buffer really shouldn ' t be locked : only the current committing
* transaction is allowed to write it , so nobody else is allowed
* to do any IO .
*
* akpm : except if we ' re journalling data , and write ( ) output is
* also part of a shared mapping , and another thread has
* decided to launch a writepage ( ) against this buffer .
*/
J_ASSERT_BH ( bh_in , buffer_jbddirty ( bh_in ) ) ;
new_bh = alloc_buffer_head ( GFP_NOFS | __GFP_NOFAIL ) ;
2009-07-15 23:42:05 +04:00
/* keep subsequent assertions sane */
new_bh - > b_state = 0 ;
init_buffer ( new_bh , NULL , NULL ) ;
atomic_set ( & new_bh - > b_count , 1 ) ;
new_jh = journal_add_journal_head ( new_bh ) ; /* This sleeps */
2005-04-17 02:20:36 +04:00
/*
* If a new transaction has already done a buffer copy - out , then
* we use that version of the data for the commit .
*/
jbd_lock_bh_state ( bh_in ) ;
repeat :
if ( jh_in - > b_frozen_data ) {
done_copy_out = 1 ;
new_page = virt_to_page ( jh_in - > b_frozen_data ) ;
new_offset = offset_in_page ( jh_in - > b_frozen_data ) ;
} else {
new_page = jh2bh ( jh_in ) - > b_page ;
new_offset = offset_in_page ( jh2bh ( jh_in ) - > b_data ) ;
}
2011-11-25 19:14:31 +04:00
mapped_data = kmap_atomic ( new_page ) ;
2005-04-17 02:20:36 +04:00
/*
* Check for escaping
*/
if ( * ( ( __be32 * ) ( mapped_data + new_offset ) ) = =
cpu_to_be32 ( JFS_MAGIC_NUMBER ) ) {
need_copy_out = 1 ;
do_escape = 1 ;
}
2011-11-25 19:14:31 +04:00
kunmap_atomic ( mapped_data ) ;
2005-04-17 02:20:36 +04:00
/*
* Do we need to do a data copy ?
*/
if ( need_copy_out & & ! done_copy_out ) {
char * tmp ;
jbd_unlock_bh_state ( bh_in ) ;
2007-10-17 02:38:25 +04:00
tmp = jbd_alloc ( bh_in - > b_size , GFP_NOFS ) ;
2005-04-17 02:20:36 +04:00
jbd_lock_bh_state ( bh_in ) ;
if ( jh_in - > b_frozen_data ) {
2007-10-17 02:38:25 +04:00
jbd_free ( tmp , bh_in - > b_size ) ;
2005-04-17 02:20:36 +04:00
goto repeat ;
}
jh_in - > b_frozen_data = tmp ;
2011-11-25 19:14:31 +04:00
mapped_data = kmap_atomic ( new_page ) ;
2005-04-17 02:20:36 +04:00
memcpy ( tmp , mapped_data + new_offset , jh2bh ( jh_in ) - > b_size ) ;
2011-11-25 19:14:31 +04:00
kunmap_atomic ( mapped_data ) ;
2005-04-17 02:20:36 +04:00
new_page = virt_to_page ( tmp ) ;
new_offset = offset_in_page ( tmp ) ;
done_copy_out = 1 ;
}
/*
* Did we need to do an escaping ? Now we ' ve done all the
* copying , we can finally do so .
*/
if ( do_escape ) {
2011-11-25 19:14:31 +04:00
mapped_data = kmap_atomic ( new_page ) ;
2005-04-17 02:20:36 +04:00
* ( ( unsigned int * ) ( mapped_data + new_offset ) ) = 0 ;
2011-11-25 19:14:31 +04:00
kunmap_atomic ( mapped_data ) ;
2005-04-17 02:20:36 +04:00
}
set_bh_page ( new_bh , new_page , new_offset ) ;
new_jh - > b_transaction = NULL ;
new_bh - > b_size = jh2bh ( jh_in ) - > b_size ;
new_bh - > b_bdev = transaction - > t_journal - > j_dev ;
new_bh - > b_blocknr = blocknr ;
set_buffer_mapped ( new_bh ) ;
set_buffer_dirty ( new_bh ) ;
* jh_out = new_jh ;
/*
* The to - be - written buffer needs to get moved to the io queue ,
* and the original buffer whose contents we are shadowing or
* copying is moved to the transaction ' s shadow queue .
*/
JBUFFER_TRACE ( jh_in , " file as BJ_Shadow " ) ;
2009-07-15 23:42:05 +04:00
spin_lock ( & journal - > j_list_lock ) ;
__journal_file_buffer ( jh_in , transaction , BJ_Shadow ) ;
spin_unlock ( & journal - > j_list_lock ) ;
jbd_unlock_bh_state ( bh_in ) ;
2005-04-17 02:20:36 +04:00
JBUFFER_TRACE ( new_jh , " file as BJ_IO " ) ;
journal_file_buffer ( new_jh , transaction , BJ_IO ) ;
return do_escape | ( done_copy_out < < 1 ) ;
}
/*
* Allocation code for the journal file . Manage the space left in the
* journal , so that we can begin checkpointing when appropriate .
*/
/*
* __log_space_left : Return the number of free blocks left in the journal .
*
* Called with the journal already locked .
*
* Called under j_state_lock
*/
int __log_space_left ( journal_t * journal )
{
int left = journal - > j_free ;
assert_spin_locked ( & journal - > j_state_lock ) ;
/*
* Be pessimistic here about the number of those free blocks which
* might be required for log descriptor control blocks .
*/
# define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
left - = MIN_LOG_RESERVED_BLOCKS ;
if ( left < = 0 )
return 0 ;
left - = ( left > > 3 ) ;
return left ;
}
/*
2009-02-12 00:04:25 +03:00
* Called under j_state_lock . Returns true if a transaction commit was started .
2005-04-17 02:20:36 +04:00
*/
int __log_start_commit ( journal_t * journal , tid_t target )
{
/*
2011-04-30 21:17:11 +04:00
* The only transaction we can possibly wait upon is the
* currently running transaction ( if it exists ) . Otherwise ,
* the target tid must be an old one .
2005-04-17 02:20:36 +04:00
*/
2011-04-30 21:17:11 +04:00
if ( journal - > j_running_transaction & &
journal - > j_running_transaction - > t_tid = = target ) {
2005-04-17 02:20:36 +04:00
/*
2010-10-16 17:19:14 +04:00
* We want a new commit : OK , mark the request and wakeup the
2005-04-17 02:20:36 +04:00
* commit thread . We do _not_ do the commit ourselves .
*/
journal - > j_commit_request = target ;
jbd_debug ( 1 , " JBD: requesting commit %d/%d \n " ,
journal - > j_commit_request ,
journal - > j_commit_sequence ) ;
wake_up ( & journal - > j_wait_commit ) ;
return 1 ;
2011-04-30 21:17:11 +04:00
} else if ( ! tid_geq ( journal - > j_commit_request , target ) )
/* This should never happen, but if it does, preserve
the evidence before kjournald goes into a loop and
increments j_commit_sequence beyond all recognition . */
WARN_ONCE ( 1 , " jbd: bad log_start_commit: %u %u %u %u \n " ,
journal - > j_commit_request , journal - > j_commit_sequence ,
target , journal - > j_running_transaction ?
journal - > j_running_transaction - > t_tid : 0 ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
int log_start_commit ( journal_t * journal , tid_t tid )
{
int ret ;
spin_lock ( & journal - > j_state_lock ) ;
ret = __log_start_commit ( journal , tid ) ;
spin_unlock ( & journal - > j_state_lock ) ;
return ret ;
}
/*
* Force and wait upon a commit if the calling process is not within
* transaction . This is used for forcing out undo - protected data which contains
* bitmaps , when the fs is running out of space .
*
* We can only force the running transaction if we don ' t have an active handle ;
* otherwise , we will deadlock .
*
* Returns true if a transaction was started .
*/
int journal_force_commit_nested ( journal_t * journal )
{
transaction_t * transaction = NULL ;
tid_t tid ;
spin_lock ( & journal - > j_state_lock ) ;
if ( journal - > j_running_transaction & & ! current - > journal_info ) {
transaction = journal - > j_running_transaction ;
__log_start_commit ( journal , transaction - > t_tid ) ;
} else if ( journal - > j_committing_transaction )
transaction = journal - > j_committing_transaction ;
if ( ! transaction ) {
spin_unlock ( & journal - > j_state_lock ) ;
return 0 ; /* Nothing to retry */
}
tid = transaction - > t_tid ;
spin_unlock ( & journal - > j_state_lock ) ;
log_wait_commit ( journal , tid ) ;
return 1 ;
}
/*
* Start a commit of the current running transaction ( if any ) . Returns true
2009-02-12 00:04:25 +03:00
* if a transaction is going to be committed ( or is currently already
* committing ) , and fills its tid in at * ptid
2005-04-17 02:20:36 +04:00
*/
int journal_start_commit ( journal_t * journal , tid_t * ptid )
{
int ret = 0 ;
spin_lock ( & journal - > j_state_lock ) ;
if ( journal - > j_running_transaction ) {
tid_t tid = journal - > j_running_transaction - > t_tid ;
2009-02-12 00:04:25 +03:00
__log_start_commit ( journal , tid ) ;
/* There's a running transaction and we've just made sure
* it ' s commit has been scheduled . */
if ( ptid )
2005-04-17 02:20:36 +04:00
* ptid = tid ;
2009-02-12 00:04:25 +03:00
ret = 1 ;
} else if ( journal - > j_committing_transaction ) {
2005-04-17 02:20:36 +04:00
/*
2012-07-25 19:12:07 +04:00
* If commit has been started , then we have to wait for
* completion of that transaction .
2005-04-17 02:20:36 +04:00
*/
2009-02-12 00:04:25 +03:00
if ( ptid )
* ptid = journal - > j_committing_transaction - > t_tid ;
2005-04-17 02:20:36 +04:00
ret = 1 ;
}
spin_unlock ( & journal - > j_state_lock ) ;
return ret ;
}
/*
* Wait for a specified commit to complete .
* The caller may not hold the journal lock .
*/
int log_wait_commit ( journal_t * journal , tid_t tid )
{
int err = 0 ;
# ifdef CONFIG_JBD_DEBUG
spin_lock ( & journal - > j_state_lock ) ;
if ( ! tid_geq ( journal - > j_commit_request , tid ) ) {
printk ( KERN_EMERG
" %s: error: j_commit_request=%d, tid=%d \n " ,
2008-04-28 13:16:16 +04:00
__func__ , journal - > j_commit_request , tid ) ;
2005-04-17 02:20:36 +04:00
}
spin_unlock ( & journal - > j_state_lock ) ;
# endif
spin_lock ( & journal - > j_state_lock ) ;
2011-02-21 19:25:37 +03:00
if ( ! tid_geq ( journal - > j_commit_waited , tid ) )
journal - > j_commit_waited = tid ;
2005-04-17 02:20:36 +04:00
while ( tid_gt ( tid , journal - > j_commit_sequence ) ) {
jbd_debug ( 1 , " JBD: want %d, j_commit_sequence=%d \n " ,
tid , journal - > j_commit_sequence ) ;
wake_up ( & journal - > j_wait_commit ) ;
spin_unlock ( & journal - > j_state_lock ) ;
wait_event ( journal - > j_wait_done_commit ,
! tid_gt ( tid , journal - > j_commit_sequence ) ) ;
spin_lock ( & journal - > j_state_lock ) ;
}
spin_unlock ( & journal - > j_state_lock ) ;
if ( unlikely ( is_journal_aborted ( journal ) ) ) {
printk ( KERN_EMERG " journal commit I/O error \n " ) ;
err = - EIO ;
}
return err ;
}
2010-04-16 00:16:24 +04:00
/*
* Return 1 if a given transaction has not yet sent barrier request
* connected with a transaction commit . If 0 is returned , transaction
* may or may not have sent the barrier . Used to avoid sending barrier
* twice in common cases .
*/
int journal_trans_will_send_data_barrier ( journal_t * journal , tid_t tid )
{
int ret = 0 ;
transaction_t * commit_trans ;
if ( ! ( journal - > j_flags & JFS_BARRIER ) )
return 0 ;
spin_lock ( & journal - > j_state_lock ) ;
/* Transaction already committed? */
if ( tid_geq ( journal - > j_commit_sequence , tid ) )
goto out ;
/*
* Transaction is being committed and we already proceeded to
* writing commit record ?
*/
commit_trans = journal - > j_committing_transaction ;
if ( commit_trans & & commit_trans - > t_tid = = tid & &
commit_trans - > t_state > = T_COMMIT_RECORD )
goto out ;
ret = 1 ;
out :
spin_unlock ( & journal - > j_state_lock ) ;
return ret ;
}
2010-04-16 00:24:26 +04:00
EXPORT_SYMBOL ( journal_trans_will_send_data_barrier ) ;
2010-04-16 00:16:24 +04:00
2005-04-17 02:20:36 +04:00
/*
* Log buffer allocation routines :
*/
2009-08-03 21:21:00 +04:00
int journal_next_log_block ( journal_t * journal , unsigned int * retp )
2005-04-17 02:20:36 +04:00
{
2009-08-03 21:21:00 +04:00
unsigned int blocknr ;
2005-04-17 02:20:36 +04:00
spin_lock ( & journal - > j_state_lock ) ;
J_ASSERT ( journal - > j_free > 1 ) ;
blocknr = journal - > j_head ;
journal - > j_head + + ;
journal - > j_free - - ;
if ( journal - > j_head = = journal - > j_last )
journal - > j_head = journal - > j_first ;
spin_unlock ( & journal - > j_state_lock ) ;
return journal_bmap ( journal , blocknr , retp ) ;
}
/*
* Conversion of logical to physical block numbers for the journal
*
* On external journals the journal blocks are identity - mapped , so
* this is a no - op . If needed , we can use j_blk_offset - everything is
* ready .
*/
2009-08-03 21:21:00 +04:00
int journal_bmap ( journal_t * journal , unsigned int blocknr ,
unsigned int * retp )
2005-04-17 02:20:36 +04:00
{
int err = 0 ;
2009-08-03 21:21:00 +04:00
unsigned int ret ;
2005-04-17 02:20:36 +04:00
if ( journal - > j_inode ) {
ret = bmap ( journal - > j_inode , blocknr ) ;
if ( ret )
* retp = ret ;
else {
char b [ BDEVNAME_SIZE ] ;
printk ( KERN_ALERT " %s: journal block not found "
2009-08-03 21:21:00 +04:00
" at offset %u on %s \n " ,
2008-04-28 13:16:16 +04:00
__func__ ,
2005-04-17 02:20:36 +04:00
blocknr ,
bdevname ( journal - > j_dev , b ) ) ;
err = - EIO ;
__journal_abort_soft ( journal , err ) ;
}
} else {
* retp = blocknr ; /* +journal->j_blk_offset */
}
return err ;
}
/*
* We play buffer_head aliasing tricks to write data / metadata blocks to
* the journal without copying their contents , but for journal
* descriptor blocks we do need to generate bona fide buffers .
*
* After the caller of journal_get_descriptor_buffer ( ) has finished modifying
* the buffer ' s contents they really should run flush_dcache_page ( bh - > b_page ) .
* But we don ' t bother doing that , so there will be coherency problems with
* mmaps of blockdevs which hold live JBD - controlled filesystems .
*/
struct journal_head * journal_get_descriptor_buffer ( journal_t * journal )
{
struct buffer_head * bh ;
2009-08-03 21:21:00 +04:00
unsigned int blocknr ;
2005-04-17 02:20:36 +04:00
int err ;
err = journal_next_log_block ( journal , & blocknr ) ;
if ( err )
return NULL ;
bh = __getblk ( journal - > j_dev , blocknr , journal - > j_blocksize ) ;
2009-04-03 03:57:13 +04:00
if ( ! bh )
return NULL ;
2005-04-17 02:20:36 +04:00
lock_buffer ( bh ) ;
memset ( bh - > b_data , 0 , journal - > j_blocksize ) ;
set_buffer_uptodate ( bh ) ;
unlock_buffer ( bh ) ;
BUFFER_TRACE ( bh , " return this buffer " ) ;
return journal_add_journal_head ( bh ) ;
}
/*
* Management for journal control blocks : functions to create and
* destroy journal_t structures , and to initialise and read existing
* journal blocks from disk . */
/* First: create and setup a journal_t object in memory. We initialise
* very few fields yet : that has to wait until we have created the
* journal structures from from scratch , or loaded them from disk . */
static journal_t * journal_init_common ( void )
{
journal_t * journal ;
int err ;
2007-10-19 10:39:20 +04:00
journal = kzalloc ( sizeof ( * journal ) , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! journal )
goto fail ;
init_waitqueue_head ( & journal - > j_wait_transaction_locked ) ;
init_waitqueue_head ( & journal - > j_wait_logspace ) ;
init_waitqueue_head ( & journal - > j_wait_done_commit ) ;
init_waitqueue_head ( & journal - > j_wait_checkpoint ) ;
init_waitqueue_head ( & journal - > j_wait_commit ) ;
init_waitqueue_head ( & journal - > j_wait_updates ) ;
2006-03-23 14:00:35 +03:00
mutex_init ( & journal - > j_checkpoint_mutex ) ;
2005-04-17 02:20:36 +04:00
spin_lock_init ( & journal - > j_revoke_lock ) ;
spin_lock_init ( & journal - > j_list_lock ) ;
spin_lock_init ( & journal - > j_state_lock ) ;
journal - > j_commit_interval = ( HZ * JBD_DEFAULT_MAX_COMMIT_AGE ) ;
/* The journal is marked for error until we succeed with recovery! */
journal - > j_flags = JFS_ABORT ;
/* Set up a default-sized revoke table for the new mount. */
err = journal_init_revoke ( journal , JOURNAL_REVOKE_DEFAULT_HASH ) ;
if ( err ) {
kfree ( journal ) ;
goto fail ;
}
return journal ;
fail :
return NULL ;
}
/* journal_init_dev and journal_init_inode:
*
* Create a journal structure assigned some fixed set of disk blocks to
* the journal . We don ' t actually touch those disk blocks yet , but we
* need to set up all of the mapping information to tell the journaling
* system where the journal blocks are .
*
*/
/**
2008-03-20 03:00:44 +03:00
* journal_t * journal_init_dev ( ) - creates and initialises a journal structure
2005-04-17 02:20:36 +04:00
* @ bdev : Block device on which to create the journal
* @ fs_dev : Device which hold journalled filesystem for this journal .
* @ start : Block nr Start of journal .
2006-09-27 12:49:31 +04:00
* @ len : Length of the journal in blocks .
2005-04-17 02:20:36 +04:00
* @ blocksize : blocksize of journalling device
2008-03-20 03:00:44 +03:00
*
* Returns : a newly created journal_t *
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* journal_init_dev creates a journal which maps a fixed contiguous
* range of blocks on an arbitrary block device .
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
*/
journal_t * journal_init_dev ( struct block_device * bdev ,
struct block_device * fs_dev ,
int start , int len , int blocksize )
{
journal_t * journal = journal_init_common ( ) ;
struct buffer_head * bh ;
int n ;
if ( ! journal )
return NULL ;
/* journal descriptor can store up to n blocks -bzzz */
2006-09-29 12:58:40 +04:00
journal - > j_blocksize = blocksize ;
2005-04-17 02:20:36 +04:00
n = journal - > j_blocksize / sizeof ( journal_block_tag_t ) ;
journal - > j_wbufsize = n ;
journal - > j_wbuf = kmalloc ( n * sizeof ( struct buffer_head * ) , GFP_KERNEL ) ;
if ( ! journal - > j_wbuf ) {
2011-03-31 05:57:33 +04:00
printk ( KERN_ERR " %s: Can't allocate bhs for commit thread \n " ,
2008-04-28 13:16:16 +04:00
__func__ ) ;
2009-04-03 03:57:13 +04:00
goto out_err ;
2005-04-17 02:20:36 +04:00
}
2006-09-29 12:58:40 +04:00
journal - > j_dev = bdev ;
journal - > j_fs_dev = fs_dev ;
journal - > j_blk_offset = start ;
journal - > j_maxlen = len ;
bh = __getblk ( journal - > j_dev , start , journal - > j_blocksize ) ;
2009-04-03 03:57:13 +04:00
if ( ! bh ) {
printk ( KERN_ERR
" %s: Cannot get buffer for journal superblock \n " ,
__func__ ) ;
goto out_err ;
}
2006-09-29 12:58:40 +04:00
journal - > j_sb_buffer = bh ;
journal - > j_superblock = ( journal_superblock_t * ) bh - > b_data ;
2009-04-03 03:57:13 +04:00
2005-04-17 02:20:36 +04:00
return journal ;
2009-04-03 03:57:13 +04:00
out_err :
2009-11-10 12:13:22 +03:00
kfree ( journal - > j_wbuf ) ;
2009-04-03 03:57:13 +04:00
kfree ( journal ) ;
return NULL ;
2005-04-17 02:20:36 +04:00
}
2006-09-27 12:49:27 +04:00
/**
2005-04-17 02:20:36 +04:00
* journal_t * journal_init_inode ( ) - creates a journal which maps to a inode .
* @ inode : An inode to create the journal in
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* journal_init_inode creates a journal which maps an on - disk inode as
* the journal . The inode must exist already , must support bmap ( ) and
* must have all data blocks preallocated .
*/
journal_t * journal_init_inode ( struct inode * inode )
{
struct buffer_head * bh ;
journal_t * journal = journal_init_common ( ) ;
int err ;
int n ;
2009-08-03 21:21:00 +04:00
unsigned int blocknr ;
2005-04-17 02:20:36 +04:00
if ( ! journal )
return NULL ;
journal - > j_dev = journal - > j_fs_dev = inode - > i_sb - > s_bdev ;
journal - > j_inode = inode ;
jbd_debug ( 1 ,
" journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld \n " ,
2006-09-27 12:49:27 +04:00
journal , inode - > i_sb - > s_id , inode - > i_ino ,
2005-04-17 02:20:36 +04:00
( long long ) inode - > i_size ,
inode - > i_sb - > s_blocksize_bits , inode - > i_sb - > s_blocksize ) ;
journal - > j_maxlen = inode - > i_size > > inode - > i_sb - > s_blocksize_bits ;
journal - > j_blocksize = inode - > i_sb - > s_blocksize ;
/* journal descriptor can store up to n blocks -bzzz */
n = journal - > j_blocksize / sizeof ( journal_block_tag_t ) ;
journal - > j_wbufsize = n ;
journal - > j_wbuf = kmalloc ( n * sizeof ( struct buffer_head * ) , GFP_KERNEL ) ;
if ( ! journal - > j_wbuf ) {
2011-03-31 05:57:33 +04:00
printk ( KERN_ERR " %s: Can't allocate bhs for commit thread \n " ,
2008-04-28 13:16:16 +04:00
__func__ ) ;
2009-04-03 03:57:13 +04:00
goto out_err ;
2005-04-17 02:20:36 +04:00
}
err = journal_bmap ( journal , 0 , & blocknr ) ;
/* If that failed, give up */
if ( err ) {
2011-02-27 07:34:05 +03:00
printk ( KERN_ERR " %s: Cannot locate journal superblock \n " ,
2008-04-28 13:16:16 +04:00
__func__ ) ;
2009-04-03 03:57:13 +04:00
goto out_err ;
2005-04-17 02:20:36 +04:00
}
bh = __getblk ( journal - > j_dev , blocknr , journal - > j_blocksize ) ;
2009-04-03 03:57:13 +04:00
if ( ! bh ) {
printk ( KERN_ERR
" %s: Cannot get buffer for journal superblock \n " ,
__func__ ) ;
goto out_err ;
}
2005-04-17 02:20:36 +04:00
journal - > j_sb_buffer = bh ;
journal - > j_superblock = ( journal_superblock_t * ) bh - > b_data ;
return journal ;
2009-04-03 03:57:13 +04:00
out_err :
2009-11-10 12:13:22 +03:00
kfree ( journal - > j_wbuf ) ;
2009-04-03 03:57:13 +04:00
kfree ( journal ) ;
return NULL ;
2005-04-17 02:20:36 +04:00
}
2006-09-27 12:49:27 +04:00
/*
2005-04-17 02:20:36 +04:00
* If the journal init or create aborts , we need to mark the journal
* superblock as being NULL to prevent the journal destroy from writing
2006-09-27 12:49:27 +04:00
* back a bogus superblock .
2005-04-17 02:20:36 +04:00
*/
static void journal_fail_superblock ( journal_t * journal )
{
struct buffer_head * bh = journal - > j_sb_buffer ;
brelse ( bh ) ;
journal - > j_sb_buffer = NULL ;
}
/*
* Given a journal_t structure , initialise the various fields for
* startup of a new journaling session . We use this both when creating
* a journal , and after recovering an old journal to reset it for
* subsequent use .
*/
static int journal_reset(journal_t *journal)
{
	journal_superblock_t *sb = journal->j_superblock;
	unsigned int first, last;

	/* Journal geometry comes from the on-disk superblock. */
	first = be32_to_cpu(sb->s_first);
	last = be32_to_cpu(sb->s_maxlen);
	if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
		printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n",
		       first, last);
		journal_fail_superblock(journal);
		return -EINVAL;
	}

	journal->j_first = first;
	journal->j_last = last;

	/* A reset journal starts empty: head == tail, all space free. */
	journal->j_head = first;
	journal->j_tail = first;
	journal->j_free = last - first;

	journal->j_tail_sequence = journal->j_transaction_sequence;
	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
	journal->j_commit_request = journal->j_commit_sequence;

	journal->j_max_transaction_buffers = journal->j_maxlen / 4;

	/*
	 * As a special case, if the on-disk copy is already marked as needing
	 * no recovery (s_start == 0), then we can safely defer the superblock
	 * update until the next commit by setting JFS_FLUSHED.  This avoids
	 * attempting a write to a potential-readonly device.
	 */
	if (sb->s_start == 0) {
		jbd_debug(1, "JBD: Skipping superblock update on recovered sb "
			"(start %u, seq %d, errno %d)\n",
			journal->j_tail, journal->j_tail_sequence,
			journal->j_errno);
		journal->j_flags |= JFS_FLUSHED;
	} else {
		/* Lock here to make assertions happy... */
		mutex_lock(&journal->j_checkpoint_mutex);
		/*
		 * Update log tail information. We use WRITE_FUA since new
		 * transaction will start reusing journal space and so we
		 * must make sure information about current log tail is on
		 * disk before that.
		 */
		journal_update_sb_log_tail(journal,
					   journal->j_tail_sequence,
					   journal->j_tail,
					   WRITE_FUA);
		mutex_unlock(&journal->j_checkpoint_mutex);
	}
	/* Finally, spawn the kjournald commit thread for this journal. */
	return journal_start_thread(journal);
}
2006-09-27 12:49:27 +04:00
/**
2005-04-17 02:20:36 +04:00
* int journal_create ( ) - Initialise the new journal file
* @ journal : Journal to create . This structure must have been initialised
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* Given a journal_t structure which tells us which disk blocks we can
* use , create a new journal superblock and initialise all of the
2006-09-27 12:49:27 +04:00
* journal fields from scratch .
2005-04-17 02:20:36 +04:00
* */
int journal_create(journal_t *journal)
{
	unsigned int blocknr;
	struct buffer_head *bh;
	journal_superblock_t *sb;
	int i, err;

	if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
		printk(KERN_ERR "Journal length (%d blocks) too short.\n",
		       journal->j_maxlen);
		journal_fail_superblock(journal);
		return -EINVAL;
	}

	if (journal->j_inode == NULL) {
		/*
		 * We don't know what block to start at!
		 * Creating a journal on an external device only makes sense
		 * via journal_init_dev(); getting here is a caller bug.
		 */
		printk(KERN_EMERG
		       "%s: creation of journal on external device!\n",
		       __func__);
		BUG();
	}

	/* Zero out the entire journal on disk.  We cannot afford to
	   have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
	jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
	for (i = 0; i < journal->j_maxlen; i++) {
		/* Map logical journal block i to its physical block. */
		err = journal_bmap(journal, i, &blocknr);
		if (err)
			return err;
		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
		if (unlikely(!bh))
			return -ENOMEM;
		lock_buffer(bh);
		memset(bh->b_data, 0, journal->j_blocksize);
		BUFFER_TRACE(bh, "marking dirty");
		mark_buffer_dirty(bh);
		BUFFER_TRACE(bh, "marking uptodate");
		set_buffer_uptodate(bh);
		unlock_buffer(bh);
		__brelse(bh);
	}

	/* Make sure the zeroed blocks hit the platter before we proceed. */
	sync_blockdev(journal->j_dev);
	jbd_debug(1, "JBD: journal cleared.\n");

	/* OK, fill in the initial static fields in the new superblock */
	sb = journal->j_superblock;

	sb->s_header.h_magic	 = cpu_to_be32(JFS_MAGIC_NUMBER);
	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);

	sb->s_blocksize	= cpu_to_be32(journal->j_blocksize);
	sb->s_maxlen	= cpu_to_be32(journal->j_maxlen);
	sb->s_first	= cpu_to_be32(1);	/* block 0 is the superblock */

	journal->j_transaction_sequence = 1;

	journal->j_flags &= ~JFS_ABORT;
	journal->j_format_version = 2;

	/* Initialise the dynamic fields and start the commit thread. */
	return journal_reset(journal);
}
2012-04-07 13:05:19 +04:00
/*
 * Write the journal superblock buffer with the given block-layer operation
 * flags and wait for the I/O to complete.  Any resulting write error is
 * reported but not propagated to the caller.
 */
static void journal_write_superblock(journal_t *journal, int write_op)
{
	struct buffer_head *bh = journal->j_sb_buffer;
	int ret;

	trace_journal_write_superblock(journal, write_op);
	/* Without barrier support we must not issue cache-flush requests. */
	if (!(journal->j_flags & JFS_BARRIER))
		write_op &= ~(REQ_FUA | REQ_FLUSH);
	lock_buffer(bh);
	if (buffer_write_io_error(bh)) {
		char b[BDEVNAME_SIZE];
		/*
		 * Oh, dear.  A previous attempt to write the journal
		 * superblock failed.  This could happen because the
		 * USB device was yanked out.  Or it could happen to
		 * be a transient write error and maybe the block will
		 * be remapped.  Nothing we can do but to retry the
		 * write and hope for the best.
		 */
		printk(KERN_ERR "JBD: previous I/O error detected "
		       "for journal superblock update for %s.\n",
		       journal_dev_name(journal, b));
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
	}

	/* Extra reference pairs with the release in end_buffer_write_sync. */
	get_bh(bh);
	bh->b_end_io = end_buffer_write_sync;
	ret = submit_bh(write_op, bh);
	wait_on_buffer(bh);
	if (buffer_write_io_error(bh)) {
		/* Clear the sticky error bit so future writes are retried. */
		clear_buffer_write_io_error(bh);
		set_buffer_uptodate(bh);
		ret = -EIO;
	}
	if (ret) {
		char b[BDEVNAME_SIZE];
		printk(KERN_ERR "JBD: Error %d detected "
		       "when updating journal superblock for %s.\n",
		       ret, journal_dev_name(journal, b));
	}
}
/**
* journal_update_sb_log_tail ( ) - Update log tail in journal sb on disk .
* @ journal : The journal to update .
2012-04-07 13:05:19 +04:00
* @ tail_tid : TID of the new transaction at the tail of the log
* @ tail_block : The first block of the transaction at the tail of the log
* @ write_op : With which operation should we write the journal sb
2012-04-07 14:33:03 +04:00
*
* Update a journal ' s superblock information about log tail and write it to
* disk , waiting for the IO to complete .
*/
2012-04-07 13:05:19 +04:00
void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
				unsigned int tail_block, int write_op)
{
	journal_superblock_t *sb = journal->j_superblock;

	/* Caller must serialise superblock updates via the checkpoint mutex. */
	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
	jbd_debug(1, "JBD: updating superblock (start %u, seq %u)\n",
		  tail_block, tail_tid);

	sb->s_sequence = cpu_to_be32(tail_tid);
	sb->s_start = cpu_to_be32(tail_block);

	/* Write synchronously; write_op may add FUA/FLUSH for ordering. */
	journal_write_superblock(journal, write_op);

	/* Log is no longer empty */
	spin_lock(&journal->j_state_lock);
	WARN_ON(!sb->s_sequence);
	journal->j_flags &= ~JFS_FLUSHED;
	spin_unlock(&journal->j_state_lock);
}
/**
* mark_journal_empty ( ) - Mark on disk journal as empty .
* @ journal : The journal to update .
*
* Update a journal ' s dynamic superblock fields to show that journal is empty .
* Write updated superblock to disk waiting for IO to complete .
*/
static void mark_journal_empty(journal_t *journal)
{
	journal_superblock_t *sb = journal->j_superblock;

	/* Caller must serialise superblock updates via the checkpoint mutex. */
	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
	spin_lock(&journal->j_state_lock);
	/* Is it already empty? */
	if (sb->s_start == 0) {
		spin_unlock(&journal->j_state_lock);
		return;
	}
	jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
		  journal->j_tail_sequence);

	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
	/* s_start == 0 is the on-disk marker for "no recovery needed". */
	sb->s_start    = cpu_to_be32(0);
	spin_unlock(&journal->j_state_lock);

	/* FUA: the empty marker must be durable before callers proceed. */
	journal_write_superblock(journal, WRITE_FUA);

	spin_lock(&journal->j_state_lock);
	/* Log is empty */
	journal->j_flags |= JFS_FLUSHED;
	spin_unlock(&journal->j_state_lock);
}
2012-04-07 14:33:03 +04:00
/**
* journal_update_sb_errno ( ) - Update error in the journal .
* @ journal : The journal to update .
*
* Update a journal ' s errno . Write updated superblock to disk waiting for IO
* to complete .
*/
static void journal_update_sb_errno(journal_t *journal)
{
	journal_superblock_t *sb = journal->j_superblock;

	/* j_state_lock protects the j_errno read vs. concurrent aborts. */
	spin_lock(&journal->j_state_lock);
	jbd_debug(1, "JBD: updating superblock error (errno %d)\n",
		  journal->j_errno);
	sb->s_errno = cpu_to_be32(journal->j_errno);
	spin_unlock(&journal->j_state_lock);

	journal_write_superblock(journal, WRITE_SYNC);
}
2005-04-17 02:20:36 +04:00
/*
* Read the superblock for a given journal , performing initial
* validation of the format .
*/
static int journal_get_superblock(journal_t *journal)
{
	struct buffer_head *bh;
	journal_superblock_t *sb;
	int err = -EIO;

	bh = journal->j_sb_buffer;

	J_ASSERT(bh != NULL);
	if (!buffer_uptodate(bh)) {
		/* Superblock not yet read in: do it synchronously now. */
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			printk(KERN_ERR
			       "JBD: IO error reading journal superblock\n");
			goto out;
		}
	}

	sb = journal->j_superblock;

	err = -EINVAL;

	/* Magic number and blocksize must match what we expect. */
	if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
		printk(KERN_WARNING "JBD: no valid journal superblock found\n");
		goto out;
	}

	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
	case JFS_SUPERBLOCK_V1:
		journal->j_format_version = 1;
		break;
	case JFS_SUPERBLOCK_V2:
		journal->j_format_version = 2;
		break;
	default:
		printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
		goto out;
	}

	/* The on-disk length may shrink our view, but never grow it. */
	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
		journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
	else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
		printk(KERN_WARNING "JBD: journal file too short\n");
		goto out;
	}

	/* s_first must point inside the journal and past the superblock. */
	if (be32_to_cpu(sb->s_first) == 0 ||
	    be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
		printk(KERN_WARNING
			"JBD: Invalid start block of journal: %u\n",
			be32_to_cpu(sb->s_first));
		goto out;
	}

	return 0;

out:
	journal_fail_superblock(journal);
	return err;
}
/*
* Load the on - disk journal superblock and read the key fields into the
* journal_t .
*/
/*
 * Read and validate the on-disk journal superblock, then copy its dynamic
 * fields (tail sequence/block, extent and error state) into the journal_t.
 * Returns 0 on success or a negative errno from validation.
 */
static int load_superblock(journal_t *journal)
{
	int err = journal_get_superblock(journal);

	if (err)
		return err;

	journal_superblock_t *sb = journal->j_superblock;

	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
	journal->j_tail          = be32_to_cpu(sb->s_start);
	journal->j_first         = be32_to_cpu(sb->s_first);
	journal->j_last          = be32_to_cpu(sb->s_maxlen);
	journal->j_errno         = be32_to_cpu(sb->s_errno);

	return 0;
}
/**
* int journal_load ( ) - Read journal from disk .
* @ journal : Journal to act on .
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* Given a journal_t structure which tells us which disk blocks contain
* a journal , read the journal from disk to initialise the in - memory
* structures .
*/
int journal_load(journal_t *journal)
{
	int err;
	journal_superblock_t *sb;

	err = load_superblock(journal);
	if (err)
		return err;

	sb = journal->j_superblock;
	/* If this is a V2 superblock, then we have to check the
	 * features flags on it. */

	if (journal->j_format_version >= 2) {
		/* Refuse to touch a journal using features we don't know. */
		if ((sb->s_feature_ro_compat &
		     ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
		    (sb->s_feature_incompat &
		     ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
			printk(KERN_WARNING
				"JBD: Unrecognised features on journal\n");
			return -EINVAL;
		}
	}

	/* Let the recovery code check whether it needs to recover any
	 * data from the journal. */
	if (journal_recover(journal))
		goto recovery_error;

	/* OK, we've finished with the dynamic journal bits:
	 * reinitialise the dynamic contents of the superblock in memory
	 * and reset them on disk. */
	if (journal_reset(journal))
		goto recovery_error;

	journal->j_flags &= ~JFS_ABORT;
	journal->j_flags |= JFS_LOADED;
	return 0;

recovery_error:
	printk(KERN_WARNING "JBD: recovery failed\n");
	return -EIO;
}
/**
* void journal_destroy ( ) - Release a journal_t structure .
* @ journal : Journal to act on .
*
* Release a journal_t structure once it is no longer in use by the
* journaled object .
2008-10-23 01:15:00 +04:00
* Return < 0 if we couldn ' t clean up the journal .
2005-04-17 02:20:36 +04:00
*/
2008-10-23 01:15:00 +04:00
int journal_destroy(journal_t *journal)
{
	int err = 0;

	/* Wait for the commit thread to wake up and die. */
	journal_kill_thread(journal);

	/* Force a final log commit */
	if (journal->j_running_transaction)
		journal_commit_transaction(journal);

	/* Force any old transactions to disk */

	/* We cannot race with anybody but must keep assertions happy */
	mutex_lock(&journal->j_checkpoint_mutex);
	/* Totally anal locking here... */
	spin_lock(&journal->j_list_lock);
	while (journal->j_checkpoint_transactions != NULL) {
		/* Drop the list lock: log_do_checkpoint() takes it itself. */
		spin_unlock(&journal->j_list_lock);
		log_do_checkpoint(journal);
		spin_lock(&journal->j_list_lock);
	}

	J_ASSERT(journal->j_running_transaction == NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);
	J_ASSERT(journal->j_checkpoint_transactions == NULL);
	spin_unlock(&journal->j_list_lock);

	if (journal->j_sb_buffer) {
		if (!is_journal_aborted(journal)) {
			/* Bump the sequence so old log records are ignored. */
			journal->j_tail_sequence =
				++journal->j_transaction_sequence;
			mark_journal_empty(journal);
		} else
			err = -EIO;
		brelse(journal->j_sb_buffer);
	}
	mutex_unlock(&journal->j_checkpoint_mutex);

	if (journal->j_inode)
		iput(journal->j_inode);
	if (journal->j_revoke)
		journal_destroy_revoke(journal);
	kfree(journal->j_wbuf);
	kfree(journal);

	return err;
}
/**
* int journal_check_used_features ( ) - Check if features specified are used .
* @ journal : Journal to check .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* Check whether the journal uses all of a given set of
2006-09-27 12:49:27 +04:00
* features . Return true ( non - zero ) if it does .
2005-04-17 02:20:36 +04:00
* */
int journal_check_used_features(journal_t *journal, unsigned long compat,
				unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *sb;

	/* The empty feature set is trivially in use. */
	if (!compat && !ro && !incompat)
		return 1;
	/* V1 superblocks carry no feature flags at all. */
	if (journal->j_format_version == 1)
		return 0;

	sb = journal->j_superblock;

	/* Each requested mask must be fully present in the superblock. */
	if ((be32_to_cpu(sb->s_feature_compat) & compat) != compat)
		return 0;
	if ((be32_to_cpu(sb->s_feature_ro_compat) & ro) != ro)
		return 0;
	if ((be32_to_cpu(sb->s_feature_incompat) & incompat) != incompat)
		return 0;

	return 1;
}
/**
* int journal_check_available_features ( ) - Check feature set in journalling layer
* @ journal : Journal to check .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* Check whether the journaling code supports the use of
* all of a given set of features on this journal . Return true
* ( non - zero ) if it can . */
int journal_check_available_features(journal_t *journal, unsigned long compat,
				     unsigned long ro, unsigned long incompat)
{
	/* No features requested: always supportable. */
	if (!compat && !ro && !incompat)
		return 1;

	/* We can support any known requested features iff the
	 * superblock is in version 2.  Otherwise we fail to support any
	 * extended sb features. */
	if (journal->j_format_version != 2)
		return 0;

	/* Reject any requested bit outside the known feature sets. */
	if ((compat & JFS_KNOWN_COMPAT_FEATURES) != compat)
		return 0;
	if ((ro & JFS_KNOWN_ROCOMPAT_FEATURES) != ro)
		return 0;
	if ((incompat & JFS_KNOWN_INCOMPAT_FEATURES) != incompat)
		return 0;

	return 1;
}
/**
* int journal_set_features ( ) - Mark a given journal feature in the superblock
* @ journal : Journal to act on .
* @ compat : bitmask of compatible features
* @ ro : bitmask of features that force read - only mount
* @ incompat : bitmask of incompatible features
*
* Mark a given journal feature as present on the
2006-09-27 12:49:27 +04:00
* superblock . Returns true if the requested features could be set .
2005-04-17 02:20:36 +04:00
*
*/
int journal_set_features(journal_t *journal, unsigned long compat,
			 unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *sb = journal->j_superblock;

	/* Already set?  Nothing to do. */
	if (journal_check_used_features(journal, compat, ro, incompat))
		return 1;
	/* Refuse features this journalling layer cannot honour. */
	if (!journal_check_available_features(journal, compat, ro, incompat))
		return 0;

	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
		  compat, ro, incompat);

	/* OR the requested bits into the in-memory superblock copy. */
	sb->s_feature_compat    |= cpu_to_be32(compat);
	sb->s_feature_ro_compat |= cpu_to_be32(ro);
	sb->s_feature_incompat  |= cpu_to_be32(incompat);

	return 1;
}
/**
* int journal_update_format ( ) - Update on - disk journal structure .
* @ journal : Journal to act on .
*
* Given an initialised but unloaded journal struct , poke about in the
* on - disk structure to update it to the most recent supported version .
*/
int journal_update_format(journal_t *journal)
{
	journal_superblock_t *sb;
	int err = journal_get_superblock(journal);
	__be32 blocktype;

	if (err)
		return err;

	sb = journal->j_superblock;
	blocktype = sb->s_header.h_blocktype;

	/* Already at the latest supported on-disk format: nothing to do. */
	if (blocktype == cpu_to_be32(JFS_SUPERBLOCK_V2))
		return 0;
	/* V1 superblocks are converted in place. */
	if (blocktype == cpu_to_be32(JFS_SUPERBLOCK_V1))
		return journal_convert_superblock_v1(journal, sb);

	return -EINVAL;
}
/*
 * Upgrade a V1 journal superblock to V2 in place: zero the feature and
 * extension fields that V1 did not define, stamp the V2 blocktype and
 * write the result synchronously to disk.  Always returns 0.
 */
static int journal_convert_superblock_v1(journal_t *journal,
					 journal_superblock_t *sb)
{
	int offset, blocksize;
	struct buffer_head *bh;

	printk(KERN_WARNING
		"JBD: Converting superblock from version 1 to 2.\n");

	/* Pre-initialise new fields to zero */
	/* Everything from s_feature_compat to the end of the block is new. */
	offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
	blocksize = be32_to_cpu(sb->s_blocksize);
	memset(&sb->s_feature_compat, 0, blocksize-offset);

	sb->s_nr_users = cpu_to_be32(1);
	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
	journal->j_format_version = 2;

	bh = journal->j_sb_buffer;
	BUFFER_TRACE(bh, "marking dirty");
	mark_buffer_dirty(bh);
	/* Synchronous write: the conversion must be durable before return. */
	sync_dirty_buffer(bh);
	return 0;
}
/**
* int journal_flush ( ) - Flush journal
* @ journal : Journal to act on .
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* Flush all data for a given journal to disk and empty the journal .
* Filesystems can use this when remounting readonly to ensure that
* recovery does not need to happen on remount .
*/
int journal_flush(journal_t *journal)
{
	int err = 0;
	transaction_t *transaction = NULL;

	spin_lock(&journal->j_state_lock);

	/* Force everything buffered to the log... */
	if (journal->j_running_transaction) {
		transaction = journal->j_running_transaction;
		__log_start_commit(journal, transaction->t_tid);
	} else if (journal->j_committing_transaction)
		transaction = journal->j_committing_transaction;

	/* Wait for the log commit to complete... */
	if (transaction) {
		tid_t tid = transaction->t_tid;

		spin_unlock(&journal->j_state_lock);
		log_wait_commit(journal, tid);
	} else {
		spin_unlock(&journal->j_state_lock);
	}

	/* ...and flush everything in the log out to disk. */
	spin_lock(&journal->j_list_lock);
	while (!err && journal->j_checkpoint_transactions != NULL) {
		spin_unlock(&journal->j_list_lock);
		mutex_lock(&journal->j_checkpoint_mutex);
		err = log_do_checkpoint(journal);
		mutex_unlock(&journal->j_checkpoint_mutex);
		spin_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);

	if (is_journal_aborted(journal))
		return -EIO;

	/* Checkpoint mutex also serialises the superblock update below. */
	mutex_lock(&journal->j_checkpoint_mutex);
	cleanup_journal_tail(journal);

	/* Finally, mark the journal as really needing no recovery.
	 * This sets s_start==0 in the underlying superblock, which is
	 * the magic code for a fully-recovered superblock.  Any future
	 * commits of data to the journal will restore the current
	 * s_start value. */
	mark_journal_empty(journal);
	mutex_unlock(&journal->j_checkpoint_mutex);
	spin_lock(&journal->j_state_lock);
	J_ASSERT(!journal->j_running_transaction);
	J_ASSERT(!journal->j_committing_transaction);
	J_ASSERT(!journal->j_checkpoint_transactions);
	J_ASSERT(journal->j_head == journal->j_tail);
	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
	spin_unlock(&journal->j_state_lock);
	return 0;
}
/**
* int journal_wipe ( ) - Wipe journal contents
* @ journal : Journal to act on .
* @ write : flag ( see below )
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* Wipe out all of the contents of a journal , safely . This will produce
* a warning if the journal contains any valid recovery information .
* Must be called between journal_init_ * ( ) and journal_load ( ) .
*
* If ' write ' is non - zero , then we wipe out the journal on disk ; otherwise
* we merely suppress recovery .
*/
int journal_wipe(journal_t *journal, int write)
{
	int err = 0;

	/* Must be called before the journal has been loaded. */
	J_ASSERT(!(journal->j_flags & JFS_LOADED));

	err = load_superblock(journal);
	if (err)
		return err;

	/* j_tail == 0 means the journal is already clean: nothing to wipe. */
	if (!journal->j_tail)
		goto no_recovery;

	printk(KERN_WARNING "JBD: %s recovery information on journal\n",
		write ? "Clearing" : "Ignoring");

	err = journal_skip_recovery(journal);
	if (write) {
		/* Lock to make assertions happy... */
		mutex_lock(&journal->j_checkpoint_mutex);
		mark_journal_empty(journal);
		mutex_unlock(&journal->j_checkpoint_mutex);
	}

 no_recovery:
	return err;
}
/*
* journal_dev_name : format a character string to describe on what
* device this journal is present .
*/
2005-09-07 02:16:41 +04:00
/*
 * Format the name of the block device backing this journal into the
 * caller-supplied buffer (at least BDEVNAME_SIZE bytes) and return it.
 */
static const char *journal_dev_name(journal_t *journal, char *buffer)
{
	/* An inode-backed journal lives on its filesystem's device. */
	struct block_device *bdev = journal->j_inode ?
		journal->j_inode->i_sb->s_bdev : journal->j_dev;

	return bdevname(bdev, buffer);
}
/*
 * Journal abort has very specific semantics, which we describe
 * below in the documentation for journal_abort().
 *
 * Two internal functions, which provide abort to the jbd layer
 * itself, are here.
 */
/*
* Quick version for internal journal use ( doesn ' t lock the journal ) .
* Aborts hard - - - we mark the abort as occurred , but do _nothing_ else ,
* and don ' t attempt to make any other journal updates .
*/
2008-02-06 12:40:12 +03:00
static void __journal_abort_hard(journal_t *journal)
{
	transaction_t *transaction;
	char b[BDEVNAME_SIZE];

	/* Idempotent: a second abort is a no-op. */
	if (journal->j_flags & JFS_ABORT)
		return;

	printk(KERN_ERR "Aborting journal on device %s.\n",
		journal_dev_name(journal, b));

	spin_lock(&journal->j_state_lock);
	journal->j_flags |= JFS_ABORT;
	transaction = journal->j_running_transaction;
	/* Kick the running transaction so waiters see the abort promptly. */
	if (transaction)
		__log_start_commit(journal, transaction->t_tid);
	spin_unlock(&journal->j_state_lock);
}
/* Soft abort: record the abort error status in the journal superblock,
* but don ' t do any other IO . */
2005-09-07 02:16:41 +04:00
static void __journal_abort_soft (journal_t *journal, int errno)
{
	/* Idempotent: a second abort is a no-op. */
	if (journal->j_flags & JFS_ABORT)
		return;

	/* Keep the first reported errno; later ones are dropped. */
	if (!journal->j_errno)
		journal->j_errno = errno;

	__journal_abort_hard(journal);

	/* Record the errno on disk only when the caller supplied one. */
	if (errno)
		journal_update_sb_errno(journal);
}
/**
* void journal_abort ( ) - Shutdown the journal immediately .
* @ journal : the journal to shutdown .
* @ errno : an error number to record in the journal indicating
* the reason for the shutdown .
*
* Perform a complete , immediate shutdown of the ENTIRE
* journal ( not of a single transaction ) . This operation cannot be
* undone without closing and reopening the journal .
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
* The journal_abort function is intended to support higher level error
* recovery mechanisms such as the ext2 / ext3 remount - readonly error
* mode .
*
* Journal abort has very specific semantics . Any existing dirty ,
* unjournaled buffers in the main filesystem will still be written to
* disk by bdflush , but the journaling mechanism will be suspended
* immediately and no further transaction commits will be honoured .
*
* Any dirty , journaled buffers will be written back to disk without
* hitting the journal . Atomicity cannot be guaranteed on an aborted
* filesystem , but we _do_ attempt to leave as much data as possible
* behind for fsck to use for cleanup .
*
* Any attempt to get a new transaction handle on a journal which is in
* ABORT state will just result in an - EROFS error return . A
* journal_stop on an existing handle will return - EIO if we have
* entered abort state during the update .
*
* Recursive transactions are not disturbed by journal abort until the
* final journal_stop , which will receive the - EIO error .
*
* Finally , the journal_abort call allows the caller to supply an errno
* which will be recorded ( if possible ) in the journal superblock . This
* allows a client to record failure conditions in the middle of a
* transaction without having to complete the transaction to record the
* failure to disk . ext3_error , for example , now uses this
* functionality .
*
* Errors which originate from within the journaling layer will NOT
* supply an errno ; a null errno implies that absolutely no further
* writes are done to the journal ( unless there are any already in
* progress ) .
2006-09-27 12:49:27 +04:00
*
2005-04-17 02:20:36 +04:00
*/
void journal_abort(journal_t *journal, int errno)
{
	/* Public entry point: soft abort records errno then aborts hard. */
	__journal_abort_soft(journal, errno);
}
2006-09-27 12:49:27 +04:00
/**
2005-04-17 02:20:36 +04:00
* int journal_errno ( ) - returns the journal ' s error state .
* @ journal : journal to examine .
*
 * This is the errno number set with journal_abort(), the last
* time the journal was mounted - if the journal was stopped
* without calling abort this will be 0.
*
* If the journal has been aborted on this mount time - EROFS will
* be returned .
*/
int journal_errno(journal_t *journal)
{
	int ret;

	/* j_state_lock keeps the flags/errno pair consistent. */
	spin_lock(&journal->j_state_lock);
	ret = (journal->j_flags & JFS_ABORT) ? -EROFS : journal->j_errno;
	spin_unlock(&journal->j_state_lock);

	return ret;
}
2006-09-27 12:49:27 +04:00
/**
2005-04-17 02:20:36 +04:00
* int journal_clear_err ( ) - clears the journal ' s error state
* @ journal : journal to act on .
*
* An error must be cleared or Acked to take a FS out of readonly
* mode .
*/
int journal_clear_err(journal_t *journal)
{
	int ret = 0;

	spin_lock(&journal->j_state_lock);
	/* An aborted journal cannot have its error cleared. */
	if (journal->j_flags & JFS_ABORT)
		ret = -EROFS;
	else
		journal->j_errno = 0;
	spin_unlock(&journal->j_state_lock);

	return ret;
}
2006-09-27 12:49:27 +04:00
/**
2005-04-17 02:20:36 +04:00
* void journal_ack_err ( ) - Ack journal err .
* @ journal : journal to act on .
*
* An error must be cleared or Acked to take a FS out of readonly
* mode .
*/
void journal_ack_err(journal_t *journal)
{
	spin_lock(&journal->j_state_lock);
	/* Only acknowledge when an error is actually recorded. */
	if (journal->j_errno)
		journal->j_flags |= JFS_ACK_ERR;
	spin_unlock(&journal->j_state_lock);
}
int journal_blocks_per_page ( struct inode * inode )
{
return 1 < < ( PAGE_CACHE_SHIFT - inode - > i_sb - > s_blocksize_bits ) ;
}
/*
* Journal_head storage management
*/
2006-12-07 07:33:20 +03:00
static struct kmem_cache * journal_head_cache ;
2005-04-17 02:20:36 +04:00
# ifdef CONFIG_JBD_DEBUG
static atomic_t nr_journal_heads = ATOMIC_INIT ( 0 ) ;
# endif
static int journal_init_journal_head_cache ( void )
{
int retval ;
2008-03-29 06:07:18 +03:00
J_ASSERT ( journal_head_cache = = NULL ) ;
2005-04-17 02:20:36 +04:00
journal_head_cache = kmem_cache_create ( " journal_head " ,
sizeof ( struct journal_head ) ,
0 , /* offset */
2007-10-16 12:25:52 +04:00
SLAB_TEMPORARY , /* flags */
2007-07-20 05:11:58 +04:00
NULL ) ; /* ctor */
2005-04-17 02:20:36 +04:00
retval = 0 ;
2008-03-29 06:07:18 +03:00
if ( ! journal_head_cache ) {
2005-04-17 02:20:36 +04:00
retval = - ENOMEM ;
printk ( KERN_EMERG " JBD: no memory for journal_head cache \n " ) ;
}
return retval ;
}
static void journal_destroy_journal_head_cache ( void )
{
2008-07-25 12:46:19 +04:00
if ( journal_head_cache ) {
kmem_cache_destroy ( journal_head_cache ) ;
journal_head_cache = NULL ;
}
2005-04-17 02:20:36 +04:00
}
/*
* journal_head splicing and dicing
*/
/*
 * Allocate a journal_head from the slab cache.  Never fails: on ENOMEM it
 * yields the CPU and retries forever, so the caller must tolerate sleeping.
 */
static struct journal_head *journal_alloc_journal_head(void)
{
	struct journal_head *ret;

#ifdef CONFIG_JBD_DEBUG
	atomic_inc(&nr_journal_heads);
#endif
	ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
	if (ret == NULL) {
		jbd_debug(1, "out of memory for journal_head\n");
		printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n",
				   __func__);

		/* Spin (with yield) until the allocation succeeds. */
		while (ret == NULL) {
			yield();
			ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
		}
	}
	return ret;
}
/*
 * Release a journal_head back to the slab cache.  In debug builds the
 * structure is poisoned first so use-after-free is easier to spot.
 */
static void journal_free_journal_head(struct journal_head *jh)
{
#ifdef CONFIG_JBD_DEBUG
	atomic_dec(&nr_journal_heads);
	memset(jh, JBD_POISON_FREE, sizeof(*jh));
#endif
	kmem_cache_free(journal_head_cache, jh);
}
/*
 * A journal_head is attached to a buffer_head whenever JBD has an
 * interest in the buffer.
 *
 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
 * is set.  This bit is tested in core kernel code where we need to take
 * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
 * there.
 *
 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
 *
 * When a buffer has its BH_JBD bit set it is immune from being released by
 * core kernel code, mainly via ->b_count.
 *
 * A journal_head is detached from its buffer_head when the journal_head's
 * b_jcount reaches zero.  Running transaction (b_transaction) and checkpoint
 * transaction (b_cp_transaction) hold their references to b_jcount.
 *
 * Various places in the kernel want to attach a journal_head to a buffer_head
 * _before_ attaching the journal_head to a transaction.  To protect the
 * journal_head in this situation, journal_add_journal_head elevates the
 * journal_head's b_jcount refcount by one.  The caller must call
 * journal_put_journal_head() to undo this.
 *
 * So the typical usage would be:
 *
 *	(Attach a journal_head if needed.  Increments b_jcount)
 *	struct journal_head *jh = journal_add_journal_head(bh);
 *	...
 *	(Get another reference for transaction)
 *	journal_grab_journal_head(bh);
 *	jh->b_transaction = xxx;
 *	(Put original reference)
 *	journal_put_journal_head(jh);
 */
/*
* Give a buffer_head a journal_head .
*
* May sleep .
*/
struct journal_head * journal_add_journal_head ( struct buffer_head * bh )
{
struct journal_head * jh ;
struct journal_head * new_jh = NULL ;
repeat :
if ( ! buffer_jbd ( bh ) ) {
new_jh = journal_alloc_journal_head ( ) ;
memset ( new_jh , 0 , sizeof ( * new_jh ) ) ;
}
jbd_lock_bh_journal_head ( bh ) ;
if ( buffer_jbd ( bh ) ) {
jh = bh2jh ( bh ) ;
} else {
J_ASSERT_BH ( bh ,
( atomic_read ( & bh - > b_count ) > 0 ) | |
( bh - > b_page & & bh - > b_page - > mapping ) ) ;
if ( ! new_jh ) {
jbd_unlock_bh_journal_head ( bh ) ;
goto repeat ;
}
jh = new_jh ;
new_jh = NULL ; /* We consumed it */
set_buffer_jbd ( bh ) ;
bh - > b_private = jh ;
jh - > b_bh = bh ;
get_bh ( bh ) ;
BUFFER_TRACE ( bh , " added journal_head " ) ;
}
jh - > b_jcount + + ;
jbd_unlock_bh_journal_head ( bh ) ;
if ( new_jh )
journal_free_journal_head ( new_jh ) ;
return bh - > b_private ;
}
/*
* Grab a ref against this buffer_head ' s journal_head . If it ended up not
* having a journal_head , return NULL
*/
struct journal_head * journal_grab_journal_head ( struct buffer_head * bh )
{
struct journal_head * jh = NULL ;
jbd_lock_bh_journal_head ( bh ) ;
if ( buffer_jbd ( bh ) ) {
jh = bh2jh ( bh ) ;
jh - > b_jcount + + ;
}
jbd_unlock_bh_journal_head ( bh ) ;
return jh ;
}
static void __journal_remove_journal_head ( struct buffer_head * bh )
{
struct journal_head * jh = bh2jh ( bh ) ;
J_ASSERT_JH ( jh , jh - > b_jcount > = 0 ) ;
2011-06-25 01:11:59 +04:00
J_ASSERT_JH ( jh , jh - > b_transaction = = NULL ) ;
J_ASSERT_JH ( jh , jh - > b_next_transaction = = NULL ) ;
J_ASSERT_JH ( jh , jh - > b_cp_transaction = = NULL ) ;
J_ASSERT_JH ( jh , jh - > b_jlist = = BJ_None ) ;
J_ASSERT_BH ( bh , buffer_jbd ( bh ) ) ;
J_ASSERT_BH ( bh , jh2bh ( jh ) = = bh ) ;
BUFFER_TRACE ( bh , " remove journal_head " ) ;
if ( jh - > b_frozen_data ) {
printk ( KERN_WARNING " %s: freeing b_frozen_data \n " , __func__ ) ;
jbd_free ( jh - > b_frozen_data , bh - > b_size ) ;
2005-04-17 02:20:36 +04:00
}
2011-06-25 01:11:59 +04:00
if ( jh - > b_committed_data ) {
printk ( KERN_WARNING " %s: freeing b_committed_data \n " , __func__ ) ;
jbd_free ( jh - > b_committed_data , bh - > b_size ) ;
}
bh - > b_private = NULL ;
jh - > b_bh = NULL ; /* debug, really */
clear_buffer_jbd ( bh ) ;
journal_free_journal_head ( jh ) ;
2005-04-17 02:20:36 +04:00
}
/*
2011-06-25 01:11:59 +04:00
* Drop a reference on the passed journal_head . If it fell to zero then
2005-04-17 02:20:36 +04:00
* release the journal_head from the buffer_head .
*/
void journal_put_journal_head ( struct journal_head * jh )
{
struct buffer_head * bh = jh2bh ( jh ) ;
jbd_lock_bh_journal_head ( bh ) ;
J_ASSERT_JH ( jh , jh - > b_jcount > 0 ) ;
- - jh - > b_jcount ;
2011-06-25 01:11:59 +04:00
if ( ! jh - > b_jcount ) {
2005-04-17 02:20:36 +04:00
__journal_remove_journal_head ( bh ) ;
2011-06-25 01:11:59 +04:00
jbd_unlock_bh_journal_head ( bh ) ;
2005-04-17 02:20:36 +04:00
__brelse ( bh ) ;
2011-06-25 01:11:59 +04:00
} else
jbd_unlock_bh_journal_head ( bh ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * debugfs tunables
 */
#ifdef CONFIG_JBD_DEBUG

u8 journal_enable_debug __read_mostly;
EXPORT_SYMBOL(journal_enable_debug);

static struct dentry *jbd_debugfs_dir;
static struct dentry *jbd_debug;

/* Expose the debug level as /sys/kernel/debug/jbd/jbd-debug. */
static void __init jbd_create_debugfs_entry(void)
{
	jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
	if (jbd_debugfs_dir)
		jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
					      jbd_debugfs_dir,
					      &journal_enable_debug);
}

static void __exit jbd_remove_debugfs_entry(void)
{
	/* debugfs_remove() tolerates NULL dentries. */
	debugfs_remove(jbd_debug);
	debugfs_remove(jbd_debugfs_dir);
}

#else

static inline void jbd_create_debugfs_entry(void)
{
}

static inline void jbd_remove_debugfs_entry(void)
{
}

#endif
2006-12-07 07:33:20 +03:00
struct kmem_cache * jbd_handle_cache ;
2005-04-17 02:20:36 +04:00
static int __init journal_init_handle_cache ( void )
{
jbd_handle_cache = kmem_cache_create ( " journal_handle " ,
sizeof ( handle_t ) ,
0 , /* offset */
2007-10-16 12:25:52 +04:00
SLAB_TEMPORARY , /* flags */
2007-07-20 05:11:58 +04:00
NULL ) ; /* ctor */
2005-04-17 02:20:36 +04:00
if ( jbd_handle_cache = = NULL ) {
printk ( KERN_EMERG " JBD: failed to create handle cache \n " ) ;
return - ENOMEM ;
}
return 0 ;
}
static void journal_destroy_handle_cache ( void )
{
if ( jbd_handle_cache )
kmem_cache_destroy ( jbd_handle_cache ) ;
}
/*
 * Module startup and shutdown
 */

/*
 * Create all JBD slab caches.  Stops at the first failure and returns
 * its error code; 0 on full success.
 */
static int __init journal_init_caches(void)
{
	int err;

	err = journal_init_revoke_caches();
	if (err)
		return err;
	err = journal_init_journal_head_cache();
	if (err)
		return err;
	return journal_init_handle_cache();
}
/*
 * Tear down every JBD slab cache.  Each helper is a no-op when its
 * cache was never created, so this is safe on partial-init failure.
 */
static void journal_destroy_caches(void)
{
	journal_destroy_revoke_caches();
	journal_destroy_journal_head_cache();
	journal_destroy_handle_cache();
}
/*
 * Module entry point: build the slab caches and the debugfs knobs.
 */
static int __init journal_init(void)
{
	int err;

	/* The on-disk journal superblock must be exactly 1024 bytes. */
	BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);

	err = journal_init_caches();
	if (err != 0)
		journal_destroy_caches();
	jbd_create_debugfs_entry();
	return err;
}
/*
 * Module exit: report leaked journal_heads (debug builds), then remove
 * the debugfs entries and free the slab caches.
 */
static void __exit journal_exit(void)
{
#ifdef CONFIG_JBD_DEBUG
	int leaked = atomic_read(&nr_journal_heads);

	if (leaked)
		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", leaked);
#endif
	jbd_remove_debugfs_entry();
	journal_destroy_caches();
}
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);