2006-10-11 12:20:57 +04:00
/*
2007-05-09 09:51:49 +04:00
* linux / fs / jbd2 / revoke . c
2006-10-11 12:20:57 +04:00
*
* Written by Stephen C . Tweedie < sct @ redhat . com > , 2000
*
* Copyright 2000 Red Hat corp - - - All Rights Reserved
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License , version 2 , or at your
* option , any later version , incorporated herein by reference .
*
* Journal revoke routines for the generic filesystem journaling code ;
* part of the ext2fs journaling system .
*
* Revoke is the mechanism used to prevent old log records for deleted
* metadata from being replayed on top of newer data using the same
* blocks . The revoke mechanism is used in two separate places :
*
* + Commit : during commit we write the entire list of the current
* transaction ' s revoked blocks to the journal
*
* + Recovery : during recovery we record the transaction ID of all
* revoked blocks . If there are multiple revoke records in the log
* for a single block , only the last one counts , and if there is a log
* entry for a block beyond the last revoke , then that log entry still
* gets replayed .
*
* We can get interactions between revokes and new log data within a
* single transaction :
*
* Block is revoked and then journaled :
* The desired end result is the journaling of the new block , so we
* cancel the revoke before the transaction commits .
*
* Block is journaled and then revoked :
* The revoke must take precedence over the write of the block , so we
* need either to cancel the journal entry or to write the revoke
* later in the log than the log block . In this case , we choose the
* latter : journaling a block cancels any revoke record for that block
* in the current transaction , so any revoke for that block in the
* transaction must have happened after the block was journaled and so
* the revoke must take precedence .
*
* Block is revoked and then written as data :
* The data write is allowed to succeed , but the revoke is _not_
* cancelled . We still need to prevent old log records from
* overwriting the new data . We don ' t even need to clear the revoke
* bit here .
*
2011-12-29 02:46:46 +04:00
* We cache the revoke status of a buffer in the current transaction in the
* buffer_head state (b_state) bits. As the name says, the RevokeValid flag
* indicates that the cached revoke status of a buffer is valid and can be
* relied upon.
*
2006-10-11 12:20:57 +04:00
* Revoke information on buffers is a tri - state value :
*
* RevokeValid clear : no cached revoke status , need to look it up
* RevokeValid set , Revoked clear :
* buffer has not been revoked , and cancel_revoke
* need do nothing .
* RevokeValid set , Revoked set :
* buffer has been revoked .
2009-03-28 00:20:40 +03:00
*
* Locking rules :
* We keep two hash tables of revoke records . One hashtable belongs to the
* running transaction ( is pointed to by journal - > j_revoke ) , the other one
* belongs to the committing transaction . Accesses to the second hash table
* happen only from the kjournald and no other thread touches this table . Also
* journal_switch_revoke_table ( ) which switches which hashtable belongs to the
* running and which to the committing transaction is called only from
* kjournald . Therefore we need no locks when accessing the hashtable belonging
* to the committing transaction .
*
* All users operating on the hash table belonging to the running transaction
* have a handle to the transaction . Therefore they are safe from kjournald
* switching hash tables under them . For operations on the lists of entries in
* the hash table j_revoke_lock is used .
*
2011-03-31 05:57:33 +04:00
* Finally , also replay code uses the hash tables but at this moment no one else
2009-03-28 00:20:40 +03:00
* can touch them ( filesystem isn ' t mounted yet ) and hence no locking is
* needed .
2006-10-11 12:20:57 +04:00
*/
# ifndef __KERNEL__
# include "jfs_user.h"
# else
# include <linux/time.h>
# include <linux/fs.h>
2006-10-11 12:20:59 +04:00
# include <linux/jbd2.h>
2006-10-11 12:20:57 +04:00
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/list.h>
# include <linux/init.h>
2009-04-14 15:50:56 +04:00
# include <linux/bio.h>
2006-10-11 12:20:57 +04:00
# endif
2007-07-16 10:41:17 +04:00
# include <linux/log2.h>
2006-10-11 12:20:57 +04:00
2006-12-07 07:33:20 +03:00
/* Slab cache backing struct jbd2_revoke_record_s allocations. */
static struct kmem_cache * jbd2_revoke_record_cache ;
/* Slab cache backing struct jbd2_revoke_table_s allocations. */
static struct kmem_cache * jbd2_revoke_table_cache ;
2006-10-11 12:20:57 +04:00
/* Each revoke record represents one single revoked block. During
journal replay , this involves recording the transaction ID of the
last transaction to revoke this block . */
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s
2006-10-11 12:20:57 +04:00
{
struct list_head hash ;
tid_t sequence ; /* Used for recovery only */
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ;
2006-10-11 12:20:57 +04:00
} ;
/*
 * A revoke table is nothing more than a hash table whose buckets are
 * lists of revoke records.
 */
struct jbd2_revoke_table_s {
	/*
	 * Number of buckets; must be a power of two.  Recovery could
	 * conceivably want a larger table than normal operation does.
	 */
	int hash_size;
	int hash_shift;			/* log2 of hash_size */
	struct list_head *hash_table;	/* hash_size bucket list heads */
};
# ifdef __KERNEL__
/* Forward declarations for the commit-time helpers defined further down. */
static void write_one_revoke_record(journal_t *journal,
				    transaction_t *transaction,
				    struct journal_head **descriptorp,
				    int *offsetp,
				    struct jbd2_revoke_record_s *record,
				    int write_op);
static void flush_descriptor(journal_t *journal,
			     struct journal_head *descriptor,
			     int offset, int write_op);
2006-10-11 12:20:57 +04:00
# endif
/* Utility functions to maintain the revoke table */
/* Borrowed from buffer.c: this is a tried and tested block hash function */
2006-10-11 12:21:13 +04:00
static inline int hash ( journal_t * journal , unsigned long long block )
2006-10-11 12:20:57 +04:00
{
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_table_s * table = journal - > j_revoke ;
2006-10-11 12:20:57 +04:00
int hash_shift = table - > hash_shift ;
2006-10-11 12:21:09 +04:00
int hash = ( int ) block ^ ( int ) ( ( block > > 31 ) > > 1 ) ;
2006-10-11 12:20:57 +04:00
2006-10-11 12:21:09 +04:00
return ( ( hash < < ( hash_shift - 6 ) ) ^
( hash > > 13 ) ^
( hash < < ( hash_shift - 12 ) ) ) & ( table - > hash_size - 1 ) ;
2006-10-11 12:20:57 +04:00
}
2006-10-11 12:21:13 +04:00
static int insert_revoke_hash ( journal_t * journal , unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
tid_t seq )
{
struct list_head * hash_list ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
repeat :
2006-10-11 12:20:59 +04:00
record = kmem_cache_alloc ( jbd2_revoke_record_cache , GFP_NOFS ) ;
2006-10-11 12:20:57 +04:00
if ( ! record )
goto oom ;
record - > sequence = seq ;
record - > blocknr = blocknr ;
hash_list = & journal - > j_revoke - > hash_table [ hash ( journal , blocknr ) ] ;
spin_lock ( & journal - > j_revoke_lock ) ;
list_add ( & record - > hash , hash_list ) ;
spin_unlock ( & journal - > j_revoke_lock ) ;
return 0 ;
oom :
if ( ! journal_oom_retry )
return - ENOMEM ;
2008-04-17 18:38:59 +04:00
jbd_debug ( 1 , " ENOMEM in %s, retrying \n " , __func__ ) ;
2006-10-11 12:20:57 +04:00
yield ( ) ;
goto repeat ;
}
/* Find a revoke record in the journal's hash table. */
2006-10-11 12:20:59 +04:00
static struct jbd2_revoke_record_s * find_revoke_record ( journal_t * journal ,
2006-10-11 12:21:13 +04:00
unsigned long long blocknr )
2006-10-11 12:20:57 +04:00
{
struct list_head * hash_list ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
hash_list = & journal - > j_revoke - > hash_table [ hash ( journal , blocknr ) ] ;
spin_lock ( & journal - > j_revoke_lock ) ;
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * ) hash_list - > next ;
2006-10-11 12:20:57 +04:00
while ( & ( record - > hash ) ! = hash_list ) {
if ( record - > blocknr = = blocknr ) {
spin_unlock ( & journal - > j_revoke_lock ) ;
return record ;
}
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * ) record - > hash . next ;
2006-10-11 12:20:57 +04:00
}
spin_unlock ( & journal - > j_revoke_lock ) ;
return NULL ;
}
2008-04-28 17:40:00 +04:00
/*
 * Destroy whichever of the two revoke slab caches currently exist and
 * reset their pointers to NULL, record cache first.
 */
void jbd2_journal_destroy_revoke_caches(void)
{
	struct kmem_cache **slots[] = {
		&jbd2_revoke_record_cache,
		&jbd2_revoke_table_cache,
	};
	unsigned int i;

	for (i = 0; i < sizeof(slots) / sizeof(slots[0]); i++) {
		if (!*slots[i])
			continue;
		kmem_cache_destroy(*slots[i]);
		*slots[i] = NULL;
	}
}
2006-10-11 12:20:59 +04:00
/*
 * Create the slab caches for revoke records and revoke tables.  Neither
 * cache may exist yet.
 *
 * Returns 0 on success, or -ENOMEM with no cache left behind.
 */
int __init jbd2_journal_init_revoke_caches(void)
{
	J_ASSERT(!jbd2_revoke_record_cache);
	J_ASSERT(!jbd2_revoke_table_cache);

	jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
					SLAB_HWCACHE_ALIGN | SLAB_TEMPORARY);
	if (!jbd2_revoke_record_cache)
		return -ENOMEM;

	jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
					SLAB_TEMPORARY);
	if (jbd2_revoke_table_cache)
		return 0;

	/* The table cache failed: undo the record cache as well. */
	jbd2_journal_destroy_revoke_caches();
	return -ENOMEM;
}
2008-04-17 18:38:59 +04:00
static struct jbd2_revoke_table_s * jbd2_journal_init_revoke_table ( int hash_size )
2006-10-11 12:20:57 +04:00
{
2008-04-17 18:38:59 +04:00
int shift = 0 ;
int tmp = hash_size ;
struct jbd2_revoke_table_s * table ;
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
table = kmem_cache_alloc ( jbd2_revoke_table_cache , GFP_KERNEL ) ;
if ( ! table )
goto out ;
2006-10-11 12:20:57 +04:00
while ( ( tmp > > = 1UL ) ! = 0UL )
shift + + ;
2008-04-17 18:38:59 +04:00
table - > hash_size = hash_size ;
table - > hash_shift = shift ;
table - > hash_table =
2006-10-11 12:20:57 +04:00
kmalloc ( hash_size * sizeof ( struct list_head ) , GFP_KERNEL ) ;
2008-04-17 18:38:59 +04:00
if ( ! table - > hash_table ) {
kmem_cache_free ( jbd2_revoke_table_cache , table ) ;
table = NULL ;
goto out ;
2006-10-11 12:20:57 +04:00
}
for ( tmp = 0 ; tmp < hash_size ; tmp + + )
2008-04-17 18:38:59 +04:00
INIT_LIST_HEAD ( & table - > hash_table [ tmp ] ) ;
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
out :
return table ;
}
static void jbd2_journal_destroy_revoke_table ( struct jbd2_revoke_table_s * table )
{
int i ;
struct list_head * hash_list ;
for ( i = 0 ; i < table - > hash_size ; i + + ) {
hash_list = & table - > hash_table [ i ] ;
J_ASSERT ( list_empty ( hash_list ) ) ;
2006-10-11 12:20:57 +04:00
}
2008-04-17 18:38:59 +04:00
kfree ( table - > hash_table ) ;
kmem_cache_free ( jbd2_revoke_table_cache , table ) ;
}
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
/*
 * Initialise the revoke tables for a given journal to a given size.
 *
 * Two tables of @hash_size buckets are allocated (one for the running and
 * one for the committing transaction); j_revoke initially points at
 * table 1.  @hash_size must be a power of two and the journal must not
 * have revoke tables yet.
 *
 * Returns 0 on success or -ENOMEM.  On failure no table is left
 * allocated and j_revoke_table[0] is NULL again.
 */
int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
{
	J_ASSERT(journal->j_revoke_table[0] == NULL);
	J_ASSERT(is_power_of_2(hash_size));

	journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
	if (!journal->j_revoke_table[0])
		goto fail0;

	journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
	if (!journal->j_revoke_table[1])
		goto fail1;

	journal->j_revoke = journal->j_revoke_table[1];

	spin_lock_init(&journal->j_revoke_lock);

	return 0;

fail1:
	jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
	/*
	 * Do not leave a dangling pointer behind: a later
	 * jbd2_journal_destroy_revoke() would otherwise free it again.
	 */
	journal->j_revoke_table[0] = NULL;
fail0:
	return -ENOMEM;
}
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
/* Destroy a journal's revoke table. The table must already be empty! */
2006-10-11 12:20:59 +04:00
void jbd2_journal_destroy_revoke ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
journal - > j_revoke = NULL ;
2008-04-17 18:38:59 +04:00
if ( journal - > j_revoke_table [ 0 ] )
jbd2_journal_destroy_revoke_table ( journal - > j_revoke_table [ 0 ] ) ;
if ( journal - > j_revoke_table [ 1 ] )
jbd2_journal_destroy_revoke_table ( journal - > j_revoke_table [ 1 ] ) ;
2006-10-11 12:20:57 +04:00
}
# ifdef __KERNEL__
/*
2006-10-11 12:20:59 +04:00
* jbd2_journal_revoke : revoke a given buffer_head from the journal . This
2006-10-11 12:20:57 +04:00
* prevents the block from being replayed during recovery if we take a
* crash after this current transaction commits . Any subsequent
* metadata writes of the buffer in this transaction cancel the
* revoke .
*
* Note that this call may block - - - it is up to the caller to make
* sure that there are no further calls to journal_write_metadata
* before the revoke is complete . In ext3 , this implies calling the
* revoke before clearing the block bitmap when we are deleting
* metadata .
*
2006-10-11 12:20:59 +04:00
* Revoke performs a jbd2_journal_forget on any buffer_head passed in as a
2006-10-11 12:20:57 +04:00
* parameter , but does _not_ forget the buffer_head if the bh was only
* found implicitly .
*
* bh_in may not be a journalled buffer - it may have come off
* the hash tables without an attached journal_head .
*
2006-10-11 12:20:59 +04:00
* If bh_in is non - zero , jbd2_journal_revoke ( ) will decrement its b_count
2006-10-11 12:20:57 +04:00
* by one .
*/
2006-10-11 12:21:13 +04:00
/*
 * Record a revoke for @blocknr in the handle's transaction.
 *
 * @bh_in may be NULL; in that case the block's buffer_head is looked up
 * and, if one is found, marked revoked and released again.  A non-NULL
 * @bh_in is marked revoked and handed to jbd2_journal_forget().
 *
 * Returns 0 on success, -EINVAL if the revoke feature cannot be enabled
 * on the journal, -EIO if the buffer is already marked revoked, or the
 * error returned by insert_revoke_hash().
 */
int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
			struct buffer_head *bh_in)
{
	struct buffer_head *bh = NULL;
	journal_t *journal;
	struct block_device *bdev;
	int err;

	might_sleep();
	if (bh_in)
		BUFFER_TRACE(bh_in, " enter ");

	journal = handle->h_transaction->t_journal;
	if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
		J_ASSERT(!" Cannot set revoke feature! ");
		return -EINVAL;
	}

	bdev = journal->j_fs_dev;
	bh = bh_in;

	if (!bh) {
		bh = __find_get_block(bdev, blocknr, journal->j_blocksize);
		if (bh)
			BUFFER_TRACE(bh, " found on hash ");
	}
#ifdef JBD2_EXPENSIVE_CHECKING
	else {
		struct buffer_head *bh2;

		/* If there is a different buffer_head lying around in
		 * memory anywhere... */
		bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize);
		if (bh2) {
			/* ... and it has RevokeValid status... */
			if (bh2 != bh && buffer_revokevalid(bh2))
				/* ...then it better be revoked too,
				 * since it's illegal to create a revoke
				 * record against a buffer_head which is
				 * not marked revoked --- that would
				 * risk missing a subsequent revoke
				 * cancel. */
				J_ASSERT_BH(bh2, buffer_revoked(bh2));
			put_bh(bh2);
		}
	}
#endif

	/* We really ought not ever to revoke twice in a row without
	   first having the revoke cancelled: it's illegal to free a
	   block twice without allocating it in between! */
	if (bh) {
		if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
				 " inconsistent data on disk ")) {
			/* Only drop the reference we took ourselves. */
			if (!bh_in)
				brelse(bh);
			return -EIO;
		}
		set_buffer_revoked(bh);
		set_buffer_revokevalid(bh);
		if (bh_in) {
			BUFFER_TRACE(bh_in, " call jbd2_journal_forget ");
			jbd2_journal_forget(handle, bh_in);
		} else {
			BUFFER_TRACE(bh, " call brelse ");
			__brelse(bh);
		}
	}

	jbd_debug(2, " insert revoke for block %llu, bh_in=%p \n ", blocknr, bh_in);
	err = insert_revoke_hash(journal, blocknr,
				 handle->h_transaction->t_tid);
	/* bh_in is optional, so guard the trace exactly like on entry. */
	if (bh_in)
		BUFFER_TRACE(bh_in, " exit ");
	return err;
}
/*
* Cancel an outstanding revoke . For use only internally by the
2006-10-11 12:20:59 +04:00
* journaling code ( called from jbd2_journal_get_write_access ) .
2006-10-11 12:20:57 +04:00
*
* We trust buffer_revoked ( ) on the buffer if the buffer is already
* being journaled : if there is no revoke pending on the buffer , then we
* don ' t do anything here .
*
* This would break if it were possible for a buffer to be revoked and
* discarded , and then reallocated within the same transaction . In such
* a case we would have lost the revoked bit , but when we arrived here
* the second time we would still have a pending revoke to cancel . So ,
* do not trust the Revoked bit on buffers unless RevokeValid is also
* set .
*/
2006-10-11 12:20:59 +04:00
/*
 * Cancel any revoke recorded for the buffer behind @jh in the handle's
 * transaction.
 *
 * Returns 1 if a revoke record was found and removed, 0 otherwise.
 */
int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
{
	journal_t *journal = handle->h_transaction->t_journal;
	struct buffer_head *bh = jh2bh(jh);
	struct jbd2_revoke_record_s *record;
	int did_revoke = 0;	/* akpm: debug */
	int need_cancel = 1;

	jbd_debug(4, " journal_head %p, cancelling revoke \n ", jh);

	/*
	 * If RevokeValid was already set, the Revoked bit can be trusted
	 * and decides whether a full cancel is needed.  Otherwise the bit
	 * means nothing: clear it and always search for a record.
	 */
	if (test_set_buffer_revokevalid(bh))
		need_cancel = test_clear_buffer_revoked(bh);
	else
		clear_buffer_revoked(bh);

	if (need_cancel) {
		record = find_revoke_record(journal, bh->b_blocknr);
		if (record) {
			jbd_debug(4, " cancelled existing revoke on "
				  " blocknr %llu \n ", (unsigned long long)bh->b_blocknr);
			spin_lock(&journal->j_revoke_lock);
			list_del(&record->hash);
			spin_unlock(&journal->j_revoke_lock);
			kmem_cache_free(jbd2_revoke_record_cache, record);
			did_revoke = 1;
		}
	}

#ifdef JBD2_EXPENSIVE_CHECKING
	/* By now no record for this block may remain. */
	record = find_revoke_record(journal, bh->b_blocknr);
	J_ASSERT_JH(jh, record == NULL);
#endif

	/*
	 * The buffer we were given may be an unhashed alias.  If we just
	 * cancelled a revoke through it, the hashed buffer_head for the
	 * same block must lose its Revoked bit too, or the revoke state
	 * machine gets confused later.
	 */
	if (need_cancel) {
		struct buffer_head *alias;

		alias = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size);
		if (alias) {
			if (alias != bh)
				clear_buffer_revoked(alias);
			__brelse(alias);
		}
	}

	return did_revoke;
}
2011-12-29 02:46:46 +04:00
/*
 * jbd2_clear_buffer_revoked_flags clears the Revoked flag on the buffers
 * of all blocks recorded in the journal->j_revoke table, to reflect that
 * there are no revoked buffers in the next transaction that is about to
 * be started.
 */
void jbd2_clear_buffer_revoked_flags(journal_t *journal)
{
	struct jbd2_revoke_table_s *table = journal->j_revoke;
	int bucket;

	for (bucket = 0; bucket < table->hash_size; bucket++) {
		struct list_head *head = &table->hash_table[bucket];
		struct list_head *pos;

		for (pos = head->next; pos != head; pos = pos->next) {
			struct jbd2_revoke_record_s *rec =
				(struct jbd2_revoke_record_s *)pos;
			struct buffer_head *bh;

			bh = __find_get_block(journal->j_fs_dev,
					      rec->blocknr,
					      journal->j_blocksize);
			if (!bh)
				continue;
			clear_buffer_revoked(bh);
			__brelse(bh);
		}
	}
}
2006-10-11 12:20:57 +04:00
/* journal_switch_revoke table select j_revoke for next transaction
* we do not want to suspend any processing until all revokes are
* written - bzzz
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_switch_revoke_table ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int i ;
if ( journal - > j_revoke = = journal - > j_revoke_table [ 0 ] )
journal - > j_revoke = journal - > j_revoke_table [ 1 ] ;
else
journal - > j_revoke = journal - > j_revoke_table [ 0 ] ;
for ( i = 0 ; i < journal - > j_revoke - > hash_size ; i + + )
INIT_LIST_HEAD ( & journal - > j_revoke - > hash_table [ i ] ) ;
}
/*
* Write revoke records to the journal for all entries in the current
* revoke hash , deleting the entries as we go .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_write_revoke_records ( journal_t * journal ,
2009-04-14 15:50:56 +04:00
transaction_t * transaction ,
int write_op )
2006-10-11 12:20:57 +04:00
{
struct journal_head * descriptor ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
struct jbd2_revoke_table_s * revoke ;
2006-10-11 12:20:57 +04:00
struct list_head * hash_list ;
int i , offset , count ;
descriptor = NULL ;
offset = 0 ;
count = 0 ;
/* select revoke table for committing transaction */
revoke = journal - > j_revoke = = journal - > j_revoke_table [ 0 ] ?
journal - > j_revoke_table [ 1 ] : journal - > j_revoke_table [ 0 ] ;
for ( i = 0 ; i < revoke - > hash_size ; i + + ) {
hash_list = & revoke - > hash_table [ i ] ;
while ( ! list_empty ( hash_list ) ) {
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * )
2006-10-11 12:20:57 +04:00
hash_list - > next ;
write_one_revoke_record ( journal , transaction ,
& descriptor , & offset ,
2009-04-14 15:50:56 +04:00
record , write_op ) ;
2006-10-11 12:20:57 +04:00
count + + ;
list_del ( & record - > hash ) ;
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_revoke_record_cache , record ) ;
2006-10-11 12:20:57 +04:00
}
}
if ( descriptor )
2009-04-14 15:50:56 +04:00
flush_descriptor ( journal , descriptor , offset , write_op ) ;
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 , " Wrote %d revoke records \n " , count ) ;
}
/*
 * Write out one revoke record.  We need to create a new descriptor
 * block if the old one is full or if we have not already created one.
 *
 * @descriptorp / @offsetp carry the current descriptor buffer and the
 * byte offset of the next free slot in it between calls; both are
 * updated here.  If a full descriptor is flushed and no replacement can
 * be obtained, *descriptorp is left NULL and the record is dropped.
 */
static void write_one_revoke_record(journal_t *journal,
				    transaction_t *transaction,
				    struct journal_head **descriptorp,
				    int *offsetp,
				    struct jbd2_revoke_record_s *record,
				    int write_op)
{
	int csum_size = 0;
	int rec_size;
	struct journal_head *descriptor;
	int offset;
	journal_header_t *header;

	/* If we are already aborting, this all becomes a noop.  We
	   still need to go round the loop in
	   jbd2_journal_write_revoke_records in order to free all of the
	   revoke records: only the IO to the journal is omitted. */
	if (is_journal_aborted(journal))
		return;

	descriptor = *descriptorp;
	offset = *offsetp;

	/* Do we need to leave space at the end for a checksum? */
	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
		csum_size = sizeof(struct jbd2_journal_revoke_tail);

	/* On-disk size of one record: 64-bit or 32-bit block number. */
	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
		rec_size = 8;
	else
		rec_size = 4;

	/*
	 * Make sure we have a descriptor with space left for the WHOLE
	 * record.  Testing only "offset >= limit" checks that the first
	 * byte fits, which lets a record wider than the checksum tail
	 * spill into the tail.
	 */
	if (descriptor &&
	    offset + rec_size > journal->j_blocksize - csum_size) {
		flush_descriptor(journal, descriptor, offset, write_op);
		descriptor = NULL;
		/*
		 * The caller must not see the flushed buffer again if we
		 * fail to get a replacement below, or it would be flushed
		 * a second time.
		 */
		*descriptorp = NULL;
	}

	if (!descriptor) {
		descriptor = jbd2_journal_get_descriptor_buffer(journal);
		if (!descriptor)
			return;
		header = (journal_header_t *)&jh2bh(descriptor)->b_data[0];
		header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
		header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
		header->h_sequence = cpu_to_be32(transaction->t_tid);

		/* Record it so that we can wait for IO completion later */
		JBUFFER_TRACE(descriptor, " file as BJ_LogCtl ");
		jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl);

		offset = sizeof(jbd2_journal_revoke_header_t);
		*descriptorp = descriptor;
	}

	if (rec_size == 8)
		*((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
			cpu_to_be64(record->blocknr);
	else
		*((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
			cpu_to_be32(record->blocknr);
	offset += rec_size;

	*offsetp = offset;
}
2012-05-27 16:08:24 +04:00
/*
 * Fill in the checksum tail of a revoke descriptor block.  Does nothing
 * unless the journal has the CSUM_V2 incompat feature.
 *
 * The tail occupies the last bytes of the block; the checksum is taken
 * over the whole block with the checksum field zeroed.
 */
static void jbd2_revoke_csum_set(journal_t *j,
				 struct journal_head *descriptor)
{
	struct jbd2_journal_revoke_tail *tail;
	struct buffer_head *bh;
	__u32 csum;

	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
		return;

	bh = jh2bh(descriptor);
	tail = (struct jbd2_journal_revoke_tail *)
		(bh->b_data + j->j_blocksize - sizeof(*tail));

	tail->r_checksum = 0;
	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
	tail->r_checksum = cpu_to_be32(csum);
}
2006-10-11 12:20:57 +04:00
/*
 * Send a revoke descriptor block to the journal.
 *
 * @offset is stored in the block header as r_count.  When the journal is
 * aborting no IO is issued and only the buffer reference is dropped.
 * Otherwise the buffer becomes one that commit has to wait for, so it
 * has to go onto the appropriate journal buffer list.
 */
static void flush_descriptor(journal_t *journal,
			     struct journal_head *descriptor,
			     int offset, int write_op)
{
	struct buffer_head *bh = jh2bh(descriptor);
	jbd2_journal_revoke_header_t *header;

	if (is_journal_aborted(journal)) {
		put_bh(bh);
		return;
	}

	header = (jbd2_journal_revoke_header_t *)bh->b_data;
	header->r_count = cpu_to_be32(offset);
	/* The checksum must be computed after r_count is final. */
	jbd2_revoke_csum_set(journal, descriptor);

	set_buffer_jwrite(bh);
	BUFFER_TRACE(bh, " write ");
	set_buffer_dirty(bh);
	write_dirty_buffer(bh, write_op);
}
# endif
/*
* Revoke support for recovery .
*
* Recovery needs to be able to :
*
* record all revoke records , including the tid of the latest instance
* of each revoke in the journal
*
* check whether a given block in a given transaction should be replayed
* ( ie . has not been revoked by a revoke record in that or a subsequent
* transaction )
*
* empty the revoke table after recovery .
*/
/*
* First , setting revoke records . We create a new revoke record for
* every block ever revoked in the log as we scan it for recovery , and
* we update the existing records if we find multiple revokes for a
* single block .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_set_revoke ( journal_t * journal ,
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
tid_t sequence )
{
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
record = find_revoke_record ( journal , blocknr ) ;
if ( record ) {
/* If we have multiple occurrences, only record the
* latest sequence number in the hashed record */
if ( tid_gt ( sequence , record - > sequence ) )
record - > sequence = sequence ;
return 0 ;
}
return insert_revoke_hash ( journal , blocknr , sequence ) ;
}
/*
* Test revoke records . For a given block referenced in the log , has
* that block been revoked ? A revoke record with a given transaction
* sequence number revokes all blocks in that transaction and earlier
* ones , but later transactions still need replayed .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_test_revoke ( journal_t * journal ,
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
tid_t sequence )
{
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
record = find_revoke_record ( journal , blocknr ) ;
if ( ! record )
return 0 ;
if ( tid_gt ( sequence , record - > sequence ) )
return 0 ;
return 1 ;
}
/*
* Finally , once recovery is over , we need to clear the revoke table so
* that it can be reused by the running filesystem .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_clear_revoke ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int i ;
struct list_head * hash_list ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
struct jbd2_revoke_table_s * revoke ;
2006-10-11 12:20:57 +04:00
revoke = journal - > j_revoke ;
for ( i = 0 ; i < revoke - > hash_size ; i + + ) {
hash_list = & revoke - > hash_table [ i ] ;
while ( ! list_empty ( hash_list ) ) {
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * ) hash_list - > next ;
2006-10-11 12:20:57 +04:00
list_del ( & record - > hash ) ;
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_revoke_record_cache , record ) ;
2006-10-11 12:20:57 +04:00
}
}
}