2017-12-18 06:00:59 +03:00
// SPDX-License-Identifier: GPL-2.0+
2006-10-11 12:20:57 +04:00
/*
2007-05-09 09:51:49 +04:00
* linux / fs / jbd2 / revoke . c
2006-10-11 12:20:57 +04:00
*
* Written by Stephen C . Tweedie < sct @ redhat . com > , 2000
*
* Copyright 2000 Red Hat corp - - - All Rights Reserved
*
* Journal revoke routines for the generic filesystem journaling code ;
* part of the ext2fs journaling system .
*
* Revoke is the mechanism used to prevent old log records for deleted
* metadata from being replayed on top of newer data using the same
* blocks . The revoke mechanism is used in two separate places :
*
* + Commit : during commit we write the entire list of the current
* transaction ' s revoked blocks to the journal
*
* + Recovery : during recovery we record the transaction ID of all
* revoked blocks . If there are multiple revoke records in the log
* for a single block , only the last one counts , and if there is a log
* entry for a block beyond the last revoke , then that log entry still
* gets replayed .
*
* We can get interactions between revokes and new log data within a
* single transaction :
*
* Block is revoked and then journaled :
* The desired end result is the journaling of the new block , so we
* cancel the revoke before the transaction commits .
*
* Block is journaled and then revoked :
* The revoke must take precedence over the write of the block , so we
* need either to cancel the journal entry or to write the revoke
* later in the log than the log block . In this case , we choose the
* latter : journaling a block cancels any revoke record for that block
* in the current transaction , so any revoke for that block in the
* transaction must have happened after the block was journaled and so
* the revoke must take precedence .
*
* Block is revoked and then written as data :
* The data write is allowed to succeed , but the revoke is _not_
* cancelled . We still need to prevent old log records from
* overwriting the new data . We don ' t even need to clear the revoke
* bit here .
*
2011-12-29 02:46:46 +04:00
 * We cache the revoke status of a buffer in the current transaction in its
 * b_state bits. As the name says, the RevokeValid flag indicates that the
 * cached revoke status of the buffer is valid and can be relied upon.
*
2006-10-11 12:20:57 +04:00
* Revoke information on buffers is a tri - state value :
*
* RevokeValid clear : no cached revoke status , need to look it up
* RevokeValid set , Revoked clear :
* buffer has not been revoked , and cancel_revoke
* need do nothing .
* RevokeValid set , Revoked set :
* buffer has been revoked .
2009-03-28 00:20:40 +03:00
*
* Locking rules :
* We keep two hash tables of revoke records . One hashtable belongs to the
* running transaction ( is pointed to by journal - > j_revoke ) , the other one
* belongs to the committing transaction . Accesses to the second hash table
* happen only from the kjournald and no other thread touches this table . Also
* journal_switch_revoke_table ( ) which switches which hashtable belongs to the
* running and which to the committing transaction is called only from
* kjournald . Therefore we need no locks when accessing the hashtable belonging
* to the committing transaction .
*
* All users operating on the hash table belonging to the running transaction
* have a handle to the transaction . Therefore they are safe from kjournald
* switching hash tables under them . For operations on the lists of entries in
* the hash table j_revoke_lock is used .
*
2011-03-31 05:57:33 +04:00
 * Finally, the replay code also uses the hash tables, but at that point no one
 * else can touch them (the filesystem isn't mounted yet), hence no locking is
 * needed.
2006-10-11 12:20:57 +04:00
*/
# ifndef __KERNEL__
# include "jfs_user.h"
# else
# include <linux/time.h>
# include <linux/fs.h>
2006-10-11 12:20:59 +04:00
# include <linux/jbd2.h>
2006-10-11 12:20:57 +04:00
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/list.h>
# include <linux/init.h>
2009-04-14 15:50:56 +04:00
# include <linux/bio.h>
2007-07-16 10:41:17 +04:00
# include <linux/log2.h>
2014-10-30 17:53:17 +03:00
# include <linux/hash.h>
2014-08-28 02:40:07 +04:00
# endif
2006-10-11 12:20:57 +04:00
2006-12-07 07:33:20 +03:00
static struct kmem_cache * jbd2_revoke_record_cache ;
static struct kmem_cache * jbd2_revoke_table_cache ;
2006-10-11 12:20:57 +04:00
/* Each revoke record represents one single revoked block. During
journal replay , this involves recording the transaction ID of the
last transaction to revoke this block . */
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s
2006-10-11 12:20:57 +04:00
{
struct list_head hash ;
tid_t sequence ; /* Used for recovery only */
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ;
2006-10-11 12:20:57 +04:00
} ;
/*
 * The revoke table: a plain hash table whose buckets are lists of
 * revoke records.
 */
struct jbd2_revoke_table_s {
	/*
	 * Number of buckets.  Must be a power of two.  It is conceivable
	 * that recovery might want a larger table than normal operation.
	 */
	int hash_size;
	/* Number of hash bits handed to the hash function for this table. */
	int hash_shift;
	/* Array of hash_size bucket list heads. */
	struct list_head *hash_table;
};
# ifdef __KERNEL__
2016-02-23 07:07:30 +03:00
/* Forward declarations of the commit-time helpers defined further down. */
static void write_one_revoke_record(transaction_t *transaction,
				    struct list_head *log_bufs,
				    struct buffer_head **descriptorp,
				    int *offsetp,
				    struct jbd2_revoke_record_s *record);
static void flush_descriptor(journal_t *journal,
			     struct buffer_head *descriptor,
			     int offset);
2006-10-11 12:20:57 +04:00
# endif
/* Utility functions to maintain the revoke table */
2006-10-11 12:21:13 +04:00
/*
 * Map a block number to a bucket index of the journal's current revoke
 * table (journal->j_revoke), using that table's hash_shift as the number
 * of hash bits.
 */
static inline int hash(journal_t *journal, unsigned long long block)
{
	struct jbd2_revoke_table_s *table = journal->j_revoke;

	return hash_64(block, table->hash_shift);
}
2006-10-11 12:21:13 +04:00
static int insert_revoke_hash ( journal_t * journal , unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
tid_t seq )
{
struct list_head * hash_list ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2015-06-15 22:45:58 +03:00
gfp_t gfp_mask = GFP_NOFS ;
2006-10-11 12:20:57 +04:00
2015-06-15 22:45:58 +03:00
if ( journal_oom_retry )
gfp_mask | = __GFP_NOFAIL ;
record = kmem_cache_alloc ( jbd2_revoke_record_cache , gfp_mask ) ;
2006-10-11 12:20:57 +04:00
if ( ! record )
2015-06-15 22:45:58 +03:00
return - ENOMEM ;
2006-10-11 12:20:57 +04:00
record - > sequence = seq ;
record - > blocknr = blocknr ;
hash_list = & journal - > j_revoke - > hash_table [ hash ( journal , blocknr ) ] ;
spin_lock ( & journal - > j_revoke_lock ) ;
list_add ( & record - > hash , hash_list ) ;
spin_unlock ( & journal - > j_revoke_lock ) ;
return 0 ;
}
/* Find a revoke record in the journal's hash table. */
2006-10-11 12:20:59 +04:00
static struct jbd2_revoke_record_s * find_revoke_record ( journal_t * journal ,
2006-10-11 12:21:13 +04:00
unsigned long long blocknr )
2006-10-11 12:20:57 +04:00
{
struct list_head * hash_list ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
hash_list = & journal - > j_revoke - > hash_table [ hash ( journal , blocknr ) ] ;
spin_lock ( & journal - > j_revoke_lock ) ;
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * ) hash_list - > next ;
2006-10-11 12:20:57 +04:00
while ( & ( record - > hash ) ! = hash_list ) {
if ( record - > blocknr = = blocknr ) {
spin_unlock ( & journal - > j_revoke_lock ) ;
return record ;
}
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * ) record - > hash . next ;
2006-10-11 12:20:57 +04:00
}
spin_unlock ( & journal - > j_revoke_lock ) ;
return NULL ;
}
2019-05-11 04:15:47 +03:00
void jbd2_journal_destroy_revoke_record_cache ( void )
2008-04-28 17:40:00 +04:00
{
2018-05-21 05:38:26 +03:00
kmem_cache_destroy ( jbd2_revoke_record_cache ) ;
jbd2_revoke_record_cache = NULL ;
2019-05-11 04:15:47 +03:00
}
/*
 * Tear down the slab cache used for revoke tables and forget the pointer,
 * so that the cache can be set up again later.
 */
void jbd2_journal_destroy_revoke_table_cache(void)
{
	kmem_cache_destroy(jbd2_revoke_table_cache);
	jbd2_revoke_table_cache = NULL;
}
2019-05-11 04:15:47 +03:00
/*
 * Create the slab cache that revoke records are allocated from.
 * The cache must not exist yet.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created.
 */
int __init jbd2_journal_init_revoke_record_cache(void)
{
	J_ASSERT(!jbd2_revoke_record_cache);

	jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
					SLAB_HWCACHE_ALIGN | SLAB_TEMPORARY);
	if (jbd2_revoke_record_cache)
		return 0;

	pr_emerg(" JBD2: failed to create revoke_record cache \n ");
	return -ENOMEM;
}
/*
 * Create the slab cache that revoke tables are allocated from.
 * The cache must not exist yet.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created.
 */
int __init jbd2_journal_init_revoke_table_cache(void)
{
	J_ASSERT(!jbd2_revoke_table_cache);

	jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
					     SLAB_TEMPORARY);
	if (jbd2_revoke_table_cache)
		return 0;

	pr_emerg(" JBD2: failed to create revoke_table cache \n ");
	return -ENOMEM;
}
2008-04-17 18:38:59 +04:00
static struct jbd2_revoke_table_s * jbd2_journal_init_revoke_table ( int hash_size )
2006-10-11 12:20:57 +04:00
{
2008-04-17 18:38:59 +04:00
int shift = 0 ;
int tmp = hash_size ;
struct jbd2_revoke_table_s * table ;
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
table = kmem_cache_alloc ( jbd2_revoke_table_cache , GFP_KERNEL ) ;
if ( ! table )
goto out ;
2006-10-11 12:20:57 +04:00
while ( ( tmp > > = 1UL ) ! = 0UL )
shift + + ;
2008-04-17 18:38:59 +04:00
table - > hash_size = hash_size ;
table - > hash_shift = shift ;
table - > hash_table =
treewide: kmalloc() -> kmalloc_array()
The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
patch replaces cases of:
kmalloc(a * b, gfp)
with:
kmalloc_array(a * b, gfp)
as well as handling cases of:
kmalloc(a * b * c, gfp)
with:
kmalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kmalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kmalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The tools/ directory was manually excluded, since it has its own
implementation of kmalloc().
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kmalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kmalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kmalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kmalloc
+ kmalloc_array
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kmalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kmalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kmalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kmalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kmalloc(C1 * C2 * C3, ...)
|
kmalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kmalloc(sizeof(THING) * C2, ...)
|
kmalloc(sizeof(TYPE) * C2, ...)
|
kmalloc(C1 * C2 * C3, ...)
|
kmalloc(C1 * C2, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- (E1) * E2
+ E1, E2
, ...)
|
- kmalloc
+ kmalloc_array
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kmalloc
+ kmalloc_array
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 23:55:00 +03:00
kmalloc_array ( hash_size , sizeof ( struct list_head ) , GFP_KERNEL ) ;
2008-04-17 18:38:59 +04:00
if ( ! table - > hash_table ) {
kmem_cache_free ( jbd2_revoke_table_cache , table ) ;
table = NULL ;
goto out ;
2006-10-11 12:20:57 +04:00
}
for ( tmp = 0 ; tmp < hash_size ; tmp + + )
2008-04-17 18:38:59 +04:00
INIT_LIST_HEAD ( & table - > hash_table [ tmp ] ) ;
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
out :
return table ;
}
static void jbd2_journal_destroy_revoke_table ( struct jbd2_revoke_table_s * table )
{
int i ;
struct list_head * hash_list ;
for ( i = 0 ; i < table - > hash_size ; i + + ) {
hash_list = & table - > hash_table [ i ] ;
J_ASSERT ( list_empty ( hash_list ) ) ;
2006-10-11 12:20:57 +04:00
}
2008-04-17 18:38:59 +04:00
kfree ( table - > hash_table ) ;
kmem_cache_free ( jbd2_revoke_table_cache , table ) ;
}
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
/*
 * Set up revoke support for a journal: allocate both revoke tables with
 * @hash_size buckets each (a power of two), make the second one the current
 * table and initialise j_revoke_lock.
 *
 * The journal must not have a revoke table yet.
 *
 * Returns 0 on success or -ENOMEM; on failure no table is left allocated.
 */
int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
{
	J_ASSERT(journal->j_revoke_table[0] == NULL);
	J_ASSERT(is_power_of_2(hash_size));

	journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
	if (journal->j_revoke_table[0] == NULL)
		return -ENOMEM;

	journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
	if (journal->j_revoke_table[1] == NULL) {
		/* Undo the first allocation and leave the slot clean. */
		jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
		journal->j_revoke_table[0] = NULL;
		return -ENOMEM;
	}

	journal->j_revoke = journal->j_revoke_table[1];
	spin_lock_init(&journal->j_revoke_lock);

	return 0;
}
2006-10-11 12:20:57 +04:00
2008-04-17 18:38:59 +04:00
/* Destroy a journal's revoke table. The table must already be empty! */
2006-10-11 12:20:59 +04:00
void jbd2_journal_destroy_revoke ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
journal - > j_revoke = NULL ;
2008-04-17 18:38:59 +04:00
if ( journal - > j_revoke_table [ 0 ] )
jbd2_journal_destroy_revoke_table ( journal - > j_revoke_table [ 0 ] ) ;
if ( journal - > j_revoke_table [ 1 ] )
jbd2_journal_destroy_revoke_table ( journal - > j_revoke_table [ 1 ] ) ;
2006-10-11 12:20:57 +04:00
}
# ifdef __KERNEL__
/*
2006-10-11 12:20:59 +04:00
* jbd2_journal_revoke : revoke a given buffer_head from the journal . This
2006-10-11 12:20:57 +04:00
* prevents the block from being replayed during recovery if we take a
* crash after this current transaction commits . Any subsequent
* metadata writes of the buffer in this transaction cancel the
* revoke .
*
* Note that this call may block - - - it is up to the caller to make
* sure that there are no further calls to journal_write_metadata
* before the revoke is complete . In ext3 , this implies calling the
* revoke before clearing the block bitmap when we are deleting
* metadata .
*
2006-10-11 12:20:59 +04:00
* Revoke performs a jbd2_journal_forget on any buffer_head passed in as a
2006-10-11 12:20:57 +04:00
* parameter , but does _not_ forget the buffer_head if the bh was only
* found implicitly .
*
* bh_in may not be a journalled buffer - it may have come off
* the hash tables without an attached journal_head .
*
2006-10-11 12:20:59 +04:00
* If bh_in is non - zero , jbd2_journal_revoke ( ) will decrement its b_count
2006-10-11 12:20:57 +04:00
* by one .
*/
2006-10-11 12:21:13 +04:00
int jbd2_journal_revoke ( handle_t * handle , unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
struct buffer_head * bh_in )
{
struct buffer_head * bh = NULL ;
journal_t * journal ;
struct block_device * bdev ;
int err ;
might_sleep ( ) ;
if ( bh_in )
BUFFER_TRACE ( bh_in , " enter " ) ;
journal = handle - > h_transaction - > t_journal ;
2006-10-11 12:20:59 +04:00
if ( ! jbd2_journal_set_features ( journal , 0 , 0 , JBD2_FEATURE_INCOMPAT_REVOKE ) ) {
2006-10-11 12:20:57 +04:00
J_ASSERT ( ! " Cannot set revoke feature! " ) ;
return - EINVAL ;
}
bdev = journal - > j_fs_dev ;
bh = bh_in ;
if ( ! bh ) {
bh = __find_get_block ( bdev , blocknr , journal - > j_blocksize ) ;
if ( bh )
BUFFER_TRACE ( bh , " found on hash " ) ;
}
2007-10-17 02:38:25 +04:00
# ifdef JBD2_EXPENSIVE_CHECKING
2006-10-11 12:20:57 +04:00
else {
struct buffer_head * bh2 ;
/* If there is a different buffer_head lying around in
* memory anywhere . . . */
bh2 = __find_get_block ( bdev , blocknr , journal - > j_blocksize ) ;
if ( bh2 ) {
/* ... and it has RevokeValid status... */
if ( bh2 ! = bh & & buffer_revokevalid ( bh2 ) )
/* ...then it better be revoked too,
* since it ' s illegal to create a revoke
* record against a buffer_head which is
* not marked revoked - - - that would
* risk missing a subsequent revoke
* cancel . */
J_ASSERT_BH ( bh2 , buffer_revoked ( bh2 ) ) ;
put_bh ( bh2 ) ;
}
}
# endif
2019-11-05 19:44:26 +03:00
if ( WARN_ON_ONCE ( handle - > h_revoke_credits < = 0 ) ) {
if ( ! bh_in )
brelse ( bh ) ;
return - EIO ;
}
2006-10-11 12:20:57 +04:00
/* We really ought not ever to revoke twice in a row without
first having the revoke cancelled : it ' s illegal to free a
block twice without allocating it in between ! */
if ( bh ) {
if ( ! J_EXPECT_BH ( bh , ! buffer_revoked ( bh ) ,
" inconsistent data on disk " ) ) {
if ( ! bh_in )
brelse ( bh ) ;
return - EIO ;
}
set_buffer_revoked ( bh ) ;
set_buffer_revokevalid ( bh ) ;
if ( bh_in ) {
2006-10-11 12:20:59 +04:00
BUFFER_TRACE ( bh_in , " call jbd2_journal_forget " ) ;
jbd2_journal_forget ( handle , bh_in ) ;
2006-10-11 12:20:57 +04:00
} else {
BUFFER_TRACE ( bh , " call brelse " ) ;
__brelse ( bh ) ;
}
}
2019-11-05 19:44:26 +03:00
handle - > h_revoke_credits - - ;
2006-10-11 12:20:57 +04:00
2006-10-11 12:21:09 +04:00
jbd_debug ( 2 , " insert revoke for block %llu, bh_in=%p \n " , blocknr , bh_in ) ;
2006-10-11 12:20:57 +04:00
err = insert_revoke_hash ( journal , blocknr ,
handle - > h_transaction - > t_tid ) ;
BUFFER_TRACE ( bh_in , " exit " ) ;
return err ;
}
/*
* Cancel an outstanding revoke . For use only internally by the
2006-10-11 12:20:59 +04:00
* journaling code ( called from jbd2_journal_get_write_access ) .
2006-10-11 12:20:57 +04:00
*
* We trust buffer_revoked ( ) on the buffer if the buffer is already
* being journaled : if there is no revoke pending on the buffer , then we
* don ' t do anything here .
*
* This would break if it were possible for a buffer to be revoked and
* discarded , and then reallocated within the same transaction . In such
* a case we would have lost the revoked bit , but when we arrived here
* the second time we would still have a pending revoke to cancel . So ,
* do not trust the Revoked bit on buffers unless RevokeValid is also
* set .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_cancel_revoke ( handle_t * handle , struct journal_head * jh )
2006-10-11 12:20:57 +04:00
{
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
journal_t * journal = handle - > h_transaction - > t_journal ;
int need_cancel ;
int did_revoke = 0 ; /* akpm: debug */
struct buffer_head * bh = jh2bh ( jh ) ;
jbd_debug ( 4 , " journal_head %p, cancelling revoke \n " , jh ) ;
/* Is the existing Revoke bit valid? If so, we trust it, and
* only perform the full cancel if the revoke bit is set . If
* not , we can ' t trust the revoke bit , and we need to do the
* full search for a revoke record . */
if ( test_set_buffer_revokevalid ( bh ) ) {
need_cancel = test_clear_buffer_revoked ( bh ) ;
} else {
need_cancel = 1 ;
clear_buffer_revoked ( bh ) ;
}
if ( need_cancel ) {
record = find_revoke_record ( journal , bh - > b_blocknr ) ;
if ( record ) {
jbd_debug ( 4 , " cancelled existing revoke on "
" blocknr %llu \n " , ( unsigned long long ) bh - > b_blocknr ) ;
spin_lock ( & journal - > j_revoke_lock ) ;
list_del ( & record - > hash ) ;
spin_unlock ( & journal - > j_revoke_lock ) ;
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_revoke_record_cache , record ) ;
2006-10-11 12:20:57 +04:00
did_revoke = 1 ;
}
}
2007-10-17 02:38:25 +04:00
# ifdef JBD2_EXPENSIVE_CHECKING
2006-10-11 12:20:57 +04:00
/* There better not be one left behind by now! */
record = find_revoke_record ( journal , bh - > b_blocknr ) ;
J_ASSERT_JH ( jh , record = = NULL ) ;
# endif
/* Finally, have we just cleared revoke on an unhashed
* buffer_head ? If so , we ' d better make sure we clear the
* revoked status on any hashed alias too , otherwise the revoke
* state machine will get very upset later on . */
if ( need_cancel ) {
struct buffer_head * bh2 ;
bh2 = __find_get_block ( bh - > b_bdev , bh - > b_blocknr , bh - > b_size ) ;
if ( bh2 ) {
if ( bh2 ! = bh )
clear_buffer_revoked ( bh2 ) ;
__brelse ( bh2 ) ;
}
}
return did_revoke ;
}
2011-12-29 02:46:46 +04:00
/*
 * jbd2_clear_buffer_revoked_flags clears the Revoked flag on every buffer
 * that has a record in the journal's current revoke table, reflecting that
 * there are no revoked buffers in the next transaction which is about to be
 * started.  Blocks without a cached buffer_head are skipped.
 */
void jbd2_clear_buffer_revoked_flags(journal_t *journal)
{
	struct jbd2_revoke_table_s *table = journal->j_revoke;
	int bucket;

	for (bucket = 0; bucket < table->hash_size; bucket++) {
		struct list_head *head = &table->hash_table[bucket];
		struct list_head *pos;

		for (pos = head->next; pos != head; pos = pos->next) {
			/* 'hash' is the first member of the record. */
			struct jbd2_revoke_record_s *rec =
				(struct jbd2_revoke_record_s *)pos;
			struct buffer_head *bh;

			bh = __find_get_block(journal->j_fs_dev,
					      rec->blocknr,
					      journal->j_blocksize);
			if (!bh)
				continue;

			clear_buffer_revoked(bh);
			__brelse(bh);
		}
	}
}
2006-10-11 12:20:57 +04:00
/* journal_switch_revoke table select j_revoke for next transaction
* we do not want to suspend any processing until all revokes are
* written - bzzz
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_switch_revoke_table ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int i ;
if ( journal - > j_revoke = = journal - > j_revoke_table [ 0 ] )
journal - > j_revoke = journal - > j_revoke_table [ 1 ] ;
else
journal - > j_revoke = journal - > j_revoke_table [ 0 ] ;
for ( i = 0 ; i < journal - > j_revoke - > hash_size ; i + + )
INIT_LIST_HEAD ( & journal - > j_revoke - > hash_table [ i ] ) ;
}
/*
* Write revoke records to the journal for all entries in the current
* revoke hash , deleting the entries as we go .
*/
2016-02-23 07:07:30 +03:00
void jbd2_journal_write_revoke_records ( transaction_t * transaction ,
struct list_head * log_bufs )
2006-10-11 12:20:57 +04:00
{
2016-02-23 07:07:30 +03:00
journal_t * journal = transaction - > t_journal ;
2013-06-04 20:06:01 +04:00
struct buffer_head * descriptor ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
struct jbd2_revoke_table_s * revoke ;
2006-10-11 12:20:57 +04:00
struct list_head * hash_list ;
int i , offset , count ;
descriptor = NULL ;
offset = 0 ;
count = 0 ;
/* select revoke table for committing transaction */
revoke = journal - > j_revoke = = journal - > j_revoke_table [ 0 ] ?
journal - > j_revoke_table [ 1 ] : journal - > j_revoke_table [ 0 ] ;
for ( i = 0 ; i < revoke - > hash_size ; i + + ) {
hash_list = & revoke - > hash_table [ i ] ;
while ( ! list_empty ( hash_list ) ) {
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * )
2006-10-11 12:20:57 +04:00
hash_list - > next ;
2016-02-23 07:07:30 +03:00
write_one_revoke_record ( transaction , log_bufs ,
& descriptor , & offset , record ) ;
2006-10-11 12:20:57 +04:00
count + + ;
list_del ( & record - > hash ) ;
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_revoke_record_cache , record ) ;
2006-10-11 12:20:57 +04:00
}
}
if ( descriptor )
2016-02-23 07:07:30 +03:00
flush_descriptor ( journal , descriptor , offset ) ;
2006-10-11 12:20:57 +04:00
jbd_debug ( 1 , " Wrote %d revoke records \n " , count ) ;
}
/*
* Write out one revoke record . We need to create a new descriptor
* block if the old one is full or if we have not already created one .
*/
2016-02-23 07:07:30 +03:00
static void write_one_revoke_record ( transaction_t * transaction ,
2013-06-04 20:06:01 +04:00
struct list_head * log_bufs ,
struct buffer_head * * descriptorp ,
2006-10-11 12:20:57 +04:00
int * offsetp ,
2016-02-23 07:07:30 +03:00
struct jbd2_revoke_record_s * record )
2006-10-11 12:20:57 +04:00
{
2016-02-23 07:07:30 +03:00
journal_t * journal = transaction - > t_journal ;
2012-05-27 16:08:24 +04:00
int csum_size = 0 ;
2013-06-04 20:06:01 +04:00
struct buffer_head * descriptor ;
2015-05-15 02:11:50 +03:00
int sz , offset ;
2006-10-11 12:20:57 +04:00
/* If we are already aborting, this all becomes a noop. We
still need to go round the loop in
2006-10-11 12:20:59 +04:00
jbd2_journal_write_revoke_records in order to free all of the
2006-10-11 12:20:57 +04:00
revoke records : only the IO to the journal is omitted . */
if ( is_journal_aborted ( journal ) )
return ;
descriptor = * descriptorp ;
offset = * offsetp ;
2012-05-27 16:08:24 +04:00
/* Do we need to leave space at the end for a checksum? */
2014-08-28 02:40:07 +04:00
if ( jbd2_journal_has_csum_v2or3 ( journal ) )
2016-02-23 07:19:09 +03:00
csum_size = sizeof ( struct jbd2_journal_block_tail ) ;
2012-05-27 16:08:24 +04:00
2015-10-17 23:18:45 +03:00
if ( jbd2_has_feature_64bit ( journal ) )
2015-05-15 02:11:50 +03:00
sz = 8 ;
else
sz = 4 ;
2006-10-11 12:20:57 +04:00
/* Make sure we have a descriptor with space left for the record */
if ( descriptor ) {
2015-05-15 02:11:50 +03:00
if ( offset + sz > journal - > j_blocksize - csum_size ) {
2016-02-23 07:07:30 +03:00
flush_descriptor ( journal , descriptor , offset ) ;
2006-10-11 12:20:57 +04:00
descriptor = NULL ;
}
}
if ( ! descriptor ) {
2016-02-23 07:17:15 +03:00
descriptor = jbd2_journal_get_descriptor_buffer ( transaction ,
JBD2_REVOKE_BLOCK ) ;
2006-10-11 12:20:57 +04:00
if ( ! descriptor )
return ;
/* Record it so that we can wait for IO completion later */
2013-06-04 20:06:01 +04:00
BUFFER_TRACE ( descriptor , " file in log_bufs " ) ;
jbd2_file_log_bh ( log_bufs , descriptor ) ;
2006-10-11 12:20:57 +04:00
2006-10-11 12:20:59 +04:00
offset = sizeof ( jbd2_journal_revoke_header_t ) ;
2006-10-11 12:20:57 +04:00
* descriptorp = descriptor ;
}
2015-10-17 23:18:45 +03:00
if ( jbd2_has_feature_64bit ( journal ) )
2013-06-04 20:06:01 +04:00
* ( ( __be64 * ) ( & descriptor - > b_data [ offset ] ) ) =
2006-10-11 12:21:08 +04:00
cpu_to_be64 ( record - > blocknr ) ;
2015-05-15 02:11:50 +03:00
else
2013-06-04 20:06:01 +04:00
* ( ( __be32 * ) ( & descriptor - > b_data [ offset ] ) ) =
2006-10-11 12:21:08 +04:00
cpu_to_be32 ( record - > blocknr ) ;
2015-05-15 02:11:50 +03:00
offset + = sz ;
2006-10-11 12:21:08 +04:00
2006-10-11 12:20:57 +04:00
* offsetp = offset ;
}
/*
 * Flush a revoke descriptor out to the journal.  If we are aborting, this
 * is a noop; otherwise we are generating a buffer which needs to be waited
 * for during commit, so it has to go onto the appropriate journal buffer
 * list.
 *
 * @offset is the number of bytes used in the block; it is stored
 * big-endian in the header's r_count before the checksum is set.
 */
static void flush_descriptor(journal_t *journal,
			     struct buffer_head *descriptor,
			     int offset)
{
	jbd2_journal_revoke_header_t *hdr;

	if (is_journal_aborted(journal))
		return;

	hdr = (jbd2_journal_revoke_header_t *)descriptor->b_data;
	hdr->r_count = cpu_to_be32(offset);
	jbd2_descriptor_block_csum_set(journal, descriptor);

	set_buffer_jwrite(descriptor);
	BUFFER_TRACE(descriptor, " write ");
	set_buffer_dirty(descriptor);
	write_dirty_buffer(descriptor, REQ_SYNC);
}
# endif
/*
* Revoke support for recovery .
*
* Recovery needs to be able to :
*
* record all revoke records , including the tid of the latest instance
* of each revoke in the journal
*
* check whether a given block in a given transaction should be replayed
* ( ie . has not been revoked by a revoke record in that or a subsequent
* transaction )
*
* empty the revoke table after recovery .
*/
/*
* First , setting revoke records . We create a new revoke record for
* every block ever revoked in the log as we scan it for recovery , and
* we update the existing records if we find multiple revokes for a
* single block .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_set_revoke ( journal_t * journal ,
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
tid_t sequence )
{
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
record = find_revoke_record ( journal , blocknr ) ;
if ( record ) {
/* If we have multiple occurrences, only record the
* latest sequence number in the hashed record */
if ( tid_gt ( sequence , record - > sequence ) )
record - > sequence = sequence ;
return 0 ;
}
return insert_revoke_hash ( journal , blocknr , sequence ) ;
}
/*
* Test revoke records . For a given block referenced in the log , has
* that block been revoked ? A revoke record with a given transaction
* sequence number revokes all blocks in that transaction and earlier
* ones , but later transactions still need replayed .
*/
2006-10-11 12:20:59 +04:00
int jbd2_journal_test_revoke ( journal_t * journal ,
2006-10-11 12:21:13 +04:00
unsigned long long blocknr ,
2006-10-11 12:20:57 +04:00
tid_t sequence )
{
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
2006-10-11 12:20:57 +04:00
record = find_revoke_record ( journal , blocknr ) ;
if ( ! record )
return 0 ;
if ( tid_gt ( sequence , record - > sequence ) )
return 0 ;
return 1 ;
}
/*
* Finally , once recovery is over , we need to clear the revoke table so
* that it can be reused by the running filesystem .
*/
2006-10-11 12:20:59 +04:00
void jbd2_journal_clear_revoke ( journal_t * journal )
2006-10-11 12:20:57 +04:00
{
int i ;
struct list_head * hash_list ;
2006-10-11 12:20:59 +04:00
struct jbd2_revoke_record_s * record ;
struct jbd2_revoke_table_s * revoke ;
2006-10-11 12:20:57 +04:00
revoke = journal - > j_revoke ;
for ( i = 0 ; i < revoke - > hash_size ; i + + ) {
hash_list = & revoke - > hash_table [ i ] ;
while ( ! list_empty ( hash_list ) ) {
2006-10-11 12:20:59 +04:00
record = ( struct jbd2_revoke_record_s * ) hash_list - > next ;
2006-10-11 12:20:57 +04:00
list_del ( & record - > hash ) ;
2006-10-11 12:20:59 +04:00
kmem_cache_free ( jbd2_revoke_record_cache , record ) ;
2006-10-11 12:20:57 +04:00
}
}
}