
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "This merge window saw the the following new featuers added to ext4:

   - Direct I/O via iomap (required the iomap-for-next branch from
     Darrick as a prereq).

   - Support for using dioread_nolock where the block size < page size.

   - Support for encryption for file systems where the block size < page
     size.

   - Rework of journal credits handling so a revoke-heavy workload will
     not cause the journal to run out of space.

   - Replace bit-spinlocks with spinlocks in jbd2

  Also included were some bug fixes and cleanups, mostly to clean up
  corner cases from fuzzed file systems and error path handling"
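
To make the first item concrete from the application side (this example is not
part of the commit; it is a minimal userspace sketch using only standard POSIX
interfaces): the user-visible API is unchanged, but an O_DIRECT read such as
the one below is now serviced by ext4 through the iomap direct I/O code rather
than the legacy fs/direct-io.c path. O_DIRECT requires the buffer, file
offset, and length to be suitably aligned; 4096 bytes is assumed here as a
safe alignment for common block sizes.

/* dio_read.c - minimal O_DIRECT read; build with: cc -o dio_read dio_read.c */
#define _GNU_SOURCE             /* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <file>\n", argv[0]);
                return 1;
        }
        int fd = open(argv[1], O_RDONLY | O_DIRECT);
        if (fd < 0) {
                perror("open(O_DIRECT)");
                return 1;
        }
        void *buf;
        /* O_DIRECT needs an aligned buffer; 4096 covers common block sizes */
        if (posix_memalign(&buf, 4096, 4096) != 0) {
                fprintf(stderr, "posix_memalign failed\n");
                close(fd);
                return 1;
        }
        /* offset and length are block-aligned, as O_DIRECT requires */
        ssize_t n = pread(fd, buf, 4096, 0);
        if (n < 0)
                perror("pread");
        else
                printf("read %zd bytes via direct I/O\n", n);
        free(buf);
        close(fd);
        return 0;
}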

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (59 commits)
  ext4: work around deleting a file with i_nlink == 0 safely
  ext4: add more paranoia checking in ext4_expand_extra_isize handling
  jbd2: make jbd2_handle_buffer_credits() handle reserved handles
  ext4: fix a bug in ext4_wait_for_tail_page_commit
  ext4: bio_alloc with __GFP_DIRECT_RECLAIM never fails
  ext4: code cleanup for get_next_id
  ext4: fix leak of quota reservations
  ext4: remove unused variable warning in parse_options()
  ext4: Enable encryption for subpage-sized blocks
  fs/buffer.c: support fscrypt in block_read_full_page()
  ext4: Add error handling for io_end_vec struct allocation
  jbd2: Fine tune estimate of necessary descriptor blocks
  jbd2: Provide trace event for handle restarts
  ext4: Reserve revoke credits for freed blocks
  jbd2: Make credit checking more strict
  jbd2: Rename h_buffer_credits to h_total_credits
  jbd2: Reserve space for revoke descriptor blocks
  jbd2: Drop jbd2_space_needed()
  jbd2: Account descriptor blocks into t_outstanding_credits
  jbd2: Factor out common parts of stopping and restarting a handle
  ...
Merged by Linus Torvalds, 2019-11-30 10:53:02 -08:00
commit 50b8b3f85a
30 changed files with 1713 additions and 1483 deletions

--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst

@@ -342,8 +342,8 @@ Contents encryption
 -------------------
 
 For file contents, each filesystem block is encrypted independently.
-Currently, only the case where the filesystem block size is equal to
-the system's page size (usually 4096 bytes) is supported.
+Starting from Linux kernel 5.5, encryption of filesystems with block
+size less than system's page size is supported.
 
 Each block's IV is set to the logical block number within the file as
 a little endian number, except that:

--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -47,6 +47,7 @@
 #include <linux/pagevec.h>
 #include <linux/sched/mm.h>
 #include <trace/events/block.h>
+#include <linux/fscrypt.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
@@ -246,10 +247,6 @@ out:
 	return ret;
 }
 
-/*
- * I/O completion handler for block_read_full_page() - pages
- * which come unlocked at the end of I/O.
- */
 static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
 	unsigned long flags;
@@ -307,6 +304,47 @@ still_busy:
 	return;
 }
 
+struct decrypt_bh_ctx {
+	struct work_struct work;
+	struct buffer_head *bh;
+};
+
+static void decrypt_bh(struct work_struct *work)
+{
+	struct decrypt_bh_ctx *ctx =
+		container_of(work, struct decrypt_bh_ctx, work);
+	struct buffer_head *bh = ctx->bh;
+	int err;
+
+	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
+					       bh_offset(bh));
+	end_buffer_async_read(bh, err == 0);
+	kfree(ctx);
+}
+
+/*
+ * I/O completion handler for block_read_full_page() - pages
+ * which come unlocked at the end of I/O.
+ */
+static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
+{
+	/* Decrypt if needed */
+	if (uptodate && IS_ENABLED(CONFIG_FS_ENCRYPTION) &&
+	    IS_ENCRYPTED(bh->b_page->mapping->host) &&
+	    S_ISREG(bh->b_page->mapping->host->i_mode)) {
+		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+
+		if (ctx) {
+			INIT_WORK(&ctx->work, decrypt_bh);
+			ctx->bh = bh;
+			fscrypt_enqueue_decrypt_work(&ctx->work);
+			return;
+		}
+		uptodate = 0;
+	}
+	end_buffer_async_read(bh, uptodate);
+}
+
 /*
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
@@ -379,7 +417,7 @@ EXPORT_SYMBOL(end_buffer_async_write)
  */
 static void mark_buffer_async_read(struct buffer_head *bh)
 {
-	bh->b_end_io = end_buffer_async_read;
+	bh->b_end_io = end_buffer_async_read_io;
 	set_buffer_async_read(bh);
 }

--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h

@@ -198,6 +198,12 @@ struct ext4_system_blocks {
  */
 #define EXT4_IO_END_UNWRITTEN	0x0001
 
+struct ext4_io_end_vec {
+	struct list_head list;		/* list of io_end_vec */
+	loff_t offset;			/* offset in the file */
+	ssize_t size;			/* size of the extent */
+};
+
 /*
  * For converting unwritten extents on a work queue. 'handle' is used for
  * buffered writeback.
@@ -211,8 +217,7 @@ typedef struct ext4_io_end {
 						 * bios covering the extent */
 	unsigned int		flag;		/* unwritten or not */
 	atomic_t		count;		/* reference counter */
-	loff_t			offset;		/* offset in the file */
-	ssize_t			size;		/* size of the extent */
+	struct list_head	list_vec;	/* list of ext4_io_end_vec */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -1579,7 +1584,6 @@ enum {
 	EXT4_STATE_NO_EXPAND,		/* No space for expansion */
 	EXT4_STATE_DA_ALLOC_CLOSE,	/* Alloc DA blks on close */
 	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
-	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
 	EXT4_STATE_NEWENTRY,		/* File just added to dir */
 	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
 	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
@@ -2562,8 +2566,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
 			     struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create);
-int ext4_dio_get_block(struct inode *inode, sector_t iblock,
-		       struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 			   struct buffer_head *bh, int create);
 int ext4_walk_page_buffers(handle_t *handle,
@@ -2606,7 +2608,6 @@ extern int ext4_can_truncate(struct inode *inode);
 extern int ext4_truncate(struct inode *);
 extern int ext4_break_layouts(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
-extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
@@ -3266,6 +3267,8 @@ extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
 			  loff_t len);
 extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 					  loff_t offset, ssize_t len);
+extern int ext4_convert_unwritten_io_end_vec(handle_t *handle,
+					     ext4_io_end_t *io_end);
 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map, int flags);
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
@@ -3298,6 +3301,10 @@ extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
 			     ext4_lblk_t lblk2, ext4_lblk_t count,
 			     int mark_unwritten,int *err);
 extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);
+extern int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
+				       int check_cred, int restart_cred,
+				       int revoke_cred);
+
 /* move_extent.c */
 extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -3324,6 +3331,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       int len,
 			       struct writeback_control *wbc,
 			       bool keep_towrite);
+extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
+extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
@@ -3381,6 +3390,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 }
 
 extern const struct iomap_ops ext4_iomap_ops;
+extern const struct iomap_ops ext4_iomap_report_ops;
 
 static inline int ext4_buffer_uptodate(struct buffer_head *bh)
 {

--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c

@@ -65,12 +65,14 @@ static int ext4_journal_check_start(struct super_block *sb)
 }
 
 handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
-				  int type, int blocks, int rsv_blocks)
+				  int type, int blocks, int rsv_blocks,
+				  int revoke_creds)
 {
 	journal_t *journal;
 	int err;
 
-	trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
+	trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
+				 _RET_IP_);
 	err = ext4_journal_check_start(sb);
 	if (err < 0)
 		return ERR_PTR(err);
@@ -78,8 +80,8 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
 	journal = EXT4_SB(sb)->s_journal;
 	if (!journal)
 		return ext4_get_nojournal();
-	return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
-				   type, line);
+	return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
+				   GFP_NOFS, type, line);
 }
 
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
@@ -119,8 +121,8 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
 		return ext4_get_nojournal();
 
 	sb = handle->h_journal->j_private;
-	trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
-					  _RET_IP_);
+	trace_ext4_journal_start_reserved(sb,
+				jbd2_handle_buffer_credits(handle), _RET_IP_);
 	err = ext4_journal_check_start(sb);
 	if (err < 0) {
 		jbd2_journal_free_reserved(handle);
@@ -133,6 +135,19 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
 	return handle;
 }
 
+int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
+				  int extend_cred, int revoke_cred)
+{
+	if (!ext4_handle_valid(handle))
+		return 0;
+	if (jbd2_handle_buffer_credits(handle) >= check_cred &&
+	    handle->h_revoke_credits >= revoke_cred)
+		return 0;
+	extend_cred = max(0, extend_cred - jbd2_handle_buffer_credits(handle));
+	revoke_cred = max(0, revoke_cred - handle->h_revoke_credits);
+	return ext4_journal_extend(handle, extend_cred, revoke_cred);
+}
+
 static void ext4_journal_abort_handle(const char *caller, unsigned int line,
 				      const char *err_fn,
 				      struct buffer_head *bh,
@@ -278,7 +293,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 				 handle->h_type,
 				 handle->h_line_no,
 				 handle->h_requested_credits,
-				 handle->h_buffer_credits, err);
+				 jbd2_handle_buffer_credits(handle), err);
 			return err;
 		}
 		ext4_error_inode(inode, where, line,
@@ -289,7 +304,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 				 handle->h_type,
 				 handle->h_line_no,
 				 handle->h_requested_credits,
-				 handle->h_buffer_credits, err);
+				 jbd2_handle_buffer_credits(handle),
+				 err);
 		}
 	} else {
 		if (inode)

--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h

@@ -261,7 +261,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
 	__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
 
 handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
-				  int type, int blocks, int rsv_blocks);
+				  int type, int blocks, int rsv_blocks,
+				  int revoke_creds);
 int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
 
 #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -288,28 +289,41 @@ static inline int ext4_handle_is_aborted(handle_t *handle)
 	return 0;
 }
 
-static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
+static inline int ext4_free_metadata_revoke_credits(struct super_block *sb,
+						    int blocks)
 {
-	if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
-		return 0;
-	return 1;
+	/* Freeing each metadata block can result in freeing one cluster */
+	return blocks * EXT4_SB(sb)->s_cluster_ratio;
+}
+
+static inline int ext4_trans_default_revoke_credits(struct super_block *sb)
+{
+	return ext4_free_metadata_revoke_credits(sb, 8);
 }
 
 #define ext4_journal_start_sb(sb, type, nblocks)			\
-	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
+	__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0,	\
+				ext4_trans_default_revoke_credits(sb))
 
 #define ext4_journal_start(inode, type, nblocks)			\
-	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
+	__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0,	\
+			     ext4_trans_default_revoke_credits((inode)->i_sb))
 
 #define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks)\
-	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
+	__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks),\
+			     ext4_trans_default_revoke_credits((inode)->i_sb))
+
+#define ext4_journal_start_with_revoke(inode, type, blocks, revoke_creds) \
+	__ext4_journal_start((inode), __LINE__, (type), (blocks), 0,	\
+			     (revoke_creds))
 
 static inline handle_t *__ext4_journal_start(struct inode *inode,
 					     unsigned int line, int type,
-					     int blocks, int rsv_blocks)
+					     int blocks, int rsv_blocks,
+					     int revoke_creds)
 {
 	return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
-				       rsv_blocks);
+				       rsv_blocks, revoke_creds);
 }
 
 #define ext4_journal_stop(handle) \
@@ -332,20 +346,68 @@ static inline handle_t *ext4_journal_current_handle(void)
 	return journal_current_handle();
 }
 
-static inline int ext4_journal_extend(handle_t *handle, int nblocks)
+static inline int ext4_journal_extend(handle_t *handle, int nblocks, int revoke)
 {
 	if (ext4_handle_valid(handle))
-		return jbd2_journal_extend(handle, nblocks);
+		return jbd2_journal_extend(handle, nblocks, revoke);
 	return 0;
 }
 
-static inline int ext4_journal_restart(handle_t *handle, int nblocks)
+static inline int ext4_journal_restart(handle_t *handle, int nblocks,
+				       int revoke)
 {
 	if (ext4_handle_valid(handle))
-		return jbd2_journal_restart(handle, nblocks);
+		return jbd2__journal_restart(handle, nblocks, revoke, GFP_NOFS);
 	return 0;
 }
 
+int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
+				  int extend_cred, int revoke_cred);
+
+/*
+ * Ensure @handle has at least @check_creds credits available. If not,
+ * transaction will be extended or restarted to contain at least @extend_cred
+ * credits. Before restarting transaction @fn is executed to allow for cleanup
+ * before the transaction is restarted.
+ *
+ * The return value is < 0 in case of error, 0 in case the handle has enough
+ * credits or transaction extension succeeded, 1 in case transaction had to be
+ * restarted.
+ */
+#define ext4_journal_ensure_credits_fn(handle, check_cred, extend_cred,	\
+				       revoke_cred, fn)			\
+({									\
+	__label__ __ensure_end;						\
+	int err = __ext4_journal_ensure_credits((handle), (check_cred),\
+					(extend_cred), (revoke_cred));	\
+									\
+	if (err <= 0)							\
+		goto __ensure_end;					\
+	err = (fn);							\
+	if (err < 0)							\
+		goto __ensure_end;					\
+	err = ext4_journal_restart((handle), (extend_cred), (revoke_cred)); \
+	if (err == 0)							\
+		err = 1;						\
+__ensure_end:								\
+	err;								\
+})
+
+/*
+ * Ensure given handle has at least requested amount of credits available,
+ * possibly restarting transaction if needed. We also make sure the transaction
+ * has space for at least ext4_trans_default_revoke_credits(sb) revoke records
+ * as freeing one or two blocks is very common pattern and requesting this is
+ * very cheap.
+ */
+static inline int ext4_journal_ensure_credits(handle_t *handle, int credits,
+					      int revoke_creds)
+{
+	return ext4_journal_ensure_credits_fn(handle, credits, credits,
+					      revoke_creds, 0);
+}
+
 static inline int ext4_journal_blocks_per_page(struct inode *inode)
 {
 	if (EXT4_JOURNAL(inode) != NULL)
@@ -407,6 +469,7 @@ static inline int ext4_inode_journal_mode(struct inode *inode)
 		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */
 	/* We do not support data journalling with delayed allocation */
 	if (!S_ISREG(inode->i_mode) ||
+	    ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
 	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
 	    (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
 	    !test_opt(inode->i_sb, DELALLOC))) {
@@ -437,6 +500,19 @@ static inline int ext4_should_writeback_data(struct inode *inode)
 	return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
 }
 
+static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
+{
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+		return 0;
+	if (!ext4_should_journal_data(inode))
+		return 0;
+	/*
+	 * Data blocks in one extent are contiguous, just account for partial
+	 * clusters at extent boundaries
+	 */
+	return blocks + 2*(EXT4_SB(inode->i_sb)->s_cluster_ratio - 1);
+}
+
 /*
  * This function controls whether or not we should try to go down the
 * dioread_nolock code paths, which makes it safe to avoid taking

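The return convention documented above for ext4_journal_ensure_credits_fn()
(0 = enough credits, possibly after extending in place; 1 = the transaction
had to be restarted, so the caller must revalidate its state; < 0 = error)
can be modelled outside the kernel. The sketch below is a toy userspace
simulation under simplified assumptions; every name in it is invented for
illustration, and it is not the jbd2 implementation:

/* toy_credits.c - toy model of the extend-or-restart credit convention:
 * 0 = enough credits (possibly after extending), 1 = transaction had to
 * be restarted (caller revalidates), < 0 = error. All names here are
 * hypothetical; this is not kernel code.
 */
#include <stdio.h>

struct toy_handle {
        int credits;            /* credits left in this handle */
        int journal_free;       /* credits the journal can still grant */
};

static int toy_ensure_credits(struct toy_handle *h, int check, int extend)
{
        if (h->credits >= check)
                return 0;                       /* enough already */

        int want = extend - h->credits;
        if (want <= h->journal_free) {          /* extend in place */
                h->journal_free -= want;
                h->credits = extend;
                return 0;
        }

        /* cannot extend: pretend we commit and start a fresh transaction */
        h->journal_free = 100;                  /* commit freed space */
        h->journal_free -= extend;
        h->credits = extend;
        return 1;                               /* caller must revalidate */
}

int main(void)
{
        struct toy_handle h = { .credits = 2, .journal_free = 1 };

        /* needs 5, only 2 left, journal can't extend by 6 -> restart */
        int ret = toy_ensure_credits(&h, 5, 8);
        printf("ret=%d credits=%d\n", ret, h.credits);  /* ret=1 credits=8 */
        return 0;
}
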
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c

@@ -100,29 +100,41 @@ static int ext4_split_extent_at(handle_t *handle,
 static int ext4_find_delayed_extent(struct inode *inode,
 				    struct extent_status *newes);
 
-static int ext4_ext_truncate_extend_restart(handle_t *handle,
-					    struct inode *inode,
-					    int needed)
+static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
 {
-	int err;
-
-	if (!ext4_handle_valid(handle))
-		return 0;
-	if (handle->h_buffer_credits >= needed)
-		return 0;
 	/*
-	 * If we need to extend the journal get a few extra blocks
-	 * while we're at it for efficiency's sake.
+	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
+	 * moment, get_block can be called only for blocks inside i_size since
+	 * page cache has been already dropped and writes are blocked by
+	 * i_mutex. So we can safely drop the i_data_sem here.
 	 */
-	needed += 3;
-	err = ext4_journal_extend(handle, needed - handle->h_buffer_credits);
-	if (err <= 0)
-		return err;
-	err = ext4_truncate_restart_trans(handle, inode, needed);
-	if (err == 0)
-		err = -EAGAIN;
-
-	return err;
+	BUG_ON(EXT4_JOURNAL(inode) == NULL);
+	ext4_discard_preallocations(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	*dropped = 1;
+	return 0;
+}
+
+/*
+ * Make sure 'handle' has at least 'check_cred' credits. If not, restart
+ * transaction with 'restart_cred' credits. The function drops i_data_sem
+ * when restarting transaction and gets it after transaction is restarted.
+ *
+ * The function returns 0 on success, 1 if transaction had to be restarted,
+ * and < 0 in case of fatal error.
+ */
+int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
+				int check_cred, int restart_cred,
+				int revoke_cred)
+{
+	int ret;
+	int dropped = 0;
+
+	ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
+		revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
+	if (dropped)
+		down_write(&EXT4_I(inode)->i_data_sem);
+	return ret;
 }
 
@@ -1753,16 +1765,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 */
 	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
-	/*
-	 * The check for IO to unwritten extent is somewhat racy as we
-	 * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
-	 * dropping i_data_sem. But reserved blocks should save us in that
-	 * case.
-	 */
 	if (ext4_ext_is_unwritten(ex1) &&
-	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
-	     atomic_read(&EXT4_I(inode)->i_unwritten) ||
-	     (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
+	    ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
 		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (ext1_ee_len >= 4)
@@ -1840,7 +1845,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
 	 * group descriptor to release the extent tree block. If we
 	 * can't get the journal credits, give up.
 	 */
-	if (ext4_journal_extend(handle, 2))
+	if (ext4_journal_extend(handle, 2,
+			ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
 		return;
 
 	/*
@@ -2727,7 +2733,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int err = 0, correct_index = 0;
-	int depth = ext_depth(inode), credits;
+	int depth = ext_depth(inode), credits, revoke_credits;
 	struct ext4_extent_header *eh;
 	ext4_lblk_t a, b;
 	unsigned num;
@@ -2819,10 +2825,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			credits += (ext_depth(inode)) + 1;
 		}
 		credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+		/*
+		 * We may end up freeing some index blocks and data from the
+		 * punched range. Note that partial clusters are accounted for
+		 * by ext4_free_data_revoke_credits().
+		 */
+		revoke_credits =
+			ext4_free_metadata_revoke_credits(inode->i_sb,
+							  ext_depth(inode)) +
+			ext4_free_data_revoke_credits(inode, b - a + 1);
 
-		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
-		if (err)
+		err = ext4_datasem_ensure_credits(handle, inode, credits,
+						  credits, revoke_credits);
+		if (err) {
+			if (err > 0)
+				err = -EAGAIN;
 			goto out;
+		}
 
 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
@@ -2948,7 +2967,9 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 	ext_debug("truncate since %u to %u\n", start, end);
 
 	/* probably first extent we're gonna free will be last in block */
-	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
+	handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
+			depth + 1,
+			ext4_free_metadata_revoke_credits(inode->i_sb, depth));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -4962,23 +4983,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 	int ret = 0;
 	int ret2 = 0;
 	struct ext4_map_blocks map;
-	unsigned int credits, blkbits = inode->i_blkbits;
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned int credits = 0;
 
 	map.m_lblk = offset >> blkbits;
 	max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
 
-	/*
-	 * This is somewhat ugly but the idea is clear: When transaction is
-	 * reserved, everything goes into it. Otherwise we rather start several
-	 * smaller transactions for conversion of each extent separately.
-	 */
-	if (handle) {
-		handle = ext4_journal_start_reserved(handle,
-						     EXT4_HT_EXT_CONVERT);
-		if (IS_ERR(handle))
-			return PTR_ERR(handle);
-		credits = 0;
-	} else {
+	if (!handle) {
 		/*
 		 * credits to insert 1 extent into extent tree
 		 */
@@ -5009,11 +5020,40 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
 		if (ret <= 0 || ret2)
 			break;
 	}
-	if (!credits)
-		ret2 = ext4_journal_stop(handle);
 	return ret > 0 ? ret2 : ret;
 }
 
+int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
+{
+	int ret, err = 0;
+	struct ext4_io_end_vec *io_end_vec;
+
+	/*
+	 * This is somewhat ugly but the idea is clear: When transaction is
+	 * reserved, everything goes into it. Otherwise we rather start several
+	 * smaller transactions for conversion of each extent separately.
+	 */
+	if (handle) {
+		handle = ext4_journal_start_reserved(handle,
+						     EXT4_HT_EXT_CONVERT);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+	}
+
+	list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
+		ret = ext4_convert_unwritten_extents(handle, io_end->inode,
+						     io_end_vec->offset,
+						     io_end_vec->size);
+		if (ret)
+			break;
+	}
+
+	if (handle)
+		err = ext4_journal_stop(handle);
+
+	return ret < 0 ? ret : err;
+}
+
 /*
  * If newes is not existing extent (newes->ec_pblk equals zero) find
  * delayed extent at start of newes and update newes accordingly and
@@ -5206,13 +5246,10 @@ ext4_access_path(handle_t *handle, struct inode *inode,
 	 * descriptor) for each block group; assume two block
 	 * groups
 	 */
-	if (handle->h_buffer_credits < 7) {
-		credits = ext4_writepage_trans_blocks(inode);
-		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
-		/* EAGAIN is success */
-		if (err && err != -EAGAIN)
-			return err;
-	}
+	credits = ext4_writepage_trans_blocks(inode);
+	err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
+	if (err < 0)
+		return err;
 
 	err = ext4_ext_get_access(handle, inode, path);
 	return err;

--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c

@@ -29,10 +29,58 @@
 #include <linux/pagevec.h>
 #include <linux/uio.h>
 #include <linux/mman.h>
+#include <linux/backing-dev.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "truncate.h"
+
+static bool ext4_dio_supported(struct inode *inode)
+{
+	if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
+		return false;
+	if (fsverity_active(inode))
+		return false;
+	if (ext4_should_journal_data(inode))
+		return false;
+	if (ext4_has_inline_data(inode))
+		return false;
+	return true;
+}
+
+static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock_shared(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock_shared(inode);
+	}
+
+	if (!ext4_dio_supported(inode)) {
+		inode_unlock_shared(inode);
+		/*
+		 * Fallback to buffered I/O if the operation being performed on
+		 * the inode is not supported by direct I/O. The IOCB_DIRECT
+		 * flag needs to be cleared here in order to ensure that the
+		 * direct I/O path within generic_file_read_iter() is not
+		 * taken.
+		 */
+		iocb->ki_flags &= ~IOCB_DIRECT;
+		return generic_file_read_iter(iocb, to);
+	}
+
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
+			   is_sync_kiocb(iocb));
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
 
 #ifdef CONFIG_FS_DAX
 static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -64,16 +112,21 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
 static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
 	if (!iov_iter_count(to))
 		return 0; /* skip atime */
 
 #ifdef CONFIG_FS_DAX
-	if (IS_DAX(file_inode(iocb->ki_filp)))
+	if (IS_DAX(inode))
 		return ext4_dax_read_iter(iocb, to);
 #endif
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_read_iter(iocb, to);
+
 	return generic_file_read_iter(iocb, to);
 }
 
@@ -103,13 +156,6 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static void ext4_unwritten_wait(struct inode *inode)
-{
-	wait_queue_head_t *wq = ext4_ioend_wq(inode);
-
-	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
-}
-
 /*
  * This tests whether the IO in question is block-aligned or not.
  * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
@@ -162,13 +208,13 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
+	if (unlikely(IS_IMMUTABLE(inode)))
+		return -EPERM;
+
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
 		return ret;
 
-	if (unlikely(IS_IMMUTABLE(inode)))
-		return -EPERM;
-
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
 	 * is smaller than s_maxbytes, which is for extent-mapped files.
@@ -180,32 +226,301 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
 			return -EFBIG;
 		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
 	}
+
+	ret = file_modified(iocb->ki_filp);
+	if (ret)
+		return ret;
+
 	return iov_iter_count(from);
 }
 
+static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
+					struct iov_iter *from)
+{
+	ssize_t ret;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EOPNOTSUPP;
+
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
+	current->backing_dev_info = inode_to_bdi(inode);
+	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
+	current->backing_dev_info = NULL;
+
+out:
+	inode_unlock(inode);
+	if (likely(ret > 0)) {
+		iocb->ki_pos += ret;
+		ret = generic_write_sync(iocb, ret);
+	}
+
+	return ret;
+}
+
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+					   ssize_t written, size_t count)
+{
+	handle_t *handle;
+	bool truncate = false;
+	u8 blkbits = inode->i_blkbits;
+	ext4_lblk_t written_blk, end_blk;
+
+	/*
+	 * Note that EXT4_I(inode)->i_disksize can get extended up to
+	 * inode->i_size while the I/O was running due to writeback of delalloc
+	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
+	 * zeroed/unwritten extents if this is possible; thus we won't leave
+	 * uninitialized blocks in a file even if we didn't succeed in writing
+	 * as much as we intended.
+	 */
+	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+	if (offset + count <= EXT4_I(inode)->i_disksize) {
+		/*
+		 * We need to ensure that the inode is removed from the orphan
+		 * list if it has been added prematurely, due to writeback of
+		 * delalloc blocks.
+		 */
+		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+			if (IS_ERR(handle)) {
+				ext4_orphan_del(NULL, inode);
+				return PTR_ERR(handle);
+			}
+
+			ext4_orphan_del(handle, inode);
+			ext4_journal_stop(handle);
+		}
+
+		return written;
+	}
+
+	if (written < 0)
+		goto truncate;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle)) {
+		written = PTR_ERR(handle);
+		goto truncate;
+	}
+
+	if (ext4_update_inode_size(inode, offset + written))
+		ext4_mark_inode_dirty(handle, inode);
+
+	/*
+	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 */
+	written_blk = ALIGN(offset + written, 1 << blkbits);
+	end_blk = ALIGN(offset + count, 1 << blkbits);
+	if (written_blk < end_blk && ext4_can_truncate(inode))
+		truncate = true;
+
+	/*
+	 * Remove the inode from the orphan list if it has been extended and
+	 * everything went OK.
+	 */
+	if (!truncate && inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+	ext4_journal_stop(handle);
+
+	if (truncate) {
+truncate:
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If the truncate operation failed early, then the inode may
+		 * still be on the orphan list. In that case, we need to try
+		 * remove the inode from the in-memory linked list.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	return written;
+}
+
+static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+				 int error, unsigned int flags)
+{
+	loff_t offset = iocb->ki_pos;
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	if (error)
+		return error;
+
+	if (size && flags & IOMAP_DIO_UNWRITTEN)
+		return ext4_convert_unwritten_extents(NULL, inode,
+						      offset, size);
+
+	return 0;
+}
+
+static const struct iomap_dio_ops ext4_dio_write_ops = {
+	.end_io = ext4_dio_write_end_io,
+};
+
+static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t ret;
+	size_t count;
+	loff_t offset;
+	handle_t *handle;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	bool extend = false, overwrite = false, unaligned_aio = false;
+
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!inode_trylock(inode))
+			return -EAGAIN;
+	} else {
+		inode_lock(inode);
+	}
+
+	if (!ext4_dio_supported(inode)) {
+		inode_unlock(inode);
+		/*
+		 * Fallback to buffered I/O if the inode does not support
+		 * direct I/O.
+		 */
+		return ext4_buffered_write_iter(iocb, from);
+	}
+
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0) {
+		inode_unlock(inode);
+		return ret;
+	}
+
+	/*
+	 * Unaligned asynchronous direct I/O must be serialized among each
+	 * other as the zeroing of partial blocks of two competing unaligned
+	 * asynchronous direct I/O writes can result in data corruption.
+	 */
+	offset = iocb->ki_pos;
+	count = iov_iter_count(from);
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+	    !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
+		unaligned_aio = true;
+		inode_dio_wait(inode);
+	}
+
+	/*
+	 * Determine whether the I/O will overwrite allocated and initialized
+	 * blocks. If so, check to see whether it is possible to take the
+	 * dioread_nolock path.
+	 */
+	if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
+	    ext4_should_dioread_nolock(inode)) {
+		overwrite = true;
+		downgrade_write(&inode->i_rwsem);
+	}
+
+	if (offset + count > EXT4_I(inode)->i_disksize) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ret = ext4_orphan_add(handle, inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		extend = true;
+		ext4_journal_stop(handle);
+	}
+
+	ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops,
+			   is_sync_kiocb(iocb) || unaligned_aio || extend);
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
+
+out:
+	if (overwrite)
+		inode_unlock_shared(inode);
+	else
+		inode_unlock(inode);
+
+	if (ret >= 0 && iov_iter_count(from)) {
+		ssize_t err;
+		loff_t endbyte;
+
+		offset = iocb->ki_pos;
+		err = ext4_buffered_write_iter(iocb, from);
+		if (err < 0)
+			return err;
+
+		/*
+		 * We need to ensure that the pages within the page cache for
+		 * the range covered by this I/O are written to disk and
+		 * invalidated. This is in attempt to preserve the expected
+		 * direct I/O semantics in the case we fallback to buffered I/O
+		 * to complete off the I/O request.
+		 */
+		ret += err;
+		endbyte = offset + err - 1;
+		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
+						   offset, endbyte);
+		if (!err)
+			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
+						 offset >> PAGE_SHIFT,
+						 endbyte >> PAGE_SHIFT);
+	}
+
+	return ret;
+}
+
 #ifdef CONFIG_FS_DAX
 static ssize_t
 ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
+	size_t count;
+	loff_t offset;
+	handle_t *handle;
+	bool extend = false;
+	struct inode *inode = file_inode(iocb->ki_filp);
 
 	if (!inode_trylock(inode)) {
 		if (iocb->ki_flags & IOCB_NOWAIT)
 			return -EAGAIN;
 		inode_lock(inode);
 	}
+
 	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
-	ret = file_remove_privs(iocb->ki_filp);
-	if (ret)
-		goto out;
-	ret = file_update_time(iocb->ki_filp);
-	if (ret)
-		goto out;
+
+	offset = iocb->ki_pos;
+	count = iov_iter_count(from);
+
+	if (offset + count > EXT4_I(inode)->i_disksize) {
+		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+
+		ret = ext4_orphan_add(handle, inode);
+		if (ret) {
+			ext4_journal_stop(handle);
+			goto out;
+		}
+
+		extend = true;
+		ext4_journal_stop(handle);
+	}
 
 	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+
+	if (extend)
+		ret = ext4_handle_inode_extension(inode, offset, ret, count);
 out:
 	inode_unlock(inode);
 	if (ret > 0)
@@ -218,10 +533,6 @@ static ssize_t
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	int o_direct = iocb->ki_flags & IOCB_DIRECT;
-	int unaligned_aio = 0;
-	int overwrite = 0;
-	ssize_t ret;
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
@@ -230,59 +541,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_DAX(inode))
 		return ext4_dax_write_iter(iocb, from);
 #endif
+	if (iocb->ki_flags & IOCB_DIRECT)
+		return ext4_dio_write_iter(iocb, from);
 
-	if (!inode_trylock(inode)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			return -EAGAIN;
-		inode_lock(inode);
-	}
-
-	ret = ext4_write_checks(iocb, from);
-	if (ret <= 0)
-		goto out;
-
-	/*
-	 * Unaligned direct AIO must be serialized among each other as zeroing
-	 * of partial blocks of two competing unaligned AIOs can result in data
-	 * corruption.
-	 */
-	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-	    !is_sync_kiocb(iocb) &&
-	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
-		unaligned_aio = 1;
-		ext4_unwritten_wait(inode);
-	}
-
-	iocb->private = &overwrite;
-	/* Check whether we do a DIO overwrite or not */
-	if (o_direct && !unaligned_aio) {
-		if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
-			if (ext4_should_dioread_nolock(inode))
-				overwrite = 1;
-		} else if (iocb->ki_flags & IOCB_NOWAIT) {
-			ret = -EAGAIN;
-			goto out;
-		}
-	}
-
-	ret = __generic_file_write_iter(iocb, from);
-	/*
-	 * Unaligned direct AIO must be the only IO in flight. Otherwise
-	 * overlapping aligned IO after unaligned might result in data
-	 * corruption.
-	 */
-	if (ret == -EIOCBQUEUED && unaligned_aio)
-		ext4_unwritten_wait(inode);
-	inode_unlock(inode);
-
-	if (ret > 0)
-		ret = generic_write_sync(iocb, ret);
-
-	return ret;
-
-out:
-	inode_unlock(inode);
-	return ret;
+	return ext4_buffered_write_iter(iocb, from);
 }
 
 #ifdef CONFIG_FS_DAX
@@ -494,12 +756,14 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 						maxbytes, i_size_read(inode));
 	case SEEK_HOLE:
 		inode_lock_shared(inode);
-		offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_hole(inode, offset,
+					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	case SEEK_DATA:
 		inode_lock_shared(inode);
-		offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+		offset = iomap_seek_data(inode, offset,
					 &ext4_iomap_report_ops);
 		inode_unlock_shared(inode);
 		break;
 	}

--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c

@@ -80,6 +80,43 @@ static int ext4_sync_parent(struct inode *inode)
 	return ret;
 }
 
+static int ext4_fsync_nojournal(struct inode *inode, bool datasync,
+				bool *needs_barrier)
+{
+	int ret, err;
+
+	ret = sync_mapping_buffers(inode->i_mapping);
+	if (!(inode->i_state & I_DIRTY_ALL))
+		return ret;
+
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+		return ret;
+
+	err = sync_inode_metadata(inode, 1);
+	if (!ret)
+		ret = err;
+
+	if (!ret)
+		ret = ext4_sync_parent(inode);
+	if (test_opt(inode->i_sb, BARRIER))
+		*needs_barrier = true;
+
+	return ret;
+}
+
+static int ext4_fsync_journal(struct inode *inode, bool datasync,
+			      bool *needs_barrier)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+	tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
+
+	if (journal->j_flags & JBD2_BARRIER &&
+	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
+		*needs_barrier = true;
+
+	return jbd2_complete_transaction(journal, commit_tid);
+}
+
 /*
  * akpm: A new design for ext4_sync_file().
  *
@@ -91,17 +128,14 @@ static int ext4_sync_parent(struct inode *inode)
  * What we do is just kick off a commit and wait on it.  This will snapshot the
  * inode to disk.
  */
-
 int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct inode *inode = file->f_mapping->host;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 	int ret = 0, err;
-	tid_t commit_tid;
 	bool needs_barrier = false;
+	struct inode *inode = file->f_mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+	if (unlikely(ext4_forced_shutdown(sbi)))
 		return -EIO;
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
@@ -111,23 +145,15 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	if (sb_rdonly(inode->i_sb)) {
 		/* Make sure that we read updated s_mount_flags value */
 		smp_rmb();
-		if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+		if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
 			ret = -EROFS;
 		goto out;
 	}
 
-	if (!journal) {
-		ret = __generic_file_fsync(file, start, end, datasync);
-		if (!ret)
-			ret = ext4_sync_parent(inode);
-		if (test_opt(inode->i_sb, BARRIER))
-			goto issue_flush;
-		goto out;
-	}
-
 	ret = file_write_and_wait_range(file, start, end);
 	if (ret)
 		return ret;
+
 	/*
 	 * data=writeback,ordered:
 	 *   The caller's filemap_fdatawrite()/wait will sync the data.
@@ -142,18 +168,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 *  (they were dirtied by commit).  But that's OK - the blocks are
 	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
-	if (ext4_should_journal_data(inode)) {
+	if (!sbi->s_journal)
+		ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
+	else if (ext4_should_journal_data(inode))
 		ret = ext4_force_commit(inode->i_sb);
-		goto out;
-	}
+	else
+		ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
 
-	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (journal->j_flags & JBD2_BARRIER &&
-	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
-		needs_barrier = true;
-	ret = jbd2_complete_transaction(journal, commit_tid);
 	if (needs_barrier) {
-	issue_flush:
 		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 		if (!ret)
 			ret = err;

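The datasync flag that the new ext4_fsync_journal() helper keys off (waiting
on i_datasync_tid rather than i_sync_tid) is the in-kernel side of the
fsync(2) vs fdatasync(2) distinction. A minimal userspace sketch (standard
POSIX, not part of the commit):

/* sync_demo.c - fdatasync() may skip metadata (e.g. timestamps) that is
 * not needed to retrieve the data; fsync() flushes data and all metadata.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        const char msg[] = "hello\n";
        if (write(fd, msg, strlen(msg)) != (ssize_t)strlen(msg))
                perror("write");
        if (fdatasync(fd) != 0)         /* data (and size change) durable */
                perror("fdatasync");
        if (fsync(fd) != 0)             /* also flushes remaining metadata */
                perror("fsync");
        close(fd);
        return 0;
}
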
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c

@@ -265,13 +265,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	ext4_debug("freeing inode %lu\n", ino);
 	trace_ext4_free_inode(inode);
 
-	/*
-	 * Note: we must free any quota before locking the superblock,
-	 * as writing the quota to disk may need the lock as well.
-	 */
 	dquot_initialize(inode);
 	dquot_free_inode(inode);
-	dquot_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
@@ -927,7 +922,7 @@ repeat_in_this_group:
 		BUG_ON(nblocks <= 0);
 		handle = __ext4_journal_start_sb(dir->i_sb, line_no,
 						 handle_type, nblocks,
-						 0);
+						 0, 0);
 		if (IS_ERR(handle)) {
 			err = PTR_ERR(handle);
 			ext4_std_error(sb, err);

--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c

@@ -331,11 +331,14 @@ static int ext4_alloc_branch(handle_t *handle,
 	for (i = 0; i <= indirect_blks; i++) {
 		if (i == indirect_blks) {
 			new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
-		} else
+		} else {
 			ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
 					ar->inode, ar->goal,
 					ar->flags & EXT4_MB_DELALLOC_RESERVED,
 					NULL, &err);
+			/* Simplify error cleanup... */
+			branch[i+1].bh = NULL;
+		}
 		if (err) {
 			i--;
 			goto failed;
@@ -377,18 +380,25 @@ static int ext4_alloc_branch(handle_t *handle,
 	}
 	return 0;
 failed:
+	if (i == indirect_blks) {
+		/* Free data blocks */
+		ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+				 ar->len, 0);
+		i--;
+	}
 	for (; i >= 0; i--) {
 		/*
 		 * We want to ext4_forget() only freshly allocated indirect
-		 * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
-		 * buffer at branch[0].bh is indirect block / inode already
-		 * existing before ext4_alloc_branch() was called.
+		 * blocks. Buffer for new_blocks[i] is at branch[i+1].bh
+		 * (buffer at branch[0].bh is indirect block / inode already
+		 * existing before ext4_alloc_branch() was called). Also
+		 * because blocks are freshly allocated, we don't need to
+		 * revoke them which is why we don't set
+		 * EXT4_FREE_BLOCKS_METADATA.
 		 */
-		if (i > 0 && i != indirect_blks && branch[i].bh)
-			ext4_forget(handle, 1, ar->inode, branch[i].bh,
-				    branch[i].bh->b_blocknr);
-		ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
-				 (i == indirect_blks) ? ar->len : 1, 0);
+		ext4_free_blocks(handle, ar->inode, branch[i+1].bh,
+				 new_blocks[i], 1,
+				 branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0);
 	}
 	return err;
 }
@@ -689,27 +699,63 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
 	return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
 }
 
+static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
+				     struct buffer_head *bh, int *dropped)
+{
+	int err;
+
+	if (bh) {
+		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, inode, bh);
+		if (unlikely(err))
+			return err;
+	}
+	err = ext4_mark_inode_dirty(handle, inode);
+	if (unlikely(err))
+		return err;
+	/*
+	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
+	 * moment, get_block can be called only for blocks inside i_size since
+	 * page cache has been already dropped and writes are blocked by
+	 * i_mutex. So we can safely drop the i_data_sem here.
+	 */
+	BUG_ON(EXT4_JOURNAL(inode) == NULL);
+	ext4_discard_preallocations(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	*dropped = 1;
+	return 0;
+}
+
 /*
  * Truncate transactions can be complex and absolutely huge.  So we need to
  * be able to restart the transaction at a conventient checkpoint to make
  * sure we don't overflow the journal.
  *
  * Try to extend this transaction for the purposes of truncation.  If
- * extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
- *
- * Returns 0 if we managed to create more room.  If we can't create more
- * room, and the transaction must be restarted we return 1.
+ * extend fails, we restart transaction.
  */
-static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+static int ext4_ind_truncate_ensure_credits(handle_t *handle,
+					    struct inode *inode,
+					    struct buffer_head *bh,
+					    int revoke_creds)
 {
-	if (!ext4_handle_valid(handle))
-		return 0;
-	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
-		return 0;
-	if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
-		return 0;
-	return 1;
+	int ret;
+	int dropped = 0;
+
+	ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS,
+			ext4_blocks_for_truncate(inode), revoke_creds,
+			ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped));
+	if (dropped)
+		down_write(&EXT4_I(inode)->i_data_sem);
+	if (ret <= 0)
+		return ret;
+	if (bh) {
+		BUFFER_TRACE(bh, "retaking write access");
+		ret = ext4_journal_get_write_access(handle, bh);
+		if (unlikely(ret))
+			return ret;
+	}
+	return 0;
 }
 
@@ -844,27 +890,10 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 		return 1;
 	}
 
-	if (try_to_extend_transaction(handle, inode)) {
-		if (bh) {
-			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-			err = ext4_handle_dirty_metadata(handle, inode, bh);
-			if (unlikely(err))
-				goto out_err;
-		}
-		err = ext4_mark_inode_dirty(handle, inode);
-		if (unlikely(err))
-			goto out_err;
-		err = ext4_truncate_restart_trans(handle, inode,
-						  ext4_blocks_for_truncate(inode));
-		if (unlikely(err))
-			goto out_err;
-		if (bh) {
-			BUFFER_TRACE(bh, "retaking write access");
-			err = ext4_journal_get_write_access(handle, bh);
-			if (unlikely(err))
-				goto out_err;
-		}
-	}
+	err = ext4_ind_truncate_ensure_credits(handle, inode, bh,
+				ext4_free_data_revoke_credits(inode, count));
+	if (err < 0)
+		goto out_err;
 
 	for (p = first; p < last; p++)
 		*p = 0;
@@ -1047,11 +1076,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 			 */
 			if (ext4_handle_is_aborted(handle))
 				return;
-			if (try_to_extend_transaction(handle, inode)) {
-				ext4_mark_inode_dirty(handle, inode);
-				ext4_truncate_restart_trans(handle, inode,
-					    ext4_blocks_for_truncate(inode));
-			}
+			if (ext4_ind_truncate_ensure_credits(handle, inode,
+					NULL,
+					ext4_free_metadata_revoke_credits(
+						inode->i_sb, 1)) < 0)
+				return;
 
 			/*
 			 * The forget flag here is critical because if

[File diff suppressed because it is too large]

--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c

@@ -50,29 +50,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
     needed = ext4_ext_calc_credits_for_single_extent(inode,
             lb->last_block - lb->first_block + 1, path);

-    /*
-     * Make sure the credit we accumalated is not really high
-     */
-    if (needed && ext4_handle_has_enough_credits(handle,
-                EXT4_RESERVE_TRANS_BLOCKS)) {
-        up_write((&EXT4_I(inode)->i_data_sem));
-        retval = ext4_journal_restart(handle, needed);
-        down_write((&EXT4_I(inode)->i_data_sem));
-        if (retval)
-            goto err_out;
-    } else if (needed) {
-        retval = ext4_journal_extend(handle, needed);
-        if (retval) {
-            /*
-             * IF not able to extend the journal restart the journal
-             */
-            up_write((&EXT4_I(inode)->i_data_sem));
-            retval = ext4_journal_restart(handle, needed);
-            down_write((&EXT4_I(inode)->i_data_sem));
-            if (retval)
-                goto err_out;
-        }
-    }
+    retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0);
+    if (retval < 0)
+        goto err_out;
     retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
 err_out:
     up_write((&EXT4_I(inode)->i_data_sem));
@@ -196,42 +176,30 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 }

-static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
-{
-    int retval = 0, needed;
-
-    if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
-        return 0;
-    /*
-     * We are freeing a blocks. During this we touch
-     * superblock, group descriptor and block bitmap.
-     * So allocate a credit of 3. We may update
-     * quota (user and group).
-     */
-    needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
-
-    if (ext4_journal_extend(handle, needed) != 0)
-        retval = ext4_journal_restart(handle, needed);
-
-    return retval;
-}
-
 static int free_dind_blocks(handle_t *handle,
                 struct inode *inode, __le32 i_data)
 {
     int i;
     __le32 *tmp_idata;
     struct buffer_head *bh;
+    struct super_block *sb = inode->i_sb;
     unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
+    int err;

-    bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
+    bh = ext4_sb_bread(sb, le32_to_cpu(i_data), 0);
     if (IS_ERR(bh))
         return PTR_ERR(bh);

     tmp_idata = (__le32 *)bh->b_data;
     for (i = 0; i < max_entries; i++) {
         if (tmp_idata[i]) {
-            extend_credit_for_blkdel(handle, inode);
+            err = ext4_journal_ensure_credits(handle,
+                EXT4_RESERVE_TRANS_BLOCKS,
+                ext4_free_metadata_revoke_credits(sb, 1));
+            if (err < 0) {
+                put_bh(bh);
+                return err;
+            }
             ext4_free_blocks(handle, inode, NULL,
                      le32_to_cpu(tmp_idata[i]), 1,
                      EXT4_FREE_BLOCKS_METADATA |
@@ -239,7 +207,10 @@ static int free_dind_blocks(handle_t *handle,
         }
     }
     put_bh(bh);
-    extend_credit_for_blkdel(handle, inode);
+    err = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
+            ext4_free_metadata_revoke_credits(sb, 1));
+    if (err < 0)
+        return err;
     ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
              EXT4_FREE_BLOCKS_METADATA |
              EXT4_FREE_BLOCKS_FORGET);
@@ -270,7 +241,10 @@ static int free_tind_blocks(handle_t *handle,
         }
     }
     put_bh(bh);
-    extend_credit_for_blkdel(handle, inode);
+    retval = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
+            ext4_free_metadata_revoke_credits(inode->i_sb, 1));
+    if (retval < 0)
+        return retval;
     ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
              EXT4_FREE_BLOCKS_METADATA |
              EXT4_FREE_BLOCKS_FORGET);
@@ -283,7 +257,11 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)

     /* ei->i_data[EXT4_IND_BLOCK] */
     if (i_data[0]) {
-        extend_credit_for_blkdel(handle, inode);
+        retval = ext4_journal_ensure_credits(handle,
+            EXT4_RESERVE_TRANS_BLOCKS,
+            ext4_free_metadata_revoke_credits(inode->i_sb, 1));
+        if (retval < 0)
+            return retval;
         ext4_free_blocks(handle, inode, NULL,
                 le32_to_cpu(i_data[0]), 1,
                 EXT4_FREE_BLOCKS_METADATA |
@@ -318,12 +296,9 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
      * One credit accounted for writing the
      * i_data field of the original inode
      */
-    retval = ext4_journal_extend(handle, 1);
-    if (retval) {
-        retval = ext4_journal_restart(handle, 1);
-        if (retval)
-            goto err_out;
-    }
+    retval = ext4_journal_ensure_credits(handle, 1, 0);
+    if (retval < 0)
+        goto err_out;

     i_data[0] = ei->i_data[EXT4_IND_BLOCK];
     i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
@@ -391,15 +366,20 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
         ix = EXT_FIRST_INDEX(eh);
         for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
             retval = free_ext_idx(handle, inode, ix);
-            if (retval)
-                break;
+            if (retval) {
+                put_bh(bh);
+                return retval;
+            }
         }
     }
     put_bh(bh);
-    extend_credit_for_blkdel(handle, inode);
+    retval = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
+            ext4_free_metadata_revoke_credits(inode->i_sb, 1));
+    if (retval < 0)
+        return retval;
     ext4_free_blocks(handle, inode, NULL, block, 1,
              EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
-    return retval;
+    return 0;
 }

 /*
@@ -574,9 +554,9 @@ err_out:
     }

     /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
-    if (ext4_journal_extend(handle, 1) != 0)
-        ext4_journal_restart(handle, 1);
+    retval = ext4_journal_ensure_credits(handle, 1, 0);
+    if (retval < 0)
+        goto out_stop;

     /*
      * Mark the tmp_inode as of size zero
@@ -594,6 +574,7 @@ err_out:
     /* Reset the extent details */
     ext4_ext_tree_init(handle, tmp_inode);

+out_stop:
     ext4_journal_stop(handle);
 out:
     unlock_new_inode(tmp_inode);
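The migrate.c conversions all follow the same shape: every metadata block freed with EXT4_FREE_BLOCKS_FORGET inserts a revoke record, so the handle must hold a revoke credit before the free. A condensed sketch of that contract as used above — free_one_meta_block() is a hypothetical wrapper, not a kernel function:

    static int free_one_meta_block(handle_t *handle, struct inode *inode,
                                   ext4_fsblk_t block)
    {
        /* top up buffer credits plus one revoke credit, restarting if needed */
        int err = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
                ext4_free_metadata_revoke_credits(inode->i_sb, 1));

        if (err < 0)
            return err;
        ext4_free_blocks(handle, inode, NULL, block, 1,
                 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return 0;
    }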


@@ -2547,18 +2547,29 @@ static void ext4_dec_count(handle_t *handle, struct inode *inode)
 }

+/*
+ * Add non-directory inode to a directory. On success, the inode reference is
+ * consumed by dentry is instantiation. This is also indicated by clearing of
+ * *inodep pointer. On failure, the caller is responsible for dropping the
+ * inode reference in the safe context.
+ */
 static int ext4_add_nondir(handle_t *handle,
-        struct dentry *dentry, struct inode *inode)
+        struct dentry *dentry, struct inode **inodep)
 {
+    struct inode *dir = d_inode(dentry->d_parent);
+    struct inode *inode = *inodep;
     int err = ext4_add_entry(handle, dentry, inode);
     if (!err) {
         ext4_mark_inode_dirty(handle, inode);
+        if (IS_DIRSYNC(dir))
+            ext4_handle_sync(handle);
         d_instantiate_new(dentry, inode);
+        *inodep = NULL;
         return 0;
     }
     drop_nlink(inode);
+    ext4_orphan_add(handle, inode);
     unlock_new_inode(inode);
-    iput(inode);
     return err;
 }

@@ -2592,12 +2603,12 @@ retry:
         inode->i_op = &ext4_file_inode_operations;
         inode->i_fop = &ext4_file_operations;
         ext4_set_aops(inode);
-        err = ext4_add_nondir(handle, dentry, inode);
-        if (!err && IS_DIRSYNC(dir))
-            ext4_handle_sync(handle);
+        err = ext4_add_nondir(handle, dentry, &inode);
     }
     if (handle)
         ext4_journal_stop(handle);
+    if (!IS_ERR_OR_NULL(inode))
+        iput(inode);
     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
         goto retry;
     return err;
@@ -2624,12 +2635,12 @@ retry:
     if (!IS_ERR(inode)) {
         init_special_inode(inode, inode->i_mode, rdev);
         inode->i_op = &ext4_special_inode_operations;
-        err = ext4_add_nondir(handle, dentry, inode);
-        if (!err && IS_DIRSYNC(dir))
-            ext4_handle_sync(handle);
+        err = ext4_add_nondir(handle, dentry, &inode);
     }
     if (handle)
         ext4_journal_stop(handle);
+    if (!IS_ERR_OR_NULL(inode))
+        iput(inode);
     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
         goto retry;
     return err;
@@ -2779,10 +2790,12 @@ retry:
     if (err) {
 out_clear_inode:
         clear_nlink(inode);
+        ext4_orphan_add(handle, inode);
         unlock_new_inode(inode);
         ext4_mark_inode_dirty(handle, inode);
+        ext4_journal_stop(handle);
         iput(inode);
-        goto out_stop;
+        goto out_retry;
     }
     ext4_inc_count(handle, dir);
     ext4_update_dx_flag(dir);
@@ -2796,6 +2809,7 @@ out_clear_inode:
 out_stop:
     if (handle)
         ext4_journal_stop(handle);
+out_retry:
     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
         goto retry;
     return err;
@@ -3182,18 +3196,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
     if (IS_DIRSYNC(dir))
         ext4_handle_sync(handle);

-    if (inode->i_nlink == 0) {
-        ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
-                   dentry->d_name.len, dentry->d_name.name);
-        set_nlink(inode, 1);
-    }
     retval = ext4_delete_entry(handle, dir, de, bh);
     if (retval)
         goto end_unlink;
     dir->i_ctime = dir->i_mtime = current_time(dir);
     ext4_update_dx_flag(dir);
     ext4_mark_inode_dirty(handle, dir);
-    drop_nlink(inode);
+    if (inode->i_nlink == 0)
+        ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
+                   dentry->d_name.len, dentry->d_name.name);
+    else
+        drop_nlink(inode);
     if (!inode->i_nlink)
         ext4_orphan_add(handle, inode);
     inode->i_ctime = current_time(inode);
@@ -3328,12 +3341,11 @@ static int ext4_symlink(struct inode *dir,
         inode->i_size = disk_link.len - 1;
     }
     EXT4_I(inode)->i_disksize = inode->i_size;
-    err = ext4_add_nondir(handle, dentry, inode);
-    if (!err && IS_DIRSYNC(dir))
-        ext4_handle_sync(handle);
+    err = ext4_add_nondir(handle, dentry, &inode);
     if (handle)
         ext4_journal_stop(handle);
+    if (inode)
+        iput(inode);
     goto out_free_encrypted_link;

 err_drop_inode:


@@ -31,18 +31,56 @@
 #include "acl.h"

 static struct kmem_cache *io_end_cachep;
+static struct kmem_cache *io_end_vec_cachep;

 int __init ext4_init_pageio(void)
 {
     io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
     if (io_end_cachep == NULL)
         return -ENOMEM;
+
+    io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
+    if (io_end_vec_cachep == NULL) {
+        kmem_cache_destroy(io_end_cachep);
+        return -ENOMEM;
+    }
     return 0;
 }

 void ext4_exit_pageio(void)
 {
     kmem_cache_destroy(io_end_cachep);
+    kmem_cache_destroy(io_end_vec_cachep);
+}
+
+struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
+{
+    struct ext4_io_end_vec *io_end_vec;
+
+    io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
+    if (!io_end_vec)
+        return ERR_PTR(-ENOMEM);
+    INIT_LIST_HEAD(&io_end_vec->list);
+    list_add_tail(&io_end_vec->list, &io_end->list_vec);
+    return io_end_vec;
+}
+
+static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
+{
+    struct ext4_io_end_vec *io_end_vec, *tmp;
+
+    if (list_empty(&io_end->list_vec))
+        return;
+    list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
+        list_del(&io_end_vec->list);
+        kmem_cache_free(io_end_vec_cachep, io_end_vec);
+    }
+}
+
+struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
+{
+    BUG_ON(list_empty(&io_end->list_vec));
+    return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
 }
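An io_end now carries a list of ext4_io_end_vec entries rather than a single (offset, size) pair, so one io_end can describe several discontiguous ranges that need unwritten-extent conversion. A sketch of how a producer would append a range — the ->offset and ->size fields are assumed from later users of the structure; only the list linkage is visible in this hunk:

    static int record_range(ext4_io_end_t *io_end, loff_t off, ssize_t len)
    {
        struct ext4_io_end_vec *vec = ext4_alloc_io_end_vec(io_end);

        if (IS_ERR(vec))
            return PTR_ERR(vec);
        vec->offset = off;      /* assumed field */
        vec->size = len;        /* assumed field */
        return 0;
    }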
 /*
@@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
         ext4_finish_bio(bio);
         bio_put(bio);
     }
+    ext4_free_io_end_vec(io_end);
     kmem_cache_free(io_end_cachep, io_end);
 }

@@ -136,29 +175,26 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
  * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
  * completed (happens from ext4_free_ioend()).
  */
-static int ext4_end_io(ext4_io_end_t *io)
+static int ext4_end_io_end(ext4_io_end_t *io_end)
 {
-    struct inode *inode = io->inode;
-    loff_t offset = io->offset;
-    ssize_t size = io->size;
-    handle_t *handle = io->handle;
+    struct inode *inode = io_end->inode;
+    handle_t *handle = io_end->handle;
     int ret = 0;

-    ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+    ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
            "list->prev 0x%p\n",
-           io, inode->i_ino, io->list.next, io->list.prev);
+           io_end, inode->i_ino, io_end->list.next, io_end->list.prev);

-    io->handle = NULL;    /* Following call will use up the handle */
-    ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
+    io_end->handle = NULL;    /* Following call will use up the handle */
+    ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
     if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
         ext4_msg(inode->i_sb, KERN_EMERG,
              "failed to convert unwritten extents to written "
              "extents -- potential data loss! "
-             "(inode %lu, offset %llu, size %zd, error %d)",
-             inode->i_ino, offset, size, ret);
+             "(inode %lu, error %d)", inode->i_ino, ret);
     }
-    ext4_clear_io_unwritten_flag(io);
-    ext4_release_io_end(io);
+    ext4_clear_io_unwritten_flag(io_end);
+    ext4_release_io_end(io_end);
     return ret;
 }

@@ -166,21 +202,21 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
 {
 #ifdef EXT4FS_DEBUG
     struct list_head *cur, *before, *after;
-    ext4_io_end_t *io, *io0, *io1;
+    ext4_io_end_t *io_end, *io_end0, *io_end1;

     if (list_empty(head))
         return;

     ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
-    list_for_each_entry(io, head, list) {
-        cur = &io->list;
+    list_for_each_entry(io_end, head, list) {
+        cur = &io_end->list;
         before = cur->prev;
-        io0 = container_of(before, ext4_io_end_t, list);
+        io_end0 = container_of(before, ext4_io_end_t, list);
         after = cur->next;
-        io1 = container_of(after, ext4_io_end_t, list);
+        io_end1 = container_of(after, ext4_io_end_t, list);

         ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-              io, inode->i_ino, io0, io1);
+              io_end, inode->i_ino, io_end0, io_end1);
     }
 #endif
 }

@@ -207,7 +243,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
 static int ext4_do_flush_completed_IO(struct inode *inode,
                       struct list_head *head)
 {
-    ext4_io_end_t *io;
+    ext4_io_end_t *io_end;
     struct list_head unwritten;
     unsigned long flags;
     struct ext4_inode_info *ei = EXT4_I(inode);
@@ -219,11 +255,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
     spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);

     while (!list_empty(&unwritten)) {
-        io = list_entry(unwritten.next, ext4_io_end_t, list);
-        BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
-        list_del_init(&io->list);
+        io_end = list_entry(unwritten.next, ext4_io_end_t, list);
+        BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+        list_del_init(&io_end->list);

-        err = ext4_end_io(io);
+        err = ext4_end_io_end(io_end);
         if (unlikely(!ret && err))
             ret = err;
     }
@@ -242,19 +278,22 @@ void ext4_end_io_rsv_work(struct work_struct *work)

 ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
-    ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
-    if (io) {
-        io->inode = inode;
-        INIT_LIST_HEAD(&io->list);
-        atomic_set(&io->count, 1);
+    ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
+
+    if (io_end) {
+        io_end->inode = inode;
+        INIT_LIST_HEAD(&io_end->list);
+        INIT_LIST_HEAD(&io_end->list_vec);
+        atomic_set(&io_end->count, 1);
     }
-    return io;
+    return io_end;
 }

 void ext4_put_io_end_defer(ext4_io_end_t *io_end)
 {
     if (atomic_dec_and_test(&io_end->count)) {
-        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
+                list_empty(&io_end->list_vec)) {
             ext4_release_io_end(io_end);
             return;
         }
@@ -268,9 +307,8 @@ int ext4_put_io_end(ext4_io_end_t *io_end)

     if (atomic_dec_and_test(&io_end->count)) {
         if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-            err = ext4_convert_unwritten_extents(io_end->handle,
-                        io_end->inode, io_end->offset,
-                        io_end->size);
+            err = ext4_convert_unwritten_io_end_vec(io_end->handle,
+                        io_end);
             io_end->handle = NULL;
             ext4_clear_io_unwritten_flag(io_end);
         }
@@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio)
         struct inode *inode = io_end->inode;

         ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
-                "(offset %llu size %ld starting block %llu)",
+                "starting block %llu)",
                 bio->bi_status, inode->i_ino,
-                (unsigned long long) io_end->offset,
-                (long) io_end->size,
                 (unsigned long long)
                 bi_sector >> (inode->i_blkbits - 9));
         mapping_set_error(inode->i_mapping,
@@ -358,14 +394,16 @@ void ext4_io_submit_init(struct ext4_io_submit *io,
     io->io_end = NULL;
 }

-static int io_submit_init_bio(struct ext4_io_submit *io,
-                  struct buffer_head *bh)
+static void io_submit_init_bio(struct ext4_io_submit *io,
+                   struct buffer_head *bh)
 {
     struct bio *bio;

+    /*
+     * bio_alloc will _always_ be able to allocate a bio if
+     * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
+     */
     bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
-    if (!bio)
-        return -ENOMEM;
     bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
     bio_set_dev(bio, bh->b_bdev);
     bio->bi_end_io = ext4_end_bio;
@@ -373,13 +411,12 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
     io->io_bio = bio;
     io->io_next_block = bh->b_blocknr;
     wbc_init_bio(io->io_wbc, bio);
-    return 0;
 }

-static int io_submit_add_bh(struct ext4_io_submit *io,
-                struct inode *inode,
-                struct page *page,
-                struct buffer_head *bh)
+static void io_submit_add_bh(struct ext4_io_submit *io,
+                 struct inode *inode,
+                 struct page *page,
+                 struct buffer_head *bh)
 {
     int ret;

@@ -388,9 +425,7 @@ submit_and_retry:
         ext4_io_submit(io);
     }
     if (io->io_bio == NULL) {
-        ret = io_submit_init_bio(io, bh);
-        if (ret)
-            return ret;
+        io_submit_init_bio(io, bh);
         io->io_bio->bi_write_hint = inode->i_write_hint;
     }
     ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
@@ -398,7 +433,6 @@ submit_and_retry:
         goto submit_and_retry;
     wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
     io->io_next_block++;
-    return 0;
 }

 int ext4_bio_write_page(struct ext4_io_submit *io,
@@ -491,8 +525,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                 gfp_flags |= __GFP_NOFAIL;
                 goto retry_encrypt;
             }
-            bounce_page = NULL;
-            goto out;
+
+            printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+            redirty_page_for_writepage(wbc, page);
+            do {
+                clear_buffer_async_write(bh);
+                bh = bh->b_this_page;
+            } while (bh != head);
+            goto unlock;
         }
     }

@@ -500,30 +540,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
     do {
         if (!buffer_async_write(bh))
             continue;
-        ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
-        if (ret) {
-            /*
-             * We only get here on ENOMEM.  Not much else
-             * we can do but mark the page as dirty, and
-             * better luck next time.
-             */
-            break;
-        }
+        io_submit_add_bh(io, inode,
+                 bounce_page ? bounce_page : page, bh);
         nr_submitted++;
         clear_buffer_dirty(bh);
     } while ((bh = bh->b_this_page) != head);

-    /* Error stopped previous loop?  Clean up buffers... */
-    if (ret) {
-    out:
-        fscrypt_free_bounce_page(bounce_page);
-        printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
-        redirty_page_for_writepage(wbc, page);
-        do {
-            clear_buffer_async_write(bh);
-            bh = bh->b_this_page;
-        } while (bh != head);
-    }
+unlock:
     unlock_page(page);
     /* Nothing submitted - we have to end page writeback */
     if (!nr_submitted)


@@ -360,10 +360,12 @@ int ext4_mpage_readpages(struct address_space *mapping,
         if (bio == NULL) {
             struct bio_post_read_ctx *ctx;

+            /*
+             * bio_alloc will _always_ be able to allocate a bio if
+             * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
+             */
             bio = bio_alloc(GFP_KERNEL,
                     min_t(int, nr_pages, BIO_MAX_PAGES));
-            if (!bio)
-                goto set_error_page;
             ctx = get_bio_post_read_ctx(inode, bio, page->index);
             if (IS_ERR(ctx)) {
                 bio_put(bio);
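The same reasoning as in page-io.c applies here: GFP_KERNEL and GFP_NOIO both include __GFP_DIRECT_RECLAIM, and bio_alloc_bioset() documents that allocations with that flag may block but do not fail, so the removed NULL checks were dead code. A one-line sketch of the invariant being relied on:

    /* Sketch: bio_alloc(gfp, n) can return NULL only when the mask lacks
     * __GFP_DIRECT_RECLAIM; GFP_KERNEL and GFP_NOIO both include it. */
    static inline bool bio_alloc_may_fail(gfp_t gfp)
    {
        return !(gfp & __GFP_DIRECT_RECLAIM);
    }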


@@ -388,28 +388,10 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
     return bh;
 }

-/*
- * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
- * If that fails, restart the transaction & regain write access for the
- * buffer head which is used for block_bitmap modifications.
- */
-static int extend_or_restart_transaction(handle_t *handle, int thresh)
+static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits)
 {
-    int err;
-
-    if (ext4_handle_has_enough_credits(handle, thresh))
-        return 0;
-
-    err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
-    if (err < 0)
-        return err;
-    if (err) {
-        err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
-        if (err)
-            return err;
-    }
-
-    return 0;
+    return ext4_journal_ensure_credits_fn(handle, credits,
+            EXT4_MAX_TRANS_DATA, 0, 0);
 }

 /*
@@ -451,8 +433,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
             continue;
         }

-        err = extend_or_restart_transaction(handle, 1);
-        if (err)
+        err = ext4_resize_ensure_credits_batch(handle, 1);
+        if (err < 0)
             return err;

         bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
@@ -544,8 +526,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
             struct buffer_head *gdb;

             ext4_debug("update backup group %#04llx\n", block);
-            err = extend_or_restart_transaction(handle, 1);
-            if (err)
+            err = ext4_resize_ensure_credits_batch(handle, 1);
+            if (err < 0)
                 goto out;

             gdb = sb_getblk(sb, block);
@@ -602,8 +584,8 @@ handle_bb:
         /* Initialize block bitmap of the @group */
         block = group_data[i].block_bitmap;
-        err = extend_or_restart_transaction(handle, 1);
-        if (err)
+        err = ext4_resize_ensure_credits_batch(handle, 1);
+        if (err < 0)
             goto out;

         bh = bclean(handle, sb, block);
@@ -631,8 +613,8 @@ handle_ib:
         /* Initialize inode bitmap of the @group */
         block = group_data[i].inode_bitmap;
-        err = extend_or_restart_transaction(handle, 1);
-        if (err)
+        err = ext4_resize_ensure_credits_batch(handle, 1);
+        if (err < 0)
             goto out;
         /* Mark unused entries in inode bitmap used */
         bh = bclean(handle, sb, block);
@@ -1109,10 +1091,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
         ext4_fsblk_t backup_block;

         /* Out of journal space, and can't get more - abort - so sad */
-        if (ext4_handle_valid(handle) &&
-            handle->h_buffer_credits == 0 &&
-            ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
-            (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
+        err = ext4_resize_ensure_credits_batch(handle, 1);
+        if (err < 0)
             break;

         if (meta_bg == 0)
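ext4_resize_ensure_credits_batch() keeps the old behaviour in one line: demand at least @credits, but when the handle does have to be extended or restarted, ask for a whole EXT4_MAX_TRANS_DATA batch so that long resize loops do not renegotiate credits on every block. A sketch of the assumed usage pattern (loop body elided):

    for (i = 0; i < blocks_to_initialize; i++) {
        err = ext4_resize_ensure_credits_batch(handle, 1);
        if (err < 0)
            break;
        /* ... get write access, modify and dirty one block ... */
    }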


@@ -1172,9 +1172,9 @@ void ext4_clear_inode(struct inode *inode)
 {
     invalidate_inode_buffers(inode);
     clear_inode(inode);
-    dquot_drop(inode);
     ext4_discard_preallocations(inode);
     ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
+    dquot_drop(inode);
     if (EXT4_I(inode)->jinode) {
         jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
                            EXT4_I(inode)->jinode);
@@ -1388,7 +1388,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
                  unsigned int flags);
 static int ext4_enable_quotas(struct super_block *sb);
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);

 static struct dquot **ext4_get_dquots(struct inode *inode)
 {
@@ -1406,7 +1405,7 @@ static const struct dquot_operations ext4_quota_operations = {
     .destroy_dquot      = dquot_destroy,
     .get_projid         = ext4_get_projid,
     .get_inode_usage    = ext4_get_inode_usage,
-    .get_next_id        = ext4_get_next_id,
+    .get_next_id        = dquot_get_next_id,
 };

 static const struct quotactl_ops ext4_qctl_operations = {
@@ -2065,7 +2064,7 @@ static int parse_options(char *options, struct super_block *sb,
                  unsigned int *journal_ioprio,
                  int is_remount)
 {
-    struct ext4_sb_info *sbi = EXT4_SB(sb);
+    struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
     char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
     substring_t args[MAX_OPT_ARGS];
     int token;
@@ -2119,16 +2118,6 @@ static int parse_options(char *options, struct super_block *sb,
         }
     }
 #endif
-    if (test_opt(sb, DIOREAD_NOLOCK)) {
-        int blocksize =
-            BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
-
-        if (blocksize < PAGE_SIZE) {
-            ext4_msg(sb, KERN_ERR, "can't mount with "
-                 "dioread_nolock if block size != PAGE_SIZE");
-            return 0;
-        }
-    }
     return 1;
 }

@@ -3569,12 +3558,15 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb)
 {
     struct ext4_sb_info *sbi = EXT4_SB(sb);
     struct ext4_super_block *es = sbi->s_es;
+    unsigned def_extra_isize = sizeof(struct ext4_inode) -
+                       EXT4_GOOD_OLD_INODE_SIZE;

-    /* determine the minimum size of new large inodes, if present */
-    if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
-        sbi->s_want_extra_isize == 0) {
-        sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
-                         EXT4_GOOD_OLD_INODE_SIZE;
+    if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) {
+        sbi->s_want_extra_isize = 0;
+        return;
+    }
+    if (sbi->s_want_extra_isize < 4) {
+        sbi->s_want_extra_isize = def_extra_isize;
         if (ext4_has_feature_extra_isize(sb)) {
             if (sbi->s_want_extra_isize <
                 le16_to_cpu(es->s_want_extra_isize))
@@ -3587,10 +3579,10 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb)
         }
     }
     /* Check if enough inode space is available */
-    if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
-                            sbi->s_inode_size) {
-        sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
-                         EXT4_GOOD_OLD_INODE_SIZE;
+    if ((sbi->s_want_extra_isize > sbi->s_inode_size) ||
+        (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
+                            sbi->s_inode_size)) {
+        sbi->s_want_extra_isize = def_extra_isize;
         ext4_msg(sb, KERN_INFO,
              "required extra inode space not available");
     }
@@ -4453,13 +4445,6 @@ no_journal:
         }
     }

-    if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
-        (blocksize != PAGE_SIZE)) {
-        ext4_msg(sb, KERN_ERR,
-             "Unsupported blocksize for fs encryption");
-        goto failed_mount_wq;
-    }
-
     if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
         ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
         goto failed_mount_wq;
@@ -6033,18 +6018,6 @@ out:
     }
     return len;
 }
-
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
-{
-    const struct quota_format_ops *ops;
-
-    if (!sb_has_quota_loaded(sb, qid->type))
-        return -ESRCH;
-    ops = sb_dqopt(sb)->ops[qid->type];
-    if (!ops || !ops->get_next_id)
-        return -ENOSYS;
-    return dquot_get_next_id(sb, qid);
-}
 #endif

 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
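A worked example of the clamp above, assuming sizeof(struct ext4_inode) == 160 and EXT4_GOOD_OLD_INODE_SIZE == 128 (both are properties of this kernel, not visible in the hunk):

    /*
     * def_extra_isize = 160 - 128 = 32 bytes
     *
     * - 128-byte inodes: s_want_extra_isize is forced to 0.
     * - larger inodes with a bogus value (< 4): reset to 32, then
     *   possibly raised to the superblock's s_want_extra_isize.
     * - any value that cannot fit (128 + want > s_inode_size) falls
     *   back to 32 with a KERN_INFO message.
     */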


@@ -967,55 +967,6 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
     return credits;
 }

-static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
-                     int credits, struct buffer_head *bh,
-                     bool dirty, bool block_csum)
-{
-    int error;
-
-    if (!ext4_handle_valid(handle))
-        return 0;
-
-    if (handle->h_buffer_credits >= credits)
-        return 0;
-
-    error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
-    if (!error)
-        return 0;
-    if (error < 0) {
-        ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
-        return error;
-    }
-
-    if (bh && dirty) {
-        if (block_csum)
-            ext4_xattr_block_csum_set(inode, bh);
-        error = ext4_handle_dirty_metadata(handle, NULL, bh);
-        if (error) {
-            ext4_warning(inode->i_sb, "Handle metadata (error %d)",
-                     error);
-            return error;
-        }
-    }
-
-    error = ext4_journal_restart(handle, credits);
-    if (error) {
-        ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
-        return error;
-    }
-
-    if (bh) {
-        error = ext4_journal_get_write_access(handle, bh);
-        if (error) {
-            ext4_warning(inode->i_sb,
-                     "Get write access failed (error %d)",
-                     error);
-            return error;
-        }
-    }
-    return 0;
-}
-
 static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
                        int ref_change)
 {
@@ -1149,6 +1100,24 @@ cleanup:
     return saved_err;
 }

+static int ext4_xattr_restart_fn(handle_t *handle, struct inode *inode,
+            struct buffer_head *bh, bool block_csum, bool dirty)
+{
+    int error;
+
+    if (bh && dirty) {
+        if (block_csum)
+            ext4_xattr_block_csum_set(inode, bh);
+        error = ext4_handle_dirty_metadata(handle, NULL, bh);
+        if (error) {
+            ext4_warning(inode->i_sb, "Handle metadata (error %d)",
+                     error);
+            return error;
+        }
+    }
+    return 0;
+}
+
 static void
 ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
                  struct buffer_head *bh,
@@ -1185,13 +1154,24 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
             continue;
         }

-        err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
-                        dirty, block_csum);
-        if (err) {
+        err = ext4_journal_ensure_credits_fn(handle, credits, credits,
+                ext4_free_metadata_revoke_credits(parent->i_sb, 1),
+                ext4_xattr_restart_fn(handle, parent, bh, block_csum,
+                              dirty));
+        if (err < 0) {
             ext4_warning_inode(ea_inode, "Ensure credits err=%d",
                        err);
             continue;
         }
+        if (err > 0) {
+            err = ext4_journal_get_write_access(handle, bh);
+            if (err) {
+                ext4_warning_inode(ea_inode,
+                        "Re-get write access err=%d",
+                        err);
+                continue;
+            }
+        }

         err = ext4_xattr_inode_dec_ref(handle, ea_inode);
         if (err) {
@@ -2335,7 +2315,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
                           flags & XATTR_CREATE);
         brelse(bh);

-        if (!ext4_handle_has_enough_credits(handle, credits)) {
+        if (jbd2_handle_buffer_credits(handle) < credits) {
             error = -ENOSPC;
             goto cleanup;
         }
@@ -2862,11 +2842,9 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
             struct inode *ea_inode;
             int error;

-            error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
-                              NULL /* bh */,
-                              false /* dirty */,
-                              false /* block_csum */);
-            if (error) {
+            error = ext4_journal_ensure_credits(handle, extra_credits,
+                    ext4_free_metadata_revoke_credits(inode->i_sb, 1));
+            if (error < 0) {
                 EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
                 goto cleanup;
             }


@@ -110,7 +110,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
     int nblocks, space_left;
     /* assert_spin_locked(&journal->j_state_lock); */

-    nblocks = jbd2_space_needed(journal);
+    nblocks = journal->j_max_transaction_buffers;
     while (jbd2_log_space_left(journal) < nblocks) {
         write_unlock(&journal->j_state_lock);
         mutex_lock_io(&journal->j_checkpoint_mutex);


@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         if (jh->b_committed_data) {
             struct buffer_head *bh = jh2bh(jh);

-            jbd_lock_bh_state(bh);
+            spin_lock(&jh->b_state_lock);
             jbd2_free(jh->b_committed_data, bh->b_size);
             jh->b_committed_data = NULL;
-            jbd_unlock_bh_state(bh);
+            spin_unlock(&jh->b_state_lock);
         }
         jbd2_journal_refile_buffer(journal, jh);
     }
@@ -560,8 +560,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
     stats.run.rs_logging = jiffies;
     stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
                            stats.run.rs_logging);
-    stats.run.rs_blocks =
-        atomic_read(&commit_transaction->t_outstanding_credits);
+    stats.run.rs_blocks = commit_transaction->t_nr_buffers;
     stats.run.rs_blocks_logged = 0;

     J_ASSERT(commit_transaction->t_nr_buffers <=
@@ -642,8 +641,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         /*
          * start_this_handle() uses t_outstanding_credits to determine
-         * the free space in the log, but this counter is changed
-         * by jbd2_journal_next_log_block() also.
+         * the free space in the log.
          */
         atomic_dec(&commit_transaction->t_outstanding_credits);
@@ -727,7 +725,6 @@ start_journal_io:
                 submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
             }
             cond_resched();
-            stats.run.rs_blocks_logged += bufs;

             /* Force a new descriptor to be generated next
                time round the loop. */
@@ -814,6 +811,7 @@ start_journal_io:
         if (unlikely(!buffer_uptodate(bh)))
             err = -EIO;
         jbd2_unfile_log_bh(bh);
+        stats.run.rs_blocks_logged++;

         /*
          * The list contains temporary buffer heads created by
@@ -859,6 +857,7 @@ start_journal_io:
         BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
         clear_buffer_jwrite(bh);
         jbd2_unfile_log_bh(bh);
+        stats.run.rs_blocks_logged++;
         __brelse(bh);        /* One for getblk */
         /* AKPM: bforget here */
     }
@@ -880,6 +879,7 @@ start_journal_io:
     }
     if (cbh)
         err = journal_wait_on_commit_record(journal, cbh);
+    stats.run.rs_blocks_logged++;
     if (jbd2_has_feature_async_commit(journal) &&
         journal->j_flags & JBD2_BARRIER) {
         blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
@@ -888,6 +888,9 @@ start_journal_io:
     if (err)
         jbd2_journal_abort(journal, err);

+    WARN_ON_ONCE(
+        atomic_read(&commit_transaction->t_outstanding_credits) < 0);
+
     /*
      * Now disk caches for filesystem device are flushed so we are safe to
      * erase checkpointed transactions from the log by updating journal
@@ -918,6 +921,7 @@ restart_loop:
         transaction_t *cp_transaction;
         struct buffer_head *bh;
         int try_to_free = 0;
+        bool drop_ref;

         jh = commit_transaction->t_forget;
         spin_unlock(&journal->j_list_lock);
@@ -927,7 +931,7 @@ restart_loop:
          * done with it.
          */
         get_bh(bh);
-        jbd_lock_bh_state(bh);
+        spin_lock(&jh->b_state_lock);
         J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);

         /*
@@ -1022,8 +1026,10 @@ restart_loop:
             try_to_free = 1;
         }
         JBUFFER_TRACE(jh, "refile or unfile buffer");
-        __jbd2_journal_refile_buffer(jh);
-        jbd_unlock_bh_state(bh);
+        drop_ref = __jbd2_journal_refile_buffer(jh);
+        spin_unlock(&jh->b_state_lock);
+        if (drop_ref)
+            jbd2_journal_put_journal_head(jh);
         if (try_to_free)
             release_buffer_page(bh);    /* Drops bh reference */
         else
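The jbd_lock_bh_state()/jbd_unlock_bh_state() calls being replaced throughout were bit-spinlocks in the buffer_head's b_state word; this series moves that serialization into a real spinlock embedded in struct journal_head (initialised in journal_alloc_journal_head() in the next file). A sketch of the relevant part of the structure — the real definition has many more fields:

    struct journal_head {
        struct buffer_head *b_bh;
        spinlock_t b_state_lock;    /* replaces the BH_State bit-spinlock */
        /* ... */
    };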


@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
     /* keep subsequent assertions sane */
     atomic_set(&new_bh->b_count, 1);

-    jbd_lock_bh_state(bh_in);
+    spin_lock(&jh_in->b_state_lock);
 repeat:
     /*
      * If a new transaction has already done a buffer copy-out, then
@@ -405,13 +405,13 @@ repeat:
     if (need_copy_out && !done_copy_out) {
         char *tmp;

-        jbd_unlock_bh_state(bh_in);
+        spin_unlock(&jh_in->b_state_lock);
         tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
         if (!tmp) {
             brelse(new_bh);
             return -ENOMEM;
         }
-        jbd_lock_bh_state(bh_in);
+        spin_lock(&jh_in->b_state_lock);
         if (jh_in->b_frozen_data) {
             jbd2_free(tmp, bh_in->b_size);
             goto repeat;
@@ -464,7 +464,7 @@ repeat:
     __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
     spin_unlock(&journal->j_list_lock);
     set_buffer_shadow(bh_in);
-    jbd_unlock_bh_state(bh_in);
+    spin_unlock(&jh_in->b_state_lock);

     return do_escape | (done_copy_out << 1);
 }
@@ -840,6 +840,7 @@ jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
     bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
     if (!bh)
         return NULL;
+    atomic_dec(&transaction->t_outstanding_credits);
     lock_buffer(bh);
     memset(bh->b_data, 0, journal->j_blocksize);
     header = (journal_header_t *)bh->b_data;
@@ -1098,6 +1099,16 @@ static void jbd2_stats_proc_exit(journal_t *journal)
     remove_proc_entry(journal->j_devname, proc_jbd2_stats);
 }

+/* Minimum size of descriptor tag */
+static int jbd2_min_tag_size(void)
+{
+    /*
+     * Tag with 32-bit block numbers does not use last four bytes of the
+     * structure
+     */
+    return sizeof(journal_block_tag_t) - 4;
+}
+
 /*
  * Management for journal control blocks: functions to create and
  * destroy journal_t structures, and to initialise and read existing
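Why the minimum tag size matters for j_wbufsize: with the assumed on-disk layout (sizeof(journal_block_tag_t) == 12 in this kernel), a tag can be as small as 12 - 4 = 8 bytes, so a 4096-byte descriptor block can reference up to 4096 / 8 = 512 buffers. Sizing j_wbuf with jbd2_min_tag_size() (see the next hunk) therefore covers the worst case:

    /* Worked example, 4 KB journal blocks: */
    n = 4096 / jbd2_min_tag_size();    /* 4096 / 8 == 512 wbuf slots */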
@@ -1156,7 +1167,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
     journal->j_fs_dev = fs_dev;
     journal->j_blk_offset = start;
     journal->j_maxlen = len;
-    n = journal->j_blocksize / sizeof(journal_block_tag_t);
+    /* We need enough buffers to write out full descriptor block. */
+    n = journal->j_blocksize / jbd2_min_tag_size();
     journal->j_wbufsize = n;
     journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
                     GFP_KERNEL);
@@ -1488,6 +1500,21 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
 }
 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);

+static int journal_revoke_records_per_block(journal_t *journal)
+{
+    int record_size;
+    int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
+
+    if (jbd2_has_feature_64bit(journal))
+        record_size = 8;
+    else
+        record_size = 4;
+
+    if (jbd2_journal_has_csum_v2or3(journal))
+        space -= sizeof(struct jbd2_journal_block_tail);
+    return space / record_size;
+}
+
 /*
  * Read the superblock for a given journal, performing initial
  * validation of the format.
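For a common configuration this works out roughly as follows (the 16-byte revoke header and 4-byte block tail are assumptions about the jbd2 on-disk format, not shown in this hunk):

    /*
     * 4096-byte blocks, 64-bit block numbers, csum v2/v3 enabled:
     *     space   = 4096 - 16 - 4 = 4076
     *     records = 4076 / 8      = 509 revoke records per block
     * so reserving credits for N revoke records costs
     *     DIV_ROUND_UP(N, 509)
     * descriptor blocks on top of the handle's buffer credits.
     */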
@@ -1596,6 +1623,8 @@ static int journal_get_superblock(journal_t *journal)
                sizeof(sb->s_uuid));
     }

+    journal->j_revoke_records_per_block =
+                journal_revoke_records_per_block(journal);
     set_buffer_verified(bh);

     return 0;
@@ -1916,6 +1945,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
     sb->s_feature_ro_compat |= cpu_to_be32(ro);
     sb->s_feature_incompat |= cpu_to_be32(incompat);
     unlock_buffer(journal->j_sb_buffer);
+    journal->j_revoke_records_per_block =
+                journal_revoke_records_per_block(journal);

     return 1;
 #undef COMPAT_FEATURE_ON
@@ -1946,6 +1977,8 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
     sb->s_feature_compat    &= ~cpu_to_be32(compat);
     sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
     sb->s_feature_incompat  &= ~cpu_to_be32(incompat);
+    journal->j_revoke_records_per_block =
+                journal_revoke_records_per_block(journal);
 }
 EXPORT_SYMBOL(jbd2_journal_clear_features);
@@ -2410,6 +2443,8 @@ static struct journal_head *journal_alloc_journal_head(void)
         ret = kmem_cache_zalloc(jbd2_journal_head_cache,
                     GFP_NOFS | __GFP_NOFAIL);
     }
+    if (ret)
+        spin_lock_init(&ret->b_state_lock);
     return ret;
 }
@@ -2529,17 +2564,23 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
     J_ASSERT_BH(bh, buffer_jbd(bh));
     J_ASSERT_BH(bh, jh2bh(jh) == bh);
     BUFFER_TRACE(bh, "remove journal_head");
-    if (jh->b_frozen_data) {
-        printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
-        jbd2_free(jh->b_frozen_data, bh->b_size);
-    }
-    if (jh->b_committed_data) {
-        printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
-        jbd2_free(jh->b_committed_data, bh->b_size);
-    }
+
+    /* Unlink before dropping the lock */
     bh->b_private = NULL;
     jh->b_bh = NULL;    /* debug, really */
     clear_buffer_jbd(bh);
+}
+
+static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
+{
+    if (jh->b_frozen_data) {
+        printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+        jbd2_free(jh->b_frozen_data, b_size);
+    }
+    if (jh->b_committed_data) {
+        printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+        jbd2_free(jh->b_committed_data, b_size);
+    }
     journal_free_journal_head(jh);
 }

@@ -2557,9 +2598,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
     if (!jh->b_jcount) {
         __journal_remove_journal_head(bh);
         jbd_unlock_bh_journal_head(bh);
+        journal_release_journal_head(jh, bh->b_size);
         __brelse(bh);
-    } else
+    } else {
         jbd_unlock_bh_journal_head(bh);
+    }
 }

 /*


@@ -371,6 +371,11 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
     }
 #endif

+    if (WARN_ON_ONCE(handle->h_revoke_credits <= 0)) {
+        if (!bh_in)
+            brelse(bh);
+        return -EIO;
+    }
     /* We really ought not ever to revoke twice in a row without
        first having the revoke cancelled: it's illegal to free a
        block twice without allocating it in between! */
@@ -391,6 +396,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
             __brelse(bh);
         }
     }
+    handle->h_revoke_credits--;

     jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
     err = insert_revoke_hash(journal, blocknr,
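With the checks above, a handle can only revoke blocks it has reserved revoke records for; the reservation is made when the handle is started. A sketch of the updated calling convention (the 8 buffer credits are an arbitrary illustrative value; the parameter order follows the jbd2__journal_start() signature change later in this series):

    handle = jbd2__journal_start(journal, 8 /* buffer credits */,
                     0 /* rsv_blocks */, nr_to_free /* revoke records */,
                     GFP_NOFS, 0, 0);
    ...
    /* each call consumes one of the nr_to_free revoke credits */
    err = jbd2_journal_revoke(handle, blocknr, bh);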

View File

@@ -62,6 +62,28 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
     kmem_cache_free(transaction_cache, transaction);
 }

+/*
+ * Base amount of descriptor blocks we reserve for each transaction.
+ */
+static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
+{
+    int tag_space = journal->j_blocksize - sizeof(journal_header_t);
+    int tags_per_block;
+
+    /* Subtract UUID */
+    tag_space -= 16;
+    if (jbd2_journal_has_csum_v2or3(journal))
+        tag_space -= sizeof(struct jbd2_journal_block_tail);
+    /* Commit code leaves a slack space of 16 bytes at the end of block */
+    tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
+    /*
+     * Revoke descriptors are accounted separately so we need to reserve
+     * space for commit block and normal transaction descriptor blocks.
+     */
+    return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
+                tags_per_block);
+}
+
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
  *
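A worked instance of the reservation above, assuming a 12-byte journal_header_t and 12-byte tags (64-bit block numbers, no checksums):

    /*
     * 4096-byte blocks:
     *     tag_space      = 4096 - 12 - 16   = 4068
     *     tags_per_block = (4068 - 16) / 12 = 337
     * with j_max_transaction_buffers == 8192:
     *     1 + DIV_ROUND_UP(8192, 337) = 26
     * descriptor-plus-commit blocks are pre-charged to
     * t_outstanding_credits, so the commit code can no longer run the
     * log out of space for its own bookkeeping blocks.
     */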
@ -88,7 +110,9 @@ static void jbd2_get_transaction(journal_t *journal,
spin_lock_init(&transaction->t_handle_lock); spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0); atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits, atomic_set(&transaction->t_outstanding_credits,
jbd2_descriptor_blocks_per_trans(journal) +
atomic_read(&journal->j_reserved_credits)); atomic_read(&journal->j_reserved_credits));
atomic_set(&transaction->t_outstanding_revokes, 0);
atomic_set(&transaction->t_handle_count, 0); atomic_set(&transaction->t_handle_count, 0);
INIT_LIST_HEAD(&transaction->t_inode_list); INIT_LIST_HEAD(&transaction->t_inode_list);
INIT_LIST_HEAD(&transaction->t_private_list); INIT_LIST_HEAD(&transaction->t_private_list);
@ -258,12 +282,13 @@ static int add_transaction_credits(journal_t *journal, int blocks,
* *before* starting to dirty potentially checkpointed buffers * *before* starting to dirty potentially checkpointed buffers
* in the new transaction. * in the new transaction.
*/ */
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
atomic_sub(total, &t->t_outstanding_credits); atomic_sub(total, &t->t_outstanding_credits);
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
jbd2_might_wait_for_commit(journal); jbd2_might_wait_for_commit(journal);
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) if (jbd2_log_space_left(journal) <
journal->j_max_transaction_buffers)
__jbd2_log_wait_for_space(journal); __jbd2_log_wait_for_space(journal);
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 1; return 1;
@ -299,12 +324,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
gfp_t gfp_mask) gfp_t gfp_mask)
{ {
transaction_t *transaction, *new_transaction = NULL; transaction_t *transaction, *new_transaction = NULL;
int blocks = handle->h_buffer_credits; int blocks = handle->h_total_credits;
int rsv_blocks = 0; int rsv_blocks = 0;
unsigned long ts = jiffies; unsigned long ts = jiffies;
if (handle->h_rsv_handle) if (handle->h_rsv_handle)
rsv_blocks = handle->h_rsv_handle->h_buffer_credits; rsv_blocks = handle->h_rsv_handle->h_total_credits;
/* /*
* Limit the number of reserved credits to 1/2 of maximum transaction * Limit the number of reserved credits to 1/2 of maximum transaction
@ -405,6 +430,7 @@ repeat:
update_t_max_wait(transaction, ts); update_t_max_wait(transaction, ts);
handle->h_transaction = transaction; handle->h_transaction = transaction;
handle->h_requested_credits = blocks; handle->h_requested_credits = blocks;
handle->h_revoke_credits_requested = handle->h_revoke_credits;
handle->h_start_jiffies = jiffies; handle->h_start_jiffies = jiffies;
atomic_inc(&transaction->t_updates); atomic_inc(&transaction->t_updates);
atomic_inc(&transaction->t_handle_count); atomic_inc(&transaction->t_handle_count);
@ -431,15 +457,15 @@ static handle_t *new_handle(int nblocks)
handle_t *handle = jbd2_alloc_handle(GFP_NOFS); handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
if (!handle) if (!handle)
return NULL; return NULL;
handle->h_buffer_credits = nblocks; handle->h_total_credits = nblocks;
handle->h_ref = 1; handle->h_ref = 1;
return handle; return handle;
} }
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
gfp_t gfp_mask, unsigned int type, int revoke_records, gfp_t gfp_mask,
unsigned int line_no) unsigned int type, unsigned int line_no)
{ {
handle_t *handle = journal_current_handle(); handle_t *handle = journal_current_handle();
int err; int err;
@ -453,6 +479,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
return handle; return handle;
} }
nblocks += DIV_ROUND_UP(revoke_records,
journal->j_revoke_records_per_block);
handle = new_handle(nblocks); handle = new_handle(nblocks);
if (!handle) if (!handle)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
@ -468,6 +496,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
rsv_handle->h_journal = journal; rsv_handle->h_journal = journal;
handle->h_rsv_handle = rsv_handle; handle->h_rsv_handle = rsv_handle;
} }
handle->h_revoke_credits = revoke_records;
err = start_this_handle(journal, handle, gfp_mask); err = start_this_handle(journal, handle, gfp_mask);
if (err < 0) { if (err < 0) {
@ -508,16 +537,21 @@ EXPORT_SYMBOL(jbd2__journal_start);
*/ */
handle_t *jbd2_journal_start(journal_t *journal, int nblocks) handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{ {
return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
} }
EXPORT_SYMBOL(jbd2_journal_start); EXPORT_SYMBOL(jbd2_journal_start);
void jbd2_journal_free_reserved(handle_t *handle) static void __jbd2_journal_unreserve_handle(handle_t *handle)
{ {
journal_t *journal = handle->h_journal; journal_t *journal = handle->h_journal;
WARN_ON(!handle->h_reserved); WARN_ON(!handle->h_reserved);
sub_reserved_credits(journal, handle->h_buffer_credits); sub_reserved_credits(journal, handle->h_total_credits);
}
void jbd2_journal_free_reserved(handle_t *handle)
{
__jbd2_journal_unreserve_handle(handle);
jbd2_free_handle(handle); jbd2_free_handle(handle);
} }
EXPORT_SYMBOL(jbd2_journal_free_reserved); EXPORT_SYMBOL(jbd2_journal_free_reserved);
@ -571,7 +605,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
handle->h_line_no = line_no; handle->h_line_no = line_no;
trace_jbd2_handle_start(journal->j_fs_dev->bd_dev, trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
handle->h_transaction->t_tid, type, handle->h_transaction->t_tid, type,
line_no, handle->h_buffer_credits); line_no, handle->h_total_credits);
return 0; return 0;
} }
EXPORT_SYMBOL(jbd2_journal_start_reserved); EXPORT_SYMBOL(jbd2_journal_start_reserved);
@ -580,6 +614,7 @@ EXPORT_SYMBOL(jbd2_journal_start_reserved);
* int jbd2_journal_extend() - extend buffer credits. * int jbd2_journal_extend() - extend buffer credits.
* @handle: handle to 'extend' * @handle: handle to 'extend'
* @nblocks: nr blocks to try to extend by. * @nblocks: nr blocks to try to extend by.
* @revoke_records: number of revoke records to try to extend by.
* *
* Some transactions, such as large extends and truncates, can be done * Some transactions, such as large extends and truncates, can be done
* atomically all at once or in several stages. The operation requests * atomically all at once or in several stages. The operation requests
@ -596,7 +631,7 @@ EXPORT_SYMBOL(jbd2_journal_start_reserved);
* return code < 0 implies an error * return code < 0 implies an error
* return code > 0 implies normal transaction-full status. * return code > 0 implies normal transaction-full status.
*/ */
int jbd2_journal_extend(handle_t *handle, int nblocks) int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal; journal_t *journal;
@ -618,6 +653,12 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
goto error_out; goto error_out;
} }
nblocks += DIV_ROUND_UP(
handle->h_revoke_credits_requested + revoke_records,
journal->j_revoke_records_per_block) -
DIV_ROUND_UP(
handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block);
spin_lock(&transaction->t_handle_lock); spin_lock(&transaction->t_handle_lock);
wanted = atomic_add_return(nblocks, wanted = atomic_add_return(nblocks,
&transaction->t_outstanding_credits); &transaction->t_outstanding_credits);
@ -629,22 +670,16 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
goto unlock; goto unlock;
} }
if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
jbd2_log_space_left(journal)) {
jbd_debug(3, "denied handle %p %d blocks: "
"insufficient log space\n", handle, nblocks);
atomic_sub(nblocks, &transaction->t_outstanding_credits);
goto unlock;
}
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
transaction->t_tid, transaction->t_tid,
handle->h_type, handle->h_line_no, handle->h_type, handle->h_line_no,
handle->h_buffer_credits, handle->h_total_credits,
nblocks); nblocks);
handle->h_buffer_credits += nblocks; handle->h_total_credits += nblocks;
handle->h_requested_credits += nblocks; handle->h_requested_credits += nblocks;
handle->h_revoke_credits += revoke_records;
handle->h_revoke_credits_requested += revoke_records;
result = 0; result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
@ -655,11 +690,55 @@ error_out:
return result; return result;
} }
static void stop_this_handle(handle_t *handle)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
int revokes;
J_ASSERT(journal_current_handle() == handle);
J_ASSERT(atomic_read(&transaction->t_updates) > 0);
current->journal_info = NULL;
/*
* Subtract necessary revoke descriptor blocks from handle credits. We
* take care to account only for revoke descriptor blocks the
* transaction will really need as large sequences of transactions with
* small numbers of revokes are relatively common.
*/
revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
if (revokes) {
int t_revokes, revoke_descriptors;
int rr_per_blk = journal->j_revoke_records_per_block;
WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
> handle->h_total_credits);
t_revokes = atomic_add_return(revokes,
&transaction->t_outstanding_revokes);
revoke_descriptors =
DIV_ROUND_UP(t_revokes, rr_per_blk) -
DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
handle->h_total_credits -= revoke_descriptors;
}
atomic_sub(handle->h_total_credits,
&transaction->t_outstanding_credits);
if (handle->h_rsv_handle)
__jbd2_journal_unreserve_handle(handle->h_rsv_handle);
if (atomic_dec_and_test(&transaction->t_updates))
wake_up(&journal->j_wait_updates);
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
/*
* Scope of the GFP_NOFS context is over here and so we can restore the
* original alloc context.
*/
memalloc_nofs_restore(handle->saved_alloc_context);
}
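
As the comment in stop_this_handle() explains, a handle is charged only for the descriptor blocks its revokes add on top of what the transaction already accumulated, so consecutive small-revoke handles share descriptor blocks. A userspace model of that delta accounting (per-block capacity again an assumed value):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Model of the accounting in stop_this_handle(): given the revoke
 * records already accumulated in the transaction and the records this
 * handle used, return how many *new* descriptor blocks they cost. */
static int new_revoke_descriptors(int t_revokes_before, int revokes,
                                  int rr_per_blk)
{
    int t_revokes = t_revokes_before + revokes;

    return DIV_ROUND_UP(t_revokes, rr_per_blk) -
           DIV_ROUND_UP(t_revokes_before, rr_per_blk);
}

int main(void)
{
    int rr_per_blk = 505;   /* assumed records-per-block capacity */

    /* Ten handles with 100 revokes each mostly share descriptor
     * blocks instead of each paying for a full one. */
    int total = 0, blocks = 0;
    for (int i = 0; i < 10; i++) {
        blocks += new_revoke_descriptors(total, 100, rr_per_blk);
        total += 100;
    }
    printf("%d revokes -> %d descriptor blocks\n", total, blocks);
    return 0;
}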
/** /**
* int jbd2_journal_restart() - restart a handle . * int jbd2_journal_restart() - restart a handle .
* @handle: handle to restart * @handle: handle to restart
* @nblocks: nr credits requested * @nblocks: nr credits requested
* @revoke_records: number of revoke record credits requested
* @gfp_mask: memory allocation flags (for start_this_handle) * @gfp_mask: memory allocation flags (for start_this_handle)
* *
* Restart a handle for a multi-transaction filesystem * Restart a handle for a multi-transaction filesystem
@ -672,56 +751,48 @@ error_out:
* credits. We preserve reserved handle if there's any attached to the * credits. We preserve reserved handle if there's any attached to the
* passed in handle. * passed in handle.
*/ */
int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
gfp_t gfp_mask)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal; journal_t *journal;
tid_t tid; tid_t tid;
int need_to_start, ret; int need_to_start;
int ret;
/* If we've had an abort of any type, don't even think about /* If we've had an abort of any type, don't even think about
* actually doing the restart! */ * actually doing the restart! */
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
return 0; return 0;
journal = transaction->t_journal; journal = transaction->t_journal;
tid = transaction->t_tid;
/* /*
* First unlink the handle from its current transaction, and start the * First unlink the handle from its current transaction, and start the
* commit on that. * commit on that.
*/ */
J_ASSERT(atomic_read(&transaction->t_updates) > 0);
J_ASSERT(journal_current_handle() == handle);
read_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock);
atomic_sub(handle->h_buffer_credits,
&transaction->t_outstanding_credits);
if (handle->h_rsv_handle) {
sub_reserved_credits(journal,
handle->h_rsv_handle->h_buffer_credits);
}
if (atomic_dec_and_test(&transaction->t_updates))
wake_up(&journal->j_wait_updates);
tid = transaction->t_tid;
spin_unlock(&transaction->t_handle_lock);
handle->h_transaction = NULL;
current->journal_info = NULL;
jbd_debug(2, "restarting handle %p\n", handle); jbd_debug(2, "restarting handle %p\n", handle);
stop_this_handle(handle);
handle->h_transaction = NULL;
/*
* TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
* get rid of pointless j_state_lock traffic like this.
*/
read_lock(&journal->j_state_lock);
need_to_start = !tid_geq(journal->j_commit_request, tid); need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
if (need_to_start) if (need_to_start)
jbd2_log_start_commit(journal, tid); jbd2_log_start_commit(journal, tid);
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
handle->h_buffer_credits = nblocks;
/*
 * Restore the original nofs context because the journal restart
 * is basically the same thing as journal stop and start.
 * start_this_handle will start a new nofs context.
 */
memalloc_nofs_restore(handle->saved_alloc_context);

handle->h_total_credits = nblocks +
	DIV_ROUND_UP(revoke_records,
		     journal->j_revoke_records_per_block);
handle->h_revoke_credits = revoke_records;
ret = start_this_handle(journal, handle, gfp_mask); ret = start_this_handle(journal, handle, gfp_mask);
trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
ret ? 0 : handle->h_transaction->t_tid,
handle->h_type, handle->h_line_no,
handle->h_total_credits);
return ret; return ret;
} }
EXPORT_SYMBOL(jbd2__journal_restart); EXPORT_SYMBOL(jbd2__journal_restart);
@ -729,7 +800,7 @@ EXPORT_SYMBOL(jbd2__journal_restart);
int jbd2_journal_restart(handle_t *handle, int nblocks) int jbd2_journal_restart(handle_t *handle, int nblocks)
{ {
return jbd2__journal_restart(handle, nblocks, GFP_NOFS); return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
} }
EXPORT_SYMBOL(jbd2_journal_restart); EXPORT_SYMBOL(jbd2_journal_restart);
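
Per the jbd2_journal_extend() kernel-doc earlier in this file, a positive return means transaction-full and the usual fallback is a restart. A hedged sketch of that calling convention, with stub bodies invented so the example stands alone:

#include <stdio.h>

typedef struct handle { int h_total_credits; } handle_t;

/* Stubs standing in for the real jbd2 calls; the return conventions
 * match the kernel-doc above: < 0 error, 0 extended, > 0 transaction
 * full. The bodies are invented for illustration. */
static int jbd2_journal_extend(handle_t *h, int nblocks, int revoke_records)
{
    return 1;   /* pretend the running transaction is full */
}

static int jbd2_journal_restart(handle_t *h, int nblocks)
{
    h->h_total_credits = nblocks;   /* new transaction, fresh credits */
    return 0;
}

/* Typical caller pattern: try to extend in place, restart on "full". */
static int ensure_credits(handle_t *h, int nblocks)
{
    int err = jbd2_journal_extend(h, nblocks, 0);

    if (err < 0)
        return err;
    if (err > 0)
        err = jbd2_journal_restart(h, nblocks);
    return err;
}

int main(void)
{
    handle_t h = { .h_total_credits = 2 };

    printf("ensure_credits -> %d\n", ensure_credits(&h, 8));
    return 0;
}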
@ -879,7 +950,7 @@ repeat:
start_lock = jiffies; start_lock = jiffies;
lock_buffer(bh); lock_buffer(bh);
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
/* If it takes too long to lock the buffer, trace it */ /* If it takes too long to lock the buffer, trace it */
time_lock = jbd2_time_diff(start_lock, jiffies); time_lock = jbd2_time_diff(start_lock, jiffies);
@ -929,7 +1000,7 @@ repeat:
error = -EROFS; error = -EROFS;
if (is_handle_aborted(handle)) { if (is_handle_aborted(handle)) {
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
goto out; goto out;
} }
error = 0; error = 0;
@ -993,7 +1064,7 @@ repeat:
*/ */
if (buffer_shadow(bh)) { if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep"); JBUFFER_TRACE(jh, "on shadow: sleep");
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
goto repeat; goto repeat;
} }
@ -1014,7 +1085,7 @@ repeat:
JBUFFER_TRACE(jh, "generate frozen data"); JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) { if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer"); JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS | __GFP_NOFAIL); GFP_NOFS | __GFP_NOFAIL);
goto repeat; goto repeat;
@ -1033,7 +1104,7 @@ attach_next:
jh->b_next_transaction = transaction; jh->b_next_transaction = transaction;
done: done:
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
/* /*
* If we are about to journal a buffer, then any revoke pending on it is * If we are about to journal a buffer, then any revoke pending on it is
@ -1172,7 +1243,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* that case: the transaction must have deleted the buffer for it to be * that case: the transaction must have deleted the buffer for it to be
* reused here. * reused here.
*/ */
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, (jh->b_transaction == transaction || J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
jh->b_transaction == NULL || jh->b_transaction == NULL ||
(jh->b_transaction == journal->j_committing_transaction && (jh->b_transaction == journal->j_committing_transaction &&
@ -1207,7 +1278,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction = transaction; jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} }
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
/* /*
* akpm: I added this. ext3_alloc_branch can pick up new indirect * akpm: I added this. ext3_alloc_branch can pick up new indirect
@ -1275,13 +1346,13 @@ repeat:
committed_data = jbd2_alloc(jh2bh(jh)->b_size, committed_data = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS|__GFP_NOFAIL); GFP_NOFS|__GFP_NOFAIL);
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
if (!jh->b_committed_data) { if (!jh->b_committed_data) {
/* Copy out the current buffer contents into the /* Copy out the current buffer contents into the
* preserved, committed copy. */ * preserved, committed copy. */
JBUFFER_TRACE(jh, "generate b_committed data"); JBUFFER_TRACE(jh, "generate b_committed data");
if (!committed_data) { if (!committed_data) {
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
goto repeat; goto repeat;
} }
@ -1289,7 +1360,7 @@ repeat:
committed_data = NULL; committed_data = NULL;
memcpy(jh->b_committed_data, bh->b_data, bh->b_size); memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
} }
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
out: out:
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
if (unlikely(committed_data)) if (unlikely(committed_data))
@ -1390,16 +1461,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/ */
if (jh->b_transaction != transaction && if (jh->b_transaction != transaction &&
jh->b_next_transaction != transaction) { jh->b_next_transaction != transaction) {
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction || J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction); jh->b_next_transaction == transaction);
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
} }
if (jh->b_modified == 1) { if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */ /* If it's in our transaction it must be in BJ_Metadata list. */
if (jh->b_transaction == transaction && if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) { jh->b_jlist != BJ_Metadata) {
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction && if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) jh->b_jlist != BJ_Metadata)
pr_err("JBD2: assertion failure: h_type=%u " pr_err("JBD2: assertion failure: h_type=%u "
@ -1409,13 +1480,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
jh->b_jlist); jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction || J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata); jh->b_jlist == BJ_Metadata);
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
} }
goto out; goto out;
} }
journal = transaction->t_journal; journal = transaction->t_journal;
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
if (jh->b_modified == 0) { if (jh->b_modified == 0) {
/* /*
@ -1423,12 +1494,12 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* of the transaction. This needs to be done * of the transaction. This needs to be done
* once a transaction -bzzz * once a transaction -bzzz
*/ */
if (handle->h_buffer_credits <= 0) { if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
ret = -ENOSPC; ret = -ENOSPC;
goto out_unlock_bh; goto out_unlock_bh;
} }
jh->b_modified = 1; jh->b_modified = 1;
handle->h_buffer_credits--; handle->h_total_credits--;
} }
/* /*
@ -1501,7 +1572,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
out_unlock_bh: out_unlock_bh:
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
out: out:
JBUFFER_TRACE(jh, "exit"); JBUFFER_TRACE(jh, "exit");
return ret; return ret;
@ -1539,18 +1610,20 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
BUFFER_TRACE(bh, "entry"); BUFFER_TRACE(bh, "entry");
jbd_lock_bh_state(bh);

if (!buffer_jbd(bh))
	goto not_jbd;
jh = bh2jh(bh);

jh = jbd2_journal_grab_journal_head(bh);
if (!jh) {
	__bforget(bh);
	return 0;
}

spin_lock(&jh->b_state_lock);
/* Critical error: attempting to delete a bitmap buffer, maybe? /* Critical error: attempting to delete a bitmap buffer, maybe?
* Don't do any jbd operations, and return an error. */ * Don't do any jbd operations, and return an error. */
if (!J_EXPECT_JH(jh, !jh->b_committed_data, if (!J_EXPECT_JH(jh, !jh->b_committed_data,
"inconsistent data on disk")) { "inconsistent data on disk")) {
err = -EIO; err = -EIO;
goto not_jbd; goto drop;
} }
/* keep track of whether or not this transaction modified us */ /* keep track of whether or not this transaction modified us */
@ -1598,10 +1671,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget); __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
} else { } else {
__jbd2_journal_unfile_buffer(jh); __jbd2_journal_unfile_buffer(jh);
if (!buffer_jbd(bh)) {
	spin_unlock(&journal->j_list_lock);
	goto not_jbd;
}

jbd2_journal_put_journal_head(jh);
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction) { } else if (jh->b_transaction) {
@ -1643,7 +1713,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (!jh->b_cp_transaction) { if (!jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "belongs to none transaction"); JBUFFER_TRACE(jh, "belongs to none transaction");
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
goto not_jbd; goto drop;
} }
/* /*
@ -1653,7 +1723,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (!buffer_dirty(bh)) { if (!buffer_dirty(bh)) {
__jbd2_journal_remove_checkpoint(jh); __jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
goto not_jbd; goto drop;
} }
/* /*
@ -1666,20 +1736,15 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget); __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} }
jbd_unlock_bh_state(bh);
__brelse(bh);
drop:
if (drop_reserve) {
	/* no need to reserve log space for this block -bzzz */
	handle->h_buffer_credits++;
}
return err;

not_jbd:
jbd_unlock_bh_state(bh);
__bforget(bh);
goto drop;

drop:
__brelse(bh);
spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
if (drop_reserve) {
	/* no need to reserve log space for this block -bzzz */
	handle->h_total_credits++;
}
return err;
} }
/** /**
@ -1706,45 +1771,34 @@ int jbd2_journal_stop(handle_t *handle)
tid_t tid; tid_t tid;
pid_t pid; pid_t pid;
if (--handle->h_ref > 0) {
jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
handle->h_ref);
if (is_handle_aborted(handle))
return -EIO;
return 0;
}
if (!transaction) {
	/*
	 * Handle is already detached from the transaction so
	 * there is nothing to do other than decrease a refcount,
	 * or free the handle if refcount drops to zero
	 */
	if (--handle->h_ref > 0) {
		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
			  handle->h_ref);
		return err;
	} else {
		if (handle->h_rsv_handle)
			jbd2_free_handle(handle->h_rsv_handle);
		goto free_and_exit;
	}
}
journal = transaction->t_journal;

J_ASSERT(journal_current_handle() == handle);

if (is_handle_aborted(handle))
	err = -EIO;
else
	J_ASSERT(atomic_read(&transaction->t_updates) > 0);

if (--handle->h_ref > 0) {
	jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
		  handle->h_ref);
	return err;
}

jbd_debug(4, "Handle %p going down\n", handle);
trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
			transaction->t_tid,
			handle->h_type, handle->h_line_no,
			jiffies - handle->h_start_jiffies,
			handle->h_sync, handle->h_requested_credits,
			(handle->h_requested_credits -
			 handle->h_buffer_credits));

if (!transaction) {
	/*
	 * Handle is already detached from the transaction so there is
	 * nothing to do other than free the handle.
	 */
	memalloc_nofs_restore(handle->saved_alloc_context);
	goto free_and_exit;
}
journal = transaction->t_journal;
tid = transaction->t_tid;

if (is_handle_aborted(handle))
	err = -EIO;

jbd_debug(4, "Handle %p going down\n", handle);
trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
			tid, handle->h_type, handle->h_line_no,
			jiffies - handle->h_start_jiffies,
			handle->h_sync, handle->h_requested_credits,
			(handle->h_requested_credits -
			 handle->h_total_credits));
/* /*
* Implement synchronous transaction batching. If the handle * Implement synchronous transaction batching. If the handle
@ -1804,19 +1858,13 @@ int jbd2_journal_stop(handle_t *handle)
if (handle->h_sync) if (handle->h_sync)
transaction->t_synchronous_commit = 1; transaction->t_synchronous_commit = 1;
current->journal_info = NULL;
atomic_sub(handle->h_buffer_credits,
&transaction->t_outstanding_credits);
/* /*
* If the handle is marked SYNC, we need to set another commit * If the handle is marked SYNC, we need to set another commit
* going! We also want to force a commit if the current * going! We also want to force a commit if the transaction is too
* transaction is occupying too much of the log, or if the * old now.
* transaction is too old now.
*/ */
if (handle->h_sync || if (handle->h_sync ||
(atomic_read(&transaction->t_outstanding_credits) >
journal->j_max_transaction_buffers) ||
time_after_eq(jiffies, transaction->t_expires)) { time_after_eq(jiffies, transaction->t_expires)) {
/* Do this even for aborted journals: an abort still /* Do this even for aborted journals: an abort still
* completes the commit thread, it just doesn't write * completes the commit thread, it just doesn't write
@ -1825,7 +1873,7 @@ int jbd2_journal_stop(handle_t *handle)
jbd_debug(2, "transaction too old, requesting commit for " jbd_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle); "handle %p\n", handle);
/* This is non-blocking */ /* This is non-blocking */
jbd2_log_start_commit(journal, transaction->t_tid); jbd2_log_start_commit(journal, tid);
/* /*
* Special case: JBD2_SYNC synchronous updates require us * Special case: JBD2_SYNC synchronous updates require us
@ -1836,31 +1884,19 @@ int jbd2_journal_stop(handle_t *handle)
} }
/*
 * Once we drop t_updates, if it goes to zero the transaction
 * could start committing on us and eventually disappear.  So
 * once we do this, we must not dereference transaction
 * pointer again.
 */
tid = transaction->t_tid;
if (atomic_dec_and_test(&transaction->t_updates)) {
	wake_up(&journal->j_wait_updates);
	if (journal->j_barrier_count)
		wake_up(&journal->j_wait_transaction_locked);
}
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);

if (wait_for_commit)
	err = jbd2_log_wait_commit(journal, tid);

if (handle->h_rsv_handle)
	jbd2_journal_free_reserved(handle->h_rsv_handle);
free_and_exit:
/*
 * Scope of the GFP_NOFS context is over here and so we can restore the
 * original alloc context.
 */
memalloc_nofs_restore(handle->saved_alloc_context);
jbd2_free_handle(handle);
return err;

/*
 * Once stop_this_handle() drops t_updates, the transaction could start
 * committing on us and eventually disappear.  So we must not
 * dereference transaction pointer again after calling
 * stop_this_handle().
 */
stop_this_handle(handle);

if (wait_for_commit)
	err = jbd2_log_wait_commit(journal, tid);

free_and_exit:
if (handle->h_rsv_handle)
	jbd2_free_handle(handle->h_rsv_handle);
jbd2_free_handle(handle);
return err;
} }
@ -1878,7 +1914,7 @@ free_and_exit:
* *
* j_list_lock is held. * j_list_lock is held.
* *
* jbd_lock_bh_state(jh2bh(jh)) is held. * jh->b_state_lock is held.
*/ */
static inline void static inline void
@ -1902,7 +1938,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
* *
* Called with j_list_lock held, and the journal may not be locked. * Called with j_list_lock held, and the journal may not be locked.
* *
* jbd_lock_bh_state(jh2bh(jh)) is held. * jh->b_state_lock is held.
*/ */
static inline void static inline void
@ -1934,7 +1970,7 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
transaction_t *transaction; transaction_t *transaction;
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); lockdep_assert_held(&jh->b_state_lock);
transaction = jh->b_transaction; transaction = jh->b_transaction;
if (transaction) if (transaction)
assert_spin_locked(&transaction->t_journal->j_list_lock); assert_spin_locked(&transaction->t_journal->j_list_lock);
@ -1971,17 +2007,15 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
} }
/*
 * Remove buffer from all transactions.
 *
 * Called with bh_state lock and j_list_lock
 *
 * jh and bh may be already freed when this function returns.
 */

/*
 * Remove buffer from all transactions. The caller is responsible for dropping
 * the jh reference that belonged to the transaction.
 *
 * Called with bh_state lock and j_list_lock
 */
static void __jbd2_journal_unfile_buffer(struct journal_head *jh) static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{ {
__jbd2_journal_temp_unlink_buffer(jh); __jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL; jh->b_transaction = NULL;
jbd2_journal_put_journal_head(jh);
} }
void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
@ -1990,18 +2024,19 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
/* Get reference so that buffer cannot be freed before we unlock it */ /* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh); get_bh(bh);
jbd_lock_bh_state(bh); spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
__jbd2_journal_unfile_buffer(jh); __jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
__brelse(bh); __brelse(bh);
} }
/* /*
* Called from jbd2_journal_try_to_free_buffers(). * Called from jbd2_journal_try_to_free_buffers().
* *
* Called under jbd_lock_bh_state(bh) * Called under jh->b_state_lock
*/ */
static void static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
@ -2088,10 +2123,10 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
if (!jh) if (!jh)
continue; continue;
jbd_lock_bh_state(bh);
__journal_try_to_free_buffer(journal, bh);
jbd2_journal_put_journal_head(jh);
jbd_unlock_bh_state(bh);

spin_lock(&jh->b_state_lock);
__journal_try_to_free_buffer(journal, bh);
spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
if (buffer_jbd(bh)) if (buffer_jbd(bh))
goto busy; goto busy;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
@ -2112,7 +2147,7 @@ busy:
* *
* Called under j_list_lock. * Called under j_list_lock.
* *
* Called under jbd_lock_bh_state(bh). * Called under jh->b_state_lock.
*/ */
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{ {
@ -2133,6 +2168,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
} else { } else {
JBUFFER_TRACE(jh, "on running transaction"); JBUFFER_TRACE(jh, "on running transaction");
__jbd2_journal_unfile_buffer(jh); __jbd2_journal_unfile_buffer(jh);
jbd2_journal_put_journal_head(jh);
} }
return may_free; return may_free;
} }
@ -2199,18 +2235,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
* holding the page lock. --sct * holding the page lock. --sct
*/ */
if (!buffer_jbd(bh))
	goto zap_buffer_unlocked;

/* OK, we have data buffer in journaled mode */
write_lock(&journal->j_state_lock);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);

jh = jbd2_journal_grab_journal_head(bh);
if (!jh)
	goto zap_buffer_no_jh;

jh = jbd2_journal_grab_journal_head(bh);
if (!jh)
	goto zap_buffer_unlocked;

/* OK, we have data buffer in journaled mode */
write_lock(&journal->j_state_lock);
spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
/* /*
* We cannot remove the buffer from checkpoint lists until the * We cannot remove the buffer from checkpoint lists until the
* transaction adding inode to orphan list (let's call it T) * transaction adding inode to orphan list (let's call it T)
@ -2289,10 +2322,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
* for commit and try again. * for commit and try again.
*/ */
if (partial_page) { if (partial_page) {
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);

spin_unlock(&journal->j_list_lock);
spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
jbd2_journal_put_journal_head(jh);
return -EBUSY; return -EBUSY;
} }
/* /*
@ -2304,10 +2337,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
set_buffer_freed(bh); set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh)) if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction; jh->b_next_transaction = journal->j_running_transaction;
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);

spin_unlock(&journal->j_list_lock);
spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
jbd2_journal_put_journal_head(jh);
return 0; return 0;
} else { } else {
/* Good, the buffer belongs to the running transaction. /* Good, the buffer belongs to the running transaction.
@ -2331,11 +2364,10 @@ zap_buffer:
* here. * here.
*/ */
jh->b_modified = 0; jh->b_modified = 0;
jbd2_journal_put_journal_head(jh);
zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);

spin_unlock(&journal->j_list_lock);
spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
jbd2_journal_put_journal_head(jh);
zap_buffer_unlocked: zap_buffer_unlocked:
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh)); J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@ -2422,7 +2454,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
int was_dirty = 0; int was_dirty = 0;
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); lockdep_assert_held(&jh->b_state_lock);
assert_spin_locked(&transaction->t_journal->j_list_lock); assert_spin_locked(&transaction->t_journal->j_list_lock);
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
@ -2484,11 +2516,11 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
void jbd2_journal_file_buffer(struct journal_head *jh, void jbd2_journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist) transaction_t *transaction, int jlist)
{ {
jbd_lock_bh_state(jh2bh(jh)); spin_lock(&jh->b_state_lock);
spin_lock(&transaction->t_journal->j_list_lock); spin_lock(&transaction->t_journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, jlist); __jbd2_journal_file_buffer(jh, transaction, jlist);
spin_unlock(&transaction->t_journal->j_list_lock); spin_unlock(&transaction->t_journal->j_list_lock);
jbd_unlock_bh_state(jh2bh(jh)); spin_unlock(&jh->b_state_lock);
} }
/* /*
@ -2498,23 +2530,25 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
* buffer on that transaction's metadata list. * buffer on that transaction's metadata list.
* *
* Called under j_list_lock * Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh)) * Called under jh->b_state_lock
* *
* jh and bh may be already free when this function returns

* When this function returns true, there's no next transaction to refile to
* and the caller has to drop jh reference through
* jbd2_journal_put_journal_head().
*/ */
void __jbd2_journal_refile_buffer(struct journal_head *jh) bool __jbd2_journal_refile_buffer(struct journal_head *jh)
{ {
int was_dirty, jlist; int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); lockdep_assert_held(&jh->b_state_lock);
if (jh->b_transaction) if (jh->b_transaction)
assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
/* If the buffer is now unused, just drop it. */ /* If the buffer is now unused, just drop it. */
if (jh->b_next_transaction == NULL) { if (jh->b_next_transaction == NULL) {
__jbd2_journal_unfile_buffer(jh); __jbd2_journal_unfile_buffer(jh);
return; return true;
} }
/* /*
@ -2542,6 +2576,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
if (was_dirty) if (was_dirty)
set_buffer_jbddirty(bh); set_buffer_jbddirty(bh);
return false;
} }
/* /*
@ -2552,16 +2587,15 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
*/ */
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{ {
struct buffer_head *bh = jh2bh(jh);

/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_list_lock);
__brelse(bh);

bool drop;

spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
drop = __jbd2_journal_refile_buffer(jh);
spin_unlock(&jh->b_state_lock);
spin_unlock(&journal->j_list_lock);
if (drop)
	jbd2_journal_put_journal_head(jh);
} }
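
__jbd2_journal_refile_buffer() now returns whether the caller must drop the jh reference, and jbd2_journal_refile_buffer() drops it only after releasing b_state_lock: with the lock embedded in the journal_head, freeing the last reference under the lock would free the lock being held. A userspace sketch of the same unlock-then-put discipline (types and helpers invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
    pthread_mutex_t lock;   /* analogue of jh->b_state_lock */
    int refcount;           /* assume an external mechanism serializes it */
};

static void obj_put(struct obj *o)
{
    if (--o->refcount == 0) {
        pthread_mutex_destroy(&o->lock);
        free(o);
    }
}

/* Returns nonzero when the caller must drop its reference, mirroring
 * the new bool return of __jbd2_journal_refile_buffer(). */
static int do_work(struct obj *o)
{
    return 1;   /* pretend the object became unused */
}

int main(void)
{
    struct obj *o = malloc(sizeof(*o));
    int drop;

    if (!o)
        return 1;
    pthread_mutex_init(&o->lock, NULL);
    o->refcount = 1;

    pthread_mutex_lock(&o->lock);
    drop = do_work(o);
    pthread_mutex_unlock(&o->lock); /* unlock before the final put... */
    if (drop)
        obj_put(o);                 /* ...so we never free a held lock */
    return 0;
}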
/* /*

fs/ocfs2/alloc.c

@ -2288,9 +2288,9 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
int ret = 0; int ret = 0;
int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
if (handle->h_buffer_credits < credits) if (jbd2_handle_buffer_credits(handle) < credits)
ret = ocfs2_extend_trans(handle, ret = ocfs2_extend_trans(handle,
credits - handle->h_buffer_credits); credits - jbd2_handle_buffer_credits(handle));
return ret; return ret;
} }
@ -2367,7 +2367,7 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
struct ocfs2_path *right_path, struct ocfs2_path *right_path,
struct ocfs2_path **ret_left_path) struct ocfs2_path **ret_left_path)
{ {
int ret, start, orig_credits = handle->h_buffer_credits; int ret, start, orig_credits = jbd2_handle_buffer_credits(handle);
u32 cpos; u32 cpos;
struct ocfs2_path *left_path = NULL; struct ocfs2_path *left_path = NULL;
struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
@ -3148,7 +3148,7 @@ static int ocfs2_rotate_tree_left(handle_t *handle,
struct ocfs2_path *path, struct ocfs2_path *path,
struct ocfs2_cached_dealloc_ctxt *dealloc) struct ocfs2_cached_dealloc_ctxt *dealloc)
{ {
int ret, orig_credits = handle->h_buffer_credits; int ret, orig_credits = jbd2_handle_buffer_credits(handle);
struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
struct ocfs2_extent_block *eb; struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el; struct ocfs2_extent_list *el;
@ -3386,8 +3386,8 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
right_path); right_path);
ret = ocfs2_extend_rotate_transaction(handle, subtree_index, ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
right_path); right_path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
@ -3548,8 +3548,8 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
right_path); right_path);
ret = ocfs2_extend_rotate_transaction(handle, subtree_index, ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
left_path); left_path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;
@ -3623,7 +3623,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
le16_to_cpu(el->l_next_free_rec) == 1) { le16_to_cpu(el->l_next_free_rec) == 1) {
/* extend credit for ocfs2_remove_rightmost_path */ /* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
right_path); right_path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
@ -3669,7 +3669,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) { if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
/* extend credit for ocfs2_remove_rightmost_path */ /* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
path); path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
@ -3725,7 +3725,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
/* extend credit for ocfs2_remove_rightmost_path */ /* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
path); path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
@ -3755,7 +3755,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
/* extend credit for ocfs2_remove_rightmost_path */ /* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
path); path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
@ -3799,7 +3799,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
if (ctxt->c_split_covers_rec) { if (ctxt->c_split_covers_rec) {
/* extend credit for ocfs2_remove_rightmost_path */ /* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
path); path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
@ -5358,7 +5358,7 @@ static int ocfs2_truncate_rec(handle_t *handle,
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
/* extend credit for ocfs2_remove_rightmost_path */ /* extend credit for ocfs2_remove_rightmost_path */
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
path); path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
@ -5427,8 +5427,8 @@ static int ocfs2_truncate_rec(handle_t *handle,
} }
ret = ocfs2_extend_rotate_transaction(handle, 0, ret = ocfs2_extend_rotate_transaction(handle, 0,
handle->h_buffer_credits, jbd2_handle_buffer_credits(handle),
path); path);
if (ret) { if (ret) {
mlog_errno(ret); mlog_errno(ret);
goto out; goto out;

fs/ocfs2/journal.c

@ -420,14 +420,14 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
if (!nblocks) if (!nblocks)
return 0; return 0;
old_nblocks = handle->h_buffer_credits; old_nblocks = jbd2_handle_buffer_credits(handle);
trace_ocfs2_extend_trans(old_nblocks, nblocks); trace_ocfs2_extend_trans(old_nblocks, nblocks);
#ifdef CONFIG_OCFS2_DEBUG_FS #ifdef CONFIG_OCFS2_DEBUG_FS
status = 1; status = 1;
#else #else
status = jbd2_journal_extend(handle, nblocks); status = jbd2_journal_extend(handle, nblocks, 0);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);
goto bail; goto bail;
@ -461,13 +461,13 @@ int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
BUG_ON(!handle); BUG_ON(!handle);
old_nblks = handle->h_buffer_credits; old_nblks = jbd2_handle_buffer_credits(handle);
trace_ocfs2_allocate_extend_trans(old_nblks, thresh); trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
if (old_nblks < thresh) if (old_nblks < thresh)
return 0; return 0;
status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA); status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0);
if (status < 0) { if (status < 0) {
mlog_errno(status); mlog_errno(status);
goto bail; goto bail;

fs/ocfs2/suballoc.c

@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr) int nr)
{ {
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct journal_head *jh;
int ret; int ret;
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
if (!buffer_jbd(bg_bh)) if (!buffer_jbd(bg_bh))
return 1; return 1;
jbd_lock_bh_state(bg_bh);
bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;

jh = bh2jh(bg_bh);
spin_lock(&jh->b_state_lock);
bg = (struct ocfs2_group_desc *) jh->b_committed_data;
if (bg) if (bg)
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
else else
ret = 1; ret = 1;
jbd_unlock_bh_state(bg_bh); spin_unlock(&jh->b_state_lock);
return ret; return ret;
} }
@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
int status; int status;
unsigned int tmp; unsigned int tmp;
struct ocfs2_group_desc *undo_bg = NULL; struct ocfs2_group_desc *undo_bg = NULL;
struct journal_head *jh;
/* The caller got this descriptor from /* The caller got this descriptor from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
goto bail; goto bail;
} }
if (undo_fn) {
	jbd_lock_bh_state(group_bh);
	undo_bg = (struct ocfs2_group_desc *)
				bh2jh(group_bh)->b_committed_data;

jh = bh2jh(group_bh);
if (undo_fn) {
	spin_lock(&jh->b_state_lock);
	undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
BUG_ON(!undo_bg); BUG_ON(!undo_bg);
} }
@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
le16_add_cpu(&bg->bg_free_bits_count, num_bits); le16_add_cpu(&bg->bg_free_bits_count, num_bits);
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
if (undo_fn) if (undo_fn)
jbd_unlock_bh_state(group_bh); spin_unlock(&jh->b_state_lock);
return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n", return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
(unsigned long long)le64_to_cpu(bg->bg_blkno), (unsigned long long)le64_to_cpu(bg->bg_blkno),
le16_to_cpu(bg->bg_bits), le16_to_cpu(bg->bg_bits),
@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
} }
if (undo_fn) if (undo_fn)
jbd_unlock_bh_state(group_bh); spin_unlock(&jh->b_state_lock);
ocfs2_journal_dirty(handle, group_bh); ocfs2_journal_dirty(handle, group_bh);
bail: bail:

include/linux/jbd2.h

@ -313,7 +313,6 @@ enum jbd_state_bits {
BH_Revoked, /* Has been revoked from the log */ BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */ BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */ BH_JBDDirty, /* Is dirty but journaled */
BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Shadow, /* IO on shadow buffer is running */ BH_Shadow, /* IO on shadow buffer is running */
BH_Verified, /* Metadata block has been verified ok */ BH_Verified, /* Metadata block has been verified ok */
@ -342,26 +341,6 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
return bh->b_private; return bh->b_private;
} }
static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
bit_spin_lock(BH_State, &bh->b_state);
}
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
return bit_spin_trylock(BH_State, &bh->b_state);
}
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
return bit_spin_is_locked(BH_State, &bh->b_state);
}
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
bit_spin_unlock(BH_State, &bh->b_state);
}
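
The four helpers removed above implemented the jh state lock as a bit spinlock on the BH_State flag inside bh->b_state; the series replaces that with a dedicated spinlock_t (b_state_lock) in struct journal_head, which a lockdep_assert_held() can check. A rough userspace sketch of what a bit spinlock does (bit position and function names invented for illustration):

#include <stdatomic.h>
#include <stdio.h>

/* Old style: a lock carved out of one bit of a shared state word,
 * taken by spinning on an atomic read-modify-write, roughly what
 * bit_spin_lock() does. */
static atomic_uint b_state;
#define BH_STATE_BIT (1u << 5)  /* bit position chosen for illustration */

static void bit_lock(void)
{
    while (atomic_fetch_or(&b_state, BH_STATE_BIT) & BH_STATE_BIT)
        ;   /* spin until the bit was clear before our set */
}

static void bit_unlock(void)
{
    atomic_fetch_and(&b_state, ~BH_STATE_BIT);
}

int main(void)
{
    bit_lock();
    printf("state word while locked: %#x\n", atomic_load(&b_state));
    bit_unlock();
    /* A dedicated lock costs one extra word per journal_head but is
     * visible to lockdep, unlike an anonymous bit in b_state. */
    return 0;
}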
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{ {
bit_spin_lock(BH_JournalHead, &bh->b_state); bit_spin_lock(BH_JournalHead, &bh->b_state);
@ -477,7 +456,9 @@ struct jbd2_revoke_table_s;
* @h_transaction: Which compound transaction is this update a part of? * @h_transaction: Which compound transaction is this update a part of?
* @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_journal: Which journal handle belongs to - used iff h_reserved set.
* @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_rsv_handle: Handle reserved for finishing the logical operation.
* @h_buffer_credits: Number of remaining buffers we are allowed to dirty.

* @h_total_credits: Number of remaining buffers we are allowed to add to
*	journal. These are dirty buffers and revoke descriptor blocks.
* @h_revoke_credits: Number of remaining revoke records available for handle
* @h_ref: Reference count on this handle. * @h_ref: Reference count on this handle.
* @h_err: Field for caller's use to track errors through large fs operations. * @h_err: Field for caller's use to track errors through large fs operations.
* @h_sync: Flag for sync-on-close. * @h_sync: Flag for sync-on-close.
@ -487,7 +468,8 @@ struct jbd2_revoke_table_s;
* @h_type: For handle statistics. * @h_type: For handle statistics.
* @h_line_no: For handle statistics. * @h_line_no: For handle statistics.
* @h_start_jiffies: Handle Start time. * @h_start_jiffies: Handle Start time.
* @h_requested_credits: Holds @h_buffer_credits after handle is started. * @h_requested_credits: Holds @h_total_credits after handle is started.
* @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started.
* @saved_alloc_context: Saved context while transaction is open. * @saved_alloc_context: Saved context while transaction is open.
**/ **/
@ -504,7 +486,9 @@ struct jbd2_journal_handle
}; };
handle_t *h_rsv_handle; handle_t *h_rsv_handle;
int h_buffer_credits; int h_total_credits;
int h_revoke_credits;
int h_revoke_credits_requested;
int h_ref; int h_ref;
int h_err; int h_err;
@ -556,9 +540,9 @@ struct transaction_chp_stats_s {
* ->jbd_lock_bh_journal_head() (This is "innermost") * ->jbd_lock_bh_journal_head() (This is "innermost")
* *
* j_state_lock * j_state_lock
* ->jbd_lock_bh_state() * ->b_state_lock
* *
* jbd_lock_bh_state() * b_state_lock
* ->j_list_lock * ->j_list_lock
* *
* j_state_lock * j_state_lock
@ -681,11 +665,24 @@ struct transaction_s
atomic_t t_updates; atomic_t t_updates;
/* /*
* Number of buffers reserved for use by all handles in this transaction
* handle but not yet modified. [none]

* Number of blocks reserved for this transaction in the journal.
* This is including all credits reserved when starting transaction
* handles as well as all journal descriptor blocks needed for this
* transaction. [none]
*/ */
atomic_t t_outstanding_credits; atomic_t t_outstanding_credits;
/*
* Number of revoke records for this transaction added by already
* stopped handles. [none]
*/
atomic_t t_outstanding_revokes;
/*
* How many handles used this transaction? [none]
*/
atomic_t t_handle_count;
/* /*
* Forward and backward links for the circular list of all transactions * Forward and backward links for the circular list of all transactions
* awaiting checkpoint. [j_list_lock] * awaiting checkpoint. [j_list_lock]
@ -703,11 +700,6 @@ struct transaction_s
*/ */
ktime_t t_start_time; ktime_t t_start_time;
/*
* How many handles used this transaction? [none]
*/
atomic_t t_handle_count;
/* /*
* This transaction is being forced and some process is * This transaction is being forced and some process is
* waiting for it to finish. * waiting for it to finish.
@ -1024,6 +1016,13 @@ struct journal_s
*/ */
int j_max_transaction_buffers; int j_max_transaction_buffers;
/**
* @j_revoke_records_per_block:
*
* Number of revoke records that fit in one descriptor block.
*/
int j_revoke_records_per_block;
/** /**
* @j_commit_interval: * @j_commit_interval:
* *
@ -1257,7 +1256,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
/* Filing buffers */ /* Filing buffers */
extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
extern void __jbd2_journal_refile_buffer(struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *);
extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
extern void __journal_free_buffer(struct journal_head *bh); extern void __journal_free_buffer(struct journal_head *bh);
@ -1358,14 +1357,16 @@ static inline handle_t *journal_current_handle(void)
extern handle_t *jbd2_journal_start(journal_t *, int nblocks); extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks,
				     gfp_t gfp_mask, unsigned int type,
				     unsigned int line_no);
extern int jbd2_journal_restart(handle_t *, int nblocks);
extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
extern int jbd2_journal_extend (handle_t *, int nblocks);

extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks,
				     int revoke_records, gfp_t gfp_mask,
				     unsigned int type, unsigned int line_no);
extern int jbd2_journal_restart(handle_t *, int nblocks);
extern int jbd2__journal_restart(handle_t *, int nblocks,
				 int revoke_records, gfp_t gfp_mask);
extern int jbd2_journal_extend(handle_t *handle, int nblocks,
			       int revoke_records);

extern int jbd2_journal_start_reserved(handle_t *handle,
				unsigned int type, unsigned int line_no);
extern void jbd2_journal_free_reserved(handle_t *handle);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
@ -1560,38 +1561,19 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
return journal->j_chksum_driver != NULL; return journal->j_chksum_driver != NULL;
} }
/*
* We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for
* transaction control blocks.
*/
#define JBD2_CONTROL_BLOCKS_SHIFT 5
/*
* Return the minimum number of blocks which must be free in the journal
* before a new transaction may be started. Must be called under j_state_lock.
*/
static inline int jbd2_space_needed(journal_t *journal)
{
int nblocks = journal->j_max_transaction_buffers;
return nblocks + (nblocks >> JBD2_CONTROL_BLOCKS_SHIFT);
}
/* /*
* Return number of free blocks in the log. Must be called under j_state_lock. * Return number of free blocks in the log. Must be called under j_state_lock.
*/ */
static inline unsigned long jbd2_log_space_left(journal_t *journal) static inline unsigned long jbd2_log_space_left(journal_t *journal)
{ {
/* Allow for rounding errors */ /* Allow for rounding errors */
unsigned long free = journal->j_free - 32;

if (journal->j_committing_transaction) {
	unsigned long committing = atomic_read(&journal->
		j_committing_transaction->t_outstanding_credits);

	/* Transaction + control blocks */
	free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT);
}
return free;

long free = journal->j_free - 32;

if (journal->j_committing_transaction) {
	free -= atomic_read(&journal->
		j_committing_transaction->t_outstanding_credits);
}
return max_t(long, free, 0);
} }
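
Since descriptor blocks are now counted in t_outstanding_credits, the JBD2_CONTROL_BLOCKS_SHIFT estimate above goes away, and the switch from unsigned long to long plus max_t() keeps a momentarily negative difference from wrapping to a huge free-space value. A standalone illustration with made-up numbers:

#include <stdio.h>

#define max_t(type, a, b) ((type)(a) > (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
    unsigned long j_free = 40;  /* sample numbers for illustration */
    long committing = 64;       /* credits of the committing transaction */

    /* Unsigned arithmetic wraps around on underflow... */
    unsigned long wrapped = j_free - 32 - committing;
    /* ...while the signed + max_t() form clamps to zero. */
    long clamped = max_t(long, (long)(j_free - 32) - committing, 0);

    printf("unsigned: %lu, clamped: %ld\n", wrapped, clamped);
    return 0;
}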
/* /*
@ -1645,6 +1627,20 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
return tid; return tid;
} }
static inline int jbd2_handle_buffer_credits(handle_t *handle)
{
journal_t *journal;
if (!handle->h_reserved)
journal = handle->h_transaction->t_journal;
else
journal = handle->h_journal;
return handle->h_total_credits -
DIV_ROUND_UP(handle->h_revoke_credits_requested,
journal->j_revoke_records_per_block);
}
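
jbd2_handle_buffer_credits() recovers plain buffer credits by backing the revoke descriptor reservation out of h_total_credits. The same arithmetic in standalone form (all field values made up):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    /* Made-up handle state: 23 total credits, part of which covers
     * the revoke descriptor blocks reserved below. */
    int h_total_credits = 23;
    int h_revoke_credits_requested = 1024;
    int j_revoke_records_per_block = 505;   /* assumed capacity */

    int buffer_credits = h_total_credits -
        DIV_ROUND_UP(h_revoke_credits_requested,
                     j_revoke_records_per_block);

    printf("buffer credits = %d\n", buffer_credits);   /* 23 - 3 = 20 */
    return 0;
}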
#ifdef __KERNEL__ #ifdef __KERNEL__
#define buffer_trace_init(bh) do {} while (0) #define buffer_trace_init(bh) do {} while (0)

include/linux/journal-head.h

@ -11,6 +11,8 @@
#ifndef JOURNAL_HEAD_H_INCLUDED #ifndef JOURNAL_HEAD_H_INCLUDED
#define JOURNAL_HEAD_H_INCLUDED #define JOURNAL_HEAD_H_INCLUDED
#include <linux/spinlock.h>
typedef unsigned int tid_t; /* Unique transaction ID */ typedef unsigned int tid_t; /* Unique transaction ID */
typedef struct transaction_s transaction_t; /* Compound transaction type */ typedef struct transaction_s transaction_t; /* Compound transaction type */
@ -23,6 +25,11 @@ struct journal_head {
*/ */
struct buffer_head *b_bh; struct buffer_head *b_bh;
/*
* Protect the buffer head state
*/
spinlock_t b_state_lock;
/* /*
* Reference count - see description in journal.c * Reference count - see description in journal.c
* [jbd_lock_bh_journal_head()] * [jbd_lock_bh_journal_head()]
@ -30,7 +37,7 @@ struct journal_head {
int b_jcount; int b_jcount;
/* /*
* Journalling list for this buffer [jbd_lock_bh_state()] * Journalling list for this buffer [b_state_lock]
* NOTE: We *cannot* combine this with b_modified into a bitfield * NOTE: We *cannot* combine this with b_modified into a bitfield
* as gcc would then (which the C standard allows but which is * as gcc would then (which the C standard allows but which is
* very unuseful) make 64-bit accesses to the bitfield and clobber * very unuseful) make 64-bit accesses to the bitfield and clobber
@ -41,20 +48,20 @@ struct journal_head {
/* /*
* This flag signals the buffer has been modified by * This flag signals the buffer has been modified by
* the currently running transaction * the currently running transaction
* [jbd_lock_bh_state()] * [b_state_lock]
*/ */
unsigned b_modified; unsigned b_modified;
/* /*
* Copy of the buffer data frozen for writing to the log. * Copy of the buffer data frozen for writing to the log.
* [jbd_lock_bh_state()] * [b_state_lock]
*/ */
char *b_frozen_data; char *b_frozen_data;
/* /*
* Pointer to a saved copy of the buffer containing no uncommitted * Pointer to a saved copy of the buffer containing no uncommitted
* deallocation references, so that allocations can avoid overwriting * deallocation references, so that allocations can avoid overwriting
* uncommitted deletes. [jbd_lock_bh_state()] * uncommitted deletes. [b_state_lock]
*/ */
char *b_committed_data; char *b_committed_data;
@ -63,7 +70,7 @@ struct journal_head {
* metadata: either the running transaction or the committing * metadata: either the running transaction or the committing
* transaction (if there is one). Only applies to buffers on a * transaction (if there is one). Only applies to buffers on a
* transaction's data or metadata journaling list. * transaction's data or metadata journaling list.
* [j_list_lock] [jbd_lock_bh_state()] * [j_list_lock] [b_state_lock]
* Either of these locks is enough for reading, both are needed for * Either of these locks is enough for reading, both are needed for
* changes. * changes.
*/ */
@ -73,13 +80,13 @@ struct journal_head {
* Pointer to the running compound transaction which is currently * Pointer to the running compound transaction which is currently
* modifying the buffer's metadata, if there was already a transaction * modifying the buffer's metadata, if there was already a transaction
* committing it when the new transaction touched it. * committing it when the new transaction touched it.
* [t_list_lock] [jbd_lock_bh_state()] * [t_list_lock] [b_state_lock]
*/ */
transaction_t *b_next_transaction; transaction_t *b_next_transaction;
/* /*
* Doubly-linked list of buffers on a transaction's data, metadata or * Doubly-linked list of buffers on a transaction's data, metadata or
* forget queue. [t_list_lock] [jbd_lock_bh_state()] * forget queue. [t_list_lock] [b_state_lock]
*/ */
struct journal_head *b_tnext, *b_tprev; struct journal_head *b_tnext, *b_tprev;

include/trace/events/ext4.h

@ -1746,15 +1746,16 @@ TRACE_EVENT(ext4_load_inode,
TRACE_EVENT(ext4_journal_start, TRACE_EVENT(ext4_journal_start,
TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks,
	 unsigned long IP),

TP_ARGS(sb, blocks, rsv_blocks, IP),

TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks,
	 int revoke_creds, unsigned long IP),

TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, IP),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field(unsigned long, ip ) __field(unsigned long, ip )
__field( int, blocks ) __field( int, blocks )
__field( int, rsv_blocks ) __field( int, rsv_blocks )
__field( int, revoke_creds )
), ),
TP_fast_assign( TP_fast_assign(
@ -1762,11 +1763,13 @@ TRACE_EVENT(ext4_journal_start,
__entry->ip = IP; __entry->ip = IP;
__entry->blocks = blocks; __entry->blocks = blocks;
__entry->rsv_blocks = rsv_blocks; __entry->rsv_blocks = rsv_blocks;
__entry->revoke_creds = revoke_creds;
), ),
TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS", TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d, "
MAJOR(__entry->dev), MINOR(__entry->dev), "caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->blocks, __entry->rsv_blocks, (void *)__entry->ip) __entry->blocks, __entry->rsv_blocks, __entry->revoke_creds,
(void *)__entry->ip)
); );
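
With the new field, a rendered trace line would look something like this (hypothetical values, following the TP_printk format above):

ext4_journal_start: dev 8,1 blocks 8, rsv_blocks 0, revoke_creds 8, caller ext4_unlink+0x4a/0x2a0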
TRACE_EVENT(ext4_journal_start_reserved, TRACE_EVENT(ext4_journal_start_reserved,

include/trace/events/jbd2.h

@ -133,7 +133,7 @@ TRACE_EVENT(jbd2_submit_inode_data,
(unsigned long) __entry->ino) (unsigned long) __entry->ino)
); );
TRACE_EVENT(jbd2_handle_start, DECLARE_EVENT_CLASS(jbd2_handle_start_class,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int requested_blocks), unsigned int line_no, int requested_blocks),
@ -161,6 +161,20 @@ TRACE_EVENT(jbd2_handle_start,
__entry->type, __entry->line_no, __entry->requested_blocks) __entry->type, __entry->line_no, __entry->requested_blocks)
); );
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks)
);
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int requested_blocks),
TP_ARGS(dev, tid, type, line_no, requested_blocks)
);
TRACE_EVENT(jbd2_handle_extend, TRACE_EVENT(jbd2_handle_extend,
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type, TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
unsigned int line_no, int buffer_credits, unsigned int line_no, int buffer_credits,