ext4: Fix possible deadlock between ext4_truncate() and ext4_get_blocks()
During truncate we are sometimes forced to start a new transaction because the number of blocks to be journaled is both quite large and hard to predict. So far we restarted the transaction while holding i_data_sem, which violates lock ordering because i_data_sem ranks below a transaction start (and it can lead to a real deadlock with ext4_get_blocks() mapping blocks in some page while having a transaction open). We fix the problem by dropping i_data_sem before restarting the transaction and reacquiring it afterwards. It's slightly subtle that this works: 1) By the time ext4_truncate() is called, all the page cache for the truncated part of the file has been dropped, so get_block() should not be called on it (we only have to invalidate the extent cache after we reacquire i_data_sem, because some extent from the not-truncated part could also extend into the part we are going to truncate). 2) Writes, migrate or defrag hold i_mutex, so they are stopped for the whole duration of the truncate. This bug has been found and analyzed by Theodore Tso <tytso@mit.edu>. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
9599b0e597
commit
487caeef9f
@ -1370,6 +1370,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
|
||||
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
|
||||
extern int ext4_can_truncate(struct inode *inode);
|
||||
extern void ext4_truncate(struct inode *);
|
||||
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
|
||||
extern void ext4_set_inode_flags(struct inode *);
|
||||
extern void ext4_get_inode_flags(struct ext4_inode_info *);
|
||||
extern int ext4_alloc_da_blocks(struct inode *inode);
|
||||
|
@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
|
||||
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
|
||||
}
|
||||
|
||||
static int ext4_ext_journal_restart(handle_t *handle, int needed)
|
||||
static int ext4_ext_truncate_extend_restart(handle_t *handle,
|
||||
struct inode *inode,
|
||||
int needed)
|
||||
{
|
||||
int err;
|
||||
|
||||
@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
|
||||
err = ext4_journal_extend(handle, needed);
|
||||
if (err <= 0)
|
||||
return err;
|
||||
return ext4_journal_restart(handle, needed);
|
||||
err = ext4_truncate_restart_trans(handle, inode, needed);
|
||||
/*
|
||||
* We have dropped i_data_sem so someone might have cached again
|
||||
* an extent we are going to truncate.
|
||||
*/
|
||||
ext4_ext_invalidate_cache(inode);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2150,7 +2159,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
}
|
||||
credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
|
||||
|
||||
err = ext4_ext_journal_restart(handle, credits);
|
||||
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
|
@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
|
||||
* so before we call here everything must be consistently dirtied against
|
||||
* this transaction.
|
||||
*/
|
||||
static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
|
||||
int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
|
||||
int nblocks)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Drop i_data_sem to avoid deadlock with ext4_get_blocks. At this
|
||||
* moment, get_block can be called only for blocks inside i_size since
|
||||
* page cache has been already dropped and writes are blocked by
|
||||
* i_mutex. So we can safely drop the i_data_sem here.
|
||||
*/
|
||||
BUG_ON(EXT4_JOURNAL(inode) == NULL);
|
||||
jbd_debug(2, "restarting handle %p\n", handle);
|
||||
return ext4_journal_restart(handle, blocks_for_truncate(inode));
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3658,7 +3671,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_handle_dirty_metadata(handle, inode, bh);
|
||||
}
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_test_restart(handle, inode);
|
||||
ext4_truncate_restart_trans(handle, inode,
|
||||
blocks_for_truncate(inode));
|
||||
if (bh) {
|
||||
BUFFER_TRACE(bh, "retaking write access");
|
||||
ext4_journal_get_write_access(handle, bh);
|
||||
@ -3869,7 +3883,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
|
||||
return;
|
||||
if (try_to_extend_transaction(handle, inode)) {
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_test_restart(handle, inode);
|
||||
ext4_truncate_restart_trans(handle, inode,
|
||||
blocks_for_truncate(inode));
|
||||
}
|
||||
|
||||
ext4_free_blocks(handle, inode, nr, 1, 1);
|
||||
|
Loading…
Reference in New Issue
Block a user