Various bug fixes for ext4. Perhaps the most serious bug fixed is one
which could cause file system corruptions when performing file punch operations. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) iQIcBAABCAAGBQJQ374OAAoJENNvdpvBGATwEGAP/jKUwjQhBZiF0k9dg1kQ5eTz bdli4fy1vxrEMIOym8IZa4nBQJVCkArwRgjc28gCBD6k9u6X3GPa26vUydsoPfP6 odPdc9c9HtsbYQGuaq1SohID5HfjxHewTcUmCs4X4SpGcSurUcT7eQYWqSuIxFHR 0nKk8NO4EcWh2uqIoGPrc8QpSdor0DXXYYjZmHCeVLH1n6PyoMsnrFMfO9KqMLUL vNR54CX9n1GRTfAfJNkNzcwfs8IfNkDUyv5hFpDh15tLltogU0TqnlAl3vSeZGSx vVfhwHmQTK/bJyC3YaoRZqq9CQJVk2f/OTBpJDFY/USaapuitJd6vqbmh7NiRNAN LaKmFt99MPfwyjEhIA7+J0LCTraAxc536q43oWWK5dAJhWI7DW0lbHARVeQTixNy KJ1Lp0pmmz1mX8/lugOnK1SPBF525kTaoiz2bWqg4oQgn7mBzUlgj+EV22/6Rq83 TpKOKstl4BiZi8t5AhmFiwqtknCDiT5vUKQNy2kuM/oXtPJID/lM/TJbR5viYD3l AH3Ef7xj61CynFZ0oBeraGwtXc2BHJpJdWz+8uj0/VhFfC+uNUYapSLFwyiAVZKO xxaItT3ylfKpa0AWK6HBc2SLuL72SCHAPks06YKFtSyHtr5C8SCcafxU2DSOSi7K VrhkcH6STa77Br7a1ORt =9R/D -----END PGP SIGNATURE----- Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 bug fixes from Ted Ts'o: "Various bug fixes for ext4. Perhaps the most serious bug fixed is one which could cause file system corruptions when performing file punch operations." * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: avoid hang when mounting non-journal filesystems with orphan list ext4: lock i_mutex when truncating orphan inodes ext4: do not try to write superblock on ro remount w/o journal ext4: include journal blocks in df overhead calcs ext4: remove unaligned AIO warning printk ext4: fix an incorrect comment about i_mutex ext4: fix deadlock in journal_unmap_buffer() ext4: split off ext4_journalled_invalidatepage() jbd2: fix assertion failure in jbd2_journal_flush() ext4: check dioread_nolock on remount ext4: fix extent tree corruption caused by hole punch
This commit is contained in:
commit
5439ca6b8f
@ -2226,13 +2226,14 @@ errout:
|
||||
* removes index from the index block.
|
||||
*/
|
||||
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
|
||||
struct ext4_ext_path *path)
|
||||
struct ext4_ext_path *path, int depth)
|
||||
{
|
||||
int err;
|
||||
ext4_fsblk_t leaf;
|
||||
|
||||
/* free index block */
|
||||
path--;
|
||||
depth--;
|
||||
path = path + depth;
|
||||
leaf = ext4_idx_pblock(path->p_idx);
|
||||
if (unlikely(path->p_hdr->eh_entries == 0)) {
|
||||
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
|
||||
@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
|
||||
|
||||
ext4_free_blocks(handle, inode, NULL, leaf, 1,
|
||||
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
|
||||
|
||||
while (--depth >= 0) {
|
||||
if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
|
||||
break;
|
||||
path--;
|
||||
err = ext4_ext_get_access(handle, inode, path);
|
||||
if (err)
|
||||
break;
|
||||
path->p_idx->ei_block = (path+1)->p_idx->ei_block;
|
||||
err = ext4_ext_dirty(handle, inode, path);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
/* if this leaf is free, then we should
|
||||
* remove it from index block above */
|
||||
if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
|
||||
err = ext4_ext_rm_idx(handle, inode, path + depth);
|
||||
err = ext4_ext_rm_idx(handle, inode, path, depth);
|
||||
|
||||
out:
|
||||
return err;
|
||||
@ -2802,7 +2816,7 @@ again:
|
||||
/* index is empty, remove it;
|
||||
* handle must be already prepared by the
|
||||
* truncatei_leaf() */
|
||||
err = ext4_ext_rm_idx(handle, inode, path + i);
|
||||
err = ext4_ext_rm_idx(handle, inode, path, i);
|
||||
}
|
||||
/* root level has p_bh == NULL, brelse() eats this */
|
||||
brelse(path[i].p_bh);
|
||||
|
@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
|
||||
/* Unaligned direct AIO must be serialized; see comment above */
|
||||
if (unaligned_aio) {
|
||||
static unsigned long unaligned_warn_time;
|
||||
|
||||
/* Warn about this once per day */
|
||||
if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
|
||||
ext4_msg(inode->i_sb, KERN_WARNING,
|
||||
"Unaligned AIO/DIO on inode %ld by %s; "
|
||||
"performance will be poor.",
|
||||
inode->i_ino, current->comm);
|
||||
mutex_lock(ext4_aio_mutex(inode));
|
||||
ext4_unwritten_wait(inode);
|
||||
}
|
||||
|
@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync)
|
||||
*
|
||||
* What we do is just kick off a commit and wait on it. This will snapshot the
|
||||
* inode to disk.
|
||||
*
|
||||
* i_mutex lock is held when entering and exiting this function
|
||||
*/
|
||||
|
||||
int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
|
@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs
|
||||
|
||||
static void ext4_invalidatepage(struct page *page, unsigned long offset)
|
||||
{
|
||||
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
|
||||
|
||||
trace_ext4_invalidatepage(page, offset);
|
||||
|
||||
/*
|
||||
@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
|
||||
*/
|
||||
if (ext4_should_dioread_nolock(page->mapping->host))
|
||||
ext4_invalidatepage_free_endio(page, offset);
|
||||
|
||||
/* No journalling happens on data buffers when this function is used */
|
||||
WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
|
||||
|
||||
block_invalidatepage(page, offset);
|
||||
}
|
||||
|
||||
static int __ext4_journalled_invalidatepage(struct page *page,
|
||||
unsigned long offset)
|
||||
{
|
||||
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
|
||||
|
||||
trace_ext4_journalled_invalidatepage(page, offset);
|
||||
|
||||
/*
|
||||
* If it's a full truncate we just forget about the pending dirtying
|
||||
*/
|
||||
if (offset == 0)
|
||||
ClearPageChecked(page);
|
||||
|
||||
if (journal)
|
||||
jbd2_journal_invalidatepage(journal, page, offset);
|
||||
else
|
||||
block_invalidatepage(page, offset);
|
||||
return jbd2_journal_invalidatepage(journal, page, offset);
|
||||
}
|
||||
|
||||
/*
 * ->invalidatepage hook installed in ext4_journalled_aops.
 *
 * The address_space callback has no way to report failure, so a negative
 * result from __ext4_journalled_invalidatepage() is surfaced as a kernel
 * warning instead of being returned.
 */
static void ext4_journalled_invalidatepage(struct page *page,
					   unsigned long offset)
{
	int err = __ext4_journalled_invalidatepage(page, offset);

	WARN_ON(err < 0);
}
|
||||
|
||||
static int ext4_releasepage(struct page *page, gfp_t wait)
|
||||
@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = {
|
||||
.write_end = ext4_journalled_write_end,
|
||||
.set_page_dirty = ext4_journalled_set_page_dirty,
|
||||
.bmap = ext4_bmap,
|
||||
.invalidatepage = ext4_invalidatepage,
|
||||
.invalidatepage = ext4_journalled_invalidatepage,
|
||||
.releasepage = ext4_releasepage,
|
||||
.direct_IO = ext4_direct_IO,
|
||||
.is_partially_uptodate = block_is_partially_uptodate,
|
||||
@ -4304,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
|
||||
* buffers that are attached to a page stradding i_size and are undergoing
|
||||
* commit. In that case we have to wait for commit to finish and try again.
|
||||
*/
|
||||
static void ext4_wait_for_tail_page_commit(struct inode *inode)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned offset;
|
||||
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||
tid_t commit_tid = 0;
|
||||
int ret;
|
||||
|
||||
offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
|
||||
/*
|
||||
* All buffers in the last page remain valid? Then there's nothing to
|
||||
* do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
|
||||
* blocksize case
|
||||
*/
|
||||
if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
|
||||
return;
|
||||
while (1) {
|
||||
page = find_lock_page(inode->i_mapping,
|
||||
inode->i_size >> PAGE_CACHE_SHIFT);
|
||||
if (!page)
|
||||
return;
|
||||
ret = __ext4_journalled_invalidatepage(page, offset);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
if (ret != -EBUSY)
|
||||
return;
|
||||
commit_tid = 0;
|
||||
read_lock(&journal->j_state_lock);
|
||||
if (journal->j_committing_transaction)
|
||||
commit_tid = journal->j_committing_transaction->t_tid;
|
||||
read_unlock(&journal->j_state_lock);
|
||||
if (commit_tid)
|
||||
jbd2_log_wait_commit(journal, commit_tid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_setattr()
|
||||
*
|
||||
@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
}
|
||||
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
if (attr->ia_size != i_size_read(inode)) {
|
||||
truncate_setsize(inode, attr->ia_size);
|
||||
/* Inode size will be reduced, wait for dio in flight.
|
||||
* Temporarily disable dioread_nolock to prevent
|
||||
* livelock. */
|
||||
if (attr->ia_size != inode->i_size) {
|
||||
loff_t oldsize = inode->i_size;
|
||||
|
||||
i_size_write(inode, attr->ia_size);
|
||||
/*
|
||||
* Blocks are going to be removed from the inode. Wait
|
||||
* for dio in flight. Temporarily disable
|
||||
* dioread_nolock to prevent livelock.
|
||||
*/
|
||||
if (orphan) {
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
inode_dio_wait(inode);
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
if (!ext4_should_journal_data(inode)) {
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
inode_dio_wait(inode);
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
} else
|
||||
ext4_wait_for_tail_page_commit(inode);
|
||||
}
|
||||
/*
|
||||
* Truncate pagecache after we've waited for commit
|
||||
* in data=journal mode to make pages freeable.
|
||||
*/
|
||||
truncate_pagecache(inode, oldsize, inode->i_size);
|
||||
}
|
||||
ext4_truncate(inode);
|
||||
}
|
||||
|
@ -2648,7 +2648,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
|
||||
struct ext4_iloc iloc;
|
||||
int err = 0;
|
||||
|
||||
if (!EXT4_SB(inode->i_sb)->s_journal)
|
||||
if ((!EXT4_SB(inode->i_sb)->s_journal) &&
|
||||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
|
||||
return 0;
|
||||
|
||||
mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
|
||||
|
@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb,
|
||||
unsigned int *journal_ioprio,
|
||||
int is_remount)
|
||||
{
|
||||
#ifdef CONFIG_QUOTA
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
#endif
|
||||
char *p;
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
int token;
|
||||
@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (test_opt(sb, DIOREAD_NOLOCK)) {
|
||||
int blocksize =
|
||||
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
|
||||
|
||||
if (blocksize < PAGE_CACHE_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR, "can't mount with "
|
||||
"dioread_nolock if block size != PAGE_SIZE");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
|
||||
__func__, inode->i_ino, inode->i_size);
|
||||
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
|
||||
inode->i_ino, inode->i_size);
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ext4_truncate(inode);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
nr_truncates++;
|
||||
} else {
|
||||
ext4_msg(sb, KERN_DEBUG,
|
||||
@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb)
|
||||
memset(buf, 0, PAGE_SIZE);
|
||||
cond_resched();
|
||||
}
|
||||
/* Add the journal blocks as well */
|
||||
if (sbi->s_journal)
|
||||
overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
|
||||
|
||||
sbi->s_overhead = overhead;
|
||||
smp_wmb();
|
||||
free_page((unsigned long) buf);
|
||||
@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
clear_opt(sb, DELALLOC);
|
||||
}
|
||||
|
||||
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
|
||||
if (test_opt(sb, DIOREAD_NOLOCK)) {
|
||||
if (blocksize < PAGE_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR, "can't mount with "
|
||||
"dioread_nolock if block size != PAGE_SIZE");
|
||||
goto failed_mount;
|
||||
}
|
||||
}
|
||||
|
||||
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
|
||||
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
|
||||
|
||||
@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
|
||||
goto failed_mount;
|
||||
|
||||
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
|
||||
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
|
||||
blocksize > EXT4_MAX_BLOCK_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
}
|
||||
|
||||
ext4_setup_system_zone(sb);
|
||||
if (sbi->s_journal == NULL)
|
||||
if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
|
||||
ext4_commit_super(sb, 1);
|
||||
|
||||
#ifdef CONFIG_QUOTA
|
||||
|
@ -209,7 +209,8 @@ repeat:
|
||||
if (!new_transaction)
|
||||
goto alloc_transaction;
|
||||
write_lock(&journal->j_state_lock);
|
||||
if (!journal->j_running_transaction) {
|
||||
if (!journal->j_running_transaction &&
|
||||
!journal->j_barrier_count) {
|
||||
jbd2_get_transaction(journal, new_transaction);
|
||||
new_transaction = NULL;
|
||||
}
|
||||
@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
|
||||
|
||||
BUFFER_TRACE(bh, "entry");
|
||||
|
||||
retry:
|
||||
/*
|
||||
* It is safe to proceed here without the j_list_lock because the
|
||||
* buffers cannot be stolen by try_to_free_buffers as long as we are
|
||||
@ -1934,14 +1934,11 @@ retry:
|
||||
* for commit and try again.
|
||||
*/
|
||||
if (partial_page) {
|
||||
tid_t tid = journal->j_committing_transaction->t_tid;
|
||||
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
jbd_unlock_bh_state(bh);
|
||||
write_unlock(&journal->j_state_lock);
|
||||
jbd2_log_wait_commit(journal, tid);
|
||||
goto retry;
|
||||
return -EBUSY;
|
||||
}
|
||||
/*
|
||||
* OK, buffer won't be reachable after truncate. We just set
|
||||
@ -2002,21 +1999,23 @@ zap_buffer_unlocked:
|
||||
* @page: page to flush
|
||||
* @offset: offset within the page; buffers starting at or beyond it are invalidated.
|
||||
*
|
||||
* Reap page buffers containing data after offset in page.
|
||||
*
|
||||
* Reap page buffers containing data after offset in page. Can return -EBUSY
|
||||
* if buffers are part of the committing transaction and the page is straddling
|
||||
* i_size. Caller then has to wait for current commit and try again.
|
||||
*/
|
||||
void jbd2_journal_invalidatepage(journal_t *journal,
|
||||
struct page *page,
|
||||
unsigned long offset)
|
||||
int jbd2_journal_invalidatepage(journal_t *journal,
|
||||
struct page *page,
|
||||
unsigned long offset)
|
||||
{
|
||||
struct buffer_head *head, *bh, *next;
|
||||
unsigned int curr_off = 0;
|
||||
int may_free = 1;
|
||||
int ret = 0;
|
||||
|
||||
if (!PageLocked(page))
|
||||
BUG();
|
||||
if (!page_has_buffers(page))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
/* We will potentially be playing with lists other than just the
|
||||
* data lists (especially for journaled data mode), so be
|
||||
@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
|
||||
if (offset <= curr_off) {
|
||||
/* This block is wholly outside the truncation point */
|
||||
lock_buffer(bh);
|
||||
may_free &= journal_unmap_buffer(journal, bh,
|
||||
offset > 0);
|
||||
ret = journal_unmap_buffer(journal, bh, offset > 0);
|
||||
unlock_buffer(bh);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
may_free &= ret;
|
||||
}
|
||||
curr_off = next_off;
|
||||
bh = next;
|
||||
@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
|
||||
if (may_free && try_to_free_buffers(page))
|
||||
J_ASSERT(!page_has_buffers(page));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
|
||||
extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
|
||||
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
|
||||
extern void journal_sync_buffer (struct buffer_head *);
|
||||
extern void jbd2_journal_invalidatepage(journal_t *,
|
||||
extern int jbd2_journal_invalidatepage(journal_t *,
|
||||
struct page *, unsigned long);
|
||||
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
|
||||
extern int jbd2_journal_stop(handle_t *);
|
||||
|
@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage,
|
||||
TP_ARGS(page)
|
||||
);
|
||||
|
||||
TRACE_EVENT(ext4_invalidatepage,
|
||||
DECLARE_EVENT_CLASS(ext4_invalidatepage_op,
|
||||
TP_PROTO(struct page *page, unsigned long offset),
|
||||
|
||||
TP_ARGS(page, offset),
|
||||
@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage,
|
||||
(unsigned long) __entry->index, __entry->offset)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage,
|
||||
TP_PROTO(struct page *page, unsigned long offset),
|
||||
|
||||
TP_ARGS(page, offset)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage,
|
||||
TP_PROTO(struct page *page, unsigned long offset),
|
||||
|
||||
TP_ARGS(page, offset)
|
||||
);
|
||||
|
||||
TRACE_EVENT(ext4_discard_blocks,
|
||||
TP_PROTO(struct super_block *sb, unsigned long long blk,
|
||||
unsigned long long count),
|
||||
|
Loading…
Reference in New Issue
Block a user