Merge branch 'delalloc-buffer-write' into dev
Fix a bug in how we update i_disksize, and the error path in inline_data_end. Finally, drop an unnecessary creation of a journal handle which was only needed for inline data, which can give us a large performance gain in delayed allocation writes. Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
commit
11ef08c9eb
@ -3603,9 +3603,6 @@ extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
|
||||
unsigned flags,
|
||||
struct page **pagep,
|
||||
void **fsdata);
|
||||
extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
|
||||
unsigned len, unsigned copied,
|
||||
struct page *page);
|
||||
extern int ext4_try_add_inline_entry(handle_t *handle,
|
||||
struct ext4_filename *fname,
|
||||
struct inode *dir, struct inode *inode);
|
||||
|
131
fs/ext4/inline.c
131
fs/ext4/inline.c
@ -733,45 +733,83 @@ convert:
|
||||
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
|
||||
unsigned copied, struct page *page)
|
||||
{
|
||||
int ret, no_expand;
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
int no_expand;
|
||||
void *kaddr;
|
||||
struct ext4_iloc iloc;
|
||||
int ret = 0, ret2;
|
||||
|
||||
if (unlikely(copied < len)) {
|
||||
if (!PageUptodate(page)) {
|
||||
copied = 0;
|
||||
if (unlikely(copied < len) && !PageUptodate(page))
|
||||
copied = 0;
|
||||
|
||||
if (likely(copied)) {
|
||||
ret = ext4_get_inode_loc(inode, &iloc);
|
||||
if (ret) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
ext4_std_error(inode->i_sb, ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ext4_write_lock_xattr(inode, &no_expand);
|
||||
BUG_ON(!ext4_has_inline_data(inode));
|
||||
|
||||
ret = ext4_get_inode_loc(inode, &iloc);
|
||||
if (ret) {
|
||||
ext4_std_error(inode->i_sb, ret);
|
||||
copied = 0;
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* ei->i_inline_off may have changed since
|
||||
* ext4_write_begin() called
|
||||
* ext4_try_to_write_inline_data()
|
||||
*/
|
||||
(void) ext4_find_inline_data_nolock(inode);
|
||||
|
||||
ext4_write_lock_xattr(inode, &no_expand);
|
||||
BUG_ON(!ext4_has_inline_data(inode));
|
||||
kaddr = kmap_atomic(page);
|
||||
ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
|
||||
kunmap_atomic(kaddr);
|
||||
SetPageUptodate(page);
|
||||
/* clear page dirty so that writepages wouldn't work for us. */
|
||||
ClearPageDirty(page);
|
||||
|
||||
ext4_write_unlock_xattr(inode, &no_expand);
|
||||
brelse(iloc.bh);
|
||||
|
||||
/*
|
||||
* It's important to update i_size while still holding page
|
||||
* lock: page writeout could otherwise come in and zero
|
||||
* beyond i_size.
|
||||
*/
|
||||
ext4_update_inode_size(inode, pos + copied);
|
||||
}
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
|
||||
/*
|
||||
* ei->i_inline_off may have changed since ext4_write_begin()
|
||||
* called ext4_try_to_write_inline_data()
|
||||
* Don't mark the inode dirty under page lock. First, it unnecessarily
|
||||
* makes the holding time of page lock longer. Second, it forces lock
|
||||
* ordering of page lock and transaction start for journaling
|
||||
* filesystems.
|
||||
*/
|
||||
(void) ext4_find_inline_data_nolock(inode);
|
||||
|
||||
kaddr = kmap_atomic(page);
|
||||
ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
|
||||
kunmap_atomic(kaddr);
|
||||
SetPageUptodate(page);
|
||||
/* clear page dirty so that writepages wouldn't work for us. */
|
||||
ClearPageDirty(page);
|
||||
|
||||
ext4_write_unlock_xattr(inode, &no_expand);
|
||||
brelse(iloc.bh);
|
||||
mark_inode_dirty(inode);
|
||||
if (likely(copied))
|
||||
mark_inode_dirty(inode);
|
||||
out:
|
||||
return copied;
|
||||
/*
|
||||
* If we didn't copy as much data as expected, we need to trim back
|
||||
* size of xattr containing inline data.
|
||||
*/
|
||||
if (pos + len > inode->i_size && ext4_can_truncate(inode))
|
||||
ext4_orphan_add(handle, inode);
|
||||
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
if (pos + len > inode->i_size) {
|
||||
ext4_truncate_failed_write(inode);
|
||||
/*
|
||||
* If truncate failed early the inode might still be
|
||||
* on the orphan list; we need to make sure the inode
|
||||
* is removed from the orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
struct buffer_head *
|
||||
@ -953,43 +991,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
|
||||
unsigned len, unsigned copied,
|
||||
struct page *page)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
return ret;
|
||||
}
|
||||
copied = ret;
|
||||
|
||||
/*
|
||||
* No need to use i_size_read() here, the i_size
|
||||
* cannot change under us because we hold i_mutex.
|
||||
*
|
||||
* But it's important to update i_size while still holding page lock:
|
||||
* page writeout could otherwise come in and zero beyond i_size.
|
||||
*/
|
||||
if (pos+copied > inode->i_size)
|
||||
i_size_write(inode, pos+copied);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
|
||||
/*
|
||||
* Don't mark the inode dirty under page lock. First, it unnecessarily
|
||||
* makes the holding time of page lock longer. Second, it forces lock
|
||||
* ordering of page lock and transaction start for journaling
|
||||
* filesystems.
|
||||
*/
|
||||
mark_inode_dirty(inode);
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
#ifdef INLINE_DIR_DEBUG
|
||||
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
|
||||
void *inline_start, int inline_size)
|
||||
|
152
fs/ext4/inode.c
152
fs/ext4/inode.c
@ -1284,22 +1284,14 @@ static int ext4_write_end(struct file *file,
|
||||
loff_t old_size = inode->i_size;
|
||||
int ret = 0, ret2;
|
||||
int i_size_changed = 0;
|
||||
int inline_data = ext4_has_inline_data(inode);
|
||||
bool verity = ext4_verity_in_progress(inode);
|
||||
|
||||
trace_ext4_write_end(inode, pos, len, copied);
|
||||
if (inline_data) {
|
||||
ret = ext4_write_inline_data_end(inode, pos, len,
|
||||
copied, page);
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto errout;
|
||||
}
|
||||
copied = ret;
|
||||
} else
|
||||
copied = block_write_end(file, mapping, pos,
|
||||
len, copied, page, fsdata);
|
||||
|
||||
if (ext4_has_inline_data(inode))
|
||||
return ext4_write_inline_data_end(inode, pos, len, copied, page);
|
||||
|
||||
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
|
||||
/*
|
||||
* it's important to update i_size while still holding page lock:
|
||||
* page writeout could otherwise come in and zero beyond i_size.
|
||||
@ -1320,7 +1312,7 @@ static int ext4_write_end(struct file *file,
|
||||
* ordering of page lock and transaction start for journaling
|
||||
* filesystems.
|
||||
*/
|
||||
if (i_size_changed || inline_data)
|
||||
if (i_size_changed)
|
||||
ret = ext4_mark_inode_dirty(handle, inode);
|
||||
|
||||
if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
|
||||
@ -1329,7 +1321,7 @@ static int ext4_write_end(struct file *file,
|
||||
* inode->i_size. So truncate them
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
errout:
|
||||
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
@ -1395,7 +1387,6 @@ static int ext4_journalled_write_end(struct file *file,
|
||||
int partial = 0;
|
||||
unsigned from, to;
|
||||
int size_changed = 0;
|
||||
int inline_data = ext4_has_inline_data(inode);
|
||||
bool verity = ext4_verity_in_progress(inode);
|
||||
|
||||
trace_ext4_journalled_write_end(inode, pos, len, copied);
|
||||
@ -1404,16 +1395,10 @@ static int ext4_journalled_write_end(struct file *file,
|
||||
|
||||
BUG_ON(!ext4_handle_valid(handle));
|
||||
|
||||
if (inline_data) {
|
||||
ret = ext4_write_inline_data_end(inode, pos, len,
|
||||
copied, page);
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
goto errout;
|
||||
}
|
||||
copied = ret;
|
||||
} else if (unlikely(copied < len) && !PageUptodate(page)) {
|
||||
if (ext4_has_inline_data(inode))
|
||||
return ext4_write_inline_data_end(inode, pos, len, copied, page);
|
||||
|
||||
if (unlikely(copied < len) && !PageUptodate(page)) {
|
||||
copied = 0;
|
||||
ext4_journalled_zero_new_buffers(handle, inode, page, from, to);
|
||||
} else {
|
||||
@ -1436,7 +1421,7 @@ static int ext4_journalled_write_end(struct file *file,
|
||||
if (old_size < pos && !verity)
|
||||
pagecache_isize_extended(inode, old_size, pos);
|
||||
|
||||
if (size_changed || inline_data) {
|
||||
if (size_changed) {
|
||||
ret2 = ext4_mark_inode_dirty(handle, inode);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
@ -1449,7 +1434,6 @@ static int ext4_journalled_write_end(struct file *file,
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
|
||||
errout:
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
@ -2932,19 +2916,6 @@ static int ext4_nonda_switch(struct super_block *sb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We always reserve for an inode update; the superblock could be there too */
|
||||
static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
|
||||
{
|
||||
if (likely(ext4_has_feature_large_file(inode->i_sb)))
|
||||
return 1;
|
||||
|
||||
if (pos + len <= 0x7fffffffULL)
|
||||
return 1;
|
||||
|
||||
/* We might need to update the superblock to set LARGE_FILE */
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata)
|
||||
@ -2953,7 +2924,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
|
||||
struct page *page;
|
||||
pgoff_t index;
|
||||
struct inode *inode = mapping->host;
|
||||
handle_t *handle;
|
||||
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
return -EIO;
|
||||
@ -2979,41 +2949,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* grab_cache_page_write_begin() can take a long time if the
|
||||
* system is thrashing due to memory pressure, or if the page
|
||||
* is being written back. So grab it first before we start
|
||||
* the transaction handle. This also allows us to allocate
|
||||
* the page (if needed) without using GFP_NOFS.
|
||||
*/
|
||||
retry_grab:
|
||||
retry:
|
||||
page = grab_cache_page_write_begin(mapping, index, flags);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
unlock_page(page);
|
||||
|
||||
/*
|
||||
* With delayed allocation, we don't log the i_disksize update
|
||||
* if there is delayed block allocation. But we still need
|
||||
* to journal the i_disksize update for writes to the end
|
||||
* of file which has an already mapped buffer.
|
||||
*/
|
||||
retry_journal:
|
||||
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
|
||||
ext4_da_write_credits(inode, pos, len));
|
||||
if (IS_ERR(handle)) {
|
||||
put_page(page);
|
||||
return PTR_ERR(handle);
|
||||
}
|
||||
|
||||
lock_page(page);
|
||||
if (page->mapping != mapping) {
|
||||
/* The page got truncated from under us */
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
ext4_journal_stop(handle);
|
||||
goto retry_grab;
|
||||
}
|
||||
/* In case writeback began while the page was unlocked */
|
||||
wait_for_stable_page(page);
|
||||
|
||||
@ -3025,20 +2965,18 @@ retry_journal:
|
||||
#endif
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
ext4_journal_stop(handle);
|
||||
put_page(page);
|
||||
/*
|
||||
* block_write_begin may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again. Don't need
|
||||
* i_size_read because we hold i_mutex.
|
||||
* i_size_read because we hold inode lock.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
ext4_truncate_failed_write(inode);
|
||||
|
||||
if (ret == -ENOSPC &&
|
||||
ext4_should_retry_alloc(inode->i_sb, &retries))
|
||||
goto retry_journal;
|
||||
|
||||
put_page(page);
|
||||
goto retry;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3075,8 +3013,6 @@ static int ext4_da_write_end(struct file *file,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
int ret = 0, ret2;
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
loff_t new_i_size;
|
||||
unsigned long start, end;
|
||||
int write_mode = (int)(unsigned long)fsdata;
|
||||
@ -3086,44 +3022,36 @@ static int ext4_da_write_end(struct file *file,
|
||||
len, copied, page, fsdata);
|
||||
|
||||
trace_ext4_da_write_end(inode, pos, len, copied);
|
||||
start = pos & (PAGE_SIZE - 1);
|
||||
end = start + copied - 1;
|
||||
|
||||
/*
|
||||
* generic_write_end() will run mark_inode_dirty() if i_size
|
||||
* changes. So let's piggyback the i_disksize mark_inode_dirty
|
||||
* into that.
|
||||
*/
|
||||
new_i_size = pos + copied;
|
||||
if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
|
||||
if (ext4_has_inline_data(inode) ||
|
||||
ext4_da_should_update_i_disksize(page, end)) {
|
||||
ext4_update_i_disksize(inode, new_i_size);
|
||||
/* We need to mark inode dirty even if
|
||||
* new_i_size is less than inode->i_size
|
||||
* but greater than i_disksize. (hint delalloc)
|
||||
*/
|
||||
ret = ext4_mark_inode_dirty(handle, inode);
|
||||
}
|
||||
}
|
||||
|
||||
if (write_mode != CONVERT_INLINE_DATA &&
|
||||
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
|
||||
ext4_has_inline_data(inode))
|
||||
ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied,
|
||||
page);
|
||||
else
|
||||
ret2 = generic_write_end(file, mapping, pos, len, copied,
|
||||
page, fsdata);
|
||||
return ext4_write_inline_data_end(inode, pos, len, copied, page);
|
||||
|
||||
copied = ret2;
|
||||
if (ret2 < 0)
|
||||
ret = ret2;
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (unlikely(ret2 && !ret))
|
||||
ret = ret2;
|
||||
start = pos & (PAGE_SIZE - 1);
|
||||
end = start + copied - 1;
|
||||
|
||||
return ret ? ret : copied;
|
||||
/*
|
||||
* Since we are holding inode lock, we are sure i_disksize <=
|
||||
* i_size. We also know that if i_disksize < i_size, there are
|
||||
* delalloc writes pending in the range up to i_size. If the end of
|
||||
* the current write is <= i_size, there's no need to touch
|
||||
* i_disksize since writeback will push i_disksize up to i_size
|
||||
* eventually. If the end of the current write is > i_size and
|
||||
* inside an allocated block (ext4_da_should_update_i_disksize()
|
||||
* check), we need to update i_disksize here as neither
|
||||
* ext4_writepage() nor certain ext4_writepages() paths not
|
||||
* allocating blocks update i_disksize.
|
||||
*
|
||||
* Note that we defer inode dirtying to generic_write_end() /
|
||||
* ext4_da_write_inline_data_end().
|
||||
*/
|
||||
new_i_size = pos + copied;
|
||||
if (copied && new_i_size > inode->i_size &&
|
||||
ext4_da_should_update_i_disksize(page, end))
|
||||
ext4_update_i_disksize(inode, new_i_size);
|
||||
|
||||
return generic_write_end(file, mapping, pos, len, copied, page, fsdata);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user