btrfs: update btrfs_space_info's bytes_may_use timely
This patch fixes some false ENOSPC errors. The test script below
reproduces one such false ENOSPC error:
#!/bin/bash
dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=128
dev=$(losetup --show -f fs.img)
mkfs.btrfs -f -M $dev
mkdir /tmp/mntpoint
mount $dev /tmp/mntpoint
cd /tmp/mntpoint
xfs_io -f -c "falloc 0 $((64*1024*1024))" testfile
The above script fails with ENOSPC, but the fs actually still has free
space to satisfy this request. Please see the call graph:
btrfs_fallocate()
|-> btrfs_alloc_data_chunk_ondemand()
|   bytes_may_use += 64M
|-> btrfs_prealloc_file_range()
    |-> btrfs_reserve_extent()
        |-> btrfs_add_reserved_bytes()
        |   alloc_type is RESERVE_ALLOC_NO_ACCOUNT, so it does not
        |   change bytes_may_use, and bytes_reserved += 64M. Now
        |   bytes_may_use + bytes_reserved == 128M, which is greater
        |   than btrfs_space_info's total_bytes, so a false ENOSPC occurs.
        |   Note, the bytes_may_use decrease operation will be done at
        |   the end of btrfs_fallocate(), which is too late.
Here is another simple case for buffered write:
              CPU 1                    |              CPU 2
                                       |
|-> cow_file_range()                   |-> __btrfs_buffered_write()
    |-> btrfs_reserve_extent()         |   |
    |                                  |   |
    |                                  |   |
    |    .....                         |   |-> btrfs_check_data_free_space()
    |                                  |
    |                                  |
    |-> extent_clear_unlock_delalloc() |
On CPU 1, btrfs_reserve_extent()->find_free_extent()->
btrfs_add_reserved_bytes() does not decrease bytes_may_use; the decrease
operation is delayed until extent_clear_unlock_delalloc().
Assume that in this case btrfs_reserve_extent() reserved 128MB of data and
CPU 2's btrfs_check_data_free_space() tries to reserve 100MB of data space.
If
100MB > data_sinfo->total_bytes - data_sinfo->bytes_used -
data_sinfo->bytes_reserved - data_sinfo->bytes_pinned -
data_sinfo->bytes_readonly - data_sinfo->bytes_may_use
btrfs_check_data_free_space() will try to allocate a new data chunk, call
btrfs_start_delalloc_roots(), or commit the current transaction in order to
reserve some free space, which is obviously a lot of work. But this is not
necessary: if bytes_may_use were decreased in a timely manner (here, by the
128M already reserved), we would still have free space.
To fix this issue, this patch chooses to update bytes_may_use for both
data and metadata in btrfs_add_reserved_bytes(). For compress path, real
extent length may not be equal to file content length, so introduce a
ram_bytes argument for btrfs_reserve_extent(), find_free_extent() and
btrfs_add_reserved_bytes(), because bytes_may_use is increased by the
file content length. Then the compress path can update bytes_may_use
correctly. Also now we can discard RESERVE_ALLOC_NO_ACCOUNT, RESERVE_ALLOC
and RESERVE_FREE.
As we know, EXTENT_DO_ACCOUNTING is usually used for the error path. In
run_delalloc_nocow(), for an inode marked as NODATACOW or an extent marked as
PREALLOC, we also need to update bytes_may_use, but cannot pass
EXTENT_DO_ACCOUNTING, because it also clears the metadata reservation, so
here we introduce the EXTENT_CLEAR_DATA_RESV flag to tell btrfs_clear_bit_hook()
to update btrfs_space_info's bytes_may_use.
Meanwhile __btrfs_prealloc_file_range() will call
btrfs_free_reserved_data_space() internally for both the successful and failed
paths, so btrfs_prealloc_file_range()'s callers do not need to call
btrfs_free_reserved_data_space() any more.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
committed by
Chris Mason
parent
4824f1f412
commit
18513091af
@@ -566,6 +566,8 @@ cont:
|
||||
PAGE_SET_WRITEBACK |
|
||||
page_error_op |
|
||||
PAGE_END_WRITEBACK);
|
||||
btrfs_free_reserved_data_space_noquota(inode, start,
|
||||
end - start + 1);
|
||||
goto free_pages_out;
|
||||
}
|
||||
}
|
||||
@@ -742,7 +744,7 @@ retry:
|
||||
lock_extent(io_tree, async_extent->start,
|
||||
async_extent->start + async_extent->ram_size - 1);
|
||||
|
||||
ret = btrfs_reserve_extent(root,
|
||||
ret = btrfs_reserve_extent(root, async_extent->ram_size,
|
||||
async_extent->compressed_size,
|
||||
async_extent->compressed_size,
|
||||
0, alloc_hint, &ins, 1, 1);
|
||||
@@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
EXTENT_DEFRAG, PAGE_UNLOCK |
|
||||
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
|
||||
PAGE_END_WRITEBACK);
|
||||
|
||||
btrfs_free_reserved_data_space_noquota(inode, start,
|
||||
end - start + 1);
|
||||
*nr_written = *nr_written +
|
||||
(end - start + PAGE_SIZE) / PAGE_SIZE;
|
||||
*page_started = 1;
|
||||
@@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
unsigned long op;
|
||||
|
||||
cur_alloc_size = disk_num_bytes;
|
||||
ret = btrfs_reserve_extent(root, cur_alloc_size,
|
||||
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
|
||||
root->sectorsize, 0, alloc_hint,
|
||||
&ins, 1, 1);
|
||||
if (ret < 0)
|
||||
@@ -1489,8 +1492,10 @@ out_check:
|
||||
extent_clear_unlock_delalloc(inode, cur_offset,
|
||||
cur_offset + num_bytes - 1,
|
||||
locked_page, EXTENT_LOCKED |
|
||||
EXTENT_DELALLOC, PAGE_UNLOCK |
|
||||
PAGE_SET_PRIVATE2);
|
||||
EXTENT_DELALLOC |
|
||||
EXTENT_CLEAR_DATA_RESV,
|
||||
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
|
||||
|
||||
if (!nolock && nocow)
|
||||
btrfs_end_write_no_snapshoting(root);
|
||||
cur_offset = extent_end;
|
||||
@@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode,
|
||||
return;
|
||||
|
||||
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
|
||||
&& do_list && !(state->state & EXTENT_NORESERVE))
|
||||
&& do_list && !(state->state & EXTENT_NORESERVE)
|
||||
&& (*bits & (EXTENT_DO_ACCOUNTING |
|
||||
EXTENT_CLEAR_DATA_RESV)))
|
||||
btrfs_free_reserved_data_space_noquota(inode,
|
||||
state->start, len);
|
||||
|
||||
@@ -7252,7 +7259,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
|
||||
int ret;
|
||||
|
||||
alloc_hint = get_extent_allocation_hint(inode, start, len);
|
||||
ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
|
||||
ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
|
||||
alloc_hint, &ins, 1, 1);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
@@ -7752,6 +7759,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
ret = PTR_ERR(em2);
|
||||
goto unlock_err;
|
||||
}
|
||||
/*
|
||||
* For inode marked NODATACOW or extent marked PREALLOC,
|
||||
* use the existing or preallocated extent, so does not
|
||||
* need to adjust btrfs_space_info's bytes_may_use.
|
||||
*/
|
||||
btrfs_free_reserved_data_space_noquota(inode,
|
||||
start, len);
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
@@ -7786,7 +7800,6 @@ unlock:
|
||||
i_size_write(inode, start + len);
|
||||
|
||||
adjust_dio_outstanding_extents(inode, dio_data, len);
|
||||
btrfs_free_reserved_data_space(inode, start, len);
|
||||
WARN_ON(dio_data->reserve < len);
|
||||
dio_data->reserve -= len;
|
||||
dio_data->unsubmitted_oe_range_end = start + len;
|
||||
@@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
u64 last_alloc = (u64)-1;
|
||||
int ret = 0;
|
||||
bool own_trans = true;
|
||||
u64 end = start + num_bytes - 1;
|
||||
|
||||
if (trans)
|
||||
own_trans = false;
|
||||
@@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
* sized chunks.
|
||||
*/
|
||||
cur_bytes = min(cur_bytes, last_alloc);
|
||||
ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
|
||||
*alloc_hint, &ins, 1, 0);
|
||||
ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
|
||||
min_size, 0, *alloc_hint, &ins, 1, 0);
|
||||
if (ret) {
|
||||
if (own_trans)
|
||||
btrfs_end_transaction(trans, root);
|
||||
@@ -10414,6 +10428,9 @@ next:
|
||||
if (own_trans)
|
||||
btrfs_end_transaction(trans, root);
|
||||
}
|
||||
if (cur_offset < end)
|
||||
btrfs_free_reserved_data_space(inode, cur_offset,
|
||||
end - cur_offset + 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user