Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "There are some new user features and the usual load of invisible enhancements or cleanups. New features: - extend mount options to specify zlib compression level, -o compress=zlib:9 - v2 of ioctl "extent to inode mapping", addressing a usecase where we want to retrieve more but inaccurate results and do the postprocessing in userspace, aiding defragmentation or deduplication tools - populate compression heuristics logic, do data sampling and try to guess compressibility by: looking for repeated patterns, counting unique byte values and distribution, calculating Shannon entropy; this will need more benchmarking and possibly fine tuning, but the base should be good enough - enable indexing for btrfs as lower filesystem in overlayfs - speedup page cache readahead during send on large files Internal enhancements: - more sanity checks of b-tree items when reading them from disk - more EINVAL/EUCLEAN fixups, missing BLK_STS_* conversion, other errno or error handling fixes - remove some homegrown IO-related logic, that's been obsoleted by core block layer changes (batching, plug/unplug, own counters) - add ref-verify, optional debugging feature to verify extent reference accounting - simplify code handling outstanding extents, make it more clear where and how the accounting is done - make delalloc reservations per-inode, simplify the code and make the logic more straightforward - extensive cleanup of delayed refs code Notable fixes: - fix send ioctl on 32bit with 64bit kernel" * 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (102 commits) btrfs: Fix bug for misused dev_t when lookup in dev state hash table. 
Btrfs: heuristic: add Shannon entropy calculation Btrfs: heuristic: add byte core set calculation Btrfs: heuristic: add byte set calculation Btrfs: heuristic: add detection of repeated data patterns Btrfs: heuristic: implement sampling logic Btrfs: heuristic: add bucket and sample counters and other defines Btrfs: compression: separate heuristic/compression workspaces btrfs: move btrfs_truncate_block out of trans handle btrfs: don't call btrfs_start_delalloc_roots in flushoncommit btrfs: track refs in a rb_tree instead of a list btrfs: add a comp_refs() helper btrfs: switch args for comp_*_refs btrfs: make the delalloc block rsv per inode btrfs: add tracepoints for outstanding extents mods Btrfs: rework outstanding_extents btrfs: increase output size for LOGICAL_INO_V2 ioctl btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2 btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents btrfs: send: remove unused code ...
This commit is contained in:
commit
5cea7647e6
@ -91,3 +91,14 @@ config BTRFS_ASSERT
|
||||
any of the assertions trip. This is meant for btrfs developers only.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config BTRFS_FS_REF_VERIFY
|
||||
bool "Btrfs with the ref verify tool compiled in"
|
||||
depends on BTRFS_FS
|
||||
default n
|
||||
help
|
||||
Enable run-time extent reference verification instrumentation. This
|
||||
is meant to be used by btrfs developers for tracking down extent
|
||||
reference problems or verifying they didn't break something.
|
||||
|
||||
If unsure, say N.
|
||||
|
@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
|
||||
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
|
||||
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
|
||||
uuid-tree.o props.o hash.o free-space-tree.o
|
||||
uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
|
||||
tests/extent-buffer-tests.o tests/btrfs-tests.o \
|
||||
|
@ -67,7 +67,7 @@ struct btrfs_workqueue {
|
||||
static void normal_work_helper(struct btrfs_work *work);
|
||||
|
||||
#define BTRFS_WORK_HELPER(name) \
|
||||
void btrfs_##name(struct work_struct *arg) \
|
||||
noinline_for_stack void btrfs_##name(struct work_struct *arg) \
|
||||
{ \
|
||||
struct btrfs_work *work = container_of(arg, struct btrfs_work, \
|
||||
normal_work); \
|
||||
|
@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
|
||||
const struct extent_buffer *eb,
|
||||
const struct btrfs_file_extent_item *fi,
|
||||
u64 extent_item_pos,
|
||||
struct extent_inode_elem **eie)
|
||||
struct extent_inode_elem **eie,
|
||||
bool ignore_offset)
|
||||
{
|
||||
u64 offset = 0;
|
||||
struct extent_inode_elem *e;
|
||||
|
||||
if (!btrfs_file_extent_compression(eb, fi) &&
|
||||
if (!ignore_offset &&
|
||||
!btrfs_file_extent_compression(eb, fi) &&
|
||||
!btrfs_file_extent_encryption(eb, fi) &&
|
||||
!btrfs_file_extent_other_encoding(eb, fi)) {
|
||||
u64 data_offset;
|
||||
@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
|
||||
|
||||
static int find_extent_in_eb(const struct extent_buffer *eb,
|
||||
u64 wanted_disk_byte, u64 extent_item_pos,
|
||||
struct extent_inode_elem **eie)
|
||||
struct extent_inode_elem **eie,
|
||||
bool ignore_offset)
|
||||
{
|
||||
u64 disk_byte;
|
||||
struct btrfs_key key;
|
||||
@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
|
||||
if (disk_byte != wanted_disk_byte)
|
||||
continue;
|
||||
|
||||
ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
|
||||
ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
|
||||
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
struct ulist *parents, struct prelim_ref *ref,
|
||||
int level, u64 time_seq, const u64 *extent_item_pos,
|
||||
u64 total_refs)
|
||||
u64 total_refs, bool ignore_offset)
|
||||
{
|
||||
int ret = 0;
|
||||
int slot;
|
||||
@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
if (extent_item_pos) {
|
||||
ret = check_extent_in_eb(&key, eb, fi,
|
||||
*extent_item_pos,
|
||||
&eie);
|
||||
&eie, ignore_offset);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
@ -510,7 +513,8 @@ next:
|
||||
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path, u64 time_seq,
|
||||
struct prelim_ref *ref, struct ulist *parents,
|
||||
const u64 *extent_item_pos, u64 total_refs)
|
||||
const u64 *extent_item_pos, u64 total_refs,
|
||||
bool ignore_offset)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_key root_key;
|
||||
@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
ret = add_all_parents(root, path, parents, ref, level, time_seq,
|
||||
extent_item_pos, total_refs);
|
||||
extent_item_pos, total_refs, ignore_offset);
|
||||
out:
|
||||
path->lowest_level = 0;
|
||||
btrfs_release_path(path);
|
||||
@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path, u64 time_seq,
|
||||
struct preftrees *preftrees,
|
||||
const u64 *extent_item_pos, u64 total_refs,
|
||||
struct share_check *sc)
|
||||
struct share_check *sc, bool ignore_offset)
|
||||
{
|
||||
int err;
|
||||
int ret = 0;
|
||||
@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
err = resolve_indirect_ref(fs_info, path, time_seq, ref,
|
||||
parents, extent_item_pos,
|
||||
total_refs);
|
||||
total_refs, ignore_offset);
|
||||
/*
|
||||
* we can only tolerate ENOENT; otherwise, we should catch the error
|
||||
* and return directly.
|
||||
@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key tmp_op_key;
|
||||
struct btrfs_key *op_key = NULL;
|
||||
struct rb_node *n;
|
||||
int count;
|
||||
int ret = 0;
|
||||
|
||||
@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
spin_lock(&head->lock);
|
||||
list_for_each_entry(node, &head->ref_list, list) {
|
||||
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
|
||||
node = rb_entry(n, struct btrfs_delayed_ref_node,
|
||||
ref_node);
|
||||
if (node->seq > seq)
|
||||
continue;
|
||||
|
||||
@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
|
||||
*
|
||||
* Otherwise this returns 0 for success and <0 for an error.
|
||||
*
|
||||
* If ignore_offset is set to false, only extent refs whose offsets match
|
||||
* extent_item_pos are returned. If true, every extent ref is returned
|
||||
* and extent_item_pos is ignored.
|
||||
*
|
||||
* FIXME some caching might speed things up
|
||||
*/
|
||||
static int find_parent_nodes(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist *refs,
|
||||
struct ulist *roots, const u64 *extent_item_pos,
|
||||
struct share_check *sc)
|
||||
struct share_check *sc, bool ignore_offset)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
struct btrfs_path *path;
|
||||
@ -1178,7 +1189,7 @@ again:
|
||||
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
|
||||
if (head) {
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
refcount_inc(&head->node.refs);
|
||||
refcount_inc(&head->refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
btrfs_release_path(path);
|
||||
@ -1189,7 +1200,7 @@ again:
|
||||
*/
|
||||
mutex_lock(&head->mutex);
|
||||
mutex_unlock(&head->mutex);
|
||||
btrfs_put_delayed_ref(&head->node);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
goto again;
|
||||
}
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
@ -1235,7 +1246,7 @@ again:
|
||||
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
|
||||
|
||||
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
|
||||
extent_item_pos, total_refs, sc);
|
||||
extent_item_pos, total_refs, sc, ignore_offset);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1282,7 +1293,7 @@ again:
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
ret = find_extent_in_eb(eb, bytenr,
|
||||
*extent_item_pos, &eie);
|
||||
*extent_item_pos, &eie, ignore_offset);
|
||||
btrfs_tree_read_unlock_blocking(eb);
|
||||
free_extent_buffer(eb);
|
||||
if (ret < 0)
|
||||
@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
|
||||
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **leafs,
|
||||
const u64 *extent_item_pos)
|
||||
const u64 *extent_item_pos, bool ignore_offset)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
return -ENOMEM;
|
||||
|
||||
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
|
||||
*leafs, NULL, extent_item_pos, NULL);
|
||||
*leafs, NULL, extent_item_pos, NULL, ignore_offset);
|
||||
if (ret < 0 && ret != -ENOENT) {
|
||||
free_leaf_list(*leafs);
|
||||
return ret;
|
||||
@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **roots)
|
||||
u64 time_seq, struct ulist **roots,
|
||||
bool ignore_offset)
|
||||
{
|
||||
struct ulist *tmp;
|
||||
struct ulist_node *node = NULL;
|
||||
@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while (1) {
|
||||
ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
|
||||
tmp, *roots, NULL, NULL);
|
||||
tmp, *roots, NULL, NULL, ignore_offset);
|
||||
if (ret < 0 && ret != -ENOENT) {
|
||||
ulist_free(tmp);
|
||||
ulist_free(*roots);
|
||||
@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
|
||||
|
||||
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **roots)
|
||||
u64 time_seq, struct ulist **roots,
|
||||
bool ignore_offset)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!trans)
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
|
||||
time_seq, roots);
|
||||
time_seq, roots, ignore_offset);
|
||||
if (!trans)
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
return ret;
|
||||
@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while (1) {
|
||||
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
|
||||
roots, NULL, &shared);
|
||||
roots, NULL, &shared, false);
|
||||
if (ret == BACKREF_FOUND_SHARED) {
|
||||
/* this is the only condition under which we return 1 */
|
||||
ret = 1;
|
||||
@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
|
||||
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
|
||||
u64 extent_item_objectid, u64 extent_item_pos,
|
||||
int search_commit_root,
|
||||
iterate_extent_inodes_t *iterate, void *ctx)
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
bool ignore_offset)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_trans_handle *trans = NULL;
|
||||
@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
|
||||
|
||||
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
|
||||
tree_mod_seq_elem.seq, &refs,
|
||||
&extent_item_pos);
|
||||
&extent_item_pos, ignore_offset);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ULIST_ITER_INIT(&ref_uiter);
|
||||
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
|
||||
ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
|
||||
tree_mod_seq_elem.seq, &roots);
|
||||
tree_mod_seq_elem.seq, &roots,
|
||||
ignore_offset);
|
||||
if (ret)
|
||||
break;
|
||||
ULIST_ITER_INIT(&root_uiter);
|
||||
@ -1943,7 +1958,8 @@ out:
|
||||
|
||||
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
iterate_extent_inodes_t *iterate, void *ctx)
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
bool ignore_offset)
|
||||
{
|
||||
int ret;
|
||||
u64 extent_item_pos;
|
||||
@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
extent_item_pos = logical - found_key.objectid;
|
||||
ret = iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
extent_item_pos, search_commit_root,
|
||||
iterate, ctx);
|
||||
iterate, ctx, ignore_offset);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
|
||||
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
|
||||
u64 extent_item_objectid,
|
||||
u64 extent_offset, int search_commit_root,
|
||||
iterate_extent_inodes_t *iterate, void *ctx);
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
bool ignore_offset);
|
||||
|
||||
int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
iterate_extent_inodes_t *iterate, void *ctx);
|
||||
iterate_extent_inodes_t *iterate, void *ctx,
|
||||
bool ignore_offset);
|
||||
|
||||
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
|
||||
|
||||
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **roots);
|
||||
u64 time_seq, struct ulist **roots, bool ignore_offset);
|
||||
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
|
||||
u32 name_len, unsigned long name_off,
|
||||
struct extent_buffer *eb_in, u64 parent,
|
||||
|
@ -36,14 +36,13 @@
|
||||
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
|
||||
#define BTRFS_INODE_DUMMY 2
|
||||
#define BTRFS_INODE_IN_DEFRAG 3
|
||||
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
|
||||
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
|
||||
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
|
||||
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
|
||||
#define BTRFS_INODE_COPY_EVERYTHING 8
|
||||
#define BTRFS_INODE_IN_DELALLOC_LIST 9
|
||||
#define BTRFS_INODE_READDIO_NEED_LOCK 10
|
||||
#define BTRFS_INODE_HAS_PROPS 11
|
||||
#define BTRFS_INODE_HAS_ORPHAN_ITEM 4
|
||||
#define BTRFS_INODE_HAS_ASYNC_EXTENT 5
|
||||
#define BTRFS_INODE_NEEDS_FULL_SYNC 6
|
||||
#define BTRFS_INODE_COPY_EVERYTHING 7
|
||||
#define BTRFS_INODE_IN_DELALLOC_LIST 8
|
||||
#define BTRFS_INODE_READDIO_NEED_LOCK 9
|
||||
#define BTRFS_INODE_HAS_PROPS 10
|
||||
|
||||
/* in memory btrfs inode */
|
||||
struct btrfs_inode {
|
||||
@ -176,7 +175,8 @@ struct btrfs_inode {
|
||||
* of extent items we've reserved metadata for.
|
||||
*/
|
||||
unsigned outstanding_extents;
|
||||
unsigned reserved_extents;
|
||||
|
||||
struct btrfs_block_rsv block_rsv;
|
||||
|
||||
/*
|
||||
* Cached values of inode properties
|
||||
@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
|
||||
int mod)
|
||||
{
|
||||
lockdep_assert_held(&inode->lock);
|
||||
inode->outstanding_extents += mod;
|
||||
if (btrfs_is_free_space_inode(inode))
|
||||
return;
|
||||
trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
|
||||
mod);
|
||||
}
|
||||
|
||||
static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
|
||||
{
|
||||
int ret = 0;
|
||||
|
@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
|
||||
struct btrfsic_dev_state_hashtable *h)
|
||||
{
|
||||
const unsigned int hashval =
|
||||
(((unsigned int)((uintptr_t)ds->bdev)) &
|
||||
(((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
|
||||
(BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
|
||||
|
||||
list_add(&ds->collision_resolving_node, h->table + hashval);
|
||||
@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
|
||||
mutex_lock(&btrfsic_mutex);
|
||||
/* since btrfsic_submit_bio() is also called before
|
||||
* btrfsic_mount(), this might return NULL */
|
||||
dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
|
||||
dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
|
||||
if (NULL != dev_state &&
|
||||
(bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
|
||||
unsigned int i = 0;
|
||||
@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
||||
state = kvzalloc(sizeof(*state), GFP_KERNEL);
|
||||
if (!state) {
|
||||
pr_info("btrfs check-integrity: allocation failed!\n");
|
||||
return -1;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (!btrfsic_is_initialized) {
|
||||
@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
||||
if (NULL == ds) {
|
||||
pr_info("btrfs check-integrity: kmalloc() failed!\n");
|
||||
mutex_unlock(&btrfsic_mutex);
|
||||
return -1;
|
||||
return -ENOMEM;
|
||||
}
|
||||
ds->bdev = device->bdev;
|
||||
ds->state = state;
|
||||
|
@ -33,6 +33,8 @@
|
||||
#include <linux/bit_spinlock.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/log2.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
@ -255,7 +257,8 @@ static void end_compressed_bio_write(struct bio *bio)
|
||||
cb->start,
|
||||
cb->start + cb->len - 1,
|
||||
NULL,
|
||||
bio->bi_status ? 0 : 1);
|
||||
bio->bi_status ?
|
||||
BLK_STS_OK : BLK_STS_NOTSUPP);
|
||||
cb->compressed_pages[0]->mapping = NULL;
|
||||
|
||||
end_compressed_writeback(inode, cb);
|
||||
@ -706,7 +709,86 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct {
|
||||
/*
|
||||
* Heuristic uses systematic sampling to collect data from the input data
|
||||
* range, the logic can be tuned by the following constants:
|
||||
*
|
||||
* @SAMPLING_READ_SIZE - how many bytes will be copied for each sample
|
||||
* @SAMPLING_INTERVAL - range from which the sampled data can be collected
|
||||
*/
|
||||
#define SAMPLING_READ_SIZE (16)
|
||||
#define SAMPLING_INTERVAL (256)
|
||||
|
||||
/*
|
||||
* For statistical analysis of the input data we consider bytes that form a
|
||||
* Galois Field of 256 objects. Each object has an attribute count, ie. how
|
||||
* many times the object appeared in the sample.
|
||||
*/
|
||||
#define BUCKET_SIZE (256)
|
||||
|
||||
/*
|
||||
* The size of the sample is based on a statistical sampling rule of thumb.
|
||||
* The common way is to perform sampling tests as long as the number of
|
||||
* elements in each cell is at least 5.
|
||||
*
|
||||
* Instead of 5, we choose 32 to obtain more accurate results.
|
||||
* If the data contain the maximum number of symbols, which is 256, we obtain a
|
||||
* sample size bound by 8192.
|
||||
*
|
||||
* For a sample of at most 8KB of data per data range: 16 consecutive bytes
|
||||
* from up to 512 locations.
|
||||
*/
|
||||
#define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \
|
||||
SAMPLING_READ_SIZE / SAMPLING_INTERVAL)
|
||||
|
||||
struct bucket_item {
|
||||
u32 count;
|
||||
};
|
||||
|
||||
struct heuristic_ws {
|
||||
/* Partial copy of input data */
|
||||
u8 *sample;
|
||||
u32 sample_size;
|
||||
/* Buckets store counters for each byte value */
|
||||
struct bucket_item *bucket;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static void free_heuristic_ws(struct list_head *ws)
|
||||
{
|
||||
struct heuristic_ws *workspace;
|
||||
|
||||
workspace = list_entry(ws, struct heuristic_ws, list);
|
||||
|
||||
kvfree(workspace->sample);
|
||||
kfree(workspace->bucket);
|
||||
kfree(workspace);
|
||||
}
|
||||
|
||||
static struct list_head *alloc_heuristic_ws(void)
|
||||
{
|
||||
struct heuristic_ws *ws;
|
||||
|
||||
ws = kzalloc(sizeof(*ws), GFP_KERNEL);
|
||||
if (!ws)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
|
||||
if (!ws->sample)
|
||||
goto fail;
|
||||
|
||||
ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL);
|
||||
if (!ws->bucket)
|
||||
goto fail;
|
||||
|
||||
INIT_LIST_HEAD(&ws->list);
|
||||
return &ws->list;
|
||||
fail:
|
||||
free_heuristic_ws(&ws->list);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
struct workspaces_list {
|
||||
struct list_head idle_ws;
|
||||
spinlock_t ws_lock;
|
||||
/* Number of free workspaces */
|
||||
@ -715,7 +797,11 @@ static struct {
|
||||
atomic_t total_ws;
|
||||
/* Waiters for a free workspace */
|
||||
wait_queue_head_t ws_wait;
|
||||
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
|
||||
};
|
||||
|
||||
static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
|
||||
|
||||
static struct workspaces_list btrfs_heuristic_ws;
|
||||
|
||||
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
|
||||
&btrfs_zlib_compress,
|
||||
@ -725,11 +811,25 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
|
||||
|
||||
void __init btrfs_init_compress(void)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
struct list_head *workspace;
|
||||
INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws);
|
||||
spin_lock_init(&btrfs_heuristic_ws.ws_lock);
|
||||
atomic_set(&btrfs_heuristic_ws.total_ws, 0);
|
||||
init_waitqueue_head(&btrfs_heuristic_ws.ws_wait);
|
||||
|
||||
workspace = alloc_heuristic_ws();
|
||||
if (IS_ERR(workspace)) {
|
||||
pr_warn(
|
||||
"BTRFS: cannot preallocate heuristic workspace, will try later\n");
|
||||
} else {
|
||||
atomic_set(&btrfs_heuristic_ws.total_ws, 1);
|
||||
btrfs_heuristic_ws.free_ws = 1;
|
||||
list_add(workspace, &btrfs_heuristic_ws.idle_ws);
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
|
||||
spin_lock_init(&btrfs_comp_ws[i].ws_lock);
|
||||
atomic_set(&btrfs_comp_ws[i].total_ws, 0);
|
||||
@ -756,18 +856,32 @@ void __init btrfs_init_compress(void)
|
||||
* Preallocation makes a forward progress guarantees and we do not return
|
||||
* errors.
|
||||
*/
|
||||
static struct list_head *find_workspace(int type)
|
||||
static struct list_head *__find_workspace(int type, bool heuristic)
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int cpus = num_online_cpus();
|
||||
int idx = type - 1;
|
||||
unsigned nofs_flag;
|
||||
struct list_head *idle_ws;
|
||||
spinlock_t *ws_lock;
|
||||
atomic_t *total_ws;
|
||||
wait_queue_head_t *ws_wait;
|
||||
int *free_ws;
|
||||
|
||||
if (heuristic) {
|
||||
idle_ws = &btrfs_heuristic_ws.idle_ws;
|
||||
ws_lock = &btrfs_heuristic_ws.ws_lock;
|
||||
total_ws = &btrfs_heuristic_ws.total_ws;
|
||||
ws_wait = &btrfs_heuristic_ws.ws_wait;
|
||||
free_ws = &btrfs_heuristic_ws.free_ws;
|
||||
} else {
|
||||
idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
}
|
||||
|
||||
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
int *free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
again:
|
||||
spin_lock(ws_lock);
|
||||
if (!list_empty(idle_ws)) {
|
||||
@ -797,7 +911,10 @@ again:
|
||||
* context of btrfs_compress_bio/btrfs_compress_pages
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
workspace = btrfs_compress_op[idx]->alloc_workspace();
|
||||
if (heuristic)
|
||||
workspace = alloc_heuristic_ws();
|
||||
else
|
||||
workspace = btrfs_compress_op[idx]->alloc_workspace();
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
if (IS_ERR(workspace)) {
|
||||
@ -828,18 +945,38 @@ again:
|
||||
return workspace;
|
||||
}
|
||||
|
||||
static struct list_head *find_workspace(int type)
|
||||
{
|
||||
return __find_workspace(type, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* put a workspace struct back on the list or free it if we have enough
|
||||
* idle ones sitting around
|
||||
*/
|
||||
static void free_workspace(int type, struct list_head *workspace)
|
||||
static void __free_workspace(int type, struct list_head *workspace,
|
||||
bool heuristic)
|
||||
{
|
||||
int idx = type - 1;
|
||||
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
int *free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
struct list_head *idle_ws;
|
||||
spinlock_t *ws_lock;
|
||||
atomic_t *total_ws;
|
||||
wait_queue_head_t *ws_wait;
|
||||
int *free_ws;
|
||||
|
||||
if (heuristic) {
|
||||
idle_ws = &btrfs_heuristic_ws.idle_ws;
|
||||
ws_lock = &btrfs_heuristic_ws.ws_lock;
|
||||
total_ws = &btrfs_heuristic_ws.total_ws;
|
||||
ws_wait = &btrfs_heuristic_ws.ws_wait;
|
||||
free_ws = &btrfs_heuristic_ws.free_ws;
|
||||
} else {
|
||||
idle_ws = &btrfs_comp_ws[idx].idle_ws;
|
||||
ws_lock = &btrfs_comp_ws[idx].ws_lock;
|
||||
total_ws = &btrfs_comp_ws[idx].total_ws;
|
||||
ws_wait = &btrfs_comp_ws[idx].ws_wait;
|
||||
free_ws = &btrfs_comp_ws[idx].free_ws;
|
||||
}
|
||||
|
||||
spin_lock(ws_lock);
|
||||
if (*free_ws <= num_online_cpus()) {
|
||||
@ -850,7 +987,10 @@ static void free_workspace(int type, struct list_head *workspace)
|
||||
}
|
||||
spin_unlock(ws_lock);
|
||||
|
||||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
if (heuristic)
|
||||
free_heuristic_ws(workspace);
|
||||
else
|
||||
btrfs_compress_op[idx]->free_workspace(workspace);
|
||||
atomic_dec(total_ws);
|
||||
wake:
|
||||
/*
|
||||
@ -861,6 +1001,11 @@ wake:
|
||||
wake_up(ws_wait);
|
||||
}
|
||||
|
||||
static void free_workspace(int type, struct list_head *ws)
|
||||
{
|
||||
return __free_workspace(type, ws, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* cleanup function for module exit
|
||||
*/
|
||||
@ -869,6 +1014,13 @@ static void free_workspaces(void)
|
||||
struct list_head *workspace;
|
||||
int i;
|
||||
|
||||
while (!list_empty(&btrfs_heuristic_ws.idle_ws)) {
|
||||
workspace = btrfs_heuristic_ws.idle_ws.next;
|
||||
list_del(workspace);
|
||||
free_heuristic_ws(workspace);
|
||||
atomic_dec(&btrfs_heuristic_ws.total_ws);
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
|
||||
while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
|
||||
workspace = btrfs_comp_ws[i].idle_ws.next;
|
||||
@ -883,6 +1035,11 @@ static void free_workspaces(void)
|
||||
* Given an address space and start and length, compress the bytes into @pages
|
||||
* that are allocated on demand.
|
||||
*
|
||||
* @type_level is encoded algorithm and level, where level 0 means whatever
|
||||
* default the algorithm chooses and is opaque here;
|
||||
* - the compression algorithm is in bits 0-3
|
||||
* - the level is in bits 4-7
|
||||
*
|
||||
* @out_pages is an in/out parameter, holds maximum number of pages to allocate
|
||||
* and returns number of actually allocated pages
|
||||
*
|
||||
@ -897,7 +1054,7 @@ static void free_workspaces(void)
|
||||
* @max_out tells us the max number of bytes that we're allowed to
|
||||
* stuff into pages
|
||||
*/
|
||||
int btrfs_compress_pages(int type, struct address_space *mapping,
|
||||
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
|
||||
u64 start, struct page **pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
@ -905,9 +1062,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
|
||||
{
|
||||
struct list_head *workspace;
|
||||
int ret;
|
||||
int type = type_level & 0xF;
|
||||
|
||||
workspace = find_workspace(type);
|
||||
|
||||
btrfs_compress_op[type - 1]->set_level(workspace, type_level);
|
||||
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
|
||||
start, pages,
|
||||
out_pages,
|
||||
@ -1065,6 +1224,211 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shannon Entropy calculation
|
||||
*
|
||||
* Pure byte distribution analysis fails to determine compressibility of data.
|
||||
* Try calculating entropy to estimate the average minimum number of bits
|
||||
* needed to encode the sampled data.
|
||||
*
|
||||
* For convenience, return the percentage of needed bits, instead of amount of
|
||||
* bits directly.
|
||||
*
|
||||
* @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy
|
||||
* and can be compressible with high probability
|
||||
*
|
||||
* @ENTROPY_LVL_HIGH - data are not compressible with high probability
|
||||
*
|
||||
* Use of ilog2() decreases precision, so we lower each LVL by 5 to compensate.
|
||||
*/
|
||||
#define ENTROPY_LVL_ACEPTABLE (65)
|
||||
#define ENTROPY_LVL_HIGH (80)
|
||||
|
||||
/*
|
||||
* For increased precision in shannon_entropy calculation,
|
||||
* let's do pow(n, M) to save more digits after comma:
|
||||
*
|
||||
* - maximum int bit length is 64
|
||||
* - ilog2(MAX_SAMPLE_SIZE) -> 13
|
||||
* - 13 * 4 = 52 < 64 -> M = 4
|
||||
*
|
||||
* So use pow(n, 4).
|
||||
*/
|
||||
static inline u32 ilog2_w(u64 n)
|
||||
{
|
||||
return ilog2(n * n * n * n);
|
||||
}
|
||||
|
||||
static u32 shannon_entropy(struct heuristic_ws *ws)
|
||||
{
|
||||
const u32 entropy_max = 8 * ilog2_w(2);
|
||||
u32 entropy_sum = 0;
|
||||
u32 p, p_base, sz_base;
|
||||
u32 i;
|
||||
|
||||
sz_base = ilog2_w(ws->sample_size);
|
||||
for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) {
|
||||
p = ws->bucket[i].count;
|
||||
p_base = ilog2_w(p);
|
||||
entropy_sum += p * (sz_base - p_base);
|
||||
}
|
||||
|
||||
entropy_sum /= ws->sample_size;
|
||||
return entropy_sum * 100 / entropy_max;
|
||||
}
|
||||
|
||||
/* Compare buckets by size, ascending */
|
||||
static int bucket_comp_rev(const void *lv, const void *rv)
|
||||
{
|
||||
const struct bucket_item *l = (const struct bucket_item *)lv;
|
||||
const struct bucket_item *r = (const struct bucket_item *)rv;
|
||||
|
||||
return r->count - l->count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Size of the core byte set - how many bytes cover 90% of the sample
|
||||
*
|
||||
* There are several types of structured binary data that use nearly all byte
|
||||
* values. The distribution can be uniform and counts in all buckets will be
|
||||
* nearly the same (eg. encrypted data). Unlikely to be compressible.
|
||||
*
|
||||
* Other possibility is normal (Gaussian) distribution, where the data could
|
||||
* be potentially compressible, but we have to take a few more steps to decide
|
||||
* how much.
|
||||
*
|
||||
* @BYTE_CORE_SET_LOW - main part of byte values repeated frequently,
|
||||
* compression algo can easily fix that
|
||||
* @BYTE_CORE_SET_HIGH - data have uniform distribution and with high
|
||||
* probability is not compressible
|
||||
*/
|
||||
#define BYTE_CORE_SET_LOW (64)
|
||||
#define BYTE_CORE_SET_HIGH (200)
|
||||
|
||||
static int byte_core_set_size(struct heuristic_ws *ws)
|
||||
{
|
||||
u32 i;
|
||||
u32 coreset_sum = 0;
|
||||
const u32 core_set_threshold = ws->sample_size * 90 / 100;
|
||||
struct bucket_item *bucket = ws->bucket;
|
||||
|
||||
/* Sort in reverse order */
|
||||
sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL);
|
||||
|
||||
for (i = 0; i < BYTE_CORE_SET_LOW; i++)
|
||||
coreset_sum += bucket[i].count;
|
||||
|
||||
if (coreset_sum > core_set_threshold)
|
||||
return i;
|
||||
|
||||
for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) {
|
||||
coreset_sum += bucket[i].count;
|
||||
if (coreset_sum > core_set_threshold)
|
||||
break;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* Count byte values in buckets.
|
||||
* This heuristic can detect textual data (configs, xml, json, html, etc).
|
||||
* Because in most text-like data byte set is restricted to limited number of
|
||||
* possible characters, and that restriction in most cases makes data easy to
|
||||
* compress.
|
||||
*
|
||||
* @BYTE_SET_THRESHOLD - consider all data within this byte set size:
|
||||
* less - compressible
|
||||
* more - need additional analysis
|
||||
*/
|
||||
#define BYTE_SET_THRESHOLD (64)
|
||||
|
||||
static u32 byte_set_size(const struct heuristic_ws *ws)
|
||||
{
|
||||
u32 i;
|
||||
u32 byte_set_size = 0;
|
||||
|
||||
for (i = 0; i < BYTE_SET_THRESHOLD; i++) {
|
||||
if (ws->bucket[i].count > 0)
|
||||
byte_set_size++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Continue collecting count of byte values in buckets. If the byte
|
||||
* set size is bigger then the threshold, it's pointless to continue,
|
||||
* the detection technique would fail for this type of data.
|
||||
*/
|
||||
for (; i < BUCKET_SIZE; i++) {
|
||||
if (ws->bucket[i].count > 0) {
|
||||
byte_set_size++;
|
||||
if (byte_set_size > BYTE_SET_THRESHOLD)
|
||||
return byte_set_size;
|
||||
}
|
||||
}
|
||||
|
||||
return byte_set_size;
|
||||
}
|
||||
|
||||
static bool sample_repeated_patterns(struct heuristic_ws *ws)
|
||||
{
|
||||
const u32 half_of_sample = ws->sample_size / 2;
|
||||
const u8 *data = ws->sample;
|
||||
|
||||
return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0;
|
||||
}
|
||||
|
||||
/*
 * Fill ws->sample with SAMPLING_READ_SIZE-byte pieces taken every
 * SAMPLING_INTERVAL bytes from the page cache pages of @inode covering
 * [start, end), and store the number of bytes collected in ws->sample_size.
 */
static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
				     struct heuristic_ws *ws)
{
	u64 first_idx, last_idx, idx;
	u32 filled = 0;

	/*
	 * Compression handles the input data by chunks of 128KiB
	 * (defined by BTRFS_MAX_UNCOMPRESSED)
	 *
	 * We do the same for the heuristic and loop over the whole range.
	 *
	 * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will
	 * process no more than BTRFS_MAX_UNCOMPRESSED at a time.
	 */
	if (end - start > BTRFS_MAX_UNCOMPRESSED)
		end = start + BTRFS_MAX_UNCOMPRESSED;

	first_idx = start >> PAGE_SHIFT;
	last_idx = end >> PAGE_SHIFT;

	/* Don't miss unaligned end */
	if (!IS_ALIGNED(end, PAGE_SIZE))
		last_idx++;

	for (idx = first_idx; idx < last_idx; idx++) {
		struct page *page = find_get_page(inode->i_mapping, idx);
		u8 *src = kmap(page);
		/* Handle case where the start is not aligned to PAGE_SIZE */
		u32 off = start % PAGE_SIZE;

		/*
		 * Stop at the end of the page, and don't sample any garbage
		 * from the last page.
		 */
		while (off < PAGE_SIZE - SAMPLING_READ_SIZE &&
		       start <= end - SAMPLING_READ_SIZE) {
			memcpy(ws->sample + filled, src + off,
			       SAMPLING_READ_SIZE);
			off += SAMPLING_INTERVAL;
			start += SAMPLING_INTERVAL;
			filled += SAMPLING_READ_SIZE;
		}

		kunmap(page);
		put_page(page);
	}

	ws->sample_size = filled;
}
|
||||
|
||||
/*
|
||||
* Compression heuristic.
|
||||
*
|
||||
@ -1082,18 +1446,87 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
|
||||
*/
|
||||
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
|
||||
{
|
||||
u64 index = start >> PAGE_SHIFT;
|
||||
u64 end_index = end >> PAGE_SHIFT;
|
||||
struct page *page;
|
||||
int ret = 1;
|
||||
struct list_head *ws_list = __find_workspace(0, true);
|
||||
struct heuristic_ws *ws;
|
||||
u32 i;
|
||||
u8 byte;
|
||||
int ret = 0;
|
||||
|
||||
while (index <= end_index) {
|
||||
page = find_get_page(inode->i_mapping, index);
|
||||
kmap(page);
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
index++;
|
||||
ws = list_entry(ws_list, struct heuristic_ws, list);
|
||||
|
||||
heuristic_collect_sample(inode, start, end, ws);
|
||||
|
||||
if (sample_repeated_patterns(ws)) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE);
|
||||
|
||||
for (i = 0; i < ws->sample_size; i++) {
|
||||
byte = ws->sample[i];
|
||||
ws->bucket[byte].count++;
|
||||
}
|
||||
|
||||
i = byte_set_size(ws);
|
||||
if (i < BYTE_SET_THRESHOLD) {
|
||||
ret = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
i = byte_core_set_size(ws);
|
||||
if (i <= BYTE_CORE_SET_LOW) {
|
||||
ret = 3;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (i >= BYTE_CORE_SET_HIGH) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
i = shannon_entropy(ws);
|
||||
if (i <= ENTROPY_LVL_ACEPTABLE) {
|
||||
ret = 4;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* For the levels below ENTROPY_LVL_HIGH, additional analysis would be
|
||||
* needed to give green light to compression.
|
||||
*
|
||||
* For now just assume that compression at that level is not worth the
|
||||
* resources because:
|
||||
*
|
||||
* 1. it is possible to defrag the data later
|
||||
*
|
||||
* 2. the data would turn out to be hardly compressible, eg. 150 byte
|
||||
* values, every bucket has counter at level ~54. The heuristic would
|
||||
* be confused. This can happen when data have some internal repeated
|
||||
* patterns like "abbacbbc...". This can be detected by analyzing
|
||||
* pairs of bytes, which is too costly.
|
||||
*/
|
||||
if (i < ENTROPY_LVL_HIGH) {
|
||||
ret = 5;
|
||||
goto out;
|
||||
} else {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
__free_workspace(0, ws_list, true);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Parse the compression level out of a "zlib:N" string.
 *
 * Accepted form: zlib:1 up to zlib:9 and nothing left after the number.
 * Returns the level (1-9), or 0 for any other input (including plain "zlib"
 * and strings that do not start with "zlib").
 */
unsigned int btrfs_compress_str2level(const char *str)
{
	const char *suffix;

	if (strncmp(str, "zlib", 4) != 0)
		return 0;

	/* The four-character prefix matched, so str[4] is within the string */
	suffix = str + 4;

	if (suffix[0] != ':')
		return 0;
	if (suffix[1] < '1' || suffix[1] > '9')
		return 0;
	if (suffix[2] != 0)
		return 0;

	return suffix[1] - '0';
}
|
||||
|
@ -76,7 +76,7 @@ struct compressed_bio {
|
||||
void btrfs_init_compress(void);
|
||||
void btrfs_exit_compress(void);
|
||||
|
||||
int btrfs_compress_pages(int type, struct address_space *mapping,
|
||||
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
|
||||
u64 start, struct page **pages,
|
||||
unsigned long *out_pages,
|
||||
unsigned long *total_in,
|
||||
@ -95,6 +95,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
int mirror_num, unsigned long bio_flags);
|
||||
|
||||
unsigned btrfs_compress_str2level(const char *str);
|
||||
|
||||
enum btrfs_compression_type {
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
BTRFS_COMPRESS_ZLIB = 1,
|
||||
@ -124,6 +126,8 @@ struct btrfs_compress_op {
|
||||
struct page *dest_page,
|
||||
unsigned long start_byte,
|
||||
size_t srclen, size_t destlen);
|
||||
|
||||
void (*set_level)(struct list_head *ws, unsigned int type);
|
||||
};
|
||||
|
||||
extern const struct btrfs_compress_op btrfs_zlib_compress;
|
||||
|
@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
||||
* tree until you end up with a lock on the root. A locked buffer
|
||||
* is returned, with a reference held.
|
||||
*/
|
||||
static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
|
||||
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
@ -5496,8 +5496,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
|
||||
goto out;
|
||||
} else if (left_end_reached) {
|
||||
if (right_level == 0) {
|
||||
ret = changed_cb(left_root, right_root,
|
||||
left_path, right_path,
|
||||
ret = changed_cb(left_path, right_path,
|
||||
&right_key,
|
||||
BTRFS_COMPARE_TREE_DELETED,
|
||||
ctx);
|
||||
@ -5508,8 +5507,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
|
||||
continue;
|
||||
} else if (right_end_reached) {
|
||||
if (left_level == 0) {
|
||||
ret = changed_cb(left_root, right_root,
|
||||
left_path, right_path,
|
||||
ret = changed_cb(left_path, right_path,
|
||||
&left_key,
|
||||
BTRFS_COMPARE_TREE_NEW,
|
||||
ctx);
|
||||
@ -5523,8 +5521,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
|
||||
if (left_level == 0 && right_level == 0) {
|
||||
cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
|
||||
if (cmp < 0) {
|
||||
ret = changed_cb(left_root, right_root,
|
||||
left_path, right_path,
|
||||
ret = changed_cb(left_path, right_path,
|
||||
&left_key,
|
||||
BTRFS_COMPARE_TREE_NEW,
|
||||
ctx);
|
||||
@ -5532,8 +5529,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
|
||||
goto out;
|
||||
advance_left = ADVANCE;
|
||||
} else if (cmp > 0) {
|
||||
ret = changed_cb(left_root, right_root,
|
||||
left_path, right_path,
|
||||
ret = changed_cb(left_path, right_path,
|
||||
&right_key,
|
||||
BTRFS_COMPARE_TREE_DELETED,
|
||||
ctx);
|
||||
@ -5550,8 +5546,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
|
||||
result = BTRFS_COMPARE_TREE_CHANGED;
|
||||
else
|
||||
result = BTRFS_COMPARE_TREE_SAME;
|
||||
ret = changed_cb(left_root, right_root,
|
||||
left_path, right_path,
|
||||
ret = changed_cb(left_path, right_path,
|
||||
&left_key, result, ctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
@ -523,7 +523,7 @@ struct btrfs_caching_control {
|
||||
};
|
||||
|
||||
/* Once caching_thread() finds this much free space, it will wake up waiters. */
|
||||
#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
|
||||
#define CACHING_CTL_WAKE_UP SZ_2M
|
||||
|
||||
struct btrfs_io_ctl {
|
||||
void *cur, *orig;
|
||||
@ -763,8 +763,6 @@ struct btrfs_fs_info {
|
||||
* delayed dir index item
|
||||
*/
|
||||
struct btrfs_block_rsv global_block_rsv;
|
||||
/* block reservation for delay allocation */
|
||||
struct btrfs_block_rsv delalloc_block_rsv;
|
||||
/* block reservation for metadata operations */
|
||||
struct btrfs_block_rsv trans_block_rsv;
|
||||
/* block reservation for chunk tree */
|
||||
@ -790,6 +788,7 @@ struct btrfs_fs_info {
|
||||
*/
|
||||
unsigned long pending_changes;
|
||||
unsigned long compress_type:4;
|
||||
unsigned int compress_level;
|
||||
int commit_interval;
|
||||
/*
|
||||
* It is a suggestive number, the read side is safe even it gets a
|
||||
@ -878,9 +877,6 @@ struct btrfs_fs_info {
|
||||
rwlock_t tree_mod_log_lock;
|
||||
struct rb_root tree_mod_log;
|
||||
|
||||
atomic_t nr_async_submits;
|
||||
atomic_t async_submit_draining;
|
||||
atomic_t nr_async_bios;
|
||||
atomic_t async_delalloc_pages;
|
||||
atomic_t open_ioctl_trans;
|
||||
|
||||
@ -1100,6 +1096,11 @@ struct btrfs_fs_info {
|
||||
u32 nodesize;
|
||||
u32 sectorsize;
|
||||
u32 stripesize;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
spinlock_t ref_verify_lock;
|
||||
struct rb_root block_tree;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
|
||||
@ -1338,6 +1339,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
|
||||
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
|
||||
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
|
||||
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
|
||||
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
|
||||
|
||||
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
|
||||
#define BTRFS_DEFAULT_MAX_INLINE (2048)
|
||||
@ -2639,7 +2641,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *buf,
|
||||
u64 parent, int last_ref);
|
||||
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
u64 root_objectid, u64 owner,
|
||||
struct btrfs_root *root, u64 owner,
|
||||
u64 offset, u64 ram_bytes,
|
||||
struct btrfs_key *ins);
|
||||
int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
|
||||
@ -2658,7 +2660,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u64 flags,
|
||||
int level, int is_data);
|
||||
int btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
|
||||
u64 owner, u64 offset);
|
||||
|
||||
@ -2670,7 +2672,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, u64 num_bytes, u64 parent,
|
||||
u64 root_objectid, u64 owner, u64 offset);
|
||||
|
||||
@ -2744,6 +2746,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
|
||||
u64 *qgroup_reserved, bool use_global_rsv);
|
||||
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
|
||||
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
|
||||
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
|
||||
int btrfs_delalloc_reserve_space(struct inode *inode,
|
||||
@ -2751,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
unsigned short type);
|
||||
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
unsigned short type);
|
||||
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
|
||||
@ -2809,6 +2816,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_key *new_key);
|
||||
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
|
||||
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
|
||||
struct btrfs_key *key, int lowest_level,
|
||||
u64 min_trans);
|
||||
@ -2821,9 +2829,7 @@ enum btrfs_compare_tree_result {
|
||||
BTRFS_COMPARE_TREE_CHANGED,
|
||||
BTRFS_COMPARE_TREE_SAME,
|
||||
};
|
||||
typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
|
||||
struct btrfs_root *right_root,
|
||||
struct btrfs_path *left_path,
|
||||
typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
|
||||
struct btrfs_path *right_path,
|
||||
struct btrfs_key *key,
|
||||
enum btrfs_compare_tree_result result,
|
||||
|
@ -581,36 +581,12 @@ static int btrfs_delayed_inode_reserve_metadata(
|
||||
struct btrfs_block_rsv *dst_rsv;
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
bool release = false;
|
||||
|
||||
src_rsv = trans->block_rsv;
|
||||
dst_rsv = &fs_info->delayed_block_rsv;
|
||||
|
||||
num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
|
||||
|
||||
/*
|
||||
* If our block_rsv is the delalloc block reserve then check and see if
|
||||
* we have our extra reservation for updating the inode. If not fall
|
||||
* through and try to reserve space quickly.
|
||||
*
|
||||
* We used to try and steal from the delalloc block rsv or the global
|
||||
* reserve, but we'd steal a full reservation, which isn't kind. We are
|
||||
* here through delalloc which means we've likely just cowed down close
|
||||
* to the leaf that contains the inode, so we would steal less just
|
||||
* doing the fallback inode update, so if we do end up having to steal
|
||||
* from the global block rsv we hopefully only steal one or two blocks
|
||||
* worth which is less likely to hurt us.
|
||||
*/
|
||||
if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
|
||||
spin_lock(&inode->lock);
|
||||
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
|
||||
&inode->runtime_flags))
|
||||
release = true;
|
||||
else
|
||||
src_rsv = NULL;
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
|
||||
* which doesn't reserve space for speed. This is a problem since we
|
||||
@ -618,7 +594,7 @@ static int btrfs_delayed_inode_reserve_metadata(
|
||||
* space.
|
||||
*
|
||||
* Now if src_rsv == delalloc_block_rsv we'll let it just steal since
|
||||
* we're accounted for.
|
||||
* we always reserve enough to update the inode item.
|
||||
*/
|
||||
if (!src_rsv || (!trans->bytes_reserved &&
|
||||
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
|
||||
@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata(
|
||||
}
|
||||
|
||||
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
|
||||
|
||||
/*
|
||||
* Migrate only takes a reservation, it doesn't touch the size of the
|
||||
* block_rsv. This is to simplify people who don't normally have things
|
||||
* migrated from their block rsv. If they go to release their
|
||||
* reservation, that will decrease the size as well, so if migrate
|
||||
* reduced size we'd end up with a negative size. But for the
|
||||
* delalloc_meta_reserved stuff we will only know to drop 1 reservation,
|
||||
* but we could in fact do this reserve/migrate dance several times
|
||||
* between the time we did the original reservation and we'd clean it
|
||||
* up. So to take care of this, release the space for the meta
|
||||
* reservation here. I think it may be time for a documentation page on
|
||||
* how block rsvs. work.
|
||||
*/
|
||||
if (!ret) {
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_inode",
|
||||
btrfs_ino(inode), num_bytes, 1);
|
||||
node->bytes_reserved = num_bytes;
|
||||
}
|
||||
|
||||
if (release) {
|
||||
trace_btrfs_space_reservation(fs_info, "delalloc",
|
||||
btrfs_ino(inode), num_bytes, 0);
|
||||
btrfs_block_rsv_release(fs_info, src_rsv, num_bytes);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -40,10 +40,10 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
|
||||
/*
|
||||
* compare two delayed tree backrefs with same bytenr and type
|
||||
*/
|
||||
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
|
||||
struct btrfs_delayed_tree_ref *ref1, int type)
|
||||
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
|
||||
struct btrfs_delayed_tree_ref *ref2)
|
||||
{
|
||||
if (type == BTRFS_TREE_BLOCK_REF_KEY) {
|
||||
if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
|
||||
if (ref1->root < ref2->root)
|
||||
return -1;
|
||||
if (ref1->root > ref2->root)
|
||||
@ -60,8 +60,8 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
|
||||
/*
|
||||
* compare two delayed data backrefs with same bytenr and type
|
||||
*/
|
||||
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
|
||||
struct btrfs_delayed_data_ref *ref1)
|
||||
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
|
||||
struct btrfs_delayed_data_ref *ref2)
|
||||
{
|
||||
if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
|
||||
if (ref1->root < ref2->root)
|
||||
@ -85,6 +85,34 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
|
||||
struct btrfs_delayed_ref_node *ref2,
|
||||
bool check_seq)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (ref1->type < ref2->type)
|
||||
return -1;
|
||||
if (ref1->type > ref2->type)
|
||||
return 1;
|
||||
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
|
||||
ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
|
||||
btrfs_delayed_node_to_tree_ref(ref2));
|
||||
else
|
||||
ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
|
||||
btrfs_delayed_node_to_data_ref(ref2));
|
||||
if (ret)
|
||||
return ret;
|
||||
if (check_seq) {
|
||||
if (ref1->seq < ref2->seq)
|
||||
return -1;
|
||||
if (ref1->seq > ref2->seq)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* insert a new ref to head ref rbtree */
|
||||
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
|
||||
struct rb_node *node)
|
||||
@ -96,15 +124,43 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
|
||||
u64 bytenr;
|
||||
|
||||
ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
|
||||
bytenr = ins->node.bytenr;
|
||||
bytenr = ins->bytenr;
|
||||
while (*p) {
|
||||
parent_node = *p;
|
||||
entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
|
||||
if (bytenr < entry->node.bytenr)
|
||||
if (bytenr < entry->bytenr)
|
||||
p = &(*p)->rb_left;
|
||||
else if (bytenr > entry->node.bytenr)
|
||||
else if (bytenr > entry->bytenr)
|
||||
p = &(*p)->rb_right;
|
||||
else
|
||||
return entry;
|
||||
}
|
||||
|
||||
rb_link_node(node, parent_node, p);
|
||||
rb_insert_color(node, root);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
|
||||
struct btrfs_delayed_ref_node *ins)
|
||||
{
|
||||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *node = &ins->ref_node;
|
||||
struct rb_node *parent_node = NULL;
|
||||
struct btrfs_delayed_ref_node *entry;
|
||||
|
||||
while (*p) {
|
||||
int comp;
|
||||
|
||||
parent_node = *p;
|
||||
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
|
||||
ref_node);
|
||||
comp = comp_refs(ins, entry, true);
|
||||
if (comp < 0)
|
||||
p = &(*p)->rb_left;
|
||||
else if (comp > 0)
|
||||
p = &(*p)->rb_right;
|
||||
else
|
||||
return entry;
|
||||
@ -133,15 +189,15 @@ find_ref_head(struct rb_root *root, u64 bytenr,
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
|
||||
|
||||
if (bytenr < entry->node.bytenr)
|
||||
if (bytenr < entry->bytenr)
|
||||
n = n->rb_left;
|
||||
else if (bytenr > entry->node.bytenr)
|
||||
else if (bytenr > entry->bytenr)
|
||||
n = n->rb_right;
|
||||
else
|
||||
return entry;
|
||||
}
|
||||
if (entry && return_bigger) {
|
||||
if (bytenr > entry->node.bytenr) {
|
||||
if (bytenr > entry->bytenr) {
|
||||
n = rb_next(&entry->href_node);
|
||||
if (!n)
|
||||
n = rb_first(root);
|
||||
@ -164,17 +220,17 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
|
||||
if (mutex_trylock(&head->mutex))
|
||||
return 0;
|
||||
|
||||
refcount_inc(&head->node.refs);
|
||||
refcount_inc(&head->refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
mutex_lock(&head->mutex);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
if (!head->node.in_tree) {
|
||||
if (RB_EMPTY_NODE(&head->href_node)) {
|
||||
mutex_unlock(&head->mutex);
|
||||
btrfs_put_delayed_ref(&head->node);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
return -EAGAIN;
|
||||
}
|
||||
btrfs_put_delayed_ref(&head->node);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -183,15 +239,11 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_head *head,
|
||||
struct btrfs_delayed_ref_node *ref)
|
||||
{
|
||||
if (btrfs_delayed_ref_is_head(ref)) {
|
||||
head = btrfs_delayed_node_to_head(ref);
|
||||
rb_erase(&head->href_node, &delayed_refs->href_root);
|
||||
} else {
|
||||
assert_spin_locked(&head->lock);
|
||||
list_del(&ref->list);
|
||||
if (!list_empty(&ref->add_list))
|
||||
list_del(&ref->add_list);
|
||||
}
|
||||
assert_spin_locked(&head->lock);
|
||||
rb_erase(&ref->ref_node, &head->ref_tree);
|
||||
RB_CLEAR_NODE(&ref->ref_node);
|
||||
if (!list_empty(&ref->add_list))
|
||||
list_del(&ref->add_list);
|
||||
ref->in_tree = 0;
|
||||
btrfs_put_delayed_ref(ref);
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
@ -206,36 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
|
||||
u64 seq)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *next;
|
||||
struct rb_node *node = rb_next(&ref->ref_node);
|
||||
bool done = false;
|
||||
|
||||
next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
while (!done && &next->list != &head->ref_list) {
|
||||
while (!done && node) {
|
||||
int mod;
|
||||
struct btrfs_delayed_ref_node *next2;
|
||||
|
||||
next2 = list_next_entry(next, list);
|
||||
|
||||
if (next == ref)
|
||||
goto next;
|
||||
|
||||
next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
|
||||
node = rb_next(node);
|
||||
if (seq && next->seq >= seq)
|
||||
goto next;
|
||||
|
||||
if (next->type != ref->type)
|
||||
goto next;
|
||||
|
||||
if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
|
||||
comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
|
||||
btrfs_delayed_node_to_tree_ref(next),
|
||||
ref->type))
|
||||
goto next;
|
||||
if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
|
||||
ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
|
||||
comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
|
||||
btrfs_delayed_node_to_data_ref(next)))
|
||||
goto next;
|
||||
break;
|
||||
if (comp_refs(ref, next, false))
|
||||
break;
|
||||
|
||||
if (ref->action == next->action) {
|
||||
mod = next->ref_mod;
|
||||
@ -259,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
|
||||
WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
|
||||
}
|
||||
next:
|
||||
next = next2;
|
||||
}
|
||||
|
||||
return done;
|
||||
@ -272,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
struct rb_node *node;
|
||||
u64 seq = 0;
|
||||
|
||||
assert_spin_locked(&head->lock);
|
||||
|
||||
if (list_empty(&head->ref_list))
|
||||
if (RB_EMPTY_ROOT(&head->ref_tree))
|
||||
return;
|
||||
|
||||
/* We don't have too many refs to merge for data. */
|
||||
@ -293,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
spin_unlock(&fs_info->tree_mod_seq_lock);
|
||||
|
||||
ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
while (&ref->list != &head->ref_list) {
|
||||
again:
|
||||
for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
|
||||
ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
|
||||
if (seq && ref->seq >= seq)
|
||||
goto next;
|
||||
|
||||
if (merge_ref(trans, delayed_refs, head, ref, seq)) {
|
||||
if (list_empty(&head->ref_list))
|
||||
break;
|
||||
ref = list_first_entry(&head->ref_list,
|
||||
struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
continue;
|
||||
}
|
||||
next:
|
||||
ref = list_next_entry(ref, list);
|
||||
if (merge_ref(trans, delayed_refs, head, ref, seq))
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
@ -380,8 +404,8 @@ again:
|
||||
head->processing = 1;
|
||||
WARN_ON(delayed_refs->num_heads_ready == 0);
|
||||
delayed_refs->num_heads_ready--;
|
||||
delayed_refs->run_delayed_start = head->node.bytenr +
|
||||
head->node.num_bytes;
|
||||
delayed_refs->run_delayed_start = head->bytenr +
|
||||
head->num_bytes;
|
||||
return head;
|
||||
}
|
||||
|
||||
@ -391,37 +415,19 @@ again:
|
||||
* Return 0 for insert.
|
||||
* Return >0 for merge.
|
||||
*/
|
||||
static int
|
||||
add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_root *root,
|
||||
struct btrfs_delayed_ref_head *href,
|
||||
struct btrfs_delayed_ref_node *ref)
|
||||
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_root *root,
|
||||
struct btrfs_delayed_ref_head *href,
|
||||
struct btrfs_delayed_ref_node *ref)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *exist;
|
||||
int mod;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&href->lock);
|
||||
/* Check whether we can merge the tail node with ref */
|
||||
if (list_empty(&href->ref_list))
|
||||
goto add_tail;
|
||||
exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
|
||||
list);
|
||||
/* No need to compare bytenr nor is_head */
|
||||
if (exist->type != ref->type || exist->seq != ref->seq)
|
||||
goto add_tail;
|
||||
|
||||
if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
|
||||
exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
|
||||
comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
|
||||
btrfs_delayed_node_to_tree_ref(ref),
|
||||
ref->type))
|
||||
goto add_tail;
|
||||
if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
|
||||
exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
|
||||
comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
|
||||
btrfs_delayed_node_to_data_ref(ref)))
|
||||
goto add_tail;
|
||||
exist = tree_insert(&href->ref_tree, ref);
|
||||
if (!exist)
|
||||
goto inserted;
|
||||
|
||||
/* Now we are sure we can merge */
|
||||
ret = 1;
|
||||
@ -452,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
|
||||
drop_delayed_ref(trans, root, href, exist);
|
||||
spin_unlock(&href->lock);
|
||||
return ret;
|
||||
|
||||
add_tail:
|
||||
list_add_tail(&ref->list, &href->ref_list);
|
||||
inserted:
|
||||
if (ref->action == BTRFS_ADD_DELAYED_REF)
|
||||
list_add_tail(&ref->add_list, &href->ref_add_list);
|
||||
atomic_inc(&root->num_entries);
|
||||
@ -469,20 +473,16 @@ add_tail:
|
||||
*/
|
||||
static noinline void
|
||||
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_node *existing,
|
||||
struct btrfs_delayed_ref_node *update,
|
||||
struct btrfs_delayed_ref_head *existing,
|
||||
struct btrfs_delayed_ref_head *update,
|
||||
int *old_ref_mod_ret)
|
||||
{
|
||||
struct btrfs_delayed_ref_head *existing_ref;
|
||||
struct btrfs_delayed_ref_head *ref;
|
||||
int old_ref_mod;
|
||||
|
||||
existing_ref = btrfs_delayed_node_to_head(existing);
|
||||
ref = btrfs_delayed_node_to_head(update);
|
||||
BUG_ON(existing_ref->is_data != ref->is_data);
|
||||
BUG_ON(existing->is_data != update->is_data);
|
||||
|
||||
spin_lock(&existing_ref->lock);
|
||||
if (ref->must_insert_reserved) {
|
||||
spin_lock(&existing->lock);
|
||||
if (update->must_insert_reserved) {
|
||||
/* if the extent was freed and then
|
||||
* reallocated before the delayed ref
|
||||
* entries were processed, we can end up
|
||||
@ -490,7 +490,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
* the must_insert_reserved flag set.
|
||||
* Set it again here
|
||||
*/
|
||||
existing_ref->must_insert_reserved = ref->must_insert_reserved;
|
||||
existing->must_insert_reserved = update->must_insert_reserved;
|
||||
|
||||
/*
|
||||
* update the num_bytes so we make sure the accounting
|
||||
@ -500,22 +500,22 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
|
||||
}
|
||||
|
||||
if (ref->extent_op) {
|
||||
if (!existing_ref->extent_op) {
|
||||
existing_ref->extent_op = ref->extent_op;
|
||||
if (update->extent_op) {
|
||||
if (!existing->extent_op) {
|
||||
existing->extent_op = update->extent_op;
|
||||
} else {
|
||||
if (ref->extent_op->update_key) {
|
||||
memcpy(&existing_ref->extent_op->key,
|
||||
&ref->extent_op->key,
|
||||
sizeof(ref->extent_op->key));
|
||||
existing_ref->extent_op->update_key = true;
|
||||
if (update->extent_op->update_key) {
|
||||
memcpy(&existing->extent_op->key,
|
||||
&update->extent_op->key,
|
||||
sizeof(update->extent_op->key));
|
||||
existing->extent_op->update_key = true;
|
||||
}
|
||||
if (ref->extent_op->update_flags) {
|
||||
existing_ref->extent_op->flags_to_set |=
|
||||
ref->extent_op->flags_to_set;
|
||||
existing_ref->extent_op->update_flags = true;
|
||||
if (update->extent_op->update_flags) {
|
||||
existing->extent_op->flags_to_set |=
|
||||
update->extent_op->flags_to_set;
|
||||
existing->extent_op->update_flags = true;
|
||||
}
|
||||
btrfs_free_delayed_extent_op(ref->extent_op);
|
||||
btrfs_free_delayed_extent_op(update->extent_op);
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -523,23 +523,23 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
* only need the lock for this case cause we could be processing it
|
||||
* currently, for refs we just added we know we're a-ok.
|
||||
*/
|
||||
old_ref_mod = existing_ref->total_ref_mod;
|
||||
old_ref_mod = existing->total_ref_mod;
|
||||
if (old_ref_mod_ret)
|
||||
*old_ref_mod_ret = old_ref_mod;
|
||||
existing->ref_mod += update->ref_mod;
|
||||
existing_ref->total_ref_mod += update->ref_mod;
|
||||
existing->total_ref_mod += update->ref_mod;
|
||||
|
||||
/*
|
||||
* If we are going from a positive ref mod to a negative or vice
|
||||
* versa we need to make sure to adjust pending_csums accordingly.
|
||||
*/
|
||||
if (existing_ref->is_data) {
|
||||
if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
|
||||
if (existing->is_data) {
|
||||
if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
|
||||
delayed_refs->pending_csums -= existing->num_bytes;
|
||||
if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
|
||||
if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
|
||||
delayed_refs->pending_csums += existing->num_bytes;
|
||||
}
|
||||
spin_unlock(&existing_ref->lock);
|
||||
spin_unlock(&existing->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -550,14 +550,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
static noinline struct btrfs_delayed_ref_head *
|
||||
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_node *ref,
|
||||
struct btrfs_delayed_ref_head *head_ref,
|
||||
struct btrfs_qgroup_extent_record *qrecord,
|
||||
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
|
||||
int action, int is_data, int *qrecord_inserted_ret,
|
||||
int *old_ref_mod, int *new_ref_mod)
|
||||
{
|
||||
struct btrfs_delayed_ref_head *existing;
|
||||
struct btrfs_delayed_ref_head *head_ref = NULL;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
int count_mod = 1;
|
||||
int must_insert_reserved = 0;
|
||||
@ -593,26 +592,21 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
|
||||
/* first set the basic ref node struct up */
|
||||
refcount_set(&ref->refs, 1);
|
||||
ref->bytenr = bytenr;
|
||||
ref->num_bytes = num_bytes;
|
||||
ref->ref_mod = count_mod;
|
||||
ref->type = 0;
|
||||
ref->action = 0;
|
||||
ref->is_head = 1;
|
||||
ref->in_tree = 1;
|
||||
ref->seq = 0;
|
||||
|
||||
head_ref = btrfs_delayed_node_to_head(ref);
|
||||
refcount_set(&head_ref->refs, 1);
|
||||
head_ref->bytenr = bytenr;
|
||||
head_ref->num_bytes = num_bytes;
|
||||
head_ref->ref_mod = count_mod;
|
||||
head_ref->must_insert_reserved = must_insert_reserved;
|
||||
head_ref->is_data = is_data;
|
||||
INIT_LIST_HEAD(&head_ref->ref_list);
|
||||
head_ref->ref_tree = RB_ROOT;
|
||||
INIT_LIST_HEAD(&head_ref->ref_add_list);
|
||||
RB_CLEAR_NODE(&head_ref->href_node);
|
||||
head_ref->processing = 0;
|
||||
head_ref->total_ref_mod = count_mod;
|
||||
head_ref->qgroup_reserved = 0;
|
||||
head_ref->qgroup_ref_root = 0;
|
||||
spin_lock_init(&head_ref->lock);
|
||||
mutex_init(&head_ref->mutex);
|
||||
|
||||
/* Record qgroup extent info if provided */
|
||||
if (qrecord) {
|
||||
@ -632,17 +626,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
|
||||
qrecord_inserted = 1;
|
||||
}
|
||||
|
||||
spin_lock_init(&head_ref->lock);
|
||||
mutex_init(&head_ref->mutex);
|
||||
|
||||
trace_add_delayed_ref_head(fs_info, ref, head_ref, action);
|
||||
trace_add_delayed_ref_head(fs_info, head_ref, action);
|
||||
|
||||
existing = htree_insert(&delayed_refs->href_root,
|
||||
&head_ref->href_node);
|
||||
if (existing) {
|
||||
WARN_ON(ref_root && reserved && existing->qgroup_ref_root
|
||||
&& existing->qgroup_reserved);
|
||||
update_existing_head_ref(delayed_refs, &existing->node, ref,
|
||||
update_existing_head_ref(delayed_refs, existing, head_ref,
|
||||
old_ref_mod);
|
||||
/*
|
||||
* we've updated the existing ref, free the newly
|
||||
@ -699,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
ref->is_head = 0;
|
||||
ref->in_tree = 1;
|
||||
ref->seq = seq;
|
||||
INIT_LIST_HEAD(&ref->list);
|
||||
RB_CLEAR_NODE(&ref->ref_node);
|
||||
INIT_LIST_HEAD(&ref->add_list);
|
||||
|
||||
full_ref = btrfs_delayed_node_to_tree_ref(ref);
|
||||
@ -713,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
|
||||
trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
|
||||
|
||||
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
|
||||
ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
|
||||
|
||||
/*
|
||||
* XXX: memory should be freed at the same level allocated.
|
||||
@ -756,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
ref->is_head = 0;
|
||||
ref->in_tree = 1;
|
||||
ref->seq = seq;
|
||||
INIT_LIST_HEAD(&ref->list);
|
||||
RB_CLEAR_NODE(&ref->ref_node);
|
||||
INIT_LIST_HEAD(&ref->add_list);
|
||||
|
||||
full_ref = btrfs_delayed_node_to_data_ref(ref);
|
||||
@ -772,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
|
||||
trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
|
||||
|
||||
ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
|
||||
|
||||
ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
|
||||
if (ret > 0)
|
||||
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
|
||||
}
|
||||
@ -821,7 +811,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
* insert both the head node and the new ref without dropping
|
||||
* the spin lock
|
||||
*/
|
||||
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
|
||||
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
|
||||
bytenr, num_bytes, 0, 0, action, 0,
|
||||
&qrecord_inserted, old_ref_mod,
|
||||
new_ref_mod);
|
||||
@ -888,7 +878,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
|
||||
* insert both the head node and the new ref without dropping
|
||||
* the spin lock
|
||||
*/
|
||||
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
|
||||
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
|
||||
bytenr, num_bytes, ref_root, reserved,
|
||||
action, 1, &qrecord_inserted,
|
||||
old_ref_mod, new_ref_mod);
|
||||
@ -920,7 +910,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
spin_lock(&delayed_refs->lock);
|
||||
|
||||
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
|
||||
add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
|
||||
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
|
||||
extent_op->is_data, NULL, NULL, NULL);
|
||||
|
||||
|
@ -26,18 +26,8 @@
|
||||
#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
|
||||
#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
|
||||
|
||||
/*
|
||||
* XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
|
||||
* same ref_node structure.
|
||||
* Ref_head is in a higher logic level than tree/data ref, and duplicated
|
||||
* bytenr/num_bytes in ref_node is really a waste of memory, they should be
|
||||
* referred from ref_head.
|
||||
* This gets more disgusting after we use list to store tree/data ref in
|
||||
* ref_head. Must clean this mess up later.
|
||||
*/
|
||||
struct btrfs_delayed_ref_node {
|
||||
/*data/tree ref use list, stored in ref_head->ref_list. */
|
||||
struct list_head list;
|
||||
struct rb_node ref_node;
|
||||
/*
|
||||
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
|
||||
* ref_head->ref_add_list, then we do not need to iterate the
|
||||
@ -91,8 +81,9 @@ struct btrfs_delayed_extent_op {
|
||||
* reference count modifications we've queued up.
|
||||
*/
|
||||
struct btrfs_delayed_ref_head {
|
||||
struct btrfs_delayed_ref_node node;
|
||||
|
||||
u64 bytenr;
|
||||
u64 num_bytes;
|
||||
refcount_t refs;
|
||||
/*
|
||||
* the mutex is held while running the refs, and it is also
|
||||
* held when checking the sum of reference modifications.
|
||||
@ -100,7 +91,7 @@ struct btrfs_delayed_ref_head {
|
||||
struct mutex mutex;
|
||||
|
||||
spinlock_t lock;
|
||||
struct list_head ref_list;
|
||||
struct rb_root ref_tree;
|
||||
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
|
||||
struct list_head ref_add_list;
|
||||
|
||||
@ -115,6 +106,14 @@ struct btrfs_delayed_ref_head {
|
||||
*/
|
||||
int total_ref_mod;
|
||||
|
||||
/*
|
||||
* This is the current outstanding mod references for this bytenr. This
|
||||
* is used with lookup_extent_info to get an accurate reference count
|
||||
* for a bytenr, so it is adjusted as delayed refs are run so that any
|
||||
* on disk reference count + ref_mod is accurate.
|
||||
*/
|
||||
int ref_mod;
|
||||
|
||||
/*
|
||||
* For qgroup reserved space freeing.
|
||||
*
|
||||
@ -234,15 +233,18 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
|
||||
case BTRFS_SHARED_DATA_REF_KEY:
|
||||
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
|
||||
break;
|
||||
case 0:
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
if (refcount_dec_and_test(&head->refs))
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
|
||||
}
|
||||
|
||||
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u64 parent,
|
||||
@ -282,36 +284,18 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
u64 seq);
|
||||
|
||||
/*
|
||||
* a node might live in a head or a regular ref, this lets you
|
||||
* test for the proper type to use.
|
||||
*/
|
||||
static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
|
||||
{
|
||||
return node->is_head;
|
||||
}
|
||||
|
||||
/*
|
||||
* helper functions to cast a node into its container
|
||||
*/
|
||||
static inline struct btrfs_delayed_tree_ref *
|
||||
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
|
||||
{
|
||||
WARN_ON(btrfs_delayed_ref_is_head(node));
|
||||
return container_of(node, struct btrfs_delayed_tree_ref, node);
|
||||
}
|
||||
|
||||
static inline struct btrfs_delayed_data_ref *
|
||||
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
|
||||
{
|
||||
WARN_ON(btrfs_delayed_ref_is_head(node));
|
||||
return container_of(node, struct btrfs_delayed_data_ref, node);
|
||||
}
|
||||
|
||||
static inline struct btrfs_delayed_ref_head *
|
||||
btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
|
||||
{
|
||||
WARN_ON(!btrfs_delayed_ref_is_head(node));
|
||||
return container_of(node, struct btrfs_delayed_ref_head, node);
|
||||
}
|
||||
#endif
|
||||
|
@ -50,6 +50,8 @@
|
||||
#include "sysfs.h"
|
||||
#include "qgroup.h"
|
||||
#include "compression.h"
|
||||
#include "tree-checker.h"
|
||||
#include "ref-verify.h"
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#include <asm/cpufeature.h>
|
||||
@ -543,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Log a critical message describing a corrupted tree block.
 *
 * @reason: human readable description of what is wrong
 * @eb:     the extent buffer that failed the check
 * @root:   root whose fs_info and objectid are reported
 * @slot:   slot at which the problem was detected
 *
 * The arguments are parenthesized so that callers may pass arbitrary
 * expressions; note that @eb and @root are each evaluated twice.
 */
#define CORRUPT(reason, eb, root, slot)					\
	btrfs_crit((root)->fs_info,					\
		   "corrupt %s, %s: block=%llu, root=%llu, slot=%d",	\
		   btrfs_header_level(eb) == 0 ? "leaf" : "node",	\
		   (reason), btrfs_header_bytenr(eb), (root)->objectid, (slot))
|
||||
|
||||
static noinline int check_leaf(struct btrfs_root *root,
|
||||
struct extent_buffer *leaf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key leaf_key;
|
||||
u32 nritems = btrfs_header_nritems(leaf);
|
||||
int slot;
|
||||
|
||||
/*
|
||||
* Extent buffers from a relocation tree have a owner field that
|
||||
* corresponds to the subvolume tree they are based on. So just from an
|
||||
* extent buffer alone we can not find out what is the id of the
|
||||
* corresponding subvolume tree, so we can not figure out if the extent
|
||||
* buffer corresponds to the root of the relocation tree or not. So skip
|
||||
* this check for relocation trees.
|
||||
*/
|
||||
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
|
||||
struct btrfs_root *check_root;
|
||||
|
||||
key.objectid = btrfs_header_owner(leaf);
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
check_root = btrfs_get_fs_root(fs_info, &key, false);
|
||||
/*
|
||||
* The only reason we also check NULL here is that during
|
||||
* open_ctree() some roots has not yet been set up.
|
||||
*/
|
||||
if (!IS_ERR_OR_NULL(check_root)) {
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = btrfs_root_node(check_root);
|
||||
/* if leaf is the root, then it's fine */
|
||||
if (leaf != eb) {
|
||||
CORRUPT("non-root leaf's nritems is 0",
|
||||
leaf, check_root, 0);
|
||||
free_extent_buffer(eb);
|
||||
return -EIO;
|
||||
}
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (nritems == 0)
|
||||
return 0;
|
||||
|
||||
/* Check the 0 item */
|
||||
if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info)) {
|
||||
CORRUPT("invalid item offset size pair", leaf, root, 0);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to make sure each items keys are in the correct order and their
|
||||
* offsets make sense. We only have to loop through nritems-1 because
|
||||
* we check the current slot against the next slot, which verifies the
|
||||
* next slot's offset+size makes sense and that the current's slot
|
||||
* offset is correct.
|
||||
*/
|
||||
for (slot = 0; slot < nritems - 1; slot++) {
|
||||
btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot + 1);
|
||||
|
||||
/* Make sure the keys are in the right order */
|
||||
if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
|
||||
CORRUPT("bad key order", leaf, root, slot);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the offset and ends are right, remember that the
|
||||
* item data starts at the end of the leaf and grows towards the
|
||||
* front.
|
||||
*/
|
||||
if (btrfs_item_offset_nr(leaf, slot) !=
|
||||
btrfs_item_end_nr(leaf, slot + 1)) {
|
||||
CORRUPT("slot offset bad", leaf, root, slot);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to make sure that we don't point outside of the leaf,
|
||||
* just in case all the items are consistent to each other, but
|
||||
* all point outside of the leaf.
|
||||
*/
|
||||
if (btrfs_item_end_nr(leaf, slot) >
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info)) {
|
||||
CORRUPT("slot end outside of leaf", leaf, root, slot);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_node(struct btrfs_root *root, struct extent_buffer *node)
|
||||
{
|
||||
unsigned long nr = btrfs_header_nritems(node);
|
||||
struct btrfs_key key, next_key;
|
||||
int slot;
|
||||
u64 bytenr;
|
||||
int ret = 0;
|
||||
|
||||
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
|
||||
btrfs_crit(root->fs_info,
|
||||
"corrupt node: block %llu root %llu nritems %lu",
|
||||
node->start, root->objectid, nr);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
for (slot = 0; slot < nr - 1; slot++) {
|
||||
bytenr = btrfs_node_blockptr(node, slot);
|
||||
btrfs_node_key_to_cpu(node, &key, slot);
|
||||
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
|
||||
|
||||
if (!bytenr) {
|
||||
CORRUPT("invalid item slot", node, root, slot);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
|
||||
CORRUPT("bad key order", node, root, slot);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
||||
u64 phy_offset, struct page *page,
|
||||
u64 start, u64 end, int mirror)
|
||||
@ -748,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
|
||||
* that we don't try and read the other copies of this block, just
|
||||
* return -EIO.
|
||||
*/
|
||||
if (found_level == 0 && check_leaf(root, eb)) {
|
||||
if (found_level == 0 && btrfs_check_leaf(root, eb)) {
|
||||
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
if (found_level > 0 && check_node(root, eb))
|
||||
if (found_level > 0 && btrfs_check_node(root, eb))
|
||||
ret = -EIO;
|
||||
|
||||
if (!ret)
|
||||
@ -879,22 +741,9 @@ static void run_one_async_start(struct btrfs_work *work)
|
||||
|
||||
static void run_one_async_done(struct btrfs_work *work)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct async_submit_bio *async;
|
||||
int limit;
|
||||
|
||||
async = container_of(work, struct async_submit_bio, work);
|
||||
fs_info = async->fs_info;
|
||||
|
||||
limit = btrfs_async_submit_limit(fs_info);
|
||||
limit = limit * 2 / 3;
|
||||
|
||||
/*
|
||||
* atomic_dec_return implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
|
||||
waitqueue_active(&fs_info->async_submit_wait))
|
||||
wake_up(&fs_info->async_submit_wait);
|
||||
|
||||
/* If an error occurred we just want to clean up the bio and move on */
|
||||
if (async->status) {
|
||||
@ -942,19 +791,10 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
|
||||
async->status = 0;
|
||||
|
||||
atomic_inc(&fs_info->nr_async_submits);
|
||||
|
||||
if (op_is_sync(bio->bi_opf))
|
||||
btrfs_set_work_high_priority(&async->work);
|
||||
|
||||
btrfs_queue_work(fs_info->workers, &async->work);
|
||||
|
||||
while (atomic_read(&fs_info->async_submit_draining) &&
|
||||
atomic_read(&fs_info->nr_async_submits)) {
|
||||
wait_event(fs_info->async_submit_wait,
|
||||
(atomic_read(&fs_info->nr_async_submits) == 0));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1005,9 +845,9 @@ static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_async_write(unsigned long bio_flags)
|
||||
static int check_async_write(struct btrfs_inode *bi)
|
||||
{
|
||||
if (bio_flags & EXTENT_BIO_TREE_LOG)
|
||||
if (atomic_read(&bi->sync_writers))
|
||||
return 0;
|
||||
#ifdef CONFIG_X86
|
||||
if (static_cpu_has(X86_FEATURE_XMM4_2))
|
||||
@ -1022,7 +862,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
|
||||
{
|
||||
struct inode *inode = private_data;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
int async = check_async_write(bio_flags);
|
||||
int async = check_async_write(BTRFS_I(inode));
|
||||
blk_status_t ret;
|
||||
|
||||
if (bio_op(bio) != REQ_OP_WRITE) {
|
||||
@ -2607,14 +2447,6 @@ int open_ctree(struct super_block *sb,
|
||||
goto fail_delalloc_bytes;
|
||||
}
|
||||
|
||||
fs_info->btree_inode = new_inode(sb);
|
||||
if (!fs_info->btree_inode) {
|
||||
err = -ENOMEM;
|
||||
goto fail_bio_counter;
|
||||
}
|
||||
|
||||
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
|
||||
|
||||
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
|
||||
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
|
||||
INIT_LIST_HEAD(&fs_info->trans_list);
|
||||
@ -2647,17 +2479,12 @@ int open_ctree(struct super_block *sb,
|
||||
btrfs_mapping_init(&fs_info->mapping_tree);
|
||||
btrfs_init_block_rsv(&fs_info->global_block_rsv,
|
||||
BTRFS_BLOCK_RSV_GLOBAL);
|
||||
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
|
||||
BTRFS_BLOCK_RSV_DELALLOC);
|
||||
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
|
||||
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
|
||||
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
|
||||
btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
|
||||
BTRFS_BLOCK_RSV_DELOPS);
|
||||
atomic_set(&fs_info->nr_async_submits, 0);
|
||||
atomic_set(&fs_info->async_delalloc_pages, 0);
|
||||
atomic_set(&fs_info->async_submit_draining, 0);
|
||||
atomic_set(&fs_info->nr_async_bios, 0);
|
||||
atomic_set(&fs_info->defrag_running, 0);
|
||||
atomic_set(&fs_info->qgroup_op_seq, 0);
|
||||
atomic_set(&fs_info->reada_works_cnt, 0);
|
||||
@ -2673,12 +2500,21 @@ int open_ctree(struct super_block *sb,
|
||||
/* readahead state */
|
||||
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
|
||||
spin_lock_init(&fs_info->reada_lock);
|
||||
btrfs_init_ref_verify(fs_info);
|
||||
|
||||
fs_info->thread_pool_size = min_t(unsigned long,
|
||||
num_online_cpus() + 2, 8);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->ordered_roots);
|
||||
spin_lock_init(&fs_info->ordered_root_lock);
|
||||
|
||||
fs_info->btree_inode = new_inode(sb);
|
||||
if (!fs_info->btree_inode) {
|
||||
err = -ENOMEM;
|
||||
goto fail_bio_counter;
|
||||
}
|
||||
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
|
||||
|
||||
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
|
||||
GFP_KERNEL);
|
||||
if (!fs_info->delayed_root) {
|
||||
@ -2895,12 +2731,13 @@ int open_ctree(struct super_block *sb,
|
||||
sb->s_bdi->congested_fn = btrfs_congested_fn;
|
||||
sb->s_bdi->congested_data = fs_info;
|
||||
sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
|
||||
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
|
||||
sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE;
|
||||
sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
|
||||
sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
|
||||
|
||||
sb->s_blocksize = sectorsize;
|
||||
sb->s_blocksize_bits = blksize_bits(sectorsize);
|
||||
memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE);
|
||||
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
ret = btrfs_read_sys_array(fs_info);
|
||||
@ -3083,6 +2920,9 @@ retry_root_backup:
|
||||
if (ret)
|
||||
goto fail_trans_kthread;
|
||||
|
||||
if (btrfs_build_ref_tree(fs_info))
|
||||
btrfs_err(fs_info, "couldn't build ref tree");
|
||||
|
||||
/* do not make disk changes in broken FS or nologreplay is given */
|
||||
if (btrfs_super_log_root(disk_super) != 0 &&
|
||||
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
|
||||
@ -3948,6 +3788,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
|
||||
cleanup_srcu_struct(&fs_info->subvol_srcu);
|
||||
|
||||
btrfs_free_stripe_hash_table(fs_info);
|
||||
btrfs_free_ref_cache(fs_info);
|
||||
|
||||
__btrfs_free_block_rsv(root->orphan_block_rsv);
|
||||
root->orphan_block_rsv = NULL;
|
||||
@ -4007,7 +3848,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
|
||||
buf->len,
|
||||
fs_info->dirty_metadata_batch);
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
|
||||
if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) {
|
||||
btrfs_print_leaf(buf);
|
||||
ASSERT(0);
|
||||
}
|
||||
@ -4272,26 +4113,28 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
|
||||
while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
struct btrfs_delayed_ref_node *tmp;
|
||||
struct rb_node *n;
|
||||
bool pin_bytes = false;
|
||||
|
||||
head = rb_entry(node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
refcount_inc(&head->node.refs);
|
||||
refcount_inc(&head->refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
mutex_lock(&head->mutex);
|
||||
mutex_unlock(&head->mutex);
|
||||
btrfs_put_delayed_ref(&head->node);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
continue;
|
||||
}
|
||||
spin_lock(&head->lock);
|
||||
list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
|
||||
list) {
|
||||
while ((n = rb_first(&head->ref_tree)) != NULL) {
|
||||
ref = rb_entry(n, struct btrfs_delayed_ref_node,
|
||||
ref_node);
|
||||
ref->in_tree = 0;
|
||||
list_del(&ref->list);
|
||||
rb_erase(&ref->ref_node, &head->ref_tree);
|
||||
RB_CLEAR_NODE(&ref->ref_node);
|
||||
if (!list_empty(&ref->add_list))
|
||||
list_del(&ref->add_list);
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
@ -4304,16 +4147,16 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
if (head->processing == 0)
|
||||
delayed_refs->num_heads_ready--;
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
head->node.in_tree = 0;
|
||||
rb_erase(&head->href_node, &delayed_refs->href_root);
|
||||
RB_CLEAR_NODE(&head->href_node);
|
||||
spin_unlock(&head->lock);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
mutex_unlock(&head->mutex);
|
||||
|
||||
if (pin_bytes)
|
||||
btrfs_pin_extent(fs_info, head->node.bytenr,
|
||||
head->node.num_bytes, 1);
|
||||
btrfs_put_delayed_ref(&head->node);
|
||||
btrfs_pin_extent(fs_info, head->bytenr,
|
||||
head->num_bytes, 1);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
cond_resched();
|
||||
spin_lock(&delayed_refs->lock);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -110,7 +110,6 @@ struct extent_page_data {
|
||||
struct bio *bio;
|
||||
struct extent_io_tree *tree;
|
||||
get_extent_t *get_extent;
|
||||
unsigned long bio_flags;
|
||||
|
||||
/* tells writepage not to lock the state bits for this range
|
||||
* it still does the unlocking
|
||||
@ -2762,8 +2761,8 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page,
|
||||
*/
|
||||
static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
struct writeback_control *wbc,
|
||||
struct page *page, sector_t sector,
|
||||
size_t size, unsigned long offset,
|
||||
struct page *page, u64 offset,
|
||||
size_t size, unsigned long pg_offset,
|
||||
struct block_device *bdev,
|
||||
struct bio **bio_ret,
|
||||
bio_end_io_t end_io_func,
|
||||
@ -2777,6 +2776,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
int contig = 0;
|
||||
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
|
||||
size_t page_size = min_t(size_t, size, PAGE_SIZE);
|
||||
sector_t sector = offset >> 9;
|
||||
|
||||
if (bio_ret && *bio_ret) {
|
||||
bio = *bio_ret;
|
||||
@ -2787,8 +2787,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
|
||||
if (prev_bio_flags != bio_flags || !contig ||
|
||||
force_bio_submit ||
|
||||
merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
|
||||
bio_add_page(bio, page, page_size, offset) < page_size) {
|
||||
merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
|
||||
bio_add_page(bio, page, page_size, pg_offset) < page_size) {
|
||||
ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
|
||||
if (ret < 0) {
|
||||
*bio_ret = NULL;
|
||||
@ -2802,8 +2802,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
}
|
||||
}
|
||||
|
||||
bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
|
||||
bio_add_page(bio, page, page_size, offset);
|
||||
bio = btrfs_bio_alloc(bdev, offset);
|
||||
bio_add_page(bio, page, page_size, pg_offset);
|
||||
bio->bi_end_io = end_io_func;
|
||||
bio->bi_private = tree;
|
||||
bio->bi_write_hint = page->mapping->host->i_write_hint;
|
||||
@ -2893,7 +2893,6 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
u64 last_byte = i_size_read(inode);
|
||||
u64 block_start;
|
||||
u64 cur_end;
|
||||
sector_t sector;
|
||||
struct extent_map *em;
|
||||
struct block_device *bdev;
|
||||
int ret = 0;
|
||||
@ -2929,6 +2928,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
}
|
||||
while (cur <= end) {
|
||||
bool force_bio_submit = false;
|
||||
u64 offset;
|
||||
|
||||
if (cur >= last_byte) {
|
||||
char *userpage;
|
||||
@ -2968,9 +2968,9 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
iosize = ALIGN(iosize, blocksize);
|
||||
if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
|
||||
disk_io_size = em->block_len;
|
||||
sector = em->block_start >> 9;
|
||||
offset = em->block_start;
|
||||
} else {
|
||||
sector = (em->block_start + extent_offset) >> 9;
|
||||
offset = em->block_start + extent_offset;
|
||||
disk_io_size = iosize;
|
||||
}
|
||||
bdev = em->bdev;
|
||||
@ -3063,8 +3063,8 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
}
|
||||
|
||||
ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
|
||||
page, sector, disk_io_size, pg_offset,
|
||||
bdev, bio,
|
||||
page, offset, disk_io_size,
|
||||
pg_offset, bdev, bio,
|
||||
end_bio_extent_readpage, mirror_num,
|
||||
*bio_flags,
|
||||
this_bio_flag,
|
||||
@ -3325,7 +3325,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
u64 extent_offset;
|
||||
u64 block_start;
|
||||
u64 iosize;
|
||||
sector_t sector;
|
||||
struct extent_map *em;
|
||||
struct block_device *bdev;
|
||||
size_t pg_offset = 0;
|
||||
@ -3368,6 +3367,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
|
||||
while (cur <= end) {
|
||||
u64 em_end;
|
||||
u64 offset;
|
||||
|
||||
if (cur >= i_size) {
|
||||
if (tree->ops && tree->ops->writepage_end_io_hook)
|
||||
@ -3389,7 +3389,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
BUG_ON(end < cur);
|
||||
iosize = min(em_end - cur, end - cur + 1);
|
||||
iosize = ALIGN(iosize, blocksize);
|
||||
sector = (em->block_start + extent_offset) >> 9;
|
||||
offset = em->block_start + extent_offset;
|
||||
bdev = em->bdev;
|
||||
block_start = em->block_start;
|
||||
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
@ -3432,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
}
|
||||
|
||||
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
|
||||
page, sector, iosize, pg_offset,
|
||||
page, offset, iosize, pg_offset,
|
||||
bdev, &epd->bio,
|
||||
end_bio_extent_writepage,
|
||||
0, 0, 0, false);
|
||||
@ -3716,7 +3716,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
u64 offset = eb->start;
|
||||
u32 nritems;
|
||||
unsigned long i, num_pages;
|
||||
unsigned long bio_flags = 0;
|
||||
unsigned long start, end;
|
||||
unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
|
||||
int ret = 0;
|
||||
@ -3724,8 +3723,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
|
||||
num_pages = num_extent_pages(eb->start, eb->len);
|
||||
atomic_set(&eb->io_pages, num_pages);
|
||||
if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
|
||||
bio_flags = EXTENT_BIO_TREE_LOG;
|
||||
|
||||
/* set btree blocks beyond nritems with 0 to avoid stale content. */
|
||||
nritems = btrfs_header_nritems(eb);
|
||||
@ -3749,11 +3746,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
clear_page_dirty_for_io(p);
|
||||
set_page_writeback(p);
|
||||
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
|
||||
p, offset >> 9, PAGE_SIZE, 0, bdev,
|
||||
p, offset, PAGE_SIZE, 0, bdev,
|
||||
&epd->bio,
|
||||
end_bio_extent_buffer_writepage,
|
||||
0, epd->bio_flags, bio_flags, false);
|
||||
epd->bio_flags = bio_flags;
|
||||
0, 0, 0, false);
|
||||
if (ret) {
|
||||
set_btree_ioerr(p);
|
||||
if (PageWriteback(p))
|
||||
@ -3790,7 +3786,6 @@ int btree_write_cache_pages(struct address_space *mapping,
|
||||
.tree = tree,
|
||||
.extent_locked = 0,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
.bio_flags = 0,
|
||||
};
|
||||
int ret = 0;
|
||||
int done = 0;
|
||||
@ -4063,7 +4058,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
|
||||
if (epd->bio) {
|
||||
int ret;
|
||||
|
||||
ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
|
||||
ret = submit_one_bio(epd->bio, 0, 0);
|
||||
BUG_ON(ret < 0); /* -ENOMEM */
|
||||
epd->bio = NULL;
|
||||
}
|
||||
@ -4086,7 +4081,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
.get_extent = get_extent,
|
||||
.extent_locked = 0,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
.bio_flags = 0,
|
||||
};
|
||||
|
||||
ret = __extent_writepage(page, wbc, &epd);
|
||||
@ -4111,7 +4105,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
|
||||
.get_extent = get_extent,
|
||||
.extent_locked = 1,
|
||||
.sync_io = mode == WB_SYNC_ALL,
|
||||
.bio_flags = 0,
|
||||
};
|
||||
struct writeback_control wbc_writepages = {
|
||||
.sync_mode = mode,
|
||||
@ -4151,7 +4144,6 @@ int extent_writepages(struct extent_io_tree *tree,
|
||||
.get_extent = get_extent,
|
||||
.extent_locked = 0,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
.bio_flags = 0,
|
||||
};
|
||||
|
||||
ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd,
|
||||
|
@ -34,7 +34,6 @@
|
||||
* type for this bio
|
||||
*/
|
||||
#define EXTENT_BIO_COMPRESSED 1
|
||||
#define EXTENT_BIO_TREE_LOG 2
|
||||
#define EXTENT_BIO_FLAG_SHIFT 16
|
||||
|
||||
/* these are bit numbers for test/set bit */
|
||||
|
@ -856,7 +856,7 @@ next_slot:
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
if (update_refs && disk_bytenr > 0) {
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info,
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
disk_bytenr, num_bytes, 0,
|
||||
root->root_key.objectid,
|
||||
new_key.objectid,
|
||||
@ -940,7 +940,7 @@ delete_extent_item:
|
||||
extent_end = ALIGN(extent_end,
|
||||
fs_info->sectorsize);
|
||||
} else if (update_refs && disk_bytenr > 0) {
|
||||
ret = btrfs_free_extent(trans, fs_info,
|
||||
ret = btrfs_free_extent(trans, root,
|
||||
disk_bytenr, num_bytes, 0,
|
||||
root->root_key.objectid,
|
||||
key.objectid, key.offset -
|
||||
@ -1234,7 +1234,7 @@ again:
|
||||
extent_end - split);
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
|
||||
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
|
||||
0, root->root_key.objectid,
|
||||
ino, orig_offset);
|
||||
if (ret) {
|
||||
@ -1268,7 +1268,7 @@ again:
|
||||
extent_end = other_end;
|
||||
del_slot = path->slots[0] + 1;
|
||||
del_nr++;
|
||||
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
|
||||
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
|
||||
0, root->root_key.objectid,
|
||||
ino, orig_offset);
|
||||
if (ret) {
|
||||
@ -1288,7 +1288,7 @@ again:
|
||||
key.offset = other_start;
|
||||
del_slot = path->slots[0];
|
||||
del_nr++;
|
||||
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
|
||||
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
|
||||
0, root->root_key.objectid,
|
||||
ino, orig_offset);
|
||||
if (ret) {
|
||||
@ -1590,7 +1590,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
int ret = 0;
|
||||
bool only_release_metadata = false;
|
||||
bool force_page_uptodate = false;
|
||||
bool need_unlock;
|
||||
|
||||
nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
|
||||
PAGE_SIZE / (sizeof(struct page *)));
|
||||
@ -1613,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
size_t copied;
|
||||
size_t dirty_sectors;
|
||||
size_t num_sectors;
|
||||
int extents_locked;
|
||||
|
||||
WARN_ON(num_pages > nrptrs);
|
||||
|
||||
@ -1656,6 +1656,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON(reserve_bytes == 0);
|
||||
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
|
||||
reserve_bytes);
|
||||
if (ret) {
|
||||
@ -1669,7 +1670,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
|
||||
}
|
||||
|
||||
release_bytes = reserve_bytes;
|
||||
need_unlock = false;
|
||||
again:
|
||||
/*
|
||||
* This is going to setup the pages array with the number of
|
||||
@ -1679,19 +1679,23 @@ again:
|
||||
ret = prepare_pages(inode, pages, num_pages,
|
||||
pos, write_bytes,
|
||||
force_page_uptodate);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode),
|
||||
reserve_bytes);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
|
||||
extents_locked = lock_and_cleanup_extent_if_need(
|
||||
BTRFS_I(inode), pages,
|
||||
num_pages, pos, write_bytes, &lockstart,
|
||||
&lockend, &cached_state);
|
||||
if (ret < 0) {
|
||||
if (ret == -EAGAIN)
|
||||
if (extents_locked < 0) {
|
||||
if (extents_locked == -EAGAIN)
|
||||
goto again;
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode),
|
||||
reserve_bytes);
|
||||
ret = extents_locked;
|
||||
break;
|
||||
} else if (ret > 0) {
|
||||
need_unlock = true;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
|
||||
@ -1718,23 +1722,10 @@ again:
|
||||
PAGE_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we had a short copy we need to release the excess delaloc
|
||||
* bytes we reserved. We need to increment outstanding_extents
|
||||
* because btrfs_delalloc_release_space and
|
||||
* btrfs_delalloc_release_metadata will decrement it, but
|
||||
* we still have an outstanding extent for the chunk we actually
|
||||
* managed to copy.
|
||||
*/
|
||||
if (num_sectors > dirty_sectors) {
|
||||
/* release everything except the sectors we dirtied */
|
||||
release_bytes -= dirty_sectors <<
|
||||
fs_info->sb->s_blocksize_bits;
|
||||
if (copied > 0) {
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
if (only_release_metadata) {
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
release_bytes);
|
||||
@ -1756,10 +1747,11 @@ again:
|
||||
if (copied > 0)
|
||||
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
|
||||
pos, copied, NULL);
|
||||
if (need_unlock)
|
||||
if (extents_locked)
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
|
||||
lockstart, lockend, &cached_state,
|
||||
GFP_NOFS);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
|
||||
if (ret) {
|
||||
btrfs_drop_pages(pages, num_pages);
|
||||
break;
|
||||
@ -2046,7 +2038,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_log_ctx ctx;
|
||||
int ret = 0, err;
|
||||
bool full_sync = 0;
|
||||
bool full_sync = false;
|
||||
u64 len;
|
||||
|
||||
/*
|
||||
|
@ -1286,12 +1286,8 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group_cache *block_group,
|
||||
struct btrfs_path *path)
|
||||
{
|
||||
u64 start, end;
|
||||
int ret;
|
||||
|
||||
start = block_group->key.objectid;
|
||||
end = block_group->key.objectid + block_group->key.offset;
|
||||
|
||||
block_group->needs_free_space = 0;
|
||||
|
||||
ret = add_new_free_space_info(trans, fs_info, block_group, path);
|
||||
|
@ -500,11 +500,12 @@ again:
|
||||
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
|
||||
prealloc, prealloc, &alloc_hint);
|
||||
if (ret) {
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
|
||||
out_put:
|
||||
iput(inode);
|
||||
out_release:
|
||||
|
327
fs/btrfs/inode.c
327
fs/btrfs/inode.c
@ -42,6 +42,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/magic.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
@ -67,7 +68,6 @@ struct btrfs_iget_args {
|
||||
};
|
||||
|
||||
struct btrfs_dio_data {
|
||||
u64 outstanding_extents;
|
||||
u64 reserve;
|
||||
u64 unsubmitted_oe_range_start;
|
||||
u64 unsubmitted_oe_range_end;
|
||||
@ -316,7 +316,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
|
||||
btrfs_free_path(path);
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
trans->block_rsv = &fs_info->delalloc_block_rsv;
|
||||
trans->block_rsv = &BTRFS_I(inode)->block_rsv;
|
||||
|
||||
if (compressed_size && compressed_pages)
|
||||
extent_item_size = btrfs_file_extent_calc_inline_size(
|
||||
@ -348,7 +348,6 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
|
||||
}
|
||||
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start);
|
||||
btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
|
||||
out:
|
||||
/*
|
||||
@ -458,7 +457,6 @@ static noinline void compress_file_range(struct inode *inode,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
u64 num_bytes;
|
||||
u64 blocksize = fs_info->sectorsize;
|
||||
u64 actual_end;
|
||||
u64 isize = i_size_read(inode);
|
||||
@ -508,8 +506,6 @@ again:
|
||||
|
||||
total_compressed = min_t(unsigned long, total_compressed,
|
||||
BTRFS_MAX_UNCOMPRESSED);
|
||||
num_bytes = ALIGN(end - start + 1, blocksize);
|
||||
num_bytes = max(blocksize, num_bytes);
|
||||
total_in = 0;
|
||||
ret = 0;
|
||||
|
||||
@ -542,7 +538,10 @@ again:
|
||||
*/
|
||||
extent_range_clear_dirty_for_io(inode, start, end);
|
||||
redirty = 1;
|
||||
ret = btrfs_compress_pages(compress_type,
|
||||
|
||||
/* Compression level is applied here and only here */
|
||||
ret = btrfs_compress_pages(
|
||||
compress_type | (fs_info->compress_level << 4),
|
||||
inode->i_mapping, start,
|
||||
pages,
|
||||
&nr_pages,
|
||||
@ -570,7 +569,7 @@ again:
|
||||
cont:
|
||||
if (start == 0) {
|
||||
/* lets try to make an inline extent */
|
||||
if (ret || total_in < (actual_end - start)) {
|
||||
if (ret || total_in < actual_end) {
|
||||
/* we didn't compress the entire range, try
|
||||
* to make an uncompressed inline extent.
|
||||
*/
|
||||
@ -584,16 +583,21 @@ cont:
|
||||
}
|
||||
if (ret <= 0) {
|
||||
unsigned long clear_flags = EXTENT_DELALLOC |
|
||||
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG;
|
||||
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
|
||||
EXTENT_DO_ACCOUNTING;
|
||||
unsigned long page_error_op;
|
||||
|
||||
clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
|
||||
page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
|
||||
|
||||
/*
|
||||
* inline extent creation worked or returned error,
|
||||
* we don't need to create any more async work items.
|
||||
* Unlock and free up our temp pages.
|
||||
*
|
||||
* We use DO_ACCOUNTING here because we need the
|
||||
* delalloc_release_metadata to be done _after_ we drop
|
||||
* our outstanding extent for clearing delalloc for this
|
||||
* range.
|
||||
*/
|
||||
extent_clear_unlock_delalloc(inode, start, end, end,
|
||||
NULL, clear_flags,
|
||||
@ -602,10 +606,6 @@ cont:
|
||||
PAGE_SET_WRITEBACK |
|
||||
page_error_op |
|
||||
PAGE_END_WRITEBACK);
|
||||
if (ret == 0)
|
||||
btrfs_free_reserved_data_space_noquota(inode,
|
||||
start,
|
||||
end - start + 1);
|
||||
goto free_pages_out;
|
||||
}
|
||||
}
|
||||
@ -625,7 +625,6 @@ cont:
|
||||
*/
|
||||
total_in = ALIGN(total_in, PAGE_SIZE);
|
||||
if (total_compressed + blocksize <= total_in) {
|
||||
num_bytes = total_in;
|
||||
*num_added += 1;
|
||||
|
||||
/*
|
||||
@ -633,12 +632,12 @@ cont:
|
||||
* allocation on disk for these compressed pages, and
|
||||
* will submit them to the elevator.
|
||||
*/
|
||||
add_async_extent(async_cow, start, num_bytes,
|
||||
add_async_extent(async_cow, start, total_in,
|
||||
total_compressed, pages, nr_pages,
|
||||
compress_type);
|
||||
|
||||
if (start + num_bytes < end) {
|
||||
start += num_bytes;
|
||||
if (start + total_in < end) {
|
||||
start += total_in;
|
||||
pages = NULL;
|
||||
cond_resched();
|
||||
goto again;
|
||||
@ -982,15 +981,19 @@ static noinline int cow_file_range(struct inode *inode,
|
||||
ret = cow_file_range_inline(root, inode, start, end, 0,
|
||||
BTRFS_COMPRESS_NONE, NULL);
|
||||
if (ret == 0) {
|
||||
/*
|
||||
* We use DO_ACCOUNTING here because we need the
|
||||
* delalloc_release_metadata to be run _after_ we drop
|
||||
* our outstanding extent for clearing delalloc for this
|
||||
* range.
|
||||
*/
|
||||
extent_clear_unlock_delalloc(inode, start, end,
|
||||
delalloc_end, NULL,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC |
|
||||
EXTENT_DELALLOC_NEW |
|
||||
EXTENT_DEFRAG, PAGE_UNLOCK |
|
||||
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
|
||||
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
|
||||
PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
|
||||
PAGE_END_WRITEBACK);
|
||||
btrfs_free_reserved_data_space_noquota(inode, start,
|
||||
end - start + 1);
|
||||
*nr_written = *nr_written +
|
||||
(end - start + PAGE_SIZE) / PAGE_SIZE;
|
||||
*page_started = 1;
|
||||
@ -1226,13 +1229,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
|
||||
|
||||
btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
|
||||
|
||||
while (atomic_read(&fs_info->async_submit_draining) &&
|
||||
atomic_read(&fs_info->async_delalloc_pages)) {
|
||||
wait_event(fs_info->async_submit_wait,
|
||||
(atomic_read(&fs_info->async_delalloc_pages) ==
|
||||
0));
|
||||
}
|
||||
|
||||
*nr_written += nr_pages;
|
||||
start = cur_end + 1;
|
||||
}
|
||||
@ -1635,7 +1631,7 @@ static void btrfs_split_extent_hook(void *private_data,
|
||||
}
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
@ -1665,7 +1661,7 @@ static void btrfs_merge_extent_hook(void *private_data,
|
||||
/* we're not bigger than the max, unreserve the space and go */
|
||||
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents--;
|
||||
btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
return;
|
||||
}
|
||||
@ -1696,7 +1692,7 @@ static void btrfs_merge_extent_hook(void *private_data,
|
||||
return;
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents--;
|
||||
btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
@ -1766,15 +1762,12 @@ static void btrfs_set_bit_hook(void *private_data,
|
||||
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
u64 len = state->end + 1 - state->start;
|
||||
u32 num_extents = count_max_extents(len);
|
||||
bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
|
||||
|
||||
if (*bits & EXTENT_FIRST_DELALLOC) {
|
||||
*bits &= ~EXTENT_FIRST_DELALLOC;
|
||||
} else {
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
|
||||
/* For sanity tests */
|
||||
if (btrfs_is_testing(fs_info))
|
||||
@ -1828,13 +1821,9 @@ static void btrfs_clear_bit_hook(void *private_data,
|
||||
struct btrfs_root *root = inode->root;
|
||||
bool do_list = !btrfs_is_free_space_inode(inode);
|
||||
|
||||
if (*bits & EXTENT_FIRST_DELALLOC) {
|
||||
*bits &= ~EXTENT_FIRST_DELALLOC;
|
||||
} else if (!(*bits & EXTENT_CLEAR_META_RESV)) {
|
||||
spin_lock(&inode->lock);
|
||||
inode->outstanding_extents -= num_extents;
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
spin_lock(&inode->lock);
|
||||
btrfs_mod_outstanding_extents(inode, -num_extents);
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
/*
|
||||
* We don't reserve metadata space for space cache inodes so we
|
||||
@ -2105,6 +2094,7 @@ again:
|
||||
0);
|
||||
ClearPageChecked(page);
|
||||
set_page_dirty(page);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
|
||||
out:
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
|
||||
&cached_state, GFP_NOFS);
|
||||
@ -2229,8 +2219,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
qg_released = ret;
|
||||
ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid,
|
||||
btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins);
|
||||
ret = btrfs_alloc_reserved_file_extent(trans, root,
|
||||
btrfs_ino(BTRFS_I(inode)),
|
||||
file_pos, qg_released, &ins);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
|
||||
@ -2464,7 +2455,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
|
||||
ret = iterate_inodes_from_logical(old->bytenr +
|
||||
old->extent_offset, fs_info,
|
||||
path, record_one_backref,
|
||||
old);
|
||||
old, false);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
return false;
|
||||
|
||||
@ -2682,7 +2673,7 @@ again:
|
||||
inode_add_bytes(inode, len);
|
||||
btrfs_release_path(path);
|
||||
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info, new->bytenr,
|
||||
ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
|
||||
new->disk_len, 0,
|
||||
backref->root_id, backref->inum,
|
||||
new->file_pos); /* start - extent_offset */
|
||||
@ -2964,7 +2955,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
trans = NULL;
|
||||
goto out;
|
||||
}
|
||||
trans->block_rsv = &fs_info->delalloc_block_rsv;
|
||||
trans->block_rsv = &BTRFS_I(inode)->block_rsv;
|
||||
ret = btrfs_update_inode_fallback(trans, root, inode);
|
||||
if (ret) /* -ENOMEM or corruption */
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@ -3000,7 +2991,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
goto out;
|
||||
}
|
||||
|
||||
trans->block_rsv = &fs_info->delalloc_block_rsv;
|
||||
trans->block_rsv = &BTRFS_I(inode)->block_rsv;
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
|
||||
compress_type = ordered_extent->compress_type;
|
||||
@ -3058,9 +3049,6 @@ out:
|
||||
0, &cached_state, GFP_NOFS);
|
||||
}
|
||||
|
||||
if (root != fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
ordered_extent->len);
|
||||
if (trans)
|
||||
btrfs_end_transaction(trans);
|
||||
|
||||
@ -4372,47 +4360,11 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
|
||||
|
||||
}
|
||||
|
||||
static int truncate_inline_extent(struct inode *inode,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *found_key,
|
||||
const u64 item_end,
|
||||
const u64 new_size)
|
||||
{
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
struct btrfs_file_extent_item *fi;
|
||||
u32 size = (u32)(new_size - found_key->offset);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
|
||||
if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
|
||||
loff_t offset = new_size;
|
||||
loff_t page_end = ALIGN(offset, PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Zero out the remaining of the last page of our inline extent,
|
||||
* instead of directly truncating our inline extent here - that
|
||||
* would be much more complex (decompressing all the data, then
|
||||
* compressing the truncated data, which might be bigger than
|
||||
* the size of the inline extent, resize the extent, etc).
|
||||
* We release the path because to get the page we might need to
|
||||
* read the extent item from disk (data not in the page cache).
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
return btrfs_truncate_block(inode, offset, page_end - offset,
|
||||
0);
|
||||
}
|
||||
|
||||
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
|
||||
size = btrfs_file_extent_calc_inline_size(size);
|
||||
btrfs_truncate_item(root->fs_info, path, size, 1);
|
||||
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
||||
inode_sub_bytes(inode, item_end + 1 - new_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Return this if we need to call truncate_block for the last bit of the
|
||||
* truncate.
|
||||
*/
|
||||
#define NEED_TRUNCATE_BLOCK 1
|
||||
|
||||
/*
|
||||
* this can truncate away extent items, csum items and directory items.
|
||||
@ -4451,9 +4403,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
|
||||
int err = 0;
|
||||
u64 ino = btrfs_ino(BTRFS_I(inode));
|
||||
u64 bytes_deleted = 0;
|
||||
bool be_nice = 0;
|
||||
bool should_throttle = 0;
|
||||
bool should_end = 0;
|
||||
bool be_nice = false;
|
||||
bool should_throttle = false;
|
||||
bool should_end = false;
|
||||
|
||||
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
|
||||
|
||||
@ -4463,7 +4415,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
|
||||
test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
||||
be_nice = 1;
|
||||
be_nice = true;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
@ -4573,11 +4525,6 @@ search_again:
|
||||
if (found_type != BTRFS_EXTENT_DATA_KEY)
|
||||
goto delete;
|
||||
|
||||
if (del_item)
|
||||
last_size = found_key.offset;
|
||||
else
|
||||
last_size = new_size;
|
||||
|
||||
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
|
||||
u64 num_dec;
|
||||
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
|
||||
@ -4619,40 +4566,30 @@ search_again:
|
||||
*/
|
||||
if (!del_item &&
|
||||
btrfs_file_extent_encryption(leaf, fi) == 0 &&
|
||||
btrfs_file_extent_other_encoding(leaf, fi) == 0) {
|
||||
btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
|
||||
btrfs_file_extent_compression(leaf, fi) == 0) {
|
||||
u32 size = (u32)(new_size - found_key.offset);
|
||||
|
||||
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
|
||||
size = btrfs_file_extent_calc_inline_size(size);
|
||||
btrfs_truncate_item(root->fs_info, path, size, 1);
|
||||
} else if (!del_item) {
|
||||
/*
|
||||
* Need to release path in order to truncate a
|
||||
* compressed extent. So delete any accumulated
|
||||
* extent items so far.
|
||||
* We have to bail so the last_size is set to
|
||||
* just before this extent.
|
||||
*/
|
||||
if (btrfs_file_extent_compression(leaf, fi) !=
|
||||
BTRFS_COMPRESS_NONE && pending_del_nr) {
|
||||
err = btrfs_del_items(trans, root, path,
|
||||
pending_del_slot,
|
||||
pending_del_nr);
|
||||
if (err) {
|
||||
btrfs_abort_transaction(trans,
|
||||
err);
|
||||
goto error;
|
||||
}
|
||||
pending_del_nr = 0;
|
||||
}
|
||||
|
||||
err = truncate_inline_extent(inode, path,
|
||||
&found_key,
|
||||
item_end,
|
||||
new_size);
|
||||
if (err) {
|
||||
btrfs_abort_transaction(trans, err);
|
||||
goto error;
|
||||
}
|
||||
} else if (test_bit(BTRFS_ROOT_REF_COWS,
|
||||
&root->state)) {
|
||||
inode_sub_bytes(inode, item_end + 1 - new_size);
|
||||
err = NEED_TRUNCATE_BLOCK;
|
||||
break;
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
||||
inode_sub_bytes(inode, item_end + 1 - new_size);
|
||||
}
|
||||
delete:
|
||||
if (del_item)
|
||||
last_size = found_key.offset;
|
||||
else
|
||||
last_size = new_size;
|
||||
if (del_item) {
|
||||
if (!pending_del_nr) {
|
||||
/* no pending yet, add ourselves */
|
||||
@ -4669,14 +4606,14 @@ delete:
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
should_throttle = 0;
|
||||
should_throttle = false;
|
||||
|
||||
if (found_extent &&
|
||||
(test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
|
||||
root == fs_info->tree_root)) {
|
||||
btrfs_set_path_blocking(path);
|
||||
bytes_deleted += extent_num_bytes;
|
||||
ret = btrfs_free_extent(trans, fs_info, extent_start,
|
||||
ret = btrfs_free_extent(trans, root, extent_start,
|
||||
extent_num_bytes, 0,
|
||||
btrfs_header_owner(leaf),
|
||||
ino, extent_offset);
|
||||
@ -4688,11 +4625,11 @@ delete:
|
||||
if (be_nice) {
|
||||
if (truncate_space_check(trans, root,
|
||||
extent_num_bytes)) {
|
||||
should_end = 1;
|
||||
should_end = true;
|
||||
}
|
||||
if (btrfs_should_throttle_delayed_refs(trans,
|
||||
fs_info))
|
||||
should_throttle = 1;
|
||||
should_throttle = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4801,8 +4738,11 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
|
||||
(!len || ((len & (blocksize - 1)) == 0)))
|
||||
goto out;
|
||||
|
||||
block_start = round_down(from, blocksize);
|
||||
block_end = block_start + blocksize - 1;
|
||||
|
||||
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
|
||||
round_down(from, blocksize), blocksize);
|
||||
block_start, blocksize);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -4810,15 +4750,12 @@ again:
|
||||
page = find_or_create_page(mapping, index, mask);
|
||||
if (!page) {
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
round_down(from, blocksize),
|
||||
blocksize);
|
||||
block_start, blocksize);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
block_start = round_down(from, blocksize);
|
||||
block_end = block_start + blocksize - 1;
|
||||
|
||||
if (!PageUptodate(page)) {
|
||||
ret = btrfs_readpage(NULL, page);
|
||||
lock_page(page);
|
||||
@ -4883,6 +4820,7 @@ out_unlock:
|
||||
if (ret)
|
||||
btrfs_delalloc_release_space(inode, data_reserved, block_start,
|
||||
blocksize);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
out:
|
||||
@ -7797,33 +7735,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
|
||||
return em;
|
||||
}
|
||||
|
||||
static void adjust_dio_outstanding_extents(struct inode *inode,
|
||||
struct btrfs_dio_data *dio_data,
|
||||
const u64 len)
|
||||
{
|
||||
unsigned num_extents = count_max_extents(len);
|
||||
|
||||
/*
|
||||
* If we have an outstanding_extents count still set then we're
|
||||
* within our reservation, otherwise we need to adjust our inode
|
||||
* counter appropriately.
|
||||
*/
|
||||
if (dio_data->outstanding_extents >= num_extents) {
|
||||
dio_data->outstanding_extents -= num_extents;
|
||||
} else {
|
||||
/*
|
||||
* If dio write length has been split due to no large enough
|
||||
* contiguous space, we need to compensate our inode counter
|
||||
* appropriately.
|
||||
*/
|
||||
u64 num_needed = num_extents - dio_data->outstanding_extents;
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents += num_needed;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
@ -7985,7 +7896,6 @@ unlock:
|
||||
if (!dio_data->overwrite && start + len > i_size_read(inode))
|
||||
i_size_write(inode, start + len);
|
||||
|
||||
adjust_dio_outstanding_extents(inode, dio_data, len);
|
||||
WARN_ON(dio_data->reserve < len);
|
||||
dio_data->reserve -= len;
|
||||
dio_data->unsubmitted_oe_range_end = start + len;
|
||||
@ -8015,14 +7925,6 @@ unlock_err:
|
||||
err:
|
||||
if (dio_data)
|
||||
current->journal_info = dio_data;
|
||||
/*
|
||||
* Compensate the delalloc release we do in btrfs_direct_IO() when we
|
||||
* write less data then expected, so that we don't underflow our inode's
|
||||
* outstanding extents counter.
|
||||
*/
|
||||
if (create && dio_data)
|
||||
adjust_dio_outstanding_extents(inode, dio_data, len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -8495,7 +8397,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
|
||||
if (dip->errors) {
|
||||
bio_io_error(dip->orig_bio);
|
||||
} else {
|
||||
dip->dio_bio->bi_status = 0;
|
||||
dip->dio_bio->bi_status = BLK_STS_OK;
|
||||
bio_endio(dip->orig_bio);
|
||||
}
|
||||
out:
|
||||
@ -8577,7 +8479,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
|
||||
goto err;
|
||||
}
|
||||
map:
|
||||
ret = btrfs_map_bio(fs_info, bio, 0, async_submit);
|
||||
ret = btrfs_map_bio(fs_info, bio, 0, 0);
|
||||
err:
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
@ -8786,7 +8688,6 @@ free_ordered:
|
||||
}
|
||||
|
||||
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
|
||||
struct kiocb *iocb,
|
||||
const struct iov_iter *iter, loff_t offset)
|
||||
{
|
||||
int seg;
|
||||
@ -8833,7 +8734,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
bool relock = false;
|
||||
ssize_t ret;
|
||||
|
||||
if (check_direct_IO(fs_info, iocb, iter, offset))
|
||||
if (check_direct_IO(fs_info, iter, offset))
|
||||
return 0;
|
||||
|
||||
inode_dio_begin(inode);
|
||||
@ -8868,7 +8769,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
offset, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
dio_data.outstanding_extents = count_max_extents(count);
|
||||
|
||||
/*
|
||||
* We need to know how many extents we reserved so that we can
|
||||
@ -8915,6 +8815,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
} else if (ret >= 0 && (size_t)ret < count)
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
offset, count - (size_t)ret);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), count);
|
||||
}
|
||||
out:
|
||||
if (wakeup)
|
||||
@ -9232,9 +9133,6 @@ again:
|
||||
fs_info->sectorsize);
|
||||
if (reserved_space < PAGE_SIZE) {
|
||||
end = page_start + reserved_space - 1;
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
page_start, PAGE_SIZE - reserved_space);
|
||||
}
|
||||
@ -9286,12 +9184,14 @@ again:
|
||||
|
||||
out_unlock:
|
||||
if (!ret) {
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
extent_changeset_free(data_reserved);
|
||||
return VM_FAULT_LOCKED;
|
||||
}
|
||||
unlock_page(page);
|
||||
out:
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
|
||||
btrfs_delalloc_release_space(inode, data_reserved, page_start,
|
||||
reserved_space);
|
||||
out_noreserve:
|
||||
@ -9387,12 +9287,12 @@ static int btrfs_truncate(struct inode *inode)
|
||||
ret = btrfs_truncate_inode_items(trans, root, inode,
|
||||
inode->i_size,
|
||||
BTRFS_EXTENT_DATA_KEY);
|
||||
trans->block_rsv = &fs_info->trans_block_rsv;
|
||||
if (ret != -ENOSPC && ret != -EAGAIN) {
|
||||
err = ret;
|
||||
break;
|
||||
}
|
||||
|
||||
trans->block_rsv = &fs_info->trans_block_rsv;
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
if (ret) {
|
||||
err = ret;
|
||||
@ -9416,6 +9316,27 @@ static int btrfs_truncate(struct inode *inode)
|
||||
trans->block_rsv = rsv;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can't call btrfs_truncate_block inside a trans handle as we could
|
||||
* deadlock with freeze, if we got NEED_TRUNCATE_BLOCK then we know
|
||||
* we've truncated everything except the last little bit, and can do
|
||||
* btrfs_truncate_block and then update the disk_i_size.
|
||||
*/
|
||||
if (ret == NEED_TRUNCATE_BLOCK) {
|
||||
btrfs_end_transaction(trans);
|
||||
btrfs_btree_balance_dirty(fs_info);
|
||||
|
||||
ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out;
|
||||
}
|
||||
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
|
||||
}
|
||||
|
||||
if (ret == 0 && inode->i_nlink > 0) {
|
||||
trans->block_rsv = root->orphan_block_rsv;
|
||||
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
|
||||
@ -9480,6 +9401,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
|
||||
|
||||
struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
struct btrfs_inode *ei;
|
||||
struct inode *inode;
|
||||
|
||||
@ -9506,8 +9428,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||
|
||||
spin_lock_init(&ei->lock);
|
||||
ei->outstanding_extents = 0;
|
||||
ei->reserved_extents = 0;
|
||||
|
||||
if (sb->s_magic != BTRFS_TEST_MAGIC)
|
||||
btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
|
||||
BTRFS_BLOCK_RSV_DELALLOC);
|
||||
ei->runtime_flags = 0;
|
||||
ei->prop_compress = BTRFS_COMPRESS_NONE;
|
||||
ei->defrag_compress = BTRFS_COMPRESS_NONE;
|
||||
@ -9557,8 +9480,9 @@ void btrfs_destroy_inode(struct inode *inode)
|
||||
|
||||
WARN_ON(!hlist_empty(&inode->i_dentry));
|
||||
WARN_ON(inode->i_data.nrpages);
|
||||
WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
|
||||
WARN_ON(BTRFS_I(inode)->block_rsv.size);
|
||||
WARN_ON(BTRFS_I(inode)->outstanding_extents);
|
||||
WARN_ON(BTRFS_I(inode)->reserved_extents);
|
||||
WARN_ON(BTRFS_I(inode)->delalloc_bytes);
|
||||
WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
|
||||
WARN_ON(BTRFS_I(inode)->csum_bytes);
|
||||
@ -10337,19 +10261,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
|
||||
ret = __start_delalloc_inodes(root, delay_iput, -1);
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
/*
|
||||
* the filemap_flush will queue IO into the worker threads, but
|
||||
* we have to make sure the IO is actually started and that
|
||||
* ordered extents get created before we return
|
||||
*/
|
||||
atomic_inc(&fs_info->async_submit_draining);
|
||||
while (atomic_read(&fs_info->nr_async_submits) ||
|
||||
atomic_read(&fs_info->async_delalloc_pages)) {
|
||||
wait_event(fs_info->async_submit_wait,
|
||||
(atomic_read(&fs_info->nr_async_submits) == 0 &&
|
||||
atomic_read(&fs_info->async_delalloc_pages) == 0));
|
||||
}
|
||||
atomic_dec(&fs_info->async_submit_draining);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -10391,14 +10302,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
|
||||
spin_unlock(&fs_info->delalloc_root_lock);
|
||||
|
||||
ret = 0;
|
||||
atomic_inc(&fs_info->async_submit_draining);
|
||||
while (atomic_read(&fs_info->nr_async_submits) ||
|
||||
atomic_read(&fs_info->async_delalloc_pages)) {
|
||||
wait_event(fs_info->async_submit_wait,
|
||||
(atomic_read(&fs_info->nr_async_submits) == 0 &&
|
||||
atomic_read(&fs_info->async_delalloc_pages) == 0));
|
||||
}
|
||||
atomic_dec(&fs_info->async_submit_draining);
|
||||
out:
|
||||
if (!list_empty_careful(&splice)) {
|
||||
spin_lock(&fs_info->delalloc_root_lock);
|
||||
|
156
fs/btrfs/ioctl.c
156
fs/btrfs/ioctl.c
@ -86,6 +86,19 @@ struct btrfs_ioctl_received_subvol_args_32 {
|
||||
struct btrfs_ioctl_received_subvol_args_32)
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_send_args_32 {
|
||||
__s64 send_fd; /* in */
|
||||
__u64 clone_sources_count; /* in */
|
||||
compat_uptr_t clone_sources; /* in */
|
||||
__u64 parent_root; /* in */
|
||||
__u64 flags; /* in */
|
||||
__u64 reserved[4]; /* in */
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
|
||||
struct btrfs_ioctl_send_args_32)
|
||||
#endif
|
||||
|
||||
static int btrfs_clone(struct inode *src, struct inode *inode,
|
||||
u64 off, u64 olen, u64 olen_aligned, u64 destoff,
|
||||
@ -609,23 +622,6 @@ fail_free:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root)
|
||||
{
|
||||
s64 writers;
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
do {
|
||||
prepare_to_wait(&root->subv_writers->wait, &wait,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
|
||||
writers = percpu_counter_sum(&root->subv_writers->counter);
|
||||
if (writers)
|
||||
schedule();
|
||||
|
||||
finish_wait(&root->subv_writers->wait, &wait);
|
||||
} while (writers);
|
||||
}
|
||||
|
||||
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
||||
struct dentry *dentry,
|
||||
u64 *async_transid, bool readonly,
|
||||
@ -654,7 +650,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
||||
|
||||
atomic_inc(&root->will_be_snapshotted);
|
||||
smp_mb__after_atomic();
|
||||
btrfs_wait_for_no_snapshotting_writes(root);
|
||||
/* wait for no snapshot writes */
|
||||
wait_event(root->subv_writers->wait,
|
||||
percpu_counter_sum(&root->subv_writers->counter) == 0);
|
||||
|
||||
ret = btrfs_start_delalloc_inodes(root, 0);
|
||||
if (ret)
|
||||
@ -1219,6 +1217,7 @@ again:
|
||||
unlock_page(pages[i]);
|
||||
put_page(pages[i]);
|
||||
}
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
|
||||
extent_changeset_free(data_reserved);
|
||||
return i_done;
|
||||
out:
|
||||
@ -1229,6 +1228,7 @@ out:
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
start_index << PAGE_SHIFT,
|
||||
page_cnt << PAGE_SHIFT);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
|
||||
extent_changeset_free(data_reserved);
|
||||
return ret;
|
||||
|
||||
@ -1420,21 +1420,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
|
||||
filemap_flush(inode->i_mapping);
|
||||
}
|
||||
|
||||
if (do_compress) {
|
||||
/* the filemap_flush will queue IO into the worker threads, but
|
||||
* we have to make sure the IO is actually started and that
|
||||
* ordered extents get created before we return
|
||||
*/
|
||||
atomic_inc(&fs_info->async_submit_draining);
|
||||
while (atomic_read(&fs_info->nr_async_submits) ||
|
||||
atomic_read(&fs_info->async_delalloc_pages)) {
|
||||
wait_event(fs_info->async_submit_wait,
|
||||
(atomic_read(&fs_info->nr_async_submits) == 0 &&
|
||||
atomic_read(&fs_info->async_delalloc_pages) == 0));
|
||||
}
|
||||
atomic_dec(&fs_info->async_submit_draining);
|
||||
}
|
||||
|
||||
if (range->compress_type == BTRFS_COMPRESS_LZO) {
|
||||
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
|
||||
} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
|
||||
@ -1842,8 +1827,13 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
|
||||
|
||||
ret = btrfs_update_root(trans, fs_info->tree_root,
|
||||
&root->root_key, &root->root_item);
|
||||
if (ret < 0) {
|
||||
btrfs_end_transaction(trans);
|
||||
goto out_reset;
|
||||
}
|
||||
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
|
||||
btrfs_commit_transaction(trans);
|
||||
out_reset:
|
||||
if (ret)
|
||||
btrfs_set_root_flags(&root->root_item, root_flags);
|
||||
@ -2179,7 +2169,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
|
||||
|
||||
inode = file_inode(file);
|
||||
ret = search_ioctl(inode, &args.key, &buf_size,
|
||||
(char *)(&uarg->buf[0]));
|
||||
(char __user *)(&uarg->buf[0]));
|
||||
if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
|
||||
ret = -EFAULT;
|
||||
else if (ret == -EOVERFLOW &&
|
||||
@ -3706,7 +3696,7 @@ process_slot:
|
||||
if (disko) {
|
||||
inode_add_bytes(inode, datal);
|
||||
ret = btrfs_inc_extent_ref(trans,
|
||||
fs_info,
|
||||
root,
|
||||
disko, diskl, 0,
|
||||
root->root_key.objectid,
|
||||
btrfs_ino(BTRFS_I(inode)),
|
||||
@ -4129,10 +4119,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_space_info *dest_orig;
|
||||
struct btrfs_ioctl_space_info __user *user_dest;
|
||||
struct btrfs_space_info *info;
|
||||
u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
|
||||
BTRFS_BLOCK_GROUP_SYSTEM,
|
||||
BTRFS_BLOCK_GROUP_METADATA,
|
||||
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
|
||||
static const u64 types[] = {
|
||||
BTRFS_BLOCK_GROUP_DATA,
|
||||
BTRFS_BLOCK_GROUP_SYSTEM,
|
||||
BTRFS_BLOCK_GROUP_METADATA,
|
||||
BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
|
||||
};
|
||||
int num_types = 4;
|
||||
int alloc_size;
|
||||
int ret = 0;
|
||||
@ -4504,8 +4496,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
|
||||
ipath->fspath->val[i] = rel_ptr;
|
||||
}
|
||||
|
||||
ret = copy_to_user((void *)(unsigned long)ipa->fspath,
|
||||
(void *)(unsigned long)ipath->fspath, size);
|
||||
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
|
||||
ipath->fspath, size);
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
@ -4540,13 +4532,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
|
||||
void __user *arg)
|
||||
void __user *arg, int version)
|
||||
{
|
||||
int ret = 0;
|
||||
int size;
|
||||
struct btrfs_ioctl_logical_ino_args *loi;
|
||||
struct btrfs_data_container *inodes = NULL;
|
||||
struct btrfs_path *path = NULL;
|
||||
bool ignore_offset;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
@ -4555,13 +4548,30 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
|
||||
if (IS_ERR(loi))
|
||||
return PTR_ERR(loi);
|
||||
|
||||
if (version == 1) {
|
||||
ignore_offset = false;
|
||||
size = min_t(u32, loi->size, SZ_64K);
|
||||
} else {
|
||||
/* All reserved bits must be 0 for now */
|
||||
if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
|
||||
ret = -EINVAL;
|
||||
goto out_loi;
|
||||
}
|
||||
/* Only accept flags we have defined so far */
|
||||
if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
|
||||
ret = -EINVAL;
|
||||
goto out_loi;
|
||||
}
|
||||
ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
|
||||
size = min_t(u32, loi->size, SZ_16M);
|
||||
}
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
size = min_t(u32, loi->size, SZ_64K);
|
||||
inodes = init_data_container(size);
|
||||
if (IS_ERR(inodes)) {
|
||||
ret = PTR_ERR(inodes);
|
||||
@ -4570,20 +4580,21 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
|
||||
build_ino_list, inodes);
|
||||
build_ino_list, inodes, ignore_offset);
|
||||
if (ret == -EINVAL)
|
||||
ret = -ENOENT;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = copy_to_user((void *)(unsigned long)loi->inodes,
|
||||
(void *)(unsigned long)inodes, size);
|
||||
ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
|
||||
size);
|
||||
if (ret)
|
||||
ret = -EFAULT;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
kvfree(inodes);
|
||||
out_loi:
|
||||
kfree(loi);
|
||||
|
||||
return ret;
|
||||
@ -5160,15 +5171,11 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
|
||||
root->root_key.objectid);
|
||||
if (ret < 0 && ret != -EEXIST) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_end_transaction(trans);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
up_write(&fs_info->subvol_sem);
|
||||
mnt_drop_write_file(file);
|
||||
@ -5490,6 +5497,41 @@ out_drop_write:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
|
||||
{
|
||||
struct btrfs_ioctl_send_args *arg;
|
||||
int ret;
|
||||
|
||||
if (compat) {
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_send_args_32 args32;
|
||||
|
||||
ret = copy_from_user(&args32, argp, sizeof(args32));
|
||||
if (ret)
|
||||
return -EFAULT;
|
||||
arg = kzalloc(sizeof(*arg), GFP_KERNEL);
|
||||
if (!arg)
|
||||
return -ENOMEM;
|
||||
arg->send_fd = args32.send_fd;
|
||||
arg->clone_sources_count = args32.clone_sources_count;
|
||||
arg->clone_sources = compat_ptr(args32.clone_sources);
|
||||
arg->parent_root = args32.parent_root;
|
||||
arg->flags = args32.flags;
|
||||
memcpy(arg->reserved, args32.reserved,
|
||||
sizeof(args32.reserved));
|
||||
#else
|
||||
return -ENOTTY;
|
||||
#endif
|
||||
} else {
|
||||
arg = memdup_user(argp, sizeof(*arg));
|
||||
if (IS_ERR(arg))
|
||||
return PTR_ERR(arg);
|
||||
}
|
||||
ret = btrfs_ioctl_send(file, arg);
|
||||
kfree(arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long btrfs_ioctl(struct file *file, unsigned int
|
||||
cmd, unsigned long arg)
|
||||
{
|
||||
@ -5554,7 +5596,9 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
case BTRFS_IOC_INO_PATHS:
|
||||
return btrfs_ioctl_ino_to_path(root, argp);
|
||||
case BTRFS_IOC_LOGICAL_INO:
|
||||
return btrfs_ioctl_logical_to_ino(fs_info, argp);
|
||||
return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
|
||||
case BTRFS_IOC_LOGICAL_INO_V2:
|
||||
return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
|
||||
case BTRFS_IOC_SPACE_INFO:
|
||||
return btrfs_ioctl_space_info(fs_info, argp);
|
||||
case BTRFS_IOC_SYNC: {
|
||||
@ -5595,7 +5639,11 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return btrfs_ioctl_set_received_subvol_32(file, argp);
|
||||
#endif
|
||||
case BTRFS_IOC_SEND:
|
||||
return btrfs_ioctl_send(file, argp);
|
||||
return _btrfs_ioctl_send(file, argp, false);
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_SEND_32:
|
||||
return _btrfs_ioctl_send(file, argp, true);
|
||||
#endif
|
||||
case BTRFS_IOC_GET_DEV_STATS:
|
||||
return btrfs_ioctl_get_dev_stats(fs_info, argp);
|
||||
case BTRFS_IOC_QUOTA_CTL:
|
||||
|
@ -430,10 +430,15 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void lzo_set_level(struct list_head *ws, unsigned int type)
|
||||
{
|
||||
}
|
||||
|
||||
const struct btrfs_compress_op btrfs_lzo_compress = {
|
||||
.alloc_workspace = lzo_alloc_workspace,
|
||||
.free_workspace = lzo_free_workspace,
|
||||
.compress_pages = lzo_compress_pages,
|
||||
.decompress_bio = lzo_decompress_bio,
|
||||
.decompress = lzo_decompress,
|
||||
.set_level = lzo_set_level,
|
||||
};
|
||||
|
@ -242,6 +242,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
}
|
||||
spin_unlock(&root->ordered_extent_lock);
|
||||
|
||||
/*
|
||||
* We don't need the count_max_extents here, we can assume that all of
|
||||
* that work has been done at higher layers, so this is truly the
|
||||
* smallest the extent is going to get.
|
||||
*/
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -591,11 +600,19 @@ void btrfs_remove_ordered_extent(struct inode *inode,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_ordered_inode_tree *tree;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
|
||||
struct btrfs_root *root = btrfs_inode->root;
|
||||
struct rb_node *node;
|
||||
bool dec_pending_ordered = false;
|
||||
|
||||
tree = &BTRFS_I(inode)->ordered_tree;
|
||||
/* This is paired with btrfs_add_ordered_extent. */
|
||||
spin_lock(&btrfs_inode->lock);
|
||||
btrfs_mod_outstanding_extents(btrfs_inode, -1);
|
||||
spin_unlock(&btrfs_inode->lock);
|
||||
if (root != fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
|
||||
|
||||
tree = &btrfs_inode->ordered_tree;
|
||||
spin_lock_irq(&tree->lock);
|
||||
node = &entry->rb_node;
|
||||
rb_erase(node, &tree->tree);
|
||||
|
@ -1441,7 +1441,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr = qrecord->bytenr;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
|
||||
ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@ -2031,7 +2031,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
|
||||
/* Search commit root to find old_roots */
|
||||
ret = btrfs_find_all_roots(NULL, fs_info,
|
||||
record->bytenr, 0,
|
||||
&record->old_roots);
|
||||
&record->old_roots, false);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
}
|
||||
@ -2042,7 +2042,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
|
||||
* root. It's safe inside commit_transaction().
|
||||
*/
|
||||
ret = btrfs_find_all_roots(trans, fs_info,
|
||||
record->bytenr, SEQ_LAST, &new_roots);
|
||||
record->bytenr, SEQ_LAST, &new_roots, false);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
if (qgroup_to_skip) {
|
||||
@ -2570,7 +2570,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
|
||||
num_bytes = found.offset;
|
||||
|
||||
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
|
||||
&roots);
|
||||
&roots, false);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
/* For rescan, just pass old_roots as NULL */
|
||||
|
@ -1326,6 +1326,9 @@ write_data:
|
||||
|
||||
cleanup:
|
||||
rbio_orig_end_io(rbio, BLK_STS_IOERR);
|
||||
|
||||
while ((bio = bio_list_pop(&bio_list)))
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1582,6 +1585,10 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
|
||||
|
||||
cleanup:
|
||||
rbio_orig_end_io(rbio, BLK_STS_IOERR);
|
||||
|
||||
while ((bio = bio_list_pop(&bio_list)))
|
||||
bio_put(bio);
|
||||
|
||||
return -EIO;
|
||||
|
||||
finish:
|
||||
@ -2107,6 +2114,10 @@ cleanup:
|
||||
if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
|
||||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
|
||||
rbio_orig_end_io(rbio, BLK_STS_IOERR);
|
||||
|
||||
while ((bio = bio_list_pop(&bio_list)))
|
||||
bio_put(bio);
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -2231,12 +2242,18 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
ASSERT(!bio->bi_iter.bi_size);
|
||||
rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
|
||||
|
||||
for (i = 0; i < rbio->real_stripes; i++) {
|
||||
/*
|
||||
* After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
|
||||
* to the end position, so this search can start from the first parity
|
||||
* stripe.
|
||||
*/
|
||||
for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
|
||||
if (bbio->stripes[i].dev == scrub_dev) {
|
||||
rbio->scrubp = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT(i < rbio->real_stripes);
|
||||
|
||||
/* For now we only support a sectorsize equal to the page size */
|
||||
ASSERT(fs_info->sectorsize == PAGE_SIZE);
|
||||
@ -2454,6 +2471,9 @@ submit_write:
|
||||
|
||||
cleanup:
|
||||
rbio_orig_end_io(rbio, BLK_STS_IOERR);
|
||||
|
||||
while ((bio = bio_list_pop(&bio_list)))
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
|
||||
@ -2563,12 +2583,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
|
||||
int stripe;
|
||||
struct bio *bio;
|
||||
|
||||
bio_list_init(&bio_list);
|
||||
|
||||
ret = alloc_rbio_essential_pages(rbio);
|
||||
if (ret)
|
||||
goto cleanup;
|
||||
|
||||
bio_list_init(&bio_list);
|
||||
|
||||
atomic_set(&rbio->error, 0);
|
||||
/*
|
||||
* build a list of bios to read all the missing parts of this
|
||||
@ -2636,6 +2656,10 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
|
||||
|
||||
cleanup:
|
||||
rbio_orig_end_io(rbio, BLK_STS_IOERR);
|
||||
|
||||
while ((bio = bio_list_pop(&bio_list)))
|
||||
bio_put(bio);
|
||||
|
||||
return;
|
||||
|
||||
finish:
|
||||
|
1031
fs/btrfs/ref-verify.c
Normal file
1031
fs/btrfs/ref-verify.c
Normal file
File diff suppressed because it is too large
Load Diff
62
fs/btrfs/ref-verify.h
Normal file
62
fs/btrfs/ref-verify.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (C) 2014 Facebook. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef __REF_VERIFY__
|
||||
#define __REF_VERIFY__
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
|
||||
u64 parent, u64 ref_root, u64 owner, u64 offset,
|
||||
int action);
|
||||
void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
|
||||
u64 len);
|
||||
|
||||
static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
spin_lock_init(&fs_info->ref_verify_lock);
|
||||
fs_info->block_tree = RB_ROOT;
|
||||
}
|
||||
#else
|
||||
static inline int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr,
|
||||
u64 num_bytes, u64 parent, u64 ref_root,
|
||||
u64 owner, u64 offset, int action)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_BTRFS_FS_REF_VERIFY */
|
||||
#endif /* __REF_VERIFY__ */
|
@ -1742,7 +1742,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
dirty = 1;
|
||||
|
||||
key.offset -= btrfs_file_extent_offset(leaf, fi);
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr,
|
||||
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
|
||||
num_bytes, parent,
|
||||
btrfs_header_owner(leaf),
|
||||
key.objectid, key.offset);
|
||||
@ -1751,7 +1751,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
break;
|
||||
}
|
||||
|
||||
ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
|
||||
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
|
||||
parent, btrfs_header_owner(leaf),
|
||||
key.objectid, key.offset);
|
||||
if (ret) {
|
||||
@ -1952,21 +1952,21 @@ again:
|
||||
path->slots[level], old_ptr_gen);
|
||||
btrfs_mark_buffer_dirty(path->nodes[level]);
|
||||
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info, old_bytenr,
|
||||
ret = btrfs_inc_extent_ref(trans, src, old_bytenr,
|
||||
blocksize, path->nodes[level]->start,
|
||||
src->root_key.objectid, level - 1, 0);
|
||||
BUG_ON(ret);
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr,
|
||||
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr,
|
||||
blocksize, 0, dest->root_key.objectid,
|
||||
level - 1, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_free_extent(trans, fs_info, new_bytenr, blocksize,
|
||||
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
|
||||
path->nodes[level]->start,
|
||||
src->root_key.objectid, level - 1, 0);
|
||||
BUG_ON(ret);
|
||||
|
||||
ret = btrfs_free_extent(trans, fs_info, old_bytenr, blocksize,
|
||||
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
|
||||
0, dest->root_key.objectid, level - 1,
|
||||
0);
|
||||
BUG_ON(ret);
|
||||
@ -2808,7 +2808,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
|
||||
trans->transid);
|
||||
btrfs_mark_buffer_dirty(upper->eb);
|
||||
|
||||
ret = btrfs_inc_extent_ref(trans, root->fs_info,
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
node->eb->start, blocksize,
|
||||
upper->eb->start,
|
||||
btrfs_header_owner(upper->eb),
|
||||
@ -3246,6 +3246,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
|
||||
put_page(page);
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
PAGE_SIZE);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode),
|
||||
PAGE_SIZE);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -3275,6 +3277,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
|
||||
put_page(page);
|
||||
|
||||
index++;
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
|
||||
balance_dirty_pages_ratelimited(inode->i_mapping);
|
||||
btrfs_throttle(fs_info);
|
||||
}
|
||||
|
@ -226,10 +226,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
|
||||
struct btrfs_root *root;
|
||||
int err = 0;
|
||||
int ret;
|
||||
bool can_recover = true;
|
||||
|
||||
if (sb_rdonly(fs_info->sb))
|
||||
can_recover = false;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
|
@ -231,7 +231,7 @@ struct scrub_warning {
|
||||
struct btrfs_path *path;
|
||||
u64 extent_item_size;
|
||||
const char *errstr;
|
||||
sector_t sector;
|
||||
u64 physical;
|
||||
u64 logical;
|
||||
struct btrfs_device *dev;
|
||||
};
|
||||
@ -797,10 +797,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
*/
|
||||
for (i = 0; i < ipath->fspath->elem_cnt; ++i)
|
||||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
|
||||
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
|
||||
swarn->errstr, swarn->logical,
|
||||
rcu_str_deref(swarn->dev->name),
|
||||
(unsigned long long)swarn->sector,
|
||||
swarn->physical,
|
||||
root, inum, offset,
|
||||
min(isize - offset, (u64)PAGE_SIZE), nlink,
|
||||
(char *)(unsigned long)ipath->fspath->val[i]);
|
||||
@ -810,10 +810,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
|
||||
err:
|
||||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
|
||||
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
|
||||
swarn->errstr, swarn->logical,
|
||||
rcu_str_deref(swarn->dev->name),
|
||||
(unsigned long long)swarn->sector,
|
||||
swarn->physical,
|
||||
root, inum, offset, ret);
|
||||
|
||||
free_ipath(ipath);
|
||||
@ -845,7 +845,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
||||
if (!path)
|
||||
return;
|
||||
|
||||
swarn.sector = (sblock->pagev[0]->physical) >> 9;
|
||||
swarn.physical = sblock->pagev[0]->physical;
|
||||
swarn.logical = sblock->pagev[0]->logical;
|
||||
swarn.errstr = errstr;
|
||||
swarn.dev = NULL;
|
||||
@ -868,10 +868,10 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
||||
item_size, &ref_root,
|
||||
&ref_level);
|
||||
btrfs_warn_in_rcu(fs_info,
|
||||
"%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu",
|
||||
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
|
||||
errstr, swarn.logical,
|
||||
rcu_str_deref(dev->name),
|
||||
(unsigned long long)swarn.sector,
|
||||
swarn.physical,
|
||||
ref_level ? "node" : "leaf",
|
||||
ret < 0 ? -1 : ref_level,
|
||||
ret < 0 ? -1 : ref_root);
|
||||
@ -883,7 +883,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
|
||||
swarn.dev = dev;
|
||||
iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
extent_item_pos, 1,
|
||||
scrub_print_warning_inode, &swarn);
|
||||
scrub_print_warning_inode, &swarn, false);
|
||||
}
|
||||
|
||||
out:
|
||||
@ -1047,7 +1047,7 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
|
||||
* can be found.
|
||||
*/
|
||||
ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
|
||||
scrub_fixup_readpage, fixup);
|
||||
scrub_fixup_readpage, fixup, false);
|
||||
if (ret < 0) {
|
||||
uncorrectable = 1;
|
||||
goto out;
|
||||
@ -4390,7 +4390,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
|
||||
}
|
||||
|
||||
ret = iterate_inodes_from_logical(logical, fs_info, path,
|
||||
record_inode_for_nocow, nocow_ctx);
|
||||
record_inode_for_nocow, nocow_ctx, false);
|
||||
if (ret != 0 && ret != -ENOENT) {
|
||||
btrfs_warn(fs_info,
|
||||
"iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/compat.h>
|
||||
|
||||
#include "send.h"
|
||||
#include "backref.h"
|
||||
@ -992,7 +993,6 @@ typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
|
||||
* path must point to the dir item when called.
|
||||
*/
|
||||
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
|
||||
struct btrfs_key *found_key,
|
||||
iterate_dir_item_t iterate, void *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -1271,12 +1271,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
|
||||
*/
|
||||
if (ino >= bctx->cur_objectid)
|
||||
return 0;
|
||||
#if 0
|
||||
if (ino > bctx->cur_objectid)
|
||||
return 0;
|
||||
if (offset + bctx->extent_len > bctx->cur_offset)
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
bctx->found++;
|
||||
@ -1429,7 +1423,7 @@ static int find_extent_clone(struct send_ctx *sctx,
|
||||
extent_item_pos = 0;
|
||||
ret = iterate_extent_inodes(fs_info, found_key.objectid,
|
||||
extent_item_pos, 1, __iterate_backrefs,
|
||||
backref_ctx);
|
||||
backref_ctx, false);
|
||||
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@ -4106,8 +4100,8 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int record_ref(struct btrfs_root *root, int num, u64 dir, int index,
|
||||
struct fs_path *name, void *ctx, struct list_head *refs)
|
||||
static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name,
|
||||
void *ctx, struct list_head *refs)
|
||||
{
|
||||
int ret = 0;
|
||||
struct send_ctx *sctx = ctx;
|
||||
@ -4143,8 +4137,7 @@ static int __record_new_ref(int num, u64 dir, int index,
|
||||
void *ctx)
|
||||
{
|
||||
struct send_ctx *sctx = ctx;
|
||||
return record_ref(sctx->send_root, num, dir, index, name,
|
||||
ctx, &sctx->new_refs);
|
||||
return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
|
||||
}
|
||||
|
||||
|
||||
@ -4153,8 +4146,8 @@ static int __record_deleted_ref(int num, u64 dir, int index,
|
||||
void *ctx)
|
||||
{
|
||||
struct send_ctx *sctx = ctx;
|
||||
return record_ref(sctx->parent_root, num, dir, index, name,
|
||||
ctx, &sctx->deleted_refs);
|
||||
return record_ref(sctx->parent_root, dir, name, ctx,
|
||||
&sctx->deleted_refs);
|
||||
}
|
||||
|
||||
static int record_new_ref(struct send_ctx *sctx)
|
||||
@ -4498,7 +4491,7 @@ static int process_new_xattr(struct send_ctx *sctx)
|
||||
int ret = 0;
|
||||
|
||||
ret = iterate_dir_item(sctx->send_root, sctx->left_path,
|
||||
sctx->cmp_key, __process_new_xattr, sctx);
|
||||
__process_new_xattr, sctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -4506,7 +4499,7 @@ static int process_new_xattr(struct send_ctx *sctx)
|
||||
static int process_deleted_xattr(struct send_ctx *sctx)
|
||||
{
|
||||
return iterate_dir_item(sctx->parent_root, sctx->right_path,
|
||||
sctx->cmp_key, __process_deleted_xattr, sctx);
|
||||
__process_deleted_xattr, sctx);
|
||||
}
|
||||
|
||||
struct find_xattr_ctx {
|
||||
@ -4551,7 +4544,7 @@ static int find_xattr(struct btrfs_root *root,
|
||||
ctx.found_data = NULL;
|
||||
ctx.found_data_len = 0;
|
||||
|
||||
ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
|
||||
ret = iterate_dir_item(root, path, __find_xattr, &ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@ -4621,11 +4614,11 @@ static int process_changed_xattr(struct send_ctx *sctx)
|
||||
int ret = 0;
|
||||
|
||||
ret = iterate_dir_item(sctx->send_root, sctx->left_path,
|
||||
sctx->cmp_key, __process_changed_new_xattr, sctx);
|
||||
__process_changed_new_xattr, sctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
|
||||
sctx->cmp_key, __process_changed_deleted_xattr, sctx);
|
||||
__process_changed_deleted_xattr, sctx);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
@ -4675,8 +4668,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = iterate_dir_item(root, path, &found_key,
|
||||
__process_new_xattr, sctx);
|
||||
ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@ -4723,16 +4715,27 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
|
||||
/* initial readahead */
|
||||
memset(&sctx->ra, 0, sizeof(struct file_ra_state));
|
||||
file_ra_state_init(&sctx->ra, inode->i_mapping);
|
||||
page_cache_sync_readahead(inode->i_mapping, &sctx->ra, NULL, index,
|
||||
last_index - index + 1);
|
||||
|
||||
while (index <= last_index) {
|
||||
unsigned cur_len = min_t(unsigned, len,
|
||||
PAGE_SIZE - pg_offset);
|
||||
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
|
||||
|
||||
page = find_lock_page(inode->i_mapping, index);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
|
||||
NULL, index, last_index + 1 - index);
|
||||
|
||||
page = find_or_create_page(inode->i_mapping, index,
|
||||
GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (PageReadahead(page)) {
|
||||
page_cache_async_readahead(inode->i_mapping, &sctx->ra,
|
||||
NULL, page, index, last_index + 1 - index);
|
||||
}
|
||||
|
||||
if (!PageUptodate(page)) {
|
||||
@ -6162,9 +6165,7 @@ out:
|
||||
* Updates compare related fields in sctx and simply forwards to the actual
|
||||
* changed_xxx functions.
|
||||
*/
|
||||
static int changed_cb(struct btrfs_root *left_root,
|
||||
struct btrfs_root *right_root,
|
||||
struct btrfs_path *left_path,
|
||||
static int changed_cb(struct btrfs_path *left_path,
|
||||
struct btrfs_path *right_path,
|
||||
struct btrfs_key *key,
|
||||
enum btrfs_compare_tree_result result,
|
||||
@ -6246,8 +6247,8 @@ static int full_send_tree(struct send_ctx *sctx)
|
||||
slot = path->slots[0];
|
||||
btrfs_item_key_to_cpu(eb, &found_key, slot);
|
||||
|
||||
ret = changed_cb(send_root, NULL, path, NULL,
|
||||
&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
|
||||
ret = changed_cb(path, NULL, &found_key,
|
||||
BTRFS_COMPARE_TREE_NEW, sctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@ -6365,13 +6366,12 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
|
||||
spin_unlock(&root->root_item_lock);
|
||||
}
|
||||
|
||||
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
|
||||
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
|
||||
struct btrfs_fs_info *fs_info = send_root->fs_info;
|
||||
struct btrfs_root *clone_root;
|
||||
struct btrfs_ioctl_send_args *arg = NULL;
|
||||
struct btrfs_key key;
|
||||
struct send_ctx *sctx = NULL;
|
||||
u32 i;
|
||||
@ -6407,13 +6407,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
|
||||
goto out;
|
||||
}
|
||||
|
||||
arg = memdup_user(arg_, sizeof(*arg));
|
||||
if (IS_ERR(arg)) {
|
||||
ret = PTR_ERR(arg);
|
||||
arg = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that we don't overflow at later allocations, we request
|
||||
* clone_sources_count + 1 items, and compare to unsigned long inside
|
||||
@ -6654,7 +6647,6 @@ out:
|
||||
if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
|
||||
btrfs_root_dec_send_in_progress(sctx->parent_root);
|
||||
|
||||
kfree(arg);
|
||||
kvfree(clone_sources_tmp);
|
||||
|
||||
if (sctx) {
|
||||
|
@ -130,5 +130,5 @@ enum {
|
||||
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
|
||||
|
||||
#ifdef __KERNEL__
|
||||
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
|
||||
long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
|
||||
#endif
|
||||
|
@ -202,7 +202,6 @@ static struct ratelimit_state printk_limits[] = {
|
||||
|
||||
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
@ -228,7 +227,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
vaf.va = &args;
|
||||
|
||||
if (__ratelimit(ratelimit))
|
||||
printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
|
||||
printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
|
||||
fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
|
||||
|
||||
va_end(args);
|
||||
}
|
||||
@ -292,7 +292,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
|
||||
vaf.va = &args;
|
||||
|
||||
errstr = btrfs_decode_error(errno);
|
||||
if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
|
||||
if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR)))
|
||||
panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
|
||||
s_id, function, line, &vaf, errno, errstr);
|
||||
|
||||
@ -325,6 +325,9 @@ enum {
|
||||
Opt_nologreplay, Opt_norecovery,
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
|
||||
#endif
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
Opt_ref_verify,
|
||||
#endif
|
||||
Opt_err,
|
||||
};
|
||||
@ -386,6 +389,9 @@ static const match_table_t tokens = {
|
||||
{Opt_fragment_data, "fragment=data"},
|
||||
{Opt_fragment_metadata, "fragment=metadata"},
|
||||
{Opt_fragment_all, "fragment=all"},
|
||||
#endif
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
{Opt_ref_verify, "ref_verify"},
|
||||
#endif
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
@ -502,6 +508,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
strncmp(args[0].from, "zlib", 4) == 0) {
|
||||
compress_type = "zlib";
|
||||
info->compress_type = BTRFS_COMPRESS_ZLIB;
|
||||
info->compress_level =
|
||||
btrfs_compress_str2level(args[0].from);
|
||||
btrfs_set_opt(info->mount_opt, COMPRESS);
|
||||
btrfs_clear_opt(info->mount_opt, NODATACOW);
|
||||
btrfs_clear_opt(info->mount_opt, NODATASUM);
|
||||
@ -549,9 +557,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
compress_force != saved_compress_force)) ||
|
||||
(!btrfs_test_opt(info, COMPRESS) &&
|
||||
no_compress == 1)) {
|
||||
btrfs_info(info, "%s %s compression",
|
||||
btrfs_info(info, "%s %s compression, level %d",
|
||||
(compress_force) ? "force" : "use",
|
||||
compress_type);
|
||||
compress_type, info->compress_level);
|
||||
}
|
||||
compress_force = false;
|
||||
break;
|
||||
@ -824,6 +832,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
btrfs_info(info, "fragmenting data");
|
||||
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
|
||||
break;
|
||||
#endif
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
case Opt_ref_verify:
|
||||
btrfs_info(info, "doing ref verification");
|
||||
btrfs_set_opt(info->mount_opt, REF_VERIFY);
|
||||
break;
|
||||
#endif
|
||||
case Opt_err:
|
||||
btrfs_info(info, "unrecognized mount option '%s'", p);
|
||||
@ -1205,8 +1219,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
||||
* happens. The pending operations are delayed to the
|
||||
* next commit after thawing.
|
||||
*/
|
||||
if (__sb_start_write(sb, SB_FREEZE_WRITE, false))
|
||||
__sb_end_write(sb, SB_FREEZE_WRITE);
|
||||
if (sb_start_write_trylock(sb))
|
||||
sb_end_write(sb);
|
||||
else
|
||||
return 0;
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
@ -1246,6 +1260,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||
seq_printf(seq, ",compress-force=%s", compress_type);
|
||||
else
|
||||
seq_printf(seq, ",compress=%s", compress_type);
|
||||
if (info->compress_level)
|
||||
seq_printf(seq, ":%d", info->compress_level);
|
||||
}
|
||||
if (btrfs_test_opt(info, NOSSD))
|
||||
seq_puts(seq, ",nossd");
|
||||
@ -1305,6 +1321,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||
if (btrfs_test_opt(info, FRAGMENT_METADATA))
|
||||
seq_puts(seq, ",fragment=metadata");
|
||||
#endif
|
||||
if (btrfs_test_opt(info, REF_VERIFY))
|
||||
seq_puts(seq, ",ref_verify");
|
||||
seq_printf(seq, ",subvolid=%llu",
|
||||
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
|
||||
seq_puts(seq, ",subvol=");
|
||||
@ -2112,7 +2130,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
* succeed even if the Avail is zero. But this is better than the other
|
||||
* way around.
|
||||
*/
|
||||
thresh = 4 * 1024 * 1024;
|
||||
thresh = SZ_4M;
|
||||
|
||||
if (!mixed && total_free_meta - thresh < block_rsv->size)
|
||||
buf->f_bavail = 0;
|
||||
@ -2318,6 +2336,9 @@ static void btrfs_print_mod_info(void)
|
||||
#endif
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
", integrity-checker=on"
|
||||
#endif
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
", ref-verify=on"
|
||||
#endif
|
||||
"\n",
|
||||
btrfs_crc32c_impl());
|
||||
|
@ -247,7 +247,7 @@ static ssize_t global_rsv_size_show(struct kobject *kobj,
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
|
||||
return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
|
||||
}
|
||||
BTRFS_ATTR(global_rsv_size, global_rsv_size_show);
|
||||
BTRFS_ATTR(allocation, global_rsv_size, global_rsv_size_show);
|
||||
|
||||
static ssize_t global_rsv_reserved_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
@ -256,15 +256,15 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj,
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
|
||||
return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
|
||||
}
|
||||
BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show);
|
||||
BTRFS_ATTR(allocation, global_rsv_reserved, global_rsv_reserved_show);
|
||||
|
||||
#define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
|
||||
#define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj)
|
||||
|
||||
static ssize_t raid_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
BTRFS_RAID_ATTR(total_bytes, raid_bytes_show);
|
||||
BTRFS_RAID_ATTR(used_bytes, raid_bytes_show);
|
||||
BTRFS_ATTR(raid, total_bytes, raid_bytes_show);
|
||||
BTRFS_ATTR(raid, used_bytes, raid_bytes_show);
|
||||
|
||||
static ssize_t raid_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
@ -277,7 +277,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
|
||||
|
||||
down_read(&sinfo->groups_sem);
|
||||
list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
|
||||
if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes))
|
||||
if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
|
||||
val += block_group->key.offset;
|
||||
else
|
||||
val += btrfs_block_group_used(&block_group->item);
|
||||
@ -287,8 +287,8 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
|
||||
}
|
||||
|
||||
static struct attribute *raid_attributes[] = {
|
||||
BTRFS_RAID_ATTR_PTR(total_bytes),
|
||||
BTRFS_RAID_ATTR_PTR(used_bytes),
|
||||
BTRFS_ATTR_PTR(raid, total_bytes),
|
||||
BTRFS_ATTR_PTR(raid, used_bytes),
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -311,7 +311,7 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \
|
||||
struct btrfs_space_info *sinfo = to_space_info(kobj); \
|
||||
return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \
|
||||
} \
|
||||
BTRFS_ATTR(field, btrfs_space_info_show_##field)
|
||||
BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
|
||||
|
||||
static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
@ -331,19 +331,20 @@ SPACE_INFO_ATTR(bytes_may_use);
|
||||
SPACE_INFO_ATTR(bytes_readonly);
|
||||
SPACE_INFO_ATTR(disk_used);
|
||||
SPACE_INFO_ATTR(disk_total);
|
||||
BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned);
|
||||
BTRFS_ATTR(space_info, total_bytes_pinned,
|
||||
btrfs_space_info_show_total_bytes_pinned);
|
||||
|
||||
static struct attribute *space_info_attrs[] = {
|
||||
BTRFS_ATTR_PTR(flags),
|
||||
BTRFS_ATTR_PTR(total_bytes),
|
||||
BTRFS_ATTR_PTR(bytes_used),
|
||||
BTRFS_ATTR_PTR(bytes_pinned),
|
||||
BTRFS_ATTR_PTR(bytes_reserved),
|
||||
BTRFS_ATTR_PTR(bytes_may_use),
|
||||
BTRFS_ATTR_PTR(bytes_readonly),
|
||||
BTRFS_ATTR_PTR(disk_used),
|
||||
BTRFS_ATTR_PTR(disk_total),
|
||||
BTRFS_ATTR_PTR(total_bytes_pinned),
|
||||
BTRFS_ATTR_PTR(space_info, flags),
|
||||
BTRFS_ATTR_PTR(space_info, total_bytes),
|
||||
BTRFS_ATTR_PTR(space_info, bytes_used),
|
||||
BTRFS_ATTR_PTR(space_info, bytes_pinned),
|
||||
BTRFS_ATTR_PTR(space_info, bytes_reserved),
|
||||
BTRFS_ATTR_PTR(space_info, bytes_may_use),
|
||||
BTRFS_ATTR_PTR(space_info, bytes_readonly),
|
||||
BTRFS_ATTR_PTR(space_info, disk_used),
|
||||
BTRFS_ATTR_PTR(space_info, disk_total),
|
||||
BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -361,8 +362,8 @@ struct kobj_type space_info_ktype = {
|
||||
};
|
||||
|
||||
static const struct attribute *allocation_attrs[] = {
|
||||
BTRFS_ATTR_PTR(global_rsv_reserved),
|
||||
BTRFS_ATTR_PTR(global_rsv_size),
|
||||
BTRFS_ATTR_PTR(allocation, global_rsv_reserved),
|
||||
BTRFS_ATTR_PTR(allocation, global_rsv_size),
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -415,7 +416,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store);
|
||||
BTRFS_ATTR_RW(, label, btrfs_label_show, btrfs_label_store);
|
||||
|
||||
static ssize_t btrfs_nodesize_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
@ -425,7 +426,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(nodesize, btrfs_nodesize_show);
|
||||
BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
|
||||
|
||||
static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
@ -436,7 +437,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
|
||||
fs_info->super_copy->sectorsize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(sectorsize, btrfs_sectorsize_show);
|
||||
BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
|
||||
|
||||
static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
@ -447,7 +448,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
|
||||
fs_info->super_copy->sectorsize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show);
|
||||
BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
|
||||
|
||||
static ssize_t quota_override_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
@ -487,14 +488,14 @@ static ssize_t quota_override_store(struct kobject *kobj,
|
||||
return len;
|
||||
}
|
||||
|
||||
BTRFS_ATTR_RW(quota_override, quota_override_show, quota_override_store);
|
||||
BTRFS_ATTR_RW(, quota_override, quota_override_show, quota_override_store);
|
||||
|
||||
static const struct attribute *btrfs_attrs[] = {
|
||||
BTRFS_ATTR_PTR(label),
|
||||
BTRFS_ATTR_PTR(nodesize),
|
||||
BTRFS_ATTR_PTR(sectorsize),
|
||||
BTRFS_ATTR_PTR(clone_alignment),
|
||||
BTRFS_ATTR_PTR(quota_override),
|
||||
BTRFS_ATTR_PTR(, label),
|
||||
BTRFS_ATTR_PTR(, nodesize),
|
||||
BTRFS_ATTR_PTR(, sectorsize),
|
||||
BTRFS_ATTR_PTR(, clone_alignment),
|
||||
BTRFS_ATTR_PTR(, quota_override),
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -21,21 +21,16 @@ enum btrfs_feature_set {
|
||||
.store = _store, \
|
||||
}
|
||||
|
||||
#define BTRFS_ATTR_RW(_name, _show, _store) \
|
||||
static struct kobj_attribute btrfs_attr_##_name = \
|
||||
#define BTRFS_ATTR_RW(_prefix, _name, _show, _store) \
|
||||
static struct kobj_attribute btrfs_attr_##_prefix##_##_name = \
|
||||
__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
|
||||
|
||||
#define BTRFS_ATTR(_name, _show) \
|
||||
static struct kobj_attribute btrfs_attr_##_name = \
|
||||
#define BTRFS_ATTR(_prefix, _name, _show) \
|
||||
static struct kobj_attribute btrfs_attr_##_prefix##_##_name = \
|
||||
__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
|
||||
|
||||
#define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr)
|
||||
|
||||
#define BTRFS_RAID_ATTR(_name, _show) \
|
||||
static struct kobj_attribute btrfs_raid_attr_##_name = \
|
||||
__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
|
||||
|
||||
#define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr)
|
||||
#define BTRFS_ATTR_PTR(_prefix, _name) \
|
||||
(&btrfs_attr_##_prefix##_##_name.attr)
|
||||
|
||||
|
||||
struct btrfs_feature_attr {
|
||||
@ -44,15 +39,16 @@ struct btrfs_feature_attr {
|
||||
u64 feature_bit;
|
||||
};
|
||||
|
||||
#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit) \
|
||||
static struct btrfs_feature_attr btrfs_attr_##_name = { \
|
||||
#define BTRFS_FEAT_ATTR(_name, _feature_set, _feature_prefix, _feature_bit) \
|
||||
static struct btrfs_feature_attr btrfs_attr_features_##_name = { \
|
||||
.kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \
|
||||
btrfs_feature_attr_show, \
|
||||
btrfs_feature_attr_store), \
|
||||
.feature_set = _feature_set, \
|
||||
.feature_bit = _prefix ##_## _feature_bit, \
|
||||
.feature_bit = _feature_prefix ##_## _feature_bit, \
|
||||
}
|
||||
#define BTRFS_FEAT_ATTR_PTR(_name) (&btrfs_attr_##_name.kobj_attr.attr)
|
||||
#define BTRFS_FEAT_ATTR_PTR(_name) \
|
||||
(&btrfs_attr_features_##_name.kobj_attr.attr)
|
||||
|
||||
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
|
||||
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
|
||||
|
@ -500,7 +500,8 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
test_msg("Couldn't allocate path\n");
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = add_block_group_free_space(&trans, root->fs_info, cache);
|
||||
|
@ -770,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
|
||||
if (IS_ERR(em)) {
|
||||
test_msg("Got an error when we shouldn't have\n");
|
||||
goto out;
|
||||
@ -968,7 +968,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
btrfs_test_inode_set_ops(inode);
|
||||
|
||||
/* [BTRFS_MAX_EXTENT_SIZE] */
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
NULL, 0);
|
||||
if (ret) {
|
||||
@ -983,7 +982,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
}
|
||||
|
||||
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
|
||||
BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
|
||||
NULL, 0);
|
||||
@ -1003,7 +1001,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
BTRFS_MAX_EXTENT_SIZE >> 1,
|
||||
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
|
||||
EXTENT_DELALLOC | EXTENT_DIRTY |
|
||||
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
|
||||
EXTENT_UPTODATE, 0, 0,
|
||||
NULL, GFP_KERNEL);
|
||||
if (ret) {
|
||||
test_msg("clear_extent_bit returned %d\n", ret);
|
||||
@ -1017,7 +1015,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
}
|
||||
|
||||
/* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
|
||||
(BTRFS_MAX_EXTENT_SIZE >> 1)
|
||||
+ sectorsize - 1,
|
||||
@ -1035,12 +1032,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
|
||||
/*
|
||||
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
|
||||
*
|
||||
* I'm artificially adding 2 to outstanding_extents because in the
|
||||
* buffered IO case we'd add things up as we go, but I don't feel like
|
||||
* doing that here, this isn't the interesting case we want to test.
|
||||
*/
|
||||
BTRFS_I(inode)->outstanding_extents += 2;
|
||||
ret = btrfs_set_extent_delalloc(inode,
|
||||
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
|
||||
(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
|
||||
@ -1059,7 +1051,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
/*
|
||||
* [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
|
||||
*/
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
ret = btrfs_set_extent_delalloc(inode,
|
||||
BTRFS_MAX_EXTENT_SIZE + sectorsize,
|
||||
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
|
||||
@ -1079,7 +1070,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
BTRFS_MAX_EXTENT_SIZE + sectorsize,
|
||||
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
|
||||
EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
|
||||
EXTENT_UPTODATE, 0, 0,
|
||||
NULL, GFP_KERNEL);
|
||||
if (ret) {
|
||||
test_msg("clear_extent_bit returned %d\n", ret);
|
||||
@ -1096,7 +1087,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
* Refill the hole again just for good measure, because I thought it
|
||||
* might fail and I'd rather satisfy my paranoia at this point.
|
||||
*/
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
ret = btrfs_set_extent_delalloc(inode,
|
||||
BTRFS_MAX_EXTENT_SIZE + sectorsize,
|
||||
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0);
|
||||
@ -1114,7 +1104,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
/* Empty */
|
||||
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
|
||||
EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
|
||||
EXTENT_UPTODATE, 0, 0,
|
||||
NULL, GFP_KERNEL);
|
||||
if (ret) {
|
||||
test_msg("clear_extent_bit returned %d\n", ret);
|
||||
@ -1131,7 +1121,7 @@ out:
|
||||
if (ret)
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
|
||||
EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
|
||||
EXTENT_UPTODATE, 0, 0,
|
||||
NULL, GFP_KERNEL);
|
||||
iput(inode);
|
||||
btrfs_free_dummy_root(root);
|
||||
|
@ -240,7 +240,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
||||
* we can only call btrfs_qgroup_account_extent() directly to test
|
||||
* quota.
|
||||
*/
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_msg("Couldn't find old roots: %d\n", ret);
|
||||
@ -252,7 +253,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
ulist_free(new_roots);
|
||||
@ -275,7 +277,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
||||
old_roots = NULL;
|
||||
new_roots = NULL;
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_msg("Couldn't find old roots: %d\n", ret);
|
||||
@ -286,7 +289,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
ulist_free(new_roots);
|
||||
@ -337,7 +341,8 @@ static int test_multiple_refs(struct btrfs_root *root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_msg("Couldn't find old roots: %d\n", ret);
|
||||
@ -349,7 +354,8 @@ static int test_multiple_refs(struct btrfs_root *root,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
ulist_free(new_roots);
|
||||
@ -370,7 +376,8 @@ static int test_multiple_refs(struct btrfs_root *root,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_msg("Couldn't find old roots: %d\n", ret);
|
||||
@ -382,7 +389,8 @@ static int test_multiple_refs(struct btrfs_root *root,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
ulist_free(new_roots);
|
||||
@ -409,7 +417,8 @@ static int test_multiple_refs(struct btrfs_root *root,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
test_msg("Couldn't find old roots: %d\n", ret);
|
||||
@ -421,7 +430,8 @@ static int test_multiple_refs(struct btrfs_root *root,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
|
||||
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
|
||||
false);
|
||||
if (ret) {
|
||||
ulist_free(old_roots);
|
||||
ulist_free(new_roots);
|
||||
|
@ -797,8 +797,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
||||
if (fs_info->global_block_rsv.space_info->full &&
|
||||
btrfs_check_space_for_delayed_refs(trans, fs_info))
|
||||
if (btrfs_check_space_for_delayed_refs(trans, fs_info))
|
||||
return 1;
|
||||
|
||||
return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
|
||||
@ -950,6 +949,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
u64 start = 0;
|
||||
u64 end;
|
||||
|
||||
atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers);
|
||||
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
mark, &cached_state)) {
|
||||
bool wait_writeback = false;
|
||||
@ -985,6 +985,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
cond_resched();
|
||||
start = end + 1;
|
||||
}
|
||||
atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers);
|
||||
return werr;
|
||||
}
|
||||
|
||||
@ -1915,8 +1916,17 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
|
||||
|
||||
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/*
|
||||
* We use writeback_inodes_sb here because if we used
|
||||
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
|
||||
* Currently are holding the fs freeze lock, if we do an async flush
|
||||
* we'll do btrfs_join_transaction() and deadlock because we need to
|
||||
* wait for the fs freeze lock. Using the direct flushing we benefit
|
||||
* from already being in a transaction and our join_transaction doesn't
|
||||
* have to re-take the fs freeze lock.
|
||||
*/
|
||||
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
|
||||
return btrfs_start_delalloc_roots(fs_info, 1, -1);
|
||||
writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
425
fs/btrfs/tree-checker.c
Normal file
425
fs/btrfs/tree-checker.c
Normal file
@ -0,0 +1,425 @@
|
||||
/*
|
||||
* Copyright (C) Qu Wenruo 2017. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The module is used to catch unexpected/corrupted tree block data.
|
||||
* Such behavior can be caused either by a fuzzed image or bugs.
|
||||
*
|
||||
* The objective is to do leaf/node validation checks when tree block is read
|
||||
* from disk, and check *every* possible member, so other code won't
|
||||
* need to check them again.
|
||||
*
|
||||
* Due to the potential and unwanted damage, every checker needs to be
|
||||
* carefully reviewed so that it does not prevent mount of valid images.
|
||||
*/
|
||||
|
||||
#include "ctree.h"
|
||||
#include "tree-checker.h"
|
||||
#include "disk-io.h"
|
||||
#include "compression.h"
|
||||
|
||||
/*
|
||||
* Error message should follow the following format:
|
||||
* corrupt <type>: <identifier>, <reason>[, <bad_value>]
|
||||
*
|
||||
* @type: leaf or node
|
||||
* @identifier: the necessary info to locate the leaf/node.
|
||||
* It's recommended to decode key.objectid/offset if it's
|
||||
* meaningful.
|
||||
* @reason: describe the error
|
||||
* @bad_value: optional, it's recommended to output bad value and its
|
||||
* expected value (range).
|
||||
*
|
||||
* Since comma is used to separate the components, only space is allowed
|
||||
* inside each component.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
|
||||
* Allows callers to customize the output.
|
||||
*/
|
||||
__printf(4, 5)
|
||||
static void generic_err(const struct btrfs_root *root,
|
||||
const struct extent_buffer *eb, int slot,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
va_start(args, fmt);
|
||||
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
|
||||
btrfs_crit(root->fs_info,
|
||||
"corrupt %s: root=%llu block=%llu slot=%d, %pV",
|
||||
btrfs_header_level(eb) == 0 ? "leaf" : "node",
|
||||
root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
/*
|
||||
* Customized reporter for extent data item, since its key objectid and
|
||||
* offset has its own meaning.
|
||||
*/
|
||||
__printf(4, 5)
|
||||
static void file_extent_err(const struct btrfs_root *root,
|
||||
const struct extent_buffer *eb, int slot,
|
||||
const char *fmt, ...)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
btrfs_item_key_to_cpu(eb, &key, slot);
|
||||
va_start(args, fmt);
|
||||
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
|
||||
btrfs_crit(root->fs_info,
|
||||
"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
|
||||
btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
|
||||
btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
/*
 * Check that btrfs_file_extent_##name of @fi is aligned to @alignment and
 * report a corruption through file_extent_err() when it is not.
 *
 * The field is read exactly once, so the value that gets reported is the very
 * value that failed the check.
 *
 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
 * Else return 1
 */
#define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment)		      \
({									      \
	u64 __fe_val = btrfs_file_extent_##name((leaf), (fi));		      \
	int __fe_unaligned = !IS_ALIGNED(__fe_val, (alignment));	      \
									      \
	if (__fe_unaligned)						      \
		file_extent_err((root), (leaf), (slot),			      \
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), __fe_val, (alignment));		      \
	__fe_unaligned;							      \
})
|
||||
|
||||
static int check_extent_data_item(struct btrfs_root *root,
|
||||
struct extent_buffer *leaf,
|
||||
struct btrfs_key *key, int slot)
|
||||
{
|
||||
struct btrfs_file_extent_item *fi;
|
||||
u32 sectorsize = root->fs_info->sectorsize;
|
||||
u32 item_size = btrfs_item_size_nr(leaf, slot);
|
||||
|
||||
if (!IS_ALIGNED(key->offset, sectorsize)) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"unaligned file_offset for file extent, have %llu should be aligned to %u",
|
||||
key->offset, sectorsize);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
|
||||
if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"invalid type for file extent, have %u expect range [0, %u]",
|
||||
btrfs_file_extent_type(leaf, fi),
|
||||
BTRFS_FILE_EXTENT_TYPES);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Support for new compression/encrption must introduce incompat flag,
|
||||
* and must be caught in open_ctree().
|
||||
*/
|
||||
if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"invalid compression for file extent, have %u expect range [0, %u]",
|
||||
btrfs_file_extent_compression(leaf, fi),
|
||||
BTRFS_COMPRESS_TYPES);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (btrfs_file_extent_encryption(leaf, fi)) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"invalid encryption for file extent, have %u expect 0",
|
||||
btrfs_file_extent_encryption(leaf, fi));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
|
||||
/* Inline extent must have 0 as key offset */
|
||||
if (key->offset) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"invalid file_offset for inline file extent, have %llu expect 0",
|
||||
key->offset);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Compressed inline extent has no on-disk size, skip it */
|
||||
if (btrfs_file_extent_compression(leaf, fi) !=
|
||||
BTRFS_COMPRESS_NONE)
|
||||
return 0;
|
||||
|
||||
/* Uncompressed inline extent size must match item size */
|
||||
if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
|
||||
btrfs_file_extent_ram_bytes(leaf, fi)) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
|
||||
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
|
||||
btrfs_file_extent_ram_bytes(leaf, fi));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Regular or preallocated extent has fixed item size */
|
||||
if (item_size != sizeof(*fi)) {
|
||||
file_extent_err(root, leaf, slot,
|
||||
"invalid item size for reg/prealloc file extent, have %u expect %zu",
|
||||
item_size, sizeof(*fi));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) ||
|
||||
CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) ||
|
||||
CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) ||
|
||||
CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) ||
|
||||
CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize))
|
||||
return -EUCLEAN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
|
||||
struct btrfs_key *key, int slot)
|
||||
{
|
||||
u32 sectorsize = root->fs_info->sectorsize;
|
||||
u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
|
||||
|
||||
if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
|
||||
generic_err(root, leaf, slot,
|
||||
"invalid key objectid for csum item, have %llu expect %llu",
|
||||
key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (!IS_ALIGNED(key->offset, sectorsize)) {
|
||||
generic_err(root, leaf, slot,
|
||||
"unaligned key offset for csum item, have %llu should be aligned to %u",
|
||||
key->offset, sectorsize);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
|
||||
generic_err(root, leaf, slot,
|
||||
"unaligned item size for csum item, have %u should be aligned to %u",
|
||||
btrfs_item_size_nr(leaf, slot), csumsize);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Common point to switch the item-specific validation.
|
||||
*/
|
||||
static int check_leaf_item(struct btrfs_root *root,
|
||||
struct extent_buffer *leaf,
|
||||
struct btrfs_key *key, int slot)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch (key->type) {
|
||||
case BTRFS_EXTENT_DATA_KEY:
|
||||
ret = check_extent_data_item(root, leaf, key, slot);
|
||||
break;
|
||||
case BTRFS_EXTENT_CSUM_KEY:
|
||||
ret = check_csum_item(root, leaf, key, slot);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
/* No valid key type is 0, so all key should be larger than this key */
|
||||
struct btrfs_key prev_key = {0, 0, 0};
|
||||
struct btrfs_key key;
|
||||
u32 nritems = btrfs_header_nritems(leaf);
|
||||
int slot;
|
||||
|
||||
/*
|
||||
* Extent buffers from a relocation tree have a owner field that
|
||||
* corresponds to the subvolume tree they are based on. So just from an
|
||||
* extent buffer alone we can not find out what is the id of the
|
||||
* corresponding subvolume tree, so we can not figure out if the extent
|
||||
* buffer corresponds to the root of the relocation tree or not. So
|
||||
* skip this check for relocation trees.
|
||||
*/
|
||||
if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
|
||||
struct btrfs_root *check_root;
|
||||
|
||||
key.objectid = btrfs_header_owner(leaf);
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
check_root = btrfs_get_fs_root(fs_info, &key, false);
|
||||
/*
|
||||
* The only reason we also check NULL here is that during
|
||||
* open_ctree() some roots has not yet been set up.
|
||||
*/
|
||||
if (!IS_ERR_OR_NULL(check_root)) {
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = btrfs_root_node(check_root);
|
||||
/* if leaf is the root, then it's fine */
|
||||
if (leaf != eb) {
|
||||
generic_err(check_root, leaf, 0,
|
||||
"invalid nritems, have %u should not be 0 for non-root leaf",
|
||||
nritems);
|
||||
free_extent_buffer(eb);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (nritems == 0)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Check the following things to make sure this is a good leaf, and
|
||||
* leaf users won't need to bother with similar sanity checks:
|
||||
*
|
||||
* 1) key ordering
|
||||
* 2) item offset and size
|
||||
* No overlap, no hole, all inside the leaf.
|
||||
* 3) item content
|
||||
* If possible, do comprehensive sanity check.
|
||||
* NOTE: All checks must only rely on the item data itself.
|
||||
*/
|
||||
for (slot = 0; slot < nritems; slot++) {
|
||||
u32 item_end_expected;
|
||||
int ret;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
|
||||
/* Make sure the keys are in the right order */
|
||||
if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
|
||||
generic_err(root, leaf, slot,
|
||||
"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
|
||||
prev_key.objectid, prev_key.type,
|
||||
prev_key.offset, key.objectid, key.type,
|
||||
key.offset);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the offset and ends are right, remember that the
|
||||
* item data starts at the end of the leaf and grows towards the
|
||||
* front.
|
||||
*/
|
||||
if (slot == 0)
|
||||
item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
|
||||
else
|
||||
item_end_expected = btrfs_item_offset_nr(leaf,
|
||||
slot - 1);
|
||||
if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
|
||||
generic_err(root, leaf, slot,
|
||||
"unexpected item end, have %u expect %u",
|
||||
btrfs_item_end_nr(leaf, slot),
|
||||
item_end_expected);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to make sure that we don't point outside of the leaf,
|
||||
* just in case all the items are consistent to each other, but
|
||||
* all point outside of the leaf.
|
||||
*/
|
||||
if (btrfs_item_end_nr(leaf, slot) >
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info)) {
|
||||
generic_err(root, leaf, slot,
|
||||
"slot end outside of leaf, have %u expect range [0, %u]",
|
||||
btrfs_item_end_nr(leaf, slot),
|
||||
BTRFS_LEAF_DATA_SIZE(fs_info));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Also check if the item pointer overlaps with btrfs item. */
|
||||
if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
|
||||
btrfs_item_ptr_offset(leaf, slot)) {
|
||||
generic_err(root, leaf, slot,
|
||||
"slot overlaps with its data, item end %lu data start %lu",
|
||||
btrfs_item_nr_offset(slot) +
|
||||
sizeof(struct btrfs_item),
|
||||
btrfs_item_ptr_offset(leaf, slot));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Check if the item size and content meet other criteria */
|
||||
ret = check_leaf_item(root, leaf, &key, slot);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
prev_key.objectid = key.objectid;
|
||||
prev_key.type = key.type;
|
||||
prev_key.offset = key.offset;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
|
||||
{
|
||||
unsigned long nr = btrfs_header_nritems(node);
|
||||
struct btrfs_key key, next_key;
|
||||
int slot;
|
||||
u64 bytenr;
|
||||
int ret = 0;
|
||||
|
||||
if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
|
||||
btrfs_crit(root->fs_info,
|
||||
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
|
||||
root->objectid, node->start,
|
||||
nr == 0 ? "small" : "large", nr,
|
||||
BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
for (slot = 0; slot < nr - 1; slot++) {
|
||||
bytenr = btrfs_node_blockptr(node, slot);
|
||||
btrfs_node_key_to_cpu(node, &key, slot);
|
||||
btrfs_node_key_to_cpu(node, &next_key, slot + 1);
|
||||
|
||||
if (!bytenr) {
|
||||
generic_err(root, node, slot,
|
||||
"invalid NULL node pointer");
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
|
||||
generic_err(root, node, slot,
|
||||
"unaligned pointer, have %llu should be aligned to %u",
|
||||
bytenr, root->fs_info->sectorsize);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
|
||||
generic_err(root, node, slot,
|
||||
"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
|
||||
key.objectid, key.type, key.offset,
|
||||
next_key.objectid, next_key.type,
|
||||
next_key.offset);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
26
fs/btrfs/tree-checker.h
Normal file
26
fs/btrfs/tree-checker.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (C) Qu Wenruo 2017. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public
|
||||
* License along with this program.
|
||||
*/
|
||||
|
||||
#ifndef __BTRFS_TREE_CHECKER__
|
||||
#define __BTRFS_TREE_CHECKER__
|
||||
|
||||
#include "ctree.h"
|
||||
#include "extent_io.h"
|
||||
|
||||
/* Sanity check a leaf; returns 0 if it looks sane, negative errno if not. */
int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf);
|
||||
/* Sanity check an internal node; returns 0 if it looks sane, -EUCLEAN if not. */
int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
|
||||
|
||||
#endif
|
@ -717,7 +717,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_lookup_data_extent(fs_info, ins.objectid,
|
||||
ins.offset);
|
||||
if (ret == 0) {
|
||||
ret = btrfs_inc_extent_ref(trans, fs_info,
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
ins.objectid, ins.offset,
|
||||
0, root->root_key.objectid,
|
||||
key->objectid, offset);
|
||||
@ -2699,34 +2699,36 @@ static void wait_log_commit(struct btrfs_root *root, int transid)
|
||||
* so we know that if ours is more than 2 older than the
|
||||
* current transaction, we're done
|
||||
*/
|
||||
do {
|
||||
for (;;) {
|
||||
prepare_to_wait(&root->log_commit_wait[index],
|
||||
&wait, TASK_UNINTERRUPTIBLE);
|
||||
|
||||
if (!(root->log_transid_committed < transid &&
|
||||
atomic_read(&root->log_commit[index])))
|
||||
break;
|
||||
|
||||
mutex_unlock(&root->log_mutex);
|
||||
|
||||
if (root->log_transid_committed < transid &&
|
||||
atomic_read(&root->log_commit[index]))
|
||||
schedule();
|
||||
|
||||
finish_wait(&root->log_commit_wait[index], &wait);
|
||||
schedule();
|
||||
mutex_lock(&root->log_mutex);
|
||||
} while (root->log_transid_committed < transid &&
|
||||
atomic_read(&root->log_commit[index]));
|
||||
}
|
||||
finish_wait(&root->log_commit_wait[index], &wait);
|
||||
}
|
||||
|
||||
static void wait_for_writer(struct btrfs_root *root)
|
||||
{
|
||||
DEFINE_WAIT(wait);
|
||||
|
||||
while (atomic_read(&root->log_writers)) {
|
||||
prepare_to_wait(&root->log_writer_wait,
|
||||
&wait, TASK_UNINTERRUPTIBLE);
|
||||
for (;;) {
|
||||
prepare_to_wait(&root->log_writer_wait, &wait,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
if (!atomic_read(&root->log_writers))
|
||||
break;
|
||||
|
||||
mutex_unlock(&root->log_mutex);
|
||||
if (atomic_read(&root->log_writers))
|
||||
schedule();
|
||||
finish_wait(&root->log_writer_wait, &wait);
|
||||
schedule();
|
||||
mutex_lock(&root->log_mutex);
|
||||
}
|
||||
finish_wait(&root->log_writer_wait, &wait);
|
||||
}
|
||||
|
||||
static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
|
||||
@ -4645,7 +4647,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_key min_key;
|
||||
struct btrfs_key max_key;
|
||||
struct btrfs_root *log = root->log_root;
|
||||
struct extent_buffer *src = NULL;
|
||||
LIST_HEAD(logged_list);
|
||||
u64 last_extent = 0;
|
||||
int err = 0;
|
||||
@ -4888,7 +4889,6 @@ again:
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
src = path->nodes[0];
|
||||
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
|
||||
ins_nr++;
|
||||
goto next_slot;
|
||||
|
@ -360,7 +360,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
|
||||
int again = 0;
|
||||
unsigned long num_run;
|
||||
unsigned long batch_run = 0;
|
||||
unsigned long limit;
|
||||
unsigned long last_waited = 0;
|
||||
int force_reg = 0;
|
||||
int sync_pending = 0;
|
||||
@ -375,8 +374,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
|
||||
blk_start_plug(&plug);
|
||||
|
||||
bdi = device->bdev->bd_bdi;
|
||||
limit = btrfs_async_submit_limit(fs_info);
|
||||
limit = limit * 2 / 3;
|
||||
|
||||
loop:
|
||||
spin_lock(&device->io_lock);
|
||||
@ -443,13 +440,6 @@ loop_lock:
|
||||
pending = pending->bi_next;
|
||||
cur->bi_next = NULL;
|
||||
|
||||
/*
|
||||
* atomic_dec_return implies a barrier for waitqueue_active
|
||||
*/
|
||||
if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
|
||||
waitqueue_active(&fs_info->async_submit_wait))
|
||||
wake_up(&fs_info->async_submit_wait);
|
||||
|
||||
BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
|
||||
|
||||
/*
|
||||
@ -517,12 +507,6 @@ loop_lock:
|
||||
&device->work);
|
||||
goto done;
|
||||
}
|
||||
/* unplug every 64 requests just for good measure */
|
||||
if (batch_run % 64 == 0) {
|
||||
blk_finish_plug(&plug);
|
||||
blk_start_plug(&plug);
|
||||
sync_pending = 0;
|
||||
}
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
@ -547,7 +531,7 @@ static void pending_bios_fn(struct btrfs_work *work)
|
||||
}
|
||||
|
||||
|
||||
void btrfs_free_stale_device(struct btrfs_device *cur_dev)
|
||||
static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devs;
|
||||
struct btrfs_device *dev;
|
||||
@ -1068,14 +1052,15 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_release_disk_super(struct page *page)
|
||||
static void btrfs_release_disk_super(struct page *page)
|
||||
{
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
|
||||
struct page **page, struct btrfs_super_block **disk_super)
|
||||
static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
|
||||
struct page **page,
|
||||
struct btrfs_super_block **disk_super)
|
||||
{
|
||||
void *p;
|
||||
pgoff_t index;
|
||||
@ -1817,8 +1802,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs,
|
||||
struct btrfs_device *device)
|
||||
static struct btrfs_device * btrfs_find_next_active_device(
|
||||
struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
|
||||
{
|
||||
struct btrfs_device *next_device;
|
||||
|
||||
@ -2031,19 +2016,20 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
btrfs_close_bdev(srcdev);
|
||||
|
||||
call_rcu(&srcdev->rcu, free_device);
|
||||
|
||||
/*
|
||||
* unless fs_devices is seed fs, num_devices shouldn't go
|
||||
* zero
|
||||
*/
|
||||
BUG_ON(!fs_devices->num_devices && !fs_devices->seeding);
|
||||
|
||||
/* if this is no devs we rather delete the fs_devices */
|
||||
if (!fs_devices->num_devices) {
|
||||
struct btrfs_fs_devices *tmp_fs_devices;
|
||||
|
||||
/*
|
||||
* On a mounted FS, num_devices can't be zero unless it's a
|
||||
* seed. In case of a seed device being replaced, the replace
|
||||
* target added to the sprout FS, so there will be no more
|
||||
* device left under the seed FS.
|
||||
*/
|
||||
ASSERT(fs_devices->seeding);
|
||||
|
||||
tmp_fs_devices = fs_info->fs_devices;
|
||||
while (tmp_fs_devices) {
|
||||
if (tmp_fs_devices->seed == fs_devices) {
|
||||
@ -2323,6 +2309,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
u64 tmp;
|
||||
int seeding_dev = 0;
|
||||
int ret = 0;
|
||||
bool unlocked = false;
|
||||
|
||||
if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
|
||||
return -EROFS;
|
||||
@ -2399,7 +2386,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
if (seeding_dev) {
|
||||
sb->s_flags &= ~MS_RDONLY;
|
||||
ret = btrfs_prepare_sprout(fs_info);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto error_trans;
|
||||
}
|
||||
}
|
||||
|
||||
device->fs_devices = fs_info->fs_devices;
|
||||
@ -2445,14 +2435,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto error_trans;
|
||||
goto error_sysfs;
|
||||
}
|
||||
}
|
||||
|
||||
ret = btrfs_add_device(trans, fs_info, device);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto error_trans;
|
||||
goto error_sysfs;
|
||||
}
|
||||
|
||||
if (seeding_dev) {
|
||||
@ -2461,7 +2451,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
ret = btrfs_finish_sprout(trans, fs_info);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto error_trans;
|
||||
goto error_sysfs;
|
||||
}
|
||||
|
||||
/* Sprouting would change fsid of the mounted root,
|
||||
@ -2479,6 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
if (seeding_dev) {
|
||||
mutex_unlock(&uuid_mutex);
|
||||
up_write(&sb->s_umount);
|
||||
unlocked = true;
|
||||
|
||||
if (ret) /* transaction commit */
|
||||
return ret;
|
||||
@ -2491,7 +2482,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
if (IS_ERR(trans)) {
|
||||
if (PTR_ERR(trans) == -ENOENT)
|
||||
return 0;
|
||||
return PTR_ERR(trans);
|
||||
ret = PTR_ERR(trans);
|
||||
trans = NULL;
|
||||
goto error_sysfs;
|
||||
}
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
}
|
||||
@ -2500,14 +2493,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
update_dev_time(device_path);
|
||||
return ret;
|
||||
|
||||
error_trans:
|
||||
btrfs_end_transaction(trans);
|
||||
rcu_string_free(device->name);
|
||||
error_sysfs:
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
|
||||
error_trans:
|
||||
if (seeding_dev)
|
||||
sb->s_flags |= MS_RDONLY;
|
||||
if (trans)
|
||||
btrfs_end_transaction(trans);
|
||||
rcu_string_free(device->name);
|
||||
kfree(device);
|
||||
error:
|
||||
blkdev_put(bdev, FMODE_EXCL);
|
||||
if (seeding_dev) {
|
||||
if (seeding_dev && !unlocked) {
|
||||
mutex_unlock(&uuid_mutex);
|
||||
up_write(&sb->s_umount);
|
||||
}
|
||||
@ -4813,16 +4810,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
||||
em_tree = &info->mapping_tree.map_tree;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = add_extent_mapping(em_tree, em, 0);
|
||||
if (!ret) {
|
||||
list_add_tail(&em->list, &trans->transaction->pending_chunks);
|
||||
refcount_inc(&em->refs);
|
||||
}
|
||||
write_unlock(&em_tree->lock);
|
||||
if (ret) {
|
||||
write_unlock(&em_tree->lock);
|
||||
free_extent_map(em);
|
||||
goto error;
|
||||
}
|
||||
|
||||
list_add_tail(&em->list, &trans->transaction->pending_chunks);
|
||||
refcount_inc(&em->refs);
|
||||
write_unlock(&em_tree->lock);
|
||||
|
||||
ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
|
||||
if (ret)
|
||||
goto error_del_extent;
|
||||
@ -5695,10 +5692,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
|
||||
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
|
||||
&stripe_index);
|
||||
if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
|
||||
if (!need_full_stripe(op))
|
||||
mirror_num = 1;
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
|
||||
if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
|
||||
if (need_full_stripe(op))
|
||||
num_stripes = map->num_stripes;
|
||||
else if (mirror_num)
|
||||
stripe_index = mirror_num - 1;
|
||||
@ -5711,7 +5708,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
|
||||
if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
|
||||
if (need_full_stripe(op)) {
|
||||
num_stripes = map->num_stripes;
|
||||
} else if (mirror_num) {
|
||||
stripe_index = mirror_num - 1;
|
||||
@ -5725,7 +5722,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
|
||||
stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
|
||||
stripe_index *= map->sub_stripes;
|
||||
|
||||
if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
|
||||
if (need_full_stripe(op))
|
||||
num_stripes = map->sub_stripes;
|
||||
else if (mirror_num)
|
||||
stripe_index += mirror_num - 1;
|
||||
@ -5740,9 +5737,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
||||
if (need_raid_map &&
|
||||
(op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS ||
|
||||
mirror_num > 1)) {
|
||||
if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
|
||||
/* push stripe_nr back to the start of the full stripe */
|
||||
stripe_nr = div64_u64(raid56_full_stripe_start,
|
||||
stripe_len * nr_data_stripes(map));
|
||||
@ -5769,9 +5764,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
|
||||
/* We distribute the parity blocks across stripes */
|
||||
div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
|
||||
&stripe_index);
|
||||
if ((op != BTRFS_MAP_WRITE &&
|
||||
op != BTRFS_MAP_GET_READ_MIRRORS) &&
|
||||
mirror_num <= 1)
|
||||
if (!need_full_stripe(op) && mirror_num <= 1)
|
||||
mirror_num = 1;
|
||||
}
|
||||
} else {
|
||||
@ -6033,7 +6026,7 @@ static void btrfs_end_bio(struct bio *bio)
|
||||
* this bio is actually up to date, we didn't
|
||||
* go over the max number of errors
|
||||
*/
|
||||
bio->bi_status = 0;
|
||||
bio->bi_status = BLK_STS_OK;
|
||||
}
|
||||
|
||||
btrfs_end_bbio(bbio, bio);
|
||||
@ -6069,13 +6062,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device,
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* nr_async_bios allows us to reliably return congestion to the
|
||||
* higher layers. Otherwise, the async bio makes it appear we have
|
||||
* made progress against dirty pages when we've really just put it
|
||||
* on a queue for later
|
||||
*/
|
||||
atomic_inc(&fs_info->nr_async_bios);
|
||||
WARN_ON(bio->bi_next);
|
||||
bio->bi_next = NULL;
|
||||
|
||||
@ -6144,7 +6130,10 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
|
||||
|
||||
btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
|
||||
bio->bi_iter.bi_sector = logical >> 9;
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
if (atomic_read(&bbio->error) > bbio->max_errors)
|
||||
bio->bi_status = BLK_STS_IOERR;
|
||||
else
|
||||
bio->bi_status = BLK_STS_OK;
|
||||
btrfs_end_bbio(bbio, bio);
|
||||
}
|
||||
}
|
||||
@ -6249,7 +6238,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
|
||||
|
||||
device = btrfs_alloc_device(NULL, &devid, dev_uuid);
|
||||
if (IS_ERR(device))
|
||||
return NULL;
|
||||
return device;
|
||||
|
||||
list_add(&device->dev_list, &fs_devices->devices);
|
||||
device->fs_devices = fs_devices;
|
||||
@ -6377,6 +6366,17 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
|
||||
u64 devid, u8 *uuid, bool error)
|
||||
{
|
||||
if (error)
|
||||
btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
|
||||
devid, uuid);
|
||||
else
|
||||
btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
|
||||
devid, uuid);
|
||||
}
|
||||
|
||||
static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk)
|
||||
@ -6447,18 +6447,21 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
if (!map->stripes[i].dev &&
|
||||
!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
free_extent_map(em);
|
||||
btrfs_report_missing_device(fs_info, devid, uuid);
|
||||
return -EIO;
|
||||
btrfs_report_missing_device(fs_info, devid, uuid, true);
|
||||
return -ENOENT;
|
||||
}
|
||||
if (!map->stripes[i].dev) {
|
||||
map->stripes[i].dev =
|
||||
add_missing_dev(fs_info->fs_devices, devid,
|
||||
uuid);
|
||||
if (!map->stripes[i].dev) {
|
||||
if (IS_ERR(map->stripes[i].dev)) {
|
||||
free_extent_map(em);
|
||||
return -EIO;
|
||||
btrfs_err(fs_info,
|
||||
"failed to init missing dev %llu: %ld",
|
||||
devid, PTR_ERR(map->stripes[i].dev));
|
||||
return PTR_ERR(map->stripes[i].dev);
|
||||
}
|
||||
btrfs_report_missing_device(fs_info, devid, uuid);
|
||||
btrfs_report_missing_device(fs_info, devid, uuid, false);
|
||||
}
|
||||
map->stripes[i].dev->in_fs_metadata = 1;
|
||||
}
|
||||
@ -6577,19 +6580,28 @@ static int read_one_dev(struct btrfs_fs_info *fs_info,
|
||||
device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
|
||||
if (!device) {
|
||||
if (!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
btrfs_report_missing_device(fs_info, devid, dev_uuid);
|
||||
return -EIO;
|
||||
btrfs_report_missing_device(fs_info, devid,
|
||||
dev_uuid, true);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
device = add_missing_dev(fs_devices, devid, dev_uuid);
|
||||
if (!device)
|
||||
return -ENOMEM;
|
||||
btrfs_report_missing_device(fs_info, devid, dev_uuid);
|
||||
if (IS_ERR(device)) {
|
||||
btrfs_err(fs_info,
|
||||
"failed to add missing dev %llu: %ld",
|
||||
devid, PTR_ERR(device));
|
||||
return PTR_ERR(device);
|
||||
}
|
||||
btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
|
||||
} else {
|
||||
if (!device->bdev) {
|
||||
btrfs_report_missing_device(fs_info, devid, dev_uuid);
|
||||
if (!btrfs_test_opt(fs_info, DEGRADED))
|
||||
return -EIO;
|
||||
if (!btrfs_test_opt(fs_info, DEGRADED)) {
|
||||
btrfs_report_missing_device(fs_info,
|
||||
devid, dev_uuid, true);
|
||||
return -ENOENT;
|
||||
}
|
||||
btrfs_report_missing_device(fs_info, devid,
|
||||
dev_uuid, false);
|
||||
}
|
||||
|
||||
if(!device->bdev && !device->missing) {
|
||||
@ -6756,12 +6768,6 @@ out_short_read:
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
u8 *uuid)
|
||||
{
|
||||
btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", devid, uuid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if all chunks in the fs are OK for read-write degraded mount
|
||||
*
|
||||
|
@ -542,7 +542,5 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
|
||||
|
||||
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
u8 *uuid);
|
||||
|
||||
#endif
|
||||
|
@ -37,6 +37,7 @@ struct workspace {
|
||||
z_stream strm;
|
||||
char *buf;
|
||||
struct list_head list;
|
||||
int level;
|
||||
};
|
||||
|
||||
static void zlib_free_workspace(struct list_head *ws)
|
||||
@ -96,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
|
||||
*total_out = 0;
|
||||
*total_in = 0;
|
||||
|
||||
if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) {
|
||||
if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) {
|
||||
pr_warn("BTRFS: deflateInit failed\n");
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
@ -402,10 +403,22 @@ next:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void zlib_set_level(struct list_head *ws, unsigned int type)
|
||||
{
|
||||
struct workspace *workspace = list_entry(ws, struct workspace, list);
|
||||
unsigned level = (type & 0xF0) >> 4;
|
||||
|
||||
if (level > 9)
|
||||
level = 9;
|
||||
|
||||
workspace->level = level > 0 ? level : 3;
|
||||
}
|
||||
|
||||
const struct btrfs_compress_op btrfs_zlib_compress = {
|
||||
.alloc_workspace = zlib_alloc_workspace,
|
||||
.free_workspace = zlib_free_workspace,
|
||||
.compress_pages = zlib_compress_pages,
|
||||
.decompress_bio = zlib_decompress_bio,
|
||||
.decompress = zlib_decompress,
|
||||
.set_level = zlib_set_level,
|
||||
};
|
||||
|
@ -423,10 +423,15 @@ finish:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void zstd_set_level(struct list_head *ws, unsigned int type)
|
||||
{
|
||||
}
|
||||
|
||||
const struct btrfs_compress_op btrfs_zstd_compress = {
|
||||
.alloc_workspace = zstd_alloc_workspace,
|
||||
.free_workspace = zstd_free_workspace,
|
||||
.compress_pages = zstd_compress_pages,
|
||||
.decompress_bio = zstd_decompress_bio,
|
||||
.decompress = zstd_decompress,
|
||||
.set_level = zstd_set_level,
|
||||
};
|
||||
|
@ -29,6 +29,13 @@ struct btrfs_qgroup_extent_record;
|
||||
struct btrfs_qgroup;
|
||||
struct prelim_ref;
|
||||
|
||||
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
|
||||
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
|
||||
TRACE_DEFINE_ENUM(FLUSH_DELALLOC);
|
||||
TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT);
|
||||
TRACE_DEFINE_ENUM(ALLOC_CHUNK);
|
||||
TRACE_DEFINE_ENUM(COMMIT_TRANS);
|
||||
|
||||
#define show_ref_type(type) \
|
||||
__print_symbolic(type, \
|
||||
{ BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \
|
||||
@ -792,11 +799,10 @@ DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref,
|
||||
DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
|
||||
|
||||
TP_PROTO(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_delayed_ref_node *ref,
|
||||
const struct btrfs_delayed_ref_head *head_ref,
|
||||
int action),
|
||||
|
||||
TP_ARGS(fs_info, ref, head_ref, action),
|
||||
TP_ARGS(fs_info, head_ref, action),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, bytenr )
|
||||
@ -806,8 +812,8 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(fs_info,
|
||||
__entry->bytenr = ref->bytenr;
|
||||
__entry->num_bytes = ref->num_bytes;
|
||||
__entry->bytenr = head_ref->bytenr;
|
||||
__entry->num_bytes = head_ref->num_bytes;
|
||||
__entry->action = action;
|
||||
__entry->is_data = head_ref->is_data;
|
||||
),
|
||||
@ -822,21 +828,19 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
|
||||
DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head,
|
||||
|
||||
TP_PROTO(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_delayed_ref_node *ref,
|
||||
const struct btrfs_delayed_ref_head *head_ref,
|
||||
int action),
|
||||
|
||||
TP_ARGS(fs_info, ref, head_ref, action)
|
||||
TP_ARGS(fs_info, head_ref, action)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head,
|
||||
|
||||
TP_PROTO(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_delayed_ref_node *ref,
|
||||
const struct btrfs_delayed_ref_head *head_ref,
|
||||
int action),
|
||||
|
||||
TP_ARGS(fs_info, ref, head_ref, action)
|
||||
TP_ARGS(fs_info, head_ref, action)
|
||||
);
|
||||
|
||||
#define show_chunk_type(type) \
|
||||
@ -1692,6 +1696,27 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
|
||||
TP_ARGS(fs_info, oldref, newref, tree_size)
|
||||
);
|
||||
|
||||
TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
|
||||
TP_PROTO(struct btrfs_root *root, u64 ino, int mod),
|
||||
|
||||
TP_ARGS(root, ino, mod),
|
||||
|
||||
TP_STRUCT__entry_btrfs(
|
||||
__field( u64, root_objectid )
|
||||
__field( u64, ino )
|
||||
__field( int, mod )
|
||||
),
|
||||
|
||||
TP_fast_assign_btrfs(root->fs_info,
|
||||
__entry->root_objectid = root->objectid;
|
||||
__entry->ino = ino;
|
||||
__entry->mod = mod;
|
||||
),
|
||||
|
||||
TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
|
||||
show_root_type(__entry->root_objectid),
|
||||
(unsigned long long)__entry->ino, __entry->mod)
|
||||
);
|
||||
#endif /* _TRACE_BTRFS_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
@ -609,10 +609,14 @@ struct btrfs_ioctl_ino_path_args {
|
||||
struct btrfs_ioctl_logical_ino_args {
|
||||
__u64 logical; /* in */
|
||||
__u64 size; /* in */
|
||||
__u64 reserved[4];
|
||||
__u64 reserved[3]; /* must be 0 for now */
|
||||
__u64 flags; /* in, v2 only */
|
||||
/* struct btrfs_data_container *inodes; out */
|
||||
__u64 inodes;
|
||||
};
|
||||
/* Return every ref to the extent, not just those containing logical block.
|
||||
* Requires logical == extent bytenr. */
|
||||
#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0)
|
||||
|
||||
enum btrfs_dev_stat_values {
|
||||
/* disk I/O failure stats */
|
||||
@ -836,5 +840,7 @@ enum btrfs_err_code {
|
||||
struct btrfs_ioctl_feature_flags[3])
|
||||
#define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
|
||||
struct btrfs_ioctl_vol_args_v2)
|
||||
#define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
|
||||
struct btrfs_ioctl_logical_ino_args)
|
||||
|
||||
#endif /* _UAPI_LINUX_BTRFS_H */
|
||||
|
@ -733,6 +733,7 @@ struct btrfs_balance_item {
|
||||
#define BTRFS_FILE_EXTENT_INLINE 0
|
||||
#define BTRFS_FILE_EXTENT_REG 1
|
||||
#define BTRFS_FILE_EXTENT_PREALLOC 2
|
||||
#define BTRFS_FILE_EXTENT_TYPES 2
|
||||
|
||||
struct btrfs_file_extent_item {
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user