for-6.11-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmaVN3MACgkQxWXV+ddt
 WDtpIRAAl+1NjsEj8e5V/UYn8Jr06ujTOnrkR3PCTICxDHbUaMLkQEw21H0K/ogQ
 3fOiEVpSlZOfKdYXtXaMQbC0jd/Af2eA10Uht96nAEjAtxu1uJ4cFZGu2meNdXZP
 xUioivJ/CElMPH2aluG6FaQvUTqmhrEr8tSoYbxzQmUd434q9kqqyjtw1tfzYDG1
 VDn2f7ykhpB/8P0aoqgWSshWTmaCzG0GkuI28o1o0iZUIF/P9TKdzxlLRW6BVHE7
 T2oGLEQjN1GQbCH75L4IeNJDkCBVfcDcbZkUDJ/ae4Pt/jJQTFY53YIP9wXFZQnd
 mdfHmK7Atpsk75ATftYSq+ENkbQ5fsuut5CD63u54gAqA4M1FncDXTAWS1Y30F76
 P8juSCmsSy0o3gTflDIo/IMdntoh/JmncwwStF6oKzmyUZZzzarsqM8mc1P03ZNt
 3ttlnbY7lC1TDAlD5J2wXE0INCT2pN+4C9IToWdRypeuLu6qrI7cQ0oylyp9OVQM
 t9umTXm0B6s1cyqEDjJf0xJZS/JTHYwu7S4EmAJwicgiLpOjABVTmO8021rVmDJy
 TAUu6yEhSsrTT6Dxm7/2Et1EEOKFF5hhsG1SiGD9oUIZK6B5+0waT+rbkEWl7osR
 4/TAv2zX6tuCc7HIW0fQloM/6/Gyd5wcDVaQNDUzFA075uKstwY=
 =k5d3
 -----END PGP SIGNATURE-----

Merge tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "The highlights are new logic behind background block group reclaim,
  automatic removal of qgroup after removing a subvolume and new
  'rescue=' mount options.

  The rest is optimizations, cleanups and refactoring.

  User visible features:

   - dynamic block group reclaim:
      - tunable framework to avoid situations where eager data
        allocations prevent creating new metadata chunks due to lack of
        unallocated space
      - reuse sysfs knob bg_reclaim_threshold (otherwise used only in
        zoned mode) for a fixed value threshold
      - new on/off sysfs knob "dynamic_reclaim" calculating the value
        based on heuristics, aiming to keep spare working space for
        relocating chunks but not to needlessly relocate partially
        utilized block groups or reclaim newly allocated ones
      - stats are exported in sysfs per block group type, files
        "reclaim_*"
      - this may increase IO load at unexpected times but the corner
        case of no allocatable block groups is known to be worse

   - automatically remove qgroup of deleted subvolumes:
      - adjust qgroup removal conditions, make sure all related
        subvolume data are already removed, or return EBUSY, also take
        into account setting of sysfs drop_subtree_threshold
      - also works in squota mode

   - mount option updates: new modes of 'rescue=' that allow mounting
     images (read-only) that could have been partially converted by user
     space tools
      - ignoremetacsums  - invalid metadata checksums are ignored
      - ignoresuperflags - super block flags that track conversion in
                           progress (like UUID or checksums) are ignored

  Core:

   - size of struct btrfs_inode is now below 1024 (on a release config),
     improved memory packing and other secondary effects

   - switch tracking of open inodes from rb-tree to xarray, minor
     performance improvement

   - reduce number of empty transaction commits when there are no dirty
     data/metadata

   - memory allocation optimizations (reduced numbers, reordering out of
     critical sections)

   - extent map structure optimizations and refactoring, more sanity
     checks

   - more subpage in zoned mode preparations or fixes

   - general snapshot code cleanups, improvements and documentation

   - tree-checker updates: more file extent ram_bytes fixes, continued

   - raid-stripe-tree update (not backward compatible):
      - remove extent encoding field from the structure, can be inferred
        from other information
      - requires btrfs-progs 6.9.1 or newer

   - cleanups and refactoring
      - error message updates
      - error handling improvements
      - return type and parameter cleanups and improvements"

* tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (152 commits)
  btrfs: fix extent map use-after-free when adding pages to compressed bio
  btrfs: fix bitmap leak when loading free space cache on duplicate entry
  btrfs: remove the BUG_ON() inside extent_range_clear_dirty_for_io()
  btrfs: move extent_range_clear_dirty_for_io() into inode.c
  btrfs: enhance compression error messages
  btrfs: fix data race when accessing the last_trans field of a root
  btrfs: rename the extra_gfp parameter of btrfs_alloc_page_array()
  btrfs: remove the extra_gfp parameter from btrfs_alloc_folio_array()
  btrfs: introduce new "rescue=ignoresuperflags" mount option
  btrfs: introduce new "rescue=ignoremetacsums" mount option
  btrfs: output the unrecognized super block flags as hex
  btrfs: remove unused Opt enums
  btrfs: tree-checker: add extra ram_bytes and disk_num_bytes check
  btrfs: fix the ram_bytes assignment for truncated ordered extents
  btrfs: make validate_extent_map() catch ram_bytes mismatch
  btrfs: ignore incorrect btrfs_file_extent_item::ram_bytes
  btrfs: cleanup the bytenr usage inside btrfs_extent_item_to_extent_map()
  btrfs: fix typo in error message in btrfs_validate_super()
  btrfs: move the direct IO code into its own file
  btrfs: pass a btrfs_inode to btrfs_set_prop()
  ...
This commit is contained in:
Linus Torvalds 2024-07-17 12:38:04 -07:00
commit a1b547f0f2
93 changed files with 5184 additions and 3914 deletions

View File

@ -33,7 +33,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \ subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
lru_cache.o raid-stripe-tree.o lru_cache.o raid-stripe-tree.o fiemap.o direct-io.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o

View File

@ -34,7 +34,7 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e
static inline u8 get_unaligned_le8(const void *p) static inline u8 get_unaligned_le8(const void *p)
{ {
return *(u8 *)p; return *(const u8 *)p;
} }
static inline void put_unaligned_le8(u8 val, void *p) static inline void put_unaligned_le8(u8 val, void *p)
@ -48,8 +48,8 @@ static inline void put_unaligned_le8(u8 val, void *p)
offsetof(type, member), \ offsetof(type, member), \
sizeof_field(type, member))) sizeof_field(type, member)))
#define write_eb_member(eb, ptr, type, member, result) (\ #define write_eb_member(eb, ptr, type, member, source) ( \
write_extent_buffer(eb, (char *)(result), \ write_extent_buffer(eb, (const char *)(source), \
((unsigned long)(ptr)) + \ ((unsigned long)(ptr)) + \
offsetof(type, member), \ offsetof(type, member), \
sizeof_field(type, member))) sizeof_field(type, member)))
@ -315,11 +315,8 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
BTRFS_SETGET_FUNCS(stripe_extent_encoding, struct btrfs_stripe_extent, encoding, 8);
BTRFS_SETGET_FUNCS(raid_stride_devid, struct btrfs_raid_stride, devid, 64); BTRFS_SETGET_FUNCS(raid_stride_devid, struct btrfs_raid_stride, devid, 64);
BTRFS_SETGET_FUNCS(raid_stride_physical, struct btrfs_raid_stride, physical, 64); BTRFS_SETGET_FUNCS(raid_stride_physical, struct btrfs_raid_stride, physical, 64);
BTRFS_SETGET_STACK_FUNCS(stack_stripe_extent_encoding,
struct btrfs_stripe_extent, encoding, 8);
BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_devid, struct btrfs_raid_stride, devid, 64); BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_devid, struct btrfs_raid_stride, devid, 64);
BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_physical, struct btrfs_raid_stride, physical, 64); BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_physical, struct btrfs_raid_stride, physical, 64);
@ -353,7 +350,7 @@ static inline void btrfs_tree_block_key(const struct extent_buffer *eb,
static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb, static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb,
struct btrfs_tree_block_info *item, struct btrfs_tree_block_info *item,
struct btrfs_disk_key *key) const struct btrfs_disk_key *key)
{ {
write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
} }
@ -446,7 +443,7 @@ void btrfs_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr); struct btrfs_disk_key *disk_key, int nr);
static inline void btrfs_set_node_key(const struct extent_buffer *eb, static inline void btrfs_set_node_key(const struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr) const struct btrfs_disk_key *disk_key, int nr)
{ {
unsigned long ptr; unsigned long ptr;
@ -512,7 +509,7 @@ static inline void btrfs_item_key(const struct extent_buffer *eb,
} }
static inline void btrfs_set_item_key(struct extent_buffer *eb, static inline void btrfs_set_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr) const struct btrfs_disk_key *disk_key, int nr)
{ {
struct btrfs_item *item = btrfs_item_nr(eb, nr); struct btrfs_item *item = btrfs_item_nr(eb, nr);

View File

@ -29,7 +29,7 @@ struct btrfs_failed_bio {
/* Is this a data path I/O that needs storage layer checksum and repair? */ /* Is this a data path I/O that needs storage layer checksum and repair? */
static inline bool is_data_bbio(struct btrfs_bio *bbio) static inline bool is_data_bbio(struct btrfs_bio *bbio)
{ {
return bbio->inode && is_data_inode(&bbio->inode->vfs_inode); return bbio->inode && is_data_inode(bbio->inode);
} }
static bool bbio_has_ordered_extent(struct btrfs_bio *bbio) static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
@ -732,7 +732,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* point, so they are handled as part of the no-checksum case. * point, so they are handled as part of the no-checksum case.
*/ */
if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) && if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) && !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
!btrfs_is_data_reloc_root(inode->root)) { !btrfs_is_data_reloc_root(inode->root)) {
if (should_async_write(bbio) && if (should_async_write(bbio) &&
btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num)) btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))

View File

@ -1022,6 +1022,13 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
} }
} }
static struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
{
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
return fs_info->block_group_root;
return btrfs_extent_root(fs_info, 0);
}
static int remove_block_group_item(struct btrfs_trans_handle *trans, static int remove_block_group_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path, struct btrfs_path *path,
struct btrfs_block_group *block_group) struct btrfs_block_group *block_group)
@ -1757,24 +1764,21 @@ static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed) static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
{ {
const struct btrfs_space_info *space_info = bg->space_info; const int thresh_pct = btrfs_calc_reclaim_threshold(bg->space_info);
const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold); u64 thresh_bytes = mult_perc(bg->length, thresh_pct);
const u64 new_val = bg->used; const u64 new_val = bg->used;
const u64 old_val = new_val + bytes_freed; const u64 old_val = new_val + bytes_freed;
u64 thresh;
if (reclaim_thresh == 0) if (thresh_bytes == 0)
return false; return false;
thresh = mult_perc(bg->length, reclaim_thresh);
/* /*
* If we were below the threshold before don't reclaim, we are likely a * If we were below the threshold before don't reclaim, we are likely a
* brand new block group and we don't want to relocate new block groups. * brand new block group and we don't want to relocate new block groups.
*/ */
if (old_val < thresh) if (old_val < thresh_bytes)
return false; return false;
if (new_val >= thresh) if (new_val >= thresh_bytes)
return false; return false;
return true; return true;
} }
@ -1822,6 +1826,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
while (!list_empty(&fs_info->reclaim_bgs)) { while (!list_empty(&fs_info->reclaim_bgs)) {
u64 zone_unusable; u64 zone_unusable;
u64 reclaimed;
int ret = 0; int ret = 0;
bg = list_first_entry(&fs_info->reclaim_bgs, bg = list_first_entry(&fs_info->reclaim_bgs,
@ -1835,6 +1840,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
/* Don't race with allocators so take the groups_sem */ /* Don't race with allocators so take the groups_sem */
down_write(&space_info->groups_sem); down_write(&space_info->groups_sem);
spin_lock(&space_info->lock);
spin_lock(&bg->lock); spin_lock(&bg->lock);
if (bg->reserved || bg->pinned || bg->ro) { if (bg->reserved || bg->pinned || bg->ro) {
/* /*
@ -1844,6 +1850,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
* this block group. * this block group.
*/ */
spin_unlock(&bg->lock); spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
up_write(&space_info->groups_sem); up_write(&space_info->groups_sem);
goto next; goto next;
} }
@ -1862,6 +1869,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) if (!btrfs_test_opt(fs_info, DISCARD_ASYNC))
btrfs_mark_bg_unused(bg); btrfs_mark_bg_unused(bg);
spin_unlock(&bg->lock); spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
up_write(&space_info->groups_sem); up_write(&space_info->groups_sem);
goto next; goto next;
@ -1878,10 +1886,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/ */
if (!should_reclaim_block_group(bg, bg->length)) { if (!should_reclaim_block_group(bg, bg->length)) {
spin_unlock(&bg->lock); spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
up_write(&space_info->groups_sem); up_write(&space_info->groups_sem);
goto next; goto next;
} }
spin_unlock(&bg->lock); spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
/* /*
* Get out fast, in case we're read-only or unmounting the * Get out fast, in case we're read-only or unmounting the
@ -1914,15 +1924,26 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(bg->used * 100, bg->length), div64_u64(bg->used * 100, bg->length),
div64_u64(zone_unusable * 100, bg->length)); div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg); trace_btrfs_reclaim_block_group(bg);
reclaimed = bg->used;
ret = btrfs_relocate_chunk(fs_info, bg->start); ret = btrfs_relocate_chunk(fs_info, bg->start);
if (ret) { if (ret) {
btrfs_dec_block_group_ro(bg); btrfs_dec_block_group_ro(bg);
btrfs_err(fs_info, "error relocating chunk %llu", btrfs_err(fs_info, "error relocating chunk %llu",
bg->start); bg->start);
reclaimed = 0;
spin_lock(&space_info->lock);
space_info->reclaim_errors++;
if (READ_ONCE(space_info->periodic_reclaim))
space_info->periodic_reclaim_ready = false;
spin_unlock(&space_info->lock);
} }
spin_lock(&space_info->lock);
space_info->reclaim_count++;
space_info->reclaim_bytes += reclaimed;
spin_unlock(&space_info->lock);
next: next:
if (ret) { if (ret && !READ_ONCE(space_info->periodic_reclaim)) {
/* Refcount held by the reclaim_bgs list after splice. */ /* Refcount held by the reclaim_bgs list after splice. */
spin_lock(&fs_info->unused_bgs_lock); spin_lock(&fs_info->unused_bgs_lock);
/* /*
@ -1964,6 +1985,7 @@ end:
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info) void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
{ {
btrfs_reclaim_sweep(fs_info);
spin_lock(&fs_info->unused_bgs_lock); spin_lock(&fs_info->unused_bgs_lock);
if (!list_empty(&fs_info->reclaim_bgs)) if (!list_empty(&fs_info->reclaim_bgs))
queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work); queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
@ -3662,9 +3684,12 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
old_val += num_bytes; old_val += num_bytes;
cache->used = old_val; cache->used = old_val;
cache->reserved -= num_bytes; cache->reserved -= num_bytes;
cache->reclaim_mark = 0;
space_info->bytes_reserved -= num_bytes; space_info->bytes_reserved -= num_bytes;
space_info->bytes_used += num_bytes; space_info->bytes_used += num_bytes;
space_info->disk_used += num_bytes * factor; space_info->disk_used += num_bytes * factor;
if (READ_ONCE(space_info->periodic_reclaim))
btrfs_space_info_update_reclaimable(space_info, -num_bytes);
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
} else { } else {
@ -3674,8 +3699,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes); btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
space_info->bytes_used -= num_bytes; space_info->bytes_used -= num_bytes;
space_info->disk_used -= num_bytes * factor; space_info->disk_used -= num_bytes * factor;
if (READ_ONCE(space_info->periodic_reclaim))
reclaim = should_reclaim_block_group(cache, num_bytes); btrfs_space_info_update_reclaimable(space_info, num_bytes);
else
reclaim = should_reclaim_block_group(cache, num_bytes);
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
@ -4329,13 +4356,13 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF, if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF,
&block_group->runtime_flags)) { &block_group->runtime_flags)) {
struct inode *inode = block_group->inode; struct btrfs_inode *inode = block_group->inode;
block_group->inode = NULL; block_group->inode = NULL;
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
ASSERT(block_group->io_ctl.inode == NULL); ASSERT(block_group->io_ctl.inode == NULL);
iput(inode); iput(&inode->vfs_inode);
} else { } else {
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
} }

View File

@ -115,7 +115,7 @@ struct btrfs_caching_control {
struct btrfs_block_group { struct btrfs_block_group {
struct btrfs_fs_info *fs_info; struct btrfs_fs_info *fs_info;
struct inode *inode; struct btrfs_inode *inode;
spinlock_t lock; spinlock_t lock;
u64 start; u64 start;
u64 length; u64 length;
@ -263,6 +263,7 @@ struct btrfs_block_group {
struct work_struct zone_finish_work; struct work_struct zone_finish_work;
struct extent_buffer *last_eb; struct extent_buffer *last_eb;
enum btrfs_block_group_size_class size_class; enum btrfs_block_group_size_class size_class;
u64 reclaim_mark;
}; };
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)

View File

@ -19,7 +19,6 @@
#include <uapi/linux/btrfs_tree.h> #include <uapi/linux/btrfs_tree.h>
#include <trace/events/btrfs.h> #include <trace/events/btrfs.h>
#include "block-rsv.h" #include "block-rsv.h"
#include "btrfs_inode.h"
#include "extent_map.h" #include "extent_map.h"
#include "extent_io.h" #include "extent_io.h"
#include "extent-io-tree.h" #include "extent-io-tree.h"
@ -99,6 +98,29 @@ enum {
* range). * range).
*/ */
BTRFS_INODE_COW_WRITE_ERROR, BTRFS_INODE_COW_WRITE_ERROR,
/*
* Indicate this is a directory that points to a subvolume for which
* there is no root reference item. That's a case like the following:
*
* $ btrfs subvolume create /mnt/parent
* $ btrfs subvolume create /mnt/parent/child
* $ btrfs subvolume snapshot /mnt/parent /mnt/snap
*
* If subvolume "parent" is root 256, subvolume "child" is root 257 and
* snapshot "snap" is root 258, then there's no root reference item (key
* BTRFS_ROOT_REF_KEY in the root tree) for the subvolume "child"
* associated to root 258 (the snapshot) - there's only for the root
* of the "parent" subvolume (root 256). In the chunk root we have a
* (256 BTRFS_ROOT_REF_KEY 257) key but we don't have a
* (258 BTRFS_ROOT_REF_KEY 257) key - the same goes for backrefs, we
* have a (257 BTRFS_ROOT_BACKREF_KEY 256) but we don't have a
* (257 BTRFS_ROOT_BACKREF_KEY 258) key.
*
* So when opening the "child" dentry from the snapshot's directory,
* we don't find a root ref item and we create a stub inode. This is
* done at new_simple_dir(), called from btrfs_lookup_dentry().
*/
BTRFS_INODE_ROOT_STUB,
}; };
/* in memory btrfs inode */ /* in memory btrfs inode */
@ -106,10 +128,14 @@ struct btrfs_inode {
/* which subvolume this inode belongs to */ /* which subvolume this inode belongs to */
struct btrfs_root *root; struct btrfs_root *root;
/* key used to find this inode on disk. This is used by the code #if BITS_PER_LONG == 32
* to read in roots of subvolumes /*
* The objectid of the corresponding BTRFS_INODE_ITEM_KEY.
* On 64 bits platforms we can get it from vfs_inode.i_ino, which is an
* unsigned long and therefore 64 bits on such platforms.
*/ */
struct btrfs_key location; u64 objectid;
#endif
/* Cached value of inode property 'compression'. */ /* Cached value of inode property 'compression'. */
u8 prop_compress; u8 prop_compress;
@ -165,9 +191,6 @@ struct btrfs_inode {
*/ */
struct list_head delalloc_inodes; struct list_head delalloc_inodes;
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
unsigned long runtime_flags; unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big /* full 64 bit generation number, struct vfs_inode doesn't have a big
@ -228,11 +251,20 @@ struct btrfs_inode {
u64 last_dir_index_offset; u64 last_dir_index_offset;
}; };
/* union {
* Total number of bytes pending defrag, used by stat to check whether /*
* it needs COW. Protected by 'lock'. * Total number of bytes pending defrag, used by stat to check whether
*/ * it needs COW. Protected by 'lock'.
u64 defrag_bytes; * Used by inodes other than the data relocation inode.
*/
u64 defrag_bytes;
/*
* Logical address of the block group being relocated.
* Used only by the data relocation inode.
*/
u64 reloc_block_group_start;
};
/* /*
* The size of the file stored in the metadata on disk. data=ordered * The size of the file stored in the metadata on disk. data=ordered
@ -241,12 +273,21 @@ struct btrfs_inode {
*/ */
u64 disk_i_size; u64 disk_i_size;
/* union {
* If this is a directory then index_cnt is the counter for the index /*
* number for new files that are created. For an empty directory, this * If this is a directory then index_cnt is the counter for the
* must be initialized to BTRFS_DIR_START_INDEX. * index number for new files that are created. For an empty
*/ * directory, this must be initialized to BTRFS_DIR_START_INDEX.
u64 index_cnt; */
u64 index_cnt;
/*
* If this is not a directory, this is the number of bytes
* outstanding that are going to need csums. This is used in
* ENOSPC accounting. Protected by 'lock'.
*/
u64 csum_bytes;
};
/* Cache the directory index number to speed the dir/file remove */ /* Cache the directory index number to speed the dir/file remove */
u64 dir_index; u64 dir_index;
@ -258,22 +299,25 @@ struct btrfs_inode {
*/ */
u64 last_unlink_trans; u64 last_unlink_trans;
/* union {
* The id/generation of the last transaction where this inode was /*
* either the source or the destination of a clone/dedupe operation. * The id/generation of the last transaction where this inode
* Used when logging an inode to know if there are shared extents that * was either the source or the destination of a clone/dedupe
* need special care when logging checksum items, to avoid duplicate * operation. Used when logging an inode to know if there are
* checksum items in a log (which can lead to a corruption where we end * shared extents that need special care when logging checksum
* up with missing checksum ranges after log replay). * items, to avoid duplicate checksum items in a log (which can
* Protected by the vfs inode lock. * lead to a corruption where we end up with missing checksum
*/ * ranges after log replay). Protected by the VFS inode lock.
u64 last_reflink_trans; * Used for regular files only.
*/
u64 last_reflink_trans;
/* /*
* Number of bytes outstanding that are going to need csums. This is * In case this is a root stub inode (BTRFS_INODE_ROOT_STUB flag set),
* used in ENOSPC accounting. Protected by 'lock'. * the ID of that root.
*/ */
u64 csum_bytes; u64 ref_root_id;
};
/* Backwards incompatible flags, lower half of inode_item::flags */ /* Backwards incompatible flags, lower half of inode_item::flags */
u32 flags; u32 flags;
@ -331,10 +375,9 @@ static inline unsigned long btrfs_inode_hash(u64 objectid,
*/ */
static inline u64 btrfs_ino(const struct btrfs_inode *inode) static inline u64 btrfs_ino(const struct btrfs_inode *inode)
{ {
u64 ino = inode->location.objectid; u64 ino = inode->objectid;
/* type == BTRFS_ROOT_ITEM_KEY: subvol dir */ if (test_bit(BTRFS_INODE_ROOT_STUB, &inode->runtime_flags))
if (inode->location.type == BTRFS_ROOT_ITEM_KEY)
ino = inode->vfs_inode.i_ino; ino = inode->vfs_inode.i_ino;
return ino; return ino;
} }
@ -348,20 +391,36 @@ static inline u64 btrfs_ino(const struct btrfs_inode *inode)
#endif #endif
static inline void btrfs_get_inode_key(const struct btrfs_inode *inode,
struct btrfs_key *key)
{
key->objectid = btrfs_ino(inode);
key->type = BTRFS_INODE_ITEM_KEY;
key->offset = 0;
}
static inline void btrfs_set_inode_number(struct btrfs_inode *inode, u64 ino)
{
#if BITS_PER_LONG == 32
inode->objectid = ino;
#endif
inode->vfs_inode.i_ino = ino;
}
static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size) static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
{ {
i_size_write(&inode->vfs_inode, size); i_size_write(&inode->vfs_inode, size);
inode->disk_i_size = size; inode->disk_i_size = size;
} }
static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode) static inline bool btrfs_is_free_space_inode(const struct btrfs_inode *inode)
{ {
return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags); return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags);
} }
static inline bool is_data_inode(struct inode *inode) static inline bool is_data_inode(const struct btrfs_inode *inode)
{ {
return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID; return btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID;
} }
static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
@ -455,8 +514,8 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv); u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len, struct btrfs_file_extent *file_extent,
u64 *ram_bytes, bool nowait, bool strict); bool nowait, bool strict);
void btrfs_del_delalloc_inode(struct btrfs_inode *inode); void btrfs_del_delalloc_inode(struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
@ -515,9 +574,9 @@ void btrfs_free_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void); int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void); void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget_path(struct super_block *s, u64 ino, struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
struct btrfs_root *root, struct btrfs_path *path); struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root); struct inode *btrfs_iget(u64 ino, struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, u64 start, u64 len); struct page *page, u64 start, u64 len);
int btrfs_update_inode(struct btrfs_trans_handle *trans, int btrfs_update_inode(struct btrfs_trans_handle *trans,
@ -551,10 +610,6 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded); const struct btrfs_ioctl_encoded_io_args *encoded);
ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
size_t done_before);
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
size_t done_before);
struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino); struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino);
extern const struct dentry_operations btrfs_dentry_operations; extern const struct dentry_operations btrfs_dentry_operations;
@ -571,5 +626,10 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags);
void btrfs_update_inode_bytes(struct btrfs_inode *inode, const u64 add_bytes, void btrfs_update_inode_bytes(struct btrfs_inode *inode, const u64 add_bytes,
const u64 del_bytes); const u64 del_bytes);
void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end); void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end);
u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
u64 num_bytes);
struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
const struct btrfs_file_extent *file_extent,
int type);
#endif #endif

View File

@ -261,7 +261,7 @@ void btrfs_free_compr_folio(struct folio *folio)
folio_put(folio); folio_put(folio);
} }
static void end_bbio_comprssed_read(struct btrfs_bio *bbio) static void end_bbio_compressed_read(struct btrfs_bio *bbio)
{ {
struct compressed_bio *cb = to_compressed_bio(bbio); struct compressed_bio *cb = to_compressed_bio(bbio);
blk_status_t status = bbio->bio.bi_status; blk_status_t status = bbio->bio.bi_status;
@ -334,7 +334,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
* This also calls the writeback end hooks for the file pages so that metadata * This also calls the writeback end hooks for the file pages so that metadata
* and checksums can be updated in the file. * and checksums can be updated in the file.
*/ */
static void end_bbio_comprssed_write(struct btrfs_bio *bbio) static void end_bbio_compressed_write(struct btrfs_bio *bbio)
{ {
struct compressed_bio *cb = to_compressed_bio(bbio); struct compressed_bio *cb = to_compressed_bio(bbio);
struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
@ -374,7 +374,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
blk_opf_t write_flags, blk_opf_t write_flags,
bool writeback) bool writeback)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct compressed_bio *cb; struct compressed_bio *cb;
@ -383,7 +383,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
cb = alloc_compressed_bio(inode, ordered->file_offset, cb = alloc_compressed_bio(inode, ordered->file_offset,
REQ_OP_WRITE | write_flags, REQ_OP_WRITE | write_flags,
end_bbio_comprssed_write); end_bbio_compressed_write);
cb->start = ordered->file_offset; cb->start = ordered->file_offset;
cb->len = ordered->num_bytes; cb->len = ordered->num_bytes;
cb->compressed_folios = compressed_folios; cb->compressed_folios = compressed_folios;
@ -507,13 +507,15 @@ static noinline int add_ra_bio_pages(struct inode *inode,
*/ */
if (!em || cur < em->start || if (!em || cur < em->start ||
(cur + fs_info->sectorsize > extent_map_end(em)) || (cur + fs_info->sectorsize > extent_map_end(em)) ||
(em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) { (extent_map_block_start(em) >> SECTOR_SHIFT) !=
orig_bio->bi_iter.bi_sector) {
free_extent_map(em); free_extent_map(em);
unlock_extent(tree, cur, page_end, NULL); unlock_extent(tree, cur, page_end, NULL);
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
break; break;
} }
add_size = min(em->start + em->len, page_end + 1) - cur;
free_extent_map(em); free_extent_map(em);
if (page->index == end_index) { if (page->index == end_index) {
@ -526,7 +528,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
} }
} }
add_size = min(em->start + em->len, page_end + 1) - cur;
ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur)); ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur));
if (ret != add_size) { if (ret != add_size) {
unlock_extent(tree, cur, page_end, NULL); unlock_extent(tree, cur, page_end, NULL);
@ -585,12 +586,12 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
} }
ASSERT(extent_map_is_compressed(em)); ASSERT(extent_map_is_compressed(em));
compressed_len = em->block_len; compressed_len = em->disk_num_bytes;
cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ, cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
end_bbio_comprssed_read); end_bbio_compressed_read);
cb->start = em->orig_start; cb->start = em->start - em->offset;
em_len = em->len; em_len = em->len;
em_start = em->start; em_start = em->start;
@ -608,7 +609,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
goto out_free_bio; goto out_free_bio;
} }
ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios, 0); ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios);
if (ret2) { if (ret2) {
ret = BLK_STS_RESOURCE; ret = BLK_STS_RESOURCE;
goto out_free_compressed_pages; goto out_free_compressed_pages;
@ -1506,7 +1507,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
* *
* Return non-zero if the compression should be done, 0 otherwise. * Return non-zero if the compression should be done, 0 otherwise.
*/ */
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
{ {
struct list_head *ws_list = get_workspace(0, 0); struct list_head *ws_list = get_workspace(0, 0);
struct heuristic_ws *ws; struct heuristic_ws *ws;
@ -1516,7 +1517,7 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
ws = list_entry(ws_list, struct heuristic_ws, list); ws = list_entry(ws_list, struct heuristic_ws, list);
heuristic_collect_sample(inode, start, end, ws); heuristic_collect_sample(&inode->vfs_inode, start, end, ws);
if (sample_repeated_patterns(ws)) { if (sample_repeated_patterns(ws)) {
ret = 1; ret = 1;

View File

@ -144,7 +144,7 @@ extern const struct btrfs_compress_op btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type); const char* btrfs_compress_type2str(enum btrfs_compression_type type);
bool btrfs_compress_is_valid_type(const char *str, size_t len); bool btrfs_compress_is_valid_type(const char *str, size_t len);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start, int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
struct folio **in_folio_ret); struct folio **in_folio_ret);

View File

@ -321,7 +321,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid); trans->transid != fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != root->last_trans); trans->transid != btrfs_get_root_last_trans(root));
level = btrfs_header_level(buf); level = btrfs_header_level(buf);
if (level == 0) if (level == 0)
@ -417,7 +417,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
u64 refs; u64 refs;
u64 owner; u64 owner;
u64 flags; u64 flags;
u64 new_flags = 0;
int ret; int ret;
/* /*
@ -462,8 +461,16 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
} }
owner = btrfs_header_owner(buf); owner = btrfs_header_owner(buf);
BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && if (unlikely(owner == BTRFS_TREE_RELOC_OBJECTID &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))) {
btrfs_crit(fs_info,
"found tree block at bytenr %llu level %d root %llu refs %llu flags %llx without full backref flag set",
buf->start, btrfs_header_level(buf),
btrfs_root_id(root), refs, flags);
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
}
if (refs > 1) { if (refs > 1) {
if ((owner == btrfs_root_id(root) || if ((owner == btrfs_root_id(root) ||
@ -481,7 +488,10 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (ret) if (ret)
return ret; return ret;
} }
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; ret = btrfs_set_disk_extent_flags(trans, buf,
BTRFS_BLOCK_FLAG_FULL_BACKREF);
if (ret)
return ret;
} else { } else {
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
@ -491,11 +501,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (ret) if (ret)
return ret; return ret;
} }
if (new_flags != 0) {
ret = btrfs_set_disk_extent_flags(trans, buf, new_flags);
if (ret)
return ret;
}
} else { } else {
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
@ -551,7 +556,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != fs_info->running_transaction->transid); trans->transid != fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
trans->transid != root->last_trans); trans->transid != btrfs_get_root_last_trans(root));
level = btrfs_header_level(buf); level = btrfs_header_level(buf);
@ -588,19 +593,15 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) { if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
return ret; goto error_unlock_cow;
} }
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow); ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) { if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
return ret; goto error_unlock_cow;
} }
} }
@ -612,27 +613,27 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true); ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
if (ret < 0) { if (ret < 0) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
return ret; goto error_unlock_cow;
} }
atomic_inc(&cow->refs); atomic_inc(&cow->refs);
rcu_assign_pointer(root->node, cow); rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, btrfs_root_id(root), buf, ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
parent_start, last_ref); parent_start, last_ref);
free_extent_buffer(buf); free_extent_buffer(buf);
add_root_to_dirty_list(root); add_root_to_dirty_list(root);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
} else { } else {
WARN_ON(trans->transid != btrfs_header_generation(parent)); WARN_ON(trans->transid != btrfs_header_generation(parent));
ret = btrfs_tree_mod_log_insert_key(parent, parent_slot, ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
BTRFS_MOD_LOG_KEY_REPLACE); BTRFS_MOD_LOG_KEY_REPLACE);
if (ret) { if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
return ret; goto error_unlock_cow;
} }
btrfs_set_node_blockptr(parent, parent_slot, btrfs_set_node_blockptr(parent, parent_slot,
cow->start); cow->start);
@ -642,14 +643,16 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
if (last_ref) { if (last_ref) {
ret = btrfs_tree_mod_log_free_eb(buf); ret = btrfs_tree_mod_log_free_eb(buf);
if (ret) { if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
return ret; goto error_unlock_cow;
} }
} }
btrfs_free_tree_block(trans, btrfs_root_id(root), buf, ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
parent_start, last_ref); parent_start, last_ref);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
} }
if (unlock_orig) if (unlock_orig)
btrfs_tree_unlock(buf); btrfs_tree_unlock(buf);
@ -657,6 +660,11 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(trans, cow); btrfs_mark_buffer_dirty(trans, cow);
*cow_ret = cow; *cow_ret = cow;
return 0; return 0;
error_unlock_cow:
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
return ret;
} }
static inline int should_cow_block(struct btrfs_trans_handle *trans, static inline int should_cow_block(struct btrfs_trans_handle *trans,
@ -983,9 +991,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
free_extent_buffer(mid); free_extent_buffer(mid);
root_sub_used_bytes(root); root_sub_used_bytes(root);
btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
/* once for the root ptr */ /* once for the root ptr */
free_extent_buffer_stale(mid); free_extent_buffer_stale(mid);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
return 0; return 0;
} }
if (btrfs_header_nritems(mid) > if (btrfs_header_nritems(mid) >
@ -1053,10 +1065,14 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto out; goto out;
} }
root_sub_used_bytes(root); root_sub_used_bytes(root);
btrfs_free_tree_block(trans, btrfs_root_id(root), right, ret = btrfs_free_tree_block(trans, btrfs_root_id(root),
0, 1); right, 0, 1);
free_extent_buffer_stale(right); free_extent_buffer_stale(right);
right = NULL; right = NULL;
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
} else { } else {
struct btrfs_disk_key right_key; struct btrfs_disk_key right_key;
btrfs_node_key(right, &right_key, 0); btrfs_node_key(right, &right_key, 0);
@ -1111,9 +1127,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto out; goto out;
} }
root_sub_used_bytes(root); root_sub_used_bytes(root);
btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid); free_extent_buffer_stale(mid);
mid = NULL; mid = NULL;
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
} else { } else {
/* update the parent key to reflect our changes */ /* update the parent key to reflect our changes */
struct btrfs_disk_key mid_key; struct btrfs_disk_key mid_key;
@ -1551,12 +1571,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (ret) { if (ret) {
free_extent_buffer(tmp); free_extent_buffer(tmp);
btrfs_release_path(p); btrfs_release_path(p);
return -EIO; return ret;
}
if (btrfs_check_eb_owner(tmp, btrfs_root_id(root))) {
free_extent_buffer(tmp);
btrfs_release_path(p);
return -EUCLEAN;
} }
if (unlock_up) if (unlock_up)
@ -2883,7 +2898,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
old = root->node; old = root->node;
ret = btrfs_tree_mod_log_insert_root(root->node, c, false); ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
if (ret < 0) { if (ret < 0) {
btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1); int ret2;
ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
if (ret2 < 0)
btrfs_abort_transaction(trans, ret2);
btrfs_tree_unlock(c); btrfs_tree_unlock(c);
free_extent_buffer(c); free_extent_buffer(c);
return ret; return ret;
@ -4452,9 +4471,12 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used_bytes(root); root_sub_used_bytes(root);
atomic_inc(&leaf->refs); atomic_inc(&leaf->refs);
btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf); free_extent_buffer_stale(leaf);
return 0; if (ret < 0)
btrfs_abort_transaction(trans, ret);
return ret;
} }
/* /*
* delete the item at the leaf level in path. If that empties * delete the item at the leaf level in path. If that empties

View File

@ -221,9 +221,11 @@ struct btrfs_root {
struct list_head root_list; struct list_head root_list;
spinlock_t inode_lock; /*
/* red-black tree that keeps track of in-memory inodes */ * Xarray that keeps track of in-memory inodes, protected by the lock
struct rb_root inode_tree; * @inode_lock.
*/
struct xarray inodes;
/* /*
* Xarray that keeps track of delayed nodes of every inode, protected * Xarray that keeps track of delayed nodes of every inode, protected
@ -354,6 +356,16 @@ static inline void btrfs_set_root_last_log_commit(struct btrfs_root *root, int c
WRITE_ONCE(root->last_log_commit, commit_id); WRITE_ONCE(root->last_log_commit, commit_id);
} }
static inline u64 btrfs_get_root_last_trans(const struct btrfs_root *root)
{
return READ_ONCE(root->last_trans);
}
static inline void btrfs_set_root_last_trans(struct btrfs_root *root, u64 transid)
{
WRITE_ONCE(root->last_trans, transid);
}
/* /*
* Structure that conveys information about an extent that is going to replace * Structure that conveys information about an extent that is going to replace
* all the extents in a file range. * all the extents in a file range.

View File

@ -139,7 +139,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (trans) if (trans)
transid = trans->transid; transid = trans->transid;
else else
transid = inode->root->last_trans; transid = btrfs_get_root_last_trans(root);
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
if (!defrag) if (!defrag)
@ -255,7 +255,7 @@ again:
goto cleanup; goto cleanup;
} }
inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root); inode = btrfs_iget(defrag->ino, inode_root);
btrfs_put_root(inode_root); btrfs_put_root(inode_root);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
ret = PTR_ERR(inode); ret = PTR_ERR(inode);
@ -707,8 +707,10 @@ iterate:
*/ */
if (key.offset > start) { if (key.offset > start) {
em->start = start; em->start = start;
em->orig_start = start; em->disk_bytenr = EXTENT_MAP_HOLE;
em->block_start = EXTENT_MAP_HOLE; em->disk_num_bytes = 0;
em->ram_bytes = 0;
em->offset = 0;
em->len = key.offset - start; em->len = key.offset - start;
break; break;
} }
@ -825,7 +827,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
*/ */
next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked); next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
/* No more em or hole */ /* No more em or hole */
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) if (!next || next->disk_bytenr >= EXTENT_MAP_LAST_BYTE)
goto out; goto out;
if (next->flags & EXTENT_FLAG_PREALLOC) if (next->flags & EXTENT_FLAG_PREALLOC)
goto out; goto out;
@ -992,12 +994,12 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
* This is for users who want to convert inline extents to * This is for users who want to convert inline extents to
* regular ones through max_inline= mount option. * regular ones through max_inline= mount option.
*/ */
if (em->block_start == EXTENT_MAP_INLINE && if (em->disk_bytenr == EXTENT_MAP_INLINE &&
em->len <= inode->root->fs_info->max_inline) em->len <= inode->root->fs_info->max_inline)
goto next; goto next;
/* Skip holes and preallocated extents. */ /* Skip holes and preallocated extents. */
if (em->block_start == EXTENT_MAP_HOLE || if (em->disk_bytenr == EXTENT_MAP_HOLE ||
(em->flags & EXTENT_FLAG_PREALLOC)) (em->flags & EXTENT_FLAG_PREALLOC))
goto next; goto next;
@ -1062,7 +1064,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
* So if an inline extent passed all above checks, just add it * So if an inline extent passed all above checks, just add it
* for defrag, and be converted to regular extents. * for defrag, and be converted to regular extents.
*/ */
if (em->block_start == EXTENT_MAP_INLINE) if (em->disk_bytenr == EXTENT_MAP_INLINE)
goto add; goto add;
next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,

View File

@ -111,7 +111,7 @@
* making error handling and cleanup easier. * making error handling and cleanup easier.
*/ */
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes)
{ {
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;

View File

@ -9,7 +9,7 @@ struct extent_changeset;
struct btrfs_inode; struct btrfs_inode;
struct btrfs_fs_info; struct btrfs_fs_info;
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes); int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes);
int btrfs_check_data_free_space(struct btrfs_inode *inode, int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len, struct extent_changeset **reserved, u64 start, u64 len,
bool noflush); bool noflush);

View File

@ -77,14 +77,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
return node; return node;
} }
spin_lock(&root->inode_lock); xa_lock(&root->delayed_nodes);
node = xa_load(&root->delayed_nodes, ino); node = xa_load(&root->delayed_nodes, ino);
if (node) { if (node) {
if (btrfs_inode->delayed_node) { if (btrfs_inode->delayed_node) {
refcount_inc(&node->refs); /* can be accessed */ refcount_inc(&node->refs); /* can be accessed */
BUG_ON(btrfs_inode->delayed_node != node); BUG_ON(btrfs_inode->delayed_node != node);
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
return node; return node;
} }
@ -111,10 +111,10 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
node = NULL; node = NULL;
} }
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
return node; return node;
} }
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
return NULL; return NULL;
} }
@ -148,21 +148,21 @@ again:
kmem_cache_free(delayed_node_cache, node); kmem_cache_free(delayed_node_cache, node);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
spin_lock(&root->inode_lock); xa_lock(&root->delayed_nodes);
ptr = xa_load(&root->delayed_nodes, ino); ptr = xa_load(&root->delayed_nodes, ino);
if (ptr) { if (ptr) {
/* Somebody inserted it, go back and read it. */ /* Somebody inserted it, go back and read it. */
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
kmem_cache_free(delayed_node_cache, node); kmem_cache_free(delayed_node_cache, node);
node = NULL; node = NULL;
goto again; goto again;
} }
ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC); ptr = __xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
ASSERT(xa_err(ptr) != -EINVAL); ASSERT(xa_err(ptr) != -EINVAL);
ASSERT(xa_err(ptr) != -ENOMEM); ASSERT(xa_err(ptr) != -ENOMEM);
ASSERT(ptr == NULL); ASSERT(ptr == NULL);
btrfs_inode->delayed_node = node; btrfs_inode->delayed_node = node;
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
return node; return node;
} }
@ -275,14 +275,12 @@ static void __btrfs_release_delayed_node(
if (refcount_dec_and_test(&delayed_node->refs)) { if (refcount_dec_and_test(&delayed_node->refs)) {
struct btrfs_root *root = delayed_node->root; struct btrfs_root *root = delayed_node->root;
spin_lock(&root->inode_lock); xa_erase(&root->delayed_nodes, delayed_node->inode_id);
/* /*
* Once our refcount goes to zero, nobody is allowed to bump it * Once our refcount goes to zero, nobody is allowed to bump it
* back up. We can delete it now. * back up. We can delete it now.
*/ */
ASSERT(refcount_read(&delayed_node->refs) == 0); ASSERT(refcount_read(&delayed_node->refs) == 0);
xa_erase(&root->delayed_nodes, delayed_node->inode_id);
spin_unlock(&root->inode_lock);
kmem_cache_free(delayed_node_cache, delayed_node); kmem_cache_free(delayed_node_cache, delayed_node);
} }
} }
@ -1471,7 +1469,7 @@ static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len, const char *name, int name_len,
struct btrfs_inode *dir, struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 flags, const struct btrfs_disk_key *disk_key, u8 flags,
u64 index) u64 index)
{ {
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
@ -1684,7 +1682,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
return 0; return 0;
} }
bool btrfs_readdir_get_delayed_items(struct inode *inode, bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
u64 last_index, u64 last_index,
struct list_head *ins_list, struct list_head *ins_list,
struct list_head *del_list) struct list_head *del_list)
@ -1692,7 +1690,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
struct btrfs_delayed_node *delayed_node; struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item; struct btrfs_delayed_item *item;
delayed_node = btrfs_get_delayed_node(BTRFS_I(inode)); delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node) if (!delayed_node)
return false; return false;
@ -1700,8 +1698,8 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
* We can only do one readdir with delayed items at a time because of * We can only do one readdir with delayed items at a time because of
* item->readdir_list. * item->readdir_list.
*/ */
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
btrfs_inode_lock(BTRFS_I(inode), 0); btrfs_inode_lock(inode, 0);
mutex_lock(&delayed_node->mutex); mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node); item = __btrfs_first_delayed_insertion_item(delayed_node);
@ -1732,7 +1730,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
return true; return true;
} }
void btrfs_readdir_put_delayed_items(struct inode *inode, void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list, struct list_head *ins_list,
struct list_head *del_list) struct list_head *del_list)
{ {
@ -1754,10 +1752,10 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
* The VFS is going to do up_read(), so we need to downgrade back to a * The VFS is going to do up_read(), so we need to downgrade back to a
* read lock. * read lock.
*/ */
downgrade_write(&inode->i_rwsem); downgrade_write(&inode->vfs_inode.i_rwsem);
} }
int btrfs_should_delete_dir_index(struct list_head *del_list, int btrfs_should_delete_dir_index(const struct list_head *del_list,
u64 index) u64 index)
{ {
struct btrfs_delayed_item *curr; struct btrfs_delayed_item *curr;
@ -1778,7 +1776,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
* Read dir info stored in the delayed tree. * Read dir info stored in the delayed tree.
*/ */
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list) const struct list_head *ins_list)
{ {
struct btrfs_dir_item *di; struct btrfs_dir_item *di;
struct btrfs_delayed_item *curr, *next; struct btrfs_delayed_item *curr, *next;
@ -1916,7 +1914,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime); BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime);
inode->i_generation = BTRFS_I(inode)->generation; inode->i_generation = BTRFS_I(inode)->generation;
BTRFS_I(inode)->index_cnt = (u64)-1; if (S_ISDIR(inode->i_mode))
BTRFS_I(inode)->index_cnt = (u64)-1;
mutex_unlock(&delayed_node->mutex); mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node); btrfs_release_delayed_node(delayed_node);
@ -2057,9 +2056,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
struct btrfs_delayed_node *node; struct btrfs_delayed_node *node;
int count; int count;
spin_lock(&root->inode_lock); xa_lock(&root->delayed_nodes);
if (xa_empty(&root->delayed_nodes)) { if (xa_empty(&root->delayed_nodes)) {
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
return; return;
} }
@ -2076,7 +2075,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
if (count >= ARRAY_SIZE(delayed_nodes)) if (count >= ARRAY_SIZE(delayed_nodes))
break; break;
} }
spin_unlock(&root->inode_lock); xa_unlock(&root->delayed_nodes);
index++; index++;
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {

View File

@ -110,7 +110,7 @@ void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root);
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len, const char *name, int name_len,
struct btrfs_inode *dir, struct btrfs_inode *dir,
struct btrfs_disk_key *disk_key, u8 flags, const struct btrfs_disk_key *disk_key, u8 flags,
u64 index); u64 index);
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
@ -143,17 +143,17 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info); void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
/* Used for readdir() */ /* Used for readdir() */
bool btrfs_readdir_get_delayed_items(struct inode *inode, bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
u64 last_index, u64 last_index,
struct list_head *ins_list, struct list_head *ins_list,
struct list_head *del_list); struct list_head *del_list);
void btrfs_readdir_put_delayed_items(struct inode *inode, void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list, struct list_head *ins_list,
struct list_head *del_list); struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list, int btrfs_should_delete_dir_index(const struct list_head *del_list,
u64 index); u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list); const struct list_head *ins_list);
/* Used during directory logging. */ /* Used during directory logging. */
void btrfs_log_get_delayed_items(struct btrfs_inode *inode, void btrfs_log_get_delayed_items(struct btrfs_inode *inode,

View File

@ -194,48 +194,6 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
0, released, 0); 0, released, 0);
} }
/*
* Transfer bytes to our delayed refs rsv.
*
* @fs_info: the filesystem
* @num_bytes: number of bytes to transfer
*
* This transfers up to the num_bytes amount, previously reserved, to the
* delayed_refs_rsv. Any extra bytes are returned to the space info.
*/
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
u64 num_bytes)
{
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
u64 to_free = 0;
spin_lock(&delayed_refs_rsv->lock);
if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
u64 delta = delayed_refs_rsv->size -
delayed_refs_rsv->reserved;
if (num_bytes > delta) {
to_free = num_bytes - delta;
num_bytes = delta;
}
} else {
to_free = num_bytes;
num_bytes = 0;
}
if (num_bytes)
delayed_refs_rsv->reserved += num_bytes;
if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
delayed_refs_rsv->full = true;
spin_unlock(&delayed_refs_rsv->lock);
if (num_bytes)
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
0, num_bytes, 1);
if (to_free)
btrfs_space_info_free_bytes_may_use(fs_info,
delayed_refs_rsv->space_info, to_free);
}
/* /*
* Refill based on our delayed refs usage. * Refill based on our delayed refs usage.
* *
@ -861,6 +819,12 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
spin_lock_init(&head_ref->lock); spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex); mutex_init(&head_ref->mutex);
/* If not metadata set an impossible level to help debugging. */
if (generic_ref->type == BTRFS_REF_METADATA)
head_ref->level = generic_ref->tree_ref.level;
else
head_ref->level = U8_MAX;
if (qrecord) { if (qrecord) {
if (generic_ref->ref_root && reserved) { if (generic_ref->ref_root && reserved) {
qrecord->data_rsv = reserved; qrecord->data_rsv = reserved;
@ -1114,7 +1078,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
} }
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 bytenr, u64 num_bytes, u8 level,
struct btrfs_delayed_extent_op *extent_op) struct btrfs_delayed_extent_op *extent_op)
{ {
struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_head *head_ref;
@ -1124,6 +1088,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
.action = BTRFS_UPDATE_DELAYED_HEAD, .action = BTRFS_UPDATE_DELAYED_HEAD,
.bytenr = bytenr, .bytenr = bytenr,
.num_bytes = num_bytes, .num_bytes = num_bytes,
.tree_ref.level = level,
}; };
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);

View File

@ -108,7 +108,6 @@ struct btrfs_delayed_ref_node {
struct btrfs_delayed_extent_op { struct btrfs_delayed_extent_op {
struct btrfs_disk_key key; struct btrfs_disk_key key;
u8 level;
bool update_key; bool update_key;
bool update_flags; bool update_flags;
u64 flags_to_set; u64 flags_to_set;
@ -172,6 +171,9 @@ struct btrfs_delayed_ref_head {
*/ */
u64 reserved_bytes; u64 reserved_bytes;
/* Tree block level, for metadata only. */
u8 level;
/* /*
* when a new extent is allocated, it is just reserved in memory * when a new extent is allocated, it is just reserved in memory
* The actual extent isn't inserted into the extent allocation tree * The actual extent isn't inserted into the extent allocation tree
@ -355,7 +357,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref, struct btrfs_ref *generic_ref,
u64 reserved); u64 reserved);
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 bytenr, u64 num_bytes, u8 level,
struct btrfs_delayed_extent_op *extent_op); struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info, void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_root *delayed_refs,
@ -386,8 +388,6 @@ void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
enum btrfs_reserve_flush_enum flush); enum btrfs_reserve_flush_enum flush);
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
u64 num_bytes);
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)

View File

@ -684,7 +684,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
if (ret) if (ret)
btrfs_err(fs_info, "kobj add dev failed %d", ret); btrfs_err(fs_info, "kobj add dev failed %d", ret);
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
/* /*
* Commit dev_replace state and reserve 1 item for it. * Commit dev_replace state and reserve 1 item for it.
@ -880,7 +880,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret; return ret;
} }
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
/* /*
* We have to use this loop approach because at this point src_device * We have to use this loop approach because at this point src_device

View File

@ -22,7 +22,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
*trans, *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_path *path,
struct btrfs_key *cpu_key, const struct btrfs_key *cpu_key,
u32 data_size, u32 data_size,
const char *name, const char *name,
int name_len) int name_len)
@ -108,7 +108,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
*/ */
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
const struct fscrypt_str *name, struct btrfs_inode *dir, const struct fscrypt_str *name, struct btrfs_inode *dir,
struct btrfs_key *location, u8 type, u64 index) const struct btrfs_key *location, u8 type, u64 index)
{ {
int ret = 0; int ret = 0;
int ret2 = 0; int ret2 = 0;
@ -379,7 +379,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
* for a specific name. * for a specific name.
*/ */
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, const struct btrfs_path *path,
const char *name, int name_len) const char *name, int name_len)
{ {
struct btrfs_dir_item *dir_item; struct btrfs_dir_item *dir_item;
@ -417,7 +417,7 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_path *path,
struct btrfs_dir_item *di) const struct btrfs_dir_item *di)
{ {
struct extent_buffer *leaf; struct extent_buffer *leaf;

View File

@ -17,7 +17,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const struct fscrypt_str *name); const struct fscrypt_str *name);
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
const struct fscrypt_str *name, struct btrfs_inode *dir, const struct fscrypt_str *name, struct btrfs_inode *dir,
struct btrfs_key *location, u8 type, u64 index); const struct btrfs_key *location, u8 type, u64 index);
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_path *path, u64 dir, struct btrfs_path *path, u64 dir,
@ -33,7 +33,7 @@ struct btrfs_dir_item *btrfs_search_dir_index_item(struct btrfs_root *root,
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_path *path,
struct btrfs_dir_item *di); const struct btrfs_dir_item *di);
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct btrfs_path *path, u64 objectid, struct btrfs_path *path, u64 objectid,
@ -45,7 +45,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
const char *name, u16 name_len, const char *name, u16 name_len,
int mod); int mod);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, const struct btrfs_path *path,
const char *name, const char *name,
int name_len); int name_len);

1052
fs/btrfs/direct-io.c Normal file

File diff suppressed because it is too large Load Diff

14
fs/btrfs/direct-io.h Normal file
View File

@ -0,0 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_DIRECT_IO_H
#define BTRFS_DIRECT_IO_H

#include <linux/types.h>

/*
 * Forward declarations: only pointers to these types are used below, so the
 * header does not need (and must not silently rely on) the includer having
 * pulled in their full definitions first. Without them the struct tags would
 * be declared with prototype scope inside the parameter lists.
 */
struct kiocb;
struct iov_iter;

/*
 * Init/teardown pair for the direct IO code.
 * NOTE(review): the implementation lives in direct-io.c, which is not visible
 * here; presumably these set up and release module-wide direct IO resources.
 * btrfs_init_dio() returns an int status.
 */
int __init btrfs_init_dio(void);
void __cold btrfs_destroy_dio(void);

/*
 * Direct IO write/read entry points taking the request in @iocb and the data
 * iterator in @from / @to, returning an ssize_t.
 * NOTE(review): presumably bytes transferred or a negative errno - confirm
 * against direct-io.c.
 */
ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from);
ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to);

#endif /* BTRFS_DIRECT_IO_H */

View File

@ -213,7 +213,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
* structure for details. * structure for details.
*/ */
int btrfs_read_extent_buffer(struct extent_buffer *eb, int btrfs_read_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check) const struct btrfs_tree_parent_check *check)
{ {
struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_info *fs_info = eb->fs_info;
int failed = 0; int failed = 0;
@ -358,7 +358,7 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
/* Do basic extent buffer checks at read time */ /* Do basic extent buffer checks at read time */
int btrfs_validate_extent_buffer(struct extent_buffer *eb, int btrfs_validate_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check) const struct btrfs_tree_parent_check *check)
{ {
struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start; u64 found_start;
@ -367,6 +367,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
u8 result[BTRFS_CSUM_SIZE]; u8 result[BTRFS_CSUM_SIZE];
const u8 *header_csum; const u8 *header_csum;
int ret = 0; int ret = 0;
const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS);
ASSERT(check); ASSERT(check);
@ -399,13 +400,16 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
if (memcmp(result, header_csum, csum_size) != 0) { if (memcmp(result, header_csum, csum_size) != 0) {
btrfs_warn_rl(fs_info, btrfs_warn_rl(fs_info,
"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d", "checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s",
eb->start, eb->read_mirror, eb->start, eb->read_mirror,
CSUM_FMT_VALUE(csum_size, header_csum), CSUM_FMT_VALUE(csum_size, header_csum),
CSUM_FMT_VALUE(csum_size, result), CSUM_FMT_VALUE(csum_size, result),
btrfs_header_level(eb)); btrfs_header_level(eb),
ret = -EUCLEAN; ignore_csum ? ", ignored" : "");
goto out; if (!ignore_csum) {
ret = -EUCLEAN;
goto out;
}
} }
if (found_level != check->level) { if (found_level != check->level) {
@ -425,7 +429,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
goto out; goto out;
} }
if (check->has_first_key) { if (check->has_first_key) {
struct btrfs_key *expect_key = &check->first_key; const struct btrfs_key *expect_key = &check->first_key;
struct btrfs_key found_key; struct btrfs_key found_key;
if (found_level) if (found_level)
@ -635,10 +639,6 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
free_extent_buffer_stale(buf); free_extent_buffer_stale(buf);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
if (btrfs_check_eb_owner(buf, check->owner_root)) {
free_extent_buffer_stale(buf);
return ERR_PTR(-EUCLEAN);
}
return buf; return buf;
} }
@ -658,11 +658,11 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->state = 0; root->state = 0;
RB_CLEAR_NODE(&root->rb_node); RB_CLEAR_NODE(&root->rb_node);
root->last_trans = 0; btrfs_set_root_last_trans(root, 0);
root->free_objectid = 0; root->free_objectid = 0;
root->nr_delalloc_inodes = 0; root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0; root->nr_ordered_extents = 0;
root->inode_tree = RB_ROOT; xa_init(&root->inodes);
xa_init(&root->delayed_nodes); xa_init(&root->delayed_nodes);
btrfs_init_root_block_rsv(root); btrfs_init_root_block_rsv(root);
@ -674,7 +674,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&root->ordered_extents); INIT_LIST_HEAD(&root->ordered_extents);
INIT_LIST_HEAD(&root->ordered_root); INIT_LIST_HEAD(&root->ordered_root);
INIT_LIST_HEAD(&root->reloc_dirty_list); INIT_LIST_HEAD(&root->reloc_dirty_list);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->delalloc_lock); spin_lock_init(&root->delalloc_lock);
spin_lock_init(&root->ordered_extent_lock); spin_lock_init(&root->ordered_extent_lock);
spin_lock_init(&root->accounting_lock); spin_lock_init(&root->accounting_lock);
@ -847,13 +846,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
return btrfs_global_root(fs_info, &key); return btrfs_global_root(fs_info, &key);
} }
/*
 * Select the root returned for block group lookups.
 *
 * If the BLOCK_GROUP_TREE compat_ro feature is not set on @fs_info, the result
 * of btrfs_extent_root() for bytenr 0 is returned; otherwise the dedicated
 * fs_info->block_group_root is returned.
 */
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
{
	/* No dedicated block group tree: fall back to the extent root. */
	if (!btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
		return btrfs_extent_root(fs_info, 0);

	return fs_info->block_group_root;
}
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid) u64 objectid)
{ {
@ -1010,7 +1002,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return ret; return ret;
} }
log_root->last_trans = trans->transid; btrfs_set_root_last_trans(log_root, trans->transid);
log_root->root_key.offset = btrfs_root_id(root); log_root->root_key.offset = btrfs_root_id(root);
inode_item = &log_root->root_item.inode; inode_item = &log_root->root_item.inode;
@ -1033,7 +1025,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
struct btrfs_path *path, struct btrfs_path *path,
struct btrfs_key *key) const struct btrfs_key *key)
{ {
struct btrfs_root *root; struct btrfs_root *root;
struct btrfs_tree_parent_check check = { 0 }; struct btrfs_tree_parent_check check = { 0 };
@ -1095,7 +1087,7 @@ fail:
} }
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_key *key) const struct btrfs_key *key)
{ {
struct btrfs_root *root; struct btrfs_root *root;
struct btrfs_path *path; struct btrfs_path *path;
@ -1230,7 +1222,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
return ret; return ret;
} }
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info) void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info)
{ {
#ifdef CONFIG_BTRFS_DEBUG #ifdef CONFIG_BTRFS_DEBUG
struct btrfs_root *root; struct btrfs_root *root;
@ -1854,7 +1846,8 @@ void btrfs_put_root(struct btrfs_root *root)
return; return;
if (refcount_dec_and_test(&root->refs)) { if (refcount_dec_and_test(&root->refs)) {
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (WARN_ON(!xa_empty(&root->inodes)))
xa_destroy(&root->inodes);
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
if (root->anon_dev) if (root->anon_dev)
free_anon_bdev(root->anon_dev); free_anon_bdev(root->anon_dev);
@ -1928,7 +1921,7 @@ static int btrfs_init_btree_inode(struct super_block *sb)
if (!inode) if (!inode)
return -ENOMEM; return -ENOMEM;
inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; btrfs_set_inode_number(BTRFS_I(inode), BTRFS_BTREE_INODE_OBJECTID);
set_nlink(inode, 1); set_nlink(inode, 1);
/* /*
* we set the i_size on the btree inode to the max possible int. * we set the i_size on the btree inode to the max possible int.
@ -1939,15 +1932,11 @@ static int btrfs_init_btree_inode(struct super_block *sb)
inode->i_mapping->a_ops = &btree_aops; inode->i_mapping->a_ops = &btree_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
IO_TREE_BTREE_INODE_IO); IO_TREE_BTREE_INODE_IO);
extent_map_tree_init(&BTRFS_I(inode)->extent_tree); extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root); BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
BTRFS_I(inode)->location.type = 0;
BTRFS_I(inode)->location.offset = 0;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
__insert_inode_hash(inode, hash); __insert_inode_hash(inode, hash);
fs_info->btree_inode = inode; fs_info->btree_inode = inode;
@ -2146,7 +2135,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
/* If we have IGNOREDATACSUMS skip loading these roots. */ /* If we have IGNOREDATACSUMS skip loading these roots. */
if (objectid == BTRFS_CSUM_TREE_OBJECTID && if (objectid == BTRFS_CSUM_TREE_OBJECTID &&
btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state); set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
return 0; return 0;
} }
@ -2199,7 +2188,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
if (!found || ret) { if (!found || ret) {
if (objectid == BTRFS_CSUM_TREE_OBJECTID) if (objectid == BTRFS_CSUM_TREE_OBJECTID)
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state); set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
ret = ret ? ret : -ENOENT; ret = ret ? ret : -ENOENT;
@ -2350,21 +2339,29 @@ out:
* 1, 2 2nd and 3rd backup copy * 1, 2 2nd and 3rd backup copy
* -1 skip bytenr check * -1 skip bytenr check
*/ */
int btrfs_validate_super(struct btrfs_fs_info *fs_info, int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
struct btrfs_super_block *sb, int mirror_num) const struct btrfs_super_block *sb, int mirror_num)
{ {
u64 nodesize = btrfs_super_nodesize(sb); u64 nodesize = btrfs_super_nodesize(sb);
u64 sectorsize = btrfs_super_sectorsize(sb); u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0; int ret = 0;
const bool ignore_flags = btrfs_test_opt(fs_info, IGNORESUPERFLAGS);
if (btrfs_super_magic(sb) != BTRFS_MAGIC) { if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
btrfs_err(fs_info, "no valid FS found"); btrfs_err(fs_info, "no valid FS found");
ret = -EINVAL; ret = -EINVAL;
} }
if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) { if ((btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)) {
btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu", if (!ignore_flags) {
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); btrfs_err(fs_info,
ret = -EINVAL; "unrecognized or unsupported super flag 0x%llx",
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
ret = -EINVAL;
} else {
btrfs_info(fs_info,
"unrecognized or unsupported super flags: 0x%llx, ignored",
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
}
} }
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_err(fs_info, "tree_root level too big: %d >= %d",
@ -2467,7 +2464,7 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
(!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) || (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
!btrfs_fs_incompat(fs_info, NO_HOLES))) { !btrfs_fs_incompat(fs_info, NO_HOLES))) {
btrfs_err(fs_info, btrfs_err(fs_info,
"block-group-tree feature requires fres-space-tree and no-holes"); "block-group-tree feature requires free-space-tree and no-holes");
ret = -EINVAL; ret = -EINVAL;
} }
@ -2882,6 +2879,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (sb_rdonly(sb)) if (sb_rdonly(sb))
set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
if (btrfs_test_opt(fs_info, IGNOREMETACSUMS))
set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state);
return btrfs_alloc_stripe_hash_table(fs_info); return btrfs_alloc_stripe_hash_table(fs_info);
} }
@ -2927,22 +2926,22 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{ {
u64 root_objectid = 0; u64 root_objectid = 0;
struct btrfs_root *gang[8]; struct btrfs_root *gang[8];
int i = 0; int ret = 0;
int err = 0;
unsigned int ret = 0;
while (1) { while (1) {
unsigned int found;
spin_lock(&fs_info->fs_roots_radix_lock); spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, found = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid, (void **)gang, root_objectid,
ARRAY_SIZE(gang)); ARRAY_SIZE(gang));
if (!ret) { if (!found) {
spin_unlock(&fs_info->fs_roots_radix_lock); spin_unlock(&fs_info->fs_roots_radix_lock);
break; break;
} }
root_objectid = btrfs_root_id(gang[ret - 1]) + 1; root_objectid = btrfs_root_id(gang[found - 1]) + 1;
for (i = 0; i < ret; i++) { for (int i = 0; i < found; i++) {
/* Avoid to grab roots in dead_roots. */ /* Avoid to grab roots in dead_roots. */
if (btrfs_root_refs(&gang[i]->root_item) == 0) { if (btrfs_root_refs(&gang[i]->root_item) == 0) {
gang[i] = NULL; gang[i] = NULL;
@ -2953,24 +2952,25 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
} }
spin_unlock(&fs_info->fs_roots_radix_lock); spin_unlock(&fs_info->fs_roots_radix_lock);
for (i = 0; i < ret; i++) { for (int i = 0; i < found; i++) {
if (!gang[i]) if (!gang[i])
continue; continue;
root_objectid = btrfs_root_id(gang[i]); root_objectid = btrfs_root_id(gang[i]);
err = btrfs_orphan_cleanup(gang[i]); /*
if (err) * Continue to release the remaining roots after the first
goto out; * error without cleanup and preserve the first error
* for the return.
*/
if (!ret)
ret = btrfs_orphan_cleanup(gang[i]);
btrfs_put_root(gang[i]); btrfs_put_root(gang[i]);
} }
if (ret)
break;
root_objectid++; root_objectid++;
} }
out: return ret;
/* Release the uncleaned roots due to error. */
for (; i < ret; i++) {
if (gang[i])
btrfs_put_root(gang[i]);
}
return err;
} }
/* /*
@ -3204,7 +3204,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
} }
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
char *options) const char *options)
{ {
u32 sectorsize; u32 sectorsize;
u32 nodesize; u32 nodesize;
@ -4157,9 +4157,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
int btrfs_commit_super(struct btrfs_fs_info *fs_info) int btrfs_commit_super(struct btrfs_fs_info *fs_info)
{ {
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
mutex_lock(&fs_info->cleaner_mutex); mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info); btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex); mutex_unlock(&fs_info->cleaner_mutex);
@ -4169,10 +4166,7 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
down_write(&fs_info->cleanup_work_sem); down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem);
trans = btrfs_join_transaction(root); return btrfs_commit_current_transaction(fs_info->tree_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
return btrfs_commit_transaction(trans);
} }
static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info) static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
@ -4533,7 +4527,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
* extents that haven't had their dirty pages IO start writeout yet * extents that haven't had their dirty pages IO start writeout yet
* actually get run and error out properly. * actually get run and error out properly.
*/ */
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
} }
static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,

View File

@ -41,7 +41,7 @@ static inline u64 btrfs_sb_offset(int mirror)
return BTRFS_SUPER_INFO_OFFSET; return BTRFS_SUPER_INFO_OFFSET;
} }
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info); void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info); void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
struct btrfs_tree_parent_check *check); struct btrfs_tree_parent_check *check);
@ -52,12 +52,11 @@ struct extent_buffer *btrfs_find_create_tree_block(
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info); int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *disk_sb); const struct btrfs_super_block *disk_sb);
int __cold open_ctree(struct super_block *sb, int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
struct btrfs_fs_devices *fs_devices, const char *options);
char *options);
void __cold close_ctree(struct btrfs_fs_info *fs_info); void __cold close_ctree(struct btrfs_fs_info *fs_info);
int btrfs_validate_super(struct btrfs_fs_info *fs_info, int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
struct btrfs_super_block *sb, int mirror_num); const struct btrfs_super_block *sb, int mirror_num);
int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount); int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
@ -65,7 +64,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
int copy_num, bool drop_cache); int copy_num, bool drop_cache);
int btrfs_commit_super(struct btrfs_fs_info *fs_info); int btrfs_commit_super(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_key *key); const struct btrfs_key *key);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root); struct btrfs_root *root);
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
@ -83,7 +82,6 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key); struct btrfs_key *key);
struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr); struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr);
struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr); struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
@ -91,7 +89,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root); struct btrfs_root *root);
int btrfs_validate_extent_buffer(struct extent_buffer *eb, int btrfs_validate_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check); const struct btrfs_tree_parent_check *check);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif #endif
@ -118,7 +116,7 @@ void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic); int atomic);
int btrfs_read_extent_buffer(struct extent_buffer *buf, int btrfs_read_extent_buffer(struct extent_buffer *buf,
struct btrfs_tree_parent_check *check); const struct btrfs_tree_parent_check *check);
blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio); blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio);
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans, int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,

View File

@ -40,7 +40,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
if (parent) { if (parent) {
u64 parent_root_id; u64 parent_root_id;
fid->parent_objectid = BTRFS_I(parent)->location.objectid; fid->parent_objectid = btrfs_ino(BTRFS_I(parent));
fid->parent_gen = parent->i_generation; fid->parent_gen = parent->i_generation;
parent_root_id = btrfs_root_id(BTRFS_I(parent)->root); parent_root_id = btrfs_root_id(BTRFS_I(parent)->root);
@ -84,7 +84,7 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
if (IS_ERR(root)) if (IS_ERR(root))
return ERR_CAST(root); return ERR_CAST(root);
inode = btrfs_iget(sb, objectid, root); inode = btrfs_iget(objectid, root);
btrfs_put_root(root); btrfs_put_root(root);
if (IS_ERR(inode)) if (IS_ERR(inode))
return ERR_CAST(inode); return ERR_CAST(inode);
@ -210,7 +210,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
found_key.offset, 0); found_key.offset, 0);
} }
return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root)); return d_obtain_alias(btrfs_iget(key.objectid, root));
fail: fail:
btrfs_free_path(path); btrfs_free_path(path);
return ERR_PTR(ret); return ERR_PTR(ret);

View File

@ -4,6 +4,7 @@
#include <trace/events/btrfs.h> #include <trace/events/btrfs.h>
#include "messages.h" #include "messages.h"
#include "ctree.h" #include "ctree.h"
#include "extent_io.h"
#include "extent-io-tree.h" #include "extent-io-tree.h"
#include "btrfs_inode.h" #include "btrfs_inode.h"
@ -1084,6 +1085,9 @@ again:
*/ */
prealloc = alloc_extent_state(mask); prealloc = alloc_extent_state(mask);
} }
/* Optimistically preallocate the extent changeset ulist node. */
if (changeset)
extent_changeset_prealloc(changeset, mask);
spin_lock(&tree->lock); spin_lock(&tree->lock);
if (cached_state && *cached_state) { if (cached_state && *cached_state) {

File diff suppressed because it is too large Load Diff

View File

@ -127,10 +127,10 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
u64 empty_size, u64 empty_size,
u64 reloc_src_root, u64 reloc_src_root,
enum btrfs_lock_nesting nest); enum btrfs_lock_nesting nest);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans, int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
u64 root_id, u64 root_id,
struct extent_buffer *buf, struct extent_buffer *buf,
u64 parent, int last_ref); u64 parent, int last_ref);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner, struct btrfs_root *root, u64 owner,
u64 offset, u64 ram_bytes, u64 offset, u64 ram_bytes,

File diff suppressed because it is too large Load Diff

View File

@ -215,6 +215,11 @@ static inline struct extent_changeset *extent_changeset_alloc(void)
return ret; return ret;
} }
/*
 * Forward the changeset's range_changed ulist and @gfp_mask to
 * ulist_prealloc().
 *
 * @changeset is dereferenced unconditionally, so the caller must pass a
 * non-NULL pointer.
 */
static inline void extent_changeset_prealloc(struct extent_changeset *changeset,
					     gfp_t gfp_mask)
{
	ulist_prealloc(&changeset->range_changed, gfp_mask);
}
static inline void extent_changeset_release(struct extent_changeset *changeset) static inline void extent_changeset_release(struct extent_changeset *changeset)
{ {
if (!changeset) if (!changeset)
@ -235,15 +240,13 @@ bool try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page); int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio); int btrfs_read_folio(struct file *file, struct folio *folio);
void extent_write_locked_range(struct inode *inode, struct page *locked_page, void extent_write_locked_range(struct inode *inode, const struct page *locked_page,
u64 start, u64 end, struct writeback_control *wbc, u64 start, u64 end, struct writeback_control *wbc,
bool pages_dirty); bool pages_dirty);
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping, int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc); struct writeback_control *wbc);
void btrfs_readahead(struct readahead_control *rac); void btrfs_readahead(struct readahead_control *rac);
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
int set_folio_extent_mapped(struct folio *folio); int set_folio_extent_mapped(struct folio *folio);
int set_page_extent_mapped(struct page *page); int set_page_extent_mapped(struct page *page);
void clear_page_extent_mapped(struct page *page); void clear_page_extent_mapped(struct page *page);
@ -263,7 +266,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_COMPLETE 1 #define WAIT_COMPLETE 1
#define WAIT_PAGE_LOCK 2 #define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
struct btrfs_tree_parent_check *parent_check); const struct btrfs_tree_parent_check *parent_check);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb); void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info, void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root, u64 gen, int level); u64 bytenr, u64 owner_root, u64 gen, int level);
@ -350,9 +353,8 @@ void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
void set_extent_buffer_dirty(struct extent_buffer *eb); void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb); void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb); void clear_extent_buffer_uptodate(struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page, const struct page *locked_page,
struct extent_state **cached, struct extent_state **cached,
u32 bits_to_clear, unsigned long page_ops); u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree, int extent_invalidate_folio(struct extent_io_tree *tree,
@ -361,9 +363,8 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
struct extent_buffer *buf); struct extent_buffer *buf);
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
gfp_t extra_gfp); bool nofail);
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array, int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array);
gfp_t extra_gfp);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode, bool find_lock_delalloc_range(struct inode *inode,

View File

@ -33,7 +33,7 @@ void __cold extent_map_exit(void)
*/ */
void extent_map_tree_init(struct extent_map_tree *tree) void extent_map_tree_init(struct extent_map_tree *tree)
{ {
tree->map = RB_ROOT_CACHED; tree->root = RB_ROOT;
INIT_LIST_HEAD(&tree->modified_extents); INIT_LIST_HEAD(&tree->modified_extents);
rwlock_init(&tree->lock); rwlock_init(&tree->lock);
} }
@ -85,27 +85,24 @@ static void dec_evictable_extent_maps(struct btrfs_inode *inode)
percpu_counter_dec(&fs_info->evictable_extent_maps); percpu_counter_dec(&fs_info->evictable_extent_maps);
} }
static int tree_insert(struct rb_root_cached *root, struct extent_map *em) static int tree_insert(struct rb_root *root, struct extent_map *em)
{ {
struct rb_node **p = &root->rb_root.rb_node; struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
struct extent_map *entry = NULL; struct extent_map *entry = NULL;
struct rb_node *orig_parent = NULL; struct rb_node *orig_parent = NULL;
u64 end = range_end(em->start, em->len); u64 end = range_end(em->start, em->len);
bool leftmost = true;
while (*p) { while (*p) {
parent = *p; parent = *p;
entry = rb_entry(parent, struct extent_map, rb_node); entry = rb_entry(parent, struct extent_map, rb_node);
if (em->start < entry->start) { if (em->start < entry->start)
p = &(*p)->rb_left; p = &(*p)->rb_left;
} else if (em->start >= extent_map_end(entry)) { else if (em->start >= extent_map_end(entry))
p = &(*p)->rb_right; p = &(*p)->rb_right;
leftmost = false; else
} else {
return -EEXIST; return -EEXIST;
}
} }
orig_parent = parent; orig_parent = parent;
@ -128,7 +125,7 @@ static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
return -EEXIST; return -EEXIST;
rb_link_node(&em->rb_node, orig_parent, p); rb_link_node(&em->rb_node, orig_parent, p);
rb_insert_color_cached(&em->rb_node, root, leftmost); rb_insert_color(&em->rb_node, root);
return 0; return 0;
} }
@ -186,11 +183,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
return NULL; return NULL;
} }
/*
 * Length used together with extent_map_block_start() when computing the
 * on-disk end of @em: disk_num_bytes for a compressed extent map, the extent
 * map's own len otherwise.
 */
static inline u64 extent_map_block_len(const struct extent_map *em)
{
	return extent_map_is_compressed(em) ? em->disk_num_bytes : em->len;
}
static inline u64 extent_map_block_end(const struct extent_map *em) static inline u64 extent_map_block_end(const struct extent_map *em)
{ {
if (em->block_start + em->block_len < em->block_start) if (extent_map_block_start(em) + extent_map_block_len(em) <
extent_map_block_start(em))
return (u64)-1; return (u64)-1;
return em->block_start + em->block_len; return extent_map_block_start(em) + extent_map_block_len(em);
} }
static bool can_merge_extent_map(const struct extent_map *em) static bool can_merge_extent_map(const struct extent_map *em)
@ -225,15 +230,106 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
if (prev->flags != next->flags) if (prev->flags != next->flags)
return false; return false;
if (next->block_start < EXTENT_MAP_LAST_BYTE - 1) if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
return next->block_start == extent_map_block_end(prev); return extent_map_block_start(next) == extent_map_block_end(prev);
/* HOLES and INLINE extents. */ /* HOLES and INLINE extents. */
return next->block_start == prev->block_start; return next->disk_bytenr == prev->disk_bytenr;
}
/*
 * Merge the on-disk extent description of @prev and @next.
 *
 * Only disk_bytenr, disk_num_bytes, offset and ram_bytes are modified.
 * For now only uncompressed regular extents can be merged (asserted below).
 *
 * On return both @prev and @next describe the merged on-disk range, so the
 * caller should remove one of them.
 */
static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
{
	u64 prev_disk_end;
	u64 next_disk_end;
	u64 merged_bytenr;
	u64 merged_num_bytes;
	u64 merged_offset;

	/* Neither extent map may be compressed. */
	ASSERT(!extent_map_is_compressed(prev));
	ASSERT(!extent_map_is_compressed(next));

	/*
	 * @prev and @next can be merged in two situations.
	 *
	 * 1) Both refer to the same data extent:
	 *
	 *    |<----- data extent A ----->|
	 *       |<- prev ->|<- next ->|
	 *
	 * 2) They refer to different, but adjacent, data extents:
	 *
	 *    |<-- data extent A -->|<-- data extent B -->|
	 *               |<- prev ->|<- next ->|
	 *
	 * In both cases the data extents are merged first and @offset is then
	 * recomputed relative to the merged data extent.
	 *
	 * For case 1) the merged data extent is unchanged.
	 * For case 2) the two data extents become a single one.
	 */
	prev_disk_end = prev->disk_bytenr + prev->disk_num_bytes;
	next_disk_end = next->disk_bytenr + next->disk_num_bytes;

	merged_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
	merged_num_bytes = max(prev_disk_end, next_disk_end) - merged_bytenr;
	/* Must be derived from @prev before its disk_bytenr is overwritten. */
	merged_offset = prev->disk_bytenr + prev->offset - merged_bytenr;

	prev->disk_bytenr = merged_bytenr;
	next->disk_bytenr = merged_bytenr;

	prev->disk_num_bytes = merged_num_bytes;
	next->disk_num_bytes = merged_num_bytes;

	/* Uncompressed: ram_bytes is set to the on-disk size. */
	prev->ram_bytes = merged_num_bytes;
	next->ram_bytes = merged_num_bytes;

	prev->offset = merged_offset;
	next->offset = merged_offset;
}
/*
 * Report an inconsistent extent map.
 *
 * Only active when CONFIG_BTRFS_DEBUG is enabled: logs every extent map
 * member at critical level, prefixed with @prefix, and then triggers
 * ASSERT(0).
 */
static void dump_extent_map(struct btrfs_fs_info *fs_info, const char *prefix,
			    struct extent_map *em)
{
	if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
		btrfs_crit(fs_info,
"%s, start=%llu len=%llu disk_bytenr=%llu disk_num_bytes=%llu ram_bytes=%llu offset=%llu flags=0x%x",
			   prefix, em->start, em->len, em->disk_bytenr,
			   em->disk_num_bytes, em->ram_bytes, em->offset,
			   em->flags);
		ASSERT(0);
	}
}
/*
 * Internal sanity checks for btrfs debug builds.
 *
 * Does nothing unless CONFIG_BTRFS_DEBUG is enabled. Every violated invariant
 * is reported through dump_extent_map() with a message naming the problem.
 *
 * For an extent map whose disk_bytenr is below EXTENT_MAP_LAST_BYTE:
 * - disk_num_bytes must not be zero;
 * - offset + len must fit inside ram_bytes;
 * - if not compressed, offset + len must also fit inside disk_num_bytes and
 *   ram_bytes must equal disk_num_bytes.
 *
 * For any other extent map (hole/inline, going by the reported message) the
 * offset must be zero.
 */
static void validate_extent_map(struct btrfs_fs_info *fs_info, struct extent_map *em)
{
if (!IS_ENABLED(CONFIG_BTRFS_DEBUG))
return;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
if (em->disk_num_bytes == 0)
dump_extent_map(fs_info, "zero disk_num_bytes", em);
/* The referenced range must lie inside the decompressed extent. */
if (em->offset + em->len > em->ram_bytes)
dump_extent_map(fs_info, "ram_bytes too small", em);
/* Only checked for non-compressed extent maps. */
if (em->offset + em->len > em->disk_num_bytes &&
!extent_map_is_compressed(em))
dump_extent_map(fs_info, "disk_num_bytes too small", em);
if (!extent_map_is_compressed(em) &&
em->ram_bytes != em->disk_num_bytes)
dump_extent_map(fs_info,
"ram_bytes mismatch with disk_num_bytes for non-compressed em",
em);
} else if (em->offset) {
dump_extent_map(fs_info, "non-zero offset for hole/inline", em);
}
} }
static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em) static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *tree = &inode->extent_tree; struct extent_map_tree *tree = &inode->extent_tree;
struct extent_map *merge = NULL; struct extent_map *merge = NULL;
struct rb_node *rb; struct rb_node *rb;
@ -258,14 +354,15 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
merge = rb_entry(rb, struct extent_map, rb_node); merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && can_merge_extent_map(merge) && mergeable_maps(merge, em)) { if (rb && can_merge_extent_map(merge) && mergeable_maps(merge, em)) {
em->start = merge->start; em->start = merge->start;
em->orig_start = merge->orig_start;
em->len += merge->len; em->len += merge->len;
em->block_len += merge->block_len;
em->block_start = merge->block_start;
em->generation = max(em->generation, merge->generation); em->generation = max(em->generation, merge->generation);
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
merge_ondisk_extents(merge, em);
em->flags |= EXTENT_FLAG_MERGED; em->flags |= EXTENT_FLAG_MERGED;
rb_erase_cached(&merge->rb_node, &tree->map); validate_extent_map(fs_info, em);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node); RB_CLEAR_NODE(&merge->rb_node);
free_extent_map(merge); free_extent_map(merge);
dec_evictable_extent_maps(inode); dec_evictable_extent_maps(inode);
@ -277,8 +374,10 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
merge = rb_entry(rb, struct extent_map, rb_node); merge = rb_entry(rb, struct extent_map, rb_node);
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) { if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
em->len += merge->len; em->len += merge->len;
em->block_len += merge->block_len; if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
rb_erase_cached(&merge->rb_node, &tree->map); merge_ondisk_extents(em, merge);
validate_extent_map(fs_info, em);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node); RB_CLEAR_NODE(&merge->rb_node);
em->generation = max(em->generation, merge->generation); em->generation = max(em->generation, merge->generation);
em->flags |= EXTENT_FLAG_MERGED; em->flags |= EXTENT_FLAG_MERGED;
@ -389,7 +488,8 @@ static int add_extent_mapping(struct btrfs_inode *inode,
lockdep_assert_held_write(&tree->lock); lockdep_assert_held_write(&tree->lock);
ret = tree_insert(&tree->map, em); validate_extent_map(fs_info, em);
ret = tree_insert(&tree->root, em);
if (ret) if (ret)
return ret; return ret;
@ -410,7 +510,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
struct rb_node *prev_or_next = NULL; struct rb_node *prev_or_next = NULL;
u64 end = range_end(start, len); u64 end = range_end(start, len);
rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next); rb_node = __tree_search(&tree->root, start, &prev_or_next);
if (!rb_node) { if (!rb_node) {
if (prev_or_next) if (prev_or_next)
rb_node = prev_or_next; rb_node = prev_or_next;
@ -479,7 +579,7 @@ void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
lockdep_assert_held_write(&tree->lock); lockdep_assert_held_write(&tree->lock);
WARN_ON(em->flags & EXTENT_FLAG_PINNED); WARN_ON(em->flags & EXTENT_FLAG_PINNED);
rb_erase_cached(&em->rb_node, &tree->map); rb_erase(&em->rb_node, &tree->root);
if (!(em->flags & EXTENT_FLAG_LOGGING)) if (!(em->flags & EXTENT_FLAG_LOGGING))
list_del_init(&em->list); list_del_init(&em->list);
RB_CLEAR_NODE(&em->rb_node); RB_CLEAR_NODE(&em->rb_node);
@ -492,15 +592,18 @@ static void replace_extent_mapping(struct btrfs_inode *inode,
struct extent_map *new, struct extent_map *new,
int modified) int modified)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *tree = &inode->extent_tree; struct extent_map_tree *tree = &inode->extent_tree;
lockdep_assert_held_write(&tree->lock); lockdep_assert_held_write(&tree->lock);
validate_extent_map(fs_info, new);
WARN_ON(cur->flags & EXTENT_FLAG_PINNED); WARN_ON(cur->flags & EXTENT_FLAG_PINNED);
ASSERT(extent_map_in_tree(cur)); ASSERT(extent_map_in_tree(cur));
if (!(cur->flags & EXTENT_FLAG_LOGGING)) if (!(cur->flags & EXTENT_FLAG_LOGGING))
list_del_init(&cur->list); list_del_init(&cur->list);
rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map); rb_replace_node(&cur->rb_node, &new->rb_node, &tree->root);
RB_CLEAR_NODE(&cur->rb_node); RB_CLEAR_NODE(&cur->rb_node);
setup_extent_mapping(inode, new, modified); setup_extent_mapping(inode, new, modified);
@ -561,11 +664,8 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
start_diff = start - em->start; start_diff = start - em->start;
em->start = start; em->start = start;
em->len = end - start; em->len = end - start;
if (em->block_start < EXTENT_MAP_LAST_BYTE && if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em))
!extent_map_is_compressed(em)) { em->offset += start_diff;
em->block_start += start_diff;
em->block_len = em->len;
}
return add_extent_mapping(inode, em, 0); return add_extent_mapping(inode, em, 0);
} }
@ -600,7 +700,7 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
* Tree-checker should have rejected any inline extent with non-zero * Tree-checker should have rejected any inline extent with non-zero
* file offset. Here just do a sanity check. * file offset. Here just do a sanity check.
*/ */
if (em->block_start == EXTENT_MAP_INLINE) if (em->disk_bytenr == EXTENT_MAP_INLINE)
ASSERT(em->start == 0); ASSERT(em->start == 0);
ret = add_extent_mapping(inode, em, 0); ret = add_extent_mapping(inode, em, 0);
@ -657,18 +757,23 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
static void drop_all_extent_maps_fast(struct btrfs_inode *inode) static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
{ {
struct extent_map_tree *tree = &inode->extent_tree; struct extent_map_tree *tree = &inode->extent_tree;
struct rb_node *node;
write_lock(&tree->lock); write_lock(&tree->lock);
while (!RB_EMPTY_ROOT(&tree->map.rb_root)) { node = rb_first(&tree->root);
while (node) {
struct extent_map *em; struct extent_map *em;
struct rb_node *node; struct rb_node *next = rb_next(node);
node = rb_first_cached(&tree->map);
em = rb_entry(node, struct extent_map, rb_node); em = rb_entry(node, struct extent_map, rb_node);
em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING); em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
remove_extent_mapping(inode, em); remove_extent_mapping(inode, em);
free_extent_map(em); free_extent_map(em);
cond_resched_rwlock_write(&tree->lock);
if (cond_resched_rwlock_write(&tree->lock))
node = rb_first(&tree->root);
else
node = next;
} }
write_unlock(&tree->lock); write_unlock(&tree->lock);
} }
@ -729,7 +834,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
u64 gen; u64 gen;
unsigned long flags; unsigned long flags;
bool modified; bool modified;
bool compressed;
if (em_end < end) { if (em_end < end) {
next_em = next_extent_map(em); next_em = next_extent_map(em);
@ -763,7 +867,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
goto remove_em; goto remove_em;
gen = em->generation; gen = em->generation;
compressed = extent_map_is_compressed(em);
if (em->start < start) { if (em->start < start) {
if (!split) { if (!split) {
@ -775,22 +878,15 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->start = em->start; split->start = em->start;
split->len = start - em->start; split->len = start - em->start;
if (em->block_start < EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
split->orig_start = em->orig_start; split->disk_bytenr = em->disk_bytenr;
split->block_start = em->block_start; split->disk_num_bytes = em->disk_num_bytes;
split->offset = em->offset;
if (compressed)
split->block_len = em->block_len;
else
split->block_len = split->len;
split->orig_block_len = max(split->block_len,
em->orig_block_len);
split->ram_bytes = em->ram_bytes; split->ram_bytes = em->ram_bytes;
} else { } else {
split->orig_start = split->start; split->disk_bytenr = em->disk_bytenr;
split->block_len = 0; split->disk_num_bytes = 0;
split->block_start = em->block_start; split->offset = 0;
split->orig_block_len = 0;
split->ram_bytes = split->len; split->ram_bytes = split->len;
} }
@ -810,30 +906,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
} }
split->start = end; split->start = end;
split->len = em_end - end; split->len = em_end - end;
split->block_start = em->block_start; split->disk_bytenr = em->disk_bytenr;
split->flags = flags; split->flags = flags;
split->generation = gen; split->generation = gen;
if (em->block_start < EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
split->orig_block_len = max(em->block_len, split->disk_num_bytes = em->disk_num_bytes;
em->orig_block_len); split->offset = em->offset + end - em->start;
split->ram_bytes = em->ram_bytes; split->ram_bytes = em->ram_bytes;
if (compressed) {
split->block_len = em->block_len;
split->orig_start = em->orig_start;
} else {
const u64 diff = end - em->start;
split->block_len = split->len;
split->block_start += diff;
split->orig_start = em->orig_start;
}
} else { } else {
split->disk_num_bytes = 0;
split->offset = 0;
split->ram_bytes = split->len; split->ram_bytes = split->len;
split->orig_start = split->start;
split->block_len = 0;
split->orig_block_len = 0;
} }
if (extent_map_in_tree(em)) { if (extent_map_in_tree(em)) {
@ -976,7 +1060,7 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
ASSERT(em->len == len); ASSERT(em->len == len);
ASSERT(!extent_map_is_compressed(em)); ASSERT(!extent_map_is_compressed(em));
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE); ASSERT(em->disk_bytenr < EXTENT_MAP_LAST_BYTE);
ASSERT(em->flags & EXTENT_FLAG_PINNED); ASSERT(em->flags & EXTENT_FLAG_PINNED);
ASSERT(!(em->flags & EXTENT_FLAG_LOGGING)); ASSERT(!(em->flags & EXTENT_FLAG_LOGGING));
ASSERT(!list_empty(&em->list)); ASSERT(!list_empty(&em->list));
@ -987,10 +1071,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* First, replace the em with a new extent_map starting from * em->start */ /* First, replace the em with a new extent_map starting from * em->start */
split_pre->start = em->start; split_pre->start = em->start;
split_pre->len = pre; split_pre->len = pre;
split_pre->orig_start = split_pre->start; split_pre->disk_bytenr = new_logical;
split_pre->block_start = new_logical; split_pre->disk_num_bytes = split_pre->len;
split_pre->block_len = split_pre->len; split_pre->offset = 0;
split_pre->orig_block_len = split_pre->block_len;
split_pre->ram_bytes = split_pre->len; split_pre->ram_bytes = split_pre->len;
split_pre->flags = flags; split_pre->flags = flags;
split_pre->generation = em->generation; split_pre->generation = em->generation;
@ -1005,10 +1088,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* Insert the middle extent_map. */ /* Insert the middle extent_map. */
split_mid->start = em->start + pre; split_mid->start = em->start + pre;
split_mid->len = em->len - pre; split_mid->len = em->len - pre;
split_mid->orig_start = split_mid->start; split_mid->disk_bytenr = extent_map_block_start(em) + pre;
split_mid->block_start = em->block_start + pre; split_mid->disk_num_bytes = split_mid->len;
split_mid->block_len = split_mid->len; split_mid->offset = 0;
split_mid->orig_block_len = split_mid->block_len;
split_mid->ram_bytes = split_mid->len; split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags; split_mid->flags = flags;
split_mid->generation = em->generation; split_mid->generation = em->generation;
@ -1076,12 +1158,12 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
return 0; return 0;
} }
node = rb_first_cached(&tree->map); node = rb_first(&tree->root);
while (node) { while (node) {
struct rb_node *next = rb_next(node);
struct extent_map *em; struct extent_map *em;
em = rb_entry(node, struct extent_map, rb_node); em = rb_entry(node, struct extent_map, rb_node);
node = rb_next(node);
ctx->scanned++; ctx->scanned++;
if (em->flags & EXTENT_FLAG_PINNED) if (em->flags & EXTENT_FLAG_PINNED)
@ -1115,6 +1197,7 @@ next:
*/ */
if (need_resched() || rwlock_needbreak(&tree->lock)) if (need_resched() || rwlock_needbreak(&tree->lock))
break; break;
node = next;
} }
write_unlock(&tree->lock); write_unlock(&tree->lock);
up_read(&inode->i_mmap_lock); up_read(&inode->i_mmap_lock);

View File

@ -4,12 +4,11 @@
#define BTRFS_EXTENT_MAP_H #define BTRFS_EXTENT_MAP_H
#include <linux/compiler_types.h> #include <linux/compiler_types.h>
#include <linux/rwlock_types.h> #include <linux/spinlock_types.h>
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include "misc.h" #include "misc.h"
#include "extent_map.h"
#include "compression.h" #include "compression.h"
struct btrfs_inode; struct btrfs_inode;
@ -62,19 +61,27 @@ struct extent_map {
u64 len; u64 len;
/* /*
* The file offset of the original file extent before splitting. * The bytenr of the full on-disk extent.
* *
* This is an in-memory only member, matching * For regular extents it's btrfs_file_extent_item::disk_bytenr.
* extent_map::start - btrfs_file_extent_item::offset for * For holes it's EXTENT_MAP_HOLE and for inline extents it's
* regular/preallocated extents. EXTENT_MAP_HOLE otherwise. * EXTENT_MAP_INLINE.
*/ */
u64 orig_start; u64 disk_bytenr;
/* /*
* The full on-disk extent length, matching * The full on-disk extent length, matching
* btrfs_file_extent_item::disk_num_bytes. * btrfs_file_extent_item::disk_num_bytes.
*/ */
u64 orig_block_len; u64 disk_num_bytes;
/*
* Offset inside the decompressed extent.
*
* For regular extents it's btrfs_file_extent_item::offset.
* For holes and inline extents it's 0.
*/
u64 offset;
/* /*
* The decompressed size of the whole on-disk extent, matching * The decompressed size of the whole on-disk extent, matching
@ -82,27 +89,6 @@ struct extent_map {
*/ */
u64 ram_bytes; u64 ram_bytes;
/*
* The on-disk logical bytenr for the file extent.
*
* For compressed extents it matches btrfs_file_extent_item::disk_bytenr.
* For uncompressed extents it matches
* btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset
*
* For holes it is EXTENT_MAP_HOLE and for inline extents it is
* EXTENT_MAP_INLINE.
*/
u64 block_start;
/*
* The on-disk length for the file extent.
*
* For compressed extents it matches btrfs_file_extent_item::disk_num_bytes.
* For uncompressed extents it matches extent_map::len.
* For holes and inline extents it's -1 and shouldn't be used.
*/
u64 block_len;
/* /*
* Generation of the extent map, for merged em it's the highest * Generation of the extent map, for merged em it's the highest
* generation of all merged ems. * generation of all merged ems.
@ -115,7 +101,7 @@ struct extent_map {
}; };
struct extent_map_tree { struct extent_map_tree {
struct rb_root_cached map; struct rb_root root;
struct list_head modified_extents; struct list_head modified_extents;
rwlock_t lock; rwlock_t lock;
}; };
@ -163,6 +149,16 @@ static inline int extent_map_in_tree(const struct extent_map *em)
return !RB_EMPTY_NODE(&em->rb_node); return !RB_EMPTY_NODE(&em->rb_node);
} }
/*
 * Return the on-disk start of the range referenced by @em.
 *
 * When disk_bytenr is not below EXTENT_MAP_LAST_BYTE, or when the extent map
 * is compressed, disk_bytenr is returned unchanged. Otherwise the extent
 * map's offset is added to it.
 */
static inline u64 extent_map_block_start(const struct extent_map *em)
{
	if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE ||
	    extent_map_is_compressed(em))
		return em->disk_bytenr;

	return em->disk_bytenr + em->offset;
}
static inline u64 extent_map_end(const struct extent_map *em) static inline u64 extent_map_end(const struct extent_map *em)
{ {
if (em->start + em->len < em->start) if (em->start + em->len < em->start)

930
fs/btrfs/fiemap.c Normal file
View File

@ -0,0 +1,930 @@
// SPDX-License-Identifier: GPL-2.0
#include "backref.h"
#include "btrfs_inode.h"
#include "fiemap.h"
#include "file.h"
#include "file-item.h"
/*
 * One ready fiemap extent, buffered in fiemap_cache::entries.
 *
 * flush_fiemap_cache() passes the members, in declaration order, to
 * fiemap_fill_next_extent().
 */
struct btrfs_fiemap_entry {
u64 offset;
u64 phys;
u64 len;
u32 flags;
};
/*
 * Indicate to the caller of emit_fiemap_extent() that it needs to unlock the
 * file range from the inode's io tree, unlock the subvolume tree search path,
 * flush the fiemap cache, relock the file range and re-search the subvolume
 * tree.
 * The value here is something negative that can't be confused with a valid
 * errno value and different from 1 because that's also a return value from
 * fiemap_fill_next_extent() and also it's often used to mean some btree search
 * did not find a key, so make it some distinct negative value.
 */
#define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1))
/*
 * Used to:
 *
 * - Cache the next entry to be emitted to the fiemap buffer, so that we can
 * merge extents that are contiguous and can be grouped as a single one;
 *
 * - Store extents ready to be written to the fiemap buffer in an intermediary
 * buffer. This intermediary buffer is to ensure that in case the fiemap
 * buffer is memory mapped to the fiemap target file, we don't deadlock
 * during btrfs_page_mkwrite(). This is because during fiemap we are locking
 * an extent range in order to prevent races with delalloc flushing and
 * ordered extent completion, which is needed in order to reliably detect
 * delalloc in holes and prealloc extents. And this can lead to a deadlock
 * if the fiemap buffer is memory mapped to the file we are running fiemap
 * against (a silly, useless in practice scenario, but possible) because
 * btrfs_page_mkwrite() will try to lock the same extent range.
 */
struct fiemap_cache {
/* An array of ready fiemap entries. */
struct btrfs_fiemap_entry *entries;
/* Number of entries in the entries array. */
int entries_size;
/*
 * Index of the next entry in the entries array to write to.
 * flush_fiemap_cache() resets it to 0 after emitting all buffered entries.
 */
int entries_pos;
/*
 * Once the entries array is full, this indicates what's the offset for
 * the next file extent item we must search for in the inode's subvolume
 * tree after unlocking the extent range in the inode's io tree and
 * releasing the search path.
 */
u64 next_search_offset;
/*
 * This matches struct fiemap_extent_info::fi_mapped_extents. We use it
 * to count the emitted extents ourselves and stop, instead of relying on
 * fiemap_fill_next_extent(), because we buffer ready fiemap entries at
 * the @entries array, and we want to stop as soon as we hit the max
 * amount of extents to map, not just to save time but also to make the
 * logic at extent_fiemap() simpler.
 */
unsigned int extents_mapped;
/* Fields for the cached extent (unsubmitted, not ready, extent). */
u64 offset;
u64 phys;
u64 len;
u32 flags;
/*
 * Checked first by emit_fiemap_extent(): when false, the merge and
 * overlap handling against the cached extent is skipped.
 */
bool cached;
};
/*
 * Emit every buffered entry of @cache to @fieinfo, in array order, through
 * fiemap_fill_next_extent().
 *
 * Returns 0 after all entries were emitted, in which case the buffer is
 * marked empty (entries_pos reset to 0). Returns the negative value from
 * fiemap_fill_next_extent() as soon as one call fails; entries_pos is left
 * untouched in that case.
 */
static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
			      struct fiemap_cache *cache)
{
	int idx = 0;

	while (idx < cache->entries_pos) {
		struct btrfs_fiemap_entry *cur = &cache->entries[idx];
		int err;

		err = fiemap_fill_next_extent(fieinfo, cur->offset, cur->phys,
					      cur->len, cur->flags);
		/*
		 * A return value of 1 (reached max entries) is deliberately
		 * not treated as a stop condition, emit_fiemap_extent() keeps
		 * track of that on its own.
		 */
		if (err < 0)
			return err;

		idx++;
	}

	cache->entries_pos = 0;

	return 0;
}
/*
* Helper to submit fiemap extent.
*
* Will try to merge current fiemap extent specified by @offset, @phys,
* @len and @flags with cached one.
* And only when we fail to merge, the cached one will be submitted as
* a fiemap extent.
*
* Return value is the same as fiemap_fill_next_extent().
*/
static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
struct fiemap_cache *cache,
u64 offset, u64 phys, u64 len, u32 flags)
{
struct btrfs_fiemap_entry *entry;
u64 cache_end;
/* Set at the end of extent_fiemap(). */
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
if (!cache->cached)
goto assign;
/*
* When iterating the extents of the inode, at extent_fiemap(), we may
* find an extent that starts at an offset behind the end offset of the
* previous extent we processed. This happens if fiemap is called
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
* after we had to unlock the file range, release the search path, emit
* the fiemap extents stored in the buffer (cache->entries array) and
* then lock the remainder of the range and re-search the btree.
*
* For example we are in leaf X processing its last item, which is the
* file extent item for file range [512K, 1M[, and after
* btrfs_next_leaf() releases the path, there's an ordered extent that
* completes for the file range [768K, 2M[, and that results in trimming
* the file extent item so that it now corresponds to the file range
* [512K, 768K[ and a new file extent item is inserted for the file
* range [768K, 2M[, which may end up as the last item of leaf X or as
* the first item of the next leaf - in either case btrfs_next_leaf()
* will leave us with a path pointing to the new extent item, for the
* file range [768K, 2M[, since that's the first key that follows the
* last one we processed. So in order not to report overlapping extents
* to user space, we trim the length of the previously cached extent and
* emit it.
*
* Upon calling btrfs_next_leaf() we may also find an extent with an
* offset smaller than or equals to cache->offset, and this happens
* when we had a hole or prealloc extent with several delalloc ranges in
* it, but after btrfs_next_leaf() released the path, delalloc was
* flushed and the resulting ordered extents were completed, so we can
* now have found a file extent item for an offset that is smaller than
* or equals to what we have in cache->offset. We deal with this as
* described below.
*/
cache_end = cache->offset + cache->len;
if (cache_end > offset) {
if (offset == cache->offset) {
/*
* We cached a delalloc range (found in the io tree) for
* a hole or prealloc extent and we have now found a
* file extent item for the same offset. What we have
* now is more recent and up to date, so discard what
* we had in the cache and use what we have just found.
*/
goto assign;
} else if (offset > cache->offset) {
/*
* The extent range we previously found ends after the
* offset of the file extent item we found and that
* offset falls somewhere in the middle of that previous
* extent range. So adjust the range we previously found
* to end at the offset of the file extent item we have
* just found, since this extent is more up to date.
* Emit that adjusted range and cache the file extent
* item we have just found. This corresponds to the case
* where a previously found file extent item was split
* due to an ordered extent completing.
*/
cache->len = offset - cache->offset;
goto emit;
} else {
const u64 range_end = offset + len;
/*
* The offset of the file extent item we have just found
* is behind the cached offset. This means we were
* processing a hole or prealloc extent for which we
* have found delalloc ranges (in the io tree), so what
* we have in the cache is the last delalloc range we
* found while the file extent item we found can be
* either for a whole delalloc range we previously
* emitted or only a part of that range.
*
* We have two cases here:
*
* 1) The file extent item's range ends at or behind the
* cached extent's end. In this case just ignore the
* current file extent item because we don't want to
* overlap with previous ranges that may have been
* emitted already;
*
* 2) The file extent item starts behind the currently
* cached extent but its end offset goes beyond the
* end offset of the cached extent. We don't want to
* overlap with a previous range that may have been
* emitted already, so we emit the currently cached
* extent and then partially store the current file
* extent item's range in the cache, for the subrange
* going from the cached extent's end to the end of
* the file extent item.
*/
if (range_end <= cache_end)
return 0;
if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
phys += cache_end - offset;
offset = cache_end;
len = range_end - cache_end;
goto emit;
}
}
/*
* Only merges fiemap extents if
* 1) Their logical addresses are continuous
*
* 2) Their physical addresses are continuous
* So truly compressed (physical size smaller than logical size)
* extents won't get merged with each other
*
* 3) Share same flags
*/
if (cache->offset + cache->len == offset &&
cache->phys + cache->len == phys &&
cache->flags == flags) {
cache->len += len;
return 0;
}
emit:
/* Not mergeable, need to submit cached one */
if (cache->entries_pos == cache->entries_size) {
/*
* We will need to search again from the end offset of the last
* stored extent and not from the current offset, because after
* unlocking the range and releasing the path, if there's a hole
* between that end offset and this current offset, a new extent
* may have been inserted due to a new write, so we don't want
* to miss it.
*/
entry = &cache->entries[cache->entries_size - 1];
cache->next_search_offset = entry->offset + entry->len;
cache->cached = false;
return BTRFS_FIEMAP_FLUSH_CACHE;
}
entry = &cache->entries[cache->entries_pos];
entry->offset = cache->offset;
entry->phys = cache->phys;
entry->len = cache->len;
entry->flags = cache->flags;
cache->entries_pos++;
cache->extents_mapped++;
if (cache->extents_mapped == fieinfo->fi_extents_max) {
cache->cached = false;
return 1;
}
assign:
cache->cached = true;
cache->offset = offset;
cache->phys = phys;
cache->len = len;
cache->flags = flags;
return 0;
}
/*
 * Report the extent that is still sitting in the fiemap cache, if any.
 *
 * An extent can remain cached when the fiemap range ends before the extent
 * does, for example:
 *
 *	0                  4K                  8K
 *	|<- Fiemap range ->|
 *	|<----------- First extent ----------->|
 *
 * Here the first extent gets cached but is never emitted by the regular path,
 * so it has to be emitted before extent_fiemap() finishes.
 *
 * The cache is always marked empty afterwards. Returns 0 when nothing was
 * cached or when fiemap_fill_next_extent() returned a value >= 0, otherwise
 * the negative value it returned.
 */
static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
				  struct fiemap_cache *cache)
{
	int err;

	if (cache->cached) {
		err = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
					      cache->len, cache->flags);
		cache->cached = false;
		/* A positive return value is not a failure for our caller. */
		if (err < 0)
			return err;
	}

	return 0;
}
static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *path)
{
struct extent_buffer *clone = path->nodes[0];
struct btrfs_key key;
int slot;
int ret;
path->slots[0]++;
if (path->slots[0] < btrfs_header_nritems(path->nodes[0]))
return 0;
/*
* Add a temporary extra ref to an already cloned extent buffer to
* prevent btrfs_next_leaf() freeing it, we want to reuse it to avoid
* the cost of allocating a new one.
*/
ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
atomic_inc(&clone->refs);
ret = btrfs_next_leaf(inode->root, path);
if (ret != 0)
goto out;
/*
* Don't bother with cloning if there are no more file extent items for
* our inode.
*/
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) {
ret = 1;
goto out;
}
/*
* Important to preserve the start field, for the optimizations when
* checking if extents are shared (see extent_fiemap()).
*
* We must set ->start before calling copy_extent_buffer_full(). If we
* are on sub-pagesize blocksize, we use ->start to determine the offset
* into the folio where our eb exists, and if we update ->start after
* the fact then any subsequent reads of the eb may read from a
* different offset in the folio than where we originally copied into.
*/
clone->start = path->nodes[0]->start;
/* See the comment at fiemap_search_slot() about why we clone. */
copy_extent_buffer_full(clone, path->nodes[0]);
slot = path->slots[0];
btrfs_release_path(path);
path->nodes[0] = clone;
path->slots[0] = slot;
out:
if (ret)
free_extent_buffer(clone);
return ret;
}
/*
 * Position @path at the file extent item of @inode that starts at
 * @file_offset, or at the one that starts immediately before that offset.
 *
 * On success path->nodes[0] is a private clone of the leaf that was found and
 * the original path has been released.
 *
 * Returns: 0 on success, < 0 on error, 1 if not found.
 */
static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path,
			      u64 file_offset)
{
	const u64 ino = btrfs_ino(inode);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf_copy;
	struct btrfs_key key;
	int saved_slot;
	int ret;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = file_offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	/*
	 * No exact match: step back one slot if the previous item is a file
	 * extent item of our inode.
	 */
	if (ret > 0 && path->slots[0] > 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
		if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
			path->slots[0]--;
	}

	/* We are past the last item of the leaf, look at the next leaf. */
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret != 0)
			return ret;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (!(key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY))
			return 1;
	}

	/*
	 * Work on a clone of the leaf for the rest of fiemap. While using the
	 * leaf we do expensive things, such as checking if an extent is
	 * shared, which can take a long time. Using a clone avoids blocking,
	 * for too long, other tasks that want to insert items for other inodes
	 * in the same leaf, or b+tree rebalance operations (triggered for
	 * example when someone tries to push items into this leaf while
	 * inserting an item in a neighbour leaf). The file range is locked in
	 * the inode's io tree, so none of our file extent items can change.
	 *
	 * The private clone is also needed because holding a read lock on an
	 * extent buffer of the subvolume's b+tree makes lockdep unhappy when
	 * we check if extents are shared, as backref walking may need to lock
	 * the same leaf we are processing.
	 */
	leaf_copy = btrfs_clone_extent_buffer(path->nodes[0]);
	if (leaf_copy == NULL)
		return -ENOMEM;

	saved_slot = path->slots[0];
	btrfs_release_path(path);
	path->nodes[0] = leaf_copy;
	path->slots[0] = saved_slot;

	return 0;
}
/*
 * Emit fiemap extents for a file range that is either a hole or a prealloc
 * extent in the inode's subvolume btree.
 *
 * @disk_bytenr selects the case: 0 means a hole, anything else a prealloc
 * extent. The range is [@start, @end], with @end inclusive.
 *
 * Every delalloc range found inside the range (up to i_size) is emitted as a
 * DELALLOC | UNKNOWN extent. For a prealloc extent, the sections that have no
 * delalloc are emitted as UNWRITTEN extents, also flagged SHARED when
 * btrfs_is_data_extent_shared() says so.
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * emit_fiemap_extent() or the negative value returned by
 * btrfs_is_data_extent_shared().
 */
static int fiemap_process_hole(struct btrfs_inode *inode,
			       struct fiemap_extent_info *fieinfo,
			       struct fiemap_cache *cache,
			       struct extent_state **delalloc_cached_state,
			       struct btrfs_backref_share_check_ctx *backref_ctx,
			       u64 disk_bytenr, u64 extent_offset,
			       u64 extent_gen,
			       u64 start, u64 end)
{
	const u64 i_size = i_size_read(&inode->vfs_inode);
	const bool is_prealloc = (disk_bytenr != 0);
	u64 search_start = start;
	u64 last_delalloc_end = 0;
	u32 prealloc_flags = FIEMAP_EXTENT_UNWRITTEN;
	bool checked_extent_shared = false;
	u64 tail_start;
	u64 tail_len;
	int ret;

	/*
	 * There can be no delalloc past i_size, so don't waste time looking
	 * for it beyond i_size.
	 */
	while (search_start < end && search_start < i_size) {
		u64 delalloc_start;
		u64 delalloc_end;
		u64 prealloc_start;
		u64 prealloc_len = 0;

		if (!btrfs_find_delalloc_in_range(inode, search_start, end,
						  delalloc_cached_state,
						  &delalloc_start,
						  &delalloc_end))
			break;

		/*
		 * For a prealloc extent, every section without delalloc has to
		 * be reported: here that is the gap before this delalloc range.
		 */
		if (is_prealloc) {
			prealloc_start = last_delalloc_end ?
					 last_delalloc_end + 1 : start;
			prealloc_len = delalloc_start - prealloc_start;
		}

		if (prealloc_len > 0) {
			/* Check for sharing at most once per call. */
			if (!checked_extent_shared && fieinfo->fi_extents_max) {
				ret = btrfs_is_data_extent_shared(inode,
								  disk_bytenr,
								  extent_gen,
								  backref_ctx);
				if (ret < 0)
					return ret;
				if (ret > 0)
					prealloc_flags |= FIEMAP_EXTENT_SHARED;
				checked_extent_shared = true;
			}

			ret = emit_fiemap_extent(fieinfo, cache, prealloc_start,
						 disk_bytenr + extent_offset,
						 prealloc_len, prealloc_flags);
			if (ret)
				return ret;
			extent_offset += prealloc_len;
		}

		ret = emit_fiemap_extent(fieinfo, cache, delalloc_start, 0,
					 delalloc_end + 1 - delalloc_start,
					 FIEMAP_EXTENT_DELALLOC |
					 FIEMAP_EXTENT_UNKNOWN);
		if (ret)
			return ret;

		last_delalloc_end = delalloc_end;
		search_start = delalloc_end + 1;
		extent_offset += search_start - delalloc_start;
		cond_resched();
	}

	/* Nothing left to report for a hole or a fully covered prealloc extent. */
	if (!is_prealloc || last_delalloc_end >= end)
		return 0;

	/*
	 * Either we found no delalloc for the whole prealloc extent or we have
	 * a prealloc extent that spans i_size or starts at or after i_size.
	 */
	tail_start = last_delalloc_end ? last_delalloc_end + 1 : start;
	tail_len = end + 1 - tail_start;

	if (!checked_extent_shared && fieinfo->fi_extents_max) {
		ret = btrfs_is_data_extent_shared(inode, disk_bytenr,
						  extent_gen, backref_ctx);
		if (ret < 0)
			return ret;
		if (ret > 0)
			prealloc_flags |= FIEMAP_EXTENT_SHARED;
	}

	return emit_fiemap_extent(fieinfo, cache, tail_start,
				  disk_bytenr + extent_offset,
				  tail_len, prealloc_flags);
}
/*
 * Compute the end offset of the last file extent item of @inode that is not a
 * hole and store it in @last_extent_end_ret. 0 is stored when the inode has no
 * file extent items or when all of them are holes.
 *
 * Returns 0 on success and < 0 on error.
 */
static int fiemap_find_last_extent_offset(struct btrfs_inode *inode,
					  struct btrfs_path *path,
					  u64 *last_extent_end_ret)
{
	const u64 ino = btrfs_ino(inode);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *eb;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key found_key;
	int ret;

	/*
	 * Lookup the last file extent. We're not using i_size here because
	 * there might be preallocation past i_size.
	 */
	ret = btrfs_lookup_file_extent(NULL, root, path, ino, (u64)-1, 0);
	/* There can't be a file extent item at offset (u64)-1 */
	ASSERT(ret != 0);
	if (ret < 0)
		return ret;

	/*
	 * For a non-existing key, btrfs_search_slot() always leaves us at a
	 * slot > 0, except if the btree is empty, which is impossible because
	 * at least it has the inode item for this inode and all the items for
	 * the root inode 256.
	 */
	ASSERT(path->slots[0] > 0);
	path->slots[0]--;
	eb = path->nodes[0];
	btrfs_item_key_to_cpu(eb, &found_key, path->slots[0]);
	/* No file extent items in the subvolume tree. */
	if (found_key.objectid != ino || found_key.type != BTRFS_EXTENT_DATA_KEY)
		goto no_extent;

	/*
	 * For an inline extent, the disk_bytenr is where inline data starts
	 * at, so first check if we have an inline extent item before checking
	 * if we have an implicit hole (disk_bytenr == 0).
	 */
	fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
		goto found;

	/*
	 * Find the last file extent item that is not a hole (when NO_HOLES is
	 * not enabled). This should take at most 2 iterations in the worst
	 * case: we have one hole file extent item at slot 0 of a leaf and
	 * another hole file extent item as the last item in the previous leaf.
	 * This is because we merge file extent items that represent holes.
	 */
	while (btrfs_file_extent_disk_bytenr(eb, fi) == 0) {
		ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY);
		if (ret < 0)
			return ret;
		/* No file extent items that are not holes. */
		if (ret > 0)
			goto no_extent;

		eb = path->nodes[0];
		fi = btrfs_item_ptr(eb, path->slots[0],
				    struct btrfs_file_extent_item);
	}

found:
	*last_extent_end_ret = btrfs_file_extent_end(path);
	return 0;

no_extent:
	*last_extent_end_ret = 0;
	return 0;
}
/*
 * Core of the btrfs fiemap implementation: walk the file extent items of
 * @inode covering the range that starts at @start and spans @len bytes, and
 * report them through @fieinfo.
 *
 * Extents are first collected in a local buffer (cache.entries) while the file
 * range is locked in the inode's io tree. They are only handed to
 * flush_fiemap_cache() after the range was unlocked. When the buffer fills up
 * (emit_fiemap_extent() returns BTRFS_FIEMAP_FLUSH_CACHE), the buffer is
 * flushed and the walk restarts from cache.next_search_offset.
 *
 * Returns 0 on success or the non-zero value of the step that failed.
 */
static int extent_fiemap(struct btrfs_inode *inode,
struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
const u64 ino = btrfs_ino(inode);
struct extent_state *cached_state = NULL;
struct extent_state *delalloc_cached_state = NULL;
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
struct btrfs_backref_share_check_ctx *backref_ctx;
u64 last_extent_end;
u64 prev_extent_end;
u64 range_start;
u64 range_end;
const u64 sectorsize = inode->root->fs_info->sectorsize;
bool stopped = false;
int ret;
/* The local buffer holds as many entries as fit in one page. */
cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
cache.entries = kmalloc_array(cache.entries_size,
sizeof(struct btrfs_fiemap_entry),
GFP_KERNEL);
backref_ctx = btrfs_alloc_backref_share_check_ctx();
path = btrfs_alloc_path();
/* All three allocations are checked together; 'out' frees whatever succeeded. */
if (!cache.entries || !backref_ctx || !path) {
ret = -ENOMEM;
goto out;
}
/*
 * (Re)start the walk. After a buffer flush, @start and @len have been
 * advanced (see the BTRFS_FIEMAP_FLUSH_CACHE handling below), so the range
 * is recomputed and locked again.
 */
restart:
range_start = round_down(start, sectorsize);
range_end = round_up(start + len, sectorsize);
prev_extent_end = range_start;
lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
if (ret < 0)
goto out_unlock;
btrfs_release_path(path);
path->reada = READA_FORWARD;
ret = fiemap_search_slot(inode, path, range_start);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
/*
* No file extent item found, but we may have delalloc between
* the current offset and i_size. So check for that.
*/
ret = 0;
goto check_eof_delalloc;
}
/* prev_extent_end tracks how far into the range we have reported so far. */
while (prev_extent_end < range_end) {
struct extent_buffer *leaf = path->nodes[0];
struct btrfs_file_extent_item *ei;
struct btrfs_key key;
u64 extent_end;
u64 extent_len;
u64 extent_offset = 0;
u64 extent_gen;
u64 disk_bytenr = 0;
u64 flags = 0;
int extent_type;
u8 compression;
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
break;
extent_end = btrfs_file_extent_end(path);
/*
* The first iteration can leave us at an extent item that ends
* before our range's start. Move to the next item.
*/
if (extent_end <= range_start)
goto next_item;
backref_ctx->curr_leaf_bytenr = leaf->start;
/* We have an implicit hole (NO_HOLES feature enabled). */
if (prev_extent_end < key.offset) {
const u64 hole_end = min(key.offset, range_end) - 1;
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx, 0, 0, 0,
prev_extent_end, hole_end);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
/* fiemap_fill_next_extent() told us to stop. */
stopped = true;
break;
}
/* We've reached the end of the fiemap range, stop. */
if (key.offset >= range_end) {
stopped = true;
break;
}
}
extent_len = extent_end - key.offset;
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
compression = btrfs_file_extent_compression(leaf, ei);
extent_type = btrfs_file_extent_type(leaf, ei);
extent_gen = btrfs_file_extent_generation(leaf, ei);
/*
 * disk_bytenr and extent_offset keep their initial value of 0 for inline
 * extents; extent_offset also stays 0 for compressed extents.
 */
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
if (compression == BTRFS_COMPRESS_NONE)
extent_offset = btrfs_file_extent_offset(leaf, ei);
}
if (compression != BTRFS_COMPRESS_NONE)
flags |= FIEMAP_EXTENT_ENCODED;
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
flags |= FIEMAP_EXTENT_DATA_INLINE;
flags |= FIEMAP_EXTENT_NOT_ALIGNED;
ret = emit_fiemap_extent(fieinfo, &cache, key.offset, 0,
extent_len, flags);
} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx,
disk_bytenr, extent_offset,
extent_gen, key.offset,
extent_end - 1);
} else if (disk_bytenr == 0) {
/* We have an explicit hole. */
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state,
backref_ctx, 0, 0, 0,
key.offset, extent_end - 1);
} else {
/* We have a regular extent. */
if (fieinfo->fi_extents_max) {
ret = btrfs_is_data_extent_shared(inode,
disk_bytenr,
extent_gen,
backref_ctx);
if (ret < 0)
goto out_unlock;
else if (ret > 0)
flags |= FIEMAP_EXTENT_SHARED;
}
ret = emit_fiemap_extent(fieinfo, &cache, key.offset,
disk_bytenr + extent_offset,
extent_len, flags);
}
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
/* emit_fiemap_extent() told us to stop. */
stopped = true;
break;
}
prev_extent_end = extent_end;
next_item:
if (fatal_signal_pending(current)) {
ret = -EINTR;
goto out_unlock;
}
ret = fiemap_next_leaf_item(inode, path);
if (ret < 0) {
goto out_unlock;
} else if (ret > 0) {
/* No more file extent items for this inode. */
break;
}
cond_resched();
}
check_eof_delalloc:
/*
 * If we were not told to stop and the range is not fully covered yet, treat
 * the remainder as a hole, which reports any delalloc found in it.
 */
if (!stopped && prev_extent_end < range_end) {
ret = fiemap_process_hole(inode, fieinfo, &cache,
&delalloc_cached_state, backref_ctx,
0, 0, 0, prev_extent_end, range_end - 1);
if (ret < 0)
goto out_unlock;
prev_extent_end = range_end;
}
/*
 * Flag the cached extent as the last one only if it reaches the end of the
 * last file extent item and no delalloc exists between prev_extent_end and
 * i_size.
 */
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
const u64 i_size = i_size_read(&inode->vfs_inode);
if (prev_extent_end < i_size) {
u64 delalloc_start;
u64 delalloc_end;
bool delalloc;
delalloc = btrfs_find_delalloc_in_range(inode,
prev_extent_end,
i_size - 1,
&delalloc_cached_state,
&delalloc_start,
&delalloc_end);
if (!delalloc)
cache.flags |= FIEMAP_EXTENT_LAST;
} else {
cache.flags |= FIEMAP_EXTENT_LAST;
}
}
out_unlock:
unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
/*
 * The entries buffer filled up: flush it with the range unlocked and the
 * path released, then resume from where the last stored entry ended.
 */
if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
btrfs_release_path(path);
ret = flush_fiemap_cache(fieinfo, &cache);
if (ret)
goto out;
len -= cache.next_search_offset - start;
start = cache.next_search_offset;
goto restart;
} else if (ret < 0) {
goto out;
}
/*
* Must free the path before emitting to the fiemap buffer because we
* may have a non-cloned leaf and if the fiemap buffer is memory mapped
* to a file, a write into it (through btrfs_page_mkwrite()) may trigger
* waiting for an ordered extent that in order to complete needs to
* modify that leaf, therefore leading to a deadlock.
*/
btrfs_free_path(path);
/* Cleared so that the btrfs_free_path() call under 'out' gets NULL. */
path = NULL;
ret = flush_fiemap_cache(fieinfo, &cache);
if (ret)
goto out;
ret = emit_last_fiemap_cache(fieinfo, &cache);
out:
free_extent_state(delalloc_cached_state);
kfree(cache.entries);
btrfs_free_backref_share_ctx(backref_ctx);
btrfs_free_path(path);
return ret;
}
/*
 * FIEMAP entry point for btrfs: validate the request with fiemap_prep(), wait
 * for ordered extents when FIEMAP_FLAG_SYNC is set, and then map the extents
 * with the inode locked in shared mode.
 *
 * Returns 0 on success or the non-zero value of the step that failed.
 */
int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	bool want_sync;
	int ret;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (ret)
		return ret;

	want_sync = (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) != 0;

	/*
	 * fiemap_prep() called filemap_write_and_wait() for the whole possible
	 * file range (0 to LLONG_MAX), but that is not enough if we have
	 * compression enabled. The first filemap_fdatawrite_range() only kicks
	 * in the compression of data (in an async thread) and will return
	 * before the compression is done and writeback is started. A second
	 * filemap_fdatawrite_range() is needed to wait for the compression to
	 * complete and writeback to start. We also need to wait for ordered
	 * extents to complete, because our fiemap implementation uses mainly
	 * file extent items to list the extents, searching for extent maps
	 * only for file ranges with holes or prealloc extents to figure out
	 * if we have delalloc in those ranges.
	 */
	if (want_sync) {
		ret = btrfs_wait_ordered_range(binode, 0, LLONG_MAX);
		if (ret)
			return ret;
	}

	btrfs_inode_lock(binode, BTRFS_ILOCK_SHARED);

	/*
	 * The initial flush above was done without the inode's lock, to avoid
	 * holding it while triggering writeback and waiting for the completion
	 * of IO and ordered extents. Now that the inode is locked, do it again
	 * because a new write may have happened in between those two steps.
	 */
	if (want_sync)
		ret = btrfs_wait_ordered_range(binode, 0, LLONG_MAX);

	/* ret is 0 here unless the wait under the lock just failed. */
	if (!ret)
		ret = extent_fiemap(binode, fieinfo, start, len);

	btrfs_inode_unlock(binode, BTRFS_ILOCK_SHARED);

	return ret;
}

11
fs/btrfs/fiemap.h Normal file
View File

@ -0,0 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BTRFS_FIEMAP_H
#define BTRFS_FIEMAP_H
#include <linux/fiemap.h>
/*
 * btrfs FIEMAP entry point: report, through @fieinfo, the extents of @inode
 * in the file range that starts at @start and spans @len bytes.
 * Returns 0 on success and a non-zero value on failure.
 */
int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
#endif /* BTRFS_FIEMAP_H */

View File

@ -45,13 +45,12 @@
*/ */
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size) void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 start, end, i_size; u64 start, end, i_size;
int ret; int ret;
spin_lock(&inode->lock); spin_lock(&inode->lock);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode); i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) { if (!inode->file_extent_tree) {
inode->disk_i_size = i_size; inode->disk_i_size = i_size;
goto out_unlock; goto out_unlock;
} }
@ -84,13 +83,14 @@ out_unlock:
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start, int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len) u64 len)
{ {
if (!inode->file_extent_tree)
return 0;
if (len == 0) if (len == 0)
return 0; return 0;
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize)); ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return set_extent_bit(inode->file_extent_tree, start, start + len - 1, return set_extent_bit(inode->file_extent_tree, start, start + len - 1,
EXTENT_DIRTY, NULL); EXTENT_DIRTY, NULL);
} }
@ -112,14 +112,15 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start, int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len) u64 len)
{ {
if (!inode->file_extent_tree)
return 0;
if (len == 0) if (len == 0)
return 0; return 0;
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) || ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
len == (u64)-1); len == (u64)-1);
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return clear_extent_bit(inode->file_extent_tree, start, return clear_extent_bit(inode->file_extent_tree, start,
start + len - 1, EXTENT_DIRTY, NULL); start + len - 1, EXTENT_DIRTY, NULL);
} }
@ -352,7 +353,7 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
u32 bio_offset = 0; u32 bio_offset = 0;
if ((inode->flags & BTRFS_INODE_NODATASUM) || if ((inode->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state))
return BLK_STS_OK; return BLK_STS_OK;
/* /*
@ -1280,7 +1281,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const int slot = path->slots[0]; const int slot = path->slots[0];
struct btrfs_key key; struct btrfs_key key;
u64 extent_start; u64 extent_start;
u64 bytenr;
u8 type = btrfs_file_extent_type(leaf, fi); u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi); int compress_type = btrfs_file_extent_compression(leaf, fi);
@ -1290,24 +1290,29 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->generation = btrfs_file_extent_generation(leaf, fi); em->generation = btrfs_file_extent_generation(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG || if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) { type == BTRFS_FILE_EXTENT_PREALLOC) {
const u64 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
em->start = extent_start; em->start = extent_start;
em->len = btrfs_file_extent_end(path) - extent_start; em->len = btrfs_file_extent_end(path) - extent_start;
em->orig_start = extent_start - if (disk_bytenr == 0) {
btrfs_file_extent_offset(leaf, fi); em->disk_bytenr = EXTENT_MAP_HOLE;
em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); em->disk_num_bytes = 0;
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); em->offset = 0;
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
return; return;
} }
em->disk_bytenr = disk_bytenr;
em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
em->offset = btrfs_file_extent_offset(leaf, fi);
if (compress_type != BTRFS_COMPRESS_NONE) { if (compress_type != BTRFS_COMPRESS_NONE) {
extent_map_set_compression(em, compress_type); extent_map_set_compression(em, compress_type);
em->block_start = bytenr;
em->block_len = em->orig_block_len;
} else { } else {
bytenr += btrfs_file_extent_offset(leaf, fi); /*
em->block_start = bytenr; * Older kernels can create regular non-hole data
em->block_len = em->len; * extents with ram_bytes smaller than disk_num_bytes.
* Not a big deal, just always use disk_num_bytes
* for ram_bytes.
*/
em->ram_bytes = em->disk_num_bytes;
if (type == BTRFS_FILE_EXTENT_PREALLOC) if (type == BTRFS_FILE_EXTENT_PREALLOC)
em->flags |= EXTENT_FLAG_PREALLOC; em->flags |= EXTENT_FLAG_PREALLOC;
} }
@ -1315,15 +1320,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
/* Tree-checker has ensured this. */ /* Tree-checker has ensured this. */
ASSERT(extent_start == 0); ASSERT(extent_start == 0);
em->block_start = EXTENT_MAP_INLINE; em->disk_bytenr = EXTENT_MAP_INLINE;
em->start = 0; em->start = 0;
em->len = fs_info->sectorsize; em->len = fs_info->sectorsize;
/* em->offset = 0;
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().
*/
em->orig_start = EXTENT_MAP_HOLE;
em->block_len = (u64)-1;
extent_map_set_compression(em, compress_type); extent_map_set_compression(em, compress_type);
} else { } else {
btrfs_err(fs_info, btrfs_err(fs_info,

View File

@ -17,8 +17,8 @@
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/iversion.h> #include <linux/iversion.h>
#include <linux/fsverity.h> #include <linux/fsverity.h>
#include <linux/iomap.h>
#include "ctree.h" #include "ctree.h"
#include "direct-io.h"
#include "disk-io.h" #include "disk-io.h"
#include "transaction.h" #include "transaction.h"
#include "btrfs_inode.h" #include "btrfs_inode.h"
@ -1104,7 +1104,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
&cached_state); &cached_state);
} }
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
NULL, NULL, NULL, nowait, false); NULL, nowait, false);
if (ret <= 0) if (ret <= 0)
btrfs_drew_write_unlock(&root->snapshot_lock); btrfs_drew_write_unlock(&root->snapshot_lock);
else else
@ -1140,8 +1140,7 @@ static void update_time_for_write(struct inode *inode)
inode_inc_iversion(inode); inode_inc_iversion(inode);
} }
static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
size_t count)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
@ -1187,8 +1186,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
return 0; return 0;
} }
static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
struct iov_iter *i)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
loff_t pos; loff_t pos;
@ -1451,194 +1449,6 @@ out:
return num_written ? num_written : ret; return num_written ? num_written : ret;
} }
/*
 * Check whether a direct IO request is aligned to the filesystem's sector
 * size: both the file @offset and the alignment reported by
 * iov_iter_alignment() for @iter must be multiples of fs_info->sectorsize.
 *
 * Returns 0 when aligned and -EINVAL otherwise.
 */
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
			       const struct iov_iter *iter, loff_t offset)
{
	const u32 align_mask = fs_info->sectorsize - 1;

	if ((offset & align_mask) || (iov_iter_alignment(iter) & align_mask))
		return -EINVAL;

	return 0;
}
/*
 * Perform a direct IO write of @from at the position in @iocb.
 *
 * The inode is locked in shared mode when the write ends within i_size and
 * IS_NOSEC() is true for the inode, otherwise exclusively. The write falls
 * back to buffered IO (followed by a flush, a wait and a page cache
 * invalidation of the written range) when check_direct_IO() rejects the
 * request, when the direct write returns -ENOTBLK, or when data is left in
 * @from after the direct write.
 *
 * Returns the number of bytes written, or a negative value on error.
 */
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos;
ssize_t written = 0;
ssize_t written_buffered;
size_t prev_left = 0;
loff_t endbyte;
ssize_t ret;
unsigned int ilock_flags = 0;
struct iomap_dio *dio;
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;
/*
* If the write DIO is within EOF, use a shared lock and also only if
* security bits will likely not be dropped by file_remove_privs() called
* from btrfs_write_check(). Either will need to be rechecked after the
* lock was acquired.
*/
if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode))
ilock_flags |= BTRFS_ILOCK_SHARED;
relock:
ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
if (ret < 0)
return ret;
/* Shared lock cannot be used with security bits set. */
if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
ilock_flags &= ~BTRFS_ILOCK_SHARED;
goto relock;
}
ret = generic_write_checks(iocb, from);
if (ret <= 0) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
return ret;
}
/* On success, ret holds the byte count returned by generic_write_checks(). */
ret = btrfs_write_check(iocb, from, ret);
if (ret < 0) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
goto out;
}
pos = iocb->ki_pos;
/*
* Re-check since file size may have changed just before taking the
* lock or pos may have changed because of O_APPEND in generic_write_check()
*/
if ((ilock_flags & BTRFS_ILOCK_SHARED) &&
pos + iov_iter_count(from) > i_size_read(inode)) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
ilock_flags &= ~BTRFS_ILOCK_SHARED;
goto relock;
}
/* Misaligned requests cannot use direct IO, so go straight to buffered IO. */
if (check_direct_IO(fs_info, from, pos)) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
goto buffered;
}
/*
* The iov_iter can be mapped to the same file range we are writing to.
* If that's the case, then we will deadlock in the iomap code, because
* it first calls our callback btrfs_dio_iomap_begin(), which will create
* an ordered extent, and after that it will fault in the pages that the
* iov_iter refers to. During the fault in we end up in the readahead
* pages code (starting at btrfs_readahead()), which will lock the range,
* find that ordered extent and then wait for it to complete (at
* btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since
* obviously the ordered extent can never complete as we didn't submit
* yet the respective bio(s). This always happens when the buffer is
* memory mapped to the same file range, since the iomap DIO code always
* invalidates pages in the target file range (after starting and waiting
* for any writeback).
*
* So here we disable page faults in the iov_iter and then retry if we
* got -EFAULT, faulting in the pages before the retry.
*/
from->nofault = true;
dio = btrfs_dio_write(iocb, from, written);
from->nofault = false;
/*
* iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
* iocb, and that needs to lock the inode. So unlock it before calling
* iomap_dio_complete() to avoid a deadlock.
*/
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
if (IS_ERR_OR_NULL(dio))
ret = PTR_ERR_OR_ZERO(dio);
else
ret = iomap_dio_complete(dio);
/* No increment (+=) because iomap returns a cumulative value. */
if (ret > 0)
written = ret;
if (iov_iter_count(from) > 0 && (ret == -EFAULT || ret > 0)) {
const size_t left = iov_iter_count(from);
/*
* We have more data left to write. Try to fault in as many as
* possible of the remainder pages and retry. We do this without
* releasing and locking again the inode, to prevent races with
* truncate.
*
* Also, in case the iov refers to pages in the file range of the
* file we want to write to (due to a mmap), we could enter an
* infinite loop if we retry after faulting the pages in, since
* iomap will invalidate any pages in the range early on, before
* it tries to fault in the pages of the iov. So we keep track of
* how much was left of iov in the previous EFAULT and fallback
* to buffered IO in case we haven't made any progress.
*/
/*
 * NOTE(review): the inode was already unlocked above, before
 * iomap_dio_complete(), and the retry goes through 'relock', which locks
 * it again. The "without releasing and locking again" wording above does
 * not match this code and looks stale - confirm.
 */
if (left == prev_left) {
ret = -ENOTBLK;
} else {
fault_in_iov_iter_readable(from, left);
prev_left = left;
goto relock;
}
}
/*
* If 'ret' is -ENOTBLK or we have not written all data, then it means
* we must fallback to buffered IO.
*/
if ((ret < 0 && ret != -ENOTBLK) || !iov_iter_count(from))
goto out;
buffered:
/*
* If we are in a NOWAIT context, then return -EAGAIN to signal the caller
* it must retry the operation in a context where blocking is acceptable,
* because even if we end up not blocking during the buffered IO attempt
* below, we will block when flushing and waiting for the IO.
*/
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
}
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
ret = written_buffered;
goto out;
}
/*
* Ensure all data is persisted. We want the next direct IO read to be
* able to read what was just written.
*/
endbyte = pos + written_buffered - 1;
ret = btrfs_fdatawrite_range(inode, pos, endbyte);
if (ret)
goto out;
ret = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
if (ret)
goto out;
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
/* Drop the page cache pages for the range that was just written. */
invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
endbyte >> PAGE_SHIFT);
out:
return ret < 0 ? ret : written;
}
static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from, static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded) const struct btrfs_ioctl_encoded_io_args *encoded)
{ {
@ -1738,7 +1548,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
return 0; return 0;
} }
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) static int start_ordered_ops(struct btrfs_inode *inode, loff_t start, loff_t end)
{ {
int ret; int ret;
struct blk_plug plug; struct blk_plug plug;
@ -1758,7 +1568,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
{ {
struct btrfs_inode *inode = BTRFS_I(ctx->inode); struct btrfs_inode *inode = ctx->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
if (btrfs_inode_in_log(inode, btrfs_get_fs_generation(fs_info)) && if (btrfs_inode_in_log(inode, btrfs_get_fs_generation(fs_info)) &&
@ -1794,9 +1604,9 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{ {
struct dentry *dentry = file_dentry(file); struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = inode->root;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx; struct btrfs_log_ctx ctx;
int ret = 0, err; int ret = 0, err;
@ -1829,7 +1639,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret) if (ret)
goto out; goto out;
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
atomic_inc(&root->log_batch); atomic_inc(&root->log_batch);
@ -1853,7 +1663,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/ */
ret = start_ordered_ops(inode, start, end); ret = start_ordered_ops(inode, start, end);
if (ret) { if (ret) {
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
goto out; goto out;
} }
@ -1865,8 +1675,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* running delalloc the full sync flag may be set if we need to drop * running delalloc the full sync flag may be set if we need to drop
* extra extent map ranges due to temporary memory allocation failures. * extra extent map ranges due to temporary memory allocation failures.
*/ */
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
&BTRFS_I(inode)->runtime_flags);
/* /*
* We have to do this here to avoid the priority inversion of waiting on * We have to do this here to avoid the priority inversion of waiting on
@ -1885,16 +1694,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/ */
if (full_sync || btrfs_is_zoned(fs_info)) { if (full_sync || btrfs_is_zoned(fs_info)) {
ret = btrfs_wait_ordered_range(inode, start, len); ret = btrfs_wait_ordered_range(inode, start, len);
clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &BTRFS_I(inode)->runtime_flags); clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags);
} else { } else {
/* /*
* Get our ordered extents as soon as possible to avoid doing * Get our ordered extents as soon as possible to avoid doing
* checksum lookups in the csum tree, and use instead the * checksum lookups in the csum tree, and use instead the
* checksums attached to the ordered extents. * checksums attached to the ordered extents.
*/ */
btrfs_get_ordered_extents_for_logging(BTRFS_I(inode), btrfs_get_ordered_extents_for_logging(inode, &ctx.ordered_extents);
&ctx.ordered_extents); ret = filemap_fdatawait_range(inode->vfs_inode.i_mapping, start, end);
ret = filemap_fdatawait_range(inode->i_mapping, start, end);
if (ret) if (ret)
goto out_release_extents; goto out_release_extents;
@ -1907,8 +1715,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* extents to complete so that any extent maps that point to * extents to complete so that any extent maps that point to
* unwritten locations are dropped and we don't log them. * unwritten locations are dropped and we don't log them.
*/ */
if (test_and_clear_bit(BTRFS_INODE_COW_WRITE_ERROR, if (test_and_clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags))
&BTRFS_I(inode)->runtime_flags))
ret = btrfs_wait_ordered_range(inode, start, len); ret = btrfs_wait_ordered_range(inode, start, len);
} }
@ -1923,8 +1730,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* modified so clear this flag in case it was set for whatever * modified so clear this flag in case it was set for whatever
* reason, it's no longer relevant. * reason, it's no longer relevant.
*/ */
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
&BTRFS_I(inode)->runtime_flags);
/* /*
* An ordered extent might have started before and completed * An ordered extent might have started before and completed
* already with io errors, in which case the inode was not * already with io errors, in which case the inode was not
@ -1932,7 +1738,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* for any errors that might have happened since we last * for any errors that might have happened since we last
* checked called fsync. * checked called fsync.
*/ */
ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err); ret = filemap_check_wb_err(inode->vfs_inode.i_mapping, file->f_wb_err);
goto out_release_extents; goto out_release_extents;
} }
@ -1982,7 +1788,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* file again, but that will end up using the synchronization * file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe. * inside btrfs_sync_log to keep things safe.
*/ */
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
if (ret == BTRFS_NO_LOG_SYNC) { if (ret == BTRFS_NO_LOG_SYNC) {
ret = btrfs_end_transaction(trans); ret = btrfs_end_transaction(trans);
@ -2051,7 +1857,7 @@ out:
out_release_extents: out_release_extents:
btrfs_release_log_ctx_extents(&ctx); btrfs_release_log_ctx_extents(&ctx);
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
goto out; goto out;
} }
@ -2350,11 +2156,9 @@ out:
hole_em->start = offset; hole_em->start = offset;
hole_em->len = end - offset; hole_em->len = end - offset;
hole_em->ram_bytes = hole_em->len; hole_em->ram_bytes = hole_em->len;
hole_em->orig_start = offset;
hole_em->block_start = EXTENT_MAP_HOLE; hole_em->disk_bytenr = EXTENT_MAP_HOLE;
hole_em->block_len = 0; hole_em->disk_num_bytes = 0;
hole_em->orig_block_len = 0;
hole_em->generation = trans->transid; hole_em->generation = trans->transid;
ret = btrfs_replace_extent_map_range(inode, hole_em, true); ret = btrfs_replace_extent_map_range(inode, hole_em, true);
@ -2385,7 +2189,7 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
return PTR_ERR(em); return PTR_ERR(em);
/* Hole or vacuum extent(only exists in no-hole mode) */ /* Hole or vacuum extent(only exists in no-hole mode) */
if (em->block_start == EXTENT_MAP_HOLE) { if (em->disk_bytenr == EXTENT_MAP_HOLE) {
ret = 1; ret = 1;
*len = em->start + em->len > *start + *len ? *len = em->start + em->len > *start + *len ?
0 : *start + *len - em->start - em->len; 0 : *start + *len - em->start - em->len;
@ -2814,7 +2618,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
ret = btrfs_wait_ordered_range(inode, offset, len); ret = btrfs_wait_ordered_range(BTRFS_I(inode), offset, len);
if (ret) if (ret)
goto out_only_mutex; goto out_only_mutex;
@ -3042,7 +2846,7 @@ static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
if (IS_ERR(em)) if (IS_ERR(em))
return PTR_ERR(em); return PTR_ERR(em);
if (em->block_start == EXTENT_MAP_HOLE) if (em->disk_bytenr == EXTENT_MAP_HOLE)
ret = RANGE_BOUNDARY_HOLE; ret = RANGE_BOUNDARY_HOLE;
else if (em->flags & EXTENT_FLAG_PREALLOC) else if (em->flags & EXTENT_FLAG_PREALLOC)
ret = RANGE_BOUNDARY_PREALLOC_EXTENT; ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
@ -3106,7 +2910,7 @@ static int btrfs_zero_range(struct inode *inode,
ASSERT(IS_ALIGNED(alloc_start, sectorsize)); ASSERT(IS_ALIGNED(alloc_start, sectorsize));
len = offset + len - alloc_start; len = offset + len - alloc_start;
offset = alloc_start; offset = alloc_start;
alloc_hint = em->block_start + em->len; alloc_hint = extent_map_block_start(em) + em->len;
} }
free_extent_map(em); free_extent_map(em);
@ -3124,7 +2928,7 @@ static int btrfs_zero_range(struct inode *inode,
mode); mode);
goto out; goto out;
} }
if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) { if (len < sectorsize && em->disk_bytenr != EXTENT_MAP_HOLE) {
free_extent_map(em); free_extent_map(em);
ret = btrfs_truncate_block(BTRFS_I(inode), offset, len, ret = btrfs_truncate_block(BTRFS_I(inode), offset, len,
0); 0);
@ -3309,7 +3113,7 @@ static long btrfs_fallocate(struct file *file, int mode,
* the file range and, due to the previous locking we did, we know there * the file range and, due to the previous locking we did, we know there
* can't be more delalloc or ordered extents in the range. * can't be more delalloc or ordered extents in the range.
*/ */
ret = btrfs_wait_ordered_range(inode, alloc_start, ret = btrfs_wait_ordered_range(BTRFS_I(inode), alloc_start,
alloc_end - alloc_start); alloc_end - alloc_start);
if (ret) if (ret)
goto out; goto out;
@ -3337,7 +3141,7 @@ static long btrfs_fallocate(struct file *file, int mode,
last_byte = min(extent_map_end(em), alloc_end); last_byte = min(extent_map_end(em), alloc_end);
actual_end = min_t(u64, extent_map_end(em), offset + len); actual_end = min_t(u64, extent_map_end(em), offset + len);
last_byte = ALIGN(last_byte, blocksize); last_byte = ALIGN(last_byte, blocksize);
if (em->block_start == EXTENT_MAP_HOLE || if (em->disk_bytenr == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size && (cur_offset >= inode->i_size &&
!(em->flags & EXTENT_FLAG_PREALLOC))) { !(em->flags & EXTENT_FLAG_PREALLOC))) {
const u64 range_len = last_byte - cur_offset; const u64 range_len = last_byte - cur_offset;
@ -3920,97 +3724,6 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
return generic_file_open(inode, filp); return generic_file_open(inode, filp);
} }
/*
 * Validate a direct IO read request.
 *
 * The common check_direct_IO() validation runs first and a negative result
 * from it is returned unchanged. For iovec backed iterators every pair of
 * segments is then compared and the request is rejected with -EINVAL as soon
 * as two segments start at the same base address. Returns 0 otherwise.
 */
static int check_direct_read(struct btrfs_fs_info *fs_info,
			     const struct iov_iter *iter, loff_t offset)
{
	int outer, inner;
	int ret;

	ret = check_direct_IO(fs_info, iter, offset);
	if (ret < 0)
		return ret;

	/* Only iovec backed iterators carry a segment array to inspect. */
	if (!iter_is_iovec(iter))
		return 0;

	for (outer = 0; outer < iter->nr_segs; outer++) {
		const struct iovec *lhs = iter_iov(iter) + outer;

		/* Compare against every later segment exactly once. */
		for (inner = outer + 1; inner < iter->nr_segs; inner++) {
			const struct iovec *rhs = iter_iov(iter) + inner;

			if (lhs->iov_base == rhs->iov_base)
				return -EINVAL;
		}
	}

	return 0;
}
/*
 * Perform a direct IO read into @to.
 *
 * Returns 0 without attempting the read when fsverity is active on the inode
 * or when check_direct_read() rejects the request. Otherwise returns either a
 * negative value obtained from btrfs_dio_read() or the cumulative number of
 * bytes read so far.
 *
 * NOTE(review): a short/zero return presumably makes the caller continue with
 * a buffered read for the remainder - confirm against btrfs_file_read_iter().
 */
static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	size_t last_remaining = 0;
	ssize_t total = 0;
	ssize_t ret;

	if (fsverity_active(inode))
		return 0;

	if (check_direct_read(inode_to_fs_info(inode), to, iocb->ki_pos))
		return 0;

	btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);

	for (;;) {
		size_t remaining;

		/*
		 * Same idea as for direct IO writes (see the comment at
		 * btrfs_direct_write()), except that page faults are disabled
		 * globally as well as at the iov_iter level. When reading from
		 * a hole or prealloc extent, iomap calls iov_iter_zero(), which
		 * can still trigger page fault ins even though ->nofault is
		 * set to true on our 'to' iov_iter.
		 *
		 * Unlike direct IO writes, the deadlock here happens when
		 * trying to lock the extent range in the inode's tree during
		 * the page reads triggered by the fault in (for writes it is
		 * due to waiting for our own ordered extent). For direct IO
		 * reads btrfs_dio_iomap_begin() returns with the extent range
		 * locked, and it is only unlocked in the endio callback
		 * (end_bio_extent_readpage()).
		 */
		pagefault_disable();
		to->nofault = true;
		ret = btrfs_dio_read(iocb, to, total);
		to->nofault = false;
		pagefault_enable();

		/* Plain assignment (no +=): iomap returns a cumulative value. */
		if (ret > 0)
			total = ret;

		/* Done when nothing is left, or on any error except -EFAULT. */
		remaining = iov_iter_count(to);
		if (remaining == 0 || (ret != -EFAULT && ret <= 0))
			break;

		if (remaining == last_remaining) {
			/*
			 * No progress since the previous attempt. Stop here
			 * and report what was read so far, falling back to a
			 * buffered read for the remainder of the range. This
			 * is just to avoid any possibility of looping for too
			 * long.
			 */
			ret = total;
			break;
		}

		/*
		 * Either some progress was made since the last retry or this
		 * is the first retry. Fault in as many pages as possible and
		 * try again.
		 */
		fault_in_iov_iter_writeable(to, remaining);
		last_remaining = remaining;
	}

	btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);

	return ret < 0 ? ret : total;
}
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{ {
ssize_t ret = 0; ssize_t ret = 0;
@ -4045,8 +3758,9 @@ const struct file_operations btrfs_file_operations = {
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC, .fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
}; };
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end) int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end)
{ {
struct address_space *mapping = inode->vfs_inode.i_mapping;
int ret; int ret;
/* /*
@ -4063,10 +3777,9 @@ int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
* know better and pull this out at some point in the future, it is * know better and pull this out at some point in the future, it is
* right and you are wrong. * right and you are wrong.
*/ */
ret = filemap_fdatawrite_range(inode->i_mapping, start, end); ret = filemap_fdatawrite_range(mapping, start, end);
if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags))
&BTRFS_I(inode)->runtime_flags)) ret = filemap_fdatawrite_range(mapping, start, end);
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
return ret; return ret;
} }

View File

@ -37,12 +37,14 @@ int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes, size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached, bool noreserve); struct extent_state **cached, bool noreserve);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos, int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes, bool nowait); size_t *write_bytes, bool nowait);
void btrfs_check_nocow_unlock(struct btrfs_inode *inode); void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
struct extent_state **cached_state, struct extent_state **cached_state,
u64 *delalloc_start_ret, u64 *delalloc_end_ret); u64 *delalloc_start_ret, u64 *delalloc_end_ret);
int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count);
ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i);
#endif #endif

View File

@ -82,7 +82,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path, struct btrfs_path *path,
u64 offset) u64 offset)
{ {
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key; struct btrfs_key key;
struct btrfs_key location; struct btrfs_key location;
struct btrfs_disk_key disk_key; struct btrfs_disk_key disk_key;
@ -116,7 +115,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking. * sure NOFS is set to keep us from deadlocking.
*/ */
nofs_flag = memalloc_nofs_save(); nofs_flag = memalloc_nofs_save();
inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path); inode = btrfs_iget_path(location.objectid, root, path);
btrfs_release_path(path); btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag); memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode)) if (IS_ERR(inode))
@ -138,7 +137,7 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
if (block_group->inode) if (block_group->inode)
inode = igrab(block_group->inode); inode = igrab(&block_group->inode->vfs_inode);
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
if (inode) if (inode)
return inode; return inode;
@ -157,7 +156,7 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
} }
if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags)) if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags))
block_group->inode = igrab(inode); block_group->inode = BTRFS_I(igrab(inode));
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
return inode; return inode;
@ -858,6 +857,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
spin_unlock(&ctl->tree_lock); spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info, btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping"); "Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_bitmap_cachep, e->bitmap);
kmem_cache_free(btrfs_free_space_cachep, e); kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache; goto free_cache;
} }
@ -1268,7 +1268,7 @@ static int flush_dirty_cache(struct inode *inode)
{ {
int ret; int ret;
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
if (ret) if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DELALLOC, NULL); EXTENT_DELALLOC, NULL);
@ -1483,7 +1483,7 @@ static int __btrfs_write_out_cache(struct inode *inode,
io_ctl->entries = entries; io_ctl->entries = entries;
io_ctl->bitmaps = bitmaps; io_ctl->bitmaps = bitmaps;
ret = btrfs_fdatawrite_range(inode, 0, (u64)-1); ret = btrfs_fdatawrite_range(BTRFS_I(inode), 0, (u64)-1);
if (ret) if (ret)
goto out; goto out;

View File

@ -1300,10 +1300,14 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_tree_lock(free_space_root->node); btrfs_tree_lock(free_space_root->node);
btrfs_clear_buffer_dirty(trans, free_space_root->node); btrfs_clear_buffer_dirty(trans, free_space_root->node);
btrfs_tree_unlock(free_space_root->node); btrfs_tree_unlock(free_space_root->node);
btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
free_space_root->node, 0, 1); free_space_root->node, 0, 1);
btrfs_put_root(free_space_root); btrfs_put_root(free_space_root);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
return btrfs_commit_transaction(trans); return btrfs_commit_transaction(trans);
} }

View File

@ -29,7 +29,6 @@
#include "extent-io-tree.h" #include "extent-io-tree.h"
#include "async-thread.h" #include "async-thread.h"
#include "block-rsv.h" #include "block-rsv.h"
#include "fs.h"
struct inode; struct inode;
struct super_block; struct super_block;
@ -99,7 +98,9 @@ enum {
/* The btrfs_fs_info created for self-tests */ /* The btrfs_fs_info created for self-tests */
BTRFS_FS_STATE_DUMMY_FS_INFO, BTRFS_FS_STATE_DUMMY_FS_INFO,
BTRFS_FS_STATE_NO_CSUMS, /* Checksum errors are ignored. */
BTRFS_FS_STATE_NO_DATA_CSUMS,
BTRFS_FS_STATE_SKIP_META_CSUMS,
/* Indicates there was an error cleaning up a log tree. */ /* Indicates there was an error cleaning up a log tree. */
BTRFS_FS_STATE_LOG_CLEANUP_ERROR, BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
@ -225,6 +226,8 @@ enum {
BTRFS_MOUNT_IGNOREDATACSUMS = (1UL << 28), BTRFS_MOUNT_IGNOREDATACSUMS = (1UL << 28),
BTRFS_MOUNT_NODISCARD = (1UL << 29), BTRFS_MOUNT_NODISCARD = (1UL << 29),
BTRFS_MOUNT_NOSPACECACHE = (1UL << 30), BTRFS_MOUNT_NOSPACECACHE = (1UL << 30),
BTRFS_MOUNT_IGNOREMETACSUMS = (1UL << 31),
BTRFS_MOUNT_IGNORESUPERFLAGS = (1ULL << 32),
}; };
/* /*
@ -958,7 +961,7 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
/* /*
* Count how many fs_info->max_extent_size cover the @size * Count how many fs_info->max_extent_size cover the @size
*/ */
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size) static inline u32 count_max_extents(const struct btrfs_fs_info *fs_info, u64 size)
{ {
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (!fs_info) if (!fs_info)
@ -1019,7 +1022,7 @@ void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
#define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \ #define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \
BTRFS_MOUNT_##opt) BTRFS_MOUNT_##opt)
static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) static inline int btrfs_fs_closing(const struct btrfs_fs_info *fs_info)
{ {
/* Do it this way so we only ever do one test_bit in the normal case. */ /* Do it this way so we only ever do one test_bit in the normal case. */
if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) {
@ -1038,7 +1041,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
* since setting and checking for SB_RDONLY in the superblock's flags is not * since setting and checking for SB_RDONLY in the superblock's flags is not
* atomic. * atomic.
*/ */
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) static inline int btrfs_need_cleaner_sleep(const struct btrfs_fs_info *fs_info)
{ {
return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
btrfs_fs_closing(fs_info); btrfs_fs_closing(fs_info);
@ -1059,7 +1062,7 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
#define EXPORT_FOR_TESTS #define EXPORT_FOR_TESTS
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{ {
return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state); return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
} }
@ -1070,7 +1073,7 @@ void btrfs_test_destroy_inode(struct inode *inode);
#define EXPORT_FOR_TESTS static #define EXPORT_FOR_TESTS static
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info) static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{ {
return 0; return 0;
} }

View File

@ -141,8 +141,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0], extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
ref_objectid, name); ref_objectid, name);
if (!extref) { if (!extref) {
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL); btrfs_abort_transaction(trans, -ENOENT);
ret = -EROFS; ret = -ENOENT;
goto out; goto out;
} }

File diff suppressed because it is too large Load Diff

View File

@ -375,15 +375,15 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
return PTR_ERR(trans); return PTR_ERR(trans);
if (comp) { if (comp) {
ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp, ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression",
strlen(comp), 0); comp, strlen(comp), 0);
if (ret) { if (ret) {
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
goto out_end_trans; goto out_end_trans;
} }
} else { } else {
ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression",
0, 0); NULL, 0, 0);
if (ret && ret != -ENODATA) { if (ret && ret != -ENODATA) {
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
goto out_end_trans; goto out_end_trans;
@ -552,7 +552,7 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
return 0; return 0;
} }
int __pure btrfs_is_empty_uuid(u8 *uuid) int __pure btrfs_is_empty_uuid(const u8 *uuid)
{ {
int i; int i;
@ -658,15 +658,10 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
goto out_release_rsv; goto out_release_rsv;
} }
ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
if (ret)
goto out;
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
qgroup_reserved = 0; qgroup_reserved = 0;
trans->block_rsv = &block_rsv; trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size; trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
btrfs_set_log_full_commit(trans);
ret = btrfs_qgroup_inherit(trans, 0, objectid, btrfs_root_id(root), inherit); ret = btrfs_qgroup_inherit(trans, 0, objectid, btrfs_root_id(root), inherit);
if (ret) if (ret)
@ -719,6 +714,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
ret = btrfs_insert_root(trans, fs_info->tree_root, &key, ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
root_item); root_item);
if (ret) { if (ret) {
int ret2;
/* /*
* Since we don't abort the transaction in this case, free the * Since we don't abort the transaction in this case, free the
* tree block so that we don't leak space and leave the * tree block so that we don't leak space and leave the
@ -729,7 +726,9 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_tree_lock(leaf); btrfs_tree_lock(leaf);
btrfs_clear_buffer_dirty(trans, leaf); btrfs_clear_buffer_dirty(trans, leaf);
btrfs_tree_unlock(leaf); btrfs_tree_unlock(leaf);
btrfs_free_tree_block(trans, objectid, leaf, 0, 1); ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
if (ret2 < 0)
btrfs_abort_transaction(trans, ret2);
free_extent_buffer(leaf); free_extent_buffer(leaf);
goto out; goto out;
} }
@ -767,6 +766,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
goto out; goto out;
} }
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
d_instantiate_new(dentry, new_inode_args.inode); d_instantiate_new(dentry, new_inode_args.inode);
new_inode_args.inode = NULL; new_inode_args.inode = NULL;
@ -854,7 +855,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
pending_snapshot->dentry = dentry; pending_snapshot->dentry = dentry;
pending_snapshot->root = root; pending_snapshot->root = root;
pending_snapshot->readonly = readonly; pending_snapshot->readonly = readonly;
pending_snapshot->dir = dir; pending_snapshot->dir = BTRFS_I(dir);
pending_snapshot->inherit = inherit; pending_snapshot->inherit = inherit;
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
@ -1070,7 +1071,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
atomic_inc(&root->snapshot_force_cow); atomic_inc(&root->snapshot_force_cow);
snapshot_force_cow = true; snapshot_force_cow = true;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_extents(root, U64_MAX, NULL);
ret = btrfs_mksubvol(parent, idmap, name, namelen, ret = btrfs_mksubvol(parent, idmap, name, namelen,
root, readonly, inherit); root, readonly, inherit);
@ -1917,8 +1918,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
struct btrfs_ioctl_ino_lookup_user_args *args) struct btrfs_ioctl_ino_lookup_user_args *args)
{ {
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct super_block *sb = inode->i_sb; u64 upper_limit = btrfs_ino(BTRFS_I(inode));
struct btrfs_key upper_limit = BTRFS_I(inode)->location;
u64 treeid = btrfs_root_id(BTRFS_I(inode)->root); u64 treeid = btrfs_root_id(BTRFS_I(inode)->root);
u64 dirid = args->dirid; u64 dirid = args->dirid;
unsigned long item_off; unsigned long item_off;
@ -1944,7 +1944,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
* If the bottom subvolume does not exist directly under upper_limit, * If the bottom subvolume does not exist directly under upper_limit,
* construct the path in from the bottom up. * construct the path in from the bottom up.
*/ */
if (dirid != upper_limit.objectid) { if (dirid != upper_limit) {
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
root = btrfs_get_fs_root(fs_info, treeid, true); root = btrfs_get_fs_root(fs_info, treeid, true);
@ -2006,7 +2006,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
* btree and lock the same leaf. * btree and lock the same leaf.
*/ */
btrfs_release_path(path); btrfs_release_path(path);
temp_inode = btrfs_iget(sb, key2.objectid, root); temp_inode = btrfs_iget(key2.objectid, root);
if (IS_ERR(temp_inode)) { if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode); ret = PTR_ERR(temp_inode);
goto out_put; goto out_put;
@ -2019,7 +2019,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put; goto out_put;
} }
if (key.offset == upper_limit.objectid) if (key.offset == upper_limit)
break; break;
if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) { if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
ret = -EACCES; ret = -EACCES;
@ -2140,7 +2140,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
inode = file_inode(file); inode = file_inode(file);
if (args->dirid == BTRFS_FIRST_FREE_OBJECTID && if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) { btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
/* /*
* The subvolume does not exist under fd with which this is * The subvolume does not exist under fd with which this is
* called * called
@ -3807,12 +3807,29 @@ drop_write:
return ret; return ret;
} }
/*
* Quick check for ioctl handlers if quotas are enabled. Proper locking must be
* done before any operations.
*
* Returns true when fs_info->quota_root is set and false when it is NULL. The
* pointer is checked under fs_info->qgroup_ioctl_lock, but the lock is released
* before returning, so the result is only a snapshot taken at call time.
*/
static bool qgroup_enabled(struct btrfs_fs_info *fs_info)
{
bool ret = true;
/* The lock is held only around the quota_root check itself. */
mutex_lock(&fs_info->qgroup_ioctl_lock);
/* A missing quota root is reported as "quotas not enabled". */
if (!fs_info->quota_root)
ret = false;
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_qgroup_assign_args *sa; struct btrfs_ioctl_qgroup_assign_args *sa;
struct btrfs_qgroup_list *prealloc = NULL;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
int ret; int ret;
int err; int err;
@ -3820,6 +3837,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (!qgroup_enabled(root->fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file); ret = mnt_want_write_file(file);
if (ret) if (ret)
return ret; return ret;
@ -3830,14 +3850,27 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
goto drop_write; goto drop_write;
} }
if (sa->assign) {
prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
if (!prealloc) {
ret = -ENOMEM;
goto drop_write;
}
}
trans = btrfs_join_transaction(root); trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
goto out; goto out;
} }
/*
* Prealloc ownership is moved to the relation handler, there it's used
* or freed on error.
*/
if (sa->assign) { if (sa->assign) {
ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst); ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc);
prealloc = NULL;
} else { } else {
ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst); ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
} }
@ -3847,13 +3880,15 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
err = btrfs_run_qgroups(trans); err = btrfs_run_qgroups(trans);
mutex_unlock(&fs_info->qgroup_ioctl_lock); mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (err < 0) if (err < 0)
btrfs_handle_fs_error(fs_info, err, btrfs_warn(fs_info,
"failed to update qgroup status and info"); "qgroup status update failed after %s relation, marked as inconsistent",
sa->assign ? "adding" : "deleting");
err = btrfs_end_transaction(trans); err = btrfs_end_transaction(trans);
if (err && !ret) if (err && !ret)
ret = err; ret = err;
out: out:
kfree(prealloc);
kfree(sa); kfree(sa);
drop_write: drop_write:
mnt_drop_write_file(file); mnt_drop_write_file(file);
@ -3872,6 +3907,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (!qgroup_enabled(root->fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file); ret = mnt_want_write_file(file);
if (ret) if (ret)
return ret; return ret;
@ -3928,6 +3966,9 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (!qgroup_enabled(root->fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file); ret = mnt_want_write_file(file);
if (ret) if (ret)
return ret; return ret;
@ -3973,6 +4014,9 @@ static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (!qgroup_enabled(fs_info))
return -ENOTCONN;
ret = mnt_want_write_file(file); ret = mnt_want_write_file(file);
if (ret) if (ret)
return ret; return ret;
@ -4429,7 +4473,7 @@ out_drop_write:
return ret; return ret;
} }
static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat) static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool compat)
{ {
struct btrfs_ioctl_send_args *arg; struct btrfs_ioctl_send_args *arg;
int ret; int ret;
@ -4751,10 +4795,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_received_subvol_32(file, argp); return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif #endif
case BTRFS_IOC_SEND: case BTRFS_IOC_SEND:
return _btrfs_ioctl_send(inode, argp, false); return _btrfs_ioctl_send(BTRFS_I(inode), argp, false);
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
case BTRFS_IOC_SEND_32: case BTRFS_IOC_SEND_32:
return _btrfs_ioctl_send(inode, argp, true); return _btrfs_ioctl_send(BTRFS_I(inode), argp, true);
#endif #endif
case BTRFS_IOC_GET_DEV_STATS: case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(fs_info, argp); return btrfs_ioctl_get_dev_stats(fs_info, argp);

View File

@ -19,7 +19,7 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa); struct dentry *dentry, struct fileattr *fa);
int btrfs_ioctl_get_supported_features(void __user *arg); int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(u8 *uuid); int __pure btrfs_is_empty_uuid(const u8 *uuid);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs); struct btrfs_ioctl_balance_args *bargs);

View File

@ -11,7 +11,6 @@
#include <linux/lockdep.h> #include <linux/lockdep.h>
#include <linux/percpu_counter.h> #include <linux/percpu_counter.h>
#include "extent_io.h" #include "extent_io.h"
#include "locking.h"
struct extent_buffer; struct extent_buffer;
struct btrfs_path; struct btrfs_path;

View File

@ -6,7 +6,6 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/maple_tree.h> #include <linux/maple_tree.h>
#include <linux/list.h> #include <linux/list.h>
#include "lru_cache.h"
/* /*
* A cache entry. This is meant to be embedded in a structure of a user of * A cache entry. This is meant to be embedded in a structure of a user of

View File

@ -258,8 +258,8 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
workspace->cbuf, &out_len, workspace->cbuf, &out_len,
workspace->mem); workspace->mem);
kunmap_local(data_in); kunmap_local(data_in);
if (ret < 0) { if (unlikely(ret < 0)) {
pr_debug("BTRFS: lzo in loop returned %d\n", ret); /* lzo1x_1_compress never fails. */
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
@ -354,11 +354,14 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
* and all sectors should be used. * and all sectors should be used.
* If this happens, it means the compressed extent is corrupted. * If this happens, it means the compressed extent is corrupted.
*/ */
if (len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) || if (unlikely(len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
round_up(len_in, sectorsize) < cb->compressed_len) { round_up(len_in, sectorsize) < cb->compressed_len)) {
struct btrfs_inode *inode = cb->bbio.inode;
btrfs_err(fs_info, btrfs_err(fs_info,
"invalid lzo header, lzo len %u compressed len %u", "lzo header invalid, root %llu inode %llu offset %llu lzo len %u compressed len %u",
len_in, cb->compressed_len); btrfs_root_id(inode->root), btrfs_ino(inode),
cb->start, len_in, cb->compressed_len);
return -EUCLEAN; return -EUCLEAN;
} }
@ -383,13 +386,17 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
kunmap_local(kaddr); kunmap_local(kaddr);
cur_in += LZO_LEN; cur_in += LZO_LEN;
if (seg_len > WORKSPACE_CBUF_LENGTH) { if (unlikely(seg_len > WORKSPACE_CBUF_LENGTH)) {
struct btrfs_inode *inode = cb->bbio.inode;
/* /*
* seg_len shouldn't be larger than we have allocated * seg_len shouldn't be larger than we have allocated
* for workspace->cbuf * for workspace->cbuf
*/ */
btrfs_err(fs_info, "unexpectedly large lzo segment len %u", btrfs_err(fs_info,
seg_len); "lzo segment too big, root %llu inode %llu offset %llu len %u",
btrfs_root_id(inode->root), btrfs_ino(inode),
cb->start, seg_len);
return -EIO; return -EIO;
} }
@ -399,8 +406,13 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
/* Decompress the data */ /* Decompress the data */
ret = lzo1x_decompress_safe(workspace->cbuf, seg_len, ret = lzo1x_decompress_safe(workspace->cbuf, seg_len,
workspace->buf, &out_len); workspace->buf, &out_len);
if (ret != LZO_E_OK) { if (unlikely(ret != LZO_E_OK)) {
btrfs_err(fs_info, "failed to decompress"); struct btrfs_inode *inode = cb->bbio.inode;
btrfs_err(fs_info,
"lzo decompression failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
cb->start);
return -EIO; return -EIO;
} }
@ -454,8 +466,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
out_len = sectorsize; out_len = sectorsize;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (ret != LZO_E_OK) { if (unlikely(ret != LZO_E_OK)) {
pr_warn("BTRFS: decompress failed!\n"); struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
btrfs_err(fs_info,
"lzo decompression failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page));
ret = -EIO; ret = -EIO;
goto out; goto out;
} }

View File

@ -20,7 +20,8 @@ static const char fs_state_chars[] = {
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A', [BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
[BTRFS_FS_STATE_DEV_REPLACING] = 'R', [BTRFS_FS_STATE_DEV_REPLACING] = 'R',
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0, [BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
[BTRFS_FS_STATE_NO_CSUMS] = 'C', [BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',
[BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S',
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L', [BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
}; };

View File

@ -66,7 +66,7 @@ struct rb_simple_node {
u64 bytenr; u64 bytenr;
}; };
static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr) static inline struct rb_node *rb_simple_search(const struct rb_root *root, u64 bytenr)
{ {
struct rb_node *node = root->rb_node; struct rb_node *node = root->rb_node;
struct rb_simple_node *entry; struct rb_simple_node *entry;
@ -93,7 +93,7 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
* Return the rb_node that starts at or after @bytenr. If there is no entry at * Return the rb_node that starts at or after @bytenr. If there is no entry at
* or after @bytenr return NULL. * or after @bytenr return NULL.
*/ */
static inline struct rb_node *rb_simple_search_first(struct rb_root *root, static inline struct rb_node *rb_simple_search_first(const struct rb_root *root,
u64 bytenr) u64 bytenr)
{ {
struct rb_node *node = root->rb_node, *ret = NULL; struct rb_node *node = root->rb_node, *ret = NULL;

View File

@ -19,6 +19,7 @@
#include "qgroup.h" #include "qgroup.h"
#include "subpage.h" #include "subpage.h"
#include "file.h" #include "file.h"
#include "block-group.h"
static struct kmem_cache *btrfs_ordered_extent_cache; static struct kmem_cache *btrfs_ordered_extent_cache;
@ -179,7 +180,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
entry->disk_num_bytes = disk_num_bytes; entry->disk_num_bytes = disk_num_bytes;
entry->offset = offset; entry->offset = offset;
entry->bytes_left = num_bytes; entry->bytes_left = num_bytes;
entry->inode = igrab(&inode->vfs_inode); entry->inode = BTRFS_I(igrab(&inode->vfs_inode));
entry->compress_type = compress_type; entry->compress_type = compress_type;
entry->truncated_len = (u64)-1; entry->truncated_len = (u64)-1;
entry->qgroup_rsv = qgroup_rsv; entry->qgroup_rsv = qgroup_rsv;
@ -207,7 +208,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
static void insert_ordered_extent(struct btrfs_ordered_extent *entry) static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
{ {
struct btrfs_inode *inode = BTRFS_I(entry->inode); struct btrfs_inode *inode = entry->inode;
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *node; struct rb_node *node;
@ -223,7 +224,7 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
spin_lock_irq(&inode->ordered_tree_lock); spin_lock_irq(&inode->ordered_tree_lock);
node = tree_insert(&inode->ordered_tree, entry->file_offset, node = tree_insert(&inode->ordered_tree, entry->file_offset,
&entry->rb_node); &entry->rb_node);
if (node) if (unlikely(node))
btrfs_panic(fs_info, -EEXIST, btrfs_panic(fs_info, -EEXIST,
"inconsistency in ordered tree at offset %llu", "inconsistency in ordered tree at offset %llu",
entry->file_offset); entry->file_offset);
@ -263,17 +264,39 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
*/ */
struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
struct btrfs_inode *inode, u64 file_offset, struct btrfs_inode *inode, u64 file_offset,
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, const struct btrfs_file_extent *file_extent, unsigned long flags)
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type)
{ {
struct btrfs_ordered_extent *entry; struct btrfs_ordered_extent *entry;
ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes, /*
disk_bytenr, disk_num_bytes, offset, flags, * For regular writes, we just use the members in @file_extent.
compress_type); *
* For NOCOW, we don't really care about the numbers except @start and
* file_extent->num_bytes, as we won't insert a file extent item at all.
*
* For PREALLOC, we do not use ordered extent members, but
* btrfs_mark_extent_written() handles everything.
*
* So here we always pass 0 as offset for NOCOW/PREALLOC ordered extents,
* or btrfs_split_ordered_extent() cannot handle it correctly.
*/
if (flags & ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC)))
entry = alloc_ordered_extent(inode, file_offset,
file_extent->num_bytes,
file_extent->num_bytes,
file_extent->disk_bytenr + file_extent->offset,
file_extent->num_bytes, 0, flags,
file_extent->compression);
else
entry = alloc_ordered_extent(inode, file_offset,
file_extent->num_bytes,
file_extent->ram_bytes,
file_extent->disk_bytenr,
file_extent->disk_num_bytes,
file_extent->offset, flags,
file_extent->compression);
if (!IS_ERR(entry)) if (!IS_ERR(entry))
insert_ordered_extent(entry); insert_ordered_extent(entry);
return entry; return entry;
@ -287,7 +310,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum) struct btrfs_ordered_sum *sum)
{ {
struct btrfs_inode *inode = BTRFS_I(entry->inode); struct btrfs_inode *inode = entry->inode;
spin_lock_irq(&inode->ordered_tree_lock); spin_lock_irq(&inode->ordered_tree_lock);
list_add_tail(&sum->list, &entry->list); list_add_tail(&sum->list, &entry->list);
@ -297,7 +320,7 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered) void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered)
{ {
if (!test_and_set_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) if (!test_and_set_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
mapping_set_error(ordered->inode->i_mapping, -EIO); mapping_set_error(ordered->inode->vfs_inode.i_mapping, -EIO);
} }
static void finish_ordered_fn(struct btrfs_work *work) static void finish_ordered_fn(struct btrfs_work *work)
@ -312,7 +335,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, struct page *page, u64 file_offset,
u64 len, bool uptodate) u64 len, bool uptodate)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
lockdep_assert_held(&inode->ordered_tree_lock); lockdep_assert_held(&inode->ordered_tree_lock);
@ -365,7 +388,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered) static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ? struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
fs_info->endio_freespace_worker : fs_info->endio_write_workers; fs_info->endio_freespace_worker : fs_info->endio_write_workers;
@ -374,11 +397,11 @@ static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
btrfs_queue_work(wq, &ordered->work); btrfs_queue_work(wq, &ordered->work);
} }
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, u64 len, struct page *page, u64 file_offset, u64 len,
bool uptodate) bool uptodate)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
unsigned long flags; unsigned long flags;
bool ret; bool ret;
@ -421,7 +444,6 @@ bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
if (ret) if (ret)
btrfs_queue_ordered_fn(ordered); btrfs_queue_ordered_fn(ordered);
return ret;
} }
/* /*
@ -588,14 +610,14 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
struct list_head *cur; struct list_head *cur;
struct btrfs_ordered_sum *sum; struct btrfs_ordered_sum *sum;
trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry); trace_btrfs_ordered_extent_put(entry->inode, entry);
if (refcount_dec_and_test(&entry->refs)) { if (refcount_dec_and_test(&entry->refs)) {
ASSERT(list_empty(&entry->root_extent_list)); ASSERT(list_empty(&entry->root_extent_list));
ASSERT(list_empty(&entry->log_list)); ASSERT(list_empty(&entry->log_list));
ASSERT(RB_EMPTY_NODE(&entry->rb_node)); ASSERT(RB_EMPTY_NODE(&entry->rb_node));
if (entry->inode) if (entry->inode)
btrfs_add_delayed_iput(BTRFS_I(entry->inode)); btrfs_add_delayed_iput(entry->inode);
while (!list_empty(&entry->list)) { while (!list_empty(&entry->list)) {
cur = entry->list.next; cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list); sum = list_entry(cur, struct btrfs_ordered_sum, list);
@ -626,7 +648,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
freespace_inode = btrfs_is_free_space_inode(btrfs_inode); freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
/* This is paired with btrfs_alloc_ordered_extent. */ /* This is paired with alloc_ordered_extent(). */
spin_lock(&btrfs_inode->lock); spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1); btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock); spin_unlock(&btrfs_inode->lock);
@ -712,11 +734,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
} }
/* /*
* wait for all the ordered extents in a root. This is done when balancing * Wait for all the ordered extents in a root. Use @bg as range or do whole
* space between drives. * range if it's NULL.
*/ */
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len) const struct btrfs_block_group *bg)
{ {
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
LIST_HEAD(splice); LIST_HEAD(splice);
@ -724,7 +746,17 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
LIST_HEAD(works); LIST_HEAD(works);
struct btrfs_ordered_extent *ordered, *next; struct btrfs_ordered_extent *ordered, *next;
u64 count = 0; u64 count = 0;
const u64 range_end = range_start + range_len; u64 range_start, range_len;
u64 range_end;
if (bg) {
range_start = bg->start;
range_len = bg->length;
} else {
range_start = 0;
range_len = U64_MAX;
}
range_end = range_start + range_len;
mutex_lock(&root->ordered_extent_mutex); mutex_lock(&root->ordered_extent_mutex);
spin_lock(&root->ordered_extent_lock); spin_lock(&root->ordered_extent_lock);
@ -751,10 +783,10 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work); btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
cond_resched(); cond_resched();
spin_lock(&root->ordered_extent_lock);
if (nr != U64_MAX) if (nr != U64_MAX)
nr--; nr--;
count++; count++;
spin_lock(&root->ordered_extent_lock);
} }
list_splice_tail(&skipped, &root->ordered_extents); list_splice_tail(&skipped, &root->ordered_extents);
list_splice_tail(&splice, &root->ordered_extents); list_splice_tail(&splice, &root->ordered_extents);
@ -771,8 +803,12 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
return count; return count;
} }
/*
* Wait for @nr ordered extents that intersect the @bg, or the whole range of
* the filesystem if @bg is NULL.
*/
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len) const struct btrfs_block_group *bg)
{ {
struct btrfs_root *root; struct btrfs_root *root;
LIST_HEAD(splice); LIST_HEAD(splice);
@ -790,14 +826,13 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
&fs_info->ordered_roots); &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock); spin_unlock(&fs_info->ordered_root_lock);
done = btrfs_wait_ordered_extents(root, nr, done = btrfs_wait_ordered_extents(root, nr, bg);
range_start, range_len);
btrfs_put_root(root); btrfs_put_root(root);
spin_lock(&fs_info->ordered_root_lock); if (nr != U64_MAX)
if (nr != U64_MAX) {
nr -= done; nr -= done;
}
spin_lock(&fs_info->ordered_root_lock);
} }
list_splice_tail(&splice, &fs_info->ordered_roots); list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock); spin_unlock(&fs_info->ordered_root_lock);
@ -814,7 +849,7 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
{ {
u64 start = entry->file_offset; u64 start = entry->file_offset;
u64 end = start + entry->num_bytes - 1; u64 end = start + entry->num_bytes - 1;
struct btrfs_inode *inode = BTRFS_I(entry->inode); struct btrfs_inode *inode = entry->inode;
bool freespace_inode; bool freespace_inode;
trace_btrfs_ordered_extent_start(inode, entry); trace_btrfs_ordered_extent_start(inode, entry);
@ -841,7 +876,7 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
/* /*
* Used to wait on ordered extents across a large range of bytes. * Used to wait on ordered extents across a large range of bytes.
*/ */
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len)
{ {
int ret = 0; int ret = 0;
int ret_wb = 0; int ret_wb = 0;
@ -871,11 +906,11 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
* before the ordered extents complete - to avoid failures (-EEXIST) * before the ordered extents complete - to avoid failures (-EEXIST)
* when adding the new ordered extents to the ordered tree. * when adding the new ordered extents to the ordered tree.
*/ */
ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end); ret_wb = filemap_fdatawait_range(inode->vfs_inode.i_mapping, start, orig_end);
end = orig_end; end = orig_end;
while (1) { while (1) {
ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end); ordered = btrfs_lookup_first_ordered_extent(inode, end);
if (!ordered) if (!ordered)
break; break;
if (ordered->file_offset > orig_end) { if (ordered->file_offset > orig_end) {
@ -1173,7 +1208,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
struct btrfs_ordered_extent *btrfs_split_ordered_extent( struct btrfs_ordered_extent *btrfs_split_ordered_extent(
struct btrfs_ordered_extent *ordered, u64 len) struct btrfs_ordered_extent *ordered, u64 len)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
u64 file_offset = ordered->file_offset; u64 file_offset = ordered->file_offset;
@ -1212,15 +1247,32 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
/* One ref for the tree. */ /* One ref for the tree. */
refcount_inc(&new->refs); refcount_inc(&new->refs);
/*
* Take the root's ordered_extent_lock to avoid a race with
* btrfs_wait_ordered_extents() when updating the disk_bytenr and
* disk_num_bytes fields of the ordered extent below. And we disable
* IRQs because the inode's ordered_tree_lock is used in IRQ context
* elsewhere.
*
* There's no concern about a previous caller of
* btrfs_wait_ordered_extents() getting the trimmed ordered extent
* before we insert the new one, because even if it gets the ordered
* extent before it's trimmed and the new one inserted, right before it
* uses it or during its use, the ordered extent might have been
* trimmed in the meanwhile, and it missed the new ordered extent.
* There's no way around this and it's harmless for current use cases,
* so we take the root's ordered_extent_lock to fix that race during
* trimming and silence tools like KCSAN.
*/
spin_lock_irq(&root->ordered_extent_lock); spin_lock_irq(&root->ordered_extent_lock);
spin_lock(&inode->ordered_tree_lock); spin_lock(&inode->ordered_tree_lock);
/* Remove from tree once */
node = &ordered->rb_node;
rb_erase(node, &inode->ordered_tree);
RB_CLEAR_NODE(node);
if (inode->ordered_tree_last == node)
inode->ordered_tree_last = NULL;
/*
* We don't have overlapping ordered extents (that would imply double
* allocation of extents) and we checked above that the split length
* does not cross the ordered extent's num_bytes field, so there's
* no need to remove it and re-insert it in the tree.
*/
ordered->file_offset += len; ordered->file_offset += len;
ordered->disk_bytenr += len; ordered->disk_bytenr += len;
ordered->num_bytes -= len; ordered->num_bytes -= len;
@ -1250,18 +1302,10 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
offset += sum->len; offset += sum->len;
} }
/* Re-insert the node */
node = tree_insert(&inode->ordered_tree, ordered->file_offset,
&ordered->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"zoned: inconsistency in ordered tree at offset %llu",
ordered->file_offset);
node = tree_insert(&inode->ordered_tree, new->file_offset, &new->rb_node); node = tree_insert(&inode->ordered_tree, new->file_offset, &new->rb_node);
if (node) if (unlikely(node))
btrfs_panic(fs_info, -EEXIST, btrfs_panic(fs_info, -EEXIST,
"zoned: inconsistency in ordered tree at offset %llu", "inconsistency in ordered tree at offset %llu after split",
new->file_offset); new->file_offset);
spin_unlock(&inode->ordered_tree_lock); spin_unlock(&inode->ordered_tree_lock);

View File

@ -130,7 +130,7 @@ struct btrfs_ordered_extent {
refcount_t refs; refcount_t refs;
/* the inode we belong to */ /* the inode we belong to */
struct inode *inode; struct btrfs_inode *inode;
/* list of checksums for insertion when the extent io is done */ /* list of checksums for insertion when the extent io is done */
struct list_head list; struct list_head list;
@ -162,7 +162,7 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
struct btrfs_ordered_extent *entry); struct btrfs_ordered_extent *entry);
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, u64 len, struct page *page, u64 file_offset, u64 len,
bool uptodate); bool uptodate);
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
@ -171,17 +171,28 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode, bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
struct btrfs_ordered_extent **cached, struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size); u64 file_offset, u64 io_size);
/*
* This represents details about the target file extent item of a write operation.
*
* btrfs_alloc_ordered_extent() receives a pointer to one of these; the member
* notes below describe how that function consumes each field.
*/
struct btrfs_file_extent {
/*
* Passed through as the ordered extent's disk_bytenr for regular writes.
* For NOCOW/PREALLOC ordered extents, disk_bytenr + offset is used instead.
*/
u64 disk_bytenr;
/*
* Passed through as disk_num_bytes for regular writes. Not read for
* NOCOW/PREALLOC ordered extents, which use num_bytes in its place.
*/
u64 disk_num_bytes;
/*
* Passed as the ordered extent's num_bytes in all cases. For
* NOCOW/PREALLOC it also stands in for ram_bytes and disk_num_bytes.
*/
u64 num_bytes;
/* Passed through as ram_bytes for regular writes only. */
u64 ram_bytes;
/*
* Passed through as the ordered extent's offset for regular writes. For
* NOCOW/PREALLOC it is folded into disk_bytenr and 0 is passed as offset.
*/
u64 offset;
/* Passed as the ordered extent's compress type in all cases. */
u8 compression;
};
struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
struct btrfs_inode *inode, u64 file_offset, struct btrfs_inode *inode, u64 file_offset,
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, const struct btrfs_file_extent *file_extent, unsigned long flags);
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum); struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode, struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
u64 file_offset); u64 file_offset);
void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry); void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry);
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent * struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset); btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range( struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
@ -193,9 +204,9 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode, void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
struct list_head *list); struct list_head *list);
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len); const struct btrfs_block_group *bg);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len); const struct btrfs_block_group *bg);
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
u64 end, u64 end,
struct extent_state **cached_state); struct extent_state **cached_state);

View File

@ -109,7 +109,7 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
btrfs_err(eb->fs_info, btrfs_err(eb->fs_info,
"unexpected extent item size, has %u expect >= %zu", "unexpected extent item size, has %u expect >= %zu",
item_size, sizeof(*ei)); item_size, sizeof(*ei));
btrfs_handle_fs_error(eb->fs_info, -EUCLEAN, NULL); return;
} }
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@ -208,11 +208,6 @@ static void print_raid_stripe_key(const struct extent_buffer *eb, u32 item_size,
struct btrfs_stripe_extent *stripe) struct btrfs_stripe_extent *stripe)
{ {
const int num_stripes = btrfs_num_raid_stripes(item_size); const int num_stripes = btrfs_num_raid_stripes(item_size);
const u8 encoding = btrfs_stripe_extent_encoding(eb, stripe);
pr_info("\t\t\tencoding: %s\n",
(encoding && encoding < BTRFS_NR_RAID_TYPES) ?
btrfs_raid_array[encoding].raid_name : "unknown");
for (int i = 0; i < num_stripes; i++) for (int i = 0; i < num_stripes; i++)
pr_info("\t\t\tstride %d devid %llu physical %llu\n", pr_info("\t\t\tstride %d devid %llu physical %llu\n",
@ -310,6 +305,9 @@ void btrfs_print_leaf(const struct extent_buffer *l)
case BTRFS_EXTENT_DATA_KEY: case BTRFS_EXTENT_DATA_KEY:
fi = btrfs_item_ptr(l, i, fi = btrfs_item_ptr(l, i,
struct btrfs_file_extent_item); struct btrfs_file_extent_item);
pr_info("\t\tgeneration %llu type %hhu\n",
btrfs_file_extent_generation(l, fi),
btrfs_file_extent_type(l, fi));
if (btrfs_file_extent_type(l, fi) == if (btrfs_file_extent_type(l, fi) ==
BTRFS_FILE_EXTENT_INLINE) { BTRFS_FILE_EXTENT_INLINE) {
pr_info("\t\tinline extent data size %llu\n", pr_info("\t\tinline extent data size %llu\n",

View File

@ -27,7 +27,7 @@ struct prop_handler {
int (*validate)(const struct btrfs_inode *inode, const char *value, int (*validate)(const struct btrfs_inode *inode, const char *value,
size_t len); size_t len);
int (*apply)(struct inode *inode, const char *value, size_t len); int (*apply)(struct inode *inode, const char *value, size_t len);
const char *(*extract)(struct inode *inode); const char *(*extract)(const struct inode *inode);
bool (*ignore)(const struct btrfs_inode *inode); bool (*ignore)(const struct btrfs_inode *inode);
int inheritable; int inheritable;
}; };
@ -104,7 +104,7 @@ bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name)
return handler->ignore(inode); return handler->ignore(inode);
} }
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, int btrfs_set_prop(struct btrfs_trans_handle *trans, struct btrfs_inode *inode,
const char *name, const char *value, size_t value_len, const char *name, const char *value, size_t value_len,
int flags) int flags)
{ {
@ -116,29 +116,29 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
return -EINVAL; return -EINVAL;
if (value_len == 0) { if (value_len == 0) {
ret = btrfs_setxattr(trans, inode, handler->xattr_name, ret = btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name,
NULL, 0, flags); NULL, 0, flags);
if (ret) if (ret)
return ret; return ret;
ret = handler->apply(inode, NULL, 0); ret = handler->apply(&inode->vfs_inode, NULL, 0);
ASSERT(ret == 0); ASSERT(ret == 0);
return ret; return ret;
} }
ret = btrfs_setxattr(trans, inode, handler->xattr_name, value, ret = btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name, value,
value_len, flags); value_len, flags);
if (ret) if (ret)
return ret; return ret;
ret = handler->apply(inode, value, value_len); ret = handler->apply(&inode->vfs_inode, value, value_len);
if (ret) { if (ret) {
btrfs_setxattr(trans, inode, handler->xattr_name, NULL, btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name, NULL,
0, flags); 0, flags);
return ret; return ret;
} }
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags); set_bit(BTRFS_INODE_HAS_PROPS, &inode->runtime_flags);
return 0; return 0;
} }
@ -359,7 +359,7 @@ static bool prop_compression_ignore(const struct btrfs_inode *inode)
return false; return false;
} }
static const char *prop_compression_extract(struct inode *inode) static const char *prop_compression_extract(const struct inode *inode)
{ {
switch (BTRFS_I(inode)->prop_compress) { switch (BTRFS_I(inode)->prop_compress) {
case BTRFS_COMPRESS_ZLIB: case BTRFS_COMPRESS_ZLIB:
@ -385,7 +385,7 @@ static struct prop_handler prop_handlers[] = {
}; };
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans, int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *parent) struct inode *inode, const struct inode *parent)
{ {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;

View File

@ -15,7 +15,7 @@ struct btrfs_trans_handle;
int __init btrfs_props_init(void); int __init btrfs_props_init(void);
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode, int btrfs_set_prop(struct btrfs_trans_handle *trans, struct btrfs_inode *inode,
const char *name, const char *value, size_t value_len, const char *name, const char *value, size_t value_len,
int flags); int flags);
int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name, int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
@ -26,6 +26,6 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans, int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *inode,
struct inode *dir); const struct inode *dir);
#endif #endif

View File

@ -30,7 +30,7 @@
#include "root-tree.h" #include "root-tree.h"
#include "tree-checker.h" #include "tree-checker.h"
enum btrfs_qgroup_mode btrfs_qgroup_mode(struct btrfs_fs_info *fs_info) enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info)
{ {
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return BTRFS_QGROUP_MODE_DISABLED; return BTRFS_QGROUP_MODE_DISABLED;
@ -39,12 +39,12 @@ enum btrfs_qgroup_mode btrfs_qgroup_mode(struct btrfs_fs_info *fs_info)
return BTRFS_QGROUP_MODE_FULL; return BTRFS_QGROUP_MODE_FULL;
} }
bool btrfs_qgroup_enabled(struct btrfs_fs_info *fs_info) bool btrfs_qgroup_enabled(const struct btrfs_fs_info *fs_info)
{ {
return btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_DISABLED; return btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_DISABLED;
} }
bool btrfs_qgroup_full_accounting(struct btrfs_fs_info *fs_info) bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info)
{ {
return btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL; return btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL;
} }
@ -107,7 +107,7 @@ static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info, static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *dest, struct btrfs_qgroup *dest,
struct btrfs_qgroup *src) const struct btrfs_qgroup *src)
{ {
int i; int i;
@ -117,7 +117,7 @@ static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info, static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup *dest, struct btrfs_qgroup *dest,
struct btrfs_qgroup *src) const struct btrfs_qgroup *src)
{ {
int i; int i;
@ -141,37 +141,27 @@ static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
qg->new_refcnt += mod; qg->new_refcnt += mod;
} }
static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) static inline u64 btrfs_qgroup_get_old_refcnt(const struct btrfs_qgroup *qg, u64 seq)
{ {
if (qg->old_refcnt < seq) if (qg->old_refcnt < seq)
return 0; return 0;
return qg->old_refcnt - seq; return qg->old_refcnt - seq;
} }
static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) static inline u64 btrfs_qgroup_get_new_refcnt(const struct btrfs_qgroup *qg, u64 seq)
{ {
if (qg->new_refcnt < seq) if (qg->new_refcnt < seq)
return 0; return 0;
return qg->new_refcnt - seq; return qg->new_refcnt - seq;
} }
/*
* glue structure to represent the relations between qgroups.
*/
struct btrfs_qgroup_list {
struct list_head next_group;
struct list_head next_member;
struct btrfs_qgroup *group;
struct btrfs_qgroup *member;
};
static int static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
int init_flags); int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
/* must be called with qgroup_ioctl_lock held */ /* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, static struct btrfs_qgroup *find_qgroup_rb(const struct btrfs_fs_info *fs_info,
u64 qgroupid) u64 qgroupid)
{ {
struct rb_node *n = fs_info->qgroup_tree.rb_node; struct rb_node *n = fs_info->qgroup_tree.rb_node;
@ -346,7 +336,7 @@ static int del_relation_rb(struct btrfs_fs_info *fs_info,
} }
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid,
u64 rfer, u64 excl) u64 rfer, u64 excl)
{ {
struct btrfs_qgroup *qgroup; struct btrfs_qgroup *qgroup;
@ -608,7 +598,7 @@ out:
* Return false if no reserved space is left. * Return false if no reserved space is left.
* Return true if some reserved space is leaked. * Return true if some reserved space is leaked.
*/ */
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info) bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info)
{ {
struct rb_node *node; struct rb_node *node;
bool ret = false; bool ret = false;
@ -1334,19 +1324,14 @@ out:
*/ */
static int flush_reservations(struct btrfs_fs_info *fs_info) static int flush_reservations(struct btrfs_fs_info *fs_info)
{ {
struct btrfs_trans_handle *trans;
int ret; int ret;
ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false); ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false);
if (ret) if (ret)
return ret; return ret;
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
trans = btrfs_join_transaction(fs_info->tree_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_commit_transaction(trans);
return ret; return btrfs_commit_current_transaction(fs_info->tree_root);
} }
int btrfs_quota_disable(struct btrfs_fs_info *fs_info) int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
@ -1446,9 +1431,11 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_tree_lock(quota_root->node); btrfs_tree_lock(quota_root->node);
btrfs_clear_buffer_dirty(trans, quota_root->node); btrfs_clear_buffer_dirty(trans, quota_root->node);
btrfs_tree_unlock(quota_root->node); btrfs_tree_unlock(quota_root->node);
btrfs_free_tree_block(trans, btrfs_root_id(quota_root), ret = btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
quota_root->node, 0, 1); quota_root->node, 0, 1);
if (ret < 0)
btrfs_abort_transaction(trans, ret);
out: out:
btrfs_put_root(quota_root); btrfs_put_root(quota_root);
@ -1572,15 +1559,21 @@ out:
return ret; return ret;
} }
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst) /*
* Add relation between @src and @dst qgroup. The @prealloc is allocated by the
* callers and transferred here (either used or freed on error).
*/
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst,
struct btrfs_qgroup_list *prealloc)
{ {
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_qgroup *parent; struct btrfs_qgroup *parent;
struct btrfs_qgroup *member; struct btrfs_qgroup *member;
struct btrfs_qgroup_list *list; struct btrfs_qgroup_list *list;
struct btrfs_qgroup_list *prealloc = NULL;
int ret = 0; int ret = 0;
ASSERT(prealloc);
/* Check the level of src and dst first */ /* Check the level of src and dst first */
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL; return -EINVAL;
@ -1605,11 +1598,6 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst
} }
} }
prealloc = kzalloc(sizeof(*list), GFP_NOFS);
if (!prealloc) {
ret = -ENOMEM;
goto out;
}
ret = add_qgroup_relation_item(trans, src, dst); ret = add_qgroup_relation_item(trans, src, dst);
if (ret) if (ret)
goto out; goto out;
@ -1748,13 +1736,55 @@ out:
return ret; return ret;
} }
static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) /*
* Return 0 if we can not delete the qgroup (not empty or has children etc).
* Return >0 if we can delete the qgroup.
* Return <0 for other errors during tree search.
*/
static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup)
{ {
return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || struct btrfs_key key;
qgroup->excl > 0 || qgroup->excl_cmpr > 0 || struct btrfs_path *path;
qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || int ret;
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); /*
* Squota would never be inconsistent, but there can still be case
* where a dropped subvolume still has qgroup numbers, and squota
* relies on such qgroup for future accounting.
*
* So for squota, do not allow dropping any non-zero qgroup.
*/
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE &&
(qgroup->rfer || qgroup->excl || qgroup->excl_cmpr || qgroup->rfer_cmpr))
return 0;
/* For higher level qgroup, we can only delete it if it has no child. */
if (btrfs_qgroup_level(qgroup->qgroupid)) {
if (!list_empty(&qgroup->members))
return 0;
return 1;
}
/*
* For level-0 qgroups, we can only delete it if it has no subvolume
* for it.
* This means that even if a subvolume is unlinked but not yet fully dropped,
* we can not delete the qgroup.
*/
key.objectid = qgroup->qgroupid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = -1ULL;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL);
btrfs_free_path(path);
/*
* The @ret from btrfs_find_root() exactly matches our definition for
* the return value, thus can be returned directly.
*/
return ret;
} }
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
@ -1776,7 +1806,10 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out; goto out;
} }
if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { ret = can_delete_qgroup(fs_info, qgroup);
if (ret < 0)
goto out;
if (ret == 0) {
ret = -EBUSY; ret = -EBUSY;
goto out; goto out;
} }
@ -1801,6 +1834,34 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
} }
spin_lock(&fs_info->qgroup_lock); spin_lock(&fs_info->qgroup_lock);
/*
* Warn on reserved space. The qgroup should have no child nor
* corresponding subvolume.
* Thus its reserved space should all be zero, no matter if qgroup
* is consistent or the mode.
*/
WARN_ON(qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] ||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] ||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]);
/*
* The same for rfer/excl numbers, but that's only if our qgroup is
* consistent and if it's in regular qgroup mode.
* For simple mode it's not as accurate thus we can hit non-zero values
* very frequently.
*/
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL &&
!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) {
if (WARN_ON(qgroup->rfer || qgroup->excl ||
qgroup->rfer_cmpr || qgroup->excl_cmpr)) {
btrfs_warn_rl(fs_info,
"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
btrfs_qgroup_level(qgroup->qgroupid),
btrfs_qgroup_subvolid(qgroup->qgroupid),
qgroup->rfer, qgroup->rfer_cmpr,
qgroup->excl, qgroup->excl_cmpr);
qgroup_mark_inconsistent(fs_info);
}
}
del_qgroup_rb(fs_info, qgroupid); del_qgroup_rb(fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock); spin_unlock(&fs_info->qgroup_lock);
@ -1816,6 +1877,41 @@ out:
return ret; return ret;
} }
/*
 * Try to remove the qgroup that belongs to the subvolume @subvolid.
 *
 * Nothing is done when is_fstree(@subvolid) is false, when qgroups are not
 * enabled, or when the filesystem has no quota root.
 *
 * Return 0 on success, when nothing had to be done, or when
 * btrfs_remove_qgroup() refused the removal with -EBUSY.
 * Return <0 for any other error reported while starting or committing a
 * transaction, or while removing the qgroup.
 */
int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 subvolid)
{
	struct btrfs_trans_handle *trans;
	int ret;

	if (!is_fstree(subvolid))
		return 0;
	if (!btrfs_qgroup_enabled(fs_info))
		return 0;
	if (!fs_info->quota_root)
		return 0;

	/*
	 * The rfer/excl numbers are only up to date once the current
	 * transaction has been committed, so do that first.
	 */
	trans = btrfs_start_transaction(fs_info->quota_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_commit_transaction(trans);
	if (ret < 0)
		return ret;

	/* A fresh transaction is used to delete the qgroup info and limit items. */
	trans = btrfs_start_transaction(fs_info->quota_root, 2);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_remove_qgroup(trans, subvolid);
	btrfs_end_transaction(trans);

	/*
	 * -EBUSY means the qgroup can not be deleted, e.g. it's squota and the
	 * subvolume still has numbers needed for future accounting. That is
	 * not a failure for the caller, just skip the qgroup.
	 */
	return (ret == -EBUSY) ? 0 : ret;
}
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit) struct btrfs_qgroup_limit *limit)
{ {
@ -3222,7 +3318,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
struct btrfs_qgroup_inherit *inherit) struct btrfs_qgroup_inherit *inherit)
{ {
int ret = 0; int ret = 0;
int i;
u64 *i_qgroups; u64 *i_qgroups;
bool committing = false; bool committing = false;
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
@ -3279,7 +3374,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
i_qgroups = (u64 *)(inherit + 1); i_qgroups = (u64 *)(inherit + 1);
nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2 * inherit->num_excl_copies; 2 * inherit->num_excl_copies;
for (i = 0; i < nums; ++i) { for (int i = 0; i < nums; i++) {
srcgroup = find_qgroup_rb(fs_info, *i_qgroups); srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
/* /*
@ -3306,7 +3401,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
*/ */
if (inherit) { if (inherit) {
i_qgroups = (u64 *)(inherit + 1); i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { for (int i = 0; i < inherit->num_qgroups; i++, i_qgroups++) {
if (*i_qgroups == 0) if (*i_qgroups == 0)
continue; continue;
ret = add_qgroup_relation_item(trans, objectid, ret = add_qgroup_relation_item(trans, objectid,
@ -3392,7 +3487,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
goto unlock; goto unlock;
i_qgroups = (u64 *)(inherit + 1); i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i) { for (int i = 0; i < inherit->num_qgroups; i++) {
if (*i_qgroups) { if (*i_qgroups) {
ret = add_relation_rb(fs_info, qlist_prealloc[i], objectid, ret = add_relation_rb(fs_info, qlist_prealloc[i], objectid,
*i_qgroups); *i_qgroups);
@ -3412,7 +3507,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
++i_qgroups; ++i_qgroups;
} }
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { for (int i = 0; i < inherit->num_ref_copies; i++, i_qgroups += 2) {
struct btrfs_qgroup *src; struct btrfs_qgroup *src;
struct btrfs_qgroup *dst; struct btrfs_qgroup *dst;
@ -3433,7 +3528,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
/* Manually tweaking numbers certainly needs a rescan */ /* Manually tweaking numbers certainly needs a rescan */
need_rescan = true; need_rescan = true;
} }
for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { for (int i = 0; i < inherit->num_excl_copies; i++, i_qgroups += 2) {
struct btrfs_qgroup *src; struct btrfs_qgroup *src;
struct btrfs_qgroup *dst; struct btrfs_qgroup *dst;
@ -3918,7 +4013,6 @@ int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{ {
int ret = 0; int ret = 0;
struct btrfs_trans_handle *trans;
ret = qgroup_rescan_init(fs_info, 0, 1); ret = qgroup_rescan_init(fs_info, 0, 1);
if (ret) if (ret)
@ -3935,16 +4029,10 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
* going to clear all tracking information for a clean start. * going to clear all tracking information for a clean start.
*/ */
trans = btrfs_attach_transaction_barrier(fs_info->fs_root); ret = btrfs_commit_current_transaction(fs_info->fs_root);
if (IS_ERR(trans) && trans != ERR_PTR(-ENOENT)) { if (ret) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
return PTR_ERR(trans); return ret;
} else if (trans != ERR_PTR(-ENOENT)) {
ret = btrfs_commit_transaction(trans);
if (ret) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
return ret;
}
} }
qgroup_rescan_zero_tracking(fs_info); qgroup_rescan_zero_tracking(fs_info);
@ -4080,7 +4168,6 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
*/ */
static int try_flush_qgroup(struct btrfs_root *root) static int try_flush_qgroup(struct btrfs_root *root)
{ {
struct btrfs_trans_handle *trans;
int ret; int ret;
/* Can't hold an open transaction or we run the risk of deadlocking. */ /* Can't hold an open transaction or we run the risk of deadlocking. */
@ -4101,17 +4188,9 @@ static int try_flush_qgroup(struct btrfs_root *root)
ret = btrfs_start_delalloc_snapshot(root, true); ret = btrfs_start_delalloc_snapshot(root, true);
if (ret < 0) if (ret < 0)
goto out; goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_extents(root, U64_MAX, NULL);
trans = btrfs_attach_transaction_barrier(root); ret = btrfs_commit_current_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
if (ret == -ENOENT)
ret = 0;
goto out;
}
ret = btrfs_commit_transaction(trans);
out: out:
clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
wake_up(&root->qgroup_flush_wait); wake_up(&root->qgroup_flush_wait);
@ -4817,7 +4896,7 @@ void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_byte
} }
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
struct btrfs_squota_delta *delta) const struct btrfs_squota_delta *delta)
{ {
int ret; int ret;
struct btrfs_qgroup *qgroup; struct btrfs_qgroup *qgroup;

View File

@ -123,7 +123,6 @@ struct btrfs_inode;
/* /*
* Record a dirty extent, and info qgroup to update quota on it * Record a dirty extent, and info qgroup to update quota on it
* TODO: Use kmem cache to alloc it.
*/ */
struct btrfs_qgroup_extent_record { struct btrfs_qgroup_extent_record {
struct rb_node node; struct rb_node node;
@ -279,6 +278,14 @@ struct btrfs_qgroup {
struct kobject kobj; struct kobject kobj;
}; };
/* Glue structure to represent the relations between qgroups. */
struct btrfs_qgroup_list {
struct list_head next_group;
struct list_head next_member;
struct btrfs_qgroup *group;
struct btrfs_qgroup *member;
};
struct btrfs_squota_delta { struct btrfs_squota_delta {
/* The fstree root this delta counts against. */ /* The fstree root this delta counts against. */
u64 root; u64 root;
@ -312,9 +319,9 @@ enum btrfs_qgroup_mode {
BTRFS_QGROUP_MODE_SIMPLE BTRFS_QGROUP_MODE_SIMPLE
}; };
enum btrfs_qgroup_mode btrfs_qgroup_mode(struct btrfs_fs_info *fs_info); enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info);
bool btrfs_qgroup_enabled(struct btrfs_fs_info *fs_info); bool btrfs_qgroup_enabled(const struct btrfs_fs_info *fs_info);
bool btrfs_qgroup_full_accounting(struct btrfs_fs_info *fs_info); bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info);
int btrfs_quota_enable(struct btrfs_fs_info *fs_info, int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_quota_ctl_args *quota_ctl_args); struct btrfs_ioctl_quota_ctl_args *quota_ctl_args);
int btrfs_quota_disable(struct btrfs_fs_info *fs_info); int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
@ -322,11 +329,13 @@ int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
bool interruptible); bool interruptible);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst,
struct btrfs_qgroup_list *prealloc);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
u64 dst); u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid); int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 subvolid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
struct btrfs_qgroup_limit *limit); struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info); int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
@ -361,7 +370,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
enum btrfs_qgroup_rsv_type type); enum btrfs_qgroup_rsv_type type);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid,
u64 rfer, u64 excl); u64 rfer, u64 excl);
#endif #endif
@ -431,9 +440,9 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *eb); struct btrfs_root *root, struct extent_buffer *eb);
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info);
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes); void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes);
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info, int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
struct btrfs_squota_delta *delta); const struct btrfs_squota_delta *delta);
#endif #endif

View File

@ -80,7 +80,6 @@ static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
struct btrfs_key stripe_key; struct btrfs_key stripe_key;
struct btrfs_root *stripe_root = fs_info->stripe_root; struct btrfs_root *stripe_root = fs_info->stripe_root;
const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type); const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
u8 encoding = btrfs_bg_flags_to_raid_index(bioc->map_type);
struct btrfs_stripe_extent *stripe_extent; struct btrfs_stripe_extent *stripe_extent;
const size_t item_size = struct_size(stripe_extent, strides, num_stripes); const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
int ret; int ret;
@ -94,7 +93,6 @@ static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size, trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
num_stripes); num_stripes);
btrfs_set_stack_stripe_extent_encoding(stripe_extent, encoding);
for (int i = 0; i < num_stripes; i++) { for (int i = 0; i < num_stripes; i++) {
u64 devid = bioc->stripes[i].dev->devid; u64 devid = bioc->stripes[i].dev->devid;
u64 physical = bioc->stripes[i].physical; u64 physical = bioc->stripes[i].physical;
@ -159,7 +157,6 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf; struct extent_buffer *leaf;
const u64 end = logical + *length; const u64 end = logical + *length;
int num_stripes; int num_stripes;
u8 encoding;
u64 offset; u64 offset;
u64 found_logical; u64 found_logical;
u64 found_length; u64 found_length;
@ -222,16 +219,6 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot)); num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent); stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
if (encoding != btrfs_bg_flags_to_raid_index(map_type)) {
ret = -EUCLEAN;
btrfs_handle_fs_error(fs_info, ret,
"on-disk stripe encoding %d doesn't match RAID index %d",
encoding,
btrfs_bg_flags_to_raid_index(map_type));
goto out;
}
for (int i = 0; i < num_stripes; i++) { for (int i = 0; i < num_stripes; i++) {
struct btrfs_raid_stride *stride = &stripe_extent->strides[i]; struct btrfs_raid_stride *stride = &stripe_extent->strides[i];

View File

@ -48,8 +48,7 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
static inline int btrfs_num_raid_stripes(u32 item_size) static inline int btrfs_num_raid_stripes(u32 item_size)
{ {
return (item_size - offsetof(struct btrfs_stripe_extent, strides)) / return item_size / sizeof(struct btrfs_raid_stride);
sizeof(struct btrfs_raid_stride);
} }
#endif #endif

View File

@ -40,6 +40,85 @@
#define BTRFS_STRIPE_HASH_TABLE_BITS 11 #define BTRFS_STRIPE_HASH_TABLE_BITS 11
/*
 * Dump the members of @bioc through btrfs_crit(): one line describing the
 * io context itself, followed by one line for each of its stripes.
 *
 * A NULL @bioc is reported as "bioc=NULL" and nothing else is printed.
 */
static void dump_bioc(const struct btrfs_fs_info *fs_info, const struct btrfs_io_context *bioc)
{
	if (unlikely(!bioc)) {
		btrfs_crit(fs_info, "bioc=NULL");
		return;
	}

	btrfs_crit(fs_info,
"bioc logical=%llu full_stripe=%llu size=%llu map_type=0x%llx mirror=%u replace_nr_stripes=%u replace_stripe_src=%d num_stripes=%u",
		   bioc->logical, bioc->full_stripe_logical, bioc->size,
		   bioc->map_type, bioc->mirror_num, bioc->replace_nr_stripes,
		   bioc->replace_stripe_src, bioc->num_stripes);

	/* One line per stripe: its index, device id and physical offset. */
	for (int nr = 0; nr < bioc->num_stripes; nr++)
		btrfs_crit(fs_info, " nr=%d devid=%llu physical=%llu",
			   nr, bioc->stripes[nr].dev->devid,
			   bioc->stripes[nr].physical);
}
/*
 * Log the state of a raid bio at critical level.
 *
 * Nothing is printed unless CONFIG_BTRFS_ASSERT is enabled.  When it is,
 * the attached I/O context (rbio->bioc) is dumped first, followed by one
 * line with the rbio's own fields.
 */
static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info,
			    const struct btrfs_raid_bio *rbio)
{
	if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
		dump_bioc(fs_info, rbio->bioc);
		btrfs_crit(fs_info,
"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx",
			   rbio->flags, rbio->nr_sectors, rbio->nr_data,
			   rbio->real_stripes, rbio->stripe_nsectors,
			   rbio->scrubp, rbio->dbitmap);
	}
}
/*
 * Assert @expr, dumping @rbio first if the assertion is about to fail.
 *
 * When CONFIG_BTRFS_ASSERT is enabled and @expr is false, the rbio (and its
 * bioc) is logged via btrfs_dump_rbio(); the fs_info used for logging is taken
 * from rbio->bioc when that pointer is set and is NULL otherwise.  ASSERT() is
 * then invoked on @expr.  @expr appears twice in the expansion, so it should
 * be free of side effects.
 */
#define ASSERT_RBIO(expr, rbio) \
({ \
if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
(rbio)->bioc->fs_info : NULL; \
\
btrfs_dump_rbio(__fs_info, (rbio)); \
} \
ASSERT((expr)); \
})
/*
 * Assert @expr about a stripe number, dumping @rbio and @stripe_nr first if
 * the assertion is about to fail.
 *
 * When CONFIG_BTRFS_ASSERT is enabled and @expr is false, the rbio is logged
 * via btrfs_dump_rbio() and "stripe_nr=<value>" is printed at critical level;
 * the fs_info used for logging comes from rbio->bioc when set, else NULL.
 * ASSERT() is then invoked on @expr.  @expr appears twice in the expansion,
 * so it should be free of side effects.
 */
#define ASSERT_RBIO_STRIPE(expr, rbio, stripe_nr) \
({ \
if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
(rbio)->bioc->fs_info : NULL; \
\
btrfs_dump_rbio(__fs_info, (rbio)); \
btrfs_crit(__fs_info, "stripe_nr=%d", (stripe_nr)); \
} \
ASSERT((expr)); \
})
/*
 * Assert @expr about a sector number, dumping @rbio and @sector_nr first if
 * the assertion is about to fail.
 *
 * When CONFIG_BTRFS_ASSERT is enabled and @expr is false, the rbio is logged
 * via btrfs_dump_rbio() and "sector_nr=<value>" is printed at critical level;
 * the fs_info used for logging comes from rbio->bioc when set, else NULL.
 * ASSERT() is then invoked on @expr.  @expr appears twice in the expansion,
 * so it should be free of side effects.
 */
#define ASSERT_RBIO_SECTOR(expr, rbio, sector_nr) \
({ \
if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
(rbio)->bioc->fs_info : NULL; \
\
btrfs_dump_rbio(__fs_info, (rbio)); \
btrfs_crit(__fs_info, "sector_nr=%d", (sector_nr)); \
} \
ASSERT((expr)); \
})
/*
 * Assert @expr about a logical address, dumping @rbio and @logical first if
 * the assertion is about to fail.
 *
 * When CONFIG_BTRFS_ASSERT is enabled and @expr is false, the rbio is logged
 * via btrfs_dump_rbio() and "logical=<value>" is printed at critical level
 * (formatted with %llu); the fs_info used for logging comes from rbio->bioc
 * when set, else NULL.  ASSERT() is then invoked on @expr.  @expr appears
 * twice in the expansion, so it should be free of side effects.
 */
#define ASSERT_RBIO_LOGICAL(expr, rbio, logical) \
({ \
if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) { \
const struct btrfs_fs_info *__fs_info = (rbio)->bioc ? \
(rbio)->bioc->fs_info : NULL; \
\
btrfs_dump_rbio(__fs_info, (rbio)); \
btrfs_crit(__fs_info, "logical=%llu", (logical)); \
} \
ASSERT((expr)); \
})
/* Used by the raid56 code to lock stripes for read/modify/write */ /* Used by the raid56 code to lock stripes for read/modify/write */
struct btrfs_stripe_hash { struct btrfs_stripe_hash {
struct list_head hash_list; struct list_head hash_list;
@ -592,8 +671,8 @@ static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
unsigned int stripe_nr, unsigned int stripe_nr,
unsigned int sector_nr) unsigned int sector_nr)
{ {
ASSERT(stripe_nr < rbio->real_stripes); ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr);
ASSERT(sector_nr < rbio->stripe_nsectors); ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr);
return stripe_nr * rbio->stripe_nsectors + sector_nr; return stripe_nr * rbio->stripe_nsectors + sector_nr;
} }
@ -873,8 +952,10 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
struct sector_ptr *sector; struct sector_ptr *sector;
int index; int index;
ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes); ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->real_stripes,
ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors); rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
rbio, sector_nr);
index = stripe_nr * rbio->stripe_nsectors + sector_nr; index = stripe_nr * rbio->stripe_nsectors + sector_nr;
ASSERT(index >= 0 && index < rbio->nr_sectors); ASSERT(index >= 0 && index < rbio->nr_sectors);
@ -970,7 +1051,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
{ {
int ret; int ret;
ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, 0); ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, false);
if (ret < 0) if (ret < 0)
return ret; return ret;
/* Mapping all sectors */ /* Mapping all sectors */
@ -985,7 +1066,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
int ret; int ret;
ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages, ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
rbio->stripe_pages + data_pages, 0); rbio->stripe_pages + data_pages, false);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -1057,8 +1138,10 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
* thus it can be larger than rbio->real_stripe. * thus it can be larger than rbio->real_stripe.
* So here we check against bioc->num_stripes, not rbio->real_stripes. * So here we check against bioc->num_stripes, not rbio->real_stripes.
*/ */
ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes); ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes,
ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors); rbio, stripe_nr);
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
rbio, sector_nr);
ASSERT(sector->page); ASSERT(sector->page);
stripe = &rbio->bioc->stripes[stripe_nr]; stripe = &rbio->bioc->stripes[stripe_nr];
@ -1197,14 +1280,14 @@ static void assert_rbio(struct btrfs_raid_bio *rbio)
* At least two stripes (2 disks RAID5), and since real_stripes is U8, * At least two stripes (2 disks RAID5), and since real_stripes is U8,
* we won't go beyond 256 disks anyway. * we won't go beyond 256 disks anyway.
*/ */
ASSERT(rbio->real_stripes >= 2); ASSERT_RBIO(rbio->real_stripes >= 2, rbio);
ASSERT(rbio->nr_data > 0); ASSERT_RBIO(rbio->nr_data > 0, rbio);
/* /*
* This is another check to make sure nr data stripes is smaller * This is another check to make sure nr data stripes is smaller
* than total stripes. * than total stripes.
*/ */
ASSERT(rbio->nr_data < rbio->real_stripes); ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
} }
/* Generate PQ for one vertical stripe. */ /* Generate PQ for one vertical stripe. */
@ -1557,7 +1640,7 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
const int data_pages = rbio->nr_data * rbio->stripe_npages; const int data_pages = rbio->nr_data * rbio->stripe_npages;
int ret; int ret;
ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, 0); ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, false);
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -1641,9 +1724,10 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
const u32 sectorsize = fs_info->sectorsize; const u32 sectorsize = fs_info->sectorsize;
u64 cur_logical; u64 cur_logical;
ASSERT(orig_logical >= full_stripe_start && ASSERT_RBIO_LOGICAL(orig_logical >= full_stripe_start &&
orig_logical + orig_len <= full_stripe_start + orig_logical + orig_len <= full_stripe_start +
rbio->nr_data * BTRFS_STRIPE_LEN); rbio->nr_data * BTRFS_STRIPE_LEN,
rbio, orig_logical);
bio_list_add(&rbio->bio_list, orig_bio); bio_list_add(&rbio->bio_list, orig_bio);
rbio->bio_list_bytes += orig_bio->bi_iter.bi_size; rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
@ -2389,7 +2473,7 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
break; break;
} }
} }
ASSERT(i < rbio->real_stripes); ASSERT_RBIO_STRIPE(i < rbio->real_stripes, rbio, i);
bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors); bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
return rbio; return rbio;
@ -2555,7 +2639,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
* Replace is running and our parity stripe needs to be duplicated to * Replace is running and our parity stripe needs to be duplicated to
* the target device. Check we have a valid source stripe number. * the target device. Check we have a valid source stripe number.
*/ */
ASSERT(rbio->bioc->replace_stripe_src >= 0); ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio);
for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) { for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
struct sector_ptr *sector; struct sector_ptr *sector;

View File

@ -733,7 +733,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
* we found the previous extent covering eof and before we * we found the previous extent covering eof and before we
* attempted to increment its reference count). * attempted to increment its reference count).
*/ */
ret = btrfs_wait_ordered_range(inode, wb_start, ret = btrfs_wait_ordered_range(BTRFS_I(inode), wb_start,
destoff - wb_start); destoff - wb_start);
if (ret) if (ret)
return ret; return ret;
@ -755,7 +755,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
* range, so wait for writeback to complete before truncating pages * range, so wait for writeback to complete before truncating pages
* from the page cache. This is a rare case. * from the page cache. This is a rare case.
*/ */
wb_ret = btrfs_wait_ordered_range(inode, destoff, len); wb_ret = btrfs_wait_ordered_range(BTRFS_I(inode), destoff, len);
ret = ret ? ret : wb_ret; ret = ret ? ret : wb_ret;
/* /*
* Truncate page cache pages so that future reads will see the cloned * Truncate page cache pages so that future reads will see the cloned
@ -835,11 +835,11 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), ret = btrfs_wait_ordered_range(BTRFS_I(inode_in), ALIGN_DOWN(pos_in, bs),
wb_len); wb_len);
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs), ret = btrfs_wait_ordered_range(BTRFS_I(inode_out), ALIGN_DOWN(pos_out, bs),
wb_len); wb_len);
if (ret < 0) if (ret < 0)
return ret; return ret;

View File

@ -817,7 +817,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
goto abort; goto abort;
} }
set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state); set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
reloc_root->last_trans = trans->transid; btrfs_set_root_last_trans(reloc_root, trans->transid);
return reloc_root; return reloc_root;
fail: fail:
kfree(root_item); kfree(root_item);
@ -864,7 +864,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
*/ */
if (root->reloc_root) { if (root->reloc_root) {
reloc_root = root->reloc_root; reloc_root = root->reloc_root;
reloc_root->last_trans = trans->transid; btrfs_set_root_last_trans(reloc_root, trans->transid);
return 0; return 0;
} }
@ -962,7 +962,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
bytenr -= BTRFS_I(reloc_inode)->index_cnt; bytenr -= BTRFS_I(reloc_inode)->reloc_block_group_start;
ret = btrfs_lookup_file_extent(NULL, root, path, ret = btrfs_lookup_file_extent(NULL, root, path,
btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0); btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
if (ret < 0) if (ret < 0)
@ -1739,7 +1739,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
* btrfs_update_reloc_root() and update our root item * btrfs_update_reloc_root() and update our root item
* appropriately. * appropriately.
*/ */
reloc_root->last_trans = trans->transid; btrfs_set_root_last_trans(reloc_root, trans->transid);
trans->block_rsv = rc->block_rsv; trans->block_rsv = rc->block_rsv;
replaced = 0; replaced = 0;
@ -2082,7 +2082,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root; struct btrfs_root *root;
int ret; int ret;
if (reloc_root->last_trans == trans->transid) if (btrfs_get_root_last_trans(reloc_root) == trans->transid)
return 0; return 0;
root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false); root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
@ -2790,14 +2790,14 @@ out_free_blocks:
return ret; return ret;
} }
static noinline_for_stack int prealloc_file_extent_cluster( static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control *rc)
struct btrfs_inode *inode,
const struct file_extent_cluster *cluster)
{ {
const struct file_extent_cluster *cluster = &rc->cluster;
struct btrfs_inode *inode = BTRFS_I(rc->data_inode);
u64 alloc_hint = 0; u64 alloc_hint = 0;
u64 start; u64 start;
u64 end; u64 end;
u64 offset = inode->index_cnt; u64 offset = inode->reloc_block_group_start;
u64 num_bytes; u64 num_bytes;
int nr; int nr;
int ret = 0; int ret = 0;
@ -2899,11 +2899,14 @@ static noinline_for_stack int prealloc_file_extent_cluster(
return ret; return ret;
} }
static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode, static noinline_for_stack int setup_relocation_extent_mapping(struct reloc_control *rc)
u64 start, u64 end, u64 block_start)
{ {
struct btrfs_inode *inode = BTRFS_I(rc->data_inode);
struct extent_map *em; struct extent_map *em;
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
u64 offset = inode->reloc_block_group_start;
u64 start = rc->cluster.start - offset;
u64 end = rc->cluster.end - offset;
int ret = 0; int ret = 0;
em = alloc_extent_map(); em = alloc_extent_map();
@ -2912,13 +2915,14 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod
em->start = start; em->start = start;
em->len = end + 1 - start; em->len = end + 1 - start;
em->block_len = em->len; em->disk_bytenr = rc->cluster.start;
em->block_start = block_start; em->disk_num_bytes = em->len;
em->ram_bytes = em->len;
em->flags |= EXTENT_FLAG_PINNED; em->flags |= EXTENT_FLAG_PINNED;
lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); lock_extent(&inode->io_tree, start, end, &cached_state);
ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false); ret = btrfs_replace_extent_map_range(inode, em, false);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state); unlock_extent(&inode->io_tree, start, end, &cached_state);
free_extent_map(em); free_extent_map(em);
return ret; return ret;
@ -2946,12 +2950,14 @@ static u64 get_cluster_boundary_end(const struct file_extent_cluster *cluster,
return cluster->boundary[cluster_nr + 1] - 1; return cluster->boundary[cluster_nr + 1] - 1;
} }
static int relocate_one_folio(struct inode *inode, struct file_ra_state *ra, static int relocate_one_folio(struct reloc_control *rc,
const struct file_extent_cluster *cluster, struct file_ra_state *ra,
int *cluster_nr, unsigned long index) int *cluster_nr, unsigned long index)
{ {
const struct file_extent_cluster *cluster = &rc->cluster;
struct inode *inode = rc->data_inode;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
u64 offset = BTRFS_I(inode)->index_cnt; u64 offset = BTRFS_I(inode)->reloc_block_group_start;
const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT; const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
struct folio *folio; struct folio *folio;
@ -3083,10 +3089,11 @@ release_folio:
return ret; return ret;
} }
static int relocate_file_extent_cluster(struct inode *inode, static int relocate_file_extent_cluster(struct reloc_control *rc)
const struct file_extent_cluster *cluster)
{ {
u64 offset = BTRFS_I(inode)->index_cnt; struct inode *inode = rc->data_inode;
const struct file_extent_cluster *cluster = &rc->cluster;
u64 offset = BTRFS_I(inode)->reloc_block_group_start;
unsigned long index; unsigned long index;
unsigned long last_index; unsigned long last_index;
struct file_ra_state *ra; struct file_ra_state *ra;
@ -3100,21 +3107,20 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!ra) if (!ra)
return -ENOMEM; return -ENOMEM;
ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster); ret = prealloc_file_extent_cluster(rc);
if (ret) if (ret)
goto out; goto out;
file_ra_state_init(ra, inode->i_mapping); file_ra_state_init(ra, inode->i_mapping);
ret = setup_relocation_extent_mapping(inode, cluster->start - offset, ret = setup_relocation_extent_mapping(rc);
cluster->end - offset, cluster->start);
if (ret) if (ret)
goto out; goto out;
last_index = (cluster->end - offset) >> PAGE_SHIFT; last_index = (cluster->end - offset) >> PAGE_SHIFT;
for (index = (cluster->start - offset) >> PAGE_SHIFT; for (index = (cluster->start - offset) >> PAGE_SHIFT;
index <= last_index && !ret; index++) index <= last_index && !ret; index++)
ret = relocate_one_folio(inode, ra, cluster, &cluster_nr, index); ret = relocate_one_folio(rc, ra, &cluster_nr, index);
if (ret == 0) if (ret == 0)
WARN_ON(cluster_nr != cluster->nr); WARN_ON(cluster_nr != cluster->nr);
out: out:
@ -3122,15 +3128,16 @@ out:
return ret; return ret;
} }
static noinline_for_stack int relocate_data_extent(struct inode *inode, static noinline_for_stack int relocate_data_extent(struct reloc_control *rc,
const struct btrfs_key *extent_key, const struct btrfs_key *extent_key)
struct file_extent_cluster *cluster)
{ {
struct inode *inode = rc->data_inode;
struct file_extent_cluster *cluster = &rc->cluster;
int ret; int ret;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
ret = relocate_file_extent_cluster(inode, cluster); ret = relocate_file_extent_cluster(rc);
if (ret) if (ret)
return ret; return ret;
cluster->nr = 0; cluster->nr = 0;
@ -3156,7 +3163,7 @@ static noinline_for_stack int relocate_data_extent(struct inode *inode,
* the cluster we need to relocate. * the cluster we need to relocate.
*/ */
root->relocation_src_root = cluster->owning_root; root->relocation_src_root = cluster->owning_root;
ret = relocate_file_extent_cluster(inode, cluster); ret = relocate_file_extent_cluster(rc);
if (ret) if (ret)
return ret; return ret;
cluster->nr = 0; cluster->nr = 0;
@ -3175,7 +3182,7 @@ static noinline_for_stack int relocate_data_extent(struct inode *inode,
cluster->nr++; cluster->nr++;
if (cluster->nr >= MAX_EXTENTS) { if (cluster->nr >= MAX_EXTENTS) {
ret = relocate_file_extent_cluster(inode, cluster); ret = relocate_file_extent_cluster(rc);
if (ret) if (ret)
return ret; return ret;
cluster->nr = 0; cluster->nr = 0;
@ -3369,7 +3376,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
if (inode) if (inode)
goto truncate; goto truncate;
inode = btrfs_iget(fs_info->sb, ino, root); inode = btrfs_iget(ino, root);
if (IS_ERR(inode)) if (IS_ERR(inode))
return -ENOENT; return -ENOENT;
@ -3744,8 +3751,7 @@ restart:
if (rc->stage == MOVE_DATA_EXTENTS && if (rc->stage == MOVE_DATA_EXTENTS &&
(flags & BTRFS_EXTENT_FLAG_DATA)) { (flags & BTRFS_EXTENT_FLAG_DATA)) {
rc->found_file_extent = true; rc->found_file_extent = true;
ret = relocate_data_extent(rc->data_inode, ret = relocate_data_extent(rc, &key);
&key, &rc->cluster);
if (ret < 0) { if (ret < 0) {
err = ret; err = ret;
break; break;
@ -3774,8 +3780,7 @@ restart:
} }
if (!err) { if (!err) {
ret = relocate_file_extent_cluster(rc->data_inode, ret = relocate_file_extent_cluster(rc);
&rc->cluster);
if (ret < 0) if (ret < 0)
err = ret; err = ret;
} }
@ -3908,14 +3913,14 @@ static noinline_for_stack struct inode *create_reloc_inode(
if (ret) if (ret)
goto out; goto out;
inode = btrfs_iget(fs_info->sb, objectid, root); inode = btrfs_iget(objectid, root);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
delete_orphan_inode(trans, root, objectid); delete_orphan_inode(trans, root, objectid);
ret = PTR_ERR(inode); ret = PTR_ERR(inode);
inode = NULL; inode = NULL;
goto out; goto out;
} }
BTRFS_I(inode)->index_cnt = group->start; BTRFS_I(inode)->reloc_block_group_start = group->start;
ret = btrfs_orphan_add(trans, BTRFS_I(inode)); ret = btrfs_orphan_add(trans, BTRFS_I(inode));
out: out:
@ -4002,15 +4007,13 @@ static void free_reloc_control(struct reloc_control *rc)
/* /*
* Print the block group being relocated * Print the block group being relocated
*/ */
static void describe_relocation(struct btrfs_fs_info *fs_info, static void describe_relocation(struct btrfs_block_group *block_group)
struct btrfs_block_group *block_group)
{ {
char buf[128] = {'\0'}; char buf[128] = {'\0'};
btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf)); btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
btrfs_info(fs_info, btrfs_info(block_group->fs_info, "relocating block group %llu flags %s",
"relocating block group %llu flags %s",
block_group->start, buf); block_group->start, buf);
} }
@ -4118,13 +4121,11 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
goto out; goto out;
} }
describe_relocation(fs_info, rc->block_group); describe_relocation(rc->block_group);
btrfs_wait_block_group_reservations(rc->block_group); btrfs_wait_block_group_reservations(rc->block_group);
btrfs_wait_nocow_writers(rc->block_group); btrfs_wait_nocow_writers(rc->block_group);
btrfs_wait_ordered_roots(fs_info, U64_MAX, btrfs_wait_ordered_roots(fs_info, U64_MAX, rc->block_group);
rc->block_group->start,
rc->block_group->length);
ret = btrfs_zone_finish(rc->block_group); ret = btrfs_zone_finish(rc->block_group);
WARN_ON(ret && ret != -EAGAIN); WARN_ON(ret && ret != -EAGAIN);
@ -4149,7 +4150,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
* out of the loop if we hit an error. * out of the loop if we hit an error.
*/ */
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
ret = btrfs_wait_ordered_range(rc->data_inode, 0, ret = btrfs_wait_ordered_range(BTRFS_I(rc->data_inode), 0,
(u64)-1); (u64)-1);
if (ret) if (ret)
err = ret; err = ret;
@ -4221,8 +4222,8 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct reloc_control *rc = NULL; struct reloc_control *rc = NULL;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
int ret; int ret2;
int err = 0; int ret = 0;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
@ -4236,15 +4237,14 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
while (1) { while (1) {
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
path, 0, 0); path, 0, 0);
if (ret < 0) { if (ret < 0)
err = ret;
goto out; goto out;
}
if (ret > 0) { if (ret > 0) {
if (path->slots[0] == 0) if (path->slots[0] == 0)
break; break;
path->slots[0]--; path->slots[0]--;
} }
ret = 0;
leaf = path->nodes[0]; leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
btrfs_release_path(path); btrfs_release_path(path);
@ -4255,7 +4255,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key); reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
if (IS_ERR(reloc_root)) { if (IS_ERR(reloc_root)) {
err = PTR_ERR(reloc_root); ret = PTR_ERR(reloc_root);
goto out; goto out;
} }
@ -4267,15 +4267,12 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
reloc_root->root_key.offset, false); reloc_root->root_key.offset, false);
if (IS_ERR(fs_root)) { if (IS_ERR(fs_root)) {
ret = PTR_ERR(fs_root); ret = PTR_ERR(fs_root);
if (ret != -ENOENT) { if (ret != -ENOENT)
err = ret;
goto out; goto out;
}
ret = mark_garbage_root(reloc_root); ret = mark_garbage_root(reloc_root);
if (ret < 0) { if (ret < 0)
err = ret;
goto out; goto out;
} ret = 0;
} else { } else {
btrfs_put_root(fs_root); btrfs_put_root(fs_root);
} }
@ -4293,15 +4290,13 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
rc = alloc_reloc_control(fs_info); rc = alloc_reloc_control(fs_info);
if (!rc) { if (!rc) {
err = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
ret = reloc_chunk_start(fs_info); ret = reloc_chunk_start(fs_info);
if (ret < 0) { if (ret < 0)
err = ret;
goto out_end; goto out_end;
}
rc->extent_root = btrfs_extent_root(fs_info, 0); rc->extent_root = btrfs_extent_root(fs_info, 0);
@ -4309,7 +4304,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
trans = btrfs_join_transaction(rc->extent_root); trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
err = PTR_ERR(trans); ret = PTR_ERR(trans);
goto out_unset; goto out_unset;
} }
@ -4329,15 +4324,15 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
false); false);
if (IS_ERR(fs_root)) { if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root); ret = PTR_ERR(fs_root);
list_add_tail(&reloc_root->root_list, &reloc_roots); list_add_tail(&reloc_root->root_list, &reloc_roots);
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
goto out_unset; goto out_unset;
} }
err = __add_reloc_root(reloc_root); ret = __add_reloc_root(reloc_root);
ASSERT(err != -EEXIST); ASSERT(ret != -EEXIST);
if (err) { if (ret) {
list_add_tail(&reloc_root->root_list, &reloc_roots); list_add_tail(&reloc_root->root_list, &reloc_roots);
btrfs_put_root(fs_root); btrfs_put_root(fs_root);
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
@ -4347,8 +4342,8 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
btrfs_put_root(fs_root); btrfs_put_root(fs_root);
} }
err = btrfs_commit_transaction(trans); ret = btrfs_commit_transaction(trans);
if (err) if (ret)
goto out_unset; goto out_unset;
merge_reloc_roots(rc); merge_reloc_roots(rc);
@ -4357,14 +4352,14 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
trans = btrfs_join_transaction(rc->extent_root); trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
err = PTR_ERR(trans); ret = PTR_ERR(trans);
goto out_clean; goto out_clean;
} }
err = btrfs_commit_transaction(trans); ret = btrfs_commit_transaction(trans);
out_clean: out_clean:
ret = clean_dirty_subvols(rc); ret2 = clean_dirty_subvols(rc);
if (ret < 0 && !err) if (ret2 < 0 && !ret)
err = ret; ret = ret2;
out_unset: out_unset:
unset_reloc_control(rc); unset_reloc_control(rc);
out_end: out_end:
@ -4375,14 +4370,14 @@ out:
btrfs_free_path(path); btrfs_free_path(path);
if (err == 0) { if (ret == 0) {
/* cleanup orphan inode in data relocation tree */ /* cleanup orphan inode in data relocation tree */
fs_root = btrfs_grab_root(fs_info->data_reloc_root); fs_root = btrfs_grab_root(fs_info->data_reloc_root);
ASSERT(fs_root); ASSERT(fs_root);
err = btrfs_orphan_cleanup(fs_root); ret = btrfs_orphan_cleanup(fs_root);
btrfs_put_root(fs_root); btrfs_put_root(fs_root);
} }
return err; return ret;
} }
/* /*
@ -4393,9 +4388,9 @@ out:
*/ */
int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered) int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 disk_bytenr = ordered->file_offset + inode->index_cnt; u64 disk_bytenr = ordered->file_offset + inode->reloc_block_group_start;
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, disk_bytenr); struct btrfs_root *csum_root = btrfs_csum_root(fs_info, disk_bytenr);
LIST_HEAD(list); LIST_HEAD(list);
int ret; int ret;

View File

@ -261,7 +261,7 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
atomic_set(&stripe->pending_io, 0); atomic_set(&stripe->pending_io, 0);
spin_lock_init(&stripe->write_error_lock); spin_lock_init(&stripe->write_error_lock);
ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, 0); ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false);
if (ret < 0) if (ret < 0)
goto error; goto error;
@ -2441,19 +2441,15 @@ static int finish_extent_writes_for_zoned(struct btrfs_root *root,
struct btrfs_block_group *cache) struct btrfs_block_group *cache)
{ {
struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_trans_handle *trans;
if (!btrfs_is_zoned(fs_info)) if (!btrfs_is_zoned(fs_info))
return 0; return 0;
btrfs_wait_block_group_reservations(cache); btrfs_wait_block_group_reservations(cache);
btrfs_wait_nocow_writers(cache); btrfs_wait_nocow_writers(cache);
btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, cache->length); btrfs_wait_ordered_roots(fs_info, U64_MAX, cache);
trans = btrfs_join_transaction(root); return btrfs_commit_current_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
return btrfs_commit_transaction(trans);
} }
static noinline_for_stack static noinline_for_stack
@ -2684,8 +2680,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/ */
if (sctx->is_dev_replace) { if (sctx->is_dev_replace) {
btrfs_wait_nocow_writers(cache); btrfs_wait_nocow_writers(cache);
btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, btrfs_wait_ordered_roots(fs_info, U64_MAX, cache);
cache->length);
} }
scrub_pause_off(fs_info); scrub_pause_off(fs_info);

View File

@ -5188,11 +5188,10 @@ out:
static int process_verity(struct send_ctx *sctx) static int process_verity(struct send_ctx *sctx)
{ {
int ret = 0; int ret = 0;
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
struct inode *inode; struct inode *inode;
struct fs_path *p; struct fs_path *p;
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root); inode = btrfs_iget(sctx->cur_ino, sctx->send_root);
if (IS_ERR(inode)) if (IS_ERR(inode))
return PTR_ERR(inode); return PTR_ERR(inode);
@ -5550,7 +5549,7 @@ static int send_encoded_inline_extent(struct send_ctx *sctx,
size_t inline_size; size_t inline_size;
int ret; int ret;
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); inode = btrfs_iget(sctx->cur_ino, root);
if (IS_ERR(inode)) if (IS_ERR(inode))
return PTR_ERR(inode); return PTR_ERR(inode);
@ -5617,7 +5616,7 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
u32 crc; u32 crc;
int ret; int ret;
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root); inode = btrfs_iget(sctx->cur_ino, root);
if (IS_ERR(inode)) if (IS_ERR(inode))
return PTR_ERR(inode); return PTR_ERR(inode);
@ -5746,7 +5745,7 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
if (sctx->cur_inode == NULL) { if (sctx->cur_inode == NULL) {
struct btrfs_root *root = sctx->send_root; struct btrfs_root *root = sctx->send_root;
sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root); sctx->cur_inode = btrfs_iget(sctx->cur_ino, root);
if (IS_ERR(sctx->cur_inode)) { if (IS_ERR(sctx->cur_inode)) {
int err = PTR_ERR(sctx->cur_inode); int err = PTR_ERR(sctx->cur_inode);
@ -7998,34 +7997,18 @@ out:
*/ */
static int ensure_commit_roots_uptodate(struct send_ctx *sctx) static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
{ {
int i; struct btrfs_root *root = sctx->parent_root;
struct btrfs_trans_handle *trans = NULL;
again: if (root && root->node != root->commit_root)
if (sctx->parent_root && return btrfs_commit_current_transaction(root);
sctx->parent_root->node != sctx->parent_root->commit_root)
goto commit_trans;
for (i = 0; i < sctx->clone_roots_cnt; i++) for (int i = 0; i < sctx->clone_roots_cnt; i++) {
if (sctx->clone_roots[i].root->node != root = sctx->clone_roots[i].root;
sctx->clone_roots[i].root->commit_root) if (root->node != root->commit_root)
goto commit_trans; return btrfs_commit_current_transaction(root);
if (trans)
return btrfs_end_transaction(trans);
return 0;
commit_trans:
/* Use any root, all fs roots will get their commit roots updated. */
if (!trans) {
trans = btrfs_join_transaction(sctx->send_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
goto again;
} }
return btrfs_commit_transaction(trans); return 0;
} }
/* /*
@ -8046,7 +8029,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
ret = btrfs_start_delalloc_snapshot(root, false); ret = btrfs_start_delalloc_snapshot(root, false);
if (ret) if (ret)
return ret; return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); btrfs_wait_ordered_extents(root, U64_MAX, NULL);
} }
for (i = 0; i < sctx->clone_roots_cnt; i++) { for (i = 0; i < sctx->clone_roots_cnt; i++) {
@ -8054,7 +8037,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
ret = btrfs_start_delalloc_snapshot(root, false); ret = btrfs_start_delalloc_snapshot(root, false);
if (ret) if (ret)
return ret; return ret;
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); btrfs_wait_ordered_extents(root, U64_MAX, NULL);
} }
return 0; return 0;
@ -8082,10 +8065,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
btrfs_root_id(root), root->dedupe_in_progress); btrfs_root_id(root), root->dedupe_in_progress);
} }
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_args *arg)
{ {
int ret = 0; int ret = 0;
struct btrfs_root *send_root = BTRFS_I(inode)->root; struct btrfs_root *send_root = inode->root;
struct btrfs_fs_info *fs_info = send_root->fs_info; struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_root *clone_root; struct btrfs_root *clone_root;
struct send_ctx *sctx = NULL; struct send_ctx *sctx = NULL;

View File

@ -11,7 +11,7 @@
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/align.h> #include <linux/align.h>
struct inode; struct btrfs_inode;
struct btrfs_ioctl_send_args; struct btrfs_ioctl_send_args;
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
@ -182,6 +182,6 @@ enum {
__BTRFS_SEND_A_MAX = 35, __BTRFS_SEND_A_MAX = 35,
}; };
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg); long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_args *arg);
#endif #endif

View File

@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "linux/spinlock.h"
#include <linux/minmax.h>
#include "misc.h" #include "misc.h"
#include "ctree.h" #include "ctree.h"
#include "space-info.h" #include "space-info.h"
@ -190,6 +192,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
*/ */
#define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH (75) #define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH (75)
#define BTRFS_UNALLOC_BLOCK_GROUP_TARGET (10ULL)
/* /*
* Calculate chunk size depending on volume type (regular or zoned). * Calculate chunk size depending on volume type (regular or zoned).
*/ */
@ -232,6 +236,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
if (!space_info) if (!space_info)
return -ENOMEM; return -ENOMEM;
space_info->fs_info = info;
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&space_info->block_groups[i]); INIT_LIST_HEAD(&space_info->block_groups[i]);
init_rwsem(&space_info->groups_sem); init_rwsem(&space_info->groups_sem);
@ -340,11 +345,32 @@ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
return NULL; return NULL;
} }
/*
 * Return the size a data chunk is expected to end up with when it is
 * actually allocated.
 *
 * The data space_info's chunk_size is only the "optimal" size derived from
 * the filesystem size.  For regular (non-zoned) filesystems the allocator
 * trims that further: never more than 10% of the writable device bytes and
 * never more than 1G.  Zoned filesystems use chunk_size (the zone size)
 * untouched.
 */
static u64 calc_effective_data_chunk_size(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *sinfo =
		btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
	u64 capped;

	if (!btrfs_is_zoned(fs_info)) {
		/* First cap at 10% of the writable bytes, then at 1G. */
		capped = min(sinfo->chunk_size,
			     mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
		return min_t(u64, capped, SZ_1G);
	}

	/* Zoned mode: the zone size is used as it is. */
	return sinfo->chunk_size;
}
static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, struct btrfs_space_info *space_info,
enum btrfs_reserve_flush_enum flush) enum btrfs_reserve_flush_enum flush)
{ {
struct btrfs_space_info *data_sinfo;
u64 profile; u64 profile;
u64 avail; u64 avail;
u64 data_chunk_size; u64 data_chunk_size;
@ -368,23 +394,7 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
if (avail == 0) if (avail == 0)
return 0; return 0;
/* data_chunk_size = calc_effective_data_chunk_size(fs_info);
* Calculate the data_chunk_size, space_info->chunk_size is the
* "optimal" chunk size based on the fs size. However when we actually
* allocate the chunk we will strip this down further, making it no more
* than 10% of the disk or 1G, whichever is smaller.
*
* On the zoned mode, we need to use zone_size (=
* data_sinfo->chunk_size) as it is.
*/
data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
if (!btrfs_is_zoned(fs_info)) {
data_chunk_size = min(data_sinfo->chunk_size,
mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
} else {
data_chunk_size = data_sinfo->chunk_size;
}
/* /*
* Since data allocations immediately use block groups as part of the * Since data allocations immediately use block groups as part of the
@ -605,8 +615,6 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
return nr; return nr;
} }
#define EXTENT_SIZE_PER_ITEM SZ_256K
/* /*
* shrink metadata reservation for delalloc * shrink metadata reservation for delalloc
*/ */
@ -706,7 +714,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
skip_async: skip_async:
loops++; loops++;
if (wait_ordered && !trans) { if (wait_ordered && !trans) {
btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, items, NULL);
} else { } else {
time_left = schedule_timeout_killable(1); time_left = schedule_timeout_killable(1);
if (time_left) if (time_left)
@ -825,14 +833,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
* because that does not wait for a transaction to fully commit * because that does not wait for a transaction to fully commit
* (only for it to be unblocked, state TRANS_STATE_UNBLOCKED). * (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
*/ */
trans = btrfs_attach_transaction_barrier(root); ret = btrfs_commit_current_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
if (ret == -ENOENT)
ret = 0;
break;
}
ret = btrfs_commit_transaction(trans);
break; break;
default: default:
ret = -ENOSPC; ret = -ENOSPC;
@ -1886,3 +1887,209 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
return free_bytes; return free_bytes;
} }
/*
 * Compute @x as a percentage of @y, i.e. 100 * x / y.
 *
 * Returns 0 when @y is 0.  If 100 * x does not fit in a u64, both operands
 * are scaled down by 10 bits (with @y clamped to at least 1) and the
 * multiplication is retried, trading precision for range.
 */
static u64 calc_pct_ratio(u64 x, u64 y)
{
	u64 scaled;

	if (!y)
		return 0;

	/*
	 * The product goes into a temporary: check_mul_overflow() writes the
	 * wrapped result to its destination even when it reports an overflow,
	 * so using &x here would destroy the operand we are about to scale
	 * down and retry with.
	 */
	while (check_mul_overflow(100, x, &scaled)) {
		x >>= 10;
		y >>= 10;
		if (!y)
			y = 1;
	}

	return div64_u64(scaled, y);
}
/*
* A reasonable buffer for unallocated space is 10 data block_groups.
* If we claw this back repeatedly, we can still achieve efficient
* utilization when near full, and not do too much reclaim while
* always maintaining a solid buffer for workloads that quickly
* allocate and pressure the unallocated space.
*/
/* Unallocated-space target: BTRFS_UNALLOC_BLOCK_GROUP_TARGET effective data chunks. */
static u64 calc_unalloc_target(struct btrfs_fs_info *fs_info)
{
	return BTRFS_UNALLOC_BLOCK_GROUP_TARGET *
	       calc_effective_data_chunk_size(fs_info);
}
/*
* The fundamental goal of automatic reclaim is to protect the filesystem's
* unallocated space and thus minimize the probability of the filesystem going
* read only when a metadata allocation failure causes a transaction abort.
*
* However, relocations happen into the space_info's unused space, therefore
* automatic reclaim must also back off as that space runs low. There is no
* value in doing trivial "relocations" of re-writing the same block group
* into a fresh one.
*
* Furthermore, we want to avoid doing too much reclaim even if there are good
* candidates. This is because the allocator is pretty good at filling up the
* holes with writes. So we want to do just enough reclaim to try and stay
* safe from running out of unallocated space but not be wasteful about it.
*
* Therefore, the dynamic reclaim threshold is calculated as follows:
* - calculate a target unallocated amount of 10 block group sized chunks
* - ratchet up the intensity of reclaim depending on how far we are from
* that target by using a formula of unalloc / target to set the threshold.
*
* Typically with 10 block groups as the target, the discrete values this comes
* out to are 0, 10, 20, ... , 80, 90, and 99.
*/
/*
 * Derive the reclaim threshold (a percentage) from how far the filesystem's
 * unallocated space is below the target computed by calc_unalloc_target().
 *
 * Returns 0 when the space_info has less than one effective data chunk of
 * unused space, otherwise the shortfall as a percentage of the target.
 */
static int calc_dynamic_reclaim_threshold(struct btrfs_space_info *space_info)
{
	struct btrfs_fs_info *fs_info = space_info->fs_info;
	u64 free_chunk = atomic64_read(&fs_info->free_chunk_space);
	u64 goal = calc_unalloc_target(fs_info);
	u64 total = space_info->total_bytes;
	u64 in_use = btrfs_space_info_used(space_info, false);
	u64 chunk_sz = calc_effective_data_chunk_size(fs_info);
	u64 shortfall = 0;

	/* Without room to relocate into, reclaim cannot make progress. */
	if (total - in_use < chunk_sz)
		return 0;

	if (goal > free_chunk)
		shortfall = goal - free_chunk;

	/* shortfall <= goal, so the ratio fits in an int. */
	return calc_pct_ratio(shortfall, goal);
}
/*
 * Return the block group reclaim threshold for @space_info: the fixed
 * bg_reclaim_threshold unless dynamic_reclaim is enabled, in which case the
 * value is computed on the fly.  The caller must hold space_info->lock.
 */
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
{
	lockdep_assert_held(&space_info->lock);

	if (!READ_ONCE(space_info->dynamic_reclaim))
		return READ_ONCE(space_info->bg_reclaim_threshold);

	return calc_dynamic_reclaim_threshold(space_info);
}
/*
* Under "urgent" reclaim, we will reclaim even fresh block groups that have
* recently seen successful allocations, as we are desperate to reclaim
* whatever we can to avoid ENOSPC in a transaction leading to a readonly fs.
*/
/* True when unallocated space is smaller than one effective data chunk. */
static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
{
	struct btrfs_fs_info *fs_info = space_info->fs_info;
	u64 free_chunk = atomic64_read(&fs_info->free_chunk_space);

	return free_chunk < calc_effective_data_chunk_size(fs_info);
}
/*
 * Walk the block groups of @space_info on the list for RAID index @raid and
 * pass every group whose used bytes are below the reclaim threshold to
 * btrfs_mark_bg_to_reclaim().
 *
 * Always returns 0.  @fs_info is not referenced in the body.
 */
static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, int raid)
{
struct btrfs_block_group *bg;
int thresh_pct;
bool try_again = true;
bool urgent;
/* Sample urgency and the threshold percentage once, under the space_info lock. */
spin_lock(&space_info->lock);
urgent = is_reclaim_urgent(space_info);
thresh_pct = btrfs_calc_reclaim_threshold(space_info);
spin_unlock(&space_info->lock);
down_read(&space_info->groups_sem);
again:
list_for_each_entry(bg, &space_info->block_groups[raid], list) {
u64 thresh;
bool reclaim = false;
/* Hold a reference for the duration of the per-group work. */
btrfs_get_block_group(bg);
spin_lock(&bg->lock);
thresh = mult_perc(bg->length, thresh_pct);
/*
 * A group with a zero reclaim_mark is not selected on this pass.  The
 * mark is incremented unconditionally below, so a second pass over
 * the same list no longer skips it for that reason.
 */
if (bg->used < thresh && bg->reclaim_mark) {
try_again = false;
reclaim = true;
}
bg->reclaim_mark++;
spin_unlock(&bg->lock);
/* The marking itself is done after bg->lock has been dropped. */
if (reclaim)
btrfs_mark_bg_to_reclaim(bg);
btrfs_put_block_group(bg);
}
/*
* In situations where we are very motivated to reclaim (low unalloc)
* use two passes to make the reclaim mark check best effort.
*
* If we have any staler groups, we don't touch the fresher ones, but if we
* really need a block group, do take a fresh one.
*/
/* try_again is cleared before jumping back, so at most two passes are made. */
if (try_again && urgent) {
try_again = false;
goto again;
}
up_read(&space_info->groups_sem);
return 0;
}
/*
 * Adjust the net reclaimable byte count of @space_info by @bytes (which may
 * be negative) and mark periodic reclaim as ready once at least one
 * effective data chunk worth of bytes has accumulated.
 *
 * The caller must hold space_info->lock.
 */
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
{
	u64 chunk_sz = calc_effective_data_chunk_size(space_info->fs_info);

	lockdep_assert_held(&space_info->lock);
	space_info->reclaimable_bytes += bytes;

	/*
	 * reclaimable_bytes is signed while chunk_sz is unsigned.  Comparing
	 * them directly would convert a negative balance into a huge unsigned
	 * value and wrongly flag reclaim as ready, so rule out the non-positive
	 * case first.
	 */
	if (space_info->reclaimable_bytes > 0 &&
	    space_info->reclaimable_bytes >= chunk_sz)
		btrfs_set_periodic_reclaim_ready(space_info, true);
}
/*
 * Update the periodic_reclaim_ready flag of @space_info.  Does nothing when
 * periodic reclaim is disabled or the flag already has the requested value.
 * Clearing the flag also resets the reclaimable byte counter.
 *
 * The caller must hold space_info->lock.
 */
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready)
{
	lockdep_assert_held(&space_info->lock);

	if (!READ_ONCE(space_info->periodic_reclaim))
		return;
	if (space_info->periodic_reclaim_ready == ready)
		return;

	space_info->periodic_reclaim_ready = ready;
	if (!ready)
		space_info->reclaimable_bytes = 0;
}
/*
 * Report whether a periodic reclaim pass should run for @space_info, and
 * consume the "ready" state while doing so.
 *
 * SYSTEM space_infos and space_infos with periodic reclaim disabled always
 * yield false.  Otherwise the current periodic_reclaim_ready value is
 * returned and the flag is cleared under space_info->lock.
 */
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
{
	bool was_ready;

	if ((space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
	    !READ_ONCE(space_info->periodic_reclaim))
		return false;

	spin_lock(&space_info->lock);
	was_ready = space_info->periodic_reclaim_ready;
	btrfs_set_periodic_reclaim_ready(space_info, false);
	spin_unlock(&space_info->lock);

	return was_ready;
}
/*
 * Run a reclaim sweep over every RAID index of each space_info for which
 * btrfs_should_periodic_reclaim() returns true.
 *
 * Returns 0 on success (including when no space_info needed a sweep), or the
 * first non-zero value returned by do_reclaim_sweep().
 */
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	list_for_each_entry(space_info, &fs_info->space_info, list) {
		if (!btrfs_should_periodic_reclaim(space_info))
			continue;
		for (int raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
			int ret = do_reclaim_sweep(fs_info, space_info, raid);

			if (ret)
				return ret;
		}
	}

	/*
	 * Return an explicit 0: when every space_info is skipped there is no
	 * sweep result to report, and a function-scope result variable would
	 * be read uninitialized.
	 */
	return 0;
}

View File

@ -94,6 +94,7 @@ enum btrfs_flush_state {
}; };
struct btrfs_space_info { struct btrfs_space_info {
struct btrfs_fs_info *fs_info;
spinlock_t lock; spinlock_t lock;
u64 total_bytes; /* total bytes in the space, u64 total_bytes; /* total bytes in the space,
@ -165,6 +166,47 @@ struct btrfs_space_info {
struct kobject kobj; struct kobject kobj;
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES]; struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
/*
* Monotonically increasing counter of block group reclaim attempts
* Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_count
*/
u64 reclaim_count;
/*
* Monotonically increasing counter of reclaimed bytes
* Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_bytes
*/
u64 reclaim_bytes;
/*
* Monotonically increasing counter of reclaim errors
* Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_errors
*/
u64 reclaim_errors;
/*
* If true, use the dynamic relocation threshold, instead of the
* fixed bg_reclaim_threshold.
*/
bool dynamic_reclaim;
/*
* Periodically check all block groups against the reclaim
* threshold in the cleaner thread.
*/
bool periodic_reclaim;
/*
* Periodic reclaim should be a no-op if a space_info hasn't
* freed any space since the last time we tried.
*/
bool periodic_reclaim_ready;
/*
* Net bytes freed or allocated since the last reclaim pass.
*/
s64 reclaimable_bytes;
}; };
struct reserve_ticket { struct reserve_ticket {
@ -247,4 +289,10 @@ void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info); void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
#endif /* BTRFS_SPACE_INFO_H */ #endif /* BTRFS_SPACE_INFO_H */

View File

@ -74,7 +74,7 @@ bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space
* mapping. And if page->mapping->host is data inode, it's subpage. * mapping. And if page->mapping->host is data inode, it's subpage.
* As we have ruled out the sectorsize >= PAGE_SIZE case already. * As we have ruled out the sectorsize >= PAGE_SIZE case already.
*/ */
if (!mapping || !mapping->host || is_data_inode(mapping->host)) if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host)))
return true; return true;
/* /*
@ -242,12 +242,12 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
#define subpage_calc_start_bit(fs_info, folio, name, start, len) \ #define subpage_calc_start_bit(fs_info, folio, name, start, len) \
({ \ ({ \
unsigned int start_bit; \ unsigned int __start_bit; \
\ \
btrfs_subpage_assert(fs_info, folio, start, len); \ btrfs_subpage_assert(fs_info, folio, start, len); \
start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
start_bit += fs_info->subpage_info->name##_offset; \ __start_bit += fs_info->subpage_info->name##_offset; \
start_bit; \ __start_bit; \
}) })
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
@ -283,7 +283,7 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
bool last; bool last;
btrfs_subpage_assert(fs_info, folio, start, len); btrfs_subpage_assert(fs_info, folio, start, len);
is_data = is_data_inode(folio->mapping->host); is_data = is_data_inode(BTRFS_I(folio->mapping->host));
spin_lock_irqsave(&subpage->lock, flags); spin_lock_irqsave(&subpage->lock, flags);
@ -703,19 +703,29 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
* Make sure not only the page dirty bit is cleared, but also subpage dirty bit * Make sure not only the page dirty bit is cleared, but also subpage dirty bit
* is cleared. * is cleared.
*/ */
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio) void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{ {
struct btrfs_subpage *subpage = folio_get_private(folio); struct btrfs_subpage *subpage;
unsigned int start_bit;
unsigned int nbits;
unsigned long flags;
if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
return; return;
ASSERT(!folio_test_dirty(folio)); if (!btrfs_is_subpage(fs_info, folio->mapping)) {
if (!btrfs_is_subpage(fs_info, folio->mapping)) ASSERT(!folio_test_dirty(folio));
return; return;
}
ASSERT(folio_test_private(folio) && folio_get_private(folio)); start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty)); nbits = len >> fs_info->sectorsize_bits;
subpage = folio_get_private(folio);
ASSERT(subpage);
spin_lock_irqsave(&subpage->lock, flags);
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
spin_unlock_irqrestore(&subpage->lock, flags);
} }
/* /*
@ -765,6 +775,130 @@ void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info,
btrfs_folio_end_writer_lock(fs_info, folio, start, len); btrfs_folio_end_writer_lock(fs_info, folio, start, len);
} }
/*
* This is for folio already locked by plain lock_page()/folio_lock(), which
* doesn't have any subpage awareness.
*
* This populates the involved subpage ranges so that subpage helpers can
* properly unlock them.
*/
/*
 * Mark the range [@start, @start + @len) of an already locked @folio as
 * writer locked in the subpage "locked" bitmap and add the number of bits
 * set to subpage->writers.
 *
 * Does nothing when @fs_info is NULL or the folio's mapping is not subpage.
 */
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len)
{
struct btrfs_subpage *subpage;
unsigned long flags;
unsigned int start_bit;
unsigned int nbits;
int ret;
ASSERT(folio_test_locked(folio));
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping))
return;
subpage = folio_get_private(folio);
start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
/* @len expressed in units of 1 << sectorsize_bits bytes. */
nbits = len >> fs_info->sectorsize_bits;
spin_lock_irqsave(&subpage->lock, flags);
/* Target range should not yet be locked. */
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
bitmap_set(subpage->bitmaps, start_bit, nbits);
/* The writer count must never exceed the number of bits in one bitmap. */
ret = atomic_add_return(nbits, &subpage->writers);
ASSERT(ret <= fs_info->subpage_info->bitmap_nr_bits);
spin_unlock_irqrestore(&subpage->lock, flags);
}
/*
* Find any subpage writer locked range inside @folio, starting at file offset
* @search_start. The caller should ensure the folio is locked.
*
* Return true and update @found_start_ret and @found_len_ret to the first
* writer locked range.
* Return false if there is no writer locked range.
*/
/*
 * Search the "locked" bitmap of @folio for the first run of set bits at or
 * after @search_start.  On success the run's file offset and byte length are
 * stored in @found_start_ret and @found_len_ret; both are left untouched
 * when nothing is found.
 */
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 search_start,
u64 *found_start_ret, u32 *found_len_ret)
{
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
struct btrfs_subpage *subpage = folio_get_private(folio);
/* Bytes from @search_start up to the end of its page. */
const unsigned int len = PAGE_SIZE - offset_in_page(search_start);
const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
locked, search_start, len);
/* Bit range [start, end) occupied by the locked bitmap in subpage->bitmaps. */
const unsigned int locked_bitmap_start = subpage_info->locked_offset;
const unsigned int locked_bitmap_end = locked_bitmap_start +
subpage_info->bitmap_nr_bits;
unsigned long flags;
int first_zero;
int first_set;
bool found = false;
ASSERT(folio_test_locked(folio));
spin_lock_irqsave(&subpage->lock, flags);
first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit);
/* No set bit before the end of the locked bitmap: nothing is locked. */
if (first_set >= locked_bitmap_end)
goto out;
found = true;
/* Translate the bit index (relative to the locked bitmap) into a file offset. */
*found_start_ret = folio_pos(folio) +
((first_set - locked_bitmap_start) << fs_info->sectorsize_bits);
/*
* Since @first_set is ensured to be smaller than locked_bitmap_end
* here, @found_start_ret should be inside the folio.
*/
ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE);
/* The run ends at the next clear bit; the search is bounded by locked_bitmap_end. */
first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set);
*found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits;
out:
spin_unlock_irqrestore(&subpage->lock, flags);
return found;
}
/*
* Unlike btrfs_folio_end_writer_lock() which unlocks a specified subpage range,
* this ends all writer locked ranges of a page.
*
* This is for the locked page of __extent_writepage(), as the locked page
* can contain several locked subpage ranges.
*/
/*
 * Release every writer locked subpage range of a locked @folio.  For a
 * non-subpage mapping, or when subpage->writers is zero, this is a plain
 * folio_unlock().
 */
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
u64 folio_start = folio_pos(folio);
u64 cur = folio_start;
ASSERT(folio_test_locked(folio));
/* Non-subpage case: only the folio lock itself needs to be dropped. */
if (!btrfs_is_subpage(fs_info, folio->mapping)) {
folio_unlock(folio);
return;
}
/* The page has no new delalloc range locked on it. Just plain unlock. */
if (atomic_read(&subpage->writers) == 0) {
folio_unlock(folio);
return;
}
/* Walk the locked ranges in ascending file offset order within one page. */
while (cur < folio_start + PAGE_SIZE) {
u64 found_start;
u32 found_len;
bool found;
bool last;
found = btrfs_subpage_find_writer_locked(fs_info, folio, cur,
&found_start, &found_len);
/* No further locked range: leave the loop without unlocking here. */
if (!found)
break;
last = btrfs_subpage_end_and_test_writer(fs_info, folio,
found_start, found_len);
/* Unlock the folio once the helper reports this was the last one. */
if (last) {
folio_unlock(folio);
break;
}
/* Continue the search right after the range just released. */
cur = found_start + found_len;
}
}
#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \ #define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \
bitmap_cut(dst, subpage->bitmaps, 0, \ bitmap_cut(dst, subpage->bitmaps, 0, \
subpage_info->name##_offset, subpage_info->bitmap_nr_bits) subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
@ -775,7 +909,6 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info; struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
struct btrfs_subpage *subpage; struct btrfs_subpage *subpage;
unsigned long uptodate_bitmap; unsigned long uptodate_bitmap;
unsigned long error_bitmap;
unsigned long dirty_bitmap; unsigned long dirty_bitmap;
unsigned long writeback_bitmap; unsigned long writeback_bitmap;
unsigned long ordered_bitmap; unsigned long ordered_bitmap;
@ -797,10 +930,9 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
dump_page(folio_page(folio, 0), "btrfs subpage dump"); dump_page(folio_page(folio, 0), "btrfs subpage dump");
btrfs_warn(fs_info, btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl error=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
start, len, folio_pos(folio), start, len, folio_pos(folio),
subpage_info->bitmap_nr_bits, &uptodate_bitmap, subpage_info->bitmap_nr_bits, &uptodate_bitmap,
subpage_info->bitmap_nr_bits, &error_bitmap,
subpage_info->bitmap_nr_bits, &dirty_bitmap, subpage_info->bitmap_nr_bits, &dirty_bitmap,
subpage_info->bitmap_nr_bits, &writeback_bitmap, subpage_info->bitmap_nr_bits, &writeback_bitmap,
subpage_info->bitmap_nr_bits, &ordered_bitmap, subpage_info->bitmap_nr_bits, &ordered_bitmap,

View File

@ -112,6 +112,12 @@ int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len); struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len); struct folio *folio, u64 start, u32 len);
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 search_start,
u64 *found_start_ret, u32 *found_len_ret);
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio);
/* /*
* Template for subpage related operations. * Template for subpage related operations.
@ -156,7 +162,8 @@ DECLARE_BTRFS_SUBPAGE_OPS(checked);
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len); struct folio *folio, u64 start, u32 len);
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio); void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info, void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len); struct folio *folio, u64 start, u32 len);
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,

View File

@ -34,6 +34,7 @@
#include "disk-io.h" #include "disk-io.h"
#include "transaction.h" #include "transaction.h"
#include "btrfs_inode.h" #include "btrfs_inode.h"
#include "direct-io.h"
#include "props.h" #include "props.h"
#include "xattr.h" #include "xattr.h"
#include "bio.h" #include "bio.h"
@ -125,9 +126,6 @@ enum {
Opt_rescue, Opt_rescue,
Opt_usebackuproot, Opt_usebackuproot,
Opt_nologreplay, Opt_nologreplay,
Opt_ignorebadroots,
Opt_ignoredatacsums,
Opt_rescue_all,
/* Debugging options */ /* Debugging options */
Opt_enospc_debug, Opt_enospc_debug,
@ -178,6 +176,8 @@ enum {
Opt_rescue_nologreplay, Opt_rescue_nologreplay,
Opt_rescue_ignorebadroots, Opt_rescue_ignorebadroots,
Opt_rescue_ignoredatacsums, Opt_rescue_ignoredatacsums,
Opt_rescue_ignoremetacsums,
Opt_rescue_ignoresuperflags,
Opt_rescue_parameter_all, Opt_rescue_parameter_all,
}; };
@ -187,7 +187,11 @@ static const struct constant_table btrfs_parameter_rescue[] = {
{ "ignorebadroots", Opt_rescue_ignorebadroots }, { "ignorebadroots", Opt_rescue_ignorebadroots },
{ "ibadroots", Opt_rescue_ignorebadroots }, { "ibadroots", Opt_rescue_ignorebadroots },
{ "ignoredatacsums", Opt_rescue_ignoredatacsums }, { "ignoredatacsums", Opt_rescue_ignoredatacsums },
{ "ignoremetacsums", Opt_rescue_ignoremetacsums},
{ "ignoresuperflags", Opt_rescue_ignoresuperflags},
{ "idatacsums", Opt_rescue_ignoredatacsums }, { "idatacsums", Opt_rescue_ignoredatacsums },
{ "imetacsums", Opt_rescue_ignoremetacsums},
{ "isuperflags", Opt_rescue_ignoresuperflags},
{ "all", Opt_rescue_parameter_all }, { "all", Opt_rescue_parameter_all },
{} {}
}; };
@ -573,8 +577,16 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_rescue_ignoredatacsums: case Opt_rescue_ignoredatacsums:
btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
break; break;
case Opt_rescue_ignoremetacsums:
btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS);
break;
case Opt_rescue_ignoresuperflags:
btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS);
break;
case Opt_rescue_parameter_all: case Opt_rescue_parameter_all:
btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS);
btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS);
btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS); btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS);
btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
break; break;
@ -629,7 +641,7 @@ static void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info)
btrfs_clear_opt(fs_info->mount_opt, NOSPACECACHE); btrfs_clear_opt(fs_info->mount_opt, NOSPACECACHE);
} }
static bool check_ro_option(struct btrfs_fs_info *fs_info, static bool check_ro_option(const struct btrfs_fs_info *fs_info,
unsigned long mount_opt, unsigned long opt, unsigned long mount_opt, unsigned long opt,
const char *opt_name) const char *opt_name)
{ {
@ -641,7 +653,7 @@ static bool check_ro_option(struct btrfs_fs_info *fs_info,
return false; return false;
} }
bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt, bool btrfs_check_options(const struct btrfs_fs_info *info, unsigned long *mount_opt,
unsigned long flags) unsigned long flags)
{ {
bool ret = true; bool ret = true;
@ -649,7 +661,9 @@ bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
if (!(flags & SB_RDONLY) && if (!(flags & SB_RDONLY) &&
(check_ro_option(info, *mount_opt, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || (check_ro_option(info, *mount_opt, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") || check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums"))) check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums") ||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREMETACSUMS, "ignoremetacsums") ||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNORESUPERFLAGS, "ignoresuperflags")))
ret = false; ret = false;
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
@ -949,7 +963,7 @@ static int btrfs_fill_super(struct super_block *sb,
return err; return err;
} }
inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
err = PTR_ERR(inode); err = PTR_ERR(inode);
btrfs_handle_fs_error(fs_info, err, NULL); btrfs_handle_fs_error(fs_info, err, NULL);
@ -983,7 +997,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0; return 0;
} }
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
trans = btrfs_attach_transaction_barrier(root); trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
@ -1065,6 +1079,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
print_rescue_option(seq, "ignorebadroots", &printed); print_rescue_option(seq, "ignorebadroots", &printed);
if (btrfs_test_opt(info, IGNOREDATACSUMS)) if (btrfs_test_opt(info, IGNOREDATACSUMS))
print_rescue_option(seq, "ignoredatacsums", &printed); print_rescue_option(seq, "ignoredatacsums", &printed);
if (btrfs_test_opt(info, IGNOREMETACSUMS))
print_rescue_option(seq, "ignoremetacsums", &printed);
if (btrfs_test_opt(info, IGNORESUPERFLAGS))
print_rescue_option(seq, "ignoresuperflags", &printed);
if (btrfs_test_opt(info, FLUSHONCOMMIT)) if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit"); seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(info, DISCARD_SYNC)) if (btrfs_test_opt(info, DISCARD_SYNC))
@ -1422,6 +1440,8 @@ static void btrfs_emit_options(struct btrfs_fs_info *info,
btrfs_info_if_set(info, old, USEBACKUPROOT, "trying to use backup root at mount time"); btrfs_info_if_set(info, old, USEBACKUPROOT, "trying to use backup root at mount time");
btrfs_info_if_set(info, old, IGNOREBADROOTS, "ignoring bad roots"); btrfs_info_if_set(info, old, IGNOREBADROOTS, "ignoring bad roots");
btrfs_info_if_set(info, old, IGNOREDATACSUMS, "ignoring data csums"); btrfs_info_if_set(info, old, IGNOREDATACSUMS, "ignoring data csums");
btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums");
btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags");
btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow");
btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations");
@ -2257,9 +2277,7 @@ out:
static int btrfs_freeze(struct super_block *sb) static int btrfs_freeze(struct super_block *sb)
{ {
struct btrfs_trans_handle *trans;
struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
set_bit(BTRFS_FS_FROZEN, &fs_info->flags); set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
/* /*
@ -2268,14 +2286,7 @@ static int btrfs_freeze(struct super_block *sb)
* we want to avoid on a frozen filesystem), or do the commit * we want to avoid on a frozen filesystem), or do the commit
* ourselves. * ourselves.
*/ */
trans = btrfs_attach_transaction_barrier(root); return btrfs_commit_current_transaction(fs_info->tree_root);
if (IS_ERR(trans)) {
/* no transaction, don't bother */
if (PTR_ERR(trans) == -ENOENT)
return 0;
return PTR_ERR(trans);
}
return btrfs_commit_transaction(trans);
} }
static int check_dev_super(struct btrfs_device *dev) static int check_dev_super(struct btrfs_device *dev)
@ -2498,6 +2509,9 @@ static const struct init_sequence mod_init_seq[] = {
}, { }, {
.init_func = btrfs_init_cachep, .init_func = btrfs_init_cachep,
.exit_func = btrfs_destroy_cachep, .exit_func = btrfs_destroy_cachep,
}, {
.init_func = btrfs_init_dio,
.exit_func = btrfs_destroy_dio,
}, { }, {
.init_func = btrfs_transaction_init, .init_func = btrfs_transaction_init,
.exit_func = btrfs_transaction_exit, .exit_func = btrfs_transaction_exit,
@ -2590,6 +2604,7 @@ static int __init init_btrfs_fs(void)
late_initcall(init_btrfs_fs); late_initcall(init_btrfs_fs);
module_exit(exit_btrfs_fs) module_exit(exit_btrfs_fs)
MODULE_DESCRIPTION("B-Tree File System (BTRFS)");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crc32c"); MODULE_SOFTDEP("pre: crc32c");
MODULE_SOFTDEP("pre: xxhash64"); MODULE_SOFTDEP("pre: xxhash64");

View File

@ -10,7 +10,7 @@
struct super_block; struct super_block;
struct btrfs_fs_info; struct btrfs_fs_info;
bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt, bool btrfs_check_options(const struct btrfs_fs_info *info, unsigned long *mount_opt,
unsigned long flags); unsigned long flags);
int btrfs_sync_fs(struct super_block *sb, int wait); int btrfs_sync_fs(struct super_block *sb, int wait);
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,

View File

@ -385,6 +385,8 @@ static const char *rescue_opts[] = {
"nologreplay", "nologreplay",
"ignorebadroots", "ignorebadroots",
"ignoredatacsums", "ignoredatacsums",
"ignoremetacsums",
"ignoresuperflags",
"all", "all",
}; };
@ -894,6 +896,9 @@ SPACE_INFO_ATTR(bytes_readonly);
SPACE_INFO_ATTR(bytes_zone_unusable); SPACE_INFO_ATTR(bytes_zone_unusable);
SPACE_INFO_ATTR(disk_used); SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total); SPACE_INFO_ATTR(disk_total);
SPACE_INFO_ATTR(reclaim_count);
SPACE_INFO_ATTR(reclaim_bytes);
SPACE_INFO_ATTR(reclaim_errors);
BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show, btrfs_chunk_size_store); BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show, btrfs_chunk_size_store);
BTRFS_ATTR(space_info, size_classes, btrfs_size_classes_show); BTRFS_ATTR(space_info, size_classes, btrfs_size_classes_show);
@ -902,8 +907,12 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
char *buf) char *buf)
{ {
struct btrfs_space_info *space_info = to_space_info(kobj); struct btrfs_space_info *space_info = to_space_info(kobj);
ssize_t ret;
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold)); spin_lock(&space_info->lock);
ret = sysfs_emit(buf, "%d\n", btrfs_calc_reclaim_threshold(space_info));
spin_unlock(&space_info->lock);
return ret;
} }
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj, static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
@ -914,6 +923,9 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
int thresh; int thresh;
int ret; int ret;
if (READ_ONCE(space_info->dynamic_reclaim))
return -EINVAL;
ret = kstrtoint(buf, 10, &thresh); ret = kstrtoint(buf, 10, &thresh);
if (ret) if (ret)
return ret; return ret;
@ -930,6 +942,72 @@ BTRFS_ATTR_RW(space_info, bg_reclaim_threshold,
btrfs_sinfo_bg_reclaim_threshold_show, btrfs_sinfo_bg_reclaim_threshold_show,
btrfs_sinfo_bg_reclaim_threshold_store); btrfs_sinfo_bg_reclaim_threshold_store);
/*
 * sysfs "show" handler for the dynamic_reclaim attribute of a space_info.
 *
 * @kobj: kobject that to_space_info() maps to the owning btrfs_space_info
 * @a:    attribute descriptor (unused here)
 * @buf:  output buffer handed to sysfs_emit()
 *
 * Prints the current dynamic_reclaim value as a decimal number followed by a
 * newline and returns whatever sysfs_emit() returns.  The field is sampled
 * with READ_ONCE(); no lock is taken in this function.
 */
static ssize_t btrfs_sinfo_dynamic_reclaim_show(struct kobject *kobj,
						struct kobj_attribute *a,
						char *buf)
{
	struct btrfs_space_info *sinfo = to_space_info(kobj);

	return sysfs_emit(buf, "%d\n", READ_ONCE(sinfo->dynamic_reclaim));
}
/*
 * sysfs "store" handler for the dynamic_reclaim attribute of a space_info.
 *
 * @kobj: kobject that to_space_info() maps to the owning btrfs_space_info
 * @a:    attribute descriptor (unused here)
 * @buf:  user supplied text, parsed as a base-10 integer by kstrtoint()
 * @len:  length of the write, returned unchanged on success
 *
 * Returns the kstrtoint() error if parsing fails, -EINVAL for a negative
 * value, and @len otherwise.  Any non-negative input is collapsed to a
 * zero/non-zero flag before being published with WRITE_ONCE().
 */
static ssize_t btrfs_sinfo_dynamic_reclaim_store(struct kobject *kobj,
						 struct kobj_attribute *a,
						 const char *buf, size_t len)
{
	struct btrfs_space_info *sinfo = to_space_info(kobj);
	int val;
	int err;

	err = kstrtoint(buf, 10, &val);
	if (err)
		return err;

	/* Only the sign is range-checked; magnitude is irrelevant. */
	if (val < 0)
		return -EINVAL;

	WRITE_ONCE(sinfo->dynamic_reclaim, val != 0);

	return len;
}
/*
 * Tie btrfs_sinfo_dynamic_reclaim_show/_store to the space_info attribute
 * named dynamic_reclaim.  NOTE(review): the BTRFS_ATTR_RW macro is defined
 * outside this view; presumably it declares a read-write kobj_attribute that
 * BTRFS_ATTR_PTR(space_info, dynamic_reclaim) later references - confirm.
 */
BTRFS_ATTR_RW(space_info, dynamic_reclaim,
btrfs_sinfo_dynamic_reclaim_show,
btrfs_sinfo_dynamic_reclaim_store);
/*
 * sysfs "show" handler for the periodic_reclaim attribute of a space_info.
 *
 * @kobj: kobject that to_space_info() maps to the owning btrfs_space_info
 * @a:    attribute descriptor (unused here)
 * @buf:  output buffer handed to sysfs_emit()
 *
 * Prints the current periodic_reclaim value as a decimal number followed by a
 * newline and returns whatever sysfs_emit() returns.  The field is sampled
 * with READ_ONCE(); no lock is taken in this function.
 */
static ssize_t btrfs_sinfo_periodic_reclaim_show(struct kobject *kobj,
						 struct kobj_attribute *a,
						 char *buf)
{
	struct btrfs_space_info *sinfo = to_space_info(kobj);

	return sysfs_emit(buf, "%d\n", READ_ONCE(sinfo->periodic_reclaim));
}
/*
 * sysfs "store" handler for the periodic_reclaim attribute of a space_info.
 *
 * @kobj: kobject that to_space_info() maps to the owning btrfs_space_info
 * @a:    attribute descriptor (unused here)
 * @buf:  user supplied text, parsed as a base-10 integer by kstrtoint()
 * @len:  length of the write, returned unchanged on success
 *
 * Returns the kstrtoint() error if parsing fails, -EINVAL for a negative
 * value, and @len otherwise.  Any non-negative input is collapsed to a
 * zero/non-zero flag before being published with WRITE_ONCE().
 */
static ssize_t btrfs_sinfo_periodic_reclaim_store(struct kobject *kobj,
						  struct kobj_attribute *a,
						  const char *buf, size_t len)
{
	struct btrfs_space_info *sinfo = to_space_info(kobj);
	int val;
	int err;

	err = kstrtoint(buf, 10, &val);
	if (err)
		return err;

	/* Only the sign is range-checked; magnitude is irrelevant. */
	if (val < 0)
		return -EINVAL;

	WRITE_ONCE(sinfo->periodic_reclaim, val != 0);

	return len;
}
/*
 * Tie btrfs_sinfo_periodic_reclaim_show/_store to the space_info attribute
 * named periodic_reclaim.  NOTE(review): the BTRFS_ATTR_RW macro is defined
 * outside this view; presumably it declares a read-write kobj_attribute that
 * BTRFS_ATTR_PTR(space_info, periodic_reclaim) later references - confirm.
 */
BTRFS_ATTR_RW(space_info, periodic_reclaim,
btrfs_sinfo_periodic_reclaim_show,
btrfs_sinfo_periodic_reclaim_store);
/* /*
* Allocation information about block group types. * Allocation information about block group types.
* *
@ -947,8 +1025,13 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, disk_used), BTRFS_ATTR_PTR(space_info, disk_used),
BTRFS_ATTR_PTR(space_info, disk_total), BTRFS_ATTR_PTR(space_info, disk_total),
BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold), BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
BTRFS_ATTR_PTR(space_info, dynamic_reclaim),
BTRFS_ATTR_PTR(space_info, chunk_size), BTRFS_ATTR_PTR(space_info, chunk_size),
BTRFS_ATTR_PTR(space_info, size_classes), BTRFS_ATTR_PTR(space_info, size_classes),
BTRFS_ATTR_PTR(space_info, reclaim_count),
BTRFS_ATTR_PTR(space_info, reclaim_bytes),
BTRFS_ATTR_PTR(space_info, reclaim_errors),
BTRFS_ATTR_PTR(space_info, periodic_reclaim),
#ifdef CONFIG_BTRFS_DEBUG #ifdef CONFIG_BTRFS_DEBUG
BTRFS_ATTR_PTR(space_info, force_chunk_alloc), BTRFS_ATTR_PTR(space_info, force_chunk_alloc),
#endif #endif

View File

@ -61,10 +61,7 @@ struct inode *btrfs_new_test_inode(void)
return NULL; return NULL;
inode->i_mode = S_IFREG; inode->i_mode = S_IFREG;
inode->i_ino = BTRFS_FIRST_FREE_OBJECTID; btrfs_set_inode_number(BTRFS_I(inode), BTRFS_FIRST_FREE_OBJECTID);
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
inode_init_owner(&nop_mnt_idmap, inode, NULL, S_IFREG); inode_init_owner(&nop_mnt_idmap, inode, NULL, S_IFREG);
return inode; return inode;

View File

@ -19,8 +19,8 @@ static int free_extent_map_tree(struct btrfs_inode *inode)
int ret = 0; int ret = 0;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) { while (!RB_EMPTY_ROOT(&em_tree->root)) {
node = rb_first_cached(&em_tree->map); node = rb_first(&em_tree->root);
em = rb_entry(node, struct extent_map, rb_node); em = rb_entry(node, struct extent_map, rb_node);
remove_extent_mapping(inode, em); remove_extent_mapping(inode, em);
@ -28,9 +28,10 @@ static int free_extent_map_tree(struct btrfs_inode *inode)
if (refcount_read(&em->refs) != 1) { if (refcount_read(&em->refs) != 1) {
ret = -EINVAL; ret = -EINVAL;
test_err( test_err(
"em leak: em (start %llu len %llu block_start %llu block_len %llu) refs %d", "em leak: em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu offset %llu) refs %d",
em->start, em->len, em->block_start, em->start, em->len, em->disk_bytenr,
em->block_len, refcount_read(&em->refs)); em->disk_num_bytes, em->offset,
refcount_read(&em->refs));
refcount_set(&em->refs, 1); refcount_set(&em->refs, 1);
} }
@ -76,8 +77,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* Add [0, 16K) */ /* Add [0, 16K) */
em->start = 0; em->start = 0;
em->len = SZ_16K; em->len = SZ_16K;
em->block_start = 0; em->disk_bytenr = 0;
em->block_len = SZ_16K; em->disk_num_bytes = SZ_16K;
em->ram_bytes = SZ_16K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -97,8 +99,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
em->start = SZ_16K; em->start = SZ_16K;
em->len = SZ_4K; em->len = SZ_4K;
em->block_start = SZ_32K; /* avoid merging */ em->disk_bytenr = SZ_32K; /* avoid merging */
em->block_len = SZ_4K; em->disk_num_bytes = SZ_4K;
em->ram_bytes = SZ_4K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -118,8 +121,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* Add [0, 8K), should return [0, 16K) instead. */ /* Add [0, 8K), should return [0, 16K) instead. */
em->start = start; em->start = start;
em->len = len; em->len = len;
em->block_start = start; em->disk_bytenr = start;
em->block_len = len; em->disk_num_bytes = len;
em->ram_bytes = len;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -134,11 +138,11 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
goto out; goto out;
} }
if (em->start != 0 || extent_map_end(em) != SZ_16K || if (em->start != 0 || extent_map_end(em) != SZ_16K ||
em->block_start != 0 || em->block_len != SZ_16K) { em->disk_bytenr != 0 || em->disk_num_bytes != SZ_16K) {
test_err( test_err(
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu", "case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu",
start, start + len, ret, em->start, em->len, start, start + len, ret, em->start, em->len,
em->block_start, em->block_len); em->disk_bytenr, em->disk_num_bytes);
ret = -EINVAL; ret = -EINVAL;
} }
free_extent_map(em); free_extent_map(em);
@ -172,8 +176,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* Add [0, 1K) */ /* Add [0, 1K) */
em->start = 0; em->start = 0;
em->len = SZ_1K; em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE; em->disk_bytenr = EXTENT_MAP_INLINE;
em->block_len = (u64)-1; em->disk_num_bytes = 0;
em->ram_bytes = SZ_1K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -193,8 +198,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
em->start = SZ_4K; em->start = SZ_4K;
em->len = SZ_4K; em->len = SZ_4K;
em->block_start = SZ_4K; em->disk_bytenr = SZ_4K;
em->block_len = SZ_4K; em->disk_num_bytes = SZ_4K;
em->ram_bytes = SZ_4K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -214,8 +220,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* Add [0, 1K) */ /* Add [0, 1K) */
em->start = 0; em->start = 0;
em->len = SZ_1K; em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE; em->disk_bytenr = EXTENT_MAP_INLINE;
em->block_len = (u64)-1; em->disk_num_bytes = 0;
em->ram_bytes = SZ_1K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -229,11 +236,10 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
goto out; goto out;
} }
if (em->start != 0 || extent_map_end(em) != SZ_1K || if (em->start != 0 || extent_map_end(em) != SZ_1K ||
em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1) { em->disk_bytenr != EXTENT_MAP_INLINE) {
test_err( test_err(
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu", "case2 [0 1K]: ret %d return a wrong em (start %llu len %llu disk_bytenr %llu",
ret, em->start, em->len, em->block_start, ret, em->start, em->len, em->disk_bytenr);
em->block_len);
ret = -EINVAL; ret = -EINVAL;
} }
free_extent_map(em); free_extent_map(em);
@ -263,8 +269,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
/* Add [4K, 8K) */ /* Add [4K, 8K) */
em->start = SZ_4K; em->start = SZ_4K;
em->len = SZ_4K; em->len = SZ_4K;
em->block_start = SZ_4K; em->disk_bytenr = SZ_4K;
em->block_len = SZ_4K; em->disk_num_bytes = SZ_4K;
em->ram_bytes = SZ_4K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -284,8 +291,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
/* Add [0, 16K) */ /* Add [0, 16K) */
em->start = 0; em->start = 0;
em->len = SZ_16K; em->len = SZ_16K;
em->block_start = 0; em->disk_bytenr = 0;
em->block_len = SZ_16K; em->disk_num_bytes = SZ_16K;
em->ram_bytes = SZ_16K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, start, len); ret = btrfs_add_extent_mapping(inode, &em, start, len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -305,11 +313,11 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
* em->start. * em->start.
*/ */
if (start < em->start || start + len > extent_map_end(em) || if (start < em->start || start + len > extent_map_end(em) ||
em->start != em->block_start || em->len != em->block_len) { em->start != extent_map_block_start(em)) {
test_err( test_err(
"case3 [%llu %llu): ret %d em (start %llu len %llu block_start %llu block_len %llu)", "case3 [%llu %llu): ret %d em (start %llu len %llu disk_bytenr %llu block_len %llu)",
start, start + len, ret, em->start, em->len, start, start + len, ret, em->start, em->len,
em->block_start, em->block_len); em->disk_bytenr, em->disk_num_bytes);
ret = -EINVAL; ret = -EINVAL;
} }
free_extent_map(em); free_extent_map(em);
@ -370,8 +378,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
/* Add [0K, 8K) */ /* Add [0K, 8K) */
em->start = 0; em->start = 0;
em->len = SZ_8K; em->len = SZ_8K;
em->block_start = 0; em->disk_bytenr = 0;
em->block_len = SZ_8K; em->disk_num_bytes = SZ_8K;
em->ram_bytes = SZ_8K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -391,8 +400,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
/* Add [8K, 32K) */ /* Add [8K, 32K) */
em->start = SZ_8K; em->start = SZ_8K;
em->len = 24 * SZ_1K; em->len = 24 * SZ_1K;
em->block_start = SZ_16K; /* avoid merging */ em->disk_bytenr = SZ_16K; /* avoid merging */
em->block_len = 24 * SZ_1K; em->disk_num_bytes = 24 * SZ_1K;
em->ram_bytes = 24 * SZ_1K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -411,8 +421,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
/* Add [0K, 32K) */ /* Add [0K, 32K) */
em->start = 0; em->start = 0;
em->len = SZ_32K; em->len = SZ_32K;
em->block_start = 0; em->disk_bytenr = 0;
em->block_len = SZ_32K; em->disk_num_bytes = SZ_32K;
em->ram_bytes = SZ_32K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, start, len); ret = btrfs_add_extent_mapping(inode, &em, start, len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -429,9 +440,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
} }
if (start < em->start || start + len > extent_map_end(em)) { if (start < em->start || start + len > extent_map_end(em)) {
test_err( test_err(
"case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu block_start %llu block_len %llu)", "case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu)",
start, start + len, ret, em->start, em->len, em->block_start, start, start + len, ret, em->start, em->len,
em->block_len); em->disk_bytenr, em->disk_num_bytes);
ret = -EINVAL; ret = -EINVAL;
} }
free_extent_map(em); free_extent_map(em);
@ -495,8 +506,9 @@ static int add_compressed_extent(struct btrfs_inode *inode,
em->start = start; em->start = start;
em->len = len; em->len = len;
em->block_start = block_start; em->disk_bytenr = block_start;
em->block_len = SZ_4K; em->disk_num_bytes = SZ_4K;
em->ram_bytes = len;
em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
@ -551,7 +563,7 @@ static int validate_range(struct extent_map_tree *em_tree, int index)
struct rb_node *n; struct rb_node *n;
int i; int i;
for (i = 0, n = rb_first_cached(&em_tree->map); for (i = 0, n = rb_first(&em_tree->root);
valid_ranges[index][i].len && n; valid_ranges[index][i].len && n;
i++, n = rb_next(n)) { i++, n = rb_next(n)) {
struct extent_map *entry = rb_entry(n, struct extent_map, rb_node); struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
@ -716,8 +728,9 @@ static int test_case_6(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
em->start = SZ_4K; em->start = SZ_4K;
em->len = SZ_4K; em->len = SZ_4K;
em->block_start = SZ_16K; em->disk_bytenr = SZ_16K;
em->block_len = SZ_16K; em->disk_num_bytes = SZ_16K;
em->ram_bytes = SZ_16K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, 0, SZ_8K); ret = btrfs_add_extent_mapping(inode, &em, 0, SZ_8K);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -769,9 +782,10 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* [0, 16K), pinned */ /* [0, 16K), pinned */
em->start = 0; em->start = 0;
em->len = SZ_16K; em->len = SZ_16K;
em->block_start = 0; em->disk_bytenr = 0;
em->block_len = SZ_4K; em->disk_num_bytes = SZ_4K;
em->flags |= EXTENT_FLAG_PINNED; em->ram_bytes = SZ_16K;
em->flags |= (EXTENT_FLAG_PINNED | EXTENT_FLAG_COMPRESS_ZLIB);
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -791,8 +805,9 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
/* [32K, 48K), not pinned */ /* [32K, 48K), not pinned */
em->start = SZ_32K; em->start = SZ_32K;
em->len = SZ_16K; em->len = SZ_16K;
em->block_start = SZ_32K; em->disk_bytenr = SZ_32K;
em->block_len = SZ_16K; em->disk_num_bytes = SZ_16K;
em->ram_bytes = SZ_16K;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
@ -855,8 +870,9 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
goto out; goto out;
} }
if (em->block_start != SZ_32K + SZ_4K) { if (extent_map_block_start(em) != SZ_32K + SZ_4K) {
test_err("em->block_start is %llu, expected 36K", em->block_start); test_err("em->block_start is %llu, expected 36K",
extent_map_block_start(em));
goto out; goto out;
} }

View File

@ -117,7 +117,7 @@ static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
/* Now for a regular extent */ /* Now for a regular extent */
insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0, insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0,
disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot); disk_bytenr, sectorsize - 1, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++; slot++;
disk_bytenr += sectorsize; disk_bytenr += sectorsize;
offset += sectorsize - 1; offset += sectorsize - 1;
@ -264,8 +264,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != EXTENT_MAP_HOLE) { if (em->disk_bytenr != EXTENT_MAP_HOLE) {
test_err("expected a hole, got %llu", em->block_start); test_err("expected a hole, got %llu", em->disk_bytenr);
goto out; goto out;
} }
free_extent_map(em); free_extent_map(em);
@ -283,8 +283,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != EXTENT_MAP_INLINE) { if (em->disk_bytenr != EXTENT_MAP_INLINE) {
test_err("expected an inline, got %llu", em->block_start); test_err("expected an inline, got %llu", em->disk_bytenr);
goto out; goto out;
} }
@ -321,8 +321,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != EXTENT_MAP_HOLE) { if (em->disk_bytenr != EXTENT_MAP_HOLE) {
test_err("expected a hole, got %llu", em->block_start); test_err("expected a hole, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != 4) { if (em->start != offset || em->len != 4) {
@ -344,8 +344,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize - 1) { if (em->start != offset || em->len != sectorsize - 1) {
@ -358,9 +358,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -372,8 +371,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -386,12 +385,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
disk_bytenr = em->block_start; disk_bytenr = extent_map_block_start(em);
orig_start = em->start; orig_start = em->start;
offset = em->start + em->len; offset = em->start + em->len;
free_extent_map(em); free_extent_map(em);
@ -401,8 +399,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != EXTENT_MAP_HOLE) { if (em->disk_bytenr != EXTENT_MAP_HOLE) {
test_err("expected a hole, got %llu", em->block_start); test_err("expected a hole, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -423,8 +421,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != 2 * sectorsize) { if (em->start != offset || em->len != 2 * sectorsize) {
@ -437,15 +435,15 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != orig_start) { if (em->start - em->offset != orig_start) {
test_err("wrong orig offset, want %llu, have %llu", test_err("wrong offset, em->start=%llu em->offset=%llu orig_start=%llu",
orig_start, em->orig_start); em->start, em->offset, orig_start);
goto out; goto out;
} }
disk_bytenr += (em->start - orig_start); disk_bytenr += (em->start - orig_start);
if (em->block_start != disk_bytenr) { if (extent_map_block_start(em) != disk_bytenr) {
test_err("wrong block start, want %llu, have %llu", test_err("wrong block start, want %llu, have %llu",
disk_bytenr, em->block_start); disk_bytenr, extent_map_block_start(em));
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -457,8 +455,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -472,9 +470,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
prealloc_only, em->flags); prealloc_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -486,8 +483,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -501,12 +498,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
prealloc_only, em->flags); prealloc_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
disk_bytenr = em->block_start; disk_bytenr = extent_map_block_start(em);
orig_start = em->start; orig_start = em->start;
offset = em->start + em->len; offset = em->start + em->len;
free_extent_map(em); free_extent_map(em);
@ -516,8 +512,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_HOLE) { if (em->disk_bytenr >= EXTENT_MAP_HOLE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -530,15 +526,14 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != orig_start) { if (em->start - em->offset != orig_start) {
test_err("unexpected orig offset, wanted %llu, have %llu", test_err("unexpected offset, wanted %llu, have %llu",
orig_start, em->orig_start); em->start - orig_start, em->offset);
goto out; goto out;
} }
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) { if (extent_map_block_start(em) != disk_bytenr + em->offset) {
test_err("unexpected block start, wanted %llu, have %llu", test_err("unexpected block start, wanted %llu, have %llu",
disk_bytenr + (em->start - em->orig_start), disk_bytenr + em->offset, extent_map_block_start(em));
em->block_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -549,8 +544,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != 2 * sectorsize) { if (em->start != offset || em->len != 2 * sectorsize) {
@ -564,15 +559,14 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
prealloc_only, em->flags); prealloc_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != orig_start) { if (em->start - em->offset != orig_start) {
test_err("wrong orig offset, want %llu, have %llu", orig_start, test_err("wrong offset, em->start=%llu em->offset=%llu orig_start=%llu",
em->orig_start); em->start, em->offset, orig_start);
goto out; goto out;
} }
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) { if (extent_map_block_start(em) != disk_bytenr + em->offset) {
test_err("unexpected block start, wanted %llu, have %llu", test_err("unexpected block start, wanted %llu, have %llu",
disk_bytenr + (em->start - em->orig_start), disk_bytenr + em->offset, extent_map_block_start(em));
em->block_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -584,8 +578,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != 2 * sectorsize) { if (em->start != offset || em->len != 2 * sectorsize) {
@ -599,9 +593,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
compressed_only, em->flags); compressed_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", test_err("wrong offset, want 0, have %llu", em->offset);
em->start, em->orig_start);
goto out; goto out;
} }
if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) { if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
@ -618,8 +611,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -633,9 +626,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
compressed_only, em->flags); compressed_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", test_err("wrong offset, want 0, have %llu", em->offset);
em->start, em->orig_start);
goto out; goto out;
} }
if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) { if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
@ -643,7 +635,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
BTRFS_COMPRESS_ZLIB, extent_map_compression(em)); BTRFS_COMPRESS_ZLIB, extent_map_compression(em));
goto out; goto out;
} }
disk_bytenr = em->block_start; disk_bytenr = extent_map_block_start(em);
orig_start = em->start; orig_start = em->start;
offset = em->start + em->len; offset = em->start + em->len;
free_extent_map(em); free_extent_map(em);
@ -653,8 +645,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -667,9 +659,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -680,9 +671,9 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != disk_bytenr) { if (extent_map_block_start(em) != disk_bytenr) {
test_err("block start does not match, want %llu got %llu", test_err("block start does not match, want %llu got %llu",
disk_bytenr, em->block_start); disk_bytenr, extent_map_block_start(em));
goto out; goto out;
} }
if (em->start != offset || em->len != 2 * sectorsize) { if (em->start != offset || em->len != 2 * sectorsize) {
@ -696,9 +687,9 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
compressed_only, em->flags); compressed_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != orig_start) { if (em->start - em->offset != orig_start) {
test_err("wrong orig offset, want %llu, have %llu", test_err("wrong offset, em->start=%llu em->offset=%llu orig_start=%llu",
em->start, orig_start); em->start, em->offset, orig_start);
goto out; goto out;
} }
if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) { if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
@ -715,8 +706,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -729,9 +720,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -742,8 +732,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != EXTENT_MAP_HOLE) { if (em->disk_bytenr != EXTENT_MAP_HOLE) {
test_err("expected a hole extent, got %llu", em->block_start); test_err("expected a hole extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
/* /*
@ -762,9 +752,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
vacancy_only, em->flags); vacancy_only, em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
offset = em->start + em->len; offset = em->start + em->len;
@ -775,8 +764,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start >= EXTENT_MAP_LAST_BYTE) { if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != offset || em->len != sectorsize) { if (em->start != offset || em->len != sectorsize) {
@ -789,9 +778,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_err("unexpected flags set, want 0 have %u", em->flags); test_err("unexpected flags set, want 0 have %u", em->flags);
goto out; goto out;
} }
if (em->orig_start != em->start) { if (em->offset != 0) {
test_err("wrong orig offset, want %llu, have %llu", em->start, test_err("wrong orig offset, want 0, have %llu", em->offset);
em->orig_start);
goto out; goto out;
} }
ret = 0; ret = 0;
@ -855,8 +843,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != EXTENT_MAP_HOLE) { if (em->disk_bytenr != EXTENT_MAP_HOLE) {
test_err("expected a hole, got %llu", em->block_start); test_err("expected a hole, got %llu", em->disk_bytenr);
goto out; goto out;
} }
if (em->start != 0 || em->len != sectorsize) { if (em->start != 0 || em->len != sectorsize) {
@ -877,8 +865,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
test_err("got an error when we shouldn't have"); test_err("got an error when we shouldn't have");
goto out; goto out;
} }
if (em->block_start != sectorsize) { if (extent_map_block_start(em) != sectorsize) {
test_err("expected a real extent, got %llu", em->block_start); test_err("expected a real extent, got %llu", extent_map_block_start(em));
goto out; goto out;
} }
if (em->start != sectorsize || em->len != sectorsize) { if (em->start != sectorsize || em->len != sectorsize) {

View File

@ -405,7 +405,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
int ret = 0; int ret = 0;
if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
root->last_trans < trans->transid) || force) { btrfs_get_root_last_trans(root) < trans->transid) || force) {
WARN_ON(!force && root->commit_root != root->node); WARN_ON(!force && root->commit_root != root->node);
/* /*
@ -421,7 +421,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
smp_wmb(); smp_wmb();
spin_lock(&fs_info->fs_roots_radix_lock); spin_lock(&fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid && !force) { if (btrfs_get_root_last_trans(root) == trans->transid && !force) {
spin_unlock(&fs_info->fs_roots_radix_lock); spin_unlock(&fs_info->fs_roots_radix_lock);
return 0; return 0;
} }
@ -429,7 +429,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_root_id(root), (unsigned long)btrfs_root_id(root),
BTRFS_ROOT_TRANS_TAG); BTRFS_ROOT_TRANS_TAG);
spin_unlock(&fs_info->fs_roots_radix_lock); spin_unlock(&fs_info->fs_roots_radix_lock);
root->last_trans = trans->transid; btrfs_set_root_last_trans(root, trans->transid);
/* this is pretty tricky. We don't want to /* this is pretty tricky. We don't want to
* take the relocation lock in btrfs_record_root_in_trans * take the relocation lock in btrfs_record_root_in_trans
@ -491,7 +491,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
* and barriers * and barriers
*/ */
smp_rmb(); smp_rmb();
if (root->last_trans == trans->transid && if (btrfs_get_root_last_trans(root) == trans->transid &&
!test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state)) !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
return 0; return 0;
@ -1637,7 +1637,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root; struct btrfs_root *root = pending->root;
struct btrfs_root *parent_root; struct btrfs_root *parent_root;
struct btrfs_block_rsv *rsv; struct btrfs_block_rsv *rsv;
struct inode *parent_inode = pending->dir; struct inode *parent_inode = &pending->dir->vfs_inode;
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_dir_item *dir_item; struct btrfs_dir_item *dir_item;
struct extent_buffer *tmp; struct extent_buffer *tmp;
@ -1989,6 +1989,25 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
btrfs_put_transaction(cur_trans); btrfs_put_transaction(cur_trans);
} }
/*
 * Commit the transaction that is currently running, or, if its commit is
 * already in progress, wait for that commit to complete.  When no transaction
 * is running, nothing is started and there is nothing to commit.
 *
 * Returns 0 on success (including the case where -ENOENT is reported while
 * attaching, which is deliberately not treated as an error), otherwise the
 * negative errno from attaching to or committing the transaction.
 */
int btrfs_commit_current_transaction(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;

	trans = btrfs_attach_transaction_barrier(root);
	if (!IS_ERR(trans))
		return btrfs_commit_transaction(trans);

	/* Attaching failed: -ENOENT is mapped to success, the rest is passed up. */
	ret = PTR_ERR(trans);
	if (ret == -ENOENT)
		return 0;

	return ret;
}
static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
{ {
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
@ -2110,7 +2129,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{ {
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
} }
/* /*

View File

@ -172,7 +172,7 @@ struct btrfs_trans_handle {
struct btrfs_pending_snapshot { struct btrfs_pending_snapshot {
struct dentry *dentry; struct dentry *dentry;
struct inode *dir; struct btrfs_inode *dir;
struct btrfs_root *root; struct btrfs_root *root;
struct btrfs_root_item *root_item; struct btrfs_root_item *root_item;
struct btrfs_root *snap; struct btrfs_root *snap;
@ -229,11 +229,11 @@ bool __cold abort_should_print_stack(int error);
*/ */
#define btrfs_abort_transaction(trans, error) \ #define btrfs_abort_transaction(trans, error) \
do { \ do { \
bool first = false; \ bool __first = false; \
/* Report first abort since mount */ \ /* Report first abort since mount */ \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
&((trans)->fs_info->fs_state))) { \ &((trans)->fs_info->fs_state))) { \
first = true; \ __first = true; \
if (WARN(abort_should_print_stack(error), \ if (WARN(abort_should_print_stack(error), \
KERN_ERR \ KERN_ERR \
"BTRFS: Transaction aborted (error %d)\n", \ "BTRFS: Transaction aborted (error %d)\n", \
@ -246,7 +246,7 @@ do { \
} \ } \
} \ } \
__btrfs_abort_transaction((trans), __func__, \ __btrfs_abort_transaction((trans), __func__, \
__LINE__, (error), first); \ __LINE__, (error), __first); \
} while (0) } while (0)
int btrfs_end_transaction(struct btrfs_trans_handle *trans); int btrfs_end_transaction(struct btrfs_trans_handle *trans);
@ -268,6 +268,7 @@ void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info); int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans); int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans); void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
int btrfs_commit_current_transaction(struct btrfs_root *root);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans); int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans); bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
void btrfs_throttle(struct btrfs_fs_info *fs_info); void btrfs_throttle(struct btrfs_fs_info *fs_info);

View File

@ -340,6 +340,24 @@ static int check_extent_data_item(struct extent_buffer *leaf,
} }
} }
/*
* For non-compressed data extents, ram_bytes should match
* disk_num_bytes.
* However, ram_bytes is not really used in this case, so this check is
* only enabled on DEBUG builds, to help developers catch unexpected
* behavior.
*/
if (IS_ENABLED(CONFIG_BTRFS_DEBUG) &&
btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE &&
btrfs_file_extent_disk_bytenr(leaf, fi)) {
if (WARN_ON(btrfs_file_extent_ram_bytes(leaf, fi) !=
btrfs_file_extent_disk_num_bytes(leaf, fi)))
file_extent_err(leaf, slot,
"mismatch ram_bytes (%llu) and disk_num_bytes (%llu) for non-compressed extent",
btrfs_file_extent_ram_bytes(leaf, fi),
btrfs_file_extent_disk_num_bytes(leaf, fi));
}
return 0; return 0;
} }
@ -1682,9 +1700,6 @@ static int check_inode_ref(struct extent_buffer *leaf,
static int check_raid_stripe_extent(const struct extent_buffer *leaf, static int check_raid_stripe_extent(const struct extent_buffer *leaf,
const struct btrfs_key *key, int slot) const struct btrfs_key *key, int slot)
{ {
struct btrfs_stripe_extent *stripe_extent =
btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
generic_err(leaf, slot, generic_err(leaf, slot,
"invalid key objectid for raid stripe extent, have %llu expect aligned to %u", "invalid key objectid for raid stripe extent, have %llu expect aligned to %u",
@ -1698,22 +1713,6 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf,
return -EUCLEAN; return -EUCLEAN;
} }
switch (btrfs_stripe_extent_encoding(leaf, stripe_extent)) {
case BTRFS_STRIPE_RAID0:
case BTRFS_STRIPE_RAID1:
case BTRFS_STRIPE_DUP:
case BTRFS_STRIPE_RAID10:
case BTRFS_STRIPE_RAID5:
case BTRFS_STRIPE_RAID6:
case BTRFS_STRIPE_RAID1C3:
case BTRFS_STRIPE_RAID1C4:
break;
default:
generic_err(leaf, slot, "invalid raid stripe encoding %u",
btrfs_stripe_extent_encoding(leaf, stripe_extent));
return -EUCLEAN;
}
return 0; return 0;
} }

View File

@ -151,7 +151,7 @@ static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
* attempt a transaction commit, resulting in a deadlock. * attempt a transaction commit, resulting in a deadlock.
*/ */
nofs_flag = memalloc_nofs_save(); nofs_flag = memalloc_nofs_save();
inode = btrfs_iget(root->fs_info->sb, objectid, root); inode = btrfs_iget(objectid, root);
memalloc_nofs_restore(nofs_flag); memalloc_nofs_restore(nofs_flag);
return inode; return inode;
@ -1644,7 +1644,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (ret) if (ret)
goto out; goto out;
} }
BTRFS_I(inode)->index_cnt = (u64)-1; if (S_ISDIR(inode->i_mode))
BTRFS_I(inode)->index_cnt = (u64)-1;
if (inode->i_nlink == 0) { if (inode->i_nlink == 0) {
if (S_ISDIR(inode->i_mode)) { if (S_ISDIR(inode->i_mode)) {
@ -2839,7 +2840,7 @@ static void wait_for_writer(struct btrfs_root *root)
finish_wait(&root->log_writer_wait, &wait); finish_wait(&root->log_writer_wait, &wait);
} }
void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode) void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode)
{ {
ctx->log_ret = 0; ctx->log_ret = 0;
ctx->log_transid = 0; ctx->log_transid = 0;
@ -2858,7 +2859,7 @@ void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx) void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx)
{ {
struct btrfs_inode *inode = BTRFS_I(ctx->inode); struct btrfs_inode *inode = ctx->inode;
if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) && if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags)) !test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
@ -2876,7 +2877,7 @@ void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
struct btrfs_ordered_extent *ordered; struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_extent *tmp; struct btrfs_ordered_extent *tmp;
ASSERT(inode_is_locked(ctx->inode)); ASSERT(inode_is_locked(&ctx->inode->vfs_inode));
list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) { list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
list_del_init(&ordered->log_list); list_del_init(&ordered->log_list);
@ -4253,8 +4254,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, bool inode_item_dropped) struct btrfs_inode *inode, bool inode_item_dropped)
{ {
struct btrfs_inode_item *inode_item; struct btrfs_inode_item *inode_item;
struct btrfs_key key;
int ret; int ret;
btrfs_get_inode_key(inode, &key);
/* /*
* If we are doing a fast fsync and the inode was logged before in the * If we are doing a fast fsync and the inode was logged before in the
* current transaction, then we know the inode was previously logged and * current transaction, then we know the inode was previously logged and
@ -4266,7 +4269,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
* already exists can also result in unnecessarily splitting a leaf. * already exists can also result in unnecessarily splitting a leaf.
*/ */
if (!inode_item_dropped && inode->logged_trans == trans->transid) { if (!inode_item_dropped && inode->logged_trans == trans->transid) {
ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1); ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
ASSERT(ret <= 0); ASSERT(ret <= 0);
if (ret > 0) if (ret > 0)
ret = -ENOENT; ret = -ENOENT;
@ -4280,7 +4283,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
* the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime * the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
* flags and set ->logged_trans to 0. * flags and set ->logged_trans to 0.
*/ */
ret = btrfs_insert_empty_item(trans, log, path, &inode->location, ret = btrfs_insert_empty_item(trans, log, path, &key,
sizeof(*inode_item)); sizeof(*inode_item));
ASSERT(ret != -EEXIST); ASSERT(ret != -EEXIST);
} }
@ -4594,6 +4597,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
{ {
struct btrfs_ordered_extent *ordered; struct btrfs_ordered_extent *ordered;
struct btrfs_root *csum_root; struct btrfs_root *csum_root;
u64 block_start;
u64 csum_offset; u64 csum_offset;
u64 csum_len; u64 csum_len;
u64 mod_start = em->start; u64 mod_start = em->start;
@ -4603,7 +4607,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
if (inode->flags & BTRFS_INODE_NODATASUM || if (inode->flags & BTRFS_INODE_NODATASUM ||
(em->flags & EXTENT_FLAG_PREALLOC) || (em->flags & EXTENT_FLAG_PREALLOC) ||
em->block_start == EXTENT_MAP_HOLE) em->disk_bytenr == EXTENT_MAP_HOLE)
return 0; return 0;
list_for_each_entry(ordered, &ctx->ordered_extents, log_list) { list_for_each_entry(ordered, &ctx->ordered_extents, log_list) {
@ -4667,17 +4671,18 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
/* If we're compressed we have to save the entire range of csums. */ /* If we're compressed we have to save the entire range of csums. */
if (extent_map_is_compressed(em)) { if (extent_map_is_compressed(em)) {
csum_offset = 0; csum_offset = 0;
csum_len = max(em->block_len, em->orig_block_len); csum_len = em->disk_num_bytes;
} else { } else {
csum_offset = mod_start - em->start; csum_offset = mod_start - em->start;
csum_len = mod_len; csum_len = mod_len;
} }
/* block start is already adjusted for the file extent offset. */ /* block start is already adjusted for the file extent offset. */
csum_root = btrfs_csum_root(trans->fs_info, em->block_start); block_start = extent_map_block_start(em);
ret = btrfs_lookup_csums_list(csum_root, em->block_start + csum_offset, csum_root = btrfs_csum_root(trans->fs_info, block_start);
em->block_start + csum_offset + ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset,
csum_len - 1, &ordered_sums, false); block_start + csum_offset + csum_len - 1,
&ordered_sums, false);
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = 0; ret = 0;
@ -4707,7 +4712,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct btrfs_key key; struct btrfs_key key;
enum btrfs_compression_type compress_type; enum btrfs_compression_type compress_type;
u64 extent_offset = em->start - em->orig_start; u64 extent_offset = em->offset;
u64 block_start = extent_map_block_start(em);
u64 block_len; u64 block_len;
int ret; int ret;
@ -4717,14 +4723,13 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
else else
btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG); btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
block_len = max(em->block_len, em->orig_block_len); block_len = em->disk_num_bytes;
compress_type = extent_map_compression(em); compress_type = extent_map_compression(em);
if (compress_type != BTRFS_COMPRESS_NONE) { if (compress_type != BTRFS_COMPRESS_NONE) {
btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start); btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len); btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
} else if (em->block_start < EXTENT_MAP_LAST_BYTE) { } else if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start - btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start - extent_offset);
extent_offset);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len); btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
} }
@ -5927,7 +5932,7 @@ again:
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} else if (ret > 0 && } else if (ret > 0 &&
other_ino != btrfs_ino(BTRFS_I(ctx->inode))) { other_ino != btrfs_ino(ctx->inode)) {
if (ins_nr > 0) { if (ins_nr > 0) {
ins_nr++; ins_nr++;
} else { } else {
@ -7073,6 +7078,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans; goto end_no_trans;
} }
/*
* If we're logging an inode from a subvolume created in the current
* transaction we must force a commit since the root is not persisted.
*/
if (btrfs_root_generation(&root->root_item) == trans->transid) {
ret = BTRFS_LOG_FORCE_COMMIT;
goto end_no_trans;
}
/* /*
* Skip already logged inodes or inodes corresponding to tmpfiles * Skip already logged inodes or inodes corresponding to tmpfiles
* (since logging them is pointless, a link count of 0 means they * (since logging them is pointless, a link count of 0 means they
@ -7453,6 +7467,24 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
mutex_unlock(&dir->log_mutex); mutex_unlock(&dir->log_mutex);
} }
/*
 * Call this when creating a subvolume in a directory.
 *
 * No transaction is committed when a subvolume is created, so while we are
 * still inside the transaction that created it, the parent directory must not
 * be logged with an entry pointing to a root that has not been persisted yet.
 * To prevent that, make any attempt to log the directory result in a full log
 * sync.
 *
 * Renames need no special handling here, since btrfs_rename() marks the log
 * for full commit when renaming a subvolume.
 *
 * @trans: handle of the transaction in which the subvolume is being created
 * @dir:   directory that will contain the new subvolume
 */
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
				struct btrfs_inode *dir)
{
	mutex_lock(&dir->log_mutex);
	/* Stamp the directory with the id of the current transaction. */
	dir->last_unlink_trans = trans->transid;
	mutex_unlock(&dir->log_mutex);
}
/* /*
* Update the log after adding a new name for an inode. * Update the log after adding a new name for an inode.
* *
@ -7585,7 +7617,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
goto out; goto out;
} }
btrfs_init_log_ctx(&ctx, &inode->vfs_inode); btrfs_init_log_ctx(&ctx, inode);
ctx.logging_new_name = true; ctx.logging_new_name = true;
btrfs_init_log_ctx_scratch_eb(&ctx); btrfs_init_log_ctx_scratch_eb(&ctx);
/* /*

View File

@ -37,7 +37,7 @@ struct btrfs_log_ctx {
bool logging_new_delayed_dentries; bool logging_new_delayed_dentries;
/* Indicate if the inode being logged was logged before. */ /* Indicate if the inode being logged was logged before. */
bool logged_before; bool logged_before;
struct inode *inode; struct btrfs_inode *inode;
struct list_head list; struct list_head list;
/* Only used for fast fsyncs. */ /* Only used for fast fsyncs. */
struct list_head ordered_extents; struct list_head ordered_extents;
@ -55,7 +55,7 @@ struct btrfs_log_ctx {
struct extent_buffer *scratch_eb; struct extent_buffer *scratch_eb;
}; };
void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode); void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode);
void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx); void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx);
void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx); void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx);
@ -94,6 +94,8 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
bool for_rename); bool for_rename);
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir); struct btrfs_inode *dir);
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
struct btrfs_inode *dir);
void btrfs_log_new_name(struct btrfs_trans_handle *trans, void btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct dentry *old_dentry, struct btrfs_inode *old_dir, struct dentry *old_dentry, struct btrfs_inode *old_dir,
u64 old_dir_index, struct dentry *parent); u64 old_dir_index, struct dentry *parent);

View File

@ -50,6 +50,7 @@ void ulist_init(struct ulist *ulist)
INIT_LIST_HEAD(&ulist->nodes); INIT_LIST_HEAD(&ulist->nodes);
ulist->root = RB_ROOT; ulist->root = RB_ROOT;
ulist->nnodes = 0; ulist->nnodes = 0;
ulist->prealloc = NULL;
} }
/* /*
@ -68,6 +69,8 @@ void ulist_release(struct ulist *ulist)
list_for_each_entry_safe(node, next, &ulist->nodes, list) { list_for_each_entry_safe(node, next, &ulist->nodes, list) {
kfree(node); kfree(node);
} }
kfree(ulist->prealloc);
ulist->prealloc = NULL;
ulist->root = RB_ROOT; ulist->root = RB_ROOT;
INIT_LIST_HEAD(&ulist->nodes); INIT_LIST_HEAD(&ulist->nodes);
} }
@ -105,6 +108,12 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
return ulist; return ulist;
} }
/*
 * Make sure @ulist has a spare, zero-initialized node cached in ->prealloc.
 *
 * Nothing is done when a spare node is already cached.  The allocation result
 * is not reported to the caller: on failure ->prealloc simply stays NULL.
 *
 * @ulist:    ulist that should hold the spare node
 * @gfp_mask: allocation flags used if a node has to be allocated
 */
void ulist_prealloc(struct ulist *ulist, gfp_t gfp_mask)
{
	/* A spare node is already waiting to be used, keep it. */
	if (ulist->prealloc)
		return;

	ulist->prealloc = kzalloc(sizeof(*ulist->prealloc), gfp_mask);
}
/* /*
* Free dynamically allocated ulist. * Free dynamically allocated ulist.
* *
@ -206,9 +215,15 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
*old_aux = node->aux; *old_aux = node->aux;
return 0; return 0;
} }
node = kmalloc(sizeof(*node), gfp_mask);
if (!node) if (ulist->prealloc) {
return -ENOMEM; node = ulist->prealloc;
ulist->prealloc = NULL;
} else {
node = kmalloc(sizeof(*node), gfp_mask);
if (!node)
return -ENOMEM;
}
node->val = val; node->val = val;
node->aux = aux; node->aux = aux;

View File

@ -41,12 +41,14 @@ struct ulist {
struct list_head nodes; struct list_head nodes;
struct rb_root root; struct rb_root root;
struct ulist_node *prealloc;
}; };
void ulist_init(struct ulist *ulist); void ulist_init(struct ulist *ulist);
void ulist_release(struct ulist *ulist); void ulist_release(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist); void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(gfp_t gfp_mask); struct ulist *ulist_alloc(gfp_t gfp_mask);
/*
 * Cache one spare node in @ulist (no-op if one is already cached), allocated
 * with @gfp_mask.  Returns nothing, so an allocation failure is not reported.
 */
void ulist_prealloc(struct ulist *ulist, gfp_t gfp_mask);
void ulist_free(struct ulist *ulist); void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,

View File

@ -13,7 +13,7 @@
#include "accessors.h" #include "accessors.h"
#include "uuid-tree.h" #include "uuid-tree.h"
static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) static void btrfs_uuid_to_key(const u8 *uuid, u8 type, struct btrfs_key *key)
{ {
key->type = type; key->type = type;
key->objectid = get_unaligned_le64(uuid); key->objectid = get_unaligned_le64(uuid);
@ -21,7 +21,7 @@ static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
} }
/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */ /* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid, static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, const u8 *uuid,
u8 type, u64 subid) u8 type, u64 subid)
{ {
int ret; int ret;
@ -81,7 +81,7 @@ out:
return ret; return ret;
} }
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
u64 subid_cpu) u64 subid_cpu)
{ {
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
@ -145,7 +145,7 @@ out:
return ret; return ret;
} }
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
u64 subid) u64 subid)
{ {
struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_fs_info *fs_info = trans->fs_info;
@ -256,7 +256,7 @@ out:
* < 0 if an error occurred * < 0 if an error occurred
*/ */
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
u8 *uuid, u8 type, u64 subvolid) const u8 *uuid, u8 type, u64 subvolid)
{ {
int ret = 0; int ret = 0;
struct btrfs_root *subvol_root; struct btrfs_root *subvol_root;

View File

@ -8,9 +8,9 @@
struct btrfs_trans_handle; struct btrfs_trans_handle;
struct btrfs_fs_info; struct btrfs_fs_info;
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
u64 subid); u64 subid);
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
u64 subid); u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info); int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);

View File

@ -722,7 +722,7 @@ error_free_page:
return -EINVAL; return -EINVAL;
} }
u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb) const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
{ {
bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) & bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID); BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
@ -1380,19 +1380,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
bool new_device_added = false; bool new_device_added = false;
struct btrfs_device *device = NULL; struct btrfs_device *device = NULL;
struct file *bdev_file; struct file *bdev_file;
u64 bytenr, bytenr_orig; u64 bytenr;
dev_t devt; dev_t devt;
int ret; int ret;
lockdep_assert_held(&uuid_mutex); lockdep_assert_held(&uuid_mutex);
/*
* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
* So, we need to add a special mount option to scan for
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
/* /*
* Avoid an exclusive open here, as the systemd-udev may initiate the * Avoid an exclusive open here, as the systemd-udev may initiate the
* device scan which may race with the user's mount or mkfs command, * device scan which may race with the user's mount or mkfs command,
@ -1407,7 +1400,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
if (IS_ERR(bdev_file)) if (IS_ERR(bdev_file))
return ERR_CAST(bdev_file); return ERR_CAST(bdev_file);
bytenr_orig = btrfs_sb_offset(0); /*
* We would like to check all the super blocks, but doing so would
* allow a mount to succeed after a mkfs from a different filesystem.
* Currently, recovery from a bad primary btrfs superblock is done
* using the userspace command 'btrfs check --super'.
*/
ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr); ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr);
if (ret) { if (ret) {
device = ERR_PTR(ret); device = ERR_PTR(ret);
@ -1415,7 +1413,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
} }
disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr, disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr,
bytenr_orig); btrfs_sb_offset(0));
if (IS_ERR(disk_super)) { if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super); device = ERR_CAST(disk_super);
goto error_bdev_put; goto error_bdev_put;
@ -2991,16 +2989,19 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
if (ret < 0) if (ret < 0)
goto out; goto out;
else if (ret > 0) { /* Logic error or corruption */ else if (ret > 0) { /* Logic error or corruption */
btrfs_handle_fs_error(fs_info, -ENOENT, btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
"Failed lookup while freeing chunk."); chunk_offset);
ret = -ENOENT; btrfs_abort_transaction(trans, -ENOENT);
ret = -EUCLEAN;
goto out; goto out;
} }
ret = btrfs_del_item(trans, root, path); ret = btrfs_del_item(trans, root, path);
if (ret < 0) if (ret < 0) {
btrfs_handle_fs_error(fs_info, ret, btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
"Failed to delete chunk item."); btrfs_abort_transaction(trans, ret);
goto out;
}
out: out:
btrfs_free_path(path); btrfs_free_path(path);
return ret; return ret;
@ -5628,8 +5629,6 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
u64 start = ctl->start; u64 start = ctl->start;
u64 type = ctl->type; u64 type = ctl->type;
int ret; int ret;
int i;
int j;
map = btrfs_alloc_chunk_map(ctl->num_stripes, GFP_NOFS); map = btrfs_alloc_chunk_map(ctl->num_stripes, GFP_NOFS);
if (!map) if (!map)
@ -5644,8 +5643,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
map->sub_stripes = ctl->sub_stripes; map->sub_stripes = ctl->sub_stripes;
map->num_stripes = ctl->num_stripes; map->num_stripes = ctl->num_stripes;
for (i = 0; i < ctl->ndevs; ++i) { for (int i = 0; i < ctl->ndevs; i++) {
for (j = 0; j < ctl->dev_stripes; ++j) { for (int j = 0; j < ctl->dev_stripes; j++) {
int s = i * ctl->dev_stripes + j; int s = i * ctl->dev_stripes + j;
map->stripes[s].dev = devices_info[i].dev; map->stripes[s].dev = devices_info[i].dev;
map->stripes[s].physical = devices_info[i].dev_offset + map->stripes[s].physical = devices_info[i].dev_offset +
@ -6288,20 +6287,19 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
return ret; return ret;
} }
static void handle_ops_on_dev_replace(enum btrfs_map_op op, static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc,
struct btrfs_io_context *bioc,
struct btrfs_dev_replace *dev_replace, struct btrfs_dev_replace *dev_replace,
u64 logical, u64 logical,
int *num_stripes_ret, int *max_errors_ret) struct btrfs_io_geometry *io_geom)
{ {
u64 srcdev_devid = dev_replace->srcdev->devid; u64 srcdev_devid = dev_replace->srcdev->devid;
/* /*
* At this stage, num_stripes is still the real number of stripes, * At this stage, num_stripes is still the real number of stripes,
* excluding the duplicated stripes. * excluding the duplicated stripes.
*/ */
int num_stripes = *num_stripes_ret; int num_stripes = io_geom->num_stripes;
int max_errors = io_geom->max_errors;
int nr_extra_stripes = 0; int nr_extra_stripes = 0;
int max_errors = *max_errors_ret;
int i; int i;
/* /*
@ -6342,7 +6340,7 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
* replace. * replace.
* If we have 2 extra stripes, only choose the one with smaller physical. * If we have 2 extra stripes, only choose the one with smaller physical.
*/ */
if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) { if (io_geom->op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
struct btrfs_io_stripe *first = &bioc->stripes[num_stripes]; struct btrfs_io_stripe *first = &bioc->stripes[num_stripes];
struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1]; struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1];
@ -6360,8 +6358,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
} }
} }
*num_stripes_ret = num_stripes + nr_extra_stripes; io_geom->num_stripes = num_stripes + nr_extra_stripes;
*max_errors_ret = max_errors + nr_extra_stripes; io_geom->max_errors = max_errors + nr_extra_stripes;
bioc->replace_nr_stripes = nr_extra_stripes; bioc->replace_nr_stripes = nr_extra_stripes;
} }
@ -6624,7 +6622,6 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_chunk_map *map; struct btrfs_chunk_map *map;
struct btrfs_io_geometry io_geom = { 0 }; struct btrfs_io_geometry io_geom = { 0 };
u64 map_offset; u64 map_offset;
int i;
int ret = 0; int ret = 0;
int num_copies; int num_copies;
struct btrfs_io_context *bioc = NULL; struct btrfs_io_context *bioc = NULL;
@ -6770,7 +6767,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* For all other non-RAID56 profiles, just copy the target * For all other non-RAID56 profiles, just copy the target
* stripe into the bioc. * stripe into the bioc.
*/ */
for (i = 0; i < io_geom.num_stripes; i++) { for (int i = 0; i < io_geom.num_stripes; i++) {
ret = set_io_stripe(fs_info, logical, length, ret = set_io_stripe(fs_info, logical, length,
&bioc->stripes[i], map, &io_geom); &bioc->stripes[i], map, &io_geom);
if (ret < 0) if (ret < 0)
@ -6790,8 +6787,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
op != BTRFS_MAP_READ) { op != BTRFS_MAP_READ) {
handle_ops_on_dev_replace(op, bioc, dev_replace, logical, handle_ops_on_dev_replace(bioc, dev_replace, logical, &io_geom);
&io_geom.num_stripes, &io_geom.max_errors);
} }
*bioc_ret = bioc; *bioc_ret = bioc;

View File

@ -834,6 +834,6 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical); bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb); const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);
#endif #endif

View File

@ -24,7 +24,7 @@
#include "accessors.h" #include "accessors.h"
#include "dir-item.h" #include "dir-item.h"
int btrfs_getxattr(struct inode *inode, const char *name, int btrfs_getxattr(const struct inode *inode, const char *name,
void *buffer, size_t size) void *buffer, size_t size)
{ {
struct btrfs_dir_item *di; struct btrfs_dir_item *di;
@ -451,7 +451,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
if (IS_ERR(trans)) if (IS_ERR(trans))
return PTR_ERR(trans); return PTR_ERR(trans);
ret = btrfs_set_prop(trans, inode, name, value, size, flags); ret = btrfs_set_prop(trans, BTRFS_I(inode), name, value, size, flags);
if (!ret) { if (!ret) {
inode_inc_iversion(inode); inode_inc_iversion(inode);
inode_set_ctime_current(inode); inode_set_ctime_current(inode);

View File

@ -14,7 +14,7 @@ struct btrfs_trans_handle;
extern const struct xattr_handler * const btrfs_xattr_handlers[]; extern const struct xattr_handler * const btrfs_xattr_handlers[];
int btrfs_getxattr(struct inode *inode, const char *name, int btrfs_getxattr(const struct inode *inode, const char *name,
void *buffer, size_t size); void *buffer, size_t size);
int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
const char *name, const void *value, size_t size, int flags); const char *name, const void *value, size_t size, int flags);

View File

@ -18,6 +18,7 @@
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include "btrfs_inode.h"
#include "compression.h" #include "compression.h"
/* workspace buffer size for s390 zlib hardware support */ /* workspace buffer size for s390 zlib hardware support */
@ -112,8 +113,13 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
*total_out = 0; *total_out = 0;
*total_in = 0; *total_in = 0;
if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) { ret = zlib_deflateInit(&workspace->strm, workspace->level);
pr_warn("BTRFS: deflateInit failed\n"); if (unlikely(ret != Z_OK)) {
struct btrfs_inode *inode = BTRFS_I(mapping->host);
btrfs_err(inode->root->fs_info,
"zlib compression init failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
@ -182,9 +188,13 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
} }
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
if (ret != Z_OK) { if (unlikely(ret != Z_OK)) {
pr_debug("BTRFS: deflate in loop returned %d\n", struct btrfs_inode *inode = BTRFS_I(mapping->host);
ret);
btrfs_warn(inode->root->fs_info,
"zlib compression failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
start);
zlib_deflateEnd(&workspace->strm); zlib_deflateEnd(&workspace->strm);
ret = -EIO; ret = -EIO;
goto out; goto out;
@ -307,9 +317,14 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
workspace->strm.avail_in -= 2; workspace->strm.avail_in -= 2;
} }
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { ret = zlib_inflateInit2(&workspace->strm, wbits);
pr_warn("BTRFS: inflateInit failed\n"); if (unlikely(ret != Z_OK)) {
struct btrfs_inode *inode = cb->bbio.inode;
kunmap_local(data_in); kunmap_local(data_in);
btrfs_err(inode->root->fs_info,
"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
return -EIO; return -EIO;
} }
while (workspace->strm.total_in < srclen) { while (workspace->strm.total_in < srclen) {
@ -348,10 +363,15 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
workspace->strm.avail_in = min(tmp, PAGE_SIZE); workspace->strm.avail_in = min(tmp, PAGE_SIZE);
} }
} }
if (ret != Z_STREAM_END) if (unlikely(ret != Z_STREAM_END)) {
btrfs_err(cb->bbio.inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(cb->bbio.inode->root),
btrfs_ino(cb->bbio.inode), cb->start);
ret = -EIO; ret = -EIO;
else } else {
ret = 0; ret = 0;
}
done: done:
zlib_inflateEnd(&workspace->strm); zlib_inflateEnd(&workspace->strm);
if (data_in) if (data_in)
@ -386,8 +406,14 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
workspace->strm.avail_in -= 2; workspace->strm.avail_in -= 2;
} }
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { ret = zlib_inflateInit2(&workspace->strm, wbits);
pr_warn("BTRFS: inflateInit failed\n"); if (unlikely(ret != Z_OK)) {
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
btrfs_err(inode->root->fs_info,
"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page));
return -EIO; return -EIO;
} }
@ -404,8 +430,12 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
out: out:
if (unlikely(to_copy != destlen)) { if (unlikely(to_copy != destlen)) {
pr_warn_ratelimited("BTRFS: inflate failed, decompressed=%lu expected=%zu\n", struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
to_copy, destlen);
btrfs_err(inode->root->fs_info,
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page), to_copy, destlen);
ret = -EIO; ret = -EIO;
} else { } else {
ret = 0; ret = 0;

View File

@ -87,9 +87,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
bool empty[BTRFS_NR_SB_LOG_ZONES]; bool empty[BTRFS_NR_SB_LOG_ZONES];
bool full[BTRFS_NR_SB_LOG_ZONES]; bool full[BTRFS_NR_SB_LOG_ZONES];
sector_t sector; sector_t sector;
int i;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL); ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY); empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
full[i] = sb_zone_is_full(&zones[i]); full[i] = sb_zone_is_full(&zones[i]);
@ -121,9 +120,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
struct address_space *mapping = bdev->bd_mapping; struct address_space *mapping = bdev->bd_mapping;
struct page *page[BTRFS_NR_SB_LOG_ZONES]; struct page *page[BTRFS_NR_SB_LOG_ZONES];
struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES]; struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
int i;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT; u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
BTRFS_SUPER_INFO_SIZE; BTRFS_SUPER_INFO_SIZE;
@ -144,7 +142,7 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
else else
sector = zones[0].start; sector = zones[0].start;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++)
btrfs_release_disk_super(super[i]); btrfs_release_disk_super(super[i]);
} else if (!full[0] && (empty[1] || full[1])) { } else if (!full[0] && (empty[1] || full[1])) {
sector = zones[0].wp; sector = zones[0].wp;
@ -652,8 +650,7 @@ out:
return NULL; return NULL;
} }
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, static int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone)
struct blk_zone *zone)
{ {
unsigned int nr_zones = 1; unsigned int nr_zones = 1;
int ret; int ret;
@ -770,7 +767,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
return 0; return 0;
} }
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info, unsigned long *mount_opt) int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info, unsigned long *mount_opt)
{ {
if (!btrfs_is_zoned(info)) if (!btrfs_is_zoned(info))
return 0; return 0;
@ -1726,7 +1723,7 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
if (!btrfs_is_zoned(fs_info)) if (!btrfs_is_zoned(fs_info))
return false; return false;
if (!inode || !is_data_inode(&inode->vfs_inode)) if (!inode || !is_data_inode(inode))
return false; return false;
if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
@ -1768,7 +1765,7 @@ void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered, static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
u64 logical) u64 logical)
{ {
struct extent_map_tree *em_tree = &BTRFS_I(ordered->inode)->extent_tree; struct extent_map_tree *em_tree = &ordered->inode->extent_tree;
struct extent_map *em; struct extent_map *em;
ordered->disk_bytenr = logical; ordered->disk_bytenr = logical;
@ -1776,7 +1773,9 @@ static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, ordered->file_offset, em = search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes); ordered->num_bytes);
em->block_start = logical; /* The em should be a new COW extent, thus it should not have an offset. */
ASSERT(em->offset == 0);
em->disk_bytenr = logical;
free_extent_map(em); free_extent_map(em);
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
} }
@ -1787,7 +1786,7 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
struct btrfs_ordered_extent *new; struct btrfs_ordered_extent *new;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
split_extent_map(BTRFS_I(ordered->inode), ordered->file_offset, split_extent_map(ordered->inode, ordered->file_offset,
ordered->num_bytes, len, logical)) ordered->num_bytes, len, logical))
return false; return false;
@ -1801,7 +1800,7 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered) void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
{ {
struct btrfs_inode *inode = BTRFS_I(ordered->inode); struct btrfs_inode *inode = ordered->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_sum *sum; struct btrfs_ordered_sum *sum;
u64 logical, len; u64 logical, len;
@ -1845,7 +1844,7 @@ out:
* here so that we don't attempt to log the csums later. * here so that we don't attempt to log the csums later.
*/ */
if ((inode->flags & BTRFS_INODE_NODATASUM) || if ((inode->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) { test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) {
while ((sum = list_first_entry_or_null(&ordered->list, while ((sum = list_first_entry_or_null(&ordered->list,
typeof(*sum), list))) { typeof(*sum), list))) {
list_del(&sum->list); list_del(&sum->list);
@ -2215,8 +2214,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
/* Ensure all writes in this block group finish */ /* Ensure all writes in this block group finish */
btrfs_wait_block_group_reservations(block_group); btrfs_wait_block_group_reservations(block_group);
/* No need to wait for NOCOW writers. Zoned mode does not allow that */ /* No need to wait for NOCOW writers. Zoned mode does not allow that */
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start, btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group);
block_group->length);
/* Wait for extent buffers to be written. */ /* Wait for extent buffers to be written. */
if (is_metadata) if (is_metadata)
wait_eb_writebacks(block_group); wait_eb_writebacks(block_group);

View File

@ -53,14 +53,12 @@ struct btrfs_zoned_device_info {
void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered); void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered);
#ifdef CONFIG_BLK_DEV_ZONED #ifdef CONFIG_BLK_DEV_ZONED
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone);
int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info); int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache); int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
void btrfs_destroy_dev_zone_info(struct btrfs_device *device); void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev); struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info); int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info, unsigned long *mount_opt); int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info, unsigned long *mount_opt);
int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
u64 *bytenr_ret); u64 *bytenr_ret);
int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw, int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
@ -98,11 +96,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, bool do_finish); struct btrfs_space_info *space_info, bool do_finish);
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info); void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
#else /* CONFIG_BLK_DEV_ZONED */ #else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
{
return 0;
}
static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
{ {
@ -136,7 +129,7 @@ static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
static inline int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info, static inline int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info,
unsigned long *mount_opt) unsigned long *mount_opt)
{ {
return 0; return 0;

View File

@ -19,6 +19,7 @@
#include <linux/zstd.h> #include <linux/zstd.h>
#include "misc.h" #include "misc.h"
#include "fs.h" #include "fs.h"
#include "btrfs_inode.h"
#include "compression.h" #include "compression.h"
#include "super.h" #include "super.h"
@ -399,8 +400,13 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
/* Initialize the stream */ /* Initialize the stream */
stream = zstd_init_cstream(&params, len, workspace->mem, stream = zstd_init_cstream(&params, len, workspace->mem,
workspace->size); workspace->size);
if (!stream) { if (unlikely(!stream)) {
pr_warn("BTRFS: zstd_init_cstream failed\n"); struct btrfs_inode *inode = BTRFS_I(mapping->host);
btrfs_err(inode->root->fs_info,
"zstd compression init level %d failed, root %llu inode %llu offset %llu",
workspace->req_level, btrfs_root_id(inode->root),
btrfs_ino(inode), start);
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
@ -429,9 +435,14 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
ret2 = zstd_compress_stream(stream, &workspace->out_buf, ret2 = zstd_compress_stream(stream, &workspace->out_buf,
&workspace->in_buf); &workspace->in_buf);
if (zstd_is_error(ret2)) { if (unlikely(zstd_is_error(ret2))) {
pr_debug("BTRFS: zstd_compress_stream returned %d\n", struct btrfs_inode *inode = BTRFS_I(mapping->host);
zstd_get_error_code(ret2));
btrfs_warn(inode->root->fs_info,
"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
workspace->req_level, zstd_get_error_code(ret2),
btrfs_root_id(inode->root), btrfs_ino(inode),
start);
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
@ -497,9 +508,14 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
size_t ret2; size_t ret2;
ret2 = zstd_end_stream(stream, &workspace->out_buf); ret2 = zstd_end_stream(stream, &workspace->out_buf);
if (zstd_is_error(ret2)) { if (unlikely(zstd_is_error(ret2))) {
pr_debug("BTRFS: zstd_end_stream returned %d\n", struct btrfs_inode *inode = BTRFS_I(mapping->host);
zstd_get_error_code(ret2));
btrfs_err(inode->root->fs_info,
"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
workspace->req_level, zstd_get_error_code(ret2),
btrfs_root_id(inode->root), btrfs_ino(inode),
start);
ret = -EIO; ret = -EIO;
goto out; goto out;
} }
@ -561,8 +577,12 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
stream = zstd_init_dstream( stream = zstd_init_dstream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
if (!stream) { if (unlikely(!stream)) {
pr_debug("BTRFS: zstd_init_dstream failed\n"); struct btrfs_inode *inode = cb->bbio.inode;
btrfs_err(inode->root->fs_info,
"zstd decompression init failed, root %llu inode %llu offset %llu",
btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
ret = -EIO; ret = -EIO;
goto done; goto done;
} }
@ -580,9 +600,13 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
ret2 = zstd_decompress_stream(stream, &workspace->out_buf, ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
&workspace->in_buf); &workspace->in_buf);
if (zstd_is_error(ret2)) { if (unlikely(zstd_is_error(ret2))) {
pr_debug("BTRFS: zstd_decompress_stream returned %d\n", struct btrfs_inode *inode = cb->bbio.inode;
zstd_get_error_code(ret2));
btrfs_err(inode->root->fs_info,
"zstd decompression failed, error %d root %llu inode %llu offset %llu",
zstd_get_error_code(ret2), btrfs_root_id(inode->root),
btrfs_ino(inode), cb->start);
ret = -EIO; ret = -EIO;
goto done; goto done;
} }
@ -637,8 +661,14 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
stream = zstd_init_dstream( stream = zstd_init_dstream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
if (!stream) { if (unlikely(!stream)) {
pr_warn("BTRFS: zstd_init_dstream failed\n"); struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
btrfs_err(inode->root->fs_info,
"zstd decompression init failed, root %llu inode %llu offset %llu",
btrfs_root_id(inode->root), btrfs_ino(inode),
page_offset(dest_page));
ret = -EIO;
goto finish; goto finish;
} }
@ -655,9 +685,13 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
* one call should end the decompression. * one call should end the decompression.
*/ */
ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
if (zstd_is_error(ret)) { if (unlikely(zstd_is_error(ret))) {
pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n", struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
zstd_get_error_code(ret));
btrfs_err(inode->root->fs_info,
"zstd decompression failed, error %d root %llu inode %llu offset %llu",
zstd_get_error_code(ret), btrfs_root_id(inode->root),
btrfs_ino(inode), page_offset(dest_page));
goto finish; goto finish;
} }
to_copy = workspace->out_buf.pos; to_copy = workspace->out_buf.pos;

View File

@ -291,9 +291,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
__field( u64, ino ) __field( u64, ino )
__field( u64, start ) __field( u64, start )
__field( u64, len ) __field( u64, len )
__field( u64, orig_start )
__field( u64, block_start )
__field( u64, block_len )
__field( u32, flags ) __field( u32, flags )
__field( int, refs ) __field( int, refs )
), ),
@ -303,23 +300,15 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
__entry->ino = btrfs_ino(inode); __entry->ino = btrfs_ino(inode);
__entry->start = map->start; __entry->start = map->start;
__entry->len = map->len; __entry->len = map->len;
__entry->orig_start = map->orig_start;
__entry->block_start = map->block_start;
__entry->block_len = map->block_len;
__entry->flags = map->flags; __entry->flags = map->flags;
__entry->refs = refcount_read(&map->refs); __entry->refs = refcount_read(&map->refs);
), ),
TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu " TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu flags=%s refs=%u",
"orig_start=%llu block_start=%llu(%s) "
"block_len=%llu flags=%s refs=%u",
show_root_type(__entry->root_objectid), show_root_type(__entry->root_objectid),
__entry->ino, __entry->ino,
__entry->start, __entry->start,
__entry->len, __entry->len,
__entry->orig_start,
show_map_type(__entry->block_start),
__entry->block_len,
show_map_flags(__entry->flags), show_map_flags(__entry->flags),
__entry->refs) __entry->refs)
); );
@ -2617,7 +2606,6 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
__field( u64, root_id ) __field( u64, root_id )
__field( u64, start ) __field( u64, start )
__field( u64, len ) __field( u64, len )
__field( u64, block_start )
__field( u32, flags ) __field( u32, flags )
), ),
@ -2626,15 +2614,12 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
__entry->root_id = inode->root->root_key.objectid; __entry->root_id = inode->root->root_key.objectid;
__entry->start = em->start; __entry->start = em->start;
__entry->len = em->len; __entry->len = em->len;
__entry->block_start = em->block_start;
__entry->flags = em->flags; __entry->flags = em->flags;
), ),
TP_printk_btrfs( TP_printk_btrfs("ino=%llu root=%llu(%s) start=%llu len=%llu flags=%s",
"ino=%llu root=%llu(%s) start=%llu len=%llu block_start=%llu(%s) flags=%s",
__entry->ino, show_root_type(__entry->root_id), __entry->ino, show_root_type(__entry->root_id),
__entry->start, __entry->len, __entry->start, __entry->len,
show_map_type(__entry->block_start),
show_map_flags(__entry->flags)) show_map_flags(__entry->flags))
); );

View File

@ -747,21 +747,9 @@ struct btrfs_raid_stride {
__le64 physical; __le64 physical;
} __attribute__ ((__packed__)); } __attribute__ ((__packed__));
/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */
#define BTRFS_STRIPE_RAID0 1
#define BTRFS_STRIPE_RAID1 2
#define BTRFS_STRIPE_DUP 3
#define BTRFS_STRIPE_RAID10 4
#define BTRFS_STRIPE_RAID5 5
#define BTRFS_STRIPE_RAID6 6
#define BTRFS_STRIPE_RAID1C3 7
#define BTRFS_STRIPE_RAID1C4 8
struct btrfs_stripe_extent { struct btrfs_stripe_extent {
__u8 encoding;
__u8 reserved[7];
/* An array of raid strides this stripe is composed of. */ /* An array of raid strides this stripe is composed of. */
struct btrfs_raid_stride strides[]; __DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides);
} __attribute__ ((__packed__)); } __attribute__ ((__packed__));
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
@ -777,6 +765,14 @@ struct btrfs_stripe_extent {
#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
/*
* These are temporary flags utilized by btrfs-progs to do offline conversion.
* They are rejected by the kernel,
* but are all kept here to avoid conflicts.
*/
#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38)
#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39)
#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40)
/* /*
* items in the extent btree are used to record the objectid of the * items in the extent btree are used to record the objectid of the