for-5.12-rc3-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmBTeBsACgkQxWXV+ddt WDtwcBAAoto5Pbc3Lvt0aha3qn9q/Ms9lNU3YIwTjqXV3lIRKksWCS7kQmWlFmLz dILhdRBg1iWVh8qbeqpL5su7yNJduypsY/ImJroukb/BzwQViFRDGy5qIc56qLH2 OVTx4LQ0zdqVdD86Qj0mt9ilSjgXYN+J53IUjsSSyJIpgt3vVcfjCYSkFO8zBiMH eliRtYShzJHkjEwVWLZRzk76oTnFQEC28IdYJ4y95mYl2wCABfTU2ylSeVDTtc6O x+fNMHHRmde2nbsHc+0eMm7rYLXuzvyx/tY17u6A6iwEQLGjE4rXOVZ7kA93WgAd YTXhM/B+YFfirNh029Av/MJP+2t9YBEODAHl1tnOdM0mfvXkpimaW0jvUEhi5f6I ZGu5FytscsgjyUK827WL7bZKO8WMzTLQvB3ryZ9UcrHm3QbZ7xGdoBE2L86p4Euw LiXUALdOWeYjFKSW9WWKrtQBtdjlLQYqJt+hL0ifaGlnfoi2G+DQeKtL9ZAKH5Cu gcjDUewnJtYPLyDOCRjQPFcts/MD5o81qMLeEwshmZT/bNMD9JOGEppCxBWGWSCx dYGq04Wib/dN710i5jB1XbJboBmT2SZDyBeiKTpCXs5mECBU00uWkkO98oId1YS3 wHu9qyGUOi2g88V27jH593/JstUYn6zyxJYIZX84mzcxOqZlKuo= =auMP -----END PGP SIGNATURE----- Merge tag 'for-5.12-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs fixes from David Sterba: "There are still regressions being found and fixed in the zoned mode and subpage code, the rest are fixes for bugs reported by users. 
Regressions: - subpage block support: - readahead works on the proper block size - fix last page zeroing - zoned mode: - linked list corruption for tree log Fixes: - qgroup leak after falloc failure - tree mod log and backref resolving: - extent buffer cloning race when resolving backrefs - pin deleted leaves with active tree mod log users - drop debugging flag from slab cache" * tag 'for-5.12-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: always pin deleted leaves when there are active tree mod log users btrfs: fix race when cloning extent buffer during rewind of an old root btrfs: fix slab cache flags for free space tree bitmap btrfs: subpage: make readahead work properly btrfs: subpage: fix wild pointer access during metadata read failure btrfs: zoned: fix linked list corruption after log root tree allocation failure btrfs: fix qgroup data rsv leak caused by falloc failure btrfs: track qgroup released data in own variable in insert_prealloc_file_extent btrfs: fix wrong offset to zero out range beyond i_size
This commit is contained in:
commit
81aa0968b7
@ -1365,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
|
||||
"failed to read tree block %llu from get_old_root",
|
||||
logical);
|
||||
} else {
|
||||
btrfs_tree_read_lock(old);
|
||||
eb = btrfs_clone_extent_buffer(old);
|
||||
btrfs_tree_read_unlock(old);
|
||||
free_extent_buffer(old);
|
||||
}
|
||||
} else if (old_root) {
|
||||
|
@ -3323,6 +3323,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (last_ref && btrfs_header_generation(buf) == trans->transid) {
|
||||
struct btrfs_block_group *cache;
|
||||
bool must_pin = false;
|
||||
|
||||
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
|
||||
ret = check_ref_cleanup(trans, buf->start);
|
||||
@ -3340,7 +3341,27 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (btrfs_is_zoned(fs_info)) {
|
||||
/*
|
||||
* If this is a leaf and there are tree mod log users, we may
|
||||
* have recorded mod log operations that point to this leaf.
|
||||
* So we must make sure no one reuses this leaf's extent before
|
||||
* mod log operations are applied to a node, otherwise after
|
||||
* rewinding a node using the mod log operations we get an
|
||||
* inconsistent btree, as the leaf's extent may now be used as
|
||||
* a node or leaf for another different btree.
|
||||
* We are safe from races here because at this point no other
|
||||
* node or root points to this extent buffer, so if after this
|
||||
* check a new tree mod log user joins, it will not be able to
|
||||
* find a node pointing to this leaf and record operations that
|
||||
* point to this leaf.
|
||||
*/
|
||||
if (btrfs_header_level(buf) == 0) {
|
||||
read_lock(&fs_info->tree_mod_log_lock);
|
||||
must_pin = !list_empty(&fs_info->tree_mod_seq_list);
|
||||
read_unlock(&fs_info->tree_mod_log_lock);
|
||||
}
|
||||
|
||||
if (must_pin || btrfs_is_zoned(fs_info)) {
|
||||
btrfs_redirty_list_add(trans->transaction, buf);
|
||||
pin_down_extent(trans, cache, buf->start, buf->len, 1);
|
||||
btrfs_put_block_group(cache);
|
||||
|
@ -2885,6 +2885,35 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
|
||||
btrfs_subpage_end_reader(fs_info, page, start, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find extent buffer for a given bytenr.
|
||||
*
|
||||
* This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
|
||||
* in endio context.
|
||||
*/
|
||||
static struct extent_buffer *find_extent_buffer_readpage(
|
||||
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
/*
|
||||
* For regular sectorsize, we can use page->private to grab extent
|
||||
* buffer
|
||||
*/
|
||||
if (fs_info->sectorsize == PAGE_SIZE) {
|
||||
ASSERT(PagePrivate(page) && page->private);
|
||||
return (struct extent_buffer *)page->private;
|
||||
}
|
||||
|
||||
/* For subpage case, we need to lookup buffer radix tree */
|
||||
rcu_read_lock();
|
||||
eb = radix_tree_lookup(&fs_info->buffer_radix,
|
||||
bytenr >> fs_info->sectorsize_bits);
|
||||
rcu_read_unlock();
|
||||
ASSERT(eb);
|
||||
return eb;
|
||||
}
|
||||
|
||||
/*
|
||||
* after a readpage IO is done, we need to:
|
||||
* clear the uptodate bits on error
|
||||
@ -2996,7 +3025,7 @@ static void end_bio_extent_readpage(struct bio *bio)
|
||||
} else {
|
||||
struct extent_buffer *eb;
|
||||
|
||||
eb = (struct extent_buffer *)page->private;
|
||||
eb = find_extent_buffer_readpage(fs_info, page, start);
|
||||
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
|
||||
eb->read_mirror = mirror;
|
||||
atomic_dec(&eb->io_pages);
|
||||
@ -3020,7 +3049,7 @@ readpage_ok:
|
||||
*/
|
||||
if (page->index == end_index && i_size <= end) {
|
||||
u32 zero_start = max(offset_in_page(i_size),
|
||||
offset_in_page(end));
|
||||
offset_in_page(start));
|
||||
|
||||
zero_user_segment(page, zero_start,
|
||||
offset_in_page(end) + 1);
|
||||
|
@ -9008,7 +9008,7 @@ int __init btrfs_init_cachep(void)
|
||||
|
||||
btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
|
||||
PAGE_SIZE, PAGE_SIZE,
|
||||
SLAB_RED_ZONE, NULL);
|
||||
SLAB_MEM_SPREAD, NULL);
|
||||
if (!btrfs_free_space_bitmap_cachep)
|
||||
goto fail;
|
||||
|
||||
@ -9877,6 +9877,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
|
||||
struct btrfs_path *path;
|
||||
u64 start = ins->objectid;
|
||||
u64 len = ins->offset;
|
||||
int qgroup_released;
|
||||
int ret;
|
||||
|
||||
memset(&stack_fi, 0, sizeof(stack_fi));
|
||||
@ -9889,16 +9890,16 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
|
||||
btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
|
||||
/* Encryption and other encoding is reserved and all 0 */
|
||||
|
||||
ret = btrfs_qgroup_release_data(inode, file_offset, len);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
|
||||
if (qgroup_released < 0)
|
||||
return ERR_PTR(qgroup_released);
|
||||
|
||||
if (trans) {
|
||||
ret = insert_reserved_file_extent(trans, inode,
|
||||
file_offset, &stack_fi,
|
||||
true, ret);
|
||||
true, qgroup_released);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
goto free_qgroup;
|
||||
return trans;
|
||||
}
|
||||
|
||||
@ -9909,21 +9910,35 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
|
||||
extent_info.file_offset = file_offset;
|
||||
extent_info.extent_buf = (char *)&stack_fi;
|
||||
extent_info.is_new_extent = true;
|
||||
extent_info.qgroup_reserved = ret;
|
||||
extent_info.qgroup_reserved = qgroup_released;
|
||||
extent_info.insertions = 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
goto free_qgroup;
|
||||
}
|
||||
|
||||
ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset,
|
||||
file_offset + len - 1, &extent_info,
|
||||
&trans);
|
||||
btrfs_free_path(path);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
goto free_qgroup;
|
||||
return trans;
|
||||
|
||||
free_qgroup:
|
||||
/*
|
||||
* We have released qgroup data range at the beginning of the function,
|
||||
* and normally qgroup_released bytes will be freed when committing
|
||||
* transaction.
|
||||
* But if we error out early, we have to free what we have released
|
||||
* or we leak qgroup data reservation.
|
||||
*/
|
||||
btrfs_qgroup_free_refroot(inode->root->fs_info,
|
||||
inode->root->root_key.objectid, qgroup_released,
|
||||
BTRFS_QGROUP_RSV_DATA);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
|
@ -209,7 +209,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err)
|
||||
/* find extent */
|
||||
spin_lock(&fs_info->reada_lock);
|
||||
re = radix_tree_lookup(&fs_info->reada_tree,
|
||||
eb->start >> PAGE_SHIFT);
|
||||
eb->start >> fs_info->sectorsize_bits);
|
||||
if (re)
|
||||
re->refcnt++;
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
@ -240,7 +240,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
|
||||
zone = NULL;
|
||||
spin_lock(&fs_info->reada_lock);
|
||||
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
|
||||
logical >> PAGE_SHIFT, 1);
|
||||
logical >> fs_info->sectorsize_bits, 1);
|
||||
if (ret == 1 && logical >= zone->start && logical <= zone->end) {
|
||||
kref_get(&zone->refcnt);
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
@ -283,13 +283,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
|
||||
|
||||
spin_lock(&fs_info->reada_lock);
|
||||
ret = radix_tree_insert(&dev->reada_zones,
|
||||
(unsigned long)(zone->end >> PAGE_SHIFT),
|
||||
zone);
|
||||
(unsigned long)(zone->end >> fs_info->sectorsize_bits),
|
||||
zone);
|
||||
|
||||
if (ret == -EEXIST) {
|
||||
kfree(zone);
|
||||
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
|
||||
logical >> PAGE_SHIFT, 1);
|
||||
logical >> fs_info->sectorsize_bits, 1);
|
||||
if (ret == 1 && logical >= zone->start && logical <= zone->end)
|
||||
kref_get(&zone->refcnt);
|
||||
else
|
||||
@ -315,7 +315,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 length;
|
||||
int real_stripes;
|
||||
int nzones = 0;
|
||||
unsigned long index = logical >> PAGE_SHIFT;
|
||||
unsigned long index = logical >> fs_info->sectorsize_bits;
|
||||
int dev_replace_is_ongoing;
|
||||
int have_zone = 0;
|
||||
|
||||
@ -497,7 +497,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
|
||||
struct reada_extent *re)
|
||||
{
|
||||
int i;
|
||||
unsigned long index = re->logical >> PAGE_SHIFT;
|
||||
unsigned long index = re->logical >> fs_info->sectorsize_bits;
|
||||
|
||||
spin_lock(&fs_info->reada_lock);
|
||||
if (--re->refcnt) {
|
||||
@ -538,11 +538,12 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
|
||||
static void reada_zone_release(struct kref *kref)
|
||||
{
|
||||
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
|
||||
struct btrfs_fs_info *fs_info = zone->device->fs_info;
|
||||
|
||||
lockdep_assert_held(&zone->device->fs_info->reada_lock);
|
||||
lockdep_assert_held(&fs_info->reada_lock);
|
||||
|
||||
radix_tree_delete(&zone->device->reada_zones,
|
||||
zone->end >> PAGE_SHIFT);
|
||||
zone->end >> fs_info->sectorsize_bits);
|
||||
|
||||
kfree(zone);
|
||||
}
|
||||
@ -593,7 +594,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
|
||||
static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
|
||||
{
|
||||
int i;
|
||||
unsigned long index = zone->end >> PAGE_SHIFT;
|
||||
unsigned long index = zone->end >> zone->device->fs_info->sectorsize_bits;
|
||||
|
||||
for (i = 0; i < zone->ndevs; ++i) {
|
||||
struct reada_zone *peer;
|
||||
@ -628,7 +629,7 @@ static int reada_pick_zone(struct btrfs_device *dev)
|
||||
(void **)&zone, index, 1);
|
||||
if (ret == 0)
|
||||
break;
|
||||
index = (zone->end >> PAGE_SHIFT) + 1;
|
||||
index = (zone->end >> dev->fs_info->sectorsize_bits) + 1;
|
||||
if (zone->locked) {
|
||||
if (zone->elems > top_locked_elems) {
|
||||
top_locked_elems = zone->elems;
|
||||
@ -709,7 +710,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
|
||||
* plugging to speed things up
|
||||
*/
|
||||
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
|
||||
dev->reada_next >> PAGE_SHIFT, 1);
|
||||
dev->reada_next >> fs_info->sectorsize_bits, 1);
|
||||
if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
|
||||
ret = reada_pick_zone(dev);
|
||||
if (!ret) {
|
||||
@ -718,7 +719,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
|
||||
}
|
||||
re = NULL;
|
||||
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
|
||||
dev->reada_next >> PAGE_SHIFT, 1);
|
||||
dev->reada_next >> fs_info->sectorsize_bits, 1);
|
||||
}
|
||||
if (ret == 0) {
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
@ -885,7 +886,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
||||
pr_cont(" curr off %llu",
|
||||
device->reada_next - zone->start);
|
||||
pr_cont("\n");
|
||||
index = (zone->end >> PAGE_SHIFT) + 1;
|
||||
index = (zone->end >> fs_info->sectorsize_bits) + 1;
|
||||
}
|
||||
cnt = 0;
|
||||
index = 0;
|
||||
@ -910,7 +911,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
||||
}
|
||||
}
|
||||
pr_cont("\n");
|
||||
index = (re->logical >> PAGE_SHIFT) + 1;
|
||||
index = (re->logical >> fs_info->sectorsize_bits) + 1;
|
||||
if (++cnt > 15)
|
||||
break;
|
||||
}
|
||||
@ -926,7 +927,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
||||
if (ret == 0)
|
||||
break;
|
||||
if (!re->scheduled) {
|
||||
index = (re->logical >> PAGE_SHIFT) + 1;
|
||||
index = (re->logical >> fs_info->sectorsize_bits) + 1;
|
||||
continue;
|
||||
}
|
||||
pr_debug("re: logical %llu size %u list empty %d scheduled %d",
|
||||
@ -942,7 +943,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
|
||||
}
|
||||
}
|
||||
pr_cont("\n");
|
||||
index = (re->logical >> PAGE_SHIFT) + 1;
|
||||
index = (re->logical >> fs_info->sectorsize_bits) + 1;
|
||||
}
|
||||
spin_unlock(&fs_info->reada_lock);
|
||||
}
|
||||
|
@ -3169,10 +3169,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
|
||||
|
||||
mutex_lock(&log_root_tree->log_mutex);
|
||||
|
||||
index2 = log_root_tree->log_transid % 2;
|
||||
list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
|
||||
root_log_ctx.log_transid = log_root_tree->log_transid;
|
||||
|
||||
if (btrfs_is_zoned(fs_info)) {
|
||||
if (!log_root_tree->node) {
|
||||
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
|
||||
@ -3183,6 +3179,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
}
|
||||
|
||||
index2 = log_root_tree->log_transid % 2;
|
||||
list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
|
||||
root_log_ctx.log_transid = log_root_tree->log_transid;
|
||||
|
||||
/*
|
||||
* Now we are safe to update the log_root_tree because we're under the
|
||||
* log_mutex, and we're a current writer so we're holding the commit
|
||||
|
Loading…
Reference in New Issue
Block a user