More bcachefs bugfixes for 6.7:
- Fix a rare emergency shutdown path bug: dropping journal pins after the filesystem has mostly been torn down is not what we want. - Fix some concurrency issues with the btree write buffer and journal replay by not using the btree write buffer until journal replay is finished - A fixup from the prior patch to kill journal pre-reservations: at the start of the btree update path, where previously we took a pre-reservation, we do at least want to check the journal watermark. - Fix a race between dropping device metadata and btree node writes, which would re-add a pointer to a device that had just been dropped - Fix one of the SRCU lock warnings, in bch2_compression_stats_to_text(). - Partial fix for a rare transaction paths overflow, when indirect extents had been split by background tasks, by not running certain triggers when they're not needed. - Fix for creating a snapshot with implicit source in a subdirectory of the containing subvolume - Don't unfreeze when we're emergency read-only - Fix for rebalance spinning trying to compress unwritten extents - Another deleted_inodes fix, for directories - Fix a rare deadlock (usually just an unnecessary wait) when flushing the journal with an open journal entry. 
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmV2T4EACgkQE6szbY3K bnay1w/+PyH5qwE2gOy17rno6cWSNyKJELUkcqVNqrSTZpuA+TbMbcV8+oOeBnG1 9/ShwKRvwwNC4HVk6KySoTMo9lRkaZ5wX6DpEsOqxoN8aCp6kqiCUxr0inAAyVdu O8FktP83eSX/vERWNlCeGLdi1KsCK0BWVbVMpkiVEO9QhLpS9eo1C8btstDIjbsv TVGvKO7IpVgibSBwymQPKpZa6BGN4d6emLlgKStdpVVR1RwJW3eLJwi1EV2hSp1f LBnTI5eD64pu+phEb4zE83JX932XAbxdBWaHlN1y3i4l6+sJDu63Y4R8bkbW+rnJ cbiyYM5IuAH6MFbbh9rIW8kEIvjrX13mY94oGlK8ClCI9WX129jD5538tEH624U5 KnhCZpkuzeGC5CVXNAzdJ8NP/Aj9qtKvSyssG6R5ZTitQ1FnTZ391Wb2pIRgj9pm yVfpJ/Q4cizVfSsKBvtr0U5I444zq50z+brKwegIoH8uMuGHKXcIgTUOu4q5pKDD znjS9eFrQTN2li2HB3LMxuS94yUmozqwgxClMptynLsHVknQH7F3cAdD+mYbwW5Q GUOd/QTlpskBYAUfBS8ewllowRjLGDJyrGvbR9Mvitk8CxOLRgoDipdh1K13jDMS zCmG1eQgdbtPHTM6fqif8Bu8xtgK7p2r099dcBhhiWmRyLPo5Qw= =l5sa -----END PGP SIGNATURE----- Merge tag 'bcachefs-2023-12-10' of https://evilpiepirate.org/git/bcachefs Pull more bcachefs bugfixes from Kent Overstreet: - Fix a rare emergency shutdown path bug: dropping journal pins after the filesystem has mostly been torn down is not what we want. - Fix some concurrency issues with the btree write buffer and journal replay by not using the btree write buffer until journal replay is finished - A fixup from the prior patch to kill journal pre-reservations: at the start of the btree update path, where previously we took a pre-reservation, we do at least want to check the journal watermark. - Fix a race between dropping device metadata and btree node writes, which would re-add a pointer to a device that had just been dropped - Fix one of the SRCU lock warnings, in bch2_compression_stats_to_text(). - Partial fix for a rare transaction paths overflow, when indirect extents had been split by background tasks, by not running certain triggers when they're not needed. 
- Fix for creating a snapshot with implicit source in a subdirectory of the containing subvolume - Don't unfreeze when we're emergency read-only - Fix for rebalance spinning trying to compress unwritten extents - Another deleted_inodes fix, for directories - Fix a rare deadlock (usually just an unnecessary wait) when flushing the journal with an open journal entry. * tag 'bcachefs-2023-12-10' of https://evilpiepirate.org/git/bcachefs: bcachefs: Close journal entry if necessary when flushing all pins bcachefs: Fix uninitialized var in bch2_journal_replay() bcachefs: Fix deleted inode check for dirs bcachefs: rebalance shouldn't attempt to compress unwritten extents bcachefs: don't attempt rw on unfreeze when shutdown bcachefs: Fix creating snapshot with implict source bcachefs: Don't run indirect extent trigger unless inserting/deleting bcachefs: Convert compression_stats to for_each_btree_key2 bcachefs: Fix bch2_extent_drop_ptrs() call bcachefs: Fix a journal deadlock in replay bcachefs; Don't use btree write buffer until journal replay is finished bcachefs: Don't drop journal pins in exit path
This commit is contained in:
commit
26aff84943
@ -9,6 +9,7 @@
|
||||
#include "debug.h"
|
||||
#include "errcode.h"
|
||||
#include "error.h"
|
||||
#include "journal.h"
|
||||
#include "trace.h"
|
||||
|
||||
#include <linux/prefetch.h>
|
||||
@ -424,14 +425,11 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
|
||||
BUG_ON(btree_node_read_in_flight(b) ||
|
||||
btree_node_write_in_flight(b));
|
||||
|
||||
if (btree_node_dirty(b))
|
||||
bch2_btree_complete_write(c, b, btree_current_write(b));
|
||||
clear_btree_node_dirty_acct(c, b);
|
||||
|
||||
btree_node_data_free(c, b);
|
||||
}
|
||||
|
||||
BUG_ON(atomic_read(&c->btree_cache.dirty));
|
||||
BUG_ON(!bch2_journal_error(&c->journal) &&
|
||||
atomic_read(&c->btree_cache.dirty));
|
||||
|
||||
list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
|
||||
|
||||
|
@ -1704,8 +1704,8 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
|
||||
return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
|
||||
}
|
||||
|
||||
void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
||||
struct btree_write *w)
|
||||
static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
|
||||
struct btree_write *w)
|
||||
{
|
||||
unsigned long old, new, v = READ_ONCE(b->will_make_reachable);
|
||||
|
||||
|
@ -134,9 +134,6 @@ void bch2_btree_node_read(struct bch_fs *, struct btree *, bool);
|
||||
int bch2_btree_root_read(struct bch_fs *, enum btree_id,
|
||||
const struct bkey_i *, unsigned);
|
||||
|
||||
void bch2_btree_complete_write(struct bch_fs *, struct btree *,
|
||||
struct btree_write *);
|
||||
|
||||
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
|
||||
|
||||
enum btree_write_flags {
|
||||
|
@ -992,8 +992,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
||||
list_for_each_entry_safe(ck, n, &items, list) {
|
||||
cond_resched();
|
||||
|
||||
bch2_journal_pin_drop(&c->journal, &ck->journal);
|
||||
|
||||
list_del(&ck->list);
|
||||
kfree(ck->k);
|
||||
six_lock_exit(&ck->c.lock);
|
||||
|
@ -554,6 +554,19 @@ int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
|
||||
BTREE_UPDATE_PREJOURNAL);
|
||||
}
|
||||
|
||||
/*
 * bch2_btree_insert_clone_trans - insert a private copy of @k into @btree
 *
 * Allocates bkey_bytes(&k->k) bytes with bch2_trans_kmalloc(), copies @k into
 * that buffer and hands the copy to bch2_btree_insert_trans() with flags 0.
 * The caller's @k is only read, never inserted directly.
 *
 * Used by bch2_trans_update_buffered() as the fallback path while
 * trans->journal_replay_not_finished is set, i.e. instead of queueing the key
 * in the btree write buffer.
 *
 * Returns the error encoded in the allocation result if the allocation fails,
 * otherwise whatever bch2_btree_insert_trans() returns.
 *
 * NOTE(review): presumably the allocation is owned by the transaction and needs
 * no explicit free here - confirm against bch2_trans_kmalloc().
 */
static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans,
|
||||
enum btree_id btree,
|
||||
struct bkey_i *k)
|
||||
{
|
||||
struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
|
||||
int ret = PTR_ERR_OR_ZERO(n);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
bkey_copy(n, k);
|
||||
return bch2_btree_insert_trans(trans, btree, n, 0);
|
||||
}
|
||||
|
||||
int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
|
||||
enum btree_id btree,
|
||||
struct bkey_i *k)
|
||||
@ -564,6 +577,9 @@ int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
|
||||
EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
|
||||
EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);
|
||||
|
||||
if (unlikely(trans->journal_replay_not_finished))
|
||||
return bch2_btree_insert_clone_trans(trans, btree, k);
|
||||
|
||||
trans_for_each_wb_update(trans, i) {
|
||||
if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
|
||||
bkey_copy(&i->k, k);
|
||||
|
@ -1056,6 +1056,17 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
||||
flags &= ~BCH_WATERMARK_MASK;
|
||||
flags |= watermark;
|
||||
|
||||
if (!(flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
|
||||
watermark < c->journal.watermark) {
|
||||
struct journal_res res = { 0 };
|
||||
|
||||
ret = drop_locks_do(trans,
|
||||
bch2_journal_res_get(&c->journal, &res, 1,
|
||||
watermark|JOURNAL_RES_GET_CHECK));
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
nr_nodes[!!update_level] += 1 + split;
|
||||
update_level++;
|
||||
|
@ -471,7 +471,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
* we aren't using the extent overwrite path to delete, we're
|
||||
* just using the normal key deletion path:
|
||||
*/
|
||||
if (bkey_deleted(&n->k))
|
||||
if (bkey_deleted(&n->k) && !(iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
n->k.size = 0;
|
||||
|
||||
return bch2_trans_relock(trans) ?:
|
||||
@ -591,7 +591,7 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
m->data_opts.rewrite_ptrs = 0;
|
||||
/* if iter == NULL, it's just a promote */
|
||||
if (iter)
|
||||
ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
|
||||
ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -485,20 +485,15 @@ retry:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
|
||||
int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
|
||||
ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
|
||||
SPOS(dir.inum, 0, snapshot),
|
||||
POS(dir.inum, U64_MAX), 0, k, ret)
|
||||
SPOS(dir, 0, snapshot),
|
||||
POS(dir, U64_MAX), 0, k, ret)
|
||||
if (k.k->type == KEY_TYPE_dirent) {
|
||||
ret = -ENOTEMPTY;
|
||||
break;
|
||||
@ -508,6 +503,14 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * bch2_empty_dir_trans - check whether directory @dir has no dirents
 *
 * Thin wrapper: resolves @dir.subvol to a snapshot ID with
 * bch2_subvolume_get_snapshot(), then runs bch2_empty_dir_snapshot() on
 * @dir.inum in that snapshot.
 *
 * The GNU "a ?: b" form returns the lookup's result if it is nonzero and only
 * then skips the directory scan; otherwise the scan's result is returned
 * (bch2_empty_dir_snapshot() yields -ENOTEMPTY when it finds a
 * KEY_TYPE_dirent key).
 */
int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
|
||||
{
|
||||
u32 snapshot;
|
||||
|
||||
return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
|
||||
bch2_empty_dir_snapshot(trans, dir.inum, snapshot);
|
||||
}
|
||||
|
||||
int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
|
@ -64,6 +64,7 @@ u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
|
||||
const struct bch_hash_info *,
|
||||
const struct qstr *, subvol_inum *);
|
||||
|
||||
int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32);
|
||||
int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
|
||||
int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
|
||||
|
||||
|
@ -1294,7 +1294,8 @@ unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
|
||||
unsigned i = 0;
|
||||
|
||||
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
|
||||
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) {
|
||||
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
|
||||
p.ptr.unwritten) {
|
||||
rewrite_ptrs = 0;
|
||||
goto incompressible;
|
||||
}
|
||||
|
@ -413,7 +413,7 @@ retry:
|
||||
|
||||
if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
|
||||
!arg.src_ptr)
|
||||
snapshot_src.subvol = to_bch_ei(dir)->ei_inode.bi_subvol;
|
||||
snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;
|
||||
|
||||
inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
|
||||
dst_dentry, arg.mode|S_IFDIR,
|
||||
|
@ -1733,6 +1733,9 @@ static int bch2_unfreeze(struct super_block *sb)
|
||||
struct bch_fs *c = sb->s_fs_info;
|
||||
int ret;
|
||||
|
||||
if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
|
||||
return 0;
|
||||
|
||||
down_write(&c->state_lock);
|
||||
ret = bch2_fs_read_write(c);
|
||||
up_write(&c->state_lock);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "compress.h"
|
||||
#include "dirent.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "extent_update.h"
|
||||
@ -1093,11 +1094,15 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (fsck_err_on(S_ISDIR(inode.bi_mode), c,
|
||||
deleted_inode_is_dir,
|
||||
"directory %llu:%u in deleted_inodes btree",
|
||||
pos.offset, pos.snapshot))
|
||||
goto delete;
|
||||
if (S_ISDIR(inode.bi_mode)) {
|
||||
ret = bch2_empty_dir_snapshot(trans, pos.offset, pos.snapshot);
|
||||
if (fsck_err_on(ret == -ENOTEMPTY, c, deleted_inode_is_dir,
|
||||
"non empty directory %llu:%u in deleted_inodes btree",
|
||||
pos.offset, pos.snapshot))
|
||||
goto delete;
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c,
|
||||
deleted_inode_not_unlinked,
|
||||
|
@ -249,7 +249,7 @@ static bool journal_entry_want_write(struct journal *j)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool journal_entry_close(struct journal *j)
|
||||
bool bch2_journal_entry_close(struct journal *j)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
@ -383,7 +383,7 @@ static bool journal_quiesced(struct journal *j)
|
||||
bool ret = atomic64_read(&j->seq) == j->seq_ondisk;
|
||||
|
||||
if (!ret)
|
||||
journal_entry_close(j);
|
||||
bch2_journal_entry_close(j);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -436,7 +436,7 @@ retry:
|
||||
|
||||
/*
|
||||
* Recheck after taking the lock, so we don't race with another thread
|
||||
* that just did journal_entry_open() and call journal_entry_close()
|
||||
* that just did journal_entry_open() and call bch2_journal_entry_close()
|
||||
* unnecessarily
|
||||
*/
|
||||
if (journal_res_get_fast(j, res, flags)) {
|
||||
@ -1041,7 +1041,7 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
bch2_journal_reclaim_stop(j);
|
||||
bch2_journal_flush_all_pins(j);
|
||||
|
||||
wait_event(j->wait, journal_entry_close(j));
|
||||
wait_event(j->wait, bch2_journal_entry_close(j));
|
||||
|
||||
/*
|
||||
* Always write a new journal entry, to make sure the clock hands are up
|
||||
|
@ -266,6 +266,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
|
||||
return s;
|
||||
}
|
||||
|
||||
bool bch2_journal_entry_close(struct journal *);
|
||||
void bch2_journal_buf_put_final(struct journal *, u64, bool);
|
||||
|
||||
static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
|
||||
|
@ -1599,6 +1599,7 @@ static CLOSURE_CALLBACK(journal_write_done)
|
||||
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
|
||||
old.v, new.v)) != old.v);
|
||||
|
||||
bch2_journal_reclaim_fast(j);
|
||||
bch2_journal_space_available(j);
|
||||
|
||||
closure_wake_up(&w->wait);
|
||||
|
@ -776,6 +776,9 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
|
||||
(1U << JOURNAL_PIN_btree), 0, 0, 0))
|
||||
*did_work = true;
|
||||
|
||||
if (seq_to_flush > journal_cur_seq(j))
|
||||
bch2_journal_entry_close(j);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
/*
|
||||
* If journal replay hasn't completed, the unreplayed journal entries
|
||||
|
@ -144,7 +144,7 @@ static int bch2_journal_replay(struct bch_fs *c)
|
||||
u64 start_seq = c->journal_replay_seq_start;
|
||||
u64 end_seq = c->journal_replay_seq_start;
|
||||
size_t i;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
|
||||
keys->gap = keys->nr;
|
||||
|
@ -121,6 +121,14 @@ int bch2_trans_mark_reflink_v(struct btree_trans *trans,
|
||||
{
|
||||
check_indirect_extent_deleting(new, &flags);
|
||||
|
||||
if (old.k->type == KEY_TYPE_reflink_v &&
|
||||
new->k.type == KEY_TYPE_reflink_v &&
|
||||
old.k->u64s == new->k.u64s &&
|
||||
!memcmp(bkey_s_c_to_reflink_v(old).v->start,
|
||||
bkey_i_to_reflink_v(new)->v.start,
|
||||
bkey_val_bytes(&new->k) - 8))
|
||||
return 0;
|
||||
|
||||
return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
|
||||
}
|
||||
|
||||
|
@ -276,8 +276,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
|
||||
if (!btree_type_has_ptrs(id))
|
||||
continue;
|
||||
|
||||
for_each_btree_key(trans, iter, id, POS_MIN,
|
||||
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
|
||||
ret = for_each_btree_key2(trans, iter, id, POS_MIN,
|
||||
BTREE_ITER_ALL_SNAPSHOTS, k, ({
|
||||
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
@ -309,8 +309,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
|
||||
nr_uncompressed_extents++;
|
||||
else if (compressed)
|
||||
nr_compressed_extents++;
|
||||
}
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
0;
|
||||
}));
|
||||
}
|
||||
|
||||
bch2_trans_put(trans);
|
||||
|
Loading…
x
Reference in New Issue
Block a user