bcachefs: Minor device removal fixes

 - We weren't clearing the LRU btree
 - bch2_alloc_read() runs before bch2_check_alloc_key() deletes alloc
   keys for devices/buckets that don't exist, so it needs to check for
   that and skip such keys itself
 - bch2_check_lrus() needs to check that buckets exist
 - improve some error messages

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2022-04-09 15:15:36 -04:00 committed by Kent Overstreet
parent 502f973dba
commit a9c0a4cbf1
3 changed files with 35 additions and 17 deletions

View File

@ -446,6 +446,13 @@ int bch2_alloc_read(struct bch_fs *c)
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
/*
* Not a fsck error because this is checked/repaired by
* bch2_check_alloc_key() which runs later:
*/
if (!bch2_dev_bucket_exists(c, k.k->p))
continue;
ca = bch_dev_bkey_exists(c, k.k->p.inode);
bch2_alloc_to_v4(k, &a);
@ -614,7 +621,8 @@ static int bch2_check_alloc_key(struct btree_trans *trans,
return ret;
if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
"alloc key for invalid device or bucket"))
"alloc key for invalid device:bucket %llu:%llu",
alloc_k.k->p.inode, alloc_k.k->p.offset))
return bch2_btree_delete_at(trans, alloc_iter, 0);
ca = bch_dev_bkey_exists(c, alloc_k.k->p.inode);
@ -727,9 +735,8 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
bch2_trans_iter_init(trans, &alloc_iter, BTREE_ID_alloc, pos, 0);
if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
"%llu:%llu set in %s btree but device or bucket does not exist",
pos.inode, pos.offset,
bch2_btree_ids[iter->btree_id]))
"entry in %s btree for nonexistant dev:bucket %llu:%llu",
bch2_btree_ids[iter->btree_id], pos.inode, pos.offset))
goto delete;
k = bch2_btree_iter_peek_slot(&alloc_iter);

View File

@ -133,7 +133,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
struct bch_alloc_v4 a;
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
u64 idx;
struct bpos alloc_pos;
int ret;
lru_k = bch2_btree_iter_peek(lru_iter);
@ -144,10 +144,15 @@ static int bch2_check_lru_key(struct btree_trans *trans,
if (ret)
return ret;
idx = le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx);
alloc_pos = POS(lru_k.k->p.inode,
le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx));
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
POS(lru_k.k->p.inode, idx), 0);
if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c,
"lru key points to nonexistent device:bucket %llu:%llu",
alloc_pos.inode, alloc_pos.offset))
return bch2_btree_delete_at(trans, lru_iter, 0);
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
if (ret)

View File

@ -1423,11 +1423,17 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
struct bpos end = POS(ca->dev_idx, U64_MAX);
int ret;
ret = bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
/*
* We clear the LRU and need_discard btrees first so that we don't race
* with bch2_do_invalidates() and bch2_do_discards()
*/
ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
BTREE_TRIGGER_NORUN, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
BTREE_TRIGGER_NORUN, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
BTREE_TRIGGER_NORUN, NULL) ?:
bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
BTREE_TRIGGER_NORUN, NULL);
if (ret)
bch_err(c, "error %i removing dev alloc info", ret);
@ -1462,19 +1468,19 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
goto err;
}
ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
if (ret) {
bch_err(ca, "Remove failed: error %i flushing journal", ret);
goto err;
}
ret = bch2_dev_remove_alloc(c, ca);
if (ret) {
bch_err(ca, "Remove failed, error deleting alloc info");
goto err;
}
ret = bch2_journal_error(&c->journal);
ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
if (ret) {
bch_err(ca, "Remove failed: error %i flushing journal", ret);
goto err;
}
ret = bch2_journal_flush(&c->journal);
if (ret) {
bch_err(ca, "Remove failed, journal error");
goto err;