bcachefs: Reconstruct missing snapshot nodes
When the snapshots btree is gone, we'll have to delete huge amounts of data - unless we can reconstruct it by looking at the keys that refer to it. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
55936afe11
commit
a292be3b68
@ -615,6 +615,7 @@ struct bch_dev {
|
||||
*/
|
||||
|
||||
#define BCH_FS_FLAGS() \
|
||||
x(new_fs) \
|
||||
x(started) \
|
||||
x(may_go_rw) \
|
||||
x(rw) \
|
||||
|
@ -938,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
int ret;
|
||||
|
||||
bch_notice(c, "initializing new filesystem");
|
||||
set_bit(BCH_FS_new_fs, &c->flags);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
|
||||
|
@ -32,6 +32,7 @@
|
||||
x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
|
||||
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
|
||||
x(bucket_gens_init, 17, 0) \
|
||||
x(reconstruct_snapshots, 38, 0) \
|
||||
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
|
||||
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
|
||||
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
|
||||
|
@ -268,7 +268,8 @@
|
||||
x(btree_node_bkey_bad_u64s, 260) \
|
||||
x(btree_node_topology_empty_interior_node, 261) \
|
||||
x(btree_ptr_v2_min_key_bad, 262) \
|
||||
x(btree_root_unreadable_and_scan_found_nothing, 263)
|
||||
x(btree_root_unreadable_and_scan_found_nothing, 263) \
|
||||
x(snapshot_node_missing, 264)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n) BCH_FSCK_ERR_##t = n,
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "errcode.h"
|
||||
#include "error.h"
|
||||
#include "fs.h"
|
||||
#include "recovery_passes.h"
|
||||
#include "snapshot.h"
|
||||
|
||||
#include <linux/random.h>
|
||||
@ -574,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
|
||||
u32 subvol_id;
|
||||
|
||||
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
|
||||
bch_err_fn(c, ret);
|
||||
|
||||
if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
|
||||
ret = 0;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
@ -731,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans,
|
||||
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
|
||||
u32 real_depth;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
bool should_have_subvol;
|
||||
u32 i, id;
|
||||
int ret = 0;
|
||||
|
||||
@ -777,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
|
||||
bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
|
||||
!BCH_SNAPSHOT_DELETED(&s);
|
||||
|
||||
if (should_have_subvol) {
|
||||
@ -879,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Make sure a snapshot key exists for @id, creating a minimal one if needed.
 *
 * When bch2_snapshot_equiv() already yields a nonzero value for @id nothing is
 * done.  Otherwise a new snapshot tree is created, and a snapshot key at
 * POS(0, @id) referring to that tree is inserted into BTREE_ID_snapshots and
 * then handed to bch2_mark_snapshot() and bch2_snapshot_set_equiv().
 * NOTE(review): presumably those two calls refresh the in-memory snapshot
 * table for the new node - confirm against their definitions.
 *
 * Returns 0 on success, or the first nonzero error from any helper.
 */
static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_snapshot *new_node;
	u32 new_tree;
	int ret;

	/* Already known: nothing to reconstruct. */
	if (bch2_snapshot_equiv(c, id))
		return 0;

	ret = bch2_snapshot_tree_create(trans, id, 0, &new_tree);
	if (ret)
		return ret;

	new_node = bch2_trans_kmalloc(trans, sizeof(*new_node));
	ret = PTR_ERR_OR_ZERO(new_node);
	if (ret)
		return ret;

	bkey_snapshot_init(&new_node->k_i);
	new_node->k.p		= POS(0, id);
	new_node->v.tree	= cpu_to_le32(new_tree);
	new_node->v.btime.lo	= cpu_to_le64(bch2_current_time(c));

	/* Each step runs only if the previous one returned 0. */
	ret = bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &new_node->k_i, 0);
	if (ret)
		return ret;

	ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
				 bkey_s_c_null, bkey_i_to_s(&new_node->k_i), 0);
	if (ret)
		return ret;

	return bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&new_node->k_i));
}
|
||||
|
||||
/* Figure out which snapshot nodes belong in the same tree: */
|
||||
struct snapshot_tree_reconstruct {
|
||||
enum btree_id btree;
|
||||
struct bpos cur_pos;
|
||||
snapshot_id_list cur_ids;
|
||||
DARRAY(snapshot_id_list) trees;
|
||||
};
|
||||
|
||||
static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
|
||||
{
|
||||
darray_for_each(r->trees, i)
|
||||
darray_exit(i);
|
||||
darray_exit(&r->trees);
|
||||
darray_exit(&r->cur_ids);
|
||||
}
|
||||
|
||||
static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
|
||||
{
|
||||
return r->btree == BTREE_ID_inodes
|
||||
? r->cur_pos.offset == pos.offset
|
||||
: r->cur_pos.inode == pos.inode;
|
||||
}
|
||||
|
||||
/*
 * Return true if at least one snapshot ID in @l is also present in @r.
 */
static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
	darray_for_each(*l, id) {
		if (snapshot_list_has_id(r, *id))
			return true;
	}

	return false;
}
|
||||
|
||||
/*
 * Print the IDs in @s to @out as unsigned decimals separated by single
 * spaces, with no leading or trailing separator.
 */
static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
{
	bool need_sep = false;

	darray_for_each(*s, id) {
		/* Separator goes before every element except the first. */
		if (need_sep)
			prt_char(out, ' ');
		need_sep = true;

		prt_printf(out, "%u", *id);
	}
}
|
||||
|
||||
static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
|
||||
{
|
||||
if (r->cur_ids.nr) {
|
||||
darray_for_each(r->trees, i)
|
||||
if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
|
||||
int ret = snapshot_list_merge(c, i, &r->cur_ids);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
darray_push(&r->trees, r->cur_ids);
|
||||
darray_init(&r->cur_ids);
|
||||
}
|
||||
out:
|
||||
r->cur_ids.nr = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
|
||||
{
|
||||
if (!same_snapshot(r, pos))
|
||||
snapshot_tree_reconstruct_next(c, r);
|
||||
r->cur_pos = pos;
|
||||
return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
|
||||
}
|
||||
|
||||
int bch2_reconstruct_snapshots(struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct snapshot_tree_reconstruct r = {};
|
||||
int ret = 0;
|
||||
|
||||
for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
|
||||
if (btree_type_has_snapshots(btree)) {
|
||||
r.btree = btree;
|
||||
|
||||
ret = for_each_btree_key(trans, iter, btree, POS_MIN,
|
||||
BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
|
||||
get_snapshot_trees(c, &r, k.k->p);
|
||||
}));
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
snapshot_tree_reconstruct_next(c, &r);
|
||||
}
|
||||
}
|
||||
|
||||
darray_for_each(r.trees, t) {
|
||||
printbuf_reset(&buf);
|
||||
snapshot_id_list_to_text(&buf, t);
|
||||
|
||||
darray_for_each(*t, id) {
|
||||
if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
|
||||
c, snapshot_node_missing,
|
||||
"snapshot node %u from tree %s missing", *id, buf.buf)) {
|
||||
if (t->nr > 1) {
|
||||
bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
|
||||
ret = -BCH_ERR_fsck_repair_unimplemented;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
check_snapshot_exists(trans, *id));
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
err:
|
||||
bch2_trans_put(trans);
|
||||
snapshot_tree_reconstruct_exit(&r);
|
||||
printbuf_exit(&buf);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a snapshot as deleted, for future cleanup:
|
||||
*/
|
||||
@ -1689,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c)
|
||||
POS_MIN, 0, k,
|
||||
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
|
||||
bch_err_fn(c, ret);
|
||||
|
||||
/*
|
||||
* It's important that we check if we need to reconstruct snapshots
|
||||
* before going RW, so we mark that pass as required in the superblock -
|
||||
* otherwise, we could end up deleting keys with missing snapshot nodes
|
||||
* instead
|
||||
*/
|
||||
BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
|
||||
test_bit(BCH_FS_may_go_rw, &c->flags));
|
||||
|
||||
if (bch2_err_matches(ret, EIO) ||
|
||||
(c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
|
||||
ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -209,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
|
||||
|
||||
/*
 * Append @id to @s.  The caller must guarantee @id is not already in the
 * list; a duplicate trips the BUG_ON().
 *
 * Returns the result of darray_push(): 0 on success, nonzero on failure (in
 * which case an error is logged).
 */
static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
	BUG_ON(snapshot_list_has_id(s, id));

	/* Single declaration and single push: @id is appended exactly once. */
	int ret = darray_push(s, id);
	if (ret)
		bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
	return ret;
}
|
||||
|
||||
/*
 * Append @id to @s unless it is already present.
 *
 * Returns 0 if @id was already in the list or was appended, otherwise the
 * nonzero result of darray_push() (an error is logged in that case).
 */
static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
	int ret;

	if (snapshot_list_has_id(s, id))
		return 0;

	ret = darray_push(s, id);
	if (ret)
		bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
	return ret;
}
|
||||
|
||||
/*
 * Add every ID in @src to @dst, skipping IDs @dst already holds.
 *
 * Stops at the first failure and returns that error; IDs added before the
 * failure stay in @dst.  Returns 0 when all IDs were processed.
 */
static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
{
	int ret = 0;

	darray_for_each(*src, id) {
		ret = snapshot_list_add_nodup(c, dst, *id);
		if (ret)
			break;
	}

	return ret;
}
|
||||
|
||||
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
|
||||
struct bch_snapshot *s);
|
||||
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
|
||||
@ -229,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
|
||||
|
||||
int bch2_check_snapshot_trees(struct bch_fs *);
|
||||
int bch2_check_snapshots(struct bch_fs *);
|
||||
int bch2_reconstruct_snapshots(struct bch_fs *);
|
||||
|
||||
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
|
||||
void bch2_delete_dead_snapshots_work(struct work_struct *);
|
||||
|
Loading…
Reference in New Issue
Block a user