bcachefs: Simplify hash table checks
Very early on there was a period where we were accidentally generating dirents with trailing garbage; we've since dropped support for filesystems that old and the fsck code can be dropped. Also, this patch switches to a simpler algorithm for checking hash tables. It's less efficient on hash collision - but with 64 bit keys, those are very rare. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
5c16add5ad
commit
7ac2c55e4d
@ -142,42 +142,10 @@ static int walk_inode(struct btree_trans *trans,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct hash_check {
|
static int hash_redo_key(struct btree_trans *trans,
|
||||||
struct bch_hash_info info;
|
const struct bch_hash_desc desc,
|
||||||
|
struct bch_hash_info *hash_info,
|
||||||
/* start of current chain of hash collisions: */
|
struct btree_iter *k_iter, struct bkey_s_c k)
|
||||||
struct btree_iter *chain;
|
|
||||||
|
|
||||||
/* next offset in current chain of hash collisions: */
|
|
||||||
u64 chain_end;
|
|
||||||
};
|
|
||||||
|
|
||||||
static void hash_check_init(struct hash_check *h)
|
|
||||||
{
|
|
||||||
h->chain = NULL;
|
|
||||||
h->chain_end = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void hash_stop_chain(struct btree_trans *trans,
|
|
||||||
struct hash_check *h)
|
|
||||||
{
|
|
||||||
if (h->chain)
|
|
||||||
bch2_trans_iter_free(trans, h->chain);
|
|
||||||
h->chain = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void hash_check_set_inode(struct btree_trans *trans,
|
|
||||||
struct hash_check *h,
|
|
||||||
const struct bch_inode_unpacked *bi)
|
|
||||||
{
|
|
||||||
h->info = bch2_hash_info_init(trans->c, bi);
|
|
||||||
hash_stop_chain(trans, h);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int hash_redo_key(const struct bch_hash_desc desc,
|
|
||||||
struct btree_trans *trans, struct hash_check *h,
|
|
||||||
struct btree_iter *k_iter, struct bkey_s_c k,
|
|
||||||
u64 hashed)
|
|
||||||
{
|
{
|
||||||
struct bkey_i delete;
|
struct bkey_i delete;
|
||||||
struct bkey_i *tmp;
|
struct bkey_i *tmp;
|
||||||
@ -192,7 +160,7 @@ static int hash_redo_key(const struct bch_hash_desc desc,
|
|||||||
delete.k.p = k_iter->pos;
|
delete.k.p = k_iter->pos;
|
||||||
bch2_trans_update(trans, k_iter, &delete, 0);
|
bch2_trans_update(trans, k_iter, &delete, 0);
|
||||||
|
|
||||||
return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
|
return bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode,
|
||||||
tmp, 0);
|
tmp, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,203 +184,74 @@ retry:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int hash_check_duplicates(struct btree_trans *trans,
|
static int hash_check_key(struct btree_trans *trans,
|
||||||
const struct bch_hash_desc desc, struct hash_check *h,
|
const struct bch_hash_desc desc,
|
||||||
struct btree_iter *k_iter, struct bkey_s_c k)
|
struct bch_hash_info *hash_info,
|
||||||
|
struct btree_iter *k_iter, struct bkey_s_c hash_k)
|
||||||
{
|
{
|
||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_iter *iter;
|
struct btree_iter *iter = NULL;
|
||||||
struct bkey_s_c k2;
|
|
||||||
char buf[200];
|
char buf[200];
|
||||||
|
struct bkey_s_c k;
|
||||||
|
u64 hash;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (!bkey_cmp(h->chain->pos, k_iter->pos))
|
if (hash_k.k->type != desc.key_type)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
iter = bch2_trans_copy_iter(trans, h->chain);
|
hash = desc.hash_bkey(hash_info, hash_k);
|
||||||
|
|
||||||
for_each_btree_key_continue(iter, 0, k2, ret) {
|
if (likely(hash == hash_k.k->p.offset))
|
||||||
if (bkey_cmp(k2.k->p, k.k->p) >= 0)
|
return 0;
|
||||||
|
|
||||||
|
if (hash_k.k->p.offset < hash)
|
||||||
|
goto bad_hash;
|
||||||
|
|
||||||
|
for_each_btree_key(trans, iter, desc.btree_id, POS(hash_k.k->p.inode, hash),
|
||||||
|
BTREE_ITER_SLOTS, k, ret) {
|
||||||
|
if (!bkey_cmp(k.k->p, hash_k.k->p))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (fsck_err_on(k2.k->type == desc.key_type &&
|
if (fsck_err_on(k.k->type == desc.key_type &&
|
||||||
!desc.cmp_bkey(k, k2), c,
|
!desc.cmp_bkey(k, hash_k), c,
|
||||||
"duplicate hash table keys:\n%s",
|
"duplicate hash table keys:\n%s",
|
||||||
(bch2_bkey_val_to_text(&PBUF(buf), c,
|
(bch2_bkey_val_to_text(&PBUF(buf), c,
|
||||||
k), buf))) {
|
hash_k), buf))) {
|
||||||
ret = fsck_hash_delete_at(trans, desc, &h->info, k_iter);
|
ret = fsck_hash_delete_at(trans, desc, hash_info, k_iter);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
ret = 1;
|
ret = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (bkey_deleted(k.k)) {
|
||||||
|
bch2_trans_iter_free(trans, iter);
|
||||||
|
goto bad_hash;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
fsck_err:
|
|
||||||
bch2_trans_iter_free(trans, iter);
|
bch2_trans_iter_free(trans, iter);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
bad_hash:
|
||||||
|
if (fsck_err(c, "hash table key at wrong offset: btree %u inode %llu offset %llu, "
|
||||||
static void hash_set_chain_start(struct btree_trans *trans,
|
"hashed to %llu should be at %llu\n%s",
|
||||||
const struct bch_hash_desc desc,
|
desc.btree_id, hash_k.k->p.inode, hash_k.k->p.offset,
|
||||||
struct hash_check *h,
|
hash, iter->pos.offset,
|
||||||
struct btree_iter *k_iter, struct bkey_s_c k)
|
(bch2_bkey_val_to_text(&PBUF(buf), c, hash_k), buf)) == FSCK_ERR_IGNORE)
|
||||||
{
|
|
||||||
bool hole = (k.k->type != KEY_TYPE_hash_whiteout &&
|
|
||||||
k.k->type != desc.key_type);
|
|
||||||
|
|
||||||
if (hole || k.k->p.offset > h->chain_end + 1)
|
|
||||||
hash_stop_chain(trans, h);
|
|
||||||
|
|
||||||
if (!hole) {
|
|
||||||
if (!h->chain)
|
|
||||||
h->chain = bch2_trans_copy_iter(trans, k_iter);
|
|
||||||
|
|
||||||
h->chain_end = k.k->p.offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool key_has_correct_hash(struct btree_trans *trans,
|
|
||||||
const struct bch_hash_desc desc,
|
|
||||||
struct hash_check *h,
|
|
||||||
struct btree_iter *k_iter, struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
u64 hash;
|
|
||||||
|
|
||||||
hash_set_chain_start(trans, desc, h, k_iter, k);
|
|
||||||
|
|
||||||
if (k.k->type != desc.key_type)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
hash = desc.hash_bkey(&h->info, k);
|
|
||||||
|
|
||||||
return hash >= h->chain->pos.offset &&
|
|
||||||
hash <= k.k->p.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int hash_check_key(struct btree_trans *trans,
|
|
||||||
const struct bch_hash_desc desc, struct hash_check *h,
|
|
||||||
struct btree_iter *k_iter, struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
char buf[200];
|
|
||||||
u64 hashed;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
hash_set_chain_start(trans, desc, h, k_iter, k);
|
|
||||||
|
|
||||||
if (k.k->type != desc.key_type)
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
hashed = desc.hash_bkey(&h->info, k);
|
|
||||||
|
|
||||||
if (fsck_err_on(hashed < h->chain->pos.offset ||
|
|
||||||
hashed > k.k->p.offset, c,
|
|
||||||
"hash table key at wrong offset: btree %u, %llu, "
|
|
||||||
"hashed to %llu chain starts at %llu\n%s",
|
|
||||||
desc.btree_id, k.k->p.offset,
|
|
||||||
hashed, h->chain->pos.offset,
|
|
||||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf))) {
|
|
||||||
ret = __bch2_trans_do(trans, NULL, NULL,
|
ret = __bch2_trans_do(trans, NULL, NULL,
|
||||||
BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
||||||
hash_redo_key(desc, trans, h, k_iter, k, hashed));
|
hash_redo_key(trans, desc, hash_info, k_iter, hash_k));
|
||||||
if (ret) {
|
if (ret) {
|
||||||
bch_err(c, "hash_redo_key err %i", ret);
|
bch_err(c, "hash_redo_key err %i", ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
return -EINTR;
|
return -EINTR;
|
||||||
}
|
|
||||||
|
|
||||||
ret = hash_check_duplicates(trans, desc, h, k_iter, k);
|
|
||||||
fsck_err:
|
fsck_err:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
|
|
||||||
struct btree_iter *iter, struct bkey_s_c *k)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct bkey_i_dirent *d = NULL;
|
|
||||||
int ret = -EINVAL;
|
|
||||||
char buf[200];
|
|
||||||
unsigned len;
|
|
||||||
u64 hash;
|
|
||||||
|
|
||||||
if (key_has_correct_hash(trans, bch2_dirent_hash_desc, h, iter, *k))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
len = bch2_dirent_name_bytes(bkey_s_c_to_dirent(*k));
|
|
||||||
BUG_ON(!len);
|
|
||||||
|
|
||||||
memcpy(buf, bkey_s_c_to_dirent(*k).v->d_name, len);
|
|
||||||
buf[len] = '\0';
|
|
||||||
|
|
||||||
d = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
|
|
||||||
if (!d) {
|
|
||||||
bch_err(c, "memory allocation failure");
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
|
|
||||||
bkey_reassemble(&d->k_i, *k);
|
|
||||||
|
|
||||||
do {
|
|
||||||
--len;
|
|
||||||
if (!len)
|
|
||||||
goto err_redo;
|
|
||||||
|
|
||||||
d->k.u64s = BKEY_U64s + dirent_val_u64s(len);
|
|
||||||
|
|
||||||
BUG_ON(bkey_val_bytes(&d->k) <
|
|
||||||
offsetof(struct bch_dirent, d_name) + len);
|
|
||||||
|
|
||||||
memset(d->v.d_name + len, 0,
|
|
||||||
bkey_val_bytes(&d->k) -
|
|
||||||
offsetof(struct bch_dirent, d_name) - len);
|
|
||||||
|
|
||||||
hash = bch2_dirent_hash_desc.hash_bkey(&h->info,
|
|
||||||
bkey_i_to_s_c(&d->k_i));
|
|
||||||
} while (hash < h->chain->pos.offset ||
|
|
||||||
hash > k->k->p.offset);
|
|
||||||
|
|
||||||
if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
|
|
||||||
buf, strlen(buf), d->v.d_name, len)) {
|
|
||||||
ret = __bch2_trans_do(trans, NULL, NULL,
|
|
||||||
BTREE_INSERT_NOFAIL|
|
|
||||||
BTREE_INSERT_LAZY_RW,
|
|
||||||
(bch2_trans_update(trans, iter, &d->k_i, 0), 0));
|
|
||||||
if (ret)
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
*k = bch2_btree_iter_peek(iter);
|
|
||||||
|
|
||||||
BUG_ON(k->k->type != KEY_TYPE_dirent);
|
|
||||||
}
|
|
||||||
err:
|
|
||||||
fsck_err:
|
|
||||||
kfree(d);
|
|
||||||
return ret;
|
|
||||||
err_redo:
|
|
||||||
hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
|
|
||||||
|
|
||||||
if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
|
|
||||||
"hash table key at wrong offset: btree %u, offset %llu, "
|
|
||||||
"hashed to %llu chain starts at %llu\n%s",
|
|
||||||
buf, strlen(buf), BTREE_ID_dirents,
|
|
||||||
k->k->p.offset, hash, h->chain->pos.offset,
|
|
||||||
(bch2_bkey_val_to_text(&PBUF(buf), c,
|
|
||||||
*k), buf))) {
|
|
||||||
ret = __bch2_trans_do(trans, NULL, NULL,
|
|
||||||
BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
|
|
||||||
hash_redo_key(bch2_dirent_hash_desc, trans,
|
|
||||||
h, iter, *k, hash));
|
|
||||||
if (ret)
|
|
||||||
bch_err(c, "hash_redo_key err %i", ret);
|
|
||||||
else
|
|
||||||
ret = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int check_inode(struct btree_trans *trans,
|
static int check_inode(struct btree_trans *trans,
|
||||||
struct btree_iter *iter,
|
struct btree_iter *iter,
|
||||||
struct bkey_s_c_inode inode)
|
struct bkey_s_c_inode inode)
|
||||||
@ -710,7 +549,7 @@ noinline_for_stack
|
|||||||
static int check_dirents(struct bch_fs *c)
|
static int check_dirents(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct inode_walker w = inode_walker_init();
|
struct inode_walker w = inode_walker_init();
|
||||||
struct hash_check h;
|
struct bch_hash_info hash_info;
|
||||||
struct btree_trans trans;
|
struct btree_trans trans;
|
||||||
struct btree_iter *iter;
|
struct btree_iter *iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
@ -721,8 +560,6 @@ static int check_dirents(struct bch_fs *c)
|
|||||||
|
|
||||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||||
|
|
||||||
hash_check_init(&h);
|
|
||||||
|
|
||||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
|
iter = bch2_trans_get_iter(&trans, BTREE_ID_dirents,
|
||||||
POS(BCACHEFS_ROOT_INO, 0), 0);
|
POS(BCACHEFS_ROOT_INO, 0), 0);
|
||||||
retry:
|
retry:
|
||||||
@ -749,25 +586,26 @@ retry:
|
|||||||
ret = bch2_btree_delete_at(&trans, iter, 0);
|
ret = bch2_btree_delete_at(&trans, iter, 0);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
continue;
|
goto next;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (w.first_this_inode && w.have_inode)
|
if (!w.have_inode)
|
||||||
hash_check_set_inode(&trans, &h, &w.inode);
|
goto next;
|
||||||
|
|
||||||
ret = check_dirent_hash(&trans, &h, iter, &k);
|
if (w.first_this_inode)
|
||||||
|
hash_info = bch2_hash_info_init(c, &w.inode);
|
||||||
|
|
||||||
|
ret = hash_check_key(&trans, bch2_dirent_hash_desc,
|
||||||
|
&hash_info, iter, k);
|
||||||
if (ret > 0) {
|
if (ret > 0) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
continue;
|
goto next;
|
||||||
}
|
}
|
||||||
if (ret)
|
if (ret)
|
||||||
goto fsck_err;
|
goto fsck_err;
|
||||||
|
|
||||||
if (ret)
|
|
||||||
goto fsck_err;
|
|
||||||
|
|
||||||
if (k.k->type != KEY_TYPE_dirent)
|
if (k.k->type != KEY_TYPE_dirent)
|
||||||
continue;
|
goto next;
|
||||||
|
|
||||||
d = bkey_s_c_to_dirent(k);
|
d = bkey_s_c_to_dirent(k);
|
||||||
d_inum = le64_to_cpu(d.v->d_inum);
|
d_inum = le64_to_cpu(d.v->d_inum);
|
||||||
@ -786,9 +624,12 @@ retry:
|
|||||||
ret = remove_dirent(&trans, d);
|
ret = remove_dirent(&trans, d);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
continue;
|
goto next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!have_target)
|
||||||
|
goto next;
|
||||||
|
|
||||||
if (!target.bi_nlink &&
|
if (!target.bi_nlink &&
|
||||||
!(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
|
!(target.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) &&
|
||||||
(target.bi_dir != k.k->p.inode ||
|
(target.bi_dir != k.k->p.inode ||
|
||||||
@ -822,8 +663,7 @@ retry:
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fsck_err_on(have_target &&
|
if (fsck_err_on(d.v->d_type !=
|
||||||
d.v->d_type !=
|
|
||||||
mode_to_type(target.bi_mode), c,
|
mode_to_type(target.bi_mode), c,
|
||||||
"incorrect d_type: should be %u:\n%s",
|
"incorrect d_type: should be %u:\n%s",
|
||||||
mode_to_type(target.bi_mode),
|
mode_to_type(target.bi_mode),
|
||||||
@ -849,17 +689,14 @@ retry:
|
|||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
next:
|
||||||
bch2_btree_iter_advance(iter);
|
bch2_btree_iter_advance(iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
hash_stop_chain(&trans, &h);
|
|
||||||
err:
|
err:
|
||||||
fsck_err:
|
fsck_err:
|
||||||
if (ret == -EINTR)
|
if (ret == -EINTR)
|
||||||
goto retry;
|
goto retry;
|
||||||
|
|
||||||
bch2_trans_iter_put(&trans, h.chain);
|
|
||||||
bch2_trans_iter_put(&trans, iter);
|
bch2_trans_iter_put(&trans, iter);
|
||||||
return bch2_trans_exit(&trans) ?: ret;
|
return bch2_trans_exit(&trans) ?: ret;
|
||||||
}
|
}
|
||||||
@ -871,7 +708,7 @@ noinline_for_stack
|
|||||||
static int check_xattrs(struct bch_fs *c)
|
static int check_xattrs(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct inode_walker w = inode_walker_init();
|
struct inode_walker w = inode_walker_init();
|
||||||
struct hash_check h;
|
struct bch_hash_info hash_info;
|
||||||
struct btree_trans trans;
|
struct btree_trans trans;
|
||||||
struct btree_iter *iter;
|
struct btree_iter *iter;
|
||||||
struct bkey_s_c k;
|
struct bkey_s_c k;
|
||||||
@ -879,8 +716,6 @@ static int check_xattrs(struct bch_fs *c)
|
|||||||
|
|
||||||
bch_verbose(c, "checking xattrs");
|
bch_verbose(c, "checking xattrs");
|
||||||
|
|
||||||
hash_check_init(&h);
|
|
||||||
|
|
||||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||||
|
|
||||||
iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
|
iter = bch2_trans_get_iter(&trans, BTREE_ID_xattrs,
|
||||||
@ -902,10 +737,10 @@ retry:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (w.first_this_inode && w.have_inode)
|
if (w.first_this_inode && w.have_inode)
|
||||||
hash_check_set_inode(&trans, &h, &w.inode);
|
hash_info = bch2_hash_info_init(c, &w.inode);
|
||||||
|
|
||||||
ret = hash_check_key(&trans, bch2_xattr_hash_desc,
|
ret = hash_check_key(&trans, bch2_xattr_hash_desc,
|
||||||
&h, iter, k);
|
&hash_info, iter, k);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -915,7 +750,6 @@ fsck_err:
|
|||||||
if (ret == -EINTR)
|
if (ret == -EINTR)
|
||||||
goto retry;
|
goto retry;
|
||||||
|
|
||||||
bch2_trans_iter_put(&trans, h.chain);
|
|
||||||
bch2_trans_iter_put(&trans, iter);
|
bch2_trans_iter_put(&trans, iter);
|
||||||
return bch2_trans_exit(&trans) ?: ret;
|
return bch2_trans_exit(&trans) ?: ret;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user