bcachefs: bch2_btree_iter_peek_upto()

In BTREE_ITER_FILTER_SNAPSHOTS mode, we skip over keys in unrelated
snapshots. When we hit the end of an inode, if the next inode(s) are in
a different subvolume, we could potentially have to skip past many keys
before finding a key we can return to the caller so that it can
terminate the iteration.

This adds a peek_upto() variant to solve this problem, to be used when
we know the range we're searching within.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Author: Kent Overstreet
Date: 2022-03-11 12:31:52 -05:00
Committed by: Kent Overstreet
Commit: 85d8cf161f
Parent: d4d24a6509
8 changed files with 77 additions and 51 deletions
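
For illustration (not part of the commit): a minimal sketch of the calling pattern the new interface enables. A caller that only cares about one inode's keys passes the end of that inode's key range, so the iterator never has to walk through keys belonging to other inodes or subvolumes. The helper name count_dirents() is hypothetical; the iterator calls are the ones this patch adds or already provides.

/* Hypothetical caller, sketched against this patch's API: */
static int count_dirents(struct btree_trans *trans, subvol_inum dir, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int nr = 0, ret = 0;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
			     SPOS(dir.inum, 0, snapshot), 0);

	/* Stops at the end of the inode instead of scanning into the next one: */
	while ((k = bch2_btree_iter_peek_upto(&iter, POS(dir.inum, U64_MAX))).k &&
	       !(ret = bkey_err(k))) {
		if (k.k->type == KEY_TYPE_dirent)
			nr++;
		bch2_btree_iter_advance(&iter);
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret ?: nr;
}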


@@ -2346,11 +2346,12 @@ out:
  * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
  * current position
  */
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
 {
 	struct btree_trans *trans = iter->trans;
 	struct bpos search_key = btree_iter_search_key(iter);
 	struct bkey_s_c k;
+	struct bpos iter_pos;
 	int ret;
 
 	if (iter->update_path) {
@@ -2366,6 +2367,24 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 		if (!k.k || bkey_err(k))
 			goto out;
 
+		/*
+		 * iter->pos should be mononotically increasing, and always be
+		 * equal to the key we just returned - except extents can
+		 * straddle iter->pos:
+		 */
+		if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+			iter_pos = k.k->p;
+		else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+			iter_pos = bkey_start_pos(k.k);
+		else
+			iter_pos = iter->pos;
+
+		if (bkey_cmp(iter_pos, end) > 0) {
+			bch2_btree_iter_set_pos(iter, end);
+			k = bkey_s_c_null;
+			goto out;
+		}
+
 		if (iter->update_path &&
 		    bkey_cmp(iter->update_path->pos, k.k->p)) {
 			bch2_path_put(trans, iter->update_path,
@@ -2419,14 +2438,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 			break;
 	}
 
-	/*
-	 * iter->pos should be mononotically increasing, and always be equal to
-	 * the key we just returned - except extents can straddle iter->pos:
-	 */
-	if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
-		iter->pos = k.k->p;
-	else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
-		iter->pos = bkey_start_pos(k.k);
+	iter->pos = iter_pos;
 
 	iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
 				iter->flags & BTREE_ITER_INTENT);
@@ -2658,9 +2670,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 		if (iter->flags & BTREE_ITER_INTENT) {
 			struct btree_iter iter2;
+			struct bpos end = iter->pos;
+
+			if (iter->flags & BTREE_ITER_IS_EXTENTS)
+				end.offset = U64_MAX;
 
 			bch2_trans_copy_iter(&iter2, iter);
-			k = bch2_btree_iter_peek(&iter2);
+			k = bch2_btree_iter_peek_upto(&iter2, end);
 
 			if (k.k && !bkey_err(k)) {
 				iter->k = iter2.k;


@@ -245,9 +245,14 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *);
 struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
 struct btree *bch2_btree_iter_next_node(struct btree_iter *);
 
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
 
+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+{
+	return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
+}
+
 struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
@@ -342,13 +347,26 @@ static inline int bkey_err(struct bkey_s_c k)
 }
 
 static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
							 unsigned flags)
 {
 	return flags & BTREE_ITER_SLOTS
 		? bch2_btree_iter_peek_slot(iter)
 		: bch2_btree_iter_peek(iter);
 }
 
+static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
+							      struct bpos end,
+							      unsigned flags)
+{
+	if (!(flags & BTREE_ITER_SLOTS))
+		return bch2_btree_iter_peek_upto(iter, end);
+
+	if (bkey_cmp(iter->pos, end) > 0)
+		return bkey_s_c_null;
+
+	return bch2_btree_iter_peek_slot(iter);
+}
+
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
 	return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2
@@ -385,6 +403,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))
 
+#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id,	\
+			   _start, _end, _flags, _k, _ret)		\
+	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
+			     (_start), (_flags));			\
+	     (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
+	     !((_ret) = bkey_err(_k)) && (_k).k;			\
+	     bch2_btree_iter_advance(&(_iter)))
+
 #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret)	\
	for (;								\
	     (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
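
In the callers converted below, the new macro replaces an open-coded "stop when we leave the inode" check. A hedged before/after sketch, with loop bodies elided and identifiers borrowed from the xattr caller in this commit:

/* Before: every caller had to detect the inode boundary itself: */
for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
			     SPOS(inum, offset, snapshot), 0, k, ret) {
	if (k.k->p.inode > inum)
		break;
	/* ... process k ... */
}

/* After: the iterator stops at the end position on its own: */
for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
				  SPOS(inum, offset, snapshot),
				  POS(inum, U64_MAX), 0, k, ret) {
	/* ... process k ... */
}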


@@ -1286,7 +1286,7 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 			     BTREE_ITER_INTENT|
 			     BTREE_ITER_WITH_UPDATES|
 			     BTREE_ITER_NOT_EXTENTS);
-	k = bch2_btree_iter_peek(&iter);
+	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
 	if ((ret = bkey_err(k)))
 		goto err;
 	if (!k.k)
@@ -1405,7 +1405,8 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 		goto out;
 	}
 next:
-	k = bch2_btree_iter_next(&iter);
+	bch2_btree_iter_advance(&iter);
+	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
 	if ((ret = bkey_err(k)))
 		goto err;
 	if (!k.k)


@@ -470,16 +470,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
 	if (ret)
 		return ret;
 
-	for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
-			   SPOS(dir.inum, 0, snapshot), 0, k, ret) {
-		if (k.k->p.inode > dir.inum)
-			break;
-
+	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
+			   SPOS(dir.inum, 0, snapshot),
+			   POS(dir.inum, U64_MAX), 0, k, ret)
 		if (k.k->type == KEY_TYPE_dirent) {
 			ret = -ENOTEMPTY;
 			break;
 		}
-	}
 	bch2_trans_iter_exit(trans, &iter);
 
 	return ret;
@@ -503,11 +500,9 @@ retry:
 	if (ret)
 		goto err;
 
-	for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
-			   SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
-		if (k.k->p.inode > inum.inum)
-			break;
-
+	for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
+			   SPOS(inum.inum, ctx->pos, snapshot),
+			   POS(inum.inum, U64_MAX), 0, k, ret) {
 		if (k.k->type != KEY_TYPE_dirent)
 			continue;


@@ -936,9 +936,8 @@ retry:
 			 SPOS(ei->v.i_ino, start, snapshot), 0);
 
 	while (!(ret = btree_trans_too_many_iters(&trans)) &&
-	       (k = bch2_btree_iter_peek(&iter)).k &&
-	       !(ret = bkey_err(k)) &&
-	       bkey_cmp(iter.pos, end) < 0) {
+	       (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
+	       !(ret = bkey_err(k))) {
 		enum btree_id data_btree = BTREE_ID_extents;
 
 		if (!bkey_extent_is_data(k.k) &&


@@ -586,12 +586,12 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
 
 		bch2_btree_iter_set_snapshot(&iter, snapshot);
 
-		k = bch2_btree_iter_peek(&iter);
+		k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
 		ret = bkey_err(k);
 		if (ret)
 			goto err;
 
-		if (!k.k || iter.pos.inode != inum.inum)
+		if (!k.k)
 			break;
 
 		bkey_init(&delete.k);


@@ -163,12 +163,10 @@ bch2_hash_lookup(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	for_each_btree_key_norestart(trans, *iter, desc.btree_id,
+	for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
 			   SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+			   POS(inum.inum, U64_MAX),
 			   BTREE_ITER_SLOTS|flags, k, ret) {
-		if (iter->pos.inode != inum.inum)
-			break;
-
 		if (is_visible_key(desc, inum, k)) {
 			if (!desc.cmp_key(k, key))
 				return 0;
@@ -199,15 +197,12 @@ bch2_hash_hole(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	for_each_btree_key_norestart(trans, *iter, desc.btree_id,
+	for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
 			   SPOS(inum.inum, desc.hash_key(info, key), snapshot),
-			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-		if (iter->pos.inode != inum.inum)
-			break;
-
+			   POS(inum.inum, U64_MAX),
+			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret)
 		if (!is_visible_key(desc, inum, k))
 			return 0;
-	}
 	bch2_trans_iter_exit(trans, iter);
 
 	return ret ?: -ENOSPC;
@@ -260,14 +255,12 @@ int bch2_hash_set(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	for_each_btree_key_norestart(trans, iter, desc.btree_id,
+	for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
 			   SPOS(inum.inum,
				desc.hash_bkey(info, bkey_i_to_s_c(insert)),
				snapshot),
+			   POS(inum.inum, U64_MAX),
 			   BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-		if (iter.pos.inode != inum.inum)
-			break;
-
 		if (is_visible_key(desc, inum, k)) {
 			if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
 				goto found;


@@ -311,13 +311,9 @@ retry:
 	if (ret)
 		goto err;
 
-	for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
-			   SPOS(inum, offset, snapshot), 0, k, ret) {
-		BUG_ON(k.k->p.inode < inum);
-
-		if (k.k->p.inode > inum)
-			break;
-
+	for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
+			   SPOS(inum, offset, snapshot),
+			   POS(inum, U64_MAX), 0, k, ret) {
 		if (k.k->type != KEY_TYPE_xattr)
 			continue;