bcachefs: Move extent_handle_overwrites() to bch2_trans_update()

This lifts handling of overlapping extents out of __bch2_trans_commit()
and moves it to where we first do the update - which means that
BTREE_ITER_WITH_UPDATES can now work correctly in extents mode.

Also, this patch reworks how extent triggers work: previously, on
partial extent overwrite we would pass this information to the trigger,
telling it what part of the extent was being overwritten. But, this
approach has had too many subtle corner cases - now, we only mark whole
extents, meaning on partial extent overwrite we unmark the old extent
and mark the new extent.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2021-06-02 00:18:34 -04:00 committed by Kent Overstreet
parent b1d87f527d
commit 8e6bbc4181
3 changed files with 77 additions and 227 deletions

View File

@@ -1656,7 +1656,7 @@ static noinline struct bkey_i *__btree_trans_peek_updates(struct btree_iter *ite
struct btree_insert_entry *i;
struct bkey_i *ret = NULL;
trans_for_each_update2(iter->trans, i) {
trans_for_each_update(iter->trans, i) {
if (i->btree_id < iter->btree_id)
continue;
if (i->btree_id > iter->btree_id)

View File

@@ -775,7 +775,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
return 0;
}
static void __bch2_trans_update2(struct btree_trans *trans,
static void bch2_trans_update2(struct btree_trans *trans,
struct btree_insert_entry n)
{
struct btree_insert_entry *i;
@@ -798,44 +798,23 @@ static void __bch2_trans_update2(struct btree_trans *trans,
i - trans->updates2, n);
}
static void bch2_trans_update2(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
{
__bch2_trans_update2(trans, (struct btree_insert_entry) {
.bkey_type = __btree_node_type(iter->level, iter->btree_id),
.btree_id = iter->btree_id,
.level = iter->level,
.iter = iter,
.k = insert,
});
}
static int extent_update_to_keys(struct btree_trans *trans,
struct btree_insert_entry n)
{
int ret;
ret = bch2_extent_can_insert(trans, n.iter, n.k);
if (ret)
return ret;
if (bkey_deleted(&n.k->k))
return 0;
n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS);
n.is_extent = false;
__bch2_trans_update2(trans, n);
bch2_trans_update2(trans, n);
bch2_trans_iter_put(trans, n.iter);
return 0;
}
static int extent_handle_overwrites(struct btree_trans *trans,
enum btree_id btree_id,
struct bkey_i *insert)
struct bkey_i *insert,
unsigned trigger_flags)
{
struct btree_iter *iter, *update_iter;
struct bpos start = bkey_start_pos(&insert->k);
@@ -861,7 +840,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
bch2_trans_update2(trans, update_iter, update);
bch2_trans_update(trans, update_iter, update,
trigger_flags);
bch2_trans_iter_put(trans, update_iter);
}
@@ -877,7 +857,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
bch2_trans_update2(trans, update_iter, update);
bch2_trans_update(trans, update_iter, update,
trigger_flags);
bch2_trans_iter_put(trans, update_iter);
}
@@ -892,7 +873,8 @@ static int extent_handle_overwrites(struct btree_trans *trans,
update_iter = bch2_trans_get_iter(trans, btree_id, update->k.p,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
bch2_trans_update2(trans, update_iter, update);
bch2_trans_update(trans, update_iter, update,
trigger_flags);
bch2_trans_iter_put(trans, update_iter);
break;
}
@@ -962,18 +944,10 @@ int __bch2_trans_commit(struct btree_trans *trans)
}
} while (trans_trigger_run);
/* Turn extents updates into keys: */
trans_for_each_update(trans, i)
if (i->is_extent) {
ret = extent_handle_overwrites(trans, i->btree_id, i->k);
if (unlikely(ret))
goto out;
}
trans_for_each_update(trans, i) {
ret = i->is_extent
? extent_update_to_keys(trans, *i)
: (__bch2_trans_update2(trans, *i), 0);
: (bch2_trans_update2(trans, *i), 0);
if (unlikely(ret))
goto out;
}
@@ -1051,6 +1025,7 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
.iter = iter,
.k = k
};
int ret = 0;
BUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
@@ -1067,97 +1042,47 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
}
#endif
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
if (n.is_extent) {
ret = bch2_extent_can_insert(trans, n.iter, n.k);
if (ret)
return ret;
ret = extent_handle_overwrites(trans, n.btree_id, n.k, flags);
if (ret)
return ret;
iter->pos_after_commit = k->k.p;
iter->flags |= BTREE_ITER_SET_POS_AFTER_COMMIT;
if (bkey_deleted(&n.k->k))
return 0;
n.iter = bch2_trans_get_iter(trans, n.iter->btree_id, n.k->k.p,
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS);
bch2_trans_iter_put(trans, n.iter);
n.is_extent = false;
}
BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS);
n.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
/*
* Pending updates are kept sorted: first, find position of new update,
* then delete/trim any updates the new update overwrites:
*/
if (!n.is_extent) {
trans_for_each_update(trans, i)
if (btree_insert_entry_cmp(&n, i) <= 0)
break;
if (i < trans->updates + trans->nr_updates &&
!btree_insert_entry_cmp(&n, i))
!btree_insert_entry_cmp(&n, i)) {
BUG_ON(i->trans_triggers_run);
*i = n;
else
} else
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);
} else {
trans_for_each_update(trans, i)
if (btree_insert_entry_cmp(&n, i) < 0)
break;
while (i > trans->updates &&
i[-1].btree_id == n.btree_id &&
bkey_cmp(bkey_start_pos(&n.k->k),
bkey_start_pos(&i[-1].k->k)) <= 0) {
--i;
array_remove_item(trans->updates, trans->nr_updates,
i - trans->updates);
}
if (i > trans->updates &&
i[-1].btree_id == n.btree_id &&
bkey_cmp(bkey_start_pos(&n.k->k), i[-1].k->k.p) < 0)
bch2_cut_back(bkey_start_pos(&n.k->k), i[-1].k);
if (i < trans->updates + trans->nr_updates &&
i->btree_id == n.btree_id &&
bkey_cmp(n.k->k.p, bkey_start_pos(&i->k->k)) > 0) {
if (bkey_cmp(bkey_start_pos(&n.k->k),
bkey_start_pos(&i->k->k)) > 0) {
struct btree_insert_entry split = *i;
int ret;
BUG_ON(trans->nr_updates + 1 >= BTREE_ITER_MAX);
split.k = bch2_trans_kmalloc(trans, bkey_bytes(&i->k->k));
ret = PTR_ERR_OR_ZERO(split.k);
if (ret)
return ret;
bkey_copy(split.k, i->k);
bch2_cut_back(bkey_start_pos(&n.k->k), split.k);
split.iter = bch2_trans_get_iter(trans, split.btree_id,
bkey_start_pos(&split.k->k),
BTREE_ITER_INTENT);
split.iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
bch2_trans_iter_put(trans, split.iter);
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, split);
i++;
}
/*
* When we have an extent that overwrites the start of another
* update, trimming that extent will mean the iterator's
* position has to change since the iterator position has to
* match the extent's start pos - but we don't want to change
* the iterator pos if some other code is using it, so we may
* need to clone it:
*/
if (btree_iter_live(trans, i->iter)) {
i->iter = bch2_trans_copy_iter(trans, i->iter);
i->iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
bch2_trans_iter_put(trans, i->iter);
}
bch2_cut_front(n.k->k.p, i->k);
bch2_btree_iter_set_pos(i->iter, n.k->k.p);
}
array_insert_item(trans->updates, trans->nr_updates,
i - trans->updates, n);
}
return 0;
}

View File

@@ -1519,29 +1519,6 @@ static struct btree_iter *trans_get_update(struct btree_trans *trans,
return NULL;
}
static int trans_get_key(struct btree_trans *trans,
enum btree_id btree_id, struct bpos pos,
struct btree_iter **iter,
struct bkey_s_c *k)
{
unsigned flags = btree_id != BTREE_ID_alloc
? BTREE_ITER_SLOTS
: BTREE_ITER_CACHED;
int ret;
*iter = trans_get_update(trans, btree_id, pos, k);
if (*iter)
return 1;
*iter = bch2_trans_get_iter(trans, btree_id, pos,
flags|BTREE_ITER_INTENT);
*k = __bch2_btree_iter_peek(*iter, flags);
ret = bkey_err(*k);
if (ret)
bch2_trans_iter_put(trans, *iter);
return ret;
}
static struct bkey_alloc_buf *
bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter,
const struct bch_extent_ptr *ptr,
@@ -1621,9 +1598,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct bch_replicas_padded r;
int ret = 0;
ret = trans_get_key(trans, BTREE_ID_stripes, POS(0, p.ec.idx), &iter, &k);
if (ret < 0)
return ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_stripes, POS(0, p.ec.idx),
BTREE_ITER_INTENT|
BTREE_ITER_WITH_UPDATES);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
if (k.k->type != KEY_TYPE_stripe) {
bch2_fs_inconsistent(c,
@@ -1631,7 +1612,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
(u64) p.ec.idx);
bch2_inconsistent_error(c);
ret = -EIO;
goto out;
goto err;
}
if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
@@ -1639,13 +1620,13 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
"stripe pointer doesn't match stripe %llu",
(u64) p.ec.idx);
ret = -EIO;
goto out;
goto err;
}
s = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
ret = PTR_ERR_OR_ZERO(s);
if (ret)
goto out;
goto err;
bkey_reassemble(&s->k_i, k);
stripe_blockcount_set(&s->v, p.ec.block,
@@ -1656,7 +1637,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
r.e.data_type = data_type;
update_replicas_list(trans, &r.e, sectors);
out:
err:
bch2_trans_iter_put(trans, iter);
return ret;
}
@@ -1838,10 +1819,13 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
int frags_referenced;
s64 ret;
ret = trans_get_key(trans, BTREE_ID_reflink,
POS(0, idx), &iter, &k);
if (ret < 0)
return ret;
iter = bch2_trans_get_iter(trans, BTREE_ID_reflink, POS(0, idx),
BTREE_ITER_INTENT|
BTREE_ITER_WITH_UPDATES);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
goto err;
sectors = min_t(u64, sectors, k.k->p.offset - idx);
@@ -1994,7 +1978,6 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
if (!btree_node_type_is_extents(iter->btree_id)) {
if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
old = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(old);
@@ -2007,74 +1990,16 @@ int bch2_trans_mark_update(struct btree_trans *trans,
old = bkey_i_to_s_c(ck->k);
}
if (old.k->type == new->k.type) {
if (old.k->type == new->k.type &&
!btree_node_type_is_extents(iter->btree_id)) {
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, new->k.size,
BTREE_TRIGGER_INSERT|flags) ?:
bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, -((s64) old.k->size),
BTREE_TRIGGER_OVERWRITE|flags);
}
} else {
struct btree_iter *copy;
struct bkey _old;
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
bkey_init(&_old);
old = (struct bkey_s_c) { &_old, NULL };
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
0, new->k.size,
BTREE_TRIGGER_INSERT);
if (ret)
return ret;
copy = bch2_trans_copy_iter(trans, iter);
for_each_btree_key_continue(copy, 0, old, ret) {
unsigned offset = 0;
s64 sectors = -((s64) old.k->size);
flags |= BTREE_TRIGGER_OVERWRITE;
if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
break;
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
offset = 0;
sectors = -((s64) old.k->size);
break;
case BCH_EXTENT_OVERLAP_BACK:
offset = bkey_start_offset(&new->k) -
bkey_start_offset(old.k);
sectors = bkey_start_offset(&new->k) -
old.k->p.offset;
break;
case BCH_EXTENT_OVERLAP_FRONT:
offset = 0;
sectors = bkey_start_offset(old.k) -
new->k.p.offset;
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
offset = bkey_start_offset(&new->k) -
bkey_start_offset(old.k);
sectors = -((s64) new->k.size);
flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
break;
}
BUG_ON(sectors >= 0);
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
offset, sectors, flags);
if (ret)
break;
}
bch2_trans_iter_put(trans, copy);
}
return ret;
}