diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 9048441cfa55..cf092903a6ab 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1346,13 +1346,19 @@ LE64_BITMASK(BCH_SB_METADATA_TARGET, struct bch_sb, flags[3], 16, 28); x(reflink_inline_data, 14) \ x(new_varint, 15) \ x(journal_no_flush, 16) \ - x(alloc_v2, 17) + x(alloc_v2, 17) \ + x(extents_across_btree_nodes, 18) + +#define BCH_SB_FEATURES_ALWAYS \ + ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ + (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ + (1ULL << BCH_FEATURE_btree_updates_journalled)|\ + (1ULL << BCH_FEATURE_extents_across_btree_nodes)) #define BCH_SB_FEATURES_ALL \ - ((1ULL << BCH_FEATURE_new_siphash)| \ - (1ULL << BCH_FEATURE_new_extent_overwrite)| \ + (BCH_SB_FEATURES_ALWAYS| \ + (1ULL << BCH_FEATURE_new_siphash)| \ (1ULL << BCH_FEATURE_btree_ptr_v2)| \ - (1ULL << BCH_FEATURE_extents_above_btree_updates)|\ (1ULL << BCH_FEATURE_new_varint)| \ (1ULL << BCH_FEATURE_journal_no_flush)| \ (1ULL << BCH_FEATURE_alloc_v2)) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 531732e30950..660e9e827ed4 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1814,11 +1814,8 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) static inline struct bkey_s_c __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) { - struct btree_iter_level *l = &iter->l[0]; - struct btree_node_iter node_iter; struct bkey_s_c k; - struct bkey n; - int ret; + struct bpos pos, next_start; /* keys & holes can't span inode numbers: */ if (iter->pos.offset == KEY_OFFSET_MAX) { @@ -1826,50 +1823,31 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) return bkey_s_c_null; bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos)); - - ret = bch2_btree_iter_traverse(iter); - if (unlikely(ret)) - return bkey_s_c_err(ret); } - /* - * iterator is now at the correct position for inserting at iter->pos, - * but we need to keep iterating until we find the first non whiteout so - * we know how big a hole we have, if any: - */ + pos = iter->pos; + k = bch2_btree_iter_peek(iter); + iter->pos = pos; - node_iter = l->iter; - k = __btree_iter_unpack(iter, l, &iter->k, - bch2_btree_node_iter_peek(&node_iter, l->b)); - - if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) { - /* - * We're not setting iter->uptodate because the node iterator - * doesn't necessarily point at the key we're returning: - */ - - EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0); - bch2_btree_iter_verify(iter); + if (bkey_err(k)) return k; - } - /* hole */ + if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) + return k; - if (!k.k) - k.k = &l->b->key.k; + next_start = k.k ? bkey_start_pos(k.k) : POS_MAX; - bkey_init(&n); - n.p = iter->pos; - bch2_key_resize(&n, + bkey_init(&iter->k); + iter->k.p = iter->pos; + bch2_key_resize(&iter->k, min_t(u64, KEY_SIZE_MAX, - (k.k->p.inode == n.p.inode - ? bkey_start_offset(k.k) + (next_start.inode == iter->pos.inode + ? next_start.offset : KEY_OFFSET_MAX) - - n.p.offset)); + iter->pos.offset)); - EBUG_ON(!n.size); + EBUG_ON(!iter->k.size); - iter->k = n; iter->uptodate = BTREE_ITER_UPTODATE; bch2_btree_iter_verify_entry_exit(iter); @@ -1893,13 +1871,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (iter->uptodate == BTREE_ITER_UPTODATE) return btree_iter_peek_uptodate(iter); + if (iter->flags & BTREE_ITER_IS_EXTENTS) + return __bch2_btree_iter_peek_slot_extents(iter); + ret = bch2_btree_iter_traverse(iter); if (unlikely(ret)) return bkey_s_c_err(ret); - if (iter->flags & BTREE_ITER_IS_EXTENTS) - return __bch2_btree_iter_peek_slot_extents(iter); - k = __btree_iter_peek_all(iter, l, &iter->k); EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 5e0ce7cde017..d99a78f8950d 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -62,9 +62,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, EBUG_ON(btree_node_just_written(b)); EBUG_ON(bset_written(b, btree_bset_last(b))); EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k)); - EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && - bkey_cmp(bkey_start_pos(&insert->k), - bkey_predecessor(b->data->min_key)) < 0); EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0); EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0); EBUG_ON(insert->k.u64s > @@ -705,26 +702,31 @@ static inline int btree_iter_pos_cmp(const struct btree_iter *l, bkey_cmp(l->pos, r->pos); } -static void bch2_trans_update2(struct btree_trans *trans, +static int bch2_trans_update2(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *insert) { struct btree_insert_entry *i, n = (struct btree_insert_entry) { .iter = iter, .k = insert }; + int ret; btree_insert_entry_checks(trans, n.iter, n.k); - BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); - EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX); + ret = bch2_btree_iter_traverse(iter); + if (unlikely(ret)) + return ret; + + BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); + iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT; trans_for_each_update2(trans, i) { if (btree_iter_pos_cmp(n.iter, i->iter) == 0) { *i = n; - return; + return 0; } if (btree_iter_pos_cmp(n.iter, i->iter) <= 0) @@ -733,6 +735,7 @@ static void bch2_trans_update2(struct btree_trans *trans, array_insert_item(trans->updates2, trans->nr_updates2, i - trans->updates2, n); + return 0; } static int extent_update_to_keys(struct btree_trans *trans, @@ -753,9 +756,9 @@ static int extent_update_to_keys(struct btree_trans *trans, iter->flags |= BTREE_ITER_INTENT; __bch2_btree_iter_set_pos(iter, insert->k.p, false); - bch2_trans_update2(trans, iter, insert); + ret = bch2_trans_update2(trans, iter, insert); bch2_trans_iter_put(trans, iter); - return 0; + return ret; } static int extent_handle_overwrites(struct btree_trans *trans, @@ -785,8 +788,10 @@ static int extent_handle_overwrites(struct btree_trans *trans, bch2_cut_back(start, update); __bch2_btree_iter_set_pos(update_iter, update->k.p, false); - bch2_trans_update2(trans, update_iter, update); + ret = bch2_trans_update2(trans, update_iter, update); bch2_trans_iter_put(trans, update_iter); + if (ret) + goto err; } if (bkey_cmp(k.k->p, end) > 0) { @@ -800,8 +805,10 @@ static int extent_handle_overwrites(struct btree_trans *trans, bch2_cut_front(end, update); __bch2_btree_iter_set_pos(update_iter, update->k.p, false); - bch2_trans_update2(trans, update_iter, update); + ret = bch2_trans_update2(trans, update_iter, update); bch2_trans_iter_put(trans, update_iter); + if (ret) + goto err; } else { update_iter = bch2_trans_copy_iter(trans, iter); @@ -815,8 +822,10 @@ static int extent_handle_overwrites(struct btree_trans *trans, update->k.size = 0; __bch2_btree_iter_set_pos(update_iter, update->k.p, false); - bch2_trans_update2(trans, update_iter, update); + ret = bch2_trans_update2(trans, update_iter, update); bch2_trans_iter_put(trans, update_iter); + if (ret) + goto err; } k = bch2_btree_iter_next_with_updates(iter); @@ -921,11 +930,11 @@ int __bch2_trans_commit(struct btree_trans *trans) trans_for_each_update(trans, i) { if (i->iter->flags & BTREE_ITER_IS_EXTENTS) { ret = extent_update_to_keys(trans, i->iter, i->k); - if (ret) - goto out; } else { - bch2_trans_update2(trans, i->iter, i->k); + ret = bch2_trans_update2(trans, i->iter, i->k); } + if (ret) + goto out; } trans_for_each_update2(trans, i) { diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 65ae89c80590..66e50e6b36ea 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1321,9 +1321,6 @@ int bch2_mark_update(struct btree_trans *trans, unsigned flags) { struct bch_fs *c = trans->c; - struct btree *b = iter_l(iter)->b; - struct btree_node_iter node_iter = iter_l(iter)->iter; - struct bkey_packed *_old; struct bkey_s_c old; struct bkey unpacked; int ret = 0; @@ -1363,23 +1360,24 @@ int bch2_mark_update(struct btree_trans *trans, BTREE_TRIGGER_OVERWRITE|flags); } } else { + struct btree_iter *copy; + BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); bch2_mark_key_locked(c, old, bkey_i_to_s_c(new), 0, new->k.size, fs_usage, trans->journal_res.seq, BTREE_TRIGGER_INSERT|flags); - while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) { - unsigned offset = 0; - s64 sectors; + copy = bch2_trans_copy_iter(trans, iter); - old = bkey_disassemble(b, _old, &unpacked); - sectors = -((s64) old.k->size); + for_each_btree_key_continue(copy, 0, old, ret) { + unsigned offset = 0; + s64 sectors = -((s64) old.k->size); flags |= BTREE_TRIGGER_OVERWRITE; if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) - return 0; + break; switch (bch2_extent_overlap(&new->k, old.k)) { case BCH_EXTENT_OVERLAP_ALL: @@ -1412,9 +1410,8 @@ int bch2_mark_update(struct btree_trans *trans, trans->journal_res.seq, flags) ?: 1; if (ret <= 0) break; - - bch2_btree_node_iter_advance(&node_iter, b); } + bch2_trans_iter_put(trans, copy); } return ret; @@ -1445,27 +1442,20 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans, pr_err("overlapping with"); if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) { - struct btree *b = iter_l(i->iter)->b; - struct btree_node_iter node_iter = iter_l(i->iter)->iter; - struct bkey_packed *_k; + struct btree_iter *copy = bch2_trans_copy_iter(trans, i->iter); + struct bkey_s_c k; + int ret; - while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { - struct bkey unpacked; - struct bkey_s_c k; - - pr_info("_k %px format %u", _k, _k->format); - k = bkey_disassemble(b, _k, &unpacked); - - if (btree_node_is_extents(b) + for_each_btree_key_continue(copy, 0, k, ret) { + if (btree_node_type_is_extents(i->iter->btree_id) ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 : bkey_cmp(i->k->k.p, k.k->p)) break; bch2_bkey_val_to_text(&PBUF(buf), c, k); pr_err("%s", buf); - - bch2_btree_node_iter_advance(&node_iter, b); } + bch2_trans_iter_put(trans, copy); } else { struct bkey_cached *ck = (void *) i->iter->l[0].b; @@ -1860,8 +1850,6 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, } bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k)); - BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK); - bch2_trans_update(trans, iter, n, 0); out: ret = sectors; @@ -1987,15 +1975,13 @@ int bch2_trans_mark_update(struct btree_trans *trans, BTREE_TRIGGER_OVERWRITE|flags); } } else { - struct btree *b = iter_l(iter)->b; - struct btree_node_iter node_iter = iter_l(iter)->iter; - struct bkey_packed *_old; - struct bkey unpacked; + struct btree_iter *copy; + struct bkey _old; EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED); - bkey_init(&unpacked); - old = (struct bkey_s_c) { &unpacked, NULL }; + bkey_init(&_old); + old = (struct bkey_s_c) { &_old, NULL }; ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, new->k.size, @@ -2003,18 +1989,16 @@ int bch2_trans_mark_update(struct btree_trans *trans, if (ret) return ret; - while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) { - unsigned flags = BTREE_TRIGGER_OVERWRITE; - unsigned offset = 0; - s64 sectors; + copy = bch2_trans_copy_iter(trans, iter); - old = bkey_disassemble(b, _old, &unpacked); - sectors = -((s64) old.k->size); + for_each_btree_key_continue(copy, 0, old, ret) { + unsigned offset = 0; + s64 sectors = -((s64) old.k->size); flags |= BTREE_TRIGGER_OVERWRITE; if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) - return 0; + break; switch (bch2_extent_overlap(&new->k, old.k)) { case BCH_EXTENT_OVERLAP_ALL: @@ -2045,10 +2029,9 @@ int bch2_trans_mark_update(struct btree_trans *trans, ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), offset, sectors, flags); if (ret) - return ret; - - bch2_btree_node_iter_advance(&node_iter, b); + break; } + bch2_trans_iter_put(trans, copy); } return ret; diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 5c43678e94a3..16d2bca8a662 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -99,24 +99,12 @@ int bch2_extent_atomic_end(struct btree_iter *iter, struct bpos *end) { struct btree_trans *trans = iter->trans; - struct btree *b; - struct btree_node_iter node_iter; - struct bkey_packed *_k; - unsigned nr_iters = 0; + struct btree_iter *copy; + struct bkey_s_c k; + unsigned nr_iters = 0; int ret; - ret = bch2_btree_iter_traverse(iter); - if (ret) - return ret; - - b = iter->l[0].b; - node_iter = iter->l[0].iter; - - BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) && - bkey_cmp(bkey_start_pos(&insert->k), - bkey_predecessor(b->data->min_key)) < 0); - - *end = bpos_min(insert->k.p, b->key.k.p); + *end = insert->k.p; /* extent_update_to_keys(): */ nr_iters += 1; @@ -126,9 +114,9 @@ int bch2_extent_atomic_end(struct btree_iter *iter, if (ret < 0) return ret; - while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) { - struct bkey unpacked; - struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked); + copy = bch2_trans_copy_iter(trans, iter); + + for_each_btree_key_continue(copy, 0, k, ret) { unsigned offset = 0; if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0) @@ -155,10 +143,9 @@ int bch2_extent_atomic_end(struct btree_iter *iter, &nr_iters, EXTENT_ITERS_MAX); if (ret) break; - - bch2_btree_node_iter_advance(&node_iter, b); } + bch2_trans_iter_put(trans, copy); return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 8560023b4c7a..54ac9cc470af 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -506,115 +506,6 @@ static void replay_now_at(struct journal *j, u64 seq) bch2_journal_pin_put(j, j->replay_journal_seq++); } -static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id, - struct bkey_i *k) -{ - struct btree_trans trans; - struct btree_iter *iter, *split_iter; - /* - * We might cause compressed extents to be split, so we need to pass in - * a disk_reservation: - */ - struct disk_reservation disk_res = - bch2_disk_reservation_init(c, 0); - struct bkey_i *split; - struct bpos atomic_end; - /* - * Some extents aren't equivalent - w.r.t. what the triggers do - * - if they're split: - */ - bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) || - k->k.type == KEY_TYPE_reflink_p; - bool remark = false; - int ret; - - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); -retry: - bch2_trans_begin(&trans); - - iter = bch2_trans_get_iter(&trans, btree_id, - bkey_start_pos(&k->k), - BTREE_ITER_INTENT); - - do { - ret = bch2_btree_iter_traverse(iter); - if (ret) - goto err; - - atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p); - - split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k)); - ret = PTR_ERR_OR_ZERO(split); - if (ret) - goto err; - - if (!remark && - remark_if_split && - bkey_cmp(atomic_end, k->k.p) < 0) { - ret = bch2_disk_reservation_add(c, &disk_res, - k->k.size * - bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)), - BCH_DISK_RESERVATION_NOFAIL); - BUG_ON(ret); - - remark = true; - } - - bkey_copy(split, k); - bch2_cut_front(iter->pos, split); - bch2_cut_back(atomic_end, split); - - split_iter = bch2_trans_copy_iter(&trans, iter); - - /* - * It's important that we don't go through the - * extent_handle_overwrites() and extent_update_to_keys() path - * here: journal replay is supposed to treat extents like - * regular keys - */ - __bch2_btree_iter_set_pos(split_iter, split->k.p, false); - bch2_trans_update(&trans, split_iter, split, - BTREE_TRIGGER_NORUN); - bch2_trans_iter_put(&trans, split_iter); - - bch2_btree_iter_set_pos(iter, split->k.p); - - if (remark) { - ret = bch2_trans_mark_key(&trans, - bkey_s_c_null, - bkey_i_to_s_c(split), - 0, split->k.size, - BTREE_TRIGGER_INSERT); - if (ret) - goto err; - } - } while (bkey_cmp(iter->pos, k->k.p) < 0); - - if (remark) { - ret = bch2_trans_mark_key(&trans, - bkey_i_to_s_c(k), - bkey_s_c_null, - 0, -((s64) k->k.size), - BTREE_TRIGGER_OVERWRITE); - if (ret) - goto err; - } - - ret = bch2_trans_commit(&trans, &disk_res, NULL, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_JOURNAL_REPLAY); -err: - bch2_trans_iter_put(&trans, iter); - - if (ret == -EINTR) - goto retry; - - bch2_disk_reservation_put(c, &disk_res); - - return bch2_trans_exit(&trans) ?: ret; -} - static int __bch2_journal_replay_key(struct btree_trans *trans, enum btree_id id, unsigned level, struct bkey_i *k) @@ -753,9 +644,7 @@ static int bch2_journal_replay(struct bch_fs *c, replay_now_at(j, keys.journal_seq_base + i->journal_seq); - ret = i->k->k.size - ? bch2_extent_replay_key(c, i->btree_id, i->k) - : bch2_journal_replay_key(c, i); + ret = bch2_journal_replay_key(c, i); if (ret) goto err; } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 767baab18807..79d03b18b5c8 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -956,9 +956,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c) mutex_lock(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates; - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled; + c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALWAYS; ret = bch2_write_super(c); mutex_unlock(&c->sb_lock);