diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index cf83911b3f5d..27fa3e230e6e 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -18,6 +18,9 @@ #include #include +static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *, + struct btree *); + struct bset_tree *bch2_bkey_to_bset(struct btree *b, struct bkey_packed *k) { unsigned offset = __btree_node_key_to_offset(b, k); @@ -63,8 +66,8 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set) _n = bkey_next(_k); bch2_bkey_to_text(buf, sizeof(buf), &k); - printk(KERN_ERR "block %u key %zi/%u: %s\n", set, - _k->_data - i->_data, i->u64s, buf); + printk(KERN_ERR "block %u key %5u: %s\n", set, + __btree_node_key_to_offset(b, _k), buf); if (_n == vstruct_last(i)) continue; @@ -120,20 +123,6 @@ void bch2_dump_btree_node_iter(struct btree *b, #ifdef CONFIG_BCACHEFS_DEBUG -static bool keys_out_of_order(struct btree *b, - const struct bkey_packed *prev, - const struct bkey_packed *next, - bool is_extents) -{ - struct bkey nextu = bkey_unpack_key(b, next); - - return bkey_cmp_left_packed_byval(b, prev, bkey_start_pos(&nextu)) > 0 || - ((is_extents - ? !bkey_deleted(next) - : !bkey_deleted(prev)) && - !bkey_cmp_packed(b, prev, next)); -} - void __bch2_verify_btree_nr_keys(struct btree *b) { struct bset_tree *t; @@ -150,16 +139,21 @@ void __bch2_verify_btree_nr_keys(struct btree *b) BUG_ON(memcmp(&nr, &b->nr, sizeof(nr))); } -static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, - struct btree *b, - struct bkey_packed *k) +static void bch2_btree_node_iter_next_check(struct btree_node_iter *_iter, + struct btree *b) { - const struct bkey_packed *n = bch2_btree_node_iter_peek_all(iter, b); + struct btree_node_iter iter = *_iter; + const struct bkey_packed *k, *n; + + k = bch2_btree_node_iter_peek_all(&iter, b); + __bch2_btree_node_iter_advance(&iter, b); + n = bch2_btree_node_iter_peek_all(&iter, b); bkey_unpack_key(b, k); if (n && - keys_out_of_order(b, k, n, iter->is_extents)) { + __btree_node_iter_cmp(b, k, n) > 0) { + struct btree_node_iter_set *set; struct bkey ku = bkey_unpack_key(b, k); struct bkey nu = bkey_unpack_key(b, n); char buf1[80], buf2[80]; @@ -167,12 +161,22 @@ static void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, bch2_dump_btree_node(b); bch2_bkey_to_text(buf1, sizeof(buf1), &ku); bch2_bkey_to_text(buf2, sizeof(buf2), &nu); - panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2); + printk(KERN_ERR "out of order/overlapping:\n%s\n%s\n", + buf1, buf2); + printk(KERN_ERR "iter was:"); + + btree_node_iter_for_each(_iter, set) { + struct bkey_packed *k = __btree_node_offset_to_key(b, set->k); + struct bset_tree *t = bch2_bkey_to_bset(b, k); + printk(" [%zi %zi]", t - b->set, + k->_data - bset(b, t)->_data); + } + panic("\n"); } } void bch2_btree_node_iter_verify(struct btree_node_iter *iter, - struct btree *b) + struct btree *b) { struct btree_node_iter_set *set, *s2; struct bset_tree *t; @@ -196,72 +200,72 @@ found: /* Verify iterator is sorted: */ btree_node_iter_for_each(iter, set) BUG_ON(set != iter->data && - btree_node_iter_cmp(iter, b, set[-1], set[0]) > 0); + btree_node_iter_cmp(b, set[-1], set[0]) > 0); +} + +void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where, + struct bkey_packed *insert, unsigned clobber_u64s) +{ + struct bset_tree *t = bch2_bkey_to_bset(b, where); + struct bkey_packed *prev = bch2_bkey_prev_all(b, t, where); + struct bkey_packed *next = (void *) (where->_data + clobber_u64s); +#if 0 + BUG_ON(prev && + 
__btree_node_iter_cmp(b, prev, insert) > 0); +#else + if (prev && + __btree_node_iter_cmp(b, prev, insert) > 0) { + struct bkey k1 = bkey_unpack_key(b, prev); + struct bkey k2 = bkey_unpack_key(b, insert); + char buf1[100]; + char buf2[100]; + + bch2_dump_btree_node(b); + bch2_bkey_to_text(buf1, sizeof(buf1), &k1); + bch2_bkey_to_text(buf2, sizeof(buf2), &k2); + + panic("prev > insert:\n" + "prev key %5u %s\n" + "insert key %5u %s\n", + __btree_node_key_to_offset(b, prev), buf1, + __btree_node_key_to_offset(b, insert), buf2); + } +#endif +#if 0 + BUG_ON(next != btree_bkey_last(b, t) && + __btree_node_iter_cmp(b, insert, next) > 0); +#else + if (next != btree_bkey_last(b, t) && + __btree_node_iter_cmp(b, insert, next) > 0) { + struct bkey k1 = bkey_unpack_key(b, insert); + struct bkey k2 = bkey_unpack_key(b, next); + char buf1[100]; + char buf2[100]; + + bch2_dump_btree_node(b); + bch2_bkey_to_text(buf1, sizeof(buf1), &k1); + bch2_bkey_to_text(buf2, sizeof(buf2), &k2); + + panic("insert > next:\n" + "insert key %5u %s\n" + "next key %5u %s\n", + __btree_node_key_to_offset(b, insert), buf1, + __btree_node_key_to_offset(b, next), buf2); + } +#endif } void bch2_verify_key_order(struct btree *b, - struct btree_node_iter *iter, - struct bkey_packed *where) + struct btree_node_iter *_iter, + struct bkey_packed *where) { - struct bset_tree *t = bch2_bkey_to_bset(b, where); - struct bkey_packed *k, *prev; - struct bkey uk, uw = bkey_unpack_key(b, where); - - k = bch2_bkey_prev_all(b, t, where); - if (k && - keys_out_of_order(b, k, where, iter->is_extents)) { - char buf1[100], buf2[100]; - - bch2_dump_btree_node(b); - uk = bkey_unpack_key(b, k); - bch2_bkey_to_text(buf1, sizeof(buf1), &uk); - bch2_bkey_to_text(buf2, sizeof(buf2), &uw); - panic("out of order with prev:\n%s\n%s\n", - buf1, buf2); - } - - k = bkey_next(where); - BUG_ON(k != btree_bkey_last(b, t) && - keys_out_of_order(b, where, k, iter->is_extents)); - - for_each_bset(b, t) { - if (where >= btree_bkey_first(b, t) || - where < btree_bkey_last(b, t)) - continue; - - k = bch2_btree_node_iter_bset_pos(iter, b, t); - - if (k == btree_bkey_last(b, t)) - k = bch2_bkey_prev_all(b, t, k); - - while (bkey_cmp_left_packed_byval(b, k, bkey_start_pos(&uw)) > 0 && - (prev = bch2_bkey_prev_all(b, t, k))) - k = prev; - - for (; - k != btree_bkey_last(b, t); - k = bkey_next(k)) { - uk = bkey_unpack_key(b, k); - - if (iter->is_extents) { - BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 || - bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0)); - } else { - BUG_ON(!bkey_cmp(uw.p, uk.p) && - !bkey_deleted(&uk)); - } - - if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0) - break; - } - } + bch2_verify_insert_pos(b, where, where, where->u64s); } #else static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter, - struct btree *b, - struct bkey_packed *k) {} + struct btree *b) {} #endif @@ -1229,6 +1233,7 @@ void bch2_bset_insert(struct btree *b, struct bkey_packed packed, *src = bkey_to_packed(insert); bch2_bset_verify_rw_aux_tree(b, t); + bch2_verify_insert_pos(b, where, bkey_to_packed(insert), clobber_u64s); if (bch2_bkey_pack_key(&packed, &insert->k, f)) src = &packed; @@ -1255,7 +1260,6 @@ void bch2_bset_insert(struct btree *b, bch2_bset_fix_lookup_table(b, t, where, clobber_u64s, src->u64s); - bch2_verify_key_order(b, iter, where); bch2_verify_btree_nr_keys(b); } @@ -1461,7 +1465,7 @@ void bch2_btree_node_iter_push(struct btree_node_iter *iter, noinline __flatten __attribute__((cold)) static void btree_node_iter_init_pack_failed(struct 
btree_node_iter *iter, struct btree *b, struct bpos search, - bool strictly_greater, bool is_extents) + bool strictly_greater) { struct bset_tree *t; @@ -1518,7 +1522,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, */ void bch2_btree_node_iter_init(struct btree_node_iter *iter, struct btree *b, struct bpos search, - bool strictly_greater, bool is_extents) + bool strictly_greater) { struct bset_tree *t; struct bkey_packed p, *packed_search = NULL; @@ -1526,7 +1530,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter, EBUG_ON(bkey_cmp(search, b->data->min_key) < 0); bset_aux_tree_verify(b); - __bch2_btree_node_iter_init(iter, is_extents); + memset(iter, 0, sizeof(*iter)); switch (bch2_bkey_pack_pos_lossy(&p, search, b)) { case BKEY_PACK_POS_EXACT: @@ -1537,7 +1541,7 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter, break; case BKEY_PACK_POS_FAIL: btree_node_iter_init_pack_failed(iter, b, search, - strictly_greater, is_extents); + strictly_greater); return; } @@ -1552,12 +1556,11 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter, } void bch2_btree_node_iter_init_from_start(struct btree_node_iter *iter, - struct btree *b, - bool is_extents) + struct btree *b) { struct bset_tree *t; - __bch2_btree_node_iter_init(iter, is_extents); + memset(iter, 0, sizeof(*iter)); for_each_bset(b, t) __bch2_btree_node_iter_push(iter, b, @@ -1585,7 +1588,7 @@ static inline bool btree_node_iter_sort_two(struct btree_node_iter *iter, { bool ret; - if ((ret = (btree_node_iter_cmp(iter, b, + if ((ret = (btree_node_iter_cmp(b, iter->data[first], iter->data[first + 1]) > 0))) swap(iter->data[first], iter->data[first + 1]); @@ -1640,23 +1643,14 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter, btree_node_iter_sort_two(iter, b, 1); } -/** - * bch_btree_node_iter_advance - advance @iter by one key - * - * Doesn't do debugchecks - for cases where (insert_fixup_extent()) a bset might - * momentarily have out of order extents. 
- */ void bch2_btree_node_iter_advance(struct btree_node_iter *iter, struct btree *b) { #ifdef CONFIG_BCACHEFS_DEBUG - struct bkey_packed *k = bch2_btree_node_iter_peek_all(iter, b); - - __bch2_btree_node_iter_advance(iter, b); - bch2_btree_node_iter_next_check(iter, b, k); -#else - __bch2_btree_node_iter_advance(iter, b); + bch2_btree_node_iter_verify(iter, b); + bch2_btree_node_iter_next_check(iter, b); #endif + __bch2_btree_node_iter_advance(iter, b); } static inline unsigned __btree_node_iter_used(struct btree_node_iter *iter) @@ -1689,8 +1683,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite bch2_btree_node_iter_bset_pos(iter, b, t), min_key_type); if (k && - (!prev || __btree_node_iter_cmp(iter->is_extents, b, - k, prev) > 0)) { + (!prev || __btree_node_iter_cmp(b, k, prev) > 0)) { prev = k; end = t->end_offset; } @@ -1723,11 +1716,11 @@ out: struct btree_node_iter iter2 = *iter; if (prev) - bch2_btree_node_iter_advance(&iter2, b); + __bch2_btree_node_iter_advance(&iter2, b); while ((k = bch2_btree_node_iter_peek_all(&iter2, b)) != orig_pos) { BUG_ON(k->type >= min_key_type); - bch2_btree_node_iter_advance(&iter2, b); + __bch2_btree_node_iter_advance(&iter2, b); } } diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 2fa71d7c0e8a..0787030ccc7e 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -369,6 +369,17 @@ static inline int bkey_cmp_p_or_unp(const struct btree *b, return __bch2_bkey_cmp_left_packed_format_checked(b, l, r); } +/* Returns true if @k is after iterator position @pos */ +static inline bool btree_iter_pos_cmp(struct btree_iter *iter, + const struct bkey *k) +{ + int cmp = bkey_cmp(k->p, iter->pos); + + return cmp > 0 || + (cmp == 0 && + !(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k)); +} + /* Returns true if @k is after iterator position @pos */ static inline bool btree_iter_pos_cmp_packed(const struct btree *b, struct bpos *pos, @@ -419,7 +430,7 @@ enum bch_extent_overlap { /* Returns how k overlaps with m */ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k, - const struct bkey *m) + const struct bkey *m) { int cmp1 = bkey_cmp(k->p, m->p) < 0; int cmp2 = bkey_cmp(bkey_start_pos(k), @@ -430,20 +441,13 @@ static inline enum bch_extent_overlap bch2_extent_overlap(const struct bkey *k, /* Btree key iteration */ -static inline void __bch2_btree_node_iter_init(struct btree_node_iter *iter, - bool is_extents) -{ - iter->is_extents = is_extents; - memset(iter->data, 0, sizeof(iter->data)); -} - void bch2_btree_node_iter_push(struct btree_node_iter *, struct btree *, const struct bkey_packed *, const struct bkey_packed *); void bch2_btree_node_iter_init(struct btree_node_iter *, struct btree *, - struct bpos, bool, bool); + struct bpos, bool); void bch2_btree_node_iter_init_from_start(struct btree_node_iter *, - struct btree *, bool); + struct btree *); struct bkey_packed *bch2_btree_node_iter_bset_pos(struct btree_node_iter *, struct btree *, struct bset_tree *); @@ -470,32 +474,21 @@ static inline bool bch2_btree_node_iter_end(struct btree_node_iter *iter) return __btree_node_iter_set_end(iter, 0); } -static inline int __btree_node_iter_cmp(bool is_extents, - struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) +static inline int __btree_node_iter_cmp(struct btree *b, + const struct bkey_packed *l, + const struct bkey_packed *r) { - /* - * For non extents, when keys compare equal the deleted keys have to - * come first - so that bch2_btree_node_iter_next_check() can 
detect - * duplicate nondeleted keys (and possibly other reasons?) - * - * For extents, bkey_deleted() is used as a proxy for k->size == 0, so - * deleted keys have to sort last. - */ + /* When keys compare equal deleted keys come first */ return bkey_cmp_packed(b, l, r) - ?: (is_extents - ? (int) bkey_deleted(l) - (int) bkey_deleted(r) - : (int) bkey_deleted(r) - (int) bkey_deleted(l)) + ?: (int) bkey_deleted(r) - (int) bkey_deleted(l) ?: (l > r) - (l < r); } -static inline int btree_node_iter_cmp(struct btree_node_iter *iter, - struct btree *b, +static inline int btree_node_iter_cmp(struct btree *b, struct btree_node_iter_set l, struct btree_node_iter_set r) { - return __btree_node_iter_cmp(iter->is_extents, b, + return __btree_node_iter_cmp(b, __btree_node_offset_to_key(b, l.k), __btree_node_offset_to_key(b, r.k)); } @@ -582,21 +575,12 @@ bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1); } -/* - * Iterates over all _live_ keys - skipping deleted (and potentially - * overlapping) keys - */ -#define for_each_btree_node_key(b, k, iter, _is_extents) \ - for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\ - ((k) = bch2_btree_node_iter_peek(iter, b)); \ - bch2_btree_node_iter_advance(iter, b)) - struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, struct btree *, struct bkey *); -#define for_each_btree_node_key_unpack(b, k, iter, _is_extents, unpacked)\ - for (bch2_btree_node_iter_init_from_start((iter), (b), (_is_extents));\ +#define for_each_btree_node_key_unpack(b, k, iter, unpacked) \ + for (bch2_btree_node_iter_init_from_start((iter), (b)); \ (k = bch2_btree_node_iter_peek_unpack((iter), (b), (unpacked))).k;\ bch2_btree_node_iter_advance(iter, b)) @@ -646,6 +630,8 @@ void bch2_dump_btree_node_iter(struct btree *, struct btree_node_iter *); void __bch2_verify_btree_nr_keys(struct btree *); void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *); +void bch2_verify_insert_pos(struct btree *, struct bkey_packed *, + struct bkey_packed *, unsigned); void bch2_verify_key_order(struct btree *, struct btree_node_iter *, struct bkey_packed *); @@ -654,6 +640,10 @@ void bch2_verify_key_order(struct btree *, struct btree_node_iter *, static inline void __bch2_verify_btree_nr_keys(struct btree *b) {} static inline void bch2_btree_node_iter_verify(struct btree_node_iter *iter, struct btree *b) {} +static inline void bch2_verify_insert_pos(struct btree *b, + struct bkey_packed *where, + struct bkey_packed *insert, + unsigned clobber_u64s) {} static inline void bch2_verify_key_order(struct btree *b, struct btree_node_iter *iter, struct bkey_packed *where) {} diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 1fbb9c657fc6..2526118fe9ce 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -217,7 +217,6 @@ static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b) if (btree_node_has_ptrs(b)) for_each_btree_node_key_unpack(b, k, &iter, - btree_node_is_extents(b), &unpacked) { bch2_bkey_debugcheck(c, b, k); stale = max(stale, bch2_gc_mark_key(c, type, k, 0)); @@ -1044,7 +1043,6 @@ static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id) struct bkey_s_c k; for_each_btree_node_key_unpack(b, k, &node_iter, - btree_node_is_extents(b), &unpacked) { ret = bch2_btree_mark_key_initial(c, btree_node_type(b), k); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 5c36acef2b13..889870582566 
100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -22,7 +22,7 @@ /* btree_node_iter_large: */ #define btree_node_iter_cmp_heap(h, _l, _r) \ - __btree_node_iter_cmp((iter)->is_extents, b, \ + __btree_node_iter_cmp(b, \ __btree_node_offset_to_key(b, (_l).k), \ __btree_node_offset_to_key(b, (_r).k)) @@ -248,6 +248,9 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst, sort_iter_sort(iter, sort_extent_whiteouts_cmp); while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { + if (bkey_deleted(in)) + continue; + EBUG_ON(bkeyp_val_u64s(f, in)); EBUG_ON(in->type != KEY_TYPE_DISCARD); @@ -785,8 +788,7 @@ void bch2_btree_sort_into(struct bch_fs *c, bch2_bset_set_no_aux_tree(dst, dst->set); - bch2_btree_node_iter_init_from_start(&src_iter, src, - btree_node_is_extents(src)); + bch2_btree_node_iter_init_from_start(&src_iter, src); if (btree_node_ops(src)->key_normalize || btree_node_ops(src)->key_merge) @@ -1171,7 +1173,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry int ret, retry_read = 0, write = READ; iter = mempool_alloc(&c->fill_iter, GFP_NOIO); - __bch2_btree_node_iter_large_init(iter, btree_node_is_extents(b)); + iter->used = 0; if (bch2_meta_read_fault("btree")) btree_err(BTREE_ERR_MUST_RETRY, c, b, NULL, diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 0688ce420610..7835f8a9e3a0 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -146,20 +146,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *, char *); /* Sorting */ struct btree_node_iter_large { - u8 is_extents; u16 used; struct btree_node_iter_set data[MAX_BSETS]; }; -static inline void -__bch2_btree_node_iter_large_init(struct btree_node_iter_large *iter, - bool is_extents) -{ - iter->used = 0; - iter->is_extents = is_extents; -} - void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *, struct btree *); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8918268f99f4..9d92826181dc 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -375,14 +375,20 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, struct btree_node_iter tmp = l->iter; struct bkey_packed *k; + if (iter->uptodate > BTREE_ITER_NEED_PEEK) + return; + bch2_btree_node_iter_verify(&l->iter, b); /* * For interior nodes, the iterator will have skipped past * deleted keys: + * + * For extents, the iterator may have skipped past deleted keys (but not + * whiteouts) */ - k = b->level - ? bch2_btree_node_iter_prev(&tmp, b) + k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS + ? 
bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD) : bch2_btree_node_iter_prev_all(&tmp, b); if (k && btree_iter_pos_cmp_packed(b, &iter->pos, k, iter->flags & BTREE_ITER_IS_EXTENTS)) { @@ -390,7 +396,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, struct bkey uk = bkey_unpack_key(b, k); bch2_bkey_to_text(buf, sizeof(buf), &uk); - panic("prev key should be before after pos:\n%s\n%llu:%llu\n", + panic("prev key should be before iter pos:\n%s\n%llu:%llu\n", buf, iter->pos.inode, iter->pos.offset); } @@ -401,15 +407,16 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, struct bkey uk = bkey_unpack_key(b, k); bch2_bkey_to_text(buf, sizeof(buf), &uk); - panic("next key should be before iter pos:\n%llu:%llu\n%s\n", + panic("iter should be after current key:\n" + "iter pos %llu:%llu\n" + "cur key %s\n", iter->pos.inode, iter->pos.offset, buf); } - if (iter->uptodate == BTREE_ITER_UPTODATE && - (iter->flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES) { - BUG_ON(!bkey_whiteout(&iter->k) && - bch2_btree_node_iter_end(&l->iter)); - } + BUG_ON(iter->uptodate == BTREE_ITER_UPTODATE && + (iter->flags & BTREE_ITER_TYPE) == BTREE_ITER_KEYS && + !bkey_whiteout(&iter->k) && + bch2_btree_node_iter_end(&l->iter)); } void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b) @@ -420,6 +427,11 @@ void bch2_btree_iter_verify(struct btree_iter *iter, struct btree *b) __bch2_btree_iter_verify(linked, b); } +#else + +static inline void __bch2_btree_iter_verify(struct btree_iter *iter, + struct btree *b) {} + #endif static void __bch2_btree_node_iter_fix(struct btree_iter *iter, @@ -434,7 +446,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter, struct btree_node_iter_set *set; unsigned offset = __btree_node_key_to_offset(b, where); int shift = new_u64s - clobber_u64s; - unsigned old_end = (int) __btree_node_key_to_offset(b, end) - shift; + unsigned old_end = t->end_offset - shift; btree_node_iter_for_each(node_iter, set) if (set->end == old_end) @@ -456,7 +468,7 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter, } return; found: - set->end = (int) set->end + shift; + set->end = t->end_offset; /* Iterator hasn't gotten to the key that changed yet: */ if (set->k < offset) @@ -517,8 +529,7 @@ iter_current_key_not_modified: k = bch2_bkey_prev_all(b, t, bch2_btree_node_iter_bset_pos(node_iter, b, t)); if (k && - __btree_node_iter_cmp(node_iter, b, - k, where) > 0) { + __btree_node_iter_cmp(b, k, where) > 0) { struct btree_node_iter_set *set; unsigned offset = __btree_node_key_to_offset(b, bkey_next(k)); @@ -557,10 +568,6 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter, __bch2_btree_node_iter_fix(linked, b, &linked->l[b->level].iter, t, where, clobber_u64s, new_u64s); - - /* interior node iterators are... special... 
*/ - if (!b->level) - bch2_btree_iter_verify(iter, b); } static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, @@ -647,17 +654,6 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b) btree_node_unlock(iter, b->level + 1); } -/* Returns true if @k is after iterator position @pos */ -static inline bool btree_iter_pos_cmp(struct btree_iter *iter, - const struct bkey *k) -{ - int cmp = bkey_cmp(k->p, iter->pos); - - return cmp > 0 || - (cmp == 0 && - !(iter->flags & BTREE_ITER_IS_EXTENTS) && !bkey_deleted(k)); -} - static inline bool btree_iter_pos_after_node(struct btree_iter *iter, struct btree *b) { @@ -679,8 +675,7 @@ static inline void __btree_iter_init(struct btree_iter *iter, struct btree_iter_level *l = &iter->l[b->level]; bch2_btree_node_iter_init(&l->iter, b, iter->pos, - iter->flags & BTREE_ITER_IS_EXTENTS, - btree_node_is_extents(b)); + iter->flags & BTREE_ITER_IS_EXTENTS); /* Skip to first non whiteout: */ if (b->level) @@ -1022,7 +1017,9 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) } iter->uptodate = BTREE_ITER_NEED_PEEK; + bch2_btree_iter_verify_locks(iter); + __bch2_btree_iter_verify(iter, iter->l[iter->level].b); return 0; } @@ -1363,9 +1360,10 @@ struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter) } static inline struct bkey_s_c -__bch2_btree_iter_peek_slot(struct btree_iter *iter) +__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) { struct btree_iter_level *l = &iter->l[0]; + struct btree_node_iter node_iter; struct bkey_s_c k; struct bkey n; int ret; @@ -1376,6 +1374,17 @@ recheck: bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0) __btree_iter_advance(l); + /* + * iterator is now at the correct position for inserting at iter->pos, + * but we need to keep iterating until we find the first non whiteout so + * we know how big a hole we have, if any: + */ + + node_iter = l->iter; + if (k.k && bkey_whiteout(k.k)) + k = __btree_iter_unpack(iter, l, &iter->k, + bch2_btree_node_iter_peek(&node_iter, l->b)); + /* * If we got to the end of the node, check if we need to traverse to the * next node: @@ -1392,6 +1401,13 @@ recheck: if (k.k && !bkey_whiteout(k.k) && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) { + /* + * if we skipped forward to find the first non whiteout and + * there _wasn't_ actually a hole, we want the iterator to be + * pointed at the key we found: + */ + l->iter = node_iter; + EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0); EBUG_ON(bkey_deleted(k.k)); iter->uptodate = BTREE_ITER_UPTODATE; @@ -1399,36 +1415,39 @@ recheck: } /* hole */ + + /* holes can't span inode numbers: */ + if (iter->pos.offset == KEY_OFFSET_MAX) { + if (iter->pos.inode == KEY_INODE_MAX) + return bkey_s_c_null; + + iter->pos = bkey_successor(iter->pos); + goto recheck; + } + + if (!k.k) + k.k = &l->b->key.k; + bkey_init(&n); n.p = iter->pos; + bch2_key_resize(&n, + min_t(u64, KEY_SIZE_MAX, + (k.k->p.inode == n.p.inode + ? 
bkey_start_offset(k.k) + : KEY_OFFSET_MAX) - + n.p.offset)); - if (iter->flags & BTREE_ITER_IS_EXTENTS) { - if (n.p.offset == KEY_OFFSET_MAX) { - if (n.p.inode == KEY_INODE_MAX) - return bkey_s_c_null; + //EBUG_ON(!n.size); + if (!n.size) { + char buf[100]; + bch2_dump_btree_node(iter->l[0].b); - iter->pos = bkey_successor(iter->pos); - goto recheck; - } - - if (k.k && bkey_whiteout(k.k)) { - struct btree_node_iter node_iter = l->iter; - - k = __btree_iter_unpack(iter, l, &iter->k, - bch2_btree_node_iter_peek(&node_iter, l->b)); - } - - if (!k.k) - k.k = &l->b->key.k; - - bch2_key_resize(&n, - min_t(u64, KEY_SIZE_MAX, - (k.k->p.inode == n.p.inode - ? bkey_start_offset(k.k) - : KEY_OFFSET_MAX) - - n.p.offset)); - - EBUG_ON(!n.size); + bch2_bkey_to_text(buf, sizeof(buf), k.k); + panic("iter at %llu:%llu\n" + "next key %s\n", + iter->pos.inode, + iter->pos.offset, + buf); } iter->k = n; @@ -1436,6 +1455,50 @@ recheck: return (struct bkey_s_c) { &iter->k, NULL }; } +static inline struct bkey_s_c +__bch2_btree_iter_peek_slot(struct btree_iter *iter) +{ + struct btree_iter_level *l = &iter->l[0]; + struct bkey_s_c k; + int ret; + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + return __bch2_btree_iter_peek_slot_extents(iter); + +recheck: + while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k && + bkey_deleted(k.k) && + bkey_cmp(k.k->p, iter->pos) == 0) + __btree_iter_advance(l); + + /* + * If we got to the end of the node, check if we need to traverse to the + * next node: + */ + if (unlikely(!k.k && btree_iter_pos_after_node(iter, l->b))) { + btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); + ret = bch2_btree_iter_traverse(iter); + if (unlikely(ret)) + return bkey_s_c_err(ret); + + goto recheck; + } + + if (k.k && + !bkey_deleted(k.k) && + !bkey_cmp(iter->pos, k.k->p)) { + iter->uptodate = BTREE_ITER_UPTODATE; + return k; + } else { + /* hole */ + bkey_init(&iter->k); + iter->k.p = iter->pos; + + iter->uptodate = BTREE_ITER_UPTODATE; + return (struct bkey_s_c) { &iter->k, NULL }; + } +} + struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) { int ret; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 5376388e91e6..d57ca3d08c16 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -176,8 +176,6 @@ struct btree_cache { }; struct btree_node_iter { - u8 is_extents; - struct btree_node_iter_set { u16 k, end; } data[MAX_BSETS]; @@ -459,9 +457,6 @@ struct btree_root { * we're holding the write lock and we know what key is about to be overwritten: */ -struct btree_iter; -struct btree_node_iter; - enum btree_insert_ret { BTREE_INSERT_OK, /* extent spanned multiple leaf nodes: have to traverse to next node: */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index b60eb3d33c7b..1fe6f1e3e843 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -35,7 +35,7 @@ static void btree_node_interior_verify(struct btree *b) BUG_ON(!b->level); - bch2_btree_node_iter_init(&iter, b, b->key.k.p, false, false); + bch2_btree_node_iter_init(&iter, b, b->key.k.p, false); #if 1 BUG_ON(!(k = bch2_btree_node_iter_peek(&iter, b)) || bkey_cmp_left_packed(b, k, &b->key.k.p)); @@ -1322,7 +1322,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b, BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE); - bch2_btree_node_iter_init(&node_iter, b, k->k.p, false, false); + bch2_btree_node_iter_init(&node_iter, b, k->k.p, false); while (!bch2_keylist_empty(keys)) { k = 
bch2_keylist_front(keys); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index ce0223bd52b5..0ef519e8feed 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -64,7 +64,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, if (bkey_whiteout(&insert->k) && !k->needs_whiteout) { bch2_bset_delete(b, k, clobber_u64s); bch2_btree_node_iter_fix(iter, b, node_iter, t, - k, clobber_u64s, 0); + k, clobber_u64s, 0); + bch2_btree_iter_verify(iter, b); return true; } @@ -73,7 +74,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, k->type = KEY_TYPE_DELETED; bch2_btree_node_iter_fix(iter, b, node_iter, t, k, - k->u64s, k->u64s); + k->u64s, k->u64s); + bch2_btree_iter_verify(iter, b); if (bkey_whiteout(&insert->k)) { reserve_whiteout(b, k); @@ -98,7 +100,8 @@ overwrite: bch2_bset_insert(b, node_iter, k, insert, clobber_u64s); if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k)) bch2_btree_node_iter_fix(iter, b, node_iter, t, k, - clobber_u64s, k->u64s); + clobber_u64s, k->u64s); + bch2_btree_iter_verify(iter, b); return true; } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 803272b10e61..df04af882c16 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -858,30 +858,34 @@ void bch2_key_resize(struct bkey *k, * that we have to unpack the key, modify the unpacked key - then this * copies/repacks the unpacked to the original as necessary. */ -static bool __extent_save(struct btree *b, struct btree_node_iter *iter, - struct bkey_packed *dst, struct bkey *src) +static void extent_save(struct btree *b, struct bkey_packed *dst, + struct bkey *src) { struct bkey_format *f = &b->format; struct bkey_i *dst_unpacked; - bool ret; - if ((dst_unpacked = packed_to_bkey(dst))) { + if ((dst_unpacked = packed_to_bkey(dst))) dst_unpacked->k = *src; - ret = true; - } else { - ret = bch2_bkey_pack_key(dst, src, f); - } - - if (ret && iter) - bch2_verify_key_order(b, iter, dst); - - return ret; + else + BUG_ON(!bch2_bkey_pack_key(dst, src, f)); } -static void extent_save(struct btree *b, struct btree_node_iter *iter, - struct bkey_packed *dst, struct bkey *src) +static bool extent_i_save(struct btree *b, struct bkey_packed *dst, + struct bkey_i *src) { - BUG_ON(!__extent_save(b, iter, dst, src)); + struct bkey_format *f = &b->format; + struct bkey_i *dst_unpacked; + struct bkey_packed tmp; + + if ((dst_unpacked = packed_to_bkey(dst))) + dst_unpacked->k = src->k; + else if (bch2_bkey_pack_key(&tmp, &src->k, f)) + memcpy_u64s(dst, &tmp, f->key_u64s); + else + return false; + + memcpy_u64s(bkeyp_val(f, dst), &src->v, bkey_val_u64s(&src->k)); + return true; } /* @@ -1010,7 +1014,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, sort_key_next(iter, b, _r); } else { __bch2_cut_front(l.k->p, r); - extent_save(b, NULL, rk, r.k); + extent_save(b, rk, r.k); } extent_sort_sift(iter, b, _r - iter->data); @@ -1024,7 +1028,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k); __bch2_cut_front(r.k->p, l); - extent_save(b, NULL, lk, l.k); + extent_save(b, lk, l.k); extent_sort_sift(iter, b, 0); @@ -1032,7 +1036,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, bkey_to_packed(&tmp.k)); } else { bch2_cut_back(bkey_start_pos(r.k), l.k); - extent_save(b, NULL, lk, l.k); + extent_save(b, lk, l.k); } } @@ -1135,6 +1139,55 @@ extent_insert_should_stop(struct extent_insert_state *s) return BTREE_INSERT_OK; } 
+static void verify_extent_nonoverlapping(struct btree *b, + struct btree_node_iter *_iter, + struct bkey_i *insert) +{ +#ifdef CONFIG_BCACHEFS_DEBUG + struct btree_node_iter iter; + struct bkey_packed *k; + struct bkey uk; + + iter = *_iter; + k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD); + BUG_ON(k && + (uk = bkey_unpack_key(b, k), + bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); + + iter = *_iter; + k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD); +#if 0 + BUG_ON(k && + (uk = bkey_unpack_key(b, k), + bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0); +#else + if (k && + (uk = bkey_unpack_key(b, k), + bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) { + char buf1[100]; + char buf2[100]; + + bch2_bkey_to_text(buf1, sizeof(buf1), &insert->k); + bch2_bkey_to_text(buf2, sizeof(buf2), &uk); + + bch2_dump_btree_node(b); + panic("insert > next :\n" + "insert %s\n" + "next %s\n", + buf1, buf2); + } +#endif + +#endif +} + +static void verify_modified_extent(struct btree_iter *iter, + struct bkey_packed *k) +{ + bch2_btree_iter_verify(iter, iter->l[0].b); + bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s); +} + static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, struct bkey_i *insert) { @@ -1148,6 +1201,14 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, unsigned clobber_u64s; EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); + verify_extent_nonoverlapping(l->b, &l->iter, insert); + + if (!prev) { + while ((prev = bch2_bkey_prev_all(l->b, t, where)) && + (bkey_cmp_left_packed(l->b, prev, &insert->k.p) ?: + ((int) bkey_deleted(&insert->k) - (int) bkey_deleted(prev))) > 0) + where = prev; + } if (prev) where = bkey_next(prev); @@ -1173,12 +1234,15 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s); bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where, - clobber_u64s, where->u64s); + clobber_u64s, where->u64s); + bch2_verify_key_order(l->b, &l->iter, where); + bch2_btree_iter_verify(iter, l->b); return; drop_deleted_keys: bch2_bset_delete(l->b, where, clobber_u64s); bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where, clobber_u64s, 0); + bch2_btree_iter_verify(iter, l->b); } static void extent_insert_committed(struct extent_insert_state *s) @@ -1226,8 +1290,10 @@ static void extent_insert_committed(struct extent_insert_state *s) bch2_btree_journal_key(s->trans, iter, &split.k); - if (!s->deleting) + if (!s->deleting) { + bch2_btree_iter_set_pos_same_leaf(iter, s->committed); extent_bset_insert(c, iter, &split.k); + } done: bch2_btree_iter_set_pos_same_leaf(iter, s->committed); @@ -1345,22 +1411,21 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, struct btree_iter *iter = s->insert->iter; struct btree_iter_level *l = &iter->l[0]; struct btree *b = l->b; - struct btree_node_iter *node_iter = &l->iter; - enum btree_insert_ret ret; switch (overlap) { case BCH_EXTENT_OVERLAP_FRONT: /* insert overlaps with start of k: */ bch2_cut_subtract_front(s, insert->k.p, k); BUG_ON(bkey_deleted(k.k)); - extent_save(b, node_iter, _k, k.k); + extent_save(b, _k, k.k); + bch2_verify_key_order(b, &l->iter, _k); break; case BCH_EXTENT_OVERLAP_BACK: /* insert overlaps with end of k: */ bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k); BUG_ON(bkey_deleted(k.k)); - extent_save(b, node_iter, _k, k.k); + extent_save(b, _k, k.k); /* * As the auxiliary tree is indexed by the end of the @@ -1368,46 +1433,31 @@ 
extent_squash(struct extent_insert_state *s, struct bkey_i *insert, * auxiliary tree. */ bch2_bset_fix_invalidated_key(b, t, _k); - bch2_btree_node_iter_fix(iter, b, node_iter, t, - _k, _k->u64s, _k->u64s); + bch2_btree_node_iter_fix(iter, b, &l->iter, t, + _k, _k->u64s, _k->u64s); + bch2_verify_key_order(b, &l->iter, _k); break; case BCH_EXTENT_OVERLAP_ALL: { - struct bpos orig_pos = k.k->p; - /* The insert key completely covers k, invalidate k */ if (!bkey_whiteout(k.k)) btree_keys_account_key_drop(&b->nr, t - b->set, _k); bch2_drop_subtract(s, k); - k.k->p = bkey_start_pos(&insert->k); - if (!__extent_save(b, node_iter, _k, k.k)) { - /* - * Couldn't repack: we aren't necessarily able - * to repack if the new key is outside the range - * of the old extent, so we have to split - * @insert: - */ - k.k->p = orig_pos; - extent_save(b, node_iter, _k, k.k); - ret = extent_insert_advance_pos(s, k.s_c); - if (ret != BTREE_INSERT_OK) - return ret; + if (t == bset_tree_last(l->b)) { + unsigned u64s = _k->u64s; - extent_insert_committed(s); - /* - * We split and inserted upto at k.k->p - that - * has to coincide with iter->pos, so that we - * don't have anything more we have to insert - * until we recheck our journal reservation: - */ - EBUG_ON(bkey_cmp(s->committed, k.k->p)); + bch2_bset_delete(l->b, _k, _k->u64s); + bch2_btree_node_iter_fix(iter, b, &l->iter, t, + _k, u64s, 0); + bch2_btree_iter_verify(iter, b); } else { - bch2_bset_fix_invalidated_key(b, t, _k); - bch2_btree_node_iter_fix(iter, b, node_iter, t, - _k, _k->u64s, _k->u64s); + extent_save(b, _k, k.k); + bch2_btree_node_iter_fix(iter, b, &l->iter, t, + _k, _k->u64s, _k->u64s); + bch2_verify_key_order(b, &l->iter, _k); } break; @@ -1436,7 +1486,8 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, bch2_cut_subtract_front(s, insert->k.p, k); BUG_ON(bkey_deleted(k.k)); - extent_save(b, node_iter, _k, k.k); + extent_save(b, _k, k.k); + bch2_verify_key_order(b, &l->iter, _k); bch2_add_sectors(s, bkey_i_to_s_c(&split.k), bkey_start_offset(&split.k.k), @@ -1450,26 +1501,20 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, } static enum btree_insert_ret -__bch2_delete_fixup_extent(struct extent_insert_state *s) +__bch2_insert_fixup_extent(struct extent_insert_state *s) { struct bch_fs *c = s->trans->c; struct btree_iter *iter = s->insert->iter; struct btree_iter_level *l = &iter->l[0]; struct btree *b = l->b; - struct btree_node_iter *node_iter = &l->iter; struct bkey_packed *_k; struct bkey unpacked; struct bkey_i *insert = s->insert->k; enum btree_insert_ret ret = BTREE_INSERT_OK; - EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k))); - - s->whiteout = *insert; - s->whiteout.k.type = KEY_TYPE_DISCARD; - while (bkey_cmp(s->committed, insert->k.p) < 0 && (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK && - (_k = bch2_btree_node_iter_peek_all(node_iter, b))) { + (_k = bch2_btree_node_iter_peek_filter(&l->iter, b, KEY_TYPE_DISCARD))) { struct bset_tree *t = bch2_bkey_to_bset(b, _k); struct bkey_s k = __bkey_disassemble(b, _k, &unpacked); enum bch_extent_overlap overlap; @@ -1480,112 +1525,92 @@ __bch2_delete_fixup_extent(struct extent_insert_state *s) if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) break; - if (bkey_whiteout(k.k)) { - s->committed = bpos_min(insert->k.p, k.k->p); - goto next; - } - - overlap = bch2_extent_overlap(&insert->k, k.k); - ret = extent_insert_advance_pos(s, k.s_c); if (ret) break; - s->do_journal = true; + overlap = bch2_extent_overlap(&insert->k, k.k); - if 
(overlap == BCH_EXTENT_OVERLAP_ALL) { - btree_keys_account_key_drop(&b->nr, - t - b->set, _k); - bch2_subtract_sectors(s, k.s_c, - bkey_start_offset(k.k), k.k->size); - _k->type = KEY_TYPE_DISCARD; - reserve_whiteout(b, _k); - } else if (k.k->needs_whiteout || - bkey_written(b, _k)) { - struct bkey_i discard = *insert; + if (!s->deleting) { + if (k.k->needs_whiteout || bkey_written(b, _k)) + insert->k.needs_whiteout = true; - discard.k.type = KEY_TYPE_DISCARD; - - switch (overlap) { - case BCH_EXTENT_OVERLAP_FRONT: - bch2_cut_front(bkey_start_pos(k.k), &discard); - break; - case BCH_EXTENT_OVERLAP_BACK: - bch2_cut_back(k.k->p, &discard.k); - break; - default: - break; + if (overlap == BCH_EXTENT_OVERLAP_ALL && + bkey_whiteout(k.k) && + k.k->needs_whiteout) { + unreserve_whiteout(b, _k); + _k->needs_whiteout = false; } - discard.k.needs_whiteout = true; - ret = extent_squash(s, insert, t, _k, k, overlap); - BUG_ON(ret != BTREE_INSERT_OK); - - extent_bset_insert(c, iter, &discard); } else { - ret = extent_squash(s, insert, t, _k, k, overlap); - BUG_ON(ret != BTREE_INSERT_OK); - } + if (bkey_whiteout(k.k)) + goto next; + + s->do_journal = true; + + if (overlap == BCH_EXTENT_OVERLAP_ALL) { + btree_keys_account_key_drop(&b->nr, + t - b->set, _k); + bch2_subtract_sectors(s, k.s_c, + bkey_start_offset(k.k), k.k->size); + _k->type = KEY_TYPE_DISCARD; + reserve_whiteout(b, _k); + } else if (k.k->needs_whiteout || + bkey_written(b, _k)) { + struct bkey_i discard = *insert; + + discard.k.type = KEY_TYPE_DISCARD; + + switch (overlap) { + case BCH_EXTENT_OVERLAP_FRONT: + bch2_cut_front(bkey_start_pos(k.k), &discard); + break; + case BCH_EXTENT_OVERLAP_BACK: + bch2_cut_back(k.k->p, &discard.k); + break; + default: + break; + } + + discard.k.needs_whiteout = true; + + ret = extent_squash(s, insert, t, _k, k, overlap); + BUG_ON(ret != BTREE_INSERT_OK); + + extent_bset_insert(c, iter, &discard); + } else { + ret = extent_squash(s, insert, t, _k, k, overlap); + BUG_ON(ret != BTREE_INSERT_OK); + } next: - bch2_cut_front(s->committed, insert); - bch2_btree_iter_set_pos_same_leaf(iter, s->committed); + bch2_cut_front(s->committed, insert); + bch2_btree_iter_set_pos_same_leaf(iter, s->committed); + } + + if (ret != BTREE_INSERT_OK || + overlap == BCH_EXTENT_OVERLAP_FRONT || + overlap == BCH_EXTENT_OVERLAP_MIDDLE) + break; } - return ret; -} + if (ret == BTREE_INSERT_OK && + bkey_cmp(s->committed, insert->k.p) < 0) + ret = extent_insert_advance_pos(s, bkey_s_c_null); -static enum btree_insert_ret -__bch2_insert_fixup_extent(struct extent_insert_state *s) -{ - struct btree_iter *iter = s->insert->iter; - struct btree_iter_level *l = &iter->l[0]; - struct btree *b = l->b; - struct btree_node_iter *node_iter = &l->iter; - struct bkey_packed *_k; - struct bkey unpacked; - struct bkey_i *insert = s->insert->k; - enum btree_insert_ret ret = BTREE_INSERT_OK; + /* + * may have skipped past some deleted extents greater than the insert + * key, before we got to a non deleted extent and knew we could bail out + * rewind the iterator a bit if necessary: + */ + { + struct btree_node_iter node_iter = l->iter; + struct bkey uk; - while (bkey_cmp(s->committed, insert->k.p) < 0 && - (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK && - (_k = bch2_btree_node_iter_peek_all(node_iter, b))) { - struct bset_tree *t = bch2_bkey_to_bset(b, _k); - struct bkey_s k = __bkey_disassemble(b, _k, &unpacked); - enum bch_extent_overlap overlap; - - EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k))); - EBUG_ON(bkey_cmp(iter->pos, 
k.k->p) >= 0); - - if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) - break; - - overlap = bch2_extent_overlap(&insert->k, k.k); - - if (!k.k->size) - goto squash; - - /* - * Only call advance pos & call hook for nonzero size extents: - */ - ret = extent_insert_advance_pos(s, k.s_c); - if (ret) - break; - - if (k.k->size && - (k.k->needs_whiteout || bkey_written(b, _k))) - insert->k.needs_whiteout = true; - - if (overlap == BCH_EXTENT_OVERLAP_ALL && - bkey_whiteout(k.k) && - k.k->needs_whiteout) { - unreserve_whiteout(b, _k); - _k->needs_whiteout = false; - } -squash: - ret = extent_squash(s, insert, t, _k, k, overlap); - if (ret != BTREE_INSERT_OK) - break; + while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) && + (uk = bkey_unpack_key(l->b, _k), + bkey_cmp(uk.p, s->committed) > 0)) + l->iter = node_iter; } return ret; @@ -1647,6 +1672,11 @@ bch2_insert_fixup_extent(struct btree_insert *trans, .deleting = bkey_whiteout(&insert->k->k), }; + if (s.deleting) { + s.whiteout = *insert->k; + s.whiteout.k.type = KEY_TYPE_DISCARD; + } + EBUG_ON(iter->level); EBUG_ON(!insert->k->k.size); @@ -1657,6 +1687,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans, * @insert->k and the node iterator that we're advancing: */ EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k))); + bch2_btree_iter_verify(iter, b); if (!s.deleting && !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) @@ -1664,13 +1695,7 @@ bch2_insert_fixup_extent(struct btree_insert *trans, bkey_start_offset(&insert->k->k), insert->k->k.size); - ret = !s.deleting - ? __bch2_insert_fixup_extent(&s) - : __bch2_delete_fixup_extent(&s); - - if (ret == BTREE_INSERT_OK && - bkey_cmp(s.committed, insert->k->k.p) < 0) - ret = extent_insert_advance_pos(&s, bkey_s_c_null); + ret = __bch2_insert_fixup_extent(&s); extent_insert_committed(&s); @@ -2172,130 +2197,6 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b, return BCH_MERGE_MERGE; } -static void extent_i_save(struct btree *b, struct bkey_packed *dst, - struct bkey_i *src) -{ - struct bkey_format *f = &b->format; - struct bkey_i *dst_unpacked; - - BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k)); - - /* - * We don't want the bch2_verify_key_order() call in extent_save(), - * because we may be out of order with deleted keys that are about to be - * removed by extent_bset_insert() - */ - - if ((dst_unpacked = packed_to_bkey(dst))) - bkey_copy(dst_unpacked, src); - else - BUG_ON(!bch2_bkey_pack(dst, src, f)); -} - -static bool extent_merge_one_overlapping(struct btree_iter *iter, - struct bpos new_pos, - struct bset_tree *t, - struct bkey_packed *k, struct bkey uk, - bool check, bool could_pack) -{ - struct btree_iter_level *l = &iter->l[0]; - - BUG_ON(!bkey_deleted(k)); - - if (check) { - return !bkey_packed(k) || could_pack; - } else { - uk.p = new_pos; - extent_save(l->b, &l->iter, k, &uk); - bch2_bset_fix_invalidated_key(l->b, t, k); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, - k, k->u64s, k->u64s); - return true; - } -} - -static bool extent_merge_do_overlapping(struct btree_iter *iter, - struct bkey *m, bool back_merge) -{ - struct btree_iter_level *l = &iter->l[0]; - struct btree *b = l->b; - struct btree_node_iter *node_iter = &l->iter; - struct bset_tree *t; - struct bkey_packed *k; - struct bkey uk; - struct bpos new_pos = back_merge ? 
m->p : bkey_start_pos(m); - bool could_pack = bkey_pack_pos((void *) &uk, new_pos, b); - bool check = true; - - /* - * @m is the new merged extent: - * - * The merge took place in the last bset; we know there can't be any 0 - * size extents overlapping with m there because if so they would have - * been between the two extents we merged. - * - * But in the other bsets, we have to check for and fix such extents: - */ -do_fixup: - for_each_bset(b, t) { - if (t == bset_tree_last(b)) - break; - - /* - * if we don't find this bset in the iterator we already got to - * the end of that bset, so start searching from the end. - */ - k = bch2_btree_node_iter_bset_pos(node_iter, b, t); - - if (k == btree_bkey_last(b, t)) - k = bch2_bkey_prev_all(b, t, k); - if (!k) - continue; - - if (back_merge) { - /* - * Back merge: 0 size extents will be before the key - * that was just inserted (and thus the iterator - * position) - walk backwards to find them - */ - for (; - k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(uk.p, bkey_start_pos(m)) > 0); - k = bch2_bkey_prev_all(b, t, k)) { - if (bkey_cmp(uk.p, m->p) >= 0) - continue; - - if (!extent_merge_one_overlapping(iter, new_pos, - t, k, uk, check, could_pack)) - return false; - } - } else { - /* Front merge - walk forwards */ - for (; - k != btree_bkey_last(b, t) && - (uk = bkey_unpack_key(b, k), - bkey_cmp(uk.p, m->p) < 0); - k = bkey_next(k)) { - if (bkey_cmp(uk.p, - bkey_start_pos(m)) <= 0) - continue; - - if (!extent_merge_one_overlapping(iter, new_pos, - t, k, uk, check, could_pack)) - return false; - } - } - } - - if (check) { - check = false; - goto do_fixup; - } - - return true; -} - /* * When merging an extent that we're inserting into a btree node, the new merged * extent could overlap with an existing 0 size extent - if we don't fix that, @@ -2312,13 +2213,13 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, { struct btree *b = iter->l[0].b; struct btree_node_iter *node_iter = &iter->l[0].iter; - const struct bkey_format *f = &b->format; - struct bset_tree *t = bset_tree_last(b); - struct bkey_packed *m; - BKEY_PADDED(k) li; - BKEY_PADDED(k) ri; - struct bkey_i *mi; - struct bkey tmp; + BKEY_PADDED(k) li, ri; + struct bkey_packed *m = back_merge ? l : r; + struct bkey_i *mi = back_merge ? &li.k : &ri.k; + struct bset_tree *t = bch2_bkey_to_bset(b, m); + enum merge_result ret; + + EBUG_ON(bkey_written(b, m)); /* * We need to save copies of both l and r, because we might get a @@ -2327,57 +2228,49 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, bch2_bkey_unpack(b, &li.k, l); bch2_bkey_unpack(b, &ri.k, r); - m = back_merge ? l : r; - mi = back_merge ? 
&li.k : &ri.k; - - /* l & r should be in last bset: */ - EBUG_ON(bch2_bkey_to_bset(b, m) != t); - - switch (bch2_extent_merge(c, b, &li.k, &ri.k)) { - case BCH_MERGE_NOMERGE: + ret = bch2_extent_merge(c, b, &li.k, &ri.k); + if (ret == BCH_MERGE_NOMERGE) return false; - case BCH_MERGE_PARTIAL: - if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &mi->k, f)) + + /* + * check if we overlap with deleted extents - would break the sort + * order: + */ + if (back_merge) { + struct bkey_packed *n = bkey_next(m); + + if (n != btree_bkey_last(b, t) && + bkey_cmp_left_packed(b, n, &li.k.k.p) <= 0 && + bkey_deleted(n)) return false; + } else if (ret == BCH_MERGE_MERGE) { + struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m); - if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge)) + if (prev && + bkey_cmp_left_packed_byval(b, prev, + bkey_start_pos(&li.k.k)) > 0) return false; + } - extent_i_save(b, m, mi); - bch2_bset_fix_invalidated_key(b, t, m); - - /* - * Update iterator to reflect what we just inserted - otherwise, - * the iter_fix() call is going to put us _before_ the key we - * just partially merged with: - */ - if (back_merge) - bch2_btree_iter_set_pos_same_leaf(iter, li.k.k.p); - - bch2_btree_node_iter_fix(iter, b, node_iter, - t, m, m->u64s, m->u64s); + if (ret == BCH_MERGE_PARTIAL) { + if (!extent_i_save(b, m, mi)) + return false; if (!back_merge) bkey_copy(packed_to_bkey(l), &li.k); else bkey_copy(packed_to_bkey(r), &ri.k); - return false; - case BCH_MERGE_MERGE: - if (bkey_packed(m) && !bch2_bkey_pack_key((void *) &tmp, &li.k.k, f)) + } else { + if (!extent_i_save(b, m, &li.k)) return false; - - if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge)) - return false; - - extent_i_save(b, m, &li.k); - bch2_bset_fix_invalidated_key(b, t, m); - - bch2_btree_node_iter_fix(iter, b, node_iter, - t, m, m->u64s, m->u64s); - return true; - default: - BUG(); } + + bch2_bset_fix_invalidated_key(b, t, m); + bch2_btree_node_iter_fix(iter, b, node_iter, + t, m, m->u64s, m->u64s); + verify_modified_extent(iter, m); + + return ret == BCH_MERGE_MERGE; } int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
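
For reference, a minimal usage sketch of the node-iterator interface after this change (walk_node() is a hypothetical wrapper added here for illustration; the bch2_* calls and their new two-argument signatures are the ones shown in the diff above): struct btree_node_iter no longer carries is_extents, so iterators are simply zero-initialized and filled, and __btree_node_iter_cmp() orders deleted keys before live keys at the same position for extents and non-extents alike.

/* walk_node() is illustrative only; the bch2_* calls are the in-tree API changed above. */
static void walk_node(struct btree *b)
{
	struct btree_node_iter iter;
	struct bkey_packed *k;

	/* No is_extents argument any more - the iterator is zeroed and pushed from each bset: */
	bch2_btree_node_iter_init_from_start(&iter, b);

	while ((k = bch2_btree_node_iter_peek_all(&iter, b))) {
		/*
		 * Keys arrive in __btree_node_iter_cmp() order: by position,
		 * with deleted keys sorting before live keys at the same
		 * position, regardless of btree type.
		 */
		bch2_btree_node_iter_advance(&iter, b);
	}
}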