bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key in the btree code: * bpos_successor() and bpos_predecessor() now include the snapshot field * Keys in btrees that will be using snapshots (extents, inodes, dirents and xattrs) now always have their snapshot field set to U32_MAX The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that determines whether we're iterating over keys in all snapshots or not - internally, this controls whether bkey_(successor|predecessor) increment/decrement the snapshot field, or only the higher bits of the key. We add a new member to struct btree_iter, iter->snapshot: when BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always equal iter->snapshot, which will be 0 for btrees that don't use snapshots, and always U32_MAX for btrees that will use snapshots (until we enable snapshot creation). This patch also introduces a new metadata version number, and compat code for reading from/writing to older versions - this isn't a forced upgrade (yet). Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
4cf91b0270
commit
e751c01a8e
@ -142,19 +142,18 @@ struct bpos {
|
||||
#define KEY_SNAPSHOT_MAX ((__u32)~0U)
|
||||
#define KEY_SIZE_MAX ((__u32)~0U)
|
||||
|
||||
static inline struct bpos POS(__u64 inode, __u64 offset)
|
||||
static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
|
||||
{
|
||||
struct bpos ret;
|
||||
|
||||
ret.inode = inode;
|
||||
ret.offset = offset;
|
||||
ret.snapshot = 0;
|
||||
|
||||
return ret;
|
||||
return (struct bpos) {
|
||||
.inode = inode,
|
||||
.offset = offset,
|
||||
.snapshot = snapshot,
|
||||
};
|
||||
}
|
||||
|
||||
#define POS_MIN POS(0, 0)
|
||||
#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
|
||||
#define POS_MIN SPOS(0, 0, 0)
|
||||
#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
|
||||
#define POS(_inode, _offset) SPOS(_inode, _offset, 0)
|
||||
|
||||
/* Empty placeholder struct, for container_of() */
|
||||
struct bch_val {
|
||||
@ -1208,7 +1207,8 @@ enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_new_versioning = 10,
|
||||
bcachefs_metadata_version_bkey_renumber = 10,
|
||||
bcachefs_metadata_version_inode_btree_change = 11,
|
||||
bcachefs_metadata_version_max = 12,
|
||||
bcachefs_metadata_version_snapshot = 12,
|
||||
bcachefs_metadata_version_max = 13,
|
||||
};
|
||||
|
||||
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
|
||||
@ -1749,7 +1749,7 @@ struct btree_node {
|
||||
/* Closed interval: */
|
||||
struct bpos min_key;
|
||||
struct bpos max_key;
|
||||
struct bch_extent_ptr ptr;
|
||||
struct bch_extent_ptr _ptr; /* not used anymore */
|
||||
struct bkey_format format;
|
||||
|
||||
union {
|
||||
|
@ -617,15 +617,19 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
|
||||
return "incorrect number of fields";
|
||||
|
||||
for (i = 0; i < f->nr_fields; i++) {
|
||||
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
|
||||
u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
|
||||
u64 field_offset = le64_to_cpu(f->field_offset[i]);
|
||||
|
||||
if (f->bits_per_field[i] > 64)
|
||||
if (f->bits_per_field[i] > unpacked_bits)
|
||||
return "field too large";
|
||||
|
||||
if (field_offset &&
|
||||
(f->bits_per_field[i] == 64 ||
|
||||
(field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
|
||||
field_offset)))
|
||||
if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
|
||||
return "offset + bits overflow";
|
||||
|
||||
if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
|
||||
unpacked_mask) <
|
||||
field_offset)
|
||||
return "offset + bits overflow";
|
||||
|
||||
bits += f->bits_per_field[i];
|
||||
@ -1126,11 +1130,12 @@ void bch2_bkey_pack_test(void)
|
||||
struct bkey_packed p;
|
||||
|
||||
struct bkey_format test_format = {
|
||||
.key_u64s = 2,
|
||||
.key_u64s = 3,
|
||||
.nr_fields = BKEY_NR_FIELDS,
|
||||
.bits_per_field = {
|
||||
13,
|
||||
64,
|
||||
32,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -258,24 +258,46 @@ static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
|
||||
format->bits_per_field[BKEY_FIELD_SNAPSHOT];
|
||||
}
|
||||
|
||||
static inline struct bpos bkey_successor(struct bpos p)
|
||||
static inline struct bpos bpos_successor(struct bpos p)
|
||||
{
|
||||
struct bpos ret = p;
|
||||
if (!++p.snapshot &&
|
||||
!++p.offset &&
|
||||
!++p.inode)
|
||||
BUG();
|
||||
|
||||
if (!++ret.offset)
|
||||
BUG_ON(!++ret.inode);
|
||||
|
||||
return ret;
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline struct bpos bkey_predecessor(struct bpos p)
|
||||
static inline struct bpos bpos_predecessor(struct bpos p)
|
||||
{
|
||||
struct bpos ret = p;
|
||||
if (!p.snapshot-- &&
|
||||
!p.offset-- &&
|
||||
!p.inode--)
|
||||
BUG();
|
||||
|
||||
if (!ret.offset--)
|
||||
BUG_ON(!ret.inode--);
|
||||
return p;
|
||||
}
|
||||
|
||||
return ret;
|
||||
static inline struct bpos bpos_nosnap_successor(struct bpos p)
|
||||
{
|
||||
p.snapshot = 0;
|
||||
|
||||
if (!++p.offset &&
|
||||
!++p.inode)
|
||||
BUG();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
|
||||
{
|
||||
p.snapshot = 0;
|
||||
|
||||
if (!p.offset-- &&
|
||||
!p.inode--)
|
||||
BUG();
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline u64 bkey_start_offset(const struct bkey *k)
|
||||
|
@ -119,9 +119,16 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
|
||||
return "nonzero size field";
|
||||
}
|
||||
|
||||
if (k.k->p.snapshot)
|
||||
if (type != BKEY_TYPE_btree &&
|
||||
!btree_type_has_snapshots(type) &&
|
||||
k.k->p.snapshot)
|
||||
return "nonzero snapshot";
|
||||
|
||||
if (type != BKEY_TYPE_btree &&
|
||||
btree_type_has_snapshots(type) &&
|
||||
k.k->p.snapshot != U32_MAX)
|
||||
return "invalid snapshot field";
|
||||
|
||||
if (type != BKEY_TYPE_btree &&
|
||||
!bkey_cmp(k.k->p, POS_MAX))
|
||||
return "POS_MAX key";
|
||||
@ -310,14 +317,15 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
|
||||
const struct bkey_ops *ops;
|
||||
struct bkey uk;
|
||||
struct bkey_s u;
|
||||
unsigned nr_compat = 5;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Do these operations in reverse order in the write path:
|
||||
*/
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
switch (!write ? i : 3 - i) {
|
||||
for (i = 0; i < nr_compat; i++)
|
||||
switch (!write ? i : nr_compat - 1 - i) {
|
||||
case 0:
|
||||
if (big_endian != CPU_BIG_ENDIAN)
|
||||
bch2_bkey_swab_key(f, k);
|
||||
@ -351,6 +359,28 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (version < bcachefs_metadata_version_snapshot &&
|
||||
(level || btree_type_has_snapshots(btree_id))) {
|
||||
struct bkey_i *u = packed_to_bkey(k);
|
||||
|
||||
if (u) {
|
||||
u->k.p.snapshot = write
|
||||
? 0 : U32_MAX;
|
||||
} else {
|
||||
u64 min_packed = f->field_offset[BKEY_FIELD_SNAPSHOT];
|
||||
u64 max_packed = min_packed +
|
||||
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
|
||||
|
||||
uk = __bch2_bkey_unpack_key(f, k);
|
||||
uk.p.snapshot = write
|
||||
? min_packed : min_t(u64, U32_MAX, max_packed);
|
||||
|
||||
BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
case 4:
|
||||
if (!bkey_packed(k)) {
|
||||
u = bkey_i_to_s(packed_to_bkey(k));
|
||||
} else {
|
||||
|
@ -1438,7 +1438,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
|
||||
* to the search key is going to have 0 sectors after the search key.
|
||||
*
|
||||
* But this does mean that we can't just search for
|
||||
* bkey_successor(start_of_range) to get the first extent that overlaps with
|
||||
* bpos_successor(start_of_range) to get the first extent that overlaps with
|
||||
* the range we want - if we're unlucky and there's an extent that ends
|
||||
* exactly where we searched, then there could be a deleted key at the same
|
||||
* position and we'd get that when we search instead of the preceding extent
|
||||
|
@ -1018,7 +1018,7 @@ out:
|
||||
if (sib != btree_prev_sib)
|
||||
swap(n1, n2);
|
||||
|
||||
if (bpos_cmp(bkey_successor(n1->key.k.p),
|
||||
if (bpos_cmp(bpos_successor(n1->key.k.p),
|
||||
n2->data->min_key)) {
|
||||
char buf1[200], buf2[200];
|
||||
|
||||
|
@ -64,7 +64,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
|
||||
struct bpos node_end = b->data->max_key;
|
||||
struct bpos expected_start = bkey_deleted(&prev->k->k)
|
||||
? node_start
|
||||
: bkey_successor(prev->k->k.p);
|
||||
: bpos_successor(prev->k->k.p);
|
||||
char buf1[200], buf2[200];
|
||||
bool update_min = false;
|
||||
bool update_max = false;
|
||||
@ -1187,7 +1187,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
|
||||
bch2_trans_init(&trans, c, 0, 0);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
|
||||
BTREE_ITER_PREFETCH);
|
||||
BTREE_ITER_PREFETCH|
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
|
||||
while ((k = bch2_btree_iter_peek(iter)).k &&
|
||||
!(ret = bkey_err(k))) {
|
||||
@ -1405,7 +1407,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
|
||||
n1->key.k.p = n1->data->max_key =
|
||||
bkey_unpack_pos(n1, last);
|
||||
|
||||
n2->data->min_key = bkey_successor(n1->data->max_key);
|
||||
n2->data->min_key = bpos_successor(n1->data->max_key);
|
||||
|
||||
memcpy_u64s(vstruct_last(s1),
|
||||
s2->start, u64s);
|
||||
|
@ -612,12 +612,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
|
||||
BTREE_ERR_MUST_RETRY, c, ca, b, i,
|
||||
"incorrect level");
|
||||
|
||||
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
|
||||
u64 *p = (u64 *) &bn->ptr;
|
||||
|
||||
*p = swab64(*p);
|
||||
}
|
||||
|
||||
if (!write)
|
||||
compat_btree_node(b->c.level, b->c.btree_id, version,
|
||||
BSET_BIG_ENDIAN(i), write, bn);
|
||||
@ -1328,8 +1322,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
|
||||
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree))
|
||||
return -1;
|
||||
|
||||
ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?:
|
||||
validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
|
||||
ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
|
||||
validate_bset(c, NULL, b, i, sectors, WRITE, false);
|
||||
if (ret) {
|
||||
bch2_inconsistent_error(c);
|
||||
dump_stack();
|
||||
@ -1482,7 +1476,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
|
||||
validate_before_checksum = true;
|
||||
|
||||
/* validate_bset will be modifying: */
|
||||
if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change)
|
||||
if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
|
||||
validate_before_checksum = true;
|
||||
|
||||
/* if we're going to be encrypting, check metadata validity first: */
|
||||
|
@ -189,8 +189,8 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
|
||||
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
|
||||
|
||||
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
|
||||
unsigned version, unsigned big_endian,
|
||||
int write, struct bkey_format *f)
|
||||
unsigned version, unsigned big_endian,
|
||||
int write, struct bkey_format *f)
|
||||
{
|
||||
if (version < bcachefs_metadata_version_inode_btree_change &&
|
||||
btree_id == BTREE_ID_inodes) {
|
||||
@ -199,6 +199,16 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id,
|
||||
swap(f->field_offset[BKEY_FIELD_INODE],
|
||||
f->field_offset[BKEY_FIELD_OFFSET]);
|
||||
}
|
||||
|
||||
if (version < bcachefs_metadata_version_snapshot &&
|
||||
(level || btree_type_has_snapshots(btree_id))) {
|
||||
u64 max_packed =
|
||||
~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
|
||||
|
||||
f->field_offset[BKEY_FIELD_SNAPSHOT] = write
|
||||
? 0
|
||||
: U32_MAX - max_packed;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void compat_bpos(unsigned level, enum btree_id btree_id,
|
||||
@ -222,16 +232,24 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
|
||||
btree_node_type_is_extents(btree_id) &&
|
||||
bpos_cmp(bn->min_key, POS_MIN) &&
|
||||
write)
|
||||
bn->min_key = bkey_predecessor(bn->min_key);
|
||||
bn->min_key = bpos_nosnap_predecessor(bn->min_key);
|
||||
|
||||
if (version < bcachefs_metadata_version_snapshot &&
|
||||
write)
|
||||
bn->max_key.snapshot = 0;
|
||||
|
||||
compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
|
||||
compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
|
||||
|
||||
if (version < bcachefs_metadata_version_snapshot &&
|
||||
!write)
|
||||
bn->max_key.snapshot = U32_MAX;
|
||||
|
||||
if (version < bcachefs_metadata_version_inode_btree_change &&
|
||||
btree_node_type_is_extents(btree_id) &&
|
||||
bpos_cmp(bn->min_key, POS_MIN) &&
|
||||
!write)
|
||||
bn->min_key = bkey_successor(bn->min_key);
|
||||
bn->min_key = bpos_nosnap_successor(bn->min_key);
|
||||
}
|
||||
|
||||
#endif /* _BCACHEFS_BTREE_IO_H */
|
||||
|
@ -18,6 +18,36 @@
|
||||
|
||||
static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
|
||||
|
||||
static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
|
||||
{
|
||||
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
|
||||
|
||||
/* Are we iterating over keys in all snapshots? */
|
||||
if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
|
||||
p = bpos_successor(p);
|
||||
} else {
|
||||
p = bpos_nosnap_successor(p);
|
||||
p.snapshot = iter->snapshot;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
|
||||
{
|
||||
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
|
||||
|
||||
/* Are we iterating over keys in all snapshots? */
|
||||
if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
|
||||
p = bpos_predecessor(p);
|
||||
} else {
|
||||
p = bpos_nosnap_predecessor(p);
|
||||
p.snapshot = iter->snapshot;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
|
||||
{
|
||||
return l < BTREE_MAX_DEPTH &&
|
||||
@ -30,7 +60,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
|
||||
|
||||
if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
|
||||
bkey_cmp(pos, POS_MAX))
|
||||
pos = bkey_successor(pos);
|
||||
pos = bkey_successor(iter, pos);
|
||||
return pos;
|
||||
}
|
||||
|
||||
@ -591,10 +621,24 @@ err:
|
||||
|
||||
static void bch2_btree_iter_verify(struct btree_iter *iter)
|
||||
{
|
||||
enum btree_iter_type type = btree_iter_type(iter);
|
||||
unsigned i;
|
||||
|
||||
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
|
||||
|
||||
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
iter->pos.snapshot != iter->snapshot);
|
||||
|
||||
BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
|
||||
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
|
||||
|
||||
BUG_ON(type == BTREE_ITER_NODES &&
|
||||
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
|
||||
|
||||
BUG_ON(type != BTREE_ITER_NODES &&
|
||||
(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
!btree_type_has_snapshots(iter->btree_id));
|
||||
|
||||
bch2_btree_iter_verify_locks(iter);
|
||||
|
||||
for (i = 0; i < BTREE_MAX_DEPTH; i++)
|
||||
@ -605,6 +649,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
|
||||
{
|
||||
enum btree_iter_type type = btree_iter_type(iter);
|
||||
|
||||
BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
|
||||
iter->pos.snapshot != iter->snapshot);
|
||||
|
||||
BUG_ON((type == BTREE_ITER_KEYS ||
|
||||
type == BTREE_ITER_CACHED) &&
|
||||
(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
|
||||
@ -1434,7 +1481,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
|
||||
* Haven't gotten to the end of the parent node: go back down to
|
||||
* the next child node
|
||||
*/
|
||||
btree_iter_set_search_pos(iter, bkey_successor(iter->pos));
|
||||
btree_iter_set_search_pos(iter, bpos_successor(iter->pos));
|
||||
|
||||
/* Unlock to avoid screwing up our lock invariants: */
|
||||
btree_node_unlock(iter, iter->level);
|
||||
@ -1508,7 +1555,7 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
|
||||
bool ret = bpos_cmp(pos, POS_MAX) != 0;
|
||||
|
||||
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
pos = bkey_successor(pos);
|
||||
pos = bkey_successor(iter, pos);
|
||||
bch2_btree_iter_set_pos(iter, pos);
|
||||
return ret;
|
||||
}
|
||||
@ -1519,7 +1566,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
|
||||
bool ret = bpos_cmp(pos, POS_MIN) != 0;
|
||||
|
||||
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
|
||||
pos = bkey_predecessor(pos);
|
||||
pos = bkey_predecessor(iter, pos);
|
||||
bch2_btree_iter_set_pos(iter, pos);
|
||||
return ret;
|
||||
}
|
||||
@ -1535,7 +1582,7 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
|
||||
* btree, in that case we want iter->pos to reflect that:
|
||||
*/
|
||||
if (ret)
|
||||
btree_iter_set_search_pos(iter, bkey_successor(next_pos));
|
||||
btree_iter_set_search_pos(iter, bpos_successor(next_pos));
|
||||
else
|
||||
bch2_btree_iter_set_pos(iter, POS_MAX);
|
||||
|
||||
@ -1548,7 +1595,7 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
|
||||
bool ret = bpos_cmp(next_pos, POS_MIN) != 0;
|
||||
|
||||
if (ret)
|
||||
btree_iter_set_search_pos(iter, bkey_predecessor(next_pos));
|
||||
btree_iter_set_search_pos(iter, bpos_predecessor(next_pos));
|
||||
else
|
||||
bch2_btree_iter_set_pos(iter, POS_MIN);
|
||||
|
||||
@ -1594,13 +1641,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi
|
||||
k = btree_iter_level_peek(iter, &iter->l[0]);
|
||||
|
||||
if (next_update &&
|
||||
bkey_cmp(next_update->k.p, iter->real_pos) <= 0)
|
||||
bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
|
||||
k = bkey_i_to_s_c(next_update);
|
||||
|
||||
if (likely(k.k)) {
|
||||
if (bkey_deleted(k.k)) {
|
||||
btree_iter_set_search_pos(iter,
|
||||
bkey_successor(k.k->p));
|
||||
bkey_successor(iter, k.k->p));
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1739,7 +1786,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
|
||||
if (iter->pos.inode == KEY_INODE_MAX)
|
||||
return bkey_s_c_null;
|
||||
|
||||
bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
|
||||
bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
|
||||
}
|
||||
|
||||
pos = iter->pos;
|
||||
@ -1973,6 +2020,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
|
||||
{
|
||||
struct btree_iter *iter, *best = NULL;
|
||||
|
||||
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
|
||||
!btree_type_has_snapshots(btree_id))
|
||||
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
|
||||
|
||||
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
|
||||
pos.snapshot = btree_type_has_snapshots(btree_id)
|
||||
? U32_MAX : 0;
|
||||
|
||||
/* We always want a fresh iterator for node iterators: */
|
||||
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
|
||||
goto alloc_iter;
|
||||
@ -2007,11 +2062,14 @@ alloc_iter:
|
||||
|
||||
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
|
||||
btree_node_type_is_extents(btree_id) &&
|
||||
!(flags & BTREE_ITER_NOT_EXTENTS))
|
||||
!(flags & BTREE_ITER_NOT_EXTENTS) &&
|
||||
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
|
||||
flags |= BTREE_ITER_IS_EXTENTS;
|
||||
|
||||
iter->flags = flags;
|
||||
|
||||
iter->snapshot = pos.snapshot;
|
||||
|
||||
if (!(iter->flags & BTREE_ITER_INTENT))
|
||||
bch2_btree_iter_downgrade(iter);
|
||||
else if (!iter->locks_want)
|
||||
@ -2034,6 +2092,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
|
||||
__bch2_trans_get_iter(trans, btree_id, pos,
|
||||
BTREE_ITER_NODES|
|
||||
BTREE_ITER_NOT_EXTENTS|
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
flags);
|
||||
unsigned i;
|
||||
|
||||
|
@ -172,6 +172,9 @@ bool bch2_btree_iter_rewind(struct btree_iter *);
|
||||
|
||||
static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
|
||||
{
|
||||
if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
|
||||
new_pos.snapshot = iter->snapshot;
|
||||
|
||||
bkey_init(&iter->k);
|
||||
iter->k.p = iter->pos = new_pos;
|
||||
}
|
||||
|
@ -216,6 +216,7 @@ enum btree_iter_type {
|
||||
#define BTREE_ITER_CACHED_NOFILL (1 << 9)
|
||||
#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
|
||||
#define BTREE_ITER_NOT_EXTENTS (1 << 11)
|
||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||
|
||||
enum btree_iter_uptodate {
|
||||
BTREE_ITER_UPTODATE = 0,
|
||||
@ -245,6 +246,8 @@ struct btree_iter {
|
||||
/* what we're searching for/what the iterator actually points to: */
|
||||
struct bpos real_pos;
|
||||
struct bpos pos_after_commit;
|
||||
/* When we're filtering by snapshot, the snapshot ID we're looking for: */
|
||||
unsigned snapshot;
|
||||
|
||||
u16 flags;
|
||||
u8 idx;
|
||||
@ -329,7 +332,7 @@ struct bkey_cached {
|
||||
struct btree_insert_entry {
|
||||
unsigned trigger_flags;
|
||||
u8 bkey_type;
|
||||
u8 btree_id;
|
||||
enum btree_id btree_id:8;
|
||||
u8 level;
|
||||
unsigned trans_triggers_run:1;
|
||||
unsigned is_extent:1;
|
||||
@ -610,6 +613,17 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
|
||||
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
|
||||
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
|
||||
|
||||
#define BTREE_ID_HAS_SNAPSHOTS \
|
||||
((1U << BTREE_ID_extents)| \
|
||||
(1U << BTREE_ID_inodes)| \
|
||||
(1U << BTREE_ID_dirents)| \
|
||||
(1U << BTREE_ID_xattrs))
|
||||
|
||||
static inline bool btree_type_has_snapshots(enum btree_id id)
|
||||
{
|
||||
return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
|
||||
}
|
||||
|
||||
enum btree_trigger_flags {
|
||||
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
|
||||
|
||||
|
@ -69,7 +69,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
|
||||
break;
|
||||
}
|
||||
|
||||
next_node = bkey_successor(k.k->p);
|
||||
next_node = bpos_successor(k.k->p);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -289,7 +289,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
||||
b->data->flags = 0;
|
||||
SET_BTREE_NODE_ID(b->data, as->btree_id);
|
||||
SET_BTREE_NODE_LEVEL(b->data, level);
|
||||
b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
|
||||
|
||||
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
|
||||
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
|
||||
@ -1100,6 +1099,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
|
||||
struct btree *n2;
|
||||
struct bset *set1, *set2;
|
||||
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
|
||||
struct bpos n1_pos;
|
||||
|
||||
n2 = bch2_btree_node_alloc(as, n1->c.level);
|
||||
bch2_btree_update_add_new_node(as, n2);
|
||||
@ -1146,8 +1146,12 @@ static struct btree *__btree_split_node(struct btree_update *as,
|
||||
n1->nr.packed_keys = nr_packed;
|
||||
n1->nr.unpacked_keys = nr_unpacked;
|
||||
|
||||
btree_set_max(n1, bkey_unpack_pos(n1, prev));
|
||||
btree_set_min(n2, bkey_successor(n1->key.k.p));
|
||||
n1_pos = bkey_unpack_pos(n1, prev);
|
||||
if (as->c->sb.version < bcachefs_metadata_version_snapshot)
|
||||
n1_pos.snapshot = U32_MAX;
|
||||
|
||||
btree_set_max(n1, n1_pos);
|
||||
btree_set_min(n2, bpos_successor(n1->key.k.p));
|
||||
|
||||
bch2_bkey_format_init(&s);
|
||||
bch2_bkey_format_add_pos(&s, n2->data->min_key);
|
||||
|
@ -223,9 +223,17 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
BUG_ON(bch2_debug_check_bkeys &&
|
||||
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
|
||||
BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
|
||||
if (bch2_debug_check_bkeys) {
|
||||
const char *invalid = bch2_bkey_invalid(c,
|
||||
bkey_i_to_s_c(i->k), i->bkey_type);
|
||||
if (invalid) {
|
||||
char buf[200];
|
||||
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
|
||||
panic("invalid bkey %s on insert: %s\n", buf, invalid);
|
||||
}
|
||||
}
|
||||
BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
|
||||
BUG_ON(i->level != i->iter->level);
|
||||
BUG_ON(i->btree_id != i->iter->btree_id);
|
||||
}
|
||||
|
@ -222,7 +222,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
|
||||
|
||||
bch2_trans_init(&trans, i->c, 0, 0);
|
||||
|
||||
iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
|
||||
iter = bch2_trans_get_iter(&trans, i->id, i->from,
|
||||
BTREE_ITER_PREFETCH|
|
||||
BTREE_ITER_ALL_SNAPSHOTS);
|
||||
k = bch2_btree_iter_peek(iter);
|
||||
|
||||
while (k.k && !(err = bkey_err(k))) {
|
||||
@ -290,7 +292,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
|
||||
* all nodes, meh
|
||||
*/
|
||||
i->from = bpos_cmp(POS_MAX, b->key.k.p)
|
||||
? bkey_successor(b->key.k.p)
|
||||
? bpos_successor(b->key.k.p)
|
||||
: b->key.k.p;
|
||||
|
||||
if (!i->size)
|
||||
|
@ -179,7 +179,8 @@ const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
||||
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
|
||||
return "value too big";
|
||||
|
||||
if (bp.v->min_key.snapshot)
|
||||
if (c->sb.version < bcachefs_metadata_version_snapshot &&
|
||||
bp.v->min_key.snapshot)
|
||||
return "invalid min_key.snapshot";
|
||||
|
||||
return bch2_bkey_ptrs_invalid(c, k);
|
||||
@ -211,8 +212,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
|
||||
btree_node_type_is_extents(btree_id) &&
|
||||
bkey_cmp(bp.v->min_key, POS_MIN))
|
||||
bp.v->min_key = write
|
||||
? bkey_predecessor(bp.v->min_key)
|
||||
: bkey_successor(bp.v->min_key);
|
||||
? bpos_nosnap_predecessor(bp.v->min_key)
|
||||
: bpos_nosnap_successor(bp.v->min_key);
|
||||
}
|
||||
|
||||
/* KEY_TYPE_extent: */
|
||||
|
@ -1318,6 +1318,7 @@ static int check_inode(struct btree_trans *trans,
|
||||
struct bkey_inode_buf p;
|
||||
|
||||
bch2_inode_pack(c, &p, &u);
|
||||
p.inode.k.p = iter->pos;
|
||||
|
||||
ret = __bch2_trans_do(trans, NULL, NULL,
|
||||
BTREE_INSERT_NOFAIL|
|
||||
|
@ -332,6 +332,7 @@ int bch2_inode_write(struct btree_trans *trans,
|
||||
return PTR_ERR(inode_p);
|
||||
|
||||
bch2_inode_pack(trans->c, inode_p, inode);
|
||||
inode_p->inode.k.p.snapshot = iter->snapshot;
|
||||
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
|
||||
return 0;
|
||||
}
|
||||
|
@ -332,6 +332,9 @@ int bch2_extent_update(struct btree_trans *trans,
|
||||
|
||||
if (i_sectors_delta || new_i_size) {
|
||||
bch2_inode_pack(trans->c, &inode_p, &inode_u);
|
||||
|
||||
inode_p.inode.k.p.snapshot = iter->snapshot;
|
||||
|
||||
bch2_trans_update(trans, inode_iter,
|
||||
&inode_p.inode.k_i, 0);
|
||||
}
|
||||
@ -447,6 +450,8 @@ int bch2_write_index_default(struct bch_write_op *op)
|
||||
|
||||
k = bch2_keylist_front(keys);
|
||||
|
||||
k->k.p.snapshot = iter->snapshot;
|
||||
|
||||
bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
|
||||
bkey_copy(sk.k, k);
|
||||
bch2_cut_front(iter->pos, sk.k);
|
||||
|
@ -1449,7 +1449,7 @@ void bch2_journal_write(struct closure *cl)
|
||||
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
|
||||
validate_before_checksum = true;
|
||||
|
||||
if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
|
||||
if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
|
||||
validate_before_checksum = true;
|
||||
|
||||
if (validate_before_checksum &&
|
||||
|
@ -998,6 +998,13 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
|
||||
bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
|
||||
}
|
||||
|
||||
if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
|
||||
bch_info(c, "alloc_v2 feature bit not set, fsck required");
|
||||
c->opts.fsck = true;
|
||||
@ -1340,6 +1347,7 @@ int bch2_fs_initialize(struct bch_fs *c)
|
||||
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
|
||||
root_inode.bi_inum = BCACHEFS_ROOT_INO;
|
||||
bch2_inode_pack(c, &packed_inode, &root_inode);
|
||||
packed_inode.inode.k.p.snapshot = U32_MAX;
|
||||
|
||||
err = "error creating root directory";
|
||||
ret = bch2_btree_insert(c, BTREE_ID_inodes,
|
||||
|
@ -483,6 +483,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
|
||||
for (i = 0; i < nr; i++) {
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k.p.offset = test_rand();
|
||||
k.k.p.snapshot = U32_MAX;
|
||||
|
||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||
__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
|
||||
|
Loading…
x
Reference in New Issue
Block a user