bcachefs: stripe support for replicas tracking
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
c258f28eba
commit
af9d3bc203
@ -888,10 +888,11 @@ struct bch_sb_field {
|
||||
x(journal, 0) \
|
||||
x(members, 1) \
|
||||
x(crypt, 2) \
|
||||
x(replicas, 3) \
|
||||
x(replicas_v0, 3) \
|
||||
x(quota, 4) \
|
||||
x(disk_groups, 5) \
|
||||
x(clean, 6)
|
||||
x(clean, 6) \
|
||||
x(replicas, 7)
|
||||
|
||||
enum bch_sb_field_type {
|
||||
#define x(f, nr) BCH_SB_FIELD_##f = nr,
|
||||
@ -1017,16 +1018,28 @@ enum bch_data_type {
|
||||
BCH_DATA_NR = 6,
|
||||
};
|
||||
|
||||
struct bch_replicas_entry {
|
||||
struct bch_replicas_entry_v0 {
|
||||
__u8 data_type;
|
||||
__u8 nr_devs;
|
||||
__u8 devs[];
|
||||
};
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
 * BCH_SB_FIELD_replicas_v0: superblock field holding replicas entries in the
 * v0 layout (struct bch_replicas_entry_v0, which has no nr_required byte).
 */
struct bch_sb_field_replicas_v0 {
|
||||
struct bch_sb_field field;
|
||||
/* variable-length entries; walked with for_each_replicas_entry_v0() */
struct bch_replicas_entry_v0 entries[];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
/*
 * Replicas entry, current layout: the v0 fields (data_type, nr_devs, devs[])
 * plus an nr_required byte. Entries converted from the v0 layout are given
 * nr_required = 1.
 */
struct bch_replicas_entry {
|
||||
/* printed via bch2_data_types[]; superblock validation of the v0 layout
 * requires < BCH_DATA_NR, and the entry iterators stop at data_type == 0 */
__u8 data_type;
|
||||
/* number of valid bytes in devs[] */
__u8 nr_devs;
|
||||
/* superblock validation rejects 0, and rejects values > 1 that are >= nr_devs;
 * replicas status computes redundancy as nr_online - nr_required */
__u8 nr_required;
|
||||
/* device indices, one byte each */
__u8 devs[];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct bch_sb_field_replicas {
|
||||
struct bch_sb_field field;
|
||||
struct bch_replicas_entry entries[];
|
||||
};
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
/* BCH_SB_FIELD_quota: */
|
||||
|
||||
|
@ -130,7 +130,8 @@ int bch2_fs_recovery(struct bch_fs *c)
|
||||
int ret;
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
|
||||
if (!rcu_dereference_protected(c->replicas,
|
||||
lockdep_is_held(&c->sb_lock))->nr) {
|
||||
bch_info(c, "building replicas info");
|
||||
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
|
||||
}
|
||||
|
@ -45,7 +45,10 @@ static void replicas_entry_to_text(struct printbuf *out,
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
pr_buf(out, "%u: [", e->data_type);
|
||||
pr_buf(out, "%s: %u/%u [",
|
||||
bch2_data_types[e->data_type],
|
||||
e->nr_required,
|
||||
e->nr_devs);
|
||||
|
||||
for (i = 0; i < e->nr_devs; i++)
|
||||
pr_buf(out, i ? " %u" : "%u", e->devs[i]);
|
||||
@ -75,6 +78,8 @@ static void extent_to_replicas(struct bkey_s_c k,
|
||||
const union bch_extent_entry *entry;
|
||||
struct extent_ptr_decoded p;
|
||||
|
||||
r->nr_required = 1;
|
||||
|
||||
extent_for_each_ptr_decode(e, p, entry)
|
||||
if (!p.ptr.cached)
|
||||
r->devs[r->nr_devs++] = p.ptr.dev;
|
||||
@ -115,6 +120,7 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
|
||||
|
||||
e->data_type = data_type;
|
||||
e->nr_devs = 0;
|
||||
e->nr_required = 1;
|
||||
|
||||
for (i = 0; i < devs.nr; i++)
|
||||
e->devs[e->nr_devs++] = devs.devs[i];
|
||||
@ -359,9 +365,8 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
|
||||
{
|
||||
struct bch_replicas_entry *e, *dst;
|
||||
struct bch_replicas_cpu *cpu_r;
|
||||
unsigned nr = 0, entry_size = 0;
|
||||
unsigned nr = 0, entry_size = 0, idx = 0;
|
||||
|
||||
if (sb_r)
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
entry_size = max_t(unsigned, entry_size,
|
||||
replicas_entry_bytes(e));
|
||||
@ -376,29 +381,71 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
|
||||
cpu_r->nr = nr;
|
||||
cpu_r->entry_size = entry_size;
|
||||
|
||||
nr = 0;
|
||||
|
||||
if (sb_r)
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
dst = cpu_replicas_entry(cpu_r, nr++);
|
||||
dst = cpu_replicas_entry(cpu_r, idx++);
|
||||
memcpy(dst, e, replicas_entry_bytes(e));
|
||||
replicas_entry_sort(dst);
|
||||
}
|
||||
|
||||
bch2_cpu_replicas_sort(cpu_r);
|
||||
return cpu_r;
|
||||
}
|
||||
|
||||
static struct bch_replicas_cpu *
|
||||
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r)
|
||||
{
|
||||
struct bch_replicas_entry_v0 *e;
|
||||
struct bch_replicas_cpu *cpu_r;
|
||||
unsigned nr = 0, entry_size = 0, idx = 0;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
entry_size = max_t(unsigned, entry_size,
|
||||
replicas_entry_bytes(e));
|
||||
nr++;
|
||||
}
|
||||
|
||||
entry_size += sizeof(struct bch_replicas_entry) -
|
||||
sizeof(struct bch_replicas_entry_v0);
|
||||
|
||||
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
|
||||
nr * entry_size, GFP_NOIO);
|
||||
if (!cpu_r)
|
||||
return NULL;
|
||||
|
||||
cpu_r->nr = nr;
|
||||
cpu_r->entry_size = entry_size;
|
||||
|
||||
for_each_replicas_entry(sb_r, e) {
|
||||
struct bch_replicas_entry *dst =
|
||||
cpu_replicas_entry(cpu_r, idx++);
|
||||
|
||||
dst->data_type = e->data_type;
|
||||
dst->nr_devs = e->nr_devs;
|
||||
dst->nr_required = 1;
|
||||
memcpy(dst->devs, e->devs, e->nr_devs);
|
||||
replicas_entry_sort(dst);
|
||||
}
|
||||
|
||||
return cpu_r;
|
||||
}
|
||||
|
||||
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_sb_field_replicas *sb_v1;
|
||||
struct bch_sb_field_replicas_v0 *sb_v0;
|
||||
struct bch_replicas_cpu *cpu_r, *old_r;
|
||||
|
||||
sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
|
||||
if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
|
||||
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_v1);
|
||||
else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
|
||||
cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0);
|
||||
else
|
||||
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu), GFP_NOIO);
|
||||
|
||||
if (!cpu_r)
|
||||
return -ENOMEM;
|
||||
|
||||
bch2_cpu_replicas_sort(cpu_r);
|
||||
|
||||
old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
|
||||
rcu_assign_pointer(c->replicas, cpu_r);
|
||||
if (old_r)
|
||||
@ -407,23 +454,72 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
|
||||
static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *r)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_entry *dst, *src;
|
||||
struct bch_sb_field_replicas_v0 *sb_r;
|
||||
struct bch_replicas_entry_v0 *dst;
|
||||
struct bch_replicas_entry *src;
|
||||
size_t bytes;
|
||||
|
||||
bytes = sizeof(struct bch_sb_field_replicas);
|
||||
|
||||
for_each_cpu_replicas_entry(r, src)
|
||||
bytes += replicas_entry_bytes(src) - 1;
|
||||
|
||||
sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
|
||||
DIV_ROUND_UP(bytes, sizeof(u64)));
|
||||
if (!sb_r)
|
||||
return -ENOSPC;
|
||||
|
||||
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
|
||||
sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);
|
||||
|
||||
memset(&sb_r->entries, 0,
|
||||
vstruct_end(&sb_r->field) -
|
||||
(void *) &sb_r->entries);
|
||||
|
||||
dst = sb_r->entries;
|
||||
for_each_cpu_replicas_entry(r, src) {
|
||||
dst->data_type = src->data_type;
|
||||
dst->nr_devs = src->nr_devs;
|
||||
memcpy(dst->devs, src->devs, src->nr_devs);
|
||||
|
||||
dst = replicas_entry_next(dst);
|
||||
|
||||
BUG_ON((void *) dst > vstruct_end(&sb_r->field));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
|
||||
struct bch_replicas_cpu *r)
|
||||
{
|
||||
struct bch_sb_field_replicas *sb_r;
|
||||
struct bch_replicas_entry *dst, *src;
|
||||
bool need_v1 = false;
|
||||
size_t bytes;
|
||||
|
||||
bytes = sizeof(struct bch_sb_field_replicas);
|
||||
|
||||
for_each_cpu_replicas_entry(r, src) {
|
||||
bytes += replicas_entry_bytes(src);
|
||||
if (src->nr_required != 1)
|
||||
need_v1 = true;
|
||||
}
|
||||
|
||||
if (!need_v1)
|
||||
return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
|
||||
|
||||
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
|
||||
DIV_ROUND_UP(bytes, sizeof(u64)));
|
||||
if (!sb_r)
|
||||
return -ENOSPC;
|
||||
|
||||
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
|
||||
sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
|
||||
|
||||
memset(&sb_r->entries, 0,
|
||||
vstruct_end(&sb_r->field) -
|
||||
(void *) &sb_r->entries);
|
||||
@ -482,8 +578,10 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
|
||||
if (!e->nr_devs)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: too many devices";
|
||||
if (e->nr_devs >= BCH_REPLICAS_MAX)
|
||||
err = "invalid replicas entry: bad nr_required";
|
||||
if (!e->nr_required ||
|
||||
(e->nr_required > 1 &&
|
||||
e->nr_required >= e->nr_devs))
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: invalid device";
|
||||
@ -525,6 +623,45 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
|
||||
.to_text = bch2_sb_replicas_to_text,
|
||||
};
|
||||
|
||||
static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
|
||||
{
|
||||
struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
|
||||
struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
|
||||
struct bch_replicas_cpu *cpu_r = NULL;
|
||||
struct bch_replicas_entry_v0 *e;
|
||||
const char *err;
|
||||
unsigned i;
|
||||
|
||||
for_each_replicas_entry_v0(sb_r, e) {
|
||||
err = "invalid replicas entry: invalid data type";
|
||||
if (e->data_type >= BCH_DATA_NR)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: no devices";
|
||||
if (!e->nr_devs)
|
||||
goto err;
|
||||
|
||||
err = "invalid replicas entry: invalid device";
|
||||
for (i = 0; i < e->nr_devs; i++)
|
||||
if (!bch2_dev_exists(sb, mi, e->devs[i]))
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = "cannot allocate memory";
|
||||
cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r);
|
||||
if (!cpu_r)
|
||||
goto err;
|
||||
|
||||
err = check_dup_replicas_entries(cpu_r);
|
||||
err:
|
||||
kfree(cpu_r);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Superblock field ops for the replicas_v0 field; only a validate hook is set. */
const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
|
||||
.validate = bch2_sb_validate_replicas_v0,
|
||||
};
|
||||
|
||||
/* Query replicas: */
|
||||
|
||||
bool bch2_replicas_marked(struct bch_fs *c,
|
||||
@ -591,7 +728,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
memset(&ret, 0, sizeof(ret));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
|
||||
ret.replicas[i].nr_online = UINT_MAX;
|
||||
ret.replicas[i].redundancy = INT_MAX;
|
||||
|
||||
mi = bch2_sb_get_members(c->disk_sb.sb);
|
||||
rcu_read_lock();
|
||||
@ -613,9 +750,9 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
nr_offline++;
|
||||
}
|
||||
|
||||
ret.replicas[e->data_type].nr_online =
|
||||
min(ret.replicas[e->data_type].nr_online,
|
||||
nr_online);
|
||||
ret.replicas[e->data_type].redundancy =
|
||||
min(ret.replicas[e->data_type].redundancy,
|
||||
(int) nr_online - (int) e->nr_required);
|
||||
|
||||
ret.replicas[e->data_type].nr_offline =
|
||||
max(ret.replicas[e->data_type].nr_offline,
|
||||
@ -624,6 +761,10 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
|
||||
if (ret.replicas[i].redundancy == INT_MAX)
|
||||
ret.replicas[i].redundancy = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -638,7 +779,7 @@ static bool have_enough_devs(struct replicas_status s,
|
||||
bool force_if_lost)
|
||||
{
|
||||
return (!s.replicas[type].nr_offline || force_if_degraded) &&
|
||||
(s.replicas[type].nr_online || force_if_lost);
|
||||
(s.replicas[type].redundancy >= 0 || force_if_lost);
|
||||
}
|
||||
|
||||
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
|
||||
@ -654,14 +795,14 @@ bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
|
||||
flags & BCH_FORCE_IF_DATA_LOST));
|
||||
}
|
||||
|
||||
unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
|
||||
int bch2_replicas_online(struct bch_fs *c, bool meta)
|
||||
{
|
||||
struct replicas_status s = bch2_replicas_status(c);
|
||||
|
||||
return meta
|
||||
? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
|
||||
s.replicas[BCH_DATA_BTREE].nr_online)
|
||||
: s.replicas[BCH_DATA_USER].nr_online;
|
||||
return (meta
|
||||
? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
|
||||
s.replicas[BCH_DATA_BTREE].redundancy)
|
||||
: s.replicas[BCH_DATA_USER].redundancy) + 1;
|
||||
}
|
||||
|
||||
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
|
||||
|
@ -17,7 +17,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
|
||||
|
||||
struct replicas_status {
|
||||
struct {
|
||||
unsigned nr_online;
|
||||
int redundancy;
|
||||
unsigned nr_offline;
|
||||
} replicas[BCH_DATA_NR];
|
||||
};
|
||||
@ -27,7 +27,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *,
|
||||
struct replicas_status bch2_replicas_status(struct bch_fs *);
|
||||
bool bch2_have_enough_devs(struct replicas_status, unsigned);
|
||||
|
||||
unsigned bch2_replicas_online(struct bch_fs *, bool);
|
||||
int bch2_replicas_online(struct bch_fs *, bool);
|
||||
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
|
||||
|
||||
int bch2_replicas_gc_end(struct bch_fs *, int);
|
||||
@ -46,8 +46,14 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
|
||||
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
|
||||
(_i) = replicas_entry_next(_i))
|
||||
|
||||
/*
 * Iterate _i over the entries of replicas_v0 superblock field _r.
 * Stops at the end of the field or at the first entry whose data_type is 0.
 */
#define for_each_replicas_entry_v0(_r, _i) \
|
||||
for (_i = (_r)->entries; \
|
||||
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
|
||||
(_i) = replicas_entry_next(_i))
|
||||
|
||||
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
|
||||
|
||||
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
|
||||
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
|
||||
|
||||
#endif /* _BCACHEFS_REPLICAS_H */
|
||||
|
@ -60,8 +60,13 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
|
||||
void *src, *dst;
|
||||
|
||||
src = vstruct_end(f);
|
||||
|
||||
if (u64s) {
|
||||
f->u64s = cpu_to_le32(u64s);
|
||||
dst = vstruct_end(f);
|
||||
} else {
|
||||
dst = f;
|
||||
}
|
||||
|
||||
memmove(dst, src, vstruct_end(sb->sb) - src);
|
||||
|
||||
@ -71,7 +76,16 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
|
||||
|
||||
sb->sb->u64s = cpu_to_le32(sb_u64s);
|
||||
|
||||
return f;
|
||||
return u64s ? f : NULL;
|
||||
}
|
||||
|
||||
void bch2_sb_field_delete(struct bch_sb_handle *sb,
|
||||
enum bch_sb_field_type type)
|
||||
{
|
||||
struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
|
||||
|
||||
if (f)
|
||||
__bch2_sb_field_resize(sb, f, 0);
|
||||
}
|
||||
|
||||
/* Superblock realloc/free: */
|
||||
@ -174,6 +188,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
|
||||
}
|
||||
|
||||
f = __bch2_sb_field_resize(sb, f, u64s);
|
||||
if (f)
|
||||
f->type = cpu_to_le32(type);
|
||||
return f;
|
||||
}
|
||||
@ -366,6 +381,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
|
||||
{
|
||||
struct bch_sb_field *src_f, *dst_f;
|
||||
struct bch_sb *dst = dst_handle->sb;
|
||||
unsigned i;
|
||||
|
||||
dst->version = src->version;
|
||||
dst->seq = src->seq;
|
||||
@ -384,14 +400,16 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
|
||||
memcpy(dst->features, src->features, sizeof(dst->features));
|
||||
memcpy(dst->compat, src->compat, sizeof(dst->compat));
|
||||
|
||||
vstruct_for_each(src, src_f) {
|
||||
if (src_f->type == BCH_SB_FIELD_journal)
|
||||
for (i = 0; i < BCH_SB_FIELD_NR; i++) {
|
||||
if (i == BCH_SB_FIELD_journal)
|
||||
continue;
|
||||
|
||||
dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type));
|
||||
src_f = bch2_sb_field_get(src, i);
|
||||
dst_f = bch2_sb_field_get(dst, i);
|
||||
dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
|
||||
le32_to_cpu(src_f->u64s));
|
||||
src_f ? le32_to_cpu(src_f->u64s) : 0);
|
||||
|
||||
if (src_f)
|
||||
memcpy(dst_f, src_f, vstruct_bytes(src_f));
|
||||
}
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
|
||||
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
|
||||
enum bch_sb_field_type, unsigned);
|
||||
void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
|
||||
|
||||
#define field_to_type(_f, _name) \
|
||||
container_of_or_null(_f, struct bch_sb_field_##_name, field)
|
||||
|
@ -346,8 +346,8 @@ SHOW(bch2_fs)
|
||||
|
||||
sysfs_print(promote_whole_extents, c->promote_whole_extents);
|
||||
|
||||
sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
|
||||
sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));
|
||||
sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true));
|
||||
sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false));
|
||||
|
||||
/* Debugging: */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user