bcachefs: stripe support for replicas tracking

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2018-10-30 14:32:47 -04:00 committed by Kent Overstreet
parent c258f28eba
commit af9d3bc203
7 changed files with 234 additions and 54 deletions

View File

@ -888,10 +888,11 @@ struct bch_sb_field {
x(journal, 0) \
x(members, 1) \
x(crypt, 2) \
x(replicas, 3) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6)
x(clean, 6) \
x(replicas, 7)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@ -1017,16 +1018,28 @@ enum bch_data_type {
BCH_DATA_NR = 6,
};
struct bch_replicas_entry {
struct bch_replicas_entry_v0 {
__u8 data_type;
__u8 nr_devs;
__u8 devs[];
};
} __attribute__((packed));
/*
 * BCH_SB_FIELD_replicas_v0: superblock replicas field in the older layout.
 * Holds a packed, variable-length list of bch_replicas_entry_v0 entries,
 * which have no nr_required member.
 */
struct bch_sb_field_replicas_v0 {
struct bch_sb_field field;
struct bch_replicas_entry_v0 entries[];
} __attribute__((packed, aligned(8)));
/*
 * One replicas entry: a data type plus the list of devices it lives on.
 * Variable length - devs[] holds nr_devs elements.
 */
struct bch_replicas_entry {
/* Data type of this entry; printed via bch2_data_types[] */
__u8 data_type;
/* Number of elements in devs[] */
__u8 nr_devs;
/*
 * Superblock validation requires this to be nonzero and, when greater
 * than 1, strictly less than nr_devs. Presumably the number of devices
 * needed to read the data - TODO confirm.
 */
__u8 nr_required;
/* Device indices */
__u8 devs[];
} __attribute__((packed));
struct bch_sb_field_replicas {
struct bch_sb_field field;
struct bch_replicas_entry entries[];
};
} __attribute__((packed, aligned(8)));
/* BCH_SB_FIELD_quota: */

View File

@ -130,7 +130,8 @@ int bch2_fs_recovery(struct bch_fs *c)
int ret;
mutex_lock(&c->sb_lock);
if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
if (!rcu_dereference_protected(c->replicas,
lockdep_is_held(&c->sb_lock))->nr) {
bch_info(c, "building replicas info");
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
}

View File

@ -45,7 +45,10 @@ static void replicas_entry_to_text(struct printbuf *out,
{
unsigned i;
pr_buf(out, "%u: [", e->data_type);
pr_buf(out, "%s: %u/%u [",
bch2_data_types[e->data_type],
e->nr_required,
e->nr_devs);
for (i = 0; i < e->nr_devs; i++)
pr_buf(out, i ? " %u" : "%u", e->devs[i]);
@ -75,6 +78,8 @@ static void extent_to_replicas(struct bkey_s_c k,
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
r->nr_required = 1;
extent_for_each_ptr_decode(e, p, entry)
if (!p.ptr.cached)
r->devs[r->nr_devs++] = p.ptr.dev;
@ -115,6 +120,7 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
e->data_type = data_type;
e->nr_devs = 0;
e->nr_required = 1;
for (i = 0; i < devs.nr; i++)
e->devs[e->nr_devs++] = devs.devs[i];
@ -359,9 +365,8 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
{
struct bch_replicas_entry *e, *dst;
struct bch_replicas_cpu *cpu_r;
unsigned nr = 0, entry_size = 0;
unsigned nr = 0, entry_size = 0, idx = 0;
if (sb_r)
for_each_replicas_entry(sb_r, e) {
entry_size = max_t(unsigned, entry_size,
replicas_entry_bytes(e));
@ -376,29 +381,71 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
cpu_r->nr = nr;
cpu_r->entry_size = entry_size;
nr = 0;
if (sb_r)
for_each_replicas_entry(sb_r, e) {
dst = cpu_replicas_entry(cpu_r, nr++);
dst = cpu_replicas_entry(cpu_r, idx++);
memcpy(dst, e, replicas_entry_bytes(e));
replicas_entry_sort(dst);
}
bch2_cpu_replicas_sort(cpu_r);
return cpu_r;
}
/*
 * Build an in-memory replicas table from a v0 superblock replicas field.
 *
 * v0 entries have no nr_required member, so every entry is widened to a
 * struct bch_replicas_entry with nr_required set to 1.
 *
 * replicas_entry_sort() is applied to each converted entry; the table as a
 * whole is not sorted here.
 *
 * Returns a kzalloc()ed table (to be released with kfree()), or NULL if the
 * allocation fails.
 */
static struct bch_replicas_cpu *
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r)
{
	struct bch_replicas_entry_v0 *e;
	struct bch_replicas_cpu *cpu_r;
	unsigned nr = 0, entry_size = 0, idx = 0;

	/* First pass: count the entries and find the largest one. */
	for_each_replicas_entry_v0(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	/* In-memory slots use the current entry layout, which is larger than v0. */
	entry_size += sizeof(struct bch_replicas_entry) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
			nr * entry_size, GFP_NOIO);
	if (!cpu_r)
		return NULL;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	/* Second pass: convert each v0 entry into its fixed-size slot. */
	for_each_replicas_entry_v0(sb_r, e) {
		struct bch_replicas_entry *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type		= e->data_type;
		dst->nr_devs		= e->nr_devs;
		dst->nr_required	= 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		replicas_entry_sort(dst);
	}

	return cpu_r;
}
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
struct bch_sb_field_replicas *sb_r;
struct bch_sb_field_replicas *sb_v1;
struct bch_sb_field_replicas_v0 *sb_v0;
struct bch_replicas_cpu *cpu_r, *old_r;
sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_v1);
else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0);
else
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu), GFP_NOIO);
if (!cpu_r)
return -ENOMEM;
bch2_cpu_replicas_sort(cpu_r);
old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
rcu_assign_pointer(c->replicas, cpu_r);
if (old_r)
@ -407,23 +454,72 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
return 0;
}
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
struct bch_replicas_cpu *r)
{
struct bch_sb_field_replicas *sb_r;
struct bch_replicas_entry *dst, *src;
struct bch_sb_field_replicas_v0 *sb_r;
struct bch_replicas_entry_v0 *dst;
struct bch_replicas_entry *src;
size_t bytes;
bytes = sizeof(struct bch_sb_field_replicas);
for_each_cpu_replicas_entry(r, src)
bytes += replicas_entry_bytes(src) - 1;
sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
return -ENOSPC;
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);
memset(&sb_r->entries, 0,
vstruct_end(&sb_r->field) -
(void *) &sb_r->entries);
dst = sb_r->entries;
for_each_cpu_replicas_entry(r, src) {
dst->data_type = src->data_type;
dst->nr_devs = src->nr_devs;
memcpy(dst->devs, src->devs, src->nr_devs);
dst = replicas_entry_next(dst);
BUG_ON((void *) dst > vstruct_end(&sb_r->field));
}
return 0;
}
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
struct bch_replicas_cpu *r)
{
struct bch_sb_field_replicas *sb_r;
struct bch_replicas_entry *dst, *src;
bool need_v1 = false;
size_t bytes;
bytes = sizeof(struct bch_sb_field_replicas);
for_each_cpu_replicas_entry(r, src) {
bytes += replicas_entry_bytes(src);
if (src->nr_required != 1)
need_v1 = true;
}
if (!need_v1)
return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
return -ENOSPC;
bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
memset(&sb_r->entries, 0,
vstruct_end(&sb_r->field) -
(void *) &sb_r->entries);
@ -482,8 +578,10 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
if (!e->nr_devs)
goto err;
err = "invalid replicas entry: too many devices";
if (e->nr_devs >= BCH_REPLICAS_MAX)
err = "invalid replicas entry: bad nr_required";
if (!e->nr_required ||
(e->nr_required > 1 &&
e->nr_required >= e->nr_devs))
goto err;
err = "invalid replicas entry: invalid device";
@ -525,6 +623,45 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
.to_text = bch2_sb_replicas_to_text,
};
/*
 * Validate a v0 superblock replicas field.
 *
 * Every entry must have a data type below BCH_DATA_NR, at least one device,
 * and only devices for which bch2_dev_exists() is true. If all entries pass,
 * the field is converted to an in-memory table and the result of
 * check_dup_replicas_entries() on that table is returned.
 *
 * Returns an error string describing the first problem found.
 */
static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
	struct bch_replicas_cpu *table;
	struct bch_replicas_entry_v0 *e;
	const char *ret;
	unsigned dev;

	for_each_replicas_entry_v0(sb_r, e) {
		if (e->data_type >= BCH_DATA_NR)
			return "invalid replicas entry: invalid data type";

		if (!e->nr_devs)
			return "invalid replicas entry: no devices";

		for (dev = 0; dev < e->nr_devs; dev++)
			if (!bch2_dev_exists(sb, mi, e->devs[dev]))
				return "invalid replicas entry: invalid device";
	}

	table = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r);
	if (!table)
		return "cannot allocate memory";

	/* The table is only needed for the duplicate check. */
	ret = check_dup_replicas_entries(table);
	kfree(table);

	return ret;
}
/* Superblock field ops for the v0 replicas field; only .validate is provided. */
const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
.validate = bch2_sb_validate_replicas_v0,
};
/* Query replicas: */
bool bch2_replicas_marked(struct bch_fs *c,
@ -591,7 +728,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
memset(&ret, 0, sizeof(ret));
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
ret.replicas[i].nr_online = UINT_MAX;
ret.replicas[i].redundancy = INT_MAX;
mi = bch2_sb_get_members(c->disk_sb.sb);
rcu_read_lock();
@ -613,9 +750,9 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
nr_offline++;
}
ret.replicas[e->data_type].nr_online =
min(ret.replicas[e->data_type].nr_online,
nr_online);
ret.replicas[e->data_type].redundancy =
min(ret.replicas[e->data_type].redundancy,
(int) nr_online - (int) e->nr_required);
ret.replicas[e->data_type].nr_offline =
max(ret.replicas[e->data_type].nr_offline,
@ -624,6 +761,10 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
rcu_read_unlock();
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
if (ret.replicas[i].redundancy == INT_MAX)
ret.replicas[i].redundancy = 0;
return ret;
}
@ -638,7 +779,7 @@ static bool have_enough_devs(struct replicas_status s,
bool force_if_lost)
{
return (!s.replicas[type].nr_offline || force_if_degraded) &&
(s.replicas[type].nr_online || force_if_lost);
(s.replicas[type].redundancy >= 0 || force_if_lost);
}
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
@ -654,14 +795,14 @@ bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
flags & BCH_FORCE_IF_DATA_LOST));
}
unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
int bch2_replicas_online(struct bch_fs *c, bool meta)
{
struct replicas_status s = bch2_replicas_status(c);
return meta
? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
s.replicas[BCH_DATA_BTREE].nr_online)
: s.replicas[BCH_DATA_USER].nr_online;
return (meta
? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
s.replicas[BCH_DATA_BTREE].redundancy)
: s.replicas[BCH_DATA_USER].redundancy) + 1;
}
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)

View File

@ -17,7 +17,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
struct replicas_status {
struct {
unsigned nr_online;
int redundancy;
unsigned nr_offline;
} replicas[BCH_DATA_NR];
};
@ -27,7 +27,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *,
struct replicas_status bch2_replicas_status(struct bch_fs *);
bool bch2_have_enough_devs(struct replicas_status, unsigned);
unsigned bch2_replicas_online(struct bch_fs *, bool);
int bch2_replicas_online(struct bch_fs *, bool);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
int bch2_replicas_gc_end(struct bch_fs *, int);
@ -46,8 +46,14 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
(_i) = replicas_entry_next(_i))
/*
 * Iterate over the entries of a v0 superblock replicas field @_r, using @_i
 * as the cursor. Iteration stops at the end of the field or at the first
 * entry whose data_type is zero.
 */
#define for_each_replicas_entry_v0(_r, _i) \
for (_i = (_r)->entries; \
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
(_i) = replicas_entry_next(_i))
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
#endif /* _BCACHEFS_REPLICAS_H */

View File

@ -60,8 +60,13 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
void *src, *dst;
src = vstruct_end(f);
if (u64s) {
f->u64s = cpu_to_le32(u64s);
dst = vstruct_end(f);
} else {
dst = f;
}
memmove(dst, src, vstruct_end(sb->sb) - src);
@ -71,7 +76,16 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
sb->sb->u64s = cpu_to_le32(sb_u64s);
return f;
return u64s ? f : NULL;
}
/*
 * Remove the superblock field of the given @type from @sb.
 * Does nothing if no such field is present.
 */
void bch2_sb_field_delete(struct bch_sb_handle *sb,
			  enum bch_sb_field_type type)
{
	struct bch_sb_field *field = bch2_sb_field_get(sb->sb, type);

	if (!field)
		return;

	/* Resizing a field to zero u64s removes it. */
	__bch2_sb_field_resize(sb, field, 0);
}
/* Superblock realloc/free: */
@ -174,6 +188,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
}
f = __bch2_sb_field_resize(sb, f, u64s);
if (f)
f->type = cpu_to_le32(type);
return f;
}
@ -366,6 +381,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
struct bch_sb_field *src_f, *dst_f;
struct bch_sb *dst = dst_handle->sb;
unsigned i;
dst->version = src->version;
dst->seq = src->seq;
@ -384,14 +400,16 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
memcpy(dst->features, src->features, sizeof(dst->features));
memcpy(dst->compat, src->compat, sizeof(dst->compat));
vstruct_for_each(src, src_f) {
if (src_f->type == BCH_SB_FIELD_journal)
for (i = 0; i < BCH_SB_FIELD_NR; i++) {
if (i == BCH_SB_FIELD_journal)
continue;
dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type));
src_f = bch2_sb_field_get(src, i);
dst_f = bch2_sb_field_get(dst, i);
dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
le32_to_cpu(src_f->u64s));
src_f ? le32_to_cpu(src_f->u64s) : 0);
if (src_f)
memcpy(dst_f, src_f, vstruct_bytes(src_f));
}
}

View File

@ -12,6 +12,7 @@
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
enum bch_sb_field_type, unsigned);
void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
#define field_to_type(_f, _name) \
container_of_or_null(_f, struct bch_sb_field_##_name, field)

View File

@ -346,8 +346,8 @@ SHOW(bch2_fs)
sysfs_print(promote_whole_extents, c->promote_whole_extents);
sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));
sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true));
sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false));
/* Debugging: */