bcachefs: Fsck for reflink refcounts
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
c0ebe3e48c
commit
890b74f03d
@ -391,6 +391,14 @@ struct gc_pos {
|
||||
unsigned level;
|
||||
};
|
||||
|
||||
struct reflink_gc {
|
||||
u64 offset;
|
||||
u32 size;
|
||||
u32 refcount;
|
||||
};
|
||||
|
||||
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
|
||||
|
||||
struct io_count {
|
||||
u64 sectors[2][BCH_DATA_NR];
|
||||
};
|
||||
@ -806,6 +814,9 @@ mempool_t bio_bounce_pages;
|
||||
|
||||
/* REFLINK */
|
||||
u64 reflink_hint;
|
||||
reflink_gc_table reflink_gc_table;
|
||||
size_t reflink_gc_nr;
|
||||
size_t reflink_gc_idx;
|
||||
|
||||
/* VFS IO PATH - fs-io.c */
|
||||
struct bio_set writepage_bioset;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "keylist.h"
|
||||
#include "move.h"
|
||||
#include "recovery.h"
|
||||
#include "reflink.h"
|
||||
#include "replicas.h"
|
||||
#include "super-io.h"
|
||||
#include "trace.h"
|
||||
@ -1285,6 +1286,201 @@ static int bch2_gc_start(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_gc_reflink_done_initial_fn(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
char buf[200];
|
||||
int ret = 0;
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!r ||
|
||||
r->offset != k.k->p.offset ||
|
||||
r->size != k.k->size) {
|
||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
|
||||
"reflink key has wrong refcount:\n"
|
||||
" %s\n"
|
||||
" should be %u",
|
||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||
r->refcount)) {
|
||||
struct bkey_i *new;
|
||||
|
||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto fsck_err;
|
||||
}
|
||||
|
||||
bkey_reassemble(new, k);
|
||||
|
||||
if (!r->refcount) {
|
||||
new->k.type = KEY_TYPE_deleted;
|
||||
new->k.size = 0;
|
||||
} else {
|
||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||
}
|
||||
|
||||
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
|
||||
if (ret)
|
||||
kfree(new);
|
||||
}
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Finish the reflink part of gc: compare the refcount of every refcounted key
 * in the reflink btree with the value gc counted in c->reflink_gc_table and
 * repair keys that disagree.
 *
 * @initial:		use the btree-and-journal walk with
 *			bch2_gc_reflink_done_initial_fn() instead of a btree
 *			transaction
 * @metadata_only:	nothing to do for reflink; returns 0 immediately
 *
 * On the non-initial path keys are visited with a btree iterator and fixed up
 * with a btree insert. Table entries are consumed in order, one per refcounted
 * key, so the btree must contain the same keys in the same order as when the
 * table was built; otherwise -EINVAL is returned.
 *
 * Unless @metadata_only is set, the table is freed and c->reflink_gc_nr reset
 * before returning, whether or not an error occurred.
 *
 * Returns 0 on success or a negative error code.
 */
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
				bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	struct reflink_gc *r;
	size_t idx = 0;
	char buf[200];
	int ret = 0;

	if (metadata_only)
		return 0;

	if (initial) {
		c->reflink_gc_idx = 0;

		ret = bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
				bch2_gc_reflink_done_initial_fn);
		goto out;
	}

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		const __le64 *refcount = bkey_refcount_c(k);

		if (!refcount)
			continue;

		/*
		 * Advance to the next table entry for each refcounted key;
		 * without the increment every key would be checked against
		 * entry 0 and the second key would look like an inconsistency.
		 */
		r = genradix_ptr(&c->reflink_gc_table, idx++);
		if (!r ||
		    r->offset != k.k->p.offset ||
		    r->size != k.k->size) {
			bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
			ret = -EINVAL;
			break;
		}

		if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
				"reflink key has wrong refcount:\n"
				" %s\n"
				" should be %u",
				(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
				r->refcount)) {
			struct bkey_i *new;

			new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
			if (!new) {
				ret = -ENOMEM;
				break;
			}

			bkey_reassemble(new, k);

			if (!r->refcount)
				new->k.type = KEY_TYPE_deleted;
			else
				*bkey_refcount(new) = cpu_to_le64(r->refcount);

			ret = __bch2_trans_do(&trans, NULL, NULL, 0,
					__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
			/* Unlike the initial path, the copy is always freed here */
			kfree(new);

			if (ret)
				break;
		}
	}
fsck_err:
	bch2_trans_iter_put(&trans, iter);
	bch2_trans_exit(&trans);
out:
	genradix_free(&c->reflink_gc_table);
	c->reflink_gc_nr = 0;
	return ret;
}
|
||||
|
||||
static int bch2_gc_reflink_start_initial_fn(struct bch_fs *c, struct bkey_s_c k)
|
||||
{
|
||||
|
||||
struct reflink_gc *r;
|
||||
const __le64 *refcount = bkey_refcount_c(k);
|
||||
|
||||
if (!refcount)
|
||||
return 0;
|
||||
|
||||
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
|
||||
GFP_KERNEL);
|
||||
if (!r)
|
||||
return -ENOMEM;
|
||||
|
||||
r->offset = k.k->p.offset;
|
||||
r->size = k.k->size;
|
||||
r->refcount = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Start the reflink part of gc: rebuild c->reflink_gc_table with one entry,
 * refcount 0, for every key in the reflink btree that carries a refcount.
 *
 * @initial:		walk btree and journal keys via
 *			bch2_gc_reflink_start_initial_fn() instead of using a
 *			btree transaction
 * @metadata_only:	nothing to do for reflink; returns 0 immediately
 *
 * Any previous table contents are discarded first.
 *
 * Returns 0 on success, -ENOMEM if a table entry cannot be allocated, or the
 * error reported by the btree walk.
 */
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
				 bool metadata_only)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_s_c k;
	struct reflink_gc *r;
	int ret;

	if (metadata_only)
		return 0;

	genradix_free(&c->reflink_gc_table);
	c->reflink_gc_nr = 0;

	if (initial)
		return bch2_btree_and_journal_walk(c, BTREE_ID_reflink,
				bch2_gc_reflink_start_initial_fn);

	bch2_trans_init(&trans, c, 0, 0);

	for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
			   BTREE_ITER_PREFETCH, k, ret) {
		const __le64 *refcount = bkey_refcount_c(k);

		if (!refcount)
			continue;

		r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
				       GFP_KERNEL);
		if (!r) {
			ret = -ENOMEM;
			break;
		}

		r->offset	= k.k->p.offset;
		r->size		= k.k->size;
		r->refcount	= 0;
	}
	bch2_trans_iter_put(&trans, iter);

	bch2_trans_exit(&trans);

	/*
	 * Propagate allocation and iteration failures: gc must not continue
	 * with a partially built table.
	 */
	return ret;
}
|
||||
|
||||
/**
|
||||
* bch2_gc - walk _all_ references to buckets, and recompute them:
|
||||
*
|
||||
@ -1319,7 +1515,8 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
|
||||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c));
|
||||
again:
|
||||
ret = bch2_gc_start(c, metadata_only);
|
||||
ret = bch2_gc_start(c, metadata_only) ?:
|
||||
bch2_gc_reflink_start(c, initial, metadata_only);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1381,7 +1578,8 @@ out:
|
||||
bch2_journal_block(&c->journal);
|
||||
|
||||
percpu_down_write(&c->mark_lock);
|
||||
ret = bch2_gc_done(c, initial, metadata_only);
|
||||
ret = bch2_gc_reflink_done(c, initial, metadata_only) ?:
|
||||
bch2_gc_done(c, initial, metadata_only);
|
||||
|
||||
bch2_journal_unblock(&c->journal);
|
||||
} else {
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "ec.h"
|
||||
#include "error.h"
|
||||
#include "movinggc.h"
|
||||
#include "reflink.h"
|
||||
#include "replicas.h"
|
||||
#include "trace.h"
|
||||
|
||||
@ -1076,6 +1077,124 @@ static int bch2_mark_stripe(struct bch_fs *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Does the fragment [p_start, p_end) of reflink pointer @p, given relative to
 * the start of the pointer, overlap the range [v_start, v_end)?
 *
 * The fragment is shifted by the pointer's idx field before the comparison.
 * An empty fragment never overlaps anything.
 *
 * Returns 1 if the ranges overlap and 0 otherwise; the result is an int so
 * that callers can add up the results for several fragments.
 */
static int __reflink_p_frag_references(struct bkey_s_c_reflink_p p,
				       u64 p_start, u64 p_end,
				       u64 v_start, u64 v_end)
{
	u64 base;

	if (p_start == p_end)
		return 0;

	base = le64_to_cpu(p.v->idx);
	p_start	+= base;
	p_end	+= base;

	/* Two half-open ranges overlap iff each begins before the other ends */
	return p_end > v_start && p_start < v_end;
}
|
||||
|
||||
/*
 * Convenience wrapper around __reflink_p_frag_references() that takes the
 * range to test against from key @k: from bkey_start_offset(k.k) up to
 * k.k->p.offset.
 */
static int reflink_p_frag_references(struct bkey_s_c_reflink_p p,
				     u64 start, u64 end,
				     struct bkey_s_c k)
{
	u64 v_start	= bkey_start_offset(k.k);
	u64 v_end	= k.k->p.offset;

	return __reflink_p_frag_references(p, start, end, v_start, v_end);
}
|
||||
|
||||
static int __bch2_mark_reflink_p(struct bch_fs *c,
|
||||
struct bkey_s_c_reflink_p p,
|
||||
u64 idx, unsigned sectors,
|
||||
unsigned front_frag,
|
||||
unsigned back_frag,
|
||||
unsigned flags,
|
||||
size_t *r_idx)
|
||||
{
|
||||
struct reflink_gc *r;
|
||||
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
|
||||
int frags_referenced;
|
||||
|
||||
while (1) {
|
||||
if (*r_idx >= c->reflink_gc_nr)
|
||||
goto not_found;
|
||||
r = genradix_ptr(&c->reflink_gc_table, *r_idx);
|
||||
BUG_ON(!r);
|
||||
|
||||
if (r->offset > idx)
|
||||
break;
|
||||
(*r_idx)++;
|
||||
}
|
||||
|
||||
frags_referenced =
|
||||
__reflink_p_frag_references(p, 0, front_frag,
|
||||
r->offset - r->size, r->offset) +
|
||||
__reflink_p_frag_references(p, back_frag, p.k->size,
|
||||
r->offset - r->size, r->offset);
|
||||
|
||||
if (frags_referenced == 2) {
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE_SPLIT));
|
||||
add = -add;
|
||||
} else if (frags_referenced == 1) {
|
||||
BUG_ON(!(flags & BTREE_TRIGGER_OVERWRITE));
|
||||
add = 0;
|
||||
}
|
||||
|
||||
BUG_ON((s64) r->refcount + add < 0);
|
||||
|
||||
r->refcount += add;
|
||||
return min_t(u64, sectors, r->offset - idx);
|
||||
not_found:
|
||||
bch2_fs_inconsistent(c,
|
||||
"%llu:%llu len %u points to nonexistent indirect extent %llu",
|
||||
p.k->p.inode, p.k->p.offset, p.k->size, idx);
|
||||
bch2_inconsistent_error(c);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int bch2_mark_reflink_p(struct bch_fs *c,
|
||||
struct bkey_s_c_reflink_p p, unsigned offset,
|
||||
s64 sectors, unsigned flags)
|
||||
{
|
||||
u64 idx = le64_to_cpu(p.v->idx) + offset;
|
||||
struct reflink_gc *ref;
|
||||
size_t l, r, m;
|
||||
unsigned front_frag, back_frag;
|
||||
s64 ret = 0;
|
||||
|
||||
if (sectors < 0)
|
||||
sectors = -sectors;
|
||||
|
||||
BUG_ON(offset + sectors > p.k->size);
|
||||
|
||||
front_frag = offset;
|
||||
back_frag = offset + sectors;
|
||||
|
||||
l = 0;
|
||||
r = c->reflink_gc_nr;
|
||||
while (l < r) {
|
||||
m = l + (r - l) / 2;
|
||||
|
||||
ref = genradix_ptr(&c->reflink_gc_table, m);
|
||||
if (ref->offset <= idx)
|
||||
l = m + 1;
|
||||
else
|
||||
r = m;
|
||||
}
|
||||
|
||||
while (sectors) {
|
||||
ret = __bch2_mark_reflink_p(c, p, idx, sectors,
|
||||
front_frag, back_frag, flags, &l);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
idx += ret;
|
||||
sectors -= ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
struct bkey_s_c old,
|
||||
struct bkey_s_c new,
|
||||
@ -1131,6 +1250,10 @@ static int bch2_mark_key_locked(struct bch_fs *c,
|
||||
fs_usage->persistent_reserved[replicas - 1] += sectors;
|
||||
break;
|
||||
}
|
||||
case KEY_TYPE_reflink_p:
|
||||
ret = bch2_mark_reflink_p(c, bkey_s_c_to_reflink_p(k),
|
||||
offset, sectors, flags);
|
||||
break;
|
||||
}
|
||||
|
||||
preempt_enable();
|
||||
@ -1693,35 +1816,6 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __le64 *bkey_refcount(struct bkey_i *k)
|
||||
{
|
||||
switch (k->k.type) {
|
||||
case KEY_TYPE_reflink_v:
|
||||
return &bkey_i_to_reflink_v(k)->v.refcount;
|
||||
case KEY_TYPE_indirect_inline_data:
|
||||
return &bkey_i_to_indirect_inline_data(k)->v.refcount;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Does the fragment [start, end) of reflink pointer @p, given relative to the
 * start of the pointer, overlap key @k?
 *
 * The fragment is shifted by the pointer's idx field and compared with the
 * range from bkey_start_offset(k.k) to k.k->p.offset. An empty fragment never
 * overlaps.
 */
static bool reflink_p_frag_references(struct bkey_s_c_reflink_p p,
				      u64 start, u64 end,
				      struct bkey_s_c k)
{
	u64 base;

	if (start == end)
		return false;

	base = le64_to_cpu(p.v->idx);
	start	+= base;
	end	+= base;

	/* Overlap requires each range to begin before the other one ends */
	return end > bkey_start_offset(k.k) && start < k.k->p.offset;
}
|
||||
|
||||
static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
|
||||
struct bkey_s_c_reflink_p p,
|
||||
u64 idx, unsigned sectors,
|
||||
|
@ -151,7 +151,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
|
||||
|
||||
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
|
||||
|
||||
refcount = (void *) &r_v->v;
|
||||
refcount = bkey_refcount(r_v);
|
||||
*refcount = 0;
|
||||
memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
|
||||
|
||||
|
@ -34,6 +34,30 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
|
||||
.val_to_text = bch2_indirect_inline_data_to_text, \
|
||||
}
|
||||
|
||||
static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
|
||||
{
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_reflink_v:
|
||||
return &bkey_s_c_to_reflink_v(k).v->refcount;
|
||||
case KEY_TYPE_indirect_inline_data:
|
||||
return &bkey_s_c_to_indirect_inline_data(k).v->refcount;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline __le64 *bkey_refcount(struct bkey_i *k)
|
||||
{
|
||||
switch (k->k.type) {
|
||||
case KEY_TYPE_reflink_v:
|
||||
return &bkey_i_to_reflink_v(k)->v.refcount;
|
||||
case KEY_TYPE_indirect_inline_data:
|
||||
return &bkey_i_to_indirect_inline_data(k)->v.refcount;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
|
||||
u64, u64 *, u64, s64 *);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user