bcache: make bch_btree_check() to be multithreaded

When registering a cache device, bch_btree_check() is called to check
all btree nodes, to make sure the btree is consistent and not
corrupted.

bch_btree_check() is executed recursively in a single thread. When there
is a lot of data cached and the btree is huge, it may take a very long
time to check all the btree nodes. In my testing, I observed that it took
around 50 minutes to finish bch_btree_check().

When checking the bcache btree nodes, the cache set is not running yet,
and indeed the whole tree is in read-only state, it is safe to create
multiple threads to check the btree in parallel.

This patch creates multiple threads, and each thread checks, one by one,
the sub-trees indexed by keys from the btree root node. The number of
parallel threads depends on how many keys are in the btree root node.
At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in
practice it should be min(cpu-number/2, root-node-keys-number).

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Coly Li 2020-03-22 14:03:01 +08:00 committed by Jens Axboe
parent feac1a70b8
commit 8e7102273f
2 changed files with 188 additions and 3 deletions

View File

@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
return ret; return ret;
} }
/*
 * Kernel thread body used by bch_btree_check().
 *
 * @arg: pointer to this thread's struct btree_check_info slot.
 *
 * Each thread walks the keys of the btree root node with its own iterator.
 * It repeatedly claims the next unclaimed root-key index from the shared
 * check_state->key_idx (under idx_lock), advances its iterator to that key,
 * and runs the recursive check on the sub-tree the key points to.
 *
 * The thread stops when the root node has no more keys (in which case it
 * also sets check_state->enough so that no further threads are created) or
 * when a sub-tree check returns non-zero. On exit the result is stored in
 * info->result, check_state->started is decremented, and the thread that
 * brings it to zero wakes up check_state->wait.
 *
 * Return: 0, or the non-zero value returned by the failing sub-tree check.
 */
static int bch_btree_check_thread(void *arg)
{
	int ret = 0;
	struct btree_check_info *info = arg;
	struct btree_check_state *check_state = info->state;
	struct cache_set *c = check_state->c;
	struct btree_iter iter;
	struct bkey *k, *p;
	int cur_idx;
	int prev_idx = 0;
	int skip_nr;

	/* root node keys are checked before thread created */
	bch_btree_iter_init(&c->root->keys, &iter, NULL);
	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
	BUG_ON(!k);

	p = k;
	while (k) {
		/*
		 * Fetch a root node key index, skip the keys which
		 * should be fetched by other threads, then check the
		 * sub-tree indexed by the fetched key.
		 */
		spin_lock(&check_state->idx_lock);
		cur_idx = check_state->key_idx;
		check_state->key_idx++;
		spin_unlock(&check_state->idx_lock);

		/* distance from the key this iterator currently sits on */
		skip_nr = cur_idx - prev_idx;

		while (skip_nr) {
			k = bch_btree_iter_next_filter(&iter,
						       &c->root->keys,
						       bch_ptr_bad);
			if (k)
				p = k;
			else {
				/*
				 * No more keys to check in root node,
				 * current checking threads are enough,
				 * stop creating more.
				 */
				atomic_set(&check_state->enough, 1);
				/* Update check_state->enough earlier */
				smp_mb();
				goto out;
			}
			skip_nr--;
			cond_resched();
		}

		if (p) {
			struct btree_op op;

			btree_node_prefetch(c->root, p);
			c->gc_stats.nodes++;
			bch_btree_op_init(&op, 0);
			ret = bcache_btree(check_recurse, p, c->root, &op);
			if (ret)
				goto out;
		}
		p = NULL;
		prev_idx = cur_idx;
		cond_resched();
	}

out:
	info->result = ret;
	/* update check_state->started among all CPUs */
	smp_mb();
	if (atomic_dec_and_test(&check_state->started))
		wake_up(&check_state->wait);

	return ret;
}
/*
 * Pick how many btree checking threads may be created: half of the online
 * CPUs, but never fewer than one and never more than BCH_BTR_CHKTHREAD_MAX.
 */
static int bch_btree_chkthread_nr(void)
{
	int nr_threads = num_online_cpus() / 2;

	if (nr_threads == 0)
		return 1;
	if (nr_threads > BCH_BTR_CHKTHREAD_MAX)
		return BCH_BTR_CHKTHREAD_MAX;

	return nr_threads;
}
int bch_btree_check(struct cache_set *c) int bch_btree_check(struct cache_set *c)
{ {
struct btree_op op; int ret = 0;
int i;
struct bkey *k = NULL;
struct btree_iter iter;
struct btree_check_state *check_state;
char name[32];
bch_btree_op_init(&op, SHRT_MAX); /* check and mark root node keys */
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
bch_initial_mark_key(c, c->root->level, k);
return bcache_btree_root(check_recurse, c, &op); bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
if (c->root->level == 0)
return 0;
check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
if (!check_state)
return -ENOMEM;
check_state->c = c;
check_state->total_threads = bch_btree_chkthread_nr();
check_state->key_idx = 0;
spin_lock_init(&check_state->idx_lock);
atomic_set(&check_state->started, 0);
atomic_set(&check_state->enough, 0);
init_waitqueue_head(&check_state->wait);
/*
* Run multiple threads to check btree nodes in parallel,
* if check_state->enough is non-zero, it means current
* running check threads are enough, unncessary to create
* more.
*/
for (i = 0; i < check_state->total_threads; i++) {
/* fetch latest check_state->enough earlier */
smp_mb();
if (atomic_read(&check_state->enough))
break;
check_state->infos[i].result = 0;
check_state->infos[i].state = check_state;
snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
atomic_inc(&check_state->started);
check_state->infos[i].thread =
kthread_run(bch_btree_check_thread,
&check_state->infos[i],
name);
if (IS_ERR(check_state->infos[i].thread)) {
pr_err("fails to run thread bch_btrchk[%d]", i);
for (--i; i >= 0; i--)
kthread_stop(check_state->infos[i].thread);
ret = -ENOMEM;
goto out;
}
}
wait_event_interruptible(check_state->wait,
atomic_read(&check_state->started) == 0 ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
for (i = 0; i < check_state->total_threads; i++) {
if (check_state->infos[i].result) {
ret = check_state->infos[i].result;
goto out;
}
}
out:
kfree(check_state);
return ret;
} }
void bch_initial_gc_finish(struct cache_set *c) void bch_initial_gc_finish(struct cache_set *c)

View File

@ -145,6 +145,9 @@ struct btree {
struct bio *bio; struct bio *bio;
}; };
#define BTREE_FLAG(flag) \ #define BTREE_FLAG(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \ static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
@ -216,6 +219,25 @@ struct btree_op {
unsigned int insert_collision:1; unsigned int insert_collision:1;
}; };
/* Forward declaration: btree_check_info points back at the shared state. */
struct btree_check_state;
/*
 * Per-thread slot used by the multithreaded btree check. One slot is filled
 * in for each checking thread and passed to it as the thread argument.
 */
struct btree_check_info {
/* shared state common to all checking threads */
struct btree_check_state *state;
/* value returned by kthread_run() for this slot's thread */
struct task_struct *thread;
/* return code written by the thread just before it exits; 0 means no error */
int result;
};
/* Size of infos[] below and the cap applied to the number of check threads. */
#define BCH_BTR_CHKTHREAD_MAX 64
/*
 * State shared between bch_btree_check() and the checking threads it starts.
 */
struct btree_check_state {
/* cache set whose btree is being checked */
struct cache_set *c;
/* upper bound on the number of threads to start */
int total_threads;
/* next root-node key index to hand out; read and incremented under idx_lock */
int key_idx;
/* protects key_idx */
spinlock_t idx_lock;
/*
 * Incremented for each thread that is started and decremented by each
 * thread when it exits; the thread that brings it to zero wakes 'wait'.
 */
atomic_t started;
/*
 * Set to 1 by a checking thread that runs out of root-node keys; thread
 * creation stops once this is non-zero.
 */
atomic_t enough;
/* bch_btree_check() sleeps here until the checking threads are done */
wait_queue_head_t wait;
/* one slot per checking thread */
struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
};
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{ {
memset(op, 0, sizeof(struct btree_op)); memset(op, 0, sizeof(struct btree_op));