md/raid5: fix 'out of memory' during raid cache recovery
commit 483cbbeddd5fe2c80fd4141ff0748fa06c4ff146 upstream. This fixes the case where md array assembly fails because raid cache recovery is unable to allocate a stripe, despite attempts to replay stripes and increase the cache size. This happens because stripes released by r5c_recovery_replay_stripes and raid5_set_cache_size don't become available for allocation immediately. Released stripes are first placed on the conf->released_stripes list and require the md thread to merge them onto conf->inactive_list before they can be allocated. This patch allows the final allocation attempt during cache recovery to wait for new stripes to become available for allocation. Cc: linux-raid@vger.kernel.org Cc: Shaohua Li <shli@kernel.org> Cc: linux-stable <stable@vger.kernel.org> # 4.10+ Fixes: b4c625c67362 ("md/r5cache: r5cache recovery: part 1") Signed-off-by: Alexei Naberezhnov <anaberezhnov@fb.com> Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
5b3109eb89
commit
e7f11c55ad
@ -1935,12 +1935,14 @@ out:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct stripe_head *
|
static struct stripe_head *
|
||||||
r5c_recovery_alloc_stripe(struct r5conf *conf,
|
r5c_recovery_alloc_stripe(
|
||||||
sector_t stripe_sect)
|
struct r5conf *conf,
|
||||||
|
sector_t stripe_sect,
|
||||||
|
int noblock)
|
||||||
{
|
{
|
||||||
struct stripe_head *sh;
|
struct stripe_head *sh;
|
||||||
|
|
||||||
sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
|
sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
|
||||||
if (!sh)
|
if (!sh)
|
||||||
return NULL; /* no more stripe available */
|
return NULL; /* no more stripe available */
|
||||||
|
|
||||||
@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
|
|||||||
stripe_sect);
|
stripe_sect);
|
||||||
|
|
||||||
if (!sh) {
|
if (!sh) {
|
||||||
sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
|
sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
|
||||||
/*
|
/*
|
||||||
* cannot get stripe from raid5_get_active_stripe
|
* cannot get stripe from raid5_get_active_stripe
|
||||||
* try replay some stripes
|
* try replay some stripes
|
||||||
@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
|
|||||||
r5c_recovery_replay_stripes(
|
r5c_recovery_replay_stripes(
|
||||||
cached_stripe_list, ctx);
|
cached_stripe_list, ctx);
|
||||||
sh = r5c_recovery_alloc_stripe(
|
sh = r5c_recovery_alloc_stripe(
|
||||||
conf, stripe_sect);
|
conf, stripe_sect, 1);
|
||||||
}
|
}
|
||||||
if (!sh) {
|
if (!sh) {
|
||||||
|
int new_size = conf->min_nr_stripes * 2;
|
||||||
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
|
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
|
||||||
mdname(mddev),
|
mdname(mddev),
|
||||||
conf->min_nr_stripes * 2);
|
new_size);
|
||||||
raid5_set_cache_size(mddev,
|
ret = raid5_set_cache_size(mddev, new_size);
|
||||||
conf->min_nr_stripes * 2);
|
if (conf->min_nr_stripes <= new_size / 2) {
|
||||||
sh = r5c_recovery_alloc_stripe(conf,
|
pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
|
||||||
stripe_sect);
|
mdname(mddev),
|
||||||
|
ret,
|
||||||
|
new_size,
|
||||||
|
conf->min_nr_stripes,
|
||||||
|
conf->max_nr_stripes);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
sh = r5c_recovery_alloc_stripe(
|
||||||
|
conf, stripe_sect, 0);
|
||||||
}
|
}
|
||||||
if (!sh) {
|
if (!sh) {
|
||||||
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
|
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
|
||||||
mdname(mddev));
|
mdname(mddev));
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
list_add_tail(&sh->lru, cached_stripe_list);
|
list_add_tail(&sh->lru, cached_stripe_list);
|
||||||
|
@ -6357,6 +6357,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
|
|||||||
int
|
int
|
||||||
raid5_set_cache_size(struct mddev *mddev, int size)
|
raid5_set_cache_size(struct mddev *mddev, int size)
|
||||||
{
|
{
|
||||||
|
int result = 0;
|
||||||
struct r5conf *conf = mddev->private;
|
struct r5conf *conf = mddev->private;
|
||||||
|
|
||||||
if (size <= 16 || size > 32768)
|
if (size <= 16 || size > 32768)
|
||||||
@ -6373,11 +6374,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)
|
|||||||
|
|
||||||
mutex_lock(&conf->cache_size_mutex);
|
mutex_lock(&conf->cache_size_mutex);
|
||||||
while (size > conf->max_nr_stripes)
|
while (size > conf->max_nr_stripes)
|
||||||
if (!grow_one_stripe(conf, GFP_KERNEL))
|
if (!grow_one_stripe(conf, GFP_KERNEL)) {
|
||||||
|
conf->min_nr_stripes = conf->max_nr_stripes;
|
||||||
|
result = -ENOMEM;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
mutex_unlock(&conf->cache_size_mutex);
|
mutex_unlock(&conf->cache_size_mutex);
|
||||||
|
|
||||||
return 0;
|
return result;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(raid5_set_cache_size);
|
EXPORT_SYMBOL(raid5_set_cache_size);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user