md: factor out a helper exceed_read_errors() to check read_errors
Move check_decay_read_errors() to raid1-10.c and factor out a helper exceed_read_errors() to check if read_errors exceeds the limit, so that raid1 can also use it. There are no functional changes. Signed-off-by: Li Nan <linan122@huawei.com> Signed-off-by: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20231215023852.3478228-2-linan666@huaweicloud.com
This commit is contained in:
parent
dc1cc22ed5
commit
1979dbbe32
@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
|
||||
else
|
||||
md_bitmap_unplug(bitmap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by fix_read_error() to decay the per rdev read_errors.
|
||||
* We halve the read error count for every hour that has elapsed
|
||||
* since the last recorded read error.
|
||||
*/
|
||||
static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
long cur_time_mon;
|
||||
unsigned long hours_since_last;
|
||||
unsigned int read_errors = atomic_read(&rdev->read_errors);
|
||||
|
||||
cur_time_mon = ktime_get_seconds();
|
||||
|
||||
if (rdev->last_read_error == 0) {
|
||||
/* first time we've seen a read error */
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
return;
|
||||
}
|
||||
|
||||
hours_since_last = (long)(cur_time_mon -
|
||||
rdev->last_read_error) / 3600;
|
||||
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
|
||||
/*
|
||||
* if hours_since_last is > the number of bits in read_errors
|
||||
* just set read errors to 0. We do this to avoid
|
||||
* overflowing the shift of read_errors by hours_since_last.
|
||||
*/
|
||||
if (hours_since_last >= 8 * sizeof(read_errors))
|
||||
atomic_set(&rdev->read_errors, 0);
|
||||
else
|
||||
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
|
||||
}
|
||||
|
||||
static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int read_errors;
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
read_errors = atomic_inc_return(&rdev->read_errors);
|
||||
if (read_errors > max_read_errors) {
|
||||
pr_notice("md/"RAID_1_10_NAME":%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
|
||||
mdname(mddev), rdev->bdev, read_errors, max_read_errors);
|
||||
pr_notice("md/"RAID_1_10_NAME":%s: %pg: Failing raid device\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
md_error(mddev, rdev);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
#define raid1_log(md, fmt, args...) \
|
||||
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
|
||||
|
||||
#define RAID_1_10_NAME "raid1"
|
||||
#include "raid1-10.c"
|
||||
|
||||
#define START(node) ((node)->start)
|
||||
|
@ -19,6 +19,8 @@
|
||||
#include <linux/raid/md_p.h>
|
||||
#include <trace/events/block.h>
|
||||
#include "md.h"
|
||||
|
||||
#define RAID_1_10_NAME "raid10"
|
||||
#include "raid10.h"
|
||||
#include "raid0.h"
|
||||
#include "md-bitmap.h"
|
||||
@ -2592,42 +2594,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Used by fix_read_error() to decay the per rdev read_errors.
|
||||
* We halve the read error count for every hour that has elapsed
|
||||
* since the last recorded read error.
|
||||
*
|
||||
*/
|
||||
static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
long cur_time_mon;
|
||||
unsigned long hours_since_last;
|
||||
unsigned int read_errors = atomic_read(&rdev->read_errors);
|
||||
|
||||
cur_time_mon = ktime_get_seconds();
|
||||
|
||||
if (rdev->last_read_error == 0) {
|
||||
/* first time we've seen a read error */
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
return;
|
||||
}
|
||||
|
||||
hours_since_last = (long)(cur_time_mon -
|
||||
rdev->last_read_error) / 3600;
|
||||
|
||||
rdev->last_read_error = cur_time_mon;
|
||||
|
||||
/*
|
||||
* if hours_since_last is > the number of bits in read_errors
|
||||
* just set read errors to 0. We do this to avoid
|
||||
* overflowing the shift of read_errors by hours_since_last.
|
||||
*/
|
||||
if (hours_since_last >= 8 * sizeof(read_errors))
|
||||
atomic_set(&rdev->read_errors, 0);
|
||||
else
|
||||
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
|
||||
}
|
||||
|
||||
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
|
||||
int sectors, struct page *page, enum req_op op)
|
||||
{
|
||||
@ -2665,7 +2631,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
int sect = 0; /* Offset from r10_bio->sector */
|
||||
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
|
||||
struct md_rdev *rdev;
|
||||
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
|
||||
int d = r10_bio->devs[slot].devnum;
|
||||
|
||||
/* still own a reference to this rdev, so it cannot
|
||||
@ -2678,15 +2643,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
|
||||
more fix_read_error() attempts */
|
||||
return;
|
||||
|
||||
check_decay_read_errors(mddev, rdev);
|
||||
atomic_inc(&rdev->read_errors);
|
||||
if (atomic_read(&rdev->read_errors) > max_read_errors) {
|
||||
pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
|
||||
mdname(mddev), rdev->bdev,
|
||||
atomic_read(&rdev->read_errors), max_read_errors);
|
||||
pr_notice("md/raid10:%s: %pg: Failing raid device\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
md_error(mddev, rdev);
|
||||
if (exceed_read_errors(mddev, rdev)) {
|
||||
r10_bio->devs[slot].bio = IO_BLOCKED;
|
||||
return;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user