md update for v3.12
Merge tag 'md/3.12' of git://neil.brown.name/md

Pull md update from Neil Brown:
 "Headline item is multithreading for RAID5 so that more IO/sec can be
  supported on fast (SSD) devices.  Also TILE-Gx SIMD support for RAID6
  calculations and an assortment of bug fixes"

* tag 'md/3.12' of git://neil.brown.name/md:
  raid5: only wakeup necessary threads
  md/raid5: flush out all pending requests before proceeding with reshape.
  md/raid5: use seqcount to protect access to shape in make_request.
  raid5: sysfs entry to control worker thread number
  raid5: offload stripe handle to workqueue
  raid5: fix stripe release order
  raid5: make release_stripe lockless
  md: avoid deadlock when dirty buffers during md_stop.
  md: Don't test all of mddev->flags at once.
  md: Fix apparent cut-and-paste error in super_90_validate
  raid6/test: replace echo -e with printf
  RAID: add tilegx SIMD implementation of raid6
  md: fix safe_mode buglet.
  md: don't call md_allow_write in get_bitmap_file.
commit 4d7696f1b0
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1180,7 +1180,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
             mddev->bitmap_info.offset =
                 mddev->bitmap_info.default_offset;
             mddev->bitmap_info.space =
-                mddev->bitmap_info.space;
+                mddev->bitmap_info.default_space;
         }
 
     } else if (mddev->pers == NULL) {
@@ -3429,7 +3429,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
         mddev->safemode_delay = (msec*HZ)/1000;
         if (mddev->safemode_delay == 0)
             mddev->safemode_delay = 1;
-        if (mddev->safemode_delay < old_delay)
+        if (mddev->safemode_delay < old_delay || old_delay == 0)
             md_safemode_timeout((unsigned long)mddev);
     }
     return len;
@@ -5144,7 +5144,7 @@ int md_run(struct mddev *mddev)
 
     set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 
-    if (mddev->flags)
+    if (mddev->flags & MD_UPDATE_SB_FLAGS)
         md_update_sb(mddev, 0);
 
     md_new_event(mddev);
@@ -5289,7 +5289,7 @@ static void __md_stop_writes(struct mddev *mddev)
     md_super_wait(mddev);
 
     if (mddev->ro == 0 &&
-        (!mddev->in_sync || mddev->flags)) {
+        (!mddev->in_sync || (mddev->flags & MD_UPDATE_SB_FLAGS))) {
         /* mark array as shutdown cleanly */
         mddev->in_sync = 1;
         md_update_sb(mddev, 1);
@@ -5337,8 +5337,14 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
         err = -EBUSY;
         goto out;
     }
-    if (bdev)
-        sync_blockdev(bdev);
+    if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+        /* Someone opened the device since we flushed it
+         * so page cache could be dirty and it is too late
+         * to flush.  So abort
+         */
+        mutex_unlock(&mddev->open_mutex);
+        return -EBUSY;
+    }
     if (mddev->pers) {
         __md_stop_writes(mddev);
 
@@ -5373,14 +5379,14 @@ static int do_md_stop(struct mddev * mddev, int mode,
         mutex_unlock(&mddev->open_mutex);
         return -EBUSY;
     }
-    if (bdev)
-        /* It is possible IO was issued on some other
-         * open file which was closed before we took ->open_mutex.
-         * As that was not the last close __blkdev_put will not
-         * have called sync_blockdev, so we must.
-         */
-        sync_blockdev(bdev);
-
+    if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+        /* Someone opened the device since we flushed it
+         * so page cache could be dirty and it is too late
+         * to flush.  So abort
+         */
+        mutex_unlock(&mddev->open_mutex);
+        return -EBUSY;
+    }
     if (mddev->pers) {
         if (mddev->ro)
             set_disk_ro(disk, 0);
@@ -5628,10 +5634,7 @@ static int get_bitmap_file(struct mddev * mddev, void __user * arg)
     char *ptr, *buf = NULL;
     int err = -ENOMEM;
 
-    if (md_allow_write(mddev))
-        file = kmalloc(sizeof(*file), GFP_NOIO);
-    else
-        file = kmalloc(sizeof(*file), GFP_KERNEL);
+    file = kmalloc(sizeof(*file), GFP_NOIO);
 
     if (!file)
         goto out;
@@ -6420,6 +6423,20 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                          !test_bit(MD_RECOVERY_NEEDED,
                                    &mddev->flags),
                          msecs_to_jiffies(5000));
+    if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
+        /* Need to flush page cache, and ensure no-one else opens
+         * and writes
+         */
+        mutex_lock(&mddev->open_mutex);
+        if (atomic_read(&mddev->openers) > 1) {
+            mutex_unlock(&mddev->open_mutex);
+            err = -EBUSY;
+            goto abort;
+        }
+        set_bit(MD_STILL_CLOSED, &mddev->flags);
+        mutex_unlock(&mddev->open_mutex);
+        sync_blockdev(bdev);
+    }
     err = mddev_lock(mddev);
     if (err) {
         printk(KERN_INFO
@@ -6673,6 +6690,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 
     err = 0;
     atomic_inc(&mddev->openers);
+    clear_bit(MD_STILL_CLOSED, &mddev->flags);
     mutex_unlock(&mddev->open_mutex);
 
     check_disk_change(bdev);
@@ -7817,7 +7835,7 @@ void md_check_recovery(struct mddev *mddev)
             sysfs_notify_dirent_safe(mddev->sysfs_state);
         }
 
-        if (mddev->flags)
+        if (mddev->flags & MD_UPDATE_SB_FLAGS)
             md_update_sb(mddev, 0);
 
         if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -204,12 +204,16 @@ struct mddev {
     struct md_personality    *pers;
     dev_t                    unit;
     int                      md_minor;
     struct list_head         disks;
     unsigned long            flags;
 #define MD_CHANGE_DEVS    0    /* Some device status has changed */
 #define MD_CHANGE_CLEAN   1    /* transition to or from 'clean' */
 #define MD_CHANGE_PENDING 2    /* switch from 'clean' to 'active' in progress */
+#define MD_UPDATE_SB_FLAGS (1 | 2 | 4)    /* If these are set, md_update_sb needed */
 #define MD_ARRAY_FIRST_USE 3   /* First use of array, needs initialization */
+#define MD_STILL_CLOSED   4    /* If set, then array has not been opened since
+                                * md_ioctl checked on it.
+                                */
 
     int                      suspended;
     atomic_t                 active_io;
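A note on the new mask (an editor's aside, not part of the patch): the MD_CHANGE_* values above are bit numbers used with set_bit()/test_bit(), so the mask covering bits 0, 1 and 2 is (1<<0)|(1<<1)|(1<<2) == (1 | 2 | 4), which is exactly what MD_UPDATE_SB_FLAGS spells out. A minimal standalone check:

    #include <assert.h>

    int main(void)
    {
        /* bit numbers, as used with set_bit()/test_bit() in md.h */
        enum { MD_CHANGE_DEVS = 0, MD_CHANGE_CLEAN = 1, MD_CHANGE_PENDING = 2 };

        unsigned long mask = (1UL << MD_CHANGE_DEVS) |
                             (1UL << MD_CHANGE_CLEAN) |
                             (1UL << MD_CHANGE_PENDING);

        assert(mask == (1 | 2 | 4));    /* the literal the patch uses */
        return 0;
    }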
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,7 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/nodemask.h>
 #include <trace/events/block.h>
 
 #include "md.h"
@@ -60,6 +61,10 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define cpu_to_group(cpu) cpu_to_node(cpu)
+#define ANY_GROUP NUMA_NO_NODE
+
+static struct workqueue_struct *raid5_wq;
 /*
  * Stripe cache
  */
@@ -72,6 +77,7 @@
 #define BYPASS_THRESHOLD    1
 #define NR_HASH             (PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK           (NR_HASH - 1)
+#define MAX_STRIPE_BATCH    8
 
 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
@@ -200,6 +206,49 @@ static int stripe_operations_active(struct stripe_head *sh)
            test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
+static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+{
+    struct r5conf *conf = sh->raid_conf;
+    struct r5worker_group *group;
+    int thread_cnt;
+    int i, cpu = sh->cpu;
+
+    if (!cpu_online(cpu)) {
+        cpu = cpumask_any(cpu_online_mask);
+        sh->cpu = cpu;
+    }
+
+    if (list_empty(&sh->lru)) {
+        struct r5worker_group *group;
+        group = conf->worker_groups + cpu_to_group(cpu);
+        list_add_tail(&sh->lru, &group->handle_list);
+        group->stripes_cnt++;
+        sh->group = group;
+    }
+
+    if (conf->worker_cnt_per_group == 0) {
+        md_wakeup_thread(conf->mddev->thread);
+        return;
+    }
+
+    group = conf->worker_groups + cpu_to_group(sh->cpu);
+
+    group->workers[0].working = true;
+    /* at least one worker should run to avoid race */
+    queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+    thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+    /* wakeup more workers */
+    for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+        if (group->workers[i].working == false) {
+            group->workers[i].working = true;
+            queue_work_on(sh->cpu, raid5_wq,
+                          &group->workers[i].work);
+            thread_cnt--;
+        }
+    }
+}
+
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
     BUG_ON(!list_empty(&sh->lru));
@@ -214,7 +263,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
         else {
             clear_bit(STRIPE_DELAYED, &sh->state);
             clear_bit(STRIPE_BIT_DELAY, &sh->state);
-            list_add_tail(&sh->lru, &conf->handle_list);
+            if (conf->worker_cnt_per_group == 0) {
+                list_add_tail(&sh->lru, &conf->handle_list);
+            } else {
+                raid5_wakeup_stripe_thread(sh);
+                return;
+            }
         }
         md_wakeup_thread(conf->mddev->thread);
     } else {
@@ -239,12 +293,62 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
     do_release_stripe(conf, sh);
 }
 
+static struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+    struct llist_node *new_head = NULL;
+
+    while (head) {
+        struct llist_node *tmp = head;
+        head = head->next;
+        tmp->next = new_head;
+        new_head = tmp;
+    }
+
+    return new_head;
+}
+
+/* should hold conf->device_lock already */
+static int release_stripe_list(struct r5conf *conf)
+{
+    struct stripe_head *sh;
+    int count = 0;
+    struct llist_node *head;
+
+    head = llist_del_all(&conf->released_stripes);
+    head = llist_reverse_order(head);
+    while (head) {
+        sh = llist_entry(head, struct stripe_head, release_list);
+        head = llist_next(head);
+        /* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
+        smp_mb();
+        clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
+        /*
+         * Don't worry the bit is set here, because if the bit is set
+         * again, the count is always > 1. This is true for
+         * STRIPE_ON_UNPLUG_LIST bit too.
+         */
+        __release_stripe(conf, sh);
+        count++;
+    }
+
+    return count;
+}
+
 static void release_stripe(struct stripe_head *sh)
 {
     struct r5conf *conf = sh->raid_conf;
     unsigned long flags;
+    bool wakeup;
 
+    if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+        goto slow_path;
+    wakeup = llist_add(&sh->release_list, &conf->released_stripes);
+    if (wakeup)
+        md_wakeup_thread(conf->mddev->thread);
+    return;
+slow_path:
     local_irq_save(flags);
+    /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
     if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
         do_release_stripe(conf, sh);
         spin_unlock(&conf->device_lock);
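Sketch of the lockless scheme above (an editor's userspace analogue with C11 atomics, not kernel code; all names are hypothetical): release_stripe() pushes onto a lock-free list and wakes the consumer only when the list was empty, mirroring llist_add(); the consumer detaches the whole chain at once and reverses it, since pushes build the chain in LIFO order (the "fix stripe release order" item in the shortlog):

    #include <stdatomic.h>
    #include <stdio.h>

    struct node {
        struct node *next;
        int id;
    };

    /* Lock-free push; returns 1 if the list was empty, i.e. the caller
     * should wake the consumer -- mirroring llist_add()'s return value. */
    static int lockless_push(_Atomic(struct node *) *head, struct node *n)
    {
        struct node *first = atomic_load(head);

        do {
            n->next = first;
        } while (!atomic_compare_exchange_weak(head, &first, n));
        return first == NULL;
    }

    /* Consumer side: detach the whole chain at once (as llist_del_all()
     * does), then reverse it so entries come out in release order. */
    static struct node *del_all_and_reverse(_Atomic(struct node *) *head)
    {
        struct node *h = atomic_exchange(head, NULL);
        struct node *new_head = NULL;

        while (h) {
            struct node *tmp = h;

            h = h->next;
            tmp->next = new_head;
            new_head = tmp;
        }
        return new_head;
    }

    int main(void)
    {
        _Atomic(struct node *) head = NULL;
        struct node n[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

        for (int i = 0; i < 3; i++)
            lockless_push(&head, &n[i]);

        for (struct node *p = del_all_and_reverse(&head); p; p = p->next)
            printf("released %d\n", p->id);    /* 0, 1, 2 -- FIFO restored */
        return 0;
    }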
@@ -359,6 +463,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
         raid5_build_block(sh, i, previous);
     }
     insert_hash(conf, sh);
+    sh->cpu = smp_processor_id();
 }
 
 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -491,7 +596,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
             if (atomic_read(&sh->count)) {
                 BUG_ON(!list_empty(&sh->lru)
                     && !test_bit(STRIPE_EXPANDING, &sh->state)
-                    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
+                    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
+                    && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
             } else {
                 if (!test_bit(STRIPE_HANDLE, &sh->state))
                     atomic_inc(&conf->active_stripes);
@@ -499,6 +605,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                     !test_bit(STRIPE_EXPANDING, &sh->state))
                     BUG();
                 list_del_init(&sh->lru);
+                if (sh->group) {
+                    sh->group->stripes_cnt--;
+                    sh->group = NULL;
+                }
             }
         }
     } while (sh == NULL);
@@ -3779,6 +3889,7 @@ static void raid5_activate_delayed(struct r5conf *conf)
             if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                 atomic_inc(&conf->preread_active_stripes);
             list_add_tail(&sh->lru, &conf->hold_list);
+            raid5_wakeup_stripe_thread(sh);
         }
     }
 }
@@ -4058,18 +4169,35 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
  * head of the hold_list has changed, i.e. the head was promoted to the
  * handle_list.
  */
-static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
+static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 {
-    struct stripe_head *sh;
+    struct stripe_head *sh = NULL, *tmp;
+    struct list_head *handle_list = NULL;
+    struct r5worker_group *wg = NULL;
+
+    if (conf->worker_cnt_per_group == 0) {
+        handle_list = &conf->handle_list;
+    } else if (group != ANY_GROUP) {
+        handle_list = &conf->worker_groups[group].handle_list;
+        wg = &conf->worker_groups[group];
+    } else {
+        int i;
+        for (i = 0; i < conf->group_cnt; i++) {
+            handle_list = &conf->worker_groups[i].handle_list;
+            wg = &conf->worker_groups[i];
+            if (!list_empty(handle_list))
+                break;
+        }
+    }
 
     pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
           __func__,
-          list_empty(&conf->handle_list) ? "empty" : "busy",
+          list_empty(handle_list) ? "empty" : "busy",
           list_empty(&conf->hold_list) ? "empty" : "busy",
           atomic_read(&conf->pending_full_writes), conf->bypass_count);
 
-    if (!list_empty(&conf->handle_list)) {
-        sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+    if (!list_empty(handle_list)) {
+        sh = list_entry(handle_list->next, typeof(*sh), lru);
 
         if (list_empty(&conf->hold_list))
             conf->bypass_count = 0;
@@ -4087,14 +4215,32 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
           ((conf->bypass_threshold &&
             conf->bypass_count > conf->bypass_threshold) ||
            atomic_read(&conf->pending_full_writes) == 0)) {
-        sh = list_entry(conf->hold_list.next,
-                typeof(*sh), lru);
-        conf->bypass_count -= conf->bypass_threshold;
-        if (conf->bypass_count < 0)
-            conf->bypass_count = 0;
-    } else
+
+        list_for_each_entry(tmp, &conf->hold_list, lru) {
+            if (conf->worker_cnt_per_group == 0 ||
+                group == ANY_GROUP ||
+                !cpu_online(tmp->cpu) ||
+                cpu_to_group(tmp->cpu) == group) {
+                sh = tmp;
+                break;
+            }
+        }
+
+        if (sh) {
+            conf->bypass_count -= conf->bypass_threshold;
+            if (conf->bypass_count < 0)
+                conf->bypass_count = 0;
+        }
+        wg = NULL;
+    }
+
+    if (!sh)
         return NULL;
 
+    if (wg) {
+        wg->stripes_cnt--;
+        sh->group = NULL;
+    }
     list_del_init(&sh->lru);
     atomic_inc(&sh->count);
     BUG_ON(atomic_read(&sh->count) != 1);
@@ -4127,6 +4273,10 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
              */
             smp_mb__before_clear_bit();
             clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+            /*
+             * STRIPE_ON_RELEASE_LIST could be set here. In that
+             * case, the count is always > 1 here
+             */
             __release_stripe(conf, sh);
             cnt++;
         }
@@ -4286,8 +4436,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
     for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
         DEFINE_WAIT(w);
         int previous;
+        int seq;
 
     retry:
+        seq = read_seqcount_begin(&conf->gen_lock);
         previous = 0;
         prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
         if (unlikely(conf->reshape_progress != MaxSector)) {
@@ -4349,6 +4501,13 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                     goto retry;
                 }
             }
+            if (read_seqcount_retry(&conf->gen_lock, seq)) {
+                /* Might have got the wrong stripe_head
+                 * by accident
+                 */
+                release_stripe(sh);
+                goto retry;
+            }
 
             if (rw == WRITE &&
                 logical_sector >= mddev->suspend_lo &&
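The seqcount usage reduces to: sample the generation count, do the work, and retry if a writer (the reshape path, which now wraps its geometry update in write_seqcount_begin/end, see the gen_lock hunks below) bumped it meanwhile. A toy userspace analogue with C11 atomics (an editor's sketch, not the kernel API):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Toy seqcount: even = stable, odd = writer in progress. */
    static atomic_uint gen_lock;
    static int shape = 1;        /* stands in for the array geometry */

    static unsigned int read_begin(void)
    {
        unsigned int s;

        while ((s = atomic_load(&gen_lock)) & 1)
            ;        /* writer active: wait for an even count */
        return s;
    }

    static int read_retry(unsigned int s)
    {
        return atomic_load(&gen_lock) != s;
    }

    static void change_shape(int new_shape)
    {
        atomic_fetch_add(&gen_lock, 1);    /* begin: count goes odd */
        shape = new_shape;
        atomic_fetch_add(&gen_lock, 1);    /* end: count even again */
    }

    int main(void)
    {
        unsigned int s;
        int seen;

        change_shape(2);
        do {
            s = read_begin();
            seen = shape;        /* pick a stripe based on the shape... */
        } while (read_retry(s));    /* ...and retry if it changed */

        printf("shape %d\n", seen);
        return 0;
    }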
@@ -4788,14 +4947,14 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
     return handled;
 }
 
-#define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf)
+static int handle_active_stripes(struct r5conf *conf, int group,
+                                 struct r5worker *worker)
 {
     struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
     int i, batch_size = 0;
 
     while (batch_size < MAX_STRIPE_BATCH &&
-            (sh = __get_priority_stripe(conf)) != NULL)
+            (sh = __get_priority_stripe(conf, group)) != NULL)
         batch[batch_size++] = sh;
 
     if (batch_size == 0)
@@ -4813,6 +4972,39 @@ static int handle_active_stripes(struct r5conf *conf)
     return batch_size;
 }
 
+static void raid5_do_work(struct work_struct *work)
+{
+    struct r5worker *worker = container_of(work, struct r5worker, work);
+    struct r5worker_group *group = worker->group;
+    struct r5conf *conf = group->conf;
+    int group_id = group - conf->worker_groups;
+    int handled;
+    struct blk_plug plug;
+
+    pr_debug("+++ raid5worker active\n");
+
+    blk_start_plug(&plug);
+    handled = 0;
+    spin_lock_irq(&conf->device_lock);
+    while (1) {
+        int batch_size, released;
+
+        released = release_stripe_list(conf);
+
+        batch_size = handle_active_stripes(conf, group_id, worker);
+        worker->working = false;
+        if (!batch_size && !released)
+            break;
+        handled += batch_size;
+    }
+    pr_debug("%d stripes handled\n", handled);
+
+    spin_unlock_irq(&conf->device_lock);
+    blk_finish_plug(&plug);
+
+    pr_debug("--- raid5worker inactive\n");
+}
+
 /*
  * This is our raid5 kernel thread.
  *
@@ -4836,7 +5028,9 @@ static void raid5d(struct md_thread *thread)
     spin_lock_irq(&conf->device_lock);
     while (1) {
         struct bio *bio;
-        int batch_size;
+        int batch_size, released;
+
+        released = release_stripe_list(conf);
 
         if (
             !list_empty(&conf->bitmap_list)) {
@@ -4860,8 +5054,8 @@ static void raid5d(struct md_thread *thread)
             handled++;
         }
 
-        batch_size = handle_active_stripes(conf);
-        if (!batch_size)
+        batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+        if (!batch_size && !released)
             break;
         handled += batch_size;
 
@@ -4989,10 +5183,70 @@ stripe_cache_active_show(struct mddev *mddev, char *page)
 static struct md_sysfs_entry
 raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
 
+static ssize_t
+raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
+{
+    struct r5conf *conf = mddev->private;
+    if (conf)
+        return sprintf(page, "%d\n", conf->worker_cnt_per_group);
+    else
+        return 0;
+}
+
+static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static ssize_t
+raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
+{
+    struct r5conf *conf = mddev->private;
+    unsigned long new;
+    int err;
+    struct r5worker_group *old_groups;
+    int old_group_cnt;
+
+    if (len >= PAGE_SIZE)
+        return -EINVAL;
+    if (!conf)
+        return -ENODEV;
+
+    if (kstrtoul(page, 10, &new))
+        return -EINVAL;
+
+    if (new == conf->worker_cnt_per_group)
+        return len;
+
+    mddev_suspend(mddev);
+
+    old_groups = conf->worker_groups;
+    old_group_cnt = conf->worker_cnt_per_group;
+
+    conf->worker_groups = NULL;
+    err = alloc_thread_groups(conf, new);
+    if (err) {
+        conf->worker_groups = old_groups;
+        conf->worker_cnt_per_group = old_group_cnt;
+    } else {
+        if (old_groups)
+            kfree(old_groups[0].workers);
+        kfree(old_groups);
+    }
+
+    mddev_resume(mddev);
+
+    if (err)
+        return err;
+    return len;
+}
+
+static struct md_sysfs_entry
+raid5_group_thread_cnt = __ATTR(group_thread_cnt, S_IRUGO | S_IWUSR,
+                raid5_show_group_thread_cnt,
+                raid5_store_group_thread_cnt);
+
 static struct attribute *raid5_attrs[] = {
     &raid5_stripecache_size.attr,
     &raid5_stripecache_active.attr,
     &raid5_preread_bypass_threshold.attr,
+    &raid5_group_thread_cnt.attr,
     NULL,
 };
 static struct attribute_group raid5_attrs_group = {
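Usage note: the new attribute appears alongside the other raid5 sysfs entries, so on a running array it can be driven from userspace; the number written is the worker count per group, with 0 reverting to the single raid5d thread. A small illustrative snippet (the path assumes an array named md0):

    #include <stdio.h>

    int main(void)
    {
        /* path assumes an array named md0 */
        FILE *f = fopen("/sys/block/md0/md/group_thread_cnt", "w");

        if (!f) {
            perror("group_thread_cnt");
            return 1;
        }
        fprintf(f, "4\n");    /* four workers per group */
        fclose(f);
        return 0;
    }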
@@ -5000,6 +5254,54 @@ static struct attribute_group raid5_attrs_group = {
     .attrs = raid5_attrs,
 };
 
+static int alloc_thread_groups(struct r5conf *conf, int cnt)
+{
+    int i, j;
+    ssize_t size;
+    struct r5worker *workers;
+
+    conf->worker_cnt_per_group = cnt;
+    if (cnt == 0) {
+        conf->worker_groups = NULL;
+        return 0;
+    }
+    conf->group_cnt = num_possible_nodes();
+    size = sizeof(struct r5worker) * cnt;
+    workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
+    conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
+                conf->group_cnt, GFP_NOIO);
+    if (!conf->worker_groups || !workers) {
+        kfree(workers);
+        kfree(conf->worker_groups);
+        conf->worker_groups = NULL;
+        return -ENOMEM;
+    }
+
+    for (i = 0; i < conf->group_cnt; i++) {
+        struct r5worker_group *group;
+
+        group = &conf->worker_groups[i];
+        INIT_LIST_HEAD(&group->handle_list);
+        group->conf = conf;
+        group->workers = workers + i * cnt;
+
+        for (j = 0; j < cnt; j++) {
+            group->workers[j].group = group;
+            INIT_WORK(&group->workers[j].work, raid5_do_work);
+        }
+    }
+
+    return 0;
+}
+
+static void free_thread_groups(struct r5conf *conf)
+{
+    if (conf->worker_groups)
+        kfree(conf->worker_groups[0].workers);
+    kfree(conf->worker_groups);
+    conf->worker_groups = NULL;
+}
+
 static sector_t
 raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
@@ -5040,6 +5342,7 @@ static void raid5_free_percpu(struct r5conf *conf)
 
 static void free_conf(struct r5conf *conf)
 {
+    free_thread_groups(conf);
     shrink_stripes(conf);
     raid5_free_percpu(conf);
     kfree(conf->disks);
@@ -5168,7 +5471,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
     conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
     if (conf == NULL)
         goto abort;
+    /* Don't enable multi-threading by default*/
+    if (alloc_thread_groups(conf, 0))
+        goto abort;
     spin_lock_init(&conf->device_lock);
+    seqcount_init(&conf->gen_lock);
     init_waitqueue_head(&conf->wait_for_stripe);
     init_waitqueue_head(&conf->wait_for_overlap);
     INIT_LIST_HEAD(&conf->handle_list);
@@ -5176,6 +5483,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
     INIT_LIST_HEAD(&conf->delayed_list);
     INIT_LIST_HEAD(&conf->bitmap_list);
     INIT_LIST_HEAD(&conf->inactive_list);
+    init_llist_head(&conf->released_stripes);
     atomic_set(&conf->active_stripes, 0);
     atomic_set(&conf->preread_active_stripes, 0);
     atomic_set(&conf->active_aligned_reads, 0);
@@ -5980,6 +6288,7 @@ static int raid5_start_reshape(struct mddev *mddev)
 
     atomic_set(&conf->reshape_stripes, 0);
     spin_lock_irq(&conf->device_lock);
+    write_seqcount_begin(&conf->gen_lock);
     conf->previous_raid_disks = conf->raid_disks;
     conf->raid_disks += mddev->delta_disks;
     conf->prev_chunk_sectors = conf->chunk_sectors;
@@ -5996,8 +6305,16 @@ static int raid5_start_reshape(struct mddev *mddev)
     else
         conf->reshape_progress = 0;
     conf->reshape_safe = conf->reshape_progress;
+    write_seqcount_end(&conf->gen_lock);
     spin_unlock_irq(&conf->device_lock);
 
+    /* Now make sure any requests that proceeded on the assumption
+     * the reshape wasn't running - like Discard or Read - have
+     * completed.
+     */
+    mddev_suspend(mddev);
+    mddev_resume(mddev);
+
     /* Add some new drives, as many as will fit.
      * We know there are enough to make the newly sized array work.
      * Don't add devices if we are reducing the number of
@@ -6472,6 +6789,10 @@ static struct md_personality raid4_personality =
 
 static int __init raid5_init(void)
 {
+    raid5_wq = alloc_workqueue("raid5wq",
+        WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
+    if (!raid5_wq)
+        return -ENOMEM;
     register_md_personality(&raid6_personality);
     register_md_personality(&raid5_personality);
     register_md_personality(&raid4_personality);
@@ -6483,6 +6804,7 @@ static void raid5_exit(void)
     unregister_md_personality(&raid6_personality);
     unregister_md_personality(&raid5_personality);
     unregister_md_personality(&raid4_personality);
+    destroy_workqueue(raid5_wq);
 }
 
 module_init(raid5_init);
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -197,6 +197,7 @@ enum reconstruct_states {
 struct stripe_head {
     struct hlist_node    hash;
     struct list_head     lru;    /* inactive_list or handle_list */
+    struct llist_node    release_list;
     struct r5conf        *raid_conf;
     short                generation;    /* increments with every
                                          * reshape */
@@ -211,6 +212,8 @@ struct stripe_head {
     enum check_states    check_state;
     enum reconstruct_states reconstruct_state;
     spinlock_t           stripe_lock;
+    int                  cpu;
+    struct r5worker_group *group;
     /**
      * struct stripe_operations
      * @target - STRIPE_OP_COMPUTE_BLK target
@@ -321,6 +324,7 @@ enum {
     STRIPE_OPS_REQ_PENDING,
     STRIPE_ON_UNPLUG_LIST,
     STRIPE_DISCARD,
+    STRIPE_ON_RELEASE_LIST,
 };
 
 /*
@@ -363,6 +367,19 @@ struct disk_info {
     struct md_rdev    *rdev, *replacement;
 };
 
+struct r5worker {
+    struct work_struct work;
+    struct r5worker_group *group;
+    bool working;
+};
+
+struct r5worker_group {
+    struct list_head handle_list;
+    struct r5conf *conf;
+    struct r5worker *workers;
+    int stripes_cnt;
+};
+
 struct r5conf {
     struct hlist_head    *stripe_hashtbl;
     struct mddev         *mddev;
@@ -386,6 +403,7 @@ struct r5conf {
     int                  prev_chunk_sectors;
     int                  prev_algo;
     short                generation; /* increments with every reshape */
+    seqcount_t           gen_lock;   /* lock against generation changes */
     unsigned long        reshape_checkpoint; /* Time we last updated
                                               * metadata */
     long long            min_offset_diff; /* minimum difference between
@@ -445,6 +463,7 @@ struct r5conf {
                          */
     atomic_t             active_stripes;
     struct list_head     inactive_list;
+    struct llist_head    released_stripes;
     wait_queue_head_t    wait_for_stripe;
     wait_queue_head_t    wait_for_overlap;
     int                  inactive_blocked;    /* release of inactive stripes blocked,
@@ -458,6 +477,9 @@ struct r5conf {
      * the new thread here until we fully activate the array.
      */
     struct md_thread     *thread;
+    struct r5worker_group *worker_groups;
+    int                  group_cnt;
+    int                  worker_cnt_per_group;
 };
 
 /*
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -101,6 +101,7 @@ extern const struct raid6_calls raid6_altivec8;
 extern const struct raid6_calls raid6_avx2x1;
 extern const struct raid6_calls raid6_avx2x2;
 extern const struct raid6_calls raid6_avx2x4;
+extern const struct raid6_calls raid6_tilegx8;
 
 struct raid6_recov_calls {
     void (*data2)(int, size_t, int, int, void **);
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -6,6 +6,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
+raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 
 hostprogs-y += mktables
 
@@ -110,6 +111,11 @@ $(obj)/neon8.c: UNROLL := 8
 $(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+targets += tilegx8.c
+$(obj)/tilegx8.c: UNROLL := 8
+$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 quiet_cmd_mktable = TABLE $@
       cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
 
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -65,6 +65,9 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec2,
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
+#if defined(CONFIG_TILEGX)
+	&raid6_tilegx8,
+#endif
 	&raid6_intx1,
 	&raid6_intx2,
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -40,13 +40,16 @@ else ifeq ($(HAS_NEON),yes)
         OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
-        HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
+        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
                          gcc -c -x c - >&/dev/null && \
                          rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
                 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
         endif
 endif
+ifeq ($(ARCH),tilegx)
+OBJS += tilegx8.o
+endif
 
 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
@@ -109,11 +112,15 @@ int16.c: int.uc ../unroll.awk
 int32.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=32 < int.uc > $@
 
+tilegx8.c: tilegx.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@
+
 tables.c: mktables
 	./mktables > tables.c
 
 clean:
 	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f tilegx*.c
 
 spotless: clean
 	rm -f *~
--- /dev/null
+++ b/lib/raid6/tilegx.uc (new file, 86 lines)
@@ -0,0 +1,86 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *   Copyright 2012 Tilera Corporation - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * tilegx$#.c
+ *
+ * $#-way unrolled TILE-Gx SIMD for RAID-6 math.
+ *
+ * This file is postprocessed using unroll.awk.
+ *
+ */
+
+#include <linux/raid/pq.h>
+
+/* Create 8 byte copies of constant byte */
+# define NBYTES(x) (__insn_v1addi(0, x))
+# define NSIZE  8
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ u64 SHLBYTE(u64 v)
+{
+	/* Vector One Byte Shift Left Immediate. */
+	return __insn_v1shli(v, 1);
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ u64 MASK(u64 v)
+{
+	/* Vector One Byte Shift Right Signed Immediate. */
+	return __insn_v1shrsi(v, 7);
+}
+
+
+void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u64 *p, *q;
+	int d, z, z0;
+
+	u64 wd$$, wq$$, wp$$, w1$$, w2$$;
+	u64 x1d = NBYTES(0x1d);
+	u64 * z0ptr;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = (u64 *)dptr[z0+1];	/* XOR parity */
+	q = (u64 *)dptr[z0+2];	/* RS syndrome */
+
+	z0ptr = (u64 *)&dptr[z0][0];
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		wq$$ = wp$$ = *z0ptr++;
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE];
+			wp$$ = wp$$ ^ wd$$;
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ = w2$$ & x1d;
+			w1$$ = w1$$ ^ w2$$;
+			wq$$ = w1$$ ^ wd$$;
+		}
+		*p++ = wp$$;
+		*q++ = wq$$;
+	}
+}
+
+const struct raid6_calls raid6_tilegx$# = {
+	raid6_tilegx$#_gen_syndrome,
+	NULL,
+	"tilegx$#",
+	0
+};
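For reference, the MASK/SHLBYTE/x1d sequence in the inner loop above is a byte-wise multiply-by-2 in GF(2^8) with the RAID-6 polynomial 0x11d: shift each byte left one bit, then XOR 0x1d into any byte whose high bit was set. A scalar, one-byte-at-a-time check (an editor's sketch, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Multiply one byte by 2 in GF(2^8) modulo x^8+x^4+x^3+x^2+1 (0x11d),
     * the field the RAID-6 Q syndrome is computed in. */
    static uint8_t gf256_mul2(uint8_t b)
    {
        uint8_t mask = (b & 0x80) ? 0xff : 0x00;    /* MASK(), per byte */

        return (uint8_t)(b << 1) ^ (mask & 0x1d);   /* SHLBYTE() ^ (w2 & x1d) */
    }

    int main(void)
    {
        assert(gf256_mul2(0x01) == 0x02);    /* no reduction needed */
        assert(gf256_mul2(0x80) == 0x1d);    /* high bit set: fold in 0x1d */
        assert(gf256_mul2(0xc3) == 0x9b);    /* 0x86 ^ 0x1d */
        return 0;
    }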