From 0a593fbbc245a85940ed34caa3aa1e4cb060c54b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 17 Apr 2021 09:29:49 -0600 Subject: [PATCH 001/117] null_blk: poll queue support There's currently no way to experiment with polled IO with null_blk, which seems like an oversight. This patch adds support for polled IO. We keep a list of issued IOs on submit, and then process that list when mq_ops->poll() is invoked. A new parameter is added, poll_queues. It defaults to 1 like the submit queues, meaning we'll have 1 poll queue available. Fixes-by: Bart Van Assche Fixes-by: Pavel Begunkov Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/baca710d-0f2a-16e2-60bd-b105b854e0ae@kernel.dk Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 108 ++++++++++++++++++++++++++++-- drivers/block/null_blk/null_blk.h | 4 ++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index e5cbcf582233..f4af95c2f9a9 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -92,6 +92,10 @@ static int g_submit_queues = 1; module_param_named(submit_queues, g_submit_queues, int, 0444); MODULE_PARM_DESC(submit_queues, "Number of submission queues"); +static int g_poll_queues = 1; +module_param_named(poll_queues, g_poll_queues, int, 0444); +MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues"); + static int g_home_node = NUMA_NO_NODE; module_param_named(home_node, g_home_node, int, 0444); MODULE_PARM_DESC(home_node, "Home node for the device"); @@ -347,6 +351,7 @@ static int nullb_apply_submit_queues(struct nullb_device *dev, NULLB_DEVICE_ATTR(size, ulong, NULL); NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); +NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_submit_queues); NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); NULLB_DEVICE_ATTR(blocksize, uint, NULL); @@ -466,6 +471,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_size, &nullb_device_attr_completion_nsec, &nullb_device_attr_submit_queues, + &nullb_device_attr_poll_queues, &nullb_device_attr_home_node, &nullb_device_attr_queue_mode, &nullb_device_attr_blocksize, @@ -593,6 +599,7 @@ static struct nullb_device *null_alloc_dev(void) dev->size = g_gb * 1024; dev->completion_nsec = g_completion_nsec; dev->submit_queues = g_submit_queues; + dev->poll_queues = g_poll_queues; dev->home_node = g_home_node; dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; @@ -1454,12 +1461,80 @@ static bool should_requeue_request(struct request *rq) return false; } +static int null_map_queues(struct blk_mq_tag_set *set) +{ + struct nullb *nullb = set->driver_data; + int i, qoff; + + for (i = 0, qoff = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + switch (i) { + case HCTX_TYPE_DEFAULT: + if (nullb) + map->nr_queues = nullb->dev->submit_queues; + else + map->nr_queues = g_submit_queues; + break; + case HCTX_TYPE_READ: + map->nr_queues = 0; + continue; + case HCTX_TYPE_POLL: + if (nullb) + map->nr_queues = nullb->dev->poll_queues; + else + map->nr_queues = g_poll_queues; + break; + } + map->queue_offset = qoff; + qoff += map->nr_queues; + blk_mq_map_queues(map); + } + + return 0; +} + +static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) +{ + struct nullb_queue *nq = hctx->driver_data; + LIST_HEAD(list); + int nr = 0; + + spin_lock(&nq->poll_lock); + list_splice_init(&nq->poll_list, &list); + spin_unlock(&nq->poll_lock); + + while (!list_empty(&list)) { + struct nullb_cmd *cmd; + struct request *req; + + req = list_first_entry(&list, struct request, queuelist); + list_del_init(&req->queuelist); + cmd = blk_mq_rq_to_pdu(req); + cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req), + blk_rq_sectors(req)); + end_cmd(cmd); + nr++; + } + + return nr; +} + static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) { + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); pr_info("rq %p timed out\n", rq); + if (hctx->type == HCTX_TYPE_POLL) { + struct nullb_queue *nq = hctx->driver_data; + + spin_lock(&nq->poll_lock); + list_del_init(&rq->queuelist); + spin_unlock(&nq->poll_lock); + } + /* * If the device is marked as blocking (i.e. memory backed or zoned * device), the submission path may be blocked waiting for resources @@ -1480,10 +1555,11 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, struct nullb_queue *nq = hctx->driver_data; sector_t nr_sectors = blk_rq_sectors(bd->rq); sector_t sector = blk_rq_pos(bd->rq); + const bool is_poll = hctx->type == HCTX_TYPE_POLL; might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - if (nq->dev->irqmode == NULL_IRQ_TIMER) { + if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) { hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cmd->timer.function = null_cmd_timer_expired; } @@ -1507,6 +1583,13 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } } + + if (is_poll) { + spin_lock(&nq->poll_lock); + list_add_tail(&bd->rq->queuelist, &nq->poll_list); + spin_unlock(&nq->poll_lock); + return BLK_STS_OK; + } if (cmd->fake_timeout) return BLK_STS_OK; @@ -1542,6 +1625,8 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) init_waitqueue_head(&nq->wait); nq->queue_depth = nullb->queue_depth; nq->dev = nullb->dev; + INIT_LIST_HEAD(&nq->poll_list); + spin_lock_init(&nq->poll_lock); } static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, @@ -1567,6 +1652,8 @@ static const struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, .complete = null_complete_rq, .timeout = null_timeout_rq, + .poll = null_poll, + .map_queues = null_map_queues, .init_hctx = null_init_hctx, .exit_hctx = null_exit_hctx, }; @@ -1663,13 +1750,17 @@ static int setup_commands(struct nullb_queue *nq) static int setup_queues(struct nullb *nullb) { - nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue), + int nqueues = nr_cpu_ids; + + if (g_poll_queues) + nqueues += g_poll_queues; + + nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue), GFP_KERNEL); if (!nullb->queues) return -ENOMEM; nullb->queue_depth = nullb->dev->hw_queue_depth; - return 0; } @@ -1721,9 +1812,14 @@ static int null_gendisk_register(struct nullb *nullb) static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) { + int poll_queues; + set->ops = &null_mq_ops; set->nr_hw_queues = nullb ? nullb->dev->submit_queues : g_submit_queues; + poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues; + if (poll_queues) + set->nr_hw_queues += poll_queues; set->queue_depth = nullb ? nullb->dev->hw_queue_depth : g_hw_queue_depth; set->numa_node = nullb ? nullb->dev->home_node : g_home_node; @@ -1733,7 +1829,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) set->flags |= BLK_MQ_F_NO_SCHED; if (g_shared_tag_bitmap) set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; - set->driver_data = NULL; + set->driver_data = nullb; + if (g_poll_queues) + set->nr_maps = 3; + else + set->nr_maps = 1; if ((nullb && nullb->dev->blocking) || g_blocking) set->flags |= BLK_MQ_F_BLOCKING; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 64bef125d1df..90c3eefb3ca3 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -32,6 +32,9 @@ struct nullb_queue { struct nullb_device *dev; unsigned int requeue_selection; + struct list_head poll_list; + spinlock_t poll_lock; + struct nullb_cmd *cmds; }; @@ -83,6 +86,7 @@ struct nullb_device { unsigned int zone_max_open; /* max number of open zones */ unsigned int zone_max_active; /* max number of active zones */ unsigned int submit_queues; /* number of submission queues */ + unsigned int poll_queues; /* number of IOPOLL submission queues */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */ From 905705f083a936e49d5259e0bb539c53d5e0a9be Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 14:59:57 -0700 Subject: [PATCH 002/117] loop: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/loop.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7bf4686af774..00ee365ed5e0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2394,13 +2394,19 @@ static int loop_add(int i) disk->event_flags = DISK_EVENT_FLAG_UEVENT; sprintf(disk->disk_name, "loop%d", i); /* Make this loop device reachable from pathname. */ - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_cleanup_disk; + /* Show this loop device. */ mutex_lock(&loop_ctl_mutex); lo->idr_visible = true; mutex_unlock(&loop_ctl_mutex); + return i; +out_cleanup_disk: + blk_cleanup_disk(disk); out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: From e1654f413fe08ffbc3292d8d2b8958b2cc5cb5e8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 14:59:58 -0700 Subject: [PATCH 003/117] nbd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1183f7872b71..f4101d3efe7d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1762,7 +1762,9 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_err_disk; /* * Now publish the device. @@ -1771,6 +1773,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) nbd_total_devices++; return nbd; +out_err_disk: + blk_cleanup_disk(disk); out_free_idr: mutex_lock(&nbd_index_mutex); idr_remove(&nbd_index_idr, index); From d9c2bd252a4578419e0b863bfe3dd97b858ccd8e Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:00:57 -0700 Subject: [PATCH 004/117] aoe: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 06b360f7123a..e436b0e8eff5 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -417,7 +417,9 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); - device_add_disk(NULL, gd, aoe_attr_groups); + err = device_add_disk(NULL, gd, aoe_attr_groups); + if (err) + goto out_disk_cleanup; aoedisk_add_debugfs(d); spin_lock_irqsave(&d->lock, flags); @@ -426,6 +428,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); return; +out_disk_cleanup: + blk_cleanup_disk(gd); err_tagset: blk_mq_free_tag_set(set); err_mempool: From e92ab4eda516a5bfd96c087282ebe9521deba4f4 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:00:59 -0700 Subject: [PATCH 005/117] drbd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 55234a558e98..19db80a1e409 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2794,7 +2794,9 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig goto out_idr_remove_vol; } - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_cleanup_disk; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2808,6 +2810,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; +out_cleanup_disk: + blk_cleanup_disk(disk); out_idr_remove_vol: idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: From d1df6021b70ce7823df89941c0c97e746fa2ad92 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:02 -0700 Subject: [PATCH 006/117] n64cart: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/n64cart.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index b168ca25b6c9..78282f01f581 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -115,6 +115,7 @@ static const struct block_device_operations n64cart_fops = { static int __init n64cart_probe(struct platform_device *pdev) { struct gendisk *disk; + int err = -ENOMEM; if (!start || !size) { pr_err("start or size not specified\n"); @@ -132,7 +133,7 @@ static int __init n64cart_probe(struct platform_device *pdev) disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) - return -ENOMEM; + goto out; disk->first_minor = 0; disk->flags = GENHD_FL_NO_PART_SCAN; @@ -147,11 +148,18 @@ static int __init n64cart_probe(struct platform_device *pdev) blk_queue_physical_block_size(disk->queue, 4096); blk_queue_logical_block_size(disk->queue, 4096); - add_disk(disk); + err = add_disk(disk); + if (err) + goto out_cleanup_disk; pr_info("n64cart: %u kb disk\n", size / 1024); return 0; + +out_cleanup_disk: + blk_cleanup_disk(disk); +out: + return err; } static struct platform_driver n64cart_driver = { From 7d8b72aaddd3ec5f350d3e9988d6735a7b9b18e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Sep 2021 15:01:03 -0700 Subject: [PATCH 007/117] pcd: move the identify buffer into pcd_identify No need to pass it through a bunch of functions. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index f9cdd11f02f5..8903fdaa2046 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -630,10 +630,11 @@ static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr) return CDS_DISC_OK; } -static int pcd_identify(struct pcd_unit *cd, char *id) +static int pcd_identify(struct pcd_unit *cd) { - int k, s; char id_cmd[12] = { 0x12, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 }; + char id[18]; + int k, s; pcd_bufblk = -1; @@ -664,15 +665,15 @@ static int pcd_identify(struct pcd_unit *cd, char *id) * returns 0, with id set if drive is detected * -1, if drive detection failed */ -static int pcd_probe(struct pcd_unit *cd, int ms, char *id) +static int pcd_probe(struct pcd_unit *cd, int ms) { if (ms == -1) { for (cd->drive = 0; cd->drive <= 1; cd->drive++) - if (!pcd_reset(cd) && !pcd_identify(cd, id)) + if (!pcd_reset(cd) && !pcd_identify(cd)) return 0; } else { cd->drive = ms; - if (!pcd_reset(cd) && !pcd_identify(cd, id)) + if (!pcd_reset(cd) && !pcd_identify(cd)) return 0; } return -1; @@ -709,7 +710,6 @@ static void pcd_probe_capabilities(void) static int pcd_detect(void) { - char id[18]; int k, unit; struct pcd_unit *cd; @@ -727,7 +727,7 @@ static int pcd_detect(void) cd = pcd; if (cd->disk && pi_init(cd->pi, 1, -1, -1, -1, -1, -1, pcd_buffer, PI_PCD, verbose, cd->name)) { - if (!pcd_probe(cd, -1, id)) { + if (!pcd_probe(cd, -1)) { cd->present = 1; k++; } else @@ -744,7 +744,7 @@ static int pcd_detect(void) conf[D_UNI], conf[D_PRO], conf[D_DLY], pcd_buffer, PI_PCD, verbose, cd->name)) continue; - if (!pcd_probe(cd, conf[D_SLV], id)) { + if (!pcd_probe(cd, conf[D_SLV])) { cd->present = 1; k++; } else From af761f277b7fd896c27cb1100b25f11567987822 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Sep 2021 15:01:04 -0700 Subject: [PATCH 008/117] pcd: cleanup initialization Refactor the pcd initialization to have a dedicated helper to initialize a single disk. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 290 +++++++++++++++++-------------------- 1 file changed, 129 insertions(+), 161 deletions(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 8903fdaa2046..93ed63626232 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -183,8 +183,6 @@ static int pcd_audio_ioctl(struct cdrom_device_info *cdi, static int pcd_packet(struct cdrom_device_info *cdi, struct packet_command *cgc); -static int pcd_detect(void); -static void pcd_probe_capabilities(void); static void do_pcd_read_drq(void); static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd); @@ -302,53 +300,6 @@ static const struct blk_mq_ops pcd_mq_ops = { .queue_rq = pcd_queue_rq, }; -static void pcd_init_units(void) -{ - struct pcd_unit *cd; - int unit; - - pcd_drive_count = 0; - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - struct gendisk *disk; - - if (blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1, - BLK_MQ_F_SHOULD_MERGE)) - continue; - - disk = blk_mq_alloc_disk(&cd->tag_set, cd); - if (IS_ERR(disk)) { - blk_mq_free_tag_set(&cd->tag_set); - continue; - } - - INIT_LIST_HEAD(&cd->rq_list); - blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); - cd->disk = disk; - cd->pi = &cd->pia; - cd->present = 0; - cd->last_sense = 0; - cd->changed = 1; - cd->drive = (*drives[unit])[D_SLV]; - if ((*drives[unit])[D_PRT]) - pcd_drive_count++; - - cd->name = &cd->info.name[0]; - snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit); - cd->info.ops = &pcd_dops; - cd->info.handle = cd; - cd->info.speed = 0; - cd->info.capacity = 1; - cd->info.mask = 0; - disk->major = major; - disk->first_minor = unit; - disk->minors = 1; - strcpy(disk->disk_name, cd->name); /* umm... */ - disk->fops = &pcd_bdops; - disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - disk->events = DISK_EVENT_MEDIA_CHANGE; - } -} - static int pcd_open(struct cdrom_device_info *cdi, int purpose) { struct pcd_unit *cd = cdi->handle; @@ -679,90 +630,31 @@ static int pcd_probe(struct pcd_unit *cd, int ms) return -1; } -static void pcd_probe_capabilities(void) +static int pcd_probe_capabilities(struct pcd_unit *cd) { - int unit, r; - char buffer[32]; char cmd[12] = { 0x5a, 1 << 3, 0x2a, 0, 0, 0, 0, 18, 0, 0, 0, 0 }; - struct pcd_unit *cd; + char buffer[32]; + int ret; - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - if (!cd->present) - continue; - r = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities"); - if (r) - continue; - /* we should now have the cap page */ - if ((buffer[11] & 1) == 0) - cd->info.mask |= CDC_CD_R; - if ((buffer[11] & 2) == 0) - cd->info.mask |= CDC_CD_RW; - if ((buffer[12] & 1) == 0) - cd->info.mask |= CDC_PLAY_AUDIO; - if ((buffer[14] & 1) == 0) - cd->info.mask |= CDC_LOCK; - if ((buffer[14] & 8) == 0) - cd->info.mask |= CDC_OPEN_TRAY; - if ((buffer[14] >> 6) == 0) - cd->info.mask |= CDC_CLOSE_TRAY; - } -} + ret = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities"); + if (ret) + return ret; -static int pcd_detect(void) -{ - int k, unit; - struct pcd_unit *cd; + /* we should now have the cap page */ + if ((buffer[11] & 1) == 0) + cd->info.mask |= CDC_CD_R; + if ((buffer[11] & 2) == 0) + cd->info.mask |= CDC_CD_RW; + if ((buffer[12] & 1) == 0) + cd->info.mask |= CDC_PLAY_AUDIO; + if ((buffer[14] & 1) == 0) + cd->info.mask |= CDC_LOCK; + if ((buffer[14] & 8) == 0) + cd->info.mask |= CDC_OPEN_TRAY; + if ((buffer[14] >> 6) == 0) + cd->info.mask |= CDC_CLOSE_TRAY; - printk("%s: %s version %s, major %d, nice %d\n", - name, name, PCD_VERSION, major, nice); - - par_drv = pi_register_driver(name); - if (!par_drv) { - pr_err("failed to register %s driver\n", name); - return -1; - } - - k = 0; - if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ - cd = pcd; - if (cd->disk && pi_init(cd->pi, 1, -1, -1, -1, -1, -1, - pcd_buffer, PI_PCD, verbose, cd->name)) { - if (!pcd_probe(cd, -1)) { - cd->present = 1; - k++; - } else - pi_release(cd->pi); - } - } else { - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - int *conf = *drives[unit]; - if (!conf[D_PRT]) - continue; - if (!cd->disk) - continue; - if (!pi_init(cd->pi, 0, conf[D_PRT], conf[D_MOD], - conf[D_UNI], conf[D_PRO], conf[D_DLY], - pcd_buffer, PI_PCD, verbose, cd->name)) - continue; - if (!pcd_probe(cd, conf[D_SLV])) { - cd->present = 1; - k++; - } else - pi_release(cd->pi); - } - } - if (k) - return 0; - - printk("%s: No CD-ROM drive found\n", name); - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - if (!cd->disk) - continue; - blk_cleanup_disk(cd->disk); - blk_mq_free_tag_set(&cd->tag_set); - } - pi_unregister_driver(par_drv); - return -1; + return 0; } /* I/O request processing */ @@ -999,43 +891,121 @@ static int pcd_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn) return 0; } +static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, + int mode, int unit, int protocol, int delay, int ms) +{ + struct gendisk *disk; + int ret; + + ret = blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE); + if (ret) + return ret; + + disk = blk_mq_alloc_disk(&cd->tag_set, cd); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); + goto out_free_tag_set; + } + + INIT_LIST_HEAD(&cd->rq_list); + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); + cd->disk = disk; + cd->pi = &cd->pia; + cd->present = 0; + cd->last_sense = 0; + cd->changed = 1; + cd->drive = (*drives[cd - pcd])[D_SLV]; + + cd->name = &cd->info.name[0]; + snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit); + cd->info.ops = &pcd_dops; + cd->info.handle = cd; + cd->info.speed = 0; + cd->info.capacity = 1; + cd->info.mask = 0; + disk->major = major; + disk->first_minor = unit; + disk->minors = 1; + strcpy(disk->disk_name, cd->name); /* umm... */ + disk->fops = &pcd_bdops; + disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; + disk->events = DISK_EVENT_MEDIA_CHANGE; + + if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay, + pcd_buffer, PI_PCD, verbose, cd->name)) + goto out_free_disk; + if (pcd_probe(cd, ms)) + goto out_pi_release; + + cd->present = 1; + pcd_probe_capabilities(cd); + register_cdrom(cd->disk, &cd->info); + add_disk(cd->disk); + return 0; + +out_pi_release: + pi_release(cd->pi); +out_free_disk: + blk_cleanup_disk(cd->disk); +out_free_tag_set: + blk_mq_free_tag_set(&cd->tag_set); + return ret; +} + static int __init pcd_init(void) { - struct pcd_unit *cd; - int unit; + int found = 0, unit; if (disable) return -EINVAL; - pcd_init_units(); - - if (pcd_detect()) - return -ENODEV; - - /* get the atapi capabilities page */ - pcd_probe_capabilities(); - - if (register_blkdev(major, name)) { - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - if (!cd->disk) - continue; - - blk_cleanup_queue(cd->disk->queue); - blk_mq_free_tag_set(&cd->tag_set); - put_disk(cd->disk); - } + if (register_blkdev(major, name)) return -EBUSY; + + pr_info("%s: %s version %s, major %d, nice %d\n", + name, name, PCD_VERSION, major, nice); + + par_drv = pi_register_driver(name); + if (!par_drv) { + pr_err("failed to register %s driver\n", name); + goto out_unregister_blkdev; } - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - if (cd->present) { - register_cdrom(cd->disk, &cd->info); - cd->disk->private_data = cd; - add_disk(cd->disk); + for (unit = 0; unit < PCD_UNITS; unit++) { + if ((*drives[unit])[D_PRT]) + pcd_drive_count++; + } + + if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ + if (!pcd_init_unit(pcd, 1, -1, -1, -1, -1, -1, -1)) + found++; + } else { + for (unit = 0; unit < PCD_UNITS; unit++) { + struct pcd_unit *cd = &pcd[unit]; + int *conf = *drives[unit]; + + if (!conf[D_PRT]) + continue; + if (!pcd_init_unit(cd, 0, conf[D_PRT], conf[D_MOD], + conf[D_UNI], conf[D_PRO], conf[D_DLY], + conf[D_SLV])) + found++; } } + if (!found) { + pr_info("%s: No CD-ROM drive found\n", name); + goto out_unregister_pi_driver; + } + return 0; + +out_unregister_pi_driver: + pi_unregister_driver(par_drv); +out_unregister_blkdev: + unregister_blkdev(major, name); + return -ENODEV; } static void __exit pcd_exit(void) @@ -1044,20 +1014,18 @@ static void __exit pcd_exit(void) int unit; for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { - if (!cd->disk) + if (!cd->present) continue; - if (cd->present) { - del_gendisk(cd->disk); - pi_release(cd->pi); - unregister_cdrom(&cd->info); - } - blk_cleanup_queue(cd->disk->queue); + del_gendisk(cd->disk); + pi_release(cd->pi); + unregister_cdrom(&cd->info); + blk_cleanup_disk(cd->disk); + blk_mq_free_tag_set(&cd->tag_set); - put_disk(cd->disk); } - unregister_blkdev(major, name); pi_unregister_driver(par_drv); + unregister_blkdev(major, name); } MODULE_LICENSE("GPL"); From fb367e6baeb0789d3f272a9aa21f5116e8ebd9dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Sep 2021 15:01:05 -0700 Subject: [PATCH 009/117] pf: cleanup initialization Refactor the pf initialization to have a dedicated helper to initialize a single disk. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 227 +++++++++++++++++--------------------- 1 file changed, 101 insertions(+), 126 deletions(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index d5b9c88ba76f..f471d48a87bc 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -214,7 +214,6 @@ static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); static void pf_release(struct gendisk *disk, fmode_t mode); -static int pf_detect(void); static void do_pf_read(void); static void do_pf_read_start(void); static void do_pf_write(void); @@ -285,45 +284,6 @@ static const struct blk_mq_ops pf_mq_ops = { .queue_rq = pf_queue_rq, }; -static void __init pf_init_units(void) -{ - struct pf_unit *pf; - int unit; - - pf_drive_count = 0; - for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) { - struct gendisk *disk; - - if (blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1, - BLK_MQ_F_SHOULD_MERGE)) - continue; - - disk = blk_mq_alloc_disk(&pf->tag_set, pf); - if (IS_ERR(disk)) { - blk_mq_free_tag_set(&pf->tag_set); - continue; - } - - INIT_LIST_HEAD(&pf->rq_list); - blk_queue_max_segments(disk->queue, cluster); - blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); - pf->disk = disk; - pf->pi = &pf->pia; - pf->media_status = PF_NM; - pf->drive = (*drives[unit])[D_SLV]; - pf->lun = (*drives[unit])[D_LUN]; - snprintf(pf->name, PF_NAMELEN, "%s%d", name, unit); - disk->major = major; - disk->first_minor = unit; - disk->minors = 1; - strcpy(disk->disk_name, pf->name); - disk->fops = &pf_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; - if (!(*drives[unit])[D_PRT]) - pf_drive_count++; - } -} - static int pf_open(struct block_device *bdev, fmode_t mode) { struct pf_unit *pf = bdev->bd_disk->private_data; @@ -718,59 +678,6 @@ static int pf_probe(struct pf_unit *pf) return -1; } -static int pf_detect(void) -{ - struct pf_unit *pf = units; - int k, unit; - - printk("%s: %s version %s, major %d, cluster %d, nice %d\n", - name, name, PF_VERSION, major, cluster, nice); - - par_drv = pi_register_driver(name); - if (!par_drv) { - pr_err("failed to register %s driver\n", name); - return -1; - } - k = 0; - if (pf_drive_count == 0) { - if (pi_init(pf->pi, 1, -1, -1, -1, -1, -1, pf_scratch, PI_PF, - verbose, pf->name)) { - if (!pf_probe(pf) && pf->disk) { - pf->present = 1; - k++; - } else - pi_release(pf->pi); - } - - } else - for (unit = 0; unit < PF_UNITS; unit++, pf++) { - int *conf = *drives[unit]; - if (!conf[D_PRT]) - continue; - if (pi_init(pf->pi, 0, conf[D_PRT], conf[D_MOD], - conf[D_UNI], conf[D_PRO], conf[D_DLY], - pf_scratch, PI_PF, verbose, pf->name)) { - if (pf->disk && !pf_probe(pf)) { - pf->present = 1; - k++; - } else - pi_release(pf->pi); - } - } - if (k) - return 0; - - printk("%s: No ATAPI disk detected\n", name); - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { - if (!pf->disk) - continue; - blk_cleanup_disk(pf->disk); - blk_mq_free_tag_set(&pf->tag_set); - } - pi_unregister_driver(par_drv); - return -1; -} - /* The i/o request engine */ static int pf_start(struct pf_unit *pf, int cmd, int b, int c) @@ -1014,61 +921,129 @@ static void do_pf_write_done(void) next_request(0); } +static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, + int mode, int unit, int protocol, int delay, int ms) +{ + struct gendisk *disk; + int ret; + + ret = blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1, + BLK_MQ_F_SHOULD_MERGE); + if (ret) + return ret; + + disk = blk_mq_alloc_disk(&pf->tag_set, pf); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); + goto out_free_tag_set; + } + disk->major = major; + disk->first_minor = pf - units; + disk->minors = 1; + strcpy(disk->disk_name, pf->name); + disk->fops = &pf_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + disk->private_data = pf; + + blk_queue_max_segments(disk->queue, cluster); + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); + + INIT_LIST_HEAD(&pf->rq_list); + pf->disk = disk; + pf->pi = &pf->pia; + pf->media_status = PF_NM; + pf->drive = (*drives[disk->first_minor])[D_SLV]; + pf->lun = (*drives[disk->first_minor])[D_LUN]; + snprintf(pf->name, PF_NAMELEN, "%s%d", name, disk->first_minor); + + if (!pi_init(pf->pi, autoprobe, port, mode, unit, protocol, delay, + pf_scratch, PI_PF, verbose, pf->name)) + goto out_free_disk; + if (pf_probe(pf)) + goto out_pi_release; + + add_disk(disk); + pf->present = 1; + return 0; + +out_pi_release: + pi_release(pf->pi); +out_free_disk: + blk_cleanup_disk(pf->disk); +out_free_tag_set: + blk_mq_free_tag_set(&pf->tag_set); + return ret; +} + static int __init pf_init(void) { /* preliminary initialisation */ struct pf_unit *pf; - int unit; + int found = 0, unit; if (disable) return -EINVAL; - pf_init_units(); - - if (pf_detect()) - return -ENODEV; - pf_busy = 0; - - if (register_blkdev(major, name)) { - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { - if (!pf->disk) - continue; - blk_cleanup_queue(pf->disk->queue); - blk_mq_free_tag_set(&pf->tag_set); - put_disk(pf->disk); - } + if (register_blkdev(major, name)) return -EBUSY; + + printk("%s: %s version %s, major %d, cluster %d, nice %d\n", + name, name, PF_VERSION, major, cluster, nice); + + par_drv = pi_register_driver(name); + if (!par_drv) { + pr_err("failed to register %s driver\n", name); + goto out_unregister_blkdev; } - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { - struct gendisk *disk = pf->disk; - - if (!pf->present) - continue; - disk->private_data = pf; - add_disk(disk); + for (unit = 0; unit < PF_UNITS; unit++) { + if (!(*drives[unit])[D_PRT]) + pf_drive_count++; } + + pf = units; + if (pf_drive_count == 0) { + if (pf_init_unit(pf, 1, -1, -1, -1, -1, -1, verbose)) + found++; + } else { + for (unit = 0; unit < PF_UNITS; unit++, pf++) { + int *conf = *drives[unit]; + if (!conf[D_PRT]) + continue; + if (pf_init_unit(pf, 0, conf[D_PRT], conf[D_MOD], + conf[D_UNI], conf[D_PRO], conf[D_DLY], + verbose)) + found++; + } + } + if (!found) { + printk("%s: No ATAPI disk detected\n", name); + goto out_unregister_pi_driver; + } + pf_busy = 0; return 0; + +out_unregister_pi_driver: + pi_unregister_driver(par_drv); +out_unregister_blkdev: + unregister_blkdev(major, name); + return -ENODEV; } static void __exit pf_exit(void) { struct pf_unit *pf; int unit; - unregister_blkdev(major, name); + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { - if (!pf->disk) + if (!pf->present) continue; - - if (pf->present) - del_gendisk(pf->disk); - - blk_cleanup_queue(pf->disk->queue); + del_gendisk(pf->disk); + blk_cleanup_disk(pf->disk); blk_mq_free_tag_set(&pf->tag_set); - put_disk(pf->disk); - - if (pf->present) - pi_release(pf->pi); + pi_release(pf->pi); } + + unregister_blkdev(major, name); } MODULE_LICENSE("GPL"); From 1ad392add59c2a7cc3148b2e3041696370b05e5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 27 Sep 2021 15:01:06 -0700 Subject: [PATCH 010/117] pd: cleanup initialization Refactor the pf initialization to have a dedicated helper to initialize a single disk. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/paride/pd.c | 140 +++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 675327df6aff..500b89a4bdaf 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -875,9 +875,27 @@ static const struct blk_mq_ops pd_mq_ops = { .queue_rq = pd_queue_rq, }; -static void pd_probe_drive(struct pd_unit *disk) +static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port, + int mode, int unit, int protocol, int delay) { + int index = disk - pd; + int *parm = *drives[index]; struct gendisk *p; + int ret; + + disk->pi = &disk->pia; + disk->access = 0; + disk->changed = 1; + disk->capacity = 0; + disk->drive = parm[D_SLV]; + snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a' + index); + disk->alt_geom = parm[D_GEO]; + disk->standby = parm[D_SBY]; + INIT_LIST_HEAD(&disk->rq_list); + + if (!pi_init(disk->pi, autoprobe, port, mode, unit, protocol, delay, + pd_scratch, PI_PD, verbose, disk->name)) + return -ENXIO; memset(&disk->tag_set, 0, sizeof(disk->tag_set)); disk->tag_set.ops = &pd_mq_ops; @@ -887,14 +905,14 @@ static void pd_probe_drive(struct pd_unit *disk) disk->tag_set.queue_depth = 2; disk->tag_set.numa_node = NUMA_NO_NODE; disk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; - - if (blk_mq_alloc_tag_set(&disk->tag_set)) - return; + ret = blk_mq_alloc_tag_set(&disk->tag_set); + if (ret) + goto pi_release; p = blk_mq_alloc_disk(&disk->tag_set, disk); if (IS_ERR(p)) { - blk_mq_free_tag_set(&disk->tag_set); - return; + ret = PTR_ERR(p); + goto free_tag_set; } disk->gd = p; @@ -905,102 +923,84 @@ static void pd_probe_drive(struct pd_unit *disk) p->minors = 1 << PD_BITS; p->events = DISK_EVENT_MEDIA_CHANGE; p->private_data = disk; - blk_queue_max_hw_sectors(p->queue, cluster); blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH); if (disk->drive == -1) { - for (disk->drive = 0; disk->drive <= 1; disk->drive++) - if (pd_special_command(disk, pd_identify) == 0) - return; - } else if (pd_special_command(disk, pd_identify) == 0) - return; - disk->gd = NULL; + for (disk->drive = 0; disk->drive <= 1; disk->drive++) { + ret = pd_special_command(disk, pd_identify); + if (ret == 0) + break; + } + } else { + ret = pd_special_command(disk, pd_identify); + } + if (ret) + goto put_disk; + set_capacity(disk->gd, disk->capacity); + add_disk(disk->gd); + return 0; +put_disk: put_disk(p); + disk->gd = NULL; +free_tag_set: + blk_mq_free_tag_set(&disk->tag_set); +pi_release: + pi_release(disk->pi); + return ret; } -static int pd_detect(void) +static int __init pd_init(void) { int found = 0, unit, pd_drive_count = 0; struct pd_unit *disk; - for (unit = 0; unit < PD_UNITS; unit++) { - int *parm = *drives[unit]; - struct pd_unit *disk = pd + unit; - disk->pi = &disk->pia; - disk->access = 0; - disk->changed = 1; - disk->capacity = 0; - disk->drive = parm[D_SLV]; - snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a'+unit); - disk->alt_geom = parm[D_GEO]; - disk->standby = parm[D_SBY]; - if (parm[D_PRT]) - pd_drive_count++; - INIT_LIST_HEAD(&disk->rq_list); - } + if (disable) + return -ENODEV; + + if (register_blkdev(major, name)) + return -ENODEV; + + printk("%s: %s version %s, major %d, cluster %d, nice %d\n", + name, name, PD_VERSION, major, cluster, nice); par_drv = pi_register_driver(name); if (!par_drv) { pr_err("failed to register %s driver\n", name); - return -1; + goto out_unregister_blkdev; + } + + for (unit = 0; unit < PD_UNITS; unit++) { + int *parm = *drives[unit]; + + if (parm[D_PRT]) + pd_drive_count++; } if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ - disk = pd; - if (pi_init(disk->pi, 1, -1, -1, -1, -1, -1, pd_scratch, - PI_PD, verbose, disk->name)) { - pd_probe_drive(disk); - if (!disk->gd) - pi_release(disk->pi); - } - + if (!pd_probe_drive(pd, 1, -1, -1, -1, -1, -1)) + found++; } else { for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { int *parm = *drives[unit]; if (!parm[D_PRT]) continue; - if (pi_init(disk->pi, 0, parm[D_PRT], parm[D_MOD], - parm[D_UNI], parm[D_PRO], parm[D_DLY], - pd_scratch, PI_PD, verbose, disk->name)) { - pd_probe_drive(disk); - if (!disk->gd) - pi_release(disk->pi); - } - } - } - for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { - if (disk->gd) { - set_capacity(disk->gd, disk->capacity); - add_disk(disk->gd); - found = 1; + if (!pd_probe_drive(disk, 0, parm[D_PRT], parm[D_MOD], + parm[D_UNI], parm[D_PRO], parm[D_DLY])) + found++; } } if (!found) { printk("%s: no valid drive found\n", name); - pi_unregister_driver(par_drv); + goto out_pi_unregister_driver; } - return found; -} - -static int __init pd_init(void) -{ - if (disable) - goto out1; - - if (register_blkdev(major, name)) - goto out1; - - printk("%s: %s version %s, major %d, cluster %d, nice %d\n", - name, name, PD_VERSION, major, cluster, nice); - if (!pd_detect()) - goto out2; return 0; -out2: +out_pi_unregister_driver: + pi_unregister_driver(par_drv); +out_unregister_blkdev: unregister_blkdev(major, name); -out1: return -ENODEV; } From 4dfbd1390af60765774d9565858d1a6fadacde32 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:07 -0700 Subject: [PATCH 011/117] pcd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 93ed63626232..a7fab3830d7b 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -941,9 +941,13 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, cd->present = 1; pcd_probe_capabilities(cd); register_cdrom(cd->disk, &cd->info); - add_disk(cd->disk); + ret = add_disk(cd->disk); + if (ret) + goto out_unreg_cdrom; return 0; +out_unreg_cdrom: + unregister_cdrom(&cd->info); out_pi_release: pi_release(cd->pi); out_free_disk: From 2b6cabce3954be3341e0fe7b20a27902821fd3dd Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:08 -0700 Subject: [PATCH 012/117] pcd: fix ordering of unregister_cdrom() We first register cdrom and then we add_disk() and so we we should likewise unregister the cdrom first and then del_gendisk(). Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a7fab3830d7b..82a654fc4db8 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -1021,9 +1021,9 @@ static void __exit pcd_exit(void) if (!cd->present) continue; + unregister_cdrom(&cd->info); del_gendisk(cd->disk); pi_release(cd->pi); - unregister_cdrom(&cd->info); blk_cleanup_disk(cd->disk); blk_mq_free_tag_set(&cd->tag_set); From b6fa069971bc427e19b8f3882a808f24530994ed Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:09 -0700 Subject: [PATCH 013/117] pcd: capture errors on cdrom_register() No errors were being captured wehen cdrom_register() fails, capture the error and return the error. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 82a654fc4db8..4cc0d141db78 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -940,7 +940,9 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, cd->present = 1; pcd_probe_capabilities(cd); - register_cdrom(cd->disk, &cd->info); + ret = register_cdrom(cd->disk, &cd->info); + if (ret) + goto out_pi_release; ret = add_disk(cd->disk); if (ret) goto out_unreg_cdrom; From 3dfdd5f333bf16ec5057d508a574a3302ed84cfa Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:10 -0700 Subject: [PATCH 014/117] pd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/paride/pd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 500b89a4bdaf..e59759bcf416 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -938,8 +938,12 @@ static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port, if (ret) goto put_disk; set_capacity(disk->gd, disk->capacity); - add_disk(disk->gd); + ret = add_disk(disk->gd); + if (ret) + goto cleanup_disk; return 0; +cleanup_disk: + blk_cleanup_disk(disk->gd); put_disk: put_disk(p); disk->gd = NULL; From 4a32e1cdb745ea9f66358810b0ce85698033f57e Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:48 -0700 Subject: [PATCH 015/117] mtip32xx: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. The read_capacity_error error label already does what we need, so just re-use that. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 901855717cb5..d0b40309f47e 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3633,7 +3633,9 @@ skip_create_disk: set_capacity(dd->disk, capacity); /* Enable the block device and add it to /dev */ - device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups); + rv = device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups); + if (rv) + goto read_capacity_error; if (dd->mtip_svc_handler) { set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); From 7b505627568c088b364705a86234fa1f2beb01b9 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:49 -0700 Subject: [PATCH 016/117] pktcdvd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. The out_mem2 error label already does what we need so re-use that. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index cb52cce6fb03..e48d4771d4c1 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2728,7 +2728,9 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) /* inherit events of the host device */ disk->events = pd->bdev->bd_disk->events; - add_disk(disk); + ret = add_disk(disk); + if (ret) + goto out_mem2; pkt_sysfs_dev_new(pd); pkt_debugfs_dev_new(pd); From 54494d10031b4bc043af43251bff0d10cca6857a Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:53 -0700 Subject: [PATCH 017/117] block/rsxx: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 4 +++- drivers/block/rsxx/dev.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 83636714b8d7..8d9d69f5dfbc 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -935,7 +935,9 @@ static int rsxx_pci_probe(struct pci_dev *dev, card->size8 = 0; } - rsxx_attach_dev(card); + st = rsxx_attach_dev(card); + if (st) + goto failed_create_dev; /************* Setup Debugfs *************/ rsxx_debugfs_dev_new(card); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index 268252380e88..dd33f1bdf3b8 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -191,6 +191,8 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card) int rsxx_attach_dev(struct rsxx_cardinfo *card) { + int err = 0; + mutex_lock(&card->dev_lock); /* The block device requires the stripe size from the config. */ @@ -199,13 +201,17 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) set_capacity(card->gendisk, card->size8 >> 9); else set_capacity(card->gendisk, 0); - device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); - card->bdev_attached = 1; + err = device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); + if (err == 0) + card->bdev_attached = 1; } mutex_unlock(&card->dev_lock); - return 0; + if (err) + blk_cleanup_disk(card->gendisk); + + return err; } void rsxx_detach_dev(struct rsxx_cardinfo *card) From 637208e74a861d993c3a8eea3f9f1df4415930e0 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:55 -0700 Subject: [PATCH 018/117] block/sx8: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. A completion is used to notify the initial probe what is happening and so we must defer error handling on completion. Do this by remembering the error and using the shared cleanup function. The tags are shared and so are hanlded later for the driver already. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/sx8.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 420cd952ddc4..1c79248c4826 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -297,6 +297,7 @@ struct carm_host { struct work_struct fsm_task; + int probe_err; struct completion probe_comp; }; @@ -1181,8 +1182,11 @@ static void carm_fsm_task (struct work_struct *work) struct gendisk *disk = port->disk; set_capacity(disk, port->capacity); - add_disk(disk); - activated++; + host->probe_err = add_disk(disk); + if (!host->probe_err) + activated++; + else + break; } printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n", @@ -1192,11 +1196,9 @@ static void carm_fsm_task (struct work_struct *work) reschedule = 1; break; } - case HST_PROBE_FINISHED: complete(&host->probe_comp); break; - case HST_ERROR: /* FIXME: TODO */ break; @@ -1507,7 +1509,10 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_free_irq; DPRINTK("waiting for probe_comp\n"); + host->probe_err = -ENODEV; wait_for_completion(&host->probe_comp); + if (host->probe_err) + goto err_out_free_irq; printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n", host->name, pci_name(pdev), (int) CARM_MAX_PORTS, From 4fac63f8a871bc2e38dce4944c9d964a62bac3e6 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:56 -0700 Subject: [PATCH 019/117] pf: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index f471d48a87bc..380d80e507c7 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -962,7 +962,9 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, if (pf_probe(pf)) goto out_pi_release; - add_disk(disk); + ret = add_disk(disk); + if (ret) + goto out_pi_release; pf->present = 1; return 0; From d6ac27c60fec4dc59473e39abf924e430a9ea320 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:27 -0700 Subject: [PATCH 020/117] cdrom/gdrom: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/cdrom/gdrom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 8e1fe75af93f..d50cc1fd34d5 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -805,9 +805,14 @@ static int probe_gdrom(struct platform_device *devptr) err = -ENOMEM; goto probe_fail_free_irqs; } - add_disk(gd.disk); + err = add_disk(gd.disk); + if (err) + goto probe_fail_add_disk; + return 0; +probe_fail_add_disk: + kfree(gd.toc); probe_fail_free_irqs: free_irq(HW_EVENT_GDROM_DMA, &gd); free_irq(HW_EVENT_GDROM_CMD, &gd); From 27c97abc30e2b9ad2288977c0ecbef4d50553f57 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:28 -0700 Subject: [PATCH 021/117] rbd: add add_disk() error handling We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bf60aebd0cfb..953fa134cd3d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -7054,7 +7054,9 @@ static ssize_t do_rbd_add(struct bus_type *bus, if (rc) goto err_out_image_lock; - device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); + rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); + if (rc) + goto err_out_cleanup_disk; spin_lock(&rbd_dev_list_lock); list_add_tail(&rbd_dev->node, &rbd_dev_list); @@ -7068,6 +7070,8 @@ out: module_put(THIS_MODULE); return rc; +err_out_cleanup_disk: + rbd_free_disk(rbd_dev); err_out_image_lock: rbd_dev_image_unlock(rbd_dev); rbd_dev_device_release(rbd_dev); From 2d4bcf76429713c2ca0093c11b5b75072db95a50 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:49 -0700 Subject: [PATCH 022/117] block/swim3: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-2-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 965af0a3e95b..f7e3482e846b 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1229,7 +1229,9 @@ static int swim3_attach(struct macio_dev *mdev, disk->flags |= GENHD_FL_REMOVABLE; sprintf(disk->disk_name, "fd%d", floppy_count); set_capacity(disk, 2880); - add_disk(disk); + rc = add_disk(disk); + if (rc) + goto out_cleanup_disk; disks[floppy_count++] = disk; return 0; From 2598a2bb357d64baaa94368133ddbc900b9eb246 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:50 -0700 Subject: [PATCH 023/117] floppy: fix add_disk() assumption on exit due to new developments After the patch titled "floppy: use blk_mq_alloc_disk and blk_cleanup_disk" the floppy driver was modified to allocate the blk_mq_alloc_disk() which allocates the disk with the queue. This is further clarified later with the patch titled "block: remove alloc_disk and alloc_disk_node". This clarifies that: Most drivers should use and have been converted to use blk_alloc_disk and blk_mq_alloc_disk. Only the scsi ULPs and dasd still allocate a disk separately from the request_queue so don't bother with convenience macros for something that should not see significant new users and remove these wrappers. And then we have the patch titled, "block: hold a request_queue reference for the lifetime of struct gendisk" which ensures that a queue is *always* present for sure during the entire lifetime of a disk. In the floppy driver's case then the disk always comes with the queue. So even if even if the queue was cleaned up on exit, putting the disk *is* still required, and likewise, blk_cleanup_queue() on a null queue should not happen now as disk->queue is valid from disk allocation time on. Automatic backport code scrapers should hopefully not cherry pick this patch as a stable fix candidate without full due dilligence to ensure all the work done on the block layer to make this happen is merged first. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-3-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 6288ce888414..2ee4d3e7ea2d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4954,19 +4954,6 @@ static void __exit floppy_module_exit(void) blk_cleanup_queue(disks[drive][i]->queue); } blk_mq_free_tag_set(&tag_sets[drive]); - - /* - * These disks have not called add_disk(). Don't put down - * queue reference in put_disk(). - */ - if (!(allowed_drive_mask & (1 << drive)) || - fdc_state[FDC(drive)].version == FDC_NONE) { - for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { - if (disks[drive][i]) - disks[drive][i]->queue = NULL; - } - } - for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { if (disks[drive][i]) put_disk(disks[drive][i]); From 3776339ae7acaf9590c668e86f45005fc9aff014 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:51 -0700 Subject: [PATCH 024/117] floppy: use blk_cleanup_disk() Use the blk_cleanup_queue() followed by put_disk() can be replaced with blk_cleanup_disk(). No need for two separate loops. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-4-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2ee4d3e7ea2d..74996728fc24 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4951,13 +4951,9 @@ static void __exit floppy_module_exit(void) } for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { if (disks[drive][i]) - blk_cleanup_queue(disks[drive][i]->queue); + blk_cleanup_disk(disks[drive][i]); } blk_mq_free_tag_set(&tag_sets[drive]); - for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { - if (disks[drive][i]) - put_disk(disks[drive][i]); - } } cancel_delayed_work_sync(&fd_timeout); From 662167e59d2f3c15a44a88088fc6c1a67c8a3650 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:52 -0700 Subject: [PATCH 025/117] floppy: fix calling platform_device_unregister() on invalid drives platform_device_unregister() should only be called when a respective platform_device_register() is called. However the floppy driver currently allows failures when registring a drive and a bail out could easily cause an invalid call to platform_device_unregister() where it was not intended. Fix this by adding a bool to keep track of when the platform device was registered for a drive. This does not fix any known panic / bug. This issue was found through code inspection while preparing the driver to use the up and coming support for device_add_disk() error handling. From what I can tell from code inspection, chances of this ever happening should be insanely small, perhaps OOM. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-5-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 74996728fc24..acde3cb63ef4 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4479,6 +4479,7 @@ static const struct blk_mq_ops floppy_mq_ops = { }; static struct platform_device floppy_device[N_DRIVE]; +static bool registered[N_DRIVE]; static bool floppy_available(int drive) { @@ -4694,6 +4695,8 @@ static int __init do_floppy_init(void) if (err) goto out_remove_drives; + registered[drive] = true; + device_add_disk(&floppy_device[drive].dev, disks[drive][0], NULL); } @@ -4704,7 +4707,8 @@ out_remove_drives: while (drive--) { if (floppy_available(drive)) { del_gendisk(disks[drive][0]); - platform_device_unregister(&floppy_device[drive]); + if (registered[drive]) + platform_device_unregister(&floppy_device[drive]); } } out_release_dma: @@ -4947,7 +4951,8 @@ static void __exit floppy_module_exit(void) if (disks[drive][i]) del_gendisk(disks[drive][i]); } - platform_device_unregister(&floppy_device[drive]); + if (registered[drive]) + platform_device_unregister(&floppy_device[drive]); } for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { if (disks[drive][i]) From 47d34aa2d211e9cef61dd85b7b0011c9f61dd0ae Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:53 -0700 Subject: [PATCH 026/117] floppy: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-6-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index acde3cb63ef4..3873e789478e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4697,8 +4697,10 @@ static int __init do_floppy_init(void) registered[drive] = true; - device_add_disk(&floppy_device[drive].dev, disks[drive][0], - NULL); + err = device_add_disk(&floppy_device[drive].dev, + disks[drive][0], NULL); + if (err) + goto out_remove_drives; } return 0; From a2379420c7d7cb14a8b214fc7c0e2f55f66393ac Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:54 -0700 Subject: [PATCH 027/117] amiflop: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. The caller for fd_alloc_disk() deals with the rest of the cleanup like the tag. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-7-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/amiflop.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 2909fd9e72fb..bf5c124c5452 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1780,6 +1780,7 @@ static const struct blk_mq_ops amiflop_mq_ops = { static int fd_alloc_disk(int drive, int system) { struct gendisk *disk; + int err; disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); if (IS_ERR(disk)) @@ -1798,8 +1799,10 @@ static int fd_alloc_disk(int drive, int system) set_capacity(disk, 880 * 2); unit[drive].gendisk[system] = disk; - add_disk(disk); - return 0; + err = add_disk(disk); + if (err) + blk_cleanup_disk(disk); + return err; } static int fd_alloc_drive(int drive) From b76a30c254d987b4ae7d47415081121d4c0a7423 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:55 -0700 Subject: [PATCH 028/117] swim: simplify using blk_cleanup_disk() on swim_remove() We can simplify swim_remove() by using one call instead of two, just as other drivers do. Use that pattern. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-8-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/swim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 3911d0833e1b..868d59476065 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -912,9 +912,8 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); - blk_cleanup_queue(swd->unit[drive].disk->queue); + blk_cleanup_disk(swd->unit[drive].disk); blk_mq_free_tag_set(&swd->unit[drive].tag_set); - put_disk(swd->unit[drive].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); From 4e9abe72530a2baf5f80d60e8d0bcdb84964d2e4 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:56 -0700 Subject: [PATCH 029/117] swim: add helper for disk cleanup Disk cleanup can be shared between exit and bringup. Use a helper to do the work required. The only functional change at this point is we're being overly paraoid on exit to check for a null disk as well now, and this should be safe. We'll later expand on this, this change just makes subsequent changes easier to read. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-9-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/swim.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 868d59476065..4f87d1af7c60 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -772,6 +772,17 @@ static const struct blk_mq_ops swim_mq_ops = { .queue_rq = swim_queue_rq, }; +static void swim_cleanup_floppy_disk(struct floppy_state *fs) +{ + struct gendisk *disk = fs->disk; + + if (!disk) + return; + + blk_cleanup_disk(disk); + blk_mq_free_tag_set(&fs->tag_set); +} + static int swim_floppy_init(struct swim_priv *swd) { int err; @@ -836,12 +847,7 @@ static int swim_floppy_init(struct swim_priv *swd) exit_put_disks: unregister_blkdev(FLOPPY_MAJOR, "fd"); do { - struct gendisk *disk = swd->unit[drive].disk; - - if (!disk) - continue; - blk_cleanup_disk(disk); - blk_mq_free_tag_set(&swd->unit[drive].tag_set); + swim_cleanup_floppy_disk(&swd->unit[drive]); } while (drive--); return err; } @@ -912,8 +918,7 @@ static int swim_remove(struct platform_device *dev) for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); - blk_cleanup_disk(swd->unit[drive].disk); - blk_mq_free_tag_set(&swd->unit[drive].tag_set); + swim_cleanup_floppy_disk(&swd->unit[drive]); } unregister_blkdev(FLOPPY_MAJOR, "fd"); From 9ef41effb9b65088053e31c741c2a1ec97190117 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:57 -0700 Subject: [PATCH 030/117] swim: add a floppy registration bool which triggers del_gendisk() Instead of calling del_gendisk() on exit alone, let's add a registration bool to the floppy disk state, this way this can be done on the shared caller, swim_cleanup_floppy_disk(). This will be more useful in subsequent patches. Right now, this just shuffles functionality out to a helper in a safe way. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-10-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/swim.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 4f87d1af7c60..eed453528f4c 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -185,6 +185,7 @@ struct floppy_state { int track; int ref_count; + bool registered; struct gendisk *disk; struct blk_mq_tag_set tag_set; @@ -779,6 +780,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs) if (!disk) return; + if (fs->registered) + del_gendisk(fs->disk); + blk_cleanup_disk(disk); blk_mq_free_tag_set(&fs->tag_set); } @@ -840,6 +844,7 @@ static int swim_floppy_init(struct swim_priv *swd) swd->unit[drive].disk->private_data = &swd->unit[drive]; set_capacity(swd->unit[drive].disk, 2880); add_disk(swd->unit[drive].disk); + swd->unit[drive].registered = true; } return 0; @@ -916,10 +921,8 @@ static int swim_remove(struct platform_device *dev) int drive; struct resource *res; - for (drive = 0; drive < swd->floppy_count; drive++) { - del_gendisk(swd->unit[drive].disk); + for (drive = 0; drive < swd->floppy_count; drive++) swim_cleanup_floppy_disk(&swd->unit[drive]); - } unregister_blkdev(FLOPPY_MAJOR, "fd"); From 625a28a7e60c7c026e4d2929c49c8461fad4b0f3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:58 -0700 Subject: [PATCH 031/117] swim: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Since we have a caller to do our unwinding for the disk, and this is already dealt with safely we can re-use our existing error path goto label which already deals with the cleanup. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-11-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/swim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/swim.c b/drivers/block/swim.c index eed453528f4c..821594cd1315 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -843,7 +843,9 @@ static int swim_floppy_init(struct swim_priv *swd) swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE; swd->unit[drive].disk->private_data = &swd->unit[drive]; set_capacity(swd->unit[drive].disk, 2880); - add_disk(swd->unit[drive].disk); + err = add_disk(swd->unit[drive].disk); + if (err) + goto exit_put_disks; swd->unit[drive].registered = true; } From 44a469b6acae6ad05c4acca8429467d1d50a8b8d Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:02:59 -0700 Subject: [PATCH 032/117] block/ataflop: use the blk_cleanup_disk() helper Use the helper to replace two lines with one. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-12-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 58e921ab5729..bc8cf7a991af 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2077,8 +2077,7 @@ static int __init atari_floppy_init (void) err: while (--i >= 0) { - blk_cleanup_queue(unit[i].disk[0]->queue); - put_disk(unit[i].disk[0]); + blk_cleanup_disk(unit[i].disk[0]); blk_mq_free_tag_set(&unit[i].tag_set); } @@ -2136,8 +2135,7 @@ static void __exit atari_floppy_exit(void) if (!unit[i].disk[type]) continue; del_gendisk(unit[i].disk[type]); - blk_cleanup_queue(unit[i].disk[type]->queue); - put_disk(unit[i].disk[type]); + blk_cleanup_disk(unit[i].disk[type]); } blk_mq_free_tag_set(&unit[i].tag_set); } From 573effb298011d3fcabc9b12025cf637f8a07911 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:03:00 -0700 Subject: [PATCH 033/117] block/ataflop: add registration bool before calling del_gendisk() The ataflop assumes del_gendisk() is safe to call, this is only true because add_disk() does not return a failure, but that will change soon. And so, before we get to adding error handling for that case, let's make sure we keep track of which disks actually get registered. Then we use this to only call del_gendisk for them. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-13-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index bc8cf7a991af..fe16105ecdf5 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -299,6 +299,7 @@ static struct atari_floppy_struct { disk change detection) */ int flags; /* flags */ struct gendisk *disk[NUM_DISK_MINORS]; + bool registered[NUM_DISK_MINORS]; int ref; int type; struct blk_mq_tag_set tag_set; @@ -2001,8 +2002,10 @@ static void ataflop_probe(dev_t dev) return; mutex_lock(&ataflop_probe_lock); if (!unit[drive].disk[type]) { - if (ataflop_alloc_disk(drive, type) == 0) + if (ataflop_alloc_disk(drive, type) == 0) { add_disk(unit[drive].disk[type]); + unit[drive].registered[type] = true; + } } mutex_unlock(&ataflop_probe_lock); } @@ -2066,6 +2069,7 @@ static int __init atari_floppy_init (void) unit[i].track = -1; unit[i].flags = 0; add_disk(unit[i].disk[0]); + unit[i].registered[0] = true; } printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n", @@ -2134,7 +2138,8 @@ static void __exit atari_floppy_exit(void) for (type = 0; type < NUM_DISK_MINORS; type++) { if (!unit[i].disk[type]) continue; - del_gendisk(unit[i].disk[type]); + if (unit[i].registered[type]) + del_gendisk(unit[i].disk[type]); blk_cleanup_disk(unit[i].disk[type]); } blk_mq_free_tag_set(&unit[i].tag_set); From deae1138d04758c7f8939fcb8aee330bc37e3015 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:03:01 -0700 Subject: [PATCH 034/117] block/ataflop: provide a helper for cleanup up an atari disk Instead of using two separate code paths for cleaning up an atari disk, use one. We take the more careful approach to check for *all* disk types, as is done on exit. The init path didn't have that check as the alternative disk types are only probed for later, they are not initialized by default. Yes, there is a shared tag for all disks. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-14-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index fe16105ecdf5..a07cb9b79a6c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2010,6 +2010,20 @@ static void ataflop_probe(dev_t dev) mutex_unlock(&ataflop_probe_lock); } +static void atari_cleanup_floppy_disk(struct atari_floppy_struct *fs) +{ + int type; + + for (type = 0; type < NUM_DISK_MINORS; type++) { + if (!fs->disk[type]) + continue; + if (fs->registered[type]) + del_gendisk(fs->disk[type]); + blk_cleanup_disk(fs->disk[type]); + } + blk_mq_free_tag_set(&fs->tag_set); +} + static int __init atari_floppy_init (void) { int i; @@ -2080,10 +2094,8 @@ static int __init atari_floppy_init (void) return 0; err: - while (--i >= 0) { - blk_cleanup_disk(unit[i].disk[0]); - blk_mq_free_tag_set(&unit[i].tag_set); - } + while (--i >= 0) + atari_cleanup_floppy_disk(&unit[i]); unregister_blkdev(FLOPPY_MAJOR, "fd"); out_unlock: @@ -2132,18 +2144,10 @@ __setup("floppy=", atari_floppy_setup); static void __exit atari_floppy_exit(void) { - int i, type; + int i; - for (i = 0; i < FD_MAX_UNITS; i++) { - for (type = 0; type < NUM_DISK_MINORS; type++) { - if (!unit[i].disk[type]) - continue; - if (unit[i].registered[type]) - del_gendisk(unit[i].disk[type]); - blk_cleanup_disk(unit[i].disk[type]); - } - blk_mq_free_tag_set(&unit[i].tag_set); - } + for (i = 0; i < FD_MAX_UNITS; i++) + atari_cleanup_floppy_disk(&unit[i]); unregister_blkdev(FLOPPY_MAJOR, "fd"); del_timer_sync(&fd_timer); From 2f1510708970c873c5eee190b77071f59f67cef8 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:03:02 -0700 Subject: [PATCH 035/117] block/ataflop: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20210927220302.1073499-15-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index a07cb9b79a6c..9bc5cce6b29a 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2082,7 +2082,9 @@ static int __init atari_floppy_init (void) for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].track = -1; unit[i].flags = 0; - add_disk(unit[i].disk[0]); + ret = add_disk(unit[i].disk[0]); + if (ret) + goto err_out_dma; unit[i].registered[0] = true; } @@ -2093,6 +2095,8 @@ static int __init atari_floppy_init (void) return 0; +err_out_dma: + atari_stram_free(DMABuffer); err: while (--i >= 0) atari_cleanup_floppy_disk(&unit[i]); From db8eda9c43361023678aa23eb0dceb0a411af0f3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 27 Sep 2021 15:01:01 -0700 Subject: [PATCH 036/117] xtensa/platforms/iss/simdisk: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Signed-off-by: Luis Chamberlain Acked-by: Max Filippov Link: https://lore.kernel.org/r/20210927220110.1066271-7-mcgrof@kernel.org Signed-off-by: Jens Axboe --- arch/xtensa/platforms/iss/simdisk.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index ddd1fe3db474..07b642c1916a 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -258,6 +258,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, struct proc_dir_entry *procdir) { char tmp[2] = { '0' + which, 0 }; + int err = -ENOMEM; dev->fd = -1; dev->filename = NULL; @@ -266,7 +267,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, dev->gd = blk_alloc_disk(NUMA_NO_NODE); if (!dev->gd) - return -ENOMEM; + goto out; dev->gd->major = simdisk_major; dev->gd->first_minor = which; dev->gd->minors = SIMDISK_MINORS; @@ -274,10 +275,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which, dev->gd->private_data = dev; snprintf(dev->gd->disk_name, 32, "simdisk%d", which); set_capacity(dev->gd, 0); - add_disk(dev->gd); + err = add_disk(dev->gd); + if (err) + goto out_cleanup_disk; dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev); + return 0; + +out_cleanup_disk: + blk_cleanup_disk(dev->gd); +out: + return err; } static int __init simdisk_init(void) From d0ac7a30e41174c794fbfa53ea986d9555e5b9f4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Oct 2021 15:26:23 +0300 Subject: [PATCH 037/117] pcd: fix error codes in pcd_init_unit() Return -ENODEV on these error paths instead of returning success. Fixes: af761f277b7f ("pcd: cleanup initialization") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211001122623.GA2283@kili Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 4cc0d141db78..f6b1d63e96e1 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -613,8 +613,7 @@ static int pcd_identify(struct pcd_unit *cd) } /* - * returns 0, with id set if drive is detected - * -1, if drive detection failed + * returns 0, with id set if drive is detected, otherwise an error code. */ static int pcd_probe(struct pcd_unit *cd, int ms) { @@ -627,7 +626,7 @@ static int pcd_probe(struct pcd_unit *cd, int ms) if (!pcd_reset(cd) && !pcd_identify(cd)) return 0; } - return -1; + return -ENODEV; } static int pcd_probe_capabilities(struct pcd_unit *cd) @@ -933,9 +932,12 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, disk->events = DISK_EVENT_MEDIA_CHANGE; if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay, - pcd_buffer, PI_PCD, verbose, cd->name)) + pcd_buffer, PI_PCD, verbose, cd->name)) { + ret = -ENODEV; goto out_free_disk; - if (pcd_probe(cd, ms)) + } + ret = pcd_probe(cd, ms); + if (ret) goto out_pi_release; cd->present = 1; From cfc03eabda8224c681087a4c6c51d1cc595ebfaf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Oct 2021 15:26:54 +0300 Subject: [PATCH 038/117] pf: fix error codes in pf_init_unit() Return a negative error code instead of success on these error paths. Fixes: fb367e6baeb0 ("pf: cleanup initialization") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211001122654.GB2283@kili Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 380d80e507c7..bf8d0ef41a0a 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -651,9 +651,9 @@ static int pf_identify(struct pf_unit *pf) return 0; } -/* returns 0, with id set if drive is detected - -1, if drive detection failed -*/ +/* + * returns 0, with id set if drive is detected, otherwise an error code. + */ static int pf_probe(struct pf_unit *pf) { if (pf->drive == -1) { @@ -675,7 +675,7 @@ static int pf_probe(struct pf_unit *pf) if (!pf_identify(pf)) return 0; } - return -1; + return -ENODEV; } /* The i/o request engine */ @@ -957,9 +957,12 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, snprintf(pf->name, PF_NAMELEN, "%s%d", name, disk->first_minor); if (!pi_init(pf->pi, autoprobe, port, mode, unit, protocol, delay, - pf_scratch, PI_PF, verbose, pf->name)) + pf_scratch, PI_PF, verbose, pf->name)) { + ret = -ENODEV; goto out_free_disk; - if (pf_probe(pf)) + } + ret = pf_probe(pf); + if (ret) goto out_pi_release; ret = add_disk(disk); From 5deae20c552aec0750cc7b95e84ca94121aac3b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Oct 2021 15:27:22 +0300 Subject: [PATCH 039/117] sx8: fix an error code in carm_init_one() Return a negative error code here on this error path instead of returning success. Fixes: 637208e74a86 ("block/sx8: add error handling support for add_disk()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211001122722.GC2283@kili Signed-off-by: Jens Axboe --- drivers/block/sx8.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 1c79248c4826..d1676fe0da1a 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1511,8 +1511,10 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) DPRINTK("waiting for probe_comp\n"); host->probe_err = -ENODEV; wait_for_completion(&host->probe_comp); - if (host->probe_err) + if (host->probe_err) { + rc = host->probe_err; goto err_out_free_irq; + } printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n", host->name, pci_name(pdev), (int) CARM_MAX_PORTS, From 1f0a258f114b5b152855d31179f902cb10bdfb59 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 Oct 2021 19:23:26 -0600 Subject: [PATCH 040/117] swim3: add missing major.h include swim3 got this through blkdev.h previously, but blkdev.h is not including it anymore. Include it specifically for the driver, otherwise FLOPPY_MAJOR is undefined and breaks the compile on PPC if swim3 is configured. Fixes: b81e0c2372e6 ("block: drop unused includes in ") Reported-by: Naresh Kamboju Acked-by: Randy Dunlap # build-tested Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index f7e3482e846b..4b91c9aa5892 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include From 9be68dd7ac0e13be2ac57770c1f921d6b3294c6e Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 1 Sep 2021 13:38:30 +0200 Subject: [PATCH 041/117] md: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. We just do the unwinding of what was not done before, and are sure to unlock prior to bailing. Signed-off-by: Luis Chamberlain Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 22310d5d8d41..eff3d23e1fcd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5700,7 +5700,11 @@ static int md_alloc(dev_t dev, char *name) disk->flags |= GENHD_FL_EXT_DEVT; disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; - add_disk(disk); + error = add_disk(disk); + if (error) { + blk_cleanup_disk(disk); + goto abort; + } error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); if (error) { From 51238e7fbd6182e36dbc093c92ae93142c57c0f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Sep 2021 13:38:31 +0200 Subject: [PATCH 042/117] md: add the bitmap group to the default groups for the md kobject Replace the deprecated default_attrs with the default_groups mechanism, and add the always visible bitmap group to the groups created add kobject_add time. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index eff3d23e1fcd..e04a23dc46bf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5490,6 +5490,10 @@ static struct attribute *md_default_attrs[] = { NULL, }; +static const struct attribute_group md_default_group = { + .attrs = md_default_attrs, +}; + static struct attribute *md_redundancy_attrs[] = { &md_scan_mode.attr, &md_last_scan_mode.attr, @@ -5512,6 +5516,12 @@ static const struct attribute_group md_redundancy_group = { .attrs = md_redundancy_attrs, }; +static const struct attribute_group *md_attr_groups[] = { + &md_default_group, + &md_bitmap_group, + NULL, +}; + static ssize_t md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { @@ -5587,7 +5597,7 @@ static const struct sysfs_ops md_sysfs_ops = { static struct kobj_type md_ktype = { .release = md_free, .sysfs_ops = &md_sysfs_ops, - .default_attrs = md_default_attrs, + .default_groups = md_attr_groups, }; int mdp_major = 0; @@ -5596,7 +5606,6 @@ static void mddev_delayed_delete(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); - sysfs_remove_group(&mddev->kobj, &md_bitmap_group); kobject_del(&mddev->kobj); kobject_put(&mddev->kobj); } @@ -5715,9 +5724,6 @@ static int md_alloc(dev_t dev, char *name) disk->disk_name); error = 0; } - if (mddev->kobj.sd && - sysfs_create_group(&mddev->kobj, &md_bitmap_group)) - pr_debug("pointless warning\n"); abort: mutex_unlock(&disks_mutex); if (!error && mddev->kobj.sd) { From 94f3cd7d832c28681f1dea54b4dd8606e5e2bc75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Sep 2021 13:38:32 +0200 Subject: [PATCH 043/117] md: extend disks_mutex coverage disks_mutex is intended to serialize md_alloc. Extended it to also cover the kobject_uevent call and getting the sysfs dirent to help reducing error handling complexity. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e04a23dc46bf..946bbedf5dcf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5725,12 +5725,12 @@ static int md_alloc(dev_t dev, char *name) error = 0; } abort: - mutex_unlock(&disks_mutex); if (!error && mddev->kobj.sd) { kobject_uevent(&mddev->kobj, KOBJ_ADD); mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); } + mutex_unlock(&disks_mutex); mddev_put(mddev); return error; } From 7ad1069166c0ccdd572d27e01cc7f7f84477df1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Sep 2021 13:38:33 +0200 Subject: [PATCH 044/117] md: properly unwind when failing to add the kobject in md_alloc Add proper error handling to delete the gendisk when failing to add the md kobject and clean up the error unwinding in general. Signed-off-by: Christoph Hellwig Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 946bbedf5dcf..c6b15ff0074f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5672,7 +5672,7 @@ static int md_alloc(dev_t dev, char *name) strcmp(mddev2->gendisk->disk_name, name) == 0) { spin_unlock(&all_mddevs_lock); error = -EEXIST; - goto abort; + goto out_unlock_disks_mutex; } spin_unlock(&all_mddevs_lock); } @@ -5685,7 +5685,7 @@ static int md_alloc(dev_t dev, char *name) error = -ENOMEM; disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) - goto abort; + goto out_unlock_disks_mutex; disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; @@ -5710,26 +5710,23 @@ static int md_alloc(dev_t dev, char *name) disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; error = add_disk(disk); - if (error) { - blk_cleanup_disk(disk); - goto abort; - } + if (error) + goto out_cleanup_disk; error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); - if (error) { - /* This isn't possible, but as kobject_init_and_add is marked - * __must_check, we must do something with the result - */ - pr_debug("md: cannot register %s/md - name in use\n", - disk->disk_name); - error = 0; - } - abort: - if (!error && mddev->kobj.sd) { - kobject_uevent(&mddev->kobj, KOBJ_ADD); - mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); - mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); - } + if (error) + goto out_del_gendisk; + + kobject_uevent(&mddev->kobj, KOBJ_ADD); + mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); + mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); + goto out_unlock_disks_mutex; + +out_del_gendisk: + del_gendisk(disk); +out_cleanup_disk: + blk_cleanup_disk(disk); +out_unlock_disks_mutex: mutex_unlock(&disks_mutex); mddev_put(mddev); return error; From fd3b6975e9c11c4fa00965f82a0bfbb3b7b44101 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 4 Oct 2021 23:34:48 +0800 Subject: [PATCH 045/117] md/raid1: only allocate write behind bio for WriteMostly device Commit 6607cd319b6b91bff94e90f798a61c031650b514 ("raid1: ensure write behind bio has less than BIO_MAX_VECS sectors") tried to guarantee the size of behind bio is not bigger than BIO_MAX_VECS sectors. Unfortunately the same calltrace still could happen since an array could enable write-behind without write mostly device. To match the manpage of mdadm (which says "write-behind is only attempted on drives marked as write-mostly"), we need to check WriteMostly flag to avoid such unexpected behavior. [1]. https://bugzilla.kernel.org/show_bug.cgi?id=213181#c25 Cc: stable@vger.kernel.org # v5.12+ Cc: Jens Stutte Reported-by: Jens Stutte Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 19598bd38939..6ba12f0f0f03 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1496,7 +1496,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!r1_bio->bios[i]) continue; - if (first_clone) { + if (first_clone && test_bit(WriteMostly, &rdev->flags)) { /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly From 2e94275ed582d9bfe1abc5d5ee565715cc3e6b38 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 4 Oct 2021 23:34:50 +0800 Subject: [PATCH 046/117] md/raid1: use rdev in raid1_write_request directly We already get rdev from conf->mirrors[i].rdev at the beginning of the loop, so just use it. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6ba12f0f0f03..7dc8026cf6ee 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1529,13 +1529,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->bios[i] = mbio; - mbio->bi_iter.bi_sector = (r1_bio->sector + - conf->mirrors[i].rdev->data_offset); - bio_set_dev(mbio, conf->mirrors[i].rdev->bdev); + mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); + bio_set_dev(mbio, rdev->bdev); mbio->bi_end_io = raid1_end_write_request; mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA)); - if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) && - !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) && + if (test_bit(FailFast, &rdev->flags) && + !test_bit(WriteMostly, &rdev->flags) && conf->raid_disks - mddev->degraded > 1) mbio->bi_opf |= MD_FAILFAST; mbio->bi_private = r1_bio; @@ -1546,7 +1545,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, trace_block_bio_remap(mbio, disk_devt(mddev->gendisk), r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ - mbio->bi_bdev = (void *)conf->mirrors[i].rdev; + mbio->bi_bdev = (void *)rdev; cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug)); if (cb) From c6efe4341d1ff7719f9918ea23a34f055359eb02 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 4 Oct 2021 23:34:52 +0800 Subject: [PATCH 047/117] md/raid5: call roundup_pow_of_two in raid5_run Let's call roundup_pow_of_two here instead of open code. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/raid5.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 02ed53b20654..4ea9e7b647b0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7732,10 +7732,7 @@ static int raid5_run(struct mddev *mddev) * discard data disk but write parity disk */ stripe = stripe * PAGE_SIZE; - /* Round up to power of 2, as discard handling - * currently assumes that */ - while ((stripe-1) & stripe) - stripe = (stripe | (stripe-1)) + 1; + stripe = roundup_pow_of_two(stripe); mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; From 5467948604baba60e9c4610de8edb8efe7f01f18 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Mon, 4 Oct 2021 23:34:53 +0800 Subject: [PATCH 048/117] md: remove unused argument from md_new_event Actually, mddev is not used by md_new_event. Signed-off-by: Guoqing Jiang Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 30 +++++++++++++++--------------- drivers/md/md.h | 2 +- drivers/md/raid10.c | 2 +- drivers/md/raid5.c | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index c6b15ff0074f..3db44b3408c0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -354,7 +354,7 @@ static bool create_on_open = true; */ static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); static atomic_t md_event_count; -void md_new_event(struct mddev *mddev) +void md_new_event(void) { atomic_inc(&md_event_count); wake_up(&md_event_waiters); @@ -2886,7 +2886,7 @@ static int add_bound_rdev(struct md_rdev *rdev) if (mddev->degraded) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_new_event(mddev); + md_new_event(); md_wakeup_thread(mddev->thread); return 0; } @@ -3002,7 +3002,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); md_wakeup_thread(mddev->thread); } - md_new_event(mddev); + md_new_event(); } } } else if (cmd_match(buf, "writemostly")) { @@ -4099,7 +4099,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len) if (!mddev->thread) md_update_sb(mddev, 1); sysfs_notify_dirent_safe(mddev->sysfs_level); - md_new_event(mddev); + md_new_event(); rv = len; out_unlock: mddev_unlock(mddev); @@ -4620,7 +4620,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) export_rdev(rdev); mddev_unlock(mddev); if (!err) - md_new_event(mddev); + md_new_event(); return err ? err : len; } @@ -6041,7 +6041,7 @@ int md_run(struct mddev *mddev) if (mddev->sb_flags) md_update_sb(mddev, 0); - md_new_event(mddev); + md_new_event(); return 0; bitmap_abort: @@ -6431,7 +6431,7 @@ static int do_md_stop(struct mddev *mddev, int mode, if (mddev->hold_active == UNTIL_STOP) mddev->hold_active = 0; } - md_new_event(mddev); + md_new_event(); sysfs_notify_dirent_safe(mddev->sysfs_state); return 0; } @@ -6935,7 +6935,7 @@ kick_rdev: md_wakeup_thread(mddev->thread); else md_update_sb(mddev, 1); - md_new_event(mddev); + md_new_event(); return 0; busy: @@ -7008,7 +7008,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - md_new_event(mddev); + md_new_event(); return 0; abort_export: @@ -7982,7 +7982,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) md_wakeup_thread(mddev->thread); if (mddev->event_work.func) queue_work(md_misc_wq, &mddev->event_work); - md_new_event(mddev); + md_new_event(); } EXPORT_SYMBOL(md_error); @@ -8866,7 +8866,7 @@ void md_do_sync(struct md_thread *thread) mddev->curr_resync = 3; /* no longer delayed */ mddev->curr_resync_completed = j; sysfs_notify_dirent_safe(mddev->sysfs_completed); - md_new_event(mddev); + md_new_event(); update_time = jiffies; blk_start_plug(&plug); @@ -8937,7 +8937,7 @@ void md_do_sync(struct md_thread *thread) /* this is the earliest that rebuild will be * visible in /proc/mdstat */ - md_new_event(mddev); + md_new_event(); if (last_check + window > io_sectors || j == max_sectors) continue; @@ -9161,7 +9161,7 @@ static int remove_and_add_spares(struct mddev *mddev, sysfs_link_rdev(mddev, rdev); if (!test_bit(Journal, &rdev->flags)) spares++; - md_new_event(mddev); + md_new_event(); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); } } @@ -9195,7 +9195,7 @@ static void md_start_sync(struct work_struct *ws) } else md_wakeup_thread(mddev->sync_thread); sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); + md_new_event(); } /* @@ -9454,7 +9454,7 @@ void md_reap_sync_thread(struct mddev *mddev) /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); + md_new_event(); if (mddev->event_work.func) queue_work(md_misc_wq, &mddev->event_work); } diff --git a/drivers/md/md.h b/drivers/md/md.h index 4c96c36bd01a..53ea7a6961de 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -731,7 +731,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct page *page, int op, int op_flags, bool metadata_op); extern void md_do_sync(struct md_thread *thread); -extern void md_new_event(struct mddev *mddev); +extern void md_new_event(void); extern void md_allow_write(struct mddev *mddev); extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index aa2636582841..dde98f65bd04 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4647,7 +4647,7 @@ out: } conf->reshape_checkpoint = jiffies; md_wakeup_thread(mddev->sync_thread); - md_new_event(mddev); + md_new_event(); return 0; abort: diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4ea9e7b647b0..9c1a5877cf9f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -8279,7 +8279,7 @@ static int raid5_start_reshape(struct mddev *mddev) } conf->reshape_checkpoint = jiffies; md_wakeup_thread(mddev->sync_thread); - md_new_event(mddev); + md_new_event(); return 0; } From 8b9e2291e355a0eafdd5b1e21a94a6659f24b351 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Wed, 13 Oct 2021 22:59:33 +0800 Subject: [PATCH 049/117] md: update superblock after changing rdev flags in state_store When the in memory flag is changed, we need to persist the change in the rdev superblock flags. This is needed for "writemostly" and "failfast". Reviewed-by: Li Feng Signed-off-by: Xiao Ni Signed-off-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 3db44b3408c0..e8666bdc0d28 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2976,7 +2976,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) * -write_error - clears WriteErrorSeen * {,-}failfast - set/clear FailFast */ + + struct mddev *mddev = rdev->mddev; int err = -EINVAL; + bool need_update_sb = false; + if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); if (test_bit(Faulty, &rdev->flags)) @@ -2991,7 +2995,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->raid_disk >= 0) err = -EBUSY; else { - struct mddev *mddev = rdev->mddev; err = 0; if (mddev_is_clustered(mddev)) err = md_cluster_ops->remove_disk(mddev, rdev); @@ -3008,10 +3011,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "writemostly")) { set_bit(WriteMostly, &rdev->flags); mddev_create_serial_pool(rdev->mddev, rdev, false); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-writemostly")) { mddev_destroy_serial_pool(rdev->mddev, rdev, false); clear_bit(WriteMostly, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "blocked")) { set_bit(Blocked, &rdev->flags); @@ -3037,9 +3042,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "failfast")) { set_bit(FailFast, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-failfast")) { clear_bit(FailFast, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags)) { @@ -3118,6 +3125,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) clear_bit(ExternalBbl, &rdev->flags); err = 0; } + if (need_update_sb) + md_update_sb(mddev, 1); if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); return err ? err : len; From c573d586999cb7e694efad79f0cb69a8215bbef6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Oct 2021 20:07:50 +0200 Subject: [PATCH 050/117] mtip32xx: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Christophe JAILLET Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/0fea349c808c6cfbf549b0e33701320c7860c8b7.1634234221.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index d0b40309f47e..c91b9010c1a6 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4063,7 +4063,6 @@ block_initialize_err: msi_initialize_err: if (dd->isr_workq) { - flush_workqueue(dd->isr_workq); destroy_workqueue(dd->isr_workq); drop_cpu(dd->work[0].cpu_binding); drop_cpu(dd->work[1].cpu_binding); @@ -4121,7 +4120,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) mtip_block_remove(dd); if (dd->isr_workq) { - flush_workqueue(dd->isr_workq); destroy_workqueue(dd->isr_workq); drop_cpu(dd->work[0].cpu_binding); drop_cpu(dd->work[1].cpu_binding); From 4e6eef5dc25b528e08ac5b5f64f6ca9d9987241d Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 17:33:44 +0800 Subject: [PATCH 051/117] nbd: don't handle response without a corresponding request message While handling a response message from server, nbd_read_stat() will try to get request by tag, and then complete the request. However, this is problematic if nbd haven't sent a corresponding request message: t1 t2 submit_bio nbd_queue_rq blk_mq_start_request recv_work nbd_read_stat blk_mq_tag_to_rq blk_mq_complete_request nbd_send_cmd Thus add a new cmd flag 'NBD_CMD_INFLIGHT', it will be set in nbd_send_cmd() and checked in nbd_read_stat(). Noted that this patch can't fix that blk_mq_tag_to_rq() might return a freed request, and this will be fixed in following patches. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210916093350.1410403-2-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index f4101d3efe7d..d18ba557a69d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -131,6 +131,12 @@ struct nbd_device { }; #define NBD_CMD_REQUEUED 1 +/* + * This flag will be set if nbd_queue_rq() succeed, and will be checked and + * cleared in completion. Both setting and clearing of the flag are protected + * by cmd->lock. + */ +#define NBD_CMD_INFLIGHT 2 struct nbd_cmd { struct nbd_device *nbd; @@ -405,6 +411,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!mutex_trylock(&cmd->lock)) return BLK_EH_RESET_TIMER; + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; mutex_unlock(&cmd->lock); @@ -734,6 +741,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); + if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)", + tag, cmd->status, cmd->flags); + ret = -ENOENT; + goto out; + } if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); @@ -833,6 +846,7 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) return true; mutex_lock(&cmd->lock); + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); @@ -969,7 +983,13 @@ again: * returns EAGAIN can be retried on a different socket. */ ret = nbd_send_cmd(nbd, cmd, index); - if (ret == -EAGAIN) { + /* + * Access to this flag is protected by cmd->lock, thus it's safe to set + * the flag after nbd_send_cmd() succeed to send request to server. + */ + if (!ret) + __set_bit(NBD_CMD_INFLIGHT, &cmd->flags); + else if (ret == -EAGAIN) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Request send failed, requeueing\n"); nbd_mark_nsock_dead(nbd, nsock, 1); From 07175cb1baf4c51051b1fbd391097e349f9a02a9 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 17:33:45 +0800 Subject: [PATCH 052/117] nbd: make sure request completion won't concurrent commit cddce0116058 ("nbd: Aovid double completion of a request") try to fix that nbd_clear_que() and recv_work() can complete a request concurrently. However, the problem still exists: t1 t2 t3 nbd_disconnect_and_put flush_workqueue recv_work blk_mq_complete_request blk_mq_complete_request_remote -> this is true WRITE_ONCE(rq->state, MQ_RQ_COMPLETE) blk_mq_raise_softirq blk_done_softirq blk_complete_reqs nbd_complete_rq blk_mq_end_request blk_mq_free_request WRITE_ONCE(rq->state, MQ_RQ_IDLE) nbd_clear_que blk_mq_tagset_busy_iter nbd_clear_req __blk_mq_free_request blk_mq_put_tag blk_mq_complete_request -> complete again There are three places where request can be completed in nbd: recv_work(), nbd_clear_que() and nbd_xmit_timeout(). Since they all hold cmd->lock before completing the request, it's easy to avoid the problem by setting and checking a cmd flag. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210916093350.1410403-3-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d18ba557a69d..0bb3c1e2d575 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -411,7 +411,11 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!mutex_trylock(&cmd->lock)) return BLK_EH_RESET_TIMER; - __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); + if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + mutex_unlock(&cmd->lock); + return BLK_EH_DONE; + } + if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; mutex_unlock(&cmd->lock); @@ -846,7 +850,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) return true; mutex_lock(&cmd->lock); - __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); + if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + mutex_unlock(&cmd->lock); + return true; + } cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); From fcf3d633d8e101e84a0e072ab79957b938ac7e06 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 17:33:46 +0800 Subject: [PATCH 053/117] nbd: check sock index in nbd_read_stat() The sock that clent send request in nbd_send_cmd() and receive reply in nbd_read_stat() should be the same. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210916093350.1410403-4-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0bb3c1e2d575..49c501cf8a8b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -751,6 +751,10 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ret = -ENOENT; goto out; } + if (cmd->index != index) { + dev_err(disk_to_dev(nbd->disk), "Unexpected reply %d from different sock %d (expected %d)", + tag, index, cmd->index); + } if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) { dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n", req, cmd->cmd_cookie, nbd_handle_to_cookie(handle)); From 0de2b7a4dd08eeb82b8cc3fe5d6b8ba49a32f7bd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 17:33:47 +0800 Subject: [PATCH 054/117] nbd: don't start request if nbd_queue_rq() failed commit 6a468d5990ec ("nbd: don't start req until after the dead connection logic") move blk_mq_start_request() from nbd_queue_rq() to nbd_handle_cmd() to skip starting request if the connection is dead. However, request is still started in other error paths. Currently, blk_mq_end_request() will be called immediately if nbd_queue_rq() failed, thus start request in such situation is useless. So remove blk_mq_start_request() from error paths in nbd_handle_cmd(). Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210916093350.1410403-5-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 49c501cf8a8b..87066cad5711 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -939,7 +939,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) if (!refcount_inc_not_zero(&nbd->config_refs)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Socks array is empty\n"); - blk_mq_start_request(req); return -EINVAL; } config = nbd->config; @@ -948,7 +947,6 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on invalid socket\n"); nbd_config_put(nbd); - blk_mq_start_request(req); return -EINVAL; } cmd->status = BLK_STS_OK; @@ -972,7 +970,6 @@ again: */ sock_shutdown(nbd); nbd_config_put(nbd); - blk_mq_start_request(req); return -EIO; } goto again; From f52c0e08237e7864a44311fc78bc9bf2e045611b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 17:33:48 +0800 Subject: [PATCH 055/117] nbd: clean up return value checking of sock_xmit() Check if sock_xmit() return 0 is useless because it'll never return 0, comment it and remove such checkings. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210916093350.1410403-6-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 87066cad5711..1d34560d8c1e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -495,7 +495,8 @@ done: } /* - * Send or receive packet. + * Send or receive packet. Return a positive value on success and + * negtive value on failue, and never return 0. */ static int sock_xmit(struct nbd_device *nbd, int index, int send, struct iov_iter *iter, int msg_flags, int *sent) @@ -621,7 +622,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); trace_nbd_header_sent(req, handle); - if (result <= 0) { + if (result < 0) { if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, * however if we have sent something we need to make @@ -665,7 +666,7 @@ send_pages: skip = 0; } result = sock_xmit(nbd, index, 1, &from, flags, &sent); - if (result <= 0) { + if (result < 0) { if (was_interrupted(result)) { /* We've already sent the header, we * have no choice but to set pending and @@ -717,7 +718,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) reply.magic = 0; iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); - if (result <= 0) { + if (result < 0) { if (!nbd_disconnected(config)) dev_err(disk_to_dev(nbd->disk), "Receive control failed (result %d)\n", result); @@ -788,7 +789,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) rq_for_each_segment(bvec, req, iter) { iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); - if (result <= 0) { + if (result < 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); /* @@ -1234,7 +1235,7 @@ static void send_disconnects(struct nbd_device *nbd) iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request)); mutex_lock(&nsock->tx_lock); ret = sock_xmit(nbd, i, 1, &from, 0, NULL); - if (ret <= 0) + if (ret < 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); mutex_unlock(&nsock->tx_lock); From 3fe1db626a56cdf259c348404f2c5429e2f065a1 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 17:33:49 +0800 Subject: [PATCH 056/117] nbd: partition nbd_read_stat() into nbd_read_reply() and nbd_handle_reply() Prepare to fix uaf in nbd_read_stat(), no functional changes. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210916093350.1410403-7-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 80 ++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1d34560d8c1e..06e292f0b0ae 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -700,38 +700,45 @@ out: return 0; } -/* NULL returned = something went wrong, inform userspace */ -static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) +static int nbd_read_reply(struct nbd_device *nbd, int index, + struct nbd_reply *reply) +{ + struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)}; + struct iov_iter to; + int result; + + reply->magic = 0; + iov_iter_kvec(&to, READ, &iov, 1, sizeof(*reply)); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); + if (result < 0) { + if (!nbd_disconnected(nbd->config)) + dev_err(disk_to_dev(nbd->disk), + "Receive control failed (result %d)\n", result); + return result; + } + + if (ntohl(reply->magic) != NBD_REPLY_MAGIC) { + dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", + (unsigned long)ntohl(reply->magic)); + return -EPROTO; + } + + return 0; +} + +/* NULL returned = something went wrong, inform userspace */ +static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, + struct nbd_reply *reply) { - struct nbd_config *config = nbd->config; int result; - struct nbd_reply reply; struct nbd_cmd *cmd; struct request *req = NULL; u64 handle; u16 hwq; u32 tag; - struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; - struct iov_iter to; int ret = 0; - reply.magic = 0; - iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply)); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); - if (result < 0) { - if (!nbd_disconnected(config)) - dev_err(disk_to_dev(nbd->disk), - "Receive control failed (result %d)\n", result); - return ERR_PTR(result); - } - - if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { - dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n", - (unsigned long)ntohl(reply.magic)); - return ERR_PTR(-EPROTO); - } - - memcpy(&handle, reply.handle, sizeof(handle)); + memcpy(&handle, reply->handle, sizeof(handle)); tag = nbd_handle_to_tag(handle); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) @@ -774,9 +781,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ret = -ENOENT; goto out; } - if (ntohl(reply.error)) { + if (ntohl(reply->error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", - ntohl(reply.error)); + ntohl(reply->error)); cmd->status = BLK_STS_IOERR; goto out; } @@ -785,6 +792,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (rq_data_dir(req) != WRITE) { struct req_iterator iter; struct bio_vec bvec; + struct iov_iter to; rq_for_each_segment(bvec, req, iter) { iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len); @@ -798,7 +806,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) * and let the timeout stuff handle resubmitting * this request onto another connection. */ - if (nbd_disconnected(config)) { + if (nbd_disconnected(nbd->config)) { cmd->status = BLK_STS_IOERR; goto out; } @@ -822,24 +830,30 @@ static void recv_work(struct work_struct *work) work); struct nbd_device *nbd = args->nbd; struct nbd_config *config = nbd->config; + struct nbd_sock *nsock; struct nbd_cmd *cmd; struct request *rq; while (1) { - cmd = nbd_read_stat(nbd, args->index); - if (IS_ERR(cmd)) { - struct nbd_sock *nsock = config->socks[args->index]; + struct nbd_reply reply; - mutex_lock(&nsock->tx_lock); - nbd_mark_nsock_dead(nbd, nsock, 1); - mutex_unlock(&nsock->tx_lock); + if (nbd_read_reply(nbd, args->index, &reply)) + break; + + cmd = nbd_handle_reply(nbd, args->index, &reply); + if (IS_ERR(cmd)) break; - } rq = blk_mq_rq_from_pdu(cmd); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } + + nsock = config->socks[args->index]; + mutex_lock(&nsock->tx_lock); + nbd_mark_nsock_dead(nbd, nsock, 1); + mutex_unlock(&nsock->tx_lock); + nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); From 8663b210f8c169a49aaeed3af92471a147545477 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 16 Sep 2021 22:18:10 +0800 Subject: [PATCH 057/117] nbd: fix uaf in nbd_handle_reply() There is a problem that nbd_handle_reply() might access freed request: 1) At first, a normal io is submitted and completed with scheduler: internel_tag = blk_mq_get_tag -> get tag from sched_tags blk_mq_rq_ctx_init sched_tags->rq[internel_tag] = sched_tag->static_rq[internel_tag] ... blk_mq_get_driver_tag __blk_mq_get_driver_tag -> get tag from tags tags->rq[tag] = sched_tag->static_rq[internel_tag] So, both tags->rq[tag] and sched_tags->rq[internel_tag] are pointing to the request: sched_tags->static_rq[internal_tag]. Even if the io is finished. 2) nbd server send a reply with random tag directly: recv_work nbd_handle_reply blk_mq_tag_to_rq(tags, tag) rq = tags->rq[tag] 3) if the sched_tags->static_rq is freed: blk_mq_sched_free_requests blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i) -> step 2) access rq before clearing rq mapping blk_mq_clear_rq_mapping(set, tags, hctx_idx); __free_pages() -> rq is freed here 4) Then, nbd continue to use the freed request in nbd_handle_reply Fix the problem by get 'q_usage_counter' before blk_mq_tag_to_rq(), thus request is ensured not to be freed because 'q_usage_counter' is not zero. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210916141810.2325276-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 06e292f0b0ae..0d064fab6186 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -830,6 +830,7 @@ static void recv_work(struct work_struct *work) work); struct nbd_device *nbd = args->nbd; struct nbd_config *config = nbd->config; + struct request_queue *q = nbd->disk->queue; struct nbd_sock *nsock; struct nbd_cmd *cmd; struct request *rq; @@ -840,13 +841,28 @@ static void recv_work(struct work_struct *work) if (nbd_read_reply(nbd, args->index, &reply)) break; - cmd = nbd_handle_reply(nbd, args->index, &reply); - if (IS_ERR(cmd)) + /* + * Grab .q_usage_counter so request pool won't go away, then no + * request use-after-free is possible during nbd_handle_reply(). + * If queue is frozen, there won't be any inflight requests, we + * needn't to handle the incoming garbage message. + */ + if (!percpu_ref_tryget(&q->q_usage_counter)) { + dev_err(disk_to_dev(nbd->disk), "%s: no io inflight\n", + __func__); break; + } + + cmd = nbd_handle_reply(nbd, args->index, &reply); + if (IS_ERR(cmd)) { + percpu_ref_put(&q->q_usage_counter); + break; + } rq = blk_mq_rq_from_pdu(cmd); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); + percpu_ref_put(&q->q_usage_counter); } nsock = config->socks[args->index]; From 86d46fdaa12ae5befc16b8d73fc85a3ca0399ea6 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Tue, 19 Oct 2021 19:13:21 +1300 Subject: [PATCH 058/117] block: ataflop: fix breakage introduced at blk-mq refactoring Refactoring of the Atari floppy driver when converting to blk-mq has broken the state machine in not-so-subtle ways: finish_fdc() must be called when operations on the floppy device have completed. This is crucial in order to relase the ST-DMA lock, which protects against concurrent access to the ST-DMA controller by other drivers (some DMA related, most just related to device register access - broken beyond compare, I know). When rewriting the driver's old do_request() function, the fact that finish_fdc() was called only when all queued requests had completed appears to have been overlooked. Instead, the new request function calls finish_fdc() immediately after the last request has been queued. finish_fdc() executes a dummy seek after most requests, and this overwrites the state machine's interrupt hander that was set up to wait for completion of the read/write request just prior. To make matters worse, finish_fdc() is called before device interrupts are re-enabled, making certain that the read/write interupt is missed. Shifting the finish_fdc() call into the read/write request completion handler ensures the driver waits for the request to actually complete. With a queue depth of 2, we won't see long request sequences, so calling finish_fdc() unconditionally just adds a little overhead for the dummy seeks, and keeps the code simple. While we're at it, kill ataflop_commit_rqs() which does nothing but run finish_fdc() unconditionally, again likely wiping out an in-flight request. Signed-off-by: Michael Schmitz Fixes: 6ec3938cff95 ("ataflop: convert to blk-mq") CC: linux-block@vger.kernel.org CC: Tetsuo Handa Link: https://lore.kernel.org/r/20211019061321.26425-1-schmitzmic@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 9bc5cce6b29a..3d217f1d7440 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -655,9 +655,6 @@ static inline void copy_buffer(void *from, void *to) *p2++ = *p1++; } - - - /* General Interrupt Handling */ static void (*FloppyIRQHandler)( int status ) = NULL; @@ -1230,6 +1227,7 @@ static void fd_rwsec_done1(int status) } else { /* all sectors finished */ + finish_fdc(); fd_end_request_cur(BLK_STS_OK); } return; @@ -1477,15 +1475,6 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } -static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx) -{ - spin_lock_irq(&ataflop_lock); - atari_disable_irq(IRQ_MFP_FDC); - finish_fdc(); - atari_enable_irq(IRQ_MFP_FDC); - spin_unlock_irq(&ataflop_lock); -} - static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1493,6 +1482,8 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, int drive = floppy - unit; int type = floppy->type; + DPRINT(("Queue request: drive %d type %d last %d\n", drive, type, bd->last)); + spin_lock_irq(&ataflop_lock); if (fd_request) { spin_unlock_irq(&ataflop_lock); @@ -1552,8 +1543,6 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, setup_req_params( drive ); do_fd_action( drive ); - if (bd->last) - finish_fdc(); atari_enable_irq( IRQ_MFP_FDC ); out: @@ -1964,7 +1953,6 @@ static const struct block_device_operations floppy_fops = { static const struct blk_mq_ops ataflop_mq_ops = { .queue_rq = ataflop_queue_rq, - .commit_rqs = ataflop_commit_rqs, }; static int ataflop_alloc_disk(unsigned int drive, unsigned int type) From 9c3d29296fe4c297447d2055e7a9535c981a8370 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Oct 2021 06:45:06 -0600 Subject: [PATCH 059/117] nvme: move command clear into the various setup helpers We don't have to worry about doing extra memsets by moving it outside the protection of RQF_DONTPREP, as nvme doesn't do partial completions. This is in preparation for making the read/write fast path not do a full memset of the command. Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 10 +++++++--- drivers/nvme/host/zns.c | 2 ++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3109bdf137e4..824790bed2f5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -834,6 +834,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { + memset(cmnd, 0, sizeof(*cmnd)); cmnd->common.opcode = nvme_cmd_flush; cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); } @@ -885,6 +886,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, return BLK_STS_IOERR; } + memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); cmnd->dsm.nr = cpu_to_le32(segments - 1); @@ -901,6 +903,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { + memset(cmnd, 0, sizeof(*cmnd)); + if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) return nvme_setup_discard(ns, req, cmnd); @@ -925,6 +929,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, u16 control = 0; u32 dsmgmt = 0; + memset(cmnd, 0, sizeof(*cmnd)); + if (req->cmd_flags & REQ_FUA) control |= NVME_RW_FUA; if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) @@ -993,10 +999,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) { + if (!(req->rq_flags & RQF_DONTPREP)) nvme_clear_nvme_request(req); - memset(cmd, 0, sizeof(*cmd)); - } switch (req_op(req)) { case REQ_OP_DRV_IN: diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index d95010481fce..bfc259e0d7b8 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -233,6 +233,8 @@ out_free: blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, struct nvme_command *c, enum nvme_zone_mgmt_action action) { + memset(c, 0, sizeof(*c)); + c->zms.opcode = nvme_cmd_zone_mgmt_send; c->zms.nsid = cpu_to_le32(ns->head->ns_id); c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); From a9a7e30fd918588bc312ba782426e3a1282df359 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Oct 2021 06:47:18 -0600 Subject: [PATCH 060/117] nvme: don't memset() the normal read/write command This memset in the fast path costs a lot of cycles on my setup. Here's a top-of-profile of doing ~6.7M IOPS: + 5.90% io_uring [nvme] [k] nvme_queue_rq + 5.32% io_uring [nvme_core] [k] nvme_setup_cmd + 5.17% io_uring [kernel.vmlinux] [k] io_submit_sqes + 4.97% io_uring [kernel.vmlinux] [k] blkdev_direct_IO and a perf diff with this patch: 0.92% +4.40% [nvme_core] [k] nvme_setup_cmd reducing it from 5.3% to only 0.9%. This takes it from the 2nd most cycle consumer to something that's mostly irrelevant. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 824790bed2f5..c415c3faf420 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -929,8 +929,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, u16 control = 0; u32 dsmgmt = 0; - memset(cmnd, 0, sizeof(*cmnd)); - if (req->cmd_flags & REQ_FUA) control |= NVME_RW_FUA; if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) @@ -940,9 +938,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; cmnd->rw.opcode = op; + cmnd->rw.flags = 0; cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); + cmnd->rw.rsvd2 = 0; + cmnd->rw.metadata = 0; cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); + cmnd->rw.reftag = 0; + cmnd->rw.apptag = 0; + cmnd->rw.appmask = 0; if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); From 0c98057be9efa32de78dbc4685fc73da9d71faa1 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 20 Oct 2021 15:39:59 +0800 Subject: [PATCH 061/117] nbd: Fix use-after-free in pid_show I got issue as follows: [ 263.886511] BUG: KASAN: use-after-free in pid_show+0x11f/0x13f [ 263.888359] Read of size 4 at addr ffff8880bf0648c0 by task cat/746 [ 263.890479] CPU: 0 PID: 746 Comm: cat Not tainted 4.19.90-dirty #140 [ 263.893162] Call Trace: [ 263.893509] dump_stack+0x108/0x15f [ 263.893999] print_address_description+0xa5/0x372 [ 263.894641] kasan_report.cold+0x236/0x2a8 [ 263.895696] __asan_report_load4_noabort+0x25/0x30 [ 263.896365] pid_show+0x11f/0x13f [ 263.897422] dev_attr_show+0x48/0x90 [ 263.898361] sysfs_kf_seq_show+0x24d/0x4b0 [ 263.899479] kernfs_seq_show+0x14e/0x1b0 [ 263.900029] seq_read+0x43f/0x1150 [ 263.900499] kernfs_fop_read+0xc7/0x5a0 [ 263.903764] vfs_read+0x113/0x350 [ 263.904231] ksys_read+0x103/0x270 [ 263.905230] __x64_sys_read+0x77/0xc0 [ 263.906284] do_syscall_64+0x106/0x360 [ 263.906797] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reproduce this issue as follows: 1. nbd-server 8000 /tmp/disk 2. nbd-client localhost 8000 /dev/nbd1 3. cat /sys/block/nbd1/pid Then trigger use-after-free in pid_show. Reason is after do step '2', nbd-client progress is already exit. So it's task_struct already freed. To solve this issue, revert part of 6521d39a64b3's modify and remove useless 'recv_task' member of nbd_device. Fixes: 6521d39a64b3 ("nbd: Remove variable 'pid'") Signed-off-by: Ye Bin Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20211020073959.2679255-1-yebin10@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0d064fab6186..0ee104fbb628 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -122,10 +122,10 @@ struct nbd_device { struct work_struct remove_work; struct list_head list; - struct task_struct *task_recv; struct task_struct *task_setup; unsigned long flags; + pid_t pid; /* pid of nbd-client, if attached */ char *backend; }; @@ -223,7 +223,7 @@ static ssize_t pid_show(struct device *dev, struct gendisk *disk = dev_to_disk(dev); struct nbd_device *nbd = (struct nbd_device *)disk->private_data; - return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); + return sprintf(buf, "%d\n", nbd->pid); } static const struct device_attribute pid_attr = { @@ -335,7 +335,7 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, nbd->config->bytesize = bytesize; nbd->config->blksize_bits = __ffs(blksize); - if (!nbd->task_recv) + if (!nbd->pid) return 0; if (nbd->config->flags & NBD_FLAG_SEND_TRIM) { @@ -1300,7 +1300,7 @@ static void nbd_config_put(struct nbd_device *nbd) if (test_and_clear_bit(NBD_RT_HAS_PID_FILE, &config->runtime_flags)) device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - nbd->task_recv = NULL; + nbd->pid = 0; if (test_and_clear_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags)) { device_remove_file(disk_to_dev(nbd->disk), &backend_attr); @@ -1341,7 +1341,7 @@ static int nbd_start_device(struct nbd_device *nbd) int num_connections = config->num_connections; int error = 0, i; - if (nbd->task_recv) + if (nbd->pid) return -EBUSY; if (!config->socks) return -EINVAL; @@ -1360,7 +1360,7 @@ static int nbd_start_device(struct nbd_device *nbd) } blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections); - nbd->task_recv = current; + nbd->pid = task_pid_nr(current); nbd_parse_flags(nbd); @@ -1616,8 +1616,8 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) { struct nbd_device *nbd = s->private; - if (nbd->task_recv) - seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); + if (nbd->pid) + seq_printf(s, "recv: %d\n", nbd->pid); return 0; } @@ -2198,7 +2198,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) mutex_lock(&nbd->config_lock); config = nbd->config; if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) || - !nbd->task_recv) { + !nbd->pid) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); ret = -EINVAL; From 169bbdacaa473a2f3abd3ab8170e1c7795931560 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Oct 2021 13:51:18 +0200 Subject: [PATCH 062/117] s390/dasd: handle request magic consistently as unsigned int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Get rid of the rather odd casts to character pointer of the dasd_ccw_req magic member and simply use the unsigned int value unmodified everywhere. Acked-by: Jan Höppner Signed-off-by: Heiko Carstens Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20211020115124.1735254-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_3990_erp.c | 6 +++--- drivers/s390/block/dasd_erp.c | 8 ++++---- drivers/s390/block/dasd_int.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 4691a3c35d72..299001ad9a32 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -201,7 +201,7 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) struct ccw1 *ccw; struct dasd_ccw_req *dctl_cqr; - dctl_cqr = dasd_alloc_erp_request((char *) &erp->magic, 1, + dctl_cqr = dasd_alloc_erp_request(erp->magic, 1, sizeof(struct DCTL_data), device); if (IS_ERR(dctl_cqr)) { @@ -1652,7 +1652,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense) } /* Build new ERP request including DE/LO */ - erp = dasd_alloc_erp_request((char *) &cqr->magic, + erp = dasd_alloc_erp_request(cqr->magic, 2 + 1,/* DE/LO + TIC */ sizeof(struct DE_eckd_data) + sizeof(struct LO_eckd_data), device); @@ -2388,7 +2388,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) } /* allocate additional request block */ - erp = dasd_alloc_erp_request((char *) &cqr->magic, + erp = dasd_alloc_erp_request(cqr->magic, cplength, datasize, device); if (IS_ERR(erp)) { if (cqr->retries <= 0) { diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index ba4fa372d02d..c07e6e713518 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -24,7 +24,7 @@ #include "dasd_int.h" struct dasd_ccw_req * -dasd_alloc_erp_request(char *magic, int cplength, int datasize, +dasd_alloc_erp_request(unsigned int magic, int cplength, int datasize, struct dasd_device * device) { unsigned long flags; @@ -33,8 +33,8 @@ dasd_alloc_erp_request(char *magic, int cplength, int datasize, int size; /* Sanity checks */ - BUG_ON( magic == NULL || datasize > PAGE_SIZE || - (cplength*sizeof(struct ccw1)) > PAGE_SIZE); + BUG_ON(datasize > PAGE_SIZE || + (cplength*sizeof(struct ccw1)) > PAGE_SIZE); size = (sizeof(struct dasd_ccw_req) + 7L) & -8L; if (cplength > 0) @@ -62,7 +62,7 @@ dasd_alloc_erp_request(char *magic, int cplength, int datasize, cqr->data = data; memset(cqr->data, 0, datasize); } - strncpy((char *) &cqr->magic, magic, 4); + cqr->magic = magic; ASCEBC((char *) &cqr->magic, 4); set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); dasd_get_device(device); diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 155428bfed8a..15d8731c1eee 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -887,7 +887,7 @@ void dasd_proc_exit(void); /* externals in dasd_erp.c */ struct dasd_ccw_req *dasd_default_erp_action(struct dasd_ccw_req *); struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *); -struct dasd_ccw_req *dasd_alloc_erp_request(char *, int, int, +struct dasd_ccw_req *dasd_alloc_erp_request(unsigned int, int, int, struct dasd_device *); void dasd_free_erp_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_log_sense(struct dasd_ccw_req *, struct irb *); From 10c78e53eea3d9359d4e2a0a4a17a73aacd85497 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 20 Oct 2021 13:51:19 +0200 Subject: [PATCH 063/117] s390/dasd: fix kernel doc comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this: drivers/s390/block/dasd_ioctl.c:666: warning: Function parameter or member 'disk' not described in 'dasd_biodasdinfo' drivers/s390/block/dasd_ioctl.c:666: warning: Function parameter or member 'info' not described in 'dasd_biodasdinfo' Acked-by: Jan Höppner Signed-off-by: Heiko Carstens Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20211020115124.1735254-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 468cbeb539ff..95349f95758c 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -650,8 +650,8 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, /** * dasd_biodasdinfo() - fill out the dasd information structure - * @disk [in]: pointer to gendisk structure that references a DASD - * @info [out]: pointer to the dasd_information2_t structure + * @disk: [in] pointer to gendisk structure that references a DASD + * @info: [out] pointer to the dasd_information2_t structure * * Provide access to DASD specific information. * The gendisk structure is checked if it belongs to the DASD driver by From 23596961b43752be871ea3a5756c7267f8140cff Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 20 Oct 2021 13:51:20 +0200 Subject: [PATCH 064/117] s390/dasd: split up dasd_eckd_read_conf Move the cabling check out of dasd_eckd_read_conf and split it up into separate functions to improve readability and re-use functions. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20211020115124.1735254-4-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 130 +++++++++++++-------------------- 1 file changed, 51 insertions(+), 79 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 460e0f1cca53..f6ff26472936 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1071,15 +1071,55 @@ static void dasd_eckd_read_fc_security(struct dasd_device *device) } } +static void dasd_eckd_get_uid_string(struct dasd_eckd_private *private, + char *print_uid) +{ + struct dasd_uid *uid; + + uid = &private->uid; + if (strlen(uid->vduit) > 0) + snprintf(print_uid, sizeof(*print_uid), + "%s.%s.%04x.%02x.%s", + uid->vendor, uid->serial, uid->ssid, + uid->real_unit_addr, uid->vduit); + else + snprintf(print_uid, sizeof(*print_uid), + "%s.%s.%04x.%02x", + uid->vendor, uid->serial, uid->ssid, + uid->real_unit_addr); +} + +static int dasd_eckd_check_cabling(struct dasd_device *device, + void *conf_data, __u8 lpm) +{ + struct dasd_eckd_private *private = device->private; + char print_path_uid[60], print_device_uid[60]; + struct dasd_eckd_private path_private; + + path_private.conf_data = conf_data; + path_private.conf_len = DASD_ECKD_RCD_DATA_SIZE; + if (dasd_eckd_identify_conf_parts(&path_private)) + return 1; + + if (dasd_eckd_compare_path_uid(device, &path_private)) { + dasd_eckd_get_uid_string(&path_private, print_path_uid); + dasd_eckd_get_uid_string(private, print_device_uid); + dev_err(&device->cdev->dev, + "Not all channel paths lead to the same device, path %02X leads to device %s instead of %s\n", + lpm, print_path_uid, print_device_uid); + return 1; + } + + return 0; +} + static int dasd_eckd_read_conf(struct dasd_device *device) { void *conf_data; int conf_len, conf_data_saved; int rc, path_err, pos; __u8 lpm, opm; - struct dasd_eckd_private *private, path_private; - struct dasd_uid *uid; - char print_path_uid[60], print_device_uid[60]; + struct dasd_eckd_private *private; private = device->private; opm = ccw_device_get_path_mask(device->cdev); @@ -1123,59 +1163,11 @@ static int dasd_eckd_read_conf(struct dasd_device *device) */ dasd_eckd_generate_uid(device); conf_data_saved++; - } else { - path_private.conf_data = conf_data; - path_private.conf_len = DASD_ECKD_RCD_DATA_SIZE; - if (dasd_eckd_identify_conf_parts( - &path_private)) { - path_private.conf_data = NULL; - path_private.conf_len = 0; - kfree(conf_data); - continue; - } - if (dasd_eckd_compare_path_uid( - device, &path_private)) { - uid = &path_private.uid; - if (strlen(uid->vduit) > 0) - snprintf(print_path_uid, - sizeof(print_path_uid), - "%s.%s.%04x.%02x.%s", - uid->vendor, uid->serial, - uid->ssid, uid->real_unit_addr, - uid->vduit); - else - snprintf(print_path_uid, - sizeof(print_path_uid), - "%s.%s.%04x.%02x", - uid->vendor, uid->serial, - uid->ssid, - uid->real_unit_addr); - uid = &private->uid; - if (strlen(uid->vduit) > 0) - snprintf(print_device_uid, - sizeof(print_device_uid), - "%s.%s.%04x.%02x.%s", - uid->vendor, uid->serial, - uid->ssid, uid->real_unit_addr, - uid->vduit); - else - snprintf(print_device_uid, - sizeof(print_device_uid), - "%s.%s.%04x.%02x", - uid->vendor, uid->serial, - uid->ssid, - uid->real_unit_addr); - dev_err(&device->cdev->dev, - "Not all channel paths lead to " - "the same device, path %02X leads to " - "device %s instead of %s\n", lpm, - print_path_uid, print_device_uid); - path_err = -EINVAL; - dasd_path_add_cablepm(device, lpm); - continue; - } - path_private.conf_data = NULL; - path_private.conf_len = 0; + } else if (dasd_eckd_check_cabling(device, conf_data, lpm)) { + dasd_path_add_cablepm(device, lpm); + path_err = -EINVAL; + kfree(conf_data); + continue; } pos = pathmask_to_pos(lpm); @@ -1300,7 +1292,6 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, struct pe_handler_work_data *data) { struct dasd_eckd_private path_private; - struct dasd_uid *uid; __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE]; __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm; struct dasd_conf_data *conf_data; @@ -1397,19 +1388,8 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, if (rebuild_device_uid(device, data) || dasd_eckd_compare_path_uid( device, &path_private)) { - uid = &path_private.uid; - if (strlen(uid->vduit) > 0) - snprintf(print_uid, sizeof(print_uid), - "%s.%s.%04x.%02x.%s", - uid->vendor, uid->serial, - uid->ssid, uid->real_unit_addr, - uid->vduit); - else - snprintf(print_uid, sizeof(print_uid), - "%s.%s.%04x.%02x", - uid->vendor, uid->serial, - uid->ssid, - uid->real_unit_addr); + dasd_eckd_get_uid_string(&path_private, + print_uid); dev_err(&device->cdev->dev, "The newly added channel path %02X " "will not be used because it leads " @@ -5820,15 +5800,7 @@ static int dasd_eckd_reload_device(struct dasd_device *device) dasd_eckd_get_uid(device, &uid); if (old_base != uid.base_unit_addr) { - if (strlen(uid.vduit) > 0) - snprintf(print_uid, sizeof(print_uid), - "%s.%s.%04x.%02x.%s", uid.vendor, uid.serial, - uid.ssid, uid.base_unit_addr, uid.vduit); - else - snprintf(print_uid, sizeof(print_uid), - "%s.%s.%04x.%02x", uid.vendor, uid.serial, - uid.ssid, uid.base_unit_addr); - + dasd_eckd_get_uid_string(private, print_uid); dev_info(&device->cdev->dev, "An Alias device was reassigned to a new base device " "with UID: %s\n", print_uid); From 74e2f2110258d5cb5f3bcbf3f9813d523eb049b9 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 20 Oct 2021 13:51:21 +0200 Subject: [PATCH 065/117] s390/dasd: move dasd_eckd_read_fc_security dasd_eckd_read_conf is called multiple times during device setup but the fc_security feature needs to be read only once. So move it into the calling function. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20211020115124.1735254-5-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index f6ff26472936..248006291c29 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1189,8 +1189,6 @@ static int dasd_eckd_read_conf(struct dasd_device *device) } } - dasd_eckd_read_fc_security(device); - return path_err; } @@ -2101,6 +2099,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (rc) goto out_err3; + dasd_eckd_read_fc_security(device); dasd_path_create_kobjects(device); /* Read Feature Codes */ @@ -5788,6 +5787,8 @@ static int dasd_eckd_reload_device(struct dasd_device *device) if (rc) goto out_err; + dasd_eckd_read_fc_security(device); + rc = dasd_eckd_generate_uid(device); if (rc) goto out_err; From 542e30ce8e6e1104e99c78a520a821b05b6ea98b Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 20 Oct 2021 13:51:22 +0200 Subject: [PATCH 066/117] s390/dasd: summarize dasd configuration data in a separate structure Summarize the dasd configuration data in a separate structure so that functions that need temporary config data do not need to allocate the whole eckd_private structure. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20211020115124.1735254-6-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 182 ++++++++++++++++----------------- drivers/s390/block/dasd_eckd.h | 13 ++- 2 files changed, 98 insertions(+), 97 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 248006291c29..6cfaaddb4fbb 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -560,8 +560,8 @@ static int prefix_LRE(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, return -EINVAL; } pfxdata->format = format; - pfxdata->base_address = basepriv->ned->unit_addr; - pfxdata->base_lss = basepriv->ned->ID; + pfxdata->base_address = basepriv->conf.ned->unit_addr; + pfxdata->base_lss = basepriv->conf.ned->ID; pfxdata->validity.define_extent = 1; /* private uid is kept up to date, conf_data may be outdated */ @@ -736,32 +736,30 @@ dasd_eckd_cdl_reclen(int recid) return LABEL_SIZE; } /* create unique id from private structure. */ -static void create_uid(struct dasd_eckd_private *private) +static void create_uid(struct dasd_conf *conf, struct dasd_uid *uid) { int count; - struct dasd_uid *uid; - uid = &private->uid; memset(uid, 0, sizeof(struct dasd_uid)); - memcpy(uid->vendor, private->ned->HDA_manufacturer, + memcpy(uid->vendor, conf->ned->HDA_manufacturer, sizeof(uid->vendor) - 1); EBCASC(uid->vendor, sizeof(uid->vendor) - 1); - memcpy(uid->serial, &private->ned->serial, + memcpy(uid->serial, &conf->ned->serial, sizeof(uid->serial) - 1); EBCASC(uid->serial, sizeof(uid->serial) - 1); - uid->ssid = private->gneq->subsystemID; - uid->real_unit_addr = private->ned->unit_addr; - if (private->sneq) { - uid->type = private->sneq->sua_flags; + uid->ssid = conf->gneq->subsystemID; + uid->real_unit_addr = conf->ned->unit_addr; + if (conf->sneq) { + uid->type = conf->sneq->sua_flags; if (uid->type == UA_BASE_PAV_ALIAS) - uid->base_unit_addr = private->sneq->base_unit_addr; + uid->base_unit_addr = conf->sneq->base_unit_addr; } else { uid->type = UA_BASE_DEVICE; } - if (private->vdsneq) { + if (conf->vdsneq) { for (count = 0; count < 16; count++) { sprintf(uid->vduit+2*count, "%02x", - private->vdsneq->uit[count]); + conf->vdsneq->uit[count]); } } } @@ -776,10 +774,10 @@ static int dasd_eckd_generate_uid(struct dasd_device *device) if (!private) return -ENODEV; - if (!private->ned || !private->gneq) + if (!private->conf.ned || !private->conf.gneq) return -ENODEV; spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - create_uid(private); + create_uid(&private->conf, &private->uid); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); return 0; } @@ -803,14 +801,15 @@ static int dasd_eckd_get_uid(struct dasd_device *device, struct dasd_uid *uid) * return 0 for match */ static int dasd_eckd_compare_path_uid(struct dasd_device *device, - struct dasd_eckd_private *private) + struct dasd_conf *path_conf) { struct dasd_uid device_uid; + struct dasd_uid path_uid; - create_uid(private); + create_uid(path_conf, &path_uid); dasd_eckd_get_uid(device, &device_uid); - return memcmp(&device_uid, &private->uid, sizeof(struct dasd_uid)); + return memcmp(&device_uid, &path_uid, sizeof(struct dasd_uid)); } static void dasd_eckd_fill_rcd_cqr(struct dasd_device *device, @@ -946,34 +945,34 @@ out_error: return ret; } -static int dasd_eckd_identify_conf_parts(struct dasd_eckd_private *private) +static int dasd_eckd_identify_conf_parts(struct dasd_conf *conf) { struct dasd_sneq *sneq; int i, count; - private->ned = NULL; - private->sneq = NULL; - private->vdsneq = NULL; - private->gneq = NULL; - count = private->conf_len / sizeof(struct dasd_sneq); - sneq = (struct dasd_sneq *)private->conf_data; + conf->ned = NULL; + conf->sneq = NULL; + conf->vdsneq = NULL; + conf->gneq = NULL; + count = conf->len / sizeof(struct dasd_sneq); + sneq = (struct dasd_sneq *)conf->data; for (i = 0; i < count; ++i) { if (sneq->flags.identifier == 1 && sneq->format == 1) - private->sneq = sneq; + conf->sneq = sneq; else if (sneq->flags.identifier == 1 && sneq->format == 4) - private->vdsneq = (struct vd_sneq *)sneq; + conf->vdsneq = (struct vd_sneq *)sneq; else if (sneq->flags.identifier == 2) - private->gneq = (struct dasd_gneq *)sneq; + conf->gneq = (struct dasd_gneq *)sneq; else if (sneq->flags.identifier == 3 && sneq->res1 == 1) - private->ned = (struct dasd_ned *)sneq; + conf->ned = (struct dasd_ned *)sneq; sneq++; } - if (!private->ned || !private->gneq) { - private->ned = NULL; - private->sneq = NULL; - private->vdsneq = NULL; - private->gneq = NULL; + if (!conf->ned || !conf->gneq) { + conf->ned = NULL; + conf->sneq = NULL; + conf->vdsneq = NULL; + conf->gneq = NULL; return -EINVAL; } return 0; @@ -1016,9 +1015,9 @@ static void dasd_eckd_store_conf_data(struct dasd_device *device, * with the new one if this points to the same data */ cdp = device->path[chp].conf_data; - if (private->conf_data == cdp) { - private->conf_data = (void *)conf_data; - dasd_eckd_identify_conf_parts(private); + if (private->conf.data == cdp) { + private->conf.data = (void *)conf_data; + dasd_eckd_identify_conf_parts(&private->conf); } ccw_device_get_schid(device->cdev, &sch_id); device->path[chp].conf_data = conf_data; @@ -1036,8 +1035,8 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device) struct dasd_eckd_private *private = device->private; int i; - private->conf_data = NULL; - private->conf_len = 0; + private->conf.data = NULL; + private->conf.len = 0; for (i = 0; i < 8; i++) { kfree(device->path[i].conf_data); device->path[i].conf_data = NULL; @@ -1071,22 +1070,22 @@ static void dasd_eckd_read_fc_security(struct dasd_device *device) } } -static void dasd_eckd_get_uid_string(struct dasd_eckd_private *private, +static void dasd_eckd_get_uid_string(struct dasd_conf *conf, char *print_uid) { - struct dasd_uid *uid; + struct dasd_uid uid; - uid = &private->uid; - if (strlen(uid->vduit) > 0) + create_uid(conf, &uid); + if (strlen(uid.vduit) > 0) snprintf(print_uid, sizeof(*print_uid), "%s.%s.%04x.%02x.%s", - uid->vendor, uid->serial, uid->ssid, - uid->real_unit_addr, uid->vduit); + uid.vendor, uid.serial, uid.ssid, + uid.real_unit_addr, uid.vduit); else snprintf(print_uid, sizeof(*print_uid), "%s.%s.%04x.%02x", - uid->vendor, uid->serial, uid->ssid, - uid->real_unit_addr); + uid.vendor, uid.serial, uid.ssid, + uid.real_unit_addr); } static int dasd_eckd_check_cabling(struct dasd_device *device, @@ -1094,16 +1093,16 @@ static int dasd_eckd_check_cabling(struct dasd_device *device, { struct dasd_eckd_private *private = device->private; char print_path_uid[60], print_device_uid[60]; - struct dasd_eckd_private path_private; + struct dasd_conf path_conf; - path_private.conf_data = conf_data; - path_private.conf_len = DASD_ECKD_RCD_DATA_SIZE; - if (dasd_eckd_identify_conf_parts(&path_private)) + path_conf.data = conf_data; + path_conf.len = DASD_ECKD_RCD_DATA_SIZE; + if (dasd_eckd_identify_conf_parts(&path_conf)) return 1; - if (dasd_eckd_compare_path_uid(device, &path_private)) { - dasd_eckd_get_uid_string(&path_private, print_path_uid); - dasd_eckd_get_uid_string(private, print_device_uid); + if (dasd_eckd_compare_path_uid(device, &path_conf)) { + dasd_eckd_get_uid_string(&path_conf, print_path_uid); + dasd_eckd_get_uid_string(&private->conf, print_device_uid); dev_err(&device->cdev->dev, "Not all channel paths lead to the same device, path %02X leads to device %s instead of %s\n", lpm, print_path_uid, print_device_uid); @@ -1149,11 +1148,11 @@ static int dasd_eckd_read_conf(struct dasd_device *device) if (!conf_data_saved) { /* initially clear previously stored conf_data */ dasd_eckd_clear_conf_data(device); - private->conf_data = conf_data; - private->conf_len = conf_len; - if (dasd_eckd_identify_conf_parts(private)) { - private->conf_data = NULL; - private->conf_len = 0; + private->conf.data = conf_data; + private->conf.len = conf_len; + if (dasd_eckd_identify_conf_parts(&private->conf)) { + private->conf.data = NULL; + private->conf.len = 0; kfree(conf_data); continue; } @@ -1203,7 +1202,7 @@ static u32 get_fcx_max_data(struct dasd_device *device) return 0; /* is transport mode supported? */ fcx_in_css = css_general_characteristics.fcx; - fcx_in_gneq = private->gneq->reserved2[7] & 0x04; + fcx_in_gneq = private->conf.gneq->reserved2[7] & 0x04; fcx_in_features = private->features.feature[40] & 0x80; tpm = fcx_in_css && fcx_in_gneq && fcx_in_features; @@ -1272,9 +1271,9 @@ static int rebuild_device_uid(struct dasd_device *device, "returned error %d", rc); break; } - memcpy(private->conf_data, data->rcd_buffer, + memcpy(private->conf.data, data->rcd_buffer, DASD_ECKD_RCD_DATA_SIZE); - if (dasd_eckd_identify_conf_parts(private)) { + if (dasd_eckd_identify_conf_parts(&private->conf)) { rc = -ENODEV; } else /* first valid path is enough */ break; @@ -1289,10 +1288,10 @@ static int rebuild_device_uid(struct dasd_device *device, static void dasd_eckd_path_available_action(struct dasd_device *device, struct pe_handler_work_data *data) { - struct dasd_eckd_private path_private; __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE]; __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm; struct dasd_conf_data *conf_data; + struct dasd_conf path_conf; unsigned long flags; char print_uid[60]; int rc, pos; @@ -1356,11 +1355,11 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, */ memcpy(&path_rcd_buf, data->rcd_buffer, DASD_ECKD_RCD_DATA_SIZE); - path_private.conf_data = (void *) &path_rcd_buf; - path_private.conf_len = DASD_ECKD_RCD_DATA_SIZE; - if (dasd_eckd_identify_conf_parts(&path_private)) { - path_private.conf_data = NULL; - path_private.conf_len = 0; + path_conf.data = (void *)&path_rcd_buf; + path_conf.len = DASD_ECKD_RCD_DATA_SIZE; + if (dasd_eckd_identify_conf_parts(&path_conf)) { + path_conf.data = NULL; + path_conf.len = 0; continue; } @@ -1371,7 +1370,7 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, * the first working path UID will be used as device UID */ if (dasd_path_get_opm(device) && - dasd_eckd_compare_path_uid(device, &path_private)) { + dasd_eckd_compare_path_uid(device, &path_conf)) { /* * the comparison was not successful * rebuild the device UID with at least one @@ -1385,9 +1384,8 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, */ if (rebuild_device_uid(device, data) || dasd_eckd_compare_path_uid( - device, &path_private)) { - dasd_eckd_get_uid_string(&path_private, - print_uid); + device, &path_conf)) { + dasd_eckd_get_uid_string(&path_conf, print_uid); dev_err(&device->cdev->dev, "The newly added channel path %02X " "will not be used because it leads " @@ -1603,8 +1601,8 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device) prssdp = cqr->data; prssdp->order = PSF_ORDER_PRSSD; prssdp->suborder = PSF_SUBORDER_VSQ; /* Volume Storage Query */ - prssdp->lss = private->ned->ID; - prssdp->volume = private->ned->unit_addr; + prssdp->lss = private->conf.ned->ID; + prssdp->volume = private->conf.ned->unit_addr; ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_PSF; @@ -2063,11 +2061,11 @@ dasd_eckd_check_characteristics(struct dasd_device *device) device->path_thrhld = DASD_ECKD_PATH_THRHLD; device->path_interval = DASD_ECKD_PATH_INTERVAL; - if (private->gneq) { + if (private->conf.gneq) { value = 1; - for (i = 0; i < private->gneq->timeout.value; i++) + for (i = 0; i < private->conf.gneq->timeout.value; i++) value = 10 * value; - value = value * private->gneq->timeout.number; + value = value * private->conf.gneq->timeout.number; /* do not accept useless values */ if (value != 0 && value <= DASD_EXPIRES_MAX) device->default_expires = value; @@ -2174,10 +2172,10 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) return; dasd_alias_disconnect_device_from_lcu(device); - private->ned = NULL; - private->sneq = NULL; - private->vdsneq = NULL; - private->gneq = NULL; + private->conf.ned = NULL; + private->conf.sneq = NULL; + private->conf.vdsneq = NULL; + private->conf.gneq = NULL; dasd_eckd_clear_conf_data(device); dasd_path_remove_kobjects(device); } @@ -3729,8 +3727,8 @@ dasd_eckd_dso_ras(struct dasd_device *device, struct dasd_block *block, * subset. */ ras_data->op_flags.guarantee_init = !!(features->feature[56] & 0x01); - ras_data->lss = private->ned->ID; - ras_data->dev_addr = private->ned->unit_addr; + ras_data->lss = private->conf.ned->ID; + ras_data->dev_addr = private->conf.ned->unit_addr; ras_data->nr_exts = nr_exts; if (by_extent) { @@ -4272,8 +4270,8 @@ static int prepare_itcw(struct itcw *itcw, memset(&pfxdata, 0, sizeof(pfxdata)); pfxdata.format = 1; /* PFX with LRE */ - pfxdata.base_address = basepriv->ned->unit_addr; - pfxdata.base_lss = basepriv->ned->ID; + pfxdata.base_address = basepriv->conf.ned->unit_addr; + pfxdata.base_lss = basepriv->conf.ned->ID; pfxdata.validity.define_extent = 1; /* private uid is kept up to date, conf_data may be outdated */ @@ -4942,9 +4940,9 @@ dasd_eckd_fill_info(struct dasd_device * device, info->characteristics_size = sizeof(private->rdc_data); memcpy(info->characteristics, &private->rdc_data, sizeof(private->rdc_data)); - info->confdata_size = min((unsigned long)private->conf_len, - sizeof(info->configuration_data)); - memcpy(info->configuration_data, private->conf_data, + info->confdata_size = min_t(unsigned long, private->conf.len, + sizeof(info->configuration_data)); + memcpy(info->configuration_data, private->conf.data, info->confdata_size); return 0; } @@ -5801,7 +5799,7 @@ static int dasd_eckd_reload_device(struct dasd_device *device) dasd_eckd_get_uid(device, &uid); if (old_base != uid.base_unit_addr) { - dasd_eckd_get_uid_string(private, print_uid); + dasd_eckd_get_uid_string(&private->conf, print_uid); dev_info(&device->cdev->dev, "An Alias device was reassigned to a new base device " "with UID: %s\n", print_uid); @@ -5939,8 +5937,8 @@ static int dasd_eckd_query_host_access(struct dasd_device *device, prssdp->order = PSF_ORDER_PRSSD; prssdp->suborder = PSF_SUBORDER_QHA; /* query host access */ /* LSS and Volume that will be queried */ - prssdp->lss = private->ned->ID; - prssdp->volume = private->ned->unit_addr; + prssdp->lss = private->conf.ned->ID; + prssdp->volume = private->conf.ned->unit_addr; /* all other bytes of prssdp must be zero */ ccw = cqr->cpaddr; diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index 65e4630ad2ae..a91b265441cc 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -658,16 +658,19 @@ struct dasd_conf_data { struct dasd_gneq gneq; } __packed; -struct dasd_eckd_private { - struct dasd_eckd_characteristics rdc_data; - u8 *conf_data; - int conf_len; - +struct dasd_conf { + u8 *data; + int len; /* pointers to specific parts in the conf_data */ struct dasd_ned *ned; struct dasd_sneq *sneq; struct vd_sneq *vdsneq; struct dasd_gneq *gneq; +}; + +struct dasd_eckd_private { + struct dasd_eckd_characteristics rdc_data; + struct dasd_conf conf; struct eckd_count count_area[5]; int init_cqr_status; From 9dffede0115e96d0ff0a07e4382569a9c6dba735 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 20 Oct 2021 13:51:23 +0200 Subject: [PATCH 067/117] s390/dasd: fix missing path conf_data after failed allocation dasd_eckd_path_available_action() does a memory allocation to store the per path configuration data permanently. In the unlikely case that this allocation fails there is no conf_data stored for the corresponding path. This is OK since this is not necessary for an operational path but some features like control unit initiated reconfiguration (CUIR) do not work. To fix this add the path to the 'to be verified pathmask' again and schedule the handler again. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20211020115124.1735254-7-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6cfaaddb4fbb..8410a25a65c1 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1403,6 +1403,14 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, if (conf_data) { memcpy(conf_data, data->rcd_buffer, DASD_ECKD_RCD_DATA_SIZE); + } else { + /* + * path is operational but path config data could not + * be stored due to low mem condition + * add it to the error path mask and schedule a path + * verification later that this could be added again + */ + epm |= lpm; } pos = pathmask_to_pos(lpm); dasd_eckd_store_conf_data(device, conf_data, pos); @@ -1423,7 +1431,10 @@ static void dasd_eckd_path_available_action(struct dasd_device *device, } dasd_path_add_nppm(device, npm); dasd_path_add_ppm(device, ppm); - dasd_path_add_tbvpm(device, epm); + if (epm) { + dasd_path_add_tbvpm(device, epm); + dasd_device_set_timer(device, 50); + } dasd_path_add_cablepm(device, cablepm); dasd_path_add_nohpfpm(device, hpfpm); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); From a8e5d491dfc184c6b78cbb7f44107b01229c9df2 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 20 Oct 2021 13:51:24 +0200 Subject: [PATCH 068/117] s390/dasd: fix possibly missed path verification __dasd_device_check_path_events() calls the discipline path event handler. This handler can leave the 'to be verified pathmask' populated for an additional verification. There is a race window where the worker has finished before dasd_path_clear_all_verify() is called which resets the tbvpm. Due to this there could be outstanding path verifications missed. Fix by clearing the pathmasks before calling the handler and add them again in case of an error. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20211020115124.1735254-8-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 9 ++++++--- drivers/s390/block/dasd_int.h | 9 +++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e34c6cc61983..8e87a31e329d 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2077,12 +2077,15 @@ static void __dasd_device_check_path_events(struct dasd_device *device) if (device->stopped & ~(DASD_STOPPED_DC_WAIT)) return; + + dasd_path_clear_all_verify(device); + dasd_path_clear_all_fcsec(device); + rc = device->discipline->pe_handler(device, tbvpm, fcsecpm); if (rc) { + dasd_path_add_tbvpm(device, tbvpm); + dasd_path_add_fcsecpm(device, fcsecpm); dasd_device_set_timer(device, 50); - } else { - dasd_path_clear_all_verify(device); - dasd_path_clear_all_fcsec(device); } }; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 15d8731c1eee..8b458010f88a 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -1305,6 +1305,15 @@ static inline void dasd_path_add_ppm(struct dasd_device *device, __u8 pm) dasd_path_preferred(device, chp); } +static inline void dasd_path_add_fcsecpm(struct dasd_device *device, __u8 pm) +{ + int chp; + + for (chp = 0; chp < 8; chp++) + if (pm & (0x80 >> chp)) + dasd_path_fcsec(device, chp); +} + /* * set functions for path masks * the existing path mask will be replaced by the given path mask From a307e2abfc22880a3026bc2f2a997402b7c2d833 Mon Sep 17 00:00:00 2001 From: Ding Senjie Date: Wed, 20 Oct 2021 22:38:05 +0800 Subject: [PATCH 069/117] md: bcache: Fix spelling of 'acquire' acqurie -> acquire Signed-off-by: Ding Senjie Reviewed-by: Hannes Reinecke Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-2-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f2874c77ff79..330d6c167265 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2750,7 +2750,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) * The reason bch_register_lock is not held to call * bch_cache_set_stop() and bcache_device_stop() is to * avoid potential deadlock during reboot, because cache - * set or bcache device stopping process will acqurie + * set or bcache device stopping process will acquire * bch_register_lock too. * * We are safe here because bcache_is_reboot sets to From 0a2b3e363566c4cc8792d37c5e73b9d9295e075c Mon Sep 17 00:00:00 2001 From: Coly Li Date: Wed, 20 Oct 2021 22:38:06 +0800 Subject: [PATCH 070/117] bcache: reserve never used bits from bkey.high There sre 3 bits in member high of struct bkey are never used, and no plan to support them in future, - HEADER_SIZE, start at bit 58, length 2 bits - KEY_PINNED, start at bit 55, length 1 bit No any kernel code, or user space tool references or accesses the three bits. Therefore it is possible and feasible to reserve the valuable bits from bkey.high. They can be used in future for other purpose. Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-3-colyli@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/bcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index cf7399f03b71..97413586195b 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -43,9 +43,9 @@ static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \ #define KEY_MAX_U64S 8 KEY_FIELD(KEY_PTRS, high, 60, 3) -KEY_FIELD(HEADER_SIZE, high, 58, 2) +KEY_FIELD(__PAD0, high, 58, 2) KEY_FIELD(KEY_CSUM, high, 56, 2) -KEY_FIELD(KEY_PINNED, high, 55, 1) +KEY_FIELD(__PAD1, high, 55, 1) KEY_FIELD(KEY_DIRTY, high, 36, 1) KEY_FIELD(KEY_SIZE, high, 20, KEY_SIZE_BITS) From d55f7cb2e5c053010d2b527494da9bbb722a78ba Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 20 Oct 2021 22:38:07 +0800 Subject: [PATCH 071/117] bcache: fix error info in register_bcache() In register_bcache(), there are several cases we didn't set correct error info (return value and/or error message): - if kzalloc() fails, it needs to return ENOMEM and print "cannot allocate memory"; - if register_cache() fails, it's better to propagate its return value rather than using default EINVAL. Signed-off-by: Chao Yu Reviewed-by: Hannes Reinecke Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-4-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 330d6c167265..62b0140b0a73 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2617,8 +2617,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) + if (!dc) { + ret = -ENOMEM; + err = "cannot allocate memory"; goto out_put_sb_page; + } mutex_lock(&bch_register_lock); ret = register_bdev(sb, sb_disk, bdev, dc); @@ -2629,11 +2632,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) + if (!ca) { + ret = -ENOMEM; + err = "cannot allocate memory"; goto out_put_sb_page; + } /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(sb, sb_disk, bdev, ca) != 0) + ret = register_cache(sb, sb_disk, bdev, ca); + if (ret) goto out_free_sb; } From 0259d4498ba48454749ecfb9c81e892cdb8d1a32 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Wed, 20 Oct 2021 22:38:08 +0800 Subject: [PATCH 072/117] bcache: move calc_cached_dev_sectors to proper place on backing device detach Calculation of cache_set's cached sectors is done by travelling cached_devs list as shown below: static void calc_cached_dev_sectors(struct cache_set *c) { ... list_for_each_entry(dc, &c->cached_devs, list) sectors += bdev_sectors(dc->bdev); c->cached_dev_sectors = sectors; } But cached_dev won't be unlinked from c->cached_devs list until we call following list_move(&dc->list, &uncached_devices), so previous fix in 'commit 46010141da6677b81cc77f9b47f8ac62bd1cbfd3 ("bcache: recal cached_dev_sectors on detach")' is wrong, now we move it to its right place. Signed-off-by: Lin Feng Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-5-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 62b0140b0a73..dced2ea17431 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1154,9 +1154,9 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_lock(&bch_register_lock); - calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); + calc_cached_dev_sectors(dc->disk.c); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); From 7e84c2150731faec088ebfe33459f61d118b2497 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Oct 2021 22:38:09 +0800 Subject: [PATCH 073/117] bcache: remove the cache_dev_name field from struct cache Just use the %pg format specifier to print the name directly. Signed-off-by: Christoph Hellwig Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-6-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 -- drivers/md/bcache/io.c | 8 ++++---- drivers/md/bcache/super.c | 7 +++---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 5fc989a6d452..47ff9ecea2e2 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -470,8 +470,6 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; - - char cache_dev_name[BDEVNAME_SIZE]; }; struct gc_stat { diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index e4388fe3ab7e..564357de7640 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -123,13 +123,13 @@ void bch_count_io_errors(struct cache *ca, errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) - pr_err("%s: IO error on %s%s\n", - ca->cache_dev_name, m, + pr_err("%pg: IO error on %s%s\n", + ca->bdev, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, - "%s: too many IO errors %s\n", - ca->cache_dev_name, m); + "%pg: too many IO errors %s\n", + ca->bdev, m); } } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dced2ea17431..88cdce218f5c 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2338,7 +2338,7 @@ err_btree_alloc: err_free: module_put(THIS_MODULE); if (err) - pr_notice("error %s: %s\n", ca->cache_dev_name, err); + pr_notice("error %pg: %s\n", ca->bdev, err); return ret; } @@ -2348,7 +2348,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, const char *err = NULL; /* must be set for any error case */ int ret = 0; - bdevname(bdev, ca->cache_dev_name); memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; ca->bdev->bd_holder = ca; @@ -2390,14 +2389,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto out; } - pr_info("registered cache device %s\n", ca->cache_dev_name); + pr_info("registered cache device %pg\n", ca->bdev); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s\n", ca->cache_dev_name, err); + pr_notice("error %pg: %s\n", ca->bdev, err); return ret; } From 0f5cd7815f7f4bb1dd340a9aeb9b9d6a7c7eec22 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Oct 2021 22:38:10 +0800 Subject: [PATCH 074/117] bcache: remove the backing_dev_name field from struct cached_dev Just use the %pg format specifier to print the name directly. Signed-off-by: Christoph Hellwig Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-7-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 -- drivers/md/bcache/debug.c | 4 ++-- drivers/md/bcache/io.c | 8 +++---- drivers/md/bcache/request.c | 4 ++-- drivers/md/bcache/super.c | 48 ++++++++++++++++--------------------- drivers/md/bcache/sysfs.c | 2 +- 6 files changed, 29 insertions(+), 39 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 47ff9ecea2e2..941685409c68 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -395,8 +395,6 @@ struct cached_dev { atomic_t io_errors; unsigned int error_limit; unsigned int offline_seconds; - - char backing_dev_name[BDEVNAME_SIZE]; }; enum alloc_reserve { diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 116edda845c3..e803cad864be 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -137,8 +137,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) p2 + bv.bv_offset, bv.bv_len), dc->disk.c, - "verify failed at dev %s sector %llu", - dc->backing_dev_name, + "verify failed at dev %pg sector %llu", + dc->bdev, (uint64_t) bio->bi_iter.bi_sector); kunmap_atomic(p1); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 564357de7640..9c6f9ec55b72 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -65,15 +65,15 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. */ if (bio->bi_opf & REQ_RAHEAD) { - pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", - dc->backing_dev_name); + pr_warn_ratelimited("%pg: Read-ahead I/O failed on backing device, ignore\n", + dc->bdev); return; } errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) - pr_err("%s: IO error on backing device, unrecoverable\n", - dc->backing_dev_name); + pr_err("%pg: IO error on backing device, unrecoverable\n", + dc->bdev); else bch_cached_dev_error(dc); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 23b28edae90f..f86909a66ac6 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -651,8 +651,8 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - pr_err("Can't flush %s: returned bi_status %i\n", - dc->backing_dev_name, bio->bi_status); + pr_err("Can't flush %pg: returned bi_status %i\n", + dc->bdev, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ s->iop.status = bio->bi_status; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 88cdce218f5c..dc35f6e1d8d3 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1026,8 +1026,8 @@ static int cached_dev_status_update(void *arg) dc->offline_seconds = 0; if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { - pr_err("%s: device offline for %d seconds\n", - dc->backing_dev_name, + pr_err("%pg: device offline for %d seconds\n", + dc->bdev, BACKING_DEV_OFFLINE_TIMEOUT); pr_err("%s: disable I/O request due to backing device offline\n", dc->disk.name); @@ -1058,15 +1058,13 @@ int bch_cached_dev_run(struct cached_dev *dc) }; if (dc->io_disable) { - pr_err("I/O disabled on cached dev %s\n", - dc->backing_dev_name); + pr_err("I/O disabled on cached dev %pg\n", dc->bdev); ret = -EIO; goto out; } if (atomic_xchg(&dc->running, 1)) { - pr_info("cached dev %s is running already\n", - dc->backing_dev_name); + pr_info("cached dev %pg is running already\n", dc->bdev); ret = -EBUSY; goto out; } @@ -1163,7 +1161,7 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s\n", dc->backing_dev_name); + pr_info("Caching disabled for %pg\n", dc->bdev); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -1203,29 +1201,27 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached\n", - dc->backing_dev_name); + pr_err("Can't attach %pg: already attached\n", dc->bdev); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down\n", - dc->backing_dev_name); + pr_err("Can't attach %pg: shutting down\n", dc->bdev); return -EINVAL; } if (dc->sb.block_size < c->cache->sb.block_size) { /* Will die */ - pr_err("Couldn't attach %s: block size less than set's block size\n", - dc->backing_dev_name); + pr_err("Couldn't attach %pg: block size less than set's block size\n", + dc->bdev); return -EINVAL; } /* Check whether already attached */ list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { - pr_err("Tried to attach %s but duplicate UUID already attached\n", - dc->backing_dev_name); + pr_err("Tried to attach %pg but duplicate UUID already attached\n", + dc->bdev); return -EINVAL; } @@ -1243,15 +1239,13 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set\n", - dc->backing_dev_name); + pr_err("Couldn't find uuid for %pg in set\n", dc->bdev); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID\n", - dc->backing_dev_name); + pr_err("Not caching %pg, no room for UUID\n", dc->bdev); return -EINVAL; } } @@ -1319,8 +1313,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, */ kthread_stop(dc->writeback_thread); cancel_writeback_rate_update_dwork(dc); - pr_err("Couldn't run cached device %s\n", - dc->backing_dev_name); + pr_err("Couldn't run cached device %pg\n", dc->bdev); return ret; } @@ -1336,8 +1329,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); - pr_info("Caching %s as %s on set %pU\n", - dc->backing_dev_name, + pr_info("Caching %pg as %s on set %pU\n", + dc->bdev, dc->disk.disk->disk_name, dc->disk.c->set_uuid); return 0; @@ -1461,7 +1454,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, struct cache_set *c; int ret = -ENOMEM; - bdevname(bdev, dc->backing_dev_name); memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; dc->bdev->bd_holder = dc; @@ -1476,7 +1468,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s\n", dc->backing_dev_name); + pr_info("registered backing device %pg\n", dc->bdev); list_add(&dc->list, &uncached_devices); /* attach to a matched cache set if it exists */ @@ -1493,7 +1485,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, return 0; err: - pr_notice("error %s: %s\n", dc->backing_dev_name, err); + pr_notice("error %pg: %s\n", dc->bdev, err); bcache_device_stop(&dc->disk); return ret; } @@ -1621,8 +1613,8 @@ bool bch_cached_dev_error(struct cached_dev *dc) /* make others know io_disable is true earlier */ smp_mb(); - pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, dc->backing_dev_name); + pr_err("stop %s: too many IO errors on backing device %pg\n", + dc->disk.disk->disk_name, dc->bdev); bcache_device_stop(&dc->disk); return true; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 05ac1d6fbbf3..1f0dce30fa75 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -271,7 +271,7 @@ SHOW(__bch_cached_dev) } if (attr == &sysfs_backing_dev_name) { - snprintf(buf, BDEVNAME_SIZE + 1, "%s", dc->backing_dev_name); + snprintf(buf, BDEVNAME_SIZE + 1, "%pg", dc->bdev); strcat(buf, "\n"); return strlen(buf); } From 00387bd21dac98f9e793294c895768d9e5441f82 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Oct 2021 22:38:11 +0800 Subject: [PATCH 075/117] bcache: use bvec_kmap_local in bch_data_verify Using local kmaps slightly reduces the chances to stray writes, and the bvec interface cleans up the code a little bit. Also switch from page_address to bvec_kmap_local for cbv to be on the safe side and to avoid pointlessly poking into bvec internals. Signed-off-by: Christoph Hellwig Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-8-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/debug.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index e803cad864be..6230dfdd9286 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -127,21 +127,20 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) citer.bi_size = UINT_MAX; bio_for_each_segment(bv, bio, iter) { - void *p1 = kmap_atomic(bv.bv_page); + void *p1 = bvec_kmap_local(&bv); void *p2; cbv = bio_iter_iovec(check, citer); - p2 = page_address(cbv.bv_page); + p2 = bvec_kmap_local(&cbv); - cache_set_err_on(memcmp(p1 + bv.bv_offset, - p2 + bv.bv_offset, - bv.bv_len), + cache_set_err_on(memcmp(p1, p2, bv.bv_len), dc->disk.c, "verify failed at dev %pg sector %llu", dc->bdev, (uint64_t) bio->bi_iter.bi_sector); - kunmap_atomic(p1); + kunmap_local(p2); + kunmap_local(p1); bio_advance_iter(check, &citer, bv.bv_len); } From 39fa7a95552cc851029267b97c1317f1dea61cad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Oct 2021 22:38:12 +0800 Subject: [PATCH 076/117] bcache: remove bch_crc64_update bch_crc64_update is an entirely pointless wrapper around crc64_be. Signed-off-by: Christoph Hellwig Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211020143812.6403-9-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/util.h | 8 -------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 0595559de174..93b67b8d31c3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -141,7 +141,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i) uint64_t crc = b->key.ptr[0]; void *data = (void *) i + 8, *end = bset_bkey_last(i); - crc = bch_crc64_update(crc, data, end - data); + crc = crc64_be(crc, data, end - data); return crc ^ 0xffffffffffffffffULL; } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index f86909a66ac6..d15aae6c51c1 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -46,7 +46,7 @@ static void bio_csum(struct bio *bio, struct bkey *k) bio_for_each_segment(bv, bio, iter) { void *d = kmap(bv.bv_page) + bv.bv_offset; - csum = bch_crc64_update(csum, d, bv.bv_len); + csum = crc64_be(csum, d, bv.bv_len); kunmap(bv.bv_page); } diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index b64460a76267..6274d6a17e5e 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -548,14 +548,6 @@ static inline uint64_t bch_crc64(const void *p, size_t len) return crc ^ 0xffffffffffffffffULL; } -static inline uint64_t bch_crc64_update(uint64_t crc, - const void *p, - size_t len) -{ - crc = crc64_be(crc, p, len); - return crc; -} - /* * A stepwise-linear pseudo-exponential. This returns 1 << (x >> * frac_bits), with the less-significant bits filled in by linear From f6f09c15a767fad1206068bc09ec62d3cf273460 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 6 Oct 2021 11:13:13 +0200 Subject: [PATCH 077/117] nvme: generate uevent once a multipath namespace is operational again When fast_io_fail_tmo is set I/O will be aborted while recovery is still ongoing. This causes MD to set the namespace to failed, and no futher I/O will be submitted to that namespace. However, once the recovery succeeds and the namespace becomes operational again the NVMe subsystem doesn't send a notification, so MD cannot automatically reinstate operation and requires manual interaction. This patch will send a KOBJ_CHANGE uevent per multipathed namespace once the underlying controller transitions to LIVE, allowing an automatic MD reassembly with these udev rules: /etc/udev/rules.d/65-md-auto-re-add.rules: SUBSYSTEM!="block", GOTO="md_end" ACTION!="change", GOTO="md_end" ENV{ID_FS_TYPE}!="linux_raid_member", GOTO="md_end" PROGRAM="/sbin/md_raid_auto_readd.sh $devnode" LABEL="md_end" /sbin/md_raid_auto_readd.sh: MDADM=/sbin/mdadm DEVNAME=$1 export $(${MDADM} --examine --export ${DEVNAME}) if [ -z "${MD_UUID}" ]; then exit 1 fi UUID_LINK=$(readlink /dev/disk/by-id/md-uuid-${MD_UUID}) MD_DEVNAME=${UUID_LINK##*/} export $(${MDADM} --detail --export /dev/${MD_DEVNAME}) if [ -z "${MD_METADATA}" ] ; then exit 1 fi if [ $(cat /sys/block/${MD_DEVNAME}/md/degraded) != 1 ]; then echo "${MD_DEVNAME}: array not degraded, nothing to do" exit 0 fi MD_STATE=$(cat /sys/block/${MD_DEVNAME}/md/array_state) if [ ${MD_STATE} != "clean" ] ; then echo "${MD_DEVNAME}: array state ${MD_STATE}, cannot re-add" exit 1 fi MD_VARNAME="MD_DEVICE_dev_${DEVNAME##*/}_ROLE" if [ ${!MD_VARNAME} = "spare" ] ; then ${MDADM} --manage /dev/${MD_DEVNAME} --re-add ${DEVNAME} fi Changes to v2: - Add udev rules example to description Changes to v1: - use disk_uevent() as suggested by hch Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 11440c86881e..22c6b64f4d93 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -105,8 +105,11 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - if (ns->head->disk) - kblockd_schedule_work(&ns->head->requeue_work); + if (!ns->head->disk) + continue; + kblockd_schedule_work(&ns->head->requeue_work); + if (ctrl->state == NVME_CTRL_LIVE) + disk_uevent(ns->head->disk, KOBJ_CHANGE); } up_read(&ctrl->namespaces_rwsem); } From 01d838164b4c305c1cafb0c3f71fb0027d99358b Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 23 Aug 2021 05:56:48 -0700 Subject: [PATCH 078/117] nvme-fc: add support for ->map_queues NVMe FC don't have support for map queues, unlike the PCI, RDMA and TCP transports. Add a ->map_queues callout for the LLDDs to provide such functionality. Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 24 ++++++++++++++++++++++++ include/linux/nvme-fc-driver.h | 7 +++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index aa14ad963d91..34f7a7c236bf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -16,6 +16,7 @@ #include #include "fc.h" #include +#include /* *************************** Data Structures/Defines ****************** */ @@ -2841,6 +2842,28 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } +static int nvme_fc_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_fc_ctrl *ctrl = set->driver_data; + int i; + + for (i = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + if (!map->nr_queues) { + WARN_ON(i == HCTX_TYPE_DEFAULT); + continue; + } + + /* Call LLDD map queue functionality if defined */ + if (ctrl->lport->ops->map_queues) + ctrl->lport->ops->map_queues(&ctrl->lport->localport, + map); + else + blk_mq_map_queues(map); + } + return 0; +} static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, @@ -2849,6 +2872,7 @@ static const struct blk_mq_ops nvme_fc_mq_ops = { .exit_request = nvme_fc_exit_request, .init_hctx = nvme_fc_init_hctx, .timeout = nvme_fc_timeout, + .map_queues = nvme_fc_map_queues, }; static int diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 2a38f2b477a5..cb909edb76c4 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -7,6 +7,7 @@ #define _NVME_FC_DRIVER_H 1 #include +#include /* @@ -497,6 +498,8 @@ struct nvme_fc_port_template { int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *rport, struct nvmefc_ls_rsp *ls_rsp); + void (*map_queues)(struct nvme_fc_local_port *localport, + struct blk_mq_queue_map *map); u32 max_hw_queues; u16 max_sgl_segments; @@ -779,6 +782,10 @@ struct nvmet_fc_target_port { * LS received. * Entrypoint is Mandatory. * + * @map_queues: This functions lets the driver expose the queue mapping + * to the block layer. + * Entrypoint is Optional. + * * @fcp_op: Called to perform a data transfer or transmit a response. * The nvmefc_tgt_fcp_req structure is the same LLDD-supplied * exchange structure specified in the nvmet_fc_rcv_fcp_req() call From 2b2af50ae8367b0655e12ccfe6252d5c1d7ae7bf Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Mon, 23 Aug 2021 05:56:49 -0700 Subject: [PATCH 079/117] qla2xxx: add ->map_queues support for nvme Implement ->map queues and use the block layer blk_mq_pci_map_queues helper for mapping queues to CPUs. With this mapping minimum 10%+ increase in performance is noticed. Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Signed-off-by: Christoph Hellwig --- drivers/scsi/qla2xxx/qla_nvme.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 1c5da2dbd6f9..253055cf9daf 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include static struct nvme_fc_port_template qla_nvme_fc_transport; @@ -642,6 +644,18 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, return rval; } +static void qla_nvme_map_queues(struct nvme_fc_local_port *lport, + struct blk_mq_queue_map *map) +{ + struct scsi_qla_host *vha = lport->private; + int rc; + + rc = blk_mq_pci_map_queues(map, vha->hw->pdev, vha->irq_offset); + if (rc) + ql_log(ql_log_warn, vha, 0x21de, + "pci map queue failed 0x%x", rc); +} + static void qla_nvme_localport_delete(struct nvme_fc_local_port *lport) { struct scsi_qla_host *vha = lport->private; @@ -676,6 +690,7 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = { .ls_abort = qla_nvme_ls_abort, .fcp_io = qla_nvme_post_cmd, .fcp_abort = qla_nvme_fcp_abort, + .map_queues = qla_nvme_map_queues, .max_hw_queues = 8, .max_sgl_segments = 1024, .max_dif_sgl_segments = 64, From e3e19dcc4c416d65f99f13d55be2b787f8d0050e Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 6 Oct 2021 08:09:43 +0000 Subject: [PATCH 080/117] nvmet: fix use-after-free when a port is removed When a port is removed through configfs, any connected controllers are starting teardown flow asynchronously and can still send commands. This causes a use-after-free bug for any command that dereferences req->port (like in nvmet_parse_io_cmd). To fix this, wait for all the teardown scheduled works to complete (like release_work at rdma/tcp drivers). This ensures there are no active controllers when the port is eventually removed. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index be5d82421e3a..496d775c6770 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1553,6 +1553,8 @@ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); + /* Let inflight controllers teardown complete */ + flush_scheduled_work(); list_del(&port->global_entry); kfree(port->ana_state); From fcf73a804c7d6bbf0ea63531c6122aa363852e04 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 6 Oct 2021 08:09:44 +0000 Subject: [PATCH 081/117] nvmet-rdma: fix use-after-free when a port is removed When removing a port, all its controllers are being removed, but there are queues on the port that doesn't belong to any controller (during connection time). This causes a use-after-free bug for any command that dereferences req->port (like in nvmet_alloc_ctrl). Those queues should be destroyed before freeing the port via configfs. Destroy the remaining queues after the RDMA-CM was destroyed guarantees that no new queue will be created. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 38d1f292ecc2..6f75e6be2ce3 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1819,12 +1819,36 @@ restart: mutex_unlock(&nvmet_rdma_queue_mutex); } +static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port) +{ + struct nvmet_rdma_queue *queue, *tmp; + struct nvmet_port *nport = port->nport; + + mutex_lock(&nvmet_rdma_queue_mutex); + list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, + queue_list) { + if (queue->port != nport) + continue; + + list_del_init(&queue->queue_list); + __nvmet_rdma_queue_disconnect(queue); + } + mutex_unlock(&nvmet_rdma_queue_mutex); +} + static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) { struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); if (cm_id) rdma_destroy_id(cm_id); + + /* + * Destroy the remaining queues, which are not belong to any + * controller yet. Do it here after the RDMA-CM was destroyed + * guarantees that no new queue will be created. + */ + nvmet_rdma_destroy_port_queues(port); } static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) From 2351ead99ce9164fb42555aee3f96af84c4839e9 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 6 Oct 2021 08:09:45 +0000 Subject: [PATCH 082/117] nvmet-tcp: fix use-after-free when a port is removed When removing a port, all its controllers are being removed, but there are queues on the port that doesn't belong to any controller (during connection time). This causes a use-after-free bug for any command that dereferences req->port (like in nvmet_alloc_ctrl). Those queues should be destroyed before freeing the port via configfs. Destroy the remaining queues after the accept_work was cancelled guarantees that no new queue will be created. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 07ee347ea3f3..6eb0b3153477 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1737,6 +1737,17 @@ err_port: return ret; } +static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port *port) +{ + struct nvmet_tcp_queue *queue; + + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) + if (queue->port == port) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + mutex_unlock(&nvmet_tcp_queue_mutex); +} + static void nvmet_tcp_remove_port(struct nvmet_port *nport) { struct nvmet_tcp_port *port = nport->priv; @@ -1746,6 +1757,11 @@ static void nvmet_tcp_remove_port(struct nvmet_port *nport) port->sock->sk->sk_user_data = NULL; write_unlock_bh(&port->sock->sk->sk_callback_lock); cancel_work_sync(&port->accept_work); + /* + * Destroy the remaining queues, which are not belong to any + * controller yet. + */ + nvmet_tcp_destroy_port_queues(port); sock_release(port->sock); kfree(port); From 44c3c6257e99c6284f312206de73783575fc8906 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 23 Sep 2021 00:55:35 +0300 Subject: [PATCH 083/117] nvme-rdma: limit the maximal queue size for RDMA controllers Corrent limit of 1024 isn't valid for some of the RDMA based ctrls. In case the target expose a cap of larger amount of entries (e.g. 1024), the initiator may fail to create a QP with this size. Thus limit to a value that works for all RDMA adapters. Future general solution should use RDMA/core API to calculate this size according to device capabilities and number of WRs needed per NVMe IO request. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 7 +++++++ include/linux/nvme-rdma.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1624da3702d4..027ee57cbdb0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1112,6 +1112,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); } + if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) { + dev_warn(ctrl->ctrl.device, + "ctrl sqsize %u > max queue size %u, clamping down\n", + ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE); + ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1; + } + if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { dev_warn(ctrl->ctrl.device, "sqsize %u > ctrl maxcmd %u, clamping down\n", diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 3ec8e50efa16..4dd7e6fe92fb 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,6 +6,8 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H +#define NVME_RDMA_MAX_QUEUE_SIZE 128 + enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, }; From 6d1555cc41c088d738b4968009b32aaeda8542a3 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 23 Sep 2021 00:55:36 +0300 Subject: [PATCH 084/117] nvmet: add get_max_queue_size op for controllers Some transports, such as RDMA, would like to set the queue size according to device/port/ctrl characteristics. Add a new nvmet transport op that is called during ctrl initialization. This will not effect transports that don't implement this option. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 8 +++++--- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b8425fa34300..93107af3310d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1205,7 +1205,10 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) /* CC.EN timeout in 500msec units: */ ctrl->cap |= (15ULL << 24); /* maximum queue entries supported: */ - ctrl->cap |= NVMET_QUEUE_SIZE - 1; + if (ctrl->ops->get_max_queue_size) + ctrl->cap |= ctrl->ops->get_max_queue_size(ctrl) - 1; + else + ctrl->cap |= NVMET_QUEUE_SIZE - 1; if (nvmet_is_passthru_subsys(ctrl->subsys)) nvmet_passthrough_override_cap(ctrl); @@ -1367,6 +1370,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, mutex_init(&ctrl->lock); ctrl->port = req->port; + ctrl->ops = req->ops; INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); INIT_LIST_HEAD(&ctrl->async_events); @@ -1405,8 +1409,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, } ctrl->cntlid = ret; - ctrl->ops = req->ops; - /* * Discovery controllers may use some arbitrary high value * in order to cleanup stale discovery sessions diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7143c7fa7464..f8e0ee131dc6 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -309,6 +309,7 @@ struct nvmet_fabrics_ops { u16 (*install_queue)(struct nvmet_sq *nvme_sq); void (*discovery_chg)(struct nvmet_port *port); u8 (*get_mdts)(const struct nvmet_ctrl *ctrl); + u16 (*get_max_queue_size)(const struct nvmet_ctrl *ctrl); }; #define NVMET_MAX_INLINE_BIOVEC 8 From c7d792f9b8b0502c807ecda57aeb5eac70cc7ab9 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 23 Sep 2021 00:55:37 +0300 Subject: [PATCH 085/117] nvmet-rdma: implement get_max_queue_size controller op Limit the maximal queue size for RDMA controllers. Today, the target reports a limit of 1024 and this limit isn't valid for some of the RDMA based controllers. For now, limit RDMA transport to 128 entries (the max queue depth configured for Linux NVMe/RDMA host). Future general solution should use RDMA/core API to calculate this size according to device capabilities and number of WRs needed per NVMe IO request. Reported-by: Mark Ruijter Reviewed-by: Chaitanya Kulkarni Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 6f75e6be2ce3..1deb4043e242 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -2000,6 +2000,11 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) return NVMET_RDMA_MAX_MDTS; } +static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl) +{ + return NVME_RDMA_MAX_QUEUE_SIZE; +} + static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, @@ -2011,6 +2016,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .delete_ctrl = nvmet_rdma_delete_ctrl, .disc_traddr = nvmet_rdma_disc_port_addr, .get_mdts = nvmet_rdma_get_mdts, + .get_max_queue_size = nvmet_rdma_get_max_queue_size, }; static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) From 626851e9225df93eda00f6b256cb74ad0860e418 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:19 +0200 Subject: [PATCH 086/117] nvmet: make discovery NQN configurable TPAR8013 allows for unique discovery NQNs, so make the discovery controller NQN configurable by exposing a subsys attribute 'discovery_nqn'. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 39 ++++++++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 3 ++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 496d775c6770..091a0ca16361 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1233,6 +1233,44 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); +static ssize_t nvmet_subsys_attr_discovery_nqn_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", + nvmet_disc_subsys->subsysnqn); +} + +static ssize_t nvmet_subsys_attr_discovery_nqn_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + char *subsysnqn; + int len; + + len = strcspn(page, "\n"); + if (!len) + return -EINVAL; + + subsysnqn = kmemdup_nul(page, len, GFP_KERNEL); + if (!subsysnqn) + return -ENOMEM; + + /* + * The discovery NQN must be different from subsystem NQN. + */ + if (!strcmp(subsysnqn, subsys->subsysnqn)) { + kfree(subsysnqn); + return -EBUSY; + } + down_write(&nvmet_config_sem); + kfree(nvmet_disc_subsys->subsysnqn); + nvmet_disc_subsys->subsysnqn = subsysnqn; + up_write(&nvmet_config_sem); + + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_discovery_nqn); + #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, char *page) @@ -1262,6 +1300,7 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, + &nvmet_subsys_attr_attr_discovery_nqn, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_subsys_attr_attr_pi_enable, #endif diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 93107af3310d..e1f41436ea2c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1493,7 +1493,8 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, if (!port) return NULL; - if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) { + if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn) || + !strcmp(nvmet_disc_subsys->subsysnqn, subsysnqn)) { if (!kref_get_unless_zero(&nvmet_disc_subsys->ref)) return NULL; return nvmet_disc_subsys; From e15a8a9755659ff5972f30de4dd64867c97f242d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:20 +0200 Subject: [PATCH 087/117] nvme: add CNTRLTYPE definitions for 'identify controller' Update the 'identify controller' structure to define the newly added CNTRLTYPE field. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b7c4c4130b65..ed2428918bca 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -31,6 +31,12 @@ enum nvme_subsys_type { NVME_NQN_NVME = 2, /* NVME type target subsystem */ }; +enum nvme_ctrl_type { + NVME_CTRL_IO = 1, /* I/O controller */ + NVME_CTRL_DISC = 2, /* Discovery controller */ + NVME_CTRL_ADMIN = 3, /* Administrative controller */ +}; + /* Address Family codes for Discovery Log Page entry ADRFAM field */ enum { NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ @@ -244,7 +250,9 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[28]; + __u8 rsvd100[11]; + __u8 cntrltype; + __u8 fguid[16]; __le16 crdt1; __le16 crdt2; __le16 crdt3; From a294711ed5123f757ed8ed2f103c851b8ee416c9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:21 +0200 Subject: [PATCH 088/117] nvmet: add nvmet_is_disc_subsys() helper Add a helper function to determine if a given subsystem is a discovery subsystem. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/core.c | 6 +++--- drivers/nvme/target/nvmet.h | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index aa6d84d8848e..b653ea4244fe 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1008,7 +1008,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) if (nvme_is_fabrics(cmd)) return nvmet_parse_fabrics_cmd(req); - if (nvmet_req_subsys(req)->type == NVME_NQN_DISC) + if (nvmet_is_disc_subsys(nvmet_req_subsys(req))) return nvmet_parse_discovery_cmd(req); ret = nvmet_check_ctrl_status(req); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index e1f41436ea2c..d844fb9ef2eb 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1140,7 +1140,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) * should verify iosqes,iocqes are zeroed, however that * would break backwards compatibility, so don't enforce it. */ - if (ctrl->subsys->type != NVME_NQN_DISC && + if (!nvmet_is_disc_subsys(ctrl->subsys) && (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { ctrl->csts = NVME_CSTS_CFS; @@ -1281,7 +1281,7 @@ bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn) if (subsys->allow_any_host) return true; - if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */ + if (nvmet_is_disc_subsys(subsys)) /* allow all access to disc subsys */ return true; list_for_each_entry(p, &subsys->hosts, entry) { @@ -1413,7 +1413,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, * Discovery controllers may use some arbitrary high value * in order to cleanup stale discovery sessions */ - if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato) + if (nvmet_is_disc_subsys(ctrl->subsys) && !kato) kato = NVMET_DISC_KATO_MS; /* keep-alive timeout in seconds */ diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f8e0ee131dc6..f31dcc4fb1a2 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -577,6 +577,11 @@ static inline struct nvmet_subsys *nvmet_req_subsys(struct nvmet_req *req) return req->sq->ctrl->subsys; } +static inline bool nvmet_is_disc_subsys(struct nvmet_subsys *subsys) +{ + return subsys->type == NVME_NQN_DISC; +} + #ifdef CONFIG_NVME_TARGET_PASSTHRU void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys); int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys); From d3aef70124e7f69975eabeb340866bc91672532d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:22 +0200 Subject: [PATCH 089/117] nvmet: set 'CNTRLTYPE' in the identify controller data Set the correct 'CNTRLTYPE' field in the identify controller data. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 5 +++++ drivers/nvme/target/discovery.c | 2 ++ drivers/nvme/target/fabrics-cmd.c | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index b653ea4244fe..3616a06f7836 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -374,6 +374,11 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->rab = 6; + if (nvmet_is_disc_subsys(ctrl->subsys)) + id->cntrltype = NVME_CTRL_DISC; + else + id->cntrltype = NVME_CTRL_IO; + /* * XXX: figure out how we can assign a IEEE OUI, but until then * the safest is to leave it as zeroes. diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 7aa62bc6ae84..7b360f8d07e9 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -268,6 +268,8 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) memcpy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE), ' '); + id->cntrltype = NVME_CTRL_DISC; + /* no limit on data transfer sizes for now */ id->mdts = 0; id->cntlid = cpu_to_le16(ctrl->cntlid); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 7d0454cee920..70fb587e9413 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -221,7 +221,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - pr_info("creating controller %d for subsystem %s for NQN %s%s.\n", + pr_info("creating %s controller %d for subsystem %s for NQN %s%s.\n", + nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm", ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, ctrl->pi_support ? " T10-PI is enabled" : ""); req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); From 954ae16681f6bdf684f016ca626329302a38e177 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:23 +0200 Subject: [PATCH 090/117] nvme: expose subsystem type in sysfs attribute 'subsystype' With unique discovery controller NQNs we cannot distinguish the subsystem type by the NQN alone, but need to check the subsystem type, too. So expose the subsystem type in a new sysfs attribute 'subsystype'. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 27 +++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 28 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c415c3faf420..0f0f64db1a91 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2620,6 +2620,24 @@ static ssize_t nvme_subsys_show_nqn(struct device *dev, } static SUBSYS_ATTR_RO(subsysnqn, S_IRUGO, nvme_subsys_show_nqn); +static ssize_t nvme_subsys_show_type(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_subsystem *subsys = + container_of(dev, struct nvme_subsystem, dev); + + switch (subsys->subtype) { + case NVME_NQN_DISC: + return sysfs_emit(buf, "discovery\n"); + case NVME_NQN_NVME: + return sysfs_emit(buf, "nvm\n"); + default: + return sysfs_emit(buf, "reserved\n"); + } +} +static SUBSYS_ATTR_RO(subsystype, S_IRUGO, nvme_subsys_show_type); + #define nvme_subsys_show_str_function(field) \ static ssize_t subsys_##field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -2640,6 +2658,7 @@ static struct attribute *nvme_subsys_attrs[] = { &subsys_attr_serial.attr, &subsys_attr_firmware_rev.attr, &subsys_attr_subsysnqn.attr, + &subsys_attr_subsystype.attr, #ifdef CONFIG_NVME_MULTIPATH &subsys_attr_iopolicy.attr, #endif @@ -2710,6 +2729,14 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + + /* Versions prior to 1.4 don't necessarily report a valid type */ + if (id->cntrltype == NVME_CTRL_DISC || + !strcmp(subsys->subnqn, NVME_DISC_SUBSYS_NAME)) + subsys->subtype = NVME_NQN_DISC; + else + subsys->subtype = NVME_NQN_NVME; + subsys->awupf = le16_to_cpu(id->awupf); #ifdef CONFIG_NVME_MULTIPATH subsys->iopolicy = NVME_IOPOLICY_NUMA; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ef2467b93adb..ba3edd4d02f0 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -372,6 +372,7 @@ struct nvme_subsystem { char model[40]; char firmware_rev[8]; u8 cmic; + enum nvme_subsys_type subtype; u16 vendor_id; u16 awupf; /* 0's based awupf value. */ struct ida ns_ida; From 20e8b689c9088027b7495ffd6f80812c11ecc872 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:24 +0200 Subject: [PATCH 091/117] nvme: Add connect option 'discovery' Add a connect option 'discovery' to specify that the connection should be made to a discovery controller, not a normal I/O controller. With discovery controllers supporting unique subsystem NQNs we cannot easily distinguish by the subsystem NQN if this should be a discovery connection, but we need this information to blank out options not supported by discovery controllers. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 +++++++ drivers/nvme/host/fabrics.c | 6 +++++- drivers/nvme/host/fabrics.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0f0f64db1a91..fd856eec0912 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2737,6 +2737,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) else subsys->subtype = NVME_NQN_NVME; + if (nvme_discovery_ctrl(ctrl) && subsys->subtype != NVME_NQN_DISC) { + dev_err(ctrl->device, + "Subsystem %s is not a discovery controller", + subsys->subnqn); + kfree(subsys); + return -EINVAL; + } subsys->awupf = le16_to_cpu(id->awupf); #ifdef CONFIG_NVME_MULTIPATH subsys->iopolicy = NVME_IOPOLICY_NUMA; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 668c6bb7a567..c5a2b71c5268 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -548,6 +548,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_TOS, "tos=%d" }, { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, + { NVMF_OPT_DISCOVERY, "discovery" }, { NVMF_OPT_ERR, NULL } }; @@ -823,6 +824,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->tos = token; break; + case NVMF_OPT_DISCOVERY: + opts->discovery_nqn = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -949,7 +953,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\ - NVMF_OPT_DISABLE_SQFLOW |\ + NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\ NVMF_OPT_FAIL_FAST_TMO) static struct nvme_ctrl * diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a146cb903869..b61b666e10ec 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -67,6 +67,7 @@ enum { NVMF_OPT_TOS = 1 << 19, NVMF_OPT_FAIL_FAST_TMO = 1 << 20, NVMF_OPT_HOST_IFACE = 1 << 21, + NVMF_OPT_DISCOVERY = 1 << 22, }; /** From e5ea42faa773c6a6bb5d9e9f5c2cc808940b5a55 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Sep 2021 08:35:25 +0200 Subject: [PATCH 092/117] nvme: display correct subsystem NQN With discovery controllers supporting unique subsystem NQNs the actual subsystem NQN might be different from that one passed in via the connect args. So add a helper to display the resulting subsystem NQN. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fabrics.h | 7 +++++++ drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 2 +- 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fd856eec0912..3825b596ca16 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -222,7 +222,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) { dev_info(ctrl->device, - "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn); + "Removing ctrl: NQN \"%s\"\n", nvmf_ctrl_subsysnqn(ctrl)); flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index b61b666e10ec..c3203ff1c654 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -179,6 +179,13 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, return true; } +static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl) +{ + if (!ctrl->subsys) + return ctrl->opts->subsysnqn; + return ctrl->subsys->subnqn; +} + int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 34f7a7c236bf..be9892894849 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3596,7 +3596,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, dev_info(ctrl->ctrl.device, "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl)); return &ctrl->ctrl; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 027ee57cbdb0..5ba386f00c3f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2393,7 +2393,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_uninit_ctrl; dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", - ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9ce3458ee1dd..07156ea9d1a8 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2582,7 +2582,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, goto out_uninit_ctrl; dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", - ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); From 571b5444d1eee112e82eebd0cf877dc510b8b5a5 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 23 Sep 2021 13:17:43 +0300 Subject: [PATCH 093/117] nvmet: use macro definition for setting nmic value This makes the code more readable. Signed-off-by: Max Gurtovoy Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3616a06f7836..dce53030b375 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -541,7 +541,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) * Our namespace might always be shared. Not just with other * controllers, but also with any other user of the block device. */ - id->nmic = (1 << 0); + id->nmic = NVME_NS_NMIC_SHARED; id->anagrpid = cpu_to_le32(req->ns->anagrpid); memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid)); From d56ae18f063e38eba47550c632519c9fe3f76b19 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 23 Sep 2021 13:17:44 +0300 Subject: [PATCH 094/117] nvmet: use macro definitions for setting cmic value This makes the code more readable. Signed-off-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 3 ++- include/linux/nvme.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dce53030b375..6e75890bc8d9 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -385,7 +385,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) */ /* we support multiple ports, multiples hosts and ANA: */ - id->cmic = (1 << 0) | (1 << 1) | (1 << 3); + id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL | + NVME_CTRL_CMIC_ANA; /* Limit MDTS according to transport capability */ if (ctrl->ops->get_mdts) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index ed2428918bca..357482dedb59 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -320,6 +320,7 @@ struct nvme_id_ctrl { }; enum { + NVME_CTRL_CMIC_MULTI_PORT = 1 << 0, NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, NVME_CTRL_CMIC_ANA = 1 << 3, NVME_CTRL_ONCS_COMPARE = 1 << 0, From 11384580e3322b41dbaa68dbcd949af875b8aa22 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:52:08 -0700 Subject: [PATCH 095/117] nvme-multipath: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Since we now can tell for sure when a disk was added, move setting the bit NVME_NSHEAD_DISK_LIVE only when we did add the disk successfully. Nothing to do here as the cleanup is done elsewhere. We take care and use test_and_set_bit() because it is protects against two nvme paths simultaneously calling device_add_disk() on the same namespace head. Signed-off-by: Luis Chamberlain Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 22c6b64f4d93..bc1e7d8a2c0f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -509,13 +509,23 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) static void nvme_mpath_set_live(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; + int rc; if (!head->disk) return; + /* + * test_and_set_bit() is used because it is protecting against two nvme + * paths simultaneously calling device_add_disk() on the same namespace + * head. + */ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { - device_add_disk(&head->subsys->dev, head->disk, - nvme_ns_id_attr_groups); + rc = device_add_disk(&head->subsys->dev, head->disk, + nvme_ns_id_attr_groups); + if (rc) { + clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags); + return; + } nvme_add_ns_head_cdev(head); } From 09748122009aed7bfaa7acc33c10c083a4758322 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 17 Oct 2021 11:58:16 +0300 Subject: [PATCH 096/117] nvme-rdma: fix error code in nvme_rdma_setup_ctrl In case that icdoff is not zero or mandatory keyed sgls are not supported by the NVMe/RDMA target, we'll go to error flow but we'll return 0 to the caller. Fix it by returning an appropriate error code. Fixes: c66e2998c8ca ("nvme-rdma: centralize controller setup sequence") Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5ba386f00c3f..9317f26e51e0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1096,11 +1096,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) return ret; if (ctrl->ctrl.icdoff) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); goto destroy_admin; } if (!(ctrl->ctrl.sgls & (1 << 2))) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported!\n"); goto destroy_admin; From 58847f12fe7823c56f844218abcca6920901097d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 14 Oct 2021 09:45:42 -0700 Subject: [PATCH 097/117] nvme-pci: clear shadow doorbell memory on resets The host memory doorbell and event buffers need to be initialized on each reset so the driver doesn't observe stale values from the previous instantiation. Signed-off-by: Keith Busch Tested-by: John Levon Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ed684874842f..6e05cfb4879f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -245,8 +245,15 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { unsigned int mem_size = nvme_dbbuf_size(dev); - if (dev->dbbuf_dbs) + if (dev->dbbuf_dbs) { + /* + * Clear the dbbuf memory so the driver doesn't observe stale + * values from the previous instantiation. + */ + memset(dev->dbbuf_dbs, 0, mem_size); + memset(dev->dbbuf_eis, 0, mem_size); return 0; + } dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, &dev->dbbuf_dbs_dma_addr, From 2b81a5f015199f3d585ce710190a9e87714d3c1e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 20 Oct 2021 07:59:10 +0200 Subject: [PATCH 098/117] nvme: drop scan_lock and always kick requeue list when removing namespaces When reading the partition table on initial scan hits an I/O error the I/O will hang with the scan_mutex held: [<0>] do_read_cache_page+0x49b/0x790 [<0>] read_part_sector+0x39/0xe0 [<0>] read_lba+0xf9/0x1d0 [<0>] efi_partition+0xf1/0x7f0 [<0>] bdev_disk_changed+0x1ee/0x550 [<0>] blkdev_get_whole+0x81/0x90 [<0>] blkdev_get_by_dev+0x128/0x2e0 [<0>] device_add_disk+0x377/0x3c0 [<0>] nvme_mpath_set_live+0x130/0x1b0 [nvme_core] [<0>] nvme_mpath_add_disk+0x150/0x160 [nvme_core] [<0>] nvme_alloc_ns+0x417/0x950 [nvme_core] [<0>] nvme_validate_or_alloc_ns+0xe9/0x1e0 [nvme_core] [<0>] nvme_scan_work+0x168/0x310 [nvme_core] [<0>] process_one_work+0x231/0x420 and trying to delete the controller will deadlock as it tries to grab the scan mutex: [<0>] nvme_mpath_clear_ctrl_paths+0x25/0x80 [nvme_core] [<0>] nvme_remove_namespaces+0x31/0xf0 [nvme_core] [<0>] nvme_do_delete_ctrl+0x4b/0x80 [nvme_core] As we're now properly ordering the namespace list there is no need to hold the scan_mutex in nvme_mpath_clear_ctrl_paths() anymore. And we always need to kick the requeue list as the path will be marked as unusable and I/O will be requeued _without_ a current path. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index bc1e7d8a2c0f..954e84df6eb7 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -146,13 +146,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->scan_lock); down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - if (nvme_mpath_clear_current_path(ns)) - kblockd_schedule_work(&ns->head->requeue_work); + list_for_each_entry(ns, &ctrl->namespaces, list) { + nvme_mpath_clear_current_path(ns); + kblockd_schedule_work(&ns->head->requeue_work); + } up_read(&ctrl->namespaces_rwsem); - mutex_unlock(&ctrl->scan_lock); } void nvme_mpath_revalidate_paths(struct nvme_ns *ns) From 117d5b6d00ee02f73d7065fe906e2ef1af74bb68 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 17 Oct 2021 11:56:50 +0200 Subject: [PATCH 099/117] nvmet: use struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. In this case this is not actually dynamic size: all the operands involved in the calculation are constant values. However it is better to refactor this anyway, just to keep the open-coded math idiom out of code. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kmalloc() function. This code was detected with the help of Coccinelle and audited and fixed manually. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6e75890bc8d9..403de678fd06 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -278,8 +278,8 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) u16 status; status = NVME_SC_INTERNAL; - desc = kmalloc(sizeof(struct nvme_ana_group_desc) + - NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL); + desc = kmalloc(struct_size(desc, nsids, NVMET_MAX_NAMESPACES), + GFP_KERNEL); if (!desc) goto out; From ff06ed7e815ccd3f80cfd4d5dd30687bc722e871 Mon Sep 17 00:00:00 2001 From: Ye Guojin Date: Thu, 21 Oct 2021 06:49:31 +0000 Subject: [PATCH 100/117] block: aoe: fixup coccinelle warnings coccicheck complains about the use of snprintf() in sysfs show functions: WARNING use scnprintf or sprintf Use sysfs_emit instead of scnprintf or sprintf makes more sense. Reported-by: Zeal Robot Signed-off-by: Ye Guojin Link: https://lore.kernel.org/r/20211021064931.1047687-1-ye.guojin@zte.com.cn Signed-off-by: Jens Axboe --- drivers/block/aoe/aoeblk.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index e436b0e8eff5..52484bcdedb9 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -37,8 +37,7 @@ static ssize_t aoedisk_show_state(struct device *dev, struct gendisk *disk = dev_to_disk(dev); struct aoedev *d = disk->private_data; - return snprintf(page, PAGE_SIZE, - "%s%s\n", + return sysfs_emit(page, "%s%s\n", (d->flags & DEVFL_UP) ? "up" : "down", (d->flags & DEVFL_KICKME) ? ",kickme" : (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : ""); @@ -52,8 +51,8 @@ static ssize_t aoedisk_show_mac(struct device *dev, struct aoetgt *t = d->targets[0]; if (t == NULL) - return snprintf(page, PAGE_SIZE, "none\n"); - return snprintf(page, PAGE_SIZE, "%pm\n", t->addr); + return sysfs_emit(page, "none\n"); + return sysfs_emit(page, "%pm\n", t->addr); } static ssize_t aoedisk_show_netif(struct device *dev, struct device_attribute *attr, char *page) @@ -85,7 +84,7 @@ static ssize_t aoedisk_show_netif(struct device *dev, ne = nd; nd = nds; if (*nd == NULL) - return snprintf(page, PAGE_SIZE, "none\n"); + return sysfs_emit(page, "none\n"); for (p = page; nd < ne; nd++) p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s", p == page ? "" : ",", (*nd)->name); @@ -99,7 +98,7 @@ static ssize_t aoedisk_show_fwver(struct device *dev, struct gendisk *disk = dev_to_disk(dev); struct aoedev *d = disk->private_data; - return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); + return sysfs_emit(page, "0x%04x\n", (unsigned int) d->fw_ver); } static ssize_t aoedisk_show_payload(struct device *dev, struct device_attribute *attr, char *page) @@ -107,7 +106,7 @@ static ssize_t aoedisk_show_payload(struct device *dev, struct gendisk *disk = dev_to_disk(dev); struct aoedev *d = disk->private_data; - return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt); + return sysfs_emit(page, "%lu\n", d->maxbcnt); } static int aoedisk_debugfs_show(struct seq_file *s, void *ignored) From e7089f65dd51afeda5eb760506b5950d95f9ec29 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:22 -0700 Subject: [PATCH 101/117] dm: add add_disk() error handling We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. There are two calls to dm_setup_md_queue() which can fail then, one on dm_early_create() and we can easily see that the error path there calls dm_destroy in the error path. The other use case is on the ioctl table_load case. If that fails userspace needs to call the DM_DEV_REMOVE_CMD to cleanup the state - similar to any other failure. Reviewed-by: Hannes Reinecke Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-4-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/md/dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7870e6460633..79d4ac4aab05 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2078,7 +2078,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) if (r) return r; - add_disk(md->disk); + r = add_disk(md->disk); + if (r) + return r; r = dm_sysfs_init(md); if (r) { From 2961c3bbcaec0ed7fb7b9a465b3796f37f2294e5 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:23 -0700 Subject: [PATCH 102/117] bcache: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. This driver doesn't do any unwinding with blk_cleanup_disk() even on errors after add_disk() and so we follow that tradition. Acked-by: Coly Li Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-5-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dc35f6e1d8d3..84a48eed8e24 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1080,7 +1080,9 @@ int bch_cached_dev_run(struct cached_dev *dc) closure_sync(&cl); } - add_disk(d->disk); + ret = add_disk(d->disk); + if (ret) + goto out; bd_link_disk_holder(dc->bdev, dc->disk.disk); /* * won't show up in the uevent file, use udevadm monitor -e instead @@ -1526,10 +1528,11 @@ static void flash_dev_flush(struct closure *cl) static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) { + int err = -ENOMEM; struct bcache_device *d = kzalloc(sizeof(struct bcache_device), GFP_KERNEL); if (!d) - return -ENOMEM; + goto err_ret; closure_init(&d->cl, NULL); set_closure_fn(&d->cl, flash_dev_flush, system_wq); @@ -1543,9 +1546,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) bcache_device_attach(d, c, u - c->uuids); bch_sectors_dirty_init(d); bch_flash_dev_request_init(d); - add_disk(d->disk); + err = add_disk(d->disk); + if (err) + goto err; - if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) + err = kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"); + if (err) goto err; bcache_device_link(d, c, "volume"); @@ -1559,7 +1565,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) return 0; err: kobject_put(&d->kobj); - return -ENOMEM; +err_ret: + return err; } static int flash_devs_run(struct cache_set *c) From 293a7c528803321479593d42d0898bb5a9769db1 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:24 -0700 Subject: [PATCH 103/117] xen-blkfront: add error handling support for add_disk() We never checked for errors on device_add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. The function xlvbd_alloc_gendisk() typically does the unwinding on error on allocating the disk and creating the tag, but since all that error handling was stuffed inside xlvbd_alloc_gendisk() we must repeat the tag free'ing as well. We set the info->rq to NULL to ensure blkif_free() doesn't crash on blk_mq_stop_hw_queues() on device_add_disk() error as the queue will be long gone by then. Reviewed-by: Juergen Gross Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-6-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index df0deb927760..8e3983e456f3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2386,7 +2386,13 @@ static void blkfront_connect(struct blkfront_info *info) for_each_rinfo(info, rinfo, i) kick_pending_request_queues(rinfo); - device_add_disk(&info->xbdev->dev, info->gd, NULL); + err = device_add_disk(&info->xbdev->dev, info->gd, NULL); + if (err) { + blk_cleanup_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + info->rq = NULL; + goto fail; + } info->is_ready = 1; return; From 21fd880d3da7564bab68979417cab7408e4f9642 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:25 -0700 Subject: [PATCH 104/117] m68k/emu/nfblock: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-7-mcgrof@kernel.org Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 4ef457ba5220..9c57b245dc12 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -99,6 +99,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) { struct nfhd_device *dev; int dev_id = id - NFHD_DEV_OFFSET; + int err = -ENOMEM; pr_info("nfhd%u: found device with %u blocks (%u bytes)\n", dev_id, blocks, bsize); @@ -129,16 +130,20 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) sprintf(dev->disk->disk_name, "nfhd%u", dev_id); set_capacity(dev->disk, (sector_t)blocks * (bsize / 512)); blk_queue_logical_block_size(dev->disk->queue, bsize); - add_disk(dev->disk); + err = add_disk(dev->disk); + if (err) + goto out_cleanup_disk; list_add_tail(&dev->list, &nfhd_list); return 0; +out_cleanup_disk: + blk_cleanup_disk(dev->disk); free_dev: kfree(dev); out: - return -ENOMEM; + return err; } static int __init nfhd_init(void) From 66638f163a2b5c5b462ca38525129b14a20117eb Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:26 -0700 Subject: [PATCH 105/117] um/drivers/ubd_kern: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. ubd_disk_register() never returned an error, so just fix that now and let the caller handle the error condition. Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-8-mcgrof@kernel.org Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index fefd343412c7..69d2d0049a61 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -855,8 +855,8 @@ static const struct attribute_group *ubd_attr_groups[] = { NULL, }; -static void ubd_disk_register(int major, u64 size, int unit, - struct gendisk *disk) +static int ubd_disk_register(int major, u64 size, int unit, + struct gendisk *disk) { disk->major = major; disk->first_minor = unit << UBD_SHIFT; @@ -873,7 +873,7 @@ static void ubd_disk_register(int major, u64 size, int unit, disk->private_data = &ubd_devs[unit]; disk->queue = ubd_devs[unit].queue; - device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); + return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); } #define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) @@ -920,10 +920,15 @@ static int ubd_add(int n, char **error_out) blk_queue_write_cache(ubd_dev->queue, true, false); blk_queue_max_segments(ubd_dev->queue, MAX_SG); blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); - ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); + err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); + if (err) + goto out_cleanup_disk; + ubd_gendisk[n] = disk; return 0; +out_cleanup_disk: + blk_cleanup_disk(disk); out_cleanup_tags: blk_mq_free_tag_set(&ubd_dev->tag_set); out: From 2e9e31bea01997450397d64da43b6675e0adb9e3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:27 -0700 Subject: [PATCH 106/117] rnbd: add error handling support for add_disk() We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Acked-by: Jack Wang Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-9-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 0ec0191d4196..2df0657cdf00 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1384,8 +1384,10 @@ static void setup_request_queue(struct rnbd_clt_dev *dev) blk_queue_write_cache(dev->queue, dev->wc, dev->fua); } -static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) +static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) { + int err; + dev->gd->major = rnbd_client_major; dev->gd->first_minor = idx << RNBD_PART_BITS; dev->gd->minors = 1 << RNBD_PART_BITS; @@ -1410,7 +1412,11 @@ static void rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) if (!dev->rotational) blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); - add_disk(dev->gd); + err = add_disk(dev->gd); + if (err) + blk_cleanup_disk(dev->gd); + + return err; } static int rnbd_client_setup_device(struct rnbd_clt_dev *dev) @@ -1426,8 +1432,7 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev) rnbd_init_mq_hw_queues(dev); setup_request_queue(dev); - rnbd_clt_setup_gen_disk(dev, idx); - return 0; + return rnbd_clt_setup_gen_disk(dev, idx); } static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, From 83b863f4a3f0de4ece7802d9121fed0c3e64145f Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 15 Oct 2021 16:30:28 -0700 Subject: [PATCH 107/117] mtd: add add_disk() error handling We never checked for errors on add_disk() as this function returned void. Now that this is fixed, use the shiny new error handling. Acked-by: Miquel Raynal Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/r/20211015233028.2167651-10-mcgrof@kernel.org Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index b8ae1ec14e17..4eaba6f4ec68 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -384,7 +384,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) if (new->readonly) set_disk_ro(gd, 1); - device_add_disk(&new->mtd->dev, gd, NULL); + ret = device_add_disk(&new->mtd->dev, gd, NULL); + if (ret) + goto out_cleanup_disk; if (new->disk_attributes) { ret = sysfs_create_group(&disk_to_dev(gd)->kobj, @@ -393,6 +395,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) } return 0; +out_cleanup_disk: + blk_cleanup_disk(new->disk); out_free_tag_set: blk_mq_free_tag_set(new->tag_set); out_kfree_tag_set: From 47e9624616c80c9879feda536c48c6a3a0ed9835 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Oct 2021 09:56:39 +0200 Subject: [PATCH 108/117] block: remove support for cryptoloop and the xor transfer Support for cyrptoloop has been officially marked broken and deprecated in favor of dm-crypt (which supports the same broken algorithms if needed) in Linux 2.6.4 (released in March 2004), and support for it has been entirely removed from losetup in util-linux 2.23 (released in April 2013). The XOR transfer has never been more than a toy to demonstrate the transfer in the bad old times of crypto export restrictions. Remove them as they have some nasty interactions with loop device life times due to the iteration over all loop devices in loop_unregister_transfer. Suggested-by: Milan Broz Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211019075639.2333969-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 23 --- drivers/block/Makefile | 1 - drivers/block/cryptoloop.c | 206 -------------------- drivers/block/loop.c | 376 +++---------------------------------- drivers/block/loop.h | 30 --- 5 files changed, 26 insertions(+), 610 deletions(-) delete mode 100644 drivers/block/cryptoloop.c diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ab3e37aa1830..f7f32eeaee63 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -180,14 +180,6 @@ config BLK_DEV_LOOP bits of, say, a sound file). This is also safe if the file resides on a remote file server. - There are several ways of encrypting disks. Some of these require - kernel patches. The vanilla kernel offers the cryptoloop option - and a Device Mapper target (which is superior, as it supports all - file systems). If you want to use the cryptoloop, say Y to both - LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12 - or later) version of util-linux. Additionally, be aware that - the cryptoloop is not safe for storing journaled filesystems. - Note that this loop device has nothing to do with the loopback device used for network connections from the machine to itself. @@ -211,21 +203,6 @@ config BLK_DEV_LOOP_MIN_COUNT is used, it can be set to 0, since needed loop devices can be dynamically allocated with the /dev/loop-control interface. -config BLK_DEV_CRYPTOLOOP - tristate "Cryptoloop Support (DEPRECATED)" - select CRYPTO - select CRYPTO_CBC - depends on BLK_DEV_LOOP - help - Say Y here if you want to be able to use the ciphers that are - provided by the CryptoAPI as loop transformation. This might be - used as hard disk encryption. - - WARNING: This device is not safe for journaled file systems like - ext3 or Reiserfs. Please use the Device Mapper crypto module - instead, which can be configured to be on-disk compatible with the - cryptoloop device. cryptoloop support will be removed in Linux 5.16. - source "drivers/block/drbd/Kconfig" config BLK_DEV_NBD diff --git a/drivers/block/Makefile b/drivers/block/Makefile index bc68817ef496..11a74f17c9ad 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -24,7 +24,6 @@ obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o -obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c deleted file mode 100644 index f0a91faa43a8..000000000000 --- a/drivers/block/cryptoloop.c +++ /dev/null @@ -1,206 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - Linux loop encryption enabling module - - Copyright (C) 2002 Herbert Valerio Riedel - Copyright (C) 2003 Fruhwirth Clemens - - */ - -#include - -#include -#include -#include -#include -#include -#include -#include "loop.h" - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("loop blockdevice transferfunction adaptor / CryptoAPI"); -MODULE_AUTHOR("Herbert Valerio Riedel "); - -#define LOOP_IV_SECTOR_BITS 9 -#define LOOP_IV_SECTOR_SIZE (1 << LOOP_IV_SECTOR_BITS) - -static int -cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) -{ - int err = -EINVAL; - int cipher_len; - int mode_len; - char cms[LO_NAME_SIZE]; /* cipher-mode string */ - char *mode; - char *cmsp = cms; /* c-m string pointer */ - struct crypto_sync_skcipher *tfm; - - /* encryption breaks for non sector aligned offsets */ - - if (info->lo_offset % LOOP_IV_SECTOR_SIZE) - goto out; - - strncpy(cms, info->lo_crypt_name, LO_NAME_SIZE); - cms[LO_NAME_SIZE - 1] = 0; - - cipher_len = strcspn(cmsp, "-"); - - mode = cmsp + cipher_len; - mode_len = 0; - if (*mode) { - mode++; - mode_len = strcspn(mode, "-"); - } - - if (!mode_len) { - mode = "cbc"; - mode_len = 3; - } - - if (cipher_len + mode_len + 3 > LO_NAME_SIZE) - return -EINVAL; - - memmove(cms, mode, mode_len); - cmsp = cms + mode_len; - *cmsp++ = '('; - memcpy(cmsp, info->lo_crypt_name, cipher_len); - cmsp += cipher_len; - *cmsp++ = ')'; - *cmsp = 0; - - tfm = crypto_alloc_sync_skcipher(cms, 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - err = crypto_sync_skcipher_setkey(tfm, info->lo_encrypt_key, - info->lo_encrypt_key_size); - - if (err != 0) - goto out_free_tfm; - - lo->key_data = tfm; - return 0; - - out_free_tfm: - crypto_free_sync_skcipher(tfm); - - out: - return err; -} - - -typedef int (*encdec_cbc_t)(struct skcipher_request *req); - -static int -cryptoloop_transfer(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t IV) -{ - struct crypto_sync_skcipher *tfm = lo->key_data; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); - struct scatterlist sg_out; - struct scatterlist sg_in; - - encdec_cbc_t encdecfunc; - struct page *in_page, *out_page; - unsigned in_offs, out_offs; - int err; - - skcipher_request_set_sync_tfm(req, tfm); - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, - NULL, NULL); - - sg_init_table(&sg_out, 1); - sg_init_table(&sg_in, 1); - - if (cmd == READ) { - in_page = raw_page; - in_offs = raw_off; - out_page = loop_page; - out_offs = loop_off; - encdecfunc = crypto_skcipher_decrypt; - } else { - in_page = loop_page; - in_offs = loop_off; - out_page = raw_page; - out_offs = raw_off; - encdecfunc = crypto_skcipher_encrypt; - } - - while (size > 0) { - const int sz = min(size, LOOP_IV_SECTOR_SIZE); - u32 iv[4] = { 0, }; - iv[0] = cpu_to_le32(IV & 0xffffffff); - - sg_set_page(&sg_in, in_page, sz, in_offs); - sg_set_page(&sg_out, out_page, sz, out_offs); - - skcipher_request_set_crypt(req, &sg_in, &sg_out, sz, iv); - err = encdecfunc(req); - if (err) - goto out; - - IV++; - size -= sz; - in_offs += sz; - out_offs += sz; - } - - err = 0; - -out: - skcipher_request_zero(req); - return err; -} - -static int -cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg) -{ - return -EINVAL; -} - -static int -cryptoloop_release(struct loop_device *lo) -{ - struct crypto_sync_skcipher *tfm = lo->key_data; - if (tfm != NULL) { - crypto_free_sync_skcipher(tfm); - lo->key_data = NULL; - return 0; - } - printk(KERN_ERR "cryptoloop_release(): tfm == NULL?\n"); - return -EINVAL; -} - -static struct loop_func_table cryptoloop_funcs = { - .number = LO_CRYPT_CRYPTOAPI, - .init = cryptoloop_init, - .ioctl = cryptoloop_ioctl, - .transfer = cryptoloop_transfer, - .release = cryptoloop_release, - .owner = THIS_MODULE -}; - -static int __init -init_cryptoloop(void) -{ - int rc = loop_register_transfer(&cryptoloop_funcs); - - if (rc) - printk(KERN_ERR "cryptoloop: loop_register_transfer failed\n"); - else - pr_warn("the cryptoloop driver has been deprecated and will be removed in in Linux 5.16\n"); - return rc; -} - -static void __exit -cleanup_cryptoloop(void) -{ - if (loop_unregister_transfer(LO_CRYPT_CRYPTOAPI)) - printk(KERN_ERR - "cryptoloop: loop_unregister_transfer failed\n"); -} - -module_init(init_cryptoloop); -module_exit(cleanup_cryptoloop); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 00ee365ed5e0..302ac8f4f8ac 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -133,58 +133,6 @@ static void loop_global_unlock(struct loop_device *lo, bool global) static int max_part; static int part_shift; -static int transfer_xor(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t real_block) -{ - char *raw_buf = kmap_atomic(raw_page) + raw_off; - char *loop_buf = kmap_atomic(loop_page) + loop_off; - char *in, *out, *key; - int i, keysize; - - if (cmd == READ) { - in = raw_buf; - out = loop_buf; - } else { - in = loop_buf; - out = raw_buf; - } - - key = lo->lo_encrypt_key; - keysize = lo->lo_encrypt_key_size; - for (i = 0; i < size; i++) - *out++ = *in++ ^ key[(i & 511) % keysize]; - - kunmap_atomic(loop_buf); - kunmap_atomic(raw_buf); - cond_resched(); - return 0; -} - -static int xor_init(struct loop_device *lo, const struct loop_info64 *info) -{ - if (unlikely(info->lo_encrypt_key_size <= 0)) - return -EINVAL; - return 0; -} - -static struct loop_func_table none_funcs = { - .number = LO_CRYPT_NONE, -}; - -static struct loop_func_table xor_funcs = { - .number = LO_CRYPT_XOR, - .transfer = transfer_xor, - .init = xor_init -}; - -/* xfer_funcs[0] is special - its release function is never called */ -static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { - &none_funcs, - &xor_funcs -}; - static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { loff_t loopsize; @@ -228,8 +176,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) /* * We support direct I/O only if lo_offset is aligned with the * logical I/O size of backing device, and the logical block - * size of loop is bigger than the backing device's and the loop - * needn't transform transfer. + * size of loop is bigger than the backing device's. * * TODO: the above condition may be loosed in the future, and * direct I/O may be switched runtime at that time because most @@ -238,8 +185,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) if (dio) { if (queue_logical_block_size(lo->lo_queue) >= sb_bsize && !(lo->lo_offset & dio_align) && - mapping->a_ops->direct_IO && - !lo->transfer) + mapping->a_ops->direct_IO) use_dio = true; else use_dio = false; @@ -299,24 +245,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size) kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } -static inline int -lo_do_transfer(struct loop_device *lo, int cmd, - struct page *rpage, unsigned roffs, - struct page *lpage, unsigned loffs, - int size, sector_t rblock) -{ - int ret; - - ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); - if (likely(!ret)) - return 0; - - printk_ratelimited(KERN_ERR - "loop: Transfer error at byte offset %llu, length %i.\n", - (unsigned long long)rblock << 9, size); - return ret; -} - static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) { struct iov_iter i; @@ -356,41 +284,6 @@ static int lo_write_simple(struct loop_device *lo, struct request *rq, return ret; } -/* - * This is the slow, transforming version that needs to double buffer the - * data as it cannot do the transformations in place without having direct - * access to the destination pages of the backing file. - */ -static int lo_write_transfer(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec, b; - struct req_iterator iter; - struct page *page; - int ret = 0; - - page = alloc_page(GFP_NOIO); - if (unlikely(!page)) - return -ENOMEM; - - rq_for_each_segment(bvec, rq, iter) { - ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page, - bvec.bv_offset, bvec.bv_len, pos >> 9); - if (unlikely(ret)) - break; - - b.bv_page = page; - b.bv_offset = 0; - b.bv_len = bvec.bv_len; - ret = lo_write_bvec(lo->lo_backing_file, &b, &pos); - if (ret < 0) - break; - } - - __free_page(page); - return ret; -} - static int lo_read_simple(struct loop_device *lo, struct request *rq, loff_t pos) { @@ -420,64 +313,12 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, return 0; } -static int lo_read_transfer(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec, b; - struct req_iterator iter; - struct iov_iter i; - struct page *page; - ssize_t len; - int ret = 0; - - page = alloc_page(GFP_NOIO); - if (unlikely(!page)) - return -ENOMEM; - - rq_for_each_segment(bvec, rq, iter) { - loff_t offset = pos; - - b.bv_page = page; - b.bv_offset = 0; - b.bv_len = bvec.bv_len; - - iov_iter_bvec(&i, READ, &b, 1, b.bv_len); - len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); - if (len < 0) { - ret = len; - goto out_free_page; - } - - ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page, - bvec.bv_offset, len, offset >> 9); - if (ret) - goto out_free_page; - - flush_dcache_page(bvec.bv_page); - - if (len != bvec.bv_len) { - struct bio *bio; - - __rq_for_each_bio(bio, rq) - zero_fill_bio(bio); - break; - } - } - - ret = 0; -out_free_page: - __free_page(page); - return ret; -} - static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, int mode) { /* * We use fallocate to manipulate the space mappings used by the image - * a.k.a. discard/zerorange. However we do not support this if - * encryption is enabled, because it may give an attacker useful - * information. + * a.k.a. discard/zerorange. */ struct file *file = lo->lo_backing_file; struct request_queue *q = lo->lo_queue; @@ -660,16 +501,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) case REQ_OP_DISCARD: return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: - if (lo->transfer) - return lo_write_transfer(lo, rq, pos); - else if (cmd->use_aio) + if (cmd->use_aio) return lo_rw_aio(lo, cmd, pos, WRITE); else return lo_write_simple(lo, rq, pos); case REQ_OP_READ: - if (lo->transfer) - return lo_read_transfer(lo, rq, pos); - else if (cmd->use_aio) + if (cmd->use_aio) return lo_rw_aio(lo, cmd, pos, READ); else return lo_read_simple(lo, rq, pos); @@ -934,7 +771,7 @@ static void loop_config_discard(struct loop_device *lo) * not blkdev_issue_discard(). This maintains consistent behavior with * file-backed loop devices: discarded regions read back as zero. */ - if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { + if (S_ISBLK(inode->i_mode)) { struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); max_discard_sectors = backingq->limits.max_write_zeroes_sectors; @@ -943,11 +780,9 @@ static void loop_config_discard(struct loop_device *lo) /* * We use punch hole to reclaim the free space used by the - * image a.k.a. discard. However we do not support discard if - * encryption is enabled, because it may give an attacker - * useful information. + * image a.k.a. discard. */ - } else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) { + } else if (!file->f_op->fallocate) { max_discard_sectors = 0; granularity = 0; @@ -1084,43 +919,6 @@ static void loop_update_rotational(struct loop_device *lo) blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); } -static int -loop_release_xfer(struct loop_device *lo) -{ - int err = 0; - struct loop_func_table *xfer = lo->lo_encryption; - - if (xfer) { - if (xfer->release) - err = xfer->release(lo); - lo->transfer = NULL; - lo->lo_encryption = NULL; - module_put(xfer->owner); - } - return err; -} - -static int -loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, - const struct loop_info64 *i) -{ - int err = 0; - - if (xfer) { - struct module *owner = xfer->owner; - - if (!try_module_get(owner)) - return -EINVAL; - if (xfer->init) - err = xfer->init(lo, i); - if (err) - module_put(owner); - else - lo->lo_encryption = xfer; - } - return err; -} - /** * loop_set_status_from_info - configure device from loop_info * @lo: struct loop_device to configure @@ -1133,55 +931,27 @@ static int loop_set_status_from_info(struct loop_device *lo, const struct loop_info64 *info) { - int err; - struct loop_func_table *xfer; - kuid_t uid = current_uid(); - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; - err = loop_release_xfer(lo); - if (err) - return err; - - if (info->lo_encrypt_type) { - unsigned int type = info->lo_encrypt_type; - - if (type >= MAX_LO_CRYPT) - return -EINVAL; - xfer = xfer_funcs[type]; - if (xfer == NULL) - return -EINVAL; - } else - xfer = NULL; - - err = loop_init_xfer(lo, xfer, info); - if (err) - return err; + switch (info->lo_encrypt_type) { + case LO_CRYPT_NONE: + break; + case LO_CRYPT_XOR: + pr_warn("support for the xor transformation has been removed.\n"); + return -EINVAL; + case LO_CRYPT_CRYPTOAPI: + pr_warn("support for cryptoloop has been removed. Use dm-crypt instead.\n"); + return -EINVAL; + default: + return -EINVAL; + } lo->lo_offset = info->lo_offset; lo->lo_sizelimit = info->lo_sizelimit; memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; - - if (!xfer) - xfer = &none_funcs; - lo->transfer = xfer->transfer; - lo->ioctl = xfer->ioctl; - lo->lo_flags = info->lo_flags; - - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; - lo->lo_init[0] = info->lo_init[0]; - lo->lo_init[1] = info->lo_init[1]; - if (info->lo_encrypt_key_size) { - memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, - info->lo_encrypt_key_size); - lo->lo_key_owner = uid; - } - return 0; } @@ -1381,16 +1151,9 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); - loop_release_xfer(lo); - lo->transfer = NULL; - lo->ioctl = NULL; lo->lo_device = NULL; - lo->lo_encryption = NULL; lo->lo_offset = 0; lo->lo_sizelimit = 0; - lo->lo_encrypt_key_size = 0; - memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); - memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); blk_queue_logical_block_size(lo->lo_queue, 512); blk_queue_physical_block_size(lo->lo_queue, 512); @@ -1498,7 +1261,6 @@ static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; - kuid_t uid = current_uid(); int prev_lo_flags; bool partscan = false; bool size_changed = false; @@ -1506,12 +1268,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; - if (lo->lo_encrypt_key_size && - !uid_eq(lo->lo_key_owner, uid) && - !capable(CAP_SYS_ADMIN)) { - err = -EPERM; - goto out_unlock; - } if (lo->lo_state != Lo_bound) { err = -ENXIO; goto out_unlock; @@ -1597,14 +1353,6 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) info->lo_sizelimit = lo->lo_sizelimit; info->lo_flags = lo->lo_flags; memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); - memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); - info->lo_encrypt_type = - lo->lo_encryption ? lo->lo_encryption->number : 0; - if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { - info->lo_encrypt_key_size = lo->lo_encrypt_key_size; - memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, - lo->lo_encrypt_key_size); - } /* Drop lo_mutex while we call into the filesystem. */ path = lo->lo_backing_file->f_path; @@ -1630,16 +1378,8 @@ loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) info64->lo_rdevice = info->lo_rdevice; info64->lo_offset = info->lo_offset; info64->lo_sizelimit = 0; - info64->lo_encrypt_type = info->lo_encrypt_type; - info64->lo_encrypt_key_size = info->lo_encrypt_key_size; info64->lo_flags = info->lo_flags; - info64->lo_init[0] = info->lo_init[0]; - info64->lo_init[1] = info->lo_init[1]; - if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) - memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); - else - memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); - memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); + memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); } static int @@ -1651,16 +1391,8 @@ loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) info->lo_inode = info64->lo_inode; info->lo_rdevice = info64->lo_rdevice; info->lo_offset = info64->lo_offset; - info->lo_encrypt_type = info64->lo_encrypt_type; - info->lo_encrypt_key_size = info64->lo_encrypt_key_size; info->lo_flags = info64->lo_flags; - info->lo_init[0] = info64->lo_init[0]; - info->lo_init[1] = info64->lo_init[1]; - if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) - memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); - else - memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); - memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); + memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); /* error in case values were truncated */ if (info->lo_device != info64->lo_device || @@ -1809,7 +1541,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, err = loop_set_block_size(lo, arg); break; default: - err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + err = -EINVAL; } mutex_unlock(&lo->lo_mutex); return err; @@ -1885,7 +1617,6 @@ struct compat_loop_info { compat_ulong_t lo_inode; /* ioctl r/o */ compat_dev_t lo_rdevice; /* ioctl r/o */ compat_int_t lo_offset; - compat_int_t lo_encrypt_type; compat_int_t lo_encrypt_key_size; /* ioctl w/o */ compat_int_t lo_flags; /* ioctl r/o */ char lo_name[LO_NAME_SIZE]; @@ -1914,16 +1645,8 @@ loop_info64_from_compat(const struct compat_loop_info __user *arg, info64->lo_rdevice = info.lo_rdevice; info64->lo_offset = info.lo_offset; info64->lo_sizelimit = 0; - info64->lo_encrypt_type = info.lo_encrypt_type; - info64->lo_encrypt_key_size = info.lo_encrypt_key_size; info64->lo_flags = info.lo_flags; - info64->lo_init[0] = info.lo_init[0]; - info64->lo_init[1] = info.lo_init[1]; - if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) - memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); - else - memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); - memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); + memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); return 0; } @@ -1943,24 +1666,14 @@ loop_info64_to_compat(const struct loop_info64 *info64, info.lo_inode = info64->lo_inode; info.lo_rdevice = info64->lo_rdevice; info.lo_offset = info64->lo_offset; - info.lo_encrypt_type = info64->lo_encrypt_type; - info.lo_encrypt_key_size = info64->lo_encrypt_key_size; info.lo_flags = info64->lo_flags; - info.lo_init[0] = info64->lo_init[0]; - info.lo_init[1] = info64->lo_init[1]; - if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) - memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); - else - memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); - memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); + memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); /* error in case values were truncated */ if (info.lo_device != info64->lo_device || info.lo_rdevice != info64->lo_rdevice || info.lo_inode != info64->lo_inode || - info.lo_offset != info64->lo_offset || - info.lo_init[0] != info64->lo_init[0] || - info.lo_init[1] != info64->lo_init[1]) + info.lo_offset != info64->lo_offset) return -EOVERFLOW; if (copy_to_user(arg, &info, sizeof(info))) @@ -2101,43 +1814,6 @@ MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); -int loop_register_transfer(struct loop_func_table *funcs) -{ - unsigned int n = funcs->number; - - if (n >= MAX_LO_CRYPT || xfer_funcs[n]) - return -EINVAL; - xfer_funcs[n] = funcs; - return 0; -} - -int loop_unregister_transfer(int number) -{ - unsigned int n = number; - struct loop_func_table *xfer; - - if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) - return -EINVAL; - /* - * This function is called from only cleanup_cryptoloop(). - * Given that each loop device that has a transfer enabled holds a - * reference to the module implementing it we should never get here - * with a transfer that is set (unless forced module unloading is - * requested). Thus, check module's refcount and warn if this is - * not a clean unloading. - */ -#ifdef CONFIG_MODULE_UNLOAD - if (xfer->owner && module_refcount(xfer->owner) != -1) - pr_err("Danger! Unregistering an in use transfer function.\n"); -#endif - - xfer_funcs[n] = NULL; - return 0; -} - -EXPORT_SYMBOL(loop_register_transfer); -EXPORT_SYMBOL(loop_unregister_transfer); - static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 04c88dd6eabd..082d4b6bfc6a 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -32,23 +32,10 @@ struct loop_device { loff_t lo_offset; loff_t lo_sizelimit; int lo_flags; - int (*transfer)(struct loop_device *, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t real_block); char lo_file_name[LO_NAME_SIZE]; - char lo_crypt_name[LO_NAME_SIZE]; - char lo_encrypt_key[LO_KEY_SIZE]; - int lo_encrypt_key_size; - struct loop_func_table *lo_encryption; - __u32 lo_init[2]; - kuid_t lo_key_owner; /* Who set the key */ - int (*ioctl)(struct loop_device *, int cmd, - unsigned long arg); struct file * lo_backing_file; struct block_device *lo_device; - void *key_data; gfp_t old_gfp_mask; @@ -82,21 +69,4 @@ struct loop_cmd { struct cgroup_subsys_state *memcg_css; }; -/* Support for loadable transfer modules */ -struct loop_func_table { - int number; /* filter type */ - int (*transfer)(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t real_block); - int (*init)(struct loop_device *, const struct loop_info64 *); - /* release is called from loop_unregister_transfer or clr_fd */ - int (*release)(struct loop_device *); - int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); - struct module *owner; -}; - -int loop_register_transfer(struct loop_func_table *funcs); -int loop_unregister_transfer(int number); - #endif From d28e4dff085c5a87025c9a0a85fb798bd8e9ca17 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Sun, 24 Oct 2021 13:20:13 +1300 Subject: [PATCH 109/117] block: ataflop: more blk-mq refactoring fixes As it turns out, my earlier patch in commit 86d46fdaa12a (block: ataflop: fix breakage introduced at blk-mq refactoring) was incomplete. This patch fixes any remaining issues found during more testing and code review. Requests exceeding 4 k are handled in 4k segments but __blk_mq_end_request() is never called on these (still sectors outstanding on the request). With redo_fd_request() removed, there is no provision to kick off processing of the next segment, causing requests exceeding 4k to hang. (By setting /sys/block/fd0/queue/max_sectors_k <= 4 as workaround, this behaviour can be avoided). Instead of reintroducing redo_fd_request(), requeue the remainder of the request by calling blk_mq_requeue_request() on incomplete requests (i.e. when blk_update_request() still returns true), and rely on the block layer to queue the residual as new request. Both error handling and formatting needs to release the ST-DMA lock, so call finish_fdc() on these (this was previously handled by redo_fd_request()). finish_fdc() may be called legitimately without the ST-DMA lock held - make sure we only release the lock if we actually held it. In a similar way, early exit due to errors in ataflop_queue_rq() must release the lock. After minor errors, fd_error sets up to recalibrate the drive but never re-runs the current operation (another task handled by redo_fd_request() before). Call do_fd_action() to get the next steps (seek, retry read/write) underway. Signed-off-by: Michael Schmitz Fixes: 6ec3938cff95f (ataflop: convert to blk-mq) CC: linux-block@vger.kernel.org Link: https://lore.kernel.org/r/20211024002013.9332-1-schmitzmic@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 45 +++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 3d217f1d7440..2622803ef71a 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -458,10 +458,20 @@ static DEFINE_TIMER(fd_timer, check_change); static void fd_end_request_cur(blk_status_t err) { + DPRINT(("fd_end_request_cur(), bytes %d of %d\n", + blk_rq_cur_bytes(fd_request), + blk_rq_bytes(fd_request))); + if (!blk_update_request(fd_request, err, blk_rq_cur_bytes(fd_request))) { + DPRINT(("calling __blk_mq_end_request()\n")); __blk_mq_end_request(fd_request, err); fd_request = NULL; + } else { + /* requeue rest of request */ + DPRINT(("calling blk_mq_requeue_request()\n")); + blk_mq_requeue_request(fd_request, true); + fd_request = NULL; } } @@ -699,12 +709,21 @@ static void fd_error( void ) if (fd_request->error_count >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); fd_end_request_cur(BLK_STS_IOERR); + finish_fdc(); + return; } else if (fd_request->error_count == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); if (SelectedDrive != -1) SUD.track = -1; } + /* need to re-run request to recalibrate */ + atari_disable_irq( IRQ_MFP_FDC ); + + setup_req_params( SelectedDrive ); + do_fd_action( SelectedDrive ); + + atari_enable_irq( IRQ_MFP_FDC ); } @@ -731,8 +750,10 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) if (type) { type--; if (type >= NUM_DISK_MINORS || - minor2disktype[type].drive_types > DriveType) + minor2disktype[type].drive_types > DriveType) { + finish_fdc(); return -EINVAL; + } } q = unit[drive].disk[type]->queue; @@ -750,6 +771,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) } if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) { + finish_fdc(); ret = -EINVAL; goto out; } @@ -790,6 +812,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) wait_for_completion(&format_wait); + finish_fdc(); ret = FormatError ? -EIO : 0; out: blk_mq_unquiesce_queue(q); @@ -824,6 +847,7 @@ static void do_fd_action( int drive ) else { /* all sectors finished */ fd_end_request_cur(BLK_STS_OK); + finish_fdc(); return; } } @@ -1227,8 +1251,8 @@ static void fd_rwsec_done1(int status) } else { /* all sectors finished */ - finish_fdc(); fd_end_request_cur(BLK_STS_OK); + finish_fdc(); } return; @@ -1350,7 +1374,7 @@ static void fd_times_out(struct timer_list *unused) static void finish_fdc( void ) { - if (!NeedSeek) { + if (!NeedSeek || !stdma_is_locked_by(floppy_irq)) { finish_fdc_done( 0 ); } else { @@ -1385,7 +1409,8 @@ static void finish_fdc_done( int dummy ) start_motor_off_timer(); local_irq_save(flags); - stdma_release(); + if (stdma_is_locked_by(floppy_irq)) + stdma_release(); local_irq_restore(flags); DPRINT(("finish_fdc() finished\n")); @@ -1482,7 +1507,9 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, int drive = floppy - unit; int type = floppy->type; - DPRINT(("Queue request: drive %d type %d last %d\n", drive, type, bd->last)); + DPRINT(("Queue request: drive %d type %d sectors %d of %d last %d\n", + drive, type, blk_rq_cur_sectors(bd->rq), + blk_rq_sectors(bd->rq), bd->last)); spin_lock_irq(&ataflop_lock); if (fd_request) { @@ -1504,6 +1531,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, /* drive not connected */ printk(KERN_ERR "Unknown Device: fd%d\n", drive ); fd_end_request_cur(BLK_STS_IOERR); + stdma_release(); goto out; } @@ -1520,11 +1548,13 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, if (--type >= NUM_DISK_MINORS) { printk(KERN_WARNING "fd%d: invalid disk format", drive ); fd_end_request_cur(BLK_STS_IOERR); + stdma_release(); goto out; } if (minor2disktype[type].drive_types > DriveType) { printk(KERN_WARNING "fd%d: unsupported disk format", drive ); fd_end_request_cur(BLK_STS_IOERR); + stdma_release(); goto out; } type = minor2disktype[type].index; @@ -1625,6 +1655,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* what if type > 0 here? Overwrite specified entry ? */ if (type) { /* refuse to re-set a predefined type for now */ + finish_fdc(); return -EINVAL; } @@ -1692,8 +1723,10 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, /* sanity check */ if (setprm.track != dtp->blocks/dtp->spt/2 || - setprm.head != 2) + setprm.head != 2) { + finish_fdc(); return -EINVAL; + } UDT = dtp; set_capacity(disk, UDT->blocks); From 785d584c30ffc1224027536fe55bdc15ee509f14 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 18 Oct 2021 17:21:37 +0200 Subject: [PATCH 110/117] nvme: add new discovery log page entry definitions TP8014 adds a new SUBTYPE value and a new field EFLAGS for the discovery log page entry. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 357482dedb59..855dd9b3e84b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -27,8 +27,14 @@ #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { - NVME_NQN_DISC = 1, /* Discovery type target subsystem */ - NVME_NQN_NVME = 2, /* NVME type target subsystem */ + /* Referral to another discovery type target subsystem */ + NVME_NQN_DISC = 1, + + /* NVME type target subsystem */ + NVME_NQN_NVME = 2, + + /* Current discovery type target subsystem */ + NVME_NQN_CURR = 3, }; enum nvme_ctrl_type { @@ -1312,6 +1318,12 @@ struct nvmf_common_command { #define MAX_DISC_LOGS 255 +/* Discovery log page entry flags (EFLAGS): */ +enum { + NVME_DISC_EFLAGS_EPCSD = (1 << 1), + NVME_DISC_EFLAGS_DUPRETINFO = (1 << 0), +}; + /* Discovery log page entry */ struct nvmf_disc_rsp_page_entry { __u8 trtype; @@ -1321,7 +1333,8 @@ struct nvmf_disc_rsp_page_entry { __le16 portid; __le16 cntlid; __le16 asqsz; - __u8 resv8[22]; + __le16 eflags; + __u8 resv10[20]; char trsvcid[NVMF_TRSVCID_SIZE]; __u8 resv64[192]; char subnqn[NVMF_NQN_FIELD_LEN]; From 598e75934c38e2e8af6be92374053e59df8071ad Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 18 Oct 2021 17:21:36 +0200 Subject: [PATCH 111/117] nvmet: switch check for subsystem type Invert the check for discovery subsystem type to allow for additional discovery subsystem types. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/nvmet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f31dcc4fb1a2..af193423c10b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -579,7 +579,7 @@ static inline struct nvmet_subsys *nvmet_req_subsys(struct nvmet_req *req) static inline bool nvmet_is_disc_subsys(struct nvmet_subsys *subsys) { - return subsys->type == NVME_NQN_DISC; + return subsys->type != NVME_NQN_NVME; } #ifdef CONFIG_NVME_TARGET_PASSTHRU From 2953b30b1d9feb1bc555682e64e6479d197b9231 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 18 Oct 2021 17:21:38 +0200 Subject: [PATCH 112/117] nvmet: register discovery subsystem as 'current' Register the discovery subsystem as the 'current' discovery subsystem, and add a new discovery log page entry for it. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/discovery.c | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index d844fb9ef2eb..5119c687de68 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1541,6 +1541,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, subsys->max_qid = NVMET_NR_QUEUES; break; case NVME_NQN_DISC: + case NVME_NQN_CURR: subsys->max_qid = 0; break; default: diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 7b360f8d07e9..c2162eef8ce1 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -146,7 +146,7 @@ static size_t discovery_log_entries(struct nvmet_req *req) struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_subsys_link *p; struct nvmet_port *r; - size_t entries = 0; + size_t entries = 1; list_for_each_entry(p, &req->port->subsystems, entry) { if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) @@ -171,6 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) u32 numrec = 0; u16 status = 0; void *buffer; + char traddr[NVMF_TRADDR_SIZE]; if (!nvmet_check_transfer_len(req, data_len)) return; @@ -203,15 +204,19 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) status = NVME_SC_INTERNAL; goto out; } - hdr = buffer; - list_for_each_entry(p, &req->port->subsystems, entry) { - char traddr[NVMF_TRADDR_SIZE]; + nvmet_set_disc_traddr(req, req->port, traddr); + + nvmet_format_discovery_entry(hdr, req->port, + nvmet_disc_subsys->subsysnqn, + traddr, NVME_NQN_CURR, numrec); + numrec++; + + list_for_each_entry(p, &req->port->subsystems, entry) { if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn)) continue; - nvmet_set_disc_traddr(req, req->port, traddr); nvmet_format_discovery_entry(hdr, req->port, p->subsys->subsysnqn, traddr, NVME_NQN_NVME, numrec); @@ -389,7 +394,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) int __init nvmet_init_discovery(void) { nvmet_disc_subsys = - nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC); + nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_CURR); return PTR_ERR_OR_ZERO(nvmet_disc_subsys); } From d156cfcafbd0eae4224ea007d95ebda467eb0c46 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 24 Oct 2021 19:29:21 +0200 Subject: [PATCH 113/117] nvmet: use flex_array_size and struct_size In an effort to avoid open-coded arithmetic in the kernel [1], use the flex_array_size() and struct_size() helpers instead of an open-coded calculation. [1] https://github.com/KSPP/linux/issues/160 Signed-off-by: Len Baker Reviewed-by: Sagi Grimberg Reviewed-by: Gustavo A. R. Silva Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- drivers/nvme/target/admin-cmd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 954e84df6eb7..7f2071f2460c 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -562,7 +562,7 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, return -EINVAL; nr_nsids = le32_to_cpu(desc->nnsids); - nsid_buf_size = nr_nsids * sizeof(__le32); + nsid_buf_size = flex_array_size(desc, nsids, nr_nsids); if (WARN_ON_ONCE(desc->grpid == 0)) return -EINVAL; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 403de678fd06..6fb24746de06 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -264,7 +264,7 @@ static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid, desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt); desc->state = req->port->ana_state[grpid]; memset(desc->rsvd17, 0, sizeof(desc->rsvd17)); - return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32); + return struct_size(desc, nsids, count); } static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) From cf2197ca4b8c199d188593ca6800ea1827c42171 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 29 Oct 2021 14:09:29 +0800 Subject: [PATCH 114/117] bcache: move uapi header bcache.h to bcache code directory The header file include/uapi/linux/bcache.h is not really a user space API heaer. This file defines the ondisk format of bcache internal meta data but no one includes it from user space, bcache-tools has its own copy of this header with minor modification. Therefore, this patch moves include/uapi/linux/bcache.h to bcache code directory as drivers/md/bcache/bcache_ondisk.h. Suggested-by: Arnd Bergmann Suggested-by: Christoph Hellwig Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211029060930.119923-2-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 +- .../uapi/linux/bcache.h => drivers/md/bcache/bcache_ondisk.h | 0 drivers/md/bcache/bset.h | 2 +- drivers/md/bcache/features.c | 2 +- drivers/md/bcache/features.h | 3 ++- 5 files changed, 5 insertions(+), 4 deletions(-) rename include/uapi/linux/bcache.h => drivers/md/bcache/bcache_ondisk.h (100%) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 941685409c68..9ed9c955add7 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -178,7 +178,6 @@ #define pr_fmt(fmt) "bcache: %s() " fmt, __func__ -#include #include #include #include @@ -190,6 +189,7 @@ #include #include +#include "bcache_ondisk.h" #include "bset.h" #include "util.h" #include "closure.h" diff --git a/include/uapi/linux/bcache.h b/drivers/md/bcache/bcache_ondisk.h similarity index 100% rename from include/uapi/linux/bcache.h rename to drivers/md/bcache/bcache_ondisk.h diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index a50dcfda656f..d795c84246b0 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -2,10 +2,10 @@ #ifndef _BCACHE_BSET_H #define _BCACHE_BSET_H -#include #include #include +#include "bcache_ondisk.h" #include "util.h" /* for time_stats */ /* diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c index 6d2b7b84a7b7..634922c5601d 100644 --- a/drivers/md/bcache/features.c +++ b/drivers/md/bcache/features.c @@ -6,7 +6,7 @@ * Copyright 2020 Coly Li * */ -#include +#include "bcache_ondisk.h" #include "bcache.h" #include "features.h" diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index d1c8fd3977fc..09161b89c63e 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -2,10 +2,11 @@ #ifndef _BCACHE_FEATURES_H #define _BCACHE_FEATURES_H -#include #include #include +#include "bcache_ondisk.h" + #define BCH_FEATURE_COMPAT 0 #define BCH_FEATURE_RO_COMPAT 1 #define BCH_FEATURE_INCOMPAT 2 From 1b86db5f4e025840e0bf7cef2b10e84531954386 Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Fri, 29 Oct 2021 14:09:30 +0800 Subject: [PATCH 115/117] bcache: replace snprintf in show functions with sysfs_emit coccicheck complains about the use of snprintf() in sysfs show functions. Fix the following coccicheck warning: drivers/md/bcache/sysfs.h:54:12-20: WARNING: use scnprintf or sprintf. Implement sysfs_print() by sysfs_emit() and remove snprint() since no one uses it any more. Suggested-by: Coly Li Signed-off-by: Qing Wang Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211029060930.119923-3-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/sysfs.h | 18 ++++++++++++++++-- drivers/md/bcache/util.h | 17 ----------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index 215df32f567b..c1752ba2e05b 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -51,13 +51,27 @@ STORE(fn) \ #define sysfs_printf(file, fmt, ...) \ do { \ if (attr == &sysfs_ ## file) \ - return snprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__); \ + return sysfs_emit(buf, fmt "\n", __VA_ARGS__); \ } while (0) #define sysfs_print(file, var) \ do { \ if (attr == &sysfs_ ## file) \ - return snprint(buf, PAGE_SIZE, var); \ + return sysfs_emit(buf, \ + __builtin_types_compatible_p(typeof(var), int) \ + ? "%i\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned int) \ + ? "%u\n" : \ + __builtin_types_compatible_p(typeof(var), long) \ + ? "%li\n" : \ + __builtin_types_compatible_p(typeof(var), unsigned long)\ + ? "%lu\n" : \ + __builtin_types_compatible_p(typeof(var), int64_t) \ + ? "%lli\n" : \ + __builtin_types_compatible_p(typeof(var), uint64_t) \ + ? "%llu\n" : \ + __builtin_types_compatible_p(typeof(var), const char *) \ + ? "%s\n" : "%i\n", var); \ } while (0) #define sysfs_hprint(file, val) \ diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 6274d6a17e5e..cdb165517d0b 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -340,23 +340,6 @@ static inline int bch_strtoul_h(const char *cp, long *res) _r; \ }) -#define snprint(buf, size, var) \ - snprintf(buf, size, \ - __builtin_types_compatible_p(typeof(var), int) \ - ? "%i\n" : \ - __builtin_types_compatible_p(typeof(var), unsigned int) \ - ? "%u\n" : \ - __builtin_types_compatible_p(typeof(var), long) \ - ? "%li\n" : \ - __builtin_types_compatible_p(typeof(var), unsigned long)\ - ? "%lu\n" : \ - __builtin_types_compatible_p(typeof(var), int64_t) \ - ? "%lli\n" : \ - __builtin_types_compatible_p(typeof(var), uint64_t) \ - ? "%llu\n" : \ - __builtin_types_compatible_p(typeof(var), const char *) \ - ? "%s\n" : "%i\n", var) - ssize_t bch_hprint(char *buf, int64_t v); bool bch_is_zero(const char *p, size_t n); From df75db1fc1e5608271397de37cab43371bb838d2 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 29 Oct 2021 17:50:29 +0800 Subject: [PATCH 116/117] block: ataflop: Fix warning comparing pointer to 0 Fix the following coccicheck warning: ./drivers/block/ataflop.c:1464:20-21: WARNING comparing pointer to 0. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Link: https://lore.kernel.org/r/1635501029-81391-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Jens Axboe --- drivers/block/ataflop.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 2622803ef71a..d14bdc3589b2 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1460,8 +1460,7 @@ static int floppy_revalidate(struct gendisk *disk) unsigned int drive = p - unit; if (test_bit(drive, &changed_floppies) || - test_bit(drive, &fake_change) || - p->disktype == 0) { + test_bit(drive, &fake_change) || !p->disktype) { if (UD.flags & FTD_MSG) printk(KERN_ERR "floppy: clear format %p!\n", UDT); BufferDrive = -1; From 15dfc662ef31a20b59097d59b0792b06770255fa Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 29 Oct 2021 19:39:26 +0900 Subject: [PATCH 117/117] null_blk: Fix handling of submit_queues and poll_queues attributes Commit 0a593fbbc245 ("null_blk: poll queue support") introduced the poll queue feature to null_blk. After this change, null_blk device has both submit queues and poll queues, and null_map_queues() callback maps the both queues for corresponding hardware contexts. The commit also added the device configuration attribute 'poll_queues' in same manner as the existing attribute 'submit_queues'. These attributes allow to modify the numbers of queues. However, when the new values are stored to these attributes, the values are just handled only for the corresponding queue. When number of submit_queue is updated, number of poll_queue is not counted, or vice versa. This caused inconsistent number of queues and queue mapping and resulted in null-ptr-dereference. This failure was observed in blktests block/029 and block/030. To avoid the inconsistency, fix the attribute updates to care both submit_queues and poll_queues. Introduce the helper function nullb_update_nr_hw_queues() to handle stores to the both two attributes. Add poll_queues field to the struct nullb_device to track the number in same manner as submit_queues. Add two more fields prev_submit_queues and prev_poll_queues to keep the previous values before change. In case the block layer failed to update the nr_hw_queues, refer the previous values in null_map_queues() to map queues in same manner as before change. Also add poll_queues value checks in nullb_update_nr_hw_queues() and null_validate_conf(). They ensure the poll_queues value of each device is within the range from 1 to module parameter value of poll_queues. Fixes: 0a593fbbc245 ("null_blk: poll queue support") Reported-by: Yi Zhang Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20211029103926.845635-1-shinichiro.kawasaki@wdc.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 106 ++++++++++++++++++++++++------ drivers/block/null_blk/null_blk.h | 2 + 2 files changed, 89 insertions(+), 19 deletions(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index f4af95c2f9a9..323af5c9c802 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -328,30 +328,69 @@ nullb_device_##NAME##_store(struct config_item *item, const char *page, \ } \ CONFIGFS_ATTR(nullb_device_, NAME); -static int nullb_apply_submit_queues(struct nullb_device *dev, - unsigned int submit_queues) -{ - struct nullb *nullb = dev->nullb; - struct blk_mq_tag_set *set; +static int nullb_update_nr_hw_queues(struct nullb_device *dev, + unsigned int submit_queues, + unsigned int poll_queues) - if (!nullb) +{ + struct blk_mq_tag_set *set; + int ret, nr_hw_queues; + + if (!dev->nullb) return 0; + /* + * Make sure at least one queue exists for each of submit and poll. + */ + if (!submit_queues || !poll_queues) + return -EINVAL; + /* * Make sure that null_init_hctx() does not access nullb->queues[] past * the end of that array. */ - if (submit_queues > nr_cpu_ids) + if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues) return -EINVAL; - set = nullb->tag_set; - blk_mq_update_nr_hw_queues(set, submit_queues); - return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM; + + /* + * Keep previous and new queue numbers in nullb_device for reference in + * the call back function null_map_queues(). + */ + dev->prev_submit_queues = dev->submit_queues; + dev->prev_poll_queues = dev->poll_queues; + dev->submit_queues = submit_queues; + dev->poll_queues = poll_queues; + + set = dev->nullb->tag_set; + nr_hw_queues = submit_queues + poll_queues; + blk_mq_update_nr_hw_queues(set, nr_hw_queues); + ret = set->nr_hw_queues == nr_hw_queues ? 0 : -ENOMEM; + + if (ret) { + /* on error, revert the queue numbers */ + dev->submit_queues = dev->prev_submit_queues; + dev->poll_queues = dev->prev_poll_queues; + } + + return ret; +} + +static int nullb_apply_submit_queues(struct nullb_device *dev, + unsigned int submit_queues) +{ + return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues); +} + +static int nullb_apply_poll_queues(struct nullb_device *dev, + unsigned int poll_queues) +{ + return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues); } NULLB_DEVICE_ATTR(size, ulong, NULL); NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); -NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_submit_queues); +NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues); NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); NULLB_DEVICE_ATTR(blocksize, uint, NULL); @@ -599,7 +638,9 @@ static struct nullb_device *null_alloc_dev(void) dev->size = g_gb * 1024; dev->completion_nsec = g_completion_nsec; dev->submit_queues = g_submit_queues; + dev->prev_submit_queues = g_submit_queues; dev->poll_queues = g_poll_queues; + dev->prev_poll_queues = g_poll_queues; dev->home_node = g_home_node; dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; @@ -1465,25 +1506,45 @@ static int null_map_queues(struct blk_mq_tag_set *set) { struct nullb *nullb = set->driver_data; int i, qoff; + unsigned int submit_queues = g_submit_queues; + unsigned int poll_queues = g_poll_queues; + + if (nullb) { + struct nullb_device *dev = nullb->dev; + + /* + * Refer nr_hw_queues of the tag set to check if the expected + * number of hardware queues are prepared. If block layer failed + * to prepare them, use previous numbers of submit queues and + * poll queues to map queues. + */ + if (set->nr_hw_queues == + dev->submit_queues + dev->poll_queues) { + submit_queues = dev->submit_queues; + poll_queues = dev->poll_queues; + } else if (set->nr_hw_queues == + dev->prev_submit_queues + dev->prev_poll_queues) { + submit_queues = dev->prev_submit_queues; + poll_queues = dev->prev_poll_queues; + } else { + pr_warn("tag set has unexpected nr_hw_queues: %d\n", + set->nr_hw_queues); + return -EINVAL; + } + } for (i = 0, qoff = 0; i < set->nr_maps; i++) { struct blk_mq_queue_map *map = &set->map[i]; switch (i) { case HCTX_TYPE_DEFAULT: - if (nullb) - map->nr_queues = nullb->dev->submit_queues; - else - map->nr_queues = g_submit_queues; + map->nr_queues = submit_queues; break; case HCTX_TYPE_READ: map->nr_queues = 0; continue; case HCTX_TYPE_POLL: - if (nullb) - map->nr_queues = nullb->dev->poll_queues; - else - map->nr_queues = g_poll_queues; + map->nr_queues = poll_queues; break; } map->queue_offset = qoff; @@ -1853,6 +1914,13 @@ static int null_validate_conf(struct nullb_device *dev) dev->submit_queues = nr_cpu_ids; else if (dev->submit_queues == 0) dev->submit_queues = 1; + dev->prev_submit_queues = dev->submit_queues; + + if (dev->poll_queues > g_poll_queues) + dev->poll_queues = g_poll_queues; + else if (dev->poll_queues == 0) + dev->poll_queues = 1; + dev->prev_poll_queues = dev->poll_queues; dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 90c3eefb3ca3..78eb56b0ca55 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -86,7 +86,9 @@ struct nullb_device { unsigned int zone_max_open; /* max number of open zones */ unsigned int zone_max_active; /* max number of active zones */ unsigned int submit_queues; /* number of submission queues */ + unsigned int prev_submit_queues; /* number of submission queues before change */ unsigned int poll_queues; /* number of IOPOLL submission queues */ + unsigned int prev_poll_queues; /* number of IOPOLL submission queues before change */ unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */