From 9f365cb8bbd0162963d6852651d7c9e30adcb7b5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 14 May 2024 13:47:23 -0700 Subject: [PATCH 1/8] scsi: mpi3mr: Use proper format specifier in mpi3mr_sas_port_add() When building for a 32-bit platform such as ARM or i386, for which size_t is unsigned int, there is a warning due to using an unsigned long format specifier: drivers/scsi/mpi3mr/mpi3mr_transport.c:1370:11: error: format specifies type 'unsigned long' but the argument has type 'unsigned int' [-Werror,-Wformat] 1369 | ioc_warn(mrioc, "skipping port %u, max allowed value is %lu\n", | ~~~ | %u 1370 | i, sizeof(mr_sas_port->phy_mask) * 8); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use the proper format specifier for size_t, %zu, to resolve the warning for all platforms. Fixes: 3668651def2c ("scsi: mpi3mr: Sanitise num_phys") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240514-mpi3mr-fix-wformat-v1-1-f1ad49217e5e@kernel.org Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index 7ca9a7c2709c..5d261c2f2d20 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -1366,7 +1366,7 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc, continue; if (i > sizeof(mr_sas_port->phy_mask) * 8) { - ioc_warn(mrioc, "skipping port %u, max allowed value is %lu\n", + ioc_warn(mrioc, "skipping port %u, max allowed value is %zu\n", i, sizeof(mr_sas_port->phy_mask) * 8); goto out_fail; } From 10157b1fc1a762293381e9145041253420dfc6ad Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Tue, 14 May 2024 16:03:44 +0200 Subject: [PATCH 2/8] scsi: core: alua: I/O errors for ALUA state transitions When a host is configured with a few LUNs and I/O is running, injecting FC faults repeatedly leads to path recovery problems. The LUNs have 4 paths each and 3 of them come back active after say an FC fault which makes 2 of the paths go down, instead of all 4. This happens after several iterations of continuous FC faults. Reason here is that we're returning an I/O error whenever we're encountering sense code 06/04/0a (LOGICAL UNIT NOT ACCESSIBLE, ASYMMETRIC ACCESS STATE TRANSITION) instead of retrying. [mwilck: The original patch was developed by Rajashekhar M A and Hannes Reinecke. I moved the code to alua_check_sense() as suggested by Mike Christie [1]. Evan Milne had raised the question whether pg->state should be set to transitioning in the UA case [2]. I believe that doing this is correct. SCSI_ACCESS_STATE_TRANSITIONING by itself doesn't cause I/O errors. Our handler schedules an RTPG, which will only result in an I/O error condition if the transitioning timeout expires.] [1] https://lore.kernel.org/all/0bc96e82-fdda-4187-148d-5b34f81d4942@oracle.com/ [2] https://lore.kernel.org/all/CAGtn9r=kicnTDE2o7Gt5Y=yoidHYD7tG8XdMHEBJTBraVEoOCw@mail.gmail.com/ Co-developed-by: Rajashekhar M A Co-developed-by: Hannes Reinecke Signed-off-by: Hannes Reinecke Signed-off-by: Martin Wilck Link: https://lore.kernel.org/r/20240514140344.19538-1-mwilck@suse.com Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 31 +++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index a226dc1b65d7..4eb0837298d4 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -414,28 +414,40 @@ static char print_alua_state(unsigned char state) } } -static enum scsi_disposition alua_check_sense(struct scsi_device *sdev, - struct scsi_sense_hdr *sense_hdr) +static void alua_handle_state_transition(struct scsi_device *sdev) { struct alua_dh_data *h = sdev->handler_data; struct alua_port_group *pg; + rcu_read_lock(); + pg = rcu_dereference(h->pg); + if (pg) + pg->state = SCSI_ACCESS_STATE_TRANSITIONING; + rcu_read_unlock(); + alua_check(sdev, false); +} + +static enum scsi_disposition alua_check_sense(struct scsi_device *sdev, + struct scsi_sense_hdr *sense_hdr) +{ switch (sense_hdr->sense_key) { case NOT_READY: if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { /* * LUN Not Accessible - ALUA state transition */ - rcu_read_lock(); - pg = rcu_dereference(h->pg); - if (pg) - pg->state = SCSI_ACCESS_STATE_TRANSITIONING; - rcu_read_unlock(); - alua_check(sdev, false); + alua_handle_state_transition(sdev); return NEEDS_RETRY; } break; case UNIT_ATTENTION: + if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { + /* + * LUN Not Accessible - ALUA state transition + */ + alua_handle_state_transition(sdev); + return NEEDS_RETRY; + } if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { /* * Power On, Reset, or Bus Device Reset. @@ -502,7 +514,8 @@ static int alua_tur(struct scsi_device *sdev) retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, ALUA_FAILOVER_RETRIES, &sense_hdr); - if (sense_hdr.sense_key == NOT_READY && + if ((sense_hdr.sense_key == NOT_READY || + sense_hdr.sense_key == UNIT_ATTENTION) && sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) return SCSI_DH_RETRY; else if (retval) From 9fad9d560af5c654bb38e0b07ee54a4e9acdc5cd Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Wed, 8 May 2024 17:22:51 +0000 Subject: [PATCH 3/8] scsi: sr: Fix unintentional arithmetic wraparound Running syzkaller with the newly reintroduced signed integer overflow sanitizer produces this report: [ 65.194362] ------------[ cut here ]------------ [ 65.197752] UBSAN: signed-integer-overflow in ../drivers/scsi/sr_ioctl.c:436:9 [ 65.203607] -2147483648 * 177 cannot be represented in type 'int' [ 65.207911] CPU: 2 PID: 10416 Comm: syz-executor.1 Not tainted 6.8.0-rc2-00035-gb3ef86b5a957 #1 [ 65.213585] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 65.219923] Call Trace: [ 65.221556] [ 65.223029] dump_stack_lvl+0x93/0xd0 [ 65.225573] handle_overflow+0x171/0x1b0 [ 65.228219] sr_select_speed+0xeb/0xf0 [ 65.230786] ? __pm_runtime_resume+0xe6/0x130 [ 65.233606] sr_block_ioctl+0x15d/0x1d0 ... Historically, the signed integer overflow sanitizer did not work in the kernel due to its interaction with `-fwrapv` but this has since been changed [1] in the newest version of Clang. It was re-enabled in the kernel with Commit 557f8c582a9b ("ubsan: Reintroduce signed overflow sanitizer"). Firstly, let's change the type of "speed" to unsigned long as sr_select_speed()'s only caller passes in an unsigned long anyways. $ git grep '\.select_speed' | drivers/scsi/sr.c: .select_speed = sr_select_speed, ... | static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi, | unsigned long arg) | { | ... | return cdi->ops->select_speed(cdi, arg); | } Next, let's add an extra check to make sure we don't exceed 0xffff/177 (350) since 0xffff is the max speed. This has two benefits: 1) we deal with integer overflow before it happens and 2) we properly respect the max speed of 0xffff. There are some "magic" numbers here but I did not want to change more than what was necessary. Link: https://github.com/llvm/llvm-project/pull/82432 [1] Closes: https://github.com/KSPP/linux/issues/357 Cc: linux-hardening@vger.kernel.org Signed-off-by: Justin Stitt Link: https://lore.kernel.org/r/20240508-b4-b4-sio-sr_select_speed-v2-1-00b68f724290@google.com Reviewed-by: Kees Cook Signed-off-by: Martin K. Petersen --- Documentation/cdrom/cdrom-standard.rst | 4 ++-- drivers/scsi/sr.h | 2 +- drivers/scsi/sr_ioctl.c | 5 ++++- include/linux/cdrom.h | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst index 7964fe134277..6c1303cff159 100644 --- a/Documentation/cdrom/cdrom-standard.rst +++ b/Documentation/cdrom/cdrom-standard.rst @@ -217,7 +217,7 @@ current *struct* is:: int (*media_changed)(struct cdrom_device_info *, int); int (*tray_move)(struct cdrom_device_info *, int); int (*lock_door)(struct cdrom_device_info *, int); - int (*select_speed)(struct cdrom_device_info *, int); + int (*select_speed)(struct cdrom_device_info *, unsigned long); int (*get_last_session) (struct cdrom_device_info *, struct cdrom_multisession *); int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *); @@ -396,7 +396,7 @@ action need be taken, and the return value should be 0. :: - int select_speed(struct cdrom_device_info *cdi, int speed) + int select_speed(struct cdrom_device_info *cdi, unsigned long speed) Some CD-ROM drives are capable of changing their head-speed. There are several reasons for changing the speed of a CD-ROM drive. Badly diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h index 1175f2e213b5..dc899277b3a4 100644 --- a/drivers/scsi/sr.h +++ b/drivers/scsi/sr.h @@ -65,7 +65,7 @@ int sr_disk_status(struct cdrom_device_info *); int sr_get_last_session(struct cdrom_device_info *, struct cdrom_multisession *); int sr_get_mcn(struct cdrom_device_info *, struct cdrom_mcn *); int sr_reset(struct cdrom_device_info *); -int sr_select_speed(struct cdrom_device_info *cdi, int speed); +int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed); int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *); int sr_is_xa(Scsi_CD *); diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index 5b0b35e60e61..a0d2556a27bb 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -425,11 +425,14 @@ int sr_reset(struct cdrom_device_info *cdi) return 0; } -int sr_select_speed(struct cdrom_device_info *cdi, int speed) +int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed) { Scsi_CD *cd = cdi->handle; struct packet_command cgc; + /* avoid exceeding the max speed or overflowing integer bounds */ + speed = clamp(0, speed, 0xffff / 177); + if (speed == 0) speed = 0xffff; /* set to max */ else diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 98c6fd0b39b6..fdfb61ccf55a 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -77,7 +77,7 @@ struct cdrom_device_ops { unsigned int clearing, int slot); int (*tray_move) (struct cdrom_device_info *, int); int (*lock_door) (struct cdrom_device_info *, int); - int (*select_speed) (struct cdrom_device_info *, int); + int (*select_speed) (struct cdrom_device_info *, unsigned long); int (*get_last_session) (struct cdrom_device_info *, struct cdrom_multisession *); int (*get_mcn) (struct cdrom_device_info *, From 51071f0831ea975fc045526dd7e17efe669dc6e1 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Wed, 15 May 2024 14:40:59 +0530 Subject: [PATCH 4/8] scsi: qedf: Don't process stag work during unload and recovery Stag work can cause issues during unload and recovery, hence don't process it. Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240515091101.18754-2-skashyap@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index fd12439cbaab..c457caae07b3 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3997,6 +3997,22 @@ void qedf_stag_change_work(struct work_struct *work) struct qedf_ctx *qedf = container_of(work, struct qedf_ctx, stag_work.work); + if (!qedf) { + QEDF_ERR(&qedf->dbg_ctx, "qedf is NULL"); + return; + } + + if (test_bit(QEDF_IN_RECOVERY, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, + "Already is in recovery, hence not calling software context reset.\n"); + return; + } + + if (test_bit(QEDF_UNLOADING, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "Driver unloading\n"); + return; + } + printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.", dev_name(&qedf->pdev->dev), __func__, __LINE__, qedf->dbg_ctx.host_no); From 78e88472b60936025b83eba57cffa59d3501dc07 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Wed, 15 May 2024 14:41:00 +0530 Subject: [PATCH 5/8] scsi: qedf: Wait for stag work during unload If stag work is already scheduled and unload is called, it can lead to issues as unload cleans up the work element. Wait for stag work to get completed before cleanup during unload. Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240515091101.18754-3-skashyap@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 1 + drivers/scsi/qedf/qedf_main.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index 5058e01b65a2..98afdfe63600 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -363,6 +363,7 @@ struct qedf_ctx { #define QEDF_IN_RECOVERY 5 #define QEDF_DBG_STOP_IO 6 #define QEDF_PROBING 8 +#define QEDF_STAG_IN_PROGRESS 9 unsigned long flags; /* Miscellaneous state flags */ int fipvlan_retries; u8 num_queues; diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index c457caae07b3..eca0190d9a82 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -318,11 +318,18 @@ static struct fc_seq *qedf_elsct_send(struct fc_lport *lport, u32 did, */ if (resp == fc_lport_flogi_resp) { qedf->flogi_cnt++; + qedf->flogi_pending++; + + if (test_bit(QEDF_UNLOADING, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "Driver unloading\n"); + qedf->flogi_pending = 0; + } + if (qedf->flogi_pending >= QEDF_FLOGI_RETRY_CNT) { schedule_delayed_work(&qedf->stag_work, 2); return NULL; } - qedf->flogi_pending++; + return fc_elsct_send(lport, did, fp, op, qedf_flogi_resp, arg, timeout); } @@ -912,13 +919,14 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) struct qedf_ctx *qedf; struct qed_link_output if_link; + qedf = lport_priv(lport); + if (lport->vport) { + clear_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags); printk_ratelimited("Cannot issue host reset on NPIV port.\n"); return; } - qedf = lport_priv(lport); - qedf->flogi_pending = 0; /* For host reset, essentially do a soft link up/down */ atomic_set(&qedf->link_state, QEDF_LINK_DOWN); @@ -938,6 +946,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) if (!if_link.link_up) { QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC, "Physical link is not up.\n"); + clear_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags); return; } /* Flush and wait to make sure link down is processed */ @@ -950,6 +959,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) "Queue link up work.\n"); queue_delayed_work(qedf->link_update_wq, &qedf->link_update, 0); + clear_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags); } /* Reset the host by gracefully logging out and then logging back in */ @@ -3721,6 +3731,7 @@ static void __qedf_remove(struct pci_dev *pdev, int mode) { struct qedf_ctx *qedf; int rc; + int cnt = 0; if (!pdev) { QEDF_ERR(NULL, "pdev is NULL.\n"); @@ -3738,6 +3749,17 @@ static void __qedf_remove(struct pci_dev *pdev, int mode) return; } +stag_in_prog: + if (test_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "Stag in progress, cnt=%d.\n", cnt); + cnt++; + + if (cnt < 5) { + msleep(500); + goto stag_in_prog; + } + } + if (mode != QEDF_MODE_RECOVERY) set_bit(QEDF_UNLOADING, &qedf->flags); @@ -4013,6 +4035,8 @@ void qedf_stag_change_work(struct work_struct *work) return; } + set_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags); + printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.", dev_name(&qedf->pdev->dev), __func__, __LINE__, qedf->dbg_ctx.host_no); From 6c3bb589debd763dc4b94803ddf3c13b4fcca776 Mon Sep 17 00:00:00 2001 From: Saurav Kashyap Date: Wed, 15 May 2024 14:41:01 +0530 Subject: [PATCH 6/8] scsi: qedf: Set qed_slowpath_params to zero before use Zero qed_slowpath_params before use. Signed-off-by: Saurav Kashyap Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20240515091101.18754-4-skashyap@marvell.com Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index eca0190d9a82..49adddf978cc 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3473,6 +3473,7 @@ retry_probe: } /* Start the Slowpath-process */ + memset(&slowpath_params, 0, sizeof(struct qed_slowpath_params)); slowpath_params.int_mode = QED_INT_MODE_MSIX; slowpath_params.drv_major = QEDF_DRIVER_MAJOR_VER; slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER; From e4f5f8298cf6ddae43210d236ad65ac2c6379559 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Mon, 13 May 2024 07:59:56 -0400 Subject: [PATCH 7/8] scsi: mpt3sas: Add missing kerneldoc parameter descriptions Add missing kerneldoc parameter descriptions to _scsih_set_debug_level(). Signed-off-by: Deming Wang Link: https://lore.kernel.org/r/20240513115956.1576-1-wangdeming@inspur.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 89ef43a5ef86..12d08d8ba538 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -302,8 +302,8 @@ struct _scsi_io_transfer { /** * _scsih_set_debug_level - global setting of ioc->logging_level. - * @val: ? - * @kp: ? + * @val: value of the parameter to be set + * @kp: pointer to kernel_param structure * * Note: The logging levels are defined in mpt3sas_debug.h. */ From d09c05aa35909adb7d29f92f0cd79fdcd1338ef0 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 20 May 2024 22:30:40 -0400 Subject: [PATCH 8/8] scsi: core: Handle devices which return an unusually large VPD page count Peter Schneider reported that a system would no longer boot after updating to 6.8.4. Peter bisected the issue and identified commit b5fc07a5fb56 ("scsi: core: Consult supported VPD page list prior to fetching page") as being the culprit. Turns out the enclosure device in Peter's system reports a byteswapped page length for VPD page 0. It reports "02 00" as page length instead of "00 02". This causes us to attempt to access 516 bytes (page length + header) of information despite only 2 pages being present. Limit the page search scope to the size of our VPD buffer to guard against devices returning a larger page count than requested. Link: https://lore.kernel.org/r/20240521023040.2703884-1-martin.petersen@oracle.com Fixes: b5fc07a5fb56 ("scsi: core: Consult supported VPD page list prior to fetching page") Cc: stable@vger.kernel.org Reported-by: Peter Schneider Closes: https://lore.kernel.org/all/eec6ebbf-061b-4a7b-96dc-ea748aa4d035@googlemail.com/ Tested-by: Peter Schneider Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 3e0c0381277a..f0464db3f9de 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -350,6 +350,13 @@ static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) if (result < SCSI_VPD_HEADER_SIZE) return 0; + if (result > sizeof(vpd)) { + dev_warn_once(&sdev->sdev_gendev, + "%s: long VPD page 0 length: %d bytes\n", + __func__, result); + result = sizeof(vpd); + } + result -= SCSI_VPD_HEADER_SIZE; if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) return 0;