From 62fa3ce05d5d73c5eccc40b2db493f55fecfc446 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 26 Oct 2022 13:13:56 -0500 Subject: [PATCH 1/3] scsi: ibmvfc: Avoid path failures during live migration Fix an issue reported when performing a live migration when multipath is configured with a short fast fail timeout of 5 seconds and also to have no_path_retry set to fail. In this scenario, all paths would go into the devloss state while the ibmvfc driver went through discovery to log back in. On a loaded system, the discovery might take longer than 5 seconds, which was resulting in all paths being marked failed, which then resulted in a read only filesystem. This patch changes the migration code in ibmvfc to avoid deleting rports at all in this scenario, so we avoid losing all paths. Signed-off-by: Brian King Link: https://lore.kernel.org/r/20221026181356.148517-1-brking@linux.vnet.ibm.com Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 00684e11976b..1a0c0b7289d2 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -708,8 +708,13 @@ static void ibmvfc_init_host(struct ibmvfc_host *vhost) memset(vhost->async_crq.msgs.async, 0, PAGE_SIZE); vhost->async_crq.cur = 0; - list_for_each_entry(tgt, &vhost->targets, queue) - ibmvfc_del_tgt(tgt); + list_for_each_entry(tgt, &vhost->targets, queue) { + if (vhost->client_migrated) + tgt->need_login = 1; + else + ibmvfc_del_tgt(tgt); + } + scsi_block_requests(vhost->host); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT); vhost->job_step = ibmvfc_npiv_login; @@ -3235,9 +3240,12 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, /* We need to re-setup the interpartition connection */ dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n"); vhost->client_migrated = 1; + + scsi_block_requests(vhost->host); ibmvfc_purge_requests(vhost, DID_REQUEUE); - ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); + ibmvfc_set_host_state(vhost, IBMVFC_LINK_DOWN); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE); + wake_up(&vhost->work_wait_q); } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) { dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format); ibmvfc_purge_requests(vhost, DID_ERROR); From 5d7bebf2dfb0dc97aac1fbace0910e557ecdb16f Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 7 Nov 2022 20:48:28 +0800 Subject: [PATCH 2/3] scsi: scsi_transport_sas: Fix error handling in sas_phy_add() If transport_add_device() fails in sas_phy_add(), the kernel will crash trying to delete the device in transport_remove_device() called from sas_remove_host(). Unable to handle kernel NULL pointer dereference at virtual address 0000000000000108 CPU: 61 PID: 42829 Comm: rmmod Kdump: loaded Tainted: G W 6.1.0-rc1+ #173 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : device_del+0x54/0x3d0 lr : device_del+0x37c/0x3d0 Call trace: device_del+0x54/0x3d0 attribute_container_class_device_del+0x28/0x38 transport_remove_classdev+0x6c/0x80 attribute_container_device_trigger+0x108/0x110 transport_remove_device+0x28/0x38 sas_phy_delete+0x30/0x60 [scsi_transport_sas] do_sas_phy_delete+0x6c/0x80 [scsi_transport_sas] device_for_each_child+0x68/0xb0 sas_remove_children+0x40/0x50 [scsi_transport_sas] sas_remove_host+0x20/0x38 [scsi_transport_sas] hisi_sas_remove+0x40/0x68 [hisi_sas_main] hisi_sas_v2_remove+0x20/0x30 [hisi_sas_v2_hw] platform_remove+0x2c/0x60 Fix this by checking and handling return value of transport_add_device() in sas_phy_add(). Fixes: c7ebbbce366c ("[SCSI] SAS transport class") Suggested-by: John Garry Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221107124828.115557-1-yangyingliang@huawei.com Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_sas.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 2f88c61216ee..74b99f2b0b74 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -722,12 +722,17 @@ int sas_phy_add(struct sas_phy *phy) int error; error = device_add(&phy->dev); - if (!error) { - transport_add_device(&phy->dev); - transport_configure_device(&phy->dev); - } + if (error) + return error; - return error; + error = transport_add_device(&phy->dev); + if (error) { + device_del(&phy->dev); + return error; + } + transport_configure_device(&phy->dev); + + return 0; } EXPORT_SYMBOL(sas_phy_add); From ecb8c2580d37dbb641451049376d80c8afaa387f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 2 Nov 2022 12:32:48 -0700 Subject: [PATCH 3/3] scsi: scsi_debug: Make the READ CAPACITY response compliant with ZBC From ZBC-1: - RC BASIS = 0: The RETURNED LOGICAL BLOCK ADDRESS field indicates the highest LBA of a contiguous range of zones that are not sequential write required zones starting with the first zone. - RC BASIS = 1: The RETURNED LOGICAL BLOCK ADDRESS field indicates the LBA of the last logical block on the logical unit. The current scsi_debug READ CAPACITY response does not comply with the above if there are one or more sequential write required zones. SCSI initiators need a way to retrieve the largest valid LBA from SCSI devices. Reporting the largest valid LBA if there are one or more sequential zones requires to set the RC BASIS field in the READ CAPACITY response to one. Hence this patch. Cc: Douglas Gilbert Cc: Damien Le Moal Suggested-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221102193248.3177608-1-bvanassche@acm.org Reviewed-by: Damien Le Moal Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 697fc57bc711..629853662b82 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1899,6 +1899,13 @@ static int resp_readcap16(struct scsi_cmnd *scp, arr[14] |= 0x40; } + /* + * Since the scsi_debug READ CAPACITY implementation always reports the + * total disk capacity, set RC BASIS = 1 for host-managed ZBC devices. + */ + if (devip->zmodel == BLK_ZONED_HM) + arr[12] |= 1 << 4; + arr[15] = sdebug_lowest_aligned & 0xff; if (have_dif_prot) {