diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c index 03989f02a1f4..ab4b5958e413 100644 --- a/drivers/vfio/pci/pds/pci_drv.c +++ b/drivers/vfio/pci/pds/pci_drv.c @@ -19,6 +19,113 @@ #define PDS_VFIO_DRV_DESCRIPTION "AMD/Pensando VFIO Device Driver" #define PCI_VENDOR_ID_PENSANDO 0x1dd8 +static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio) +{ + bool deferred_reset_needed = false; + + /* + * Documentation states that the kernel migration driver must not + * generate asynchronous device state transitions outside of + * manipulation by the user or the VFIO_DEVICE_RESET ioctl. + * + * Since recovery is an asynchronous event received from the device, + * initiate a deferred reset. Issue a deferred reset in the following + * situations: + * 1. Migration is in progress, which will cause the next step of + * the migration to fail. + * 2. If the device is in a state that will be set to + * VFIO_DEVICE_STATE_RUNNING on the next action (i.e. VM is + * shutdown and device is in VFIO_DEVICE_STATE_STOP). + */ + mutex_lock(&pds_vfio->state_mutex); + if ((pds_vfio->state != VFIO_DEVICE_STATE_RUNNING && + pds_vfio->state != VFIO_DEVICE_STATE_ERROR) || + (pds_vfio->state == VFIO_DEVICE_STATE_RUNNING && + pds_vfio_dirty_is_enabled(pds_vfio))) + deferred_reset_needed = true; + mutex_unlock(&pds_vfio->state_mutex); + + /* + * On the next user initiated state transition, the device will + * transition to the VFIO_DEVICE_STATE_ERROR. At this point it's the user's + * responsibility to reset the device. + * + * If a VFIO_DEVICE_RESET is requested post recovery and before the next + * state transition, then the deferred reset state will be set to + * VFIO_DEVICE_STATE_RUNNING. + */ + if (deferred_reset_needed) { + spin_lock(&pds_vfio->reset_lock); + pds_vfio->deferred_reset = true; + pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR; + spin_unlock(&pds_vfio->reset_lock); + } +} + +static int pds_vfio_pci_notify_handler(struct notifier_block *nb, + unsigned long ecode, void *data) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(nb, struct pds_vfio_pci_device, nb); + struct device *dev = pds_vfio_to_dev(pds_vfio); + union pds_core_notifyq_comp *event = data; + + dev_dbg(dev, "%s: event code %lu\n", __func__, ecode); + + /* + * We don't need to do anything for RESET state==0 as there is no notify + * or feedback mechanism available, and it is possible that we won't + * even see a state==0 event since the pds_core recovery is pending. + * + * Any requests from VFIO while state==0 will fail, which will return + * error and may cause migration to fail. + */ + if (ecode == PDS_EVENT_RESET) { + dev_info(dev, "%s: PDS_EVENT_RESET event received, state==%d\n", + __func__, event->reset.state); + /* + * pds_core device finished recovery and sent us the + * notification (state == 1) to allow us to recover + */ + if (event->reset.state == 1) + pds_vfio_recovery(pds_vfio); + } + + return 0; +} + +static int +pds_vfio_pci_register_event_handler(struct pds_vfio_pci_device *pds_vfio) +{ + struct device *dev = pds_vfio_to_dev(pds_vfio); + struct notifier_block *nb = &pds_vfio->nb; + int err; + + if (!nb->notifier_call) { + nb->notifier_call = pds_vfio_pci_notify_handler; + err = pdsc_register_notify(nb); + if (err) { + nb->notifier_call = NULL; + dev_err(dev, + "failed to register pds event handler: %pe\n", + ERR_PTR(err)); + return -EINVAL; + } + dev_dbg(dev, "pds event handler registered\n"); + } + + return 0; +} + +static void +pds_vfio_pci_unregister_event_handler(struct pds_vfio_pci_device *pds_vfio) +{ + if (pds_vfio->nb.notifier_call) { + pdsc_unregister_notify(&pds_vfio->nb); + pds_vfio->nb.notifier_call = NULL; + } +} + static int pds_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -43,8 +150,14 @@ static int pds_vfio_pci_probe(struct pci_dev *pdev, goto out_unregister_coredev; } + err = pds_vfio_pci_register_event_handler(pds_vfio); + if (err) + goto out_unregister_client; + return 0; +out_unregister_client: + pds_vfio_unregister_client_cmd(pds_vfio); out_unregister_coredev: vfio_pci_core_unregister_device(&pds_vfio->vfio_coredev); out_put_vdev: @@ -56,6 +169,7 @@ static void pds_vfio_pci_remove(struct pci_dev *pdev) { struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev); + pds_vfio_pci_unregister_event_handler(pds_vfio); pds_vfio_unregister_client_cmd(pds_vfio); vfio_pci_core_unregister_device(&pds_vfio->vfio_coredev); vfio_put_device(&pds_vfio->vfio_coredev.vdev); diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 9e6a96b5db62..b46174f5eb09 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -33,11 +33,12 @@ again: if (pds_vfio->deferred_reset) { pds_vfio->deferred_reset = false; if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { - pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; pds_vfio_put_restore_file(pds_vfio); pds_vfio_put_save_file(pds_vfio); pds_vfio_dirty_disable(pds_vfio, false); } + pds_vfio->state = pds_vfio->deferred_reset_state; + pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; spin_unlock(&pds_vfio->reset_lock); goto again; } @@ -49,6 +50,7 @@ void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) { spin_lock(&pds_vfio->reset_lock); pds_vfio->deferred_reset = true; + pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; if (!mutex_trylock(&pds_vfio->state_mutex)) { spin_unlock(&pds_vfio->reset_lock); return; @@ -67,7 +69,14 @@ pds_vfio_set_device_state(struct vfio_device *vdev, struct file *res = NULL; mutex_lock(&pds_vfio->state_mutex); - while (new_state != pds_vfio->state) { + /* + * only way to transition out of VFIO_DEVICE_STATE_ERROR is via + * VFIO_DEVICE_RESET, so prevent the state machine from running since + * vfio_mig_get_next_state() will throw a WARN_ON() when transitioning + * from VFIO_DEVICE_STATE_ERROR to any other state + */ + while (pds_vfio->state != VFIO_DEVICE_STATE_ERROR && + new_state != pds_vfio->state) { enum vfio_device_mig_state next_state; int err = vfio_mig_get_next_state(vdev, pds_vfio->state, @@ -89,6 +98,9 @@ pds_vfio_set_device_state(struct vfio_device *vdev, } } pds_vfio_state_mutex_unlock(pds_vfio); + /* still waiting on a deferred_reset */ + if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) + res = ERR_PTR(-EIO); return res; } @@ -169,6 +181,7 @@ static int pds_vfio_open_device(struct vfio_device *vdev) mutex_init(&pds_vfio->state_mutex); pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; + pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev); diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h index a314f9ffd6ed..b8f2d667608f 100644 --- a/drivers/vfio/pci/pds/vfio_dev.h +++ b/drivers/vfio/pci/pds/vfio_dev.h @@ -20,6 +20,8 @@ struct pds_vfio_pci_device { enum vfio_device_mig_state state; spinlock_t reset_lock; /* protect reset_done flow */ u8 deferred_reset; + enum vfio_device_mig_state deferred_reset_state; + struct notifier_block nb; int vf_id; u16 client_id;