vfio/mlx5: Use its own PCI reset_done error handler
Register its own handler for pci_error_handlers.reset_done and update state accordingly. Link: https://lore.kernel.org/all/20220224142024.147653-16-yishaih@nvidia.com Reviewed-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
This commit is contained in:
parent
915076f70e
commit
88faa5e8ea
@ -29,9 +29,12 @@ struct mlx5vf_pci_core_device {
|
||||
struct vfio_pci_core_device core_device;
|
||||
u16 vhca_id;
|
||||
u8 migrate_cap:1;
|
||||
u8 deferred_reset:1;
|
||||
/* protect migration state */
|
||||
struct mutex state_mutex;
|
||||
enum vfio_device_mig_state mig_state;
|
||||
/* protect the reset_done flow */
|
||||
spinlock_t reset_lock;
|
||||
struct mlx5_vf_migration_file *resuming_migf;
|
||||
struct mlx5_vf_migration_file *saving_migf;
|
||||
};
|
||||
@ -437,6 +440,25 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called in all state_mutex unlock cases to
|
||||
* handle a 'deferred_reset' if exists.
|
||||
*/
|
||||
static void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
|
||||
{
|
||||
again:
|
||||
spin_lock(&mvdev->reset_lock);
|
||||
if (mvdev->deferred_reset) {
|
||||
mvdev->deferred_reset = false;
|
||||
spin_unlock(&mvdev->reset_lock);
|
||||
mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
|
||||
mlx5vf_disable_fds(mvdev);
|
||||
goto again;
|
||||
}
|
||||
mutex_unlock(&mvdev->state_mutex);
|
||||
spin_unlock(&mvdev->reset_lock);
|
||||
}
|
||||
|
||||
static struct file *
|
||||
mlx5vf_pci_set_device_state(struct vfio_device *vdev,
|
||||
enum vfio_device_mig_state new_state)
|
||||
@ -465,7 +487,7 @@ mlx5vf_pci_set_device_state(struct vfio_device *vdev,
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&mvdev->state_mutex);
|
||||
mlx5vf_state_mutex_unlock(mvdev);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -477,10 +499,34 @@ static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
|
||||
|
||||
mutex_lock(&mvdev->state_mutex);
|
||||
*curr_state = mvdev->mig_state;
|
||||
mutex_unlock(&mvdev->state_mutex);
|
||||
mlx5vf_state_mutex_unlock(mvdev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mlx5vf_pci_aer_reset_done(struct pci_dev *pdev)
|
||||
{
|
||||
struct mlx5vf_pci_core_device *mvdev = dev_get_drvdata(&pdev->dev);
|
||||
|
||||
if (!mvdev->migrate_cap)
|
||||
return;
|
||||
|
||||
/*
|
||||
* As the higher VFIO layers are holding locks across reset and using
|
||||
* those same locks with the mm_lock we need to prevent ABBA deadlock
|
||||
* with the state_mutex and mm_lock.
|
||||
* In case the state_mutex was taken already we defer the cleanup work
|
||||
* to the unlock flow of the other running context.
|
||||
*/
|
||||
spin_lock(&mvdev->reset_lock);
|
||||
mvdev->deferred_reset = true;
|
||||
if (!mutex_trylock(&mvdev->state_mutex)) {
|
||||
spin_unlock(&mvdev->reset_lock);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&mvdev->reset_lock);
|
||||
mlx5vf_state_mutex_unlock(mvdev);
|
||||
}
|
||||
|
||||
static int mlx5vf_pci_open_device(struct vfio_device *core_vdev)
|
||||
{
|
||||
struct mlx5vf_pci_core_device *mvdev = container_of(
|
||||
@ -562,6 +608,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
|
||||
VFIO_MIGRATION_STOP_COPY |
|
||||
VFIO_MIGRATION_P2P;
|
||||
mutex_init(&mvdev->state_mutex);
|
||||
spin_lock_init(&mvdev->reset_lock);
|
||||
}
|
||||
mlx5_vf_put_core_dev(mdev);
|
||||
}
|
||||
@ -596,11 +643,17 @@ static const struct pci_device_id mlx5vf_pci_table[] = {
|
||||
|
||||
MODULE_DEVICE_TABLE(pci, mlx5vf_pci_table);
|
||||
|
||||
static const struct pci_error_handlers mlx5vf_err_handlers = {
|
||||
.reset_done = mlx5vf_pci_aer_reset_done,
|
||||
.error_detected = vfio_pci_core_aer_err_detected,
|
||||
};
|
||||
|
||||
static struct pci_driver mlx5vf_pci_driver = {
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = mlx5vf_pci_table,
|
||||
.probe = mlx5vf_pci_probe,
|
||||
.remove = mlx5vf_pci_remove,
|
||||
.err_handler = &mlx5vf_err_handlers,
|
||||
};
|
||||
|
||||
static void __exit mlx5vf_pci_cleanup(void)
|
||||
|
Loading…
Reference in New Issue
Block a user