4813724c4b
This patch adds INTx handling during runtime suspend/resume. All the suspend/resume related code that lets the user put the device into the low power state will be added in subsequent patches. The INTx lines may be shared among devices. Whenever an INTx interrupt arrives for the VFIO devices, vfio_intx_handler() will be called for each device sharing the interrupt. vfio_intx_handler() calls pci_check_and_mask_intx() to check whether the interrupt has been generated for the current device. Now, if the device is already in the D3cold state, then the config space cannot be read. An attempt to read the config space in the D3cold state can cause system unresponsiveness on a few systems. To prevent this, mask INTx in the runtime suspend callback, and unmask it in the runtime resume callback. If INTx has already been masked, then no handling is needed in the runtime suspend/resume callbacks. 'pm_intx_masked' tracks this, and vfio_pci_intx_mask() has been updated to return true if the INTx vfio_pci_irq_ctx.masked value is changed inside this function. For a runtime suspend that is triggered when the VFIO device has no user, the 'irq_type' will be VFIO_PCI_NUM_IRQS and these callbacks won't do anything. MSI/MSI-X interrupts are not shared, so similar handling should not be needed for MSI/MSI-X. vfio_msihandler() triggers eventfd_signal() without doing any device-specific config access. When the user performs any config access or IOCTL after receiving the eventfd notification, the device will be moved to the D0 state first before servicing any request. Another option was to check the 'pm_intx_masked' flag inside vfio_intx_handler() instead of masking the interrupts. This flag is set inside the runtime_suspend callback, but the device can be in a non-D3cold state (for example, if the user has disabled D3cold explicitly via sysfs, if D3cold is not supported by the platform, etc.). 
Also, in the D3cold-supported case, the device will be in D0 until the PCI core moves the device into D3cold. In this case, there is a possibility that the device can generate an interrupt. Adding a check in the IRQ handler will not clear the IRQ status, and the interrupt line will still be asserted. This can cause interrupt flooding. Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com> Link: https://lore.kernel.org/r/20220829114850.4341-4-abhsahu@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
105 lines
3.0 KiB
C
105 lines
3.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
#ifndef VFIO_PCI_PRIV_H
|
|
#define VFIO_PCI_PRIV_H
|
|
|
|
#include <linux/vfio_pci_core.h>
|
|
|
|
/* Special capability IDs that select a predefined access mode */
#define PCI_CAP_ID_INVALID		0xFF	/* default raw access */
#define PCI_CAP_ID_INVALID_VIRT		0xFE	/* default virt access */

/* Cap maximum number of ioeventfds per device (arbitrary) */
#define VFIO_PCI_IOEVENTFD_MAX		1000
|
|
|
|
struct vfio_pci_ioeventfd {
|
|
struct list_head next;
|
|
struct vfio_pci_core_device *vdev;
|
|
struct virqfd *virqfd;
|
|
void __iomem *addr;
|
|
uint64_t data;
|
|
loff_t pos;
|
|
int bar;
|
|
int count;
|
|
bool test_mem;
|
|
};
|
|
|
|
bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev);
|
|
void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev);
|
|
|
|
int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
|
|
unsigned index, unsigned start, unsigned count,
|
|
void *data);
|
|
|
|
ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
#ifdef CONFIG_VFIO_PCI_VGA
|
|
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
#else
|
|
static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev,
|
|
char __user *buf, size_t count,
|
|
loff_t *ppos, bool iswrite)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
#endif
|
|
|
|
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
|
|
uint64_t data, int count, int fd);
|
|
|
|
int vfio_pci_init_perm_bits(void);
|
|
void vfio_pci_uninit_perm_bits(void);
|
|
|
|
int vfio_config_init(struct vfio_pci_core_device *vdev);
|
|
void vfio_config_free(struct vfio_pci_core_device *vdev);
|
|
|
|
int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
|
|
pci_power_t state);
|
|
|
|
bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
|
|
void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
|
|
u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
|
|
void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
|
|
u16 cmd);
|
|
|
|
/*
 * IGD-specific device initialisation.
 *
 * With CONFIG_VFIO_PCI_IGD the function is only declared here and defined
 * elsewhere.  Without it, the inline fallback below always reports
 * -ENODEV.
 */
#ifdef CONFIG_VFIO_PCI_IGD
int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
#else
static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
{
	return -ENODEV;
}
#endif
|
|
|
|
/*
 * zdev hooks.
 *
 * With CONFIG_VFIO_PCI_ZDEV_KVM the three functions are only declared here
 * and defined elsewhere.  Without it, the inline fallbacks below are used:
 *   - vfio_pci_info_zdev_add_caps() always returns -ENODEV,
 *   - vfio_pci_zdev_open_device() always returns 0,
 *   - vfio_pci_zdev_close_device() does nothing.
 */
#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
				struct vfio_info_cap *caps);
int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
#else
static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
					      struct vfio_info_cap *caps)
{
	return -ENODEV;
}

static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
{
	return 0;
}

static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
{}
#endif
|
|
|
|
static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
|
|
{
|
|
return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
|
|
}
|
|
|
|
#endif
|