450744051d
INTx masking has two components, the first is that we need the ability to prevent the device from continuing to assert INTx. This is provided via the DisINTx bit in the command register and is the only thing we can really probe for when testing if INTx masking is supported. The second component is that the device needs to indicate if INTx is asserted via the interrupt status bit in the device status register. With these two features we can generically determine if one of the devices we own is asserting INTx, signal the user, and mask the interrupt while the user services the device. Generally if one or both of these components is broken we resort to APIC level interrupt masking, which requires an exclusive interrupt since we have no way to determine the source of the interrupt in a shared configuration. This often makes it difficult or impossible to configure the system for userspace use of the device, for an interrupt mode that the user may not need. One possible configuration of broken INTx masking is that the DisINTx support is fully functional, but the interrupt status bit never signals interrupt assertion. In this case we do have the ability to prevent the device from asserting INTx, but lack the ability to identify the interrupt source. For this case we can simply pretend that the device lacks INTx support entirely, keeping DisINTx set on the physical device, virtualizing this bit for the user, and virtualizing the interrupt pin register to indicate no INTx support. We already support virtualization of the DisINTx bit and already virtualize the interrupt pin for platforms without INTx support. By tying these components together, setting DisINTx on open and reset, and identifying devices broken in this particular way, we can provide support for them w/o the handicap of APIC level INTx masking. Intel i40e (XL710/X710) 10/20/40GbE NICs have been identified as being broken in this specific way. We leave the vfio-pci.nointxmask option as a mechanism to bypass this support, enabling INTx on the device with all the requirements of APIC level masking. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Cc: John Ronciak <john.ronciak@intel.com> Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
135 lines
3.9 KiB
C
135 lines
3.9 KiB
C
/*
|
|
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
|
* Author: Alex Williamson <alex.williamson@redhat.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* Derived from original vfio:
|
|
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
|
|
* Author: Tom Lyon, pugs@cisco.com
|
|
*/
|
|
|
|
#include <linux/mutex.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/irqbypass.h>
|
|
#include <linux/types.h>
|
|
|
|
#ifndef VFIO_PCI_PRIVATE_H
|
|
#define VFIO_PCI_PRIVATE_H
|
|
|
|
#define VFIO_PCI_OFFSET_SHIFT 40
|
|
|
|
#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
|
|
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
|
|
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
|
|
|
|
/* Special capability IDs predefined access */
|
|
#define PCI_CAP_ID_INVALID 0xFF /* default raw access */
|
|
#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */
|
|
|
|
struct vfio_pci_irq_ctx {
|
|
struct eventfd_ctx *trigger;
|
|
struct virqfd *unmask;
|
|
struct virqfd *mask;
|
|
char *name;
|
|
bool masked;
|
|
struct irq_bypass_producer producer;
|
|
};
|
|
|
|
struct vfio_pci_device;
|
|
struct vfio_pci_region;
|
|
|
|
struct vfio_pci_regops {
|
|
size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
void (*release)(struct vfio_pci_device *vdev,
|
|
struct vfio_pci_region *region);
|
|
};
|
|
|
|
struct vfio_pci_region {
|
|
u32 type;
|
|
u32 subtype;
|
|
const struct vfio_pci_regops *ops;
|
|
void *data;
|
|
size_t size;
|
|
u32 flags;
|
|
};
|
|
|
|
struct vfio_pci_device {
|
|
struct pci_dev *pdev;
|
|
void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
|
|
u8 *pci_config_map;
|
|
u8 *vconfig;
|
|
struct perm_bits *msi_perm;
|
|
spinlock_t irqlock;
|
|
struct mutex igate;
|
|
struct msix_entry *msix;
|
|
struct vfio_pci_irq_ctx *ctx;
|
|
int num_ctx;
|
|
int irq_type;
|
|
int num_regions;
|
|
struct vfio_pci_region *region;
|
|
u8 msi_qmax;
|
|
u8 msix_bar;
|
|
u16 msix_size;
|
|
u32 msix_offset;
|
|
u32 rbar[7];
|
|
bool pci_2_3;
|
|
bool virq_disabled;
|
|
bool reset_works;
|
|
bool extended_caps;
|
|
bool bardirty;
|
|
bool has_vga;
|
|
bool needs_reset;
|
|
bool nointx;
|
|
struct pci_saved_state *pci_saved_state;
|
|
int refcnt;
|
|
struct eventfd_ctx *err_trigger;
|
|
struct eventfd_ctx *req_trigger;
|
|
};
|
|
|
|
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
|
|
#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
|
|
#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
|
|
#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev)))
|
|
#define irq_is(vdev, type) (vdev->irq_type == type)
|
|
|
|
extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
|
|
extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
|
|
|
|
extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
|
|
uint32_t flags, unsigned index,
|
|
unsigned start, unsigned count, void *data);
|
|
|
|
extern ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev,
|
|
char __user *buf, size_t count,
|
|
loff_t *ppos, bool iswrite);
|
|
|
|
extern ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
|
|
size_t count, loff_t *ppos, bool iswrite);
|
|
|
|
extern int vfio_pci_init_perm_bits(void);
|
|
extern void vfio_pci_uninit_perm_bits(void);
|
|
|
|
extern int vfio_config_init(struct vfio_pci_device *vdev);
|
|
extern void vfio_config_free(struct vfio_pci_device *vdev);
|
|
|
|
extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
|
|
unsigned int type, unsigned int subtype,
|
|
const struct vfio_pci_regops *ops,
|
|
size_t size, u32 flags, void *data);
|
|
#ifdef CONFIG_VFIO_PCI_IGD
|
|
extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
|
|
#else
|
|
static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
|
|
{
|
|
return -ENODEV;
|
|
}
|
|
#endif
|
|
#endif /* VFIO_PCI_PRIVATE_H */
|