KVM: s390x: Fixes and features for 5.20
* First part of deferred teardown
* CPU Topology
* interpretive execution for PCI instructions
* PV attestation
* Minor fixes

Merge tag 'kvm-s390-next-5.20-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
commit a4850b5590
@@ -5955,6 +5955,52 @@ KVM_PV_DUMP_CPU
Provides encrypted dump data like register values.
The length of the returned data is provided by uv_info.guest_cpu_stor_len.

4.137 KVM_S390_ZPCI_OP
----------------------

:Capability: KVM_CAP_S390_ZPCI_OP
:Architectures: s390
:Type: vm ioctl
:Parameters: struct kvm_s390_zpci_op (in)
:Returns: 0 on success, <0 on error

Used to manage hardware-assisted virtualization features for zPCI devices.

Parameters are specified via the following structure::

  struct kvm_s390_zpci_op {
      /* in */
      __u32 fh;               /* target device */
      __u8  op;               /* operation to perform */
      __u8  pad[3];
      union {
          /* for KVM_S390_ZPCIOP_REG_AEN */
          struct {
              __u64 ibv;      /* Guest addr of interrupt bit vector */
              __u64 sb;       /* Guest addr of summary bit */
              __u32 flags;
              __u32 noi;      /* Number of interrupts */
              __u8  isc;      /* Guest interrupt subclass */
              __u8  sbo;      /* Offset of guest summary bit vector */
              __u16 pad;
          } reg_aen;
          __u64 reserved[8];
      } u;
  };

The type of operation is specified in the "op" field.
KVM_S390_ZPCIOP_REG_AEN is used to register the VM for adapter event
notification interpretation, which will allow firmware delivery of adapter
events directly to the vm, with KVM providing a backup delivery mechanism;
KVM_S390_ZPCIOP_DEREG_AEN is used to subsequently disable interpretation of
adapter event notifications.

The target zPCI function must also be specified via the "fh" field. For the
KVM_S390_ZPCIOP_REG_AEN operation, additional information to establish firmware
delivery must be provided via the "reg_aen" struct.

The "pad" and "reserved" fields may be used for future extensions and should be
set to 0s by userspace.
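For illustration, a minimal userspace sketch of registering a guest for
adapter event notification via this ioctl could look as follows
(hypothetical helper; assumes an open VM file descriptor and a valid
function handle, error handling elided)::

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Register guest function "fh" for adapter event notification forwarding. */
  static int zpci_reg_aen(int vm_fd, __u32 fh, __u64 ibv_gaddr, __u64 sb_gaddr,
                          __u32 noi, __u8 isc, __u8 sbo)
  {
      struct kvm_s390_zpci_op args;

      memset(&args, 0, sizeof(args));     /* pad and reserved must be 0 */
      args.fh = fh;
      args.op = KVM_S390_ZPCIOP_REG_AEN;
      args.u.reg_aen.ibv = ibv_gaddr;     /* guest addr of interrupt bit vector */
      args.u.reg_aen.sb = sb_gaddr;       /* guest addr of summary bit */
      args.u.reg_aen.noi = noi;
      args.u.reg_aen.isc = isc;
      args.u.reg_aen.sbo = sbo;

      return ioctl(vm_fd, KVM_S390_ZPCI_OP, &args);
  }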

5. The kvm_run structure
========================
@@ -8223,6 +8269,31 @@ The capability has no effect if the nx_huge_pages module parameter is not set.

This capability may only be set before any vCPUs are created.

8.39 KVM_CAP_S390_CPU_TOPOLOGY
------------------------------

:Capability: KVM_CAP_S390_CPU_TOPOLOGY
:Architectures: s390
:Type: vm

This capability indicates that KVM will provide the S390 CPU Topology
facility, which consists of interpretation of the PTF instruction for
function code 2 along with interception and forwarding of both the
PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
instruction to the userland hypervisor.

The stfle facility 11, CPU Topology facility, should not be indicated
to the guest without this capability.

When this capability is present, KVM provides a new attribute group
on the vm fd, KVM_S390_VM_CPU_TOPOLOGY.
This new attribute allows getting, setting, or clearing the Modified Change
Topology Report (MTCR) bit of the SCA through the kvm_device_attr
structure.

When getting the Modified Change Topology Report value, attr->addr
must point to a byte where the value will be stored.
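A hedged sketch of how userspace might enable the capability and then read
the MTCR bit (hypothetical helper; assumes an open VM fd, error handling
elided)::

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Enable the topology capability, then fetch the current MTCR bit. */
  static int read_mtcr(int vm_fd, __u8 *mtcr)
  {
      struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CPU_TOPOLOGY };
      struct kvm_device_attr attr = {
          .group = KVM_S390_VM_CPU_TOPOLOGY,
          .addr = (__u64)(unsigned long)mtcr,
      };

      /* Must happen before any vCPU is created. */
      if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
          return -1;
      return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
  }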

9. Known KVM API problems
=========================
@@ -17453,6 +17453,7 @@ M: Eric Farman <farman@linux.ibm.com>
L: linux-s390@vger.kernel.org
L: kvm@vger.kernel.org
S: Supported
F: arch/s390/kvm/pci*
F: drivers/vfio/pci/vfio_pci_zdev.c
F: include/uapi/linux/vfio_zdev.h
@@ -45,6 +45,8 @@ void uv_query_info(void)
    uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
    uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
    uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
    uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
    uv_info.supp_att_pflags = uvcb.supp_att_pflags;
}

#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
@@ -12,10 +12,11 @@

#include <linux/bit_spinlock.h>
#include <linux/dma-mapping.h>
#include <asm/tpi.h>

struct airq_struct {
    struct hlist_node list;     /* Handler queueing. */
    void (*handler)(struct airq_struct *airq, bool floating);
    void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
    u8 *lsi_ptr;                /* Local-Summary-Indicator pointer */
    u8 lsi_mask;                /* Local-Summary-Indicator mask */
    u8 isc;                     /* Interrupt-subclass */
@@ -46,8 +47,10 @@ struct airq_iv {
#define AIRQ_IV_PTR       4     /* Allocate the ptr array */
#define AIRQ_IV_DATA      8     /* Allocate the data array */
#define AIRQ_IV_CACHELINE 16    /* Cacheline alignment for the vector */
#define AIRQ_IV_GUESTVEC  32    /* Vector is a pinned guest page */

struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
                               unsigned long *vec);
void airq_iv_release(struct airq_iv *iv);
unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
@@ -147,5 +147,42 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
                             unsigned long gaddr, unsigned long vmaddr);
int gmap_mark_unmergeable(void);
void s390_reset_acc(struct mm_struct *mm);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
                            unsigned long end, bool interruptible);

/**
 * s390_uv_destroy_range - Destroy a range of pages in the given mm.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 *
 * This function will call cond_resched, so it should not generate stalls, but
 * it will otherwise only return when it has completed.
 */
static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
                                         unsigned long end)
{
    (void)__s390_uv_destroy_range(mm, start, end, false);
}

/**
 * s390_uv_destroy_range_interruptible - Destroy a range of pages in the
 * given mm, but stop when a fatal signal is received.
 * @mm: the mm to operate on
 * @start: the start of the range
 * @end: the end of the range
 *
 * This function will call cond_resched, so it should not generate stalls. If
 * a fatal signal is received, it will return with -EINTR immediately,
 * without finishing destroying the whole range. Upon successful
 * completion, 0 is returned.
 */
static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
                                                      unsigned long end)
{
    return __s390_uv_destroy_range(mm, start, end, true);
}
#endif /* _ASM_S390_GMAP_H */
@@ -19,6 +19,8 @@
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/mmu_notifier.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@@ -93,19 +95,30 @@ union ipte_control {
    };
};

union sca_utility {
    __u16 val;
    struct {
        __u16 mtcr : 1;
        __u16 reserved : 15;
    };
};

struct bsca_block {
    union ipte_control ipte_control;
    __u64 reserved[5];
    __u64 mcn;
    __u64 reserved2;
    union sca_utility utility;
    __u8 reserved2[6];
    struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
};

struct esca_block {
    union ipte_control ipte_control;
    __u64 reserved1[7];
    __u64 reserved1[6];
    union sca_utility utility;
    __u8 reserved2[6];
    __u64 mcn[4];
    __u64 reserved2[20];
    __u64 reserved3[20];
    struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
};

@@ -249,12 +262,16 @@ struct kvm_s390_sie_block {
#define ECB_SPECI       0x08
#define ECB_SRSI        0x04
#define ECB_HOSTPROTINT 0x02
#define ECB_PTF         0x01
    __u8 ecb;           /* 0x0061 */
#define ECB2_CMMA       0x80
#define ECB2_IEP        0x20
#define ECB2_PFMFI      0x08
#define ECB2_ESCA       0x04
#define ECB2_ZPCI_LSI   0x02
    __u8 ecb2;          /* 0x0062 */
#define ECB3_AISI       0x20
#define ECB3_AISII      0x10
#define ECB3_DEA        0x08
#define ECB3_AES        0x04
#define ECB3_RI         0x01
@@ -759,6 +776,7 @@ struct kvm_vm_stat {
    u64 inject_pfault_done;
    u64 inject_service_signal;
    u64 inject_virtio;
    u64 aen_forward;
};

struct kvm_arch_memory_slot {
@@ -924,6 +942,7 @@ struct kvm_s390_pv {
    unsigned long stor_base;
    void *stor_var;
    bool dumping;
    struct mmu_notifier mmu_notifier;
};

struct kvm_arch{
@@ -940,6 +959,7 @@ struct kvm_arch{
    int use_cmma;
    int use_pfmfi;
    int use_skf;
    int use_zpci_interp;
    int user_cpu_state_ctrl;
    int user_sigp;
    int user_stsi;
@@ -963,6 +983,8 @@ struct kvm_arch{
    DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
    struct kvm_s390_gisa_interrupt gisa_int;
    struct kvm_s390_pv pv;
    struct list_head kzdev_list;
    spinlock_t kzdev_list_lock;
};

#define KVM_HVA_ERR_BAD (-1UL)
@@ -1013,4 +1035,19 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}

#define __KVM_HAVE_ARCH_VM_FREE
void kvm_arch_free_vm(struct kvm *kvm);

#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
#else
static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
                                            struct kvm *kvm)
{
    return -EPERM;
}
static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
#endif

#endif
@@ -18,7 +18,7 @@ typedef struct {
    unsigned long asce_limit;
    unsigned long vdso_base;
    /* The mmu context belongs to a secure guest. */
    atomic_t is_protected;
    atomic_t protected_count;
    /*
     * The following bitfields need a down_write on the mm
     * semaphore when they are written to. As they are only
@@ -26,7 +26,7 @@ static inline int init_new_context(struct task_struct *tsk,
    INIT_LIST_HEAD(&mm->context.gmap_list);
    cpumask_clear(&mm->context.cpu_attach_mask);
    atomic_set(&mm->context.flush_count, 0);
    atomic_set(&mm->context.is_protected, 0);
    atomic_set(&mm->context.protected_count, 0);
    mm->context.gmap_asce = 0;
    mm->context.flush_mm = 0;
#ifdef CONFIG_PGSTE
@@ -9,6 +9,7 @@
#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
#include <asm/pci_insn.h>
#include <asm/sclp.h>

#define PCIBIOS_MIN_IO 0x1000
@@ -97,6 +98,7 @@ struct zpci_bar_struct {
};

struct s390_domain;
struct kvm_zdev;

#define ZPCI_FUNCTIONS_PER_BUS 256
struct zpci_bus {
@@ -123,11 +125,14 @@ struct zpci_dev {
    enum zpci_state state;
    u32 fid;            /* function ID, used by sclp */
    u32 fh;             /* function handle, used by insn's */
    u32 gisa;           /* GISA designation for passthrough */
    u16 vfn;            /* virtual function number */
    u16 pchid;          /* physical channel ID */
    u16 maxstbl;        /* Maximum store block size */
    u8 pfgid;           /* function group ID */
    u8 pft;             /* pci function type */
    u8 port;
    u8 dtsm;            /* Supported DT mask */
    u8 rid_available : 1;
    u8 has_hp_slot : 1;
    u8 has_resources : 1;
@@ -186,7 +191,10 @@ struct zpci_dev {

    struct dentry *debugfs_dev;

    /* IOMMU and passthrough */
    struct s390_domain *s390_domain; /* s390 IOMMU domain data */
    struct kvm_zdev *kzdev;
    struct mutex kzdev_lock;
};

static inline bool zdev_enabled(struct zpci_dev *zdev)
@@ -198,6 +206,9 @@ extern const struct attribute_group *zpci_attr_groups[];
extern unsigned int s390_pci_force_floating __initdata;
extern unsigned int s390_pci_no_rid;

extern union zpci_sic_iib *zpci_aipb;
extern struct airq_iv *zpci_aif_sbv;

/* -----------------------------------------------------------------------------
  Prototypes
----------------------------------------------------------------------------- */
@@ -153,9 +153,11 @@ struct clp_rsp_query_pci_grp {
    u8          : 6;
    u8 frame    : 1;
    u8 refresh  : 1;    /* TLB refresh mode */
    u16 reserved2;
    u16         : 3;
    u16 maxstbl : 13;   /* Maximum store block size */
    u16 mui;
    u16         : 16;
    u8 dtsm;            /* Supported DT mask */
    u8 reserved3;
    u16 maxfaal;
    u16         : 4;
    u16 dnoi    : 12;
@@ -173,7 +175,8 @@ struct clp_req_set_pci {
    u16 reserved2;
    u8 oc;              /* operation controls */
    u8 ndas;            /* number of dma spaces */
    u64 reserved3;
    u32 reserved3;
    u32 gisa;           /* GISA designation */
} __packed;

/* Set PCI function response */
@@ -98,6 +98,15 @@ struct zpci_fib {
    u32 gd;
} __packed __aligned(8);

/* Set Interruption Controls Operation Controls */
#define SIC_IRQ_MODE_ALL       0
#define SIC_IRQ_MODE_SINGLE    1
#define SIC_SET_AENI_CONTROLS  2
#define SIC_IRQ_MODE_DIRECT    4
#define SIC_IRQ_MODE_D_ALL     16
#define SIC_IRQ_MODE_D_SINGLE  17
#define SIC_IRQ_MODE_SET_CPU   18

/* directed interruption information block */
struct zpci_diib {
    u32 : 1;
@@ -119,9 +128,20 @@ struct zpci_cdiib {
    u64 : 64;
} __packed __aligned(8);

/* adapter interruption parameters block */
struct zpci_aipb {
    u64 faisb;
    u64 gait;
    u16 : 13;
    u16 afi : 3;
    u32 : 32;
    u16 faal;
} __packed __aligned(8);

union zpci_sic_iib {
    struct zpci_diib diib;
    struct zpci_cdiib cdiib;
    struct zpci_aipb aipb;
};

DECLARE_STATIC_KEY_FALSE(have_mio);
@@ -134,13 +154,6 @@ int __zpci_store(u64 data, u64 req, u64 offset);
int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
int __zpci_store_block(const u64 *data, u64 req, u64 offset);
void zpci_barrier(void);
int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);

static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
{
    union zpci_sic_iib iib = {{0}};

    return __zpci_set_irq_ctrl(ctl, isc, &iib);
}
int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);

#endif
@@ -525,7 +525,7 @@ static inline int mm_has_pgste(struct mm_struct *mm)
static inline int mm_is_protected(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
    if (unlikely(atomic_read(&mm->context.is_protected)))
    if (unlikely(atomic_read(&mm->context.protected_count)))
        return 1;
#endif
    return 0;
@@ -1182,9 +1182,22 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
    } else {
        res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
    }
    /* At this point the reference through the mapping is still present */
    if (mm_is_protected(mm) && pte_present(res))
        uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
    /* Nothing to do */
    if (!mm_is_protected(mm) || !pte_present(res))
        return res;
    /*
     * At this point the reference through the mapping is still present.
     * The notifier should have destroyed all protected vCPUs at this
     * point, so the destroy should be successful.
     */
    if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
        return res;
    /*
     * If something went wrong and the page could not be destroyed, or
     * if this is not a mm teardown, the slower export is used as
     * fallback instead.
     */
    uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
    return res;
}
@@ -88,6 +88,10 @@ struct sclp_info {
    unsigned char has_sipl : 1;
    unsigned char has_dirq : 1;
    unsigned char has_iplcc : 1;
    unsigned char has_zpci_lsi : 1;
    unsigned char has_aisii : 1;
    unsigned char has_aeni : 1;
    unsigned char has_aisi : 1;
    unsigned int ibc;
    unsigned int mtid;
    unsigned int mtid_cp;
@@ -19,6 +19,19 @@ struct tpi_info {
    u32 :12;
} __packed __aligned(4);

/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
struct tpi_adapter_info {
    u32 aism:8;
    u32 :22;
    u32 error:1;
    u32 forward:1;
    u32 reserved;
    u32 adapter_IO:1;
    u32 directed_irq:1;
    u32 isc:3;
    u32 :27;
} __packed __aligned(4);

#endif /* __ASSEMBLY__ */

#endif /* _ASM_S390_TPI_H */
@@ -124,7 +124,10 @@ struct uv_cb_qui {
    u64 reservedc0;                     /* 0x00c0 */
    u64 conf_dump_storage_state_len;    /* 0x00c8 */
    u64 conf_dump_finalize_len;         /* 0x00d0 */
    u8  reservedd8[256 - 216];          /* 0x00d8 */
    u64 reservedd8;                     /* 0x00d8 */
    u64 supp_att_req_hdr_ver;           /* 0x00e0 */
    u64 supp_att_pflags;                /* 0x00e8 */
    u8  reservedf0[256 - 240];          /* 0x00f0 */
} __packed __aligned(8);

/* Initialize Ultravisor */
@@ -350,6 +353,8 @@ struct uv_info {
    unsigned long supp_se_hdr_pcf;
    unsigned long conf_dump_storage_state_len;
    unsigned long conf_dump_finalize_len;
    unsigned long supp_att_req_hdr_ver;
    unsigned long supp_att_pflags;
};

extern struct uv_info uv_info;
@@ -421,6 +426,7 @@ static inline int is_prot_virt_host(void)
}

int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_owned_page(unsigned long paddr);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_owned_from_secure(unsigned long paddr);
@@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
#define KVM_S390_VM_CRYPTO       2
#define KVM_S390_VM_CPU_MODEL    3
#define KVM_S390_VM_MIGRATION    4
#define KVM_S390_VM_CPU_TOPOLOGY 5

/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
@@ -234,6 +234,32 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
    return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}

/**
 * should_export_before_import - Determine whether an export is needed
 * before an import-like operation
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @mm: the mm of the process
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * Also, no export is needed when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: true if an export is needed before every import, otherwise false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
    if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
        return false;
    return atomic_read(&mm->context.protected_count) > 1;
}

/*
 * Requests the Ultravisor to make a page accessible to a guest.
 * If it's brought in the first time, it will be cleared. If
@@ -277,6 +303,8 @@ again:

    lock_page(page);
    ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
    if (should_export_before_import(uvcb, gmap->mm))
        uv_convert_from_secure(page_to_phys(page));
    rc = make_secure_pte(ptep, uaddr, page, uvcb);
    pte_unmap_unlock(ptep, ptelock);
    unlock_page(page);
@@ -334,6 +362,61 @@ int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
}
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);

/**
 * gmap_destroy_page - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 */
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
    struct vm_area_struct *vma;
    unsigned long uaddr;
    struct page *page;
    int rc;

    rc = -EFAULT;
    mmap_read_lock(gmap->mm);

    uaddr = __gmap_translate(gmap, gaddr);
    if (IS_ERR_VALUE(uaddr))
        goto out;
    vma = vma_lookup(gmap->mm, uaddr);
    if (!vma)
        goto out;
    /*
     * Huge pages should not be able to become secure
     */
    if (is_vm_hugetlb_page(vma))
        goto out;

    rc = 0;
    /* we take an extra reference here */
    page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
    if (IS_ERR_OR_NULL(page))
        goto out;
    rc = uv_destroy_owned_page(page_to_phys(page));
    /*
     * Fault handlers can race; it is possible that two CPUs will fault
     * on the same secure page. One CPU can destroy the page, reboot,
     * re-enter secure mode and import it, while the second CPU was
     * stuck at the beginning of the handler. At some point the second
     * CPU will be able to progress, and it will not be able to destroy
     * the page. In that case we do not want to terminate the process,
     * we instead try to export the page.
     */
    if (rc)
        rc = uv_convert_owned_from_secure(page_to_phys(page));
    put_page(page);
out:
    mmap_read_unlock(gmap->mm);
    return rc;
}
EXPORT_SYMBOL_GPL(gmap_destroy_page);

/*
 * To be called with the page locked or with an extra reference! This will
 * prevent gmap_make_secure from touching the page concurrently. Having 2
@@ -479,6 +562,24 @@ static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
static struct kobj_attribute uv_query_max_guest_addr_attr =
    __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);

static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
                                             struct kobj_attribute *attr, char *page)
{
    return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
}

static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
    __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);

static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
                                        struct kobj_attribute *attr, char *page)
{
    return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
}

static struct kobj_attribute uv_query_supp_att_pflags_attr =
    __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);

static struct attribute *uv_query_attrs[] = {
    &uv_query_facilities_attr.attr,
    &uv_query_feature_indications_attr.attr,
@@ -490,6 +591,8 @@ static struct attribute *uv_query_attrs[] = {
    &uv_query_dump_storage_state_len_attr.attr,
    &uv_query_dump_finalize_len_attr.attr,
    &uv_query_dump_cpu_len_attr.attr,
    &uv_query_supp_att_req_hdr_ver_attr.attr,
    &uv_query_supp_att_pflags_attr.attr,
    NULL,
};
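The two attributes added above surface as files under /sys/firmware/uv/query/.
As a rough sketch (hypothetical helper, error handling abbreviated), userspace
could read them like this:

    #include <stdio.h>

    /* Print one UV query attribute, e.g. "supp_att_req_hdr_ver" or "supp_att_pflags". */
    static void print_uv_query_attr(const char *name)
    {
        char path[128], buf[32];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/firmware/uv/query/%s", name);
        f = fopen(path, "r");
        if (f && fgets(buf, sizeof(buf), f))
            printf("%s: %s", name, buf);   /* the kernel prints the value as hex */
        if (f)
            fclose(f);
    }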
@@ -34,6 +34,7 @@ config KVM
    select SRCU
    select KVM_VFIO
    select INTERVAL_TREE
    select MMU_NOTIFIER
    help
      Support hosting paravirtualized guest machines using the SIE
      virtualization capability on the mainframe. This should work
@@ -10,4 +10,5 @@ ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o

kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
@@ -262,77 +262,77 @@ struct aste {
    /* .. more fields there */
};

int ipte_lock_held(struct kvm_vcpu *vcpu)
int ipte_lock_held(struct kvm *kvm)
{
    if (vcpu->arch.sie_block->eca & ECA_SII) {
    if (sclp.has_siif) {
        int rc;

        read_lock(&vcpu->kvm->arch.sca_lock);
        rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
        read_unlock(&vcpu->kvm->arch.sca_lock);
        read_lock(&kvm->arch.sca_lock);
        rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
        read_unlock(&kvm->arch.sca_lock);
        return rc;
    }
    return vcpu->kvm->arch.ipte_lock_count != 0;
    return kvm->arch.ipte_lock_count != 0;
}

static void ipte_lock_simple(struct kvm_vcpu *vcpu)
static void ipte_lock_simple(struct kvm *kvm)
{
    union ipte_control old, new, *ic;

    mutex_lock(&vcpu->kvm->arch.ipte_mutex);
    vcpu->kvm->arch.ipte_lock_count++;
    if (vcpu->kvm->arch.ipte_lock_count > 1)
    mutex_lock(&kvm->arch.ipte_mutex);
    kvm->arch.ipte_lock_count++;
    if (kvm->arch.ipte_lock_count > 1)
        goto out;
retry:
    read_lock(&vcpu->kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(vcpu->kvm);
    read_lock(&kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(kvm);
    do {
        old = READ_ONCE(*ic);
        if (old.k) {
            read_unlock(&vcpu->kvm->arch.sca_lock);
            read_unlock(&kvm->arch.sca_lock);
            cond_resched();
            goto retry;
        }
        new = old;
        new.k = 1;
    } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
    read_unlock(&vcpu->kvm->arch.sca_lock);
    read_unlock(&kvm->arch.sca_lock);
out:
    mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
    mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
static void ipte_unlock_simple(struct kvm *kvm)
{
    union ipte_control old, new, *ic;

    mutex_lock(&vcpu->kvm->arch.ipte_mutex);
    vcpu->kvm->arch.ipte_lock_count--;
    if (vcpu->kvm->arch.ipte_lock_count)
    mutex_lock(&kvm->arch.ipte_mutex);
    kvm->arch.ipte_lock_count--;
    if (kvm->arch.ipte_lock_count)
        goto out;
    read_lock(&vcpu->kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(vcpu->kvm);
    read_lock(&kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(kvm);
    do {
        old = READ_ONCE(*ic);
        new = old;
        new.k = 0;
    } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
    read_unlock(&vcpu->kvm->arch.sca_lock);
    wake_up(&vcpu->kvm->arch.ipte_wq);
    read_unlock(&kvm->arch.sca_lock);
    wake_up(&kvm->arch.ipte_wq);
out:
    mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
    mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_lock_siif(struct kvm_vcpu *vcpu)
static void ipte_lock_siif(struct kvm *kvm)
{
    union ipte_control old, new, *ic;

retry:
    read_lock(&vcpu->kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(vcpu->kvm);
    read_lock(&kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(kvm);
    do {
        old = READ_ONCE(*ic);
        if (old.kg) {
            read_unlock(&vcpu->kvm->arch.sca_lock);
            read_unlock(&kvm->arch.sca_lock);
            cond_resched();
            goto retry;
        }
@@ -340,15 +340,15 @@ retry:
        new.k = 1;
        new.kh++;
    } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
    read_unlock(&vcpu->kvm->arch.sca_lock);
    read_unlock(&kvm->arch.sca_lock);
}

static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
static void ipte_unlock_siif(struct kvm *kvm)
{
    union ipte_control old, new, *ic;

    read_lock(&vcpu->kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(vcpu->kvm);
    read_lock(&kvm->arch.sca_lock);
    ic = kvm_s390_get_ipte_control(kvm);
    do {
        old = READ_ONCE(*ic);
        new = old;
@@ -356,25 +356,25 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
        if (!new.kh)
            new.k = 0;
    } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
    read_unlock(&vcpu->kvm->arch.sca_lock);
    read_unlock(&kvm->arch.sca_lock);
    if (!new.kh)
        wake_up(&vcpu->kvm->arch.ipte_wq);
        wake_up(&kvm->arch.ipte_wq);
}

void ipte_lock(struct kvm_vcpu *vcpu)
void ipte_lock(struct kvm *kvm)
{
    if (vcpu->arch.sie_block->eca & ECA_SII)
        ipte_lock_siif(vcpu);
    if (sclp.has_siif)
        ipte_lock_siif(kvm);
    else
        ipte_lock_simple(vcpu);
        ipte_lock_simple(kvm);
}

void ipte_unlock(struct kvm_vcpu *vcpu)
void ipte_unlock(struct kvm *kvm)
{
    if (vcpu->arch.sie_block->eca & ECA_SII)
        ipte_unlock_siif(vcpu);
    if (sclp.has_siif)
        ipte_unlock_siif(kvm);
    else
        ipte_unlock_simple(vcpu);
        ipte_unlock_simple(kvm);
}

static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
@@ -1086,7 +1086,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
    try_storage_prot_override = storage_prot_override_applicable(vcpu);
    need_ipte_lock = psw_bits(*psw).dat && !asce.r;
    if (need_ipte_lock)
        ipte_lock(vcpu);
        ipte_lock(vcpu->kvm);
    /*
     * Since we do the access further down ultimately via a move instruction
     * that does key checking and returns an error in case of a protection
@@ -1127,7 +1127,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
    }
out_unlock:
    if (need_ipte_lock)
        ipte_unlock(vcpu);
        ipte_unlock(vcpu->kvm);
    if (nr_pages > ARRAY_SIZE(gpa_array))
        vfree(gpas);
    return rc;
@@ -1199,10 +1199,10 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
    rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
    if (rc)
        return rc;
    ipte_lock(vcpu);
    ipte_lock(vcpu->kvm);
    rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
                             access_key);
    ipte_unlock(vcpu);
    ipte_unlock(vcpu->kvm);

    return rc;
}
@@ -1465,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
     * tables/pointers we read stay valid - unshadowing is however
     * always possible - only guest_table_lock protects us.
     */
    ipte_lock(vcpu);
    ipte_lock(vcpu->kvm);

    rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
    if (rc)
@@ -1499,7 +1499,7 @@ shadow_page:
    pte.p |= dat_protection;
    if (!rc)
        rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
    ipte_unlock(vcpu);
    ipte_unlock(vcpu->kvm);
    mmap_read_unlock(sg->mm);
    return rc;
}
@@ -440,9 +440,9 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
    return access_guest_real(vcpu, gra, data, len, 0);
}

void ipte_lock(struct kvm_vcpu *vcpu);
void ipte_unlock(struct kvm_vcpu *vcpu);
int ipte_lock_held(struct kvm_vcpu *vcpu);
void ipte_lock(struct kvm *kvm);
void ipte_unlock(struct kvm *kvm);
int ipte_lock_held(struct kvm *kvm);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);

/* MVPG PEI indication bits */
@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)

static int handle_pv_notification(struct kvm_vcpu *vcpu)
{
    int ret;

    if (vcpu->arch.sie_block->ipa == 0xb210)
        return handle_pv_spx(vcpu);
    if (vcpu->arch.sie_block->ipa == 0xb220)
        return handle_pv_sclp(vcpu);
    if (vcpu->arch.sie_block->ipa == 0xb9a4)
        return handle_pv_uvc(vcpu);
    if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
        /*
         * Besides external call, other SIGP orders also cause a
         * 108 (pv notify) intercept. In contrast to external call,
         * these orders need to be emulated and hence the appropriate
         * place to handle them is in handle_instruction().
         * So first try kvm_s390_handle_sigp_pei() and if that isn't
         * successful, go on with handle_instruction().
         */
        ret = kvm_s390_handle_sigp_pei(vcpu);
        if (!ret)
            return ret;
    }

    return handle_instruction(vcpu);
}
@@ -28,9 +28,11 @@
#include <asm/switch_to.h>
#include <asm/nmi.h>
#include <asm/airq.h>
#include <asm/tpi.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
#include "pci.h"

#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
@@ -702,7 +704,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
    /*
     * We indicate floating repressible conditions along with
     * other pending conditions. Channel Report Pending and Channel
     * Subsystem damage are the only two and and are indicated by
     * Subsystem damage are the only two and are indicated by
     * bits in mcic and masked in cr14.
     */
    if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
@@ -3311,10 +3313,87 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);

static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
static void aen_host_forward(unsigned long si)
{
    struct kvm_s390_gisa_interrupt *gi;
    struct zpci_gaite *gaite;
    struct kvm *kvm;

    gaite = (struct zpci_gaite *)aift->gait +
            (si * sizeof(struct zpci_gaite));
    if (gaite->count == 0)
        return;
    if (gaite->aisb != 0)
        set_bit_inv(gaite->aisbo, (unsigned long *)gaite->aisb);

    kvm = kvm_s390_pci_si_to_kvm(aift, si);
    if (!kvm)
        return;
    gi = &kvm->arch.gisa_int;

    if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
        !(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
        gisa_set_ipm_gisc(gi->origin, gaite->gisc);
        if (hrtimer_active(&gi->timer))
            hrtimer_cancel(&gi->timer);
        hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
        kvm->stat.aen_forward++;
    }
}

static void aen_process_gait(u8 isc)
{
    bool found = false, first = true;
    union zpci_sic_iib iib = {{0}};
    unsigned long si, flags;

    spin_lock_irqsave(&aift->gait_lock, flags);

    if (!aift->gait) {
        spin_unlock_irqrestore(&aift->gait_lock, flags);
        return;
    }

    for (si = 0;;) {
        /* Scan adapter summary indicator bit vector */
        si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
        if (si == -1UL) {
            if (first || found) {
                /* Re-enable interrupts. */
                zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
                                  &iib);
                first = found = false;
            } else {
                /* Interrupts on and all bits processed */
                break;
            }
            found = false;
            si = 0;
            /* Scan again after re-enabling interrupts */
            continue;
        }
        found = true;
        aen_host_forward(si);
    }

    spin_unlock_irqrestore(&aift->gait_lock, flags);
}

static void gib_alert_irq_handler(struct airq_struct *airq,
                                  struct tpi_info *tpi_info)
{
    struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;

    inc_irq_stat(IRQIO_GAL);
    process_gib_alert_list();

    if ((info->forward || info->error) &&
        IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
        aen_process_gait(info->isc);
        if (info->aism != 0)
            process_gib_alert_list();
    } else {
        process_gib_alert_list();
    }
}

static struct airq_struct gib_alert_irq = {
@@ -3326,6 +3405,11 @@ void kvm_s390_gib_destroy(void)
{
    if (!gib)
        return;
    if (kvm_s390_pci_interp_allowed() && aift) {
        mutex_lock(&aift->aift_lock);
        kvm_s390_pci_aen_exit();
        mutex_unlock(&aift->aift_lock);
    }
    chsc_sgib(0);
    unregister_adapter_interrupt(&gib_alert_irq);
    free_page((unsigned long)gib);
@@ -3363,6 +3447,14 @@ int kvm_s390_gib_init(u8 nisc)
        goto out_unreg_gal;
    }

    if (kvm_s390_pci_interp_allowed()) {
        if (kvm_s390_pci_aen_init(nisc)) {
            pr_err("Initializing AEN for PCI failed\n");
            rc = -EIO;
            goto out_unreg_gal;
        }
    }

    KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
    goto out;
@@ -31,6 +31,7 @@
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>
#include <linux/mmu_notifier.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
@@ -47,6 +48,7 @@
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "pci.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -63,7 +65,8 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
    STATS_DESC_COUNTER(VM, inject_float_mchk),
    STATS_DESC_COUNTER(VM, inject_pfault_done),
    STATS_DESC_COUNTER(VM, inject_service_signal),
    STATS_DESC_COUNTER(VM, inject_virtio)
    STATS_DESC_COUNTER(VM, inject_virtio),
    STATS_DESC_COUNTER(VM, aen_forward)
};

const struct kvm_stats_header kvm_vm_stats_header = {
@@ -502,6 +505,14 @@ int kvm_arch_init(void *opaque)
        goto out;
    }

    if (kvm_s390_pci_interp_allowed()) {
        rc = kvm_s390_pci_init();
        if (rc) {
            pr_err("Unable to allocate AIFT for PCI\n");
            goto out;
        }
    }

    rc = kvm_s390_gib_init(GAL_ISC);
    if (rc)
        goto out;
@@ -516,6 +527,8 @@ out:
void kvm_arch_exit(void)
{
    kvm_s390_gib_destroy();
    if (kvm_s390_pci_interp_allowed())
        kvm_s390_pci_exit();
    debug_unregister(kvm_s390_dbf);
    debug_unregister(kvm_s390_dbf_uv);
}
@@ -626,6 +639,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        }
        break;
    }
    case KVM_CAP_S390_ZPCI_OP:
        r = kvm_s390_pci_interp_allowed();
        break;
    case KVM_CAP_S390_CPU_TOPOLOGY:
        r = test_facility(11);
        break;
    default:
        r = 0;
    }
@@ -837,6 +856,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
        icpt_operexc_on_all_vcpus(kvm);
        r = 0;
        break;
    case KVM_CAP_S390_CPU_TOPOLOGY:
        r = -EINVAL;
        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
            r = -EBUSY;
        } else if (test_facility(11)) {
            set_kvm_facility(kvm->arch.model.fac_mask, 11);
            set_kvm_facility(kvm->arch.model.fac_list, 11);
            r = 0;
        }
        mutex_unlock(&kvm->lock);
        VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
                 r ? "(not available)" : "(success)");
        break;
    default:
        r = -EINVAL;
        break;
@@ -1039,6 +1072,42 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
    return 0;
}

static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
{
    /* Only set the ECB bits after guest requests zPCI interpretation */
    if (!vcpu->kvm->arch.use_zpci_interp)
        return;

    vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
    vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
}

void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
{
    struct kvm_vcpu *vcpu;
    unsigned long i;

    lockdep_assert_held(&kvm->lock);

    if (!kvm_s390_pci_interp_allowed())
        return;

    /*
     * If host is configured for PCI and the necessary facilities are
     * available, turn on interpretation for the life of this guest
     */
    kvm->arch.use_zpci_interp = 1;

    kvm_s390_vcpu_block_all(kvm);

    kvm_for_each_vcpu(i, vcpu, kvm) {
        kvm_s390_vcpu_pci_setup(vcpu);
        kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
    }

    kvm_s390_vcpu_unblock_all(kvm);
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
    unsigned long cx;
@@ -1711,6 +1780,57 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
    return ret;
}

/**
 * kvm_s390_update_topology_change_report - update CPU topology change report
 * @kvm: guest KVM description
 * @val: set or clear the MTCR bit
 *
 * Updates the Multiprocessor Topology-Change-Report bit to signal
 * the guest with a topology change.
 * This is only relevant if the topology facility is present.
 *
 * The SCA version, bsca or esca, doesn't matter as offset is the same.
 */
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
{
    union sca_utility new, old;
    struct bsca_block *sca;

    read_lock(&kvm->arch.sca_lock);
    sca = kvm->arch.sca;
    do {
        old = READ_ONCE(sca->utility);
        new = old;
        new.mtcr = val;
    } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
    read_unlock(&kvm->arch.sca_lock);
}

static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
                                               struct kvm_device_attr *attr)
{
    if (!test_kvm_facility(kvm, 11))
        return -ENXIO;

    kvm_s390_update_topology_change_report(kvm, !!attr->attr);
    return 0;
}

static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
                                               struct kvm_device_attr *attr)
{
    u8 topo;

    if (!test_kvm_facility(kvm, 11))
        return -ENXIO;

    read_lock(&kvm->arch.sca_lock);
    topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
    read_unlock(&kvm->arch.sca_lock);

    return put_user(topo, (u8 __user *)attr->addr);
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
    int ret;
@@ -1731,6 +1851,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
    case KVM_S390_VM_MIGRATION:
        ret = kvm_s390_vm_set_migration(kvm, attr);
        break;
    case KVM_S390_VM_CPU_TOPOLOGY:
        ret = kvm_s390_set_topo_change_indication(kvm, attr);
        break;
    default:
        ret = -ENXIO;
        break;
@@ -1756,6 +1879,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
    case KVM_S390_VM_MIGRATION:
        ret = kvm_s390_vm_get_migration(kvm, attr);
        break;
    case KVM_S390_VM_CPU_TOPOLOGY:
        ret = kvm_s390_get_topo_change_indication(kvm, attr);
        break;
    default:
        ret = -ENXIO;
        break;
@@ -1829,6 +1955,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
    case KVM_S390_VM_MIGRATION:
        ret = 0;
        break;
    case KVM_S390_VM_CPU_TOPOLOGY:
        ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
        break;
    default:
        ret = -ENXIO;
        break;
@@ -2186,12 +2315,25 @@ out:
    return r;
}

static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
/**
 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
 * non protected.
 * @kvm: the VM whose protected vCPUs are to be converted
 * @rc: return value for the RC field of the UVC (in case of error)
 * @rrc: return value for the RRC field of the UVC (in case of error)
 *
 * Does not stop in case of error, tries to convert as many
 * CPUs as possible. In case of error, the RC and RRC of the last error are
 * returned.
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
    struct kvm_vcpu *vcpu;
    u16 rc, rrc;
    int ret = 0;
    unsigned long i;
    u16 _rc, _rrc;
    int ret = 0;

    /*
     * We ignore failures and try to destroy as many CPUs as possible.
@@ -2203,9 +2345,9 @@
     */
    kvm_for_each_vcpu(i, vcpu, kvm) {
        mutex_lock(&vcpu->mutex);
        if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
            *rcp = rc;
            *rrcp = rrc;
        if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
            *rc = _rc;
            *rrc = _rrc;
            ret = -EIO;
        }
        mutex_unlock(&vcpu->mutex);
@@ -2216,6 +2358,17 @@
    return ret;
}

/**
 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
 * to protected.
 * @kvm: the VM whose protected vCPUs are to be converted
 * @rc: return value for the RC field of the UVC (in case of error)
 * @rrc: return value for the RRC field of the UVC (in case of error)
 *
 * Tries to undo the conversion in case of error.
 *
 * Return: 0 in case of success, otherwise -EIO
 */
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
    unsigned long i;
@@ -2772,6 +2925,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
        r = -EFAULT;
        break;
    }
    case KVM_S390_ZPCI_OP: {
        struct kvm_s390_zpci_op args;

        r = -EINVAL;
        if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
            break;
        if (copy_from_user(&args, argp, sizeof(args))) {
            r = -EFAULT;
            break;
        }
        r = kvm_s390_pci_zpci_op(kvm, &args);
        break;
    }
    default:
        r = -ENOTTY;
    }
@@ -2933,6 +3099,14 @@ static void sca_dispose(struct kvm *kvm)
    kvm->arch.sca = NULL;
}

void kvm_arch_free_vm(struct kvm *kvm)
{
    if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
        kvm_s390_pci_clear_list(kvm);

    __kvm_arch_free_vm(kvm);
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
    gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
@@ -3015,6 +3189,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

    kvm_s390_crypto_init(kvm);

    if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
        mutex_lock(&kvm->lock);
        kvm_s390_pci_init_list(kvm);
        kvm_s390_vcpu_pci_enable_interp(kvm);
        mutex_unlock(&kvm->lock);
    }

    mutex_init(&kvm->arch.float_int.ais_lock);
    spin_lock_init(&kvm->arch.float_int.lock);
    for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -3068,6 +3249,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
    kvm_clear_async_pf_completion_queue(vcpu);
    if (!kvm_is_ucontrol(vcpu->kvm))
        sca_del_vcpu(vcpu);
    kvm_s390_update_topology_change_report(vcpu->kvm, 1);

    if (kvm_is_ucontrol(vcpu->kvm))
        gmap_remove(vcpu->arch.gmap);
@@ -3095,6 +3277,15 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
     */
    if (kvm_s390_pv_get_handle(kvm))
        kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
    /*
     * Remove the mmu notifier only when the whole KVM VM is torn down,
     * and only if one was registered to begin with. If the VM is
     * currently not protected, but has previously been protected,
     * then it's possible that the notifier is still registered.
     */
    if (kvm->arch.pv.mmu_notifier.ops)
        mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);

    debug_unregister(kvm->arch.dbf);
    free_page((unsigned long)kvm->arch.sie_page2);
    if (!kvm_is_ucontrol(kvm))
@@ -3461,6 +3652,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
        vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
    if (test_kvm_facility(vcpu->kvm, 9))
        vcpu->arch.sie_block->ecb |= ECB_SRSI;
    if (test_kvm_facility(vcpu->kvm, 11))
        vcpu->arch.sie_block->ecb |= ECB_PTF;
    if (test_kvm_facility(vcpu->kvm, 73))
        vcpu->arch.sie_block->ecb |= ECB_TE;
    if (!kvm_is_ucontrol(vcpu->kvm))
@@ -3513,6 +3706,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)

    kvm_s390_vcpu_crypto_setup(vcpu);

    kvm_s390_vcpu_pci_setup(vcpu);

    mutex_lock(&vcpu->kvm->lock);
    if (kvm_s390_pv_is_protected(vcpu->kvm)) {
        rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
@@ -3592,6 +3787,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
    rc = kvm_s390_vcpu_setup(vcpu);
    if (rc)
        goto out_ucontrol_uninit;

    kvm_s390_update_topology_change_report(vcpu->kvm, 1);
    return 0;

out_ucontrol_uninit:
@@ -379,6 +379,7 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);

/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
@@ -512,6 +513,16 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
 */
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);

/**
 * kvm_s390_vcpu_pci_enable_interp
 *
 * Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
 * interpretation as well as adapter interruption forwarding.
 *
 * @kvm: the KVM guest
 */
void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);

/**
 * diag9c_forwarding_hz
 *
690
arch/s390/kvm/pci.c
Normal file
690
arch/s390/kvm/pci.c
new file
@@ -0,0 +1,690 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * s390 kvm PCI passthrough support
 *
 * Copyright IBM Corp. 2022
 *
 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
 */

#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <asm/pci.h>
#include <asm/pci_insn.h>
#include <asm/pci_io.h>
#include <asm/sclp.h>
#include "pci.h"
#include "kvm-s390.h"

struct zpci_aift *aift;

static inline int __set_irq_noiib(u16 ctl, u8 isc)
{
	union zpci_sic_iib iib = {{0}};

	return zpci_set_irq_ctrl(ctl, isc, &iib);
}

void kvm_s390_pci_aen_exit(void)
{
	unsigned long flags;
	struct kvm_zdev **gait_kzdev;

	lockdep_assert_held(&aift->aift_lock);

	/*
	 * Contents of the aipb remain registered for the life of the host
	 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv
	 * in case we insert the KVM module again later. Clear the AIFT
	 * information and free anything not registered with underlying
	 * firmware.
	 */
	spin_lock_irqsave(&aift->gait_lock, flags);
	gait_kzdev = aift->kzdev;
	aift->gait = NULL;
	aift->sbv = NULL;
	aift->kzdev = NULL;
	spin_unlock_irqrestore(&aift->gait_lock, flags);

	kfree(gait_kzdev);
}

static int zpci_setup_aipb(u8 nisc)
{
	struct page *page;
	int size, rc;

	zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
	if (!zpci_aipb)
		return -ENOMEM;

	aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, 0);
	if (!aift->sbv) {
		rc = -ENOMEM;
		goto free_aipb;
	}
	zpci_aif_sbv = aift->sbv;
	size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
				    sizeof(struct zpci_gaite)));
	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
	if (!page) {
		rc = -ENOMEM;
		goto free_sbv;
	}
	aift->gait = (struct zpci_gaite *)page_to_phys(page);

	zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
	zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
	zpci_aipb->aipb.afi = nisc;
	zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;

	/* Setup Adapter Event Notification Interpretation */
	if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
		rc = -EIO;
		goto free_gait;
	}

	return 0;

free_gait:
	free_pages((unsigned long)aift->gait, size);
free_sbv:
	airq_iv_release(aift->sbv);
	zpci_aif_sbv = NULL;
free_aipb:
	kfree(zpci_aipb);
	zpci_aipb = NULL;

	return rc;
}

static int zpci_reset_aipb(u8 nisc)
{
	/*
	 * AEN registration can only happen once per system boot. If
	 * an aipb already exists then AEN was already registered and
	 * we can re-use the aipb contents. This can only happen if
	 * the KVM module was removed and re-inserted. However, we must
	 * ensure that the same forwarding ISC is used as this is assigned
	 * during KVM module load.
	 */
	if (zpci_aipb->aipb.afi != nisc)
		return -EINVAL;

	aift->sbv = zpci_aif_sbv;
	aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;

	return 0;
}

int kvm_s390_pci_aen_init(u8 nisc)
{
	int rc = 0;

	/* If already enabled for AEN, bail out now */
	if (aift->gait || aift->sbv)
		return -EPERM;

	mutex_lock(&aift->aift_lock);
	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
			      GFP_KERNEL);
	if (!aift->kzdev) {
		rc = -ENOMEM;
		goto unlock;
	}

	if (!zpci_aipb)
		rc = zpci_setup_aipb(nisc);
	else
		rc = zpci_reset_aipb(nisc);
	if (rc)
		goto free_zdev;

	/* Enable floating IRQs */
	if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
		rc = -EIO;
		kvm_s390_pci_aen_exit();
	}

	goto unlock;

free_zdev:
	kfree(aift->kzdev);
unlock:
	mutex_unlock(&aift->aift_lock);
	return rc;
}

/* Modify PCI: Register floating adapter interruption forwarding */
static int kvm_zpci_set_airq(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
	struct zpci_fib fib = {};
	u8 status;

	fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
	fib.fmt0.sum = 1;	/* enable summary notifications */
	fib.fmt0.noi = airq_iv_end(zdev->aibv);
	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
	fib.fmt0.aibvo = 0;
	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib.fmt0.aisbo = zdev->aisb & 63;
	fib.gd = zdev->gisa;

	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}

/* Modify PCI: Unregister floating adapter interruption forwarding */
static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
	struct zpci_fib fib = {};
	u8 cc, status;

	fib.gd = zdev->gisa;

	cc = zpci_mod_fc(req, &fib, &status);
	if (cc == 3 || (cc == 1 && status == 24))
		/* Function already gone or IRQs already deregistered. */
		cc = 0;

	return cc ? -EIO : 0;
}

static inline void unaccount_mem(unsigned long nr_pages)
{
	struct user_struct *user = get_uid(current_user());

	if (user)
		atomic_long_sub(nr_pages, &user->locked_vm);
	if (current->mm)
		atomic64_sub(nr_pages, &current->mm->pinned_vm);
}

static inline int account_mem(unsigned long nr_pages)
{
	struct user_struct *user = get_uid(current_user());
	unsigned long page_limit, cur_pages, new_pages;

	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	do {
		cur_pages = atomic_long_read(&user->locked_vm);
		new_pages = cur_pages + nr_pages;
		if (new_pages > page_limit)
			return -ENOMEM;
	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
				     new_pages) != cur_pages);

	atomic64_add(nr_pages, &current->mm->pinned_vm);

	return 0;
}
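The accounting loop above reserves pinned pages against RLIMIT_MEMLOCK without taking a lock, retrying whenever a concurrent update changes the counter between the read and the compare-and-swap. A standalone userspace sketch of the same reserve-against-a-limit idiom, shown only for illustration (it is not part of the patch):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_long locked;	/* plays the role of user->locked_vm */

/* Try to reserve nr units against limit; fail rather than overshoot. */
static bool reserve(long nr, long limit)
{
	long cur, new;

	do {
		cur = atomic_load(&locked);
		new = cur + nr;
		if (new > limit)
			return false;	/* analogous to returning -ENOMEM */
	} while (!atomic_compare_exchange_weak(&locked, &cur, new));

	return true;
}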
static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
				   bool assist)
{
	struct page *pages[1], *aibv_page, *aisb_page = NULL;
	unsigned int msi_vecs, idx;
	struct zpci_gaite *gaite;
	unsigned long hva, bit;
	struct kvm *kvm;
	phys_addr_t gaddr;
	int rc = 0, gisc, npages, pcount = 0;

	/*
	 * Interrupt forwarding is only applicable if the device is already
	 * enabled for interpretation
	 */
	if (zdev->gisa == 0)
		return -EINVAL;

	kvm = zdev->kzdev->kvm;
	msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);

	/* Get the associated forwarding ISC - if invalid, return the error */
	gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
	if (gisc < 0)
		return gisc;

	/* Replace AIBV address */
	idx = srcu_read_lock(&kvm->srcu);
	hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
	npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
	srcu_read_unlock(&kvm->srcu, idx);
	if (npages < 1) {
		rc = -EIO;
		goto out;
	}
	aibv_page = pages[0];
	pcount++;
	gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
	fib->fmt0.aibv = gaddr;

	/* Pin the guest AISB if one was specified */
	if (fib->fmt0.sum == 1) {
		idx = srcu_read_lock(&kvm->srcu);
		hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
		npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
					     pages);
		srcu_read_unlock(&kvm->srcu, idx);
		if (npages < 1) {
			rc = -EIO;
			goto unpin1;
		}
		aisb_page = pages[0];
		pcount++;
	}

	/* Account for pinned pages, roll back on failure */
	if (account_mem(pcount))
		goto unpin2;

	/* AISB must be allocated before we can fill in GAITE */
	mutex_lock(&aift->aift_lock);
	bit = airq_iv_alloc_bit(aift->sbv);
	if (bit == -1UL)
		goto unlock;
	zdev->aisb = bit;	/* store the summary bit number */
	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
					      AIRQ_IV_BITLOCK |
					      AIRQ_IV_GUESTVEC,
				    phys_to_virt(fib->fmt0.aibv));

	spin_lock_irq(&aift->gait_lock);
	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
						   sizeof(struct zpci_gaite));

	/* If assist not requested, host will get all alerts */
	if (assist)
		gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
	else
		gaite->gisa = 0;

	gaite->gisc = fib->fmt0.isc;
	gaite->count++;
	gaite->aisbo = fib->fmt0.aisbo;
	gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
							      ~PAGE_MASK));
	aift->kzdev[zdev->aisb] = zdev->kzdev;
	spin_unlock_irq(&aift->gait_lock);

	/* Update guest FIB for re-issue */
	fib->fmt0.aisbo = zdev->aisb & 63;
	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib->fmt0.isc = gisc;

	/* Save some guest fib values in the host for later use */
	zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
	zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
	mutex_unlock(&aift->aift_lock);

	/* Issue the clp to setup the irq now */
	rc = kvm_zpci_set_airq(zdev);
	return rc;

unlock:
	mutex_unlock(&aift->aift_lock);
unpin2:
	if (fib->fmt0.sum == 1)
		unpin_user_page(aisb_page);
unpin1:
	unpin_user_page(aibv_page);
out:
	return rc;
}

static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
{
	struct kvm_zdev *kzdev = zdev->kzdev;
	struct zpci_gaite *gaite;
	struct page *vpage = NULL, *spage = NULL;
	int rc, pcount = 0;
	u8 isc;

	if (zdev->gisa == 0)
		return -EINVAL;

	mutex_lock(&aift->aift_lock);

	/*
	 * If the clear fails due to an error, leave now unless we know this
	 * device is about to go away (force) -- In that case clear the GAITE
	 * regardless.
	 */
	rc = kvm_zpci_clear_airq(zdev);
	if (rc && !force)
		goto out;

	if (zdev->kzdev->fib.fmt0.aibv == 0)
		goto out;
	spin_lock_irq(&aift->gait_lock);
	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
						   sizeof(struct zpci_gaite));
	isc = gaite->gisc;
	gaite->count--;
	if (gaite->count == 0) {
		/* Release guest AIBV and AISB */
		vpage = phys_to_page(kzdev->fib.fmt0.aibv);
		if (gaite->aisb != 0)
			spage = phys_to_page(gaite->aisb);
		/* Clear the GAIT entry */
		gaite->aisb = 0;
		gaite->gisc = 0;
		gaite->aisbo = 0;
		gaite->gisa = 0;
		aift->kzdev[zdev->aisb] = 0;
		/* Clear zdev info */
		airq_iv_free_bit(aift->sbv, zdev->aisb);
		airq_iv_release(zdev->aibv);
		zdev->aisb = 0;
		zdev->aibv = NULL;
	}
	spin_unlock_irq(&aift->gait_lock);
	kvm_s390_gisc_unregister(kzdev->kvm, isc);
	kzdev->fib.fmt0.isc = 0;
	kzdev->fib.fmt0.aibv = 0;

	if (vpage) {
		unpin_user_page(vpage);
		pcount++;
	}
	if (spage) {
		unpin_user_page(spage);
		pcount++;
	}
	if (pcount > 0)
		unaccount_mem(pcount);
out:
	mutex_unlock(&aift->aift_lock);

	return rc;
}

static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
{
	struct kvm_zdev *kzdev;

	kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
	if (!kzdev)
		return -ENOMEM;

	kzdev->zdev = zdev;
	zdev->kzdev = kzdev;

	return 0;
}

static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
{
	struct kvm_zdev *kzdev;

	kzdev = zdev->kzdev;
	WARN_ON(kzdev->zdev != zdev);
	zdev->kzdev = NULL;
	kfree(kzdev);
}


/*
 * Register device with the specified KVM. If interpretation facilities are
 * available, enable them and let userspace indicate whether or not they will
 * be used (specify SHM bit to disable).
 */
int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
{
	int rc;

	if (!zdev)
		return -EINVAL;

	mutex_lock(&zdev->kzdev_lock);

	if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
		mutex_unlock(&zdev->kzdev_lock);
		return -EINVAL;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);

	rc = kvm_s390_pci_dev_open(zdev);
	if (rc)
		goto err;

	/*
	 * If interpretation facilities aren't available, add the device to
	 * the kzdev list but don't enable for interpretation.
	 */
	if (!kvm_s390_pci_interp_allowed())
		goto out;

	/*
	 * If this is the first request to use an interpreted device, make the
	 * necessary vcpu changes
	 */
	if (!kvm->arch.use_zpci_interp)
		kvm_s390_vcpu_pci_enable_interp(kvm);

	if (zdev_enabled(zdev)) {
		rc = zpci_disable_device(zdev);
		if (rc)
			goto err;
	}

	/*
	 * Store information about the identity of the kvm guest allowed to
	 * access this device via interpretation to be used by host CLP
	 */
	zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);

	rc = zpci_enable_device(zdev);
	if (rc)
		goto clear_gisa;

	/* Re-register the IOMMU that was already created */
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				virt_to_phys(zdev->dma_table));
	if (rc)
		goto clear_gisa;

out:
	zdev->kzdev->kvm = kvm;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
	spin_unlock(&kvm->arch.kzdev_list_lock);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	return 0;

clear_gisa:
	zdev->gisa = 0;
err:
	if (zdev->kzdev)
		kvm_s390_pci_dev_release(zdev);
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	kvm_put_kvm(kvm);
	return rc;
}
EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);

void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
{
	struct kvm *kvm;

	if (!zdev)
		return;

	mutex_lock(&zdev->kzdev_lock);

	if (WARN_ON(!zdev->kzdev)) {
		mutex_unlock(&zdev->kzdev_lock);
		return;
	}

	kvm = zdev->kzdev->kvm;
	mutex_lock(&kvm->lock);

	/*
	 * A 0 gisa means interpretation was never enabled, just remove the
	 * device from the list.
	 */
	if (zdev->gisa == 0)
		goto out;

	/* Forwarding must be turned off before interpretation */
	if (zdev->kzdev->fib.fmt0.aibv != 0)
		kvm_s390_pci_aif_disable(zdev, true);

	/* Remove the host CLP guest designation */
	zdev->gisa = 0;

	if (zdev_enabled(zdev)) {
		if (zpci_disable_device(zdev))
			goto out;
	}

	if (zpci_enable_device(zdev))
		goto out;

	/* Re-register the IOMMU that was already created */
	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
			   virt_to_phys(zdev->dma_table));

out:
	spin_lock(&kvm->arch.kzdev_list_lock);
	list_del(&zdev->kzdev->entry);
	spin_unlock(&kvm->arch.kzdev_list_lock);
	kvm_s390_pci_dev_release(zdev);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);

	kvm_put_kvm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);

void kvm_s390_pci_init_list(struct kvm *kvm)
{
	spin_lock_init(&kvm->arch.kzdev_list_lock);
	INIT_LIST_HEAD(&kvm->arch.kzdev_list);
}

void kvm_s390_pci_clear_list(struct kvm *kvm)
{
	/*
	 * This list should already be empty, either via vfio device closures
	 * or kvm fd cleanup.
	 */
	spin_lock(&kvm->arch.kzdev_list_lock);
	WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
	spin_unlock(&kvm->arch.kzdev_list_lock);
}

static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
{
	struct zpci_dev *zdev = NULL;
	struct kvm_zdev *kzdev;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
		if (kzdev->zdev->fh == fh) {
			zdev = kzdev->zdev;
			break;
		}
	}
	spin_unlock(&kvm->arch.kzdev_list_lock);

	return zdev;
}

static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
				     struct kvm_s390_zpci_op *args)
{
	struct zpci_fib fib = {};
	bool hostflag;

	fib.fmt0.aibv = args->u.reg_aen.ibv;
	fib.fmt0.isc = args->u.reg_aen.isc;
	fib.fmt0.noi = args->u.reg_aen.noi;
	if (args->u.reg_aen.sb != 0) {
		fib.fmt0.aisb = args->u.reg_aen.sb;
		fib.fmt0.aisbo = args->u.reg_aen.sbo;
		fib.fmt0.sum = 1;
	} else {
		fib.fmt0.aisb = 0;
		fib.fmt0.aisbo = 0;
		fib.fmt0.sum = 0;
	}

	hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
	return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
}

int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
{
	struct kvm_zdev *kzdev;
	struct zpci_dev *zdev;
	int r;

	zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
	if (!zdev)
		return -ENODEV;

	mutex_lock(&zdev->kzdev_lock);
	mutex_lock(&kvm->lock);

	kzdev = zdev->kzdev;
	if (!kzdev) {
		r = -ENODEV;
		goto out;
	}
	if (kzdev->kvm != kvm) {
		r = -EPERM;
		goto out;
	}

	switch (args->op) {
	case KVM_S390_ZPCIOP_REG_AEN:
		/* Fail on unknown flags */
		if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_pci_zpci_reg_aen(zdev, args);
		break;
	case KVM_S390_ZPCIOP_DEREG_AEN:
		r = kvm_s390_pci_aif_disable(zdev, false);
		break;
	default:
		r = -EINVAL;
	}

out:
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	return r;
}

int kvm_s390_pci_init(void)
{
	aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
	if (!aift)
		return -ENOMEM;

	spin_lock_init(&aift->gait_lock);
	mutex_init(&aift->aift_lock);

	return 0;
}

void kvm_s390_pci_exit(void)
{
	mutex_destroy(&aift->aift_lock);

	kfree(aift);
}
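For context, a minimal userspace sketch of driving kvm_s390_pci_zpci_op() through the new KVM_S390_ZPCI_OP vm ioctl defined later in this series. The function handle and guest addresses are hypothetical placeholders and error handling is omitted:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int register_aen(int vm_fd, uint32_t fh, uint64_t ibv_gpa, uint64_t sb_gpa,
		 uint8_t isc, uint32_t noi)
{
	struct kvm_s390_zpci_op op;

	memset(&op, 0, sizeof(op));	/* pad and reserved fields must be 0 */
	op.fh = fh;			/* target zPCI function handle */
	op.op = KVM_S390_ZPCIOP_REG_AEN;
	op.u.reg_aen.ibv = ibv_gpa;	/* guest interrupt bit vector */
	op.u.reg_aen.sb = sb_gpa;	/* guest summary bit (0 = none) */
	op.u.reg_aen.isc = isc;
	op.u.reg_aen.noi = noi;
	op.u.reg_aen.flags = 0;		/* 0: firmware delivers to the guest */

	return ioctl(vm_fd, KVM_S390_ZPCI_OP, &op);
}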
arch/s390/kvm/pci.h
new file
@@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * s390 kvm PCI passthrough support
 *
 * Copyright IBM Corp. 2022
 *
 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
 */

#ifndef __KVM_S390_PCI_H
#define __KVM_S390_PCI_H

#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <asm/airq.h>
#include <asm/cpu.h>

struct kvm_zdev {
	struct zpci_dev *zdev;
	struct kvm *kvm;
	struct zpci_fib fib;
	struct list_head entry;
};

struct zpci_gaite {
	u32 gisa;
	u8 gisc;
	u8 count;
	u8 reserved;
	u8 aisbo;
	u64 aisb;
};

struct zpci_aift {
	struct zpci_gaite *gait;
	struct airq_iv *sbv;
	struct kvm_zdev **kzdev;
	spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
	struct mutex aift_lock; /* Protects the other structures in aift */
};

extern struct zpci_aift *aift;

static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
						 unsigned long si)
{
	if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || aift->kzdev == 0 ||
	    aift->kzdev[si] == 0)
		return 0;
	return aift->kzdev[si]->kvm;
};

int kvm_s390_pci_aen_init(u8 nisc);
void kvm_s390_pci_aen_exit(void);

void kvm_s390_pci_init_list(struct kvm *kvm);
void kvm_s390_pci_clear_list(struct kvm *kvm);

int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);

int kvm_s390_pci_init(void);
void kvm_s390_pci_exit(void);

static inline bool kvm_s390_pci_interp_allowed(void)
{
	struct cpuid cpu_id;

	get_cpu_id(&cpu_id);
	switch (cpu_id.machine) {
	case 0x2817:
	case 0x2818:
	case 0x2827:
	case 0x2828:
	case 0x2964:
	case 0x2965:
		/* No SHM on certain machines */
		return false;
	default:
		return (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) &&
			sclp.has_zpci_lsi && sclp.has_aeni && sclp.has_aisi &&
			sclp.has_aisii);
	}
}

#endif /* __KVM_S390_PCI_H */
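The aisb/aisbo split used in kvm_zpci_set_airq() and kvm_s390_pci_aif_enable() above encodes a summary bit number as the address of the doubleword containing the bit plus the bit offset within it. A small illustrative helper showing that arithmetic (not part of the patch):

/* Map a summary bit number to its containing 64-bit word and bit offset. */
static inline void summary_bit_split(unsigned long *vector, unsigned long bit,
				     unsigned long **word, unsigned char *offset)
{
	*word = vector + bit / 64;	/* same as (bit / 64) * 8 bytes in */
	*offset = bit & 63;		/* bit position within that word */
}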
@@ -442,7 +442,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_ipte_interlock++;
 	if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu->kvm));
 	kvm_s390_retry_instr(vcpu);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
 	return 0;
@@ -873,10 +873,18 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

-	if (fc > 3) {
-		kvm_s390_set_psw_cc(vcpu, 3);
-		return 0;
-	}
+	/* Bailout forbidden function codes */
+	if (fc > 3 && fc != 15)
+		goto out_no_data;
+
+	/*
+	 * fc 15 is provided only with
+	 *   - PTF/CPU topology support through facility 15
+	 *   - KVM_CAP_S390_USER_STSI
+	 */
+	if (fc == 15 && (!test_kvm_facility(vcpu->kvm, 11) ||
+			 !vcpu->kvm->arch.user_stsi))
+		goto out_no_data;

 	if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
 	    || vcpu->run->s.regs.gprs[1] & 0xffff0000)
@@ -910,6 +918,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 			goto out_no_data;
 		handle_stsi_3_2_2(vcpu, (void *) mem);
 		break;
+	case 15: /* fc 15 is fully handled in userspace */
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+		return -EREMOTE;
 	}
 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
 		memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
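A hedged sketch of the userspace side implied by the -EREMOTE exit above: with KVM_CAP_S390_USER_STSI enabled, an fc==15 STSI surfaces to the VMM as a KVM_EXIT_S390_STSI run exit and the VMM supplies the topology SYSIB itself. fill_topology_sysib() is a hypothetical helper; the s390_stsi field names follow the existing kvm_run layout:

#include <linux/kvm.h>

/* Hypothetical VMM helper that writes a SYSIB 15.1.x at a guest address. */
extern void fill_topology_sysib(__u64 guest_addr, __u16 sel2);

static void handle_exit_s390_stsi(struct kvm_run *run)
{
	if (run->s390_stsi.fc == 15)
		fill_topology_sysib(run->s390_stsi.addr, run->s390_stsi.sel2);
}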
@@ -1471,7 +1483,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 	access_key = (operand2 & 0xf0) >> 4;

 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
-		ipte_lock(vcpu);
+		ipte_lock(vcpu->kvm);

 	ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
 					       GACC_STORE, access_key);
@@ -1508,7 +1520,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
 	}

 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
-		ipte_unlock(vcpu);
+		ipte_unlock(vcpu->kvm);
 	return ret;
 }

@@ -13,8 +13,19 @@
 #include <asm/gmap.h>
 #include <asm/uv.h>
 #include <asm/mman.h>
+#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_notifier.h>
 #include "kvm-s390.h"

+static void kvm_s390_clear_pv_state(struct kvm *kvm)
+{
+	kvm->arch.pv.handle = 0;
+	kvm->arch.pv.guest_len = 0;
+	kvm->arch.pv.stor_base = 0;
+	kvm->arch.pv.stor_var = NULL;
+}
+
 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 {
 	int cc;
@@ -109,7 +120,7 @@ static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
 	vfree(kvm->arch.pv.stor_var);
 	free_pages(kvm->arch.pv.stor_base,
 		   get_order(uv_info.guest_base_stor_len));
-	memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+	kvm_s390_clear_pv_state(kvm);
 }

 static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
@@ -153,21 +164,51 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
 	int cc;

-	/* make all pages accessible before destroying the guest */
-	s390_reset_acc(kvm->mm);
-
 	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
 			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
 	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
-	atomic_set(&kvm->mm->context.is_protected, 0);
+	/*
+	 * if the mm still has a mapping, make all its pages accessible
+	 * before destroying the guest
+	 */
+	if (mmget_not_zero(kvm->mm)) {
+		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+		mmput(kvm->mm);
+	}
+
+	if (!cc) {
+		atomic_dec(&kvm->mm->context.protected_count);
+		kvm_s390_pv_dealloc_vm(kvm);
+	} else {
+		/* Intended memory leak on "impossible" error */
+		s390_replace_asce(kvm->arch.gmap);
+	}
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
 	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
-	/* Inteded memory leak on "impossible" error */
-	if (!cc)
-		kvm_s390_pv_dealloc_vm(kvm);

 	return cc ? -EIO : 0;
 }

+static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
+					     struct mm_struct *mm)
+{
+	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
+	u16 dummy;
+
+	/*
+	 * No locking is needed since this is the last thread of the last user of this
+	 * struct mm.
+	 * When the struct kvm gets deinitialized, this notifier is also
+	 * unregistered. This means that if this notifier runs, then the
+	 * struct kvm is still valid.
+	 */
+	kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+}
+
+static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
+	.release = kvm_s390_pv_mmu_notifier_release,
+};
+
 int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
 	struct uv_cb_cgc uvcb = {
@@ -198,14 +239,22 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 	/* Outputs */
 	kvm->arch.pv.handle = uvcb.guest_handle;

+	atomic_inc(&kvm->mm->context.protected_count);
 	if (cc) {
-		if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
 			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
-		else
+		} else {
+			atomic_dec(&kvm->mm->context.protected_count);
 			kvm_s390_pv_dealloc_vm(kvm);
+		}
 		return -EIO;
 	}
 	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+	/* Add the notifier only once. No races because we hold kvm->lock */
+	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+	}
 	return 0;
 }

@@ -225,8 +274,6 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
 	*rrc = uvcb.header.rrc;
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
 		     *rc, *rrc);
-	if (!cc)
-		atomic_set(&kvm->mm->context.is_protected, 1);
 	return cc ? -EINVAL : 0;
 }

@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *dest_vcpu;
 	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);

-	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
-
 	if (order_code == SIGP_EXTERNAL_CALL) {
+		trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
 		dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
 		BUG_ON(dest_vcpu == NULL);

@@ -503,6 +503,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	/* Host-protection-interruption introduced with ESOP */
 	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
 		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
+	/*
+	 * CPU Topology
+	 * This facility only uses the utility field of the SCA and none of
+	 * the cpu entries that are problematic with the other interpretation
+	 * facilities so we can pass it through
+	 */
+	if (test_kvm_facility(vcpu->kvm, 11))
+		scb_s->ecb |= scb_o->ecb & ECB_PTF;
 	/* transactional execution */
 	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
 		/* remap the prefix is tx is toggled on */

@@ -754,6 +754,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
 	struct page *page;
+	struct gmap *gmap;
 	int rc;

 	/*
@@ -783,6 +784,17 @@ void do_secure_storage_access(struct pt_regs *regs)
 	}

 	switch (get_fault_type(regs)) {
+	case GMAP_FAULT:
+		mm = current->mm;
+		gmap = (struct gmap *)S390_lowcore.gmap;
+		mmap_read_lock(mm);
+		addr = __gmap_translate(gmap, addr);
+		mmap_read_unlock(mm);
+		if (IS_ERR_VALUE(addr)) {
+			do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP);
+			break;
+		}
+		fallthrough;
 	case USER_FAULT:
 		mm = current->mm;
 		mmap_read_lock(mm);
@@ -811,7 +823,6 @@ void do_secure_storage_access(struct pt_regs *regs)
 		if (rc)
 			BUG();
 		break;
-	case GMAP_FAULT:
 	default:
 		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
 		WARN_ON_ONCE(1);
@@ -837,6 +848,16 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access);

 void do_secure_storage_violation(struct pt_regs *regs)
 {
+	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+	/*
+	 * If the VM has been rebooted, its address space might still contain
+	 * secure pages from the previous boot.
+	 * Clear the page so it can be reused.
+	 */
+	if (!gmap_destroy_page(gmap, gaddr))
+		return;
 	/*
 	 * Either KVM messed up the secure guest mapping or the same
 	 * page is mapped into multiple secure guests.
@@ -2697,41 +2697,168 @@ void s390_reset_cmma(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);

-/*
- * make inaccessible pages accessible again
- */
-static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
-			    unsigned long next, struct mm_walk *walk)
+#define GATHER_GET_PAGES 32
+
+struct reset_walk_state {
+	unsigned long next;
+	unsigned long count;
+	unsigned long pfns[GATHER_GET_PAGES];
+};
+
+static int s390_gather_pages(pte_t *ptep, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
 {
+	struct reset_walk_state *p = walk->private;
 	pte_t pte = READ_ONCE(*ptep);

-	/* There is a reference through the mapping */
-	if (pte_present(pte))
-		WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
+	if (pte_present(pte)) {
+		/* we have a reference from the mapping, take an extra one */
+		get_page(phys_to_page(pte_val(pte)));
+		p->pfns[p->count] = phys_to_pfn(pte_val(pte));
+		p->next = next;
+		p->count++;
+	}
+	return p->count >= GATHER_GET_PAGES;
+}
+
+static const struct mm_walk_ops gather_pages_ops = {
+	.pte_entry = s390_gather_pages,
+};
+
+/*
+ * Call the Destroy secure page UVC on each page in the given array of PFNs.
+ * Each page needs to have an extra reference, which will be released here.
+ */
+void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
+{
+	unsigned long i;
+
+	for (i = 0; i < count; i++) {
+		/* we always have an extra reference */
+		uv_destroy_owned_page(pfn_to_phys(pfns[i]));
+		/* get rid of the extra reference */
+		put_page(pfn_to_page(pfns[i]));
+		cond_resched();
+	}
+}
+EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
+
+/**
+ * __s390_uv_destroy_range - Call the destroy secure page UVC on each page
+ * in the given range of the given address space.
+ * @mm: the mm to operate on
+ * @start: the start of the range
+ * @end: the end of the range
+ * @interruptible: if not 0, stop when a fatal signal is received
+ *
+ * Walk the given range of the given address space and call the destroy
+ * secure page UVC on each page. Optionally exit early if a fatal signal is
+ * pending.
+ *
+ * Return: 0 on success, -EINTR if the function stopped before completing
+ */
+int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
+			    unsigned long end, bool interruptible)
+{
+	struct reset_walk_state state = { .next = start };
+	int r = 1;
+
+	while (r > 0) {
+		state.count = 0;
+		mmap_read_lock(mm);
+		r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
+		mmap_read_unlock(mm);
+		cond_resched();
+		s390_uv_destroy_pfns(state.count, state.pfns);
+		if (interruptible && fatal_signal_pending(current))
+			return -EINTR;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
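Note that kvm_s390_pv_deinit_vm() earlier in this series calls s390_uv_destroy_range() without the interruptible flag. The non-underscore wrapper lives in a header outside this diff; presumably it is a thin forwarder along these lines, shown only as an assumption to connect the two hunks:

/* Assumed wrapper; the actual definition is in a header not shown here. */
static inline int s390_uv_destroy_range(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	return __s390_uv_destroy_range(mm, start, end, false);
}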
+
+/**
+ * s390_unlist_old_asce - Remove the topmost level of page tables from the
+ * list of page tables of the gmap.
+ * @gmap: the gmap whose table is to be removed
+ *
+ * On s390x, KVM keeps a list of all pages containing the page tables of the
+ * gmap (the CRST list). This list is used at tear down time to free all
+ * pages that are now not needed anymore.
+ *
+ * This function removes the topmost page of the tree (the one pointed to by
+ * the ASCE) from the CRST list.
+ *
+ * This means that it will not be freed when the VM is torn down, and needs
+ * to be handled separately by the caller, unless a leak is actually
+ * intended. Notice that this function will only remove the page from the
+ * list, the page will still be used as a top level page table (and ASCE).
+ */
+void s390_unlist_old_asce(struct gmap *gmap)
+{
+	struct page *old;
+
+	old = virt_to_page(gmap->table);
+	spin_lock(&gmap->guest_table_lock);
+	list_del(&old->lru);
+	/*
+	 * Sometimes the topmost page might need to be "removed" multiple
+	 * times, for example if the VM is rebooted into secure mode several
+	 * times concurrently, or if s390_replace_asce fails after calling
+	 * s390_remove_old_asce and is attempted again later. In that case
+	 * the old asce has been removed from the list, and therefore it
+	 * will not be freed when the VM terminates, but the ASCE is still
+	 * in use and still pointed to.
+	 * A subsequent call to replace_asce will follow the pointer and try
+	 * to remove the same page from the list again.
+	 * Therefore it's necessary that the page of the ASCE has valid
+	 * pointers, so list_del can work (and do nothing) without
+	 * dereferencing stale or invalid pointers.
+	 */
+	INIT_LIST_HEAD(&old->lru);
+	spin_unlock(&gmap->guest_table_lock);
+}
+EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
+
+/**
+ * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
+ * @gmap: the gmap whose ASCE needs to be replaced
+ *
+ * If the allocation of the new top level page table fails, the ASCE is not
+ * replaced.
+ * In any case, the old ASCE is always removed from the gmap CRST list.
+ * Therefore the caller has to make sure to save a pointer to it
+ * beforehand, unless a leak is actually intended.
+ */
+int s390_replace_asce(struct gmap *gmap)
+{
+	unsigned long asce;
+	struct page *page;
+	void *table;
+
+	s390_unlist_old_asce(gmap);
+
+	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
+	if (!page)
+		return -ENOMEM;
+	table = page_to_virt(page);
+	memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
+
+	/*
+	 * The caller has to deal with the old ASCE, but here we make sure
+	 * the new one is properly added to the CRST list, so that
+	 * it will be freed when the VM is torn down.
+	 */
+	spin_lock(&gmap->guest_table_lock);
+	list_add(&page->lru, &gmap->crst_list);
+	spin_unlock(&gmap->guest_table_lock);
+
+	/* Set new table origin while preserving existing ASCE control bits */
+	asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
+	WRITE_ONCE(gmap->asce, asce);
+	WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
+	WRITE_ONCE(gmap->table, table);
+
+	return 0;
+}
-
-static const struct mm_walk_ops reset_acc_walk_ops = {
-	.pte_entry = __s390_reset_acc,
-};
-
-#include <linux/sched/mm.h>
-void s390_reset_acc(struct mm_struct *mm)
-{
-	if (!mm_is_protected(mm))
-		return;
-	/*
-	 * we might be called during
-	 * reset: we walk the pages and clear
-	 * close of all kvm file descriptors: we walk the pages and clear
-	 * exit of process on fd closure: vma already gone, do nothing
-	 */
-	if (!mmget_not_zero(mm))
-		return;
-	mmap_read_lock(mm);
-	walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
-	mmap_read_unlock(mm);
-	mmput(mm);
-}
-EXPORT_SYMBOL_GPL(s390_reset_acc);
+EXPORT_SYMBOL_GPL(s390_replace_asce);
@ -61,6 +61,12 @@ DEFINE_STATIC_KEY_FALSE(have_mio);
|
||||
|
||||
static struct kmem_cache *zdev_fmb_cache;
|
||||
|
||||
/* AEN structures that must be preserved over KVM module re-insertion */
|
||||
union zpci_sic_iib *zpci_aipb;
|
||||
EXPORT_SYMBOL_GPL(zpci_aipb);
|
||||
struct airq_iv *zpci_aif_sbv;
|
||||
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
|
||||
|
||||
struct zpci_dev *get_zdev_by_fid(u32 fid)
|
||||
{
|
||||
struct zpci_dev *tmp, *zdev = NULL;
|
||||
@ -120,11 +126,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
|
||||
fib.pba = base;
|
||||
fib.pal = limit;
|
||||
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
|
||||
fib.gd = zdev->gisa;
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc)
|
||||
zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
|
||||
return cc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_register_ioat);
|
||||
|
||||
/* Modify PCI: Unregister I/O address translation parameters */
|
||||
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
|
||||
@ -133,6 +141,8 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
|
||||
struct zpci_fib fib = {0};
|
||||
u8 cc, status;
|
||||
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc)
|
||||
zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
|
||||
@ -160,6 +170,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
|
||||
atomic64_set(&zdev->unmapped_pages, 0);
|
||||
|
||||
fib.fmb_addr = virt_to_phys(zdev->fmb);
|
||||
fib.gd = zdev->gisa;
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc) {
|
||||
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
|
||||
@ -178,6 +189,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
|
||||
if (!zdev->fmb)
|
||||
return -EINVAL;
|
||||
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
/* Function measurement is disabled if fmb address is zero */
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc == 3) /* Function already gone. */
|
||||
@ -700,6 +713,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
|
||||
zpci_update_fh(zdev, fh);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_enable_device);
|
||||
|
||||
int zpci_disable_device(struct zpci_dev *zdev)
|
||||
{
|
||||
@ -723,6 +737,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_disable_device);
|
||||
|
||||
/**
|
||||
* zpci_hot_reset_device - perform a reset of the given zPCI function
|
||||
@ -816,6 +831,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
|
||||
|
||||
kref_init(&zdev->kref);
|
||||
mutex_init(&zdev->lock);
|
||||
mutex_init(&zdev->kzdev_lock);
|
||||
|
||||
rc = zpci_init_iommu(zdev);
|
||||
if (rc)
|
||||
|
@ -106,6 +106,8 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
|
||||
zdev->max_msi = response->noi;
|
||||
zdev->fmb_update = response->mui;
|
||||
zdev->version = response->version;
|
||||
zdev->maxstbl = response->maxstbl;
|
||||
zdev->dtsm = response->dtsm;
|
||||
|
||||
switch (response->version) {
|
||||
case 1:
|
||||
@ -229,12 +231,16 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
|
||||
{
|
||||
struct clp_req_rsp_set_pci *rrb;
|
||||
int rc, retries = 100;
|
||||
u32 gisa = 0;
|
||||
|
||||
*fh = 0;
|
||||
rrb = clp_alloc_block(GFP_KERNEL);
|
||||
if (!rrb)
|
||||
return -ENOMEM;
|
||||
|
||||
if (command != CLP_SET_DISABLE_PCI_FN)
|
||||
gisa = zdev->gisa;
|
||||
|
||||
do {
|
||||
memset(rrb, 0, sizeof(*rrb));
|
||||
rrb->request.hdr.len = sizeof(rrb->request);
|
||||
@ -243,6 +249,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
|
||||
rrb->request.fh = zdev->fh;
|
||||
rrb->request.oc = command;
|
||||
rrb->request.ndas = nr_dma_as;
|
||||
rrb->request.gisa = gisa;
|
||||
|
||||
rc = clp_req(rrb, CLP_LPS_PCI);
|
||||
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
|
||||
|
@ -92,6 +92,7 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
|
||||
|
||||
return cc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_mod_fc);
|
||||
|
||||
/* Refresh PCI Translations */
|
||||
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
|
||||
@ -138,7 +139,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
|
||||
}
|
||||
|
||||
/* Set Interruption Controls */
|
||||
int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
|
||||
int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
|
||||
{
|
||||
if (!test_facility(72))
|
||||
return -EIO;
|
||||
@ -149,6 +150,7 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
|
||||
|
||||
/* PCI Load */
|
||||
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
|
||||
|
@ -11,16 +11,10 @@
|
||||
|
||||
#include <asm/isc.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/tpi.h>
|
||||
|
||||
static enum {FLOATING, DIRECTED} irq_delivery;
|
||||
|
||||
#define SIC_IRQ_MODE_ALL 0
|
||||
#define SIC_IRQ_MODE_SINGLE 1
|
||||
#define SIC_IRQ_MODE_DIRECT 4
|
||||
#define SIC_IRQ_MODE_D_ALL 16
|
||||
#define SIC_IRQ_MODE_D_SINGLE 17
|
||||
#define SIC_IRQ_MODE_SET_CPU 18
|
||||
|
||||
/*
|
||||
* summary bit vector
|
||||
* FLOATING - summary bit per function
|
||||
@ -49,6 +43,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
|
||||
fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
|
||||
fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
|
||||
fib.fmt0.aisbo = zdev->aisb & 63;
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
|
||||
}
|
||||
@ -60,6 +55,8 @@ static int zpci_clear_airq(struct zpci_dev *zdev)
|
||||
struct zpci_fib fib = {0};
|
||||
u8 cc, status;
|
||||
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc == 3 || (cc == 1 && status == 24))
|
||||
/* Function already gone or IRQs already deregistered. */
|
||||
@ -78,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
|
||||
fib.fmt = 1;
|
||||
fib.fmt1.noi = zdev->msi_nr_irqs;
|
||||
fib.fmt1.dibvo = zdev->msi_first_bit;
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
|
||||
}
|
||||
@ -90,6 +88,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
|
||||
u8 cc, status;
|
||||
|
||||
fib.fmt = 1;
|
||||
fib.gd = zdev->gisa;
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc == 3 || (cc == 1 && status == 24))
|
||||
/* Function already gone or IRQs already deregistered. */
|
||||
@ -153,6 +152,7 @@ static struct irq_chip zpci_irq_chip = {
|
||||
static void zpci_handle_cpu_local_irq(bool rescan)
|
||||
{
|
||||
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
unsigned long bit;
|
||||
int irqs_on = 0;
|
||||
|
||||
@ -164,7 +164,7 @@ static void zpci_handle_cpu_local_irq(bool rescan)
|
||||
/* End of second scan with interrupts on. */
|
||||
break;
|
||||
/* First scan complete, reenable interrupts. */
|
||||
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
|
||||
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
|
||||
break;
|
||||
bit = 0;
|
||||
continue;
|
||||
@ -192,6 +192,7 @@ static void zpci_handle_remote_irq(void *data)
|
||||
static void zpci_handle_fallback_irq(void)
|
||||
{
|
||||
struct cpu_irq_data *cpu_data;
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
unsigned long cpu;
|
||||
int irqs_on = 0;
|
||||
|
||||
@ -202,7 +203,7 @@ static void zpci_handle_fallback_irq(void)
|
||||
/* End of second scan with interrupts on. */
|
||||
break;
|
||||
/* First scan complete, reenable interrupts. */
|
||||
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
|
||||
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
|
||||
break;
|
||||
cpu = 0;
|
||||
continue;
|
||||
@ -216,8 +217,11 @@ static void zpci_handle_fallback_irq(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
|
||||
static void zpci_directed_irq_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info)
|
||||
{
|
||||
bool floating = !tpi_info->directed_irq;
|
||||
|
||||
if (floating) {
|
||||
inc_irq_stat(IRQIO_PCF);
|
||||
zpci_handle_fallback_irq();
|
||||
@ -227,8 +231,10 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
|
||||
}
|
||||
}
|
||||
|
||||
static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
|
||||
static void zpci_floating_irq_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info)
|
||||
{
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
unsigned long si, ai;
|
||||
struct airq_iv *aibv;
|
||||
int irqs_on = 0;
|
||||
@ -242,7 +248,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
|
||||
/* End of second scan with interrupts on. */
|
||||
break;
|
||||
/* First scan complete, reenable interrupts. */
|
||||
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
|
||||
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
|
||||
break;
|
||||
si = 0;
|
||||
continue;
|
||||
@ -291,7 +297,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
|
||||
zdev->aisb = bit;
|
||||
|
||||
/* Create adapter interrupt vector */
|
||||
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
|
||||
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
|
||||
if (!zdev->aibv)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -402,11 +408,12 @@ static struct airq_struct zpci_airq = {
|
||||
static void __init cpu_enable_directed_irq(void *unused)
|
||||
{
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
union zpci_sic_iib ziib = {{0}};
|
||||
|
||||
iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
|
||||
|
||||
__zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
|
||||
}
|
||||
|
||||
static int __init zpci_directed_irq_init(void)
|
||||
@ -414,14 +421,14 @@ static int __init zpci_directed_irq_init(void)
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
unsigned int cpu;
|
||||
|
||||
zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
|
||||
zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
|
||||
if (!zpci_sbv)
|
||||
return -ENOMEM;
|
||||
|
||||
iib.diib.isc = PCI_ISC;
|
||||
iib.diib.nr_cpus = num_possible_cpus();
|
||||
iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
|
||||
__zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
|
||||
|
||||
zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
|
||||
GFP_KERNEL);
|
||||
@ -436,7 +443,7 @@ static int __init zpci_directed_irq_init(void)
|
||||
zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
|
||||
AIRQ_IV_DATA |
|
||||
AIRQ_IV_CACHELINE |
|
||||
(!cpu ? AIRQ_IV_ALLOC : 0));
|
||||
(!cpu ? AIRQ_IV_ALLOC : 0), NULL);
|
||||
if (!zpci_ibv[cpu])
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -453,7 +460,7 @@ static int __init zpci_floating_irq_init(void)
|
||||
if (!zpci_ibv)
|
||||
return -ENOMEM;
|
||||
|
||||
zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
|
||||
zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
|
||||
if (!zpci_sbv)
|
||||
goto out_free;
|
||||
|
||||
@ -466,6 +473,7 @@ out_free:
|
||||
|
||||
int __init zpci_irq_init(void)
|
||||
{
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
int rc;
|
||||
|
||||
irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
|
||||
@ -497,7 +505,7 @@ int __init zpci_irq_init(void)
|
||||
* Enable floating IRQs (with suppression after one IRQ). When using
|
||||
* directed IRQs this enables the fallback path.
|
||||
*/
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);
|
||||
|
||||
return 0;
|
||||
out_airq:
|
||||
|
@ -111,6 +111,7 @@ static struct facility_def facility_defs[] = {
|
||||
193, /* bear enhancement facility */
|
||||
194, /* rdp enhancement facility */
|
||||
196, /* processor activity instrumentation facility */
|
||||
197, /* processor activity instrumentation extension 1 */
|
||||
-1 /* END */
|
||||
}
|
||||
},
|
||||
|
@ -45,6 +45,10 @@ static void __init sclp_early_facilities_detect(void)
|
||||
sclp.has_gisaf = !!(sccb->fac118 & 0x08);
|
||||
sclp.has_hvs = !!(sccb->fac119 & 0x80);
|
||||
sclp.has_kss = !!(sccb->fac98 & 0x01);
|
||||
sclp.has_aisii = !!(sccb->fac118 & 0x40);
|
||||
sclp.has_aeni = !!(sccb->fac118 & 0x20);
|
||||
sclp.has_aisi = !!(sccb->fac118 & 0x10);
|
||||
sclp.has_zpci_lsi = !!(sccb->fac118 & 0x01);
|
||||
if (sccb->fac85 & 0x02)
|
||||
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
|
||||
if (sccb->fac91 & 0x40)
|
||||
|
@ -99,7 +99,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy)
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(airq, head, list)
|
||||
if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
|
||||
airq->handler(airq, !tpi_info->directed_irq);
|
||||
airq->handler(airq, tpi_info);
|
||||
rcu_read_unlock();
|
||||
|
||||
return IRQ_HANDLED;
|
||||
@ -122,10 +122,12 @@ static inline unsigned long iv_size(unsigned long bits)
|
||||
* airq_iv_create - create an interrupt vector
|
||||
* @bits: number of bits in the interrupt vector
|
||||
* @flags: allocation flags
|
||||
* @vec: pointer to pinned guest memory if AIRQ_IV_GUESTVEC
|
||||
*
|
||||
* Returns a pointer to an interrupt vector structure
|
||||
*/
|
||||
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
|
||||
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
|
||||
unsigned long *vec)
|
||||
{
|
||||
struct airq_iv *iv;
|
||||
unsigned long size;
|
||||
@ -146,6 +148,8 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
|
||||
&iv->vector_dma);
|
||||
if (!iv->vector)
|
||||
goto out_free;
|
||||
} else if (flags & AIRQ_IV_GUESTVEC) {
|
||||
iv->vector = vec;
|
||||
} else {
|
||||
iv->vector = cio_dma_zalloc(size);
|
||||
if (!iv->vector)
|
||||
@ -185,7 +189,7 @@ out_free:
|
||||
kfree(iv->avail);
|
||||
if (iv->flags & AIRQ_IV_CACHELINE && iv->vector)
|
||||
dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
|
||||
else
|
||||
else if (!(iv->flags & AIRQ_IV_GUESTVEC))
|
||||
cio_dma_free(iv->vector, size);
|
||||
kfree(iv);
|
||||
out:
|
||||
@ -204,7 +208,7 @@ void airq_iv_release(struct airq_iv *iv)
|
||||
kfree(iv->bitlock);
|
||||
if (iv->flags & AIRQ_IV_CACHELINE)
|
||||
dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
|
||||
else
|
||||
else if (!(iv->flags & AIRQ_IV_GUESTVEC))
|
||||
cio_dma_free(iv->vector, iv_size(iv->bits));
|
||||
kfree(iv->avail);
|
||||
kfree(iv);
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <asm/qdio.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/isc.h>
|
||||
#include <asm/tpi.h>
|
||||
|
||||
#include "cio.h"
|
||||
#include "ioasm.h"
|
||||
@ -93,9 +94,10 @@ static inline u32 clear_shared_ind(void)
|
||||
/**
|
||||
* tiqdio_thinint_handler - thin interrupt handler for qdio
|
||||
* @airq: pointer to adapter interrupt descriptor
|
||||
* @floating: flag to recognize floating vs. directed interrupts (unused)
|
||||
* @tpi_info: interrupt information (e.g. floating vs directed -- unused)
|
||||
*/
|
||||
static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating)
|
||||
static void tiqdio_thinint_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info)
|
||||
{
|
||||
u64 irq_time = S390_lowcore.int_clock;
|
||||
u32 si_used = clear_shared_ind();
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/tpi.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/isc.h>
|
||||
#include <linux/hrtimer.h>
|
||||
@ -131,7 +132,8 @@ static int ap_max_adapter_id = 63;
|
||||
static struct bus_type ap_bus_type;
|
||||
|
||||
/* Adapter interrupt definitions */
|
||||
static void ap_interrupt_handler(struct airq_struct *airq, bool floating);
|
||||
static void ap_interrupt_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info);
|
||||
|
||||
static bool ap_irq_flag;
|
||||
|
||||
@ -452,9 +454,10 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused)
|
||||
/**
|
||||
* ap_interrupt_handler() - Schedule ap_tasklet on interrupt
|
||||
* @airq: pointer to adapter interrupt descriptor
|
||||
* @floating: ignored
|
||||
* @tpi_info: ignored
|
||||
*/
|
||||
static void ap_interrupt_handler(struct airq_struct *airq, bool floating)
|
||||
static void ap_interrupt_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info)
|
||||
{
|
||||
inc_irq_stat(IRQIO_APB);
|
||||
tasklet_schedule(&ap_tasklet);
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <asm/virtio-ccw.h>
|
||||
#include <asm/isc.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/tpi.h>
|
||||
|
||||
/*
|
||||
* virtio related functions
|
||||
@ -204,7 +205,8 @@ static void drop_airq_indicator(struct virtqueue *vq, struct airq_info *info)
|
||||
write_unlock_irqrestore(&info->lock, flags);
|
||||
}
|
||||
|
||||
static void virtio_airq_handler(struct airq_struct *airq, bool floating)
|
||||
static void virtio_airq_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info)
|
||||
{
|
||||
struct airq_info *info = container_of(airq, struct airq_info, airq);
|
||||
unsigned long ai;
|
||||
@ -240,7 +242,7 @@ static struct airq_info *new_airq_info(int index)
|
||||
return NULL;
|
||||
rwlock_init(&info->lock);
|
||||
info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR
|
||||
| AIRQ_IV_CACHELINE);
|
||||
| AIRQ_IV_CACHELINE, NULL);
|
||||
if (!info->aiv) {
|
||||
kfree(info);
|
||||
return NULL;
|
||||
|
@ -44,6 +44,17 @@ config VFIO_PCI_IGD
|
||||
To enable Intel IGD assignment through vfio-pci, say Y.
|
||||
endif
|
||||
|
||||
config VFIO_PCI_ZDEV_KVM
|
||||
bool "VFIO PCI extensions for s390x KVM passthrough"
|
||||
depends on S390 && KVM
|
||||
default y
|
||||
help
|
||||
Support s390x-specific extensions to enable support for enhancements
|
||||
to KVM passthrough capabilities, such as interpretive execution of
|
||||
zPCI instructions.
|
||||
|
||||
To enable s390x KVM vfio-pci extensions, say Y.
|
||||
|
||||
source "drivers/vfio/pci/mlx5/Kconfig"
|
||||
|
||||
source "drivers/vfio/pci/hisilicon/Kconfig"
|
||||
|
@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
|
||||
vfio-pci-core-$(CONFIG_S390) += vfio_pci_zdev.o
|
||||
vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV_KVM) += vfio_pci_zdev.o
|
||||
obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
|
||||
|
||||
vfio-pci-y := vfio_pci.o
|
||||
|
@ -316,10 +316,14 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
|
||||
pci_write_config_word(pdev, PCI_COMMAND, cmd);
|
||||
}
|
||||
|
||||
ret = vfio_config_init(vdev);
|
||||
ret = vfio_pci_zdev_open_device(vdev);
|
||||
if (ret)
|
||||
goto out_free_state;
|
||||
|
||||
ret = vfio_config_init(vdev);
|
||||
if (ret)
|
||||
goto out_free_zdev;
|
||||
|
||||
msix_pos = pdev->msix_cap;
|
||||
if (msix_pos) {
|
||||
u16 flags;
|
||||
@ -340,6 +344,8 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_zdev:
|
||||
vfio_pci_zdev_close_device(vdev);
|
||||
out_free_state:
|
||||
kfree(vdev->pci_saved_state);
|
||||
vdev->pci_saved_state = NULL;
|
||||
@ -418,6 +424,8 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
|
||||
|
||||
vdev->needs_reset = true;
|
||||
|
||||
vfio_pci_zdev_close_device(vdev);
|
||||
|
||||
/*
|
||||
* If we have saved state, restore it. If we can reset the device,
|
||||
* even better. Resetting with current state seems better than
|
||||
|
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/vfio_zdev.h>
+#include <linux/kvm_host.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_io.h>
 
@@ -23,14 +24,15 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
 	struct vfio_device_info_cap_zpci_base cap = {
 		.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
-		.header.version = 1,
+		.header.version = 2,
 		.start_dma = zdev->start_dma,
 		.end_dma = zdev->end_dma,
 		.pchid = zdev->pchid,
 		.vfn = zdev->vfn,
 		.fmb_length = zdev->fmb_length,
 		.pft = zdev->pft,
-		.gid = zdev->pfgid
+		.gid = zdev->pfgid,
+		.fh = zdev->fh
 	};
 
 	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
@@ -43,14 +45,16 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
 	struct vfio_device_info_cap_zpci_group cap = {
 		.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
-		.header.version = 1,
+		.header.version = 2,
 		.dasm = zdev->dma_mask,
 		.msi_addr = zdev->msi_addr,
 		.flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH,
 		.mui = zdev->fmb_update,
 		.noi = zdev->max_msi,
 		.maxstbl = ZPCI_MAX_WRITE_SIZE,
-		.version = zdev->version
+		.version = zdev->version,
+		.reserved = 0,
+		.imaxstbl = zdev->maxstbl
 	};
 
 	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
@@ -136,3 +140,26 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 
 	return ret;
 }
+
+int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
+{
+	struct zpci_dev *zdev = to_zpci(vdev->pdev);
+
+	if (!zdev)
+		return -ENODEV;
+
+	if (!vdev->vdev.kvm)
+		return 0;
+
+	return kvm_s390_pci_register_kvm(zdev, vdev->vdev.kvm);
+}
+
+void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
+{
+	struct zpci_dev *zdev = to_zpci(vdev->pdev);
+
+	if (!zdev || !vdev->vdev.kvm)
+		return;
+
+	kvm_s390_pci_unregister_kvm(zdev);
+}
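
As context for the open/close hooks above: vdev->vdev.kvm is only non-NULL once
userspace has associated the VFIO group with a VM through the KVM VFIO
pseudo-device. A minimal, hypothetical userspace sketch of that association
(vm_fd and group_fd are assumed to be already-open descriptors)::

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Hypothetical helper: attach an open VFIO group fd to a KVM VM so
   * that the vfio-pci core sees a non-NULL kvm pointer at open time. */
  static int link_vfio_group_to_kvm(int vm_fd, int group_fd)
  {
          struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
          struct kvm_device_attr attr;

          if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                  return -1;

          memset(&attr, 0, sizeof(attr));
          attr.group = KVM_DEV_VFIO_GROUP;
          attr.attr = KVM_DEV_VFIO_GROUP_ADD;
          attr.addr = (__u64)(unsigned long)&group_fd;

          return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
  }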
@@ -24,7 +24,8 @@ struct user_struct {
 	kuid_t uid;
 
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
-    defined(CONFIG_NET) || defined(CONFIG_IO_URING)
+    defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
+    defined(CONFIG_VFIO_PCI_ZDEV_KVM)
 	atomic_long_t locked_vm;
 #endif
 #ifdef CONFIG_WATCH_QUEUE
@@ -206,15 +206,25 @@ static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
 }
 #endif
 
-#ifdef CONFIG_S390
+#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 				       struct vfio_info_cap *caps);
+int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
 #else
 static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
 					      struct vfio_info_cap *caps)
 {
 	return -ENODEV;
 }
+
+static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
+{
+	return 0;
+}
+
+static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
+{}
 #endif
 
 /* Will be exported for vfio pci drivers usage */
@@ -1167,6 +1167,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_TRIPLE_FAULT_EVENT 218
 #define KVM_CAP_X86_NOTIFY_VMEXIT 219
 #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
+#define KVM_CAP_S390_ZPCI_OP 221
+#define KVM_CAP_S390_CPU_TOPOLOGY 222
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -2186,4 +2188,34 @@ struct kvm_stats_desc {
 #define KVM_X86_NOTIFY_VMEXIT_ENABLED	(1ULL << 0)
 #define KVM_X86_NOTIFY_VMEXIT_USER	(1ULL << 1)
 
+/* Available with KVM_CAP_S390_ZPCI_OP */
+#define KVM_S390_ZPCI_OP	_IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
+
+struct kvm_s390_zpci_op {
+	/* in */
+	__u32 fh;		/* target device */
+	__u8  op;		/* operation to perform */
+	__u8  pad[3];
+	union {
+		/* for KVM_S390_ZPCIOP_REG_AEN */
+		struct {
+			__u64 ibv;	/* Guest addr of interrupt bit vector */
+			__u64 sb;	/* Guest addr of summary bit */
+			__u32 flags;
+			__u32 noi;	/* Number of interrupts */
+			__u8 isc;	/* Guest interrupt subclass */
+			__u8 sbo;	/* Offset of guest summary bit vector */
+			__u16 pad;
+		} reg_aen;
+		__u64 reserved[8];
+	} u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN		0
+#define KVM_S390_ZPCIOP_DEREG_AEN	1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST	(1 << 0)
+
 #endif /* __LINUX_KVM_H */
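
To illustrate the new uAPI, a minimal sketch of a userspace call registering a
passthrough device for adapter event notification; the vm_fd, function handle,
guest addresses, and ISC below are placeholder values, not part of the patch::

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Sketch only: register zPCI function "fh" for firmware-interpreted
   * adapter event notification on the VM behind vm_fd. */
  static int zpci_reg_aen(int vm_fd, __u32 fh)
  {
          struct kvm_s390_zpci_op args;

          memset(&args, 0, sizeof(args));   /* pad/reserved must be 0 */
          args.fh = fh;
          args.op = KVM_S390_ZPCIOP_REG_AEN;
          args.u.reg_aen.ibv = 0x10000;     /* guest interrupt bit vector (example) */
          args.u.reg_aen.sb  = 0x20000;     /* guest summary bit address (example) */
          args.u.reg_aen.noi = 32;          /* number of interrupts (example) */
          args.u.reg_aen.isc = 3;           /* guest interrupt subclass (example) */
          args.u.reg_aen.sbo = 0;           /* summary bit offset */
          args.u.reg_aen.flags = 0;         /* no KVM_S390_ZPCIOP_REGAEN_HOST */

          return ioctl(vm_fd, KVM_S390_ZPCI_OP, &args);
  }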
@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base {
 	__u16 fmb_length;	/* Measurement Block Length (in bytes) */
 	__u8 pft;		/* PCI Function Type */
 	__u8 gid;		/* PCI function group ID */
+	/* End of version 1 */
+	__u32 fh;		/* PCI function handle */
+	/* End of version 2 */
 };
 
 /**
@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group {
 	__u16 noi;		/* Maximum number of MSIs */
 	__u16 maxstbl;		/* Maximum Store Block Length */
 	__u8 version;		/* Supported PCI Version */
+	/* End of version 1 */
+	__u8 reserved;
+	__u16 imaxstbl;		/* Maximum Interpreted Store Block Length */
+	/* End of version 2 */
 };
 
 /**
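
Since both capability structures grow here, a consumer should gate on
header.version before touching the version 2 fields; a minimal sketch under
that assumption (the helper name is illustrative)::

  #include <linux/vfio.h>
  #include <linux/vfio_zdev.h>

  /* Illustrative helper: extract the function handle from a ZPCI base
   * capability found in the VFIO_DEVICE_GET_INFO capability chain. */
  static int zpci_base_cap_fh(struct vfio_info_cap_header *hdr, __u32 *fh)
  {
          struct vfio_device_info_cap_zpci_base *cap =
                  (struct vfio_device_info_cap_zpci_base *)hdr;

          if (hdr->id != VFIO_DEVICE_INFO_CAP_ZPCI_BASE)
                  return -1;
          if (hdr->version < 2)
                  return -1;      /* fh only present from version 2 on */

          *fh = cap->fh;
          return 0;
  }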