Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "Quite a large pull request due to a selftest API overhaul and some
  patches that had come in too late for 5.19.

  ARM:
   - Unwinder implementations for both nVHE modes (classic and
     protected), complete with an overflow stack
   - Rework of the sysreg access from userspace, with a complete
     rewrite of the vgic-v3 view to align with the rest of the
     infrastructure
   - Disaggregation of the vcpu flags in separate sets to better track
     their use model.
   - A fix for the GICv2-on-v3 selftest
   - A small set of cosmetic fixes

  RISC-V:
   - Track ISA extensions used by Guest using bitmap
   - Added system instruction emulation framework
   - Added CSR emulation framework
   - Added gfp_custom flag in struct kvm_mmu_memory_cache
   - Added G-stage ioremap() and iounmap() functions
   - Added support for Svpbmt inside Guest

  s390:
   - add an interface to provide a hypervisor dump for secure guests
   - improve selftests to use TAP interface
   - enable interpretive execution of zPCI instructions (for PCI
     passthrough)
   - First part of deferred teardown
   - CPU Topology
   - PV attestation
   - Minor fixes

  x86:
   - Permit guests to ignore single-bit ECC errors
   - Intel IPI virtualization
   - Allow getting/setting pending triple fault with
     KVM_GET/SET_VCPU_EVENTS
   - PEBS virtualization
   - Simplify PMU emulation by just using PERF_TYPE_RAW events
   - More accurate event reinjection on SVM (avoid retrying
     instructions)
   - Allow getting/setting the state of the speaker port data bit
   - Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls
     are inconsistent
   - "Notify" VM exit (detect microarchitectural hangs) for Intel
   - Use try_cmpxchg64 instead of cmpxchg64
   - Ignore benign host accesses to PMU MSRs when PMU is disabled
   - Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior
   - Allow NX huge page mitigation to be disabled on a per-vm basis
   - Port eager page splitting to shadow MMU as well
   - Enable CMCI capability by default and handle injected UCNA errors
   - Expose pid of vcpu threads in debugfs
   - x2AVIC support for AMD
   - cleanup PIO emulation
   - Fixes for LLDT/LTR emulation
   - Don't require refcounted "struct page" to create huge SPTEs
   - Miscellaneous cleanups:
      - MCE MSR emulation
      - Use separate namespaces for guest PTEs and shadow PTEs bitmasks
      - PIO emulation
      - Reorganize rmap API, mostly around rmap destruction
      - Do not workaround very old KVM bugs for L0 that runs with
        nesting enabled
      - new selftests API for CPUID

  Generic:
   - Fix races in gfn->pfn cache refresh; do not pin pages tracked by
     the cache
   - new selftests API using struct kvm_vcpu instead of a (vm, id)
     tuple"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (606 commits)
  selftests: kvm: set rax before vmcall
  selftests: KVM: Add exponent check for boolean stats
  selftests: KVM: Provide descriptive assertions in kvm_binary_stats_test
  selftests: KVM: Check stat name before other fields
  KVM: x86/mmu: remove unused variable
  RISC-V: KVM: Add support for Svpbmt inside Guest/VM
  RISC-V: KVM: Use PAGE_KERNEL_IO in kvm_riscv_gstage_ioremap()
  RISC-V: KVM: Add G-stage ioremap() and iounmap() functions
  KVM: Add gfp_custom flag in struct kvm_mmu_memory_cache
  RISC-V: KVM: Add extensible CSR emulation framework
  RISC-V: KVM: Add extensible system instruction emulation framework
  RISC-V: KVM: Factor-out instruction emulation into separate sources
  RISC-V: KVM: move preempt_disable() call in kvm_arch_vcpu_ioctl_run
  RISC-V: KVM: Make kvm_riscv_guest_timer_init a void function
  RISC-V: KVM: Fix variable spelling mistake
  RISC-V: KVM: Improve ISA extension by using a bitmap
  KVM, x86/mmu: Fix the comment around kvm_tdp_mmu_zap_leafs()
  KVM: SVM: Dump Virtual Machine Save Area (VMSA) to klog
  KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
  KVM: x86: Do not block APIC write for non ICR registers
  ...
commit 7c5c3a6177
@ -2424,8 +2424,7 @@
|
||||
the KVM_CLEAR_DIRTY ioctl, and only for the pages being
|
||||
cleared.
|
||||
|
||||
Eager page splitting currently only supports splitting
|
||||
huge pages mapped by the TDP MMU.
|
||||
Eager page splitting is only supported when kvm.tdp_mmu=Y.
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
|
@ -1150,6 +1150,10 @@ The following bits are defined in the flags field:
|
||||
fields contain a valid state. This bit will be set whenever
|
||||
KVM_CAP_EXCEPTION_PAYLOAD is enabled.
|
||||
|
||||
- KVM_VCPUEVENT_VALID_TRIPLE_FAULT may be set to signal that the
|
||||
triple_fault_pending field contains a valid state. This bit will
|
||||
be set whenever KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled.
|
||||
|
||||
ARM64:
|
||||
^^^^^^
|
||||
|
||||
@ -1245,6 +1249,10 @@ can be set in the flags field to signal that the
|
||||
exception_has_payload, exception_payload, and exception.pending fields
|
||||
contain a valid state and shall be written into the VCPU.
|
||||
|
||||
If KVM_CAP_X86_TRIPLE_FAULT_EVENT is enabled, KVM_VCPUEVENT_VALID_TRIPLE_FAULT
|
||||
can be set in flags field to signal that the triple_fault field contains
|
||||
a valid state and shall be written into the VCPU.
|
||||
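As a purely illustrative sketch (not part of this patch), userspace could
use this to re-inject a pending triple fault after migration roughly as
follows, assuming the triple_fault layout added to the uapi header::

  struct kvm_vcpu_events events;

  ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events);
  events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
  events.triple_fault.pending = 1;
  ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);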
|
||||
ARM64:
|
||||
^^^^^^
|
||||
|
||||
@ -2998,7 +3006,9 @@ KVM_CREATE_PIT2. The state is returned in the following structure::
|
||||
Valid flags are::
|
||||
|
||||
/* disable PIT in HPET legacy mode */
|
||||
#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
|
||||
/* speaker port data bit enabled */
|
||||
#define KVM_PIT_FLAGS_SPEAKER_DATA_ON 0x00000002
|
||||
|
||||
This IOCTL replaces the obsolete KVM_GET_PIT.
|
||||
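For illustration only (not part of this patch), a VMM could check the new
bit roughly as follows, with vm_fd being the VM file descriptor::

  struct kvm_pit_state2 state;

  if (!ioctl(vm_fd, KVM_GET_PIT2, &state) &&
      (state.flags & KVM_PIT_FLAGS_SPEAKER_DATA_ON))
          printf("speaker port data bit is on\n");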
|
||||
@ -5127,7 +5137,15 @@ into ESA mode. This reset is a superset of the initial reset.
|
||||
__u32 reserved[3];
|
||||
};
|
||||
|
||||
cmd values:
|
||||
**Ultravisor return codes**
|
||||
The Ultravisor return (reason) codes are provided by the kernel if an
|
||||
Ultravisor call has been executed to achieve the results expected by
|
||||
the command. Therefore they are independent of the IOCTL return
|
||||
code. If KVM changes `rc`, its value will always be greater than 0
|
||||
hence setting it to 0 before issuing a PV command is advised to be
|
||||
able to detect a change of `rc`.
|
||||
|
||||
**cmd values:**
|
||||
|
||||
KVM_PV_ENABLE
|
||||
Allocate memory and register the VM with the Ultravisor, thereby
|
||||
@ -5143,7 +5161,6 @@ KVM_PV_ENABLE
|
||||
===== =============================
|
||||
|
||||
KVM_PV_DISABLE
|
||||
|
||||
Deregister the VM from the Ultravisor and reclaim the memory that
|
||||
had been donated to the Ultravisor, making it usable by the kernel
|
||||
again. All registered VCPUs are converted back to non-protected
|
||||
@ -5160,6 +5177,117 @@ KVM_PV_VM_VERIFY
|
||||
Verify the integrity of the unpacked image. Only if this succeeds,
|
||||
KVM is allowed to start protected VCPUs.
|
||||
|
||||
KVM_PV_INFO
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
|
||||
Presents an API that provides Ultravisor related data to userspace
|
||||
via subcommands. len_max is the size of the user space buffer,
|
||||
len_written is KVM's indication of how many bytes of that buffer
|
||||
were actually written to. len_written can be used to determine the
|
||||
valid fields if more response fields are added in the future.
|
||||
|
||||
::
|
||||
|
||||
enum pv_cmd_info_id {
|
||||
KVM_PV_INFO_VM,
|
||||
KVM_PV_INFO_DUMP,
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info_header {
|
||||
__u32 id;
|
||||
__u32 len_max;
|
||||
__u32 len_written;
|
||||
__u32 reserved;
|
||||
};
|
||||
|
||||
struct kvm_s390_pv_info {
|
||||
struct kvm_s390_pv_info_header header;
|
||||
struct kvm_s390_pv_info_dump dump;
|
||||
struct kvm_s390_pv_info_vm vm;
|
||||
};
|
||||
|
||||
**subcommands:**
|
||||
|
||||
KVM_PV_INFO_VM
|
||||
This subcommand provides basic Ultravisor information for PV
|
||||
hosts. These values are likely also exported as files in the sysfs
|
||||
firmware UV query interface but they are more easily available to
|
||||
programs in this API.
|
||||
|
||||
The installed calls and feature_indication members provide the
|
||||
installed UV calls and the UV's other feature indications.
|
||||
|
||||
The max_* members provide information about the maximum number of PV
|
||||
vcpus, PV guests and PV guest memory size.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_pv_info_vm {
|
||||
__u64 inst_calls_list[4];
|
||||
__u64 max_cpus;
|
||||
__u64 max_guests;
|
||||
__u64 max_guest_addr;
|
||||
__u64 feature_indication;
|
||||
};
|
||||
|
||||
|
||||
KVM_PV_INFO_DUMP
|
||||
This subcommand provides information related to dumping PV guests.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_pv_info_dump {
|
||||
__u64 dump_cpu_buffer_len;
|
||||
__u64 dump_config_mem_buffer_per_1m;
|
||||
__u64 dump_config_finalize_len;
|
||||
};
|
||||
|
||||
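As an illustrative sketch (not part of this patch), querying the VM
information could look roughly like this, where kvm_s390_pv_cmd is the
KVM_S390_PV_COMMAND request structure and vm_fd is the VM file
descriptor::

  struct kvm_s390_pv_info info = {
          .header.id      = KVM_PV_INFO_VM,
          .header.len_max = sizeof(info),
  };
  struct kvm_s390_pv_cmd cmd = {
          .cmd  = KVM_PV_INFO,
          .rc   = 0,        /* cleared so that a change of rc can be detected */
          .data = (__u64)(unsigned long)&info,
  };

  if (!ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd))
          printf("max PV vcpus: %llu\n", (unsigned long long)info.vm.max_cpus);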
KVM_PV_DUMP
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
|
||||
Presents an API that provides calls which facilitate dumping a
|
||||
protected VM.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_s390_pv_dmp {
|
||||
__u64 subcmd;
|
||||
__u64 buff_addr;
|
||||
__u64 buff_len;
|
||||
__u64 gaddr; /* For dump storage state */
|
||||
};
|
||||
|
||||
**subcommands:**
|
||||
|
||||
KVM_PV_DUMP_INIT
|
||||
Initializes the dump process of a protected VM. If this call does
|
||||
not succeed all other subcommands will fail with -EINVAL. This
|
||||
subcommand will return -EINVAL if a dump process has not yet been
|
||||
completed.
|
||||
|
||||
Not all PV VMs can be dumped; the owner needs to set the `dump
|
||||
allowed` PCF bit 34 in the SE header to allow dumping.
|
||||
|
||||
KVM_PV_DUMP_CONFIG_STOR_STATE
|
||||
Stores `buff_len` bytes of tweak component values starting with
|
||||
the 1MB block specified by the absolute guest address
|
||||
(`gaddr`). `buff_len` needs to be `conf_dump_storage_state_len`
|
||||
aligned and at least as large as the `conf_dump_storage_state_len` value
|
||||
provided by the dump uv_info data. buff_user might be written to
|
||||
even if an error rc is returned, for instance if we encounter a
|
||||
fault after writing the first page of data.
|
||||
|
||||
KVM_PV_DUMP_COMPLETE
|
||||
If the subcommand succeeds it completes the dump process and lets
|
||||
KVM_PV_DUMP_INIT be called again.
|
||||
|
||||
On success `conf_dump_finalize_len` bytes of completion data will be
|
||||
stored to the `buff_addr`. The completion data contains a key
|
||||
derivation seed, IV, tweak nonce and encryption keys as well as an
|
||||
authentication tag all of which are needed to decrypt the dump at a
|
||||
later time.
|
||||
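Purely as an illustrative outline (not part of this patch), a dump tool
would typically issue KVM_PV_DUMP_INIT once, then KVM_PV_DUMP_CPU for each
vcpu (see KVM_S390_PV_CPU_COMMAND below) and KVM_PV_DUMP_CONFIG_STOR_STATE
for each memory range, and finally KVM_PV_DUMP_COMPLETE. The first step
could look roughly like this::

  struct kvm_s390_pv_dmp dmp = { .subcmd = KVM_PV_DUMP_INIT };
  struct kvm_s390_pv_cmd cmd = {
          .cmd  = KVM_PV_DUMP,
          .data = (__u64)(unsigned long)&dmp,
  };

  ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);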
|
||||
|
||||
4.126 KVM_X86_SET_MSR_FILTER
|
||||
----------------------------
|
||||
|
||||
@ -5811,6 +5939,78 @@ of CPUID leaf 0xD on the host.
|
||||
|
||||
This ioctl injects an event channel interrupt directly to the guest vCPU.
|
||||
|
||||
4.136 KVM_S390_PV_CPU_COMMAND
|
||||
-----------------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
:Architectures: s390
|
||||
:Type: vcpu ioctl
|
||||
:Parameters: none
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
This ioctl closely mirrors `KVM_S390_PV_COMMAND` but handles requests
|
||||
for vcpus. It re-uses the kvm_s390_pv_dmp struct and hence also shares
|
||||
the command ids.
|
||||
|
||||
**command:**
|
||||
|
||||
KVM_PV_DUMP
|
||||
Presents an API that provides calls which facilitate dumping a vcpu
|
||||
of a protected VM.
|
||||
|
||||
**subcommand:**
|
||||
|
||||
KVM_PV_DUMP_CPU
|
||||
Provides encrypted dump data like register values.
|
||||
The length of the returned data is provided by uv_info.guest_cpu_stor_len.
|
||||
|
||||
4.137 KVM_S390_ZPCI_OP
|
||||
----------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_ZPCI_OP
|
||||
:Architectures: s390
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_s390_zpci_op (in)
|
||||
:Returns: 0 on success, <0 on error
|
||||
|
||||
Used to manage hardware-assisted virtualization features for zPCI devices.
|
||||
|
||||
Parameters are specified via the following structure::
|
||||
|
||||
struct kvm_s390_zpci_op {
|
||||
/* in */
|
||||
__u32 fh; /* target device */
|
||||
__u8 op; /* operation to perform */
|
||||
__u8 pad[3];
|
||||
union {
|
||||
/* for KVM_S390_ZPCIOP_REG_AEN */
|
||||
struct {
|
||||
__u64 ibv; /* Guest addr of interrupt bit vector */
|
||||
__u64 sb; /* Guest addr of summary bit */
|
||||
__u32 flags;
|
||||
__u32 noi; /* Number of interrupts */
|
||||
__u8 isc; /* Guest interrupt subclass */
|
||||
__u8 sbo; /* Offset of guest summary bit vector */
|
||||
__u16 pad;
|
||||
} reg_aen;
|
||||
__u64 reserved[8];
|
||||
} u;
|
||||
};
|
||||
|
||||
The type of operation is specified in the "op" field.
|
||||
KVM_S390_ZPCIOP_REG_AEN is used to register the VM for adapter event
|
||||
notification interpretation, which will allow firmware delivery of adapter
|
||||
events directly to the vm, with KVM providing a backup delivery mechanism;
|
||||
KVM_S390_ZPCIOP_DEREG_AEN is used to subsequently disable interpretation of
|
||||
adapter event notifications.
|
||||
|
||||
The target zPCI function must also be specified via the "fh" field. For the
|
||||
KVM_S390_ZPCIOP_REG_AEN operation, additional information to establish firmware
|
||||
delivery must be provided via the "reg_aen" struct.
|
||||
|
||||
The "pad" and "reserved" fields may be used for future extensions and should be
|
||||
set to 0s by userspace.
|
||||
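A minimal, hypothetical sketch (not part of this patch) of registering a
passthrough device for adapter event notification; everything except the
constants is a placeholder value::

  struct kvm_s390_zpci_op args = {
          .fh = device_fh,                 /* zPCI function handle */
          .op = KVM_S390_ZPCIOP_REG_AEN,
          .u.reg_aen = {
                  .ibv = guest_ibv_gaddr,  /* guest interrupt bit vector */
                  .sb  = guest_sb_gaddr,   /* guest summary bit */
                  .noi = nr_interrupts,
                  .isc = 3,
                  .sbo = 0,
          },
  };

  ioctl(vm_fd, KVM_S390_ZPCI_OP, &args);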
|
||||
5. The kvm_run structure
|
||||
========================
|
||||
|
||||
@ -6414,6 +6614,26 @@ array field represents return values. The userspace should update the return
|
||||
values of SBI call before resuming the VCPU. For more details on RISC-V SBI
|
||||
spec refer, https://github.com/riscv/riscv-sbi-doc.
|
||||
|
||||
::
|
||||
|
||||
/* KVM_EXIT_NOTIFY */
|
||||
struct {
|
||||
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
|
||||
__u32 flags;
|
||||
} notify;
|
||||
|
||||
Used on x86 systems. When the VM capability KVM_CAP_X86_NOTIFY_VMEXIT is
|
||||
enabled, a VM exit is generated if no event window occurs in VM non-root mode
|
||||
for a specified amount of time. If KVM_X86_NOTIFY_VMEXIT_USER was set when
|
||||
enabling the cap, KVM exits to userspace with the exit reason
|
||||
KVM_EXIT_NOTIFY for further handling. The "flags" field contains more
|
||||
detailed info.
|
||||
|
||||
The valid value for 'flags' is:
|
||||
|
||||
- KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid
|
||||
in VMCS. Resuming the target VM may produce unpredictable results.
|
||||
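For illustration only (a sketch, not prescribed by KVM), a VMM's run loop
might react to this exit along these lines::

  static void handle_exit_notify(struct kvm_run *run)
  {
          if (run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID) {
                  /* guest context is corrupted; do not resume it */
                  fprintf(stderr, "notify exit: invalid context\n");
                  exit(1);
          }
          /* otherwise the vcpu can simply be run again */
  }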
|
||||
::
|
||||
|
||||
/* Fix the size of the union. */
|
||||
@ -7357,8 +7577,71 @@ The valid bits in cap.args[0] are:
|
||||
hypercall instructions. Executing the
|
||||
incorrect hypercall instruction will
|
||||
generate a #UD within the guest.
|
||||
|
||||
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS By default, KVM emulates MONITOR/MWAIT (if
|
||||
they are intercepted) as NOPs regardless of
|
||||
whether or not MONITOR/MWAIT are supported
|
||||
according to guest CPUID. When this quirk
|
||||
is disabled and KVM_X86_DISABLE_EXITS_MWAIT
|
||||
is not set (MONITOR/MWAIT are intercepted),
|
||||
KVM will inject a #UD on MONITOR/MWAIT if
|
||||
they're unsupported per guest CPUID. Note,
|
||||
KVM will modify MONITOR/MWAIT support in
|
||||
guest CPUID on writes to MISC_ENABLE if
|
||||
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is
|
||||
disabled.
|
||||
=================================== ============================================
|
||||
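For example (an illustrative sketch, not part of this patch), this quirk
is disabled the same way as the other entries in the table above, via
KVM_ENABLE_CAP on KVM_CAP_DISABLE_QUIRKS2::

  struct kvm_enable_cap cap = {
          .cap     = KVM_CAP_DISABLE_QUIRKS2,
          .args[0] = KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS,
  };

  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);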
|
||||
7.32 KVM_CAP_MAX_VCPU_ID
|
||||
------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] - maximum APIC ID value set for current VM
|
||||
:Returns: 0 on success, -EINVAL if args[0] is beyond KVM_MAX_VCPU_IDS
|
||||
supported in KVM or if it has been set.
|
||||
|
||||
This capability allows userspace to specify maximum possible APIC ID
|
||||
assigned for current VM session prior to the creation of vCPUs, saving
|
||||
memory for data structures indexed by the APIC ID. Userspace is able
|
||||
to calculate the limit to APIC ID values from designated
|
||||
CPU topology.
|
||||
|
||||
The value can be changed only until KVM_ENABLE_CAP is set to a nonzero
|
||||
value or until a vCPU is created. Upon creation of the first vCPU,
|
||||
if the value was set to zero or KVM_ENABLE_CAP was not invoked, KVM
|
||||
uses the return value of KVM_CHECK_EXTENSION(KVM_CAP_MAX_VCPU_ID) as
|
||||
the maximum APIC ID.
|
||||
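An illustrative sketch (not part of this patch); max_apic_id is a
placeholder whose exact meaning is described above::

  struct kvm_enable_cap cap = {
          .cap     = KVM_CAP_MAX_VCPU_ID,
          .args[0] = max_apic_id,
  };

  /* must be done before the first KVM_CREATE_VCPU */
  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);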
|
||||
7.33 KVM_CAP_X86_NOTIFY_VMEXIT
|
||||
------------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] is the value of notify window as well as some flags
|
||||
:Returns: 0 on success, -EINVAL if args[0] contains invalid flags or notify
|
||||
VM exit is unsupported.
|
||||
|
||||
Bits 63:32 of args[0] are used for notify window.
|
||||
Bits 31:0 of args[0] hold the flags. Valid bits are::
|
||||
|
||||
#define KVM_X86_NOTIFY_VMEXIT_ENABLED (1 << 0)
|
||||
#define KVM_X86_NOTIFY_VMEXIT_USER (1 << 1)
|
||||
|
||||
This capability allows userspace to configure the notify VM exit on/off
|
||||
in per-VM scope during VM creation. Notify VM exit is disabled by default.
|
||||
When userspace sets the KVM_X86_NOTIFY_VMEXIT_ENABLED bit in args[0], KVM will
|
||||
enable this feature with the notify window provided, which will generate
|
||||
a VM exit if no event window occurs in VM non-root mode for a specified amount of
|
||||
time (notify window).
|
||||
|
||||
If KVM_X86_NOTIFY_VMEXIT_USER is set in args[0], then when a notify VM exit happens,
|
||||
KVM exits to userspace for handling.
|
||||
|
||||
This capability aims to mitigate the threat that malicious VMs can
|
||||
cause a CPU to get stuck (because event windows do not open up) and make the CPU
|
||||
unavailable to the host or other VMs.
|
||||
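An illustrative sketch (not part of this patch) of enabling the feature
with userspace handling; the window value is an arbitrary placeholder::

  __u64 notify_window = 128000;
  struct kvm_enable_cap cap = {
          .cap     = KVM_CAP_X86_NOTIFY_VMEXIT,
          .args[0] = (notify_window << 32) |
                     KVM_X86_NOTIFY_VMEXIT_ENABLED |
                     KVM_X86_NOTIFY_VMEXIT_USER,
  };

  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);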
|
||||
8. Other capabilities.
|
||||
======================
|
||||
|
||||
@ -7965,6 +8248,61 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
|
||||
When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
|
||||
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
|
||||
|
||||
8.37 KVM_CAP_S390_PROTECTED_DUMP
|
||||
--------------------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_PROTECTED_DUMP
|
||||
:Architectures: s390
|
||||
:Type: vm
|
||||
|
||||
This capability indicates that KVM and the Ultravisor support dumping
|
||||
PV guests. The `KVM_PV_DUMP` command is available for the
|
||||
`KVM_S390_PV_COMMAND` ioctl and the `KVM_PV_INFO` command provides
|
||||
dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is
|
||||
available and supports the `KVM_PV_DUMP_CPU` subcommand.
|
||||
|
||||
8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
|
||||
-------------------------------------
|
||||
|
||||
:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES
|
||||
:Architectures: x86
|
||||
:Type: vm
|
||||
:Parameters: args[0] must be 0.
|
||||
:Returns: 0 on success, -EPERM if the userspace process does not
|
||||
have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been
|
||||
created.
|
||||
|
||||
This capability disables the NX huge pages mitigation for iTLB MULTIHIT.
|
||||
|
||||
The capability has no effect if the nx_huge_pages module parameter is not set.
|
||||
|
||||
This capability may only be set before any vCPUs are created.
|
||||
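For illustration (not part of this patch), a privileged VMM could opt a
freshly created VM out of the mitigation like this::

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_VM_DISABLE_NX_HUGE_PAGES,
          /* args[0] must be 0 */
  };

  /* requires CAP_SYS_BOOT; must precede vCPU creation */
  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);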
|
||||
8.39 KVM_CAP_S390_CPU_TOPOLOGY
|
||||
------------------------------
|
||||
|
||||
:Capability: KVM_CAP_S390_CPU_TOPOLOGY
|
||||
:Architectures: s390
|
||||
:Type: vm
|
||||
|
||||
This capability indicates that KVM will provide the S390 CPU Topology
|
||||
facility, which consists of the interpretation of the PTF instruction for
|
||||
the function code 2 along with interception and forwarding of both the
|
||||
PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
|
||||
instruction to the userland hypervisor.
|
||||
|
||||
The stfle facility 11, CPU Topology facility, should not be indicated
|
||||
to the guest without this capability.
|
||||
|
||||
When this capability is present, KVM provides a new attribute group
|
||||
on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
|
||||
This new attribute allows userspace to get, set or clear the Modified Change
|
||||
Topology Report (MTCR) bit of the SCA through the kvm_device_attr
|
||||
structure.
|
||||
|
||||
When getting the Modified Change Topology Report value, the attr->addr
|
||||
must point to a byte where the value will be stored or retrieved from.
|
||||
|
||||
9. Known KVM API problems
|
||||
=========================
|
||||
|
||||
|
@ -10,3 +10,4 @@ KVM for s390 systems
|
||||
s390-diag
|
||||
s390-pv
|
||||
s390-pv-boot
|
||||
s390-pv-dump
|
||||
|
Documentation/virt/kvm/s390/s390-pv-dump.rst (new file, 64 lines)
@ -0,0 +1,64 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================================
|
||||
s390 (IBM Z) Protected Virtualization dumps
|
||||
===========================================
|
||||
|
||||
Summary
|
||||
-------
|
||||
|
||||
Dumping a VM is an essential tool for debugging problems inside
|
||||
it. This is especially true when a protected VM runs into trouble as
|
||||
there's no way to access its memory and registers from the outside
|
||||
while it's running.
|
||||
|
||||
However when dumping a protected VM we need to maintain its
|
||||
confidentiality until the dump is in the hands of the VM owner who
|
||||
should be the only one capable of analysing it.
|
||||
|
||||
The confidentiality of the VM dump is ensured by the Ultravisor, which
|
||||
provides an interface to KVM over which encrypted CPU and memory data
|
||||
can be requested. The encryption is based on the Customer
|
||||
Communication Key which is the key that's used to encrypt VM data in a
|
||||
way that the customer is able to decrypt.
|
||||
|
||||
|
||||
Dump process
|
||||
------------
|
||||
|
||||
A dump is done in 3 steps:
|
||||
|
||||
**Initiation**
|
||||
|
||||
This step initializes the dump process, generates cryptographic seeds
|
||||
and extracts dump keys with which the VM dump data will be encrypted.
|
||||
|
||||
**Data gathering**
|
||||
|
||||
Currently there are two types of data that can be gathered from a VM:
|
||||
the memory and the vcpu state.
|
||||
|
||||
The vcpu state contains all the important registers, general, floating
|
||||
point, vector, control and tod/timers of a vcpu. The vcpu dump can
|
||||
contain incomplete data if a vcpu is dumped while an instruction is
|
||||
emulated with the help of the hypervisor. This is indicated by a flag bit
|
||||
in the dump data. For the same reason it is very important to not only
|
||||
write out the encrypted vcpu state, but also the unencrypted state
|
||||
from the hypervisor.
|
||||
|
||||
The memory state is further divided into the encrypted memory and its
|
||||
metadata comprised of the encryption tweaks and status flags. The
|
||||
encrypted memory can simply be read once it has been exported. The
|
||||
time of the export does not matter as no re-encryption is
|
||||
needed. Memory that has been swapped out and hence was exported can be
|
||||
read from the swap and written to the dump target without need for any
|
||||
special actions.
|
||||
|
||||
The tweaks / status flags for the exported pages need to be requested
|
||||
from the Ultravisor.
|
||||
|
||||
**Finalization**
|
||||
|
||||
The finalization step will provide the data needed to be able to
|
||||
decrypt the vcpu and memory data and end the dump process. When this
|
||||
step completes successfully a new dump initiation can be started.
|
@ -17784,6 +17784,7 @@ M: Eric Farman <farman@linux.ibm.com>
|
||||
L: linux-s390@vger.kernel.org
|
||||
L: kvm@vger.kernel.org
|
||||
S: Supported
|
||||
F: arch/s390/kvm/pci*
|
||||
F: drivers/vfio/pci/vfio_pci_zdev.c
|
||||
F: include/uapi/linux/vfio_zdev.h
|
||||
|
||||
|
@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
|
||||
unsigned long vtcr;
|
||||
};
|
||||
|
||||
/*
|
||||
* Used by the host in EL1 to dump the nVHE hypervisor backtrace on
|
||||
* hyp_panic() in non-protected mode.
|
||||
*
|
||||
* @stack_base: hyp VA of the hyp_stack base.
|
||||
* @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
|
||||
* @fp: hyp FP where the backtrace begins.
|
||||
* @pc: hyp PC where the backtrace begins.
|
||||
*/
|
||||
struct kvm_nvhe_stacktrace_info {
|
||||
unsigned long stack_base;
|
||||
unsigned long overflow_stack_base;
|
||||
unsigned long fp;
|
||||
unsigned long pc;
|
||||
};
|
||||
|
||||
/* Translate a kernel address @ptr into its equivalent linear mapping */
|
||||
#define kvm_ksym_ref(ptr) \
|
||||
({ \
|
||||
|
@ -473,9 +473,18 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
|
||||
|
||||
static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
|
||||
WARN_ON(vcpu_get_flag(vcpu, PENDING_EXCEPTION));
|
||||
vcpu_set_flag(vcpu, INCREMENT_PC);
|
||||
}
|
||||
|
||||
#define kvm_pend_exception(v, e) \
|
||||
do { \
|
||||
WARN_ON(vcpu_get_flag((v), INCREMENT_PC)); \
|
||||
vcpu_set_flag((v), PENDING_EXCEPTION); \
|
||||
vcpu_set_flag((v), e); \
|
||||
} while (0)
|
||||
|
||||
|
||||
static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
|
||||
{
|
||||
return test_bit(feature, vcpu->arch.features);
|
||||
|
@ -325,8 +325,30 @@ struct kvm_vcpu_arch {
|
||||
/* Exception Information */
|
||||
struct kvm_vcpu_fault_info fault;
|
||||
|
||||
/* Miscellaneous vcpu state flags */
|
||||
u64 flags;
|
||||
/* Ownership of the FP regs */
|
||||
enum {
|
||||
FP_STATE_FREE,
|
||||
FP_STATE_HOST_OWNED,
|
||||
FP_STATE_GUEST_OWNED,
|
||||
} fp_state;
|
||||
|
||||
/* Configuration flags, set once and for all before the vcpu can run */
|
||||
u8 cflags;
|
||||
|
||||
/* Input flags to the hypervisor code, potentially cleared after use */
|
||||
u8 iflags;
|
||||
|
||||
/* State flags for kernel bookkeeping, unused by the hypervisor code */
|
||||
u8 sflags;
|
||||
|
||||
/*
|
||||
* Don't run the guest (internal implementation need).
|
||||
*
|
||||
* Contrary to the flags above, this is set/cleared outside of
|
||||
* a vcpu context, and thus cannot be mixed with the flags
|
||||
* themselves (or the flag accesses need to be made atomic).
|
||||
*/
|
||||
bool pause;
|
||||
|
||||
/*
|
||||
* We maintain more than a single set of debug registers to support
|
||||
@ -376,9 +398,6 @@ struct kvm_vcpu_arch {
|
||||
/* vcpu power state */
|
||||
struct kvm_mp_state mp_state;
|
||||
|
||||
/* Don't run the guest (internal implementation need) */
|
||||
bool pause;
|
||||
|
||||
/* Cache some mmu pages needed inside spinlock regions */
|
||||
struct kvm_mmu_memory_cache mmu_page_cache;
|
||||
|
||||
@ -392,10 +411,6 @@ struct kvm_vcpu_arch {
|
||||
/* Additional reset state */
|
||||
struct vcpu_reset_state reset_state;
|
||||
|
||||
/* True when deferrable sysregs are loaded on the physical CPU,
|
||||
* see kvm_vcpu_load_sysregs_vhe and kvm_vcpu_put_sysregs_vhe. */
|
||||
bool sysregs_loaded_on_cpu;
|
||||
|
||||
/* Guest PV state */
|
||||
struct {
|
||||
u64 last_steal;
|
||||
@ -403,6 +418,124 @@ struct kvm_vcpu_arch {
|
||||
} steal;
|
||||
};
|
||||
|
||||
/*
|
||||
* Each 'flag' is composed of a comma-separated triplet:
|
||||
*
|
||||
* - the flag-set it belongs to in the vcpu->arch structure
|
||||
* - the value for that flag
|
||||
* - the mask for that flag
|
||||
*
|
||||
* __vcpu_single_flag() builds such a triplet for a single-bit flag.
|
||||
* unpack_vcpu_flag() extract the flag value from the triplet for
|
||||
* direct use outside of the flag accessors.
|
||||
*/
|
||||
#define __vcpu_single_flag(_set, _f) _set, (_f), (_f)
|
||||
|
||||
#define __unpack_flag(_set, _f, _m) _f
|
||||
#define unpack_vcpu_flag(...) __unpack_flag(__VA_ARGS__)
|
||||
|
||||
#define __build_check_flag(v, flagset, f, m) \
|
||||
do { \
|
||||
typeof(v->arch.flagset) *_fset; \
|
||||
\
|
||||
/* Check that the flags fit in the mask */ \
|
||||
BUILD_BUG_ON(HWEIGHT(m) != HWEIGHT((f) | (m))); \
|
||||
/* Check that the flags fit in the type */ \
|
||||
BUILD_BUG_ON((sizeof(*_fset) * 8) <= __fls(m)); \
|
||||
} while (0)
|
||||
|
||||
#define __vcpu_get_flag(v, flagset, f, m) \
|
||||
({ \
|
||||
__build_check_flag(v, flagset, f, m); \
|
||||
\
|
||||
v->arch.flagset & (m); \
|
||||
})
|
||||
|
||||
#define __vcpu_set_flag(v, flagset, f, m) \
|
||||
do { \
|
||||
typeof(v->arch.flagset) *fset; \
|
||||
\
|
||||
__build_check_flag(v, flagset, f, m); \
|
||||
\
|
||||
fset = &v->arch.flagset; \
|
||||
if (HWEIGHT(m) > 1) \
|
||||
*fset &= ~(m); \
|
||||
*fset |= (f); \
|
||||
} while (0)
|
||||
|
||||
#define __vcpu_clear_flag(v, flagset, f, m) \
|
||||
do { \
|
||||
typeof(v->arch.flagset) *fset; \
|
||||
\
|
||||
__build_check_flag(v, flagset, f, m); \
|
||||
\
|
||||
fset = &v->arch.flagset; \
|
||||
*fset &= ~(m); \
|
||||
} while (0)
|
||||
|
||||
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
|
||||
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
|
||||
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
|
||||
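/*
 * Illustration (not part of the patch): with a definition such as the
 * IN_WFIT flag below,
 *
 *	#define IN_WFIT		__vcpu_single_flag(sflags, BIT(3))
 *
 * a call like vcpu_set_flag(vcpu, IN_WFIT) expands to
 * __vcpu_set_flag(vcpu, sflags, BIT(3), BIT(3)): the BUILD_BUG_ONs check
 * that the mask fits in the u8 sflags field, and since the mask has a
 * single bit the store reduces to vcpu->arch.sflags |= BIT(3).
 */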
|
||||
/* SVE exposed to guest */
|
||||
#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
|
||||
/* SVE config completed */
|
||||
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
|
||||
/* PTRAUTH exposed to guest */
|
||||
#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
|
||||
|
||||
/* Exception pending */
|
||||
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
|
||||
/*
|
||||
* PC increment. Overlaps with EXCEPT_MASK on purpose so that it can't
|
||||
* be set together with an exception...
|
||||
*/
|
||||
#define INCREMENT_PC __vcpu_single_flag(iflags, BIT(1))
|
||||
/* Target EL/MODE (not a single flag, but let's abuse the macro) */
|
||||
#define EXCEPT_MASK __vcpu_single_flag(iflags, GENMASK(3, 1))
|
||||
|
||||
/* Helpers to encode exceptions with minimum fuss */
|
||||
#define __EXCEPT_MASK_VAL unpack_vcpu_flag(EXCEPT_MASK)
|
||||
#define __EXCEPT_SHIFT __builtin_ctzl(__EXCEPT_MASK_VAL)
|
||||
#define __vcpu_except_flags(_f) iflags, (_f << __EXCEPT_SHIFT), __EXCEPT_MASK_VAL
|
||||
|
||||
/*
|
||||
* When PENDING_EXCEPTION is set, EXCEPT_MASK can take the following
|
||||
* values:
|
||||
*
|
||||
* For AArch32 EL1:
|
||||
*/
|
||||
#define EXCEPT_AA32_UND __vcpu_except_flags(0)
|
||||
#define EXCEPT_AA32_IABT __vcpu_except_flags(1)
|
||||
#define EXCEPT_AA32_DABT __vcpu_except_flags(2)
|
||||
/* For AArch64: */
|
||||
#define EXCEPT_AA64_EL1_SYNC __vcpu_except_flags(0)
|
||||
#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
|
||||
#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
|
||||
#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
|
||||
/* For AArch64 with NV (one day): */
|
||||
#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
|
||||
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
|
||||
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
|
||||
#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
|
||||
/* Guest debug is live */
|
||||
#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
|
||||
/* Save SPE context if active */
|
||||
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
|
||||
/* Save TRBE context if active */
|
||||
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
|
||||
|
||||
/* SVE enabled for host EL0 */
|
||||
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
|
||||
/* SME enabled for EL0 */
|
||||
#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
|
||||
/* Physical CPU not in supported_cpus */
|
||||
#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
|
||||
/* WFIT instruction trapped */
|
||||
#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
|
||||
/* vcpu system registers loaded on physical CPU */
|
||||
#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
|
||||
|
||||
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
|
||||
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
|
||||
sve_ffr_offset((vcpu)->arch.sve_max_vl))
|
||||
@ -423,70 +556,31 @@ struct kvm_vcpu_arch {
|
||||
__size_ret; \
|
||||
})
|
||||
|
||||
/* vcpu_arch flags field values: */
|
||||
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
|
||||
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
|
||||
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
|
||||
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
|
||||
#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
|
||||
#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
|
||||
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
|
||||
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
|
||||
/*
|
||||
* Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
|
||||
* set together with an exception...
|
||||
*/
|
||||
#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */
|
||||
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
|
||||
/*
|
||||
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
|
||||
* take the following values:
|
||||
*
|
||||
* For AArch32 EL1:
|
||||
*/
|
||||
#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9)
|
||||
#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9)
|
||||
#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9)
|
||||
/* For AArch64: */
|
||||
#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
|
||||
#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9)
|
||||
#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9)
|
||||
#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
|
||||
#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11)
|
||||
#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11)
|
||||
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
|
||||
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
|
||||
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
|
||||
#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
|
||||
#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */
|
||||
|
||||
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
KVM_GUESTDBG_USE_HW | \
|
||||
KVM_GUESTDBG_SINGLESTEP)
|
||||
|
||||
#define vcpu_has_sve(vcpu) (system_supports_sve() && \
|
||||
((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
|
||||
vcpu_get_flag(vcpu, GUEST_HAS_SVE))
|
||||
|
||||
#ifdef CONFIG_ARM64_PTR_AUTH
|
||||
#define vcpu_has_ptrauth(vcpu) \
|
||||
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
|
||||
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
|
||||
(vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH)
|
||||
vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
|
||||
#else
|
||||
#define vcpu_has_ptrauth(vcpu) false
|
||||
#endif
|
||||
|
||||
#define vcpu_on_unsupported_cpu(vcpu) \
|
||||
((vcpu)->arch.flags & KVM_ARM64_ON_UNSUPPORTED_CPU)
|
||||
vcpu_get_flag(vcpu, ON_UNSUPPORTED_CPU)
|
||||
|
||||
#define vcpu_set_on_unsupported_cpu(vcpu) \
|
||||
((vcpu)->arch.flags |= KVM_ARM64_ON_UNSUPPORTED_CPU)
|
||||
vcpu_set_flag(vcpu, ON_UNSUPPORTED_CPU)
|
||||
|
||||
#define vcpu_clear_on_unsupported_cpu(vcpu) \
|
||||
((vcpu)->arch.flags &= ~KVM_ARM64_ON_UNSUPPORTED_CPU)
|
||||
vcpu_clear_flag(vcpu, ON_UNSUPPORTED_CPU)
|
||||
|
||||
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.regs)
|
||||
|
||||
@ -620,8 +714,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
|
||||
|
||||
unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
|
||||
int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
|
||||
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
|
||||
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
|
||||
|
||||
int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
|
||||
struct kvm_vcpu_events *events);
|
||||
@ -831,8 +923,7 @@ void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
|
||||
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
|
||||
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
||||
|
||||
#define kvm_arm_vcpu_sve_finalized(vcpu) \
|
||||
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
|
||||
#define kvm_arm_vcpu_sve_finalized(vcpu) vcpu_get_flag(vcpu, VCPU_SVE_FINALIZED)
|
||||
|
||||
#define kvm_has_mte(kvm) \
|
||||
(system_supports_mte() && \
|
||||
|
@ -113,6 +113,14 @@
|
||||
|
||||
#define OVERFLOW_STACK_SIZE SZ_4K
|
||||
|
||||
/*
|
||||
* With the minimum frame size of [x29, x30], exactly half the combined
|
||||
* sizes of the hyp and overflow stacks is the maximum size needed to
|
||||
* save the unwinded stacktrace; plus an additional entry to delimit the
|
||||
* end.
|
||||
*/
|
||||
#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
|
||||
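/*
 * Illustration (not part of the patch): with 4K pages, the hyp and
 * overflow stacks are 4K each, and a minimal [x29, x30] frame is 16
 * bytes, so at most (4096 + 4096) / 16 = 512 frames can exist; saving
 * one 8-byte entry per frame plus the delimiting entry gives
 * 512 * 8 + 8 = 4104 bytes, i.e.
 * (OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long).
 */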
|
||||
/*
|
||||
* Alignment of kernel segments (e.g. .text, .data).
|
||||
*
|
||||
|
@ -8,52 +8,20 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/llist.h>
|
||||
|
||||
#include <asm/memory.h>
|
||||
#include <asm/pointer_auth.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/sdei.h>
|
||||
|
||||
enum stack_type {
|
||||
STACK_TYPE_UNKNOWN,
|
||||
STACK_TYPE_TASK,
|
||||
STACK_TYPE_IRQ,
|
||||
STACK_TYPE_OVERFLOW,
|
||||
STACK_TYPE_SDEI_NORMAL,
|
||||
STACK_TYPE_SDEI_CRITICAL,
|
||||
__NR_STACK_TYPES
|
||||
};
|
||||
|
||||
struct stack_info {
|
||||
unsigned long low;
|
||||
unsigned long high;
|
||||
enum stack_type type;
|
||||
};
|
||||
#include <asm/stacktrace/common.h>
|
||||
|
||||
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
|
||||
const char *loglvl);
|
||||
|
||||
DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
|
||||
|
||||
static inline bool on_stack(unsigned long sp, unsigned long size,
|
||||
unsigned long low, unsigned long high,
|
||||
enum stack_type type, struct stack_info *info)
|
||||
{
|
||||
if (!low)
|
||||
return false;
|
||||
|
||||
if (sp < low || sp + size < sp || sp + size > high)
|
||||
return false;
|
||||
|
||||
if (info) {
|
||||
info->low = low;
|
||||
info->high = high;
|
||||
info->type = type;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool on_irq_stack(unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
|
||||
struct stack_info *info) { return false; }
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* We can only safely access per-cpu stacks from current in a non-preemptible
|
||||
* context.
|
||||
*/
|
||||
static inline bool on_accessible_stack(const struct task_struct *tsk,
|
||||
unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
if (info)
|
||||
info->type = STACK_TYPE_UNKNOWN;
|
||||
|
||||
if (on_task_stack(tsk, sp, size, info))
|
||||
return true;
|
||||
if (tsk != current || preemptible())
|
||||
return false;
|
||||
if (on_irq_stack(sp, size, info))
|
||||
return true;
|
||||
if (on_overflow_stack(sp, size, info))
|
||||
return true;
|
||||
if (on_sdei_stack(sp, size, info))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* __ASM_STACKTRACE_H */
|
||||
|
arch/arm64/include/asm/stacktrace/common.h (new file, 199 lines)
@ -0,0 +1,199 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Common arm64 stack unwinder code.
|
||||
*
|
||||
* To implement a new arm64 stack unwinder:
|
||||
* 1) Include this header
|
||||
*
|
||||
* 2) Call into unwind_next_common() from your top level unwind
|
||||
* function, passing it the validation and translation callbacks
|
||||
* (though the latter can be NULL if no translation is required).
|
||||
*
|
||||
* See: arch/arm64/kernel/stacktrace.c for the reference implementation.
|
||||
*
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
*/
|
||||
#ifndef __ASM_STACKTRACE_COMMON_H
|
||||
#define __ASM_STACKTRACE_COMMON_H
|
||||
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
enum stack_type {
|
||||
STACK_TYPE_UNKNOWN,
|
||||
STACK_TYPE_TASK,
|
||||
STACK_TYPE_IRQ,
|
||||
STACK_TYPE_OVERFLOW,
|
||||
STACK_TYPE_SDEI_NORMAL,
|
||||
STACK_TYPE_SDEI_CRITICAL,
|
||||
STACK_TYPE_HYP,
|
||||
__NR_STACK_TYPES
|
||||
};
|
||||
|
||||
struct stack_info {
|
||||
unsigned long low;
|
||||
unsigned long high;
|
||||
enum stack_type type;
|
||||
};
|
||||
|
||||
/*
|
||||
* A snapshot of a frame record or fp/lr register values, along with some
|
||||
* accounting information necessary for robust unwinding.
|
||||
*
|
||||
* @fp: The fp value in the frame record (or the real fp)
|
||||
* @pc: The lr value in the frame record (or the real lr)
|
||||
*
|
||||
* @stacks_done: Stacks which have been entirely unwound, for which it is no
|
||||
* longer valid to unwind to.
|
||||
*
|
||||
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
|
||||
* of 0. This is used to ensure that within a stack, each
|
||||
* subsequent frame record is at an increasing address.
|
||||
* @prev_type: The type of stack this frame record was on, or a synthetic
|
||||
* value of STACK_TYPE_UNKNOWN. This is used to detect a
|
||||
* transition from one stack to another.
|
||||
*
|
||||
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
|
||||
* associated with the most recently encountered replacement lr
|
||||
* value.
|
||||
*
|
||||
* @task: The task being unwound.
|
||||
*/
|
||||
struct unwind_state {
|
||||
unsigned long fp;
|
||||
unsigned long pc;
|
||||
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
|
||||
unsigned long prev_fp;
|
||||
enum stack_type prev_type;
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
struct llist_node *kr_cur;
|
||||
#endif
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
static inline bool on_stack(unsigned long sp, unsigned long size,
|
||||
unsigned long low, unsigned long high,
|
||||
enum stack_type type, struct stack_info *info)
|
||||
{
|
||||
if (!low)
|
||||
return false;
|
||||
|
||||
if (sp < low || sp + size < sp || sp + size > high)
|
||||
return false;
|
||||
|
||||
if (info) {
|
||||
info->low = low;
|
||||
info->high = high;
|
||||
info->type = type;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void unwind_init_common(struct unwind_state *state,
|
||||
struct task_struct *task)
|
||||
{
|
||||
state->task = task;
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
state->kr_cur = NULL;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Prime the first unwind.
|
||||
*
|
||||
* In unwind_next() we'll check that the FP points to a valid stack,
|
||||
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
|
||||
* treated as a transition to whichever stack that happens to be. The
|
||||
* prev_fp value won't be used, but we set it to 0 such that it is
|
||||
* definitely not an accessible stack address.
|
||||
*/
|
||||
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
|
||||
state->prev_fp = 0;
|
||||
state->prev_type = STACK_TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
|
||||
* stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
|
||||
* a kernel address.
|
||||
*
|
||||
* @fp: the frame pointer to be updated to its kernel address.
|
||||
* @type: the stack type associated with frame pointer @fp
|
||||
*
|
||||
* Returns true on success, and @fp is updated to the corresponding
|
||||
* kernel virtual address; otherwise returns false.
|
||||
*/
|
||||
typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
|
||||
enum stack_type type);
|
||||
|
||||
/*
|
||||
* on_accessible_stack_fn() - Check whether a stack range is on any
|
||||
* of the possible stacks.
|
||||
*
|
||||
* @tsk: task whose stack is being unwound
|
||||
* @sp: stack address being checked
|
||||
* @size: size of the stack range being checked
|
||||
* @info: stack unwinding context
|
||||
*/
|
||||
typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
|
||||
unsigned long sp, unsigned long size,
|
||||
struct stack_info *info);
|
||||
|
||||
static inline int unwind_next_common(struct unwind_state *state,
|
||||
struct stack_info *info,
|
||||
on_accessible_stack_fn accessible,
|
||||
stack_trace_translate_fp_fn translate_fp)
|
||||
{
|
||||
unsigned long fp = state->fp, kern_fp = fp;
|
||||
struct task_struct *tsk = state->task;
|
||||
|
||||
if (fp & 0x7)
|
||||
return -EINVAL;
|
||||
|
||||
if (!accessible(tsk, fp, 16, info))
|
||||
return -EINVAL;
|
||||
|
||||
if (test_bit(info->type, state->stacks_done))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If fp is not from the current address space perform the necessary
|
||||
* translation before dereferencing it to get the next fp.
|
||||
*/
|
||||
if (translate_fp && !translate_fp(&kern_fp, info->type))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* As stacks grow downward, any valid record on the same stack must be
|
||||
* at a strictly higher address than the prior record.
|
||||
*
|
||||
* Stacks can nest in several valid orders, e.g.
|
||||
*
|
||||
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
|
||||
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
|
||||
* HYP -> OVERFLOW
|
||||
*
|
||||
* ... but the nesting itself is strict. Once we transition from one
|
||||
* stack to another, it's never valid to unwind back to that first
|
||||
* stack.
|
||||
*/
|
||||
if (info->type == state->prev_type) {
|
||||
if (fp <= state->prev_fp)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
__set_bit(state->prev_type, state->stacks_done);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record this frame record's values and location. The prev_fp and
|
||||
* prev_type are only meaningful to the next unwind_next() invocation.
|
||||
*/
|
||||
state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
|
||||
state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
|
||||
state->prev_fp = fp;
|
||||
state->prev_type = info->type;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* __ASM_STACKTRACE_COMMON_H */
|
arch/arm64/include/asm/stacktrace/nvhe.h (new file, 55 lines)
@ -0,0 +1,55 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* KVM nVHE hypervisor stack tracing support.
|
||||
*
|
||||
* The unwinder implementation depends on the nVHE mode:
|
||||
*
|
||||
* 1) Non-protected nVHE mode - the host can directly access the
|
||||
* HYP stack pages and unwind the HYP stack in EL1. This saves having
|
||||
* to allocate shared buffers for the host to read the unwinded
|
||||
* stacktrace.
|
||||
*
|
||||
* 2) pKVM (protected nVHE) mode - the host cannot directly access
|
||||
* the HYP memory. The stack is unwinded in EL2 and dumped to a shared
|
||||
* buffer where the host can read and print the stacktrace.
|
||||
*
|
||||
* Copyright (C) 2022 Google LLC
|
||||
*/
|
||||
#ifndef __ASM_STACKTRACE_NVHE_H
|
||||
#define __ASM_STACKTRACE_NVHE_H
|
||||
|
||||
#include <asm/stacktrace/common.h>
|
||||
|
||||
/*
|
||||
* kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
|
||||
*
|
||||
* @state : unwind_state to initialize
|
||||
* @fp : frame pointer at which to start the unwinding.
|
||||
* @pc : program counter at which to start the unwinding.
|
||||
*/
|
||||
static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
|
||||
unsigned long fp,
|
||||
unsigned long pc)
|
||||
{
|
||||
unwind_init_common(state, NULL);
|
||||
|
||||
state->fp = fp;
|
||||
state->pc = pc;
|
||||
}
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
/*
|
||||
* Conventional (non-protected) nVHE HYP stack unwinder
|
||||
*
|
||||
* In non-protected mode, the unwinding is done from kernel proper context
|
||||
* (by the host in EL1).
|
||||
*/
|
||||
|
||||
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
|
||||
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
|
||||
DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
|
||||
|
||||
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
|
||||
|
||||
#endif /* __KVM_NVHE_HYPERVISOR__ */
|
||||
#endif /* __ASM_STACKTRACE_NVHE_H */
|
@ -7,74 +7,15 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/stacktrace.h>
|
||||
|
||||
#include <asm/irq.h>
|
||||
#include <asm/pointer_auth.h>
|
||||
#include <asm/stack_pointer.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
/*
|
||||
* A snapshot of a frame record or fp/lr register values, along with some
|
||||
* accounting information necessary for robust unwinding.
|
||||
*
|
||||
* @fp: The fp value in the frame record (or the real fp)
|
||||
* @pc: The lr value in the frame record (or the real lr)
|
||||
*
|
||||
* @stacks_done: Stacks which have been entirely unwound, for which it is no
|
||||
* longer valid to unwind to.
|
||||
*
|
||||
* @prev_fp: The fp that pointed to this frame record, or a synthetic value
|
||||
* of 0. This is used to ensure that within a stack, each
|
||||
* subsequent frame record is at an increasing address.
|
||||
* @prev_type: The type of stack this frame record was on, or a synthetic
|
||||
* value of STACK_TYPE_UNKNOWN. This is used to detect a
|
||||
* transition from one stack to another.
|
||||
*
|
||||
* @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
|
||||
* associated with the most recently encountered replacement lr
|
||||
* value.
|
||||
*
|
||||
* @task: The task being unwound.
|
||||
*/
|
||||
struct unwind_state {
|
||||
unsigned long fp;
|
||||
unsigned long pc;
|
||||
DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
|
||||
unsigned long prev_fp;
|
||||
enum stack_type prev_type;
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
struct llist_node *kr_cur;
|
||||
#endif
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
static void unwind_init_common(struct unwind_state *state,
|
||||
struct task_struct *task)
|
||||
{
|
||||
state->task = task;
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
state->kr_cur = NULL;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Prime the first unwind.
|
||||
*
|
||||
* In unwind_next() we'll check that the FP points to a valid stack,
|
||||
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
|
||||
* treated as a transition to whichever stack that happens to be. The
|
||||
* prev_fp value won't be used, but we set it to 0 such that it is
|
||||
* definitely not an accessible stack address.
|
||||
*/
|
||||
bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
|
||||
state->prev_fp = 0;
|
||||
state->prev_type = STACK_TYPE_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start an unwind from a pt_regs.
|
||||
*
|
||||
@ -126,6 +67,31 @@ static inline void unwind_init_from_task(struct unwind_state *state,
|
||||
state->pc = thread_saved_pc(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* We can only safely access per-cpu stacks from current in a non-preemptible
|
||||
* context.
|
||||
*/
|
||||
static bool on_accessible_stack(const struct task_struct *tsk,
|
||||
unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
if (info)
|
||||
info->type = STACK_TYPE_UNKNOWN;
|
||||
|
||||
if (on_task_stack(tsk, sp, size, info))
|
||||
return true;
|
||||
if (tsk != current || preemptible())
|
||||
return false;
|
||||
if (on_irq_stack(sp, size, info))
|
||||
return true;
|
||||
if (on_overflow_stack(sp, size, info))
|
||||
return true;
|
||||
if (on_sdei_stack(sp, size, info))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Unwind from one frame record (A) to the next frame record (B).
 *
@@ -138,48 +104,15 @@ static int notrace unwind_next(struct unwind_state *state)
	struct task_struct *tsk = state->task;
	unsigned long fp = state->fp;
	struct stack_info info;
	int err;

	/* Final frame; nothing to unwind */
	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
		return -ENOENT;

	if (fp & 0x7)
		return -EINVAL;

	if (!on_accessible_stack(tsk, fp, 16, &info))
		return -EINVAL;

	if (test_bit(info.type, state->stacks_done))
		return -EINVAL;

	/*
	 * As stacks grow downward, any valid record on the same stack must be
	 * at a strictly higher address than the prior record.
	 *
	 * Stacks can nest in several valid orders, e.g.
	 *
	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
	 *
	 * ... but the nesting itself is strict. Once we transition from one
	 * stack to another, it's never valid to unwind back to that first
	 * stack.
	 */
	if (info.type == state->prev_type) {
		if (fp <= state->prev_fp)
			return -EINVAL;
	} else {
		__set_bit(state->prev_type, state->stacks_done);
	}

	/*
	 * Record this frame record's values and location. The prev_fp and
	 * prev_type are only meaningful to the next unwind_next() invocation.
	 */
	state->fp = READ_ONCE(*(unsigned long *)(fp));
	state->pc = READ_ONCE(*(unsigned long *)(fp + 8));
	state->prev_fp = fp;
	state->prev_type = info.type;
	err = unwind_next_common(state, &info, on_accessible_stack, NULL);
	if (err)
		return err;

	state->pc = ptrauth_strip_insn_pac(state->pc);

@@ -56,4 +56,17 @@ config NVHE_EL2_DEBUG

	  If unsure, say N.

config PROTECTED_NVHE_STACKTRACE
	bool "Protected KVM hypervisor stacktraces"
	depends on NVHE_EL2_DEBUG
	default n
	help
	  Say Y here to enable pKVM hypervisor stacktraces on hyp_panic()

	  If using protected nVHE mode, but cannot afford the associated
	  memory cost (less than 0.75 page per CPU) of pKVM stacktraces,
	  say N.

	  If unsure, or not using protected nVHE (pKVM), say N.

endif # VIRTUALIZATION

@@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/

kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
	 inject_fault.o va_layout.o handle_exit.o \
	 guest.o debug.o reset.o sys_regs.o \
	 guest.o debug.o reset.o sys_regs.o stacktrace.o \
	 vgic-sys-reg-v3.o fpsimd.o pkvm.o \
	 arch_timer.o trng.o vmid.o \
	 vgic/vgic.o vgic/vgic-init.o \

@@ -242,7 +242,7 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		(vcpu->arch.flags & KVM_ARM64_WFIT));
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)

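The conversion above (and in the hunks that follow) replaces open-coded tests on vcpu->arch.flags with the new vcpu_set_flag()/vcpu_clear_flag()/vcpu_get_flag() helpers. As a rough, illustrative sketch only (the names and single-word layout below are invented for the example; the real helpers are macros that also encode which flag set a given flag lives in), the pattern boils down to bit operations on a per-vCPU word:

```c
#include <stdbool.h>

/* Illustrative stand-in for one of the per-vCPU flag sets. */
struct demo_vcpu {
	unsigned long iflags;
};

#define DEMO_IN_WFIT		(1UL << 0)
#define DEMO_DEBUG_DIRTY	(1UL << 1)

static inline void demo_vcpu_set_flag(struct demo_vcpu *vcpu, unsigned long flag)
{
	vcpu->iflags |= flag;
}

static inline void demo_vcpu_clear_flag(struct demo_vcpu *vcpu, unsigned long flag)
{
	vcpu->iflags &= ~flag;
}

static inline bool demo_vcpu_get_flag(const struct demo_vcpu *vcpu, unsigned long flag)
{
	return (vcpu->iflags & flag) != 0;
}
```

Note that later hunks in this diff also refer to the backing field by a new name in places (e.g. vcpu->arch.iflags), reflecting that flags no longer all share a single word.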
@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
|
||||
|
||||
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
|
||||
DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
|
||||
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
|
||||
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
||||
@ -330,6 +330,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
|
||||
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
|
||||
|
||||
/*
|
||||
* Default value for the FP state, will be overloaded at load
|
||||
* time if we support FP (pretty likely)
|
||||
*/
|
||||
vcpu->arch.fp_state = FP_STATE_FREE;
|
||||
|
||||
/* Set up the timer */
|
||||
kvm_timer_vcpu_init(vcpu);
|
||||
|
||||
@ -659,7 +665,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
|
||||
preempt_enable();
|
||||
|
||||
kvm_vcpu_halt(vcpu);
|
||||
vcpu->arch.flags &= ~KVM_ARM64_WFIT;
|
||||
vcpu_clear_flag(vcpu, IN_WFIT);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
|
||||
preempt_disable();
|
||||
@ -1015,8 +1021,8 @@ out:
|
||||
* the vcpu state. Note that this relies on __kvm_adjust_pc()
|
||||
* being preempt-safe on VHE.
|
||||
*/
|
||||
if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
|
||||
KVM_ARM64_INCREMENT_PC)))
|
||||
if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) ||
|
||||
vcpu_get_flag(vcpu, INCREMENT_PC)))
|
||||
kvm_call_hyp(__kvm_adjust_pc, vcpu);
|
||||
|
||||
vcpu_put(vcpu);
|
||||
@ -1414,18 +1420,11 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
|
||||
struct kvm_arm_device_addr *dev_addr)
|
||||
{
|
||||
unsigned long dev_id, type;
|
||||
|
||||
dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
|
||||
KVM_ARM_DEVICE_ID_SHIFT;
|
||||
type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
|
||||
KVM_ARM_DEVICE_TYPE_SHIFT;
|
||||
|
||||
switch (dev_id) {
|
||||
switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) {
|
||||
case KVM_ARM_DEVICE_VGIC_V2:
|
||||
if (!vgic_present)
|
||||
return -ENXIO;
|
||||
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
|
||||
return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr);
|
||||
default:
|
||||
return -ENODEV;
|
||||
}
|
||||
|
@ -104,11 +104,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
|
||||
* Trap debug register access when one of the following is true:
|
||||
* - Userspace is using the hardware to debug the guest
|
||||
* (KVM_GUESTDBG_USE_HW is set).
|
||||
* - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
|
||||
* - The guest is not using debug (DEBUG_DIRTY clear).
|
||||
* - The guest has enabled the OS Lock (debug exceptions are blocked).
|
||||
*/
|
||||
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
|
||||
!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) ||
|
||||
!vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
|
||||
kvm_vcpu_os_lock_enabled(vcpu))
|
||||
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
|
||||
|
||||
@ -147,8 +147,8 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
|
||||
* debug related registers.
|
||||
*
|
||||
* Additionally, KVM only traps guest accesses to the debug registers if
|
||||
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
|
||||
* flag on vcpu->arch.flags). Since the guest must not interfere
|
||||
* the guest is not actively using them (see the DEBUG_DIRTY
|
||||
* flag on vcpu->arch.iflags). Since the guest must not interfere
|
||||
* with the hardware state when debugging the guest, we must ensure that
|
||||
* trapping is enabled whenever we are debugging the guest using the
|
||||
* debug registers.
|
||||
@ -205,9 +205,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
||||
*
|
||||
* We simply switch the debug_ptr to point to our new
|
||||
* external_debug_state which has been populated by the
|
||||
* debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
|
||||
* mechanism ensures the registers are updated on the
|
||||
* world switch.
|
||||
* debug ioctl. The existing DEBUG_DIRTY mechanism ensures
|
||||
* the registers are updated on the world switch.
|
||||
*/
|
||||
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
|
||||
/* Enable breakpoints/watchpoints */
|
||||
@ -216,7 +215,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
||||
vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
|
||||
|
||||
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
|
||||
vcpu_set_flag(vcpu, DEBUG_DIRTY);
|
||||
|
||||
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
|
||||
&vcpu->arch.debug_ptr->dbg_bcr[0],
|
||||
@ -246,7 +245,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
||||
|
||||
/* If KDE or MDE are set, perform a full save/restore cycle. */
|
||||
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
|
||||
vcpu_set_flag(vcpu, DEBUG_DIRTY);
|
||||
|
||||
/* Write mdcr_el2 changes since vcpu_load on VHE systems */
|
||||
if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
|
||||
@ -298,16 +297,16 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
|
||||
!(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_SPE;
|
||||
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
|
||||
|
||||
/* Check if we have TRBE implemented and available at the host */
|
||||
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
|
||||
!(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_TRBE;
|
||||
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.flags &= ~(KVM_ARM64_DEBUG_STATE_SAVE_SPE |
|
||||
KVM_ARM64_DEBUG_STATE_SAVE_TRBE);
|
||||
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
|
||||
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
|
||||
}
|
||||
|
@ -77,12 +77,14 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
BUG_ON(!current->mm);
|
||||
BUG_ON(test_thread_flag(TIF_SVE));
|
||||
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
|
||||
if (!system_supports_fpsimd())
|
||||
return;
|
||||
|
||||
vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
|
||||
vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
|
||||
|
||||
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
|
||||
vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
|
||||
|
||||
/*
|
||||
* We don't currently support SME guests but if we leave
|
||||
@ -94,29 +96,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
|
||||
* operations. Do this for ZA as well for now for simplicity.
|
||||
*/
|
||||
if (system_supports_sme()) {
|
||||
vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
|
||||
vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
|
||||
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
|
||||
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
|
||||
vcpu_set_flag(vcpu, HOST_SME_ENABLED);
|
||||
|
||||
if (read_sysreg_s(SYS_SVCR) &
|
||||
(SVCR_SM_MASK | SVCR_ZA_MASK)) {
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
|
||||
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
|
||||
vcpu->arch.fp_state = FP_STATE_FREE;
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called just before entering the guest once we are no longer
|
||||
* preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM
|
||||
* mirror of the flag used by the hypervisor.
|
||||
* Called just before entering the guest once we are no longer preemptable
|
||||
* and interrupts are disabled. If we have managed to run anything using
|
||||
* FP while we were preemptible (such as off the back of an interrupt),
|
||||
* then neither the host nor the guest own the FP hardware (and it was the
|
||||
* responsibility of the code that used FP to save the existing state).
|
||||
*/
|
||||
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
|
||||
else
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
|
||||
vcpu->arch.fp_state = FP_STATE_FREE;
|
||||
}
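The kvm_arch_vcpu_ctxflush_fp() change above replaces the FP_ENABLED/FP_HOST/FP_FOREIGN_FPSTATE flag juggling with a single ownership field, vcpu->arch.fp_state. A minimal sketch of that state machine, with invented names purely for illustration of the transitions visible in the surrounding hunks:

```c
#include <stdbool.h>

/* Illustrative only: the three ownership states that fp_state tracks. */
enum demo_fp_state {
	DEMO_FP_FREE,		/* nothing live in the FP/SIMD registers to preserve */
	DEMO_FP_HOST_OWNED,	/* host state is live; save it before the guest uses FP */
	DEMO_FP_GUEST_OWNED,	/* guest state is live; save it back on vcpu_put */
};

struct demo_vcpu {
	enum demo_fp_state fp_state;
};

/* vcpu_load: assume the host currently owns the FP/SIMD registers. */
static void demo_load_fp(struct demo_vcpu *vcpu)
{
	vcpu->fp_state = DEMO_FP_HOST_OWNED;
}

/* FP trap from the guest: save host state if it is live, then hand ownership over. */
static void demo_handle_fp_trap(struct demo_vcpu *vcpu)
{
	if (vcpu->fp_state == DEMO_FP_HOST_OWNED) {
		/* the real code saves the host FP/SIMD state here */
	}
	vcpu->fp_state = DEMO_FP_GUEST_OWNED;
}

/* Just before guest entry: if FP was used while preemptible, nobody owns it. */
static void demo_ctxflush_fp(struct demo_vcpu *vcpu, bool foreign_fpstate)
{
	if (foreign_fpstate)
		vcpu->fp_state = DEMO_FP_FREE;
}
```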
|
||||
|
||||
/*
|
||||
@ -130,7 +131,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
|
||||
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
|
||||
/*
|
||||
* Currently we do not support SME guests so SVCR is
|
||||
* always 0 and we just need a variable to point to.
|
||||
@ -163,7 +164,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
if (has_vhe() && system_supports_sme()) {
|
||||
/* Also restore EL0 state seen on entry */
|
||||
if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED)
|
||||
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
|
||||
sysreg_clear_set(CPACR_EL1, 0,
|
||||
CPACR_EL1_SMEN_EL0EN |
|
||||
CPACR_EL1_SMEN_EL1EN);
|
||||
@ -173,7 +174,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
CPACR_EL1_SMEN_EL1EN);
|
||||
}
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
|
||||
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
|
||||
if (vcpu_has_sve(vcpu)) {
|
||||
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
|
||||
|
||||
@ -192,7 +193,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
||||
* for EL0. To avoid spurious traps, restore the trap state
|
||||
* seen by kvm_arch_vcpu_load_fp():
|
||||
*/
|
||||
if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
|
||||
if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
|
||||
sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
|
||||
else
|
||||
sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
#include <asm/stacktrace/nvhe.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#include <kvm/arm_hypercalls.h>
|
||||
@ -120,7 +121,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
|
||||
kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
|
||||
} else {
|
||||
if (esr & ESR_ELx_WFx_ISS_WFxT)
|
||||
vcpu->arch.flags |= KVM_ARM64_WFIT;
|
||||
vcpu_set_flag(vcpu, IN_WFIT);
|
||||
|
||||
kvm_vcpu_wfi(vcpu);
|
||||
}
|
||||
@ -347,12 +348,15 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
|
||||
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
|
||||
else
|
||||
kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
|
||||
(void *)panic_addr);
|
||||
(void *)(panic_addr + kaslr_offset()));
|
||||
} else {
|
||||
kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
|
||||
(void *)panic_addr);
|
||||
(void *)(panic_addr + kaslr_offset()));
|
||||
}
|
||||
|
||||
/* Dump the nVHE hypervisor backtrace */
|
||||
kvm_nvhe_dump_backtrace(hyp_offset);
|
||||
|
||||
/*
|
||||
* Hyp has panicked and we're going to handle that by panicking the
|
||||
* kernel. The kernel offset will be revealed in the panic so we're
|
||||
|
@ -303,14 +303,14 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
|
||||
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu_el1_is_32bit(vcpu)) {
|
||||
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
|
||||
case KVM_ARM64_EXCEPT_AA32_UND:
|
||||
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
|
||||
case unpack_vcpu_flag(EXCEPT_AA32_UND):
|
||||
enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
|
||||
break;
|
||||
case KVM_ARM64_EXCEPT_AA32_IABT:
|
||||
case unpack_vcpu_flag(EXCEPT_AA32_IABT):
|
||||
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
|
||||
break;
|
||||
case KVM_ARM64_EXCEPT_AA32_DABT:
|
||||
case unpack_vcpu_flag(EXCEPT_AA32_DABT):
|
||||
enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
|
||||
break;
|
||||
default:
|
||||
@ -318,9 +318,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
|
||||
case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
|
||||
KVM_ARM64_EXCEPT_AA64_EL1):
|
||||
switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
|
||||
case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
|
||||
enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
|
||||
break;
|
||||
default:
|
||||
@ -340,12 +339,12 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
|
||||
if (vcpu_get_flag(vcpu, PENDING_EXCEPTION)) {
|
||||
kvm_inject_exception(vcpu);
|
||||
vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
|
||||
KVM_ARM64_EXCEPT_MASK);
|
||||
} else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
|
||||
vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
|
||||
vcpu_clear_flag(vcpu, EXCEPT_MASK);
|
||||
} else if (vcpu_get_flag(vcpu, INCREMENT_PC)) {
|
||||
kvm_skip_instr(vcpu);
|
||||
vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
|
||||
vcpu_clear_flag(vcpu, INCREMENT_PC);
|
||||
}
|
||||
}
|
||||
|
@ -132,7 +132,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
|
||||
struct kvm_guest_debug_arch *host_dbg;
|
||||
struct kvm_guest_debug_arch *guest_dbg;
|
||||
|
||||
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
|
||||
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
|
||||
return;
|
||||
|
||||
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
|
||||
@ -151,7 +151,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
|
||||
struct kvm_guest_debug_arch *host_dbg;
|
||||
struct kvm_guest_debug_arch *guest_dbg;
|
||||
|
||||
if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
|
||||
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
|
||||
return;
|
||||
|
||||
host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
|
||||
@ -162,7 +162,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
|
||||
__debug_save_state(guest_dbg, guest_ctxt);
|
||||
__debug_restore_state(host_dbg, host_ctxt);
|
||||
|
||||
vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
|
||||
vcpu_clear_flag(vcpu, DEBUG_DIRTY);
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
|
||||
|
@ -37,22 +37,10 @@ struct kvm_exception_table_entry {
|
||||
extern struct kvm_exception_table_entry __start___kvm_ex_table;
|
||||
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
|
||||
|
||||
/* Check whether the FP regs were dirtied while in the host-side run loop: */
|
||||
static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
|
||||
/* Check whether the FP regs are owned by the guest */
|
||||
static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* When the system doesn't support FP/SIMD, we cannot rely on
|
||||
* the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
|
||||
* abort on the very first access to FP and thus we should never
|
||||
* see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
|
||||
* trap the accesses.
|
||||
*/
|
||||
if (!system_supports_fpsimd() ||
|
||||
vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
|
||||
vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
|
||||
KVM_ARM64_FP_HOST);
|
||||
|
||||
return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
|
||||
return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
|
||||
}
|
||||
|
||||
/* Save the 32-bit only FPSIMD system register state */
|
||||
@ -191,10 +179,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
isb();
|
||||
|
||||
/* Write out the host state if it's in the registers */
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
|
||||
if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
|
||||
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
|
||||
}
|
||||
|
||||
/* Restore the guest state */
|
||||
if (sve_guest)
|
||||
@ -206,7 +192,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
if (!(read_sysreg(hcr_el2) & HCR_RW))
|
||||
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
|
||||
|
||||
vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
|
||||
vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -195,7 +195,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
|
||||
__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
|
||||
__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
|
||||
|
||||
if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
|
||||
if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
|
||||
__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
|
||||
}
|
||||
|
||||
@ -212,7 +212,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
|
||||
write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
|
||||
write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
|
||||
|
||||
if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
|
||||
if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
|
||||
write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
|
||||
}
|
||||
|
||||
|
@ -12,13 +12,13 @@ HOST_EXTRACFLAGS += -I$(objtree)/include
|
||||
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
|
||||
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
|
||||
|
||||
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
|
||||
hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
|
||||
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
|
||||
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
|
||||
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o
|
||||
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
|
||||
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
|
||||
obj-y += $(lib-objs)
|
||||
hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
|
||||
hyp-obj-y += $(lib-objs)
|
||||
|
||||
##
|
||||
## Build rules for compiling nVHE hyp code
|
||||
@ -26,9 +26,9 @@ obj-y += $(lib-objs)
|
||||
## file containing all nVHE hyp code and data.
|
||||
##
|
||||
|
||||
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
|
||||
hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
|
||||
obj-y := kvm_nvhe.o
|
||||
extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
|
||||
targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
|
||||
|
||||
# 1) Compile all source files to `.nvhe.o` object files. The file extension
|
||||
# avoids file name clashes for files shared with VHE.
|
||||
|
@ -84,10 +84,10 @@ static void __debug_restore_trace(u64 trfcr_el1)
|
||||
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Disable and flush SPE data generation */
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
|
||||
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
|
||||
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
|
||||
/* Disable and flush Self-Hosted Trace generation */
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
|
||||
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
|
||||
__debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
|
||||
}
|
||||
|
||||
@ -98,9 +98,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
|
||||
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
|
||||
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
|
||||
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
|
||||
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
|
||||
__debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
|
||||
}
|
||||
|
||||
|
@@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc)
	b	hyp_panic

.L__hyp_sp_overflow\@:
	/*
	 * Reset SP to the top of the stack, to allow handling the hyp_panic.
	 * This corrupts the stack but is ok, since we won't be attempting
	 * any unwinding here.
	 */
	ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1
	mov	sp, x0
	/* Switch to the overflow stack */
	adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0

	b	hyp_panic_bad_stack
	ASM_BUG()

arch/arm64/kvm/hyp/nvhe/stacktrace.c (new file, 160 lines)
@@ -0,0 +1,160 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* KVM nVHE hypervisor stack tracing support.
|
||||
*
|
||||
* Copyright (C) 2022 Google LLC
|
||||
*/
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/percpu.h>
|
||||
|
||||
DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
|
||||
__aligned(16);
|
||||
|
||||
DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
|
||||
|
||||
/*
|
||||
* hyp_prepare_backtrace - Prepare non-protected nVHE backtrace.
|
||||
*
|
||||
* @fp : frame pointer at which to start the unwinding.
|
||||
* @pc : program counter at which to start the unwinding.
|
||||
*
|
||||
* Save the information needed by the host to unwind the non-protected
|
||||
* nVHE hypervisor stack in EL1.
|
||||
*/
|
||||
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
|
||||
{
|
||||
struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info);
|
||||
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
|
||||
|
||||
stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE);
|
||||
stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack);
|
||||
stacktrace_info->fp = fp;
|
||||
stacktrace_info->pc = pc;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
|
||||
#include <asm/stacktrace/nvhe.h>
|
||||
|
||||
DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
|
||||
|
||||
static bool on_overflow_stack(unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
|
||||
unsigned long high = low + OVERFLOW_STACK_SIZE;
|
||||
|
||||
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
|
||||
}
|
||||
|
||||
static bool on_hyp_stack(unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
|
||||
unsigned long high = params->stack_hyp_va;
|
||||
unsigned long low = high - PAGE_SIZE;
|
||||
|
||||
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
|
||||
}
|
||||
|
||||
static bool on_accessible_stack(const struct task_struct *tsk,
|
||||
unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
if (info)
|
||||
info->type = STACK_TYPE_UNKNOWN;
|
||||
|
||||
return (on_overflow_stack(sp, size, info) ||
|
||||
on_hyp_stack(sp, size, info));
|
||||
}
|
||||
|
||||
static int unwind_next(struct unwind_state *state)
|
||||
{
|
||||
struct stack_info info;
|
||||
|
||||
return unwind_next_common(state, &info, on_accessible_stack, NULL);
|
||||
}
|
||||
|
||||
static void notrace unwind(struct unwind_state *state,
|
||||
stack_trace_consume_fn consume_entry,
|
||||
void *cookie)
|
||||
{
|
||||
while (1) {
|
||||
int ret;
|
||||
|
||||
if (!consume_entry(cookie, state->pc))
|
||||
break;
|
||||
ret = unwind_next(state);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry
|
||||
*
|
||||
* @arg : index of the entry in the stacktrace buffer
|
||||
* @where : the program counter corresponding to the stack frame
|
||||
*
|
||||
* Save the return address of a stack frame to the shared stacktrace buffer.
|
||||
* The host can access this shared buffer from EL1 to dump the backtrace.
|
||||
*/
|
||||
static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
|
||||
{
|
||||
unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace);
|
||||
int *idx = (int *)arg;
|
||||
|
||||
/*
|
||||
* Need 2 free slots: 1 for current entry and 1 for the
|
||||
* delimiter.
|
||||
*/
|
||||
if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2)
|
||||
return false;
|
||||
|
||||
stacktrace[*idx] = where;
|
||||
stacktrace[++*idx] = 0UL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace
|
||||
*
|
||||
* @fp : frame pointer at which to start the unwinding.
|
||||
* @pc : program counter at which to start the unwinding.
|
||||
*
|
||||
* Save the unwinded stack addresses to the shared stacktrace buffer.
|
||||
* The host can access this shared buffer from EL1 to dump the backtrace.
|
||||
*/
|
||||
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
|
||||
{
|
||||
struct unwind_state state;
|
||||
int idx = 0;
|
||||
|
||||
kvm_nvhe_unwind_init(&state, fp, pc);
|
||||
|
||||
unwind(&state, pkvm_save_backtrace_entry, &idx);
|
||||
}
|
||||
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
|
||||
/*
|
||||
* kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace
|
||||
*
|
||||
* @fp : frame pointer at which to start the unwinding.
|
||||
* @pc : program counter at which to start the unwinding.
|
||||
*
|
||||
* Saves the information needed by the host to dump the nVHE hypervisor
|
||||
* backtrace.
|
||||
*/
|
||||
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc)
|
||||
{
|
||||
if (is_protected_kvm_enabled())
|
||||
pkvm_save_backtrace(fp, pc);
|
||||
else
|
||||
hyp_prepare_backtrace(fp, pc);
|
||||
}
|
@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
|
||||
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
|
||||
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
|
||||
|
||||
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
|
||||
|
||||
static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val;
|
||||
@ -43,7 +45,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
val = vcpu->arch.cptr_el2;
|
||||
val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
|
||||
if (!update_fp_enabled(vcpu)) {
|
||||
if (!guest_owns_fp_regs(vcpu)) {
|
||||
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
|
||||
__activate_traps_fpsimd32(vcpu);
|
||||
}
|
||||
@ -123,7 +125,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
cptr = CPTR_EL2_DEFAULT;
|
||||
if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
|
||||
if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
|
||||
cptr |= CPTR_EL2_TZ;
|
||||
if (cpus_have_final_cap(ARM64_SME))
|
||||
cptr &= ~CPTR_EL2_TSM;
|
||||
@ -335,7 +337,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
__sysreg_restore_state_nvhe(host_ctxt);
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
|
||||
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
|
||||
__fpsimd_save_fpexc32(vcpu);
|
||||
|
||||
__debug_switch_to_host(vcpu);
|
||||
@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void)
|
||||
__sysreg_restore_state_nvhe(host_ctxt);
|
||||
}
|
||||
|
||||
/* Prepare to dump kvm nvhe hyp stacktrace */
|
||||
kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0),
|
||||
_THIS_IP_);
|
||||
|
||||
__hyp_do_panic(host_ctxt, spsr, elr, par);
|
||||
unreachable();
|
||||
}
|
||||
@ -386,5 +392,5 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void)
|
||||
|
||||
asmlinkage void kvm_unexpected_el2_exception(void)
|
||||
{
|
||||
return __kvm_unexpected_el2_exception();
|
||||
__kvm_unexpected_el2_exception();
|
||||
}
|
||||
|
@ -38,9 +38,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
|
||||
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
|
||||
*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
|
||||
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
|
||||
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
|
||||
|
||||
__kvm_adjust_pc(vcpu);
|
||||
|
||||
|
@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
||||
|
||||
val |= CPTR_EL2_TAM;
|
||||
|
||||
if (update_fp_enabled(vcpu)) {
|
||||
if (guest_owns_fp_regs(vcpu)) {
|
||||
if (vcpu_has_sve(vcpu))
|
||||
val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
|
||||
} else {
|
||||
@ -175,7 +175,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
|
||||
|
||||
sysreg_restore_host_state_vhe(host_ctxt);
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
|
||||
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
|
||||
__fpsimd_save_fpexc32(vcpu);
|
||||
|
||||
__debug_switch_to_host(vcpu);
|
||||
@ -249,5 +249,5 @@ void __noreturn hyp_panic(void)
|
||||
|
||||
asmlinkage void kvm_unexpected_el2_exception(void)
|
||||
{
|
||||
return __kvm_unexpected_el2_exception();
|
||||
__kvm_unexpected_el2_exception();
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
|
||||
__sysreg_restore_user_state(guest_ctxt);
|
||||
__sysreg_restore_el1_state(guest_ctxt);
|
||||
|
||||
vcpu->arch.sysregs_loaded_on_cpu = true;
|
||||
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
|
||||
|
||||
activate_traps_vhe_load(vcpu);
|
||||
}
|
||||
@ -110,5 +110,5 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
|
||||
/* Restore host user state */
|
||||
__sysreg_restore_user_state(host_ctxt);
|
||||
|
||||
vcpu->arch.sysregs_loaded_on_cpu = false;
|
||||
vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
|
||||
}
|
||||
|
@ -20,9 +20,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
|
||||
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
|
||||
u64 esr = 0;
|
||||
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
|
||||
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
|
||||
|
||||
vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
|
||||
|
||||
@ -52,9 +50,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
|
||||
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
|
||||
KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
|
||||
|
||||
/*
|
||||
* Build an unknown exception, depending on the instruction
|
||||
@ -73,8 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
|
||||
|
||||
static void inject_undef32(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
kvm_pend_exception(vcpu, EXCEPT_AA32_UND);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -97,14 +92,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
|
||||
far = vcpu_read_sys_reg(vcpu, FAR_EL1);
|
||||
|
||||
if (is_pabt) {
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
kvm_pend_exception(vcpu, EXCEPT_AA32_IABT);
|
||||
far &= GENMASK(31, 0);
|
||||
far |= (u64)addr << 32;
|
||||
vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
|
||||
} else { /* !iabt */
|
||||
vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT |
|
||||
KVM_ARM64_PENDING_EXCEPTION);
|
||||
kvm_pend_exception(vcpu, EXCEPT_AA32_DABT);
|
||||
far &= GENMASK(63, 32);
|
||||
far |= addr;
|
||||
vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
|
||||
|
@@ -786,7 +786,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
{
	phys_addr_t addr;
	int ret = 0;
	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
	struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
				     KVM_PGTABLE_PROT_R |

@ -81,7 +81,7 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
|
||||
* KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
|
||||
* kvm_arm_vcpu_finalize(), which freezes the configuration.
|
||||
*/
|
||||
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
|
||||
vcpu_set_flag(vcpu, GUEST_HAS_SVE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -120,7 +120,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
vcpu->arch.sve_state = buf;
|
||||
vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
|
||||
vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -177,7 +177,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
|
||||
!system_has_full_ptr_auth())
|
||||
return -EINVAL;
|
||||
|
||||
vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
|
||||
vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
arch/arm64/kvm/stacktrace.c (new file, 218 lines)
@@ -0,0 +1,218 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* KVM nVHE hypervisor stack tracing support.
|
||||
*
|
||||
* The unwinder implementation depends on the nVHE mode:
|
||||
*
|
||||
* 1) Non-protected nVHE mode - the host can directly access the
|
||||
* HYP stack pages and unwind the HYP stack in EL1. This saves having
|
||||
* to allocate shared buffers for the host to read the unwinded
|
||||
* stacktrace.
|
||||
*
|
||||
* 2) pKVM (protected nVHE) mode - the host cannot directly access
|
||||
* the HYP memory. The stack is unwinded in EL2 and dumped to a shared
|
||||
* buffer where the host can read and print the stacktrace.
|
||||
*
|
||||
* Copyright (C) 2022 Google LLC
|
||||
*/
|
||||
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#include <asm/stacktrace/nvhe.h>
|
||||
|
||||
/*
|
||||
* kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs
|
||||
*
|
||||
* The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to
|
||||
* allow for guard pages below the stack. Consequently, the fixed offset address
|
||||
* translation macros won't work here.
|
||||
*
|
||||
* The kernel VA is calculated as an offset from the kernel VA of the hypervisor
|
||||
* stack base.
|
||||
*
|
||||
* Returns true on success and updates @addr to its corresponding kernel VA;
|
||||
* otherwise returns false.
|
||||
*/
|
||||
static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
|
||||
enum stack_type type)
|
||||
{
|
||||
struct kvm_nvhe_stacktrace_info *stacktrace_info;
|
||||
unsigned long hyp_base, kern_base, hyp_offset;
|
||||
|
||||
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
|
||||
|
||||
switch (type) {
|
||||
case STACK_TYPE_HYP:
|
||||
kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
|
||||
hyp_base = (unsigned long)stacktrace_info->stack_base;
|
||||
break;
|
||||
case STACK_TYPE_OVERFLOW:
|
||||
kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
|
||||
hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
hyp_offset = *addr - hyp_base;
|
||||
|
||||
*addr = kern_base + hyp_offset;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool on_overflow_stack(unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
struct kvm_nvhe_stacktrace_info *stacktrace_info
|
||||
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
|
||||
unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
|
||||
unsigned long high = low + OVERFLOW_STACK_SIZE;
|
||||
|
||||
return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
|
||||
}
|
||||
|
||||
static bool on_hyp_stack(unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
struct kvm_nvhe_stacktrace_info *stacktrace_info
|
||||
= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
|
||||
unsigned long low = (unsigned long)stacktrace_info->stack_base;
|
||||
unsigned long high = low + PAGE_SIZE;
|
||||
|
||||
return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
|
||||
}
|
||||
|
||||
static bool on_accessible_stack(const struct task_struct *tsk,
|
||||
unsigned long sp, unsigned long size,
|
||||
struct stack_info *info)
|
||||
{
|
||||
if (info)
|
||||
info->type = STACK_TYPE_UNKNOWN;
|
||||
|
||||
return (on_overflow_stack(sp, size, info) ||
|
||||
on_hyp_stack(sp, size, info));
|
||||
}
|
||||
|
||||
static int unwind_next(struct unwind_state *state)
|
||||
{
|
||||
struct stack_info info;
|
||||
|
||||
return unwind_next_common(state, &info, on_accessible_stack,
|
||||
kvm_nvhe_stack_kern_va);
|
||||
}
|
||||
|
||||
static void unwind(struct unwind_state *state,
|
||||
stack_trace_consume_fn consume_entry, void *cookie)
|
||||
{
|
||||
while (1) {
|
||||
int ret;
|
||||
|
||||
if (!consume_entry(cookie, state->pc))
|
||||
break;
|
||||
ret = unwind_next(state);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry
|
||||
*
|
||||
* @arg : the hypervisor offset, used for address translation
|
||||
* @where : the program counter corresponding to the stack frame
|
||||
*/
|
||||
static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where)
|
||||
{
|
||||
unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0);
|
||||
unsigned long hyp_offset = (unsigned long)arg;
|
||||
|
||||
/* Mask tags and convert to kern addr */
|
||||
where = (where & va_mask) + hyp_offset;
|
||||
kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset()));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void kvm_nvhe_dump_backtrace_start(void)
|
||||
{
|
||||
kvm_err("nVHE call trace:\n");
|
||||
}
|
||||
|
||||
static void kvm_nvhe_dump_backtrace_end(void)
|
||||
{
|
||||
kvm_err("---[ end nVHE call trace ]---\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* hyp_dump_backtrace - Dump the non-protected nVHE backtrace.
|
||||
*
|
||||
* @hyp_offset: hypervisor offset, used for address translation.
|
||||
*
|
||||
* The host can directly access HYP stack pages in non-protected
|
||||
* mode, so the unwinding is done directly from EL1. This removes
|
||||
* the need for shared buffers between host and hypervisor for
|
||||
* the stacktrace.
|
||||
*/
|
||||
static void hyp_dump_backtrace(unsigned long hyp_offset)
|
||||
{
|
||||
struct kvm_nvhe_stacktrace_info *stacktrace_info;
|
||||
struct unwind_state state;
|
||||
|
||||
stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
|
||||
|
||||
kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);
|
||||
|
||||
kvm_nvhe_dump_backtrace_start();
|
||||
unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset);
|
||||
kvm_nvhe_dump_backtrace_end();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE
|
||||
DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)],
|
||||
pkvm_stacktrace);
|
||||
|
||||
/*
|
||||
* pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace.
|
||||
*
|
||||
* @hyp_offset: hypervisor offset, used for address translation.
|
||||
*
|
||||
* Dumping of the pKVM HYP backtrace is done by reading the
|
||||
* stack addresses from the shared stacktrace buffer, since the
|
||||
* host cannot directly access hypervisor memory in protected
|
||||
* mode.
|
||||
*/
|
||||
static void pkvm_dump_backtrace(unsigned long hyp_offset)
|
||||
{
|
||||
unsigned long *stacktrace
|
||||
= (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace);
|
||||
int i;
|
||||
|
||||
kvm_nvhe_dump_backtrace_start();
|
||||
/* The saved stacktrace is terminated by a null entry */
|
||||
for (i = 0;
|
||||
i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i];
|
||||
i++)
|
||||
kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]);
|
||||
kvm_nvhe_dump_backtrace_end();
|
||||
}
|
||||
#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
static void pkvm_dump_backtrace(unsigned long hyp_offset)
|
||||
{
|
||||
kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n");
|
||||
}
|
||||
#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */
|
||||
|
||||
/*
|
||||
* kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace.
|
||||
*
|
||||
* @hyp_offset: hypervisor offset, used for address translation.
|
||||
*/
|
||||
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
|
||||
{
|
||||
if (is_protected_kvm_enabled())
|
||||
pkvm_dump_backtrace(hyp_offset);
|
||||
else
|
||||
hyp_dump_backtrace(hyp_offset);
|
||||
}
|
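kvm_nvhe_stack_kern_va() above converts a hypervisor stack address into the kernel alias of the same page by swapping the region base. A standalone sketch of that arithmetic, with invented names and an added bounds check purely for illustration:

```c
#include <stdbool.h>

/*
 * Illustrative only: translate an address inside a hyp-VA mapped region to
 * the corresponding kernel VA, given both base addresses. This mirrors the
 * base-swap arithmetic of kvm_nvhe_stack_kern_va(); the bounds check is an
 * extra safety net for the example.
 */
static bool demo_hyp_to_kern_va(unsigned long *addr,
				unsigned long hyp_base,
				unsigned long kern_base,
				unsigned long region_size)
{
	unsigned long offset = *addr - hyp_base;

	if (offset >= region_size)
		return false;	/* address is not inside the region */

	*addr = kern_base + offset;
	return true;
}
```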
@ -34,18 +34,11 @@
|
||||
#include "trace.h"
|
||||
|
||||
/*
|
||||
* All of this file is extremely similar to the ARM coproc.c, but the
|
||||
* types are different. My gut feeling is that it should be pretty
|
||||
* easy to merge, but that would be an ABI breakage -- again. VFP
|
||||
* would also need to be abstracted.
|
||||
*
|
||||
* For AArch32, we only take care of what is being trapped. Anything
|
||||
* that has to do with init and userspace access has to go via the
|
||||
* 64bit interface.
|
||||
*/
|
||||
|
||||
static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
|
||||
static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
|
||||
static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
|
||||
|
||||
static bool read_from_write_only(struct kvm_vcpu *vcpu,
|
||||
@ -72,7 +65,7 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
|
||||
{
|
||||
u64 val = 0x8badf00d8badf00d;
|
||||
|
||||
if (vcpu->arch.sysregs_loaded_on_cpu &&
|
||||
if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
|
||||
__vcpu_read_sys_reg_from_cpu(reg, &val))
|
||||
return val;
|
||||
|
||||
@ -81,7 +74,7 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
|
||||
|
||||
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
|
||||
{
|
||||
if (vcpu->arch.sysregs_loaded_on_cpu &&
|
||||
if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
|
||||
__vcpu_write_sys_reg_to_cpu(val, reg))
|
||||
return;
|
||||
|
||||
@ -321,16 +314,8 @@ static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
u64 id = sys_reg_to_index(rd);
|
||||
u64 val;
|
||||
int err;
|
||||
|
||||
err = reg_from_user(&val, uaddr, id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* The only modifiable bit is the OSLK bit. Refuse the write if
|
||||
* userspace attempts to change any other bit in the register.
|
||||
@ -387,7 +372,7 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
if (p->is_write) {
|
||||
vcpu_write_sys_reg(vcpu, p->regval, r->reg);
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
|
||||
vcpu_set_flag(vcpu, DEBUG_DIRTY);
|
||||
} else {
|
||||
p->regval = vcpu_read_sys_reg(vcpu, r->reg);
|
||||
}
|
||||
@ -403,8 +388,8 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
|
||||
* A 32 bit write to a debug register leave top bits alone
|
||||
* A 32 bit read from a debug register only returns the bottom bits
|
||||
*
|
||||
* All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
|
||||
* hyp.S code switches between host and guest values in future.
|
||||
* All writes will set the DEBUG_DIRTY flag to ensure the hyp code
|
||||
* switches between host and guest values in future.
|
||||
*/
|
||||
static void reg_to_dbg(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
@ -420,7 +405,7 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
|
||||
val |= (p->regval & (mask >> shift)) << shift;
|
||||
*dbg_reg = val;
|
||||
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
|
||||
vcpu_set_flag(vcpu, DEBUG_DIRTY);
|
||||
}
|
||||
|
||||
static void dbg_to_reg(struct kvm_vcpu *vcpu,
|
||||
@ -451,22 +436,16 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
|
||||
|
||||
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 *val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
|
||||
|
||||
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
*val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -493,23 +472,16 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
|
||||
|
||||
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
|
||||
vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 *val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
|
||||
|
||||
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
*val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -537,22 +509,16 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
|
||||
|
||||
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 *val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
|
||||
|
||||
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
*val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -579,22 +545,16 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
|
||||
|
||||
if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 *val)
|
||||
{
|
||||
__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
|
||||
|
||||
if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
|
||||
return -EFAULT;
|
||||
*val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1227,16 +1187,9 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
|
||||
|
||||
static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
const u64 id = sys_reg_to_index(rd);
|
||||
u8 csv2, csv3;
|
||||
int err;
|
||||
u64 val;
|
||||
|
||||
err = reg_from_user(&val, uaddr, id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as
|
||||
@ -1262,7 +1215,7 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
|
||||
vcpu->kvm->arch.pfr0_csv2 = csv2;
|
||||
vcpu->kvm->arch.pfr0_csv3 = csv3 ;
|
||||
vcpu->kvm->arch.pfr0_csv3 = csv3;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1275,27 +1228,17 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
|
||||
* to be changed.
|
||||
*/
|
||||
static int __get_id_reg(const struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd, void __user *uaddr,
|
||||
const struct sys_reg_desc *rd, u64 *val,
|
||||
bool raz)
|
||||
{
|
||||
const u64 id = sys_reg_to_index(rd);
|
||||
const u64 val = read_id_reg(vcpu, rd, raz);
|
||||
|
||||
return reg_to_user(uaddr, &val, id);
|
||||
*val = read_id_reg(vcpu, rd, raz);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __set_id_reg(const struct kvm_vcpu *vcpu,
|
||||
const struct sys_reg_desc *rd, void __user *uaddr,
|
||||
const struct sys_reg_desc *rd, u64 val,
|
||||
bool raz)
|
||||
{
|
||||
const u64 id = sys_reg_to_index(rd);
|
||||
int err;
|
||||
u64 val;
|
||||
|
||||
err = reg_from_user(&val, uaddr, id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* This is what we mean by invariant: you can't change it. */
|
||||
if (val != read_id_reg(vcpu, rd, raz))
|
||||
return -EINVAL;
|
||||
@ -1304,47 +1247,37 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
|
||||
}
|
||||
|
||||
static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 *val)
|
||||
{
|
||||
bool raz = sysreg_visible_as_raz(vcpu, rd);
|
||||
|
||||
return __get_id_reg(vcpu, rd, uaddr, raz);
|
||||
return __get_id_reg(vcpu, rd, val, raz);
|
||||
}
|
||||
|
||||
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
bool raz = sysreg_visible_as_raz(vcpu, rd);
|
||||
|
||||
return __set_id_reg(vcpu, rd, uaddr, raz);
|
||||
return __set_id_reg(vcpu, rd, val, raz);
|
||||
}
|
||||
|
||||
static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
return __set_id_reg(vcpu, rd, uaddr, true);
|
||||
return __set_id_reg(vcpu, rd, val, true);
|
||||
}
|
||||
|
||||
static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 *val)
|
||||
{
|
||||
const u64 id = sys_reg_to_index(rd);
|
||||
const u64 val = 0;
|
||||
|
||||
return reg_to_user(uaddr, &val, id);
|
||||
*val = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr)
|
||||
u64 val)
|
||||
{
|
||||
int err;
|
||||
u64 val;
|
||||
|
||||
/* Perform the access even if we are going to ignore the value */
|
||||
err = reg_from_user(&val, uaddr, sys_reg_to_index(rd));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2639,35 +2572,34 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
|
||||
}
|
||||
}
|
||||
|
||||
const struct sys_reg_desc *find_reg_by_id(u64 id,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num)
|
||||
const struct sys_reg_desc *get_reg_by_id(u64 id,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num)
|
||||
{
|
||||
if (!index_to_params(id, params))
|
||||
struct sys_reg_params params;
|
||||
|
||||
if (!index_to_params(id, ¶ms))
|
||||
return NULL;
|
||||
|
||||
return find_reg(params, table, num);
|
||||
return find_reg(¶ms, table, num);
|
||||
}
|
||||
|
||||
/* Decode an index value, and find the sys_reg_desc entry. */
|
||||
static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
|
||||
u64 id)
|
||||
static const struct sys_reg_desc *
|
||||
id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
|
||||
const struct sys_reg_desc table[], unsigned int num)
|
||||
|
||||
{
|
||||
const struct sys_reg_desc *r;
|
||||
struct sys_reg_params params;
|
||||
|
||||
/* We only do sys_reg for now. */
|
||||
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
|
||||
return NULL;
|
||||
|
||||
if (!index_to_params(id, ¶ms))
|
||||
return NULL;
|
||||
|
||||
r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
|
||||
r = get_reg_by_id(id, table, num);
|
||||
|
||||
/* Not saved in the sys_reg array and not otherwise accessible? */
|
||||
if (r && !(r->reg || r->get_user))
|
||||
if (r && (!(r->reg || r->get_user) || sysreg_hidden(vcpu, r)))
|
||||
r = NULL;
|
||||
|
||||
return r;
|
||||
@ -2707,48 +2639,30 @@ static struct sys_reg_desc invariant_sys_regs[] = {
|
||||
{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
|
||||
};
|
||||
|
||||
static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
|
||||
static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
|
||||
{
|
||||
if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
|
||||
{
|
||||
if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_invariant_sys_reg(u64 id, void __user *uaddr)
|
||||
{
|
||||
struct sys_reg_params params;
|
||||
const struct sys_reg_desc *r;
|
||||
|
||||
r = find_reg_by_id(id, ¶ms, invariant_sys_regs,
|
||||
ARRAY_SIZE(invariant_sys_regs));
|
||||
r = get_reg_by_id(id, invariant_sys_regs,
|
||||
ARRAY_SIZE(invariant_sys_regs));
|
||||
if (!r)
|
||||
return -ENOENT;
|
||||
|
||||
return reg_to_user(uaddr, &r->val, id);
|
||||
return put_user(r->val, uaddr);
|
||||
}
|
||||
|
||||
static int set_invariant_sys_reg(u64 id, void __user *uaddr)
|
||||
static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
|
||||
{
|
||||
struct sys_reg_params params;
|
||||
const struct sys_reg_desc *r;
|
||||
int err;
|
||||
u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
|
||||
u64 val;
|
||||
|
||||
r = find_reg_by_id(id, ¶ms, invariant_sys_regs,
|
||||
ARRAY_SIZE(invariant_sys_regs));
|
||||
r = get_reg_by_id(id, invariant_sys_regs,
|
||||
ARRAY_SIZE(invariant_sys_regs));
|
||||
if (!r)
|
||||
return -ENOENT;
|
||||
|
||||
err = reg_from_user(&val, uaddr, id);
|
||||
if (err)
|
||||
return err;
|
||||
if (get_user(val, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
/* This is what we mean by invariant: you can't change it. */
|
||||
if (r->val != val)
|
||||
@ -2839,54 +2753,86 @@ static int demux_c15_set(u64 id, void __user *uaddr)
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
|
||||
const struct sys_reg_desc table[], unsigned int num)
|
||||
{
|
||||
u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
|
||||
const struct sys_reg_desc *r;
|
||||
u64 val;
|
||||
int ret;
|
||||
|
||||
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
|
||||
if (!r)
|
||||
return -ENOENT;
|
||||
|
||||
if (r->get_user) {
|
||||
ret = (r->get_user)(vcpu, r, &val);
|
||||
} else {
|
||||
val = __vcpu_sys_reg(vcpu, r->reg);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = put_user(val, uaddr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
|
||||
{
|
||||
const struct sys_reg_desc *r;
|
||||
void __user *uaddr = (void __user *)(unsigned long)reg->addr;
|
||||
int err;
|
||||
|
||||
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
|
||||
return demux_c15_get(reg->id, uaddr);
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
|
||||
return -ENOENT;
|
||||
err = get_invariant_sys_reg(reg->id, uaddr);
|
||||
if (err != -ENOENT)
|
||||
return err;
|
||||
|
||||
r = index_to_sys_reg_desc(vcpu, reg->id);
|
||||
return kvm_sys_reg_get_user(vcpu, reg,
|
||||
sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
|
||||
}
|
||||
|
||||
int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
|
||||
const struct sys_reg_desc table[], unsigned int num)
|
||||
{
|
||||
u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr;
|
||||
const struct sys_reg_desc *r;
|
||||
u64 val;
|
||||
int ret;
|
||||
|
||||
if (get_user(val, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
|
||||
if (!r)
|
||||
return get_invariant_sys_reg(reg->id, uaddr);
|
||||
|
||||
/* Check for regs disabled by runtime config */
|
||||
if (sysreg_hidden(vcpu, r))
|
||||
return -ENOENT;
|
||||
|
||||
if (r->get_user)
|
||||
return (r->get_user)(vcpu, r, reg, uaddr);
|
||||
if (r->set_user) {
|
||||
ret = (r->set_user)(vcpu, r, val);
|
||||
} else {
|
||||
__vcpu_sys_reg(vcpu, r->reg) = val;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
|
||||
{
|
||||
const struct sys_reg_desc *r;
|
||||
void __user *uaddr = (void __user *)(unsigned long)reg->addr;
|
||||
int err;
|
||||
|
||||
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
|
||||
return demux_c15_set(reg->id, uaddr);
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
|
||||
return -ENOENT;
|
||||
err = set_invariant_sys_reg(reg->id, uaddr);
|
||||
if (err != -ENOENT)
|
||||
return err;
|
||||
|
||||
r = index_to_sys_reg_desc(vcpu, reg->id);
|
||||
if (!r)
|
||||
return set_invariant_sys_reg(reg->id, uaddr);
|
||||
|
||||
/* Check for regs disabled by runtime config */
|
||||
if (sysreg_hidden(vcpu, r))
|
||||
return -ENOENT;
|
||||
|
||||
if (r->set_user)
|
||||
return (r->set_user)(vcpu, r, reg, uaddr);
|
||||
|
||||
return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
|
||||
return kvm_sys_reg_set_user(vcpu, reg,
|
||||
sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
|
||||
}
|
||||
|
||||
static unsigned int num_demux_regs(void)
|
||||
|
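After the rework above, KVM_GET_ONE_REG and KVM_SET_ONE_REG for AArch64 system registers funnel through kvm_sys_reg_get_user()/kvm_sys_reg_set_user(), which exchange a plain u64 with the per-register callbacks and do the put_user()/get_user() in one place. The user-space view is unchanged; a hedged sketch of reading the invariant MIDR_EL1 register through the ONE_REG interface on arm64 is below. The vcpu_fd parameter is assumed to be an existing vCPU file descriptor, and ARM64_SYS_REG() comes from the arm64 uapi header pulled in by <linux/kvm.h>; this is a compile-shape illustration, not code from the series.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* MIDR_EL1 is op0=3, op1=0, CRn=0, CRm=0, op2=0. */
	#define MIDR_EL1_ID	ARM64_SYS_REG(3, 0, 0, 0, 0)

	static int read_midr(int vcpu_fd, uint64_t *val)
	{
		struct kvm_one_reg reg = {
			.id   = MIDR_EL1_ID,
			.addr = (uint64_t)(uintptr_t)val,	/* kernel fills it with put_user() */
		};

		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}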
@ -75,9 +75,9 @@ struct sys_reg_desc {
|
||||
|
||||
/* Custom get/set_user functions, fallback to generic if NULL */
|
||||
int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr);
|
||||
u64 *val);
|
||||
int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
|
||||
const struct kvm_one_reg *reg, void __user *uaddr);
|
||||
u64 val);
|
||||
|
||||
/* Return mask of REG_* runtime visibility overrides */
|
||||
unsigned int (*visibility)(const struct kvm_vcpu *vcpu,
|
||||
@ -190,10 +190,16 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[],
|
||||
return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg);
|
||||
}
|
||||
|
||||
const struct sys_reg_desc *find_reg_by_id(u64 id,
|
||||
struct sys_reg_params *params,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num);
|
||||
const struct sys_reg_desc *get_reg_by_id(u64 id,
|
||||
const struct sys_reg_desc table[],
|
||||
unsigned int num);
|
||||
|
||||
int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
|
||||
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
|
||||
int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
|
||||
const struct sys_reg_desc table[], unsigned int num);
|
||||
int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
|
||||
const struct sys_reg_desc table[], unsigned int num);
|
||||
|
||||
#define AA32(_x) .aarch32_map = AA32_##_x
|
||||
#define Op0(_x) .Op0 = _x
|
||||
|
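With the header change above, a descriptor's ->get_user()/->set_user() callbacks no longer touch the user buffer themselves; they just produce or consume a u64 and the core code does the uaccess. A small stand-alone sketch of that value-based callback contract follows; the vcpu/desc structs and the shadow register are invented for the example, only the callback shape mirrors the new prototypes.

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for a vCPU and for one emulated register descriptor. */
	struct vcpu { uint64_t shadow_reg; };

	struct desc {
		int (*get_user)(struct vcpu *v, uint64_t *val);	/* fill *val */
		int (*set_user)(struct vcpu *v, uint64_t val);	/* consume val */
	};

	static int get_shadow(struct vcpu *v, uint64_t *val)
	{
		*val = v->shadow_reg;
		return 0;
	}

	static int set_shadow(struct vcpu *v, uint64_t val)
	{
		v->shadow_reg = val;
		return 0;
	}

	static const struct desc shadow_desc = {
		.get_user = get_shadow,
		.set_user = set_shadow,
	};

	int main(void)
	{
		struct vcpu v = { 0 };
		uint64_t val;

		shadow_desc.set_user(&v, 0xabcd);	/* core code got the value with get_user() */
		shadow_desc.get_user(&v, &val);		/* core code returns it with put_user() */
		printf("0x%llx\n", (unsigned long long)val);
		return 0;
	}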
@ -10,293 +10,357 @@
|
||||
#include "vgic/vgic.h"
|
||||
#include "sys_regs.h"
|
||||
|
||||
static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v;
|
||||
struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
|
||||
/*
|
||||
* Disallow restoring VM state if not supported by this
|
||||
* hardware.
|
||||
*/
|
||||
host_pri_bits = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
|
||||
if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
|
||||
return -EINVAL;
|
||||
|
||||
vgic_v3_cpu->num_pri_bits = host_pri_bits;
|
||||
|
||||
host_id_bits = FIELD_GET(ICC_CTLR_EL1_ID_BITS_MASK, val);
|
||||
if (host_id_bits > vgic_v3_cpu->num_id_bits)
|
||||
return -EINVAL;
|
||||
|
||||
vgic_v3_cpu->num_id_bits = host_id_bits;
|
||||
|
||||
host_seis = FIELD_GET(ICH_VTR_SEIS_MASK, kvm_vgic_global_state.ich_vtr_el2);
|
||||
seis = FIELD_GET(ICC_CTLR_EL1_SEIS_MASK, val);
|
||||
if (host_seis != seis)
|
||||
return -EINVAL;
|
||||
|
||||
host_a3v = FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2);
|
||||
a3v = FIELD_GET(ICC_CTLR_EL1_A3V_MASK, val);
|
||||
if (host_a3v != a3v)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
|
||||
* The vgic_set_vmcr() will convert to ICH_VMCR layout.
|
||||
*/
|
||||
vmcr.cbpr = FIELD_GET(ICC_CTLR_EL1_CBPR_MASK, val);
|
||||
vmcr.eoim = FIELD_GET(ICC_CTLR_EL1_EOImode_MASK, val);
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *valp)
|
||||
{
|
||||
struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_vmcr vmcr;
|
||||
u64 val;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
val = p->regval;
|
||||
val = 0;
|
||||
val |= FIELD_PREP(ICC_CTLR_EL1_PRI_BITS_MASK, vgic_v3_cpu->num_pri_bits - 1);
|
||||
val |= FIELD_PREP(ICC_CTLR_EL1_ID_BITS_MASK, vgic_v3_cpu->num_id_bits);
|
||||
val |= FIELD_PREP(ICC_CTLR_EL1_SEIS_MASK,
|
||||
FIELD_GET(ICH_VTR_SEIS_MASK,
|
||||
kvm_vgic_global_state.ich_vtr_el2));
|
||||
val |= FIELD_PREP(ICC_CTLR_EL1_A3V_MASK,
|
||||
FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2));
|
||||
/*
|
||||
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
|
||||
* Extract it directly using ICC_CTLR_EL1 reg definitions.
|
||||
*/
|
||||
val |= FIELD_PREP(ICC_CTLR_EL1_CBPR_MASK, vmcr.cbpr);
|
||||
val |= FIELD_PREP(ICC_CTLR_EL1_EOImode_MASK, vmcr.eoim);
|
||||
|
||||
/*
|
||||
* Disallow restoring VM state if not supported by this
|
||||
* hardware.
|
||||
*/
|
||||
host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >>
|
||||
ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1;
|
||||
if (host_pri_bits > vgic_v3_cpu->num_pri_bits)
|
||||
return false;
|
||||
*valp = val;
|
||||
|
||||
vgic_v3_cpu->num_pri_bits = host_pri_bits;
|
||||
|
||||
host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >>
|
||||
ICC_CTLR_EL1_ID_BITS_SHIFT;
|
||||
if (host_id_bits > vgic_v3_cpu->num_id_bits)
|
||||
return false;
|
||||
|
||||
vgic_v3_cpu->num_id_bits = host_id_bits;
|
||||
|
||||
host_seis = ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT);
|
||||
seis = (val & ICC_CTLR_EL1_SEIS_MASK) >>
|
||||
ICC_CTLR_EL1_SEIS_SHIFT;
|
||||
if (host_seis != seis)
|
||||
return false;
|
||||
|
||||
host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT);
|
||||
a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT;
|
||||
if (host_a3v != a3v)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
|
||||
* The vgic_set_vmcr() will convert to ICH_VMCR layout.
|
||||
*/
|
||||
vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
|
||||
vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
val = 0;
|
||||
val |= (vgic_v3_cpu->num_pri_bits - 1) <<
|
||||
ICC_CTLR_EL1_PRI_BITS_SHIFT;
|
||||
val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT;
|
||||
val |= ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) <<
|
||||
ICC_CTLR_EL1_SEIS_SHIFT;
|
||||
val |= ((kvm_vgic_global_state.ich_vtr_el2 &
|
||||
ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) <<
|
||||
ICC_CTLR_EL1_A3V_SHIFT;
|
||||
/*
|
||||
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
|
||||
* Extract it directly using ICC_CTLR_EL1 reg definitions.
|
||||
*/
|
||||
val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
|
||||
val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
|
||||
|
||||
p->regval = val;
|
||||
}
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int set_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK;
|
||||
}
|
||||
vmcr.pmr = FIELD_GET(ICC_PMR_EL1_MASK, val);
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int get_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >>
|
||||
ICC_BPR0_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) &
|
||||
ICC_BPR0_EL1_MASK;
|
||||
}
|
||||
*val = FIELD_PREP(ICC_PMR_EL1_MASK, vmcr.pmr);
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int set_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
if (!p->is_write)
|
||||
p->regval = 0;
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
vmcr.bpr = FIELD_GET(ICC_BPR0_EL1_MASK, val);
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
*val = FIELD_PREP(ICC_BPR0_EL1_MASK, vmcr.bpr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (!vmcr.cbpr) {
|
||||
if (p->is_write) {
|
||||
vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
|
||||
ICC_BPR1_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) &
|
||||
ICC_BPR1_EL1_MASK;
|
||||
}
|
||||
} else {
|
||||
if (!p->is_write)
|
||||
p->regval = min((vmcr.bpr + 1), 7U);
|
||||
vmcr.abpr = FIELD_GET(ICC_BPR1_EL1_MASK, val);
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
}
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int get_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >>
|
||||
ICC_IGRPEN0_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) &
|
||||
ICC_IGRPEN0_EL1_MASK;
|
||||
}
|
||||
if (!vmcr.cbpr)
|
||||
*val = FIELD_PREP(ICC_BPR1_EL1_MASK, vmcr.abpr);
|
||||
else
|
||||
*val = min((vmcr.bpr + 1), 7U);
|
||||
|
||||
return true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int set_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (p->is_write) {
|
||||
vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >>
|
||||
ICC_IGRPEN1_EL1_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) &
|
||||
ICC_IGRPEN1_EL1_MASK;
|
||||
}
|
||||
vmcr.grpen0 = FIELD_GET(ICC_IGRPEN0_EL1_MASK, val);
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p, u8 apr, u8 idx)
|
||||
static int get_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
*val = FIELD_PREP(ICC_IGRPEN0_EL1_MASK, vmcr.grpen0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
vmcr.grpen1 = FIELD_GET(ICC_IGRPEN1_EL1_MASK, val);
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
struct vgic_vmcr vmcr;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
*val = FIELD_GET(ICC_IGRPEN1_EL1_MASK, vmcr.grpen1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_apr_reg(struct kvm_vcpu *vcpu, u64 val, u8 apr, u8 idx)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
uint32_t *ap_reg;
|
||||
|
||||
if (apr)
|
||||
ap_reg = &vgicv3->vgic_ap1r[idx];
|
||||
vgicv3->vgic_ap1r[idx] = val;
|
||||
else
|
||||
ap_reg = &vgicv3->vgic_ap0r[idx];
|
||||
|
||||
if (p->is_write)
|
||||
*ap_reg = p->regval;
|
||||
else
|
||||
p->regval = *ap_reg;
|
||||
vgicv3->vgic_ap0r[idx] = val;
|
||||
}
|
||||
|
||||
static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r, u8 apr)
|
||||
static u64 get_apr_reg(struct kvm_vcpu *vcpu, u8 apr, u8 idx)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
if (apr)
|
||||
return vgicv3->vgic_ap1r[idx];
|
||||
else
|
||||
return vgicv3->vgic_ap0r[idx];
|
||||
}
|
||||
|
||||
static int set_gic_ap0r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
|
||||
{
|
||||
u8 idx = r->Op2 & 3;
|
||||
|
||||
if (idx > vgic_v3_max_apr_idx(vcpu))
|
||||
goto err;
|
||||
return -EINVAL;
|
||||
|
||||
vgic_v3_access_apr_reg(vcpu, p, apr, idx);
|
||||
return true;
|
||||
err:
|
||||
if (!p->is_write)
|
||||
p->regval = 0;
|
||||
|
||||
return false;
|
||||
set_apr_reg(vcpu, val, 0, idx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int get_gic_ap0r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
u8 idx = r->Op2 & 3;
|
||||
|
||||
if (idx > vgic_v3_max_apr_idx(vcpu))
|
||||
return -EINVAL;
|
||||
|
||||
*val = get_apr_reg(vcpu, 0, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_gic_ap1r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
|
||||
{
|
||||
return access_gic_aprn(vcpu, p, r, 0);
|
||||
u8 idx = r->Op2 & 3;
|
||||
|
||||
if (idx > vgic_v3_max_apr_idx(vcpu))
|
||||
return -EINVAL;
|
||||
|
||||
set_apr_reg(vcpu, val, 1, idx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int get_gic_ap1r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
return access_gic_aprn(vcpu, p, r, 1);
|
||||
u8 idx = r->Op2 & 3;
|
||||
|
||||
if (idx > vgic_v3_max_apr_idx(vcpu))
|
||||
return -EINVAL;
|
||||
|
||||
*val = get_apr_reg(vcpu, 1, idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
static int set_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 val)
|
||||
{
|
||||
/* Validate SRE bit */
|
||||
if (!(val & ICC_SRE_EL1_SRE))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
|
||||
u64 *val)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
/* Validate SRE bit */
|
||||
if (p->is_write) {
|
||||
if (!(p->regval & ICC_SRE_EL1_SRE))
|
||||
return false;
|
||||
} else {
|
||||
p->regval = vgicv3->vgic_sre;
|
||||
}
|
||||
*val = vgicv3->vgic_sre;
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr },
|
||||
{ SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 },
|
||||
{ SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r },
|
||||
{ SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r },
|
||||
{ SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r },
|
||||
{ SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r },
|
||||
{ SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r },
|
||||
{ SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r },
|
||||
{ SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r },
|
||||
{ SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r },
|
||||
{ SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 },
|
||||
{ SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr },
|
||||
{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
|
||||
{ SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 },
|
||||
{ SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 },
|
||||
{ SYS_DESC(SYS_ICC_PMR_EL1),
|
||||
.set_user = set_gic_pmr, .get_user = get_gic_pmr, },
|
||||
{ SYS_DESC(SYS_ICC_BPR0_EL1),
|
||||
.set_user = set_gic_bpr0, .get_user = get_gic_bpr0, },
|
||||
{ SYS_DESC(SYS_ICC_AP0R0_EL1),
|
||||
.set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
|
||||
{ SYS_DESC(SYS_ICC_AP0R1_EL1),
|
||||
.set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
|
||||
{ SYS_DESC(SYS_ICC_AP0R2_EL1),
|
||||
.set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
|
||||
{ SYS_DESC(SYS_ICC_AP0R3_EL1),
|
||||
.set_user = set_gic_ap0r, .get_user = get_gic_ap0r, },
|
||||
{ SYS_DESC(SYS_ICC_AP1R0_EL1),
|
||||
.set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
|
||||
{ SYS_DESC(SYS_ICC_AP1R1_EL1),
|
||||
.set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
|
||||
{ SYS_DESC(SYS_ICC_AP1R2_EL1),
|
||||
.set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
|
||||
{ SYS_DESC(SYS_ICC_AP1R3_EL1),
|
||||
.set_user = set_gic_ap1r, .get_user = get_gic_ap1r, },
|
||||
{ SYS_DESC(SYS_ICC_BPR1_EL1),
|
||||
.set_user = set_gic_bpr1, .get_user = get_gic_bpr1, },
|
||||
{ SYS_DESC(SYS_ICC_CTLR_EL1),
|
||||
.set_user = set_gic_ctlr, .get_user = get_gic_ctlr, },
|
||||
{ SYS_DESC(SYS_ICC_SRE_EL1),
|
||||
.set_user = set_gic_sre, .get_user = get_gic_sre, },
|
||||
{ SYS_DESC(SYS_ICC_IGRPEN0_EL1),
|
||||
.set_user = set_gic_grpen0, .get_user = get_gic_grpen0, },
|
||||
{ SYS_DESC(SYS_ICC_IGRPEN1_EL1),
|
||||
.set_user = set_gic_grpen1, .get_user = get_gic_grpen1, },
|
||||
};
|
||||
|
||||
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg)
|
||||
static u64 attr_to_id(u64 attr)
|
||||
{
|
||||
struct sys_reg_params params;
|
||||
u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
|
||||
return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr),
|
||||
FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP1_MASK, attr),
|
||||
FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRN_MASK, attr),
|
||||
FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRM_MASK, attr),
|
||||
FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP2_MASK, attr));
|
||||
}
|
||||
|
||||
params.regval = *reg;
|
||||
params.is_write = is_write;
|
||||
|
||||
if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs)))
|
||||
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
||||
{
|
||||
if (get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs)))
|
||||
return 0;
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg)
|
||||
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr,
|
||||
bool is_write)
|
||||
{
|
||||
struct sys_reg_params params;
|
||||
const struct sys_reg_desc *r;
|
||||
u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
|
||||
struct kvm_one_reg reg = {
|
||||
.id = attr_to_id(attr->attr),
|
||||
.addr = attr->addr,
|
||||
};
|
||||
|
||||
if (is_write)
|
||||
params.regval = *reg;
|
||||
params.is_write = is_write;
|
||||
|
||||
r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs));
|
||||
if (!r)
|
||||
return -ENXIO;
|
||||
|
||||
if (!r->access(vcpu, &params, r))
|
||||
return -EINVAL;
|
||||
|
||||
if (!is_write)
|
||||
*reg = params.regval;
|
||||
|
||||
return 0;
|
||||
return kvm_sys_reg_set_user(vcpu, &reg, gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs));
|
||||
else
|
||||
return kvm_sys_reg_get_user(vcpu, &reg, gic_v3_icc_reg_descs,
|
||||
ARRAY_SIZE(gic_v3_icc_reg_descs));
|
||||
}
|
||||
|
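The rewritten ICC accessors above use FIELD_GET()/FIELD_PREP() from <linux/bitfield.h> instead of the old open-coded shift-and-mask pairs. A short user-space sketch of the semantics those macros provide is below; the helpers and the example mask are illustrative re-implementations, not the kernel macros themselves.

	#include <stdint.h>
	#include <stdio.h>

	/* Extract the field selected by a contiguous mask, shifted down to bit 0. */
	static uint64_t field_get(uint64_t mask, uint64_t reg)
	{
		return (reg & mask) >> __builtin_ctzll(mask);
	}

	/* Place a value into the field selected by a contiguous mask. */
	static uint64_t field_prep(uint64_t mask, uint64_t val)
	{
		return (val << __builtin_ctzll(mask)) & mask;
	}

	#define PRI_BITS_MASK	0x700ULL	/* illustrative mask, bits [10:8] */

	int main(void)
	{
		uint64_t reg = field_prep(PRI_BITS_MASK, 5);

		printf("reg=0x%llx field=%llu\n",
		       (unsigned long long)reg,
		       (unsigned long long)field_get(PRI_BITS_MASK, reg));
		return 0;
	}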
@ -41,11 +41,42 @@ static int vgic_check_type(struct kvm *kvm, int type_needed)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr)
|
||||
{
|
||||
struct vgic_dist *vgic = &kvm->arch.vgic;
|
||||
int r;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) {
|
||||
case KVM_VGIC_V2_ADDR_TYPE_DIST:
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
if (!r)
|
||||
r = vgic_check_iorange(kvm, vgic->vgic_dist_base, dev_addr->addr,
|
||||
SZ_4K, KVM_VGIC_V2_DIST_SIZE);
|
||||
if (!r)
|
||||
vgic->vgic_dist_base = dev_addr->addr;
|
||||
break;
|
||||
case KVM_VGIC_V2_ADDR_TYPE_CPU:
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
if (!r)
|
||||
r = vgic_check_iorange(kvm, vgic->vgic_cpu_base, dev_addr->addr,
|
||||
SZ_4K, KVM_VGIC_V2_CPU_SIZE);
|
||||
if (!r)
|
||||
vgic->vgic_cpu_base = dev_addr->addr;
|
||||
break;
|
||||
default:
|
||||
r = -ENODEV;
|
||||
}
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_vgic_addr - set or get vgic VM base addresses
|
||||
* @kvm: pointer to the vm struct
|
||||
* @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
|
||||
* @addr: pointer to address value
|
||||
* @attr: pointer to the attribute being retrieved/updated
|
||||
* @write: if true set the address in the VM address space, if false read the
|
||||
* address
|
||||
*
|
||||
@ -57,15 +88,22 @@ static int vgic_check_type(struct kvm *kvm, int type_needed)
|
||||
* overlapping regions in case of a virtual GICv3 here, since we don't know
|
||||
* the number of VCPUs yet, so we defer this check to map_resources().
|
||||
*/
|
||||
int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool write)
|
||||
{
|
||||
int r = 0;
|
||||
u64 __user *uaddr = (u64 __user *)attr->addr;
|
||||
struct vgic_dist *vgic = &kvm->arch.vgic;
|
||||
phys_addr_t *addr_ptr, alignment, size;
|
||||
u64 undef_value = VGIC_ADDR_UNDEF;
|
||||
u64 addr;
|
||||
int r;
|
||||
|
||||
/* Reading a redistributor region addr implies getting the index */
|
||||
if (write || attr->attr == KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION)
|
||||
if (get_user(addr, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
switch (type) {
|
||||
switch (attr->attr) {
|
||||
case KVM_VGIC_V2_ADDR_TYPE_DIST:
|
||||
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
addr_ptr = &vgic->vgic_dist_base;
|
||||
@ -91,7 +129,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
if (r)
|
||||
break;
|
||||
if (write) {
|
||||
r = vgic_v3_set_redist_base(kvm, 0, *addr, 0);
|
||||
r = vgic_v3_set_redist_base(kvm, 0, addr, 0);
|
||||
goto out;
|
||||
}
|
||||
rdreg = list_first_entry_or_null(&vgic->rd_regions,
|
||||
@ -111,14 +149,12 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
if (r)
|
||||
break;
|
||||
|
||||
index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK;
|
||||
index = addr & KVM_VGIC_V3_RDIST_INDEX_MASK;
|
||||
|
||||
if (write) {
|
||||
gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK;
|
||||
u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK)
|
||||
>> KVM_VGIC_V3_RDIST_COUNT_SHIFT;
|
||||
u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK)
|
||||
>> KVM_VGIC_V3_RDIST_FLAGS_SHIFT;
|
||||
gpa_t base = addr & KVM_VGIC_V3_RDIST_BASE_MASK;
|
||||
u32 count = FIELD_GET(KVM_VGIC_V3_RDIST_COUNT_MASK, addr);
|
||||
u8 flags = FIELD_GET(KVM_VGIC_V3_RDIST_FLAGS_MASK, addr);
|
||||
|
||||
if (!count || flags)
|
||||
r = -EINVAL;
|
||||
@ -134,9 +170,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
goto out;
|
||||
}
|
||||
|
||||
*addr = index;
|
||||
*addr |= rdreg->base;
|
||||
*addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT;
|
||||
addr = index;
|
||||
addr |= rdreg->base;
|
||||
addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT;
|
||||
goto out;
|
||||
}
|
||||
default:
|
||||
@ -147,15 +183,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
||||
goto out;
|
||||
|
||||
if (write) {
|
||||
r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size);
|
||||
r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size);
|
||||
if (!r)
|
||||
*addr_ptr = *addr;
|
||||
*addr_ptr = addr;
|
||||
} else {
|
||||
*addr = *addr_ptr;
|
||||
addr = *addr_ptr;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
if (!r && !write)
|
||||
r = put_user(addr, uaddr);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
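kvm_vgic_addr() now works directly off the kvm_device_attr and does the get_user()/put_user() of the 64-bit address itself, instead of having every caller copy it. The user-space interface is unchanged; a hedged sketch of setting the GICv3 distributor base through the device-attribute API follows. The vgic_fd parameter is assumed to be a device fd obtained earlier from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_V3, and the base address is only an example value.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_gicv3_dist_base(int vgic_fd, uint64_t base)
	{
		struct kvm_device_attr attr = {
			.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
			.attr  = KVM_VGIC_V3_ADDR_TYPE_DIST,
			.addr  = (uint64_t)(uintptr_t)&base,	/* kernel reads the u64 with get_user() */
		};

		return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
	}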
@ -165,17 +205,9 @@ static int vgic_set_common_attr(struct kvm_device *dev,
|
||||
int r;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR: {
|
||||
u64 __user *uaddr = (u64 __user *)(long)attr->addr;
|
||||
u64 addr;
|
||||
unsigned long type = (unsigned long)attr->attr;
|
||||
|
||||
if (copy_from_user(&addr, uaddr, sizeof(addr)))
|
||||
return -EFAULT;
|
||||
|
||||
r = kvm_vgic_addr(dev->kvm, type, &addr, true);
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR:
|
||||
r = kvm_vgic_addr(dev->kvm, attr, true);
|
||||
return (r == -ENODEV) ? -ENXIO : r;
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u32 val;
|
||||
@ -214,6 +246,24 @@ static int vgic_set_common_attr(struct kvm_device *dev,
|
||||
r = vgic_init(dev->kvm);
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
return r;
|
||||
case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
|
||||
/*
|
||||
* OK, this one isn't common at all, but we
|
||||
* want to handle all control group attributes
|
||||
* in a single place.
|
||||
*/
|
||||
if (vgic_check_type(dev->kvm, KVM_DEV_TYPE_ARM_VGIC_V3))
|
||||
return -ENXIO;
|
||||
mutex_lock(&dev->kvm->lock);
|
||||
|
||||
if (!lock_all_vcpus(dev->kvm)) {
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
r = vgic_v3_save_pending_tables(dev->kvm);
|
||||
unlock_all_vcpus(dev->kvm);
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -228,22 +278,9 @@ static int vgic_get_common_attr(struct kvm_device *dev,
|
||||
int r = -ENXIO;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR: {
|
||||
u64 __user *uaddr = (u64 __user *)(long)attr->addr;
|
||||
u64 addr;
|
||||
unsigned long type = (unsigned long)attr->attr;
|
||||
|
||||
if (copy_from_user(&addr, uaddr, sizeof(addr)))
|
||||
return -EFAULT;
|
||||
|
||||
r = kvm_vgic_addr(dev->kvm, type, &addr, false);
|
||||
if (r)
|
||||
return (r == -ENODEV) ? -ENXIO : r;
|
||||
|
||||
if (copy_to_user(uaddr, &addr, sizeof(addr)))
|
||||
return -EFAULT;
|
||||
break;
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_ADDR:
|
||||
r = kvm_vgic_addr(dev->kvm, attr, false);
|
||||
return (r == -ENODEV) ? -ENXIO : r;
|
||||
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
|
||||
@ -348,17 +385,18 @@ bool lock_all_vcpus(struct kvm *kvm)
|
||||
*
|
||||
* @dev: kvm device handle
|
||||
* @attr: kvm device attribute
|
||||
* @reg: address the value is read or written
|
||||
* @is_write: true if userspace is writing a register
|
||||
*/
|
||||
static int vgic_v2_attr_regs_access(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr,
|
||||
u32 *reg, bool is_write)
|
||||
bool is_write)
|
||||
{
|
||||
u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
|
||||
struct vgic_reg_attr reg_attr;
|
||||
gpa_t addr;
|
||||
struct kvm_vcpu *vcpu;
|
||||
int ret;
|
||||
u32 val;
|
||||
|
||||
ret = vgic_v2_parse_attr(dev, attr, &reg_attr);
|
||||
if (ret)
|
||||
@ -367,6 +405,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
|
||||
vcpu = reg_attr.vcpu;
|
||||
addr = reg_attr.addr;
|
||||
|
||||
if (is_write)
|
||||
if (get_user(val, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
mutex_lock(&dev->kvm->lock);
|
||||
|
||||
ret = vgic_init(dev->kvm);
|
||||
@ -380,10 +422,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
|
||||
ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg);
|
||||
ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val);
|
||||
break;
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg);
|
||||
ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, &val);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@ -393,57 +435,35 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
|
||||
unlock_all_vcpus(dev->kvm);
|
||||
out:
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
|
||||
if (!ret && !is_write)
|
||||
ret = put_user(val, uaddr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vgic_v2_set_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_set_common_attr(dev, attr);
|
||||
if (ret != -ENXIO)
|
||||
return ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u32 reg;
|
||||
|
||||
if (get_user(reg, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
return vgic_v2_attr_regs_access(dev, attr, &reg, true);
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
|
||||
return vgic_v2_attr_regs_access(dev, attr, true);
|
||||
default:
|
||||
return vgic_set_common_attr(dev, attr);
|
||||
}
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int vgic_v2_get_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_get_common_attr(dev, attr);
|
||||
if (ret != -ENXIO)
|
||||
return ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u32 reg = 0;
|
||||
|
||||
ret = vgic_v2_attr_regs_access(dev, attr, &reg, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
return put_user(reg, uaddr);
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
|
||||
return vgic_v2_attr_regs_access(dev, attr, false);
|
||||
default:
|
||||
return vgic_get_common_attr(dev, attr);
|
||||
}
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int vgic_v2_has_attr(struct kvm_device *dev,
|
||||
@ -512,18 +532,18 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
|
||||
*
|
||||
* @dev: kvm device handle
|
||||
* @attr: kvm device attribute
|
||||
* @reg: address the value is read or written
|
||||
* @is_write: true if userspace is writing a register
|
||||
*/
|
||||
static int vgic_v3_attr_regs_access(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr,
|
||||
u64 *reg, bool is_write)
|
||||
bool is_write)
|
||||
{
|
||||
struct vgic_reg_attr reg_attr;
|
||||
gpa_t addr;
|
||||
struct kvm_vcpu *vcpu;
|
||||
bool uaccess;
|
||||
u32 val;
|
||||
int ret;
|
||||
u32 tmp32;
|
||||
|
||||
ret = vgic_v3_parse_attr(dev, attr, &reg_attr);
|
||||
if (ret)
|
||||
@ -532,6 +552,21 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
|
||||
vcpu = reg_attr.vcpu;
|
||||
addr = reg_attr.addr;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
/* Sysregs uaccess is performed by the sysreg handling code */
|
||||
uaccess = false;
|
||||
break;
|
||||
default:
|
||||
uaccess = true;
|
||||
}
|
||||
|
||||
if (uaccess && is_write) {
|
||||
u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
|
||||
if (get_user(val, uaddr))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
mutex_lock(&dev->kvm->lock);
|
||||
|
||||
if (unlikely(!vgic_initialized(dev->kvm))) {
|
||||
@ -546,29 +581,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
if (is_write)
|
||||
tmp32 = *reg;
|
||||
|
||||
ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32);
|
||||
if (!is_write)
|
||||
*reg = tmp32;
|
||||
ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &val);
|
||||
break;
|
||||
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
|
||||
if (is_write)
|
||||
tmp32 = *reg;
|
||||
|
||||
ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32);
|
||||
if (!is_write)
|
||||
*reg = tmp32;
|
||||
ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &val);
|
||||
break;
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
|
||||
u64 regid;
|
||||
|
||||
regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
|
||||
ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write,
|
||||
regid, reg);
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
ret = vgic_v3_cpu_sysregs_uaccess(vcpu, attr, is_write);
|
||||
break;
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
|
||||
unsigned int info, intid;
|
||||
|
||||
@ -578,7 +598,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
|
||||
intid = attr->attr &
|
||||
KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK;
|
||||
ret = vgic_v3_line_level_info_uaccess(vcpu, is_write,
|
||||
intid, reg);
|
||||
intid, &val);
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
@ -592,117 +612,41 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
|
||||
unlock_all_vcpus(dev->kvm);
|
||||
out:
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
|
||||
if (!ret && uaccess && !is_write) {
|
||||
u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr;
|
||||
ret = put_user(val, uaddr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vgic_v3_set_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_set_common_attr(dev, attr);
|
||||
if (ret != -ENXIO)
|
||||
return ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u32 tmp32;
|
||||
u64 reg;
|
||||
|
||||
if (get_user(tmp32, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
reg = tmp32;
|
||||
return vgic_v3_attr_regs_access(dev, attr, &reg, true);
|
||||
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
|
||||
return vgic_v3_attr_regs_access(dev, attr, true);
|
||||
default:
|
||||
return vgic_set_common_attr(dev, attr);
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
|
||||
u64 __user *uaddr = (u64 __user *)(long)attr->addr;
|
||||
u64 reg;
|
||||
|
||||
if (get_user(reg, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
return vgic_v3_attr_regs_access(dev, attr, &reg, true);
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u64 reg;
|
||||
u32 tmp32;
|
||||
|
||||
if (get_user(tmp32, uaddr))
|
||||
return -EFAULT;
|
||||
|
||||
reg = tmp32;
|
||||
return vgic_v3_attr_regs_access(dev, attr, &reg, true);
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_CTRL: {
|
||||
int ret;
|
||||
|
||||
switch (attr->attr) {
|
||||
case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
|
||||
mutex_lock(&dev->kvm->lock);
|
||||
|
||||
if (!lock_all_vcpus(dev->kvm)) {
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
return -EBUSY;
|
||||
}
|
||||
ret = vgic_v3_save_pending_tables(dev->kvm);
|
||||
unlock_all_vcpus(dev->kvm);
|
||||
mutex_unlock(&dev->kvm->lock);
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int vgic_v3_get_attr(struct kvm_device *dev,
|
||||
struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_get_common_attr(dev, attr);
|
||||
if (ret != -ENXIO)
|
||||
return ret;
|
||||
|
||||
switch (attr->group) {
|
||||
case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u64 reg;
|
||||
u32 tmp32;
|
||||
|
||||
ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
tmp32 = reg;
|
||||
return put_user(tmp32, uaddr);
|
||||
case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
|
||||
return vgic_v3_attr_regs_access(dev, attr, false);
|
||||
default:
|
||||
return vgic_get_common_attr(dev, attr);
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
|
||||
u64 __user *uaddr = (u64 __user *)(long)attr->addr;
|
||||
u64 reg;
|
||||
|
||||
ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
return put_user(reg, uaddr);
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
|
||||
u32 __user *uaddr = (u32 __user *)(long)attr->addr;
|
||||
u64 reg;
|
||||
u32 tmp32;
|
||||
|
||||
ret = vgic_v3_attr_regs_access(dev, attr, &reg, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
tmp32 = reg;
|
||||
return put_user(tmp32, uaddr);
|
||||
}
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static int vgic_v3_has_attr(struct kvm_device *dev,
|
||||
|
@ -986,12 +986,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
|
||||
iodev.base_addr = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: {
|
||||
u64 reg, id;
|
||||
|
||||
id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK);
|
||||
return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, &reg);
|
||||
}
|
||||
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
|
||||
return vgic_v3_has_cpu_sysregs_attr(vcpu, attr);
|
||||
default:
|
||||
return -ENXIO;
|
||||
}
|
||||
@ -1158,7 +1154,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
}
|
||||
|
||||
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
u32 intid, u64 *val)
|
||||
u32 intid, u32 *val)
|
||||
{
|
||||
if (intid % 32)
|
||||
return -EINVAL;
|
||||
|
@ -775,10 +775,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
|
||||
}
|
||||
}
|
||||
|
||||
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
|
||||
u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
|
||||
{
|
||||
int i;
|
||||
u64 val = 0;
|
||||
u32 val = 0;
|
||||
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
@ -798,7 +798,7 @@ u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
|
||||
}
|
||||
|
||||
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
|
||||
const u64 val)
|
||||
const u32 val)
|
||||
{
|
||||
int i;
|
||||
int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
|
||||
|
@ -207,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
|
||||
int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
|
||||
bool is_write, int offset, u32 *val);
|
||||
|
||||
u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
|
||||
u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid);
|
||||
|
||||
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
|
||||
const u64 val);
|
||||
const u32 val);
|
||||
|
||||
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
|
||||
|
||||
|
@ -245,12 +245,11 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
int offset, u32 *val);
|
||||
int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
int offset, u32 *val);
|
||||
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
u64 id, u64 *val);
|
||||
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
|
||||
u64 *reg);
|
||||
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
|
||||
struct kvm_device_attr *attr, bool is_write);
|
||||
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
|
||||
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
u32 intid, u64 *val);
|
||||
u32 intid, u32 *val);
|
||||
int kvm_register_vgic_device(unsigned long type);
|
||||
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
|
@ -156,6 +156,18 @@
|
||||
(_AC(1, UL) << IRQ_S_TIMER) | \
|
||||
(_AC(1, UL) << IRQ_S_EXT))
|
||||
|
||||
/* xENVCFG flags */
|
||||
#define ENVCFG_STCE (_AC(1, ULL) << 63)
|
||||
#define ENVCFG_PBMTE (_AC(1, ULL) << 62)
|
||||
#define ENVCFG_CBZE (_AC(1, UL) << 7)
|
||||
#define ENVCFG_CBCFE (_AC(1, UL) << 6)
|
||||
#define ENVCFG_CBIE_SHIFT 4
|
||||
#define ENVCFG_CBIE (_AC(0x3, UL) << ENVCFG_CBIE_SHIFT)
|
||||
#define ENVCFG_CBIE_ILL _AC(0x0, UL)
|
||||
#define ENVCFG_CBIE_FLUSH _AC(0x1, UL)
|
||||
#define ENVCFG_CBIE_INV _AC(0x3, UL)
|
||||
#define ENVCFG_FIOM _AC(0x1, UL)
|
||||
|
||||
/* symbolic CSR names: */
|
||||
#define CSR_CYCLE 0xc00
|
||||
#define CSR_TIME 0xc01
|
||||
@ -252,7 +264,9 @@
|
||||
#define CSR_HTIMEDELTA 0x605
|
||||
#define CSR_HCOUNTEREN 0x606
|
||||
#define CSR_HGEIE 0x607
|
||||
#define CSR_HENVCFG 0x60a
|
||||
#define CSR_HTIMEDELTAH 0x615
|
||||
#define CSR_HENVCFGH 0x61a
|
||||
#define CSR_HTVAL 0x643
|
||||
#define CSR_HIP 0x644
|
||||
#define CSR_HVIP 0x645
|
||||
@ -264,6 +278,8 @@
|
||||
#define CSR_MISA 0x301
|
||||
#define CSR_MIE 0x304
|
||||
#define CSR_MTVEC 0x305
|
||||
#define CSR_MENVCFG 0x30a
|
||||
#define CSR_MENVCFGH 0x31a
|
||||
#define CSR_MSCRATCH 0x340
|
||||
#define CSR_MEPC 0x341
|
||||
#define CSR_MCAUSE 0x342
|
||||
|
@ -14,7 +14,9 @@
|
||||
#include <linux/kvm_types.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <asm/csr.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/kvm_vcpu_fp.h>
|
||||
#include <asm/kvm_vcpu_insn.h>
|
||||
#include <asm/kvm_vcpu_timer.h>
|
||||
|
||||
#define KVM_MAX_VCPUS 1024
|
||||
@ -63,6 +65,8 @@ struct kvm_vcpu_stat {
|
||||
u64 wfi_exit_stat;
|
||||
u64 mmio_exit_user;
|
||||
u64 mmio_exit_kernel;
|
||||
u64 csr_exit_user;
|
||||
u64 csr_exit_kernel;
|
||||
u64 exits;
|
||||
};
|
||||
|
||||
@ -90,14 +94,6 @@ struct kvm_arch {
|
||||
struct kvm_guest_timer timer;
|
||||
};
|
||||
|
||||
struct kvm_mmio_decode {
|
||||
unsigned long insn;
|
||||
int insn_len;
|
||||
int len;
|
||||
int shift;
|
||||
int return_handled;
|
||||
};
|
||||
|
||||
struct kvm_sbi_context {
|
||||
int return_handled;
|
||||
};
|
||||
@ -170,7 +166,7 @@ struct kvm_vcpu_arch {
|
||||
int last_exit_cpu;
|
||||
|
||||
/* ISA feature bits (similar to MISA) */
|
||||
unsigned long isa;
|
||||
DECLARE_BITMAP(isa, RISCV_ISA_EXT_MAX);
|
||||
|
||||
/* SSCRATCH, STVEC, and SCOUNTEREN of Host */
|
||||
unsigned long host_sscratch;
|
||||
@ -216,6 +212,9 @@ struct kvm_vcpu_arch {
|
||||
/* MMIO instruction details */
|
||||
struct kvm_mmio_decode mmio_decode;
|
||||
|
||||
/* CSR instruction details */
|
||||
struct kvm_csr_decode csr_decode;
|
||||
|
||||
/* SBI context */
|
||||
struct kvm_sbi_context sbi_context;
|
||||
|
||||
@ -285,6 +284,11 @@ void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
|
||||
void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
|
||||
unsigned long hbase, unsigned long hmask);
|
||||
|
||||
int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa,
|
||||
phys_addr_t hpa, unsigned long size,
|
||||
bool writable, bool in_atomic);
|
||||
void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa,
|
||||
unsigned long size);
|
||||
int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
|
||||
struct kvm_memory_slot *memslot,
|
||||
gpa_t gpa, unsigned long hva, bool is_write);
|
||||
@ -303,14 +307,12 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
|
||||
|
||||
void __kvm_riscv_unpriv_trap(void);
|
||||
|
||||
void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu);
|
||||
unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
|
||||
bool read_insn,
|
||||
unsigned long guest_addr,
|
||||
struct kvm_cpu_trap *trap);
|
||||
void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
|
||||
struct kvm_cpu_trap *trap);
|
||||
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap);
|
||||
|
||||
|
@ -22,9 +22,9 @@ void __kvm_riscv_fp_d_restore(struct kvm_cpu_context *context);
|
||||
|
||||
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa);
|
||||
const unsigned long *isa);
|
||||
void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa);
|
||||
const unsigned long *isa);
|
||||
void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx);
|
||||
void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx);
|
||||
#else
|
||||
@ -32,12 +32,12 @@ static inline void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa)
|
||||
const unsigned long *isa)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_guest_fp_restore(
|
||||
struct kvm_cpu_context *cntx,
|
||||
unsigned long isa)
|
||||
const unsigned long *isa)
|
||||
{
|
||||
}
|
||||
static inline void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
|
||||
|
arch/riscv/include/asm/kvm_vcpu_insn.h (new file, 48 lines)
@ -0,0 +1,48 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#ifndef __KVM_VCPU_RISCV_INSN_H
|
||||
#define __KVM_VCPU_RISCV_INSN_H
|
||||
|
||||
struct kvm_vcpu;
|
||||
struct kvm_run;
|
||||
struct kvm_cpu_trap;
|
||||
|
||||
struct kvm_mmio_decode {
|
||||
unsigned long insn;
|
||||
int insn_len;
|
||||
int len;
|
||||
int shift;
|
||||
int return_handled;
|
||||
};
|
||||
|
||||
struct kvm_csr_decode {
|
||||
unsigned long insn;
|
||||
int return_handled;
|
||||
};
|
||||
|
||||
/* Return values used by function emulating a particular instruction */
|
||||
enum kvm_insn_return {
|
||||
KVM_INSN_EXIT_TO_USER_SPACE = 0,
|
||||
KVM_INSN_CONTINUE_NEXT_SEPC,
|
||||
KVM_INSN_CONTINUE_SAME_SEPC,
|
||||
KVM_INSN_ILLEGAL_TRAP,
|
||||
KVM_INSN_VIRTUAL_TRAP
|
||||
};
|
||||
|
||||
void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_vcpu_csr_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap);
|
||||
|
||||
int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr,
|
||||
unsigned long htinst);
|
||||
int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr,
|
||||
unsigned long htinst);
|
||||
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
|
||||
|
||||
#endif
|
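The new header above gives every RISC-V instruction-emulation helper a common set of return codes. A hedged sketch of how a dispatcher might act on them follows; the enum mirrors the one added above, while the handler, its arguments and its return convention are simplified stand-ins rather than the actual vcpu_insn.c logic.

	#include <stdio.h>

	/* Same values as the kvm_insn_return enum added above. */
	enum kvm_insn_return {
		KVM_INSN_EXIT_TO_USER_SPACE = 0,
		KVM_INSN_CONTINUE_NEXT_SEPC,
		KVM_INSN_CONTINUE_SAME_SEPC,
		KVM_INSN_ILLEGAL_TRAP,
		KVM_INSN_VIRTUAL_TRAP
	};

	/* Illustrative dispatcher: translate an emulation result into what the
	 * run loop should do next.  Returns 1 to keep running the guest, 0 to
	 * bounce out to user space, -1 to inject a trap into the guest. */
	static int handle_insn_result(enum kvm_insn_return rc, unsigned long *sepc,
				      int insn_len)
	{
		switch (rc) {
		case KVM_INSN_CONTINUE_NEXT_SEPC:
			*sepc += insn_len;	/* skip the emulated instruction */
			return 1;
		case KVM_INSN_CONTINUE_SAME_SEPC:
			return 1;		/* retry from the same PC */
		case KVM_INSN_EXIT_TO_USER_SPACE:
			return 0;
		case KVM_INSN_ILLEGAL_TRAP:
		case KVM_INSN_VIRTUAL_TRAP:
		default:
			return -1;
		}
	}

	int main(void)
	{
		unsigned long sepc = 0x1000;
		int next = handle_insn_result(KVM_INSN_CONTINUE_NEXT_SEPC, &sepc, 4);

		printf("next=%d sepc=0x%lx\n", next, sepc);
		return 0;
	}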
@ -39,6 +39,6 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
|
||||
void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
|
||||
int kvm_riscv_guest_timer_init(struct kvm *kvm);
|
||||
void kvm_riscv_guest_timer_init(struct kvm *kvm);
|
||||
|
||||
#endif
|
||||
|
@ -96,6 +96,7 @@ enum KVM_RISCV_ISA_EXT_ID {
|
||||
KVM_RISCV_ISA_EXT_H,
|
||||
KVM_RISCV_ISA_EXT_I,
|
||||
KVM_RISCV_ISA_EXT_M,
|
||||
KVM_RISCV_ISA_EXT_SVPBMT,
|
||||
KVM_RISCV_ISA_EXT_MAX,
|
||||
};
|
||||
|
||||
|
@ -17,6 +17,7 @@ kvm-y += mmu.o
|
||||
kvm-y += vcpu.o
|
||||
kvm-y += vcpu_exit.o
|
||||
kvm-y += vcpu_fp.o
|
||||
kvm-y += vcpu_insn.o
|
||||
kvm-y += vcpu_switch.o
|
||||
kvm-y += vcpu_sbi.o
|
||||
kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
|
||||
|
@ -343,23 +343,24 @@ static void gstage_wp_memory_region(struct kvm *kvm, int slot)
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
||||
static int gstage_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
|
||||
unsigned long size, bool writable)
|
||||
int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa,
|
||||
phys_addr_t hpa, unsigned long size,
|
||||
bool writable, bool in_atomic)
|
||||
{
|
||||
pte_t pte;
|
||||
int ret = 0;
|
||||
unsigned long pfn;
|
||||
phys_addr_t addr, end;
|
||||
struct kvm_mmu_memory_cache pcache;
|
||||
|
||||
memset(&pcache, 0, sizeof(pcache));
|
||||
pcache.gfp_zero = __GFP_ZERO;
|
||||
struct kvm_mmu_memory_cache pcache = {
|
||||
.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0,
|
||||
.gfp_zero = __GFP_ZERO,
|
||||
};
|
||||
|
||||
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
|
||||
pfn = __phys_to_pfn(hpa);
|
||||
|
||||
for (addr = gpa; addr < end; addr += PAGE_SIZE) {
|
||||
pte = pfn_pte(pfn, PAGE_KERNEL);
|
||||
pte = pfn_pte(pfn, PAGE_KERNEL_IO);
|
||||
|
||||
if (!writable)
|
||||
pte = pte_wrprotect(pte);
|
||||
@ -382,6 +383,13 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size)
|
||||
{
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
gstage_unmap_range(kvm, gpa, size, false);
|
||||
spin_unlock(&kvm->mmu_lock);
|
||||
}
|
||||
|
||||
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
||||
struct kvm_memory_slot *slot,
|
||||
gfn_t gfn_offset,
|
||||
@ -517,8 +525,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = gstage_ioremap(kvm, gpa, pa,
|
||||
vm_end - vm_start, writable);
|
||||
ret = kvm_riscv_gstage_ioremap(kvm, gpa, pa,
|
||||
vm_end - vm_start,
|
||||
writable, false);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@ -611,7 +620,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
|
||||
{
|
||||
int ret;
|
||||
kvm_pfn_t hfn;
|
||||
bool writeable;
|
||||
bool writable;
|
||||
short vma_pageshift;
|
||||
gfn_t gfn = gpa >> PAGE_SHIFT;
|
||||
struct vm_area_struct *vma;
|
||||
@ -659,7 +668,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
|
||||
|
||||
mmu_seq = kvm->mmu_notifier_seq;
|
||||
|
||||
hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
|
||||
hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable);
|
||||
if (hfn == KVM_PFN_ERR_HWPOISON) {
|
||||
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
|
||||
vma_pageshift, current);
|
||||
@ -673,14 +682,14 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
|
||||
* for write faults.
|
||||
*/
|
||||
if (logging && !is_write)
|
||||
writeable = false;
|
||||
writable = false;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
||||
if (mmu_notifier_retry(kvm, mmu_seq))
|
||||
goto out_unlock;
|
||||
|
||||
if (writeable) {
|
||||
if (writable) {
|
||||
kvm_set_pfn_dirty(hfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
|
||||
|
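kvm_riscv_gstage_ioremap() above walks the guest-physical range one page at a time, building an I/O PTE per page and drawing page-table allocations from a pre-initialized memory cache (GFP_ATOMIC via the new gfp_custom field when called from atomic context). A hedged user-space sketch of just that page-walk shape is below, with the kernel-specific mapping helper stubbed out; names and the example addresses are invented.

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE	4096UL
	#define PAGE_MASK	(~(PAGE_SIZE - 1))

	/* Stub for the real g-stage mapping helper; here it only reports the work. */
	static int map_one_page(uint64_t gpa, uint64_t hpa, int writable)
	{
		printf("map gpa=0x%llx -> hpa=0x%llx %s\n",
		       (unsigned long long)gpa, (unsigned long long)hpa,
		       writable ? "rw" : "ro");
		return 0;
	}

	static int gstage_ioremap(uint64_t gpa, uint64_t hpa, unsigned long size,
				  int writable)
	{
		/* Round the end up to a page boundary, as the kernel code does. */
		uint64_t addr, end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;

		for (addr = gpa; addr < end; addr += PAGE_SIZE, hpa += PAGE_SIZE) {
			int ret = map_one_page(addr, hpa, writable);

			if (ret)
				return ret;
		}
		return 0;
	}

	int main(void)
	{
		return gstage_ioremap(0x10000000, 0x80000000, 3 * PAGE_SIZE, 0);
	}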
@ -26,6 +26,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
||||
STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
|
||||
STATS_DESC_COUNTER(VCPU, mmio_exit_user),
|
||||
STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
|
||||
STATS_DESC_COUNTER(VCPU, csr_exit_user),
|
||||
STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
|
||||
STATS_DESC_COUNTER(VCPU, exits)
|
||||
};
|
||||
|
||||
@ -38,16 +40,58 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||
sizeof(kvm_vcpu_stats_desc),
|
||||
};
|
||||
|
||||
#define KVM_RISCV_ISA_DISABLE_ALLOWED (riscv_isa_extension_mask(d) | \
|
||||
riscv_isa_extension_mask(f))
|
||||
#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
|
||||
|
||||
#define KVM_RISCV_ISA_DISABLE_NOT_ALLOWED (riscv_isa_extension_mask(a) | \
|
||||
riscv_isa_extension_mask(c) | \
|
||||
riscv_isa_extension_mask(i) | \
|
||||
riscv_isa_extension_mask(m))
|
||||
/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
|
||||
static const unsigned long kvm_isa_ext_arr[] = {
|
||||
RISCV_ISA_EXT_a,
|
||||
RISCV_ISA_EXT_c,
|
||||
RISCV_ISA_EXT_d,
|
||||
RISCV_ISA_EXT_f,
|
||||
RISCV_ISA_EXT_h,
|
||||
RISCV_ISA_EXT_i,
|
||||
RISCV_ISA_EXT_m,
|
||||
RISCV_ISA_EXT_SVPBMT,
|
||||
};
|
||||
|
||||
#define KVM_RISCV_ISA_ALLOWED (KVM_RISCV_ISA_DISABLE_ALLOWED | \
|
||||
KVM_RISCV_ISA_DISABLE_NOT_ALLOWED)
|
||||
static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
|
||||
if (kvm_isa_ext_arr[i] == base_ext)
|
||||
return i;
|
||||
}
|
||||
|
||||
return KVM_RISCV_ISA_EXT_MAX;
|
||||
}
|
||||
|
||||
static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
|
||||
{
|
||||
switch (ext) {
|
||||
case KVM_RISCV_ISA_EXT_H:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
|
||||
{
|
||||
switch (ext) {
|
||||
case KVM_RISCV_ISA_EXT_A:
|
||||
case KVM_RISCV_ISA_EXT_C:
|
||||
case KVM_RISCV_ISA_EXT_I:
|
||||
case KVM_RISCV_ISA_EXT_M:
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
@ -99,13 +143,20 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_cpu_context *cntx;
|
||||
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
|
||||
unsigned long host_isa, i;
|
||||
|
||||
/* Mark this VCPU never ran */
|
||||
vcpu->arch.ran_atleast_once = false;
|
||||
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
|
||||
bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
|
||||
|
||||
/* Setup ISA features available to VCPU */
|
||||
vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
|
||||
for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
|
||||
host_isa = kvm_isa_ext_arr[i];
|
||||
if (__riscv_isa_extension_available(NULL, host_isa) &&
|
||||
kvm_riscv_vcpu_isa_enable_allowed(i))
|
||||
set_bit(host_isa, vcpu->arch.isa);
|
||||
}
|
||||
|
||||
/* Setup VCPU hfence queue */
|
||||
spin_lock_init(&vcpu->arch.hfence_lock);
|
||||
@@ -199,7 +250,7 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
reg_val = vcpu->arch.isa;
|
||||
reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@@ -219,7 +270,7 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
KVM_REG_SIZE_MASK |
|
||||
KVM_REG_RISCV_CONFIG);
|
||||
unsigned long reg_val;
|
||||
unsigned long i, isa_ext, reg_val;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
@@ -227,13 +278,32 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
|
||||
if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
return -EFAULT;
|
||||
|
||||
/* This ONE REG interface is only defined for single letter extensions */
|
||||
if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
|
||||
return -EINVAL;
|
||||
|
||||
switch (reg_num) {
|
||||
case KVM_REG_RISCV_CONFIG_REG(isa):
|
||||
if (!vcpu->arch.ran_atleast_once) {
|
||||
/* Ignore the disable request for these extensions */
|
||||
vcpu->arch.isa = reg_val | KVM_RISCV_ISA_DISABLE_NOT_ALLOWED;
|
||||
vcpu->arch.isa &= riscv_isa_extension_base(NULL);
|
||||
vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
|
||||
/* Ignore the enable/disable request for certain extensions */
|
||||
for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
|
||||
isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
|
||||
if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
|
||||
reg_val &= ~BIT(i);
|
||||
continue;
|
||||
}
|
||||
if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
|
||||
if (reg_val & BIT(i))
|
||||
reg_val &= ~BIT(i);
|
||||
if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
|
||||
if (!(reg_val & BIT(i)))
|
||||
reg_val |= BIT(i);
|
||||
}
|
||||
reg_val &= riscv_isa_extension_base(NULL);
|
||||
/* Do not modify anything beyond single letter extensions */
|
||||
reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
|
||||
(reg_val & KVM_RISCV_BASE_ISA_MASK);
|
||||
vcpu->arch.isa[0] = reg_val;
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
@@ -374,17 +444,6 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
|
||||
static unsigned long kvm_isa_ext_arr[] = {
|
||||
RISCV_ISA_EXT_a,
|
||||
RISCV_ISA_EXT_c,
|
||||
RISCV_ISA_EXT_d,
|
||||
RISCV_ISA_EXT_f,
|
||||
RISCV_ISA_EXT_h,
|
||||
RISCV_ISA_EXT_i,
|
||||
RISCV_ISA_EXT_m,
|
||||
};
|
||||
|
||||
static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
const struct kvm_one_reg *reg)
|
||||
{
|
||||
@@ -399,11 +458,12 @@ static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
|
||||
if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
|
||||
reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
|
||||
return -EINVAL;
|
||||
|
||||
host_isa_ext = kvm_isa_ext_arr[reg_num];
|
||||
if (__riscv_isa_extension_available(&vcpu->arch.isa, host_isa_ext))
|
||||
if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
|
||||
reg_val = 1; /* Mark the given extension as available */
|
||||
|
||||
if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
|
||||
@@ -422,12 +482,12 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
KVM_REG_RISCV_ISA_EXT);
|
||||
unsigned long reg_val;
|
||||
unsigned long host_isa_ext;
|
||||
unsigned long host_isa_ext_mask;
|
||||
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
|
||||
return -EINVAL;
|
||||
|
||||
if (reg_num >= KVM_RISCV_ISA_EXT_MAX || reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
|
||||
if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
|
||||
reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
|
||||
@@ -437,30 +497,19 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
|
||||
if (!__riscv_isa_extension_available(NULL, host_isa_ext))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (host_isa_ext >= RISCV_ISA_EXT_BASE &&
|
||||
host_isa_ext < RISCV_ISA_EXT_MAX) {
|
||||
/*
|
||||
* Multi-letter ISA extension. Currently there is no provision
|
||||
* to enable/disable the multi-letter ISA extensions for guests.
|
||||
* Return success if the request is to enable any ISA extension
|
||||
* that is available in the hardware.
|
||||
* Return -EOPNOTSUPP otherwise.
|
||||
*/
|
||||
if (!reg_val)
|
||||
return -EOPNOTSUPP;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Single letter base ISA extension */
|
||||
if (!vcpu->arch.ran_atleast_once) {
|
||||
host_isa_ext_mask = BIT_MASK(host_isa_ext);
|
||||
if (!reg_val && (host_isa_ext_mask & KVM_RISCV_ISA_DISABLE_ALLOWED))
|
||||
vcpu->arch.isa &= ~host_isa_ext_mask;
|
||||
/*
|
||||
* All multi-letter extensions and a few single-letter
* extensions can be disabled
|
||||
*/
|
||||
if (reg_val == 1 &&
|
||||
kvm_riscv_vcpu_isa_enable_allowed(reg_num))
|
||||
set_bit(host_isa_ext, vcpu->arch.isa);
|
||||
else if (!reg_val &&
|
||||
kvm_riscv_vcpu_isa_disable_allowed(reg_num))
|
||||
clear_bit(host_isa_ext, vcpu->arch.isa);
|
||||
else
|
||||
vcpu->arch.isa |= host_isa_ext_mask;
|
||||
vcpu->arch.isa &= riscv_isa_extension_base(NULL);
|
||||
vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
|
||||
return -EINVAL;
|
||||
kvm_riscv_vcpu_fp_reset(vcpu);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
@@ -729,6 +778,19 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
|
||||
{
|
||||
u64 henvcfg = 0;
|
||||
|
||||
if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT))
|
||||
henvcfg |= ENVCFG_PBMTE;
|
||||
|
||||
csr_write(CSR_HENVCFG, henvcfg);
|
||||
#ifdef CONFIG_32BIT
|
||||
csr_write(CSR_HENVCFGH, henvcfg >> 32);
|
||||
#endif
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
{
|
||||
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
|
||||
@@ -743,6 +805,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
csr_write(CSR_HVIP, csr->hvip);
|
||||
csr_write(CSR_VSATP, csr->vsatp);
|
||||
|
||||
kvm_riscv_vcpu_update_config(vcpu->arch.isa);
|
||||
|
||||
kvm_riscv_gstage_update_hgatp(vcpu);
|
||||
|
||||
kvm_riscv_vcpu_timer_restore(vcpu);
|
||||
@@ -853,22 +917,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
|
||||
/* Process MMIO value returned from user-space */
|
||||
if (run->exit_reason == KVM_EXIT_MMIO) {
|
||||
switch (run->exit_reason) {
|
||||
case KVM_EXIT_MMIO:
|
||||
/* Process MMIO value returned from user-space */
|
||||
ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
|
||||
if (ret) {
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* Process SBI value returned from user-space */
|
||||
if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
|
||||
break;
|
||||
case KVM_EXIT_RISCV_SBI:
|
||||
/* Process SBI value returned from user-space */
|
||||
ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
|
||||
if (ret) {
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case KVM_EXIT_RISCV_CSR:
|
||||
/* Process CSR value returned from user-space */
|
||||
ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
|
||||
break;
|
||||
default:
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
if (ret) {
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (run->immediate_exit) {
|
||||
@@ -890,8 +958,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvm_riscv_check_vcpu_requests(vcpu);
|
||||
|
||||
preempt_disable();
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
/*
|
||||
@@ -928,7 +994,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
kvm_request_pending(vcpu)) {
|
||||
vcpu->mode = OUTSIDE_GUEST_MODE;
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
continue;
|
||||
}
|
||||
@@ -962,6 +1027,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
/* Syncup interrupts state with HW */
|
||||
kvm_riscv_vcpu_sync_interrupts(vcpu);
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/*
|
||||
* We must ensure that any pending interrupts are taken before
|
||||
* we exit guest timing so that timer ticks are accounted as
|
||||
|
@@ -6,435 +6,34 @@
|
||||
* Anup Patel <anup.patel@wdc.com>
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/csr.h>
|
||||
|
||||
#define INSN_OPCODE_MASK 0x007c
|
||||
#define INSN_OPCODE_SHIFT 2
|
||||
#define INSN_OPCODE_SYSTEM 28
|
||||
|
||||
#define INSN_MASK_WFI 0xffffffff
|
||||
#define INSN_MATCH_WFI 0x10500073
|
||||
|
||||
#define INSN_MATCH_LB 0x3
|
||||
#define INSN_MASK_LB 0x707f
|
||||
#define INSN_MATCH_LH 0x1003
|
||||
#define INSN_MASK_LH 0x707f
|
||||
#define INSN_MATCH_LW 0x2003
|
||||
#define INSN_MASK_LW 0x707f
|
||||
#define INSN_MATCH_LD 0x3003
|
||||
#define INSN_MASK_LD 0x707f
|
||||
#define INSN_MATCH_LBU 0x4003
|
||||
#define INSN_MASK_LBU 0x707f
|
||||
#define INSN_MATCH_LHU 0x5003
|
||||
#define INSN_MASK_LHU 0x707f
|
||||
#define INSN_MATCH_LWU 0x6003
|
||||
#define INSN_MASK_LWU 0x707f
|
||||
#define INSN_MATCH_SB 0x23
|
||||
#define INSN_MASK_SB 0x707f
|
||||
#define INSN_MATCH_SH 0x1023
|
||||
#define INSN_MASK_SH 0x707f
|
||||
#define INSN_MATCH_SW 0x2023
|
||||
#define INSN_MASK_SW 0x707f
|
||||
#define INSN_MATCH_SD 0x3023
|
||||
#define INSN_MASK_SD 0x707f
|
||||
|
||||
#define INSN_MATCH_C_LD 0x6000
|
||||
#define INSN_MASK_C_LD 0xe003
|
||||
#define INSN_MATCH_C_SD 0xe000
|
||||
#define INSN_MASK_C_SD 0xe003
|
||||
#define INSN_MATCH_C_LW 0x4000
|
||||
#define INSN_MASK_C_LW 0xe003
|
||||
#define INSN_MATCH_C_SW 0xc000
|
||||
#define INSN_MASK_C_SW 0xe003
|
||||
#define INSN_MATCH_C_LDSP 0x6002
|
||||
#define INSN_MASK_C_LDSP 0xe003
|
||||
#define INSN_MATCH_C_SDSP 0xe002
|
||||
#define INSN_MASK_C_SDSP 0xe003
|
||||
#define INSN_MATCH_C_LWSP 0x4002
|
||||
#define INSN_MASK_C_LWSP 0xe003
|
||||
#define INSN_MATCH_C_SWSP 0xc002
|
||||
#define INSN_MASK_C_SWSP 0xe003
|
||||
|
||||
#define INSN_16BIT_MASK 0x3
|
||||
|
||||
#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
|
||||
|
||||
#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define LOG_REGBYTES 3
|
||||
#else
|
||||
#define LOG_REGBYTES 2
|
||||
#endif
|
||||
#define REGBYTES (1 << LOG_REGBYTES)
|
||||
|
||||
#define SH_RD 7
|
||||
#define SH_RS1 15
|
||||
#define SH_RS2 20
|
||||
#define SH_RS2C 2
|
||||
|
||||
#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
|
||||
#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
|
||||
(RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 5, 1) << 6))
|
||||
#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 5, 2) << 6))
|
||||
#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
|
||||
(RV_X(x, 12, 1) << 5) | \
|
||||
(RV_X(x, 2, 2) << 6))
|
||||
#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
|
||||
(RV_X(x, 12, 1) << 5) | \
|
||||
(RV_X(x, 2, 3) << 6))
|
||||
#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
|
||||
(RV_X(x, 7, 2) << 6))
|
||||
#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 7, 3) << 6))
|
||||
#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
|
||||
#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
|
||||
#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
|
||||
|
||||
#define SHIFT_RIGHT(x, y) \
|
||||
((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
|
||||
|
||||
#define REG_MASK \
|
||||
((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
|
||||
|
||||
#define REG_OFFSET(insn, pos) \
|
||||
(SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
|
||||
|
||||
#define REG_PTR(insn, pos, regs) \
|
||||
((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
|
||||
|
||||
#define GET_RM(insn) (((insn) >> 12) & 7)
|
||||
|
||||
#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
|
||||
#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
|
||||
#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
|
||||
#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
|
||||
#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
|
||||
#define GET_SP(regs) (*REG_PTR(2, 0, regs))
|
||||
#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
|
||||
#define IMM_I(insn) ((s32)(insn) >> 20)
|
||||
#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
|
||||
(s32)(((insn) >> 7) & 0x1f))
|
||||
#define MASK_FUNCT3 0x7000
|
||||
|
||||
static int truly_illegal_insn(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run,
|
||||
ulong insn)
|
||||
{
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
|
||||
/* Redirect trap to Guest VCPU */
|
||||
utrap.sepc = vcpu->arch.guest_context.sepc;
|
||||
utrap.scause = EXC_INST_ILLEGAL;
|
||||
utrap.stval = insn;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int system_opcode_insn(struct kvm_vcpu *vcpu,
|
||||
struct kvm_run *run,
|
||||
ulong insn)
|
||||
{
|
||||
if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
kvm_riscv_vcpu_wfi(vcpu);
|
||||
vcpu->arch.guest_context.sepc += INSN_LEN(insn);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
|
||||
static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
unsigned long insn = trap->stval;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct;
|
||||
|
||||
if (unlikely(INSN_IS_16BIT(insn))) {
|
||||
if (insn == 0) {
|
||||
ct = &vcpu->arch.guest_context;
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
|
||||
ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (INSN_IS_16BIT(insn))
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
|
||||
switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
|
||||
case INSN_OPCODE_SYSTEM:
|
||||
return system_opcode_insn(vcpu, run, insn);
|
||||
default:
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
}
|
||||
|
||||
static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr, unsigned long htinst)
|
||||
{
|
||||
u8 data_buf[8];
|
||||
unsigned long insn;
|
||||
int shift = 0, len = 0, insn_len = 0;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
|
||||
|
||||
/* Determine trapped instruction */
|
||||
if (htinst & 0x1) {
|
||||
/*
|
||||
* Bit[0] == 1 implies trapped instruction value is
|
||||
* transformed instruction or custom instruction.
|
||||
*/
|
||||
insn = htinst | INSN_16BIT_MASK;
|
||||
insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
|
||||
} else {
|
||||
/*
|
||||
* Bit[0] == 0 implies trapped instruction value is
|
||||
* zero or special value.
|
||||
*/
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
/* Redirect trap if we failed to read instruction */
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
insn_len = INSN_LEN(insn);
|
||||
}
|
||||
|
||||
/* Decode length of MMIO and shift */
|
||||
if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
|
||||
len = 1;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
|
||||
len = 1;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
|
||||
len = 4;
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
|
||||
len = 2;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
|
||||
len = 2;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
insn = RVC_RS2S(insn) << SH_RD;
|
||||
} else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
insn = RVC_RS2S(insn) << SH_RD;
|
||||
} else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Fault address should be aligned to length of MMIO */
|
||||
if (fault_addr & (len - 1))
|
||||
return -EIO;
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.mmio_decode.insn = insn;
|
||||
vcpu->arch.mmio_decode.insn_len = insn_len;
|
||||
vcpu->arch.mmio_decode.shift = shift;
|
||||
vcpu->arch.mmio_decode.len = len;
|
||||
vcpu->arch.mmio_decode.return_handled = 0;
|
||||
|
||||
/* Update MMIO details in kvm_run struct */
|
||||
run->mmio.is_write = false;
|
||||
run->mmio.phys_addr = fault_addr;
|
||||
run->mmio.len = len;
|
||||
|
||||
/* Try to handle MMIO access in the kernel */
|
||||
if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
|
||||
/* Successfully handled MMIO access in the kernel so resume */
|
||||
memcpy(run->mmio.data, data_buf, len);
|
||||
vcpu->stat.mmio_exit_kernel++;
|
||||
kvm_riscv_vcpu_mmio_return(vcpu, run);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Exit to userspace for MMIO emulation */
|
||||
vcpu->stat.mmio_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr, unsigned long htinst)
|
||||
{
|
||||
u8 data8;
|
||||
u16 data16;
|
||||
u32 data32;
|
||||
u64 data64;
|
||||
ulong data;
|
||||
unsigned long insn;
|
||||
int len = 0, insn_len = 0;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
|
||||
|
||||
/* Determine trapped instruction */
|
||||
if (htinst & 0x1) {
|
||||
/*
|
||||
* Bit[0] == 1 implies trapped instruction value is
|
||||
* transformed instruction or custom instruction.
|
||||
*/
|
||||
insn = htinst | INSN_16BIT_MASK;
|
||||
insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
|
||||
} else {
|
||||
/*
|
||||
* Bit[0] == 0 implies trapped instruction value is
|
||||
* zero or special value.
|
||||
*/
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
/* Redirect trap if we failed to read instruction */
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
insn_len = INSN_LEN(insn);
|
||||
}
|
||||
|
||||
data = GET_RS2(insn, &vcpu->arch.guest_context);
|
||||
data8 = data16 = data32 = data64 = data;
|
||||
|
||||
if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
|
||||
len = 4;
|
||||
} else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
|
||||
len = 1;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
|
||||
len = 8;
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
|
||||
len = 2;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
|
||||
len = 8;
|
||||
data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
|
||||
} else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 8;
|
||||
data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
|
||||
len = 4;
|
||||
data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
|
||||
} else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 4;
|
||||
data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Fault address should be aligned to length of MMIO */
|
||||
if (fault_addr & (len - 1))
|
||||
return -EIO;
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.mmio_decode.insn = insn;
|
||||
vcpu->arch.mmio_decode.insn_len = insn_len;
|
||||
vcpu->arch.mmio_decode.shift = 0;
|
||||
vcpu->arch.mmio_decode.len = len;
|
||||
vcpu->arch.mmio_decode.return_handled = 0;
|
||||
|
||||
/* Copy data to kvm_run instance */
|
||||
switch (len) {
|
||||
case 1:
|
||||
*((u8 *)run->mmio.data) = data8;
|
||||
break;
|
||||
case 2:
|
||||
*((u16 *)run->mmio.data) = data16;
|
||||
break;
|
||||
case 4:
|
||||
*((u32 *)run->mmio.data) = data32;
|
||||
break;
|
||||
case 8:
|
||||
*((u64 *)run->mmio.data) = data64;
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Update MMIO details in kvm_run struct */
|
||||
run->mmio.is_write = true;
|
||||
run->mmio.phys_addr = fault_addr;
|
||||
run->mmio.len = len;
|
||||
|
||||
/* Try to handle MMIO access in the kernel */
|
||||
if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
|
||||
fault_addr, len, run->mmio.data)) {
|
||||
/* Successfully handled MMIO access in the kernel so resume */
|
||||
vcpu->stat.mmio_exit_kernel++;
|
||||
kvm_riscv_vcpu_mmio_return(vcpu, run);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Exit to userspace for MMIO emulation */
|
||||
vcpu->stat.mmio_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
struct kvm_memory_slot *memslot;
|
||||
unsigned long hva, fault_addr;
|
||||
bool writeable;
|
||||
bool writable;
|
||||
gfn_t gfn;
|
||||
int ret;
|
||||
|
||||
fault_addr = (trap->htval << 2) | (trap->stval & 0x3);
|
||||
gfn = fault_addr >> PAGE_SHIFT;
|
||||
memslot = gfn_to_memslot(vcpu->kvm, gfn);
|
||||
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
|
||||
hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
|
||||
|
||||
if (kvm_is_error_hva(hva) ||
|
||||
(trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) {
|
||||
(trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writable)) {
|
||||
switch (trap->scause) {
|
||||
case EXC_LOAD_GUEST_PAGE_FAULT:
|
||||
return emulate_load(vcpu, run, fault_addr,
|
||||
trap->htinst);
|
||||
return kvm_riscv_vcpu_mmio_load(vcpu, run,
|
||||
fault_addr,
|
||||
trap->htinst);
|
||||
case EXC_STORE_GUEST_PAGE_FAULT:
|
||||
return emulate_store(vcpu, run, fault_addr,
|
||||
trap->htinst);
|
||||
return kvm_riscv_vcpu_mmio_store(vcpu, run,
|
||||
fault_addr,
|
||||
trap->htinst);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
};
|
||||
@@ -448,21 +47,6 @@ static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_wfi -- Emulate wait for interrupt (WFI) behaviour
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
*/
|
||||
void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
kvm_vcpu_halt(vcpu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_unpriv_read -- Read machine word from Guest memory
|
||||
*
|
||||
@@ -601,66 +185,6 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
|
||||
vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
|
||||
* or in-kernel IO emulation
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the mmio data
|
||||
*/
|
||||
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
u8 data8;
|
||||
u16 data16;
|
||||
u32 data32;
|
||||
u64 data64;
|
||||
ulong insn;
|
||||
int len, shift;
|
||||
|
||||
if (vcpu->arch.mmio_decode.return_handled)
|
||||
return 0;
|
||||
|
||||
vcpu->arch.mmio_decode.return_handled = 1;
|
||||
insn = vcpu->arch.mmio_decode.insn;
|
||||
|
||||
if (run->mmio.is_write)
|
||||
goto done;
|
||||
|
||||
len = vcpu->arch.mmio_decode.len;
|
||||
shift = vcpu->arch.mmio_decode.shift;
|
||||
|
||||
switch (len) {
|
||||
case 1:
|
||||
data8 = *((u8 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data8 << shift >> shift);
|
||||
break;
|
||||
case 2:
|
||||
data16 = *((u16 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data16 << shift >> shift);
|
||||
break;
|
||||
case 4:
|
||||
data32 = *((u32 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data32 << shift >> shift);
|
||||
break;
|
||||
case 8:
|
||||
data64 = *((u64 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data64 << shift >> shift);
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
done:
|
||||
/* Move to next instruction */
|
||||
vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
|
||||
* proper exit to userspace.
|
||||
@@ -680,7 +204,7 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
switch (trap->scause) {
|
||||
case EXC_VIRTUAL_INST_FAULT:
|
||||
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
|
||||
ret = virtual_inst_fault(vcpu, run, trap);
|
||||
ret = kvm_riscv_vcpu_virtual_insn(vcpu, run, trap);
|
||||
break;
|
||||
case EXC_INST_GUEST_PAGE_FAULT:
|
||||
case EXC_LOAD_GUEST_PAGE_FAULT:
|
||||
|
@@ -16,12 +16,11 @@
|
||||
#ifdef CONFIG_FPU
|
||||
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long isa = vcpu->arch.isa;
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
|
||||
cntx->sstatus &= ~SR_FS;
|
||||
if (riscv_isa_extension_available(&isa, f) ||
|
||||
riscv_isa_extension_available(&isa, d))
|
||||
if (riscv_isa_extension_available(vcpu->arch.isa, f) ||
|
||||
riscv_isa_extension_available(vcpu->arch.isa, d))
|
||||
cntx->sstatus |= SR_FS_INITIAL;
|
||||
else
|
||||
cntx->sstatus |= SR_FS_OFF;
|
||||
@@ -34,24 +33,24 @@ static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa)
|
||||
const unsigned long *isa)
|
||||
{
|
||||
if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) {
|
||||
if (riscv_isa_extension_available(&isa, d))
|
||||
if (riscv_isa_extension_available(isa, d))
|
||||
__kvm_riscv_fp_d_save(cntx);
|
||||
else if (riscv_isa_extension_available(&isa, f))
|
||||
else if (riscv_isa_extension_available(isa, f))
|
||||
__kvm_riscv_fp_f_save(cntx);
|
||||
kvm_riscv_vcpu_fp_clean(cntx);
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
|
||||
unsigned long isa)
|
||||
const unsigned long *isa)
|
||||
{
|
||||
if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
|
||||
if (riscv_isa_extension_available(&isa, d))
|
||||
if (riscv_isa_extension_available(isa, d))
|
||||
__kvm_riscv_fp_d_restore(cntx);
|
||||
else if (riscv_isa_extension_available(&isa, f))
|
||||
else if (riscv_isa_extension_available(isa, f))
|
||||
__kvm_riscv_fp_f_restore(cntx);
|
||||
kvm_riscv_vcpu_fp_clean(cntx);
|
||||
}
|
||||
@@ -80,7 +79,6 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
|
||||
unsigned long rtype)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long isa = vcpu->arch.isa;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
@@ -89,7 +87,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
|
||||
void *reg_val;
|
||||
|
||||
if ((rtype == KVM_REG_RISCV_FP_F) &&
|
||||
riscv_isa_extension_available(&isa, f)) {
|
||||
riscv_isa_extension_available(vcpu->arch.isa, f)) {
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
|
||||
return -EINVAL;
|
||||
if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
|
||||
@@ -100,7 +98,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
|
||||
else
|
||||
return -EINVAL;
|
||||
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
|
||||
riscv_isa_extension_available(&isa, d)) {
|
||||
riscv_isa_extension_available(vcpu->arch.isa, d)) {
|
||||
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
|
||||
return -EINVAL;
|
||||
@@ -126,7 +124,6 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
|
||||
unsigned long rtype)
|
||||
{
|
||||
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
|
||||
unsigned long isa = vcpu->arch.isa;
|
||||
unsigned long __user *uaddr =
|
||||
(unsigned long __user *)(unsigned long)reg->addr;
|
||||
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
|
||||
@@ -135,7 +132,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
|
||||
void *reg_val;
|
||||
|
||||
if ((rtype == KVM_REG_RISCV_FP_F) &&
|
||||
riscv_isa_extension_available(&isa, f)) {
|
||||
riscv_isa_extension_available(vcpu->arch.isa, f)) {
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
|
||||
return -EINVAL;
|
||||
if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
|
||||
@@ -146,7 +143,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
|
||||
else
|
||||
return -EINVAL;
|
||||
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
|
||||
riscv_isa_extension_available(&isa, d)) {
|
||||
riscv_isa_extension_available(vcpu->arch.isa, d)) {
|
||||
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
|
||||
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
|
||||
return -EINVAL;
|
||||
|
752
arch/riscv/kvm/vcpu_insn.c
Normal file
752
arch/riscv/kvm/vcpu_insn.c
Normal file
@ -0,0 +1,752 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
|
||||
* Copyright (c) 2022 Ventana Micro Systems Inc.
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/kvm_host.h>
|
||||
|
||||
#define INSN_OPCODE_MASK 0x007c
|
||||
#define INSN_OPCODE_SHIFT 2
|
||||
#define INSN_OPCODE_SYSTEM 28
|
||||
|
||||
#define INSN_MASK_WFI 0xffffffff
|
||||
#define INSN_MATCH_WFI 0x10500073
|
||||
|
||||
#define INSN_MATCH_CSRRW 0x1073
|
||||
#define INSN_MASK_CSRRW 0x707f
|
||||
#define INSN_MATCH_CSRRS 0x2073
|
||||
#define INSN_MASK_CSRRS 0x707f
|
||||
#define INSN_MATCH_CSRRC 0x3073
|
||||
#define INSN_MASK_CSRRC 0x707f
|
||||
#define INSN_MATCH_CSRRWI 0x5073
|
||||
#define INSN_MASK_CSRRWI 0x707f
|
||||
#define INSN_MATCH_CSRRSI 0x6073
|
||||
#define INSN_MASK_CSRRSI 0x707f
|
||||
#define INSN_MATCH_CSRRCI 0x7073
|
||||
#define INSN_MASK_CSRRCI 0x707f
|
||||
|
||||
#define INSN_MATCH_LB 0x3
|
||||
#define INSN_MASK_LB 0x707f
|
||||
#define INSN_MATCH_LH 0x1003
|
||||
#define INSN_MASK_LH 0x707f
|
||||
#define INSN_MATCH_LW 0x2003
|
||||
#define INSN_MASK_LW 0x707f
|
||||
#define INSN_MATCH_LD 0x3003
|
||||
#define INSN_MASK_LD 0x707f
|
||||
#define INSN_MATCH_LBU 0x4003
|
||||
#define INSN_MASK_LBU 0x707f
|
||||
#define INSN_MATCH_LHU 0x5003
|
||||
#define INSN_MASK_LHU 0x707f
|
||||
#define INSN_MATCH_LWU 0x6003
|
||||
#define INSN_MASK_LWU 0x707f
|
||||
#define INSN_MATCH_SB 0x23
|
||||
#define INSN_MASK_SB 0x707f
|
||||
#define INSN_MATCH_SH 0x1023
|
||||
#define INSN_MASK_SH 0x707f
|
||||
#define INSN_MATCH_SW 0x2023
|
||||
#define INSN_MASK_SW 0x707f
|
||||
#define INSN_MATCH_SD 0x3023
|
||||
#define INSN_MASK_SD 0x707f
|
||||
|
||||
#define INSN_MATCH_C_LD 0x6000
|
||||
#define INSN_MASK_C_LD 0xe003
|
||||
#define INSN_MATCH_C_SD 0xe000
|
||||
#define INSN_MASK_C_SD 0xe003
|
||||
#define INSN_MATCH_C_LW 0x4000
|
||||
#define INSN_MASK_C_LW 0xe003
|
||||
#define INSN_MATCH_C_SW 0xc000
|
||||
#define INSN_MASK_C_SW 0xe003
|
||||
#define INSN_MATCH_C_LDSP 0x6002
|
||||
#define INSN_MASK_C_LDSP 0xe003
|
||||
#define INSN_MATCH_C_SDSP 0xe002
|
||||
#define INSN_MASK_C_SDSP 0xe003
|
||||
#define INSN_MATCH_C_LWSP 0x4002
|
||||
#define INSN_MASK_C_LWSP 0xe003
|
||||
#define INSN_MATCH_C_SWSP 0xc002
|
||||
#define INSN_MASK_C_SWSP 0xe003
|
||||
|
||||
#define INSN_16BIT_MASK 0x3
|
||||
|
||||
#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
|
||||
|
||||
#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define LOG_REGBYTES 3
|
||||
#else
|
||||
#define LOG_REGBYTES 2
|
||||
#endif
|
||||
#define REGBYTES (1 << LOG_REGBYTES)
|
||||
|
||||
#define SH_RD 7
|
||||
#define SH_RS1 15
|
||||
#define SH_RS2 20
|
||||
#define SH_RS2C 2
|
||||
#define MASK_RX 0x1f
|
||||
|
||||
#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
|
||||
#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
|
||||
(RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 5, 1) << 6))
|
||||
#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 5, 2) << 6))
|
||||
#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
|
||||
(RV_X(x, 12, 1) << 5) | \
|
||||
(RV_X(x, 2, 2) << 6))
|
||||
#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
|
||||
(RV_X(x, 12, 1) << 5) | \
|
||||
(RV_X(x, 2, 3) << 6))
|
||||
#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
|
||||
(RV_X(x, 7, 2) << 6))
|
||||
#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
|
||||
(RV_X(x, 7, 3) << 6))
|
||||
#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
|
||||
#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
|
||||
#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
|
||||
|
||||
#define SHIFT_RIGHT(x, y) \
|
||||
((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
|
||||
|
||||
#define REG_MASK \
|
||||
((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
|
||||
|
||||
#define REG_OFFSET(insn, pos) \
|
||||
(SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
|
||||
|
||||
#define REG_PTR(insn, pos, regs) \
|
||||
((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
|
||||
|
||||
#define GET_FUNCT3(insn) (((insn) >> 12) & 7)
|
||||
|
||||
#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
|
||||
#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
|
||||
#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
|
||||
#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
|
||||
#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
|
||||
#define GET_SP(regs) (*REG_PTR(2, 0, regs))
|
||||
#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
|
||||
#define IMM_I(insn) ((s32)(insn) >> 20)
|
||||
#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
|
||||
(s32)(((insn) >> 7) & 0x1f))

struct insn_func {
	unsigned long mask;
	unsigned long match;
	/*
	 * Possible return values are as follows:
	 * 1) Returns < 0 for error case
	 * 2) Returns 0 for exit to user-space
	 * 3) Returns 1 to continue with next sepc
	 * 4) Returns 2 to continue with same sepc
	 * 5) Returns 3 to inject illegal instruction trap and continue
	 * 6) Returns 4 to inject virtual instruction trap and continue
	 *
	 * Use enum kvm_insn_return for return values
	 */
	int (*func)(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn);
};
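
The return-value contract spelled out in the comment above is what the emulation framework dispatches on. The enum itself lives in the series' vcpu_insn header rather than in this hunk; a sketch consistent with the numbering in the comment and with the KVM_INSN_* constants used later in this file:

/* Sketch only: names match the constants used below, numbering matches the comment. */
enum kvm_insn_return {
	KVM_INSN_EXIT_TO_USER_SPACE = 0,	/* fill in kvm_run and return to the VMM */
	KVM_INSN_CONTINUE_NEXT_SEPC,		/* fully emulated, step sepc past the insn */
	KVM_INSN_CONTINUE_SAME_SEPC,		/* emulated, sepc already advanced elsewhere */
	KVM_INSN_ILLEGAL_TRAP,			/* reflect an illegal-instruction trap to the guest */
	KVM_INSN_VIRTUAL_TRAP			/* reflect a virtual-instruction trap to the guest */
};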
|
||||
|
||||
static int truly_illegal_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
ulong insn)
|
||||
{
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
|
||||
/* Redirect trap to Guest VCPU */
|
||||
utrap.sepc = vcpu->arch.guest_context.sepc;
|
||||
utrap.scause = EXC_INST_ILLEGAL;
|
||||
utrap.stval = insn;
|
||||
utrap.htval = 0;
|
||||
utrap.htinst = 0;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int truly_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
ulong insn)
|
||||
{
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
|
||||
/* Redirect trap to Guest VCPU */
|
||||
utrap.sepc = vcpu->arch.guest_context.sepc;
|
||||
utrap.scause = EXC_VIRTUAL_INST_FAULT;
|
||||
utrap.stval = insn;
|
||||
utrap.htval = 0;
|
||||
utrap.htinst = 0;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_wfi -- Emulate wait for interrupt (WFI) behaviour
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
*/
|
||||
void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!kvm_arch_vcpu_runnable(vcpu)) {
|
||||
kvm_vcpu_srcu_read_unlock(vcpu);
|
||||
kvm_vcpu_halt(vcpu);
|
||||
kvm_vcpu_srcu_read_lock(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static int wfi_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
|
||||
{
|
||||
vcpu->stat.wfi_exit_stat++;
|
||||
kvm_riscv_vcpu_wfi(vcpu);
|
||||
return KVM_INSN_CONTINUE_NEXT_SEPC;
|
||||
}
|
||||
|
||||
struct csr_func {
|
||||
unsigned int base;
|
||||
unsigned int count;
|
||||
/*
|
||||
* Possible return values are as same as "func" callback in
|
||||
* "struct insn_func".
|
||||
*/
|
||||
int (*func)(struct kvm_vcpu *vcpu, unsigned int csr_num,
|
||||
unsigned long *val, unsigned long new_val,
|
||||
unsigned long wr_mask);
|
||||
};
|
||||
|
||||
static const struct csr_func csr_funcs[] = { };
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_csr_return -- Handle CSR read/write after user space
|
||||
* emulation or in-kernel emulation
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the CSR data
|
||||
*
|
||||
* Returns > 0 upon failure and 0 upon success
|
||||
*/
|
||||
int kvm_riscv_vcpu_csr_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
ulong insn;
|
||||
|
||||
if (vcpu->arch.csr_decode.return_handled)
|
||||
return 0;
|
||||
vcpu->arch.csr_decode.return_handled = 1;
|
||||
|
||||
/* Update destination register for CSR reads */
|
||||
insn = vcpu->arch.csr_decode.insn;
|
||||
if ((insn >> SH_RD) & MASK_RX)
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
run->riscv_csr.ret_value);
|
||||
|
||||
/* Move to next instruction */
|
||||
vcpu->arch.guest_context.sepc += INSN_LEN(insn);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int csr_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
|
||||
{
|
||||
int i, rc = KVM_INSN_ILLEGAL_TRAP;
|
||||
unsigned int csr_num = insn >> SH_RS2;
|
||||
unsigned int rs1_num = (insn >> SH_RS1) & MASK_RX;
|
||||
ulong rs1_val = GET_RS1(insn, &vcpu->arch.guest_context);
|
||||
const struct csr_func *tcfn, *cfn = NULL;
|
||||
ulong val = 0, wr_mask = 0, new_val = 0;
|
||||
|
||||
/* Decode the CSR instruction */
|
||||
switch (GET_FUNCT3(insn)) {
|
||||
case GET_FUNCT3(INSN_MATCH_CSRRW):
|
||||
wr_mask = -1UL;
|
||||
new_val = rs1_val;
|
||||
break;
|
||||
case GET_FUNCT3(INSN_MATCH_CSRRS):
|
||||
wr_mask = rs1_val;
|
||||
new_val = -1UL;
|
||||
break;
|
||||
case GET_FUNCT3(INSN_MATCH_CSRRC):
|
||||
wr_mask = rs1_val;
|
||||
new_val = 0;
|
||||
break;
|
||||
case GET_FUNCT3(INSN_MATCH_CSRRWI):
|
||||
wr_mask = -1UL;
|
||||
new_val = rs1_num;
|
||||
break;
|
||||
case GET_FUNCT3(INSN_MATCH_CSRRSI):
|
||||
wr_mask = rs1_num;
|
||||
new_val = -1UL;
|
||||
break;
|
||||
case GET_FUNCT3(INSN_MATCH_CSRRCI):
|
||||
wr_mask = rs1_num;
|
||||
new_val = 0;
|
||||
break;
|
||||
default:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.csr_decode.insn = insn;
|
||||
vcpu->arch.csr_decode.return_handled = 0;
|
||||
|
||||
/* Update CSR details in kvm_run struct */
|
||||
run->riscv_csr.csr_num = csr_num;
|
||||
run->riscv_csr.new_value = new_val;
|
||||
run->riscv_csr.write_mask = wr_mask;
|
||||
run->riscv_csr.ret_value = 0;
|
||||
|
||||
/* Find in-kernel CSR function */
|
||||
for (i = 0; i < ARRAY_SIZE(csr_funcs); i++) {
|
||||
tcfn = &csr_funcs[i];
|
||||
if ((tcfn->base <= csr_num) &&
|
||||
(csr_num < (tcfn->base + tcfn->count))) {
|
||||
cfn = tcfn;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* First try in-kernel CSR emulation */
|
||||
if (cfn && cfn->func) {
|
||||
rc = cfn->func(vcpu, csr_num, &val, new_val, wr_mask);
|
||||
if (rc > KVM_INSN_EXIT_TO_USER_SPACE) {
|
||||
if (rc == KVM_INSN_CONTINUE_NEXT_SEPC) {
|
||||
run->riscv_csr.ret_value = val;
|
||||
vcpu->stat.csr_exit_kernel++;
|
||||
kvm_riscv_vcpu_csr_return(vcpu, run);
|
||||
rc = KVM_INSN_CONTINUE_SAME_SEPC;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* Exit to user-space for CSR emulation */
|
||||
if (rc <= KVM_INSN_EXIT_TO_USER_SPACE) {
|
||||
vcpu->stat.csr_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_RISCV_CSR;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
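
When no in-kernel csr_func claims the CSR, csr_insn() above fills run->riscv_csr and exits to user space with KVM_EXIT_RISCV_CSR. A rough sketch of the VMM side of that contract follows; the struct is abbreviated from the fields the kernel fills in above (the real definition is in the KVM UAPI header), and the handler is hypothetical.

/* Abbreviated from the fields used above; UAPI layout assumed. */
struct riscv_csr_exit {
	unsigned long csr_num;
	unsigned long new_value;
	unsigned long write_mask;
	unsigned long ret_value;	/* VMM writes the read result here */
};

/* Hypothetical VMM handler: emulate one CSR held in user space. */
static void handle_riscv_csr_exit(struct riscv_csr_exit *csr,
				  unsigned long *emulated_csr)
{
	/* The old value goes back to the guest's rd via ret_value. */
	csr->ret_value = *emulated_csr;
	/* Apply only the bits the guest asked to write (CSRRW/S/C semantics). */
	*emulated_csr = (*emulated_csr & ~csr->write_mask) |
			(csr->new_value & csr->write_mask);
	/* The next KVM_RUN resumes the guest via kvm_riscv_vcpu_csr_return(). */
}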
|
||||
|
||||
static const struct insn_func system_opcode_funcs[] = {
|
||||
{
|
||||
.mask = INSN_MASK_CSRRW,
|
||||
.match = INSN_MATCH_CSRRW,
|
||||
.func = csr_insn,
|
||||
},
|
||||
{
|
||||
.mask = INSN_MASK_CSRRS,
|
||||
.match = INSN_MATCH_CSRRS,
|
||||
.func = csr_insn,
|
||||
},
|
||||
{
|
||||
.mask = INSN_MASK_CSRRC,
|
||||
.match = INSN_MATCH_CSRRC,
|
||||
.func = csr_insn,
|
||||
},
|
||||
{
|
||||
.mask = INSN_MASK_CSRRWI,
|
||||
.match = INSN_MATCH_CSRRWI,
|
||||
.func = csr_insn,
|
||||
},
|
||||
{
|
||||
.mask = INSN_MASK_CSRRSI,
|
||||
.match = INSN_MATCH_CSRRSI,
|
||||
.func = csr_insn,
|
||||
},
|
||||
{
|
||||
.mask = INSN_MASK_CSRRCI,
|
||||
.match = INSN_MATCH_CSRRCI,
|
||||
.func = csr_insn,
|
||||
},
|
||||
{
|
||||
.mask = INSN_MASK_WFI,
|
||||
.match = INSN_MATCH_WFI,
|
||||
.func = wfi_insn,
|
||||
},
|
||||
};

static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
			      ulong insn)
{
	int i, rc = KVM_INSN_ILLEGAL_TRAP;
	const struct insn_func *ifn;

	for (i = 0; i < ARRAY_SIZE(system_opcode_funcs); i++) {
		ifn = &system_opcode_funcs[i];
		if ((insn & ifn->mask) == ifn->match) {
			rc = ifn->func(vcpu, run, insn);
			break;
		}
	}

	switch (rc) {
	case KVM_INSN_ILLEGAL_TRAP:
		return truly_illegal_insn(vcpu, run, insn);
	case KVM_INSN_VIRTUAL_TRAP:
		return truly_virtual_insn(vcpu, run, insn);
	case KVM_INSN_CONTINUE_NEXT_SEPC:
		vcpu->arch.guest_context.sepc += INSN_LEN(insn);
		break;
	default:
		break;
	}

	return (rc <= 0) ? rc : 1;
}
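
system_opcode_insn() is the extension point of the new framework: emulating another SYSTEM-opcode instruction only means adding a mask/match/handler entry to system_opcode_funcs[]. A stand-alone toy version of the same table-driven dispatch, reusing the WFI mask/match values defined in this file (the handler body is purely illustrative):

#include <stdio.h>

#define INSN_MASK_WFI	0xffffffff
#define INSN_MATCH_WFI	0x10500073

struct insn_func {
	unsigned long mask;
	unsigned long match;
	int (*func)(unsigned long insn);
};

static int wfi_insn(unsigned long insn)
{
	printf("emulating wfi (0x%lx)\n", insn);
	return 1;	/* "continue with next sepc" in the kernel's terms */
}

static const struct insn_func system_opcode_funcs[] = {
	{ .mask = INSN_MASK_WFI, .match = INSN_MATCH_WFI, .func = wfi_insn },
	/* New instructions slot in here with their own mask/match/handler. */
};

static int dispatch(unsigned long insn)
{
	size_t i;

	for (i = 0; i < sizeof(system_opcode_funcs) / sizeof(system_opcode_funcs[0]); i++) {
		const struct insn_func *ifn = &system_opcode_funcs[i];

		if ((insn & ifn->mask) == ifn->match)
			return ifn->func(insn);
	}
	return -1;	/* no handler: the kernel would reflect an illegal-insn trap */
}

int main(void)
{
	return dispatch(0x10500073) == 1 ? 0 : 1;
}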
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_virtual_insn -- Handle virtual instruction trap
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the mmio data
|
||||
* @trap: Trap details
|
||||
*
|
||||
* Returns > 0 to continue run-loop
|
||||
* Returns 0 to exit run-loop and handle in user-space.
|
||||
* Returns < 0 to report failure and exit run-loop
|
||||
*/
|
||||
int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
struct kvm_cpu_trap *trap)
|
||||
{
|
||||
unsigned long insn = trap->stval;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct;
|
||||
|
||||
if (unlikely(INSN_IS_16BIT(insn))) {
|
||||
if (insn == 0) {
|
||||
ct = &vcpu->arch.guest_context;
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
|
||||
ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (INSN_IS_16BIT(insn))
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
|
||||
switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
|
||||
case INSN_OPCODE_SYSTEM:
|
||||
return system_opcode_insn(vcpu, run, insn);
|
||||
default:
|
||||
return truly_illegal_insn(vcpu, run, insn);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_mmio_load -- Emulate MMIO load instruction
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the mmio data
|
||||
* @fault_addr: Guest physical address to load
|
||||
* @htinst: Transformed encoding of the load instruction
|
||||
*
|
||||
* Returns > 0 to continue run-loop
|
||||
* Returns 0 to exit run-loop and handle in user-space.
|
||||
* Returns < 0 to report failure and exit run-loop
|
||||
*/
|
||||
int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr,
|
||||
unsigned long htinst)
|
||||
{
|
||||
u8 data_buf[8];
|
||||
unsigned long insn;
|
||||
int shift = 0, len = 0, insn_len = 0;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
|
||||
|
||||
/* Determine trapped instruction */
|
||||
if (htinst & 0x1) {
|
||||
/*
|
||||
* Bit[0] == 1 implies trapped instruction value is
|
||||
* transformed instruction or custom instruction.
|
||||
*/
|
||||
insn = htinst | INSN_16BIT_MASK;
|
||||
insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
|
||||
} else {
|
||||
/*
|
||||
* Bit[0] == 0 implies trapped instruction value is
|
||||
* zero or special value.
|
||||
*/
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
/* Redirect trap if we failed to read instruction */
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
insn_len = INSN_LEN(insn);
|
||||
}
|
||||
|
||||
/* Decode length of MMIO and shift */
|
||||
if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
|
||||
len = 1;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
|
||||
len = 1;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
|
||||
len = 4;
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
|
||||
len = 2;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
|
||||
len = 2;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
insn = RVC_RS2S(insn) << SH_RD;
|
||||
} else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 8;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
insn = RVC_RS2S(insn) << SH_RD;
|
||||
} else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 4;
|
||||
shift = 8 * (sizeof(ulong) - len);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Fault address should be aligned to length of MMIO */
|
||||
if (fault_addr & (len - 1))
|
||||
return -EIO;
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.mmio_decode.insn = insn;
|
||||
vcpu->arch.mmio_decode.insn_len = insn_len;
|
||||
vcpu->arch.mmio_decode.shift = shift;
|
||||
vcpu->arch.mmio_decode.len = len;
|
||||
vcpu->arch.mmio_decode.return_handled = 0;
|
||||
|
||||
/* Update MMIO details in kvm_run struct */
|
||||
run->mmio.is_write = false;
|
||||
run->mmio.phys_addr = fault_addr;
|
||||
run->mmio.len = len;
|
||||
|
||||
/* Try to handle MMIO access in the kernel */
|
||||
if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
|
||||
/* Successfully handled MMIO access in the kernel so resume */
|
||||
memcpy(run->mmio.data, data_buf, len);
|
||||
vcpu->stat.mmio_exit_kernel++;
|
||||
kvm_riscv_vcpu_mmio_return(vcpu, run);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Exit to userspace for MMIO emulation */
|
||||
vcpu->stat.mmio_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_mmio_store -- Emulate MMIO store instruction
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the mmio data
|
||||
* @fault_addr: Guest physical address to store
|
||||
* @htinst: Transformed encoding of the store instruction
|
||||
*
|
||||
* Returns > 0 to continue run-loop
|
||||
* Returns 0 to exit run-loop and handle in user-space.
|
||||
* Returns < 0 to report failure and exit run-loop
|
||||
*/
|
||||
int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
|
||||
unsigned long fault_addr,
|
||||
unsigned long htinst)
|
||||
{
|
||||
u8 data8;
|
||||
u16 data16;
|
||||
u32 data32;
|
||||
u64 data64;
|
||||
ulong data;
|
||||
unsigned long insn;
|
||||
int len = 0, insn_len = 0;
|
||||
struct kvm_cpu_trap utrap = { 0 };
|
||||
struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
|
||||
|
||||
/* Determine trapped instruction */
|
||||
if (htinst & 0x1) {
|
||||
/*
|
||||
* Bit[0] == 1 implies trapped instruction value is
|
||||
* transformed instruction or custom instruction.
|
||||
*/
|
||||
insn = htinst | INSN_16BIT_MASK;
|
||||
insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
|
||||
} else {
|
||||
/*
|
||||
* Bit[0] == 0 implies trapped instruction value is
|
||||
* zero or special value.
|
||||
*/
|
||||
insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
|
||||
&utrap);
|
||||
if (utrap.scause) {
|
||||
/* Redirect trap if we failed to read instruction */
|
||||
utrap.sepc = ct->sepc;
|
||||
kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
|
||||
return 1;
|
||||
}
|
||||
insn_len = INSN_LEN(insn);
|
||||
}
|
||||
|
||||
data = GET_RS2(insn, &vcpu->arch.guest_context);
|
||||
data8 = data16 = data32 = data64 = data;
|
||||
|
||||
if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
|
||||
len = 4;
|
||||
} else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
|
||||
len = 1;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
|
||||
len = 8;
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
|
||||
len = 2;
|
||||
#ifdef CONFIG_64BIT
|
||||
} else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
|
||||
len = 8;
|
||||
data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
|
||||
} else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 8;
|
||||
data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
|
||||
#endif
|
||||
} else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
|
||||
len = 4;
|
||||
data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
|
||||
} else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
|
||||
((insn >> SH_RD) & 0x1f)) {
|
||||
len = 4;
|
||||
data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
|
||||
} else {
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Fault address should be aligned to length of MMIO */
|
||||
if (fault_addr & (len - 1))
|
||||
return -EIO;
|
||||
|
||||
/* Save instruction decode info */
|
||||
vcpu->arch.mmio_decode.insn = insn;
|
||||
vcpu->arch.mmio_decode.insn_len = insn_len;
|
||||
vcpu->arch.mmio_decode.shift = 0;
|
||||
vcpu->arch.mmio_decode.len = len;
|
||||
vcpu->arch.mmio_decode.return_handled = 0;
|
||||
|
||||
/* Copy data to kvm_run instance */
|
||||
switch (len) {
|
||||
case 1:
|
||||
*((u8 *)run->mmio.data) = data8;
|
||||
break;
|
||||
case 2:
|
||||
*((u16 *)run->mmio.data) = data16;
|
||||
break;
|
||||
case 4:
|
||||
*((u32 *)run->mmio.data) = data32;
|
||||
break;
|
||||
case 8:
|
||||
*((u64 *)run->mmio.data) = data64;
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/* Update MMIO details in kvm_run struct */
|
||||
run->mmio.is_write = true;
|
||||
run->mmio.phys_addr = fault_addr;
|
||||
run->mmio.len = len;
|
||||
|
||||
/* Try to handle MMIO access in the kernel */
|
||||
if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
|
||||
fault_addr, len, run->mmio.data)) {
|
||||
/* Successfully handled MMIO access in the kernel so resume */
|
||||
vcpu->stat.mmio_exit_kernel++;
|
||||
kvm_riscv_vcpu_mmio_return(vcpu, run);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Exit to userspace for MMIO emulation */
|
||||
vcpu->stat.mmio_exit_user++;
|
||||
run->exit_reason = KVM_EXIT_MMIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
|
||||
* or in-kernel IO emulation
|
||||
*
|
||||
* @vcpu: The VCPU pointer
|
||||
* @run: The VCPU run struct containing the mmio data
|
||||
*/
|
||||
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
|
||||
{
|
||||
u8 data8;
|
||||
u16 data16;
|
||||
u32 data32;
|
||||
u64 data64;
|
||||
ulong insn;
|
||||
int len, shift;
|
||||
|
||||
if (vcpu->arch.mmio_decode.return_handled)
|
||||
return 0;
|
||||
|
||||
vcpu->arch.mmio_decode.return_handled = 1;
|
||||
insn = vcpu->arch.mmio_decode.insn;
|
||||
|
||||
if (run->mmio.is_write)
|
||||
goto done;
|
||||
|
||||
len = vcpu->arch.mmio_decode.len;
|
||||
shift = vcpu->arch.mmio_decode.shift;
|
||||
|
||||
switch (len) {
|
||||
case 1:
|
||||
data8 = *((u8 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data8 << shift >> shift);
|
||||
break;
|
||||
case 2:
|
||||
data16 = *((u16 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data16 << shift >> shift);
|
||||
break;
|
||||
case 4:
|
||||
data32 = *((u32 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data32 << shift >> shift);
|
||||
break;
|
||||
case 8:
|
||||
data64 = *((u64 *)run->mmio.data);
|
||||
SET_RD(insn, &vcpu->arch.guest_context,
|
||||
(ulong)data64 << shift >> shift);
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
done:
|
||||
/* Move to next instruction */
|
||||
vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
|
||||
|
||||
return 0;
|
||||
}
|
@@ -214,12 +214,10 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
|
||||
#endif
|
||||
}
|
||||
|
||||
int kvm_riscv_guest_timer_init(struct kvm *kvm)
|
||||
void kvm_riscv_guest_timer_init(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_guest_timer *gt = &kvm->arch.timer;
|
||||
|
||||
riscv_cs_get_mult_shift(>->nsec_mult, >->nsec_shift);
|
||||
gt->time_delta = -get_cycles64();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -41,7 +41,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
return r;
|
||||
}
|
||||
|
||||
return kvm_riscv_guest_timer_init(kvm);
|
||||
kvm_riscv_guest_timer_init(kvm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
|
@@ -41,6 +41,12 @@ void uv_query_info(void)
|
||||
uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
|
||||
uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
|
||||
uv_info.uv_feature_indications = uvcb.uv_feature_indications;
|
||||
uv_info.supp_se_hdr_ver = uvcb.supp_se_hdr_versions;
|
||||
uv_info.supp_se_hdr_pcf = uvcb.supp_se_hdr_pcf;
|
||||
uv_info.conf_dump_storage_state_len = uvcb.conf_dump_storage_state_len;
|
||||
uv_info.conf_dump_finalize_len = uvcb.conf_dump_finalize_len;
|
||||
uv_info.supp_att_req_hdr_ver = uvcb.supp_att_req_hdr_ver;
|
||||
uv_info.supp_att_pflags = uvcb.supp_att_pflags;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
|
||||
|
@@ -12,10 +12,11 @@
|
||||
|
||||
#include <linux/bit_spinlock.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <asm/tpi.h>
|
||||
|
||||
struct airq_struct {
|
||||
struct hlist_node list; /* Handler queueing. */
|
||||
void (*handler)(struct airq_struct *airq, bool floating);
|
||||
void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
|
||||
u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
|
||||
u8 lsi_mask; /* Local-Summary-Indicator mask */
|
||||
u8 isc; /* Interrupt-subclass */
|
||||
@ -46,8 +47,10 @@ struct airq_iv {
|
||||
#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
|
||||
#define AIRQ_IV_DATA 8 /* Allocate the data array */
|
||||
#define AIRQ_IV_CACHELINE 16 /* Cacheline alignment for the vector */
|
||||
#define AIRQ_IV_GUESTVEC 32 /* Vector is a pinned guest page */
|
||||
|
||||
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
|
||||
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags,
|
||||
unsigned long *vec);
|
||||
void airq_iv_release(struct airq_iv *iv);
|
||||
unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
|
||||
void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
|
||||
|
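A hedged sketch of an adapter-interrupt user written against the reworked handler prototype above, where the handler now receives the raw struct tpi_info instead of a precomputed floating flag; the handler body, names and ISC value are illustrative assumptions, not taken from this patch.

#include <linux/printk.h>
#include <asm/airq.h>
#include <asm/tpi.h>

/* Illustrative handler for the new signature; a driver would decode the
 * fields it cares about from the TPI-provided adapter information. */
static void example_airq_handler(struct airq_struct *airq,
				 struct tpi_info *tpi_info)
{
	struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;

	if (info->directed_irq)
		pr_debug("directed adapter interrupt received\n");
}

static struct airq_struct example_airq = {
	.handler = example_airq_handler,
	.isc	 = 3,	/* example interrupt subclass */
};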
@@ -147,5 +147,42 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start,
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr);
int gmap_mark_unmergeable(void);
void s390_reset_acc(struct mm_struct *mm);
void s390_unlist_old_asce(struct gmap *gmap);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible);

/**
 * s390_uv_destroy_range - Destroy a range of pages in the given mm.
 * @mm: the mm on which to operate on
 * @start: the start of the range
 * @end: the end of the range
 *
 * This function will call cond_sched, so it should not generate stalls, but
 * it will otherwise only return when it completed.
 */
static inline void s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
					 unsigned long end)
{
	(void)__s390_uv_destroy_range(mm, start, end, false);
}

/**
 * s390_uv_destroy_range_interruptible - Destroy a range of pages in the
 * given mm, but stop when a fatal signal is received.
 * @mm: the mm on which to operate on
 * @start: the start of the range
 * @end: the end of the range
 *
 * This function will call cond_sched, so it should not generate stalls. If
 * a fatal signal is received, it will return with -EINTR immediately,
 * without finishing destroying the whole range. Upon successful
 * completion, 0 is returned.
 */
static inline int s390_uv_destroy_range_interruptible(struct mm_struct *mm, unsigned long start,
						      unsigned long end)
{
	return __s390_uv_destroy_range(mm, start, end, true);
}
#endif /* _ASM_S390_GMAP_H */
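A brief usage sketch for the interruptible variant documented above; the caller and its policy are hypothetical, shown only to illustrate the -EINTR contract.

/* Illustrative only: destroy a guest's pages at teardown, but give up
 * cleanly if the task got a fatal signal while walking the range. */
static int example_teardown(struct mm_struct *mm,
			    unsigned long start, unsigned long end)
{
	int rc = s390_uv_destroy_range_interruptible(mm, start, end);

	if (rc == -EINTR)
		return rc; /* caller is dying; remaining pages handled later */
	return 0;
}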
@ -19,6 +19,8 @@
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <asm/debug.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/fpu/api.h>
|
||||
@ -93,19 +95,30 @@ union ipte_control {
|
||||
};
|
||||
};
|
||||
|
||||
union sca_utility {
|
||||
__u16 val;
|
||||
struct {
|
||||
__u16 mtcr : 1;
|
||||
__u16 reserved : 15;
|
||||
};
|
||||
};
|
||||
|
||||
struct bsca_block {
|
||||
union ipte_control ipte_control;
|
||||
__u64 reserved[5];
|
||||
__u64 mcn;
|
||||
__u64 reserved2;
|
||||
union sca_utility utility;
|
||||
__u8 reserved2[6];
|
||||
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
|
||||
};
|
||||
|
||||
struct esca_block {
|
||||
union ipte_control ipte_control;
|
||||
__u64 reserved1[7];
|
||||
__u64 reserved1[6];
|
||||
union sca_utility utility;
|
||||
__u8 reserved2[6];
|
||||
__u64 mcn[4];
|
||||
__u64 reserved2[20];
|
||||
__u64 reserved3[20];
|
||||
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
|
||||
};
|
||||
|
||||
@ -249,12 +262,16 @@ struct kvm_s390_sie_block {
|
||||
#define ECB_SPECI 0x08
|
||||
#define ECB_SRSI 0x04
|
||||
#define ECB_HOSTPROTINT 0x02
|
||||
#define ECB_PTF 0x01
|
||||
__u8 ecb; /* 0x0061 */
|
||||
#define ECB2_CMMA 0x80
|
||||
#define ECB2_IEP 0x20
|
||||
#define ECB2_PFMFI 0x08
|
||||
#define ECB2_ESCA 0x04
|
||||
#define ECB2_ZPCI_LSI 0x02
|
||||
__u8 ecb2; /* 0x0062 */
|
||||
#define ECB3_AISI 0x20
|
||||
#define ECB3_AISII 0x10
|
||||
#define ECB3_DEA 0x08
|
||||
#define ECB3_AES 0x04
|
||||
#define ECB3_RI 0x01
|
||||
@ -759,6 +776,7 @@ struct kvm_vm_stat {
|
||||
u64 inject_pfault_done;
|
||||
u64 inject_service_signal;
|
||||
u64 inject_virtio;
|
||||
u64 aen_forward;
|
||||
};
|
||||
|
||||
struct kvm_arch_memory_slot {
|
||||
@ -923,6 +941,8 @@ struct kvm_s390_pv {
|
||||
u64 guest_len;
|
||||
unsigned long stor_base;
|
||||
void *stor_var;
|
||||
bool dumping;
|
||||
struct mmu_notifier mmu_notifier;
|
||||
};
|
||||
|
||||
struct kvm_arch{
|
||||
@ -939,6 +959,7 @@ struct kvm_arch{
|
||||
int use_cmma;
|
||||
int use_pfmfi;
|
||||
int use_skf;
|
||||
int use_zpci_interp;
|
||||
int user_cpu_state_ctrl;
|
||||
int user_sigp;
|
||||
int user_stsi;
|
||||
@ -962,6 +983,8 @@ struct kvm_arch{
|
||||
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
|
||||
struct kvm_s390_gisa_interrupt gisa_int;
|
||||
struct kvm_s390_pv pv;
|
||||
struct list_head kzdev_list;
|
||||
spinlock_t kzdev_list_lock;
|
||||
};
|
||||
|
||||
#define KVM_HVA_ERR_BAD (-1UL)
|
||||
@ -1012,4 +1035,19 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
|
||||
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
|
||||
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
#define __KVM_HAVE_ARCH_VM_FREE
|
||||
void kvm_arch_free_vm(struct kvm *kvm);
|
||||
|
||||
#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
|
||||
int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
|
||||
void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
|
||||
#else
|
||||
static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
|
||||
struct kvm *kvm)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -18,7 +18,7 @@ typedef struct {
|
||||
unsigned long asce_limit;
|
||||
unsigned long vdso_base;
|
||||
/* The mmu context belongs to a secure guest. */
|
||||
atomic_t is_protected;
|
||||
atomic_t protected_count;
|
||||
/*
|
||||
* The following bitfields need a down_write on the mm
|
||||
* semaphore when they are written to. As they are only
|
||||
|
@ -26,7 +26,7 @@ static inline int init_new_context(struct task_struct *tsk,
|
||||
INIT_LIST_HEAD(&mm->context.gmap_list);
|
||||
cpumask_clear(&mm->context.cpu_attach_mask);
|
||||
atomic_set(&mm->context.flush_count, 0);
|
||||
atomic_set(&mm->context.is_protected, 0);
|
||||
atomic_set(&mm->context.protected_count, 0);
|
||||
mm->context.gmap_asce = 0;
|
||||
mm->context.flush_mm = 0;
|
||||
#ifdef CONFIG_PGSTE
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <asm-generic/pci.h>
|
||||
#include <asm/pci_clp.h>
|
||||
#include <asm/pci_debug.h>
|
||||
#include <asm/pci_insn.h>
|
||||
#include <asm/sclp.h>
|
||||
|
||||
#define PCIBIOS_MIN_IO 0x1000
|
||||
@ -97,6 +98,7 @@ struct zpci_bar_struct {
|
||||
};
|
||||
|
||||
struct s390_domain;
|
||||
struct kvm_zdev;
|
||||
|
||||
#define ZPCI_FUNCTIONS_PER_BUS 256
|
||||
struct zpci_bus {
|
||||
@ -123,11 +125,14 @@ struct zpci_dev {
|
||||
enum zpci_state state;
|
||||
u32 fid; /* function ID, used by sclp */
|
||||
u32 fh; /* function handle, used by insn's */
|
||||
u32 gisa; /* GISA designation for passthrough */
|
||||
u16 vfn; /* virtual function number */
|
||||
u16 pchid; /* physical channel ID */
|
||||
u16 maxstbl; /* Maximum store block size */
|
||||
u8 pfgid; /* function group ID */
|
||||
u8 pft; /* pci function type */
|
||||
u8 port;
|
||||
u8 dtsm; /* Supported DT mask */
|
||||
u8 rid_available : 1;
|
||||
u8 has_hp_slot : 1;
|
||||
u8 has_resources : 1;
|
||||
@ -186,7 +191,10 @@ struct zpci_dev {
|
||||
|
||||
struct dentry *debugfs_dev;
|
||||
|
||||
/* IOMMU and passthrough */
|
||||
struct s390_domain *s390_domain; /* s390 IOMMU domain data */
|
||||
struct kvm_zdev *kzdev;
|
||||
struct mutex kzdev_lock;
|
||||
};
|
||||
|
||||
static inline bool zdev_enabled(struct zpci_dev *zdev)
|
||||
@ -198,6 +206,9 @@ extern const struct attribute_group *zpci_attr_groups[];
|
||||
extern unsigned int s390_pci_force_floating __initdata;
|
||||
extern unsigned int s390_pci_no_rid;
|
||||
|
||||
extern union zpci_sic_iib *zpci_aipb;
|
||||
extern struct airq_iv *zpci_aif_sbv;
|
||||
|
||||
/* -----------------------------------------------------------------------------
|
||||
Prototypes
|
||||
----------------------------------------------------------------------------- */
|
||||
|
@ -153,9 +153,11 @@ struct clp_rsp_query_pci_grp {
|
||||
u8 : 6;
|
||||
u8 frame : 1;
|
||||
u8 refresh : 1; /* TLB refresh mode */
|
||||
u16 reserved2;
|
||||
u16 : 3;
|
||||
u16 maxstbl : 13; /* Maximum store block size */
|
||||
u16 mui;
|
||||
u16 : 16;
|
||||
u8 dtsm; /* Supported DT mask */
|
||||
u8 reserved3;
|
||||
u16 maxfaal;
|
||||
u16 : 4;
|
||||
u16 dnoi : 12;
|
||||
@ -173,7 +175,8 @@ struct clp_req_set_pci {
|
||||
u16 reserved2;
|
||||
u8 oc; /* operation controls */
|
||||
u8 ndas; /* number of dma spaces */
|
||||
u64 reserved3;
|
||||
u32 reserved3;
|
||||
u32 gisa; /* GISA designation */
|
||||
} __packed;
|
||||
|
||||
/* Set PCI function response */
|
||||
|
@ -98,6 +98,15 @@ struct zpci_fib {
|
||||
u32 gd;
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Set Interruption Controls Operation Controls */
|
||||
#define SIC_IRQ_MODE_ALL 0
|
||||
#define SIC_IRQ_MODE_SINGLE 1
|
||||
#define SIC_SET_AENI_CONTROLS 2
|
||||
#define SIC_IRQ_MODE_DIRECT 4
|
||||
#define SIC_IRQ_MODE_D_ALL 16
|
||||
#define SIC_IRQ_MODE_D_SINGLE 17
|
||||
#define SIC_IRQ_MODE_SET_CPU 18
|
||||
|
||||
/* directed interruption information block */
|
||||
struct zpci_diib {
|
||||
u32 : 1;
|
||||
@ -119,9 +128,20 @@ struct zpci_cdiib {
|
||||
u64 : 64;
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* adapter interruption parameters block */
|
||||
struct zpci_aipb {
|
||||
u64 faisb;
|
||||
u64 gait;
|
||||
u16 : 13;
|
||||
u16 afi : 3;
|
||||
u32 : 32;
|
||||
u16 faal;
|
||||
} __packed __aligned(8);
|
||||
|
||||
union zpci_sic_iib {
|
||||
struct zpci_diib diib;
|
||||
struct zpci_cdiib cdiib;
|
||||
struct zpci_aipb aipb;
|
||||
};
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(have_mio);
|
||||
@ -134,13 +154,6 @@ int __zpci_store(u64 data, u64 req, u64 offset);
|
||||
int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
|
||||
int __zpci_store_block(const u64 *data, u64 req, u64 offset);
|
||||
void zpci_barrier(void);
|
||||
int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
|
||||
|
||||
static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
|
||||
{
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
|
||||
return __zpci_set_irq_ctrl(ctl, isc, &iib);
|
||||
}
|
||||
int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
|
||||
|
||||
#endif
|
||||
|
@ -525,7 +525,7 @@ static inline int mm_has_pgste(struct mm_struct *mm)
|
||||
static inline int mm_is_protected(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_PGSTE
|
||||
if (unlikely(atomic_read(&mm->context.is_protected)))
|
||||
if (unlikely(atomic_read(&mm->context.protected_count)))
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
@ -1182,9 +1182,22 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
|
||||
} else {
|
||||
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
|
||||
}
|
||||
/* At this point the reference through the mapping is still present */
|
||||
if (mm_is_protected(mm) && pte_present(res))
|
||||
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
|
||||
/* Nothing to do */
|
||||
if (!mm_is_protected(mm) || !pte_present(res))
|
||||
return res;
|
||||
/*
|
||||
* At this point the reference through the mapping is still present.
|
||||
* The notifier should have destroyed all protected vCPUs at this
|
||||
* point, so the destroy should be successful.
|
||||
*/
|
||||
if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
|
||||
return res;
|
||||
/*
|
||||
* If something went wrong and the page could not be destroyed, or
|
||||
* if this is not a mm teardown, the slower export is used as
|
||||
* fallback instead.
|
||||
*/
|
||||
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -88,6 +88,10 @@ struct sclp_info {
|
||||
unsigned char has_sipl : 1;
|
||||
unsigned char has_dirq : 1;
|
||||
unsigned char has_iplcc : 1;
|
||||
unsigned char has_zpci_lsi : 1;
|
||||
unsigned char has_aisii : 1;
|
||||
unsigned char has_aeni : 1;
|
||||
unsigned char has_aisi : 1;
|
||||
unsigned int ibc;
|
||||
unsigned int mtid;
|
||||
unsigned int mtid_cp;
|
||||
|
@ -19,6 +19,19 @@ struct tpi_info {
|
||||
u32 :12;
|
||||
} __packed __aligned(4);
|
||||
|
||||
/* I/O-Interruption Code as stored by TPI for an Adapter I/O */
|
||||
struct tpi_adapter_info {
|
||||
u32 aism:8;
|
||||
u32 :22;
|
||||
u32 error:1;
|
||||
u32 forward:1;
|
||||
u32 reserved;
|
||||
u32 adapter_IO:1;
|
||||
u32 directed_irq:1;
|
||||
u32 isc:3;
|
||||
u32 :27;
|
||||
} __packed __aligned(4);
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_S390_TPI_H */
|
||||
|
@ -50,6 +50,10 @@
|
||||
#define UVC_CMD_SET_UNSHARE_ALL 0x0340
|
||||
#define UVC_CMD_PIN_PAGE_SHARED 0x0341
|
||||
#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
|
||||
#define UVC_CMD_DUMP_INIT 0x0400
|
||||
#define UVC_CMD_DUMP_CONF_STOR_STATE 0x0401
|
||||
#define UVC_CMD_DUMP_CPU 0x0402
|
||||
#define UVC_CMD_DUMP_COMPLETE 0x0403
|
||||
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
|
||||
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
|
||||
#define UVC_CMD_RETR_ATTEST 0x1020
|
||||
@ -77,6 +81,10 @@ enum uv_cmds_inst {
|
||||
BIT_UVC_CMD_UNSHARE_ALL = 20,
|
||||
BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
|
||||
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
|
||||
BIT_UVC_CMD_DUMP_INIT = 24,
|
||||
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
|
||||
BIT_UVC_CMD_DUMP_CPU = 26,
|
||||
BIT_UVC_CMD_DUMP_COMPLETE = 27,
|
||||
BIT_UVC_CMD_RETR_ATTEST = 28,
|
||||
};
|
||||
|
||||
@ -110,7 +118,16 @@ struct uv_cb_qui {
|
||||
u8 reserved88[158 - 136]; /* 0x0088 */
|
||||
u16 max_guest_cpu_id; /* 0x009e */
|
||||
u64 uv_feature_indications; /* 0x00a0 */
|
||||
u8 reserveda8[200 - 168]; /* 0x00a8 */
|
||||
u64 reserveda8; /* 0x00a8 */
|
||||
u64 supp_se_hdr_versions; /* 0x00b0 */
|
||||
u64 supp_se_hdr_pcf; /* 0x00b8 */
|
||||
u64 reservedc0; /* 0x00c0 */
|
||||
u64 conf_dump_storage_state_len; /* 0x00c8 */
|
||||
u64 conf_dump_finalize_len; /* 0x00d0 */
|
||||
u64 reservedd8; /* 0x00d8 */
|
||||
u64 supp_att_req_hdr_ver; /* 0x00e0 */
|
||||
u64 supp_att_pflags; /* 0x00e8 */
|
||||
u8 reservedf0[256 - 240]; /* 0x00f0 */
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Initialize Ultravisor */
|
||||
@ -240,6 +257,31 @@ struct uv_cb_attest {
|
||||
u64 reserved168[4]; /* 0x0168 */
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct uv_cb_dump_cpu {
|
||||
struct uv_cb_header header;
|
||||
u64 reserved08[2];
|
||||
u64 cpu_handle;
|
||||
u64 dump_area_origin;
|
||||
u64 reserved28[5];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct uv_cb_dump_stor_state {
|
||||
struct uv_cb_header header;
|
||||
u64 reserved08[2];
|
||||
u64 config_handle;
|
||||
u64 dump_area_origin;
|
||||
u64 gaddr;
|
||||
u64 reserved28[4];
|
||||
} __packed __aligned(8);
|
||||
|
||||
struct uv_cb_dump_complete {
|
||||
struct uv_cb_header header;
|
||||
u64 reserved08[2];
|
||||
u64 config_handle;
|
||||
u64 dump_area_origin;
|
||||
u64 reserved30[5];
|
||||
} __packed __aligned(8);
|
||||
|
||||
static inline int __uv_call(unsigned long r1, unsigned long r2)
|
||||
{
|
||||
int cc;
|
||||
@ -307,6 +349,12 @@ struct uv_info {
|
||||
unsigned int max_num_sec_conf;
|
||||
unsigned short max_guest_cpu_id;
|
||||
unsigned long uv_feature_indications;
|
||||
unsigned long supp_se_hdr_ver;
|
||||
unsigned long supp_se_hdr_pcf;
|
||||
unsigned long conf_dump_storage_state_len;
|
||||
unsigned long conf_dump_finalize_len;
|
||||
unsigned long supp_att_req_hdr_ver;
|
||||
unsigned long supp_att_pflags;
|
||||
};
|
||||
|
||||
extern struct uv_info uv_info;
|
||||
@ -378,6 +426,7 @@ static inline int is_prot_virt_host(void)
|
||||
}
|
||||
|
||||
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
|
||||
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
|
||||
int uv_destroy_owned_page(unsigned long paddr);
|
||||
int uv_convert_from_secure(unsigned long paddr);
|
||||
int uv_convert_owned_from_secure(unsigned long paddr);
|
||||
|
@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
|
||||
#define KVM_S390_VM_CRYPTO 2
|
||||
#define KVM_S390_VM_CPU_MODEL 3
|
||||
#define KVM_S390_VM_MIGRATION 4
|
||||
#define KVM_S390_VM_CPU_TOPOLOGY 5
|
||||
|
||||
/* kvm attributes for mem_ctrl */
|
||||
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
|
||||
|
@@ -234,6 +234,32 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
	return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}

/**
 * should_export_before_import - Determine whether an export is needed
 * before an import-like operation
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @mm: the mm of the process
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed also when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: true if an export is needed before every import, otherwise false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}
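Condensed, for illustration, this is how the helper above is intended to be used on the make-secure (import) path; the full call site appears in the gmap_make_secure() hunk further down.

	/* Export first if the page may be shared with another protected
	 * guest, then attempt the import via the make-secure UVC. */
	if (should_export_before_import(uvcb, gmap->mm))
		uv_convert_from_secure(page_to_phys(page));
	rc = make_secure_pte(ptep, uaddr, page, uvcb);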
|
||||
/*
|
||||
* Requests the Ultravisor to make a page accessible to a guest.
|
||||
* If it's brought in the first time, it will be cleared. If
|
||||
@ -277,6 +303,8 @@ again:
|
||||
|
||||
lock_page(page);
|
||||
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
|
||||
if (should_export_before_import(uvcb, gmap->mm))
|
||||
uv_convert_from_secure(page_to_phys(page));
|
||||
rc = make_secure_pte(ptep, uaddr, page, uvcb);
|
||||
pte_unmap_unlock(ptep, ptelock);
|
||||
unlock_page(page);
|
||||
@ -334,6 +362,61 @@ int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
|
||||
|
||||
/**
|
||||
* gmap_destroy_page - Destroy a guest page.
|
||||
* @gmap: the gmap of the guest
|
||||
* @gaddr: the guest address to destroy
|
||||
*
|
||||
* An attempt will be made to destroy the given guest page. If the attempt
|
||||
* fails, an attempt is made to export the page. If both attempts fail, an
|
||||
* appropriate error is returned.
|
||||
*/
|
||||
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long uaddr;
|
||||
struct page *page;
|
||||
int rc;
|
||||
|
||||
rc = -EFAULT;
|
||||
mmap_read_lock(gmap->mm);
|
||||
|
||||
uaddr = __gmap_translate(gmap, gaddr);
|
||||
if (IS_ERR_VALUE(uaddr))
|
||||
goto out;
|
||||
vma = vma_lookup(gmap->mm, uaddr);
|
||||
if (!vma)
|
||||
goto out;
|
||||
/*
|
||||
* Huge pages should not be able to become secure
|
||||
*/
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
goto out;
|
||||
|
||||
rc = 0;
|
||||
/* we take an extra reference here */
|
||||
page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
|
||||
if (IS_ERR_OR_NULL(page))
|
||||
goto out;
|
||||
rc = uv_destroy_owned_page(page_to_phys(page));
|
||||
/*
|
||||
* Fault handlers can race; it is possible that two CPUs will fault
|
||||
* on the same secure page. One CPU can destroy the page, reboot,
|
||||
* re-enter secure mode and import it, while the second CPU was
|
||||
* stuck at the beginning of the handler. At some point the second
|
||||
* CPU will be able to progress, and it will not be able to destroy
|
||||
* the page. In that case we do not want to terminate the process,
|
||||
* we instead try to export the page.
|
||||
*/
|
||||
if (rc)
|
||||
rc = uv_convert_owned_from_secure(page_to_phys(page));
|
||||
put_page(page);
|
||||
out:
|
||||
mmap_read_unlock(gmap->mm);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(gmap_destroy_page);
|
||||
|
||||
/*
|
||||
* To be called with the page locked or with an extra reference! This will
|
||||
* prevent gmap_make_secure from touching the page concurrently. Having 2
|
||||
@ -392,6 +475,54 @@ static ssize_t uv_query_facilities(struct kobject *kobj,
|
||||
static struct kobj_attribute uv_query_facilities_attr =
|
||||
__ATTR(facilities, 0444, uv_query_facilities, NULL);
|
||||
|
||||
static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_supp_se_hdr_ver_attr =
|
||||
__ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL);
|
||||
|
||||
static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr =
|
||||
__ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL);
|
||||
|
||||
static ssize_t uv_query_dump_cpu_len(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n",
|
||||
uv_info.guest_cpu_stor_len);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_dump_cpu_len_attr =
|
||||
__ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL);
|
||||
|
||||
static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n",
|
||||
uv_info.conf_dump_storage_state_len);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_dump_storage_state_len_attr =
|
||||
__ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL);
|
||||
|
||||
static ssize_t uv_query_dump_finalize_len(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n",
|
||||
uv_info.conf_dump_finalize_len);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_dump_finalize_len_attr =
|
||||
__ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL);
|
||||
|
||||
static ssize_t uv_query_feature_indications(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
@ -431,12 +562,37 @@ static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
|
||||
static struct kobj_attribute uv_query_max_guest_addr_attr =
|
||||
__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
|
||||
|
||||
static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_req_hdr_ver);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr =
|
||||
__ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL);
|
||||
|
||||
static ssize_t uv_query_supp_att_pflags(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.supp_att_pflags);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_supp_att_pflags_attr =
|
||||
__ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL);
|
||||
|
||||
static struct attribute *uv_query_attrs[] = {
|
||||
&uv_query_facilities_attr.attr,
|
||||
&uv_query_feature_indications_attr.attr,
|
||||
&uv_query_max_guest_cpus_attr.attr,
|
||||
&uv_query_max_guest_vms_attr.attr,
|
||||
&uv_query_max_guest_addr_attr.attr,
|
||||
&uv_query_supp_se_hdr_ver_attr.attr,
|
||||
&uv_query_supp_se_hdr_pcf_attr.attr,
|
||||
&uv_query_dump_storage_state_len_attr.attr,
|
||||
&uv_query_dump_finalize_len_attr.attr,
|
||||
&uv_query_dump_cpu_len_attr.attr,
|
||||
&uv_query_supp_att_req_hdr_ver_attr.attr,
|
||||
&uv_query_supp_att_pflags_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -34,6 +34,7 @@ config KVM
|
||||
select SRCU
|
||||
select KVM_VFIO
|
||||
select INTERVAL_TREE
|
||||
select MMU_NOTIFIER
|
||||
help
|
||||
Support hosting paravirtualized guest machines using the SIE
|
||||
virtualization capability on the mainframe. This should work
|
||||
|
@ -10,4 +10,5 @@ ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
|
||||
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
|
||||
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
|
||||
|
||||
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
|
||||
obj-$(CONFIG_KVM) += kvm.o
|
||||
|
@ -262,77 +262,77 @@ struct aste {
|
||||
/* .. more fields there */
|
||||
};
|
||||
|
||||
int ipte_lock_held(struct kvm_vcpu *vcpu)
|
||||
int ipte_lock_held(struct kvm *kvm)
|
||||
{
|
||||
if (vcpu->arch.sie_block->eca & ECA_SII) {
|
||||
if (sclp.has_siif) {
|
||||
int rc;
|
||||
|
||||
read_lock(&vcpu->kvm->arch.sca_lock);
|
||||
rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
read_lock(&kvm->arch.sca_lock);
|
||||
rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
return rc;
|
||||
}
|
||||
return vcpu->kvm->arch.ipte_lock_count != 0;
|
||||
return kvm->arch.ipte_lock_count != 0;
|
||||
}
|
||||
|
||||
static void ipte_lock_simple(struct kvm_vcpu *vcpu)
|
||||
static void ipte_lock_simple(struct kvm *kvm)
|
||||
{
|
||||
union ipte_control old, new, *ic;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.ipte_mutex);
|
||||
vcpu->kvm->arch.ipte_lock_count++;
|
||||
if (vcpu->kvm->arch.ipte_lock_count > 1)
|
||||
mutex_lock(&kvm->arch.ipte_mutex);
|
||||
kvm->arch.ipte_lock_count++;
|
||||
if (kvm->arch.ipte_lock_count > 1)
|
||||
goto out;
|
||||
retry:
|
||||
read_lock(&vcpu->kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(vcpu->kvm);
|
||||
read_lock(&kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(kvm);
|
||||
do {
|
||||
old = READ_ONCE(*ic);
|
||||
if (old.k) {
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
}
|
||||
new = old;
|
||||
new.k = 1;
|
||||
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
out:
|
||||
mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
|
||||
mutex_unlock(&kvm->arch.ipte_mutex);
|
||||
}
|
||||
|
||||
static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
|
||||
static void ipte_unlock_simple(struct kvm *kvm)
|
||||
{
|
||||
union ipte_control old, new, *ic;
|
||||
|
||||
mutex_lock(&vcpu->kvm->arch.ipte_mutex);
|
||||
vcpu->kvm->arch.ipte_lock_count--;
|
||||
if (vcpu->kvm->arch.ipte_lock_count)
|
||||
mutex_lock(&kvm->arch.ipte_mutex);
|
||||
kvm->arch.ipte_lock_count--;
|
||||
if (kvm->arch.ipte_lock_count)
|
||||
goto out;
|
||||
read_lock(&vcpu->kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(vcpu->kvm);
|
||||
read_lock(&kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(kvm);
|
||||
do {
|
||||
old = READ_ONCE(*ic);
|
||||
new = old;
|
||||
new.k = 0;
|
||||
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
wake_up(&vcpu->kvm->arch.ipte_wq);
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
wake_up(&kvm->arch.ipte_wq);
|
||||
out:
|
||||
mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
|
||||
mutex_unlock(&kvm->arch.ipte_mutex);
|
||||
}
|
||||
|
||||
static void ipte_lock_siif(struct kvm_vcpu *vcpu)
|
||||
static void ipte_lock_siif(struct kvm *kvm)
|
||||
{
|
||||
union ipte_control old, new, *ic;
|
||||
|
||||
retry:
|
||||
read_lock(&vcpu->kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(vcpu->kvm);
|
||||
read_lock(&kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(kvm);
|
||||
do {
|
||||
old = READ_ONCE(*ic);
|
||||
if (old.kg) {
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
}
|
||||
@ -340,15 +340,15 @@ retry:
|
||||
new.k = 1;
|
||||
new.kh++;
|
||||
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
}
|
||||
|
||||
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
|
||||
static void ipte_unlock_siif(struct kvm *kvm)
|
||||
{
|
||||
union ipte_control old, new, *ic;
|
||||
|
||||
read_lock(&vcpu->kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(vcpu->kvm);
|
||||
read_lock(&kvm->arch.sca_lock);
|
||||
ic = kvm_s390_get_ipte_control(kvm);
|
||||
do {
|
||||
old = READ_ONCE(*ic);
|
||||
new = old;
|
||||
@ -356,25 +356,25 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
|
||||
if (!new.kh)
|
||||
new.k = 0;
|
||||
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
|
||||
read_unlock(&vcpu->kvm->arch.sca_lock);
|
||||
read_unlock(&kvm->arch.sca_lock);
|
||||
if (!new.kh)
|
||||
wake_up(&vcpu->kvm->arch.ipte_wq);
|
||||
wake_up(&kvm->arch.ipte_wq);
|
||||
}
|
||||
|
||||
void ipte_lock(struct kvm_vcpu *vcpu)
|
||||
void ipte_lock(struct kvm *kvm)
|
||||
{
|
||||
if (vcpu->arch.sie_block->eca & ECA_SII)
|
||||
ipte_lock_siif(vcpu);
|
||||
if (sclp.has_siif)
|
||||
ipte_lock_siif(kvm);
|
||||
else
|
||||
ipte_lock_simple(vcpu);
|
||||
ipte_lock_simple(kvm);
|
||||
}
|
||||
|
||||
void ipte_unlock(struct kvm_vcpu *vcpu)
|
||||
void ipte_unlock(struct kvm *kvm)
|
||||
{
|
||||
if (vcpu->arch.sie_block->eca & ECA_SII)
|
||||
ipte_unlock_siif(vcpu);
|
||||
if (sclp.has_siif)
|
||||
ipte_unlock_siif(kvm);
|
||||
else
|
||||
ipte_unlock_simple(vcpu);
|
||||
ipte_unlock_simple(kvm);
|
||||
}
|
||||
|
||||
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
|
||||
@ -1086,7 +1086,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
||||
try_storage_prot_override = storage_prot_override_applicable(vcpu);
|
||||
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
|
||||
if (need_ipte_lock)
|
||||
ipte_lock(vcpu);
|
||||
ipte_lock(vcpu->kvm);
|
||||
/*
|
||||
* Since we do the access further down ultimately via a move instruction
|
||||
* that does key checking and returns an error in case of a protection
|
||||
@ -1127,7 +1127,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
|
||||
}
|
||||
out_unlock:
|
||||
if (need_ipte_lock)
|
||||
ipte_unlock(vcpu);
|
||||
ipte_unlock(vcpu->kvm);
|
||||
if (nr_pages > ARRAY_SIZE(gpa_array))
|
||||
vfree(gpas);
|
||||
return rc;
|
||||
@ -1199,10 +1199,10 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
|
||||
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
|
||||
if (rc)
|
||||
return rc;
|
||||
ipte_lock(vcpu);
|
||||
ipte_lock(vcpu->kvm);
|
||||
rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
|
||||
access_key);
|
||||
ipte_unlock(vcpu);
|
||||
ipte_unlock(vcpu->kvm);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@ -1465,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
|
||||
* tables/pointers we read stay valid - unshadowing is however
|
||||
* always possible - only guest_table_lock protects us.
|
||||
*/
|
||||
ipte_lock(vcpu);
|
||||
ipte_lock(vcpu->kvm);
|
||||
|
||||
rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
|
||||
if (rc)
|
||||
@ -1499,7 +1499,7 @@ shadow_page:
|
||||
pte.p |= dat_protection;
|
||||
if (!rc)
|
||||
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
|
||||
ipte_unlock(vcpu);
|
||||
ipte_unlock(vcpu->kvm);
|
||||
mmap_read_unlock(sg->mm);
|
||||
return rc;
|
||||
}
|
||||
|
@ -440,9 +440,9 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
|
||||
return access_guest_real(vcpu, gra, data, len, 0);
|
||||
}
|
||||
|
||||
void ipte_lock(struct kvm_vcpu *vcpu);
|
||||
void ipte_unlock(struct kvm_vcpu *vcpu);
|
||||
int ipte_lock_held(struct kvm_vcpu *vcpu);
|
||||
void ipte_lock(struct kvm *kvm);
|
||||
void ipte_unlock(struct kvm *kvm);
|
||||
int ipte_lock_held(struct kvm *kvm);
|
||||
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
|
||||
|
||||
/* MVPG PEI indication bits */
|
||||
|
@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
|
||||
|
||||
static int handle_pv_notification(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (vcpu->arch.sie_block->ipa == 0xb210)
|
||||
return handle_pv_spx(vcpu);
|
||||
if (vcpu->arch.sie_block->ipa == 0xb220)
|
||||
return handle_pv_sclp(vcpu);
|
||||
if (vcpu->arch.sie_block->ipa == 0xb9a4)
|
||||
return handle_pv_uvc(vcpu);
|
||||
if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
|
||||
/*
|
||||
* Besides external call, other SIGP orders also cause a
|
||||
* 108 (pv notify) intercept. In contrast to external call,
|
||||
* these orders need to be emulated and hence the appropriate
|
||||
* place to handle them is in handle_instruction().
|
||||
* So first try kvm_s390_handle_sigp_pei() and if that isn't
|
||||
* successful, go on with handle_instruction().
|
||||
*/
|
||||
ret = kvm_s390_handle_sigp_pei(vcpu);
|
||||
if (!ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return handle_instruction(vcpu);
|
||||
}
|
||||
|
@ -28,9 +28,11 @@
|
||||
#include <asm/switch_to.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/tpi.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
#include "trace-s390.h"
|
||||
#include "pci.h"
|
||||
|
||||
#define PFAULT_INIT 0x0600
|
||||
#define PFAULT_DONE 0x0680
|
||||
@ -702,7 +704,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
|
||||
/*
|
||||
* We indicate floating repressible conditions along with
|
||||
* other pending conditions. Channel Report Pending and Channel
|
||||
* Subsystem damage are the only two and and are indicated by
|
||||
* Subsystem damage are the only two and are indicated by
|
||||
* bits in mcic and masked in cr14.
|
||||
*/
|
||||
if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
|
||||
@ -3311,10 +3313,87 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
|
||||
|
||||
static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
|
||||
static void aen_host_forward(unsigned long si)
|
||||
{
|
||||
struct kvm_s390_gisa_interrupt *gi;
|
||||
struct zpci_gaite *gaite;
|
||||
struct kvm *kvm;
|
||||
|
||||
gaite = (struct zpci_gaite *)aift->gait +
|
||||
(si * sizeof(struct zpci_gaite));
|
||||
if (gaite->count == 0)
|
||||
return;
|
||||
if (gaite->aisb != 0)
|
||||
set_bit_inv(gaite->aisbo, (unsigned long *)gaite->aisb);
|
||||
|
||||
kvm = kvm_s390_pci_si_to_kvm(aift, si);
|
||||
if (!kvm)
|
||||
return;
|
||||
gi = &kvm->arch.gisa_int;
|
||||
|
||||
if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
|
||||
!(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
|
||||
gisa_set_ipm_gisc(gi->origin, gaite->gisc);
|
||||
if (hrtimer_active(&gi->timer))
|
||||
hrtimer_cancel(&gi->timer);
|
||||
hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
|
||||
kvm->stat.aen_forward++;
|
||||
}
|
||||
}
|
||||
|
||||
static void aen_process_gait(u8 isc)
|
||||
{
|
||||
bool found = false, first = true;
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
unsigned long si, flags;
|
||||
|
||||
spin_lock_irqsave(&aift->gait_lock, flags);
|
||||
|
||||
if (!aift->gait) {
|
||||
spin_unlock_irqrestore(&aift->gait_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
for (si = 0;;) {
|
||||
/* Scan adapter summary indicator bit vector */
|
||||
si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv));
|
||||
if (si == -1UL) {
|
||||
if (first || found) {
|
||||
/* Re-enable interrupts. */
|
||||
zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
|
||||
&iib);
|
||||
first = found = false;
|
||||
} else {
|
||||
/* Interrupts on and all bits processed */
|
||||
break;
|
||||
}
|
||||
found = false;
|
||||
si = 0;
|
||||
/* Scan again after re-enabling interrupts */
|
||||
continue;
|
||||
}
|
||||
found = true;
|
||||
aen_host_forward(si);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&aift->gait_lock, flags);
|
||||
}
|
||||
|
||||
static void gib_alert_irq_handler(struct airq_struct *airq,
|
||||
struct tpi_info *tpi_info)
|
||||
{
|
||||
struct tpi_adapter_info *info = (struct tpi_adapter_info *)tpi_info;
|
||||
|
||||
inc_irq_stat(IRQIO_GAL);
|
||||
process_gib_alert_list();
|
||||
|
||||
if ((info->forward || info->error) &&
|
||||
IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
|
||||
aen_process_gait(info->isc);
|
||||
if (info->aism != 0)
|
||||
process_gib_alert_list();
|
||||
} else {
|
||||
process_gib_alert_list();
|
||||
}
|
||||
}
|
||||
|
||||
static struct airq_struct gib_alert_irq = {
|
||||
@ -3326,6 +3405,11 @@ void kvm_s390_gib_destroy(void)
|
||||
{
|
||||
if (!gib)
|
||||
return;
|
||||
if (kvm_s390_pci_interp_allowed() && aift) {
|
||||
mutex_lock(&aift->aift_lock);
|
||||
kvm_s390_pci_aen_exit();
|
||||
mutex_unlock(&aift->aift_lock);
|
||||
}
|
||||
chsc_sgib(0);
|
||||
unregister_adapter_interrupt(&gib_alert_irq);
|
||||
free_page((unsigned long)gib);
|
||||
@ -3363,6 +3447,14 @@ int kvm_s390_gib_init(u8 nisc)
|
||||
goto out_unreg_gal;
|
||||
}
|
||||
|
||||
if (kvm_s390_pci_interp_allowed()) {
|
||||
if (kvm_s390_pci_aen_init(nisc)) {
|
||||
pr_err("Initializing AEN for PCI failed\n");
|
||||
rc = -EIO;
|
||||
goto out_unreg_gal;
|
||||
}
|
||||
}
|
||||
|
||||
KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
|
||||
goto out;
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/pgtable.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/lowcore.h>
|
||||
@ -47,6 +48,7 @@
|
||||
#include <asm/fpu/api.h>
|
||||
#include "kvm-s390.h"
|
||||
#include "gaccess.h"
|
||||
#include "pci.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
@ -63,7 +65,8 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
||||
STATS_DESC_COUNTER(VM, inject_float_mchk),
|
||||
STATS_DESC_COUNTER(VM, inject_pfault_done),
|
||||
STATS_DESC_COUNTER(VM, inject_service_signal),
|
||||
STATS_DESC_COUNTER(VM, inject_virtio)
|
||||
STATS_DESC_COUNTER(VM, inject_virtio),
|
||||
STATS_DESC_COUNTER(VM, aen_forward)
|
||||
};
|
||||
|
||||
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||
@ -502,6 +505,14 @@ int kvm_arch_init(void *opaque)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (kvm_s390_pci_interp_allowed()) {
|
||||
rc = kvm_s390_pci_init();
|
||||
if (rc) {
|
||||
pr_err("Unable to allocate AIFT for PCI\n");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
rc = kvm_s390_gib_init(GAL_ISC);
|
||||
if (rc)
|
||||
goto out;
|
||||
@ -516,6 +527,8 @@ out:
|
||||
void kvm_arch_exit(void)
|
||||
{
|
||||
kvm_s390_gib_destroy();
|
||||
if (kvm_s390_pci_interp_allowed())
|
||||
kvm_s390_pci_exit();
|
||||
debug_unregister(kvm_s390_dbf);
|
||||
debug_unregister(kvm_s390_dbf_uv);
|
||||
}
|
||||
@ -606,6 +619,32 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_S390_PROTECTED:
|
||||
r = is_prot_virt_host();
|
||||
break;
|
||||
case KVM_CAP_S390_PROTECTED_DUMP: {
|
||||
u64 pv_cmds_dump[] = {
|
||||
BIT_UVC_CMD_DUMP_INIT,
|
||||
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
|
||||
BIT_UVC_CMD_DUMP_CPU,
|
||||
BIT_UVC_CMD_DUMP_COMPLETE,
|
||||
};
|
||||
int i;
|
||||
|
||||
r = is_prot_virt_host();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
|
||||
if (!test_bit_inv(pv_cmds_dump[i],
|
||||
(unsigned long *)&uv_info.inst_calls_list)) {
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_CAP_S390_ZPCI_OP:
|
||||
r = kvm_s390_pci_interp_allowed();
|
||||
break;
|
||||
case KVM_CAP_S390_CPU_TOPOLOGY:
|
||||
r = test_facility(11);
|
||||
break;
|
||||
default:
|
||||
r = 0;
|
||||
}
|
||||
@ -817,6 +856,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
||||
icpt_operexc_on_all_vcpus(kvm);
|
||||
r = 0;
|
||||
break;
|
||||
case KVM_CAP_S390_CPU_TOPOLOGY:
|
||||
r = -EINVAL;
|
||||
mutex_lock(&kvm->lock);
|
||||
if (kvm->created_vcpus) {
|
||||
r = -EBUSY;
|
||||
} else if (test_facility(11)) {
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 11);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 11);
|
||||
r = 0;
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
|
||||
r ? "(not available)" : "(success)");
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
break;
|
||||
@ -1019,6 +1072,42 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Only set the ECB bits after guest requests zPCI interpretation */
|
||||
if (!vcpu->kvm->arch.use_zpci_interp)
|
||||
return;
|
||||
|
||||
vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
|
||||
vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
|
||||
}
|
||||
|
||||
void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
unsigned long i;
|
||||
|
||||
lockdep_assert_held(&kvm->lock);
|
||||
|
||||
if (!kvm_s390_pci_interp_allowed())
|
||||
return;
|
||||
|
||||
/*
|
||||
* If host is configured for PCI and the necessary facilities are
|
||||
* available, turn on interpretation for the life of this guest
|
||||
*/
|
||||
kvm->arch.use_zpci_interp = 1;
|
||||
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
kvm_s390_vcpu_pci_setup(vcpu);
|
||||
kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
|
||||
}
|
||||
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
}
|
||||
|
||||
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
|
||||
{
|
||||
unsigned long cx;
|
||||
@@ -1691,6 +1780,57 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	return ret;
}

/**
 * kvm_s390_update_topology_change_report - update CPU topology change report
 * @kvm: guest KVM description
 * @val: set or clear the MTCR bit
 *
 * Updates the Multiprocessor Topology-Change-Report bit to signal
 * the guest with a topology change.
 * This is only relevant if the topology facility is present.
 *
 * The SCA version, bsca or esca, doesn't matter as offset is the same.
 */
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
{
	union sca_utility new, old;
	struct bsca_block *sca;

	read_lock(&kvm->arch.sca_lock);
	sca = kvm->arch.sca;
	do {
		old = READ_ONCE(sca->utility);
		new = old;
		new.mtcr = val;
	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
	read_unlock(&kvm->arch.sca_lock);
}

static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
					       struct kvm_device_attr *attr)
{
	if (!test_kvm_facility(kvm, 11))
		return -ENXIO;

	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
	return 0;
}

static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
					       struct kvm_device_attr *attr)
{
	u8 topo;

	if (!test_kvm_facility(kvm, 11))
		return -ENXIO;

	read_lock(&kvm->arch.sca_lock);
	topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
	read_unlock(&kvm->arch.sca_lock);

	return put_user(topo, (u8 __user *)attr->addr);
}
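For reference, a hedged userspace sketch of driving the new attribute group from a VMM through the VM-level KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR ioctls; the file-descriptor plumbing is assumed and not part of this patch.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: set and read back the MTCR bit via the new
 * KVM_S390_VM_CPU_TOPOLOGY attribute group. */
static int set_topology_change_report(int vm_fd, int on)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_TOPOLOGY,
		.attr  = on,		/* kernel applies !!attr->attr */
	};

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}

static int get_topology_change_report(int vm_fd, uint8_t *mtcr)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_TOPOLOGY,
		.addr  = (uint64_t)(unsigned long)mtcr,
	};

	return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
}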
|
||||
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
{
|
||||
int ret;
|
||||
@ -1711,6 +1851,9 @@ static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_MIGRATION:
|
||||
ret = kvm_s390_vm_set_migration(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_TOPOLOGY:
|
||||
ret = kvm_s390_set_topo_change_indication(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
@ -1736,6 +1879,9 @@ static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_MIGRATION:
|
||||
ret = kvm_s390_vm_get_migration(kvm, attr);
|
||||
break;
|
||||
case KVM_S390_VM_CPU_TOPOLOGY:
|
||||
ret = kvm_s390_get_topo_change_indication(kvm, attr);
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
@ -1809,6 +1955,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
|
||||
case KVM_S390_VM_MIGRATION:
|
||||
ret = 0;
|
||||
break;
|
||||
case KVM_S390_VM_CPU_TOPOLOGY:
|
||||
ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
|
||||
break;
|
||||
default:
|
||||
ret = -ENXIO;
|
||||
break;
|
||||
@ -2166,12 +2315,25 @@ out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
|
||||
/**
|
||||
* kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
|
||||
* non protected.
|
||||
* @kvm: the VM whose protected vCPUs are to be converted
|
||||
* @rc: return value for the RC field of the UVC (in case of error)
|
||||
* @rrc: return value for the RRC field of the UVC (in case of error)
|
||||
*
|
||||
* Does not stop in case of error, tries to convert as many
|
||||
* CPUs as possible. In case of error, the RC and RRC of the last error are
|
||||
* returned.
|
||||
*
|
||||
* Return: 0 in case of success, otherwise -EIO
|
||||
*/
|
||||
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
u16 rc, rrc;
|
||||
int ret = 0;
|
||||
unsigned long i;
|
||||
u16 _rc, _rrc;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* We ignore failures and try to destroy as many CPUs as possible.
|
||||
@ -2183,9 +2345,9 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
|
||||
*/
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
mutex_lock(&vcpu->mutex);
|
||||
if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
|
||||
*rcp = rc;
|
||||
*rrcp = rrc;
|
||||
if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
|
||||
*rc = _rc;
|
||||
*rrc = _rrc;
|
||||
ret = -EIO;
|
||||
}
|
||||
mutex_unlock(&vcpu->mutex);
|
||||
@ -2196,6 +2358,17 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
|
||||
* to protected.
|
||||
* @kvm: the VM whose protected vCPUs are to be converted
|
||||
* @rc: return value for the RC field of the UVC (in case of error)
|
||||
* @rrc: return value for the RRC field of the UVC (in case of error)
|
||||
*
|
||||
* Tries to undo the conversion in case of error.
|
||||
*
|
||||
* Return: 0 in case of success, otherwise -EIO
|
||||
*/
|
||||
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
{
|
||||
unsigned long i;
|
||||
@ -2220,6 +2393,115 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we provide user space with a direct interface to query UV
|
||||
* related data like UV maxima and available features as well as
|
||||
* feature specific data.
|
||||
*
|
||||
* To facilitate future extension of the data structures we'll try to
|
||||
* write data up to the maximum requested length.
|
||||
*/
|
||||
static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
|
||||
{
|
||||
ssize_t len_min;
|
||||
|
||||
switch (info->header.id) {
|
||||
case KVM_PV_INFO_VM: {
|
||||
len_min = sizeof(info->header) + sizeof(info->vm);
|
||||
|
||||
if (info->header.len_max < len_min)
|
||||
return -EINVAL;
|
||||
|
||||
memcpy(info->vm.inst_calls_list,
|
||||
uv_info.inst_calls_list,
|
||||
sizeof(uv_info.inst_calls_list));
|
||||
|
||||
/* It's max cpuid not max cpus, so it's off by one */
|
||||
info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
|
||||
info->vm.max_guests = uv_info.max_num_sec_conf;
|
||||
info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
|
||||
info->vm.feature_indication = uv_info.uv_feature_indications;
|
||||
|
||||
return len_min;
|
||||
}
|
||||
case KVM_PV_INFO_DUMP: {
|
||||
len_min = sizeof(info->header) + sizeof(info->dump);
|
||||
|
||||
if (info->header.len_max < len_min)
|
||||
return -EINVAL;
|
||||
|
||||
info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
|
||||
info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
|
||||
info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
|
||||
return len_min;
|
||||
}
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
|
||||
struct kvm_s390_pv_dmp dmp)
|
||||
{
|
||||
int r = -EINVAL;
|
||||
void __user *result_buff = (void __user *)dmp.buff_addr;
|
||||
|
||||
switch (dmp.subcmd) {
|
||||
case KVM_PV_DUMP_INIT: {
|
||||
if (kvm->arch.pv.dumping)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Block SIE entry as concurrent dump UVCs could lead
|
||||
* to validities.
|
||||
*/
|
||||
kvm_s390_vcpu_block_all(kvm);
|
||||
|
||||
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
||||
UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
|
||||
cmd->rc, cmd->rrc);
|
||||
if (!r) {
|
||||
kvm->arch.pv.dumping = true;
|
||||
} else {
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
r = -EINVAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case KVM_PV_DUMP_CONFIG_STOR_STATE: {
|
||||
if (!kvm->arch.pv.dumping)
|
||||
break;
|
||||
|
||||
/*
|
||||
* gaddr is an output parameter since we might stop
|
||||
* early. As dmp will be copied back in our caller, we
|
||||
* don't need to do it ourselves.
|
||||
*/
|
||||
r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
|
||||
&cmd->rc, &cmd->rrc);
|
||||
break;
|
||||
}
|
||||
case KVM_PV_DUMP_COMPLETE: {
|
||||
if (!kvm->arch.pv.dumping)
|
||||
break;
|
||||
|
||||
r = -EINVAL;
|
||||
if (dmp.buff_len < uv_info.conf_dump_finalize_len)
|
||||
break;
|
||||
|
||||
r = kvm_s390_pv_dump_complete(kvm, result_buff,
|
||||
&cmd->rc, &cmd->rrc);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
|
||||
{
|
||||
int r = 0;
|
||||
@ -2356,6 +2638,68 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
|
||||
cmd->rc, cmd->rrc);
|
||||
break;
|
||||
}
|
||||
case KVM_PV_INFO: {
|
||||
struct kvm_s390_pv_info info = {};
|
||||
ssize_t data_len;
|
||||
|
||||
/*
|
||||
* No need to check the VM protection here.
|
||||
*
|
||||
* Maybe user space wants to query some of the data
|
||||
* when the VM is still unprotected. If we see the
|
||||
* need to fence a new data command we can still
|
||||
* return an error in the info handler.
|
||||
*/
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&info, argp, sizeof(info.header)))
|
||||
break;
|
||||
|
||||
r = -EINVAL;
|
||||
if (info.header.len_max < sizeof(info.header))
|
||||
break;
|
||||
|
||||
data_len = kvm_s390_handle_pv_info(&info);
|
||||
if (data_len < 0) {
|
||||
r = data_len;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If a data command struct is extended (multiple
|
||||
* times) this can be used to determine how much of it
|
||||
* is valid.
|
||||
*/
|
||||
info.header.len_written = data_len;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_to_user(argp, &info, data_len))
|
||||
break;
|
||||
|
||||
r = 0;
|
||||
break;
|
||||
}
|
||||
case KVM_PV_DUMP: {
|
||||
struct kvm_s390_pv_dmp dmp;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!kvm_s390_pv_is_protected(kvm))
|
||||
break;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&dmp, argp, sizeof(dmp)))
|
||||
break;
|
||||
|
||||
r = kvm_s390_pv_dmp(kvm, cmd, dmp);
|
||||
if (r)
|
||||
break;
|
||||
|
||||
if (copy_to_user(argp, &dmp, sizeof(dmp))) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
@ -2581,6 +2925,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
case KVM_S390_ZPCI_OP: {
|
||||
struct kvm_s390_zpci_op args;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
|
||||
break;
|
||||
if (copy_from_user(&args, argp, sizeof(args))) {
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
r = kvm_s390_pci_zpci_op(kvm, &args);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
|
||||
@ -2742,6 +3099,14 @@ static void sca_dispose(struct kvm *kvm)
|
||||
kvm->arch.sca = NULL;
|
||||
}
|
||||
|
||||
void kvm_arch_free_vm(struct kvm *kvm)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
|
||||
kvm_s390_pci_clear_list(kvm);
|
||||
|
||||
__kvm_arch_free_vm(kvm);
|
||||
}
|
||||
|
||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
|
||||
@ -2824,6 +3189,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
||||
|
||||
kvm_s390_crypto_init(kvm);
|
||||
|
||||
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
|
||||
mutex_lock(&kvm->lock);
|
||||
kvm_s390_pci_init_list(kvm);
|
||||
kvm_s390_vcpu_pci_enable_interp(kvm);
|
||||
mutex_unlock(&kvm->lock);
|
||||
}
|
||||
|
||||
mutex_init(&kvm->arch.float_int.ais_lock);
|
||||
spin_lock_init(&kvm->arch.float_int.lock);
|
||||
for (i = 0; i < FIRQ_LIST_COUNT; i++)
|
||||
@ -2877,6 +3249,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
||||
kvm_clear_async_pf_completion_queue(vcpu);
|
||||
if (!kvm_is_ucontrol(vcpu->kvm))
|
||||
sca_del_vcpu(vcpu);
|
||||
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
|
||||
|
||||
if (kvm_is_ucontrol(vcpu->kvm))
|
||||
gmap_remove(vcpu->arch.gmap);
|
||||
@ -2904,6 +3277,15 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
*/
|
||||
if (kvm_s390_pv_get_handle(kvm))
|
||||
kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
|
||||
/*
|
||||
* Remove the mmu notifier only when the whole KVM VM is torn down,
|
||||
* and only if one was registered to begin with. If the VM is
|
||||
* currently not protected, but has been previously been protected,
|
||||
* then it's possible that the notifier is still registered.
|
||||
*/
|
||||
if (kvm->arch.pv.mmu_notifier.ops)
|
||||
mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
|
||||
|
||||
debug_unregister(kvm->arch.dbf);
|
||||
free_page((unsigned long)kvm->arch.sie_page2);
|
||||
if (!kvm_is_ucontrol(kvm))
|
||||
@ -3047,9 +3429,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
|
||||
if (!sclp.has_esca || !sclp.has_64bscao)
|
||||
return false;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
|
||||
}
|
||||
@ -3272,6 +3652,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
|
||||
if (test_kvm_facility(vcpu->kvm, 9))
|
||||
vcpu->arch.sie_block->ecb |= ECB_SRSI;
|
||||
if (test_kvm_facility(vcpu->kvm, 11))
|
||||
vcpu->arch.sie_block->ecb |= ECB_PTF;
|
||||
if (test_kvm_facility(vcpu->kvm, 73))
|
||||
vcpu->arch.sie_block->ecb |= ECB_TE;
|
||||
if (!kvm_is_ucontrol(vcpu->kvm))
|
||||
@ -3324,6 +3706,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
|
||||
|
||||
kvm_s390_vcpu_crypto_setup(vcpu);
|
||||
|
||||
kvm_s390_vcpu_pci_setup(vcpu);
|
||||
|
||||
mutex_lock(&vcpu->kvm->lock);
|
||||
if (kvm_s390_pv_is_protected(vcpu->kvm)) {
|
||||
rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
|
||||
@ -3403,6 +3787,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
|
||||
rc = kvm_s390_vcpu_setup(vcpu);
|
||||
if (rc)
|
||||
goto out_ucontrol_uninit;
|
||||
|
||||
kvm_s390_update_topology_change_report(vcpu->kvm, 1);
|
||||
return 0;
|
||||
|
||||
out_ucontrol_uninit:
|
||||
@ -4473,6 +4859,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
|
||||
struct kvm_run *kvm_run = vcpu->run;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* Running a VM while dumping always has the potential to
|
||||
* produce inconsistent dump data. But for PV vcpus a SIE
|
||||
* entry while dumping could also lead to a fatal validity
|
||||
* intercept which we absolutely want to avoid.
|
||||
*/
|
||||
if (vcpu->kvm->arch.pv.dumping)
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_run->immediate_exit)
|
||||
return -EINTR;
|
||||
|
||||
@ -4912,6 +5307,48 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
|
||||
struct kvm_pv_cmd *cmd)
|
||||
{
|
||||
struct kvm_s390_pv_dmp dmp;
|
||||
void *data;
|
||||
int ret;
|
||||
|
||||
/* Dump initialization is a prerequisite */
|
||||
if (!vcpu->kvm->arch.pv.dumping)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
|
||||
return -EFAULT;
|
||||
|
||||
/* We only handle this subcmd right now */
|
||||
if (dmp.subcmd != KVM_PV_DUMP_CPU)
|
||||
return -EINVAL;
|
||||
|
||||
/* CPU dump length is the same as create cpu storage donation. */
|
||||
if (dmp.buff_len != uv_info.guest_cpu_stor_len)
|
||||
return -EINVAL;
|
||||
|
||||
data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
|
||||
|
||||
VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
|
||||
vcpu->vcpu_id, cmd->rc, cmd->rrc);
|
||||
|
||||
if (ret)
|
||||
ret = -EINVAL;
|
||||
|
||||
/* On success copy over the dump data */
|
||||
if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
|
||||
ret = -EFAULT;
|
||||
|
||||
kvfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
unsigned int ioctl, unsigned long arg)
|
||||
{
|
||||
@ -5076,6 +5513,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
||||
irq_state.len);
|
||||
break;
|
||||
}
|
||||
case KVM_S390_PV_CPU_COMMAND: {
|
||||
struct kvm_pv_cmd cmd;
|
||||
|
||||
r = -EINVAL;
|
||||
if (!is_prot_virt_host())
|
||||
break;
|
||||
|
||||
r = -EFAULT;
|
||||
if (copy_from_user(&cmd, argp, sizeof(cmd)))
|
||||
break;
|
||||
|
||||
r = -EINVAL;
|
||||
if (cmd.flags)
|
||||
break;
|
||||
|
||||
/* We only handle this cmd right now */
|
||||
if (cmd.cmd != KVM_PV_DUMP)
|
||||
break;
|
||||
|
||||
r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
|
||||
|
||||
/* Always copy over UV rc / rrc data */
|
||||
if (copy_to_user((__u8 __user *)argp, &cmd.rc,
|
||||
sizeof(cmd.rc) + sizeof(cmd.rrc)))
|
||||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
r = -ENOTTY;
|
||||
}
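
Not part of the patch: a hedged userspace sketch of the new KVM_S390_PV_CPU_COMMAND path above. The kvm_pv_cmd and kvm_s390_pv_dmp field names follow their usage in this patch; the caller is assumed to have already put the protected guest into dump mode and to know the UV-reported guest CPU storage length that the buffer must match.

/* Illustrative sketch only: dump one PV vcpu's state. vcpu_fd is an open
 * vcpu fd of a protected guest already in dump mode; cpu_stor_len is the
 * UV-reported guest CPU storage length, assumed to be known to the caller.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int dump_pv_cpu(int vcpu_fd, void *buf, __u64 cpu_stor_len)
{
	struct kvm_s390_pv_dmp dmp;
	struct kvm_pv_cmd cmd;
	int rc;

	memset(&dmp, 0, sizeof(dmp));
	dmp.subcmd = KVM_PV_DUMP_CPU;
	dmp.buff_addr = (__u64)(unsigned long)buf;
	dmp.buff_len = cpu_stor_len;	/* must equal guest_cpu_stor_len */

	memset(&cmd, 0, sizeof(cmd));	/* cmd.flags must stay 0 */
	cmd.cmd = KVM_PV_DUMP;
	cmd.data = (__u64)(unsigned long)&dmp;

	rc = ioctl(vcpu_fd, KVM_S390_PV_CPU_COMMAND, &cmd);
	/* cmd.rc / cmd.rrc carry the ultravisor return codes either way */
	return rc;
}

As the handler above shows, cmd.rc and cmd.rrc are copied back to userspace even when the command fails.
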
|
||||
|
@ -250,6 +250,11 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
|
||||
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
|
||||
unsigned long tweak, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
|
||||
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
|
||||
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
|
||||
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
|
||||
u16 *rc, u16 *rrc);
|
||||
|
||||
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
|
||||
{
|
||||
@ -374,6 +379,7 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
|
||||
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
|
||||
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
|
||||
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc);
|
||||
|
||||
/* implemented in diag.c */
|
||||
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
|
||||
@ -507,6 +513,16 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
|
||||
*/
|
||||
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm);
|
||||
|
||||
/**
|
||||
* kvm_s390_vcpu_pci_enable_interp
|
||||
*
|
||||
* Set the associated PCI attributes for each vcpu to allow for zPCI Load/Store
|
||||
* interpretation as well as adapter interruption forwarding.
|
||||
*
|
||||
* @kvm: the KVM guest
|
||||
*/
|
||||
void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm);
|
||||
|
||||
/**
|
||||
* diag9c_forwarding_hz
|
||||
*
|
||||
new file: arch/s390/kvm/pci.c (690 lines)
@ -0,0 +1,690 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* s390 kvm PCI passthrough support
|
||||
*
|
||||
* Copyright IBM Corp. 2022
|
||||
*
|
||||
* Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/pci.h>
|
||||
#include <asm/pci.h>
|
||||
#include <asm/pci_insn.h>
|
||||
#include <asm/pci_io.h>
|
||||
#include <asm/sclp.h>
|
||||
#include "pci.h"
|
||||
#include "kvm-s390.h"
|
||||
|
||||
struct zpci_aift *aift;
|
||||
|
||||
static inline int __set_irq_noiib(u16 ctl, u8 isc)
|
||||
{
|
||||
union zpci_sic_iib iib = {{0}};
|
||||
|
||||
return zpci_set_irq_ctrl(ctl, isc, &iib);
|
||||
}
|
||||
|
||||
void kvm_s390_pci_aen_exit(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kvm_zdev **gait_kzdev;
|
||||
|
||||
lockdep_assert_held(&aift->aift_lock);
|
||||
|
||||
/*
|
||||
* Contents of the aipb remain registered for the life of the host
|
||||
* kernel, the information preserved in zpci_aipb and zpci_aif_sbv
|
||||
* in case we insert the KVM module again later. Clear the AIFT
|
||||
* information and free anything not registered with underlying
|
||||
* firmware.
|
||||
*/
|
||||
spin_lock_irqsave(&aift->gait_lock, flags);
|
||||
gait_kzdev = aift->kzdev;
|
||||
aift->gait = NULL;
|
||||
aift->sbv = NULL;
|
||||
aift->kzdev = NULL;
|
||||
spin_unlock_irqrestore(&aift->gait_lock, flags);
|
||||
|
||||
kfree(gait_kzdev);
|
||||
}
|
||||
|
||||
static int zpci_setup_aipb(u8 nisc)
|
||||
{
|
||||
struct page *page;
|
||||
int size, rc;
|
||||
|
||||
zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
|
||||
if (!zpci_aipb)
|
||||
return -ENOMEM;
|
||||
|
||||
aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, 0);
|
||||
if (!aift->sbv) {
|
||||
rc = -ENOMEM;
|
||||
goto free_aipb;
|
||||
}
|
||||
zpci_aif_sbv = aift->sbv;
|
||||
size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
|
||||
sizeof(struct zpci_gaite)));
|
||||
page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
|
||||
if (!page) {
|
||||
rc = -ENOMEM;
|
||||
goto free_sbv;
|
||||
}
|
||||
aift->gait = (struct zpci_gaite *)page_to_phys(page);
|
||||
|
||||
zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
|
||||
zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
|
||||
zpci_aipb->aipb.afi = nisc;
|
||||
zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;
|
||||
|
||||
/* Setup Adapter Event Notification Interpretation */
|
||||
if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
|
||||
rc = -EIO;
|
||||
goto free_gait;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
free_gait:
|
||||
free_pages((unsigned long)aift->gait, size);
|
||||
free_sbv:
|
||||
airq_iv_release(aift->sbv);
|
||||
zpci_aif_sbv = NULL;
|
||||
free_aipb:
|
||||
kfree(zpci_aipb);
|
||||
zpci_aipb = NULL;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int zpci_reset_aipb(u8 nisc)
|
||||
{
|
||||
/*
|
||||
* AEN registration can only happen once per system boot. If
|
||||
* an aipb already exists then AEN was already registered and
|
||||
* we can re-use the aipb contents. This can only happen if
|
||||
* the KVM module was removed and re-inserted. However, we must
|
||||
* ensure that the same forwarding ISC is used as this is assigned
|
||||
* during KVM module load.
|
||||
*/
|
||||
if (zpci_aipb->aipb.afi != nisc)
|
||||
return -EINVAL;
|
||||
|
||||
aift->sbv = zpci_aif_sbv;
|
||||
aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_s390_pci_aen_init(u8 nisc)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
/* If already enabled for AEN, bail out now */
|
||||
if (aift->gait || aift->sbv)
|
||||
return -EPERM;
|
||||
|
||||
mutex_lock(&aift->aift_lock);
|
||||
aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
|
||||
GFP_KERNEL);
|
||||
if (!aift->kzdev) {
|
||||
rc = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (!zpci_aipb)
|
||||
rc = zpci_setup_aipb(nisc);
|
||||
else
|
||||
rc = zpci_reset_aipb(nisc);
|
||||
if (rc)
|
||||
goto free_zdev;
|
||||
|
||||
/* Enable floating IRQs */
|
||||
if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
|
||||
rc = -EIO;
|
||||
kvm_s390_pci_aen_exit();
|
||||
}
|
||||
|
||||
goto unlock;
|
||||
|
||||
free_zdev:
|
||||
kfree(aift->kzdev);
|
||||
unlock:
|
||||
mutex_unlock(&aift->aift_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Modify PCI: Register floating adapter interruption forwarding */
|
||||
static int kvm_zpci_set_airq(struct zpci_dev *zdev)
|
||||
{
|
||||
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
|
||||
struct zpci_fib fib = {};
|
||||
u8 status;
|
||||
|
||||
fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
|
||||
fib.fmt0.sum = 1; /* enable summary notifications */
|
||||
fib.fmt0.noi = airq_iv_end(zdev->aibv);
|
||||
fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
|
||||
fib.fmt0.aibvo = 0;
|
||||
fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
|
||||
fib.fmt0.aisbo = zdev->aisb & 63;
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
|
||||
}
|
||||
|
||||
/* Modify PCI: Unregister floating adapter interruption forwarding */
|
||||
static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
|
||||
{
|
||||
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
|
||||
struct zpci_fib fib = {};
|
||||
u8 cc, status;
|
||||
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc == 3 || (cc == 1 && status == 24))
|
||||
/* Function already gone or IRQs already deregistered. */
|
||||
cc = 0;
|
||||
|
||||
return cc ? -EIO : 0;
|
||||
}
|
||||
|
||||
static inline void unaccount_mem(unsigned long nr_pages)
|
||||
{
|
||||
struct user_struct *user = get_uid(current_user());
|
||||
|
||||
if (user)
|
||||
atomic_long_sub(nr_pages, &user->locked_vm);
|
||||
if (current->mm)
|
||||
atomic64_sub(nr_pages, &current->mm->pinned_vm);
|
||||
}
|
||||
|
||||
static inline int account_mem(unsigned long nr_pages)
|
||||
{
|
||||
struct user_struct *user = get_uid(current_user());
|
||||
unsigned long page_limit, cur_pages, new_pages;
|
||||
|
||||
page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
||||
|
||||
do {
|
||||
cur_pages = atomic_long_read(&user->locked_vm);
|
||||
new_pages = cur_pages + nr_pages;
|
||||
if (new_pages > page_limit)
|
||||
return -ENOMEM;
|
||||
} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
|
||||
new_pages) != cur_pages);
|
||||
|
||||
atomic64_add(nr_pages, &current->mm->pinned_vm);
|
||||
|
||||
return 0;
|
||||
}
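
account_mem() above charges pinned pages against RLIMIT_MEMLOCK with a compare-and-swap loop so that concurrent callers cannot overshoot the limit. The same pattern is shown below as a hedged, self-contained userspace sketch using C11 atomics instead of the kernel's atomic_long_* helpers; it is meant only to make the retry logic easier to follow.

/* Sketch of the cmpxchg-based accounting loop using C11 atomics; the
 * kernel code above does the equivalent with atomic_long_cmpxchg().
 */
#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned long locked_pages;

static bool charge_pages(unsigned long nr_pages, unsigned long limit)
{
	unsigned long cur, new;

	cur = atomic_load(&locked_pages);
	do {
		new = cur + nr_pages;
		if (new > limit)
			return false;	/* would exceed the limit, fail */
		/* on failure, cur is reloaded and the add is retried */
	} while (!atomic_compare_exchange_weak(&locked_pages, &cur, new));

	return true;
}

static void uncharge_pages(unsigned long nr_pages)
{
	atomic_fetch_sub(&locked_pages, nr_pages);
}
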
|
||||
|
||||
static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
|
||||
bool assist)
|
||||
{
|
||||
struct page *pages[1], *aibv_page, *aisb_page = NULL;
|
||||
unsigned int msi_vecs, idx;
|
||||
struct zpci_gaite *gaite;
|
||||
unsigned long hva, bit;
|
||||
struct kvm *kvm;
|
||||
phys_addr_t gaddr;
|
||||
int rc = 0, gisc, npages, pcount = 0;
|
||||
|
||||
/*
|
||||
* Interrupt forwarding is only applicable if the device is already
|
||||
* enabled for interpretation
|
||||
*/
|
||||
if (zdev->gisa == 0)
|
||||
return -EINVAL;
|
||||
|
||||
kvm = zdev->kzdev->kvm;
|
||||
msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
|
||||
|
||||
/* Get the associated forwarding ISC - if invalid, return the error */
|
||||
gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
|
||||
if (gisc < 0)
|
||||
return gisc;
|
||||
|
||||
/* Replace AIBV address */
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
|
||||
npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
if (npages < 1) {
|
||||
rc = -EIO;
|
||||
goto out;
|
||||
}
|
||||
aibv_page = pages[0];
|
||||
pcount++;
|
||||
gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
|
||||
fib->fmt0.aibv = gaddr;
|
||||
|
||||
/* Pin the guest AISB if one was specified */
|
||||
if (fib->fmt0.sum == 1) {
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
|
||||
npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
|
||||
pages);
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
if (npages < 1) {
|
||||
rc = -EIO;
|
||||
goto unpin1;
|
||||
}
|
||||
aisb_page = pages[0];
|
||||
pcount++;
|
||||
}
|
||||
|
||||
/* Account for pinned pages, roll back on failure */
|
||||
if (account_mem(pcount))
|
||||
goto unpin2;
|
||||
|
||||
/* AISB must be allocated before we can fill in GAITE */
|
||||
mutex_lock(&aift->aift_lock);
|
||||
bit = airq_iv_alloc_bit(aift->sbv);
|
||||
if (bit == -1UL)
|
||||
goto unlock;
|
||||
zdev->aisb = bit; /* store the summary bit number */
|
||||
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
|
||||
AIRQ_IV_BITLOCK |
|
||||
AIRQ_IV_GUESTVEC,
|
||||
phys_to_virt(fib->fmt0.aibv));
|
||||
|
||||
spin_lock_irq(&aift->gait_lock);
|
||||
gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
|
||||
sizeof(struct zpci_gaite));
|
||||
|
||||
/* If assist not requested, host will get all alerts */
|
||||
if (assist)
|
||||
gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
|
||||
else
|
||||
gaite->gisa = 0;
|
||||
|
||||
gaite->gisc = fib->fmt0.isc;
|
||||
gaite->count++;
|
||||
gaite->aisbo = fib->fmt0.aisbo;
|
||||
gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
|
||||
~PAGE_MASK));
|
||||
aift->kzdev[zdev->aisb] = zdev->kzdev;
|
||||
spin_unlock_irq(&aift->gait_lock);
|
||||
|
||||
/* Update guest FIB for re-issue */
|
||||
fib->fmt0.aisbo = zdev->aisb & 63;
|
||||
fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
|
||||
fib->fmt0.isc = gisc;
|
||||
|
||||
/* Save some guest fib values in the host for later use */
|
||||
zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
|
||||
zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
|
||||
mutex_unlock(&aift->aift_lock);
|
||||
|
||||
/* Issue the clp to setup the irq now */
|
||||
rc = kvm_zpci_set_airq(zdev);
|
||||
return rc;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&aift->aift_lock);
|
||||
unpin2:
|
||||
if (fib->fmt0.sum == 1)
|
||||
unpin_user_page(aisb_page);
|
||||
unpin1:
|
||||
unpin_user_page(aibv_page);
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
|
||||
{
|
||||
struct kvm_zdev *kzdev = zdev->kzdev;
|
||||
struct zpci_gaite *gaite;
|
||||
struct page *vpage = NULL, *spage = NULL;
|
||||
int rc, pcount = 0;
|
||||
u8 isc;
|
||||
|
||||
if (zdev->gisa == 0)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&aift->aift_lock);
|
||||
|
||||
/*
|
||||
* If the clear fails due to an error, leave now unless we know this
|
||||
* device is about to go away (force) -- In that case clear the GAITE
|
||||
* regardless.
|
||||
*/
|
||||
rc = kvm_zpci_clear_airq(zdev);
|
||||
if (rc && !force)
|
||||
goto out;
|
||||
|
||||
if (zdev->kzdev->fib.fmt0.aibv == 0)
|
||||
goto out;
|
||||
spin_lock_irq(&aift->gait_lock);
|
||||
gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
|
||||
sizeof(struct zpci_gaite));
|
||||
isc = gaite->gisc;
|
||||
gaite->count--;
|
||||
if (gaite->count == 0) {
|
||||
/* Release guest AIBV and AISB */
|
||||
vpage = phys_to_page(kzdev->fib.fmt0.aibv);
|
||||
if (gaite->aisb != 0)
|
||||
spage = phys_to_page(gaite->aisb);
|
||||
/* Clear the GAIT entry */
|
||||
gaite->aisb = 0;
|
||||
gaite->gisc = 0;
|
||||
gaite->aisbo = 0;
|
||||
gaite->gisa = 0;
|
||||
aift->kzdev[zdev->aisb] = 0;
|
||||
/* Clear zdev info */
|
||||
airq_iv_free_bit(aift->sbv, zdev->aisb);
|
||||
airq_iv_release(zdev->aibv);
|
||||
zdev->aisb = 0;
|
||||
zdev->aibv = NULL;
|
||||
}
|
||||
spin_unlock_irq(&aift->gait_lock);
|
||||
kvm_s390_gisc_unregister(kzdev->kvm, isc);
|
||||
kzdev->fib.fmt0.isc = 0;
|
||||
kzdev->fib.fmt0.aibv = 0;
|
||||
|
||||
if (vpage) {
|
||||
unpin_user_page(vpage);
|
||||
pcount++;
|
||||
}
|
||||
if (spage) {
|
||||
unpin_user_page(spage);
|
||||
pcount++;
|
||||
}
|
||||
if (pcount > 0)
|
||||
unaccount_mem(pcount);
|
||||
out:
|
||||
mutex_unlock(&aift->aift_lock);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
|
||||
{
|
||||
struct kvm_zdev *kzdev;
|
||||
|
||||
kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
|
||||
if (!kzdev)
|
||||
return -ENOMEM;
|
||||
|
||||
kzdev->zdev = zdev;
|
||||
zdev->kzdev = kzdev;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
|
||||
{
|
||||
struct kvm_zdev *kzdev;
|
||||
|
||||
kzdev = zdev->kzdev;
|
||||
WARN_ON(kzdev->zdev != zdev);
|
||||
zdev->kzdev = NULL;
|
||||
kfree(kzdev);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Register device with the specified KVM. If interpretation facilities are
|
||||
* available, enable them and let userspace indicate whether or not they will
|
||||
* be used (specify SHM bit to disable).
|
||||
*/
|
||||
int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (!zdev)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&zdev->kzdev_lock);
|
||||
|
||||
if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
|
||||
mutex_unlock(&zdev->kzdev_lock);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
kvm_get_kvm(kvm);
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
rc = kvm_s390_pci_dev_open(zdev);
|
||||
if (rc)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* If interpretation facilities aren't available, add the device to
|
||||
* the kzdev list but don't enable for interpretation.
|
||||
*/
|
||||
if (!kvm_s390_pci_interp_allowed())
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If this is the first request to use an interpreted device, make the
|
||||
* necessary vcpu changes
|
||||
*/
|
||||
if (!kvm->arch.use_zpci_interp)
|
||||
kvm_s390_vcpu_pci_enable_interp(kvm);
|
||||
|
||||
if (zdev_enabled(zdev)) {
|
||||
rc = zpci_disable_device(zdev);
|
||||
if (rc)
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Store information about the identity of the kvm guest allowed to
|
||||
* access this device via interpretation to be used by host CLP
|
||||
*/
|
||||
zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
|
||||
|
||||
rc = zpci_enable_device(zdev);
|
||||
if (rc)
|
||||
goto clear_gisa;
|
||||
|
||||
/* Re-register the IOMMU that was already created */
|
||||
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
|
||||
virt_to_phys(zdev->dma_table));
|
||||
if (rc)
|
||||
goto clear_gisa;
|
||||
|
||||
out:
|
||||
zdev->kzdev->kvm = kvm;
|
||||
|
||||
spin_lock(&kvm->arch.kzdev_list_lock);
|
||||
list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
|
||||
spin_unlock(&kvm->arch.kzdev_list_lock);
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
mutex_unlock(&zdev->kzdev_lock);
|
||||
return 0;
|
||||
|
||||
clear_gisa:
|
||||
zdev->gisa = 0;
|
||||
err:
|
||||
if (zdev->kzdev)
|
||||
kvm_s390_pci_dev_release(zdev);
|
||||
mutex_unlock(&kvm->lock);
|
||||
mutex_unlock(&zdev->kzdev_lock);
|
||||
kvm_put_kvm(kvm);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
|
||||
|
||||
void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
|
||||
{
|
||||
struct kvm *kvm;
|
||||
|
||||
if (!zdev)
|
||||
return;
|
||||
|
||||
mutex_lock(&zdev->kzdev_lock);
|
||||
|
||||
if (WARN_ON(!zdev->kzdev)) {
|
||||
mutex_unlock(&zdev->kzdev_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
kvm = zdev->kzdev->kvm;
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
/*
|
||||
* A 0 gisa means interpretation was never enabled, just remove the
|
||||
* device from the list.
|
||||
*/
|
||||
if (zdev->gisa == 0)
|
||||
goto out;
|
||||
|
||||
/* Forwarding must be turned off before interpretation */
|
||||
if (zdev->kzdev->fib.fmt0.aibv != 0)
|
||||
kvm_s390_pci_aif_disable(zdev, true);
|
||||
|
||||
/* Remove the host CLP guest designation */
|
||||
zdev->gisa = 0;
|
||||
|
||||
if (zdev_enabled(zdev)) {
|
||||
if (zpci_disable_device(zdev))
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (zpci_enable_device(zdev))
|
||||
goto out;
|
||||
|
||||
/* Re-register the IOMMU that was already created */
|
||||
zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
|
||||
virt_to_phys(zdev->dma_table));
|
||||
|
||||
out:
|
||||
spin_lock(&kvm->arch.kzdev_list_lock);
|
||||
list_del(&zdev->kzdev->entry);
|
||||
spin_unlock(&kvm->arch.kzdev_list_lock);
|
||||
kvm_s390_pci_dev_release(zdev);
|
||||
|
||||
mutex_unlock(&kvm->lock);
|
||||
mutex_unlock(&zdev->kzdev_lock);
|
||||
|
||||
kvm_put_kvm(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);
|
||||
|
||||
void kvm_s390_pci_init_list(struct kvm *kvm)
|
||||
{
|
||||
spin_lock_init(&kvm->arch.kzdev_list_lock);
|
||||
INIT_LIST_HEAD(&kvm->arch.kzdev_list);
|
||||
}
|
||||
|
||||
void kvm_s390_pci_clear_list(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* This list should already be empty, either via vfio device closures
|
||||
* or kvm fd cleanup.
|
||||
*/
|
||||
spin_lock(&kvm->arch.kzdev_list_lock);
|
||||
WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
|
||||
spin_unlock(&kvm->arch.kzdev_list_lock);
|
||||
}
|
||||
|
||||
static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
|
||||
{
|
||||
struct zpci_dev *zdev = NULL;
|
||||
struct kvm_zdev *kzdev;
|
||||
|
||||
spin_lock(&kvm->arch.kzdev_list_lock);
|
||||
list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
|
||||
if (kzdev->zdev->fh == fh) {
|
||||
zdev = kzdev->zdev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&kvm->arch.kzdev_list_lock);
|
||||
|
||||
return zdev;
|
||||
}
|
||||
|
||||
static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
|
||||
struct kvm_s390_zpci_op *args)
|
||||
{
|
||||
struct zpci_fib fib = {};
|
||||
bool hostflag;
|
||||
|
||||
fib.fmt0.aibv = args->u.reg_aen.ibv;
|
||||
fib.fmt0.isc = args->u.reg_aen.isc;
|
||||
fib.fmt0.noi = args->u.reg_aen.noi;
|
||||
if (args->u.reg_aen.sb != 0) {
|
||||
fib.fmt0.aisb = args->u.reg_aen.sb;
|
||||
fib.fmt0.aisbo = args->u.reg_aen.sbo;
|
||||
fib.fmt0.sum = 1;
|
||||
} else {
|
||||
fib.fmt0.aisb = 0;
|
||||
fib.fmt0.aisbo = 0;
|
||||
fib.fmt0.sum = 0;
|
||||
}
|
||||
|
||||
hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
|
||||
return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
|
||||
}
|
||||
|
||||
int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
|
||||
{
|
||||
struct kvm_zdev *kzdev;
|
||||
struct zpci_dev *zdev;
|
||||
int r;
|
||||
|
||||
zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
|
||||
if (!zdev)
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&zdev->kzdev_lock);
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
kzdev = zdev->kzdev;
|
||||
if (!kzdev) {
|
||||
r = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
if (kzdev->kvm != kvm) {
|
||||
r = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
switch (args->op) {
|
||||
case KVM_S390_ZPCIOP_REG_AEN:
|
||||
/* Fail on unknown flags */
|
||||
if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
|
||||
r = -EINVAL;
|
||||
break;
|
||||
}
|
||||
r = kvm_s390_pci_zpci_reg_aen(zdev, args);
|
||||
break;
|
||||
case KVM_S390_ZPCIOP_DEREG_AEN:
|
||||
r = kvm_s390_pci_aif_disable(zdev, false);
|
||||
break;
|
||||
default:
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_unlock(&kvm->lock);
|
||||
mutex_unlock(&zdev->kzdev_lock);
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_s390_pci_init(void)
|
||||
{
|
||||
aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
|
||||
if (!aift)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&aift->gait_lock);
|
||||
mutex_init(&aift->aift_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_s390_pci_exit(void)
|
||||
{
|
||||
mutex_destroy(&aift->aift_lock);
|
||||
|
||||
kfree(aift);
|
||||
}
new file: arch/s390/kvm/pci.h (87 lines)
@ -0,0 +1,87 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* s390 kvm PCI passthrough support
|
||||
*
|
||||
* Copyright IBM Corp. 2022
|
||||
*
|
||||
* Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef __KVM_S390_PCI_H
|
||||
#define __KVM_S390_PCI_H
|
||||
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/pci.h>
|
||||
#include <asm/airq.h>
|
||||
#include <asm/cpu.h>
|
||||
|
||||
struct kvm_zdev {
|
||||
struct zpci_dev *zdev;
|
||||
struct kvm *kvm;
|
||||
struct zpci_fib fib;
|
||||
struct list_head entry;
|
||||
};
|
||||
|
||||
struct zpci_gaite {
|
||||
u32 gisa;
|
||||
u8 gisc;
|
||||
u8 count;
|
||||
u8 reserved;
|
||||
u8 aisbo;
|
||||
u64 aisb;
|
||||
};
|
||||
|
||||
struct zpci_aift {
|
||||
struct zpci_gaite *gait;
|
||||
struct airq_iv *sbv;
|
||||
struct kvm_zdev **kzdev;
|
||||
spinlock_t gait_lock; /* Protects the gait, used during AEN forward */
|
||||
struct mutex aift_lock; /* Protects the other structures in aift */
|
||||
};
|
||||
|
||||
extern struct zpci_aift *aift;
|
||||
|
||||
static inline struct kvm *kvm_s390_pci_si_to_kvm(struct zpci_aift *aift,
|
||||
unsigned long si)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) || aift->kzdev == 0 ||
|
||||
aift->kzdev[si] == 0)
|
||||
return 0;
|
||||
return aift->kzdev[si]->kvm;
|
||||
};
|
||||
|
||||
int kvm_s390_pci_aen_init(u8 nisc);
|
||||
void kvm_s390_pci_aen_exit(void);
|
||||
|
||||
void kvm_s390_pci_init_list(struct kvm *kvm);
|
||||
void kvm_s390_pci_clear_list(struct kvm *kvm);
|
||||
|
||||
int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args);
|
||||
|
||||
int kvm_s390_pci_init(void);
|
||||
void kvm_s390_pci_exit(void);
|
||||
|
||||
static inline bool kvm_s390_pci_interp_allowed(void)
|
||||
{
|
||||
struct cpuid cpu_id;
|
||||
|
||||
get_cpu_id(&cpu_id);
|
||||
switch (cpu_id.machine) {
|
||||
case 0x2817:
|
||||
case 0x2818:
|
||||
case 0x2827:
|
||||
case 0x2828:
|
||||
case 0x2964:
|
||||
case 0x2965:
|
||||
/* No SHM on certain machines */
|
||||
return false;
|
||||
default:
|
||||
return (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM) &&
|
||||
sclp.has_zpci_lsi && sclp.has_aeni && sclp.has_aisi &&
|
||||
sclp.has_aisii);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __KVM_S390_PCI_H */
|
@ -442,7 +442,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
|
||||
vcpu->stat.instruction_ipte_interlock++;
|
||||
if (psw_bits(vcpu->arch.sie_block->gpsw).pstate)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
|
||||
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu->kvm));
|
||||
kvm_s390_retry_instr(vcpu);
|
||||
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
|
||||
return 0;
|
||||
@ -873,10 +873,18 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
|
||||
|
||||
if (fc > 3) {
|
||||
kvm_s390_set_psw_cc(vcpu, 3);
|
||||
return 0;
|
||||
}
|
||||
/* Bailout forbidden function codes */
|
||||
if (fc > 3 && fc != 15)
|
||||
goto out_no_data;
|
||||
|
||||
/*
|
||||
* fc 15 is provided only with
|
||||
* - PTF/CPU topology support through facility 15
|
||||
* - KVM_CAP_S390_USER_STSI
|
||||
*/
|
||||
if (fc == 15 && (!test_kvm_facility(vcpu->kvm, 11) ||
|
||||
!vcpu->kvm->arch.user_stsi))
|
||||
goto out_no_data;
|
||||
|
||||
if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
|
||||
|| vcpu->run->s.regs.gprs[1] & 0xffff0000)
|
||||
@ -910,6 +918,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
|
||||
goto out_no_data;
|
||||
handle_stsi_3_2_2(vcpu, (void *) mem);
|
||||
break;
|
||||
case 15: /* fc 15 is fully handled in userspace */
|
||||
insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
|
||||
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
|
||||
return -EREMOTE;
|
||||
}
|
||||
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
|
||||
memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
|
||||
@ -1471,7 +1483,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
|
||||
access_key = (operand2 & 0xf0) >> 4;
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
|
||||
ipte_lock(vcpu);
|
||||
ipte_lock(vcpu->kvm);
|
||||
|
||||
ret = guest_translate_address_with_key(vcpu, address, ar, &gpa,
|
||||
GACC_STORE, access_key);
|
||||
@ -1508,7 +1520,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
|
||||
ipte_unlock(vcpu);
|
||||
ipte_unlock(vcpu->kvm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -7,13 +7,25 @@
|
||||
*/
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <asm/gmap.h>
|
||||
#include <asm/uv.h>
|
||||
#include <asm/mman.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include "kvm-s390.h"
|
||||
|
||||
static void kvm_s390_clear_pv_state(struct kvm *kvm)
|
||||
{
|
||||
kvm->arch.pv.handle = 0;
|
||||
kvm->arch.pv.guest_len = 0;
|
||||
kvm->arch.pv.stor_base = 0;
|
||||
kvm->arch.pv.stor_var = NULL;
|
||||
}
|
||||
|
||||
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
|
||||
{
|
||||
int cc;
|
||||
@ -108,7 +120,7 @@ static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
|
||||
vfree(kvm->arch.pv.stor_var);
|
||||
free_pages(kvm->arch.pv.stor_base,
|
||||
get_order(uv_info.guest_base_stor_len));
|
||||
memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
|
||||
kvm_s390_clear_pv_state(kvm);
|
||||
}
|
||||
|
||||
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
|
||||
@ -152,21 +164,51 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
{
|
||||
int cc;
|
||||
|
||||
/* make all pages accessible before destroying the guest */
|
||||
s390_reset_acc(kvm->mm);
|
||||
|
||||
cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
|
||||
UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
|
||||
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
|
||||
atomic_set(&kvm->mm->context.is_protected, 0);
|
||||
/*
|
||||
* if the mm still has a mapping, make all its pages accessible
|
||||
* before destroying the guest
|
||||
*/
|
||||
if (mmget_not_zero(kvm->mm)) {
|
||||
s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
|
||||
mmput(kvm->mm);
|
||||
}
|
||||
|
||||
if (!cc) {
|
||||
atomic_dec(&kvm->mm->context.protected_count);
|
||||
kvm_s390_pv_dealloc_vm(kvm);
|
||||
} else {
|
||||
/* Intended memory leak on "impossible" error */
|
||||
s390_replace_asce(kvm->arch.gmap);
|
||||
}
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
|
||||
WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
|
||||
/* Intended memory leak on "impossible" error */
|
||||
if (!cc)
|
||||
kvm_s390_pv_dealloc_vm(kvm);
|
||||
|
||||
return cc ? -EIO : 0;
|
||||
}
|
||||
|
||||
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
|
||||
u16 dummy;
|
||||
|
||||
/*
|
||||
* No locking is needed since this is the last thread of the last user of this
|
||||
* struct mm.
|
||||
* When the struct kvm gets deinitialized, this notifier is also
|
||||
* unregistered. This means that if this notifier runs, then the
|
||||
* struct kvm is still valid.
|
||||
*/
|
||||
kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
|
||||
}
|
||||
|
||||
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
|
||||
.release = kvm_s390_pv_mmu_notifier_release,
|
||||
};
|
||||
|
||||
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_cgc uvcb = {
|
||||
@ -197,14 +239,22 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
|
||||
/* Outputs */
|
||||
kvm->arch.pv.handle = uvcb.guest_handle;
|
||||
|
||||
atomic_inc(&kvm->mm->context.protected_count);
|
||||
if (cc) {
|
||||
if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
|
||||
if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
|
||||
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
|
||||
else
|
||||
} else {
|
||||
atomic_dec(&kvm->mm->context.protected_count);
|
||||
kvm_s390_pv_dealloc_vm(kvm);
|
||||
}
|
||||
return -EIO;
|
||||
}
|
||||
kvm->arch.gmap->guest_handle = uvcb.guest_handle;
|
||||
/* Add the notifier only once. No races because we hold kvm->lock */
|
||||
if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
|
||||
kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
|
||||
mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -224,8 +274,6 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
|
||||
*rrc = uvcb.header.rrc;
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
|
||||
*rc, *rrc);
|
||||
if (!cc)
|
||||
atomic_set(&kvm->mm->context.is_protected, 1);
|
||||
return cc ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
@ -298,3 +346,200 @@ int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_dump_cpu uvcb = {
|
||||
.header.cmd = UVC_CMD_DUMP_CPU,
|
||||
.header.len = sizeof(uvcb),
|
||||
.cpu_handle = vcpu->arch.pv.handle,
|
||||
.dump_area_origin = (u64)buff,
|
||||
};
|
||||
int cc;
|
||||
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* Size of the cache for the storage state dump data. 1MB for now */
|
||||
#define DUMP_BUFF_LEN HPAGE_SIZE
|
||||
|
||||
/**
|
||||
* kvm_s390_pv_dump_stor_state
|
||||
*
|
||||
* @kvm: pointer to the guest's KVM struct
|
||||
* @buff_user: Userspace pointer where we will write the results to
|
||||
* @gaddr: Starting absolute guest address for which the storage state
|
||||
* is requested.
|
||||
* @buff_user_len: Length of the buff_user buffer
|
||||
* @rc: Pointer to where the uvcb return code is stored
|
||||
* @rrc: Pointer to where the uvcb return reason code is stored
|
||||
*
|
||||
* Stores buff_len bytes of tweak component values to buff_user
|
||||
* starting with the 1MB block specified by the absolute guest address
|
||||
* (gaddr). The gaddr pointer will be updated with the last address
|
||||
* for which data was written when returning to userspace. buff_user
|
||||
* might be written to even if an error rc is returned. For instance
|
||||
* if we encounter a fault after writing the first page of data.
|
||||
*
|
||||
* Context: kvm->lock needs to be held
|
||||
*
|
||||
* Return:
|
||||
* 0 on success
|
||||
* -ENOMEM if allocating the cache fails
|
||||
* -EINVAL if gaddr is not aligned to 1MB
|
||||
* -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
|
||||
* -EINVAL if the UV call fails, rc and rrc will be set in this case
|
||||
* -EFAULT if copying the result to buff_user failed
|
||||
*/
|
||||
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
|
||||
u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_dump_stor_state uvcb = {
|
||||
.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
|
||||
.header.len = sizeof(uvcb),
|
||||
.config_handle = kvm->arch.pv.handle,
|
||||
.gaddr = *gaddr,
|
||||
.dump_area_origin = 0,
|
||||
};
|
||||
const u64 increment_len = uv_info.conf_dump_storage_state_len;
|
||||
size_t buff_kvm_size;
|
||||
size_t size_done = 0;
|
||||
u8 *buff_kvm = NULL;
|
||||
int cc, ret;
|
||||
|
||||
ret = -EINVAL;
|
||||
/* UV call processes 1MB guest storage chunks at a time */
|
||||
if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We provide the storage state for 1MB chunks of guest
|
||||
* storage. The buffer will need to be aligned to
|
||||
* conf_dump_storage_state_len so we don't end on a partial
|
||||
* chunk.
|
||||
*/
|
||||
if (!buff_user_len ||
|
||||
!IS_ALIGNED(buff_user_len, increment_len))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Allocate a buffer from which we will later copy to the user
|
||||
* process. We don't want userspace to dictate our buffer size
|
||||
* so we limit it to DUMP_BUFF_LEN.
|
||||
*/
|
||||
ret = -ENOMEM;
|
||||
buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
|
||||
buff_kvm = vzalloc(buff_kvm_size);
|
||||
if (!buff_kvm)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
uvcb.dump_area_origin = (u64)buff_kvm;
|
||||
/* We will loop until the user buffer is filled or an error occurs */
|
||||
do {
|
||||
/* Get 1MB worth of guest storage state data */
|
||||
cc = uv_call_sched(0, (u64)&uvcb);
|
||||
|
||||
/* All or nothing */
|
||||
if (cc) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
size_done += increment_len;
|
||||
uvcb.dump_area_origin += increment_len;
|
||||
buff_user_len -= increment_len;
|
||||
uvcb.gaddr += HPAGE_SIZE;
|
||||
|
||||
/* KVM Buffer full, time to copy to the process */
|
||||
if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
|
||||
if (copy_to_user(buff_user, buff_kvm, size_done)) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
buff_user += size_done;
|
||||
size_done = 0;
|
||||
uvcb.dump_area_origin = (u64)buff_kvm;
|
||||
}
|
||||
} while (buff_user_len);
|
||||
|
||||
/* Report back where we ended dumping */
|
||||
*gaddr = uvcb.gaddr;
|
||||
|
||||
/* Let's only log errors, we don't want to spam */
|
||||
out:
|
||||
if (ret)
|
||||
KVM_UV_EVENT(kvm, 3,
|
||||
"PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
|
||||
uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
|
||||
*rc = uvcb.header.rc;
|
||||
*rrc = uvcb.header.rrc;
|
||||
vfree(buff_kvm);
|
||||
|
||||
return ret;
|
||||
}
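
kvm_s390_pv_dump_stor_state() above fills a bounded kernel bounce buffer in 1 MB increments and flushes it to userspace whenever it is full or the request is satisfied, so userspace cannot force an arbitrarily large kernel allocation. The standalone sketch below mirrors just that buffering pattern in plain C; produce_chunk() and flush_to_user() are hypothetical stand-ins for the UV call and copy_to_user(), and chunk_len plays the role of conf_dump_storage_state_len.

/* Sketch of the bounded bounce-buffer pattern used above. Assumes
 * user_len and bounce_len are multiples of chunk_len, as the kernel
 * code enforces via its alignment checks.
 */
#include <stddef.h>

int dump_in_chunks(unsigned char *bounce, size_t bounce_len,
		   unsigned char *user, size_t user_len, size_t chunk_len,
		   int (*produce_chunk)(unsigned char *dst),
		   int (*flush_to_user)(unsigned char *dst,
					const unsigned char *src, size_t len))
{
	size_t done = 0;	/* bytes currently held in the bounce buffer */

	while (user_len) {
		if (produce_chunk(bounce + done))
			return -1;		/* all or nothing on error */
		done += chunk_len;
		user_len -= chunk_len;

		/* bounce buffer full or request satisfied: hand it over */
		if (!user_len || done == bounce_len) {
			if (flush_to_user(user, bounce, done))
				return -1;
			user += done;
			done = 0;
		}
	}
	return 0;
}
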
|
||||
|
||||
/**
|
||||
* kvm_s390_pv_dump_complete
|
||||
*
|
||||
* @kvm: pointer to the guest's KVM struct
|
||||
* @buff_user: Userspace pointer where we will write the results to
|
||||
* @rc: Pointer to where the uvcb return code is stored
|
||||
* @rrc: Pointer to where the uvcb return reason code is stored
|
||||
*
|
||||
* Completes the dumping operation and writes the completion data to
|
||||
* user space.
|
||||
*
|
||||
* Context: kvm->lock needs to be held
|
||||
*
|
||||
* Return:
|
||||
* 0 on success
|
||||
* -ENOMEM if allocating the completion buffer fails
|
||||
* -EINVAL if the UV call fails, rc and rrc will be set in this case
|
||||
* -EFAULT if copying the result to buff_user failed
|
||||
*/
|
||||
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
|
||||
u16 *rc, u16 *rrc)
|
||||
{
|
||||
struct uv_cb_dump_complete complete = {
|
||||
.header.len = sizeof(complete),
|
||||
.header.cmd = UVC_CMD_DUMP_COMPLETE,
|
||||
.config_handle = kvm_s390_pv_get_handle(kvm),
|
||||
};
|
||||
u64 *compl_data;
|
||||
int ret;
|
||||
|
||||
/* Allocate dump area */
|
||||
compl_data = vzalloc(uv_info.conf_dump_finalize_len);
|
||||
if (!compl_data)
|
||||
return -ENOMEM;
|
||||
complete.dump_area_origin = (u64)compl_data;
|
||||
|
||||
ret = uv_call_sched(0, (u64)&complete);
|
||||
*rc = complete.header.rc;
|
||||
*rrc = complete.header.rrc;
|
||||
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
|
||||
complete.header.rc, complete.header.rrc);
|
||||
|
||||
if (!ret) {
|
||||
/*
|
||||
* kvm_s390_pv_dealloc_vm() will also (mem)set
|
||||
* this to false on a reboot or other destroy
|
||||
* operation for this vm.
|
||||
*/
|
||||
kvm->arch.pv.dumping = false;
|
||||
kvm_s390_vcpu_unblock_all(kvm);
|
||||
ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
|
||||
if (ret)
|
||||
ret = -EFAULT;
|
||||
}
|
||||
vfree(compl_data);
|
||||
/* If the UVC returned an error, translate it to -EINVAL */
|
||||
if (ret > 0)
|
||||
ret = -EINVAL;
|
||||
return ret;
|
||||
}
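
The dump helpers added in this file are meant to be driven in a fixed order from userspace: put the guest into dump mode, dump each vcpu, walk the configuration storage state, then finalize. A rough sketch of that sequence is below; the KVM_PV_DUMP_INIT, KVM_PV_DUMP_CONFIG_STOR_STATE and KVM_PV_DUMP_COMPLETE subcommand names are my reading of the uapi added by this series and should be checked against the merged headers before use.

/* Rough ordering sketch for a full PV dump; error handling omitted and
 * the KVM_PV_DUMP_* subcommand names below are assumptions, see above.
 */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int pv_dump_step(int fd, unsigned long req, __u64 subcmd,
			void *buf, __u64 len, __u64 gaddr)
{
	struct kvm_s390_pv_dmp dmp;
	struct kvm_pv_cmd cmd;

	memset(&dmp, 0, sizeof(dmp));
	dmp.subcmd = subcmd;
	dmp.buff_addr = (__u64)(unsigned long)buf;
	dmp.buff_len = len;
	dmp.gaddr = gaddr;	/* only meaningful for the storage state step */

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd = KVM_PV_DUMP;
	cmd.data = (__u64)(unsigned long)&dmp;

	return ioctl(fd, req, &cmd);
}

/*
 * Intended sequence (vm_fd for VM-level steps, vcpu_fd per vcpu):
 *   pv_dump_step(vm_fd,   KVM_S390_PV_COMMAND,     KVM_PV_DUMP_INIT, ...);
 *   pv_dump_step(vcpu_fd, KVM_S390_PV_CPU_COMMAND, KVM_PV_DUMP_CPU, ...);
 *   pv_dump_step(vm_fd,   KVM_S390_PV_COMMAND,     KVM_PV_DUMP_CONFIG_STOR_STATE, ...);
 *   pv_dump_step(vm_fd,   KVM_S390_PV_COMMAND,     KVM_PV_DUMP_COMPLETE, ...);
 */
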
|
||||
|
@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
|
||||
struct kvm_vcpu *dest_vcpu;
|
||||
u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
|
||||
|
||||
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
|
||||
|
||||
if (order_code == SIGP_EXTERNAL_CALL) {
|
||||
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
|
||||
|
||||
dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
|
||||
BUG_ON(dest_vcpu == NULL);
|
||||
|
||||
|
@ -503,6 +503,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
|
||||
/* Host-protection-interruption introduced with ESOP */
|
||||
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
|
||||
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
|
||||
/*
|
||||
* CPU Topology
|
||||
* This facility only uses the utility field of the SCA and none of
|
||||
* the cpu entries that are problematic with the other interpretation
|
||||
* facilities so we can pass it through
|
||||
*/
|
||||
if (test_kvm_facility(vcpu->kvm, 11))
|
||||
scb_s->ecb |= scb_o->ecb & ECB_PTF;
|
||||
/* transactional execution */
|
||||
if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
|
||||
/* remap the prefix is tx is toggled on */
|
||||
|
@ -754,6 +754,7 @@ void do_secure_storage_access(struct pt_regs *regs)
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm;
|
||||
struct page *page;
|
||||
struct gmap *gmap;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
@ -783,6 +784,17 @@ void do_secure_storage_access(struct pt_regs *regs)
|
||||
}
|
||||
|
||||
switch (get_fault_type(regs)) {
|
||||
case GMAP_FAULT:
|
||||
mm = current->mm;
|
||||
gmap = (struct gmap *)S390_lowcore.gmap;
|
||||
mmap_read_lock(mm);
|
||||
addr = __gmap_translate(gmap, addr);
|
||||
mmap_read_unlock(mm);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP);
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
case USER_FAULT:
|
||||
mm = current->mm;
|
||||
mmap_read_lock(mm);
|
||||
@ -811,7 +823,6 @@ void do_secure_storage_access(struct pt_regs *regs)
|
||||
if (rc)
|
||||
BUG();
|
||||
break;
|
||||
case GMAP_FAULT:
|
||||
default:
|
||||
do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
|
||||
WARN_ON_ONCE(1);
|
||||
@ -837,6 +848,16 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access);
|
||||
|
||||
void do_secure_storage_violation(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
|
||||
struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
|
||||
|
||||
/*
|
||||
* If the VM has been rebooted, its address space might still contain
|
||||
* secure pages from the previous boot.
|
||||
* Clear the page so it can be reused.
|
||||
*/
|
||||
if (!gmap_destroy_page(gmap, gaddr))
|
||||
return;
|
||||
/*
|
||||
* Either KVM messed up the secure guest mapping or the same
|
||||
* page is mapped into multiple secure guests.
|
||||
|
@ -2697,41 +2697,168 @@ void s390_reset_cmma(struct mm_struct *mm)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(s390_reset_cmma);
|
||||
|
||||
/*
|
||||
* make inaccessible pages accessible again
|
||||
*/
|
||||
static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
#define GATHER_GET_PAGES 32
|
||||
|
||||
struct reset_walk_state {
|
||||
unsigned long next;
|
||||
unsigned long count;
|
||||
unsigned long pfns[GATHER_GET_PAGES];
|
||||
};
|
||||
|
||||
static int s390_gather_pages(pte_t *ptep, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
{
|
||||
struct reset_walk_state *p = walk->private;
|
||||
pte_t pte = READ_ONCE(*ptep);
|
||||
|
||||
/* There is a reference through the mapping */
|
||||
if (pte_present(pte))
|
||||
WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
|
||||
if (pte_present(pte)) {
|
||||
/* we have a reference from the mapping, take an extra one */
|
||||
get_page(phys_to_page(pte_val(pte)));
|
||||
p->pfns[p->count] = phys_to_pfn(pte_val(pte));
|
||||
p->next = next;
|
||||
p->count++;
|
||||
}
|
||||
return p->count >= GATHER_GET_PAGES;
|
||||
}
|
||||
|
||||
static const struct mm_walk_ops gather_pages_ops = {
|
||||
.pte_entry = s390_gather_pages,
|
||||
};
|
||||
|
||||
/*
|
||||
* Call the Destroy secure page UVC on each page in the given array of PFNs.
|
||||
* Each page needs to have an extra reference, which will be released here.
|
||||
*/
|
||||
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
|
||||
{
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
/* we always have an extra reference */
|
||||
uv_destroy_owned_page(pfn_to_phys(pfns[i]));
|
||||
/* get rid of the extra reference */
|
||||
put_page(pfn_to_page(pfns[i]));
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(s390_uv_destroy_pfns);
|
||||
|
||||
/**
|
||||
* __s390_uv_destroy_range - Call the destroy secure page UVC on each page
|
||||
* in the given range of the given address space.
|
||||
* @mm: the mm to operate on
|
||||
* @start: the start of the range
|
||||
* @end: the end of the range
|
||||
* @interruptible: if not 0, stop when a fatal signal is received
|
||||
*
|
||||
* Walk the given range of the given address space and call the destroy
|
||||
* secure page UVC on each page. Optionally exit early if a fatal signal is
|
||||
* pending.
|
||||
*
|
||||
* Return: 0 on success, -EINTR if the function stopped before completing
|
||||
*/
|
||||
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
|
||||
unsigned long end, bool interruptible)
|
||||
{
|
||||
struct reset_walk_state state = { .next = start };
|
||||
int r = 1;
|
||||
|
||||
while (r > 0) {
|
||||
state.count = 0;
|
||||
mmap_read_lock(mm);
|
||||
r = walk_page_range(mm, state.next, end, &gather_pages_ops, &state);
|
||||
mmap_read_unlock(mm);
|
||||
cond_resched();
|
||||
s390_uv_destroy_pfns(state.count, state.pfns);
|
||||
if (interruptible && fatal_signal_pending(current))
|
||||
return -EINTR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
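
__s390_uv_destroy_range() above deliberately gathers at most GATHER_GET_PAGES pfns per mmap_read_lock section and then issues the destroy UVCs with the lock dropped, so long ranges never hold the lock for long stretches or run without rescheduling points. A generic, self-contained sketch of that "collect a bounded batch under the lock, process it outside" shape is shown below; collect_batch() and process_item() are placeholders, not kernel interfaces.

/* Generic form of the batched walk used above: grab at most BATCH items
 * while holding the lock, drop the lock, process them, repeat until the
 * producer reports nothing is left. The callbacks are hypothetical.
 */
#include <pthread.h>
#include <stddef.h>

#define BATCH 32

void drain_in_batches(pthread_mutex_t *lock,
		      size_t (*collect_batch)(unsigned long *items, size_t max),
		      void (*process_item)(unsigned long item))
{
	unsigned long items[BATCH];
	size_t i, n;

	do {
		pthread_mutex_lock(lock);
		n = collect_batch(items, BATCH);	/* bounded work under the lock */
		pthread_mutex_unlock(lock);

		for (i = 0; i < n; i++)
			process_item(items[i]);	/* slow work with the lock dropped */
	} while (n == BATCH);	/* a short batch means the walk is complete */
}
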
|
||||
|
||||
/**
|
||||
* s390_unlist_old_asce - Remove the topmost level of page tables from the
|
||||
* list of page tables of the gmap.
|
||||
* @gmap: the gmap whose table is to be removed
|
||||
*
|
||||
* On s390x, KVM keeps a list of all pages containing the page tables of the
|
||||
* gmap (the CRST list). This list is used at tear down time to free all
|
||||
* pages that are now not needed anymore.
|
||||
*
|
||||
* This function removes the topmost page of the tree (the one pointed to by
|
||||
* the ASCE) from the CRST list.
|
||||
*
|
||||
* This means that it will not be freed when the VM is torn down, and needs
|
||||
* to be handled separately by the caller, unless a leak is actually
|
||||
* intended. Notice that this function will only remove the page from the
|
||||
* list, the page will still be used as a top level page table (and ASCE).
|
||||
*/
|
||||
void s390_unlist_old_asce(struct gmap *gmap)
|
||||
{
|
||||
struct page *old;
|
||||
|
||||
old = virt_to_page(gmap->table);
|
||||
spin_lock(&gmap->guest_table_lock);
|
||||
list_del(&old->lru);
|
||||
/*
|
||||
* Sometimes the topmost page might need to be "removed" multiple
|
||||
* times, for example if the VM is rebooted into secure mode several
|
||||
* times concurrently, or if s390_replace_asce fails after calling
|
||||
* s390_remove_old_asce and is attempted again later. In that case
|
||||
* the old asce has been removed from the list, and therefore it
|
||||
* will not be freed when the VM terminates, but the ASCE is still
|
||||
* in use and still pointed to.
|
||||
* A subsequent call to replace_asce will follow the pointer and try
|
||||
* to remove the same page from the list again.
|
||||
* Therefore it's necessary that the page of the ASCE has valid
|
||||
* pointers, so list_del can work (and do nothing) without
|
||||
* dereferencing stale or invalid pointers.
|
||||
*/
|
||||
INIT_LIST_HEAD(&old->lru);
|
||||
spin_unlock(&gmap->guest_table_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
|
||||
|
||||
/**
|
||||
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
|
||||
* @gmap: the gmap whose ASCE needs to be replaced
|
||||
*
|
||||
* If the allocation of the new top level page table fails, the ASCE is not
|
||||
* replaced.
|
||||
* In any case, the old ASCE is always removed from the gmap CRST list.
|
||||
* Therefore the caller has to make sure to save a pointer to it
|
||||
* beforehand, unless a leak is actually intended.
|
||||
*/
|
||||
int s390_replace_asce(struct gmap *gmap)
|
||||
{
|
||||
unsigned long asce;
|
||||
struct page *page;
|
||||
void *table;
|
||||
|
||||
s390_unlist_old_asce(gmap);
|
||||
|
||||
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
table = page_to_virt(page);
|
||||
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
|
||||
|
||||
/*
|
||||
* The caller has to deal with the old ASCE, but here we make sure
|
||||
* the new one is properly added to the CRST list, so that
|
||||
* it will be freed when the VM is torn down.
|
||||
*/
|
||||
spin_lock(&gmap->guest_table_lock);
|
||||
list_add(&page->lru, &gmap->crst_list);
|
||||
spin_unlock(&gmap->guest_table_lock);
|
||||
|
||||
/* Set new table origin while preserving existing ASCE control bits */
|
||||
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
|
||||
WRITE_ONCE(gmap->asce, asce);
|
||||
WRITE_ONCE(gmap->mm->context.gmap_asce, asce);
|
||||
WRITE_ONCE(gmap->table, table);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct mm_walk_ops reset_acc_walk_ops = {
|
||||
.pte_entry = __s390_reset_acc,
|
||||
};
|
||||
|
||||
#include <linux/sched/mm.h>
|
||||
void s390_reset_acc(struct mm_struct *mm)
|
||||
{
|
||||
if (!mm_is_protected(mm))
|
||||
return;
|
||||
/*
|
||||
* we might be called during
|
||||
* reset: we walk the pages and clear
|
||||
* close of all kvm file descriptors: we walk the pages and clear
|
||||
* exit of process on fd closure: vma already gone, do nothing
|
||||
*/
|
||||
if (!mmget_not_zero(mm))
|
||||
return;
|
||||
mmap_read_lock(mm);
|
||||
walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(s390_reset_acc);
|
||||
EXPORT_SYMBOL_GPL(s390_replace_asce);
|
||||
|
@ -61,6 +61,12 @@ DEFINE_STATIC_KEY_FALSE(have_mio);
|
||||
|
||||
static struct kmem_cache *zdev_fmb_cache;
|
||||
|
||||
/* AEN structures that must be preserved over KVM module re-insertion */
|
||||
union zpci_sic_iib *zpci_aipb;
|
||||
EXPORT_SYMBOL_GPL(zpci_aipb);
|
||||
struct airq_iv *zpci_aif_sbv;
|
||||
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
|
||||
|
||||
struct zpci_dev *get_zdev_by_fid(u32 fid)
|
||||
{
|
||||
struct zpci_dev *tmp, *zdev = NULL;
|
||||
@ -120,11 +126,13 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
|
||||
fib.pba = base;
|
||||
fib.pal = limit;
|
||||
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
|
||||
fib.gd = zdev->gisa;
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc)
|
||||
zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
|
||||
return cc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_register_ioat);
|
||||
|
||||
/* Modify PCI: Unregister I/O address translation parameters */
|
||||
int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
|
||||
@ -133,6 +141,8 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
|
||||
struct zpci_fib fib = {0};
|
||||
u8 cc, status;
|
||||
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc)
|
||||
zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
|
||||
@ -160,6 +170,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
|
||||
atomic64_set(&zdev->unmapped_pages, 0);
|
||||
|
||||
fib.fmb_addr = virt_to_phys(zdev->fmb);
|
||||
fib.gd = zdev->gisa;
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc) {
|
||||
kmem_cache_free(zdev_fmb_cache, zdev->fmb);
|
||||
@ -178,6 +189,8 @@ int zpci_fmb_disable_device(struct zpci_dev *zdev)
|
||||
if (!zdev->fmb)
|
||||
return -EINVAL;
|
||||
|
||||
fib.gd = zdev->gisa;
|
||||
|
||||
/* Function measurement is disabled if fmb address is zero */
|
||||
cc = zpci_mod_fc(req, &fib, &status);
|
||||
if (cc == 3) /* Function already gone. */
|
||||
@ -700,6 +713,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
|
||||
zpci_update_fh(zdev, fh);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_enable_device);
|
||||
|
||||
int zpci_disable_device(struct zpci_dev *zdev)
|
||||
{
|
||||
@ -723,6 +737,7 @@ int zpci_disable_device(struct zpci_dev *zdev)
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_disable_device);
|
||||
|
||||
/**
|
||||
* zpci_hot_reset_device - perform a reset of the given zPCI function
|
||||
@ -816,6 +831,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
|
||||
|
||||
kref_init(&zdev->kref);
|
||||
mutex_init(&zdev->lock);
|
||||
mutex_init(&zdev->kzdev_lock);
|
||||
|
||||
rc = zpci_init_iommu(zdev);
|
||||
if (rc)
|
||||
|
@ -106,6 +106,8 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
|
||||
zdev->max_msi = response->noi;
|
||||
zdev->fmb_update = response->mui;
|
||||
zdev->version = response->version;
|
||||
zdev->maxstbl = response->maxstbl;
|
||||
zdev->dtsm = response->dtsm;
|
||||
|
||||
switch (response->version) {
|
||||
case 1:
|
||||
@ -229,12 +231,16 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
|
||||
{
|
||||
struct clp_req_rsp_set_pci *rrb;
|
||||
int rc, retries = 100;
|
||||
u32 gisa = 0;
|
||||
|
||||
*fh = 0;
|
||||
rrb = clp_alloc_block(GFP_KERNEL);
|
||||
if (!rrb)
|
||||
return -ENOMEM;
|
||||
|
||||
if (command != CLP_SET_DISABLE_PCI_FN)
|
||||
gisa = zdev->gisa;
|
||||
|
||||
do {
|
||||
memset(rrb, 0, sizeof(*rrb));
|
||||
rrb->request.hdr.len = sizeof(rrb->request);
|
||||
@ -243,6 +249,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 comma
|
||||
rrb->request.fh = zdev->fh;
|
||||
rrb->request.oc = command;
|
||||
rrb->request.ndas = nr_dma_as;
|
||||
rrb->request.gisa = gisa;
|
||||
|
||||
rc = clp_req(rrb, CLP_LPS_PCI);
|
||||
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
|
||||
|
@ -92,6 +92,7 @@ u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status)
|
||||
|
||||
return cc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_mod_fc);
|
||||
|
||||
/* Refresh PCI Translations */
|
||||
static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
|
||||
@ -138,7 +139,7 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
|
||||
}
|
||||
|
||||
/* Set Interruption Controls */
|
||||
int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
|
||||
int zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
|
||||
{
|
||||
if (!test_facility(72))
|
||||
return -EIO;
|
||||
@ -149,6 +150,7 @@ int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(zpci_set_irq_ctrl);
|
||||
|
||||
/* PCI Load */
|
||||
static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status)
@@ -11,16 +11,10 @@

#include <asm/isc.h>
#include <asm/airq.h>
#include <asm/tpi.h>

static enum {FLOATING, DIRECTED} irq_delivery;

#define SIC_IRQ_MODE_ALL 0
#define SIC_IRQ_MODE_SINGLE 1
#define SIC_IRQ_MODE_DIRECT 4
#define SIC_IRQ_MODE_D_ALL 16
#define SIC_IRQ_MODE_D_SINGLE 17
#define SIC_IRQ_MODE_SET_CPU 18

/*
* summary bit vector
* FLOATING - summary bit per function
@@ -49,6 +43,7 @@ static int zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
fib.fmt0.aisb = virt_to_phys(zpci_sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
fib.gd = zdev->gisa;

return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -60,6 +55,8 @@ static int zpci_clear_airq(struct zpci_dev *zdev)
struct zpci_fib fib = {0};
u8 cc, status;

fib.gd = zdev->gisa;

cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -78,6 +75,7 @@ static int zpci_set_directed_irq(struct zpci_dev *zdev)
fib.fmt = 1;
fib.fmt1.noi = zdev->msi_nr_irqs;
fib.fmt1.dibvo = zdev->msi_first_bit;
fib.gd = zdev->gisa;

return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
@@ -90,6 +88,7 @@ static int zpci_clear_directed_irq(struct zpci_dev *zdev)
u8 cc, status;

fib.fmt = 1;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, &status);
if (cc == 3 || (cc == 1 && status == 24))
/* Function already gone or IRQs already deregistered. */
@@ -153,6 +152,7 @@ static struct irq_chip zpci_irq_chip = {
static void zpci_handle_cpu_local_irq(bool rescan)
{
struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
union zpci_sic_iib iib = {{0}};
unsigned long bit;
int irqs_on = 0;

@@ -164,7 +164,7 @@ static void zpci_handle_cpu_local_irq(bool rescan)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &iib))
break;
bit = 0;
continue;
@@ -192,6 +192,7 @@ static void zpci_handle_remote_irq(void *data)
static void zpci_handle_fallback_irq(void)
{
struct cpu_irq_data *cpu_data;
union zpci_sic_iib iib = {{0}};
unsigned long cpu;
int irqs_on = 0;

@@ -202,7 +203,7 @@ static void zpci_handle_fallback_irq(void)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
cpu = 0;
continue;
@@ -216,8 +217,11 @@ static void zpci_handle_fallback_irq(void)
}
}

static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
static void zpci_directed_irq_handler(struct airq_struct *airq,
struct tpi_info *tpi_info)
{
bool floating = !tpi_info->directed_irq;

if (floating) {
inc_irq_stat(IRQIO_PCF);
zpci_handle_fallback_irq();
@@ -227,8 +231,10 @@ static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
}
}

static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
static void zpci_floating_irq_handler(struct airq_struct *airq,
struct tpi_info *tpi_info)
{
union zpci_sic_iib iib = {{0}};
unsigned long si, ai;
struct airq_iv *aibv;
int irqs_on = 0;
@@ -242,7 +248,7 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
/* End of second scan with interrupts on. */
break;
/* First scan complete, reenable interrupts. */
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib))
break;
si = 0;
continue;
@@ -291,7 +297,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
zdev->aisb = bit;

/* Create adapter interrupt vector */
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
if (!zdev->aibv)
return -ENOMEM;

@@ -402,11 +408,12 @@ static struct airq_struct zpci_airq = {
static void __init cpu_enable_directed_irq(void *unused)
{
union zpci_sic_iib iib = {{0}};
union zpci_sic_iib ziib = {{0}};

iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;

__zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
}

static int __init zpci_directed_irq_init(void)
@@ -414,14 +421,14 @@ static int __init zpci_directed_irq_init(void)
union zpci_sic_iib iib = {{0}};
unsigned int cpu;

zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
zpci_sbv = airq_iv_create(num_possible_cpus(), 0, NULL);
if (!zpci_sbv)
return -ENOMEM;

iib.diib.isc = PCI_ISC;
iib.diib.nr_cpus = num_possible_cpus();
iib.diib.disb_addr = virt_to_phys(zpci_sbv->vector);
__zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);

zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
GFP_KERNEL);
@@ -436,7 +443,7 @@ static int __init zpci_directed_irq_init(void)
zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
AIRQ_IV_DATA |
AIRQ_IV_CACHELINE |
(!cpu ? AIRQ_IV_ALLOC : 0));
(!cpu ? AIRQ_IV_ALLOC : 0), NULL);
if (!zpci_ibv[cpu])
return -ENOMEM;
}
@@ -453,7 +460,7 @@ static int __init zpci_floating_irq_init(void)
if (!zpci_ibv)
return -ENOMEM;

zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
if (!zpci_sbv)
goto out_free;

@@ -466,6 +473,7 @@ out_free:

int __init zpci_irq_init(void)
{
union zpci_sic_iib iib = {{0}};
int rc;

irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
@@ -497,7 +505,7 @@ int __init zpci_irq_init(void)
* Enable floating IRQs (with suppression after one IRQ). When using
* directed IRQs this enables the fallback path.
*/
zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC, &iib);

return 0;
out_airq:
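
Two API changes run through the hunks above: the adapter-interrupt handlers now receive a struct tpi_info * (from which they derive floating vs. directed delivery) instead of a bool, and airq_iv_create() grows a third argument, passed as NULL at every call site shown here, which presumably lets a caller hand in a preallocated interrupt vector. A rough sketch of a handler under the new prototype; the handler name is illustrative, not from the diff:

	/* Sketch: adapter IRQ handler with the new tpi_info-based prototype. */
	static void example_airq_handler(struct airq_struct *airq,
					 struct tpi_info *tpi_info)
	{
		bool floating = !tpi_info->directed_irq;

		if (floating) {
			inc_irq_stat(IRQIO_PCF);
			zpci_handle_fallback_irq();
		}
		/* else: handle the directed (per-CPU) delivery case */
	}
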
@@ -111,6 +111,7 @@ static struct facility_def facility_defs[] = {
193, /* bear enhancement facility */
194, /* rdp enhancement facility */
196, /* processor activity instrumentation facility */
197, /* processor activity instrumentation extension 1 */
-1 /* END */
}
},
@@ -693,9 +693,9 @@ void x86_pmu_disable_all(void)
}
}

struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data)
{
return static_call(x86_pmu_guest_get_msrs)(nr);
return static_call(x86_pmu_guest_get_msrs)(nr, data);
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);

@@ -2103,14 +2103,15 @@ static int __init init_hw_perf_events(void)
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
return 0;
err = 0;
goto out_bad_pmu;
}

pmu_check_apic();

/* sanity check that the hardware exists or is emulated */
if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
return 0;
goto out_bad_pmu;

pr_cont("%s PMU driver.\n", x86_pmu.name);

@@ -2219,6 +2220,8 @@ out1:
cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
out:
cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
out_bad_pmu:
memset(&x86_pmu, 0, sizeof(x86_pmu));
return err;
}
early_initcall(init_hw_perf_events);
@@ -2990,6 +2993,11 @@ unsigned long perf_misc_flags(struct pt_regs *regs)

void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
if (!x86_pmu_initialized()) {
memset(cap, 0, sizeof(*cap));
return;
}

cap->version = x86_pmu.version;
/*
* KVM doesn't support the hybrid PMU yet.
@@ -3002,5 +3010,17 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
cap->events_mask_len = x86_pmu.events_mask_len;
cap->pebs_ept = x86_pmu.pebs_ept;
}
EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);

u64 perf_get_hw_event_config(int hw_event)
{
int max = x86_pmu.max_events;

if (hw_event < max)
return x86_pmu.event_map(array_index_nospec(hw_event, max));

return 0;
}
EXPORT_SYMBOL_GPL(perf_get_hw_event_config);
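
perf_guest_get_msrs() now threads an opaque void *data cookie through the x86_pmu_guest_get_msrs static call, so a hypervisor can hand per-guest PMU state down to the vendor callback (the Intel callback in the next hunks casts it to struct kvm_pmu *); perf_get_x86_pmu_capability() additionally zeroes the reported capabilities when no PMU was initialized and exposes the new pebs_ept flag. A rough caller-side sketch with hypothetical variable names (guest_pmu stands for whatever per-guest PMU state the caller keeps, or NULL):

	/* Sketch: fetch the MSR switch list for the current guest context. */
	struct perf_guest_switch_msr *msrs;
	int nr_msrs;

	msrs = perf_guest_get_msrs(&nr_msrs, guest_pmu);
	if (!msrs)
		return;		/* vendor callback provided nothing to switch */
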
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>
#include <linux/kvm_host.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
@@ -2852,6 +2853,47 @@ static void intel_pmu_reset(void)
local_irq_restore(flags);
}

/*
* We may be running with guest PEBS events created by KVM, and the
* PEBS records are logged into the guest's DS and invisible to host.
*
* In the case of guest PEBS overflow, we only trigger a fake event
* to emulate the PEBS overflow PMI for guest PEBS counters in KVM.
* The guest will then vm-entry and check the guest DS area to read
* the guest PEBS records.
*
* The contents and other behavior of the guest event do not matter.
*/
static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
struct perf_sample_data *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 guest_pebs_idxs = cpuc->pebs_enabled & ~cpuc->intel_ctrl_host_mask;
struct perf_event *event = NULL;
int bit;

if (!unlikely(perf_guest_state()))
return;

if (!x86_pmu.pebs_ept || !x86_pmu.pebs_active ||
!guest_pebs_idxs)
return;

for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
event = cpuc->events[bit];
if (!event->attr.precise_ip)
continue;

perf_sample_data_init(data, 0, event->hw.last_period);
if (perf_event_overflow(event, data, regs))
x86_pmu_stop(event, 0);

/* Inject one fake event is enough. */
break;
}
}

static int handle_pmi_common(struct pt_regs *regs, u64 status)
{
struct perf_sample_data data;
@@ -2891,10 +2933,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
status &= ~cpuc->pebs_enabled;
else
status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
status &= ~(cpuc->pebs_enabled & x86_pmu.pebs_capable);

/*
* PEBS overflow sets bit 62 in the global status register
@@ -2903,6 +2942,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
u64 pebs_enabled = cpuc->pebs_enabled;

handled++;
x86_pmu_handle_guest_pebs(regs, &data);
x86_pmu.drain_pebs(regs, &data);
status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;

@@ -3930,40 +3970,98 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}

static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
/*
* Currently, the only caller of this function is the atomic_switch_perf_msrs().
* The host perf conext helps to prepare the values of the real hardware for
* a set of msrs that need to be switched atomically in a vmx transaction.
*
* For example, the pseudocode needed to add a new msr should look like:
*
* arr[(*nr)++] = (struct perf_guest_switch_msr){
* .msr = the hardware msr address,
* .host = the value the hardware has when it doesn't run a guest,
* .guest = the value the hardware has when it runs a guest,
* };
*
* These values have nothing to do with the emulated values the guest sees
* when it uses {RD,WR}MSR, which should be handled by the KVM context,
* specifically in the intel_pmu_{get,set}_msr().
*/
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
struct kvm_pmu *kvm_pmu = (struct kvm_pmu *)data;
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
u64 pebs_mask = cpuc->pebs_enabled & x86_pmu.pebs_capable;
int global_ctrl, pebs_enable;

arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask;
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
arr[0].guest &= ~cpuc->pebs_enabled;
else
arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
*nr = 1;
*nr = 0;
global_ctrl = (*nr)++;
arr[global_ctrl] = (struct perf_guest_switch_msr){
.msr = MSR_CORE_PERF_GLOBAL_CTRL,
.host = intel_ctrl & ~cpuc->intel_ctrl_guest_mask,
.guest = intel_ctrl & (~cpuc->intel_ctrl_host_mask | ~pebs_mask),
};

if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
/*
* If PMU counter has PEBS enabled it is not enough to
* disable counter on a guest entry since PEBS memory
* write can overshoot guest entry and corrupt guest
* memory. Disabling PEBS solves the problem.
*
* Don't do this if the CPU already enforces it.
*/
arr[1].msr = MSR_IA32_PEBS_ENABLE;
arr[1].host = cpuc->pebs_enabled;
arr[1].guest = 0;
*nr = 2;
if (!x86_pmu.pebs)
return arr;

/*
* If PMU counter has PEBS enabled it is not enough to
* disable counter on a guest entry since PEBS memory
* write can overshoot guest entry and corrupt guest
* memory. Disabling PEBS solves the problem.
*
* Don't do this if the CPU already enforces it.
*/
if (x86_pmu.pebs_no_isolation) {
arr[(*nr)++] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_PEBS_ENABLE,
.host = cpuc->pebs_enabled,
.guest = 0,
};
return arr;
}

if (!kvm_pmu || !x86_pmu.pebs_ept)
return arr;

arr[(*nr)++] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_DS_AREA,
.host = (unsigned long)cpuc->ds,
.guest = kvm_pmu->ds_area,
};

if (x86_pmu.intel_cap.pebs_baseline) {
arr[(*nr)++] = (struct perf_guest_switch_msr){
.msr = MSR_PEBS_DATA_CFG,
.host = cpuc->pebs_data_cfg,
.guest = kvm_pmu->pebs_data_cfg,
};
}

pebs_enable = (*nr)++;
arr[pebs_enable] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_PEBS_ENABLE,
.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
.guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
};

if (arr[pebs_enable].host) {
/* Disable guest PEBS if host PEBS is enabled. */
arr[pebs_enable].guest = 0;
} else {
/* Disable guest PEBS for cross-mapped PEBS counters. */
arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
arr[global_ctrl].guest |= arr[pebs_enable].guest;
}

return arr;
}
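
With this rework, intel_guest_get_msrs() builds the switch list incrementally: MSR_CORE_PERF_GLOBAL_CTRL always, a plain "PEBS off in the guest" entry on pebs_no_isolation parts, and, when a struct kvm_pmu with PEBS-via-EPT support is passed in, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG (on PEBS-baseline hardware) and MSR_IA32_PEBS_ENABLE carrying the guest's values. A simplified sketch of how a hypervisor might consume the returned array follows; it is illustrative only (pmu_data is a hypothetical placeholder, and the real VMX code feeds these entries into its atomic VM-entry/VM-exit MSR-switch lists rather than issuing raw wrmsrl() calls):

	/* Sketch: switch the listed MSRs around the world switch by hand. */
	struct perf_guest_switch_msr *msrs;
	int i, nr_msrs;

	msrs = perf_guest_get_msrs(&nr_msrs, pmu_data);
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].guest != msrs[i].host)
			wrmsrl(msrs[i].msr, msrs[i].guest);	/* before entering the guest */

	/* ... guest runs ... */

	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].guest != msrs[i].host)
			wrmsrl(msrs[i].msr, msrs[i].host);	/* after leaving the guest */
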

static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
@@ -5650,6 +5748,7 @@ __init int intel_pmu_init(void)
x86_pmu.events_mask_len = eax.split.mask_length;

x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
x86_pmu.pebs_capable = PEBS_COUNTER_MASK;

/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -5834,6 +5933,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.pebs_capable = ~0ULL;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
@@ -6138,6 +6238,7 @@ __init int intel_pmu_init(void)

case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_ICELAKE_D:
x86_pmu.pebs_ept = 1;
pmem = true;
fallthrough;
case INTEL_FAM6_ICELAKE_L:
@@ -6190,6 +6291,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.pebs_capable = ~0ULL;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
@@ -6235,6 +6337,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.pebs_capable = ~0ULL;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
@@ -6399,8 +6502,7 @@ __init int intel_pmu_init(void)
x86_pmu.intel_ctrl);
/*
* Access LBR MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support LBR MSR
* Check all LBT MSR here.
* Check all LBR MSR here.
* Disable LBR access if any LBR MSRs can not be accessed.
*/
if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
@@ -828,7 +828,8 @@ struct x86_pmu {
pebs_prec_dist :1,
pebs_no_tlb :1,
pebs_no_isolation :1,
pebs_block :1;
pebs_block :1,
pebs_ept :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
@@ -838,6 +839,7 @@ struct x86_pmu {
u64 (*pebs_latency_data)(struct perf_event *event, u64 status);
unsigned long large_pebs_flags;
u64 rtm_abort_event;
u64 pebs_capable;

/*
* Intel LBR
@@ -913,7 +915,7 @@ struct x86_pmu {
/*
* Intel host/guest support (KVM)
*/
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr, void *data);

/*
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
@@ -46,7 +46,7 @@ static void hv_apic_icr_write(u32 low, u32 id)
{
u64 reg_val;

reg_val = SET_APIC_DEST_FIELD(id);
reg_val = SET_XAPIC_DEST_FIELD(id);
reg_val = reg_val << 32;
reg_val |= low;

@@ -89,8 +89,8 @@
#define APIC_DM_EXTINT 0x00700
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF)
#define SET_APIC_DEST_FIELD(x) ((x) << 24)
#define GET_XAPIC_DEST_FIELD(x) (((x) >> 24) & 0xFF)
#define SET_XAPIC_DEST_FIELD(x) ((x) << 24)
#define APIC_LVTT 0x320
#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
@@ -353,6 +353,7 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
Some files were not shown because too many files have changed in this diff.