Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "This covers all architectures (except MIPS) so I don't expect any
  other feature pull requests this merge window.

  ARM:
   - Add MTE support in guests, complete with tag save/restore interface
   - Reduce the impact of CMOs by moving them in the page-table code
   - Allow device block mappings at stage-2
   - Reduce the footprint of the vmemmap in protected mode
   - Support the vGIC on dumb systems such as the Apple M1
   - Add selftest infrastructure to support multiple configuration and
     apply that to PMU/non-PMU setups
   - Add selftests for the debug architecture
   - The usual crop of PMU fixes

  PPC:
   - Support for the H_RPT_INVALIDATE hypercall
   - Conversion of Book3S entry/exit to C
   - Bug fixes

  S390:
   - new HW facilities for guests
   - make inline assembly more robust with KASAN and co

  x86:
   - Allow userspace to handle emulation errors (unknown instructions)
   - Lazy allocation of the rmap (host physical -> guest physical address)
   - Support for virtualizing TSC scaling on VMX machines
   - Optimizations to avoid shattering huge pages at the beginning of
     live migration
   - Support for initializing the PDPTRs without loading them from memory
   - Many TLB flushing cleanups
   - Refuse to load if two-stage paging is available but NX is not
     (this has been a requirement in practice for over a year)
   - A large series that separates the MMU mode (WP/SMAP/SMEP etc.) from
     CR0/CR4/EFER, using the MMU mode everywhere once it is computed
     from the CPU registers
   - Use PM notifier to notify the guest about host suspend or hibernate
   - Support for passing arguments to Hyper-V hypercalls using XMM registers
   - Support for Hyper-V TLB flush hypercalls and enlightened MSR bitmap
     on AMD processors
   - Hide Hyper-V hypercalls that are not included in the guest CPUID
   - Fixes for live migration of virtual machines that use the Hyper-V
     "enlightened VMCS" optimization of nested virtualization
   - Bugfixes (not many)

  Generic:
   - Support for retrieving statistics without debugfs
   - Cleanups for the KVM selftests API"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (314 commits)
  KVM: x86: rename apic_access_page_done to apic_access_memslot_enabled
  kvm: x86: disable the narrow guest module parameter on unload
  selftests: kvm: Allows userspace to handle emulation errors.
  kvm: x86: Allow userspace to handle emulation errors
  KVM: x86/mmu: Let guest use GBPAGES if supported in hardware and TDP is on
  KVM: x86/mmu: Get CR4.SMEP from MMU, not vCPU, in shadow page fault
  KVM: x86/mmu: Get CR0.WP from MMU, not vCPU, in shadow page fault
  KVM: x86/mmu: Drop redundant rsvd bits reset for nested NPT
  KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic
  KVM: x86: Enhance comments for MMU roles and nested transition trickiness
  KVM: x86/mmu: WARN on any reserved SPTE value when making a valid SPTE
  KVM: x86/mmu: Add helpers to do full reserved SPTE checks w/ generic MMU
  KVM: x86/mmu: Use MMU's role to determine PTTYPE
  KVM: x86/mmu: Collapse 32-bit PAE and 64-bit statements for helpers
  KVM: x86/mmu: Add a helper to calculate root from role_regs
  KVM: x86/mmu: Add helper to update paging metadata
  KVM: x86/mmu: Don't update nested guest's paging bitmasks if CR0.PG=0
  KVM: x86/mmu: Consolidate reset_rsvds_bits_mask() calls
  KVM: x86/mmu: Use MMU role_regs to get LA57, and drop vCPU LA57 helper
  KVM: x86/mmu: Get nested MMU's root level from the MMU's role
  ...
commit 36824f198c
@ -688,9 +688,14 @@ MSRs that have been set successfully.
|
|||||||
Defines the vcpu responses to the cpuid instruction. Applications
|
Defines the vcpu responses to the cpuid instruction. Applications
|
||||||
should use the KVM_SET_CPUID2 ioctl if available.
|
should use the KVM_SET_CPUID2 ioctl if available.
|
||||||
|
|
||||||
Note, when this IOCTL fails, KVM gives no guarantees that previous valid CPUID
|
Caveat emptor:
|
||||||
configuration (if there is) is not corrupted. Userspace can get a copy of the
|
- If this IOCTL fails, KVM gives no guarantees that previous valid CPUID
|
||||||
resulting CPUID configuration through KVM_GET_CPUID2 in case.
|
configuration (if there is) is not corrupted. Userspace can get a copy
|
||||||
|
of the resulting CPUID configuration through KVM_GET_CPUID2 in case.
|
||||||
|
- Using KVM_SET_CPUID{,2} after KVM_RUN, i.e. changing the guest vCPU model
|
||||||
|
after running the guest, may cause guest instability.
|
||||||
|
- Using heterogeneous CPUID configurations, modulo APIC IDs, topology, etc...
|
||||||
|
may cause guest instability.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
@ -5034,6 +5039,260 @@ see KVM_XEN_VCPU_SET_ATTR above.
|
|||||||
The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
|
The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
|
||||||
with the KVM_XEN_VCPU_GET_ATTR ioctl.
|
with the KVM_XEN_VCPU_GET_ATTR ioctl.
|
||||||
|
|
||||||
|
4.130 KVM_ARM_MTE_COPY_TAGS
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_ARM_MTE
|
||||||
|
:Architectures: arm64
|
||||||
|
:Type: vm ioctl
|
||||||
|
:Parameters: struct kvm_arm_copy_mte_tags
|
||||||
|
:Returns: number of bytes copied, < 0 on error (-EINVAL for incorrect
|
||||||
|
arguments, -EFAULT if memory cannot be accessed).
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
struct kvm_arm_copy_mte_tags {
|
||||||
|
__u64 guest_ipa;
|
||||||
|
__u64 length;
|
||||||
|
void __user *addr;
|
||||||
|
__u64 flags;
|
||||||
|
__u64 reserved[2];
|
||||||
|
};
|
||||||
|
|
||||||
|
Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The
|
||||||
|
``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr``
|
||||||
|
field must point to a buffer which the tags will be copied to or from.
|
||||||
|
|
||||||
|
``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or
|
||||||
|
``KVM_ARM_TAGS_FROM_GUEST``.
|
||||||
|
|
||||||
|
The size of the buffer to store the tags is ``(length / 16)`` bytes
|
||||||
|
(granules in MTE are 16 bytes long). Each byte contains a single tag
|
||||||
|
value. This matches the format of ``PTRACE_PEEKMTETAGS`` and
|
||||||
|
``PTRACE_POKEMTETAGS``.
|
||||||
|
|
||||||
|
If an error occurs before any data is copied then a negative error code is
|
||||||
|
returned. If some tags have been copied before an error occurs then the number
|
||||||
|
of bytes successfully copied is returned. If the call completes successfully
|
||||||
|
then ``length`` is returned.
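
As an illustrative sketch only (assuming ``vm_fd`` is the VM file descriptor,
``gpa`` is a ``PAGE_SIZE``-aligned guest IPA already given to the guest, and
error reporting helpers are available), a VMM saving the tags of one page
might look like this::

  /* one tag byte per 16-byte MTE granule */
  char tags[PAGE_SIZE / 16];

  struct kvm_arm_copy_mte_tags copy = {
          .guest_ipa = gpa,                    /* must be PAGE_SIZE aligned */
          .length    = PAGE_SIZE,              /* must be PAGE_SIZE aligned */
          .addr      = tags,
          .flags     = KVM_ARM_TAGS_FROM_GUEST,
  };

  long ret = ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
  if (ret < 0)
          perror("KVM_ARM_MTE_COPY_TAGS");     /* -EINVAL or -EFAULT as above */
  else if (ret != copy.length)
          fprintf(stderr, "partial tag copy: %ld\n", ret);

Using ``KVM_ARM_TAGS_TO_GUEST`` instead would restore the saved tags, e.g. on
the destination of a migration.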
|
||||||
|
|
||||||
|
4.131 KVM_GET_SREGS2
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_SREGS2
|
||||||
|
:Architectures: x86
|
||||||
|
:Type: vcpu ioctl
|
||||||
|
:Parameters: struct kvm_sregs2 (out)
|
||||||
|
:Returns: 0 on success, -1 on error
|
||||||
|
|
||||||
|
Reads special registers from the vcpu.
|
||||||
|
This ioctl (when supported) replaces KVM_GET_SREGS.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
struct kvm_sregs2 {
|
||||||
|
/* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
|
||||||
|
struct kvm_segment cs, ds, es, fs, gs, ss;
|
||||||
|
struct kvm_segment tr, ldt;
|
||||||
|
struct kvm_dtable gdt, idt;
|
||||||
|
__u64 cr0, cr2, cr3, cr4, cr8;
|
||||||
|
__u64 efer;
|
||||||
|
__u64 apic_base;
|
||||||
|
__u64 flags;
|
||||||
|
__u64 pdptrs[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
flags values for ``kvm_sregs2``:
|
||||||
|
|
||||||
|
``KVM_SREGS2_FLAGS_PDPTRS_VALID``
|
||||||
|
|
||||||
|
Indicates that the struct contains valid PDPTR values.
|
||||||
|
|
||||||
|
|
||||||
|
4.132 KVM_SET_SREGS2
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_SREGS2
|
||||||
|
:Architectures: x86
|
||||||
|
:Type: vcpu ioctl
|
||||||
|
:Parameters: struct kvm_sregs2 (in)
|
||||||
|
:Returns: 0 on success, -1 on error
|
||||||
|
|
||||||
|
Writes special registers into the vcpu.
|
||||||
|
See KVM_GET_SREGS2 for the data structures.
|
||||||
|
This ioctl (when supported) replaces KVM_SET_SREGS.
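
For illustration only (a sketch assuming ``vcpu_fd`` is an open vCPU file
descriptor and ``KVM_CAP_SREGS2`` has already been checked; the CR4
modification is purely hypothetical), a caller could round-trip the state,
keeping the PDPTRs it read, like this::

  struct kvm_sregs2 sregs2;

  if (ioctl(vcpu_fd, KVM_GET_SREGS2, &sregs2) < 0)
          err(1, "KVM_GET_SREGS2");

  if (sregs2.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID)
          printf("PDPTR0 = 0x%llx\n", (unsigned long long)sregs2.pdptrs[0]);

  sregs2.cr4 |= X86_CR4_PAE;              /* hypothetical modification */

  if (ioctl(vcpu_fd, KVM_SET_SREGS2, &sregs2) < 0)
          err(1, "KVM_SET_SREGS2");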
|
||||||
|
|
||||||
|
4.133 KVM_GET_STATS_FD
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_STATS_BINARY_FD
|
||||||
|
:Architectures: all
|
||||||
|
:Type: vm ioctl, vcpu ioctl
|
||||||
|
:Parameters: none
|
||||||
|
:Returns: statistics file descriptor on success, < 0 on error
|
||||||
|
|
||||||
|
Errors:
|
||||||
|
|
||||||
|
====== ======================================================
|
||||||
|
ENOMEM if the fd could not be created due to lack of memory
|
||||||
|
EMFILE if the number of opened files exceeds the limit
|
||||||
|
====== ======================================================
|
||||||
|
|
||||||
|
The returned file descriptor can be used to read VM/vCPU statistics data in
|
||||||
|
binary format. The data in the file descriptor consists of four blocks
|
||||||
|
organized as follows:
|
||||||
|
|
||||||
|
+-------------+
|
||||||
|
| Header |
|
||||||
|
+-------------+
|
||||||
|
| id string |
|
||||||
|
+-------------+
|
||||||
|
| Descriptors |
|
||||||
|
+-------------+
|
||||||
|
| Stats Data |
|
||||||
|
+-------------+
|
||||||
|
|
||||||
|
Apart from the header starting at offset 0, please be aware that it is
|
||||||
|
not guaranteed that the four blocks are adjacent or in the above order;
|
||||||
|
the offsets of the id, descriptors and data blocks are found in the
|
||||||
|
header. However, all four blocks are aligned to 64 bit offsets in the
|
||||||
|
file and they do not overlap.
|
||||||
|
|
||||||
|
All blocks except the data block are immutable. Userspace can read them
|
||||||
|
only one time after retrieving the file descriptor, and then use ``pread`` or
|
||||||
|
``lseek`` to read the statistics repeatedly.
|
||||||
|
|
||||||
|
All data is in system endianness.
|
||||||
|
|
||||||
|
The format of the header is as follows::
|
||||||
|
|
||||||
|
struct kvm_stats_header {
|
||||||
|
__u32 flags;
|
||||||
|
__u32 name_size;
|
||||||
|
__u32 num_desc;
|
||||||
|
__u32 id_offset;
|
||||||
|
__u32 desc_offset;
|
||||||
|
__u32 data_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
The ``flags`` field is not used at the moment. It is always read as 0.
|
||||||
|
|
||||||
|
The ``name_size`` field is the size (in bytes) of the statistics name string
|
||||||
|
(including trailing '\0') which is contained in the "id string" block and
|
||||||
|
appended at the end of every descriptor.
|
||||||
|
|
||||||
|
The ``num_desc`` field is the number of descriptors that are included in the
|
||||||
|
descriptor block. (The actual number of values in the data block may be
|
||||||
|
larger, since each descriptor may comprise more than one value).
|
||||||
|
|
||||||
|
The ``id_offset`` field is the offset of the id string from the start of the
|
||||||
|
file indicated by the file descriptor. It is a multiple of 8.
|
||||||
|
|
||||||
|
The ``desc_offset`` field is the offset of the Descriptors block from the start
|
||||||
|
of the file indicated by the file descriptor. It is a multiple of 8.
|
||||||
|
|
||||||
|
The ``data_offset`` field is the offset of the Stats Data block from the start
|
||||||
|
of the file indicated by the file descriptor. It is a multiple of 8.
|
||||||
|
|
||||||
|
The id string block contains a string which identifies the file descriptor on
|
||||||
|
which KVM_GET_STATS_FD was invoked. The size of the block, including the
|
||||||
|
trailing ``'\0'``, is indicated by the ``name_size`` field in the header.
|
||||||
|
|
||||||
|
The descriptors block only needs to be read once for the lifetime of the
file descriptor. It contains a sequence of ``struct kvm_stats_desc``, each
followed by a string of size ``name_size``.
|
||||||
|
|
||||||
|
#define KVM_STATS_TYPE_SHIFT 0
|
||||||
|
#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT)
|
||||||
|
#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
|
||||||
|
#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
|
||||||
|
#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
|
||||||
|
|
||||||
|
#define KVM_STATS_UNIT_SHIFT 4
|
||||||
|
#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
|
||||||
|
#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT)
|
||||||
|
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
|
||||||
|
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
|
||||||
|
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
|
||||||
|
|
||||||
|
#define KVM_STATS_BASE_SHIFT 8
|
||||||
|
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
|
||||||
|
#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT)
|
||||||
|
#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT)
|
||||||
|
|
||||||
|
struct kvm_stats_desc {
|
||||||
|
__u32 flags;
|
||||||
|
__s16 exponent;
|
||||||
|
__u16 size;
|
||||||
|
__u32 offset;
|
||||||
|
__u32 unused;
|
||||||
|
char name[];
|
||||||
|
};
|
||||||
|
|
||||||
|
The ``flags`` field contains the type and unit of the statistics data described
|
||||||
|
by this descriptor. Its endianness is CPU native.
|
||||||
|
The following flags are supported:
|
||||||
|
|
||||||
|
Bits 0-3 of ``flags`` encode the type:
|
||||||
|
* ``KVM_STATS_TYPE_CUMULATIVE``
|
||||||
|
The statistics data is cumulative. The value of data can only be increased.
|
||||||
|
Most of the counters used in KVM are of this type.
|
||||||
|
The corresponding ``size`` field for this type is always 1.
|
||||||
|
All cumulative statistics data are read/write.
|
||||||
|
* ``KVM_STATS_TYPE_INSTANT``
|
||||||
|
The statistics data is instantaneous. Its value can be increased or
|
||||||
|
decreased. This type is usually used as a measurement of some resources,
|
||||||
|
like the number of dirty pages, the number of large pages, etc.
|
||||||
|
All instant statistics are read only.
|
||||||
|
The corresponding ``size`` field for this type is always 1.
|
||||||
|
* ``KVM_STATS_TYPE_PEAK``
|
||||||
|
The statistics data is peak. The value of data can only be increased, and
|
||||||
|
represents a peak value for a measurement, for example the maximum number
|
||||||
|
of items in a hash table bucket, the longest time waited and so on.
|
||||||
|
The corresponding ``size`` field for this type is always 1.
|
||||||
|
|
||||||
|
Bits 4-7 of ``flags`` encode the unit:
|
||||||
|
* ``KVM_STATS_UNIT_NONE``
|
||||||
|
There is no unit for the value of statistics data. This usually means that
|
||||||
|
the value is a simple counter of an event.
|
||||||
|
* ``KVM_STATS_UNIT_BYTES``
|
||||||
|
It indicates that the statistics data is used to measure memory size, in the
|
||||||
|
unit of Byte, KiByte, MiByte, GiByte, etc. The unit of the data is
|
||||||
|
determined by the ``exponent`` field in the descriptor.
|
||||||
|
* ``KVM_STATS_UNIT_SECONDS``
|
||||||
|
It indicates that the statistics data is used to measure time or latency.
|
||||||
|
* ``KVM_STATS_UNIT_CYCLES``
|
||||||
|
It indicates that the statistics data is used to measure CPU clock cycles.
|
||||||
|
|
||||||
|
Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
|
||||||
|
unit:
|
||||||
|
* ``KVM_STATS_BASE_POW10``
|
||||||
|
The scale is based on power of 10. It is used for measurement of time and
|
||||||
|
CPU clock cycles. For example, an exponent of -9 can be used with
|
||||||
|
``KVM_STATS_UNIT_SECONDS`` to express that the unit is nanoseconds.
|
||||||
|
* ``KVM_STATS_BASE_POW2``
|
||||||
|
The scale is based on power of 2. It is used for measurement of memory size.
|
||||||
|
For example, an exponent of 20 can be used with ``KVM_STATS_UNIT_BYTES`` to
|
||||||
|
express that the unit is MiB.
|
||||||
|
|
||||||
|
The ``size`` field is the number of values of this statistics data. It is
usually 1 for simple statistics; a value of 1 means the data consists of a
single unsigned 64-bit value.
|
||||||
|
|
||||||
|
The ``offset`` field is the offset from the start of Data Block to the start of
|
||||||
|
the corresponding statistics data.
|
||||||
|
|
||||||
|
The ``unused`` field is reserved for future support for other types of
|
||||||
|
statistics data, like log/linear histogram. Its value is always 0 for the types
|
||||||
|
defined above.
|
||||||
|
|
||||||
|
The ``name`` field is the name string of the statistics data. The name string
|
||||||
|
starts at the end of ``struct kvm_stats_desc``. The maximum length, including
the trailing ``'\0'``, is indicated by ``name_size`` in the header.
|
||||||
|
|
||||||
|
The Stats Data block contains an array of 64-bit values in the same order
|
||||||
|
as the descriptors in Descriptors block.
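
As an illustrative sketch (error handling and includes trimmed, and assuming
``vm_fd`` is an open VM file descriptor), the blocks described above could be
parsed roughly as follows::

  int stats_fd = ioctl(vm_fd, KVM_GET_STATS_FD, NULL);

  struct kvm_stats_header hdr;
  pread(stats_fd, &hdr, sizeof(hdr), 0);

  /* one descriptor entry is the fixed struct plus its name string */
  size_t desc_sz = sizeof(struct kvm_stats_desc) + hdr.name_size;
  char *descs = malloc(desc_sz * hdr.num_desc);
  pread(stats_fd, descs, desc_sz * hdr.num_desc, hdr.desc_offset);

  for (__u32 i = 0; i < hdr.num_desc; i++) {
          struct kvm_stats_desc *d =
                  (struct kvm_stats_desc *)(descs + i * desc_sz);
          __u64 val[d->size];           /* most stats have size == 1 */

          /* only the data block changes, so it can be re-read at will */
          pread(stats_fd, val, sizeof(__u64) * d->size,
                hdr.data_offset + d->offset);
          printf("%s = %llu (scaled by base^%d)\n", d->name,
                 (unsigned long long)val[0], d->exponent);
  }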
|
||||||
|
|
||||||
5. The kvm_run structure
|
5. The kvm_run structure
|
||||||
========================
|
========================
|
||||||
|
|
||||||
@ -6323,6 +6582,7 @@ KVM_RUN_BUS_LOCK flag is used to distinguish between them.
|
|||||||
This capability can be used to check / enable 2nd DAWR feature provided
|
This capability can be used to check / enable 2nd DAWR feature provided
|
||||||
by POWER10 processor.
|
by POWER10 processor.
|
||||||
|
|
||||||
|
|
||||||
7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
|
7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
@ -6362,6 +6622,66 @@ default.
|
|||||||
|
|
||||||
See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
|
See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
|
||||||
|
|
||||||
|
7.26 KVM_CAP_PPC_RPT_INVALIDATE
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_PPC_RPT_INVALIDATE
|
||||||
|
:Architectures: ppc
|
||||||
|
:Type: vm
|
||||||
|
|
||||||
|
This capability indicates that the kernel is capable of handling
|
||||||
|
H_RPT_INVALIDATE hcall.
|
||||||
|
|
||||||
|
In order to enable the use of H_RPT_INVALIDATE in the guest,
|
||||||
|
user space might have to advertise it for the guest. For example,
|
||||||
|
IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
|
||||||
|
present in the "ibm,hypertas-functions" device-tree property.
|
||||||
|
|
||||||
|
This capability is enabled for hypervisors on platforms like POWER9
|
||||||
|
that support radix MMU.
|
||||||
|
|
||||||
|
7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
:Architectures: x86
|
||||||
|
:Parameters: args[0] whether the feature should be enabled or not
|
||||||
|
|
||||||
|
When this capability is enabled, an emulation failure will result in an exit
|
||||||
|
to userspace with KVM_INTERNAL_ERROR (except when the emulator was invoked
|
||||||
|
to handle a VMware backdoor instruction). Furthermore, KVM will now provide up
|
||||||
|
to 15 instruction bytes for any exit to userspace resulting from an emulation
|
||||||
|
failure. When these exits to userspace occur use the emulation_failure struct
|
||||||
|
instead of the internal struct. They both have the same layout, but the
|
||||||
|
emulation_failure struct matches the content better. It also explicitly
|
||||||
|
defines the 'flags' field which is used to describe the fields in the struct
|
||||||
|
that are valid (i.e. if KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES is
|
||||||
|
set in the 'flags' field then both 'insn_size' and 'insn_bytes' have valid data
|
||||||
|
in them.)
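
A rough sketch of how a VMM might use this (assuming ``vm_fd`` and ``run`` are
already set up; ``dump_insn()`` is a hypothetical helper, and the field names
follow the ``emulation_failure`` layout described above)::

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE,
          .args[0] = 1,
  };
  ioctl(vm_fd, KVM_ENABLE_CAP, &cap);

  /* ... later, in the vCPU run loop ... */
  if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
      run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION &&
      (run->emulation_failure.flags &
       KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES)) {
          /* insn_bytes holds up to 15 bytes of the failing instruction */
          dump_insn(run->emulation_failure.insn_bytes,
                    run->emulation_failure.insn_size);
  }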
|
||||||
|
|
||||||
|
7.28 KVM_CAP_ARM_MTE
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
:Architectures: arm64
|
||||||
|
:Parameters: none
|
||||||
|
|
||||||
|
This capability indicates that KVM (and the hardware) supports exposing the
|
||||||
|
Memory Tagging Extensions (MTE) to the guest. It must also be enabled by the
|
||||||
|
VMM before creating any VCPUs to allow the guest access. Note that MTE is only
|
||||||
|
available to a guest running in AArch64 mode and enabling this capability will
|
||||||
|
cause attempts to create AArch32 VCPUs to fail.
|
||||||
|
|
||||||
|
When enabled the guest is able to access tags associated with any memory given
|
||||||
|
to the guest. KVM will ensure that the tags are maintained during swap or
|
||||||
|
hibernation of the host; however the VMM needs to manually save/restore the
|
||||||
|
tags as appropriate if the VM is migrated.
|
||||||
|
|
||||||
|
When this capability is enabled, all memory in memslots must be mapped as
not-shareable (no MAP_SHARED); attempts to create a memslot backed by a
MAP_SHARED mmap will fail with -EINVAL.
|
||||||
|
|
||||||
|
When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
|
||||||
|
perform a bulk copy of tags to/from the guest.
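
For example (a sketch; ``vm_fd`` is the VM file descriptor), the VMM would
enable the capability once, before any ``KVM_CREATE_VCPU`` call::

  struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };

  if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
          err(1, "KVM_CAP_ARM_MTE");    /* MTE not supported by KVM/hardware */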
|
||||||
|
|
||||||
8. Other capabilities.
|
8. Other capabilities.
|
||||||
======================
|
======================
|
||||||
|
|
||||||
@ -6891,3 +7211,33 @@ This capability is always enabled.
|
|||||||
This capability indicates that the KVM virtual PTP service is
|
This capability indicates that the KVM virtual PTP service is
|
||||||
supported in the host. A VMM can check whether the service is
|
supported in the host. A VMM can check whether the service is
|
||||||
available to the guest on migration.
|
available to the guest on migration.
|
||||||
|
|
||||||
|
8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
:Architectures: x86
|
||||||
|
|
||||||
|
When enabled, KVM will disable emulated Hyper-V features provided to the
|
||||||
|
guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
|
||||||
|
currently implemented Hyper-V features are provided unconditionally when
|
||||||
|
Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
|
||||||
|
leaf.
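
A minimal sketch of enabling it (assuming this is a per-vCPU capability
enabled through ``KVM_ENABLE_CAP`` on the vCPU file descriptor ``vcpu_fd``)::

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
          .args[0] = 1,
  };
  ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);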
|
||||||
|
|
||||||
|
8.34 KVM_CAP_EXIT_HYPERCALL
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_EXIT_HYPERCALL
|
||||||
|
:Architectures: x86
|
||||||
|
:Type: vm
|
||||||
|
|
||||||
|
This capability, if enabled, will cause KVM to exit to userspace
|
||||||
|
with KVM_EXIT_HYPERCALL exit reason to process some hypercalls.
|
||||||
|
|
||||||
|
Calling KVM_CHECK_EXTENSION for this capability will return a bitmask
|
||||||
|
of hypercalls that can be configured to exit to userspace.
|
||||||
|
Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE.
|
||||||
|
|
||||||
|
The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
|
||||||
|
of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace
|
||||||
|
the hypercalls whose corresponding bit is in the argument, and return
|
||||||
|
ENOSYS for the others.
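
A sketch of the flow (assuming ``vm_fd`` and ``run`` are already set up;
``KVM_HC_MAP_GPA_RANGE`` comes from ``<linux/kvm_para.h>``, and the page-state
bookkeeping is left out)::

  int mask = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_EXIT_HYPERCALL);

  if (mask & (1u << KVM_HC_MAP_GPA_RANGE)) {
          struct kvm_enable_cap cap = {
                  .cap = KVM_CAP_EXIT_HYPERCALL,
                  .args[0] = 1u << KVM_HC_MAP_GPA_RANGE,
          };
          ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }

  /* ... in the vCPU run loop ... */
  if (run->exit_reason == KVM_EXIT_HYPERCALL &&
      run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) {
          __u64 gpa    = run->hypercall.args[0];
          __u64 npages = run->hypercall.args[1];
          __u64 attrs  = run->hypercall.args[2];

          /* update page-state tracking for [gpa, gpa + npages * 4k) ... */
          run->hypercall.ret = 0;       /* value returned to the guest */
  }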
|
||||||
|
@ -96,6 +96,13 @@ KVM_FEATURE_MSI_EXT_DEST_ID 15 guest checks this feature bit
|
|||||||
before using extended destination
|
before using extended destination
|
||||||
ID bits in MSI address bits 11-5.
|
ID bits in MSI address bits 11-5.
|
||||||
|
|
||||||
|
KVM_FEATURE_HC_MAP_GPA_RANGE 16 guest checks this feature bit before
|
||||||
|
using the map gpa range hypercall
|
||||||
|
to notify the page state change
|
||||||
|
|
||||||
|
KVM_FEATURE_MIGRATION_CONTROL 17 guest checks this feature bit before
|
||||||
|
using MSR_KVM_MIGRATION_CONTROL
|
||||||
|
|
||||||
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 host will warn if no guest-side
|
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 host will warn if no guest-side
|
||||||
per-cpu warps are expected in
|
per-cpu warps are expected in
|
||||||
kvmclock
|
kvmclock
|
||||||
|
@ -169,3 +169,24 @@ a0: destination APIC ID
|
|||||||
|
|
||||||
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
|
||||||
any of the IPI target vCPUs was preempted.
|
any of the IPI target vCPUs was preempted.
|
||||||
|
|
||||||
|
8. KVM_HC_MAP_GPA_RANGE
|
||||||
|
-------------------------
|
||||||
|
:Architecture: x86
|
||||||
|
:Status: active
|
||||||
|
:Purpose: Request KVM to map a GPA range with the specified attributes.
|
||||||
|
|
||||||
|
a0: the guest physical address of the start page
|
||||||
|
a1: the number of (4kb) pages (must be contiguous in GPA space)
|
||||||
|
a2: attributes
|
||||||
|
|
||||||
|
Where 'attributes' :
|
||||||
|
* bits 3:0 - preferred page size encoding 0 = 4kb, 1 = 2mb, 2 = 1gb, etc...
|
||||||
|
* bit 4 - plaintext = 0, encrypted = 1
|
||||||
|
* bits 63:5 - reserved (must be zero)
|
||||||
|
|
||||||
|
**Implementation note**: this hypercall is implemented in userspace via
|
||||||
|
the KVM_CAP_EXIT_HYPERCALL capability. Userspace must enable that capability
|
||||||
|
before advertising KVM_FEATURE_HC_MAP_GPA_RANGE in the guest CPUID. In
|
||||||
|
addition, if the guest supports KVM_FEATURE_MIGRATION_CONTROL, userspace
|
||||||
|
must also set up an MSR filter to process writes to MSR_KVM_MIGRATION_CONTROL.
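
On the guest side, a sketch of issuing the hypercall (kernel context, using
the ``kvm_hypercall3()`` helper; ``gpa`` is a placeholder and the attribute
bits follow the encoding listed above)::

  /*
   * Mark 512 contiguous 4k pages starting at 'gpa' as encrypted
   * (bit 4 set), preferred mapping size 2M (encoding 1 in bits 3:0).
   */
  unsigned long attrs = (1UL << 4) | 1;
  long ret = kvm_hypercall3(KVM_HC_MAP_GPA_RANGE, gpa, 512, attrs);

  if (ret)
          pr_warn("KVM_HC_MAP_GPA_RANGE failed: %ld\n", ret);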
|
||||||
|
@ -16,6 +16,11 @@ The acquisition orders for mutexes are as follows:
|
|||||||
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
|
||||||
them together is quite rare.
|
them together is quite rare.
|
||||||
|
|
||||||
|
- Unlike kvm->slots_lock, kvm->slots_arch_lock is released before
|
||||||
|
synchronize_srcu(&kvm->srcu). Therefore kvm->slots_arch_lock
|
||||||
|
can be taken inside a kvm->srcu read-side critical section,
|
||||||
|
while kvm->slots_lock cannot.
|
||||||
|
|
||||||
On x86:
|
On x86:
|
||||||
|
|
||||||
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
||||||
|
@ -180,8 +180,8 @@ Shadow pages contain the following information:
|
|||||||
role.gpte_is_8_bytes:
|
role.gpte_is_8_bytes:
|
||||||
Reflects the size of the guest PTE for which the page is valid, i.e. '1'
|
Reflects the size of the guest PTE for which the page is valid, i.e. '1'
|
||||||
if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
|
if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
|
||||||
role.nxe:
|
role.efer_nx:
|
||||||
Contains the value of efer.nxe for which the page is valid.
|
Contains the value of efer.nx for which the page is valid.
|
||||||
role.cr0_wp:
|
role.cr0_wp:
|
||||||
Contains the value of cr0.wp for which the page is valid.
|
Contains the value of cr0.wp for which the page is valid.
|
||||||
role.smep_andnot_wp:
|
role.smep_andnot_wp:
|
||||||
@ -192,9 +192,6 @@ Shadow pages contain the following information:
|
|||||||
Contains the value of cr4.smap && !cr0.wp for which the page is valid
|
Contains the value of cr4.smap && !cr0.wp for which the page is valid
|
||||||
(pages for which this is true are different from other pages; see the
|
(pages for which this is true are different from other pages; see the
|
||||||
treatment of cr0.wp=0 below).
|
treatment of cr0.wp=0 below).
|
||||||
role.ept_sp:
|
|
||||||
This is a virtual flag to denote a shadowed nested EPT page. ept_sp
|
|
||||||
is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination.
|
|
||||||
role.smm:
|
role.smm:
|
||||||
Is 1 if the page is valid in system management mode. This field
|
Is 1 if the page is valid in system management mode. This field
|
||||||
determines which of the kvm_memslots array was used to build this
|
determines which of the kvm_memslots array was used to build this
|
||||||
|
@ -376,3 +376,16 @@ data:
|
|||||||
write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
|
write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
|
||||||
and check if there are more notifications pending. The MSR is available
|
and check if there are more notifications pending. The MSR is available
|
||||||
if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
|
if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
|
||||||
|
|
||||||
|
MSR_KVM_MIGRATION_CONTROL:
|
||||||
|
0x4b564d08
|
||||||
|
|
||||||
|
data:
|
||||||
|
This MSR is available if KVM_FEATURE_MIGRATION_CONTROL is present in
|
||||||
|
CPUID. Bit 0 represents whether live migration of the guest is allowed.
|
||||||
|
|
||||||
|
When a guest is started, bit 0 will be 0 if the guest has encrypted
|
||||||
|
memory and 1 if the guest does not have encrypted memory. If the
|
||||||
|
guest is communicating page encryption status to the host using the
|
||||||
|
``KVM_HC_MAP_GPA_RANGE`` hypercall, it can set bit 0 in this MSR to
|
||||||
|
allow live migration of the guest.
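
A guest-side sketch (kernel context; the value written is bit 0 as described
above, whatever symbolic name the guest uses for it)::

  if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
          /*
           * Page encryption status is now reported via
           * KVM_HC_MAP_GPA_RANGE, so allow live migration.
           */
          wrmsrl(MSR_KVM_MIGRATION_CONTROL, 1);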
|
||||||
|
@ -9991,6 +9991,8 @@ F: arch/arm64/include/asm/kvm*
|
|||||||
F: arch/arm64/include/uapi/asm/kvm*
|
F: arch/arm64/include/uapi/asm/kvm*
|
||||||
F: arch/arm64/kvm/
|
F: arch/arm64/kvm/
|
||||||
F: include/kvm/arm_*
|
F: include/kvm/arm_*
|
||||||
|
F: tools/testing/selftests/kvm/*/aarch64/
|
||||||
|
F: tools/testing/selftests/kvm/aarch64/
|
||||||
|
|
||||||
KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
|
KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
|
||||||
M: Huacai Chen <chenhuacai@kernel.org>
|
M: Huacai Chen <chenhuacai@kernel.org>
|
||||||
|
@ -12,7 +12,8 @@
|
|||||||
#include <asm/types.h>
|
#include <asm/types.h>
|
||||||
|
|
||||||
/* Hyp Configuration Register (HCR) bits */
|
/* Hyp Configuration Register (HCR) bits */
|
||||||
#define HCR_ATA (UL(1) << 56)
|
#define HCR_ATA_SHIFT 56
|
||||||
|
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
|
||||||
#define HCR_FWB (UL(1) << 46)
|
#define HCR_FWB (UL(1) << 46)
|
||||||
#define HCR_API (UL(1) << 41)
|
#define HCR_API (UL(1) << 41)
|
||||||
#define HCR_APK (UL(1) << 40)
|
#define HCR_APK (UL(1) << 40)
|
||||||
|
@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
|
|||||||
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
|
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
|
||||||
vcpu_el1_is_32bit(vcpu))
|
vcpu_el1_is_32bit(vcpu))
|
||||||
vcpu->arch.hcr_el2 |= HCR_TID2;
|
vcpu->arch.hcr_el2 |= HCR_TID2;
|
||||||
|
|
||||||
|
if (kvm_has_mte(vcpu->kvm))
|
||||||
|
vcpu->arch.hcr_el2 |= HCR_ATA;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
|
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
|
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
|
||||||
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
|
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
|
||||||
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
|
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
|
||||||
|
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
|
||||||
|
|
||||||
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
||||||
KVM_DIRTY_LOG_INITIALLY_SET)
|
KVM_DIRTY_LOG_INITIALLY_SET)
|
||||||
@ -132,6 +133,9 @@ struct kvm_arch {
|
|||||||
|
|
||||||
u8 pfr0_csv2;
|
u8 pfr0_csv2;
|
||||||
u8 pfr0_csv3;
|
u8 pfr0_csv3;
|
||||||
|
|
||||||
|
/* Memory Tagging Extension enabled for the guest */
|
||||||
|
bool mte_enabled;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_vcpu_fault_info {
|
struct kvm_vcpu_fault_info {
|
||||||
@ -206,6 +210,12 @@ enum vcpu_sysreg {
|
|||||||
CNTP_CVAL_EL0,
|
CNTP_CVAL_EL0,
|
||||||
CNTP_CTL_EL0,
|
CNTP_CTL_EL0,
|
||||||
|
|
||||||
|
/* Memory Tagging Extension registers */
|
||||||
|
RGSR_EL1, /* Random Allocation Tag Seed Register */
|
||||||
|
GCR_EL1, /* Tag Control Register */
|
||||||
|
TFSR_EL1, /* Tag Fault Status Register (EL1) */
|
||||||
|
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
|
||||||
|
|
||||||
/* 32bit specific registers. Keep them at the end of the range */
|
/* 32bit specific registers. Keep them at the end of the range */
|
||||||
DACR32_EL2, /* Domain Access Control Register */
|
DACR32_EL2, /* Domain Access Control Register */
|
||||||
IFSR32_EL2, /* Instruction Fault Status Register */
|
IFSR32_EL2, /* Instruction Fault Status Register */
|
||||||
@ -556,16 +566,11 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct kvm_vm_stat {
|
struct kvm_vm_stat {
|
||||||
ulong remote_tlb_flush;
|
struct kvm_vm_stat_generic generic;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct kvm_vcpu_stat {
|
struct kvm_vcpu_stat {
|
||||||
u64 halt_successful_poll;
|
struct kvm_vcpu_stat_generic generic;
|
||||||
u64 halt_attempted_poll;
|
|
||||||
u64 halt_poll_success_ns;
|
|
||||||
u64 halt_poll_fail_ns;
|
|
||||||
u64 halt_poll_invalid;
|
|
||||||
u64 halt_wakeup;
|
|
||||||
u64 hvc_exit_stat;
|
u64 hvc_exit_stat;
|
||||||
u64 wfe_exit_stat;
|
u64 wfe_exit_stat;
|
||||||
u64 wfi_exit_stat;
|
u64 wfi_exit_stat;
|
||||||
@ -721,6 +726,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
|
|||||||
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
|
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
|
||||||
struct kvm_device_attr *attr);
|
struct kvm_device_attr *attr);
|
||||||
|
|
||||||
|
long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
|
||||||
|
struct kvm_arm_copy_mte_tags *copy_tags);
|
||||||
|
|
||||||
/* Guest/host FPSIMD coordination helpers */
|
/* Guest/host FPSIMD coordination helpers */
|
||||||
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
|
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
|
||||||
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
|
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
|
||||||
@ -769,6 +777,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
|||||||
#define kvm_arm_vcpu_sve_finalized(vcpu) \
|
#define kvm_arm_vcpu_sve_finalized(vcpu) \
|
||||||
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
|
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
|
||||||
|
|
||||||
|
#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
|
||||||
#define kvm_vcpu_has_pmu(vcpu) \
|
#define kvm_vcpu_has_pmu(vcpu) \
|
||||||
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
|
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
|
||||||
|
|
||||||
|
@ -188,10 +188,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
|
|||||||
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
|
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
|
static inline void __clean_dcache_guest_page(void *va, size_t size)
|
||||||
{
|
{
|
||||||
void *va = page_address(pfn_to_page(pfn));
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* With FWB, we ensure that the guest always accesses memory using
|
* With FWB, we ensure that the guest always accesses memory using
|
||||||
* cacheable attributes, and we don't have to clean to PoC when
|
* cacheable attributes, and we don't have to clean to PoC when
|
||||||
@ -204,18 +202,14 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
|
|||||||
kvm_flush_dcache_to_poc(va, size);
|
kvm_flush_dcache_to_poc(va, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
|
static inline void __invalidate_icache_guest_page(void *va, size_t size)
|
||||||
unsigned long size)
|
|
||||||
{
|
{
|
||||||
if (icache_is_aliasing()) {
|
if (icache_is_aliasing()) {
|
||||||
/* any kind of VIPT cache */
|
/* any kind of VIPT cache */
|
||||||
icache_inval_all_pou();
|
icache_inval_all_pou();
|
||||||
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
|
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
|
||||||
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
|
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
|
||||||
void *va = page_address(pfn_to_page(pfn));
|
icache_inval_pou((unsigned long)va, (unsigned long)va + size);
|
||||||
|
|
||||||
icache_inval_pou((unsigned long)va,
|
|
||||||
(unsigned long)va + size);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
arch/arm64/include/asm/kvm_mte.h (new file, 66 lines)
@ -0,0 +1,66 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020-2021 ARM Ltd.
|
||||||
|
*/
|
||||||
|
#ifndef __ASM_KVM_MTE_H
|
||||||
|
#define __ASM_KVM_MTE_H
|
||||||
|
|
||||||
|
#ifdef __ASSEMBLY__
|
||||||
|
|
||||||
|
#include <asm/sysreg.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_ARM64_MTE
|
||||||
|
|
||||||
|
.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
|
||||||
|
alternative_if_not ARM64_MTE
|
||||||
|
b .L__skip_switch\@
|
||||||
|
alternative_else_nop_endif
|
||||||
|
mrs \reg1, hcr_el2
|
||||||
|
tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
|
||||||
|
|
||||||
|
mrs_s \reg1, SYS_RGSR_EL1
|
||||||
|
str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
|
||||||
|
mrs_s \reg1, SYS_GCR_EL1
|
||||||
|
str \reg1, [\h_ctxt, #CPU_GCR_EL1]
|
||||||
|
|
||||||
|
ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
|
||||||
|
msr_s SYS_RGSR_EL1, \reg1
|
||||||
|
ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
|
||||||
|
msr_s SYS_GCR_EL1, \reg1
|
||||||
|
|
||||||
|
.L__skip_switch\@:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
|
||||||
|
alternative_if_not ARM64_MTE
|
||||||
|
b .L__skip_switch\@
|
||||||
|
alternative_else_nop_endif
|
||||||
|
mrs \reg1, hcr_el2
|
||||||
|
tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
|
||||||
|
|
||||||
|
mrs_s \reg1, SYS_RGSR_EL1
|
||||||
|
str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
|
||||||
|
mrs_s \reg1, SYS_GCR_EL1
|
||||||
|
str \reg1, [\g_ctxt, #CPU_GCR_EL1]
|
||||||
|
|
||||||
|
ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
|
||||||
|
msr_s SYS_RGSR_EL1, \reg1
|
||||||
|
ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
|
||||||
|
msr_s SYS_GCR_EL1, \reg1
|
||||||
|
|
||||||
|
isb
|
||||||
|
|
||||||
|
.L__skip_switch\@:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
#else /* !CONFIG_ARM64_MTE */
|
||||||
|
|
||||||
|
.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
|
||||||
|
.endm
|
||||||
|
|
||||||
|
#endif /* CONFIG_ARM64_MTE */
|
||||||
|
#endif /* __ASSEMBLY__ */
|
||||||
|
#endif /* __ASM_KVM_MTE_H */
|
@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
||||||
* @zalloc_page: Allocate a single zeroed memory page. The @arg parameter
|
* @zalloc_page: Allocate a single zeroed memory page.
|
||||||
* can be used by the walker to pass a memcache. The
|
* The @arg parameter can be used by the walker
|
||||||
* initial refcount of the page is 1.
|
* to pass a memcache. The initial refcount of
|
||||||
* @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The
|
* the page is 1.
|
||||||
* @size parameter is in bytes, and is rounded-up to the
|
* @zalloc_pages_exact: Allocate an exact number of zeroed memory pages.
|
||||||
* next page boundary. The resulting allocation is
|
* The @size parameter is in bytes, and is rounded
|
||||||
* physically contiguous.
|
* up to the next page boundary. The resulting
|
||||||
* @free_pages_exact: Free an exact number of memory pages previously
|
* allocation is physically contiguous.
|
||||||
* allocated by zalloc_pages_exact.
|
* @free_pages_exact: Free an exact number of memory pages previously
|
||||||
* @get_page: Increment the refcount on a page.
|
* allocated by zalloc_pages_exact.
|
||||||
* @put_page: Decrement the refcount on a page. When the refcount
|
* @get_page: Increment the refcount on a page.
|
||||||
* reaches 0 the page is automatically freed.
|
* @put_page: Decrement the refcount on a page. When the
|
||||||
* @page_count: Return the refcount of a page.
|
* refcount reaches 0 the page is automatically
|
||||||
* @phys_to_virt: Convert a physical address into a virtual address mapped
|
* freed.
|
||||||
* in the current context.
|
* @page_count: Return the refcount of a page.
|
||||||
* @virt_to_phys: Convert a virtual address mapped in the current context
|
* @phys_to_virt: Convert a physical address into a virtual
|
||||||
* into a physical address.
|
* address mapped in the current context.
|
||||||
|
* @virt_to_phys: Convert a virtual address mapped in the current
|
||||||
|
* context into a physical address.
|
||||||
|
* @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC
|
||||||
|
* for the specified memory address range.
|
||||||
|
* @icache_inval_pou: Invalidate the instruction cache to the PoU
|
||||||
|
* for the specified memory address range.
|
||||||
*/
|
*/
|
||||||
struct kvm_pgtable_mm_ops {
|
struct kvm_pgtable_mm_ops {
|
||||||
void* (*zalloc_page)(void *arg);
|
void* (*zalloc_page)(void *arg);
|
||||||
@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
|
|||||||
int (*page_count)(void *addr);
|
int (*page_count)(void *addr);
|
||||||
void* (*phys_to_virt)(phys_addr_t phys);
|
void* (*phys_to_virt)(phys_addr_t phys);
|
||||||
phys_addr_t (*virt_to_phys)(void *addr);
|
phys_addr_t (*virt_to_phys)(void *addr);
|
||||||
|
void (*dcache_clean_inval_poc)(void *addr, size_t size);
|
||||||
|
void (*icache_inval_pou)(void *addr, size_t size);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#define MTE_GRANULE_SIZE UL(16)
|
#define MTE_GRANULE_SIZE UL(16)
|
||||||
#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
|
#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
|
||||||
|
#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
|
||||||
#define MTE_TAG_SHIFT 56
|
#define MTE_TAG_SHIFT 56
|
||||||
#define MTE_TAG_SIZE 4
|
#define MTE_TAG_SIZE 4
|
||||||
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
|
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
|
||||||
|
@ -38,7 +38,7 @@ void mte_free_tag_storage(char *storage);
|
|||||||
#define PG_mte_tagged PG_arch_2
|
#define PG_mte_tagged PG_arch_2
|
||||||
|
|
||||||
void mte_zero_clear_page_tags(void *addr);
|
void mte_zero_clear_page_tags(void *addr);
|
||||||
void mte_sync_tags(pte_t *ptep, pte_t pte);
|
void mte_sync_tags(pte_t old_pte, pte_t pte);
|
||||||
void mte_copy_page_tags(void *kto, const void *kfrom);
|
void mte_copy_page_tags(void *kto, const void *kfrom);
|
||||||
void mte_thread_init_user(void);
|
void mte_thread_init_user(void);
|
||||||
void mte_thread_switch(struct task_struct *next);
|
void mte_thread_switch(struct task_struct *next);
|
||||||
@ -57,7 +57,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
|
|||||||
static inline void mte_zero_clear_page_tags(void *addr)
|
static inline void mte_zero_clear_page_tags(void *addr)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
|
static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
|
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
|
||||||
|
@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|||||||
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
|
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
|
||||||
__sync_icache_dcache(pte);
|
__sync_icache_dcache(pte);
|
||||||
|
|
||||||
if (system_supports_mte() &&
|
/*
|
||||||
pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
|
* If the PTE would provide user space access to the tags associated
|
||||||
mte_sync_tags(ptep, pte);
|
* with it then ensure that the MTE tags are synchronised. Although
|
||||||
|
* pte_access_permitted() returns false for exec only mappings, they
|
||||||
|
* don't expose tags (instruction fetches don't check tags).
|
||||||
|
*/
|
||||||
|
if (system_supports_mte() && pte_access_permitted(pte, false) &&
|
||||||
|
!pte_special(pte)) {
|
||||||
|
pte_t old_pte = READ_ONCE(*ptep);
|
||||||
|
/*
|
||||||
|
* We only need to synchronise if the new PTE has tags enabled
|
||||||
|
* or if swapping in (in which case another mapping may have
|
||||||
|
* set tags in the past even if this PTE isn't tagged).
|
||||||
|
* (!pte_none() && !pte_present()) is an open coded version of
|
||||||
|
* is_swap_pte()
|
||||||
|
*/
|
||||||
|
if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
|
||||||
|
mte_sync_tags(old_pte, pte);
|
||||||
|
}
|
||||||
|
|
||||||
__check_racy_pte_update(mm, ptep, pte);
|
__check_racy_pte_update(mm, ptep, pte);
|
||||||
|
|
||||||
|
@ -651,7 +651,8 @@
|
|||||||
|
|
||||||
#define INIT_SCTLR_EL2_MMU_ON \
|
#define INIT_SCTLR_EL2_MMU_ON \
|
||||||
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
|
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
|
||||||
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
|
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
|
||||||
|
SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
|
||||||
|
|
||||||
#define INIT_SCTLR_EL2_MMU_OFF \
|
#define INIT_SCTLR_EL2_MMU_OFF \
|
||||||
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
|
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
|
||||||
|
@ -184,6 +184,17 @@ struct kvm_vcpu_events {
|
|||||||
__u32 reserved[12];
|
__u32 reserved[12];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct kvm_arm_copy_mte_tags {
|
||||||
|
__u64 guest_ipa;
|
||||||
|
__u64 length;
|
||||||
|
void __user *addr;
|
||||||
|
__u64 flags;
|
||||||
|
__u64 reserved[2];
|
||||||
|
};
|
||||||
|
|
||||||
|
#define KVM_ARM_TAGS_TO_GUEST 0
|
||||||
|
#define KVM_ARM_TAGS_FROM_GUEST 1
|
||||||
|
|
||||||
/* If you need to interpret the index values, here is the key: */
|
/* If you need to interpret the index values, here is the key: */
|
||||||
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
|
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
|
||||||
#define KVM_REG_ARM_COPROC_SHIFT 16
|
#define KVM_REG_ARM_COPROC_SHIFT 16
|
||||||
|
@ -113,6 +113,8 @@ int main(void)
|
|||||||
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
|
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
|
||||||
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
|
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
|
||||||
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
|
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
|
||||||
|
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
|
||||||
|
DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
|
||||||
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
|
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
|
||||||
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
|
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
|
||||||
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
|
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
|
||||||
|
@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
|
|||||||
EXPORT_SYMBOL_GPL(mte_async_mode);
|
EXPORT_SYMBOL_GPL(mte_async_mode);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
|
static void mte_sync_page_tags(struct page *page, pte_t old_pte,
|
||||||
|
bool check_swap, bool pte_is_tagged)
|
||||||
{
|
{
|
||||||
pte_t old_pte = READ_ONCE(*ptep);
|
|
||||||
|
|
||||||
if (check_swap && is_swap_pte(old_pte)) {
|
if (check_swap && is_swap_pte(old_pte)) {
|
||||||
swp_entry_t entry = pte_to_swp_entry(old_pte);
|
swp_entry_t entry = pte_to_swp_entry(old_pte);
|
||||||
|
|
||||||
@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!pte_is_tagged)
|
||||||
|
return;
|
||||||
|
|
||||||
page_kasan_tag_reset(page);
|
page_kasan_tag_reset(page);
|
||||||
/*
|
/*
|
||||||
* We need smp_wmb() in between setting the flags and clearing the
|
* We need smp_wmb() in between setting the flags and clearing the
|
||||||
@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
|
|||||||
mte_clear_page_tags(page_address(page));
|
mte_clear_page_tags(page_address(page));
|
||||||
}
|
}
|
||||||
|
|
||||||
void mte_sync_tags(pte_t *ptep, pte_t pte)
|
void mte_sync_tags(pte_t old_pte, pte_t pte)
|
||||||
{
|
{
|
||||||
struct page *page = pte_page(pte);
|
struct page *page = pte_page(pte);
|
||||||
long i, nr_pages = compound_nr(page);
|
long i, nr_pages = compound_nr(page);
|
||||||
bool check_swap = nr_pages == 1;
|
bool check_swap = nr_pages == 1;
|
||||||
|
bool pte_is_tagged = pte_tagged(pte);
|
||||||
|
|
||||||
|
/* Early out if there's nothing to do */
|
||||||
|
if (!check_swap && !pte_is_tagged)
|
||||||
|
return;
|
||||||
|
|
||||||
/* if PG_mte_tagged is set, tags have already been initialised */
|
/* if PG_mte_tagged is set, tags have already been initialised */
|
||||||
for (i = 0; i < nr_pages; i++, page++) {
|
for (i = 0; i < nr_pages; i++, page++) {
|
||||||
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
|
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
|
||||||
mte_sync_page_tags(page, ptep, check_swap);
|
mte_sync_page_tags(page, old_pte, check_swap,
|
||||||
|
pte_is_tagged);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ obj-$(CONFIG_KVM) += kvm.o
|
|||||||
obj-$(CONFIG_KVM) += hyp/
|
obj-$(CONFIG_KVM) += hyp/
|
||||||
|
|
||||||
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
|
kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
|
||||||
$(KVM)/vfio.o $(KVM)/irqchip.o \
|
$(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
|
||||||
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
|
arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
|
||||||
inject_fault.o va_layout.o handle_exit.o \
|
inject_fault.o va_layout.o handle_exit.o \
|
||||||
guest.o debug.o reset.o sys_regs.o \
|
guest.o debug.o reset.o sys_regs.o \
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <linux/kvm_host.h>
|
#include <linux/kvm_host.h>
|
||||||
#include <linux/interrupt.h>
|
#include <linux/interrupt.h>
|
||||||
#include <linux/irq.h>
|
#include <linux/irq.h>
|
||||||
|
#include <linux/irqdomain.h>
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
|
|
||||||
#include <clocksource/arm_arch_timer.h>
|
#include <clocksource/arm_arch_timer.h>
|
||||||
@ -973,6 +974,135 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
|
||||||
|
{
|
||||||
|
if (vcpu)
|
||||||
|
irqd_set_forwarded_to_vcpu(d);
|
||||||
|
else
|
||||||
|
irqd_clr_forwarded_to_vcpu(d);
|
||||||
|
|
||||||
+	return 0;
+}
+
+static int timer_irq_set_irqchip_state(struct irq_data *d,
+				       enum irqchip_irq_state which, bool val)
+{
+	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+		return irq_chip_set_parent_state(d, which, val);
+
+	if (val)
+		irq_chip_mask_parent(d);
+	else
+		irq_chip_unmask_parent(d);
+
+	return 0;
+}
+
+static void timer_irq_eoi(struct irq_data *d)
+{
+	if (!irqd_is_forwarded_to_vcpu(d))
+		irq_chip_eoi_parent(d);
+}
+
+static void timer_irq_ack(struct irq_data *d)
+{
+	d = d->parent_data;
+	if (d->chip->irq_ack)
+		d->chip->irq_ack(d);
+}
+
+static struct irq_chip timer_chip = {
+	.name			= "KVM",
+	.irq_ack		= timer_irq_ack,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= timer_irq_eoi,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
+	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
+};
+
+static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	irq_hw_number_t hwirq = (uintptr_t)arg;
+
+	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+					     &timer_chip, NULL);
+}
+
+static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+}
+
+static const struct irq_domain_ops timer_domain_ops = {
+	.alloc	= timer_irq_domain_alloc,
+	.free	= timer_irq_domain_free,
+};
+
+static struct irq_ops arch_timer_irq_ops = {
+	.get_input_level = kvm_arch_timer_get_input_level,
+};
+
+static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
+{
+	*flags = irq_get_trigger_type(virq);
+	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
+		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
+			virq);
+		*flags = IRQF_TRIGGER_LOW;
+	}
+}
+
+static int kvm_irq_init(struct arch_timer_kvm_info *info)
+{
+	struct irq_domain *domain = NULL;
+
+	if (info->virtual_irq <= 0) {
+		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
+			info->virtual_irq);
+		return -ENODEV;
+	}
+
+	host_vtimer_irq = info->virtual_irq;
+	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
+
+	if (kvm_vgic_global_state.no_hw_deactivation) {
+		struct fwnode_handle *fwnode;
+		struct irq_data *data;
+
+		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
+		if (!fwnode)
+			return -ENOMEM;
+
+		/* Assume both vtimer and ptimer in the same parent */
+		data = irq_get_irq_data(host_vtimer_irq);
+		domain = irq_domain_create_hierarchy(data->domain, 0,
+						     NR_KVM_TIMERS, fwnode,
+						     &timer_domain_ops, NULL);
+		if (!domain) {
+			irq_domain_free_fwnode(fwnode);
+			return -ENOMEM;
+		}
+
+		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
+					    (void *)TIMER_VTIMER));
+	}
+
+	if (info->physical_irq > 0) {
+		host_ptimer_irq = info->physical_irq;
+		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+
+		if (domain)
+			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
+						    (void *)TIMER_PTIMER));
+	}
+
+	return 0;
+}
 int kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
@@ -986,23 +1116,12 @@ int kvm_timer_hyp_init(bool has_gic)
 		return -ENODEV;
 	}
 
+	err = kvm_irq_init(info);
+	if (err)
+		return err;
+
 	/* First, do the virtual EL1 timer irq */
 
-	if (info->virtual_irq <= 0) {
-		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
-			info->virtual_irq);
-		return -ENODEV;
-	}
-	host_vtimer_irq = info->virtual_irq;
-
-	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
-	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
-	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
-		kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
-			host_vtimer_irq);
-		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
-	}
-
 	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
 				 "kvm guest vtimer", kvm_get_running_vcpus());
 	if (err) {
@@ -1027,15 +1146,6 @@ int kvm_timer_hyp_init(bool has_gic)
 	/* Now let's do the physical EL1 timer irq */
 
 	if (info->physical_irq > 0) {
-		host_ptimer_irq = info->physical_irq;
-		host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
-		if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
-		    host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
-			kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
-				host_ptimer_irq);
-			host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
-		}
-
 		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
 					 "kvm guest ptimer", kvm_get_running_vcpus());
 		if (err) {
@@ -1143,7 +1253,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		ret = kvm_vgic_map_phys_irq(vcpu,
 					    map.direct_vtimer->host_timer_irq,
 					    map.direct_vtimer->irq.irq,
-					    kvm_arch_timer_get_input_level);
+					    &arch_timer_irq_ops);
 		if (ret)
 			return ret;
 
@@ -1151,7 +1261,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		ret = kvm_vgic_map_phys_irq(vcpu,
 					    map.direct_ptimer->host_timer_irq,
 					    map.direct_ptimer->irq.irq,
-					    kvm_arch_timer_get_input_level);
+					    &arch_timer_irq_ops);
 	}
 
 	if (ret)
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = 0;
 		kvm->arch.return_nisv_io_abort_to_user = true;
 		break;
+	case KVM_CAP_ARM_MTE:
+		if (!system_supports_mte() || kvm->created_vcpus)
+			return -EINVAL;
+		r = 0;
+		kvm->arch.mte_enabled = true;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		 */
 		r = 1;
 		break;
+	case KVM_CAP_ARM_MTE:
+		r = system_supports_mte();
+		break;
 	case KVM_CAP_STEAL_TIME:
 		r = kvm_arm_pvtime_supported();
 		break;
@@ -689,6 +698,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 			vgic_v4_load(vcpu);
 			preempt_enable();
 		}
+
+		if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
+			kvm_pmu_handle_pmcr(vcpu,
+					    __vcpu_sys_reg(vcpu, PMCR_EL0));
 	}
 }
 
@@ -1359,6 +1372,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		return 0;
 	}
+	case KVM_ARM_MTE_COPY_TAGS: {
+		struct kvm_arm_copy_mte_tags copy_tags;
+
+		if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+			return -EFAULT;
+		return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+	}
 	default:
 		return -EINVAL;
 	}
@@ -28,20 +28,40 @@
 
 #include "trace.h"
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
-	VCPU_STAT("hvc_exit_stat", hvc_exit_stat),
-	VCPU_STAT("wfe_exit_stat", wfe_exit_stat),
-	VCPU_STAT("wfi_exit_stat", wfi_exit_stat),
-	VCPU_STAT("mmio_exit_user", mmio_exit_user),
-	VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel),
-	VCPU_STAT("exits", exits),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
-	{ NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
+	STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
+	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
+	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+	STATS_DESC_COUNTER(VCPU, exits)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
 };
 
 static bool core_reg_offset_is_vreg(u64 off)
@@ -995,3 +1015,89 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 
 	return ret;
 }
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+				struct kvm_arm_copy_mte_tags *copy_tags)
+{
+	gpa_t guest_ipa = copy_tags->guest_ipa;
+	size_t length = copy_tags->length;
+	void __user *tags = copy_tags->addr;
+	gpa_t gfn;
+	bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+	int ret = 0;
+
+	if (!kvm_has_mte(kvm))
+		return -EINVAL;
+
+	if (copy_tags->reserved[0] || copy_tags->reserved[1])
+		return -EINVAL;
+
+	if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+		return -EINVAL;
+
+	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+		return -EINVAL;
+
+	gfn = gpa_to_gfn(guest_ipa);
+
+	mutex_lock(&kvm->slots_lock);
+
+	while (length > 0) {
+		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+		void *maddr;
+		unsigned long num_tags;
+		struct page *page;
+
+		if (is_error_noslot_pfn(pfn)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		page = pfn_to_online_page(pfn);
+		if (!page) {
+			/* Reject ZONE_DEVICE memory */
+			ret = -EFAULT;
+			goto out;
+		}
+		maddr = page_address(page);
+
+		if (!write) {
+			if (test_bit(PG_mte_tagged, &page->flags))
+				num_tags = mte_copy_tags_to_user(tags, maddr,
+							MTE_GRANULES_PER_PAGE);
+			else
+				/* No tags in memory, so write zeros */
+				num_tags = MTE_GRANULES_PER_PAGE -
+					clear_user(tags, MTE_GRANULES_PER_PAGE);
+			kvm_release_pfn_clean(pfn);
+		} else {
+			num_tags = mte_copy_tags_from_user(maddr, tags,
+							MTE_GRANULES_PER_PAGE);
+
+			/*
+			 * Set the flag after checking the write
+			 * completed fully
+			 */
+			if (num_tags == MTE_GRANULES_PER_PAGE)
+				set_bit(PG_mte_tagged, &page->flags);
+
+			kvm_release_pfn_dirty(pfn);
+		}
+
+		if (num_tags != MTE_GRANULES_PER_PAGE) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		gfn++;
+		tags += num_tags;
+		length -= PAGE_SIZE;
+	}
+
+out:
+	mutex_unlock(&kvm->slots_lock);
+	/* If some data has been copied report the number of bytes copied */
+	if (length != copy_tags->length)
+		return copy_tags->length - length;
+	return ret;
+}
@@ -13,6 +13,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
 #include <asm/kvm_ptrauth.h>
 
 	.text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
 
 	add	x29, x0, #VCPU_CONTEXT
 
+	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+	mte_switch_to_guest x29, x1, x2
+
 	// Macro ptrauth_switch_to_guest format:
 	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
 	// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
 	// when this feature is enabled for kernel code.
 	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
 
+	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+	mte_switch_to_hyp x1, x2, x3
+
 	// Restore hyp's sp_el0
 	restore_sp_el0 x2, x3
 
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
 	new |= (old & PSR_C_BIT);
 	new |= (old & PSR_V_BIT);
 
-	// TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+	if (kvm_has_mte(vcpu->kvm))
+		new |= PSR_TCO_BIT;
 
 	new |= (old & PSR_DIT_BIT);
 
@@ -76,6 +76,7 @@ el1_trap:
 	b	__guest_exit
 
 el1_irq:
+el1_fiq:
 	get_vcpu_ptr	x1, x0
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
 	invalid_vector	el2t_error_invalid
 	invalid_vector	el2h_irq_invalid
 	invalid_vector	el2h_fiq_invalid
-	invalid_vector	el1_fiq_invalid
 
 	.ltorg
 
@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)
 
 	valid_vect	el1_sync		// Synchronous 64-bit EL1
 	valid_vect	el1_irq			// IRQ 64-bit EL1
-	invalid_vect	el1_fiq_invalid		// FIQ 64-bit EL1
+	valid_vect	el1_fiq			// FIQ 64-bit EL1
 	valid_vect	el1_error		// Error 64-bit EL1
 
 	valid_vect	el1_sync		// Synchronous 32-bit EL1
 	valid_vect	el1_irq			// IRQ 32-bit EL1
-	invalid_vect	el1_fiq_invalid		// FIQ 32-bit EL1
+	valid_vect	el1_fiq			// FIQ 32-bit EL1
 	valid_vect	el1_error		// Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_vector)
 
@@ -14,6 +14,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
 }
 
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+	if (!vcpu)
+		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
 	ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+	}
+
 	ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
 	ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
 	ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
 	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+	}
+
 	if (!has_vhe() &&
 	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
 	    ctxt->__hyp_running_vcpu) {
@@ -7,7 +7,7 @@
 #include <nvhe/memory.h>
 #include <nvhe/spinlock.h>
 
-#define HYP_NO_ORDER	UINT_MAX
+#define HYP_NO_ORDER	USHRT_MAX
 
 struct hyp_pool {
 	/*
@@ -19,48 +19,13 @@ struct hyp_pool {
 	struct list_head free_area[MAX_ORDER];
 	phys_addr_t range_start;
 	phys_addr_t range_end;
-	unsigned int max_order;
+	unsigned short max_order;
 };
 
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-
-	hyp_spin_lock(&pool->lock);
-	p->refcount++;
-	hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-	int ret;
-
-	hyp_spin_lock(&pool->lock);
-	p->refcount--;
-	ret = (p->refcount == 0);
-	hyp_spin_unlock(&pool->lock);
-
-	return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-
-	hyp_spin_lock(&pool->lock);
-	if (p->refcount) {
-		hyp_spin_unlock(&pool->lock);
-		BUG();
-	}
-	p->refcount = 1;
-	hyp_spin_unlock(&pool->lock);
-}
-
 /* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
-void hyp_get_page(void *addr);
-void hyp_put_page(void *addr);
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_get_page(struct hyp_pool *pool, void *addr);
+void hyp_put_page(struct hyp_pool *pool, void *addr);
 
 /* Used pages cannot be freed */
 int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
@@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
 static __always_inline void __load_host_stage2(void)
@@ -7,12 +7,9 @@
 
 #include <linux/types.h>
 
-struct hyp_pool;
 struct hyp_page {
-	unsigned int refcount;
-	unsigned int order;
-	struct hyp_pool *pool;
-	struct list_head node;
+	unsigned short refcount;
+	unsigned short order;
 };
 
 extern u64 __hyp_vmemmap;
@@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
 	return res;
 }
 
-static inline unsigned long host_s2_mem_pgtable_pages(void)
+static inline unsigned long host_s2_pgtable_pages(void)
 {
+	unsigned long res;
+
 	/*
 	 * Include an extra 16 pages to safely upper-bound the worst case of
 	 * concatenated pgds.
 	 */
-	return __hyp_pgtable_total_pages() + 16;
-}
-
-static inline unsigned long host_s2_dev_pgtable_pages(void)
-{
+	res = __hyp_pgtable_total_pages() + 16;
+
 	/* Allow 1 GiB for MMIO mappings */
-	return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+	return res;
 }
 
 #endif /* __KVM_HYP_MM_H */
@@ -23,8 +23,7 @@
 extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
-static struct hyp_pool host_s2_mem;
-static struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_pool;
 
 /*
  * Copies of the host's CPU features registers holding sanitized values.
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
 
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
-	return hyp_alloc_pages(&host_s2_mem, get_order(size));
+	return hyp_alloc_pages(&host_s2_pool, get_order(size));
 }
 
 static void *host_s2_zalloc_page(void *pool)
@@ -44,20 +43,24 @@ static void *host_s2_zalloc_page(void *pool)
 	return hyp_alloc_pages(pool, 0);
 }
 
-static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+static void host_s2_get_page(void *addr)
+{
+	hyp_get_page(&host_s2_pool, addr);
+}
+
+static void host_s2_put_page(void *addr)
+{
+	hyp_put_page(&host_s2_pool, addr);
+}
+
+static int prepare_s2_pool(void *pgt_pool_base)
 {
 	unsigned long nr_pages, pfn;
 	int ret;
 
-	pfn = hyp_virt_to_pfn(mem_pgt_pool);
-	nr_pages = host_s2_mem_pgtable_pages();
-	ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
-	if (ret)
-		return ret;
-
-	pfn = hyp_virt_to_pfn(dev_pgt_pool);
-	nr_pages = host_s2_dev_pgtable_pages();
-	ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
+	pfn = hyp_virt_to_pfn(pgt_pool_base);
+	nr_pages = host_s2_pgtable_pages();
+	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
 	if (ret)
 		return ret;
 
@@ -67,8 +70,8 @@ static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
 		.phys_to_virt = hyp_phys_to_virt,
 		.virt_to_phys = hyp_virt_to_phys,
 		.page_count = hyp_page_count,
-		.get_page = hyp_get_page,
-		.put_page = hyp_put_page,
+		.get_page = host_s2_get_page,
+		.put_page = host_s2_put_page,
 	};
 
 	return 0;
@@ -86,7 +89,7 @@ static void prepare_host_vtcr(void)
 					  id_aa64mmfr1_el1_sys_val, phys_shift);
 }
 
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+int kvm_host_prepare_stage2(void *pgt_pool_base)
 {
 	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
 	int ret;
@@ -94,7 +97,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
 	prepare_host_vtcr();
 	hyp_spin_lock_init(&host_kvm.lock);
 
-	ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+	ret = prepare_s2_pool(pgt_pool_base);
 	if (ret)
 		return ret;
 
@@ -199,11 +202,10 @@ static bool range_is_memory(u64 start, u64 end)
 }
 
 static inline int __host_stage2_idmap(u64 start, u64 end,
-				      enum kvm_pgtable_prot prot,
-				      struct hyp_pool *pool)
+				      enum kvm_pgtable_prot prot)
 {
 	return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
-				      prot, pool);
+				      prot, &host_s2_pool);
 }
 
 static int host_stage2_idmap(u64 addr)
@@ -211,7 +213,6 @@ static int host_stage2_idmap(u64 addr)
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
 	struct kvm_mem_range range;
 	bool is_memory = find_mem_range(addr, &range);
-	struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
 	int ret;
 
 	if (is_memory)
@@ -222,22 +223,21 @@ static int host_stage2_idmap(u64 addr)
 	if (ret)
 		goto unlock;
 
-	ret = __host_stage2_idmap(range.start, range.end, prot, pool);
-	if (is_memory || ret != -ENOMEM)
+	ret = __host_stage2_idmap(range.start, range.end, prot);
+	if (ret != -ENOMEM)
 		goto unlock;
 
 	/*
-	 * host_s2_mem has been provided with enough pages to cover all of
-	 * memory with page granularity, so we should never hit the ENOMEM case.
-	 * However, it is difficult to know how much of the MMIO range we will
-	 * need to cover upfront, so we may need to 'recycle' the pages if we
-	 * run out.
+	 * The pool has been provided with enough pages to cover all of memory
+	 * with page granularity, but it is difficult to know how much of the
+	 * MMIO range we will need to cover upfront, so we may need to 'recycle'
+	 * the pages if we run out.
 	 */
 	ret = host_stage2_unmap_dev_all();
 	if (ret)
 		goto unlock;
 
-	ret = __host_stage2_idmap(range.start, range.end, prot, pool);
+	ret = __host_stage2_idmap(range.start, range.end, prot);
 
 unlock:
 	hyp_spin_unlock(&host_kvm.lock);
@@ -258,7 +258,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
 
 	hyp_spin_lock(&host_kvm.lock);
 	ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
-					   &host_s2_mem, pkvm_hyp_id);
+					   &host_s2_pool, pkvm_hyp_id);
 	hyp_spin_unlock(&host_kvm.lock);
 
 	return ret != -EAGAIN ? ret : 0;
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
  */
 static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
 					     struct hyp_page *p,
-					     unsigned int order)
+					     unsigned short order)
 {
 	phys_addr_t addr = hyp_page_to_phys(p);
 
@@ -51,21 +51,49 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
 /* Find a buddy page currently available for allocation */
 static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
 					   struct hyp_page *p,
-					   unsigned int order)
+					   unsigned short order)
 {
 	struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
 
-	if (!buddy || buddy->order != order || list_empty(&buddy->node))
+	if (!buddy || buddy->order != order || buddy->refcount)
 		return NULL;
 
 	return buddy;
 
 }
 
+/*
+ * Pages that are available for allocation are tracked in free-lists, so we use
+ * the pages themselves to store the list nodes to avoid wasting space. As the
+ * allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
+ * path to optimize allocation speed), we also need to clean-up the list node in
+ * each page when we take it out of the list.
+ */
+static inline void page_remove_from_list(struct hyp_page *p)
+{
+	struct list_head *node = hyp_page_to_virt(p);
+
+	__list_del_entry(node);
+	memset(node, 0, sizeof(*node));
+}
+
+static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
+{
+	struct list_head *node = hyp_page_to_virt(p);
+
+	INIT_LIST_HEAD(node);
+	list_add_tail(node, head);
+}
+
+static inline struct hyp_page *node_to_page(struct list_head *node)
+{
+	return hyp_virt_to_page(node);
+}
+
 static void __hyp_attach_page(struct hyp_pool *pool,
 			      struct hyp_page *p)
 {
-	unsigned int order = p->order;
+	unsigned short order = p->order;
 	struct hyp_page *buddy;
 
 	memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -83,32 +111,23 @@ static void __hyp_attach_page(struct hyp_pool *pool,
 			break;
 
 		/* Take the buddy out of its list, and coallesce with @p */
-		list_del_init(&buddy->node);
+		page_remove_from_list(buddy);
 		buddy->order = HYP_NO_ORDER;
 		p = min(p, buddy);
 	}
 
 	/* Mark the new head, and insert it */
 	p->order = order;
-	list_add_tail(&p->node, &pool->free_area[order]);
-}
-
-static void hyp_attach_page(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-
-	hyp_spin_lock(&pool->lock);
-	__hyp_attach_page(pool, p);
-	hyp_spin_unlock(&pool->lock);
+	page_add_to_list(p, &pool->free_area[order]);
 }
 
 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 					   struct hyp_page *p,
-					   unsigned int order)
+					   unsigned short order)
 {
 	struct hyp_page *buddy;
 
-	list_del_init(&p->node);
+	page_remove_from_list(p);
 	while (p->order > order) {
 		/*
 		 * The buddy of order n - 1 currently has HYP_NO_ORDER as it
@@ -119,30 +138,64 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 		p->order--;
 		buddy = __find_buddy_nocheck(pool, p, p->order);
 		buddy->order = p->order;
-		list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+		page_add_to_list(buddy, &pool->free_area[buddy->order]);
 	}
 
 	return p;
 }
 
-void hyp_put_page(void *addr)
+static inline void hyp_page_ref_inc(struct hyp_page *p)
 {
-	struct hyp_page *p = hyp_virt_to_page(addr);
+	BUG_ON(p->refcount == USHRT_MAX);
+	p->refcount++;
+}
+
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+	p->refcount--;
+	return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+	BUG_ON(p->refcount);
+	p->refcount = 1;
+}
+
+static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
+{
 	if (hyp_page_ref_dec_and_test(p))
-		hyp_attach_page(p);
+		__hyp_attach_page(pool, p);
 }
 
-void hyp_get_page(void *addr)
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
+void hyp_put_page(struct hyp_pool *pool, void *addr)
 {
 	struct hyp_page *p = hyp_virt_to_page(addr);
 
-	hyp_page_ref_inc(p);
+	hyp_spin_lock(&pool->lock);
+	__hyp_put_page(pool, p);
+	hyp_spin_unlock(&pool->lock);
 }
 
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
+void hyp_get_page(struct hyp_pool *pool, void *addr)
 {
-	unsigned int i = order;
+	struct hyp_page *p = hyp_virt_to_page(addr);
+
+	hyp_spin_lock(&pool->lock);
+	hyp_page_ref_inc(p);
+	hyp_spin_unlock(&pool->lock);
+}
+
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
+{
+	unsigned short i = order;
 	struct hyp_page *p;
 
 	hyp_spin_lock(&pool->lock);
@@ -156,11 +209,11 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
 	}
 
 	/* Extract it from the tree at the right order */
-	p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+	p = node_to_page(pool->free_area[i].next);
 	p = __hyp_extract_page(pool, p, order);
 
-	hyp_spin_unlock(&pool->lock);
 	hyp_set_page_refcounted(p);
+	hyp_spin_unlock(&pool->lock);
 
 	return hyp_page_to_virt(p);
 }
@@ -181,15 +234,14 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
 
 	/* Init the vmemmap portion */
 	p = hyp_phys_to_page(phys);
-	memset(p, 0, sizeof(*p) * nr_pages);
 	for (i = 0; i < nr_pages; i++) {
-		p[i].pool = pool;
-		INIT_LIST_HEAD(&p[i].node);
+		p[i].order = 0;
+		hyp_set_page_refcounted(&p[i]);
 	}
 
 	/* Attach the unused pages to the buddy tree */
 	for (i = reserved_pages; i < nr_pages; i++)
-		__hyp_attach_page(pool, &p[i]);
+		__hyp_put_page(pool, &p[i]);
 
 	return 0;
 }
@@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
 
 static void *vmemmap_base;
 static void *hyp_pgt_base;
-static void *host_s2_mem_pgt_base;
-static void *host_s2_dev_pgt_base;
+static void *host_s2_pgt_base;
 static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 
 static int divide_memory_pool(void *virt, unsigned long size)
@@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
 	if (!hyp_pgt_base)
 		return -ENOMEM;
 
-	nr_pages = host_s2_mem_pgtable_pages();
-	host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
-	if (!host_s2_mem_pgt_base)
-		return -ENOMEM;
-
-	nr_pages = host_s2_dev_pgtable_pages();
-	host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
-	if (!host_s2_dev_pgt_base)
+	nr_pages = host_s2_pgtable_pages();
+	host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
+	if (!host_s2_pgt_base)
 		return -ENOMEM;
 
 	return 0;
@@ -144,6 +138,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
 	return hyp_alloc_pages(&hpool, 0);
 }
 
+static void hpool_get_page(void *addr)
+{
+	hyp_get_page(&hpool, addr);
+}
+
+static void hpool_put_page(void *addr)
+{
+	hyp_put_page(&hpool, addr);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -159,7 +163,7 @@ void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
-	ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+	ret = kvm_host_prepare_stage2(host_s2_pgt_base);
 	if (ret)
 		goto out;
 
@@ -167,8 +171,8 @@ void __noreturn __pkvm_init_finalise(void)
 		.zalloc_page = hyp_zalloc_hyp_page,
 		.phys_to_virt = hyp_phys_to_virt,
 		.virt_to_phys = hyp_virt_to_phys,
-		.get_page = hyp_get_page,
-		.put_page = hyp_put_page,
+		.get_page = hpool_get_page,
+		.put_page = hpool_put_page,
 	};
 	pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
 
@@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
 	mm_ops->put_page(ptep);
 }
 
+static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
+{
+	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+
+	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 				      kvm_pte_t *ptep,
 				      struct stage2_map_data *data)
 {
 	kvm_pte_t new, old = *ptep;
 	u64 granule = kvm_granule_size(level), phys = data->phys;
+	struct kvm_pgtable *pgt = data->mmu->pgt;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
 	if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
 	}
 
+	/* Perform CMOs before installation of the guest stage-2 PTE */
+	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
+						granule);
+
+	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
+
 	smp_store_release(ptep, new);
 	if (stage2_pte_is_counted(new))
 		mm_ops->get_page(ptep);
@@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return ret;
 }
 
-static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
-{
-	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
-	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
-}
-
 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 			       enum kvm_pgtable_walk_flags flag,
 			       void * const arg)
@@ -864,10 +878,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 }
 
 struct stage2_attr_data {
 	kvm_pte_t	attr_set;
 	kvm_pte_t	attr_clr;
 	kvm_pte_t	pte;
 	u32		level;
+	struct kvm_pgtable_mm_ops *mm_ops;
 };
 
 static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -876,6 +891,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 {
 	kvm_pte_t pte = *ptep;
 	struct stage2_attr_data *data = arg;
+	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
 	if (!kvm_pte_valid(pte))
 		return 0;
@@ -890,8 +906,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	 * but worst-case the access flag update gets lost and will be
 	 * set on the next access instead.
 	 */
-	if (data->pte != pte)
+	if (data->pte != pte) {
+		/*
+		 * Invalidate instruction cache before updating the guest
+		 * stage-2 PTE if we are going to add executable permission.
+		 */
+		if (mm_ops->icache_inval_pou &&
+		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
+			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
+						  kvm_granule_size(level));
 		WRITE_ONCE(*ptep, pte);
+	}
 
 	return 0;
 }
@@ -906,6 +931,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 	struct stage2_attr_data data = {
 		.attr_set	= attr_set & attr_mask,
 		.attr_clr	= attr_clr & attr_mask,
+		.mm_ops		= pgt->mm_ops,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_attr_walker,
@@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
 	}
 
 	hyp_mem_pages += hyp_s1_pgtable_pages();
-	hyp_mem_pages += host_s2_mem_pgtable_pages();
-	hyp_mem_pages += host_s2_dev_pgtable_pages();
+	hyp_mem_pages += host_s2_pgtable_pages();
 
 	/*
 	 * The hyp_vmemmap needs to be backed by pages, but these pages
@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys)
|
|||||||
return __va(phys);
|
return __va(phys);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void clean_dcache_guest_page(void *va, size_t size)
|
||||||
|
{
|
||||||
|
__clean_dcache_guest_page(va, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void invalidate_icache_guest_page(void *va, size_t size)
|
||||||
|
{
|
||||||
|
__invalidate_icache_guest_page(va, size);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unmapping vs dcache management:
|
* Unmapping vs dcache management:
|
||||||
*
|
*
|
||||||
@ -432,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
|
|||||||
.page_count = kvm_host_page_count,
|
.page_count = kvm_host_page_count,
|
||||||
.phys_to_virt = kvm_host_va,
|
.phys_to_virt = kvm_host_va,
|
||||||
.virt_to_phys = kvm_host_pa,
|
.virt_to_phys = kvm_host_pa,
|
||||||
|
.dcache_clean_inval_poc = clean_dcache_guest_page,
|
||||||
|
.icache_inval_pou = invalidate_icache_guest_page,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -693,16 +705,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
|
|||||||
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
|
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
|
|
||||||
{
|
|
||||||
__clean_dcache_guest_page(pfn, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
|
|
||||||
{
|
|
||||||
__invalidate_icache_guest_page(pfn, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
|
static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
|
||||||
{
|
{
|
||||||
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
|
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
|
||||||
@ -822,6 +824,74 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
|
|||||||
return PAGE_SIZE;
|
return PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
|
||||||
|
{
|
||||||
|
unsigned long pa;
|
||||||
|
|
||||||
|
if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
|
||||||
|
return huge_page_shift(hstate_vma(vma));
|
||||||
|
|
||||||
|
if (!(vma->vm_flags & VM_PFNMAP))
|
||||||
|
return PAGE_SHIFT;
|
||||||
|
|
||||||
|
VM_BUG_ON(is_vm_hugetlb_page(vma));
|
||||||
|
|
||||||
|
pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
|
||||||
|
|
||||||
|
#ifndef __PAGETABLE_PMD_FOLDED
|
||||||
|
if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
|
||||||
|
ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
|
||||||
|
ALIGN(hva, PUD_SIZE) <= vma->vm_end)
|
||||||
|
return PUD_SHIFT;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
|
||||||
|
ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
|
||||||
|
ALIGN(hva, PMD_SIZE) <= vma->vm_end)
|
||||||
|
return PMD_SHIFT;
|
||||||
|
|
||||||
|
return PAGE_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
|
||||||
|
* able to see the page's tags and therefore they must be initialised first. If
|
||||||
|
* PG_mte_tagged is set, tags have already been initialised.
|
||||||
|
*
|
||||||
|
* The race in the test/set of the PG_mte_tagged flag is handled by:
|
||||||
|
* - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
|
||||||
|
* racing to santise the same page
|
||||||
|
* - mmap_lock protects between a VM faulting a page in and the VMM performing
|
||||||
|
* an mprotect() to add VM_MTE
|
||||||
|
*/
|
||||||
|
static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
|
||||||
|
unsigned long size)
|
||||||
|
{
|
||||||
|
unsigned long i, nr_pages = size >> PAGE_SHIFT;
|
||||||
|
struct page *page;
|
||||||
|
|
||||||
|
if (!kvm_has_mte(kvm))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pfn_to_online_page() is used to reject ZONE_DEVICE pages
|
||||||
|
* that may not support tags.
|
||||||
|
*/
|
||||||
|
page = pfn_to_online_page(pfn);
|
||||||
|
|
||||||
|
if (!page)
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
for (i = 0; i < nr_pages; i++, page++) {
|
||||||
|
if (!test_bit(PG_mte_tagged, &page->flags)) {
|
||||||
|
mte_clear_page_tags(page_address(page));
|
||||||
|
set_bit(PG_mte_tagged, &page->flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||||
struct kvm_memory_slot *memslot, unsigned long hva,
|
struct kvm_memory_slot *memslot, unsigned long hva,
|
||||||
unsigned long fault_status)
|
unsigned long fault_status)
|
||||||
@ -830,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||||||
bool write_fault, writable, force_pte = false;
|
bool write_fault, writable, force_pte = false;
|
||||||
bool exec_fault;
|
bool exec_fault;
|
||||||
bool device = false;
|
bool device = false;
|
||||||
|
bool shared;
|
||||||
unsigned long mmu_seq;
|
unsigned long mmu_seq;
|
||||||
struct kvm *kvm = vcpu->kvm;
|
struct kvm *kvm = vcpu->kvm;
|
||||||
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
|
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
|
||||||
@ -853,7 +924,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Let's check if we will get back a huge page backed by hugetlbfs */
|
/*
|
||||||
|
* Let's check if we will get back a huge page backed by hugetlbfs, or
|
||||||
|
* get block mapping for device MMIO region.
|
||||||
|
*/
|
||||||
mmap_read_lock(current->mm);
|
mmap_read_lock(current->mm);
|
||||||
vma = find_vma_intersection(current->mm, hva, hva + 1);
|
vma = find_vma_intersection(current->mm, hva, hva + 1);
|
||||||
if (unlikely(!vma)) {
|
if (unlikely(!vma)) {
|
||||||
@ -862,17 +936,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_vm_hugetlb_page(vma))
|
/*
|
||||||
vma_shift = huge_page_shift(hstate_vma(vma));
|
* logging_active is guaranteed to never be true for VM_PFNMAP
|
||||||
else
|
* memslots.
|
||||||
vma_shift = PAGE_SHIFT;
|
*/
|
||||||
|
if (logging_active) {
|
||||||
if (logging_active ||
|
|
||||||
(vma->vm_flags & VM_PFNMAP)) {
|
|
||||||
force_pte = true;
|
force_pte = true;
|
||||||
vma_shift = PAGE_SHIFT;
|
vma_shift = PAGE_SHIFT;
|
||||||
|
} else {
|
||||||
|
vma_shift = get_vma_page_shift(vma, hva);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shared = (vma->vm_flags & VM_PFNMAP);
|
||||||
|
|
||||||
switch (vma_shift) {
|
switch (vma_shift) {
|
||||||
#ifndef __PAGETABLE_PMD_FOLDED
|
#ifndef __PAGETABLE_PMD_FOLDED
|
||||||
case PUD_SHIFT:
|
case PUD_SHIFT:
|
||||||
@ -943,8 +1019,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
if (kvm_is_device_pfn(pfn)) {
|
if (kvm_is_device_pfn(pfn)) {
|
||||||
|
/*
|
||||||
|
* If the page was identified as device early by looking at
|
||||||
|
* the VMA flags, vma_pagesize is already representing the
|
||||||
|
* largest quantity we can map. If instead it was mapped
|
||||||
|
* via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
|
||||||
|
* and must not be upgraded.
|
||||||
|
*
|
||||||
|
* In both cases, we don't let transparent_hugepage_adjust()
|
||||||
|
* change things at the last minute.
|
||||||
|
*/
|
||||||
device = true;
|
device = true;
|
||||||
force_pte = true;
|
|
||||||
} else if (logging_active && !write_fault) {
|
} else if (logging_active && !write_fault) {
|
||||||
/*
|
/*
|
||||||
* Only actually map the page as writable if this was a write
|
* Only actually map the page as writable if this was a write
|
||||||
@ -965,19 +1050,25 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||||||
* If we are not forced to use page mapping, check if we are
|
* If we are not forced to use page mapping, check if we are
|
||||||
* backed by a THP and thus use block mapping if possible.
|
* backed by a THP and thus use block mapping if possible.
|
||||||
*/
|
*/
|
||||||
if (vma_pagesize == PAGE_SIZE && !force_pte)
|
if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
|
||||||
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
|
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
|
||||||
&pfn, &fault_ipa);
|
&pfn, &fault_ipa);
|
||||||
|
|
||||||
|
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||||
|
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
|
||||||
|
if (!shared)
|
||||||
|
ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||||
|
else
|
||||||
|
ret = -EFAULT;
|
||||||
|
if (ret)
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
if (writable)
|
if (writable)
|
||||||
prot |= KVM_PGTABLE_PROT_W;
|
prot |= KVM_PGTABLE_PROT_W;
|
||||||
|
|
||||||
if (fault_status != FSC_PERM && !device)
|
if (exec_fault)
|
||||||
clean_dcache_guest_page(pfn, vma_pagesize);
|
|
||||||
|
|
||||||
if (exec_fault) {
|
|
||||||
prot |= KVM_PGTABLE_PROT_X;
|
prot |= KVM_PGTABLE_PROT_X;
|
||||||
invalidate_icache_guest_page(pfn, vma_pagesize);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (device)
|
if (device)
|
||||||
prot |= KVM_PGTABLE_PROT_DEVICE;
|
prot |= KVM_PGTABLE_PROT_DEVICE;
|
||||||
@@ -1168,19 +1259,22 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	kvm_pfn_t pfn = pte_pfn(range->pte);
+	int ret;

 	if (!kvm->arch.mmu.pgt)
 		return false;

 	WARN_ON(range->end - range->start != 1);

-	/*
-	 * We've moved a page around, probably through CoW, so let's treat it
-	 * just like a translation fault and clean the cache to the PoC.
-	 */
-	clean_dcache_guest_page(pfn, PAGE_SIZE);
+	ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+	if (ret)
+		return false;

 	/*
+	 * We've moved a page around, probably through CoW, so let's treat
+	 * it just like a translation fault and the map handler will clean
+	 * the cache to the PoC.
+	 *
 	 * The MMU notifiers will have unmapped a huge PMD before calling
 	 * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
 	 * therefore we never need to clear out a huge PMD through this
@@ -1346,7 +1440,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
 	hva_t hva = mem->userspace_addr;
 	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;

 	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1456,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	mmap_read_lock(current->mm);
 	/*
 	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we can map
-	 * any of them right now.
+	 * between them, so iterate over all of them.
 	 *
 	 * +--------------------------------------------+
 	 * +---------------+----------------+   +----------------+
@@ -1375,51 +1467,29 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 */
 	do {
 		struct vm_area_struct *vma;
-		hva_t vm_start, vm_end;

 		vma = find_vma_intersection(current->mm, hva, reg_end);
 		if (!vma)
 			break;

 		/*
-		 * Take the intersection of this VMA with the memory region
+		 * VM_SHARED mappings are not allowed with MTE to avoid races
+		 * when updating the PG_mte_tagged page flag, see
+		 * sanitise_mte_tags for more details.
 		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
+		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+			return -EINVAL;

 		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa;
-
-			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-			pa += vm_start - vma->vm_start;
-
 			/* IO region dirty page logging not allowed */
 			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 				ret = -EINVAL;
-				goto out;
-			}
-
-			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-						    vm_end - vm_start,
-						    writable);
-			if (ret)
 				break;
+			}
 		}
-		hva = vm_end;
+		hva = min(reg_end, vma->vm_end);
 	} while (hva < reg_end);

-	if (change == KVM_MR_FLAGS_ONLY)
-		goto out;
-
-	spin_lock(&kvm->mmu_lock);
-	if (ret)
-		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-		stage2_flush_memslot(kvm, memslot);
-	spin_unlock(&kvm->mmu_lock);
-out:
 	mmap_read_unlock(current->mm);
 	return ret;
 }
@@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

 	if (val & ARMV8_PMU_PMCR_P) {
+		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
 		for_each_set_bit(i, &mask, 32)
 			kvm_pmu_set_counter_value(vcpu, i, 0);
 	}
@@ -850,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 			return -EINVAL;
 	}

+	/* One-off reload of the PMU on first run */
+	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
 	return 0;
 }
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
 	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
 		return false;

+	/* MTE is incompatible with AArch32 */
+	if (kvm_has_mte(vcpu->kvm) && is32bit)
+		return false;
+
 	/* Check that the vcpus are either all 32bit or all 64bit */
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
 		if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		break;
 	case SYS_ID_AA64PFR1_EL1:
 		val &= ~FEATURE(ID_AA64PFR1_MTE);
+		if (kvm_has_mte(vcpu->kvm)) {
+			u64 pfr, mte;
+
+			pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+			mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+			val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+		}
 		break;
 	case SYS_ID_AA64ISAR1_EL1:
 		if (!vcpu_has_ptrauth(vcpu))
@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }

+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+				   const struct sys_reg_desc *rd)
+{
+	if (kvm_has_mte(vcpu->kvm))
+		return 0;
+
+	return REG_HIDDEN;
+}
+
+#define MTE_REG(name) {				\
+	SYS_DESC(SYS_##name),			\
+	.access = undef_access,			\
+	.reset = reset_unknown,			\
+	.reg = name,				\
+	.visibility = mte_visibility,		\
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {			\
 	SYS_DESC(SYS_##name),			\
@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
 	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },

-	{ SYS_DESC(SYS_RGSR_EL1), undef_access },
-	{ SYS_DESC(SYS_GCR_EL1), undef_access },
+	MTE_REG(RGSR_EL1),
+	MTE_REG(GCR_EL1),

 	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
 	{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },

-	{ SYS_DESC(SYS_TFSR_EL1), undef_access },
-	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+	MTE_REG(TFSR_EL1),
+	MTE_REG(TFSRE0_EL1),

 	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
 	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }

+static struct gic_kvm_info *gic_kvm_info;
+
+void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
+{
+	BUG_ON(gic_kvm_info != NULL);
+	gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (gic_kvm_info)
+		*gic_kvm_info = *info;
+}
+
 /**
  * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
  *
@@ -509,18 +519,29 @@ void kvm_vgic_init_cpu_hardware(void)
  */
 int kvm_vgic_hyp_init(void)
 {
-	const struct gic_kvm_info *gic_kvm_info;
+	bool has_mask;
 	int ret;

-	gic_kvm_info = gic_get_kvm_info();
 	if (!gic_kvm_info)
 		return -ENODEV;

-	if (!gic_kvm_info->maint_irq) {
+	has_mask = !gic_kvm_info->no_maint_irq_mask;
+
+	if (has_mask && !gic_kvm_info->maint_irq) {
 		kvm_err("No vgic maintenance irq\n");
 		return -ENXIO;
 	}

+	/*
+	 * If we get one of these oddball non-GICs, taint the kernel,
+	 * as we have no idea of how they *really* behave.
+	 */
+	if (gic_kvm_info->no_hw_deactivation) {
+		kvm_info("Non-architectural vgic, tainting kernel\n");
+		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+		kvm_vgic_global_state.no_hw_deactivation = true;
+	}
+
 	switch (gic_kvm_info->type) {
 	case GIC_V2:
 		ret = vgic_v2_probe(gic_kvm_info);
@@ -536,10 +557,17 @@ int kvm_vgic_hyp_init(void)
 		ret = -ENODEV;
 	}

+	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+
+	kfree(gic_kvm_info);
+	gic_kvm_info = NULL;
+
 	if (ret)
 		return ret;

-	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+	if (!has_mask)
+		return 0;
+
 	ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
 				 vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 		 * If this causes us to lower the level, we have to also clear
 		 * the physical active state, since we will otherwise never be
 		 * told when the interrupt becomes asserted again.
+		 *
+		 * Another case is when the interrupt requires a helping hand
+		 * on deactivation (no HW deactivation, for example).
 		 */
-		if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
-			irq->line_level = vgic_get_phys_line_level(irq);
+		if (vgic_irq_is_mapped_level(irq)) {
+			bool resample = false;

-			if (!irq->line_level)
+			if (val & GICH_LR_PENDING_BIT) {
+				irq->line_level = vgic_get_phys_line_level(irq);
+				resample = !irq->line_level;
+			} else if (vgic_irq_needs_resampling(irq) &&
+				   !(irq->active || irq->pending_latch)) {
+				resample = true;
+			}
+
+			if (resample)
 				vgic_irq_set_phys_active(irq, false);
 		}

@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->group)
 		val |= GICH_LR_GROUP1;

-	if (irq->hw) {
+	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
 		/*
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 		 * If this causes us to lower the level, we have to also clear
 		 * the physical active state, since we will otherwise never be
 		 * told when the interrupt becomes asserted again.
+		 *
+		 * Another case is when the interrupt requires a helping hand
+		 * on deactivation (no HW deactivation, for example).
 		 */
-		if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
-			irq->line_level = vgic_get_phys_line_level(irq);
+		if (vgic_irq_is_mapped_level(irq)) {
+			bool resample = false;

-			if (!irq->line_level)
+			if (val & ICH_LR_PENDING_BIT) {
+				irq->line_level = vgic_get_phys_line_level(irq);
+				resample = !irq->line_level;
+			} else if (vgic_irq_needs_resampling(irq) &&
+				   !(irq->active || irq->pending_latch)) {
+				resample = true;
+			}
+
+			if (resample)
 				vgic_irq_set_phys_active(irq, false);
 		}

@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}

-	if (irq->hw) {
+	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
 		val |= ICH_LR_HW;
 		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
 		/*
@@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq)

 	BUG_ON(!irq->hw);

-	if (irq->get_input_level)
-		return irq->get_input_level(irq->intid);
+	if (irq->ops && irq->ops->get_input_level)
+		return irq->ops->get_input_level(irq->intid);

 	WARN_ON(irq_get_irqchip_state(irq->host_irq,
 				      IRQCHIP_STATE_PENDING,
@@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 /* @irq->irq_lock must be held */
 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 			    unsigned int host_irq,
-			    bool (*get_input_level)(int vindid))
+			    struct irq_ops *ops)
 {
 	struct irq_desc *desc;
 	struct irq_data *data;
@@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 	irq->hw = true;
 	irq->host_irq = host_irq;
 	irq->hwintid = data->hwirq;
-	irq->get_input_level = get_input_level;
+	irq->ops = ops;
 	return 0;
 }

@@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 {
 	irq->hw = false;
 	irq->hwintid = 0;
-	irq->get_input_level = NULL;
+	irq->ops = NULL;
 }

 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-			  u32 vintid, bool (*get_input_level)(int vindid))
+			  u32 vintid, struct irq_ops *ops)
 {
 	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	unsigned long flags;
@@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 	BUG_ON(!irq);

 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
-	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
+	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 	vgic_put_irq(vcpu->kvm, irq);

@@ -109,10 +109,11 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 }

 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
+	struct kvm_vm_stat_generic generic;
 };

 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 wait_exits;
 	u64 cache_exits;
 	u64 signal_exits;
@@ -142,12 +143,6 @@ struct kvm_vcpu_stat {
 #ifdef CONFIG_CPU_LOONGSON64
 	u64 vz_cpucfg_exits;
 #endif
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
 };

 struct kvm_arch_memory_slot {
@@ -2,7 +2,7 @@
 # Makefile for KVM support for MIPS
 #

-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)

 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm

@@ -38,43 +38,63 @@
 #define VECTORSPACING 0x100	/* for EI/VI mode */
 #endif

-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("wait", wait_exits),
-	VCPU_STAT("cache", cache_exits),
-	VCPU_STAT("signal", signal_exits),
-	VCPU_STAT("interrupt", int_exits),
-	VCPU_STAT("cop_unusable", cop_unusable_exits),
-	VCPU_STAT("tlbmod", tlbmod_exits),
-	VCPU_STAT("tlbmiss_ld", tlbmiss_ld_exits),
-	VCPU_STAT("tlbmiss_st", tlbmiss_st_exits),
-	VCPU_STAT("addrerr_st", addrerr_st_exits),
-	VCPU_STAT("addrerr_ld", addrerr_ld_exits),
-	VCPU_STAT("syscall", syscall_exits),
-	VCPU_STAT("resvd_inst", resvd_inst_exits),
-	VCPU_STAT("break_inst", break_inst_exits),
-	VCPU_STAT("trap_inst", trap_inst_exits),
-	VCPU_STAT("msa_fpe", msa_fpe_exits),
-	VCPU_STAT("fpe", fpe_exits),
-	VCPU_STAT("msa_disabled", msa_disabled_exits),
-	VCPU_STAT("flush_dcache", flush_dcache_exits),
-	VCPU_STAT("vz_gpsi", vz_gpsi_exits),
-	VCPU_STAT("vz_gsfc", vz_gsfc_exits),
-	VCPU_STAT("vz_hc", vz_hc_exits),
-	VCPU_STAT("vz_grr", vz_grr_exits),
-	VCPU_STAT("vz_gva", vz_gva_exits),
-	VCPU_STAT("vz_ghfc", vz_ghfc_exits),
-	VCPU_STAT("vz_gpa", vz_gpa_exits),
-	VCPU_STAT("vz_resvd", vz_resvd_exits),
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, wait_exits),
+	STATS_DESC_COUNTER(VCPU, cache_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, int_exits),
+	STATS_DESC_COUNTER(VCPU, cop_unusable_exits),
+	STATS_DESC_COUNTER(VCPU, tlbmod_exits),
+	STATS_DESC_COUNTER(VCPU, tlbmiss_ld_exits),
+	STATS_DESC_COUNTER(VCPU, tlbmiss_st_exits),
+	STATS_DESC_COUNTER(VCPU, addrerr_st_exits),
+	STATS_DESC_COUNTER(VCPU, addrerr_ld_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, resvd_inst_exits),
+	STATS_DESC_COUNTER(VCPU, break_inst_exits),
+	STATS_DESC_COUNTER(VCPU, trap_inst_exits),
+	STATS_DESC_COUNTER(VCPU, msa_fpe_exits),
+	STATS_DESC_COUNTER(VCPU, fpe_exits),
+	STATS_DESC_COUNTER(VCPU, msa_disabled_exits),
+	STATS_DESC_COUNTER(VCPU, flush_dcache_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gpsi_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gsfc_exits),
+	STATS_DESC_COUNTER(VCPU, vz_hc_exits),
+	STATS_DESC_COUNTER(VCPU, vz_grr_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gva_exits),
+	STATS_DESC_COUNTER(VCPU, vz_ghfc_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gpa_exits),
+	STATS_DESC_COUNTER(VCPU, vz_resvd_exits),
 #ifdef CONFIG_CPU_LOONGSON64
-	VCPU_STAT("vz_cpucfg", vz_cpucfg_exits),
+	STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits),
 #endif
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
-	{NULL}
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
 };

 bool kvm_trace_guest_mode_change;
@@ -120,6 +120,7 @@ extern s32 patch__call_flush_branch_caches3;
 extern s32 patch__flush_count_cache_return;
 extern s32 patch__flush_link_stack_return;
 extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
 extern s32 patch__memset_nocache, patch__memcpy_nocache;

 extern long flush_branch_caches;
@@ -140,7 +141,7 @@ void kvmhv_load_host_pmu(void);
 void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
 void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);

-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);

 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
 long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
@@ -19,6 +19,7 @@ struct mmu_psize_def {
 	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
 	unsigned int	tlbiel;	/* tlbiel supported for that page size */
 	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
+	unsigned long	h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
 	union {
 		unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
 		unsigned long ap;	/* Ap encoding used by PowerISA 3.0 */
@@ -4,6 +4,10 @@

 #include <asm/hvcall.h>

+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
@@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu)
 	return cpu | (threads_per_core - 1);
 }

+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+		return cpu & ~0x6;	/* Big Core */
+	else
+		return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+		return cpu | 0x6;	/* Big Core */
+	else
+		return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+		return 2;	/* Big Core */
+	else
+		return 1;
+}
+
 static inline u32 get_tensr(void)
 {
 #ifdef CONFIG_BOOKE
@@ -35,6 +35,19 @@
 /* PACA save area size in u64 units (exgen, exmc, etc) */
 #define EX_SIZE		10

+/* PACA save area offsets */
+#define EX_R9		0
+#define EX_R10		8
+#define EX_R11		16
+#define EX_R12		24
+#define EX_R13		32
+#define EX_DAR		40
+#define EX_DSISR	48
+#define EX_CCR		52
+#define EX_CFAR		56
+#define EX_PPR		64
+#define EX_CTR		72
+
 /*
  * maximum recursive depth of MCE exceptions
  */
@@ -413,9 +413,9 @@
 #define H_RPTI_TYPE_NESTED	0x0001	/* Invalidate nested guest partition-scope */
 #define H_RPTI_TYPE_TLB		0x0002	/* Invalidate TLB */
 #define H_RPTI_TYPE_PWC		0x0004	/* Invalidate Page Walk Cache */
-/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
 #define H_RPTI_TYPE_PRT		0x0008
-/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
 #define H_RPTI_TYPE_PAT		0x0008
 #define H_RPTI_TYPE_ALL		(H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
 				 H_RPTI_TYPE_PRT)
@@ -147,6 +147,7 @@
 #define KVM_GUEST_MODE_SKIP	2
 #define KVM_GUEST_MODE_GUEST_HV	3
 #define KVM_GUEST_MODE_HOST_HV	4
+#define KVM_GUEST_MODE_HV_P9	5 /* ISA >= v3.0 path */

 #define KVM_INST_FETCH_FAILED	-1

@@ -307,6 +307,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end);
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
 			  u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
@@ -153,9 +153,17 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
 	return radix;
 }

+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
+
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif

+/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an errata.
+ */
+#define HDSISR_CANARY	0x7fff
+
 /*
  * We use a lock bit in HPTE dword 0 to synchronize updates and
  * accesses to each HPTE, and another bit to indicate non-present
@@ -81,12 +81,13 @@ struct kvmppc_book3s_shadow_vcpu;
 struct kvm_nested_guest;

 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
-	ulong num_2M_pages;
-	ulong num_1G_pages;
+	struct kvm_vm_stat_generic generic;
+	u64 num_2M_pages;
+	u64 num_1G_pages;
 };

 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 sum_exits;
 	u64 mmio_exits;
 	u64 signal_exits;
@@ -102,14 +103,8 @@ struct kvm_vcpu_stat {
 	u64 emulated_inst_exits;
 	u64 dec_exits;
 	u64 ext_intr_exits;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
 	u64 halt_wait_ns;
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
 	u64 halt_successful_wait;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
 	u64 dbell_exits;
 	u64 gdbell_exits;
 	u64 ld;
@@ -298,7 +293,6 @@ struct kvm_arch {
 	u8 fwnmi_enabled;
 	u8 secure_guest;
 	u8 svm_enabled;
-	bool threads_indep;
 	bool nested_enable;
 	bool dawr1_enabled;
 	pgd_t *pgtable;
@@ -684,7 +678,12 @@ struct kvm_vcpu_arch {
 	ulong fault_dar;
 	u32 fault_dsisr;
 	unsigned long intr_msr;
-	ulong fault_gpa;	/* guest real address of page fault (POWER9) */
+	/*
+	 * POWER9 and later: fault_gpa contains the guest real address of page
+	 * fault for a radix guest, or segment descriptor (equivalent to result
+	 * from slbmfev of SLB entry that translated the EA) for hash guests.
+	 */
+	ulong fault_gpa;
 #endif

 #ifdef CONFIG_BOOKE
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
 extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
 extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
 static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+	{ return 0; }
 #endif

 #ifdef CONFIG_KVM_XIVE
@@ -655,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
 				u32 *priority);
 extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
 extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
-extern void kvmppc_xive_init_module(void);
-extern void kvmppc_xive_exit_module(void);

 extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 				    struct kvm_vcpu *vcpu, u32 cpu);
@@ -671,6 +673,8 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
 extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);

 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 {
@@ -680,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 					   struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
-extern void kvmppc_xive_native_init_module(void);
-extern void kvmppc_xive_native_exit_module(void);
 extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
 				     union kvmppc_one_reg *val);
 extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
@@ -695,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
 					u32 *priority) { return -1; }
 static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
 static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
-static inline void kvmppc_xive_init_module(void) { }
-static inline void kvmppc_xive_exit_module(void) { }

 static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 					   struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
@@ -711,14 +711,14 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
 static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 				      int level, bool line_status) { return -ENODEV; }
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }

 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 	{ return 0; }
 static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 					struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
 static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
-static inline void kvmppc_xive_native_init_module(void) { }
-static inline void kvmppc_xive_native_exit_module(void) { }
 static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
 					    union kvmppc_one_reg *val)
 	{ return 0; }
@@ -754,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 			   unsigned long tce_value, unsigned long npages);
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 			    unsigned int yield_count);
-long kvmppc_h_random(struct kvm_vcpu *vcpu);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
 void kvmhv_commence_exit(int trap);
 void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
 void kvmppc_subcore_enter_guest(void);
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 }
 #endif

-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
 extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@@ -222,6 +216,18 @@ static inline void mm_context_add_copro(struct mm_struct *mm) { }
 static inline void mm_context_remove_copro(struct mm_struct *mm) { }
 #endif

+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end);
+#else
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+					   unsigned long lpid,
+					   unsigned long type,
+					   unsigned long pg_sizes,
+					   unsigned long start,
+					   unsigned long end) { }
+#endif
+
 extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			       struct task_struct *tsk);
@@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
 extern void secondary_cpu_time_init(void);
 extern void __init time_init(void);

+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
+	return x;
+}
+#endif
+
 DECLARE_PER_CPU(u64, decrementers_next_tb);

 /* Convert timebase ticks to nanoseconds */
@@ -534,7 +534,6 @@ int main(void)
 	OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
 	OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
 	OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
-	OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
 	OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
 	OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
 	OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
@@ -21,22 +21,6 @@
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>

-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9		0
-#define EX_R10		8
-#define EX_R11		16
-#define EX_R12		24
-#define EX_R13		32
-#define EX_DAR		40
-#define EX_DSISR	48
-#define EX_CCR		52
-#define EX_CFAR		56
-#define EX_PPR		64
-#define EX_CTR		72
-
-.if EX_SIZE != 10
-	.error "EX_SIZE is wrong"
-.endif
-
 /*
  * Following are fixed section helper macros.
  *
@@ -133,7 +117,6 @@ name:
 #define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\()	/* ENTRY branch to common */
 #define IREALMODE_COMMON	.L_IREALMODE_COMMON_\name\()	/* Common runs in realmode */
 #define IMASK		.L_IMASK_\name\()	/* IRQ soft-mask bit */
-#define IKVM_SKIP	.L_IKVM_SKIP_\name\()	/* Generate KVM skip handler */
 #define IKVM_REAL	.L_IKVM_REAL_\name\()	/* Real entry tests KVM */
 #define __IKVM_REAL(name)	.L_IKVM_REAL_ ## name
 #define IKVM_VIRT	.L_IKVM_VIRT_\name\()	/* Virt entry tests KVM */
@@ -190,9 +173,6 @@ do_define_int n
 	.ifndef IMASK
 		IMASK=0
 	.endif
-	.ifndef IKVM_SKIP
-		IKVM_SKIP=0
-	.endif
 	.ifndef IKVM_REAL
 		IKVM_REAL=0
 	.endif
@@ -207,8 +187,6 @@ do_define_int n
 	.endif
 .endm

-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * All interrupts which set HSRR registers, as well as SRESET and MCE and
  * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -238,88 +216,28 @@ do_define_int n

 /*
  * If an interrupt is taken while a guest is running, it is immediately routed
- * to KVM to handle. If both HV and PR KVM are possible, KVM interrupts go first
- * to kvmppc_interrupt_hv, which handles the PR guest case.
+ * to KVM to handle.
  */
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif

-.macro KVMTEST name
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	lbz	r10,HSTATE_IN_GUEST(r13)
 	cmpwi	r10,0
-	bne	\name\()_kvm
-.endm
-
-.macro GEN_KVM name
-	.balign IFETCH_ALIGN_BYTES
-\name\()_kvm:
-
-	.if IKVM_SKIP
-	cmpwi	r10,KVM_GUEST_MODE_SKIP
-	beq	89f
-	.else
-BEGIN_FTR_SECTION
-	ld	r10,IAREA+EX_CFAR(r13)
-	std	r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
-	.endif
-
-	ld	r10,IAREA+EX_CTR(r13)
-	mtctr	r10
-BEGIN_FTR_SECTION
-	ld	r10,IAREA+EX_PPR(r13)
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	ld	r11,IAREA+EX_R11(r13)
-	ld	r12,IAREA+EX_R12(r13)
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r9,32
-	ld	r9,IAREA+EX_R9(r13)
-	ld	r10,IAREA+EX_R10(r13)
 	/* HSRR variants have the 0x2 bit added to their trap number */
 	.if IHSRR_IF_HVMODE
 	BEGIN_FTR_SECTION
-	ori	r12,r12,(IVEC + 0x2)
+	li	r10,(IVEC + 0x2)
 	FTR_SECTION_ELSE
-	ori	r12,r12,(IVEC)
+	li	r10,(IVEC)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 	.elseif IHSRR
-	ori	r12,r12,(IVEC+ 0x2)
+	li	r10,(IVEC + 0x2)
 	.else
-	ori	r12,r12,(IVEC)
+	li	r10,(IVEC)
 	.endif
-	b	kvmppc_interrupt
-
-	.if IKVM_SKIP
-89:	mtocrf	0x80,r9
-	ld	r10,IAREA+EX_CTR(r13)
-	mtctr	r10
-	ld	r9,IAREA+EX_R9(r13)
-	ld	r10,IAREA+EX_R10(r13)
-	ld	r11,IAREA+EX_R11(r13)
-	ld	r12,IAREA+EX_R12(r13)
-	.if IHSRR_IF_HVMODE
-	BEGIN_FTR_SECTION
-	b	kvmppc_skip_Hinterrupt
-	FTR_SECTION_ELSE
-	b	kvmppc_skip_interrupt
-	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-	.elseif IHSRR
-	b	kvmppc_skip_Hinterrupt
-	.else
-	b	kvmppc_skip_interrupt
-	.endif
-	.endif
-.endm
-
-#else
-.macro KVMTEST name
-.endm
-.macro GEN_KVM name
-.endm
+	bne	\handler
 #endif
+.endm

 /*
  * This is the BOOK3S interrupt entry code macro.
@@ -461,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
 	.if IKVM_REAL
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 	.endif

 	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
@@ -484,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 \name\()_common_virt:
 	.if IKVM_VIRT
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 1:
 	.endif
 	.endif /* IVIRT */
@@ -498,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
 	.if IKVM_REAL
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 	.endif
 .endm

@@ -1000,8 +918,6 @@ EXC_COMMON_BEGIN(system_reset_common)
 	EXCEPTION_RESTORE_REGS
 	RFI_TO_USER_OR_KERNEL

-	GEN_KVM system_reset
-

 /**
  * Interrupt 0x200 - Machine Check Interrupt (MCE).
@@ -1070,7 +986,6 @@ INT_DEFINE_BEGIN(machine_check)
 	ISET_RI=0
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(machine_check)

@@ -1166,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 	/*
 	 * Check if we are coming from guest. If yes, then run the normal
 	 * exception handler which will take the
-	 * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+	 * machine_check_kvm->kvm_interrupt branch to deliver the MC event
 	 * to guest.
 	 */
 	lbz	r11,HSTATE_IN_GUEST(r13)
@@ -1236,8 +1151,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 	bl	machine_check_exception
 	b	interrupt_return

-	GEN_KVM machine_check
-

 #ifdef CONFIG_PPC_P7_NAP
 /*
@@ -1342,7 +1255,6 @@ INT_DEFINE_BEGIN(data_access)
 	IVEC=0x300
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(data_access)

@@ -1373,8 +1285,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	REST_NVGPRS(r1)
 	b	interrupt_return

-	GEN_KVM data_access
-

 /**
  * Interrupt 0x380 - Data Segment Interrupt (DSLB).
@@ -1396,7 +1306,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 INT_DEFINE_BEGIN(data_access_slb)
 	IVEC=0x380
 	IDAR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(data_access_slb)

@@ -1425,8 +1334,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	bl	do_bad_slb_fault
 	b	interrupt_return

-	GEN_KVM data_access_slb
-

 /**
  * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
@@ -1463,8 +1370,6 @@ MMU_FTR_SECTION_ELSE
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	b	interrupt_return

-	GEN_KVM instruction_access
-

 /**
  * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
@@ -1509,8 +1414,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	bl	do_bad_slb_fault
 	b	interrupt_return

-	GEN_KVM instruction_access_slb
-

 /**
  * Interrupt 0x500 - External Interrupt.
@@ -1555,8 +1458,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
 	bl	do_IRQ
 	b	interrupt_return

-	GEN_KVM hardware_interrupt
-

 /**
  * Interrupt 0x600 - Alignment Interrupt
@@ -1584,8 +1485,6 @@ EXC_COMMON_BEGIN(alignment_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return

-	GEN_KVM alignment
-

 /**
  * Interrupt 0x700 - Program Interrupt (program check).
@@ -1693,8 +1592,6 @@ EXC_COMMON_BEGIN(program_check_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return

-	GEN_KVM program_check
-

 /*
  * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
@@ -1744,8 +1641,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	b	interrupt_return
 #endif

-	GEN_KVM fp_unavailable
-

 /**
  * Interrupt 0x900 - Decrementer Interrupt.
@@ -1784,8 +1679,6 @@ EXC_COMMON_BEGIN(decrementer_common)
 	bl	timer_interrupt
 	b	interrupt_return

-	GEN_KVM decrementer
-

 /**
  * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
@@ -1831,8 +1724,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
 	ld	r13,PACA_EXGEN+EX_R13(r13)
 	HRFI_TO_KERNEL

-	GEN_KVM hdecrementer
-

 /**
  * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
@@ -1872,8 +1763,6 @@ EXC_COMMON_BEGIN(doorbell_super_common)
 #endif
 	b	interrupt_return

-	GEN_KVM doorbell_super
-
EXC_REAL_NONE(0xb00, 0x100)
|
EXC_REAL_NONE(0xb00, 0x100)
|
||||||
EXC_VIRT_NONE(0x4b00, 0x100)
|
EXC_VIRT_NONE(0x4b00, 0x100)
|
||||||
@@ -1923,7 +1812,7 @@ INT_DEFINE_END(system_call)
 	GET_PACA(r13)
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	INTERRUPT_TO_KERNEL
-	KVMTEST system_call /* uses r10, branch to system_call_kvm */
+	KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
 	mfctr	r9
 #else
 	mr	r9,r13
@@ -1979,14 +1868,16 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
 EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(system_call_kvm)
-	/*
-	 * This is a hcall, so register convention is as above, with these
-	 * differences:
-	 * r13 = PACA
-	 * ctr = orig r13
-	 * orig r10 saved in PACA
-	 */
+TRAMP_REAL_BEGIN(kvm_hcall)
+	std	r9,PACA_EXGEN+EX_R9(r13)
+	std	r11,PACA_EXGEN+EX_R11(r13)
+	std	r12,PACA_EXGEN+EX_R12(r13)
+	mfcr	r9
+	mfctr	r10
+	std	r10,PACA_EXGEN+EX_R13(r13)
+	li	r10,0
+	std	r10,PACA_EXGEN+EX_CFAR(r13)
+	std	r10,PACA_EXGEN+EX_CTR(r13)
 	/*
 	 * Save the PPR (on systems that support it) before changing to
 	 * HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1994,31 +1885,24 @@ TRAMP_REAL_BEGIN(system_call_kvm)
 	 */
 BEGIN_FTR_SECTION
 	mfspr	r10,SPRN_PPR
-	std	r10,HSTATE_PPR(r13)
+	std	r10,PACA_EXGEN+EX_PPR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	HMT_MEDIUM
-	mfctr	r10
-	SET_SCRATCH0(r10)
-	mfcr	r10
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r10,32
-	ori	r12,r12,0xc00
 #ifdef CONFIG_RELOCATABLE
 	/*
-	 * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
 	 * outside the head section.
 	 */
-	__LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+	__LOAD_FAR_HANDLER(r10, kvmppc_hcall)
 	mtctr	r10
-	ld	r10,PACA_EXGEN+EX_R10(r13)
 	bctr
 #else
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	b	kvmppc_interrupt
+	b	kvmppc_hcall
 #endif
 #endif
 
 
 /**
  * Interrupt 0xd00 - Trace Interrupt.
  * This is a synchronous interrupt in response to instruction step or
@ -2043,8 +1927,6 @@ EXC_COMMON_BEGIN(single_step_common)
|
|||||||
bl single_step_exception
|
bl single_step_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM single_step
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
|
* Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
|
||||||
@ -2063,7 +1945,6 @@ INT_DEFINE_BEGIN(h_data_storage)
|
|||||||
IHSRR=1
|
IHSRR=1
|
||||||
IDAR=1
|
IDAR=1
|
||||||
IDSISR=1
|
IDSISR=1
|
||||||
IKVM_SKIP=1
|
|
||||||
IKVM_REAL=1
|
IKVM_REAL=1
|
||||||
IKVM_VIRT=1
|
IKVM_VIRT=1
|
||||||
INT_DEFINE_END(h_data_storage)
|
INT_DEFINE_END(h_data_storage)
|
||||||
@ -2084,8 +1965,6 @@ MMU_FTR_SECTION_ELSE
|
|||||||
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
|
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM h_data_storage
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
|
* Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
|
||||||
@ -2111,8 +1990,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
|
|||||||
bl unknown_exception
|
bl unknown_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM h_instr_storage
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
|
* Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
|
||||||
@ -2137,8 +2014,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
|
|||||||
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
|
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM emulation_assist
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
|
* Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
|
||||||
@ -2210,16 +2085,12 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
|
|||||||
EXCEPTION_RESTORE_REGS hsrr=1
|
EXCEPTION_RESTORE_REGS hsrr=1
|
||||||
GEN_INT_ENTRY hmi_exception, virt=0
|
GEN_INT_ENTRY hmi_exception, virt=0
|
||||||
|
|
||||||
GEN_KVM hmi_exception_early
|
|
||||||
|
|
||||||
EXC_COMMON_BEGIN(hmi_exception_common)
|
EXC_COMMON_BEGIN(hmi_exception_common)
|
||||||
GEN_COMMON hmi_exception
|
GEN_COMMON hmi_exception
|
||||||
addi r3,r1,STACK_FRAME_OVERHEAD
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
||||||
bl handle_hmi_exception
|
bl handle_hmi_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM hmi_exception
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
|
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
|
||||||
@ -2250,8 +2121,6 @@ EXC_COMMON_BEGIN(h_doorbell_common)
|
|||||||
#endif
|
#endif
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM h_doorbell
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
|
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
|
||||||
@ -2278,8 +2147,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
|
|||||||
bl do_IRQ
|
bl do_IRQ
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM h_virt_irq
|
|
||||||
|
|
||||||
|
|
||||||
EXC_REAL_NONE(0xec0, 0x20)
|
EXC_REAL_NONE(0xec0, 0x20)
|
||||||
EXC_VIRT_NONE(0x4ec0, 0x20)
|
EXC_VIRT_NONE(0x4ec0, 0x20)
|
||||||
@ -2323,8 +2190,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
|
|||||||
bl performance_monitor_exception
|
bl performance_monitor_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM performance_monitor
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xf20 - Vector Unavailable Interrupt.
|
* Interrupt 0xf20 - Vector Unavailable Interrupt.
|
||||||
@ -2374,8 +2239,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
|
|||||||
bl altivec_unavailable_exception
|
bl altivec_unavailable_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM altivec_unavailable
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xf40 - VSX Unavailable Interrupt.
|
* Interrupt 0xf40 - VSX Unavailable Interrupt.
|
||||||
@ -2424,8 +2287,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
|
|||||||
bl vsx_unavailable_exception
|
bl vsx_unavailable_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM vsx_unavailable
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xf60 - Facility Unavailable Interrupt.
|
* Interrupt 0xf60 - Facility Unavailable Interrupt.
|
||||||
@ -2454,8 +2315,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
|
|||||||
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
|
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM facility_unavailable
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
|
* Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
|
||||||
@ -2484,8 +2343,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
|
|||||||
REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
|
REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM h_facility_unavailable
|
|
||||||
|
|
||||||
|
|
||||||
EXC_REAL_NONE(0xfa0, 0x20)
|
EXC_REAL_NONE(0xfa0, 0x20)
|
||||||
EXC_VIRT_NONE(0x4fa0, 0x20)
|
EXC_VIRT_NONE(0x4fa0, 0x20)
|
||||||
@ -2515,8 +2372,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
|
|||||||
bl cbe_system_error_exception
|
bl cbe_system_error_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM cbe_system_error
|
|
||||||
|
|
||||||
#else /* CONFIG_CBE_RAS */
|
#else /* CONFIG_CBE_RAS */
|
||||||
EXC_REAL_NONE(0x1200, 0x100)
|
EXC_REAL_NONE(0x1200, 0x100)
|
||||||
EXC_VIRT_NONE(0x5200, 0x100)
|
EXC_VIRT_NONE(0x5200, 0x100)
|
||||||
@ -2548,8 +2403,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
|
|||||||
bl instruction_breakpoint_exception
|
bl instruction_breakpoint_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM instruction_breakpoint
|
|
||||||
|
|
||||||
|
|
||||||
EXC_REAL_NONE(0x1400, 0x100)
|
EXC_REAL_NONE(0x1400, 0x100)
|
||||||
EXC_VIRT_NONE(0x5400, 0x100)
|
EXC_VIRT_NONE(0x5400, 0x100)
|
||||||
@ -2670,8 +2523,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
|
|||||||
bl unknown_exception
|
bl unknown_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM denorm_exception
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_CBE_RAS
|
#ifdef CONFIG_CBE_RAS
|
||||||
INT_DEFINE_BEGIN(cbe_maintenance)
|
INT_DEFINE_BEGIN(cbe_maintenance)
|
||||||
@ -2689,8 +2540,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
|
|||||||
bl cbe_maintenance_exception
|
bl cbe_maintenance_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM cbe_maintenance
|
|
||||||
|
|
||||||
#else /* CONFIG_CBE_RAS */
|
#else /* CONFIG_CBE_RAS */
|
||||||
EXC_REAL_NONE(0x1600, 0x100)
|
EXC_REAL_NONE(0x1600, 0x100)
|
||||||
EXC_VIRT_NONE(0x5600, 0x100)
|
EXC_VIRT_NONE(0x5600, 0x100)
|
||||||
@ -2721,8 +2570,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
|
|||||||
#endif
|
#endif
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM altivec_assist
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef CONFIG_CBE_RAS
|
#ifdef CONFIG_CBE_RAS
|
||||||
INT_DEFINE_BEGIN(cbe_thermal)
|
INT_DEFINE_BEGIN(cbe_thermal)
|
||||||
@ -2740,8 +2587,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
|
|||||||
bl cbe_thermal_exception
|
bl cbe_thermal_exception
|
||||||
b interrupt_return
|
b interrupt_return
|
||||||
|
|
||||||
GEN_KVM cbe_thermal
|
|
||||||
|
|
||||||
#else /* CONFIG_CBE_RAS */
|
#else /* CONFIG_CBE_RAS */
|
||||||
EXC_REAL_NONE(0x1800, 0x100)
|
EXC_REAL_NONE(0x1800, 0x100)
|
||||||
EXC_VIRT_NONE(0x5800, 0x100)
|
EXC_VIRT_NONE(0x5800, 0x100)
|
||||||
@@ -2994,6 +2839,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
 
 USE_TEXT_SECTION()
 
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
+	/*
+	 * The conditional branch in KVMTEST can't reach all the way,
+	 * make a stub.
+	 */
+	b	kvmppc_interrupt
+#endif
+
 _GLOBAL(do_uaccess_flush)
 UACCESS_FLUSH_FIXUP_SECTION
 	nop
@@ -3009,32 +2863,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
 MASKED_INTERRUPT
 MASKED_INTERRUPT hsrr=1
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
-	/*
-	 * Here all GPRs are unchanged from when the interrupt happened
-	 * except for r13, which is saved in SPRG_SCRATCH0.
-	 */
-	mfspr	r13, SPRN_SRR0
-	addi	r13, r13, 4
-	mtspr	SPRN_SRR0, r13
-	GET_SCRATCH0(r13)
-	RFI_TO_KERNEL
-	b	.
-
-kvmppc_skip_Hinterrupt:
-	/*
-	 * Here all GPRs are unchanged from when the interrupt happened
-	 * except for r13, which is saved in SPRG_SCRATCH0.
-	 */
-	mfspr	r13, SPRN_HSRR0
-	addi	r13, r13, 4
-	mtspr	SPRN_HSRR0, r13
-	GET_SCRATCH0(r13)
-	HRFI_TO_KERNEL
-	b	.
-#endif
-
 /*
  * Relocation-on interrupts: A subset of the interrupts can be delivered
  * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
@@ -432,16 +432,19 @@ device_initcall(stf_barrier_debugfs_init);
 
 static void update_branch_cache_flush(void)
 {
-	u32 *site;
+	u32 *site, __maybe_unused *site2;
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	site = &patch__call_kvm_flush_link_stack;
+	site2 = &patch__call_kvm_flush_link_stack_p9;
 	// This controls the branch from guest_exit_cont to kvm_flush_link_stack
 	if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
 		patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+		patch_instruction_site(site2, ppc_inst(PPC_INST_NOP));
 	} else {
 		// Could use HW flush, but that could also flush count cache
 		patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+		patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
 	}
 #endif
 
|
@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc);
|
|||||||
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
|
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_PPC64
|
#ifdef CONFIG_PPC64
|
||||||
static inline unsigned long test_irq_work_pending(void)
|
|
||||||
{
|
|
||||||
unsigned long x;
|
|
||||||
|
|
||||||
asm volatile("lbz %0,%1(13)"
|
|
||||||
: "=r" (x)
|
|
||||||
: "i" (offsetof(struct paca_struct, irq_work_pending)));
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void set_irq_work_pending_flag(void)
|
static inline void set_irq_work_pending_flag(void)
|
||||||
{
|
{
|
||||||
asm volatile("stb %0,%1(13)" : :
|
asm volatile("stb %0,%1(13)" : :
|
||||||
|
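
The helper removed above open-coded a byte load from the PACA through r13. As a point of reference only (this is not part of the patch), a plain C equivalent of what that asm did, assuming local_paca is usable at this point as it is elsewhere in this file, would be:

	#include <asm/paca.h>

	/*
	 * Sketch only: C equivalent of the removed inline asm. It reads the
	 * irq_work_pending byte from this CPU's PACA (r13 is the PACA pointer
	 * on ppc64), which is exactly what "lbz %0,%1(13)" did.
	 */
	static inline unsigned long test_irq_work_pending(void)
	{
		return local_paca->irq_work_pending;
	}
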
@@ -6,7 +6,7 @@
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
 KVM := ../../../virt/kvm
 
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
 common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
 common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
 
@@ -57,6 +57,7 @@ kvm-pr-y := \
 	book3s_32_mmu.o
 
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	book3s_64_entry.o \
 	tm.o
 
 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -86,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_hv_hmi.o \
+	book3s_hv_p9_entry.o \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_hv_ras.o \
@@ -38,37 +38,66 @@
 
 /* #define EXIT_DEBUG */
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("exits", sum_exits),
-	VCPU_STAT("mmio", mmio_exits),
-	VCPU_STAT("sig", signal_exits),
-	VCPU_STAT("sysc", syscall_exits),
-	VCPU_STAT("inst_emu", emulated_inst_exits),
-	VCPU_STAT("dec", dec_exits),
-	VCPU_STAT("ext_intr", ext_intr_exits),
-	VCPU_STAT("queue_intr", queue_intr),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
-	VCPU_STAT("halt_wait_ns", halt_wait_ns),
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_successful_wait", halt_successful_wait),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
-	VCPU_STAT("pf_storage", pf_storage),
-	VCPU_STAT("sp_storage", sp_storage),
-	VCPU_STAT("pf_instruc", pf_instruc),
-	VCPU_STAT("sp_instruc", sp_instruc),
-	VCPU_STAT("ld", ld),
-	VCPU_STAT("ld_slow", ld_slow),
-	VCPU_STAT("st", st),
-	VCPU_STAT("st_slow", st_slow),
-	VCPU_STAT("pthru_all", pthru_all),
-	VCPU_STAT("pthru_host", pthru_host),
-	VCPU_STAT("pthru_bad_aff", pthru_bad_aff),
-	VM_STAT("largepages_2M", num_2M_pages, .mode = 0444),
-	VM_STAT("largepages_1G", num_1G_pages, .mode = 0444),
-	{ NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_ICOUNTER(VM, num_2M_pages),
+	STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, sum_exits),
+	STATS_DESC_COUNTER(VCPU, mmio_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, light_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, isi_exits),
+	STATS_DESC_COUNTER(VCPU, dsi_exits),
+	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+	STATS_DESC_COUNTER(VCPU, dec_exits),
+	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+	STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+	STATS_DESC_COUNTER(VCPU, dbell_exits),
+	STATS_DESC_COUNTER(VCPU, gdbell_exits),
+	STATS_DESC_COUNTER(VCPU, ld),
+	STATS_DESC_COUNTER(VCPU, st),
+	STATS_DESC_COUNTER(VCPU, pf_storage),
+	STATS_DESC_COUNTER(VCPU, pf_instruc),
+	STATS_DESC_COUNTER(VCPU, sp_storage),
+	STATS_DESC_COUNTER(VCPU, sp_instruc),
+	STATS_DESC_COUNTER(VCPU, queue_intr),
+	STATS_DESC_COUNTER(VCPU, ld_slow),
+	STATS_DESC_COUNTER(VCPU, st_slow),
+	STATS_DESC_COUNTER(VCPU, pthru_all),
+	STATS_DESC_COUNTER(VCPU, pthru_host),
+	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
 
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
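
The descriptor arrays and headers above feed KVM's new binary statistics interface. As a rough illustration only (not part of this patch), a userspace reader might consume them as sketched below, assuming the KVM_GET_STATS_FD ioctl and the UAPI struct kvm_stats_header from the generic stats series, with vcpu_fd standing in for an already-created vCPU file descriptor:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/kvm.h>

	/* Sketch: read the binary stats header from a vCPU fd. */
	static int dump_stats_header(int vcpu_fd)
	{
		struct kvm_stats_header hdr;
		int stats_fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);

		if (stats_fd < 0)
			return -1;
		/*
		 * The header sits at offset 0; it locates the id string
		 * (id_offset), the descriptors (desc_offset) and the raw
		 * counter values (data_offset) within the same file.
		 */
		if (pread(stats_fd, &hdr, sizeof(hdr), 0) != (ssize_t)sizeof(hdr)) {
			close(stats_fd);
			return -1;
		}
		printf("%u descriptors, name size %u, data at offset %u\n",
		       hdr.num_desc, hdr.name_size, hdr.data_offset);
		close(stats_fd);
		return 0;
	}
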
@@ -171,6 +200,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
 }
 EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
 
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
 	/* might as well deliver this straight away */
@@ -1044,13 +1079,10 @@ static int kvmppc_book3s_init(void)
 #ifdef CONFIG_KVM_XICS
 #ifdef CONFIG_KVM_XIVE
 	if (xics_on_xive()) {
-		kvmppc_xive_init_module();
 		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
-		if (kvmppc_xive_native_supported()) {
-			kvmppc_xive_native_init_module();
+		if (kvmppc_xive_native_supported())
 			kvm_register_device_ops(&kvm_xive_native_ops,
 						KVM_DEV_TYPE_XIVE);
-		}
 	} else
 #endif
 		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,12 +1092,6 @@
 
 static void kvmppc_book3s_exit(void)
 {
-#ifdef CONFIG_KVM_XICS
-	if (xics_on_xive()) {
-		kvmppc_xive_exit_module();
-		kvmppc_xive_native_exit_module();
-	}
-#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	kvmppc_book3s_exit_pr();
 #endif
new file: arch/powerpc/kvm/book3s_64_entry.S (416 lines)
@@ -0,0 +1,416 @@
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||||
|
#include <asm/asm-offsets.h>
|
||||||
|
#include <asm/cache.h>
|
||||||
|
#include <asm/code-patching-asm.h>
|
||||||
|
#include <asm/exception-64s.h>
|
||||||
|
#include <asm/export.h>
|
||||||
|
#include <asm/kvm_asm.h>
|
||||||
|
#include <asm/kvm_book3s_asm.h>
|
||||||
|
#include <asm/mmu.h>
|
||||||
|
#include <asm/ppc_asm.h>
|
||||||
|
#include <asm/ptrace.h>
|
||||||
|
#include <asm/reg.h>
|
||||||
|
#include <asm/ultravisor-api.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These are branched to from interrupt handlers in exception-64s.S which set
|
||||||
|
* IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is a hcall, so register convention is as
|
||||||
|
* Documentation/powerpc/papr_hcalls.rst.
|
||||||
|
*
|
||||||
|
* This may also be a syscall from PR-KVM userspace that is to be
|
||||||
|
* reflected to the PR guest kernel, so registers may be set up for
|
||||||
|
* a system call rather than hcall. We don't currently clobber
|
||||||
|
* anything here, but the 0xc00 handler has already clobbered CTR
|
||||||
|
* and CR0, so PR-KVM can not support a guest kernel that preserves
|
||||||
|
* those registers across its system calls.
|
||||||
|
*
|
||||||
|
* The state of registers is as kvmppc_interrupt, except CFAR is not
|
||||||
|
* saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
|
||||||
|
*/
|
||||||
|
.global kvmppc_hcall
|
||||||
|
.balign IFETCH_ALIGN_BYTES
|
||||||
|
kvmppc_hcall:
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||||
|
lbz r10,HSTATE_IN_GUEST(r13)
|
||||||
|
cmpwi r10,KVM_GUEST_MODE_HV_P9
|
||||||
|
beq kvmppc_p9_exit_hcall
|
||||||
|
#endif
|
||||||
|
ld r10,PACA_EXGEN+EX_R13(r13)
|
||||||
|
SET_SCRATCH0(r10)
|
||||||
|
li r10,0xc00
|
||||||
|
/* Now we look like kvmppc_interrupt */
|
||||||
|
li r11,PACA_EXGEN
|
||||||
|
b .Lgot_save_area
|
||||||
|
|
||||||
|
/*
|
||||||
|
* KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
|
||||||
|
* call convention:
|
||||||
|
*
|
||||||
|
* guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
|
||||||
|
* guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
|
||||||
|
* guest R13 also saved in SCRATCH0
|
||||||
|
* R13 = PACA
|
||||||
|
* R11 = (H)SRR0
|
||||||
|
* R12 = (H)SRR1
|
||||||
|
* R9 = guest CR
|
||||||
|
* PPR is set to medium
|
||||||
|
*
|
||||||
|
* With the addition for KVM:
|
||||||
|
* R10 = trap vector
|
||||||
|
*/
|
||||||
|
.global kvmppc_interrupt
|
||||||
|
.balign IFETCH_ALIGN_BYTES
|
||||||
|
kvmppc_interrupt:
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||||
|
std r10,HSTATE_SCRATCH0(r13)
|
||||||
|
lbz r10,HSTATE_IN_GUEST(r13)
|
||||||
|
cmpwi r10,KVM_GUEST_MODE_HV_P9
|
||||||
|
beq kvmppc_p9_exit_interrupt
|
||||||
|
ld r10,HSTATE_SCRATCH0(r13)
|
||||||
|
#endif
|
||||||
|
li r11,PACA_EXGEN
|
||||||
|
cmpdi r10,0x200
|
||||||
|
bgt+ .Lgot_save_area
|
||||||
|
li r11,PACA_EXMC
|
||||||
|
beq .Lgot_save_area
|
||||||
|
li r11,PACA_EXNMI
|
||||||
|
.Lgot_save_area:
|
||||||
|
add r11,r11,r13
|
||||||
|
BEGIN_FTR_SECTION
|
||||||
|
ld r12,EX_CFAR(r11)
|
||||||
|
std r12,HSTATE_CFAR(r13)
|
||||||
|
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
||||||
|
ld r12,EX_CTR(r11)
|
||||||
|
mtctr r12
|
||||||
|
BEGIN_FTR_SECTION
|
||||||
|
ld r12,EX_PPR(r11)
|
||||||
|
std r12,HSTATE_PPR(r13)
|
||||||
|
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||||
|
ld r12,EX_R12(r11)
|
||||||
|
std r12,HSTATE_SCRATCH0(r13)
|
||||||
|
sldi r12,r9,32
|
||||||
|
or r12,r12,r10
|
||||||
|
ld r9,EX_R9(r11)
|
||||||
|
ld r10,EX_R10(r11)
|
||||||
|
ld r11,EX_R11(r11)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hcalls and other interrupts come here after normalising register
|
||||||
|
* contents and save locations:
|
||||||
|
*
|
||||||
|
* R12 = (guest CR << 32) | interrupt vector
|
||||||
|
* R13 = PACA
|
||||||
|
* guest R12 saved in shadow HSTATE_SCRATCH0
|
||||||
|
* guest R13 saved in SPRN_SCRATCH0
|
||||||
|
*/
|
||||||
|
std r9,HSTATE_SCRATCH2(r13)
|
||||||
|
lbz r9,HSTATE_IN_GUEST(r13)
|
||||||
|
cmpwi r9,KVM_GUEST_MODE_SKIP
|
||||||
|
beq- .Lmaybe_skip
|
||||||
|
.Lno_skip:
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
|
||||||
|
cmpwi r9,KVM_GUEST_MODE_GUEST
|
||||||
|
beq kvmppc_interrupt_pr
|
||||||
|
#endif
|
||||||
|
b kvmppc_interrupt_hv
|
||||||
|
#else
|
||||||
|
b kvmppc_interrupt_pr
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* "Skip" interrupts are part of a trick KVM uses with hash guests to load
|
||||||
|
* the faulting instruction in guest memory from the hypervisor without
|
||||||
|
* walking page tables.
|
||||||
|
*
|
||||||
|
* When the guest takes a fault that requires the hypervisor to load the
|
||||||
|
* instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
|
||||||
|
* and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
|
||||||
|
* MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
|
||||||
|
* but loads and stores will access the guest context. This is used to load
|
||||||
|
* the faulting instruction using the faulting guest effective address.
|
||||||
|
*
|
||||||
|
* However the guest context may not be able to translate, or it may cause a
|
||||||
|
* machine check or other issue, which results in a fault in the host
|
||||||
|
* (even with KVM-HV).
|
||||||
|
*
|
||||||
|
* These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
|
||||||
|
* are (or are likely) caused by that load, the instruction is skipped by
|
||||||
|
* just returning with the PC advanced +4, where it is noticed the load did
|
||||||
|
* not execute and it goes to the slow path which walks the page tables to
|
||||||
|
* read guest memory.
|
||||||
|
*/
|
||||||
|
.Lmaybe_skip:
|
||||||
|
cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK
|
||||||
|
beq 1f
|
||||||
|
cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE
|
||||||
|
beq 1f
|
||||||
|
cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT
|
||||||
|
beq 1f
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||||
|
/* HSRR interrupts get 2 added to interrupt number */
|
||||||
|
cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
|
||||||
|
beq 2f
|
||||||
|
#endif
|
||||||
|
b .Lno_skip
|
||||||
|
1: mfspr r9,SPRN_SRR0
|
||||||
|
addi r9,r9,4
|
||||||
|
mtspr SPRN_SRR0,r9
|
||||||
|
ld r12,HSTATE_SCRATCH0(r13)
|
||||||
|
ld r9,HSTATE_SCRATCH2(r13)
|
||||||
|
GET_SCRATCH0(r13)
|
||||||
|
RFI_TO_KERNEL
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||||
|
2: mfspr r9,SPRN_HSRR0
|
||||||
|
addi r9,r9,4
|
||||||
|
mtspr SPRN_HSRR0,r9
|
||||||
|
ld r12,HSTATE_SCRATCH0(r13)
|
||||||
|
ld r9,HSTATE_SCRATCH2(r13)
|
||||||
|
GET_SCRATCH0(r13)
|
||||||
|
HRFI_TO_KERNEL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||||
|
|
||||||
|
/* Stack frame offsets for kvmppc_p9_enter_guest */
|
||||||
|
#define SFS (144 + STACK_FRAME_MIN_SIZE)
|
||||||
|
#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* void kvmppc_p9_enter_guest(struct vcpu *vcpu);
|
||||||
|
*
|
||||||
|
* Enter the guest on an ISAv3.0 or later system.
|
||||||
|
*/
|
||||||
|
.balign IFETCH_ALIGN_BYTES
|
||||||
|
_GLOBAL(kvmppc_p9_enter_guest)
|
||||||
|
EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
|
||||||
|
mflr r0
|
||||||
|
std r0,PPC_LR_STKOFF(r1)
|
||||||
|
stdu r1,-SFS(r1)
|
||||||
|
|
||||||
|
std r1,HSTATE_HOST_R1(r13)
|
||||||
|
|
||||||
|
mfcr r4
|
||||||
|
stw r4,SFS+8(r1)
|
||||||
|
|
||||||
|
reg = 14
|
||||||
|
.rept 18
|
||||||
|
std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
|
||||||
|
reg = reg + 1
|
||||||
|
.endr
|
||||||
|
|
||||||
|
ld r4,VCPU_LR(r3)
|
||||||
|
mtlr r4
|
||||||
|
ld r4,VCPU_CTR(r3)
|
||||||
|
mtctr r4
|
||||||
|
ld r4,VCPU_XER(r3)
|
||||||
|
mtspr SPRN_XER,r4
|
||||||
|
|
||||||
|
ld r1,VCPU_CR(r3)
|
||||||
|
|
||||||
|
BEGIN_FTR_SECTION
|
||||||
|
ld r4,VCPU_CFAR(r3)
|
||||||
|
mtspr SPRN_CFAR,r4
|
||||||
|
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
|
||||||
|
BEGIN_FTR_SECTION
|
||||||
|
ld r4,VCPU_PPR(r3)
|
||||||
|
mtspr SPRN_PPR,r4
|
||||||
|
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
|
||||||
|
|
||||||
|
reg = 4
|
||||||
|
.rept 28
|
||||||
|
ld reg,__VCPU_GPR(reg)(r3)
|
||||||
|
reg = reg + 1
|
||||||
|
.endr
|
||||||
|
|
||||||
|
ld r4,VCPU_KVM(r3)
|
||||||
|
lbz r4,KVM_SECURE_GUEST(r4)
|
||||||
|
cmpdi r4,0
|
||||||
|
ld r4,VCPU_GPR(R4)(r3)
|
||||||
|
bne .Lret_to_ultra
|
||||||
|
|
||||||
|
mtcr r1
|
||||||
|
|
||||||
|
ld r0,VCPU_GPR(R0)(r3)
|
||||||
|
ld r1,VCPU_GPR(R1)(r3)
|
||||||
|
ld r2,VCPU_GPR(R2)(r3)
|
||||||
|
ld r3,VCPU_GPR(R3)(r3)
|
||||||
|
|
||||||
|
HRFI_TO_GUEST
|
||||||
|
b .
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use UV_RETURN ultracall to return control back to the Ultravisor
|
||||||
|
* after processing an hypercall or interrupt that was forwarded
|
||||||
|
* (a.k.a. reflected) to the Hypervisor.
|
||||||
|
*
|
||||||
|
* All registers have already been reloaded except the ucall requires:
|
||||||
|
* R0 = hcall result
|
||||||
|
* R2 = SRR1, so UV can detect a synthesized interrupt (if any)
|
||||||
|
* R3 = UV_RETURN
|
||||||
|
*/
|
||||||
|
.Lret_to_ultra:
|
||||||
|
mtcr r1
|
||||||
|
ld r1,VCPU_GPR(R1)(r3)
|
||||||
|
|
||||||
|
ld r0,VCPU_GPR(R3)(r3)
|
||||||
|
mfspr r2,SPRN_SRR1
|
||||||
|
LOAD_REG_IMMEDIATE(r3, UV_RETURN)
|
||||||
|
sc 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
|
||||||
|
* above if the interrupt was taken for a guest that was entered via
|
||||||
|
* kvmppc_p9_enter_guest().
|
||||||
|
*
|
||||||
|
* The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
|
||||||
|
* and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
|
||||||
|
* establishes the host stack and registers to return from the
|
||||||
|
* kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
|
||||||
|
* (SPRs and FP, VEC, etc).
|
||||||
|
*/
|
||||||
|
.balign IFETCH_ALIGN_BYTES
|
||||||
|
kvmppc_p9_exit_hcall:
|
||||||
|
mfspr r11,SPRN_SRR0
|
||||||
|
mfspr r12,SPRN_SRR1
|
||||||
|
li r10,0xc00
|
||||||
|
std r10,HSTATE_SCRATCH0(r13)
|
||||||
|
|
||||||
|
.balign IFETCH_ALIGN_BYTES
|
||||||
|
kvmppc_p9_exit_interrupt:
|
||||||
|
/*
|
||||||
|
* If set to KVM_GUEST_MODE_HV_P9 but we're still in the
|
||||||
|
* hypervisor, that means we can't return from the entry stack.
|
||||||
|
*/
|
||||||
|
rldicl. r10,r12,64-MSR_HV_LG,63
|
||||||
|
bne- kvmppc_p9_bad_interrupt
|
||||||
|
|
||||||
|
std r1,HSTATE_SCRATCH1(r13)
|
||||||
|
std r3,HSTATE_SCRATCH2(r13)
|
||||||
|
ld r1,HSTATE_HOST_R1(r13)
|
||||||
|
ld r3,HSTATE_KVM_VCPU(r13)
|
||||||
|
|
||||||
|
std r9,VCPU_CR(r3)
|
||||||
|
|
||||||
|
1:
|
||||||
|
std r11,VCPU_PC(r3)
|
||||||
|
std r12,VCPU_MSR(r3)
|
||||||
|
|
||||||
|
reg = 14
|
||||||
|
.rept 18
|
||||||
|
std reg,__VCPU_GPR(reg)(r3)
|
||||||
|
reg = reg + 1
|
||||||
|
.endr
|
||||||
|
|
||||||
|
/* r1, r3, r9-r13 are saved to vcpu by C code */
|
||||||
|
std r0,VCPU_GPR(R0)(r3)
|
||||||
|
std r2,VCPU_GPR(R2)(r3)
|
||||||
|
reg = 4
|
||||||
|
.rept 5
|
||||||
|
std reg,__VCPU_GPR(reg)(r3)
|
||||||
|
reg = reg + 1
|
||||||
|
.endr
|
||||||
|
|
||||||
|
ld r2,PACATOC(r13)
|
||||||
|
|
||||||
|
mflr r4
|
||||||
|
std r4,VCPU_LR(r3)
|
||||||
|
mfspr r4,SPRN_XER
|
||||||
|
std r4,VCPU_XER(r3)
|
||||||
|
|
||||||
|
reg = 14
|
||||||
|
.rept 18
|
||||||
|
ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
|
||||||
|
reg = reg + 1
|
||||||
|
.endr
|
||||||
|
|
||||||
|
lwz r4,SFS+8(r1)
|
||||||
|
mtcr r4
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flush the link stack here, before executing the first blr on the
|
||||||
|
* way out of the guest.
|
||||||
|
*
|
||||||
|
* The link stack won't match coming out of the guest anyway so the
|
||||||
|
* only cost is the flush itself. The call clobbers r0.
|
||||||
|
*/
|
||||||
|
1: nop
|
||||||
|
patch_site 1b patch__call_kvm_flush_link_stack_p9
|
||||||
|
|
||||||
|
addi r1,r1,SFS
|
||||||
|
ld r0,PPC_LR_STKOFF(r1)
|
||||||
|
mtlr r0
|
||||||
|
blr
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Took an interrupt somewhere right before HRFID to guest, so registers are
|
||||||
|
* in a bad way. Return things hopefully enough to run host virtual code and
|
||||||
|
* run the Linux interrupt handler (SRESET or MCE) to print something useful.
|
||||||
|
*
|
||||||
|
* We could be really clever and save all host registers in known locations
|
||||||
|
* before setting HSTATE_IN_GUEST, then restoring them all here, and setting
|
||||||
|
* return address to a fixup that sets them up again. But that's a lot of
|
||||||
|
* effort for a small bit of code. Lots of other things to do first.
|
||||||
|
*/
|
||||||
|
kvmppc_p9_bad_interrupt:
|
||||||
|
BEGIN_MMU_FTR_SECTION
|
||||||
|
/*
|
||||||
|
* Hash host doesn't try to recover MMU (requires host SLB reload)
|
||||||
|
*/
|
||||||
|
b .
|
||||||
|
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
|
||||||
|
/*
|
||||||
|
* Clean up guest registers to give host a chance to run.
|
||||||
|
*/
|
||||||
|
li r10,0
|
||||||
|
mtspr SPRN_AMR,r10
|
||||||
|
mtspr SPRN_IAMR,r10
|
||||||
|
mtspr SPRN_CIABR,r10
|
||||||
|
mtspr SPRN_DAWRX0,r10
|
||||||
|
BEGIN_FTR_SECTION
|
||||||
|
mtspr SPRN_DAWRX1,r10
|
||||||
|
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
|
||||||
|
mtspr SPRN_PID,r10
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Switch to host MMU mode
|
||||||
|
*/
|
||||||
|
ld r10, HSTATE_KVM_VCPU(r13)
|
||||||
|
ld r10, VCPU_KVM(r10)
|
||||||
|
lwz r10, KVM_HOST_LPID(r10)
|
||||||
|
mtspr SPRN_LPID,r10
|
||||||
|
|
||||||
|
ld r10, HSTATE_KVM_VCPU(r13)
|
||||||
|
ld r10, VCPU_KVM(r10)
|
||||||
|
ld r10, KVM_HOST_LPCR(r10)
|
||||||
|
mtspr SPRN_LPCR,r10
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
|
||||||
|
* MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
|
||||||
|
*/
|
||||||
|
li r10,KVM_GUEST_MODE_NONE
|
||||||
|
stb r10,HSTATE_IN_GUEST(r13)
|
||||||
|
li r10,MSR_RI
|
||||||
|
andc r12,r12,r10
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Go back to interrupt handler. MCE and SRESET have their specific
|
||||||
|
* PACA save area so they should be used directly. They set up their
|
||||||
|
* own stack. The other handlers all use EXGEN. They will use the
|
||||||
|
* guest r1 if it looks like a kernel stack, so just load the
|
||||||
|
* emergency stack and go to program check for all other interrupts.
|
||||||
|
*/
|
||||||
|
ld r10,HSTATE_SCRATCH0(r13)
|
||||||
|
cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
|
||||||
|
beq machine_check_common
|
||||||
|
|
||||||
|
cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
|
||||||
|
beq system_reset_common
|
||||||
|
|
||||||
|
b .
|
||||||
|
#endif
|
@@ -21,6 +21,7 @@
 #include <asm/pte-walk.h>
 #include <asm/ultravisor.h>
 #include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
 
 /*
  * Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	}
 
 	psi = shift_to_mmu_psize(pshift);
-	rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
-				lpid, rb);
+
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+					lpid, rb);
+	} else {
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB,
+					    psize_to_rpti_pgsize(psi),
+					    addr, addr + psize);
+	}
+
 	if (rc)
 		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+					    0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
 	/* 	    liobn, ioba, tce); */
 
-	/* For radix, we might be in virtual mode, so punt */
-	if (kvm_is_radix(vcpu->kvm))
-		return H_TOO_HARD;
-
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 	bool prereg = false;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
-	/* For radix, we might be in virtual mode, so punt */
-	if (kvm_is_radix(vcpu->kvm))
-		return H_TOO_HARD;
-
 	/*
 	 * used to check for invalidations in progress
 	 */
@@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	long i, ret;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
-	/* For radix, we might be in virtual mode, so punt */
-	if (kvm_is_radix(vcpu->kvm))
-		return H_TOO_HARD;
-
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
(one file's diff suppressed because it is too large)
@@ -34,21 +34,6 @@
 #include "book3s_xics.h"
 #include "book3s_xive.h"
 
-/*
- * The XIVE module will populate these when it loads
- */
-unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
-		       unsigned long mfrr);
-int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
-EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
-
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void)
 }
 EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
 
-long kvmppc_h_random(struct kvm_vcpu *vcpu)
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
 {
-	int r;
-
-	/* Only need to do the expensive mfmsr() on radix */
-	if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
-		r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
-	else
-		r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
-	if (r)
+	if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
 		return H_SUCCESS;
 
 	return H_HARDWARE;
@@ -221,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu)
 	void __iomem *xics_phys;
 	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 
-	/* For a nested hypervisor, use the XICS via hcall */
-	if (kvmhv_on_pseries()) {
-		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-		plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
-				IPI_PRIORITY);
-		return;
-	}
-
 	/* On POWER9 we can use msgsnd for any destination cpu. */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		msg |= get_hard_smp_processor_id(cpu);
@ -442,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again)
|
|||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/* Now read the interrupt from the ICP */
|
/* Now read the interrupt from the ICP */
|
||||||
if (kvmhv_on_pseries()) {
|
xics_phys = local_paca->kvm_hstate.xics_phys;
|
||||||
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
|
rc = 0;
|
||||||
|
if (!xics_phys)
|
||||||
rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
|
rc = opal_int_get_xirr(&xirr, false);
|
||||||
xirr = cpu_to_be32(retbuf[0]);
|
else
|
||||||
} else {
|
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
|
||||||
xics_phys = local_paca->kvm_hstate.xics_phys;
|
|
||||||
rc = 0;
|
|
||||||
if (!xics_phys)
|
|
||||||
rc = opal_int_get_xirr(&xirr, false);
|
|
||||||
else
|
|
||||||
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
|
|
||||||
}
|
|
||||||
if (rc < 0)
|
if (rc < 0)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@ -483,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again)
|
|||||||
*/
|
*/
|
||||||
if (xisr == XICS_IPI) {
|
if (xisr == XICS_IPI) {
|
||||||
rc = 0;
|
rc = 0;
|
||||||
if (kvmhv_on_pseries()) {
|
if (xics_phys) {
|
||||||
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
|
|
||||||
|
|
||||||
plpar_hcall_raw(H_IPI, retbuf,
|
|
||||||
hard_smp_processor_id(), 0xff);
|
|
||||||
plpar_hcall_raw(H_EOI, retbuf, h_xirr);
|
|
||||||
} else if (xics_phys) {
|
|
||||||
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
|
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
|
||||||
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
|
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
|
||||||
} else {
|
} else {
|
||||||
@ -515,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again)
|
|||||||
/* We raced with the host,
|
/* We raced with the host,
|
||||||
* we need to resend that IPI, bummer
|
* we need to resend that IPI, bummer
|
||||||
*/
|
*/
|
||||||
if (kvmhv_on_pseries()) {
|
if (xics_phys)
|
||||||
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
|
|
||||||
|
|
||||||
plpar_hcall_raw(H_IPI, retbuf,
|
|
||||||
hard_smp_processor_id(),
|
|
||||||
IPI_PRIORITY);
|
|
||||||
} else if (xics_phys)
|
|
||||||
__raw_rm_writeb(IPI_PRIORITY,
|
__raw_rm_writeb(IPI_PRIORITY,
|
||||||
xics_phys + XICS_MFRR);
|
xics_phys + XICS_MFRR);
|
||||||
else
|
else
|
||||||
@ -541,22 +491,13 @@ static long kvmppc_read_one_intr(bool *again)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_KVM_XICS
|
#ifdef CONFIG_KVM_XICS
|
||||||
static inline bool is_rm(void)
|
|
||||||
{
|
|
||||||
return !(mfmsr() & MSR_DR);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
|
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
if (!kvmppc_xics_enabled(vcpu))
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
if (xics_on_xive()) {
|
if (xics_on_xive())
|
||||||
if (is_rm())
|
return xive_rm_h_xirr(vcpu);
|
||||||
return xive_rm_h_xirr(vcpu);
|
else
|
||||||
if (unlikely(!__xive_vm_h_xirr))
|
|
||||||
return H_NOT_AVAILABLE;
|
|
||||||
return __xive_vm_h_xirr(vcpu);
|
|
||||||
} else
|
|
||||||
return xics_rm_h_xirr(vcpu);
|
return xics_rm_h_xirr(vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -565,13 +506,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
|
|||||||
if (!kvmppc_xics_enabled(vcpu))
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
vcpu->arch.regs.gpr[5] = get_tb();
|
vcpu->arch.regs.gpr[5] = get_tb();
|
||||||
if (xics_on_xive()) {
|
if (xics_on_xive())
|
||||||
if (is_rm())
|
return xive_rm_h_xirr(vcpu);
|
||||||
return xive_rm_h_xirr(vcpu);
|
else
|
||||||
if (unlikely(!__xive_vm_h_xirr))
|
|
||||||
return H_NOT_AVAILABLE;
|
|
||||||
return __xive_vm_h_xirr(vcpu);
|
|
||||||
} else
|
|
||||||
return xics_rm_h_xirr(vcpu);
|
return xics_rm_h_xirr(vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -579,13 +516,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
|
|||||||
{
|
{
|
||||||
if (!kvmppc_xics_enabled(vcpu))
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
if (xics_on_xive()) {
|
if (xics_on_xive())
|
||||||
if (is_rm())
|
return xive_rm_h_ipoll(vcpu, server);
|
||||||
return xive_rm_h_ipoll(vcpu, server);
|
else
|
||||||
if (unlikely(!__xive_vm_h_ipoll))
|
|
||||||
return H_NOT_AVAILABLE;
|
|
||||||
return __xive_vm_h_ipoll(vcpu, server);
|
|
||||||
} else
|
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -594,13 +527,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
|
|||||||
{
|
{
|
||||||
if (!kvmppc_xics_enabled(vcpu))
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
if (xics_on_xive()) {
|
if (xics_on_xive())
|
||||||
if (is_rm())
|
return xive_rm_h_ipi(vcpu, server, mfrr);
|
||||||
return xive_rm_h_ipi(vcpu, server, mfrr);
|
else
|
||||||
if (unlikely(!__xive_vm_h_ipi))
|
|
||||||
return H_NOT_AVAILABLE;
|
|
||||||
return __xive_vm_h_ipi(vcpu, server, mfrr);
|
|
||||||
} else
|
|
||||||
return xics_rm_h_ipi(vcpu, server, mfrr);
|
return xics_rm_h_ipi(vcpu, server, mfrr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -608,13 +537,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
|
|||||||
{
|
{
|
||||||
if (!kvmppc_xics_enabled(vcpu))
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
if (xics_on_xive()) {
|
if (xics_on_xive())
|
||||||
if (is_rm())
|
return xive_rm_h_cppr(vcpu, cppr);
|
||||||
return xive_rm_h_cppr(vcpu, cppr);
|
else
|
||||||
if (unlikely(!__xive_vm_h_cppr))
|
|
||||||
return H_NOT_AVAILABLE;
|
|
||||||
return __xive_vm_h_cppr(vcpu, cppr);
|
|
||||||
} else
|
|
||||||
return xics_rm_h_cppr(vcpu, cppr);
|
return xics_rm_h_cppr(vcpu, cppr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -622,13 +547,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
|
|||||||
{
|
{
|
||||||
if (!kvmppc_xics_enabled(vcpu))
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
return H_TOO_HARD;
|
return H_TOO_HARD;
|
||||||
if (xics_on_xive()) {
|
if (xics_on_xive())
|
||||||
if (is_rm())
|
return xive_rm_h_eoi(vcpu, xirr);
|
||||||
return xive_rm_h_eoi(vcpu, xirr);
|
else
|
||||||
if (unlikely(!__xive_vm_h_eoi))
|
|
||||||
return H_NOT_AVAILABLE;
|
|
||||||
return __xive_vm_h_eoi(vcpu, xirr);
|
|
||||||
} else
|
|
||||||
return xics_rm_h_eoi(vcpu, xirr);
|
return xics_rm_h_eoi(vcpu, xirr);
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_KVM_XICS */
|
#endif /* CONFIG_KVM_XICS */
|
||||||
@@ -800,7 +721,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
 	 * Thus we make all 4 threads use the same bit.
 	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		pcpu = cpu_first_thread_sibling(pcpu);
+		pcpu = cpu_first_tlb_thread_sibling(pcpu);
 
 	if (nested)
 		need_tlb_flush = &nested->need_tlb_flush;
@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/*
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
-	 * Because of a hardware deviation in P8 and P9,
+	 * Because of a hardware deviation in P8,
 	 * we need to set LPCR[HDICE] before writing HDEC.
 	 */
 	ld	r5, HSTATE_KVM_VCORE(r13)
@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	ori	r8, r9, LPCR_HDICE
 	mtspr	SPRN_LPCR, r8
 	isync
-	andis.	r0, r9, LPCR_LD@h
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-BEGIN_FTR_SECTION
-	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
-	bne	32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
-32:	mtspr	SPRN_HDEC,r8
+	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
|
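The assembly change above drops the POWER9 LPCR[LD] special case, so the value read from SPRN_DEC is now always sign-extended before being added to the timebase and stored in HSTATE_DECEXP. A minimal C sketch of the same computation follows; read_spr_dec() and read_timebase() are hypothetical stand-ins for the mfspr/mftb instructions, not kernel APIs.

#include <stdint.h>

/* Hypothetical SPR accessors standing in for mfspr(SPRN_DEC) and mftb(). */
extern uint64_t read_spr_dec(void);
extern uint64_t read_timebase(void);

/*
 * Sketch of what the assembly computes: treat the 32-bit decrementer as
 * signed, sign-extend it (extsw), and add the current timebase to obtain
 * an absolute expiry as stored in HSTATE_DECEXP.
 */
static uint64_t host_dec_expiry(void)
{
	int64_t dec = (int32_t)read_spr_dec();	/* extsw r8,r8 */
	uint64_t tb = read_timebase();		/* mftb r7 */

	return (uint64_t)(dec + (int64_t)tb);	/* add r8,r8,r7 */
}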
@@ -19,6 +19,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pte-walk.h>
 #include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
 
 static struct patb_entry *pseries_partition_tb;
 
@@ -53,7 +54,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	hr->dawrx1 = vcpu->arch.dawrx1;
 }
 
-static void byteswap_pt_regs(struct pt_regs *regs)
+/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
 {
 	unsigned long *addr = (unsigned long *) regs;
 
@@ -467,8 +469,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					    H_RPTI_TYPE_PAT,
+					    H_RPTI_PAGE_ALL, 0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -1214,6 +1223,113 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
 	return H_SUCCESS;
 }
 
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+					 unsigned long lpid, unsigned long ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (gp) {
+		kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+		kvmhv_put_nested(gp);
+	}
+	return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long pg_sizes,
+					 unsigned long start,
+					 unsigned long end)
+{
+	int ret = H_P4;
+	unsigned long addr, nr_pages;
+	struct mmu_psize_def *def;
+	unsigned long psize, ap, page_size;
+	bool flush_lpid;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		def = &mmu_psize_defs[psize];
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;
+
+		nr_pages = (end - start) >> def->shift;
+		flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
+		if (flush_lpid)
+			return do_tlb_invalidate_nested_all(vcpu, lpid,
+							RIC_FLUSH_TLB);
+		addr = start;
+		ap = mmu_get_ap(psize);
+		page_size = 1UL << def->shift;
+		do {
+			ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+						   get_epn(addr));
+			if (ret)
+				return H_P4;
+			addr += page_size;
+		} while (addr < end);
+	}
+	return ret;
+}
+
+/*
+ * Performs partition-scoped invalidations for nested guests
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
+	 *
+	 * However, nested KVM issues a L2 lpid flush call when creating
+	 * partition table entries for L2. This happens even before the
+	 * corresponding shadow lpid is created in HV which happens in
+	 * H_ENTER_NESTED call. Since we can't differentiate this case from
+	 * the invalid case, we ignore such flush requests and return success.
+	 */
+	if (!kvmhv_find_nested(vcpu->kvm, lpid))
+		return H_SUCCESS;
+
+	/*
+	 * A flush all request can be handled by a full lpid flush only.
+	 */
+	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+	/*
+	 * We don't need to handle a PWC flush like process table here,
+	 * because intermediate partition scoped table in nested guest doesn't
+	 * really have PWC. Only level we have PWC is in L0 and for nested
+	 * invalidate at L0 we always do kvm_flush_lpid() which does
+	 * radix__flush_all_lpid(). For range invalidate at any level, we
+	 * are not removing the higher level page tables and hence there is
+	 * no PWC invalidate needed.
+	 *
+	 * if (type & H_RPTI_TYPE_PWC) {
+	 *	ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+	 *	if (ret)
+	 *		return H_P4;
+	 * }
+	 */
+
+	if (start == 0 && end == -1)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+	if (type & H_RPTI_TYPE_TLB)
+		return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
+						    start, end);
+	return H_SUCCESS;
+}
+
 /* Used to convert a nested guest real address to a L1 guest real address */
 static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
 				       struct kvm_nested_guest *gp,
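The do_tlb_invalidate_nested_tlb() helper added above falls back to a full-LPID flush once a range spans more pages than tlb_range_flush_page_ceiling (33). A stripped-down sketch of that cost decision; pages_in_range() and should_flush_whole_lpid() are illustrative names only, under the assumption that per-page tlbie broadcasts dominate the cost.

/* Illustrative only: the per-range vs. full-flush decision used above. */
#define RANGE_FLUSH_PAGE_CEILING 33UL

static unsigned long pages_in_range(unsigned long start, unsigned long end,
				    unsigned int page_shift)
{
	return (end - start) >> page_shift;
}

static int should_flush_whole_lpid(unsigned long start, unsigned long end,
				   unsigned int page_shift)
{
	/*
	 * Each per-page invalidation is an expensive broadcast; past the
	 * ceiling it is cheaper to invalidate the whole partition-scoped
	 * TLB once.
	 */
	return pages_in_range(start, end, page_shift) > RANGE_FLUSH_PAGE_CEILING;
}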
arch/powerpc/kvm/book3s_hv_p9_entry.c (new file, 508 lines)
@@ -0,0 +1,508 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/kvm_host.h>
|
||||||
|
#include <asm/asm-prototypes.h>
|
||||||
|
#include <asm/dbell.h>
|
||||||
|
#include <asm/kvm_ppc.h>
|
||||||
|
#include <asm/ppc-opcode.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
|
||||||
|
static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
|
||||||
|
{
|
||||||
|
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||||
|
u64 tb = mftb() - vc->tb_offset_applied;
|
||||||
|
|
||||||
|
vcpu->arch.cur_activity = next;
|
||||||
|
vcpu->arch.cur_tb_start = tb;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
|
||||||
|
{
|
||||||
|
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||||
|
struct kvmhv_tb_accumulator *curr;
|
||||||
|
u64 tb = mftb() - vc->tb_offset_applied;
|
||||||
|
u64 prev_tb;
|
||||||
|
u64 delta;
|
||||||
|
u64 seq;
|
||||||
|
|
||||||
|
curr = vcpu->arch.cur_activity;
|
||||||
|
vcpu->arch.cur_activity = next;
|
||||||
|
prev_tb = vcpu->arch.cur_tb_start;
|
||||||
|
vcpu->arch.cur_tb_start = tb;
|
||||||
|
|
||||||
|
if (!curr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
delta = tb - prev_tb;
|
||||||
|
|
||||||
|
seq = curr->seqcount;
|
||||||
|
curr->seqcount = seq + 1;
|
||||||
|
smp_wmb();
|
||||||
|
curr->tb_total += delta;
|
||||||
|
if (seq == 0 || delta < curr->tb_min)
|
||||||
|
curr->tb_min = delta;
|
||||||
|
if (delta > curr->tb_max)
|
||||||
|
curr->tb_max = delta;
|
||||||
|
smp_wmb();
|
||||||
|
curr->seqcount = seq + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define start_timing(vcpu, next) __start_timing(vcpu, next)
|
||||||
|
#define end_timing(vcpu) __start_timing(vcpu, NULL)
|
||||||
|
#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
|
||||||
|
#else
|
||||||
|
#define start_timing(vcpu, next) do {} while (0)
|
||||||
|
#define end_timing(vcpu) do {} while (0)
|
||||||
|
#define accumulate_time(vcpu, next) do {} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
|
||||||
|
{
|
||||||
|
asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
|
||||||
|
asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mtslb(u64 slbee, u64 slbev)
|
||||||
|
{
|
||||||
|
asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void clear_slb_entry(unsigned int idx)
|
||||||
|
{
|
||||||
|
mtslb(idx, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void slb_clear_invalidate_partition(void)
|
||||||
|
{
|
||||||
|
clear_slb_entry(0);
|
||||||
|
asm volatile(PPC_SLBIA(6));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Malicious or buggy radix guests may have inserted SLB entries
|
||||||
|
* (only 0..3 because radix always runs with UPRT=1), so these must
|
||||||
|
* be cleared here to avoid side-channels. slbmte is used rather
|
||||||
|
* than slbia, as it won't clear cached translations.
|
||||||
|
*/
|
||||||
|
static void radix_clear_slb(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
clear_slb_entry(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
|
||||||
|
{
|
||||||
|
struct kvm_nested_guest *nested = vcpu->arch.nested;
|
||||||
|
u32 lpid;
|
||||||
|
|
||||||
|
lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All the isync()s are overkill but trivially follow the ISA
|
||||||
|
* requirements. Some can likely be replaced with justification
|
||||||
|
* comment for why they are not needed.
|
||||||
|
*/
|
||||||
|
isync();
|
||||||
|
mtspr(SPRN_LPID, lpid);
|
||||||
|
isync();
|
||||||
|
mtspr(SPRN_LPCR, lpcr);
|
||||||
|
isync();
|
||||||
|
mtspr(SPRN_PID, vcpu->arch.pid);
|
||||||
|
isync();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
|
||||||
|
{
|
||||||
|
u32 lpid;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
lpid = kvm->arch.lpid;
|
||||||
|
|
||||||
|
mtspr(SPRN_LPID, lpid);
|
||||||
|
mtspr(SPRN_LPCR, lpcr);
|
||||||
|
mtspr(SPRN_PID, vcpu->arch.pid);
|
||||||
|
|
||||||
|
for (i = 0; i < vcpu->arch.slb_max; i++)
|
||||||
|
mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
|
||||||
|
|
||||||
|
isync();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
|
||||||
|
{
|
||||||
|
isync();
|
||||||
|
mtspr(SPRN_PID, pid);
|
||||||
|
isync();
|
||||||
|
mtspr(SPRN_LPID, kvm->arch.host_lpid);
|
||||||
|
isync();
|
||||||
|
mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
|
||||||
|
isync();
|
||||||
|
|
||||||
|
if (!radix_enabled())
|
||||||
|
slb_restore_bolted_realmode();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void save_clear_host_mmu(struct kvm *kvm)
|
||||||
|
{
|
||||||
|
if (!radix_enabled()) {
|
||||||
|
/*
|
||||||
|
* Hash host could save and restore host SLB entries to
|
||||||
|
* reduce SLB fault overheads of VM exits, but for now the
|
||||||
|
* existing code clears all entries and restores just the
|
||||||
|
* bolted ones when switching back to host.
|
||||||
|
*/
|
||||||
|
slb_clear_invalidate_partition();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
if (kvm_is_radix(kvm)) {
|
||||||
|
radix_clear_slb();
|
||||||
|
} else {
|
||||||
|
int i;
|
||||||
|
int nr = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This must run before switching to host (radix host can't
|
||||||
|
* access all SLBs).
|
||||||
|
*/
|
||||||
|
for (i = 0; i < vcpu->arch.slb_nr; i++) {
|
||||||
|
u64 slbee, slbev;
|
||||||
|
mfslb(i, &slbee, &slbev);
|
||||||
|
if (slbee & SLB_ESID_V) {
|
||||||
|
vcpu->arch.slb[nr].orige = slbee | i;
|
||||||
|
vcpu->arch.slb[nr].origv = slbev;
|
||||||
|
nr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vcpu->arch.slb_max = nr;
|
||||||
|
slb_clear_invalidate_partition();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
|
||||||
|
{
|
||||||
|
struct kvm *kvm = vcpu->kvm;
|
||||||
|
struct kvm_nested_guest *nested = vcpu->arch.nested;
|
||||||
|
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||||
|
s64 hdec;
|
||||||
|
u64 tb, purr, spurr;
|
||||||
|
u64 *exsave;
|
||||||
|
bool ri_set;
|
||||||
|
int trap;
|
||||||
|
unsigned long msr;
|
||||||
|
unsigned long host_hfscr;
|
||||||
|
unsigned long host_ciabr;
|
||||||
|
unsigned long host_dawr0;
|
||||||
|
unsigned long host_dawrx0;
|
||||||
|
unsigned long host_psscr;
|
||||||
|
unsigned long host_pidr;
|
||||||
|
unsigned long host_dawr1;
|
||||||
|
unsigned long host_dawrx1;
|
||||||
|
|
||||||
|
hdec = time_limit - mftb();
|
||||||
|
if (hdec < 0)
|
||||||
|
return BOOK3S_INTERRUPT_HV_DECREMENTER;
|
||||||
|
|
||||||
|
WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
|
||||||
|
WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
|
||||||
|
|
||||||
|
start_timing(vcpu, &vcpu->arch.rm_entry);
|
||||||
|
|
||||||
|
vcpu->arch.ceded = 0;
|
||||||
|
|
||||||
|
if (vc->tb_offset) {
|
||||||
|
u64 new_tb = mftb() + vc->tb_offset;
|
||||||
|
mtspr(SPRN_TBU40, new_tb);
|
||||||
|
tb = mftb();
|
||||||
|
if ((tb & 0xffffff) < (new_tb & 0xffffff))
|
||||||
|
mtspr(SPRN_TBU40, new_tb + 0x1000000);
|
||||||
|
vc->tb_offset_applied = vc->tb_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
msr = mfmsr();
|
||||||
|
|
||||||
|
host_hfscr = mfspr(SPRN_HFSCR);
|
||||||
|
host_ciabr = mfspr(SPRN_CIABR);
|
||||||
|
host_dawr0 = mfspr(SPRN_DAWR0);
|
||||||
|
host_dawrx0 = mfspr(SPRN_DAWRX0);
|
||||||
|
host_psscr = mfspr(SPRN_PSSCR);
|
||||||
|
host_pidr = mfspr(SPRN_PID);
|
||||||
|
if (cpu_has_feature(CPU_FTR_DAWR1)) {
|
||||||
|
host_dawr1 = mfspr(SPRN_DAWR1);
|
||||||
|
host_dawrx1 = mfspr(SPRN_DAWRX1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vc->pcr)
|
||||||
|
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
|
||||||
|
mtspr(SPRN_DPDES, vc->dpdes);
|
||||||
|
mtspr(SPRN_VTB, vc->vtb);
|
||||||
|
|
||||||
|
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
|
||||||
|
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
|
||||||
|
mtspr(SPRN_PURR, vcpu->arch.purr);
|
||||||
|
mtspr(SPRN_SPURR, vcpu->arch.spurr);
|
||||||
|
|
||||||
|
if (dawr_enabled()) {
|
||||||
|
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
|
||||||
|
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
|
||||||
|
if (cpu_has_feature(CPU_FTR_DAWR1)) {
|
||||||
|
mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
|
||||||
|
mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
|
||||||
|
mtspr(SPRN_IC, vcpu->arch.ic);
|
||||||
|
|
||||||
|
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
|
||||||
|
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
|
||||||
|
|
||||||
|
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
|
||||||
|
|
||||||
|
mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
|
||||||
|
mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
|
||||||
|
* Interrupt (HDSI) the HDSISR is not be updated at all.
|
||||||
|
*
|
||||||
|
* To work around this we put a canary value into the HDSISR before
|
||||||
|
* returning to a guest and then check for this canary when we take a
|
||||||
|
* HDSI. If we find the canary on a HDSI, we know the hardware didn't
|
||||||
|
* update the HDSISR. In this case we return to the guest to retake the
|
||||||
|
* HDSI which should correctly update the HDSISR the second time HDSI
|
||||||
|
* entry.
|
||||||
|
*
|
||||||
|
* Just do this on all p9 processors for now.
|
||||||
|
*/
|
||||||
|
mtspr(SPRN_HDSISR, HDSISR_CANARY);
|
||||||
|
|
||||||
|
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
|
||||||
|
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
|
||||||
|
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
|
||||||
|
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
|
||||||
|
|
||||||
|
mtspr(SPRN_AMOR, ~0UL);
|
||||||
|
|
||||||
|
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Hash host, hash guest, or radix guest with prefetch bug, all have
|
||||||
|
* to disable the MMU before switching to guest MMU state.
|
||||||
|
*/
|
||||||
|
if (!radix_enabled() || !kvm_is_radix(kvm) ||
|
||||||
|
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
|
||||||
|
__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
|
||||||
|
|
||||||
|
save_clear_host_mmu(kvm);
|
||||||
|
|
||||||
|
if (kvm_is_radix(kvm)) {
|
||||||
|
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
|
||||||
|
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
|
||||||
|
__mtmsrd(0, 1); /* clear RI */
|
||||||
|
|
||||||
|
} else {
|
||||||
|
switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
|
||||||
|
kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
|
||||||
|
* so set guest LPCR (with HDICE) before writing HDEC.
|
||||||
|
*/
|
||||||
|
mtspr(SPRN_HDEC, hdec);
|
||||||
|
|
||||||
|
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
|
||||||
|
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
|
||||||
|
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
|
||||||
|
mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
|
||||||
|
|
||||||
|
accumulate_time(vcpu, &vcpu->arch.guest_time);
|
||||||
|
|
||||||
|
kvmppc_p9_enter_guest(vcpu);
|
||||||
|
|
||||||
|
accumulate_time(vcpu, &vcpu->arch.rm_intr);
|
||||||
|
|
||||||
|
/* XXX: Could get these from r11/12 and paca exsave instead */
|
||||||
|
vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
|
||||||
|
vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
|
||||||
|
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
|
||||||
|
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
|
||||||
|
|
||||||
|
/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
|
||||||
|
trap = local_paca->kvm_hstate.scratch0 & ~0x2;
|
||||||
|
|
||||||
|
/* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
|
||||||
|
ri_set = false;
|
||||||
|
if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
|
||||||
|
if (trap != BOOK3S_INTERRUPT_SYSCALL &&
|
||||||
|
(vcpu->arch.shregs.msr & MSR_RI))
|
||||||
|
ri_set = true;
|
||||||
|
exsave = local_paca->exgen;
|
||||||
|
} else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
|
||||||
|
exsave = local_paca->exnmi;
|
||||||
|
} else { /* trap == 0x200 */
|
||||||
|
exsave = local_paca->exmc;
|
||||||
|
}
|
||||||
|
|
||||||
|
vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
|
||||||
|
vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
|
||||||
|
* and hstate scratch (which we need to move into exsave to make
|
||||||
|
* re-entrant vs SRESET/MCE)
|
||||||
|
*/
|
||||||
|
if (ri_set) {
|
||||||
|
if (unlikely(!(mfmsr() & MSR_RI))) {
|
||||||
|
__mtmsrd(MSR_RI, 1);
|
||||||
|
WARN_ON_ONCE(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WARN_ON_ONCE(mfmsr() & MSR_RI);
|
||||||
|
__mtmsrd(MSR_RI, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
|
||||||
|
vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
|
||||||
|
vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
|
||||||
|
vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
|
||||||
|
vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
|
||||||
|
vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
|
||||||
|
vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
|
||||||
|
vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
|
||||||
|
|
||||||
|
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
|
||||||
|
|
||||||
|
if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
|
||||||
|
vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
|
||||||
|
vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
|
||||||
|
kvmppc_realmode_machine_check(vcpu);
|
||||||
|
|
||||||
|
} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
|
||||||
|
kvmppc_realmode_hmi_handler();
|
||||||
|
|
||||||
|
} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
|
||||||
|
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
|
||||||
|
|
||||||
|
} else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
|
||||||
|
vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
|
||||||
|
vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
|
||||||
|
vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
|
||||||
|
|
||||||
|
} else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
|
||||||
|
vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
|
||||||
|
|
||||||
|
} else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
|
||||||
|
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
|
||||||
|
|
||||||
|
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||||
|
/*
|
||||||
|
* Softpatch interrupt for transactional memory emulation cases
|
||||||
|
* on POWER9 DD2.2. This is early in the guest exit path - we
|
||||||
|
* haven't saved registers or done a treclaim yet.
|
||||||
|
*/
|
||||||
|
} else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
|
||||||
|
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The cases we want to handle here are those where the guest
|
||||||
|
* is in real suspend mode and is trying to transition to
|
||||||
|
* transactional mode.
|
||||||
|
*/
|
||||||
|
if (local_paca->kvm_hstate.fake_suspend &&
|
||||||
|
(vcpu->arch.shregs.msr & MSR_TS_S)) {
|
||||||
|
if (kvmhv_p9_tm_emulation_early(vcpu)) {
|
||||||
|
/* Prevent it being handled again. */
|
||||||
|
trap = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
accumulate_time(vcpu, &vcpu->arch.rm_exit);
|
||||||
|
|
||||||
|
/* Advance host PURR/SPURR by the amount used by guest */
|
||||||
|
purr = mfspr(SPRN_PURR);
|
||||||
|
spurr = mfspr(SPRN_SPURR);
|
||||||
|
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
|
||||||
|
purr - vcpu->arch.purr);
|
||||||
|
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
|
||||||
|
spurr - vcpu->arch.spurr);
|
||||||
|
vcpu->arch.purr = purr;
|
||||||
|
vcpu->arch.spurr = spurr;
|
||||||
|
|
||||||
|
vcpu->arch.ic = mfspr(SPRN_IC);
|
||||||
|
vcpu->arch.pid = mfspr(SPRN_PID);
|
||||||
|
vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
|
||||||
|
|
||||||
|
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
|
||||||
|
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
|
||||||
|
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
|
||||||
|
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
|
||||||
|
|
||||||
|
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
|
||||||
|
mtspr(SPRN_PSSCR, host_psscr |
|
||||||
|
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
|
||||||
|
mtspr(SPRN_HFSCR, host_hfscr);
|
||||||
|
mtspr(SPRN_CIABR, host_ciabr);
|
||||||
|
mtspr(SPRN_DAWR0, host_dawr0);
|
||||||
|
mtspr(SPRN_DAWRX0, host_dawrx0);
|
||||||
|
if (cpu_has_feature(CPU_FTR_DAWR1)) {
|
||||||
|
mtspr(SPRN_DAWR1, host_dawr1);
|
||||||
|
mtspr(SPRN_DAWRX1, host_dawrx1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kvm_is_radix(kvm)) {
|
||||||
|
/*
|
||||||
|
* Since this is radix, do a eieio; tlbsync; ptesync sequence
|
||||||
|
* in case we interrupted the guest between a tlbie and a
|
||||||
|
* ptesync.
|
||||||
|
*/
|
||||||
|
asm volatile("eieio; tlbsync; ptesync");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cp_abort is required if the processor supports local copy-paste
|
||||||
|
* to clear the copy buffer that was under control of the guest.
|
||||||
|
*/
|
||||||
|
if (cpu_has_feature(CPU_FTR_ARCH_31))
|
||||||
|
asm volatile(PPC_CP_ABORT);
|
||||||
|
|
||||||
|
vc->dpdes = mfspr(SPRN_DPDES);
|
||||||
|
vc->vtb = mfspr(SPRN_VTB);
|
||||||
|
mtspr(SPRN_DPDES, 0);
|
||||||
|
if (vc->pcr)
|
||||||
|
mtspr(SPRN_PCR, PCR_MASK);
|
||||||
|
|
||||||
|
if (vc->tb_offset_applied) {
|
||||||
|
u64 new_tb = mftb() - vc->tb_offset_applied;
|
||||||
|
mtspr(SPRN_TBU40, new_tb);
|
||||||
|
tb = mftb();
|
||||||
|
if ((tb & 0xffffff) < (new_tb & 0xffffff))
|
||||||
|
mtspr(SPRN_TBU40, new_tb + 0x1000000);
|
||||||
|
vc->tb_offset_applied = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mtspr(SPRN_HDEC, 0x7fffffff);
|
||||||
|
|
||||||
|
save_clear_guest_mmu(kvm, vcpu);
|
||||||
|
switch_mmu_to_host(kvm, host_pidr);
|
||||||
|
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we are in real mode, only switch MMU on after the MMU is
|
||||||
|
* switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
|
||||||
|
*/
|
||||||
|
__mtmsrd(msr, 0);
|
||||||
|
|
||||||
|
end_timing(vcpu);
|
||||||
|
|
||||||
|
return trap;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
|
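The new arch/powerpc/kvm/book3s_hv_p9_entry.c above guards its per-vcpu timing accumulators with a hand-rolled sequence counter: __accumulate_time() bumps seqcount to an odd value, updates tb_total/tb_min/tb_max, then bumps it back to even. A sketch of the matching lock-free reader, assuming the same field layout; memory barriers are elided here and would need to pair with the writer's smp_wmb().

#include <stdint.h>

/* Mirror of the accumulator fields updated by __accumulate_time() above. */
struct tb_accumulator {
	volatile uint64_t seqcount;	/* odd while an update is in flight */
	uint64_t tb_total;
	uint64_t tb_min;
	uint64_t tb_max;
};

/*
 * Retry whenever the count is odd (writer active) or changed mid-read,
 * so a torn read of the totals is never returned.
 */
static uint64_t read_tb_total(const struct tb_accumulator *acc)
{
	uint64_t seq, total;

	do {
		seq = acc->seqcount;
		total = acc->tb_total;
	} while ((seq & 1) || seq != acc->seqcount);

	return total;
}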
@@ -46,6 +46,10 @@ static int global_invalidates(struct kvm *kvm)
 	else
 		global = 1;
 
+	/* LPID has been switched to host if in virt mode so can't do local */
+	if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+		global = 1;
+
 	if (!global) {
 		/* any other core might now have stale TLB entries... */
 		smp_wmb();
@@ -56,7 +60,7 @@ static int global_invalidates(struct kvm *kvm)
 		 * so use the bit for the first thread to represent the core.
 		 */
 		if (cpu_has_feature(CPU_FTR_ARCH_300))
-			cpu = cpu_first_thread_sibling(cpu);
+			cpu = cpu_first_tlb_thread_sibling(cpu);
 		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
 	}
 
@@ -398,6 +402,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 				 vcpu->arch.pgdir, true,
 				 &vcpu->arch.regs.gpr[4]);
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
 
 #ifdef __BIG_ENDIAN__
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
@@ -542,6 +547,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
 				  &vcpu->arch.regs.gpr[4]);
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
 
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
@@ -660,6 +666,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
 
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		      unsigned long pte_index, unsigned long avpn)
@@ -730,6 +737,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 
 	return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
 
 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 		   unsigned long pte_index)
@@ -770,6 +778,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 	return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
 
 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
 			unsigned long pte_index)
@@ -818,6 +827,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
 
 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 			unsigned long pte_index)
@@ -865,6 +875,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
 
 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
 			  unsigned long gpa, int writing, unsigned long *hpa,
@@ -1283,3 +1294,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 
 	return -1;		/* send fault up to host kernel mode */
 }
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
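The hunks above add EXPORT_SYMBOL_GPL() after each real-mode HPT hcall handler so that modular KVM-HV code can reach them, not only built-in callers. For reference, the export mechanism in isolation; my_helper() is a made-up example, not part of this series.

#include <linux/module.h>

/* my_helper() is purely illustrative. */
long my_helper(long x)
{
	return x * 2;
}
/* Visible only to modules that declare a GPL-compatible license. */
EXPORT_SYMBOL_GPL(my_helper);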
@@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 		return;
 	}
 
-	if (xive_enabled() && kvmhv_on_pseries()) {
-		/* No XICS access or hypercalls available, too hard */
-		this_icp->rm_action |= XICS_RM_KICK_VCPU;
-		this_icp->rm_kick_target = vcpu;
-		return;
-	}
-
 	/*
 	 * Check if the core is loaded,
 	 * if not, find an available host core to post to wake the VCPU,
@@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
 	void __iomem *xics_phys;
 	int64_t rc;
 
-	if (kvmhv_on_pseries()) {
-		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-		iosync();
-		plpar_hcall_raw(H_EOI, retbuf, hwirq);
-		return;
-	}
-
 	rc = pnv_opal_pci_msi_eoi(c, hwirq);
 
 	if (rc)
[file diff suppressed because it is too large]
@@ -493,7 +493,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 		if (!vcpu->arch.pending_exceptions) {
 			kvm_vcpu_block(vcpu);
 			kvm_clear_request(KVM_REQ_UNHALT, vcpu);
-			vcpu->stat.halt_wakeup++;
+			vcpu->stat.generic.halt_wakeup++;
 
 			/* Unset POW bit after we woke up */
 			msr &= ~MSR_POW;
@@ -378,7 +378,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
 		kvm_vcpu_block(vcpu);
 		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
-		vcpu->stat.halt_wakeup++;
+		vcpu->stat.generic.halt_wakeup++;
 		return EMULATE_DONE;
 	case H_LOGICAL_CI_LOAD:
 		return kvmppc_h_pr_logical_ci_load(vcpu);
@@ -164,12 +164,15 @@ kvmppc_interrupt_pr:
 	/* 64-bit entry. Register usage at this point:
 	 *
 	 * SPRG_SCRATCH0   = guest R13
+	 * R9              = HSTATE_IN_GUEST
 	 * R12             = (guest CR << 32) | exit handler id
 	 * R13             = PACA
 	 * HSTATE.SCRATCH0 = guest R12
+	 * HSTATE.SCRATCH2 = guest R9
 	 */
 #ifdef CONFIG_PPC64
 	/* Match 32-bit entry */
+	ld	r9,HSTATE_SCRATCH2(r13)
 	rotldi	r12, r12, 32		   /* Flip R12 halves for stw */
 	stw	r12, HSTATE_SCRATCH1(r13)  /* CR is now in the low half */
 	srdi    r12, r12, 32		   /* shift trap into low half */
@ -127,6 +127,71 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
|
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Pull a vcpu's context from the XIVE on guest exit.
|
||||||
|
* This assumes we are in virtual mode (MMU on)
|
||||||
|
*/
|
||||||
|
void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
|
||||||
|
|
||||||
|
if (!vcpu->arch.xive_pushed)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Should not have been pushed if there is no tima
|
||||||
|
*/
|
||||||
|
if (WARN_ON(!tima))
|
||||||
|
return;
|
||||||
|
|
||||||
|
eieio();
|
||||||
|
/* First load to pull the context, we ignore the value */
|
||||||
|
__raw_readl(tima + TM_SPC_PULL_OS_CTX);
|
||||||
|
/* Second load to recover the context state (Words 0 and 1) */
|
||||||
|
vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
|
||||||
|
|
||||||
|
/* Fixup some of the state for the next load */
|
||||||
|
vcpu->arch.xive_saved_state.lsmfb = 0;
|
||||||
|
vcpu->arch.xive_saved_state.ack = 0xff;
|
||||||
|
vcpu->arch.xive_pushed = 0;
|
||||||
|
eieio();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
|
||||||
|
|
||||||
|
void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
|
||||||
|
|
||||||
|
if (!esc_vaddr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* we are using XIVE with single escalation */
|
||||||
|
|
||||||
|
if (vcpu->arch.xive_esc_on) {
|
||||||
|
/*
|
||||||
|
* If we still have a pending escalation, abort the cede,
|
||||||
|
* and we must set PQ to 10 rather than 00 so that we don't
|
||||||
|
* potentially end up with two entries for the escalation
|
||||||
|
* interrupt in the XIVE interrupt queue. In that case
|
||||||
|
* we also don't want to set xive_esc_on to 1 here in
|
||||||
|
* case we race with xive_esc_irq().
|
||||||
|
*/
|
||||||
|
vcpu->arch.ceded = 0;
|
||||||
|
/*
|
||||||
|
* The escalation interrupts are special as we don't EOI them.
|
||||||
|
* There is no need to use the load-after-store ordering offset
|
||||||
|
* to set PQ to 10 as we won't use StoreEOI.
|
||||||
|
*/
|
||||||
|
__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
|
||||||
|
} else {
|
||||||
|
vcpu->arch.xive_esc_on = true;
|
||||||
|
mb();
|
||||||
|
__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
|
||||||
|
}
|
||||||
|
mb();
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a simple trigger for a generic XIVE IRQ. This must
|
* This is a simple trigger for a generic XIVE IRQ. This must
|
||||||
* only be called for interrupts that support a trigger page
|
* only be called for interrupts that support a trigger page
|
||||||
@ -2075,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
|
||||||
|
{
|
||||||
|
struct kvmppc_vcore *vc = vcpu->arch.vcore;
|
||||||
|
|
||||||
|
/* The VM should have configured XICS mode before doing XICS hcalls. */
|
||||||
|
if (!kvmppc_xics_enabled(vcpu))
|
||||||
|
return H_TOO_HARD;
|
||||||
|
|
||||||
|
switch (req) {
|
||||||
|
case H_XIRR:
|
||||||
|
return xive_vm_h_xirr(vcpu);
|
||||||
|
case H_CPPR:
|
||||||
|
return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
|
||||||
|
case H_EOI:
|
||||||
|
return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
|
||||||
|
case H_IPI:
|
||||||
|
return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
|
||||||
|
kvmppc_get_gpr(vcpu, 5));
|
||||||
|
case H_IPOLL:
|
||||||
|
return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
|
||||||
|
case H_XIRR_X:
|
||||||
|
xive_vm_h_xirr(vcpu);
|
||||||
|
kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
|
||||||
|
return H_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
return H_UNSUPPORTED;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
|
||||||
|
|
||||||
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
|
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
|
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
|
||||||
@@ -2257,21 +2352,3 @@ struct kvm_device_ops kvm_xive_ops = {
 	.get_attr = xive_get_attr,
 	.has_attr = xive_has_attr,
 };
-
-void kvmppc_xive_init_module(void)
-{
-	__xive_vm_h_xirr = xive_vm_h_xirr;
-	__xive_vm_h_ipoll = xive_vm_h_ipoll;
-	__xive_vm_h_ipi = xive_vm_h_ipi;
-	__xive_vm_h_cppr = xive_vm_h_cppr;
-	__xive_vm_h_eoi = xive_vm_h_eoi;
-}
-
-void kvmppc_xive_exit_module(void)
-{
-	__xive_vm_h_xirr = NULL;
-	__xive_vm_h_ipoll = NULL;
-	__xive_vm_h_ipi = NULL;
-	__xive_vm_h_cppr = NULL;
-	__xive_vm_h_eoi = NULL;
-}
@@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
 extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
 
-extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
-			      unsigned long mfrr);
-extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-
 /*
  * Common Xive routines for XICS-over-XIVE and XIVE native
  */
@@ -1281,13 +1281,3 @@ struct kvm_device_ops kvm_xive_native_ops = {
 	.has_attr = kvmppc_xive_native_has_attr,
 	.mmap = kvmppc_xive_native_mmap,
 };
-
-void kvmppc_xive_native_init_module(void)
-{
-	;
-}
-
-void kvmppc_xive_native_exit_module(void)
-{
-	;
-}
@ -36,29 +36,59 @@
|
|||||||
|
|
||||||
unsigned long kvmppc_booke_handlers;
|
unsigned long kvmppc_booke_handlers;
|
||||||
|
|
||||||
struct kvm_stats_debugfs_item debugfs_entries[] = {
|
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
|
||||||
VCPU_STAT("mmio", mmio_exits),
|
KVM_GENERIC_VM_STATS(),
|
||||||
VCPU_STAT("sig", signal_exits),
|
STATS_DESC_ICOUNTER(VM, num_2M_pages),
|
||||||
VCPU_STAT("itlb_r", itlb_real_miss_exits),
|
STATS_DESC_ICOUNTER(VM, num_1G_pages)
|
||||||
VCPU_STAT("itlb_v", itlb_virt_miss_exits),
|
};
|
||||||
VCPU_STAT("dtlb_r", dtlb_real_miss_exits),
|
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
|
||||||
VCPU_STAT("dtlb_v", dtlb_virt_miss_exits),
|
sizeof(struct kvm_vm_stat) / sizeof(u64));
|
||||||
VCPU_STAT("sysc", syscall_exits),
|
|
||||||
VCPU_STAT("isi", isi_exits),
|
const struct kvm_stats_header kvm_vm_stats_header = {
|
||||||
VCPU_STAT("dsi", dsi_exits),
|
.name_size = KVM_STATS_NAME_SIZE,
|
||||||
VCPU_STAT("inst_emu", emulated_inst_exits),
|
.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
|
||||||
VCPU_STAT("dec", dec_exits),
|
.id_offset = sizeof(struct kvm_stats_header),
|
||||||
VCPU_STAT("ext_intr", ext_intr_exits),
|
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
|
||||||
VCPU_STAT("halt_successful_poll", halt_successful_poll),
|
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
|
||||||
VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
|
sizeof(kvm_vm_stats_desc),
|
||||||
VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
|
};
|
||||||
VCPU_STAT("halt_wakeup", halt_wakeup),
|
|
||||||
VCPU_STAT("doorbell", dbell_exits),
|
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
|
||||||
VCPU_STAT("guest doorbell", gdbell_exits),
|
KVM_GENERIC_VCPU_STATS(),
|
||||||
VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
|
STATS_DESC_COUNTER(VCPU, sum_exits),
|
||||||
VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
|
STATS_DESC_COUNTER(VCPU, mmio_exits),
|
||||||
VM_STAT("remote_tlb_flush", remote_tlb_flush),
|
STATS_DESC_COUNTER(VCPU, signal_exits),
|
||||||
{ NULL }
|
STATS_DESC_COUNTER(VCPU, light_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, syscall_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, isi_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, dsi_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, dec_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, ext_intr_exits),
|
||||||
|
STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
|
||||||
|
STATS_DESC_COUNTER(VCPU, halt_successful_wait),
|
||||||
|
STATS_DESC_COUNTER(VCPU, dbell_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, gdbell_exits),
|
||||||
|
STATS_DESC_COUNTER(VCPU, ld),
|
||||||
|
STATS_DESC_COUNTER(VCPU, st),
|
||||||
|
STATS_DESC_COUNTER(VCPU, pthru_all),
|
||||||
|
STATS_DESC_COUNTER(VCPU, pthru_host),
|
||||||
|
STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
|
||||||
|
};
|
||||||
|
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
|
||||||
|
sizeof(struct kvm_vcpu_stat) / sizeof(u64));
|
||||||
|
|
||||||
|
const struct kvm_stats_header kvm_vcpu_stats_header = {
|
||||||
|
.name_size = KVM_STATS_NAME_SIZE,
|
||||||
|
.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
|
||||||
|
.id_offset = sizeof(struct kvm_stats_header),
|
||||||
|
.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
|
||||||
|
.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
|
||||||
|
sizeof(kvm_vcpu_stats_desc),
|
||||||
};
|
};
|
||||||
|
|
||||||
/* TODO: use vcpu_printf() */
|
/* TODO: use vcpu_printf() */
|
||||||
|
@@ -682,6 +682,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
 		       !kvmppc_hv_ops->enable_dawr1(NULL));
 		break;
+	case KVM_CAP_PPC_RPT_INVALIDATE:
+		r = 1;
+		break;
 #endif
 	default:
 		r = 0;
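With KVM_CAP_PPC_RPT_INVALIDATE advertised above, userspace can probe for it with the standard KVM_CHECK_EXTENSION ioctl before enabling the capability for a VM. A minimal probe on the /dev/kvm fd (a VM fd works the same way), assuming kernel headers new enough to define the capability constant:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0)
		return 1;

	/* Non-zero when H_RPT_INVALIDATE support can be enabled for guests. */
	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RPT_INVALIDATE);
	printf("KVM_CAP_PPC_RPT_INVALIDATE: %d\n", r);
	return 0;
}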
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
 	}
 
 	/* Find out how many PID bits are supported */
-	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
-		if (!mmu_pid_bits)
-			mmu_pid_bits = 20;
-		mmu_base_pid = 1;
-	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
-		if (!mmu_pid_bits)
-			mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+	    cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
 		/*
-		 * When KVM is possible, we only use the top half of the
-		 * PID space to avoid collisions between host and guest PIDs
-		 * which can cause problems due to prefetch when exiting the
-		 * guest with AIL=3
+		 * Older versions of KVM on these machines perfer if the
+		 * guest only uses the low 19 PID bits.
 		 */
-		mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
-		mmu_base_pid = 1;
-#endif
-	} else {
-		/* The guest uses the bottom half of the PID space */
 		if (!mmu_pid_bits)
 			mmu_pid_bits = 19;
-		mmu_base_pid = 1;
+	} else {
+		if (!mmu_pid_bits)
+			mmu_pid_bits = 20;
 	}
+	mmu_base_pid = 1;
 
 	/*
 	 * Allocate Partition table and process table for the
@@ -486,6 +475,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
 		def = &mmu_psize_defs[idx];
 		def->shift = shift;
 		def->ap = ap;
+		def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
 	}
 
 	/* needed ? */
@@ -560,9 +550,13 @@ void __init radix__early_init_devtree(void)
 	 */
 	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
 	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+	mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
+		psize_to_rpti_pgsize(MMU_PAGE_4K);
+
 	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
 	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+	mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
+		psize_to_rpti_pgsize(MMU_PAGE_64K);
 }
 
 /*
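The radix page-size hunks above attach an h_rpt_pgsize encoding (via psize_to_rpti_pgsize()) to each mmu_psize_def entry; callers of H_RPT_INVALIDATE then pass a pg_sizes bitmask built from those encodings and filter on it, as do_tlb_invalidate_nested_tlb() does earlier in this series. A sketch of building such a mask; all_configured_rpti_pgsizes() is illustrative, not an existing kernel function.

/*
 * Sketch: build the pg_sizes argument for H_RPT_INVALIDATE from the
 * h_rpt_pgsize values populated above, covering every page size the
 * radix MMU currently has configured.
 */
static unsigned long all_configured_rpti_pgsizes(void)
{
	unsigned long pg_sizes = 0;
	int psize;

	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		if (!mmu_psize_defs[psize].shift)	/* size not supported */
			continue;
		pg_sizes |= mmu_psize_defs[psize].h_rpt_pgsize;
	}
	return pg_sizes;
}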
@ -20,10 +20,6 @@
|
|||||||
|
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
#define RIC_FLUSH_TLB 0
|
|
||||||
#define RIC_FLUSH_PWC 1
|
|
||||||
#define RIC_FLUSH_ALL 2
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* tlbiel instruction for radix, set invalidation
|
* tlbiel instruction for radix, set invalidation
|
||||||
* i.e., r=1 and is=01 or is=10 or is=11
|
* i.e., r=1 and is=01 or is=10 or is=11
|
||||||
@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
|
|||||||
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static __always_inline void __tlbie_pid_lpid(unsigned long pid,
|
||||||
|
unsigned long lpid,
|
||||||
|
unsigned long ric)
|
||||||
|
{
|
||||||
|
unsigned long rb, rs, prs, r;
|
||||||
|
|
||||||
|
rb = PPC_BIT(53); /* IS = 1 */
|
||||||
|
rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
|
||||||
|
prs = 1; /* process scoped */
|
||||||
|
r = 1; /* radix format */
|
||||||
|
|
||||||
|
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
|
||||||
|
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
|
||||||
|
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
||||||
|
}
|
||||||
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
|
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
|
||||||
{
|
{
|
||||||
unsigned long rb,rs,prs,r;
|
unsigned long rb,rs,prs,r;
|
||||||
@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
|
|||||||
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
|
||||||
|
unsigned long lpid,
|
||||||
|
unsigned long ap, unsigned long ric)
|
||||||
|
{
|
||||||
|
unsigned long rb, rs, prs, r;
|
||||||
|
|
||||||
|
rb = va & ~(PPC_BITMASK(52, 63));
|
||||||
|
rb |= ap << PPC_BITLSHIFT(58);
|
||||||
|
rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
|
||||||
|
prs = 1; /* process scoped */
|
||||||
|
r = 1; /* radix format */
|
||||||
|
|
||||||
|
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
|
||||||
|
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
|
||||||
|
trace_tlbie(0, 0, rb, rs, ric, prs, r);
|
||||||
|
}
|
||||||
|
|
||||||
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
|
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
|
||||||
unsigned long ap, unsigned long ric)
|
unsigned long ap, unsigned long ric)
|
||||||
{
|
{
|
||||||
@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void fixup_tlbie_va_range_lpid(unsigned long va,
|
||||||
|
unsigned long pid,
|
||||||
|
unsigned long lpid,
|
||||||
|
unsigned long ap)
|
||||||
|
{
|
||||||
|
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
|
||||||
|
asm volatile("ptesync" : : : "memory");
|
||||||
|
__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
|
||||||
|
asm volatile("ptesync" : : : "memory");
|
||||||
|
__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void fixup_tlbie_pid(unsigned long pid)
|
static inline void fixup_tlbie_pid(unsigned long pid)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We can use any address for the invalidation, pick one which is
|
||||||
|
* probably unused as an optimisation.
|
||||||
|
*/
|
||||||
|
unsigned long va = ((1UL << 52) - 1);
|
||||||
|
|
||||||
|
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
|
||||||
|
asm volatile("ptesync" : : : "memory");
|
||||||
|
__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
|
||||||
|
asm volatile("ptesync" : : : "memory");
|
||||||
|
__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
|
||||||
|
RIC_FLUSH_TLB);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
|
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
|
||||||
unsigned long ap)
|
unsigned long ap)
|
||||||
@ -344,6 +407,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
|
|||||||
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
asm volatile("eieio; tlbsync; ptesync": : :"memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
|
||||||
|
unsigned long ric)
|
||||||
|
{
|
||||||
|
asm volatile("ptesync" : : : "memory");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Workaround the fact that the "ric" argument to __tlbie_pid
|
||||||
|
* must be a compile-time contraint to match the "i" constraint
|
||||||
|
* in the asm statement.
|
||||||
|
*/
|
||||||
|
switch (ric) {
|
||||||
|
case RIC_FLUSH_TLB:
|
||||||
|
__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
|
||||||
|
fixup_tlbie_pid_lpid(pid, lpid);
|
||||||
|
break;
|
||||||
|
case RIC_FLUSH_PWC:
|
||||||
|
__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
|
||||||
|
break;
|
||||||
|
case RIC_FLUSH_ALL:
|
||||||
|
default:
|
||||||
|
__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
|
||||||
|
fixup_tlbie_pid_lpid(pid, lpid);
|
||||||
|
}
|
||||||
|
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
|
||||||
|
}
|
||||||
struct tlbiel_pid {
|
struct tlbiel_pid {
|
||||||
     unsigned long pid;
     unsigned long ric;
@@ -469,6 +557,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
     fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+                                         unsigned long pid, unsigned long lpid,
+                                         unsigned long page_size,
+                                         unsigned long psize)
+{
+    unsigned long addr;
+    unsigned long ap = mmu_get_ap(psize);
+
+    for (addr = start; addr < end; addr += page_size)
+        __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+    fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
                                       unsigned long psize, unsigned long ric)
 {
@@ -549,6 +651,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
     asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+                                        unsigned long pid, unsigned long lpid,
+                                        unsigned long page_size,
+                                        unsigned long psize, bool also_pwc)
+{
+    asm volatile("ptesync" : : : "memory");
+    if (also_pwc)
+        __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+    __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+    asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
                                 unsigned long start, unsigned long end,
                                 unsigned long pid, unsigned long page_size,
@@ -1338,47 +1452,57 @@ void radix__flush_tlb_all(void)
 }
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+                             unsigned long type, unsigned long pg_sizes,
+                             unsigned long start, unsigned long end)
 {
-    unsigned long pid = mm->context.id;
-
-    if (unlikely(pid == MMU_NO_CONTEXT))
-        return;
-
-    if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-        return;
+    unsigned long psize, nr_pages;
+    struct mmu_psize_def *def;
+    bool flush_pid;
 
     /*
-     * If this context hasn't run on that CPU before and KVM is
-     * around, there's a slim chance that the guest on another
-     * CPU just brought in obsolete translation into the TLB of
-     * this CPU due to a bad prefetch using the guest PID on
-     * the way into the hypervisor.
-     *
-     * We work around this here. If KVM is possible, we check if
-     * any sibling thread is in KVM. If it is, the window may exist
-     * and thus we flush that PID from the core.
-     *
-     * A potential future improvement would be to mark which PIDs
-     * have never been used on the system and avoid it if the PID
-     * is new and the process has no other cpumask bit set.
+     * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+     * do a single IS=1 based flush.
      */
-    if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
-        int cpu = smp_processor_id();
-        int sib = cpu_first_thread_sibling(cpu);
-        bool flush = false;
+    if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+        _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+        return;
+    }
 
-        for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
-            if (sib == cpu)
-                continue;
-            if (!cpu_possible(sib))
-                continue;
-            if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
-                flush = true;
+    if (type & H_RPTI_TYPE_PWC)
+        _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+
+    /* Full PID flush */
+    if (start == 0 && end == -1)
+        return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+    /* Do range invalidation for all the valid page sizes */
+    for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+        def = &mmu_psize_defs[psize];
+        if (!(pg_sizes & def->h_rpt_pgsize))
+            continue;
+
+        nr_pages = (end - start) >> def->shift;
+        flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+        /*
+         * If the number of pages spanning the range is above
+         * the ceiling, convert the request into a full PID flush.
+         * And since PID flush takes out all the page sizes, there
+         * is no need to consider remaining page sizes.
+         */
+        if (flush_pid) {
+            _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+            return;
         }
-        if (flush)
-            _tlbiel_pid(pid, RIC_FLUSH_ALL);
+        _tlbie_va_range_lpid(start, end, pid, lpid,
+                             (1UL << def->shift), psize, false);
     }
 }
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
 
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
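Note on the hunk above: per page size, the new do_h_rpt_invalidate_prt() decides whether a range flush is worth issuing or whether it should degrade to a full PID flush. A minimal sketch of just that decision, reusing the tlb_single_page_flush_ceiling tunable the radix code already has (the helper name here is hypothetical, not part of the patch):

/* Sketch only: mirrors the per-page-size decision in do_h_rpt_invalidate_prt(). */
static bool want_full_pid_flush(unsigned long start, unsigned long end,
                                unsigned int shift)
{
    unsigned long nr_pages = (end - start) >> shift;

    /* Above the ceiling, one PID-wide tlbie is cheaper than N per-page tlbies. */
    return nr_pages > tlb_single_page_flush_ceiling;
}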
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
     if (cpu_has_feature(CPU_FTR_ALTIVEC))
         asm volatile ("dssall");
 
-    if (new_on_cpu)
-        radix_kvm_prefetch_workaround(next);
-    else
+    if (!new_on_cpu)
         membarrier_arch_switch_mm(prev, next, tsk);
 
     /*
@@ -604,7 +604,7 @@ struct p9_sprs {
     u64 uamor;
 };
 
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
 {
     int cpu = raw_smp_processor_id();
     int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
     if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
         /* EC=ESL=0 case */
 
-        BUG_ON(!mmu_on);
-
         /*
          * Wake synchronously. SRESET via xscom may still cause
          * a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ core_woken:
         __slb_restore_bolted_realmode();
 
 out:
-    if (mmu_on)
-        mtmsr(MSR_KERNEL);
+    mtmsr(MSR_KERNEL);
 
     return srr1;
 }
@@ -895,7 +892,7 @@ struct p10_sprs {
      */
 };
 
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
 {
     int cpu = raw_smp_processor_id();
     int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
     if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
         /* EC=ESL=0 case */
 
-        BUG_ON(!mmu_on);
-
         /*
          * Wake synchronously. SRESET via xscom may still cause
          * a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ core_woken:
         __slb_restore_bolted_realmode();
 
 out:
-    if (mmu_on)
-        mtmsr(MSR_KERNEL);
+    mtmsr(MSR_KERNEL);
 
     return srr1;
 }
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
 {
     unsigned long srr1;
 
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-    __ppc64_runlatch_off();
     if (cpu_has_feature(CPU_FTR_ARCH_31))
-        srr1 = power10_idle_stop(psscr, true);
+        srr1 = power10_idle_stop(psscr);
     else
-        srr1 = power9_idle_stop(psscr, true);
-    __ppc64_runlatch_on();
-#else
-    /*
-     * Tell KVM we're entering idle.
-     * This does not have to be done in real mode because the P9 MMU
-     * is independent per-thread. Some steppings share radix/hash mode
-     * between threads, but in that case KVM has a barrier sync in real
-     * mode before and after switching between radix and hash.
-     *
-     * kvm_start_guest must still be called in real mode though, hence
-     * the false argument.
-     */
-    local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
-    __ppc64_runlatch_off();
-    if (cpu_has_feature(CPU_FTR_ARCH_31))
-        srr1 = power10_idle_stop(psscr, false);
-    else
-        srr1 = power9_idle_stop(psscr, false);
-    __ppc64_runlatch_on();
-
-    local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
-    /* Order setting hwthread_state vs. testing hwthread_req */
-    smp_mb();
-    if (local_paca->kvm_hstate.hwthread_req)
-        srr1 = idle_kvm_start_guest(srr1);
-    mtmsr(MSR_KERNEL);
-#endif
-
+        srr1 = power9_idle_stop(psscr);
     return srr1;
 }
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
 
     __ppc64_runlatch_off();
     if (cpu_has_feature(CPU_FTR_ARCH_31))
-        srr1 = power10_idle_stop(psscr, true);
+        srr1 = power10_idle_stop(psscr);
     else
-        srr1 = power9_idle_stop(psscr, true);
+        srr1 = power9_idle_stop(psscr);
     __ppc64_runlatch_on();
 
     fini_irq_for_idle_irqsoff();
@@ -361,6 +361,7 @@ struct sie_page {
 };
 
 struct kvm_vcpu_stat {
+    struct kvm_vcpu_stat_generic generic;
     u64 exit_userspace;
     u64 exit_null;
     u64 exit_external_request;
@@ -370,13 +371,7 @@ struct kvm_vcpu_stat {
     u64 exit_validity;
     u64 exit_instruction;
     u64 exit_pei;
-    u64 halt_successful_poll;
-    u64 halt_attempted_poll;
-    u64 halt_poll_invalid;
     u64 halt_no_poll_steal;
-    u64 halt_wakeup;
-    u64 halt_poll_success_ns;
-    u64 halt_poll_fail_ns;
     u64 instruction_lctl;
     u64 instruction_lctlg;
     u64 instruction_stctl;
@@ -755,12 +750,12 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_vm_stat {
+    struct kvm_vm_stat_generic generic;
     u64 inject_io;
     u64 inject_float_mchk;
     u64 inject_pfault_done;
     u64 inject_service_signal;
     u64 inject_virtio;
-    u64 remote_tlb_flush;
 };
 
 struct kvm_arch_memory_slot {
@@ -4,7 +4,8 @@
 # Copyright IBM Corp. 2008
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o \
+              $(KVM)/irqchip.o $(KVM)/vfio.o $(KVM)/binary_stats.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
@@ -58,112 +58,132 @@
 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                            (KVM_MAX_VCPUS + LOCAL_IRQS))
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-    VCPU_STAT("userspace_handled", exit_userspace),
-    VCPU_STAT("exit_null", exit_null),
-    VCPU_STAT("pfault_sync", pfault_sync),
-    VCPU_STAT("exit_validity", exit_validity),
-    VCPU_STAT("exit_stop_request", exit_stop_request),
-    VCPU_STAT("exit_external_request", exit_external_request),
-    VCPU_STAT("exit_io_request", exit_io_request),
-    VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
-    VCPU_STAT("exit_instruction", exit_instruction),
-    VCPU_STAT("exit_pei", exit_pei),
-    VCPU_STAT("exit_program_interruption", exit_program_interruption),
-    VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
-    VCPU_STAT("exit_operation_exception", exit_operation_exception),
-    VCPU_STAT("halt_successful_poll", halt_successful_poll),
-    VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-    VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-    VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
-    VCPU_STAT("halt_wakeup", halt_wakeup),
-    VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-    VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
-    VCPU_STAT("instruction_lctlg", instruction_lctlg),
-    VCPU_STAT("instruction_lctl", instruction_lctl),
-    VCPU_STAT("instruction_stctl", instruction_stctl),
-    VCPU_STAT("instruction_stctg", instruction_stctg),
-    VCPU_STAT("deliver_ckc", deliver_ckc),
-    VCPU_STAT("deliver_cputm", deliver_cputm),
-    VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
-    VCPU_STAT("deliver_external_call", deliver_external_call),
-    VCPU_STAT("deliver_service_signal", deliver_service_signal),
-    VCPU_STAT("deliver_virtio", deliver_virtio),
-    VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
-    VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
-    VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
-    VCPU_STAT("deliver_program", deliver_program),
-    VCPU_STAT("deliver_io", deliver_io),
-    VCPU_STAT("deliver_machine_check", deliver_machine_check),
-    VCPU_STAT("exit_wait_state", exit_wait_state),
-    VCPU_STAT("inject_ckc", inject_ckc),
-    VCPU_STAT("inject_cputm", inject_cputm),
-    VCPU_STAT("inject_external_call", inject_external_call),
-    VM_STAT("inject_float_mchk", inject_float_mchk),
-    VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
-    VM_STAT("inject_io", inject_io),
-    VCPU_STAT("inject_mchk", inject_mchk),
-    VM_STAT("inject_pfault_done", inject_pfault_done),
-    VCPU_STAT("inject_program", inject_program),
-    VCPU_STAT("inject_restart", inject_restart),
-    VM_STAT("inject_service_signal", inject_service_signal),
-    VCPU_STAT("inject_set_prefix", inject_set_prefix),
-    VCPU_STAT("inject_stop_signal", inject_stop_signal),
-    VCPU_STAT("inject_pfault_init", inject_pfault_init),
-    VM_STAT("inject_virtio", inject_virtio),
-    VCPU_STAT("instruction_epsw", instruction_epsw),
-    VCPU_STAT("instruction_gs", instruction_gs),
-    VCPU_STAT("instruction_io_other", instruction_io_other),
-    VCPU_STAT("instruction_lpsw", instruction_lpsw),
-    VCPU_STAT("instruction_lpswe", instruction_lpswe),
-    VCPU_STAT("instruction_pfmf", instruction_pfmf),
-    VCPU_STAT("instruction_ptff", instruction_ptff),
-    VCPU_STAT("instruction_stidp", instruction_stidp),
-    VCPU_STAT("instruction_sck", instruction_sck),
-    VCPU_STAT("instruction_sckpf", instruction_sckpf),
-    VCPU_STAT("instruction_spx", instruction_spx),
-    VCPU_STAT("instruction_stpx", instruction_stpx),
-    VCPU_STAT("instruction_stap", instruction_stap),
-    VCPU_STAT("instruction_iske", instruction_iske),
-    VCPU_STAT("instruction_ri", instruction_ri),
-    VCPU_STAT("instruction_rrbe", instruction_rrbe),
-    VCPU_STAT("instruction_sske", instruction_sske),
-    VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
-    VCPU_STAT("instruction_essa", instruction_essa),
-    VCPU_STAT("instruction_stsi", instruction_stsi),
-    VCPU_STAT("instruction_stfl", instruction_stfl),
-    VCPU_STAT("instruction_tb", instruction_tb),
-    VCPU_STAT("instruction_tpi", instruction_tpi),
-    VCPU_STAT("instruction_tprot", instruction_tprot),
-    VCPU_STAT("instruction_tsch", instruction_tsch),
-    VCPU_STAT("instruction_sthyi", instruction_sthyi),
-    VCPU_STAT("instruction_sie", instruction_sie),
-    VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
-    VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
-    VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
-    VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
-    VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
-    VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
-    VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
-    VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
-    VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
-    VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
-    VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
-    VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
-    VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
-    VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
-    VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
-    VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
-    VCPU_STAT("instruction_diag_10", diagnose_10),
-    VCPU_STAT("instruction_diag_44", diagnose_44),
-    VCPU_STAT("instruction_diag_9c", diagnose_9c),
-    VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
-    VCPU_STAT("diag_9c_forward", diagnose_9c_forward),
-    VCPU_STAT("instruction_diag_258", diagnose_258),
-    VCPU_STAT("instruction_diag_308", diagnose_308),
-    VCPU_STAT("instruction_diag_500", diagnose_500),
-    VCPU_STAT("instruction_diag_other", diagnose_other),
-    { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+    KVM_GENERIC_VM_STATS(),
+    STATS_DESC_COUNTER(VM, inject_io),
+    STATS_DESC_COUNTER(VM, inject_float_mchk),
+    STATS_DESC_COUNTER(VM, inject_pfault_done),
+    STATS_DESC_COUNTER(VM, inject_service_signal),
+    STATS_DESC_COUNTER(VM, inject_virtio)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+              sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+    .name_size = KVM_STATS_NAME_SIZE,
+    .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+    .id_offset = sizeof(struct kvm_stats_header),
+    .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+    .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+                   sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+    KVM_GENERIC_VCPU_STATS(),
+    STATS_DESC_COUNTER(VCPU, exit_userspace),
+    STATS_DESC_COUNTER(VCPU, exit_null),
+    STATS_DESC_COUNTER(VCPU, exit_external_request),
+    STATS_DESC_COUNTER(VCPU, exit_io_request),
+    STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
+    STATS_DESC_COUNTER(VCPU, exit_stop_request),
+    STATS_DESC_COUNTER(VCPU, exit_validity),
+    STATS_DESC_COUNTER(VCPU, exit_instruction),
+    STATS_DESC_COUNTER(VCPU, exit_pei),
+    STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
+    STATS_DESC_COUNTER(VCPU, instruction_lctl),
+    STATS_DESC_COUNTER(VCPU, instruction_lctlg),
+    STATS_DESC_COUNTER(VCPU, instruction_stctl),
+    STATS_DESC_COUNTER(VCPU, instruction_stctg),
+    STATS_DESC_COUNTER(VCPU, exit_program_interruption),
+    STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
+    STATS_DESC_COUNTER(VCPU, exit_operation_exception),
+    STATS_DESC_COUNTER(VCPU, deliver_ckc),
+    STATS_DESC_COUNTER(VCPU, deliver_cputm),
+    STATS_DESC_COUNTER(VCPU, deliver_external_call),
+    STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
+    STATS_DESC_COUNTER(VCPU, deliver_service_signal),
+    STATS_DESC_COUNTER(VCPU, deliver_virtio),
+    STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
+    STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
+    STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
+    STATS_DESC_COUNTER(VCPU, deliver_program),
+    STATS_DESC_COUNTER(VCPU, deliver_io),
+    STATS_DESC_COUNTER(VCPU, deliver_machine_check),
+    STATS_DESC_COUNTER(VCPU, exit_wait_state),
+    STATS_DESC_COUNTER(VCPU, inject_ckc),
+    STATS_DESC_COUNTER(VCPU, inject_cputm),
+    STATS_DESC_COUNTER(VCPU, inject_external_call),
+    STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
+    STATS_DESC_COUNTER(VCPU, inject_mchk),
+    STATS_DESC_COUNTER(VCPU, inject_pfault_init),
+    STATS_DESC_COUNTER(VCPU, inject_program),
+    STATS_DESC_COUNTER(VCPU, inject_restart),
+    STATS_DESC_COUNTER(VCPU, inject_set_prefix),
+    STATS_DESC_COUNTER(VCPU, inject_stop_signal),
+    STATS_DESC_COUNTER(VCPU, instruction_epsw),
+    STATS_DESC_COUNTER(VCPU, instruction_gs),
+    STATS_DESC_COUNTER(VCPU, instruction_io_other),
+    STATS_DESC_COUNTER(VCPU, instruction_lpsw),
+    STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+    STATS_DESC_COUNTER(VCPU, instruction_pfmf),
+    STATS_DESC_COUNTER(VCPU, instruction_ptff),
+    STATS_DESC_COUNTER(VCPU, instruction_sck),
+    STATS_DESC_COUNTER(VCPU, instruction_sckpf),
+    STATS_DESC_COUNTER(VCPU, instruction_stidp),
+    STATS_DESC_COUNTER(VCPU, instruction_spx),
+    STATS_DESC_COUNTER(VCPU, instruction_stpx),
+    STATS_DESC_COUNTER(VCPU, instruction_stap),
+    STATS_DESC_COUNTER(VCPU, instruction_iske),
+    STATS_DESC_COUNTER(VCPU, instruction_ri),
+    STATS_DESC_COUNTER(VCPU, instruction_rrbe),
+    STATS_DESC_COUNTER(VCPU, instruction_sske),
+    STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
+    STATS_DESC_COUNTER(VCPU, instruction_stsi),
+    STATS_DESC_COUNTER(VCPU, instruction_stfl),
+    STATS_DESC_COUNTER(VCPU, instruction_tb),
+    STATS_DESC_COUNTER(VCPU, instruction_tpi),
+    STATS_DESC_COUNTER(VCPU, instruction_tprot),
+    STATS_DESC_COUNTER(VCPU, instruction_tsch),
+    STATS_DESC_COUNTER(VCPU, instruction_sie),
+    STATS_DESC_COUNTER(VCPU, instruction_essa),
+    STATS_DESC_COUNTER(VCPU, instruction_sthyi),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
+    STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
+    STATS_DESC_COUNTER(VCPU, diagnose_10),
+    STATS_DESC_COUNTER(VCPU, diagnose_44),
+    STATS_DESC_COUNTER(VCPU, diagnose_9c),
+    STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
+    STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
+    STATS_DESC_COUNTER(VCPU, diagnose_258),
+    STATS_DESC_COUNTER(VCPU, diagnose_308),
+    STATS_DESC_COUNTER(VCPU, diagnose_500),
+    STATS_DESC_COUNTER(VCPU, diagnose_other),
+    STATS_DESC_COUNTER(VCPU, pfault_sync)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+              sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+    .name_size = KVM_STATS_NAME_SIZE,
+    .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+    .id_offset = sizeof(struct kvm_stats_header),
+    .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+    .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+                   sizeof(kvm_vcpu_stats_desc),
 };
 
 /* allow nested virtualization in KVM (if enabled by user space) */
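For reference, the header initializers above imply a simple layout for the new binary stats interface: the id string follows the header, the descriptor array follows the id, and the counter data follows the descriptors. A sketch of the data offset, derived only from the initializer shown (the surrounding file-format details come from other patches in this series):

/* Sketch: where the counter data starts in the per-vCPU binary stats file. */
static inline size_t kvm_vcpu_stats_data_offset(void)
{
    return sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
           sizeof(kvm_vcpu_stats_desc);
}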
@@ -329,31 +349,31 @@ static void allow_cpu_feat(unsigned long nr)
 
 static inline int plo_test_bit(unsigned char nr)
 {
-    register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+    unsigned long function = (unsigned long)nr | 0x100;
     int cc;
 
     asm volatile(
+        "    lgr    0,%[function]\n"
         /* Parameter registers are ignored for "test bit" */
         "    plo    0,0,0,0(0)\n"
         "    ipm    %0\n"
         "    srl    %0,28\n"
         : "=d" (cc)
-        : "d" (r0)
-        : "cc");
+        : [function] "d" (function)
+        : "cc", "0");
     return cc == 0;
 }
 
 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 {
-    register unsigned long r0 asm("0") = 0;    /* query function */
-    register unsigned long r1 asm("1") = (unsigned long) query;
-
     asm volatile(
-        /* Parameter regs are ignored */
+        "    lghi    0,0\n"
+        "    lgr    1,%[query]\n"
+        /* Parameter registers are ignored */
         "    .insn    rrf,%[opc] << 16,2,4,6,0\n"
         :
-        : "d" (r0), "a" (r1), [opc] "i" (opcode)
-        : "cc", "memory");
+        : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
+        : "cc", "memory", "0", "1");
 }
 
 #define INSN_SORTL 0xb938
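The two hunks above drop GCC register-asm variables in favour of loading the fixed registers inside the asm body and naming them in the clobber list; with KASAN and similar instrumentation, compiler-inserted code can otherwise run between the register assignment and the asm and corrupt the value. The resulting pattern, isolated for clarity (this repeats the new plo test shown above, only the function name is made up):

/* Sketch of the hardened pattern: fixed register 0 is loaded inside the asm
 * and listed as a clobber, instead of being bound to a C variable. */
static inline int plo_test_bit_sketch(unsigned char nr)
{
    unsigned long function = (unsigned long)nr | 0x100;
    int cc;

    asm volatile(
        "    lgr    0,%[function]\n"
        "    plo    0,0,0,0(0)\n"
        "    ipm    %[cc]\n"
        "    srl    %[cc],28\n"
        : [cc] "=d" (cc)
        : [function] "d" (function)
        : "cc", "0");
    return cc == 0;
}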
@@ -713,6 +733,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
             set_kvm_facility(kvm->arch.model.fac_mask, 152);
             set_kvm_facility(kvm->arch.model.fac_list, 152);
         }
+        if (test_facility(192)) {
+            set_kvm_facility(kvm->arch.model.fac_mask, 192);
+            set_kvm_facility(kvm->arch.model.fac_list, 192);
+        }
         r = 0;
     } else
         r = -EINVAL;
@@ -115,6 +115,10 @@ static struct facility_def facility_defs[] = {
             12, /* AP Query Configuration Information */
             15, /* AP Facilities Test */
             156, /* etoken facility */
+            165, /* nnpa facility */
+            193, /* bear enhancement facility */
+            194, /* rdp enhancement facility */
+            196, /* processor activity instrumentation facility */
             -1  /* END */
         }
     },
@@ -52,7 +52,7 @@
  * Support for passing hypercall input parameter block via XMM
  * registers is available
  */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE    BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE    BIT(4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE    BIT(5)
 /* Frequency MSRs available */
@@ -61,6 +61,11 @@
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE    BIT(10)
 /* Support for debug MSRs available */
 #define HV_FEATURE_DEBUG_MSRS_AVAILABLE    BIT(11)
+/*
+ * Support for returning hypercall output block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE    BIT(15)
 /* stimer Direct Mode is available */
 #define HV_STIMER_DIRECT_MODE_AVAILABLE    BIT(19)
 
@@ -133,6 +138,15 @@
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH    BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP    BIT(19)
 
+/*
+ * This is specific to AMD and specifies that enlightened TLB flush is
+ * supported. If guest opts in to this feature, ASID invalidations only
+ * flushes gva -> hpa mapping entries. To flush the TLB entries derived
+ * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace
+ * or HvFlushGuestPhysicalAddressList).
+ */
+#define HV_X64_NESTED_ENLIGHTENED_TLB    BIT(22)
+
 /* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */
 #define HV_PARAVISOR_PRESENT    BIT(0)
 
@@ -314,6 +328,9 @@ struct hv_tsc_emulation_status {
 #define HV_X64_MSR_TSC_REFERENCE_ENABLE    0x00000001
 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT    12
 
+/* Number of XMM registers used in hypercall input/output */
+#define HV_HYPERCALL_MAX_XMM_REGISTERS    6
+
 struct hv_nested_enlightenments_control {
     struct {
         __u32 directhypercall:1;
@@ -87,7 +87,10 @@ KVM_X86_OP(set_identity_map_addr)
 KVM_X86_OP(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
 KVM_X86_OP_NULL(has_wbinvd_exit)
-KVM_X86_OP(write_l1_tsc_offset)
+KVM_X86_OP(get_l2_tsc_offset)
+KVM_X86_OP(get_l2_tsc_multiplier)
+KVM_X86_OP(write_tsc_offset)
+KVM_X86_OP(write_tsc_multiplier)
 KVM_X86_OP(get_exit_info)
 KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
@@ -106,8 +109,8 @@ KVM_X86_OP_NULL(set_hv_timer)
 KVM_X86_OP_NULL(cancel_hv_timer)
 KVM_X86_OP(setup_mce)
 KVM_X86_OP(smi_allowed)
-KVM_X86_OP(pre_enter_smm)
-KVM_X86_OP(pre_leave_smm)
+KVM_X86_OP(enter_smm)
+KVM_X86_OP(leave_smm)
 KVM_X86_OP(enable_smi_window)
 KVM_X86_OP_NULL(mem_enc_op)
 KVM_X86_OP_NULL(mem_enc_reg_region)
@@ -85,7 +85,7 @@
 #define KVM_REQ_APICV_UPDATE \
     KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_TLB_FLUSH_CURRENT    KVM_ARCH_REQ(26)
-#define KVM_REQ_HV_TLB_FLUSH \
+#define KVM_REQ_TLB_FLUSH_GUEST \
     KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY    KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED    KVM_ARCH_REQ(29)
@@ -269,12 +269,36 @@ enum x86_intercept_stage;
 struct kvm_kernel_irq_routing_entry;
 
 /*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context. This is a subset of the overall kvm_mmu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
+ * 2 bytes per gfn instead of 4 bytes per gfn.
  *
- * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
- * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ * Indirect upper-level shadow pages are tracked for write-protection via
+ * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
+ * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
+ * gfn_track will overflow and explosions will ensure.
+ *
+ * A unique shadow page (SP) for a gfn is created if and only if an existing SP
+ * cannot be reused. The ability to reuse a SP is tracked by its role, which
+ * incorporates various mode bits and properties of the SP. Roughly speaking,
+ * the number of unique SPs that can theoretically be created is 2^n, where n
+ * is the number of bits that are used to compute the role.
+ *
+ * But, even though there are 18 bits in the mask below, not all combinations
+ * of modes and flags are possible. The maximum number of possible upper-level
+ * shadow pages for a single gfn is in the neighborhood of 2^13.
+ *
+ * - invalid shadow pages are not accounted.
+ * - level is effectively limited to four combinations, not 16 as the number
+ *   bits would imply, as 4k SPs are not tracked (allowed to go unsync).
+ * - level is effectively unused for non-PAE paging because there is exactly
+ *   one upper level (see 4k SP exception above).
+ * - quadrant is used only for non-PAE paging and is exclusive with
+ *   gpte_is_8_bytes.
+ * - execonly and ad_disabled are used only for nested EPT, which makes it
+ *   exclusive with quadrant.
  */
 union kvm_mmu_page_role {
     u32 word;
@@ -285,7 +309,7 @@ union kvm_mmu_page_role {
         unsigned direct:1;
         unsigned access:3;
         unsigned invalid:1;
-        unsigned nxe:1;
+        unsigned efer_nx:1;
         unsigned cr0_wp:1;
         unsigned smep_andnot_wp:1;
         unsigned smap_andnot_wp:1;
@@ -303,13 +327,26 @@ union kvm_mmu_page_role {
     };
 };
 
-union kvm_mmu_extended_role {
 /*
- * This structure complements kvm_mmu_page_role caching everything needed for
- * MMU configuration. If nothing in both these structures changed, MMU
- * re-configuration can be skipped. @valid bit is set on first usage so we don't
- * treat all-zero structure as valid data.
+ * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
+ * relevant to the current MMU configuration. When loading CR0, CR4, or EFER,
+ * including on nested transitions, if nothing in the full role changes then
+ * MMU re-configuration can be skipped. @valid bit is set on first usage so we
+ * don't treat all-zero structure as valid data.
+ *
+ * The properties that are tracked in the extended role but not the page role
+ * are for things that either (a) do not affect the validity of the shadow page
+ * or (b) are indirectly reflected in the shadow page's role. For example,
+ * CR4.PKE only affects permission checks for software walks of the guest page
+ * tables (because KVM doesn't support Protection Keys with shadow paging), and
+ * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
+ *
+ * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
+ * SMAP, but the MMU's permission checks for software walks need to be SMEP and
+ * SMAP aware regardless of CR0.WP.
  */
+union kvm_mmu_extended_role {
     u32 word;
     struct {
         unsigned int valid:1;
@@ -320,7 +357,7 @@ union kvm_mmu_extended_role {
         unsigned int cr4_pke:1;
         unsigned int cr4_smap:1;
         unsigned int cr4_smep:1;
-        unsigned int maxphyaddr:6;
+        unsigned int cr4_la57:1;
     };
 };
 
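As the rewritten comment above says, every bit that affects whether a shadow page can be reused has to live in kvm_mmu_page_role, because the role is compared as a single packed word. A hedged illustration of why the union pairs the bitfield with a `word` member (the helper name below is made up; KVM's real lookup code does the equivalent comparison inline):

/* Illustrative only: shadow-page reuse boils down to an exact match of the
 * packed 32-bit role word. */
static inline bool kvm_mmu_roles_match(union kvm_mmu_page_role a,
                                       union kvm_mmu_page_role b)
{
    return a.word == b.word;
}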
@@ -420,11 +457,6 @@ struct kvm_mmu {
 
     struct rsvd_bits_validate guest_rsvd_check;
 
-    /* Can have large pages at levels 2..last_nonleaf_level-1. */
-    u8 last_nonleaf_level;
-
-    bool nx;
-
     u64 pdptrs[4]; /* pae */
 };
 
@@ -543,6 +575,15 @@ struct kvm_vcpu_hv {
     struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
     DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
     cpumask_t tlb_flush;
+    bool enforce_cpuid;
+    struct {
+        u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
+        u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
+        u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
+        u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+        u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
+        u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+    } cpuid_cache;
 };
 
 /* Xen HVM per vcpu emulation context */
@@ -707,7 +748,7 @@ struct kvm_vcpu_arch {
     } st;
 
     u64 l1_tsc_offset;
-    u64 tsc_offset;
+    u64 tsc_offset; /* current tsc offset */
     u64 last_guest_tsc;
     u64 last_host_tsc;
     u64 tsc_offset_adjustment;
@@ -721,7 +762,8 @@ struct kvm_vcpu_arch {
     u32 virtual_tsc_khz;
     s64 ia32_tsc_adjust_msr;
     u64 msr_ia32_power_ctl;
-    u64 tsc_scaling_ratio;
+    u64 l1_tsc_scaling_ratio;
+    u64 tsc_scaling_ratio; /* current scaling ratio */
 
     atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
     unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -829,7 +871,7 @@ struct kvm_vcpu_arch {
     bool l1tf_flush_l1d;
 
     /* Host CPU on which VM-entry was most recently attempted */
-    unsigned int last_vmentry_cpu;
+    int last_vmentry_cpu;
 
     /* AMD MSRC001_0015 Hardware Configuration */
     u64 msr_hwcr;
@@ -851,6 +893,16 @@ struct kvm_vcpu_arch {
 
     /* Protected Guests */
     bool guest_state_protected;
+
+    /*
+     * Set when PDPTS were loaded directly by the userspace without
+     * reading the guest memory
+     */
+    bool pdptrs_from_userspace;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+    hpa_t hv_root_tdp;
+#endif
 };
 
 struct kvm_lpage_info {
@@ -1002,7 +1054,7 @@ struct kvm_arch {
     struct kvm_apic_map __rcu *apic_map;
     atomic_t apic_map_dirty;
 
-    bool apic_access_page_done;
+    bool apic_access_memslot_enabled;
     unsigned long apicv_inhibit_reasons;
 
     gpa_t wall_clock;
@@ -1062,11 +1114,19 @@ struct kvm_arch {
     bool exception_payload_enabled;
 
     bool bus_lock_detection_enabled;
+    /*
+     * If exit_on_emulation_error is set, and the in-kernel instruction
+     * emulator fails to emulate an instruction, allow userspace
+     * the opportunity to look at it.
+     */
+    bool exit_on_emulation_error;
 
     /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
     u32 user_space_msr_mask;
     struct kvm_x86_msr_filter __rcu *msr_filter;
 
+    u32 hypercall_exit_enabled;
+
     /* Guest can access the SGX PROVISIONKEY. */
     bool sgx_provisioning_allowed;
 
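The new exit_on_emulation_error flag above is opted into from userspace. A hedged sketch of how a VMM would enable it (this assumes the KVM_CAP_EXIT_ON_EMULATION_FAILURE capability added by the "Allow userspace to handle emulation errors" patches; error handling omitted):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* vm_fd is a VM file descriptor obtained via KVM_CREATE_VM. */
static void enable_emulation_error_exits(int vm_fd)
{
    struct kvm_enable_cap cap = {
        .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE, /* assumed capability name */
        .args = { 1 },
    };

    ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}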
@@ -1124,23 +1184,35 @@ struct kvm_arch {
      */
     spinlock_t tdp_mmu_pages_lock;
 #endif /* CONFIG_X86_64 */
 
+    /*
+     * If set, rmaps have been allocated for all memslots and should be
+     * allocated for any newly created or modified memslots.
+     */
+    bool memslots_have_rmaps;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+    hpa_t hv_root_tdp;
+    spinlock_t hv_root_tdp_lock;
+#endif
 };
 
 struct kvm_vm_stat {
-    ulong mmu_shadow_zapped;
-    ulong mmu_pte_write;
-    ulong mmu_pde_zapped;
-    ulong mmu_flooded;
-    ulong mmu_recycled;
-    ulong mmu_cache_miss;
-    ulong mmu_unsync;
-    ulong remote_tlb_flush;
-    ulong lpages;
-    ulong nx_lpage_splits;
-    ulong max_mmu_page_hash_collisions;
+    struct kvm_vm_stat_generic generic;
+    u64 mmu_shadow_zapped;
+    u64 mmu_pte_write;
+    u64 mmu_pde_zapped;
+    u64 mmu_flooded;
+    u64 mmu_recycled;
+    u64 mmu_cache_miss;
+    u64 mmu_unsync;
+    u64 lpages;
+    u64 nx_lpage_splits;
+    u64 max_mmu_page_hash_collisions;
 };
 
 struct kvm_vcpu_stat {
+    struct kvm_vcpu_stat_generic generic;
     u64 pf_fixed;
     u64 pf_guest;
     u64 tlb_flush;
@@ -1154,10 +1226,6 @@ struct kvm_vcpu_stat {
     u64 nmi_window_exits;
     u64 l1d_flush;
     u64 halt_exits;
-    u64 halt_successful_poll;
-    u64 halt_attempted_poll;
-    u64 halt_poll_invalid;
-    u64 halt_wakeup;
     u64 request_irq_exits;
     u64 irq_exits;
     u64 host_state_reload;
@@ -1168,11 +1236,10 @@ struct kvm_vcpu_stat {
     u64 irq_injections;
     u64 nmi_injections;
     u64 req_event;
-    u64 halt_poll_success_ns;
-    u64 halt_poll_fail_ns;
     u64 nested_run;
     u64 directed_yield_attempted;
     u64 directed_yield_successful;
+    u64 guest_mode;
 };
 
 struct x86_instruction_info;
@@ -1304,8 +1371,10 @@ struct kvm_x86_ops {
 
     bool (*has_wbinvd_exit)(void);
 
-    /* Returns actual tsc_offset set in active VMCS */
-    u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+    u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
+    u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
+    void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+    void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
 
     /*
      * Retrieve somewhat arbitrary exit information. Intended to be used
@@ -1363,8 +1432,8 @@ struct kvm_x86_ops {
     void (*setup_mce)(struct kvm_vcpu *vcpu);
 
     int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
-    int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
-    int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+    int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
+    int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
     void (*enable_smi_window)(struct kvm_vcpu *vcpu);
 
     int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
@@ -1423,6 +1492,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
+extern bool __read_mostly enable_apicv;
 extern struct kvm_x86_ops kvm_x86_ops;
 
 #define KVM_X86_OP(func) \
@@ -1463,6 +1533,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 
+void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                                       struct kvm_memory_slot *memslot,
@@ -1477,7 +1548,6 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
-bool pdptrs_changed(struct kvm_vcpu *vcpu);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                         const void *val, int bytes);
@@ -1650,6 +1720,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                         ulong roots_to_free);
+void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                            struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1662,7 +1733,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
                                 struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
-void kvm_apicv_init(struct kvm *kvm, bool enable);
 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
 void kvm_request_apicv_update(struct kvm *kvm, bool activate,
                               unsigned long bit);
@@ -1675,8 +1745,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                             gva_t gva, hpa_t root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
-                     bool skip_mmu_sync);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
 
 void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
                        int tdp_huge_page_level);
@@ -1788,8 +1857,10 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
     return kvm_find_user_return_msr(msr) >= 0;
 }
 
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
+u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
+u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
@@ -1863,4 +1934,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
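The split into l1_tsc_offset/l1_tsc_scaling_ratio and the current tsc_offset/tsc_scaling_ratio, plus the kvm_calc_nested_tsc_*() declarations above, exist because with TSC scaling the L2 parameters compose with L1's. A sketch of that composition under the usual fixed-point convention (frac_bits is the vendor-specific fraction width of the scaling ratio; the arithmetic below is an assumption about what the helpers compute, not a copy of them):

#include <linux/math64.h>

/* Sketch: combined L0->L2 TSC offset and multiplier from L1 and L2 values. */
static inline u64 nested_tsc_offset(u64 l1_offset, u64 l2_offset,
                                    u64 l2_multiplier, unsigned int frac_bits)
{
    /* L1's offset is itself scaled by L2's multiplier before adding L2's offset. */
    return mul_u64_u64_shr(l1_offset, l2_multiplier, frac_bits) + l2_offset;
}

static inline u64 nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier,
                                        unsigned int frac_bits)
{
    return mul_u64_u64_shr(l1_multiplier, l2_multiplier, frac_bits);
}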
@@ -156,6 +156,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
     u64 avic_physical_id;    /* Offset 0xf8 */
     u8 reserved_7[8];
     u64 vmsa_pa;        /* Used for an SEV-ES guest */
+    u8 reserved_8[720];
+    /*
+     * Offset 0x3e0, 32 bytes reserved
+     * for use by hypervisor/software.
+     */
+    u8 reserved_sw[32];
 };
 
 
@@ -314,7 +320,7 @@ struct ghcb {
 
 
 #define EXPECTED_VMCB_SAVE_AREA_SIZE        1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE        272
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE        1024
 #define EXPECTED_GHCB_SIZE            PAGE_SIZE
 
 static inline void __unused_size_checks(void)
@@ -326,7 +332,6 @@ static inline void __unused_size_checks(void)
 
 struct vmcb {
     struct vmcb_control_area control;
-    u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
     struct vmcb_save_area save;
 } __packed;
 
@@ -159,6 +159,19 @@ struct kvm_sregs {
     __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
 };
 
+struct kvm_sregs2 {
+    /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+    struct kvm_segment cs, ds, es, fs, gs, ss;
+    struct kvm_segment tr, ldt;
+    struct kvm_dtable gdt, idt;
+    __u64 cr0, cr2, cr3, cr4, cr8;
+    __u64 efer;
+    __u64 apic_base;
+    __u64 flags;
+    __u64 pdptrs[4];
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
 /* for KVM_GET_FPU and KVM_SET_FPU */
 struct kvm_fpu {
     __u8 fpr[8][16];
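kvm_sregs2 above is what the new KVM_GET_SREGS2/KVM_SET_SREGS2 ioctls exchange; when KVM_SREGS2_FLAGS_PDPTRS_VALID is set, the pdptrs[] array is used directly instead of KVM re-reading the PDPTEs from guest memory. A hedged userspace sketch (ioctl names come from this uapi header; error handling omitted):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: read the extended special registers, mark the PDPTEs as valid and
 * write them back unchanged (vcpu_fd is a vCPU file descriptor). */
static void roundtrip_sregs2(int vcpu_fd)
{
    struct kvm_sregs2 sregs2;

    ioctl(vcpu_fd, KVM_GET_SREGS2, &sregs2);
    sregs2.flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
    ioctl(vcpu_fd, KVM_SET_SREGS2, &sregs2);
}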
@@ -33,6 +33,8 @@
 #define KVM_FEATURE_PV_SCHED_YIELD    13
 #define KVM_FEATURE_ASYNC_PF_INT    14
 #define KVM_FEATURE_MSI_EXT_DEST_ID    15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE    16
+#define KVM_FEATURE_MIGRATION_CONTROL    17
 
 #define KVM_HINTS_REALTIME      0
 
@@ -54,6 +56,7 @@
 #define MSR_KVM_POLL_CONTROL    0x4b564d05
 #define MSR_KVM_ASYNC_PF_INT    0x4b564d06
 #define MSR_KVM_ASYNC_PF_ACK    0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL    0x4b564d08
 
 struct kvm_steal_time {
     __u64 steal;
@@ -90,6 +93,16 @@ struct kvm_clock_pairing {
 /* MSR_KVM_ASYNC_PF_INT */
 #define KVM_ASYNC_PF_VEC_MASK    GENMASK(7, 0)
 
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY    (1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K    0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M    (1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G    (1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n)    (n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED    KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED    KVM_MAP_GPA_RANGE_ENC_STAT(0)
+
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
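The KVM_MAP_GPA_RANGE_* attributes above are what a guest passes with the new KVM_HC_MAP_GPA_RANGE hypercall (advertised via KVM_FEATURE_HC_MAP_GPA_RANGE). A hedged guest-side sketch, assuming the documented argument order of start GPA, number of pages, then attributes:

#include <asm/kvm_para.h>

/* Sketch: tell the host that a 4 KiB-granular GPA range is now encrypted. */
static long notify_range_encrypted(u64 gpa, u64 npages)
{
    return kvm_hypercall3(KVM_HC_MAP_GPA_RANGE, gpa, npages,
                          KVM_MAP_GPA_RANGE_ENCRYPTED |
                          KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}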
Some files were not shown because too many files have changed in this diff.