linux/arch/riscv/include/asm/kvm_host.h
Linus Torvalds e8069f5a8e ARM64:
* Eager page splitting optimization for dirty logging, optionally
   allowing for a VM to avoid the cost of hugepage splitting in the stage-2
   fault path.
 
 * Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with
   services that live in the Secure world. pKVM intervenes on FF-A calls
   to guarantee the host doesn't misuse memory donated to the hyp or a
   pKVM guest.
 
 * Support for running the split hypervisor with VHE enabled, known as
   'hVHE' mode. This is extremely useful for testing the split
   hypervisor on VHE-only systems, and paves the way for new use cases
   that depend on having two TTBRs available at EL2.
 
 * Generalized framework for configurable ID registers from userspace.
   KVM/arm64 currently prevents arbitrary CPU feature set configuration
   from userspace, but the intent is to relax this limitation and allow
   userspace to select a feature set consistent with the CPU.
 
 * Enable the use of Branch Target Identification (FEAT_BTI) in the
   hypervisor.
 
 * Use a separate set of pointer authentication keys for the hypervisor
   when running in protected mode, as the host is untrusted at runtime.
 
 * Ensure timer IRQs are consistently released in the init failure
   paths.
 
 * Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps
   (FEAT_EVT), as it is a register commonly read from userspace.
 
 * Erratum workaround for the upcoming AmpereOne part, which has broken
   hardware A/D state management.
 
 RISC-V:
 
 * Redirect AMO load/store misaligned traps to KVM guest
 
 * Trap-n-emulate AIA in-kernel irqchip for KVM guest
 
 * Svnapot support for KVM Guest
 
 s390:
 
 * New uvdevice secret API
 
 * CMM selftest and fixes
 
 * fix racy access to target CPU for diag 9c
 
 x86:
 
 * Fix missing/incorrect #GP checks on ENCLS
 
 * Use standard mmu_notifier hooks for handling APIC access page
 
 * Drop now unnecessary TR/TSS load after VM-Exit on AMD
 
 * Print more descriptive information about the status of SEV and SEV-ES during
   module load
 
 * Add a test for splitting and reconstituting hugepages during and after
   dirty logging
 
 * Add support for CPU pinning in demand paging test
 
 * Add support for AMD PerfMonV2, with a variety of cleanups and minor fixes
   included along the way
 
 * Add a "nx_huge_pages=never" option to effectively avoid creating NX hugepage
   recovery threads (because nx_huge_pages=off can be toggled at runtime)
 
 * Move handling of PAT out of MTRR code and dedup SVM+VMX code
 
 * Fix output of PIC poll command emulation when there's an interrupt
 
 * Add a maintainer's handbook to document KVM x86 processes, preferred coding
   style, testing expectations, etc.
 
 * Misc cleanups, fixes and comments
 
 Generic:
 
 * Miscellaneous bugfixes and cleanups
 
 Selftests:
 
 * Generate dependency files so that partial rebuilds work as expected
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmSgHrIUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroORcAf+KkBlXwQMf+Q0Hy6Mfe0OtkKmh0Ae
 6HJ6dsuMfOHhWv5kgukh+qvuGUGzHq+gpVKmZg2yP3h3cLHOLUAYMCDm+rjXyjsk
 F4DbnJLfxq43Pe9PHRKFxxSecRcRYCNox0GD5UYL4PLKcH0FyfQrV+HVBK+GI8L3
 FDzUcyJkR12Lcj1qf++7fsbzfOshL0AJPmidQCoc6wkLJpUEr/nYUqlI1Kx3YNuQ
 LKmxFHS4l4/O/px3GKNDrLWDbrVlwciGIa3GZLS52PZdW3mAqT+cqcPcYK6SW71P
 m1vE80VbNELX5q3YSRoOXtedoZ3Pk97LEmz/xQAsJ/jri0Z5Syk0Ok0m/Q==
 =AMXp
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "ARM64:

   - Eager page splitting optimization for dirty logging, optionally
     allowing for a VM to avoid the cost of hugepage splitting in the
     stage-2 fault path.

   - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact
     with services that live in the Secure world. pKVM intervenes on
     FF-A calls to guarantee the host doesn't misuse memory donated to
     the hyp or a pKVM guest.

   - Support for running the split hypervisor with VHE enabled, known as
     'hVHE' mode. This is extremely useful for testing the split
     hypervisor on VHE-only systems, and paves the way for new use cases
     that depend on having two TTBRs available at EL2.

   - Generalized framework for configurable ID registers from userspace.
     KVM/arm64 currently prevents arbitrary CPU feature set
     configuration from userspace, but the intent is to relax this
     limitation and allow userspace to select a feature set consistent
     with the CPU.

   - Enable the use of Branch Target Identification (FEAT_BTI) in the
     hypervisor.

   - Use a separate set of pointer authentication keys for the
     hypervisor when running in protected mode, as the host is untrusted
     at runtime.

   - Ensure timer IRQs are consistently released in the init failure
     paths.

   - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization
     Traps (FEAT_EVT), as it is a register commonly read from userspace.

   - Erratum workaround for the upcoming AmpereOne part, which has
     broken hardware A/D state management.

  RISC-V:

   - Redirect AMO load/store misaligned traps to KVM guest

   - Trap-n-emulate AIA in-kernel irqchip for KVM guest

   - Svnapot support for KVM Guest

  s390:

   - New uvdevice secret API

   - CMM selftest and fixes

   - fix racy access to target CPU for diag 9c

  x86:

   - Fix missing/incorrect #GP checks on ENCLS

   - Use standard mmu_notifier hooks for handling APIC access page

   - Drop now unnecessary TR/TSS load after VM-Exit on AMD

   - Print more descriptive information about the status of SEV and
     SEV-ES during module load

   - Add a test for splitting and reconstituting hugepages during and
     after dirty logging

   - Add support for CPU pinning in demand paging test

   - Add support for AMD PerfMonV2, with a variety of cleanups and minor
     fixes included along the way

   - Add a "nx_huge_pages=never" option to effectively avoid creating NX
     hugepage recovery threads (because nx_huge_pages=off can be toggled
     at runtime)

   - Move handling of PAT out of MTRR code and dedup SVM+VMX code

   - Fix output of PIC poll command emulation when there's an interrupt

   - Add a maintainer's handbook to document KVM x86 processes,
     preferred coding style, testing expectations, etc.

   - Misc cleanups, fixes and comments

  Generic:

   - Miscellaneous bugfixes and cleanups

  Selftests:

   - Generate dependency files so that partial rebuilds work as
     expected"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits)
  Documentation/process: Add a maintainer handbook for KVM x86
  Documentation/process: Add a label for the tip tree handbook's coding style
  KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index
  RISC-V: KVM: Remove unneeded semicolon
  RISC-V: KVM: Allow Svnapot extension for Guest/VM
  riscv: kvm: define vcpu_sbi_ext_pmu in header
  RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
  RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel emulation of AIA APLIC
  RISC-V: KVM: Implement device interface for AIA irqchip
  RISC-V: KVM: Skeletal in-kernel AIA irqchip support
  RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
  RISC-V: KVM: Add APLIC related defines
  RISC-V: KVM: Add IMSIC related defines
  RISC-V: KVM: Implement guest external interrupt line management
  KVM: x86: Remove PRIx* definitions as they are solely for user space
  s390/uv: Update query for secret-UVCs
  s390/uv: replace scnprintf with sysfs_emit
  s390/uvdevice: Add 'Lock Secret Store' UVC
  ...
2023-07-03 15:32:22 -07:00

349 lines
9.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
*
* Authors:
* Anup Patel <anup.patel@wdc.com>
*/
#ifndef __RISCV_KVM_HOST_H__
#define __RISCV_KVM_HOST_H__
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kvm_types.h>
#include <linux/spinlock.h>
#include <asm/hwcap.h>
#include <asm/kvm_aia.h>
#include <asm/ptrace.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_insn.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_timer.h>
#include <asm/kvm_vcpu_pmu.h>
#define KVM_MAX_VCPUS 1024
#define KVM_HALT_POLL_NS_DEFAULT 500000
#define KVM_VCPU_MAX_FEATURES 0
#define KVM_IRQCHIP_NUM_PINS 1024
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2)
#define KVM_REQ_FENCE_I \
KVM_ARCH_REQ_FLAGS(3, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HFENCE_GVMA_VMID_ALL KVM_REQ_TLB_FLUSH
#define KVM_REQ_HFENCE_VVMA_ALL \
KVM_ARCH_REQ_FLAGS(4, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HFENCE \
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
enum kvm_riscv_hfence_type {
KVM_RISCV_HFENCE_UNKNOWN = 0,
KVM_RISCV_HFENCE_GVMA_VMID_GPA,
KVM_RISCV_HFENCE_VVMA_ASID_GVA,
KVM_RISCV_HFENCE_VVMA_ASID_ALL,
KVM_RISCV_HFENCE_VVMA_GVA,
};
struct kvm_riscv_hfence {
enum kvm_riscv_hfence_type type;
unsigned long asid;
unsigned long order;
gpa_t addr;
gpa_t size;
};
#define KVM_RISCV_VCPU_MAX_HFENCE 64
struct kvm_vm_stat {
struct kvm_vm_stat_generic generic;
};
struct kvm_vcpu_stat {
struct kvm_vcpu_stat_generic generic;
u64 ecall_exit_stat;
u64 wfi_exit_stat;
u64 mmio_exit_user;
u64 mmio_exit_kernel;
u64 csr_exit_user;
u64 csr_exit_kernel;
u64 signal_exits;
u64 exits;
};
struct kvm_arch_memory_slot {
};
struct kvm_vmid {
/*
* Writes to vmid_version and vmid happen with vmid_lock held
* whereas reads happen without any lock held.
*/
unsigned long vmid_version;
unsigned long vmid;
};
struct kvm_arch {
/* G-stage vmid */
struct kvm_vmid vmid;
/* G-stage page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
/* Guest Timer */
struct kvm_guest_timer timer;
/* AIA Guest/VM context */
struct kvm_aia aia;
};
struct kvm_cpu_trap {
unsigned long sepc;
unsigned long scause;
unsigned long stval;
unsigned long htval;
unsigned long htinst;
};
struct kvm_cpu_context {
unsigned long zero;
unsigned long ra;
unsigned long sp;
unsigned long gp;
unsigned long tp;
unsigned long t0;
unsigned long t1;
unsigned long t2;
unsigned long s0;
unsigned long s1;
unsigned long a0;
unsigned long a1;
unsigned long a2;
unsigned long a3;
unsigned long a4;
unsigned long a5;
unsigned long a6;
unsigned long a7;
unsigned long s2;
unsigned long s3;
unsigned long s4;
unsigned long s5;
unsigned long s6;
unsigned long s7;
unsigned long s8;
unsigned long s9;
unsigned long s10;
unsigned long s11;
unsigned long t3;
unsigned long t4;
unsigned long t5;
unsigned long t6;
unsigned long sepc;
unsigned long sstatus;
unsigned long hstatus;
union __riscv_fp_state fp;
struct __riscv_v_ext_state vector;
};
struct kvm_vcpu_csr {
unsigned long vsstatus;
unsigned long vsie;
unsigned long vstvec;
unsigned long vsscratch;
unsigned long vsepc;
unsigned long vscause;
unsigned long vstval;
unsigned long hvip;
unsigned long vsatp;
unsigned long scounteren;
};
struct kvm_vcpu_arch {
/* VCPU ran at least once */
bool ran_atleast_once;
/* Last Host CPU on which Guest VCPU exited */
int last_exit_cpu;
/* ISA feature bits (similar to MISA) */
DECLARE_BITMAP(isa, RISCV_ISA_EXT_MAX);
/* Vendor, Arch, and Implementation details */
unsigned long mvendorid;
unsigned long marchid;
unsigned long mimpid;
/* SSCRATCH, STVEC, and SCOUNTEREN of Host */
unsigned long host_sscratch;
unsigned long host_stvec;
unsigned long host_scounteren;
/* CPU context of Host */
struct kvm_cpu_context host_context;
/* CPU context of Guest VCPU */
struct kvm_cpu_context guest_context;
/* CPU CSR context of Guest VCPU */
struct kvm_vcpu_csr guest_csr;
/* CPU context upon Guest VCPU reset */
struct kvm_cpu_context guest_reset_context;
/* CPU CSR context upon Guest VCPU reset */
struct kvm_vcpu_csr guest_reset_csr;
/*
* VCPU interrupts
*
* We have a lockless approach for tracking pending VCPU interrupts
* implemented using atomic bitops. The irqs_pending bitmap represent
* pending interrupts whereas irqs_pending_mask represent bits changed
* in irqs_pending. Our approach is modeled around multiple producer
* and single consumer problem where the consumer is the VCPU itself.
*/
#define KVM_RISCV_VCPU_NR_IRQS 64
DECLARE_BITMAP(irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
DECLARE_BITMAP(irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);
/* VCPU Timer */
struct kvm_vcpu_timer timer;
/* HFENCE request queue */
spinlock_t hfence_lock;
unsigned long hfence_head;
unsigned long hfence_tail;
struct kvm_riscv_hfence hfence_queue[KVM_RISCV_VCPU_MAX_HFENCE];
/* MMIO instruction details */
struct kvm_mmio_decode mmio_decode;
/* CSR instruction details */
struct kvm_csr_decode csr_decode;
/* SBI context */
struct kvm_vcpu_sbi_context sbi_context;
/* AIA VCPU context */
struct kvm_vcpu_aia aia_context;
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_memory_cache mmu_page_cache;
/* VCPU power-off state */
bool power_off;
/* Don't run the VCPU (blocked) */
bool pause;
/* Performance monitoring context */
struct kvm_pmu pmu_context;
};
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
#define KVM_ARCH_WANT_MMU_NOTIFIER
#define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
gpa_t gpa, gpa_t gpsz,
unsigned long order);
void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid);
void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
unsigned long order);
void kvm_riscv_local_hfence_gvma_all(void);
void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
unsigned long asid,
unsigned long gva,
unsigned long gvsz,
unsigned long order);
void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
unsigned long asid);
void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
unsigned long gva, unsigned long gvsz,
unsigned long order);
void kvm_riscv_local_hfence_vvma_all(unsigned long vmid);
void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu);
void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu);
void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu);
void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu);
void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu);
void kvm_riscv_fence_i(struct kvm *kvm,
unsigned long hbase, unsigned long hmask);
void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
gpa_t gpa, gpa_t gpsz,
unsigned long order);
void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
unsigned long hbase, unsigned long hmask);
void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
unsigned long gva, unsigned long gvsz,
unsigned long order, unsigned long asid);
void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
unsigned long asid);
void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
unsigned long hbase, unsigned long hmask,
unsigned long gva, unsigned long gvsz,
unsigned long order);
void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
unsigned long hbase, unsigned long hmask);
int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa,
phys_addr_t hpa, unsigned long size,
bool writable, bool in_atomic);
void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa,
unsigned long size);
int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write);
int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm);
void kvm_riscv_gstage_free_pgd(struct kvm *kvm);
void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu);
void __init kvm_riscv_gstage_mode_detect(void);
unsigned long __init kvm_riscv_gstage_mode(void);
int kvm_riscv_gstage_gpa_bits(void);
void __init kvm_riscv_gstage_vmid_detect(void);
unsigned long kvm_riscv_gstage_vmid_bits(void);
int kvm_riscv_gstage_vmid_init(struct kvm *kvm);
bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid);
void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu);
int kvm_riscv_setup_default_irq_routing(struct kvm *kvm, u32 lines);
void __kvm_riscv_unpriv_trap(void);
unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
bool read_insn,
unsigned long guest_addr,
struct kvm_cpu_trap *trap);
void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
struct kvm_cpu_trap *trap);
int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_trap *trap);
void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
#endif /* __RISCV_KVM_HOST_H__ */