kvm: x86: Add support for getting/setting expanded xstate buffer
With KVM_CAP_XSAVE, userspace uses a hardcoded 4KB buffer to get/set xstate data from/to KVM. This doesn't work when dynamic xfeatures (e.g. AMX) are exposed to the guest as they require a larger buffer size. Introduce a new capability (KVM_CAP_XSAVE2). Userspace VMM gets the required xstate buffer size via KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2). KVM_SET_XSAVE is extended to work with both legacy and new capabilities by doing properly-sized memdup_user() based on the guest fpu container. KVM_GET_XSAVE is kept for backward-compatibility reasons. Instead, KVM_GET_XSAVE2 is introduced under KVM_CAP_XSAVE2 as the preferred interface for getting xstate buffer (4KB or larger size) from KVM (Link: https://lkml.org/lkml/2021/12/15/510). Also, update the api doc with the new KVM_GET_XSAVE2 ioctl. Signed-off-by: Guang Zeng <guang.zeng@intel.com> Signed-off-by: Wei Wang <wei.w.wang@intel.com> Signed-off-by: Jing Liu <jing2.liu@intel.com> Signed-off-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Yang Zhong <yang.zhong@intel.com> Message-Id: <20220105123532.12586-19-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
c60427dd50
commit
be50b2065d
@ -1569,6 +1569,7 @@ otherwise it will return EBUSY error.
|
|||||||
|
|
||||||
struct kvm_xsave {
|
struct kvm_xsave {
|
||||||
__u32 region[1024];
|
__u32 region[1024];
|
||||||
|
__u32 extra[0];
|
||||||
};
|
};
|
||||||
|
|
||||||
This ioctl would copy current vcpu's xsave struct to the userspace.
|
This ioctl would copy current vcpu's xsave struct to the userspace.
|
||||||
@ -1577,7 +1578,7 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
|
|||||||
4.43 KVM_SET_XSAVE
|
4.43 KVM_SET_XSAVE
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
:Capability: KVM_CAP_XSAVE
|
:Capability: KVM_CAP_XSAVE and KVM_CAP_XSAVE2
|
||||||
:Architectures: x86
|
:Architectures: x86
|
||||||
:Type: vcpu ioctl
|
:Type: vcpu ioctl
|
||||||
:Parameters: struct kvm_xsave (in)
|
:Parameters: struct kvm_xsave (in)
|
||||||
@ -1588,9 +1589,18 @@ This ioctl would copy current vcpu's xsave struct to the userspace.
|
|||||||
|
|
||||||
struct kvm_xsave {
|
struct kvm_xsave {
|
||||||
__u32 region[1024];
|
__u32 region[1024];
|
||||||
|
__u32 extra[0];
|
||||||
};
|
};
|
||||||
|
|
||||||
This ioctl would copy userspace's xsave struct to the kernel.
|
This ioctl copies userspace's xsave struct to the kernel. It copies
|
||||||
|
as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2),
|
||||||
|
when invoked on the vm file descriptor. The size value returned by
|
||||||
|
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
|
||||||
|
Currently, it is only greater than 4096 if a dynamic feature has been
|
||||||
|
enabled with ``arch_prctl()``, but this may change in the future.
|
||||||
|
|
||||||
|
The offsets of the state save areas in struct kvm_xsave follow the
|
||||||
|
contents of CPUID leaf 0xD on the host.
|
||||||
|
|
||||||
|
|
||||||
4.44 KVM_GET_XCRS
|
4.44 KVM_GET_XCRS
|
||||||
@ -5535,6 +5545,34 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header.
|
|||||||
The Stats Data block contains an array of 64-bit values in the same order
|
The Stats Data block contains an array of 64-bit values in the same order
|
||||||
as the descriptors in Descriptors block.
|
as the descriptors in Descriptors block.
|
||||||
|
|
||||||
|
4.42 KVM_GET_XSAVE2
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
:Capability: KVM_CAP_XSAVE2
|
||||||
|
:Architectures: x86
|
||||||
|
:Type: vcpu ioctl
|
||||||
|
:Parameters: struct kvm_xsave (out)
|
||||||
|
:Returns: 0 on success, -1 on error
|
||||||
|
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
struct kvm_xsave {
|
||||||
|
__u32 region[1024];
|
||||||
|
__u32 extra[0];
|
||||||
|
};
|
||||||
|
|
||||||
|
This ioctl copies the current vcpu's xsave struct to userspace. It
|
||||||
|
copies as many bytes as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
|
||||||
|
when invoked on the vm file descriptor. The size value returned by
|
||||||
|
KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) will always be at least 4096.
|
||||||
|
Currently, it is only greater than 4096 if a dynamic feature has been
|
||||||
|
enabled with ``arch_prctl()``, but this may change in the future.
|
||||||
|
|
||||||
|
The offsets of the state save areas in struct kvm_xsave follow the contents
|
||||||
|
of CPUID leaf 0xD on the host.
|
||||||
|
|
||||||
|
|
||||||
5. The kvm_run structure
|
5. The kvm_run structure
|
||||||
========================
|
========================
|
||||||
|
|
||||||
|
@ -373,9 +373,23 @@ struct kvm_debugregs {
|
|||||||
__u64 reserved[9];
|
__u64 reserved[9];
|
||||||
};
|
};
|
||||||
|
|
||||||
/* for KVM_CAP_XSAVE */
|
/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
|
||||||
struct kvm_xsave {
|
struct kvm_xsave {
|
||||||
|
/*
|
||||||
|
* KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
|
||||||
|
* as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
|
||||||
|
* respectively, when invoked on the vm file descriptor.
|
||||||
|
*
|
||||||
|
* The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
|
||||||
|
* will always be at least 4096. Currently, it is only greater
|
||||||
|
* than 4096 if a dynamic feature has been enabled with
|
||||||
|
* ``arch_prctl()``, but this may change in the future.
|
||||||
|
*
|
||||||
|
* The offsets of the state save areas in struct kvm_xsave follow
|
||||||
|
* the contents of CPUID leaf 0xD on the host.
|
||||||
|
*/
|
||||||
__u32 region[1024];
|
__u32 region[1024];
|
||||||
|
__u32 extra[0];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define KVM_MAX_XCRS 16
|
#define KVM_MAX_XCRS 16
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
|
u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
|
||||||
EXPORT_SYMBOL_GPL(kvm_cpu_caps);
|
EXPORT_SYMBOL_GPL(kvm_cpu_caps);
|
||||||
|
|
||||||
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
|
u32 xstate_required_size(u64 xstate_bv, bool compacted)
|
||||||
{
|
{
|
||||||
int feature_bit = 0;
|
int feature_bit = 0;
|
||||||
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
|
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
|
||||||
|
@ -30,6 +30,8 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
|
|||||||
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
|
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
|
||||||
u32 *ecx, u32 *edx, bool exact_only);
|
u32 *ecx, u32 *edx, bool exact_only);
|
||||||
|
|
||||||
|
u32 xstate_required_size(u64 xstate_bv, bool compacted);
|
||||||
|
|
||||||
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
|
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
|
||||||
u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu);
|
u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
|
@ -4314,6 +4314,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||||||
else
|
else
|
||||||
r = 0;
|
r = 0;
|
||||||
break;
|
break;
|
||||||
|
case KVM_CAP_XSAVE2: {
|
||||||
|
u64 guest_perm = xstate_get_guest_group_perm();
|
||||||
|
|
||||||
|
r = xstate_required_size(supported_xcr0 & guest_perm, false);
|
||||||
|
if (r < sizeof(struct kvm_xsave))
|
||||||
|
r = sizeof(struct kvm_xsave);
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -4917,6 +4925,16 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
|
|||||||
vcpu->arch.pkru);
|
vcpu->arch.pkru);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
|
||||||
|
u8 *state, unsigned int size)
|
||||||
|
{
|
||||||
|
if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
|
||||||
|
return;
|
||||||
|
|
||||||
|
fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
|
||||||
|
state, size, vcpu->arch.pkru);
|
||||||
|
}
|
||||||
|
|
||||||
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
|
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
|
||||||
struct kvm_xsave *guest_xsave)
|
struct kvm_xsave *guest_xsave)
|
||||||
{
|
{
|
||||||
@ -5370,6 +5388,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case KVM_GET_XSAVE: {
|
case KVM_GET_XSAVE: {
|
||||||
|
r = -EINVAL;
|
||||||
|
if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
|
||||||
|
break;
|
||||||
|
|
||||||
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
|
u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
|
||||||
r = -ENOMEM;
|
r = -ENOMEM;
|
||||||
if (!u.xsave)
|
if (!u.xsave)
|
||||||
@ -5384,7 +5406,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case KVM_SET_XSAVE: {
|
case KVM_SET_XSAVE: {
|
||||||
u.xsave = memdup_user(argp, sizeof(*u.xsave));
|
int size = vcpu->arch.guest_fpu.uabi_size;
|
||||||
|
|
||||||
|
u.xsave = memdup_user(argp, size);
|
||||||
if (IS_ERR(u.xsave)) {
|
if (IS_ERR(u.xsave)) {
|
||||||
r = PTR_ERR(u.xsave);
|
r = PTR_ERR(u.xsave);
|
||||||
goto out_nofree;
|
goto out_nofree;
|
||||||
@ -5393,6 +5417,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
|
|||||||
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
|
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case KVM_GET_XSAVE2: {
|
||||||
|
int size = vcpu->arch.guest_fpu.uabi_size;
|
||||||
|
|
||||||
|
u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
|
||||||
|
r = -ENOMEM;
|
||||||
|
if (!u.xsave)
|
||||||
|
break;
|
||||||
|
|
||||||
|
kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
|
||||||
|
|
||||||
|
r = -EFAULT;
|
||||||
|
if (copy_to_user(argp, u.xsave, size))
|
||||||
|
break;
|
||||||
|
|
||||||
|
r = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case KVM_GET_XCRS: {
|
case KVM_GET_XCRS: {
|
||||||
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
|
u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
|
||||||
r = -ENOMEM;
|
r = -ENOMEM;
|
||||||
|
@ -1132,6 +1132,7 @@ struct kvm_ppc_resize_hpt {
|
|||||||
#define KVM_CAP_ARM_MTE 205
|
#define KVM_CAP_ARM_MTE 205
|
||||||
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
|
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
|
||||||
#define KVM_CAP_VM_GPA_BITS 207
|
#define KVM_CAP_VM_GPA_BITS 207
|
||||||
|
#define KVM_CAP_XSAVE2 208
|
||||||
|
|
||||||
#ifdef KVM_CAP_IRQ_ROUTING
|
#ifdef KVM_CAP_IRQ_ROUTING
|
||||||
|
|
||||||
@ -1622,6 +1623,9 @@ struct kvm_enc_region {
|
|||||||
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
|
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
|
||||||
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
|
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
|
||||||
|
|
||||||
|
/* Available with KVM_CAP_XSAVE2 */
|
||||||
|
#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
|
||||||
|
|
||||||
struct kvm_s390_pv_sec_parm {
|
struct kvm_s390_pv_sec_parm {
|
||||||
__u64 origin;
|
__u64 origin;
|
||||||
__u64 length;
|
__u64 length;
|
||||||
|
Loading…
Reference in New Issue
Block a user