Drivers: hv: vmbus: Support >64 VPs for a fully enlightened TDX/SNP VM
Don't set *this_cpu_ptr(hyperv_pcpu_input_arg) before the function set_memory_decrypted() returns, otherwise we run into this ticky issue: For a fully enlightened TDX/SNP VM, in hv_common_cpu_init(), *this_cpu_ptr(hyperv_pcpu_input_arg) is an encrypted page before the set_memory_decrypted() returns. When such a VM has more than 64 VPs, if the hyperv_pcpu_input_arg is not NULL, hv_common_cpu_init() -> set_memory_decrypted() -> ... -> cpa_flush() -> on_each_cpu() -> ... -> hv_send_ipi_mask() -> ... -> __send_ipi_mask_ex() tries to call hv_do_rep_hypercall() with the hyperv_pcpu_input_arg as the hypercall input page, which must be a decrypted page in such a VM, but the page is still encrypted at this point, and a fatal fault is triggered. Fix the issue by setting *this_cpu_ptr(hyperv_pcpu_input_arg) after set_memory_decrypted(): if the hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns HV_STATUS_INVALID_PARAMETER immediately, and hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(), which can use x2apic_send_IPI_all(), which may be slightly slower than the hypercall but still works correctly in such a VM. Reviewed-by: Michael Kelley <mikelley@microsoft.com> Reviewed-by: Tianyu Lan <tiala@microsoft.com> Signed-off-by: Dexuan Cui <decui@microsoft.com> Signed-off-by: Wei Liu <wei.liu@kernel.org> Link: https://lore.kernel.org/r/20230824080712.30327-6-decui@microsoft.com
This commit is contained in:
parent
0719881bf8
commit
cceb4e0810
@ -360,6 +360,7 @@ int hv_common_cpu_init(unsigned int cpu)
|
|||||||
u64 msr_vp_index;
|
u64 msr_vp_index;
|
||||||
gfp_t flags;
|
gfp_t flags;
|
||||||
int pgcount = hv_root_partition ? 2 : 1;
|
int pgcount = hv_root_partition ? 2 : 1;
|
||||||
|
void *mem;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
|
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
|
||||||
@ -372,25 +373,40 @@ int hv_common_cpu_init(unsigned int cpu)
|
|||||||
* allocated if this CPU was previously online and then taken offline
|
* allocated if this CPU was previously online and then taken offline
|
||||||
*/
|
*/
|
||||||
if (!*inputarg) {
|
if (!*inputarg) {
|
||||||
*inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
|
mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
|
||||||
if (!(*inputarg))
|
if (!mem)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
if (hv_root_partition) {
|
if (hv_root_partition) {
|
||||||
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
|
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
|
||||||
*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
|
*outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) {
|
if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) {
|
||||||
ret = set_memory_decrypted((unsigned long)*inputarg, pgcount);
|
ret = set_memory_decrypted((unsigned long)mem, pgcount);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
/* It may be unsafe to free *inputarg */
|
/* It may be unsafe to free 'mem' */
|
||||||
*inputarg = NULL;
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(*inputarg, 0x00, pgcount * PAGE_SIZE);
|
memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In a fully enlightened TDX/SNP VM with more than 64 VPs, if
|
||||||
|
* hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() ->
|
||||||
|
* ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to
|
||||||
|
* use hyperv_pcpu_input_arg as the hypercall input page, which
|
||||||
|
* must be a decrypted page in such a VM, but the page is still
|
||||||
|
* encrypted before set_memory_decrypted() returns. Fix this by
|
||||||
|
* setting *inputarg after the above set_memory_decrypted(): if
|
||||||
|
* hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns
|
||||||
|
* HV_STATUS_INVALID_PARAMETER immediately, and the function
|
||||||
|
* hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(),
|
||||||
|
* which may be slightly slower than the hypercall, but still
|
||||||
|
* works correctly in such a VM.
|
||||||
|
*/
|
||||||
|
*inputarg = mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
|
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user