// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */
#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>

#include <asm/irq_remapping.h>

#include "trace.h"
#include "lapic.h"
#include "x86.h"
#include "irq.h"
#include "svm.h"
/* enable / disable AVIC */
int avic;
#ifdef CONFIG_X86_LOCAL_APIC
module_param(avic, int, S_IRUGO);
#endif
#define SVM_AVIC_DOORBELL	0xc001011b
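/*
 * Note: AVIC_HPA_MASK below keeps bits 51:12 of a host physical address,
 * i.e. a 4KB-aligned address within the 52-bit physical address space.
 */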
#define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
/*
 * 0xff is broadcast, so the max index allowed for physical APIC ID
 * table is 0xfe.  APIC IDs above 0xff are reserved.
 */
#define AVIC_MAX_PHYSICAL_ID_COUNT	255
#define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF
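/*
 * Note: for unaccelerated-access #VMEXITs, exit_info_1[11:0] holds the
 * (16-byte aligned) APIC register offset, exit_info_1[32] the write flag,
 * and exit_info_2[31:0] the vector; see
 * avic_unaccelerated_access_interception() below.
 */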
/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
					 (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
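/*
 * For example, AVIC_GATAG(0x2, 0x5) yields (0x2 << 8) | 0x5 = 0x205;
 * AVIC_GATAG_TO_VMID(0x205) recovers 0x2 and AVIC_GATAG_TO_VCPUID(0x205)
 * recovers 0x5.
 */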
/* Note:
 * This hash table is used to map VM_ID to a struct kvm_svm,
 * when handling AMD IOMMU GALOG notification to schedule in
 * a particular vCPU.
 */
#define SVM_VM_DATA_HASH_BITS	8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};
enum avic_ipi_failure_cause {
	AVIC_IPI_FAILURE_INVALID_INT_TYPE,
	AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
	AVIC_IPI_FAILURE_INVALID_TARGET,
	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
};
/* Note:
 * This function is called from IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
int avic_ga_log_notifier(u32 ga_tag)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
	trace_kvm_avic_ga_log(vm_id, vcpu_id);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	/* Note:
	 * At this point, the IOMMU should have already set the pending
	 * bit in the vAPIC backing page. So, we just need to schedule
	 * in the vcpu.
	 */
	if (vcpu)
		kvm_vcpu_wake_up(vcpu);

	return 0;
}
void avic_vm_destroy(struct kvm *kvm)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

	if (!avic)
		return;

	if (kvm_svm->avic_logical_id_table_page)
		__free_page(kvm_svm->avic_logical_id_table_page);
	if (kvm_svm->avic_physical_id_table_page)
		__free_page(kvm_svm->avic_physical_id_table_page);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_del(&kvm_svm->hnode);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
int avic_vm_init(struct kvm *kvm)
{
	unsigned long flags;
	int err = -ENOMEM;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
	struct kvm_svm *k2;
	struct page *p_page;
	struct page *l_page;
	u32 vm_id;

	if (!avic)
		return 0;

	/* Allocating physical APIC ID table (4KB) */
	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!p_page)
		goto free_avic;

	kvm_svm->avic_physical_id_table_page = p_page;

	/* Allocating logical APIC ID table (4KB) */
	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!l_page)
		goto free_avic;

	kvm_svm->avic_logical_id_table_page = l_page;

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 again:
	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
	if (vm_id == 0) { /* id is 1-based, zero is not okay */
		next_vm_id_wrapped = 1;
		goto again;
	}
	/* Is it still in use? Only possible if wrapped at least once */
	if (next_vm_id_wrapped) {
		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
			if (k2->avic_vm_id == vm_id)
				goto again;
		}
	}
	kvm_svm->avic_vm_id = vm_id;
	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	return 0;

free_avic:
	avic_vm_destroy(kvm);
	return err;
}
void avic_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
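	/*
	 * Note: the low byte of the physical ID table pointer also encodes
	 * the highest valid table index, so OR in AVIC_MAX_PHYSICAL_ID_COUNT
	 * below.
	 */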
	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;

	if (kvm_apicv_activated(svm->vcpu.kvm))
		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
	else
		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
}
static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
				       unsigned int index)
{
	u64 *avic_physical_id_table;
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);

	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
		return NULL;

	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);

	return &avic_physical_id_table[index];
}
/**
 * Note:
 * AVIC hardware walks the nested page table to check permissions,
 * but does not use the SPA address specified in the leaf page
 * table entry since it uses address in the AVIC_BACKING_PAGE pointer
 * field of the VMCB. Therefore, we set up the
 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 */
static int avic_update_access_page(struct kvm *kvm, bool activate)
{
	void __user *ret;
	int r = 0;

	mutex_lock(&kvm->slots_lock);

	/*
	 * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
	 * an APICv mode change, which updates the
	 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT memory region. So, we need to
	 * ensure that kvm->mm == current->mm.
	 */
	if ((kvm->arch.apic_access_page_done == activate) ||
	    (kvm->mm != current->mm))
		goto out;

	ret = __x86_set_memory_region(kvm,
				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
				      APIC_DEFAULT_PHYS_BASE,
				      activate ? PAGE_SIZE : 0);
	if (IS_ERR(ret)) {
		r = PTR_ERR(ret);
		goto out;
	}

	kvm->arch.apic_access_page_done = activate;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
	u64 *entry, new_entry;
	int id = vcpu->vcpu_id;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
		return -EINVAL;

	if (!svm->vcpu.arch.apic->regs)
		return -EINVAL;

	if (kvm_apicv_activated(vcpu->kvm)) {
		int ret;

		ret = avic_update_access_page(vcpu->kvm, true);
		if (ret)
			return ret;
	}

	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);

	/* Setting AVIC backing page address in the phy APIC ID table */
	entry = avic_get_physical_id_entry(vcpu, id);
	if (!entry)
		return -EINVAL;
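	/*
	 * Note: a physical ID table entry packs the backing page address
	 * into bits 51:12, the host physical APIC ID into the low byte,
	 * and uses bit 62 as IS_RUNNING and bit 63 as VALID (see the
	 * AVIC_PHYSICAL_ID_ENTRY_* masks used in this file).
	 */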
	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
			      AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
	WRITE_ONCE(*entry, new_entry);

	svm->avic_physical_id_cache = entry;

	return 0;
}
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
				   u32 icrl, u32 icrh)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		bool m = kvm_apic_match_dest(vcpu, source,
					     icrl & APIC_SHORT_MASK,
					     GET_APIC_DEST_FIELD(icrh),
					     icrl & APIC_DEST_MASK);

		if (m && !avic_vcpu_is_running(vcpu))
			kvm_vcpu_wake_up(vcpu);
	}
}
int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
{
	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
	u32 icrl = svm->vmcb->control.exit_info_1;
	u32 id = svm->vmcb->control.exit_info_2 >> 32;
	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
	struct kvm_lapic *apic = svm->vcpu.arch.apic;

	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);

	switch (id) {
	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
		/*
		 * AVIC hardware handles the generation of
		 * IPIs when the specified Message Type is Fixed
		 * (also known as fixed delivery mode) and
		 * the Trigger Mode is edge-triggered. The hardware
		 * also supports self and broadcast delivery modes
		 * specified via the Destination Shorthand (DSH)
		 * field of the ICRL. Logical and physical APIC ID
		 * formats are supported. All other IPI types cause
		 * a #VMEXIT, which needs to be emulated.
		 */
		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
		break;
	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
		/*
		 * At this point, we expect that the AVIC HW has already
		 * set the appropriate IRR bits on the valid target
		 * vcpus. So, we just need to kick the appropriate vcpu.
		 */
		avic_kick_target_vcpus(svm->vcpu.kvm, apic, icrl, icrh);
		break;
	case AVIC_IPI_FAILURE_INVALID_TARGET:
		WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
			  index, svm->vcpu.vcpu_id, icrh, icrl);
		break;
	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
		WARN_ONCE(1, "Invalid backing page\n");
		break;
	default:
		pr_err("Unknown IPI interception\n");
	}

	return 1;
}
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
	int index;
	u32 *logical_apic_id_table;
	int dlid = GET_APIC_LOGICAL_ID(ldr);

	if (!dlid)
		return NULL;
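	/*
	 * In flat mode the 8-bit logical ID is a one-hot bitmap of up to
	 * eight APICs, so e.g. dlid 0x08 maps to index 3. In cluster mode
	 * the high nibble selects one of 15 clusters of four APICs, so
	 * e.g. dlid 0x21 (cluster 2, bit 0) maps to index (2 << 2) + 0 = 8.
	 */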
	if (flat) { /* flat */
		index = ffs(dlid) - 1;
		if (index > 7)
			return NULL;
	} else { /* cluster */
		int cluster = (dlid & 0xf0) >> 4;
		int apic = ffs(dlid & 0x0f) - 1;

		if ((apic < 0) || (apic > 7) ||
		    (cluster >= 0xf))
			return NULL;
		index = (cluster << 2) + apic;
	}

	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);

	return &logical_apic_id_table[index];
}
static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
	bool flat;
	u32 *entry, new_entry;

	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
	if (!entry)
		return -EINVAL;

	new_entry = READ_ONCE(*entry);
	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
	WRITE_ONCE(*entry, new_entry);

	return 0;
}
static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
	u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);

	if (entry)
		clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
{
	int ret = 0;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	if (ldr == svm->ldr_reg)
		return 0;

	avic_invalidate_logical_id_entry(vcpu);

	if (ldr)
		ret = avic_ldr_write(vcpu, id, ldr);

	if (!ret)
		svm->ldr_reg = ldr;

	return ret;
}
static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
	u64 *old, *new;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	if (vcpu->vcpu_id == id)
		return 0;

	old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
	new = avic_get_physical_id_entry(vcpu, id);
	if (!new || !old)
		return 1;

	/* We need to move physical_id_entry to new offset */
	*new = *old;
	*old = 0ULL;
	to_svm(vcpu)->avic_physical_id_cache = new;

	/*
	 * Also update the guest physical APIC ID in the logical
	 * APIC ID table entry if the LDR has already been set up.
	 */
	if (svm->ldr_reg)
		avic_handle_ldr_update(vcpu);

	return 0;
}
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);

	if (svm->dfr_reg == dfr)
		return;

	avic_invalidate_logical_id_entry(vcpu);
	svm->dfr_reg = dfr;
}
static int avic_unaccel_trap_write(struct vcpu_svm *svm)
{
	struct kvm_lapic *apic = svm->vcpu.arch.apic;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;

	switch (offset) {
	case APIC_ID:
		if (avic_handle_apic_id_update(&svm->vcpu))
			return 0;
		break;
	case APIC_LDR:
		if (avic_handle_ldr_update(&svm->vcpu))
			return 0;
		break;
	case APIC_DFR:
		avic_handle_dfr_update(&svm->vcpu);
		break;
	default:
		break;
	}

	kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));

	return 1;
}
static bool is_avic_unaccelerated_access_trap(u32 offset)
{
	bool ret = false;

	switch (offset) {
	case APIC_ID:
	case APIC_EOI:
	case APIC_RRR:
	case APIC_LDR:
	case APIC_DFR:
	case APIC_SPIV:
	case APIC_ESR:
	case APIC_ICR:
	case APIC_LVTT:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT0:
	case APIC_LVT1:
	case APIC_LVTERR:
	case APIC_TMICT:
	case APIC_TDCR:
		ret = true;
		break;
	default:
		break;
	}
	return ret;
}
int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
{
	int ret = 0;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
	u32 vector = svm->vmcb->control.exit_info_2 &
		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
	bool trap = is_avic_unaccelerated_access_trap(offset);

	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
					    trap, write, vector);
	if (trap) {
		/* Handling Trap */
		WARN_ONCE(!write, "svm: Handling trap read.\n");
		ret = avic_unaccel_trap_write(svm);
	} else {
		/* Handling Fault */
		ret = kvm_emulate_instruction(&svm->vcpu, 0);
	}

	return ret;
}
int avic_init_vcpu(struct vcpu_svm *svm)
{
	int ret;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	if (!avic || !irqchip_in_kernel(vcpu->kvm))
		return 0;

	ret = avic_init_backing_page(&svm->vcpu);
	if (ret)
		return ret;

	INIT_LIST_HEAD(&svm->ir_list);
	spin_lock_init(&svm->ir_list_lock);
	svm->dfr_reg = APIC_DFR_FLAT;

	return ret;
}
void avic_post_state_restore(struct kvm_vcpu *vcpu)
{
	if (avic_handle_apic_id_update(vcpu) != 0)
		return;
	avic_handle_dfr_update(vcpu);
	avic_handle_ldr_update(vcpu);
}
void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
{
	if (!avic || !lapic_in_kernel(vcpu))
		return;

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	kvm_request_apicv_update(vcpu->kvm, activate,
				 APICV_INHIBIT_REASON_IRQWIN);
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
}
void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	return;
}

void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
}

void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
}
static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		if (activate)
			ret = amd_iommu_activate_guest_mode(ir->data);
		else
			ret = amd_iommu_deactivate_guest_mode(ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	bool activated = kvm_vcpu_apicv_active(vcpu);

	if (!avic)
		return;

	if (activated) {
		/**
		 * During AVIC temporary deactivation, guest could update
		 * APIC ID, DFR and LDR registers, which would not be trapped
		 * by avic_unaccelerated_access_interception(). In this case,
		 * we need to check and update the AVIC logical APIC ID table
		 * accordingly before re-activating.
		 */
		avic_post_state_restore(vcpu);
		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
	} else {
		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
	}
	vmcb_mark_dirty(vmcb, VMCB_AVIC);

	svm_set_pi_irte_mode(vcpu, activated);
}
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
	return;
}
int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
	if (!vcpu->arch.apicv_active)
		return -1;

	kvm_lapic_set_irr(vec, vcpu->arch.apic);
	smp_mb__after_atomic();
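	/*
	 * Note: the barrier above is intended to order setting the IRR bit
	 * against the avic_vcpu_is_running() check below, so that the new
	 * IRR bit is visible before the running state is sampled.
	 */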
	if (avic_vcpu_is_running(vcpu)) {
		int cpuid = vcpu->cpu;

		if (cpuid != get_cpu())
			wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
		put_cpu();
	} else
		kvm_vcpu_wake_up(vcpu);

	return 0;
}
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
	return false;
}
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
			continue;
		list_del(&cur->node);
		kfree(cur);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;

	/**
	 * In some cases, the existing irte is updated and re-set,
	 * so we need to check here if it's already been added
	 * to the ir_list.
	 */
	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
	}

	/**
	 * Allocating new amd_svm_iommu_ir, which will get
	 * added to the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}

	ir->data = pi->ir_data;
	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
}
/**
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kinds of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with a single CPU as the destination, e.g. user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
	    !kvm_irq_is_postable(&irq)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	*svm = to_svm(vcpu);
	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
	vcpu_info->vector = irq.vector;

	return 0;
}
/*
 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
		       uint32_t guest_irq, bool set)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_irq_routing_table *irq_rt;
	int idx, ret = -EINVAL;

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, set);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
	WARN_ON(guest_irq >= irq_rt->nr_rt_entries);

	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
			continue;

		/**
		 * Here, we set up with legacy mode in the following cases:
		 * 1. When the interrupt cannot be targeted at a specific vcpu.
		 * 2. Unsetting posted interrupt.
		 * 3. APIC virtualization is disabled for the vcpu.
		 * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
		 */
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		    kvm_vcpu_apicv_active(&svm->vcpu)) {
			struct amd_iommu_pi_data pi;

			/* Try to enable guest_mode in IRTE */
			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
					    AVIC_HPA_MASK);
			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
					       svm->vcpu.vcpu_id);
			pi.is_guest_mode = true;
			pi.vcpu_data = &vcpu_info;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Here, we successfully set up vcpu affinity in
			 * IOMMU guest mode. Now, we need to store the posted
			 * interrupt information in a per-vcpu ir_list so that
			 * we can refer to it directly when we update the vcpu
			 * scheduling information in the IOMMU irte.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
		} else {
			/* Use legacy mode in IRTE */
			struct amd_iommu_pi_data pi;

			/**
			 * Here, pi is used to:
			 * - Tell IOMMU to use legacy mode for this interrupt.
			 * - Retrieve ga_tag of prior interrupt remapping data.
			 */
			pi.prev_ga_tag = 0;
			pi.is_guest_mode = false;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Check if the posted interrupt was previously
			 * set up with the guest_mode by checking if the ga_tag
			 * was cached. If so, we need to clean up the per-vcpu
			 * ir_list.
			 */
			if (!ret && pi.prev_ga_tag) {
				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				struct kvm_vcpu *vcpu;

				vcpu = kvm_get_vcpu_by_id(kvm, id);
				if (vcpu)
					svm_ir_list_del(to_svm(vcpu), &pi);
			}
		}

		if (!ret && svm) {
			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
						 e->gsi, vcpu_info.vector,
						 vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
			pr_err("%s: failed to update PI IRTE\n", __func__);
			goto out;
		}
	}

	ret = 0;
out:
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}
bool svm_check_apicv_inhibit_reasons(ulong bit)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
			  BIT(APICV_INHIBIT_REASON_HYPERV) |
			  BIT(APICV_INHIBIT_REASON_NESTED) |
			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
			  BIT(APICV_INHIBIT_REASON_X2APIC);

	return supported & BIT(bit);
}
void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
{
	avic_update_access_page(kvm, activate);
}
static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	/*
	 * Since the host physical APIC id is 8 bits,
	 * we can support host APIC IDs up to 255.
	 */
	if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	if (svm->avic_is_running)
		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
					svm->avic_is_running);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
/**
 * This function is called during VCPU halt/unhalt.
 */
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->avic_is_running = is_run;
	if (is_run)
		avic_vcpu_load(vcpu, vcpu->cpu);
	else
		avic_vcpu_put(vcpu);
}
void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	avic_set_running(vcpu, false);
}

void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
		kvm_vcpu_update_apicv(vcpu);
	avic_set_running(vcpu, true);
}