/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
KVM: Portability: split kvm_vcpu_ioctl
This patch splits kvm_vcpu_ioctl into architecture-independent parts, and
x86-specific parts which go to kvm_arch_vcpu_ioctl in x86.c.
Common ioctls for all architectures are:
KVM_RUN, KVM_GET/SET_(S-)REGS, KVM_TRANSLATE, KVM_INTERRUPT,
KVM_DEBUG_GUEST, KVM_SET_SIGNAL_MASK, KVM_GET/SET_FPU
Note that some PPC chips don't have an FPU, so we might need an #ifdef
around KVM_GET/SET_FPU one day.
x86 specific ioctls are:
KVM_GET/SET_LAPIC, KVM_SET_CPUID, KVM_GET/SET_MSRS
An interesting aspect is vcpu_load/vcpu_put. We now have a common
vcpu_load/put which handles preemption, and an architecture-specific
kvm_arch_vcpu_load/put. In the x86 case, the latter calls the
vmx/svm function defined in kvm_x86_ops (see the sketch after this changelog).
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
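For orientation, here is a minimal sketch of what the common, architecture-independent
side in kvm_main.c looks like under this split; it is illustrative only, the exact body
lives in kvm_main.c and may differ in detail:

void vcpu_load(struct kvm_vcpu *vcpu)
{
	int cpu;

	mutex_lock(&vcpu->mutex);
	cpu = get_cpu();
	preempt_notifier_register(&vcpu->preempt_notifier);
	kvm_arch_vcpu_load(vcpu, cpu);	/* x86: forwards to kvm_x86_ops->vcpu_load */
	put_cpu();
}

void vcpu_put(struct kvm_vcpu *vcpu)
{
	preempt_disable();
	kvm_arch_vcpu_put(vcpu);	/* x86: forwards to kvm_x86_ops->vcpu_put */
	preempt_notifier_unregister(&vcpu->preempt_notifier);
	preempt_enable();
	mutex_unlock(&vcpu->mutex);
}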
# include "kvm.h"
# include "x86.h"
# include "segment_descriptor.h"
# include "irq.h"
# include <linux/kvm.h>
# include <linux/fs.h>
# include <linux/vmalloc.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#define MAX_IO_MSRS 256
KVM: Portability: Move control register helper functions to x86.c
This patch moves the definitions of CR0_RESERVED_BITS,
CR4_RESERVED_BITS, and CR8_RESERVED_BITS along with the following
functions from kvm_main.c to x86.c:
set_cr0(), set_cr3(), set_cr4(), set_cr8(), get_cr8(), lmsw(),
load_pdptrs()
The static function wrapper inject_gp is duplicated in kvm_main.c and
x86.c for now; the version in kvm_main.c should disappear once the last
user of it is gone too.
The function load_pdptrs is no longer static, and is now declared in x86.h
for the time being, until the last user of it is gone from kvm_main.c.
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
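As a rough sketch of the declaration involved — this is illustrative, not the literal
header contents — x86.h would expose something like:

/* x86.h (sketch): load_pdptrs is no longer static so the remaining caller
 * in kvm_main.c can keep using it until it is converted. */
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);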
#define CR0_RESERVED_BITS						\
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR4_RESERVED_BITS						\
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE \
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
unsigned long segment_base(u16 selector)
{
	struct descriptor_table gdt;
	struct segment_descriptor *d;
	unsigned long table_base;
	unsigned long v;

	if (selector == 0)
		return 0;

	asm("sgdt %0" : "=m"(gdt));
	table_base = gdt.base;

	if (selector & 4) {		/* from ldt */
		u16 ldt_selector;

		asm("sldt %0" : "=g"(ldt_selector));
		table_base = segment_base(ldt_selector);
	}
	d = (struct segment_descriptor *)(table_base + (selector & ~7));
	v = d->base_low | ((unsigned long)d->base_mid << 16) |
		((unsigned long)d->base_high << 24);
#ifdef CONFIG_X86_64
	if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
		v |= ((unsigned long)
		      ((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
	return v;
}
EXPORT_SYMBOL_GPL(segment_base);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return vcpu->apic_base;
	else
		return vcpu->apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
	/* TODO: reserve bits check */
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_base(vcpu, data);
	else
		vcpu->apic_base = data;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
static void inject_gp(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->inject_gp(vcpu, 0);
}
/*
 * Load the pae pdptrs.  Return true if they are all valid.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];

	mutex_lock(&vcpu->kvm->lock);
	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
				  offset * sizeof(u64), sizeof(pdpte));
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
out:
	mutex_unlock(&vcpu->kvm->lock);

	return ret;
}
void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	if (cr0 & CR0_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
		       cr0, vcpu->cr0);
		inject_gp(vcpu);
		return;
	}

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
		inject_gp(vcpu);
		return;
	}

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
		       "and a clear PE flag\n");
		inject_gp(vcpu);
		return;
	}

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->shadow_efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu)) {
				printk(KERN_DEBUG "set_cr0: #GP, start paging "
				       "in long mode while PAE is disabled\n");
				inject_gp(vcpu);
				return;
			}
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l) {
				printk(KERN_DEBUG "set_cr0: #GP, start paging "
				       "in long mode while CS.L == 1\n");
				inject_gp(vcpu);
				return;
			}
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
			printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
			       "reserved bits\n");
			inject_gp(vcpu);
			return;
		}
	}

	kvm_x86_ops->set_cr0(vcpu, cr0);
	vcpu->cr0 = cr0;

	mutex_lock(&vcpu->kvm->lock);
	kvm_mmu_reset_context(vcpu);
	mutex_unlock(&vcpu->kvm->lock);
	return;
}
EXPORT_SYMBOL_GPL(set_cr0);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(lmsw);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	if (cr4 & CR4_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
		inject_gp(vcpu);
		return;
	}

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE)) {
			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
			       "in long mode\n");
			inject_gp(vcpu);
			return;
		}
	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
		   && !load_pdptrs(vcpu, vcpu->cr3)) {
		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
		inject_gp(vcpu);
		return;
	}

	if (cr4 & X86_CR4_VMXE) {
		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
		inject_gp(vcpu);
		return;
	}
	kvm_x86_ops->set_cr4(vcpu, cr4);
	vcpu->cr4 = cr4;
	mutex_lock(&vcpu->kvm->lock);
	kvm_mmu_reset_context(vcpu);
	mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr4);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (is_long_mode(vcpu)) {
		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
			inject_gp(vcpu);
			return;
		}
	} else {
		if (is_pae(vcpu)) {
			if (cr3 & CR3_PAE_RESERVED_BITS) {
				printk(KERN_DEBUG
				       "set_cr3: #GP, reserved bits\n");
				inject_gp(vcpu);
				return;
			}
			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
				       "reserved bits\n");
				inject_gp(vcpu);
				return;
			}
		}
		/*
		 * We don't check reserved bits in nonpae mode, because
		 * this isn't enforced, and VMware depends on this.
		 */
	}

	mutex_lock(&vcpu->kvm->lock);

	/*
	 * Does the new cr3 value map to physical memory? (Note, we
	 * catch an invalid cr3 even in real-mode, because it would
	 * cause trouble later on when we turn on paging anyway.)
	 *
	 * A real CPU would silently accept an invalid cr3 and would
	 * attempt to use it - with largely undefined (and often hard
	 * to debug) behavior on the guest side.
	 */
	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
		inject_gp(vcpu);
	else {
		vcpu->cr3 = cr3;
		vcpu->mmu.new_cr3(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr3);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
		inject_gp(vcpu);
		return;
	}
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->cr8 = cr8;
}
EXPORT_SYMBOL_GPL(set_cr8);

unsigned long get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->cr8;
}
EXPORT_SYMBOL_GPL(get_cr8);
/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER,
};

static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};
/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}

/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
{
	int i;

	vcpu_load(vcpu);

	for (i = 0; i < msrs->nmsrs; ++i)
		if (do_msr(vcpu, entries[i].index, &entries[i].data))
			break;

	vcpu_put(vcpu);

	return i;
}
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = vmalloc(size);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	vfree(entries);
out:
	return r;
}
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {
	case KVM_GET_MSR_INDEX_LIST: {
		struct kvm_msr_list __user *user_msr_list = argp;
		struct kvm_msr_list msr_list;
		unsigned n;

		r = -EFAULT;
		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
			goto out;
		n = msr_list.nmsrs;
		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
			goto out;
		r = -E2BIG;
		if (n < num_msrs_to_save)
			goto out;
		r = -EFAULT;
		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
				 num_msrs_to_save * sizeof(u32)))
			goto out;
		if (copy_to_user(user_msr_list->indices
				 + num_msrs_to_save * sizeof(u32),
				 &emulated_msrs,
				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
			goto out;
		r = 0;
		break;
	}
	default:
		r = -EINVAL;
	}
out:
	return r;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	kvm_x86_ops->vcpu_load(vcpu, cpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
}
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	u64 efer;
	int i;
	struct kvm_cpuid_entry *e, *entry;

	rdmsrl(MSR_EFER, efer);
	entry = NULL;
	for (i = 0; i < vcpu->cpuid_nent; ++i) {
		e = &vcpu->cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		goto out;
	vcpu->cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	return 0;

out:
	return r;
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_GET_LAPIC: {
		struct kvm_lapic_state lapic;

		memset(&lapic, 0, sizeof lapic);
		r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &lapic, sizeof lapic))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_LAPIC: {
		struct kvm_lapic_state lapic;

		r = -EFAULT;
		if (copy_from_user(&lapic, argp, sizeof lapic))
			goto out;
		r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_CPUID: {
		struct kvm_cpuid __user *cpuid_arg = argp;
		struct kvm_cpuid cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_MSRS:
		r = msr_io(vcpu, argp, kvm_get_msr, 1);
		break;
	case KVM_SET_MSRS:
		r = msr_io(vcpu, argp, do_set_msr, 0);
		break;
	default:
		r = -EINVAL;
	}
out:
	return r;
}
static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
	int ret;

	if (addr > (unsigned int)(-3 * PAGE_SIZE))
		return -1;

	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
	return ret;
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
					 u32 kvm_nr_mmu_pages)
{
	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
	kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;

	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
	return kvm->n_alloc_mmu_pages;
}
/*
 * Set a new alias region.  Aliases map a portion of physical memory into
 * another portion.  This is useful for memory windows, for example the PC
 * VGA region.
 */
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
					 struct kvm_memory_alias *alias)
{
	int r, n;
	struct kvm_mem_alias *p;

	r = -EINVAL;
	/* General sanity checks */
	if (alias->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (alias->slot >= KVM_ALIAS_SLOTS)
		goto out;
	if (alias->guest_phys_addr + alias->memory_size
	    < alias->guest_phys_addr)
		goto out;
	if (alias->target_phys_addr + alias->memory_size
	    < alias->target_phys_addr)
		goto out;

	mutex_lock(&kvm->lock);

	p = &kvm->aliases[alias->slot];
	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
	p->npages = alias->memory_size >> PAGE_SHIFT;
	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
		if (kvm->aliases[n - 1].npages)
			break;
	kvm->naliases = n;

	kvm_mmu_zap_all(kvm);

	mutex_unlock(&kvm->lock);

	return 0;

out:
	return r;
}
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&chip->chip.pic,
		       &pic_irqchip(kvm)->pics[0],
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&chip->chip.pic,
		       &pic_irqchip(kvm)->pics[1],
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(&chip->chip.ioapic,
		       ioapic_irqchip(kvm),
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&pic_irqchip(kvm)->pics[0],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&pic_irqchip(kvm)->pics[1],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(ioapic_irqchip(kvm),
		       &chip->chip.ioapic,
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	kvm_pic_update_irq(pic_irqchip(kvm));
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_TSS_ADDR:
		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_MEMORY_REGION: {
		struct kvm_memory_region kvm_mem;
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
			goto out;
		kvm_userspace_mem.slot = kvm_mem.slot;
		kvm_userspace_mem.flags = kvm_mem.flags;
		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
		if (r)
			goto out;
		break;
	}
	case KVM_SET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
		if (r)
			goto out;
		break;
	case KVM_GET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
		break;
	case KVM_SET_MEMORY_ALIAS: {
		struct kvm_memory_alias alias;

		r = -EFAULT;
		if (copy_from_user(&alias, argp, sizeof alias))
			goto out;
		r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
		if (r)
			goto out;
		break;
	}
	case KVM_CREATE_IRQCHIP:
		r = -ENOMEM;
		kvm->vpic = kvm_create_pic(kvm);
		if (kvm->vpic) {
			r = kvm_ioapic_init(kvm);
			if (r) {
				kfree(kvm->vpic);
				kvm->vpic = NULL;
				goto out;
			}
		} else
			goto out;
		break;
	case KVM_IRQ_LINE: {
		struct kvm_irq_level irq_event;

		r = -EFAULT;
		if (copy_from_user(&irq_event, argp, sizeof irq_event))
			goto out;
		if (irqchip_in_kernel(kvm)) {
			mutex_lock(&kvm->lock);
			if (irq_event.irq < 16)
				kvm_pic_set_irq(pic_irqchip(kvm),
						irq_event.irq,
						irq_event.level);
			kvm_ioapic_set_irq(kvm->vioapic,
					   irq_event.irq,
					   irq_event.level);
			mutex_unlock(&kvm->lock);
			r = 0;
		}
		break;
	}
	case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip chip;

		r = -EFAULT;
		if (copy_from_user(&chip, argp, sizeof chip))
			goto out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto out;
		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &chip, sizeof chip))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip chip;

		r = -EFAULT;
		if (copy_from_user(&chip, argp, sizeof chip))
			goto out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto out;
		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		;
	}
out:
	return r;
}
static __init void kvm_init_msr_list(void)
{
	u32 dummy[2];
	unsigned i, j;

	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
			continue;
		if (j < i)
			msrs_to_save[j] = msrs_to_save[i];
		j++;
	}
	num_msrs_to_save = j;
}

__init void kvm_arch_init(void)
{
	kvm_init_msr_list();
}