Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (249 commits)
  KVM: Move apic timer migration away from critical section
  KVM: Put kvm_para.h include outside __KERNEL__
  KVM: Fix unbounded preemption latency
  KVM: Initialize the mmu caches only after verifying cpu support
  KVM: MMU: Fix dirty page setting for pages removed from rmap
  KVM: Portability: Move kvm_fpu to asm-x86/kvm.h
  KVM: x86 emulator: Only allow VMCALL/VMMCALL trapped by #UD
  KVM: MMU: Merge shadow level check in FNAME(fetch)
  KVM: MMU: Move kvm_free_some_pages() into critical section
  KVM: MMU: Switch to mmu spinlock
  KVM: MMU: Avoid calling gfn_to_page() in mmu_set_spte()
  KVM: Add kvm_read_guest_atomic()
  KVM: MMU: Concurrent guest walkers
  KVM: Disable vapic support on Intel machines with FlexPriority
  KVM: Accelerated apic support
  KVM: local APIC TPR access reporting facility
  KVM: Print data for unimplemented wrmsr
  KVM: MMU: Add cache miss statistic
  KVM: MMU: Coalesce remote tlb flushes
  KVM: Expose ioapic to ia64 save/restore APIs
  ...
This commit is contained in:
Linus Torvalds 2008-01-31 09:30:10 +11:00
commit 2c57ee6f92
41 changed files with 10688 additions and 8297 deletions

View File

@ -107,6 +107,7 @@ config ARCH_SUPPORTS_OPROFILE
bool bool
default y default y
select HAVE_KVM
config ZONE_DMA32 config ZONE_DMA32
bool bool
@ -1598,4 +1599,6 @@ source "security/Kconfig"
source "crypto/Kconfig" source "crypto/Kconfig"
source "arch/x86/kvm/Kconfig"
source "lib/Kconfig" source "lib/Kconfig"

View File

@ -7,6 +7,8 @@ else
KBUILD_DEFCONFIG := $(ARCH)_defconfig KBUILD_DEFCONFIG := $(ARCH)_defconfig
endif endif
core-$(CONFIG_KVM) += arch/x86/kvm/
# BITS is used as extension for files which are available in a 32 bit # BITS is used as extension for files which are available in a 32 bit
# and a 64 bit version to simplify shared Makefiles. # and a 64 bit version to simplify shared Makefiles.
# e.g.: obj-y += foo_$(BITS).o # e.g.: obj-y += foo_$(BITS).o

View File

@ -1,9 +1,12 @@
# #
# KVM configuration # KVM configuration
# #
config HAVE_KVM
bool
menuconfig VIRTUALIZATION menuconfig VIRTUALIZATION
bool "Virtualization" bool "Virtualization"
depends on X86 depends on HAVE_KVM || X86
default y default y
---help--- ---help---
Say Y here to get to see options for using your Linux host to run other Say Y here to get to see options for using your Linux host to run other
@ -16,7 +19,7 @@ if VIRTUALIZATION
config KVM config KVM
tristate "Kernel-based Virtual Machine (KVM) support" tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL depends on HAVE_KVM && EXPERIMENTAL
select PREEMPT_NOTIFIERS select PREEMPT_NOTIFIERS
select ANON_INODES select ANON_INODES
---help--- ---help---

View File

@ -2,7 +2,11 @@
# Makefile for Kernel-based Virtual Machine module # Makefile for Kernel-based Virtual Machine module
# #
kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o

View File

@ -28,6 +28,8 @@
#include <linux/mm.h> #include <linux/mm.h>
#include "irq.h" #include "irq.h"
#include <linux/kvm_host.h>
/* /*
* set irq level. If an edge is detected, then the IRR is set to 1 * set irq level. If an edge is detected, then the IRR is set to 1
*/ */
@ -181,10 +183,8 @@ int kvm_pic_read_irq(struct kvm_pic *s)
return intno; return intno;
} }
static void pic_reset(void *opaque) void kvm_pic_reset(struct kvm_kpic_state *s)
{ {
struct kvm_kpic_state *s = opaque;
s->last_irr = 0; s->last_irr = 0;
s->irr = 0; s->irr = 0;
s->imr = 0; s->imr = 0;
@ -209,7 +209,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
addr &= 1; addr &= 1;
if (addr == 0) { if (addr == 0) {
if (val & 0x10) { if (val & 0x10) {
pic_reset(s); /* init */ kvm_pic_reset(s); /* init */
/* /*
* deassert a pending interrupt * deassert a pending interrupt
*/ */

View File

@ -20,8 +20,8 @@
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/kvm_host.h>
#include "kvm.h"
#include "irq.h" #include "irq.h"
/* /*
@ -63,26 +63,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
} }
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
static void vcpu_kick_intr(void *info)
{
#ifdef DEBUG
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
#endif
}
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
int ipi_pcpu = vcpu->cpu;
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup;
}
if (vcpu->guest_mode)
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
}
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{ {
kvm_inject_apic_timer_irqs(vcpu); kvm_inject_apic_timer_irqs(vcpu);

88
arch/x86/kvm/irq.h Normal file
View File

@ -0,0 +1,88 @@
/*
* irq.h: in kernel interrupt controller related definitions
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
*/
#ifndef __IRQ_H
#define __IRQ_H
#include <linux/mm_types.h>
#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
#include "iodev.h"
#include "ioapic.h"
#include "lapic.h"
struct kvm;
struct kvm_vcpu;
typedef void irq_request_func(void *opaque, int level);
/*
 * Register and mode state of a single PIC chip.  struct kvm_pic embeds two
 * of these, one for the master and one for the slave.
 */
struct kvm_kpic_state {
u8 last_irr; /* edge detection */
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
u8 poll;
u8 special_mask;
u8 init_state;
u8 auto_eoi;
u8 rotate_on_auto_eoi;
u8 special_fully_nested_mode;
u8 init4; /* true if 4 byte init */
u8 elcr; /* PIIX edge/trigger selection */
u8 elcr_mask;
/* NOTE(review): presumably a back-pointer to the kvm_pic that embeds this chip -- confirm */
struct kvm_pic *pics_state;
};
/*
 * The in-kernel PIC pair: the two chip states plus the hook and I/O device
 * that connect them to the rest of the VM.
 */
struct kvm_pic {
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
/*
 * Function of type irq_request_func, i.e. (void *opaque, int level).
 * NOTE(review): irq_request_opaque is presumably the opaque argument
 * handed to it -- confirm against the caller in i8259.c.
 */
irq_request_func *irq_request;
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
int kvm_pic_read_irq(struct kvm_pic *s);
void kvm_pic_update_irq(struct kvm_pic *s);
/*
 * Fetch the PIC pointer kept in the VM's arch state.  The result may be
 * NULL; irqchip_in_kernel() relies on that to detect a missing irqchip.
 */
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
	struct kvm_pic *pic = kvm->arch.vpic;

	return pic;
}
/*
 * Report whether this VM has an in-kernel PIC.
 * Returns 1 when pic_irqchip() yields a non-NULL pointer, 0 otherwise.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
	if (pic_irqchip(kvm) == NULL)
		return 0;

	return 1;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
#endif

View File

@ -4,10 +4,10 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/list.h> #include <linux/list.h>
#include <linux/kvm_host.h>
#include <asm/msr.h> #include <asm/msr.h>
#include "svm.h" #include "svm.h"
#include "kvm.h"
static const u32 host_save_user_msrs[] = { static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64

View File

@ -17,7 +17,7 @@
* the COPYING file in the top-level directory. * the COPYING file in the top-level directory.
*/ */
#include "kvm.h" #include <linux/kvm_host.h>
#include <linux/kvm.h> #include <linux/kvm.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/highmem.h> #include <linux/highmem.h>
@ -56,6 +56,7 @@
#define VEC_POS(v) ((v) & (32 - 1)) #define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4) #define REG_POS(v) (((v) >> 5) << 4)
static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
{ {
return *((u32 *) (apic->regs + reg_off)); return *((u32 *) (apic->regs + reg_off));
@ -88,7 +89,7 @@ static inline void apic_clear_vector(int vec, void *bitmap)
static inline int apic_hw_enabled(struct kvm_lapic *apic) static inline int apic_hw_enabled(struct kvm_lapic *apic)
{ {
return (apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE; return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
} }
static inline int apic_sw_enabled(struct kvm_lapic *apic) static inline int apic_sw_enabled(struct kvm_lapic *apic)
@ -172,7 +173,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
int highest_irr; int highest_irr;
if (!apic) if (!apic)
@ -183,8 +184,10 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig) int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
{ {
struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic_test_and_set_irr(vec, apic)) { if (!apic_test_and_set_irr(vec, apic)) {
/* a new pending irq is set in IRR */ /* a new pending irq is set in IRR */
if (trig) if (trig)
@ -268,7 +271,7 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode) int short_hand, int dest, int dest_mode)
{ {
int result = 0; int result = 0;
struct kvm_lapic *target = vcpu->apic; struct kvm_lapic *target = vcpu->arch.apic;
apic_debug("target %p, source %p, dest 0x%x, " apic_debug("target %p, source %p, dest 0x%x, "
"dest_mode 0x%x, short_hand 0x%x", "dest_mode 0x%x, short_hand 0x%x",
@ -335,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
} else } else
apic_clear_vector(vector, apic->regs + APIC_TMR); apic_clear_vector(vector, apic->regs + APIC_TMR);
if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE) if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) { else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) {
vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
if (waitqueue_active(&vcpu->wq)) if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq); wake_up_interruptible(&vcpu->wq);
} }
@ -359,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_INIT: case APIC_DM_INIT:
if (level) { if (level) {
if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE) if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
printk(KERN_DEBUG printk(KERN_DEBUG
"INIT on a runnable vcpu %d\n", "INIT on a runnable vcpu %d\n",
vcpu->vcpu_id); vcpu->vcpu_id);
vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED; vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED;
kvm_vcpu_kick(vcpu); kvm_vcpu_kick(vcpu);
} else { } else {
printk(KERN_DEBUG printk(KERN_DEBUG
@ -376,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_STARTUP: case APIC_DM_STARTUP:
printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
vcpu->vcpu_id, vector); vcpu->vcpu_id, vector);
if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) { if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
vcpu->sipi_vector = vector; vcpu->arch.sipi_vector = vector;
vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED; vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
if (waitqueue_active(&vcpu->wq)) if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq); wake_up_interruptible(&vcpu->wq);
} }
@ -392,15 +395,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
return result; return result;
} }
struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
unsigned long bitmap) unsigned long bitmap)
{ {
int vcpu_id;
int last; int last;
int next; int next;
struct kvm_lapic *apic; struct kvm_lapic *apic = NULL;
last = kvm->round_robin_prev_vcpu; last = kvm->arch.round_robin_prev_vcpu;
next = last; next = last;
do { do {
@ -408,25 +410,30 @@ struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
next = 0; next = 0;
if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap)) if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
continue; continue;
apic = kvm->vcpus[next]->apic; apic = kvm->vcpus[next]->arch.apic;
if (apic && apic_enabled(apic)) if (apic && apic_enabled(apic))
break; break;
apic = NULL; apic = NULL;
} while (next != last); } while (next != last);
kvm->round_robin_prev_vcpu = next; kvm->arch.round_robin_prev_vcpu = next;
if (!apic) { if (!apic)
vcpu_id = ffs(bitmap) - 1; printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
if (vcpu_id < 0) {
vcpu_id = 0;
printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
}
apic = kvm->vcpus[vcpu_id]->apic;
}
return apic; return apic;
} }
/*
 * Choose the vcpu that should receive a lowest-priority interrupt.
 *
 * The selection itself is delegated to kvm_apic_round_robin(); this wrapper
 * only translates the chosen local APIC into its owning vcpu.  Returns NULL
 * when the round-robin search produced no APIC.
 */
struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
		unsigned long bitmap)
{
	struct kvm_lapic *chosen = kvm_apic_round_robin(kvm, vector, bitmap);

	return chosen ? chosen->vcpu : NULL;
}
static void apic_set_eoi(struct kvm_lapic *apic) static void apic_set_eoi(struct kvm_lapic *apic)
{ {
int vector = apic_find_highest_isr(apic); int vector = apic_find_highest_isr(apic);
@ -458,7 +465,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
unsigned int delivery_mode = icr_low & APIC_MODE_MASK; unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
unsigned int vector = icr_low & APIC_VECTOR_MASK; unsigned int vector = icr_low & APIC_VECTOR_MASK;
struct kvm_lapic *target; struct kvm_vcpu *target;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
unsigned long lpr_map = 0; unsigned long lpr_map = 0;
int i; int i;
@ -474,20 +481,20 @@ static void apic_send_ipi(struct kvm_lapic *apic)
if (!vcpu) if (!vcpu)
continue; continue;
if (vcpu->apic && if (vcpu->arch.apic &&
apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
if (delivery_mode == APIC_DM_LOWEST) if (delivery_mode == APIC_DM_LOWEST)
set_bit(vcpu->vcpu_id, &lpr_map); set_bit(vcpu->vcpu_id, &lpr_map);
else else
__apic_accept_irq(vcpu->apic, delivery_mode, __apic_accept_irq(vcpu->arch.apic, delivery_mode,
vector, level, trig_mode); vector, level, trig_mode);
} }
} }
if (delivery_mode == APIC_DM_LOWEST) { if (delivery_mode == APIC_DM_LOWEST) {
target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map); target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
if (target != NULL) if (target != NULL)
__apic_accept_irq(target, delivery_mode, __apic_accept_irq(target->arch.apic, delivery_mode,
vector, level, trig_mode); vector, level, trig_mode);
} }
} }
@ -544,6 +551,23 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
return tmcct; return tmcct;
} }
/*
 * Flag a TPR access for reporting: raise KVM_REQ_REPORT_TPR_ACCESS on the
 * vcpu and record the access in the tpr_access area of its kvm_run.
 *
 * @apic:  local APIC whose TPR was touched
 * @write: true for a write access, false for a read
 */
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *v = apic->vcpu;
	struct kvm_run *kr = v->run;

	set_bit(KVM_REQ_REPORT_TPR_ACCESS, &v->requests);

	/* NOTE(review): presumably refreshes v->arch.rip before it is sampled -- confirm */
	kvm_x86_ops->cache_regs(v);

	kr->tpr_access.rip = v->arch.rip;
	kr->tpr_access.is_write = write;
}
/*
 * Report a TPR access only if the vcpu has tpr_access_reporting switched on;
 * otherwise do nothing.
 */
static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (!apic->vcpu->arch.tpr_access_reporting)
		return;

	__report_tpr_access(apic, write);
}
static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{ {
u32 val = 0; u32 val = 0;
@ -561,6 +585,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
val = apic_get_tmcct(apic); val = apic_get_tmcct(apic);
break; break;
case APIC_TASKPRI:
report_tpr_access(apic, false);
/* fall thru */
default: default:
apic_update_ppr(apic); apic_update_ppr(apic);
val = apic_get_reg(apic, offset); val = apic_get_reg(apic, offset);
@ -670,6 +697,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
break; break;
case APIC_TASKPRI: case APIC_TASKPRI:
report_tpr_access(apic, true);
apic_set_tpr(apic, val & 0xff); apic_set_tpr(apic, val & 0xff);
break; break;
@ -762,19 +790,17 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
return ret; return ret;
} }
void kvm_free_apic(struct kvm_lapic *apic) void kvm_free_lapic(struct kvm_vcpu *vcpu)
{ {
if (!apic) if (!vcpu->arch.apic)
return; return;
hrtimer_cancel(&apic->timer.dev); hrtimer_cancel(&vcpu->arch.apic->timer.dev);
if (apic->regs_page) { if (vcpu->arch.apic->regs_page)
__free_page(apic->regs_page); __free_page(vcpu->arch.apic->regs_page);
apic->regs_page = 0;
}
kfree(apic); kfree(vcpu->arch.apic);
} }
/* /*
@ -785,16 +811,17 @@ void kvm_free_apic(struct kvm_lapic *apic)
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{ {
struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic) if (!apic)
return; return;
apic_set_tpr(apic, ((cr8 & 0x0f) << 4)); apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
| (apic_get_reg(apic, APIC_TASKPRI) & 4));
} }
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
u64 tpr; u64 tpr;
if (!apic) if (!apic)
@ -807,29 +834,29 @@ EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{ {
struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic) { if (!apic) {
value |= MSR_IA32_APICBASE_BSP; value |= MSR_IA32_APICBASE_BSP;
vcpu->apic_base = value; vcpu->arch.apic_base = value;
return; return;
} }
if (apic->vcpu->vcpu_id) if (apic->vcpu->vcpu_id)
value &= ~MSR_IA32_APICBASE_BSP; value &= ~MSR_IA32_APICBASE_BSP;
vcpu->apic_base = value; vcpu->arch.apic_base = value;
apic->base_address = apic->vcpu->apic_base & apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE; MSR_IA32_APICBASE_BASE;
/* with FSB delivery interrupt, we can restart APIC functionality */ /* with FSB delivery interrupt, we can restart APIC functionality */
apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
"0x%lx.\n", apic->apic_base, apic->base_address); "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
} }
u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu) u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
{ {
return vcpu->apic_base; return vcpu->arch.apic_base;
} }
EXPORT_SYMBOL_GPL(kvm_lapic_get_base); EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
@ -841,7 +868,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_debug("%s\n", __FUNCTION__); apic_debug("%s\n", __FUNCTION__);
ASSERT(vcpu); ASSERT(vcpu);
apic = vcpu->apic; apic = vcpu->arch.apic;
ASSERT(apic != NULL); ASSERT(apic != NULL);
/* Stop the timer in case it's a reset to an active apic */ /* Stop the timer in case it's a reset to an active apic */
@ -872,19 +899,19 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
update_divide_count(apic); update_divide_count(apic);
atomic_set(&apic->timer.pending, 0); atomic_set(&apic->timer.pending, 0);
if (vcpu->vcpu_id == 0) if (vcpu->vcpu_id == 0)
vcpu->apic_base |= MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
apic_update_ppr(apic); apic_update_ppr(apic);
apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
vcpu, kvm_apic_id(apic), vcpu, kvm_apic_id(apic),
vcpu->apic_base, apic->base_address); vcpu->arch.apic_base, apic->base_address);
} }
EXPORT_SYMBOL_GPL(kvm_lapic_reset); EXPORT_SYMBOL_GPL(kvm_lapic_reset);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
int ret = 0; int ret = 0;
if (!apic) if (!apic)
@ -908,9 +935,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
wait_queue_head_t *q = &apic->vcpu->wq; wait_queue_head_t *q = &apic->vcpu->wq;
atomic_inc(&apic->timer.pending); atomic_inc(&apic->timer.pending);
if (waitqueue_active(q)) if (waitqueue_active(q)) {
{ apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
wake_up_interruptible(q); wake_up_interruptible(q);
} }
if (apic_lvtt_period(apic)) { if (apic_lvtt_period(apic)) {
@ -956,13 +982,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
if (!apic) if (!apic)
goto nomem; goto nomem;
vcpu->apic = apic; vcpu->arch.apic = apic;
apic->regs_page = alloc_page(GFP_KERNEL); apic->regs_page = alloc_page(GFP_KERNEL);
if (apic->regs_page == NULL) { if (apic->regs_page == NULL) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n", printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
vcpu->vcpu_id); vcpu->vcpu_id);
goto nomem; goto nomem_free_apic;
} }
apic->regs = page_address(apic->regs_page); apic->regs = page_address(apic->regs_page);
memset(apic->regs, 0, PAGE_SIZE); memset(apic->regs, 0, PAGE_SIZE);
@ -971,7 +997,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
apic->timer.dev.function = apic_timer_fn; apic->timer.dev.function = apic_timer_fn;
apic->base_address = APIC_DEFAULT_PHYS_BASE; apic->base_address = APIC_DEFAULT_PHYS_BASE;
vcpu->apic_base = APIC_DEFAULT_PHYS_BASE; vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
kvm_lapic_reset(vcpu); kvm_lapic_reset(vcpu);
apic->dev.read = apic_mmio_read; apic->dev.read = apic_mmio_read;
@ -980,15 +1006,16 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->dev.private = apic; apic->dev.private = apic;
return 0; return 0;
nomem_free_apic:
kfree(apic);
nomem: nomem:
kvm_free_apic(apic);
return -ENOMEM; return -ENOMEM;
} }
EXPORT_SYMBOL_GPL(kvm_create_lapic); EXPORT_SYMBOL_GPL(kvm_create_lapic);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
int highest_irr; int highest_irr;
if (!apic || !apic_enabled(apic)) if (!apic || !apic_enabled(apic))
@ -1004,11 +1031,11 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
{ {
u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0); u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
int r = 0; int r = 0;
if (vcpu->vcpu_id == 0) { if (vcpu->vcpu_id == 0) {
if (!apic_hw_enabled(vcpu->apic)) if (!apic_hw_enabled(vcpu->arch.apic))
r = 1; r = 1;
if ((lvt0 & APIC_LVT_MASKED) == 0 && if ((lvt0 & APIC_LVT_MASKED) == 0 &&
GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@ -1019,7 +1046,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (apic && apic_lvt_enabled(apic, APIC_LVTT) && if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
atomic_read(&apic->timer.pending) > 0) { atomic_read(&apic->timer.pending) > 0) {
@ -1030,7 +1057,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{ {
struct kvm_lapic *apic = vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
apic->timer.last_update = ktime_add_ns( apic->timer.last_update = ktime_add_ns(
@ -1041,7 +1068,7 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{ {
int vector = kvm_apic_has_interrupt(vcpu); int vector = kvm_apic_has_interrupt(vcpu);
struct kvm_lapic *apic = vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
if (vector == -1) if (vector == -1)
return -1; return -1;
@ -1054,9 +1081,9 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
apic->base_address = vcpu->apic_base & apic->base_address = vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE; MSR_IA32_APICBASE_BASE;
apic_set_reg(apic, APIC_LVR, APIC_VERSION); apic_set_reg(apic, APIC_LVR, APIC_VERSION);
apic_update_ppr(apic); apic_update_ppr(apic);
@ -1065,9 +1092,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
start_apic_timer(apic); start_apic_timer(apic);
} }
void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{ {
struct kvm_lapic *apic = vcpu->apic; struct kvm_lapic *apic = vcpu->arch.apic;
struct hrtimer *timer; struct hrtimer *timer;
if (!apic) if (!apic)
@ -1077,4 +1104,51 @@ void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
if (hrtimer_cancel(timer)) if (hrtimer_cancel(timer))
hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
} }
EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
u32 data;
void *vapic;
if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
return;
vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
kunmap_atomic(vapic, KM_USER0);
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
u32 data, tpr;
int max_irr, max_isr;
struct kvm_lapic *apic;
void *vapic;
if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
return;
apic = vcpu->arch.apic;
tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
max_irr = apic_find_highest_irr(apic);
if (max_irr < 0)
max_irr = 0;
max_isr = apic_find_highest_isr(apic);
if (max_isr < 0)
max_isr = 0;
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
kunmap_atomic(vapic, KM_USER0);
}
/*
 * Record the guest-physical address of the vapic area in the vcpu's local
 * APIC.  Silently ignored when the VM has no in-kernel irqchip.
 */
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
	if (irqchip_in_kernel(vcpu->kvm))
		vcpu->arch.apic->vapic_addr = vapic_addr;
}

50
arch/x86/kvm/lapic.h Normal file
View File

@ -0,0 +1,50 @@
#ifndef __KVM_X86_LAPIC_H
#define __KVM_X86_LAPIC_H
#include "iodev.h"
#include <linux/kvm_host.h>
/*
 * Per-vcpu local APIC state (reached through vcpu->arch.apic).
 */
struct kvm_lapic {
/* set from the APIC base MSR value masked with MSR_IA32_APICBASE_BASE */
unsigned long base_address;
struct kvm_io_device dev;
struct {
/* incremented by the timer callback each time the timer fires */
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
/* page backing the register file; regs is page_address(regs_page) */
struct page *regs_page;
void *regs;
/* guest-physical address set by kvm_lapic_set_vapic_addr(); 0 means unset */
gpa_t vapic_addr;
/* page the vapic word is read from / written to at offset_in_page(vapic_addr) */
struct page *vapic_page;
};
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
#endif

1885
arch/x86/kvm/mmu.c Normal file

File diff suppressed because it is too large Load Diff

44
arch/x86/kvm/mmu.h Normal file
View File

@ -0,0 +1,44 @@
#ifndef __KVM_X86_MMU_H
#define __KVM_X86_MMU_H
#include <linux/kvm_host.h>
/*
 * Reclaim shadow MMU pages when the VM's free-page count has dropped below
 * KVM_MIN_FREE_MMU_PAGES; in the common case there is nothing to do.
 */
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
	if (likely(vcpu->kvm->arch.n_free_mmu_pages >= KVM_MIN_FREE_MMU_PAGES))
		return;

	__kvm_mmu_free_some_pages(vcpu);
}
/*
 * Make sure the vcpu has a valid MMU root.
 *
 * Returns 0 immediately when root_hpa is already valid; otherwise returns
 * whatever kvm_mmu_load() reports.
 */
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
	if (unlikely(vcpu->arch.mmu.root_hpa == INVALID_PAGE))
		return kvm_mmu_load(vcpu);

	return 0;
}
/*
 * Nonzero when the guest has EFER.LME set in its shadow EFER.
 * Always 0 on builds without CONFIG_X86_64.
 */
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifndef CONFIG_X86_64
	return 0;
#else
	return vcpu->arch.shadow_efer & EFER_LME;
#endif
}
/* Nonzero when the guest's CR4 has the PAE bit set. */
static inline int is_pae(struct kvm_vcpu *vcpu)
{
	int pae = vcpu->arch.cr4 & X86_CR4_PAE;

	return pae;
}
/* Nonzero when the guest's CR4 has the PSE bit set. */
static inline int is_pse(struct kvm_vcpu *vcpu)
{
	int pse = vcpu->arch.cr4 & X86_CR4_PSE;

	return pse;
}
/* Nonzero when the guest's CR0 has the PG (paging enabled) bit set. */
static inline int is_paging(struct kvm_vcpu *vcpu)
{
	int paging = vcpu->arch.cr0 & X86_CR0_PG;

	return paging;
}
#endif

484
arch/x86/kvm/paging_tmpl.h Normal file
View File

@ -0,0 +1,484 @@
/*
* Kernel-based Virtual Machine driver for Linux
*
* This module enables machines with Intel VT-x extensions to run virtual
* machines without emulation or binary translation.
*
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
/*
* We need the mmu code to access both 32-bit and 64-bit guest ptes,
* so the code in this file is compiled twice, once per pte size.
*/
/*
 * Per-PTTYPE name and constant selection.  The includer defines PTTYPE as
 * 64 or 32 before including this file; everything below maps the generic
 * names used in the template onto the matching width-specific ones.
 */
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
/* FNAME(x) expands to paging64_x so each instantiation gets unique symbols */
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define CMPXCHG cmpxchg
#else
/* 64-bit ptes on a non-x86_64 build use the explicit 64-bit compare-exchange */
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 2
#endif
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
/* FNAME(x) expands to paging32_x */
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
/*
 * Shadow indexing uses PT64_INDEX even for 32-bit guest ptes.
 * NOTE(review): presumably because shadow tables always use the 64-bit
 * layout -- confirm against mmu.c.
 */
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
#define CMPXCHG cmpxchg
#else
#error Invalid PTTYPE value
#endif
/* route the plain helper names through FNAME so they are per-PTTYPE too */
#define gpte_to_gfn FNAME(gpte_to_gfn)
#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 *
 * It is filled in by FNAME(walk_addr). The three arrays are indexed by
 * (level - 1), one slot per guest paging level visited.
 */
struct guest_walker {
/* Level at which the walk currently is / where it stopped. */
int level;
/* Guest frame number of the page table read at each level. */
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
/* Guest pte value read at each level. */
pt_element_t ptes[PT_MAX_FULL_LEVELS];
/* Guest physical address of the pte read at each level. */
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
/* Access bits accumulated over the levels above the final pte. */
unsigned pt_access;
/* pt_access further restricted by the final pte's own access bits. */
unsigned pte_access;
/* Guest frame number the walked address translates to. */
gfn_t gfn;
/* PFERR_* bits describing the fault; set only when the walk fails. */
u32 error_code;
};
/*
 * Frame number addressed by a guest pte: keep the PT_BASE_ADDR_MASK bits
 * and drop the low PAGE_SHIFT bits.
 */
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
	pt_element_t frame_bits = gpte & PT_BASE_ADDR_MASK;

	return frame_bits >> PAGE_SHIFT;
}
/*
 * Frame number addressed by a guest directory entry: keep the
 * PT_DIR_BASE_ADDR_MASK bits and drop the low PAGE_SHIFT bits.
 */
static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
{
	pt_element_t frame_bits = gpte & PT_DIR_BASE_ADDR_MASK;

	return frame_bits >> PAGE_SHIFT;
}
/*
 * Compare-and-exchange one entry of a guest page table.
 *
 * Maps the guest page @table_gfn, and uses CMPXCHG to replace entry @index
 * with @new_pte provided it still holds @orig_pte. The page is released as
 * dirty afterwards regardless of the outcome.
 *
 * Returns true when the entry did NOT hold @orig_pte (i.e. the exchange
 * did not take place), false when the update was applied.
 */
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
				gfn_t table_gfn, unsigned index,
				pt_element_t orig_pte, pt_element_t new_pte)
{
	struct page *table_page = gfn_to_page(kvm, table_gfn);
	pt_element_t *entries = kmap_atomic(table_page, KM_USER0);
	pt_element_t seen = CMPXCHG(&entries[index], orig_pte, new_pte);

	kunmap_atomic(entries, KM_USER0);
	kvm_release_page_dirty(table_page);

	return seen != orig_pte;
}
/*
 * Translate a guest pte into ACC_* access bits.
 *
 * Execute permission is granted by default and the pte's writable/user
 * bits are copied through. In the 64-bit instantiation, when is_nx()
 * reports NX as enabled, the pte's NX bit (shifted down by PT64_NX_SHIFT)
 * is used to clear the corresponding access bit.
 */
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned perms = ACC_EXEC_MASK;

	perms |= gpte & (PT_WRITABLE_MASK | PT_USER_MASK);
#if PTTYPE == 64
	if (is_nx(vcpu))
		perms &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return perms;
}
/*
 * Fetch a guest pte for a guest virtual address.
 *
 * Walks the guest page tables for @addr starting from CR3 (or, for a
 * 64-bit-pte guest outside long mode, from the matching pdptrs[] entry),
 * checking at every level that the entry is present and permits the access
 * described by @write_fault / @user_fault / @fetch_fault. Accessed bits
 * (and, for a write fault, the dirty bit of the final pte) are set in guest
 * memory with FNAME(cmpxchg_gpte); if the guest entry changed underneath
 * us the whole walk is restarted.
 *
 * Returns 1 on success with @walker describing every level visited, the
 * resulting gfn and the accumulated access rights. Returns 0 on failure
 * with walker->error_code holding the PFERR_* bits for the fault.
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, pte_access;
	gpa_t pte_gpa;

	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walk:
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		/* Top level comes from the pdptrs[] entry picked by bits 31:30. */
		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
		if (!is_present_pte(pte))
			goto not_present;
		--walker->level;
	}
#endif
	/* cr3 lives in vcpu->arch, like every other access in this file. */
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		/* "pte" still holds the parent entry: it points at this table. */
		table_gfn = gpte_to_gfn(pte);
		pte_gpa = gfn_to_gpa(table_gfn);
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;
		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
			 walker->level - 1, table_gfn);

		/*
		 * NOTE(review): the return value of kvm_read_guest() is not
		 * checked; on failure "pte" keeps the parent entry's value.
		 * Confirm whether a failed read should be treated as
		 * not-present.
		 */
		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));

		if (!is_present_pte(pte))
			goto not_present;

		/* Read-only entry: a write faults for user mode or with WP set. */
		if (write_fault && !is_writeble_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

		if (user_fault && !(pte & PT_USER_MASK))
			goto access_error;

#if PTTYPE == 64
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
			goto access_error;
#endif

		if (!(pte & PT_ACCESSED_MASK)) {
			mark_page_dirty(vcpu->kvm, table_gfn);
			/* Guest pte changed under us: restart the walk. */
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if (walker->level == PT_PAGE_TABLE_LEVEL) {
			walker->gfn = gpte_to_gfn(pte);
			break;
		}

		/* Directory entry with the page-size bit: the walk ends here. */
		if (walker->level == PT_DIRECTORY_LEVEL
		    && (pte & PT_PAGE_SIZE_MASK)
		    && (PTTYPE == 64 || is_pse(vcpu))) {
			walker->gfn = gpte_to_gfn_pde(pte);
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
			if (PTTYPE == 32 && is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);
			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

	if (write_fault && !is_dirty_pte(pte)) {
		bool ret;

		mark_page_dirty(vcpu->kvm, table_gfn);
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		/* Guest pte changed under us: restart the walk. */
		if (ret)
			goto walk;
		pte |= PT_DIRTY_MASK;
		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __FUNCTION__, (u64)pte, pt_access, pte_access);
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
	return 0;
}
/*
 * Update one shadow pte after the guest wrote @bytes bytes of a guest pte.
 *
 * @pte points at the new guest pte value and @offset_in_pte is the offset
 * of the write inside that pte. If the new guest pte lacks the present or
 * accessed bit, the spte is at most reset to shadow_notrap_nonpresent_pte
 * (only for a non-present pte written at offset 0). Otherwise, for a write
 * covering a whole pte whose gfn matches vcpu->arch.update_pte.gfn, the
 * page stashed in vcpu->arch.update_pte.page is referenced and mapped with
 * mmu_set_spte().
 */
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
			      u64 *spte, const void *pte, int bytes,
			      int offset_in_pte)
{
	pt_element_t gpte = *(const pt_element_t *)pte;
	unsigned pte_access;
	struct page *npage;
	gfn_t gfn;

	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (offset_in_pte || is_present_pte(gpte))
			return;
		set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
		return;
	}

	/* Partial pte writes are not turned into a mapping. */
	if (bytes < sizeof(pt_element_t))
		return;

	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);

	gfn = gpte_to_gfn(gpte);
	if (gfn != vcpu->arch.update_pte.gfn)
		return;

	npage = vcpu->arch.update_pte.page;
	if (!npage)
		return;

	get_page(npage);
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
		     gpte & PT_DIRTY_MASK, NULL, gfn, npage);
}
/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 *
 * Descends the shadow page tables for @addr from the shadow root down to
 * PT_PAGE_TABLE_LEVEL, obtaining any missing intermediate shadow page with
 * kvm_mmu_get_page() and linking it into its parent entry, then installs
 * the final shadow pte for walker->gfn / @page with mmu_set_spte().
 *
 * Returns a pointer to the last-level shadow pte, or NULL when the guest
 * pte recorded in @walker is not present, or when the guest pte backing a
 * newly created shadow page cannot be re-read or no longer matches the
 * value seen during the walk (in that case @page is released with
 * kvm_release_page_clean()).
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *walker,
int user_fault, int write_fault, int *ptwrite,
struct page *page)
{
hpa_t shadow_addr;
int level;
u64 *shadow_ent;
unsigned access = walker->pt_access;
/*
 * NOTE(review): unlike the mismatch path further down, this early return
 * does not release @page -- confirm it cannot be reached with a page
 * reference held.
 */
if (!is_present_pte(walker->ptes[walker->level - 1]))
return NULL;
shadow_addr = vcpu->arch.mmu.root_hpa;
level = vcpu->arch.mmu.shadow_root_level;
/* PT32E shadow root: start from the pae_root[] entry picked by bits 31:30. */
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
--level;
}
for (; ; level--) {
u32 index = SHADOW_PT_INDEX(addr, level);
struct kvm_mmu_page *shadow_page;
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
bool new_page = 0;
shadow_ent = ((u64 *)__va(shadow_addr)) + index;
/* Reached the last level: shadow_ent is the entry to fill in below. */
if (level == PT_PAGE_TABLE_LEVEL)
break;
/* A shadow table already hangs off this entry: just descend into it. */
if (is_shadow_present_pte(*shadow_ent)) {
shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
continue;
}
/*
 * The next shadow level is the last one but the guest walk stopped at
 * directory level: the new shadow page has no guest page table behind it
 * ("metaphysical"); its gfn is taken from the guest directory entry and
 * write access is dropped while that entry is not dirty.
 */
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
if (!is_dirty_pte(walker->ptes[level - 1]))
access &= ~ACC_WRITE_MASK;
table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
} else {
metaphysical = 0;
table_gfn = walker->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, access,
shadow_ent, &new_page);
/*
 * For a freshly created shadow of a real guest table, re-read the guest
 * pte recorded by the walker; give up if the read fails or the value
 * differs from what the walk saw.
 */
if (new_page && !metaphysical) {
int r;
pt_element_t curr_pte;
r = kvm_read_guest_atomic(vcpu->kvm,
walker->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != walker->ptes[level - 2]) {
kvm_release_page_clean(page);
return NULL;
}
}
/* Link the shadow page into its parent entry with full permissions. */
shadow_addr = __pa(shadow_page->spt);
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*shadow_ent = shadow_pte;
}
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
user_fault, write_fault,
walker->ptes[walker->level-1] & PT_DIRTY_MASK,
ptwrite, walker->gfn, page);
return shadow_ent;
}
/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 * The guest walk runs under current->mm->mmap_sem (read); the shadow page
 * table update additionally runs under vcpu->kvm->mmu_lock.
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *shadow_pte;
	int write_pt = 0;
	int r;
	struct page *page;

	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	down_read(&current->mm->mmap_sem);
	/*
	 * Look up the shadow pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __FUNCTION__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		up_read(&current->mm->mmap_sem);
		return 0;
	}

	page = gfn_to_page(vcpu->kvm, walker.gfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	kvm_mmu_free_some_pages(vcpu);
	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
				  &write_pt, page);
	/*
	 * FNAME(fetch) may return NULL, so the debug print must not
	 * dereference shadow_pte unconditionally.
	 */
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
		 shadow_pte, shadow_pte ? *shadow_pte : (u64)0, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	/*
	 * mmio: emulate if accessible, otherwise its a guest fault.
	 */
	if (shadow_pte && is_io_pte(*shadow_pte)) {
		spin_unlock(&vcpu->kvm->mmu_lock);
		up_read(&current->mm->mmap_sem);
		return 1;
	}

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);
	up_read(&current->mm->mmap_sem);

	return write_pt;
}
/*
 * Translate a guest virtual address into a guest physical address.
 *
 * Performs a guest page table walk with no write, user or fetch fault
 * flags set. Returns UNMAPPED_GVA when the walk fails; otherwise the
 * physical address of the resulting frame combined with the in-page
 * offset of @vaddr.
 */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
	gpa_t gpa;

	if (!FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0))
		return UNMAPPED_GVA;

	gpa = gfn_to_gpa(walker.gfn);
	return gpa | (vaddr & ~PAGE_MASK);
}
/*
 * Pre-populate every entry of a new shadow page.
 *
 * Metaphysical pages, and (in the 32-bit instantiation) pages above the
 * last level, are handed to nonpaging_prefetch_page(). Otherwise each
 * matching guest pte is read: an entry whose guest pte is not present gets
 * shadow_notrap_nonpresent_pte, while a present guest pte -- or one that
 * could not be read -- gets shadow_trap_nonpresent_pte.
 */
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int slot, offset = 0, r = 0;
	pt_element_t guest_pte;
	gpa_t table_gpa;

	if (sp->role.metaphysical
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	/* A 32-bit guest table is shadowed in pieces selected by the quadrant. */
	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	table_gpa = gfn_to_gpa(sp->gfn);

	for (slot = 0; slot < PT64_ENT_PER_PAGE; ++slot) {
		gpa_t pte_gpa = table_gpa;

		pte_gpa += (slot+offset) * sizeof(pt_element_t);
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &guest_pte,
					  sizeof(pt_element_t));
		/* guest_pte is only inspected when the read succeeded. */
		if (!r && !is_present_pte(guest_pte))
			sp->spt[slot] = shadow_notrap_nonpresent_pte;
		else
			sp->spt[slot] = shadow_trap_nonpresent_pte;
	}
}
/*
 * Drop every per-instantiation macro defined at the top of this file so the
 * template can be included again with a different PTTYPE.
 */
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef SHADOW_PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_pde
#undef CMPXCHG

View File

@ -1,3 +1,6 @@
#ifndef __SEGMENT_DESCRIPTOR_H
#define __SEGMENT_DESCRIPTOR_H
struct segment_descriptor { struct segment_descriptor {
u16 limit_low; u16 limit_low;
u16 base_low; u16 base_low;
@ -14,4 +17,13 @@ struct segment_descriptor {
u8 base_high; u8 base_high;
} __attribute__((packed)); } __attribute__((packed));
#ifdef CONFIG_X86_64
/* LDT or TSS descriptor in the GDT. 16 bytes. */
struct segment_descriptor_64 {
struct segment_descriptor s;
u32 base_higher;
u32 pad_zero;
};
#endif
#endif

View File

@ -13,10 +13,11 @@
* the COPYING file in the top-level directory. * the COPYING file in the top-level directory.
* *
*/ */
#include <linux/kvm_host.h>
#include "kvm_svm.h" #include "kvm_svm.h"
#include "x86_emulate.h"
#include "irq.h" #include "irq.h"
#include "mmu.h"
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
@ -42,9 +43,6 @@ MODULE_LICENSE("GPL");
#define SEG_TYPE_LDT 2 #define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3 #define SEG_TYPE_BUSY_TSS16 3
#define KVM_EFER_LMA (1 << 10)
#define KVM_EFER_LME (1 << 8)
#define SVM_FEATURE_NPT (1 << 0) #define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_LBRV (1 << 1)
#define SVM_DEATURE_SVML (1 << 2) #define SVM_DEATURE_SVML (1 << 2)
@ -102,20 +100,20 @@ static inline u32 svm_has(u32 feat)
static inline u8 pop_irq(struct kvm_vcpu *vcpu) static inline u8 pop_irq(struct kvm_vcpu *vcpu)
{ {
int word_index = __ffs(vcpu->irq_summary); int word_index = __ffs(vcpu->arch.irq_summary);
int bit_index = __ffs(vcpu->irq_pending[word_index]); int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
int irq = word_index * BITS_PER_LONG + bit_index; int irq = word_index * BITS_PER_LONG + bit_index;
clear_bit(bit_index, &vcpu->irq_pending[word_index]); clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
if (!vcpu->irq_pending[word_index]) if (!vcpu->arch.irq_pending[word_index])
clear_bit(word_index, &vcpu->irq_summary); clear_bit(word_index, &vcpu->arch.irq_summary);
return irq; return irq;
} }
static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
{ {
set_bit(irq, vcpu->irq_pending); set_bit(irq, vcpu->arch.irq_pending);
set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
} }
static inline void clgi(void) static inline void clgi(void)
@ -184,35 +182,30 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{ {
if (!(efer & KVM_EFER_LMA)) if (!(efer & EFER_LMA))
efer &= ~KVM_EFER_LME; efer &= ~EFER_LME;
to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
vcpu->shadow_efer = efer; vcpu->arch.shadow_efer = efer;
} }
static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | svm->vmcb->control.event_inj = nr
SVM_EVTINJ_VALID_ERR | | SVM_EVTINJ_VALID
SVM_EVTINJ_TYPE_EXEPT | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
GP_VECTOR; | SVM_EVTINJ_TYPE_EXEPT;
svm->vmcb->control.event_inj_err = error_code; svm->vmcb->control.event_inj_err = error_code;
} }
static void inject_ud(struct kvm_vcpu *vcpu) static bool svm_exception_injected(struct kvm_vcpu *vcpu)
{ {
to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | struct vcpu_svm *svm = to_svm(vcpu);
SVM_EVTINJ_TYPE_EXEPT |
UD_VECTOR;
}
static int is_page_fault(uint32_t info) return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
{
info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT);
} }
static int is_external_interrupt(u32 info) static int is_external_interrupt(u32 info)
@ -229,17 +222,16 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
return; return;
} }
if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) { if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
__FUNCTION__, __FUNCTION__,
svm->vmcb->save.rip, svm->vmcb->save.rip,
svm->next_rip); svm->next_rip);
}
vcpu->rip = svm->vmcb->save.rip = svm->next_rip; vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip;
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
vcpu->interrupt_window_open = 1; vcpu->arch.interrupt_window_open = 1;
} }
static int has_svm(void) static int has_svm(void)
@ -312,7 +304,7 @@ static void svm_hardware_enable(void *garbage)
svm_data->next_asid = svm_data->max_asid + 1; svm_data->next_asid = svm_data->max_asid + 1;
svm_features = cpuid_edx(SVM_CPUID_FUNC); svm_features = cpuid_edx(SVM_CPUID_FUNC);
asm volatile ( "sgdt %0" : "=m"(gdt_descr) ); asm volatile ("sgdt %0" : "=m"(gdt_descr));
gdt = (struct desc_struct *)gdt_descr.address; gdt = (struct desc_struct *)gdt_descr.address;
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
@ -458,11 +450,13 @@ static void init_vmcb(struct vmcb *vmcb)
control->intercept_cr_read = INTERCEPT_CR0_MASK | control->intercept_cr_read = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK | INTERCEPT_CR3_MASK |
INTERCEPT_CR4_MASK; INTERCEPT_CR4_MASK |
INTERCEPT_CR8_MASK;
control->intercept_cr_write = INTERCEPT_CR0_MASK | control->intercept_cr_write = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK | INTERCEPT_CR3_MASK |
INTERCEPT_CR4_MASK; INTERCEPT_CR4_MASK |
INTERCEPT_CR8_MASK;
control->intercept_dr_read = INTERCEPT_DR0_MASK | control->intercept_dr_read = INTERCEPT_DR0_MASK |
INTERCEPT_DR1_MASK | INTERCEPT_DR1_MASK |
@ -476,7 +470,8 @@ static void init_vmcb(struct vmcb *vmcb)
INTERCEPT_DR5_MASK | INTERCEPT_DR5_MASK |
INTERCEPT_DR7_MASK; INTERCEPT_DR7_MASK;
control->intercept_exceptions = 1 << PF_VECTOR; control->intercept_exceptions = (1 << PF_VECTOR) |
(1 << UD_VECTOR);
control->intercept = (1ULL << INTERCEPT_INTR) | control->intercept = (1ULL << INTERCEPT_INTR) |
@ -543,8 +538,7 @@ static void init_vmcb(struct vmcb *vmcb)
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
save->efer = MSR_EFER_SVME_MASK; save->efer = MSR_EFER_SVME_MASK;
save->dr6 = 0xffff0ff0;
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400; save->dr7 = 0x400;
save->rflags = 2; save->rflags = 2;
save->rip = 0x0000fff0; save->rip = 0x0000fff0;
@ -558,7 +552,7 @@ static void init_vmcb(struct vmcb *vmcb)
/* rdx = ?? */ /* rdx = ?? */
} }
static void svm_vcpu_reset(struct kvm_vcpu *vcpu) static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
@ -566,9 +560,11 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
if (vcpu->vcpu_id != 0) { if (vcpu->vcpu_id != 0) {
svm->vmcb->save.rip = 0; svm->vmcb->save.rip = 0;
svm->vmcb->save.cs.base = svm->vcpu.sipi_vector << 12; svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
svm->vmcb->save.cs.selector = svm->vcpu.sipi_vector << 8; svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
} }
return 0;
} }
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@ -587,12 +583,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (err) if (err)
goto free_svm; goto free_svm;
if (irqchip_in_kernel(kvm)) {
err = kvm_create_lapic(&svm->vcpu);
if (err < 0)
goto free_svm;
}
page = alloc_page(GFP_KERNEL); page = alloc_page(GFP_KERNEL);
if (!page) { if (!page) {
err = -ENOMEM; err = -ENOMEM;
@ -608,9 +598,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
fx_init(&svm->vcpu); fx_init(&svm->vcpu);
svm->vcpu.fpu_active = 1; svm->vcpu.fpu_active = 1;
svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (svm->vcpu.vcpu_id == 0) if (svm->vcpu.vcpu_id == 0)
svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
return &svm->vcpu; return &svm->vcpu;
@ -644,7 +634,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* increasing TSC. * increasing TSC.
*/ */
rdtscll(tsc_this); rdtscll(tsc_this);
delta = vcpu->host_tsc - tsc_this; delta = vcpu->arch.host_tsc - tsc_this;
svm->vmcb->control.tsc_offset += delta; svm->vmcb->control.tsc_offset += delta;
vcpu->cpu = cpu; vcpu->cpu = cpu;
kvm_migrate_apic_timer(vcpu); kvm_migrate_apic_timer(vcpu);
@ -659,11 +649,11 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
int i; int i;
++vcpu->stat.host_state_reload;
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
rdtscll(vcpu->host_tsc); rdtscll(vcpu->arch.host_tsc);
kvm_put_guest_fpu(vcpu);
} }
static void svm_vcpu_decache(struct kvm_vcpu *vcpu) static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
@ -674,17 +664,17 @@ static void svm_cache_regs(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->rip = svm->vmcb->save.rip; vcpu->arch.rip = svm->vmcb->save.rip;
} }
static void svm_decache_regs(struct kvm_vcpu *vcpu) static void svm_decache_regs(struct kvm_vcpu *vcpu)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->rip; svm->vmcb->save.rip = vcpu->arch.rip;
} }
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@ -782,24 +772,24 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
if (vcpu->shadow_efer & KVM_EFER_LME) { if (vcpu->arch.shadow_efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->shadow_efer |= KVM_EFER_LMA; vcpu->arch.shadow_efer |= EFER_LMA;
svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
} }
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) { if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
vcpu->shadow_efer &= ~KVM_EFER_LMA; vcpu->arch.shadow_efer &= ~EFER_LMA;
svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
} }
} }
#endif #endif
if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
vcpu->fpu_active = 1; vcpu->fpu_active = 1;
} }
vcpu->cr0 = cr0; vcpu->arch.cr0 = cr0;
cr0 |= X86_CR0_PG | X86_CR0_WP; cr0 |= X86_CR0_PG | X86_CR0_WP;
cr0 &= ~(X86_CR0_CD | X86_CR0_NW); cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0; svm->vmcb->save.cr0 = cr0;
@ -807,7 +797,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{ {
vcpu->cr4 = cr4; vcpu->arch.cr4 = cr4;
to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
} }
@ -912,7 +902,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
svm->db_regs[dr] = value; svm->db_regs[dr] = value;
return; return;
case 4 ... 5: case 4 ... 5:
if (vcpu->cr4 & X86_CR4_DE) { if (vcpu->arch.cr4 & X86_CR4_DE) {
*exception = UD_VECTOR; *exception = UD_VECTOR;
return; return;
} }
@ -938,51 +928,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
struct kvm *kvm = svm->vcpu.kvm; struct kvm *kvm = svm->vcpu.kvm;
u64 fault_address; u64 fault_address;
u32 error_code; u32 error_code;
enum emulation_result er;
int r;
if (!irqchip_in_kernel(kvm) && if (!irqchip_in_kernel(kvm) &&
is_external_interrupt(exit_int_info)) is_external_interrupt(exit_int_info))
push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
mutex_lock(&kvm->lock);
fault_address = svm->vmcb->control.exit_info_2; fault_address = svm->vmcb->control.exit_info_2;
error_code = svm->vmcb->control.exit_info_1; error_code = svm->vmcb->control.exit_info_1;
r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
if (r < 0) { }
mutex_unlock(&kvm->lock);
return r;
}
if (!r) {
mutex_unlock(&kvm->lock);
return 1;
}
er = emulate_instruction(&svm->vcpu, kvm_run, fault_address,
error_code);
mutex_unlock(&kvm->lock);
switch (er) { static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
case EMULATE_DONE: {
return 1; int er;
case EMULATE_DO_MMIO:
++svm->vcpu.stat.mmio_exits;
return 0;
case EMULATE_FAIL:
kvm_report_emulation_failure(&svm->vcpu, "pagetable");
break;
default:
BUG();
}
kvm_run->exit_reason = KVM_EXIT_UNKNOWN; er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
return 0; if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
} }
static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
if (!(svm->vcpu.cr0 & X86_CR0_TS)) if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
svm->vmcb->save.cr0 &= ~X86_CR0_TS; svm->vmcb->save.cr0 &= ~X86_CR0_TS;
svm->vcpu.fpu_active = 1; svm->vcpu.fpu_active = 1;
@ -1004,7 +973,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
u32 io_info = svm->vmcb->control.exit_info_1; //address size bug? u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
int size, down, in, string, rep; int size, down, in, string, rep;
unsigned port; unsigned port;
@ -1015,7 +984,8 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
string = (io_info & SVM_IOIO_STR_MASK) != 0; string = (io_info & SVM_IOIO_STR_MASK) != 0;
if (string) { if (string) {
if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO) if (emulate_instruction(&svm->vcpu,
kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
return 0; return 0;
return 1; return 1;
} }
@ -1045,13 +1015,14 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
svm->next_rip = svm->vmcb->save.rip + 3; svm->next_rip = svm->vmcb->save.rip + 3;
skip_emulated_instruction(&svm->vcpu); skip_emulated_instruction(&svm->vcpu);
return kvm_hypercall(&svm->vcpu, kvm_run); kvm_emulate_hypercall(&svm->vcpu);
return 1;
} }
static int invalid_op_interception(struct vcpu_svm *svm, static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run) struct kvm_run *kvm_run)
{ {
inject_ud(&svm->vcpu); kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1; return 1;
} }
@ -1073,11 +1044,20 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int emulate_on_interception(struct vcpu_svm *svm, static int emulate_on_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run) struct kvm_run *kvm_run)
{ {
if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE) if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
return 1; return 1;
} }
static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
if (irqchip_in_kernel(svm->vcpu.kvm))
return 1;
kvm_run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
}
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{ {
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
@ -1124,14 +1104,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX]; u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data; u64 data;
if (svm_get_msr(&svm->vcpu, ecx, &data)) if (svm_get_msr(&svm->vcpu, ecx, &data))
svm_inject_gp(&svm->vcpu, 0); kvm_inject_gp(&svm->vcpu, 0);
else { else {
svm->vmcb->save.rax = data & 0xffffffff; svm->vmcb->save.rax = data & 0xffffffff;
svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32; svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
svm->next_rip = svm->vmcb->save.rip + 2; svm->next_rip = svm->vmcb->save.rip + 2;
skip_emulated_instruction(&svm->vcpu); skip_emulated_instruction(&svm->vcpu);
} }
@ -1176,7 +1156,20 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
case MSR_IA32_SYSENTER_ESP: case MSR_IA32_SYSENTER_ESP:
svm->vmcb->save.sysenter_esp = data; svm->vmcb->save.sysenter_esp = data;
break; break;
case MSR_K7_EVNTSEL0:
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
/*
* only support writing 0 to the performance counters for now
* to make Windows happy. Should be replaced by a real
* performance counter emulation later.
*/
if (data != 0)
goto unhandled;
break;
default: default:
unhandled:
return kvm_set_msr_common(vcpu, ecx, data); return kvm_set_msr_common(vcpu, ecx, data);
} }
return 0; return 0;
@ -1184,12 +1177,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{ {
u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX]; u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data = (svm->vmcb->save.rax & -1u) u64 data = (svm->vmcb->save.rax & -1u)
| ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32); | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
svm->next_rip = svm->vmcb->save.rip + 2; svm->next_rip = svm->vmcb->save.rip + 2;
if (svm_set_msr(&svm->vcpu, ecx, data)) if (svm_set_msr(&svm->vcpu, ecx, data))
svm_inject_gp(&svm->vcpu, 0); kvm_inject_gp(&svm->vcpu, 0);
else else
skip_emulated_instruction(&svm->vcpu); skip_emulated_instruction(&svm->vcpu);
return 1; return 1;
@ -1213,7 +1206,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
* possible * possible
*/ */
if (kvm_run->request_interrupt_window && if (kvm_run->request_interrupt_window &&
!svm->vcpu.irq_summary) { !svm->vcpu.arch.irq_summary) {
++svm->vcpu.stat.irq_window_exits; ++svm->vcpu.stat.irq_window_exits;
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0; return 0;
@ -1227,10 +1220,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR0] = emulate_on_interception,
[SVM_EXIT_READ_CR3] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception,
[SVM_EXIT_READ_CR4] = emulate_on_interception, [SVM_EXIT_READ_CR4] = emulate_on_interception,
[SVM_EXIT_READ_CR8] = emulate_on_interception,
/* for now: */ /* for now: */
[SVM_EXIT_WRITE_CR0] = emulate_on_interception, [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
[SVM_EXIT_WRITE_CR3] = emulate_on_interception, [SVM_EXIT_WRITE_CR3] = emulate_on_interception,
[SVM_EXIT_WRITE_CR4] = emulate_on_interception, [SVM_EXIT_WRITE_CR4] = emulate_on_interception,
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
[SVM_EXIT_READ_DR0] = emulate_on_interception, [SVM_EXIT_READ_DR0] = emulate_on_interception,
[SVM_EXIT_READ_DR1] = emulate_on_interception, [SVM_EXIT_READ_DR1] = emulate_on_interception,
[SVM_EXIT_READ_DR2] = emulate_on_interception, [SVM_EXIT_READ_DR2] = emulate_on_interception,
@ -1241,6 +1236,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception, [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception, [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception, [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
[SVM_EXIT_INTR] = nop_on_interception, [SVM_EXIT_INTR] = nop_on_interception,
@ -1293,7 +1289,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
exit_code); exit_code);
if (exit_code >= ARRAY_SIZE(svm_exit_handlers) if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| svm_exit_handlers[exit_code] == 0) { || !svm_exit_handlers[exit_code]) {
kvm_run->exit_reason = KVM_EXIT_UNKNOWN; kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
kvm_run->hw.hardware_exit_reason = exit_code; kvm_run->hw.hardware_exit_reason = exit_code;
return 0; return 0;
@ -1307,7 +1303,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
int cpu = raw_smp_processor_id(); int cpu = raw_smp_processor_id();
struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
svm_data->tss_desc->type = 9; //available 32/64-bit TSS svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
load_TR_desc(); load_TR_desc();
} }
@ -1348,7 +1344,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb; struct vmcb *vmcb = svm->vmcb;
int intr_vector = -1; int intr_vector = -1;
kvm_inject_pending_timer_irqs(vcpu);
if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
intr_vector = vmcb->control.exit_int_info & intr_vector = vmcb->control.exit_int_info &
@ -1388,20 +1383,20 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
push_irq(&svm->vcpu, control->int_vector); push_irq(&svm->vcpu, control->int_vector);
} }
svm->vcpu.interrupt_window_open = svm->vcpu.arch.interrupt_window_open =
!(control->int_state & SVM_INTERRUPT_SHADOW_MASK); !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
} }
static void svm_do_inject_vector(struct vcpu_svm *svm) static void svm_do_inject_vector(struct vcpu_svm *svm)
{ {
struct kvm_vcpu *vcpu = &svm->vcpu; struct kvm_vcpu *vcpu = &svm->vcpu;
int word_index = __ffs(vcpu->irq_summary); int word_index = __ffs(vcpu->arch.irq_summary);
int bit_index = __ffs(vcpu->irq_pending[word_index]); int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
int irq = word_index * BITS_PER_LONG + bit_index; int irq = word_index * BITS_PER_LONG + bit_index;
clear_bit(bit_index, &vcpu->irq_pending[word_index]); clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
if (!vcpu->irq_pending[word_index]) if (!vcpu->arch.irq_pending[word_index])
clear_bit(word_index, &vcpu->irq_summary); clear_bit(word_index, &vcpu->arch.irq_summary);
svm_inject_irq(svm, irq); svm_inject_irq(svm, irq);
} }
@ -1411,11 +1406,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu); struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_control_area *control = &svm->vmcb->control;
svm->vcpu.interrupt_window_open = svm->vcpu.arch.interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
(svm->vmcb->save.rflags & X86_EFLAGS_IF)); (svm->vmcb->save.rflags & X86_EFLAGS_IF));
if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary) if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
/* /*
* If interrupts enabled, and not blocked by sti or mov ss. Good. * If interrupts enabled, and not blocked by sti or mov ss. Good.
*/ */
@ -1424,13 +1419,18 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
/* /*
* Interrupts blocked. Wait for unblock. * Interrupts blocked. Wait for unblock.
*/ */
if (!svm->vcpu.interrupt_window_open && if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) { (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
control->intercept |= 1ULL << INTERCEPT_VINTR; control->intercept |= 1ULL << INTERCEPT_VINTR;
} else else
control->intercept &= ~(1ULL << INTERCEPT_VINTR); control->intercept &= ~(1ULL << INTERCEPT_VINTR);
} }
/*
* Handler installed as .set_tss_addr in svm_x86_ops.  It ignores both
* kvm and addr and always reports success (0).
*/
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
return 0;
}
static void save_db_regs(unsigned long *db_regs) static void save_db_regs(unsigned long *db_regs)
{ {
asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
@ -1472,7 +1472,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
svm->host_cr2 = kvm_read_cr2(); svm->host_cr2 = kvm_read_cr2();
svm->host_dr6 = read_dr6(); svm->host_dr6 = read_dr6();
svm->host_dr7 = read_dr7(); svm->host_dr7 = read_dr7();
svm->vmcb->save.cr2 = vcpu->cr2; svm->vmcb->save.cr2 = vcpu->arch.cr2;
if (svm->vmcb->save.dr7 & 0xff) { if (svm->vmcb->save.dr7 & 0xff) {
write_dr7(0); write_dr7(0);
@ -1486,13 +1486,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
asm volatile ( asm volatile (
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
"push %%rbx; push %%rcx; push %%rdx;" "push %%rbp; \n\t"
"push %%rsi; push %%rdi; push %%rbp;"
"push %%r8; push %%r9; push %%r10; push %%r11;"
"push %%r12; push %%r13; push %%r14; push %%r15;"
#else #else
"push %%ebx; push %%ecx; push %%edx;" "push %%ebp; \n\t"
"push %%esi; push %%edi; push %%ebp;"
#endif #endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
@ -1554,10 +1550,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
"mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t"
"pop %%r15; pop %%r14; pop %%r13; pop %%r12;" "pop %%rbp; \n\t"
"pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
"pop %%rbp; pop %%rdi; pop %%rsi;"
"pop %%rdx; pop %%rcx; pop %%rbx; \n\t"
#else #else
"mov %%ebx, %c[rbx](%[svm]) \n\t" "mov %%ebx, %c[rbx](%[svm]) \n\t"
"mov %%ecx, %c[rcx](%[svm]) \n\t" "mov %%ecx, %c[rcx](%[svm]) \n\t"
@ -1566,34 +1559,40 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
"mov %%edi, %c[rdi](%[svm]) \n\t" "mov %%edi, %c[rdi](%[svm]) \n\t"
"mov %%ebp, %c[rbp](%[svm]) \n\t" "mov %%ebp, %c[rbp](%[svm]) \n\t"
"pop %%ebp; pop %%edi; pop %%esi;" "pop %%ebp; \n\t"
"pop %%edx; pop %%ecx; pop %%ebx; \n\t"
#endif #endif
: :
: [svm]"a"(svm), : [svm]"a"(svm),
[vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
[rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])), [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])), [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])), [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
[rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])), [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
[rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])), [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
[rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP])) [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])), , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
[r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])), [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
[r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])), [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
[r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])), [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
[r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])), [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
[r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])), [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
[r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])), [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15])) [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
#endif #endif
: "cc", "memory" ); : "cc", "memory"
#ifdef CONFIG_X86_64
, "rbx", "rcx", "rdx", "rsi", "rdi"
, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
#else
, "ebx", "ecx", "edx" , "esi", "edi"
#endif
);
if ((svm->vmcb->save.dr7 & 0xff)) if ((svm->vmcb->save.dr7 & 0xff))
load_db_regs(svm->host_db_regs); load_db_regs(svm->host_db_regs);
vcpu->cr2 = svm->vmcb->save.cr2; vcpu->arch.cr2 = svm->vmcb->save.cr2;
write_dr6(svm->host_dr6); write_dr6(svm->host_dr6);
write_dr7(svm->host_dr7); write_dr7(svm->host_dr7);
@ -1627,34 +1626,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
} }
} }
static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
unsigned long addr,
uint32_t err_code)
{
struct vcpu_svm *svm = to_svm(vcpu);
uint32_t exit_int_info = svm->vmcb->control.exit_int_info;
++vcpu->stat.pf_guest;
if (is_page_fault(exit_int_info)) {
svm->vmcb->control.event_inj_err = 0;
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
SVM_EVTINJ_VALID_ERR |
SVM_EVTINJ_TYPE_EXEPT |
DF_VECTOR;
return;
}
vcpu->cr2 = addr;
svm->vmcb->save.cr2 = addr;
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
SVM_EVTINJ_VALID_ERR |
SVM_EVTINJ_TYPE_EXEPT |
PF_VECTOR;
svm->vmcb->control.event_inj_err = err_code;
}
static int is_disabled(void) static int is_disabled(void)
{ {
u64 vm_cr; u64 vm_cr;
@ -1675,7 +1646,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[0] = 0x0f; hypercall[0] = 0x0f;
hypercall[1] = 0x01; hypercall[1] = 0x01;
hypercall[2] = 0xd9; hypercall[2] = 0xd9;
hypercall[3] = 0xc3;
} }
static void svm_check_processor_compat(void *rtn) static void svm_check_processor_compat(void *rtn)
@ -1683,6 +1653,11 @@ static void svm_check_processor_compat(void *rtn)
*(int *)rtn = 0; *(int *)rtn = 0;
} }
/*
* Handler installed as .cpu_has_accelerated_tpr in svm_x86_ops; it
* unconditionally returns false.
*/
static bool svm_cpu_has_accelerated_tpr(void)
{
return false;
}
static struct kvm_x86_ops svm_x86_ops = { static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm, .cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled, .disabled_by_bios = is_disabled,
@ -1691,6 +1666,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.check_processor_compatibility = svm_check_processor_compat, .check_processor_compatibility = svm_check_processor_compat,
.hardware_enable = svm_hardware_enable, .hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable, .hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
.vcpu_create = svm_create_vcpu, .vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu, .vcpu_free = svm_free_vcpu,
@ -1725,9 +1701,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_rflags = svm_set_rflags, .set_rflags = svm_set_rflags,
.tlb_flush = svm_flush_tlb, .tlb_flush = svm_flush_tlb,
.inject_page_fault = svm_inject_page_fault,
.inject_gp = svm_inject_gp,
.run = svm_vcpu_run, .run = svm_vcpu_run,
.handle_exit = handle_exit, .handle_exit = handle_exit,
@ -1735,19 +1708,23 @@ static struct kvm_x86_ops svm_x86_ops = {
.patch_hypercall = svm_patch_hypercall, .patch_hypercall = svm_patch_hypercall,
.get_irq = svm_get_irq, .get_irq = svm_get_irq,
.set_irq = svm_set_irq, .set_irq = svm_set_irq,
.queue_exception = svm_queue_exception,
.exception_injected = svm_exception_injected,
.inject_pending_irq = svm_intr_assist, .inject_pending_irq = svm_intr_assist,
.inject_pending_vectors = do_interrupt_requests, .inject_pending_vectors = do_interrupt_requests,
.set_tss_addr = svm_set_tss_addr,
}; };
static int __init svm_init(void) static int __init svm_init(void)
{ {
return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm), return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
THIS_MODULE); THIS_MODULE);
} }
static void __exit svm_exit(void) static void __exit svm_exit(void)
{ {
kvm_exit_x86(); kvm_exit();
} }
module_init(svm_init) module_init(svm_init)

View File

@ -204,6 +204,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define INTERCEPT_CR0_MASK 1 #define INTERCEPT_CR0_MASK 1
#define INTERCEPT_CR3_MASK (1 << 3) #define INTERCEPT_CR3_MASK (1 << 3)
#define INTERCEPT_CR4_MASK (1 << 4) #define INTERCEPT_CR4_MASK (1 << 4)
#define INTERCEPT_CR8_MASK (1 << 8)
#define INTERCEPT_DR0_MASK 1 #define INTERCEPT_DR0_MASK 1
#define INTERCEPT_DR1_MASK (1 << 1) #define INTERCEPT_DR1_MASK (1 << 1)
@ -311,7 +312,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXIT_ERR -1 #define SVM_EXIT_ERR -1
#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP #define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"

File diff suppressed because it is too large Load Diff

View File

@ -25,6 +25,9 @@
* *
*/ */
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
*/
#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 #define CPU_BASED_USE_TSC_OFFSETING 0x00000008
#define CPU_BASED_HLT_EXITING 0x00000080 #define CPU_BASED_HLT_EXITING 0x00000080
@ -42,6 +45,12 @@
#define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000 #define CPU_BASED_PAUSE_EXITING 0x40000000
#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
/*
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_NMI_EXITING 0x00000008
@ -54,8 +63,6 @@
#define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
/* VMCS Encodings */ /* VMCS Encodings */
enum vmcs_field { enum vmcs_field {
GUEST_ES_SELECTOR = 0x00000800, GUEST_ES_SELECTOR = 0x00000800,
@ -89,6 +96,8 @@ enum vmcs_field {
TSC_OFFSET_HIGH = 0x00002011, TSC_OFFSET_HIGH = 0x00002011,
VIRTUAL_APIC_PAGE_ADDR = 0x00002012, VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
APIC_ACCESS_ADDR = 0x00002014,
APIC_ACCESS_ADDR_HIGH = 0x00002015,
VMCS_LINK_POINTER = 0x00002800, VMCS_LINK_POINTER = 0x00002800,
VMCS_LINK_POINTER_HIGH = 0x00002801, VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802, GUEST_IA32_DEBUGCTL = 0x00002802,
@ -214,6 +223,8 @@ enum vmcs_field {
#define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36 #define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_WBINVD 54
/* /*
* Interruption-information format * Interruption-information format
@ -230,13 +241,14 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
/* /*
* Exit Qualifications for MOV for Control Register Access * Exit Qualifications for MOV for Control Register Access
*/ */
#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control register */ #define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control reg.*/
#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */ #define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */
#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */ #define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose reg. */
#define LMSW_SOURCE_DATA_SHIFT 16 #define LMSW_SOURCE_DATA_SHIFT 16
#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */ #define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
#define REG_EAX (0 << 8) #define REG_EAX (0 << 8)
@ -259,11 +271,11 @@ enum vmcs_field {
/* /*
* Exit Qualifications for MOV for Debug Register Access * Exit Qualifications for MOV for Debug Register Access
*/ */
#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */ #define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug reg. */
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ #define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4) #define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4) #define TYPE_MOV_FROM_DR (1 << 4)
#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */ #define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
/* segment AR */ /* segment AR */
@ -307,4 +319,6 @@ enum vmcs_field {
#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
#endif #endif

File diff suppressed because it is too large Load Diff

1912
arch/x86/kvm/x86_emulate.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -90,8 +90,6 @@ source "drivers/dca/Kconfig"
source "drivers/auxdisplay/Kconfig" source "drivers/auxdisplay/Kconfig"
source "drivers/kvm/Kconfig"
source "drivers/uio/Kconfig" source "drivers/uio/Kconfig"
source "drivers/virtio/Kconfig" source "drivers/virtio/Kconfig"

View File

@ -47,7 +47,6 @@ obj-$(CONFIG_SPI) += spi/
obj-$(CONFIG_PCCARD) += pcmcia/ obj-$(CONFIG_PCCARD) += pcmcia/
obj-$(CONFIG_DIO) += dio/ obj-$(CONFIG_DIO) += dio/
obj-$(CONFIG_SBUS) += sbus/ obj-$(CONFIG_SBUS) += sbus/
obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_ZORRO) += zorro/ obj-$(CONFIG_ZORRO) += zorro/
obj-$(CONFIG_MAC) += macintosh/ obj-$(CONFIG_MAC) += macintosh/
obj-$(CONFIG_ATA_OVER_ETH) += block/aoe/ obj-$(CONFIG_ATA_OVER_ETH) += block/aoe/

View File

@ -1,165 +0,0 @@
/*
* irq.h: in kernel interrupt controller related definitions
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
* Authors:
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
*
*/
#ifndef __IRQ_H
#define __IRQ_H
#include "kvm.h"
typedef void irq_request_func(void *opaque, int level);
/*
* Register and mode state of one PIC.  struct kvm_pic embeds two of
* these in its pics[] array (index 0 master, index 1 slave).
*/
struct kvm_kpic_state {
u8 last_irr; /* edge detection */
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
u8 poll;
u8 special_mask;
u8 init_state;
u8 auto_eoi;
u8 rotate_on_auto_eoi;
u8 special_fully_nested_mode;
u8 init4; /* true if 4 byte init */
u8 elcr; /* PIIX edge/trigger selection */
u8 elcr_mask;
/* NOTE(review): presumably points back at the kvm_pic embedding this state - confirm */
struct kvm_pic *pics_state;
};
/*
* A master/slave pair of PIC states plus the hooks stored alongside them.
*/
struct kvm_pic {
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
/* callback of type irq_request_func, i.e. void (void *opaque, int level) */
irq_request_func *irq_request;
/* NOTE(review): presumably the opaque argument handed to irq_request - confirm */
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
void kvm_pic_update_irq(struct kvm_pic *s);
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_MEM_LENGTH 0x100
/* Direct registers. */
#define IOAPIC_REG_SELECT 0x00
#define IOAPIC_REG_WINDOW 0x10
#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
/* Indirect registers. */
#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
#define IOAPIC_REG_VERSION 0x01
#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
/*
* IOAPIC state: a handful of scalar registers followed by one
* redirection entry per pin (IOAPIC_NUM_PINS entries).
*/
struct kvm_ioapic {
u64 base_address;
u32 ioregsel;
u32 id;
u32 irr;
u32 pad;
/*
* Each redirection entry can be accessed as a raw 64-bit value (bits)
* or through the named bitfields.  The declared field widths add up
* to 64 bits; the exact bit placement depends on the compiler's
* bitfield layout.
*/
union ioapic_redir_entry {
u64 bits;
struct {
u8 vector;
u8 delivery_mode:3;
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
u8 trig_mode:1;
u8 mask:1;
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
} redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
};
/*
* Per-vcpu local APIC state: base address, io device, timer bookkeeping
* and the register storage.
*/
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
/* Timer state, built around an hrtimer. */
struct {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
/* NOTE(review): regs is presumably the kernel mapping of regs_page - confirm */
struct page *regs_page;
void *regs;
};
/*
* ASSERT(x): with DEBUG defined, a false x logs the file name, line
* number and expression text at KERN_EMERG and then calls BUG().
* Without DEBUG the macro expands to an empty statement, so x is not
* evaluated at all; do not rely on side effects inside x.
*/
#ifdef DEBUG
#define ASSERT(x) \
do { \
if (!(x)) { \
printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
__FILE__, __LINE__, #x); \
BUG(); \
} \
} while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
unsigned long bitmap);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,511 +0,0 @@
/*
* Kernel-based Virtual Machine driver for Linux
*
* This module enables machines with Intel VT-x extensions to run virtual
* machines without emulation or binary translation.
*
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
/*
* We need the mmu code to access both 32-bit and 64-bit guest ptes,
* so the code in this file is compiled twice, once per pte size.
*/
/*
* Per-PTTYPE names: the including file defines PTTYPE as 64 or 32 and the
* macros below select the matching pte type, walker struct name,
* function-name prefix and PT*_ helper macros.  Any other value is a
* build error.
*/
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
/* FNAME(x) expands to paging64_x */
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
/* PT_MAX_FULL_LEVELS sizes guest_walker.table_gfn[] */
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#else
#define PT_MAX_FULL_LEVELS 2
#endif
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
/* FNAME(x) expands to paging32_x */
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
/* Shadow indexing uses PT64_INDEX even for 32-bit guest ptes. */
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_MAX_FULL_LEVELS 2
#else
#error Invalid PTTYPE value
#endif
/*
* The guest_walker structure emulates the behavior of the hardware page
* table walker.  It is filled in by FNAME(walk_addr) and then consumed by
* FNAME(fetch) and FNAME(set_pte_common).
*/
struct guest_walker {
/* starts at vcpu->mmu.root_level and is decremented for every table descended */
int level;
/* table_gfn[l - 1] is the guest frame number of the table walked at level l */
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
/* kmap_atomic() mapping of the table currently being walked; unmapped before walk_addr returns */
pt_element_t *table;
/* copy of the guest entry the walk ended on */
pt_element_t pte;
/* set to a vcpu->pdptrs[] slot for PTTYPE 64 outside long mode; walk_addr resets it to NULL after a successful walk */
pt_element_t *ptep;
/* struct page backing the most recently mapped guest table */
struct page *page;
/* index of the last entry examined inside that table */
int index;
/* starts as PT_USER_MASK | PT_WRITABLE_MASK and is ANDed with every non-leaf entry walked */
pt_element_t inherited_ar;
/* guest frame number the address translates to (valid when walk_addr returns 1) */
gfn_t gfn;
/* PFERR_* bits describing the failure (valid when walk_addr returns 0) */
u32 error_code;
};
/*
* Fetch a guest pte for a guest virtual address by walking the guest
* page tables in software.
*
* walker: receives the result of the walk (level, table gfns, pte, gfn,
* inherited access rights, or error_code on failure).
* vcpu: supplies cr3, pdptrs[] and mmu.root_level.
* addr: guest virtual address to translate.
* write_fault, user_fault, fetch_fault: nonzero flags describing the
* access; they drive the permission checks and the PFERR_* bits
* reported on failure.
*
* Returns 1 when the walk ends on a page-table-level pte or on a
* large-page directory entry, 0 when an entry is not present or the
* access is not permitted.  Entries that pass the checks get
* PT_ACCESSED_MASK set in guest memory as a side effect.
*/
static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr,
int write_fault, int user_fault, int fetch_fault)
{
hpa_t hpa;
/* NOTE(review): slot is assigned below but never read in this function */
struct kvm_memory_slot *slot;
pt_element_t *ptep;
pt_element_t root;
gfn_t table_gfn;
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level;
walker->table = NULL;
walker->page = NULL;
walker->ptep = NULL;
root = vcpu->cr3;
#if PTTYPE == 64
/*
* Outside long mode the walk does not start from cr3 but from one of
* the four pdptrs, selected by address bits 31:30.
*/
if (!is_long_mode(vcpu)) {
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
root = *walker->ptep;
walker->pte = root;
if (!(root & PT_PRESENT_MASK))
goto not_present;
--walker->level;
}
#endif
/* Map the top-level table that root points at. */
table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
walker->table_gfn[walker->level - 1] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
walker->table = kmap_atomic(walker->page, KM_USER0);
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
/* One iteration per paging level, from the root towards the leaf. */
for (;;) {
int index = PT_INDEX(addr, walker->level);
hpa_t paddr;
ptep = &walker->table[index];
walker->index = index;
ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
((unsigned long)ptep & PAGE_MASK));
if (!is_present_pte(*ptep))
goto not_present;
/*
* A write to a read-only entry is refused for user accesses, and
* for other accesses only when is_write_protection() is true.
*/
if (write_fault && !is_writeble_pte(*ptep))
if (user_fault || is_write_protection(vcpu))
goto access_error;
if (user_fault && !(*ptep & PT_USER_MASK))
goto access_error;
#if PTTYPE == 64
if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
goto access_error;
#endif
/* Setting the accessed bit modifies the guest table page, so mark it dirty first. */
if (!(*ptep & PT_ACCESSED_MASK)) {
mark_page_dirty(vcpu->kvm, table_gfn);
*ptep |= PT_ACCESSED_MASK;
}
/* Leaf reached at the page-table level. */
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
break;
}
/*
* Large page: a directory entry with PT_PAGE_SIZE_MASK set ends the
* walk; the page-table-level index of addr is added to the base gfn.
*/
if (walker->level == PT_DIRECTORY_LEVEL
&& (*ptep & PT_PAGE_SIZE_MASK)
&& (PTTYPE == 64 || is_pse(vcpu))) {
walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
>> PAGE_SHIFT;
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
break;
}
/*
* Descend one level: accumulate the access rights of this entry,
* drop the current mapping and map the table the entry points at.
*/
walker->inherited_ar &= walker->table[index];
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
kunmap_atomic(walker->table, KM_USER0);
paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
walker->table = kmap_atomic(walker->page, KM_USER0);
--walker->level;
walker->table_gfn[walker->level - 1 ] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
}
/* Success: ptep still points into the mapped table here. */
walker->pte = *ptep;
/*
* NOTE(review): walker->page was assigned before the loop, so this
* presumably always clears ptep on the success path - confirm.
*/
if (walker->page)
walker->ptep = NULL;
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
return 1;
not_present:
walker->error_code = 0;
goto err;
access_error:
walker->error_code = PFERR_PRESENT_MASK;
err:
/* Fold the kind of access into the error code and release the mapping, if any. */
if (write_fault)
walker->error_code |= PFERR_WRITE_MASK;
if (user_fault)
walker->error_code |= PFERR_USER_MASK;
if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK;
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
return 0;
}
static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
struct guest_walker *walker)
{
mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
}
/*
* Build the shadow pte for a guest pte or pde and install it.
*
* vcpu: vcpu on whose behalf the shadow entry is built.
* shadow_pte: shadow entry to update in place.
* gaddr: guest physical address the entry maps.
* gpte: guest entry being shadowed (source of the dirty and NX bits).
* access_bits: PT_USER_MASK / PT_WRITABLE_MASK rights to grant.
* user_fault, write_fault: describe the faulting access.
* ptwrite: set to 1 when a write fault targets a gfn that has a shadow
* page; only dereferenced when write_fault is nonzero.
* walker: guest walk result; only dereferenced when write_fault is set
* and the guest entry is not yet dirty.
* gfn: guest frame number used for the shadow-page lookup / unshadow.
*/
static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
u64 *shadow_pte,
gpa_t gaddr,
pt_element_t gpte,
u64 access_bits,
int user_fault,
int write_fault,
int *ptwrite,
struct guest_walker *walker,
gfn_t gfn)
{
hpa_t paddr;
int dirty = gpte & PT_DIRTY_MASK;
u64 spte = *shadow_pte;
/* remembered so rmap_add() is only called for entries not already rmapped */
int was_rmapped = is_rmap_pte(spte);
pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
" user_fault %d gfn %lx\n",
__FUNCTION__, spte, (u64)gpte, access_bits,
write_fault, user_fault, gfn);
/*
* First write through a clean guest entry: set the dirty bit in the
* guest entry itself, reached either directly through walker->ptep or
* by temporarily mapping walker->page and indexing with walker->index.
*/
if (write_fault && !dirty) {
pt_element_t *guest_ent, *tmp = NULL;
if (walker->ptep)
guest_ent = walker->ptep;
else {
tmp = kmap_atomic(walker->page, KM_USER0);
guest_ent = &tmp[walker->index];
}
*guest_ent |= PT_DIRTY_MASK;
if (!walker->ptep)
kunmap_atomic(tmp, KM_USER0);
dirty = 1;
FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
}
spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
spte |= gpte & PT64_NX_MASK;
/* A clean guest entry is shadowed without write permission. */
if (!dirty)
access_bits &= ~PT_WRITABLE_MASK;
paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
/* NOTE(review): PT_PRESENT_MASK was already ORed in a few lines above */
spte |= PT_PRESENT_MASK;
if (access_bits & PT_USER_MASK)
spte |= PT_USER_MASK;
/*
* gpa_to_hpa() reported an error address: store gaddr in a non-present
* entry tagged with PT_SHADOW_IO_MARK and stop.
*/
if (is_error_hpa(paddr)) {
spte |= gaddr;
spte |= PT_SHADOW_IO_MARK;
spte &= ~PT_PRESENT_MASK;
set_shadow_pte(shadow_pte, spte);
return;
}
spte |= paddr;
/*
* Writable mapping requested, either by the access rights or by a
* non-user write fault while is_write_protection() is false.
*/
if ((access_bits & PT_WRITABLE_MASK)
|| (write_fault && !is_write_protection(vcpu) && !user_fault)) {
struct kvm_mmu_page *shadow;
spte |= PT_WRITABLE_MASK;
/* User faults unshadow the gfn and keep the entry writable. */
if (user_fault) {
mmu_unshadow(vcpu, gfn);
goto unshadowed;
}
/*
* If the gfn has a shadow page, the mapping is made read-only
* again and a write fault is reported through *ptwrite.
*/
shadow = kvm_mmu_lookup_page(vcpu, gfn);
if (shadow) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__FUNCTION__, gfn);
access_bits &= ~PT_WRITABLE_MASK;
if (is_writeble_pte(spte)) {
spte &= ~PT_WRITABLE_MASK;
kvm_x86_ops->tlb_flush(vcpu);
}
if (write_fault)
*ptwrite = 1;
}
}
unshadowed:
/*
* NOTE(review): the dirty-log test looks at access_bits, not at the
* writable bit of spte, so the two can differ here.
*/
if (access_bits & PT_WRITABLE_MASK)
mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
set_shadow_pte(shadow_pte, spte);
page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
if (!was_rmapped)
rmap_add(vcpu, shadow_pte);
}
/*
 * Shadow a page-table-level guest pte: the granted rights are the
 * requested access_bits masked by the guest pte, and the mapped guest
 * physical address is the pte's base-address field.
 */
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
			   u64 *shadow_pte, u64 access_bits,
			   int user_fault, int write_fault, int *ptwrite,
			   struct guest_walker *walker, gfn_t gfn)
{
	gpa_t guest_addr = gpte & PT_BASE_ADDR_MASK;
	u64 granted = access_bits & gpte;

	FNAME(set_pte_common)(vcpu, shadow_pte, guest_addr, gpte, granted,
			      user_fault, write_fault, ptwrite, walker, gfn);
}
/*
 * Refresh one shadow pte from a guest pte value supplied by the caller.
 *
 * Nothing is done when fewer than sizeof(pt_element_t) bytes were
 * supplied, or when the guest pte lacks either the present or the
 * accessed bit.  Otherwise the shadow entry is rebuilt with user and
 * write access requested and no fault context (no walker, no ptwrite).
 */
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
			      u64 *spte, const void *pte, int bytes)
{
	const pt_element_t *guest_entry = pte;
	pt_element_t gpte;
	gfn_t target_gfn;

	if (bytes < sizeof(pt_element_t))
		return;

	gpte = *guest_entry;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
		return;

	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);

	target_gfn = (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
	FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK,
		       0, 0, NULL, NULL, target_gfn);
}
/*
 * Shadow a large-page guest directory entry.  The mapped guest physical
 * address is derived from gfn; for 32-bit ptes, when is_cpuid_PSE36()
 * is true, the PSE36 bits of the pde are folded into it as well.  The
 * granted rights are the requested access_bits masked by the pde.
 */
static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
			   u64 *shadow_pte, u64 access_bits,
			   int user_fault, int write_fault, int *ptwrite,
			   struct guest_walker *walker, gfn_t gfn)
{
	u64 granted = access_bits & gpde;
	gpa_t guest_addr = (gpa_t)gfn << PAGE_SHIFT;

	if (PTTYPE == 32 && is_cpuid_PSE36())
		guest_addr |= (gpde & PT32_DIR_PSE36_MASK) <<
			      (32 - PT32_DIR_PSE36_SHIFT);

	FNAME(set_pte_common)(vcpu, shadow_pte, guest_addr, gpde, granted,
			      user_fault, write_fault, ptwrite, walker, gfn);
}
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*
* Walks the shadow page table from the root towards the page-table
* level for addr, creating missing intermediate shadow pages with
* kvm_mmu_get_page(), then fills the final entry from the guest walk
* result through FNAME(set_pde) or FNAME(set_pte).
*
* vcpu: supplies mmu.root_hpa, mmu.shadow_root_level and mmu.pae_root.
* addr: guest virtual address being mapped.
* walker: result of a previous FNAME(walk_addr) for addr.
* user_fault, write_fault, ptwrite: passed through to set_pde/set_pte.
*
* Returns a pointer to the final shadow entry, or NULL when walker->pte
* is not present.
*/
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *walker,
int user_fault, int write_fault, int *ptwrite)
{
hpa_t shadow_addr;
int level;
u64 *shadow_ent;
/* NOTE(review): prev_shadow_ent is assigned below but never read in this function */
u64 *prev_shadow_ent = NULL;
if (!is_present_pte(walker->pte))
return NULL;
shadow_addr = vcpu->mmu.root_hpa;
level = vcpu->mmu.shadow_root_level;
/* With a PT32E root the walk starts from one of four pae_root entries, picked by address bits 31:30. */
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
--level;
}
for (; ; level--) {
u32 index = SHADOW_PT_INDEX(addr, level);
struct kvm_mmu_page *shadow_page;
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
unsigned hugepage_access = 0;
shadow_ent = ((u64 *)__va(shadow_addr)) + index;
/* Entry already populated: stop at the last level, otherwise follow it down. */
if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
if (level == PT_PAGE_TABLE_LEVEL)
break;
shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
prev_shadow_ent = shadow_ent;
continue;
}
/* Empty entry at the last level: this is the one to fill in below. */
if (level == PT_PAGE_TABLE_LEVEL)
break;
/*
* A shadow page is needed for the next level down.  When the guest
* walk ended on a directory-level entry and the next shadow level
* is the page-table level, the shadow page is requested as
* metaphysical, with the user/writable/NX bits of the guest entry
* packed into hugepage_access.
*/
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
hugepage_access = walker->pte;
hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
if (walker->pte & PT64_NX_MASK)
hugepage_access |= (1 << 2);
hugepage_access >>= PT_WRITABLE_SHIFT;
table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
} else {
metaphysical = 0;
table_gfn = walker->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, hugepage_access,
shadow_ent);
shadow_addr = __pa(shadow_page->spt);
/* Intermediate shadow entries are installed present, accessed, writable and user. */
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*shadow_ent = shadow_pte;
prev_shadow_ent = shadow_ent;
}
/* Fill the final entry according to where the guest walk ended. */
if (walker->level == PT_DIRECTORY_LEVEL) {
FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
walker->inherited_ar, user_fault, write_fault,
ptwrite, walker, walker->gfn);
} else {
ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
walker->inherited_ar, user_fault, write_fault,
ptwrite, walker, walker->gfn);
}
return shadow_ent;
}
/*
 * Shadow page fault handler.
 *
 * A fault may reach this point for any of these reasons:
 *   - no shadow pte exists yet for the guest pte
 *   - a write went through a shadow pte kept read only so that the dirty
 *     bit can be set
 *   - a write went through a shadow pte kept read only so that the page
 *     dirty bitmap can be updated when userspace requests it
 *   - the access is mmio, for which a present shadow pte is never installed
 *   - the guest pte itself is not present, not writable or not executable,
 *     i.e. an ordinary guest page fault
 *
 * Returns: 1 if the instruction needs to be emulated, 0 otherwise, or a
 *          negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	struct guest_walker walker;
	u64 *spte;
	int pt_written = 0;
	int rc;
	int is_write = error_code & PFERR_WRITE_MASK;
	int is_user = error_code & PFERR_USER_MASK;
	int is_fetch = error_code & PFERR_FETCH_MASK;

	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	rc = mmu_topup_memory_caches(vcpu);
	if (rc)
		return rc;

	/*
	 * Walk the guest translation for the faulting address.  If the walk
	 * fails the guest has no mapping for it, so the fault is reflected
	 * back into the guest.
	 */
	if (!FNAME(walk_addr)(&walker, vcpu, addr, is_write, is_user,
			      is_fetch)) {
		pgprintk("%s: guest page fault\n", __FUNCTION__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	spte = FNAME(fetch)(vcpu, addr, &walker, is_user, is_write,
			    &pt_written);
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
		 spte, *spte, pt_written);

	if (!pt_written)
		vcpu->last_pt_write_count = 0; /* reset fork detector */

	/* mmio is always handled by emulating the instruction. */
	if (is_io_pte(*spte))
		return 1;

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");

	return pt_written;
}
/*
 * Translate the guest virtual address @vaddr to a guest physical address.
 *
 * The guest walk is performed with the write, user and fetch arguments all
 * 0.  Returns UNMAPPED_GVA when the walk fails; otherwise the frame found
 * by the walker combined with the in-page offset of @vaddr.
 */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;

	if (!FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0))
		return UNMAPPED_GVA;

	return ((gpa_t)walker.gfn << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
}
/*
 * Drop every macro this template relied on.
 * NOTE(review): presumably so the file can be included again with a
 * different PTTYPE -- confirm against the including file.
 */
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef SHADOW_PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_MAX_FULL_LEVELS

File diff suppressed because it is too large Load Diff

View File

@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
header-y += boot.h header-y += boot.h
header-y += bootparam.h header-y += bootparam.h
header-y += debugreg.h header-y += debugreg.h
header-y += kvm.h
header-y += ldt.h header-y += ldt.h
header-y += msr-index.h header-y += msr-index.h
header-y += prctl.h header-y += prctl.h

191
include/asm-x86/kvm.h Normal file
View File

@ -0,0 +1,191 @@
#ifndef __LINUX_KVM_X86_H
#define __LINUX_KVM_X86_H
/*
* KVM x86 specific structures and definitions
*
*/
#include <asm/types.h>
#include <linux/ioctl.h>
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
/*
 * Describes one memory alias: a guest physical range given by
 * guest_phys_addr and memory_size, together with a target physical address.
 * NOTE(review): presumably accesses to the range are redirected to
 * target_phys_addr -- confirm against the ioctl that consumes this.
 */
struct kvm_memory_alias {
__u32 slot; /* this has a different namespace than memory slots */
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 target_phys_addr;
};
/*
 * for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP
 *
 * State of one PIC.  The structure consists of sixteen __u8 members and
 * nothing else.
 */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
__u8 irr; /* interrupt request register */
__u8 imr; /* interrupt mask register */
__u8 isr; /* interrupt service register */
__u8 priority_add; /* highest irq priority */
__u8 irq_base;
__u8 read_reg_select;
__u8 poll;
__u8 special_mask;
__u8 init_state;
__u8 auto_eoi;
__u8 rotate_on_auto_eoi;
__u8 special_fully_nested_mode;
__u8 init4; /* true if 4 byte init */
__u8 elcr; /* PIIX edge/trigger selection */
__u8 elcr_mask;
};
#define KVM_IOAPIC_NUM_PINS 24
/*
 * IOAPIC state: a few scalar registers followed by a redirection table with
 * one entry per pin (KVM_IOAPIC_NUM_PINS entries).
 */
struct kvm_ioapic_state {
__u64 base_address;
__u32 ioregsel;
__u32 id;
__u32 irr;
__u32 pad;
/*
 * Each entry can be read as the raw 64-bit value `bits` or through the
 * decoded `fields` view.  The members of `fields` add up to 64 bits
 * (8 + 8 + 8 + 32 + 8); the placement of the bit-fields within their bytes
 * is compiler-defined.
 */
union {
__u64 bits;
struct {
__u8 vector;
__u8 delivery_mode:3;
__u8 dest_mode:1;
__u8 delivery_status:1;
__u8 polarity:1;
__u8 remote_irr:1;
__u8 trig_mode:1;
__u8 mask:1;
__u8 reserve:7;
__u8 reserved[4];
__u8 dest_id;
} fields;
} redirtbl[KVM_IOAPIC_NUM_PINS];
};
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
/*
 * for KVM_GET_REGS and KVM_SET_REGS
 *
 * Eighteen 64-bit values: sixteen registers named rax..r15, followed by rip
 * and rflags.
 */
struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 rax, rbx, rcx, rdx;
__u64 rsi, rdi, rsp, rbp;
__u64 r8, r9, r10, r11;
__u64 r12, r13, r14, r15;
__u64 rip, rflags;
};
/*
 * for KVM_GET_LAPIC and KVM_SET_LAPIC
 *
 * An opaque byte image of KVM_APIC_REG_SIZE (0x400) bytes.
 * NOTE(review): presumably the local APIC register contents -- confirm.
 */
#define KVM_APIC_REG_SIZE 0x400
struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
};
/*
 * One segment register: base, limit and selector, followed by one __u8 per
 * attribute.  The trailing padding member brings the members to 24 bytes.
 * Used for every segment member of struct kvm_sregs.
 */
struct kvm_segment {
__u64 base;
__u32 limit;
__u16 selector;
__u8 type;
__u8 present, dpl, db, s, l, g, avl;
__u8 unusable;
__u8 padding;
};
/*
 * One descriptor table register (base and limit); used for the gdt and idt
 * members of struct kvm_sregs.  padding[] brings the members to 16 bytes.
 */
struct kvm_dtable {
__u64 base;
__u16 limit;
__u16 padding[3];
};
/*
 * for KVM_GET_SREGS and KVM_SET_SREGS
 *
 * Segment registers, descriptor tables, control registers, efer and
 * apic_base, plus a bitmap sized to hold KVM_NR_INTERRUPTS bits rounded up
 * to whole 64-bit words.
 */
struct kvm_sregs {
/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
struct kvm_segment cs, ds, es, fs, gs, ss;
struct kvm_segment tr, ldt;
struct kvm_dtable gdt, idt;
__u64 cr0, cr2, cr3, cr4, cr8;
__u64 efer;
__u64 apic_base;
/* NOTE(review): presumably one bit per interrupt vector -- confirm. */
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
/*
 * for KVM_GET_FPU and KVM_SET_FPU
 *
 * fpr holds 8 entries of 16 bytes and xmm holds 16 entries of 16 bytes; the
 * remaining members are scalar control/status values.  pad1 and pad2 are
 * explicit padding.
 */
struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
__u8 ftwx; /* in fxsave format */
__u8 pad1;
__u16 last_opcode;
__u64 last_ip;
__u64 last_dp;
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
};
/*
 * A single MSR: its index and 64-bit data.  The reserved member keeps data
 * on an 8-byte offset.  Element type of kvm_msrs.entries[].
 */
struct kvm_msr_entry {
__u32 index;
__u32 reserved;
__u64 data;
};
/*
 * for KVM_GET_MSRS and KVM_SET_MSRS
 *
 * Variable-length message: this header is followed by nmsrs entries.  The
 * zero-length array contributes nothing to sizeof(struct kvm_msrs).
 */
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
struct kvm_msr_entry entries[0];
};
/*
 * for KVM_GET_MSR_INDEX_LIST
 *
 * Variable-length list of MSR indices.  The zero-length array contributes
 * nothing to sizeof(struct kvm_msr_list).
 * NOTE(review): the field comment says "entries" but the array here is
 * named indices -- confirm the intended wording.
 */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
__u32 indices[0];
};
/*
 * One cpuid entry: a function number and the four values named eax, ebx,
 * ecx and edx.  padding rounds the structure to six 32-bit words.  Element
 * type of kvm_cpuid.entries[].
 */
struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
};
/*
 * for KVM_SET_CPUID
 *
 * Variable-length message: this header is followed by kvm_cpuid_entry
 * elements.
 * NOTE(review): presumably nent is the number of entries -- confirm.
 */
struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
};
/*
 * Extended cpuid entry: like kvm_cpuid_entry but with additional index and
 * flags members and three words of padding, ten 32-bit words in total.
 * Element type of kvm_cpuid2.entries[].
 * NOTE(review): flags presumably takes the KVM_CPUID_FLAG_* values defined
 * right after this structure -- confirm.
 */
struct kvm_cpuid_entry2 {
__u32 function;
__u32 index;
__u32 flags;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding[3];
};
#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
/*
 * for KVM_SET_CPUID2
 *
 * Variable-length message: this header is followed by kvm_cpuid_entry2
 * elements.
 * NOTE(review): presumably nent is the number of entries -- confirm.
 */
struct kvm_cpuid2 {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry2 entries[0];
};
#endif

View File

@ -1,23 +1,24 @@
#ifndef __KVM_H #/*
#define __KVM_H * Kernel-based Virtual Machine driver for Linux
*
/* * This header defines architecture specific interfaces, x86 version
*
* This work is licensed under the terms of the GNU GPL, version 2. See * This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory. * the COPYING file in the top-level directory.
*
*/ */
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
#include <linux/types.h> #include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/preempt.h>
#include <asm/signal.h>
#include <linux/kvm.h> #include <linux/kvm.h>
#include <linux/kvm_para.h> #include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <asm/desc.h>
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@ -37,15 +38,8 @@
#define INVALID_PAGE (~(hpa_t)0) #define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0) #define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 4
#define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 8
#define KVM_NUM_MMU_PAGES 1024
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
#define DE_VECTOR 0 #define DE_VECTOR 0
#define UD_VECTOR 6
#define NM_VECTOR 7 #define NM_VECTOR 7
#define DF_VECTOR 8 #define DF_VECTOR 8
#define TS_VECTOR 10 #define TS_VECTOR 10
@ -59,119 +53,20 @@
#define IOPL_SHIFT 12 #define IOPL_SHIFT 12
#define KVM_PIO_PAGE_OFFSET 1 #define KVM_ALIAS_SLOTS 4
/* #define KVM_PERMILLE_MMU_PAGES 20
* vcpu->requests bit members #define KVM_MIN_ALLOC_MMU_PAGES 64
*/ #define KVM_NUM_MMU_PAGES 1024
#define KVM_TLB_FLUSH 0 #define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
/* extern spinlock_t kvm_lock;
* Address types: extern struct list_head vm_list;
*
* gva - guest virtual address
* gpa - guest physical address
* gfn - guest frame number
* hva - host virtual address
* hpa - host physical address
* hfn - host frame number
*/
typedef unsigned long gva_t;
typedef u64 gpa_t;
typedef unsigned long gfn_t;
typedef unsigned long hva_t;
typedef u64 hpa_t;
typedef unsigned long hfn_t;
#define NR_PTE_CHAIN_ENTRIES 5
struct kvm_pte_chain {
u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
struct hlist_node link;
};
/*
* kvm_mmu_page_role, below, is defined as:
*
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
* bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde
*/
union kvm_mmu_page_role {
unsigned word;
struct {
unsigned glevels : 4;
unsigned level : 4;
unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1;
unsigned hugepage_access : 3;
};
};
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
gfn_t gfn;
union kvm_mmu_page_role role;
u64 *spt;
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
};
};
struct kvm_vcpu; struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache; struct kvm;
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
* mode.
*/
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
u64 *pae_root;
};
#define KVM_NR_MEM_OBJS 20
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_guest_debug {
int enabled;
unsigned long bp[4];
int singlestep;
};
enum { enum {
VCPU_REGS_RAX = 0, VCPU_REGS_RAX = 0,
@ -206,109 +101,94 @@ enum {
VCPU_SREG_LDTR, VCPU_SREG_LDTR,
}; };
struct kvm_pio_request { #include <asm/kvm_x86_emulate.h>
unsigned long count;
int cur_count;
struct page *guest_pages[2];
unsigned guest_page_offset;
int in;
int port;
int size;
int string;
int down;
int rep;
};
struct kvm_stat { #define KVM_NR_MEM_OBJS 40
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
u32 light_exits;
u32 efer_reload;
};
struct kvm_io_device {
void (*read)(struct kvm_io_device *this,
gpa_t addr,
int len,
void *val);
void (*write)(struct kvm_io_device *this,
gpa_t addr,
int len,
const void *val);
int (*in_range)(struct kvm_io_device *this, gpa_t addr);
void (*destructor)(struct kvm_io_device *this);
void *private;
};
static inline void kvm_iodevice_read(struct kvm_io_device *dev,
gpa_t addr,
int len,
void *val)
{
dev->read(dev, addr, len, val);
}
static inline void kvm_iodevice_write(struct kvm_io_device *dev,
gpa_t addr,
int len,
const void *val)
{
dev->write(dev, addr, len, val);
}
static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
{
return dev->in_range(dev, addr);
}
static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
if (dev->destructor)
dev->destructor(dev);
}
/* /*
* It would be nice to use something smarter than a linear search, TBD... * We don't want allocation failures within the mmu code, so we preallocate
* Thankfully we dont expect many devices to register (famous last words :), * enough memory for a single page fault in a cache.
* so until then it will suffice. At least its abstracted so we can change
* in one place.
*/ */
struct kvm_io_bus { struct kvm_mmu_memory_cache {
int dev_count; int nobjs;
#define NR_IOBUS_DEVS 6 void *objects[KVM_NR_MEM_OBJS];
struct kvm_io_device *devs[NR_IOBUS_DEVS];
}; };
void kvm_io_bus_init(struct kvm_io_bus *bus); #define NR_PTE_CHAIN_ENTRIES 5
void kvm_io_bus_destroy(struct kvm_io_bus *bus);
struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
struct kvm_vcpu { struct kvm_pte_chain {
struct kvm *kvm; u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
struct preempt_notifier preempt_notifier; struct hlist_node link;
int vcpu_id; };
struct mutex mutex;
int cpu; /*
* kvm_mmu_page_role, below, is defined as:
*
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
* bits 17:19 - common access permissions for all ptes in this shadow page
*/
union kvm_mmu_page_role {
unsigned word;
struct {
unsigned glevels : 4;
unsigned level : 4;
unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1;
unsigned access : 3;
};
};
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
gfn_t gfn;
union kvm_mmu_page_role role;
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
};
};
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
* mode.
*/
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
u64 *pae_root;
};
struct kvm_vcpu_arch {
u64 host_tsc; u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open; int interrupt_window_open;
int guest_mode;
unsigned long requests;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
@ -317,9 +197,6 @@ struct kvm_vcpu {
unsigned long cr0; unsigned long cr0;
unsigned long cr2; unsigned long cr2;
unsigned long cr3; unsigned long cr3;
gpa_t para_state_gpa;
struct page *para_state_page;
gpa_t hypercall_gpa;
unsigned long cr4; unsigned long cr4;
unsigned long cr8; unsigned long cr8;
u64 pdptrs[4]; /* pae */ u64 pdptrs[4]; /* pae */
@ -334,6 +211,7 @@ struct kvm_vcpu {
int mp_state; int mp_state;
int sipi_vector; int sipi_vector;
u64 ia32_misc_enable_msr; u64 ia32_misc_enable_msr;
bool tpr_access_reporting;
struct kvm_mmu mmu; struct kvm_mmu mmu;
@ -344,29 +222,26 @@ struct kvm_vcpu {
gfn_t last_pt_write_gfn; gfn_t last_pt_write_gfn;
int last_pt_write_count; int last_pt_write_count;
u64 *last_pte_updated;
struct kvm_guest_debug guest_debug; struct {
gfn_t gfn; /* presumed gfn during guest pte update */
struct page *page; /* page corresponding to that gfn */
} update_pte;
struct i387_fxsave_struct host_fx_image; struct i387_fxsave_struct host_fx_image;
struct i387_fxsave_struct guest_fx_image; struct i387_fxsave_struct guest_fx_image;
int fpu_active;
int guest_fpu_loaded;
int mmio_needed;
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
unsigned char mmio_data[8];
gpa_t mmio_phys_addr;
gva_t mmio_fault_cr2; gva_t mmio_fault_cr2;
struct kvm_pio_request pio; struct kvm_pio_request pio;
void *pio_data; void *pio_data;
wait_queue_head_t wq;
int sigset_active; struct kvm_queued_exception {
sigset_t sigset; bool pending;
bool has_error_code;
struct kvm_stat stat; u8 nr;
u32 error_code;
} exception;
struct { struct {
int active; int active;
@ -381,7 +256,10 @@ struct kvm_vcpu {
int halt_request; /* real mode on Intel only */ int halt_request; /* real mode on Intel only */
int cpuid_nent; int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
}; };
struct kvm_mem_alias { struct kvm_mem_alias {
@ -390,51 +268,58 @@ struct kvm_mem_alias {
gfn_t target_gfn; gfn_t target_gfn;
}; };
struct kvm_memory_slot { struct kvm_arch{
gfn_t base_gfn;
unsigned long npages;
unsigned long flags;
struct page **phys_mem;
unsigned long *dirty_bitmap;
};
struct kvm {
struct mutex lock; /* protects everything except vcpus */
int naliases; int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages;
unsigned int n_alloc_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/* /*
* Hash table of struct kvm_mmu_page. * Hash table of struct kvm_mmu_page.
*/ */
struct list_head active_mmu_pages; struct list_head active_mmu_pages;
int n_free_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
unsigned long rmap_overflow;
struct list_head vm_list;
struct file *filp;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_pic *vpic; struct kvm_pic *vpic;
struct kvm_ioapic *vioapic; struct kvm_ioapic *vioapic;
int round_robin_prev_vcpu; int round_robin_prev_vcpu;
unsigned int tss_addr;
struct page *apic_access_page;
}; };
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) struct kvm_vm_stat {
{ u32 mmu_shadow_zapped;
return kvm->vpic; u32 mmu_pte_write;
} u32 mmu_pte_updated;
u32 mmu_pde_zapped;
u32 mmu_flooded;
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 remote_tlb_flush;
};
static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) struct kvm_vcpu_stat {
{ u32 pf_fixed;
return kvm->vioapic; u32 pf_guest;
} u32 tlb_flush;
u32 invlpg;
static inline int irqchip_in_kernel(struct kvm *kvm) u32 exits;
{ u32 io_exits;
return pic_irqchip(kvm) != 0; u32 mmio_exits;
} u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
u32 fpu_reload;
u32 insn_emulation;
u32 insn_emulation_fail;
};
struct descriptor_table { struct descriptor_table {
u16 limit; u16 limit;
@ -449,11 +334,12 @@ struct kvm_x86_ops {
void (*check_processor_compatibility)(void *rtn); void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */ int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */ void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
/* Create, but do not attach this VCPU */ /* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_reset)(struct kvm_vcpu *vcpu); int (*vcpu_reset)(struct kvm_vcpu *vcpu);
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@ -489,10 +375,6 @@ struct kvm_x86_ops {
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*tlb_flush)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
unsigned long addr, u32 err_code);
void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
@ -501,54 +383,31 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr); unsigned char *hypercall_addr);
int (*get_irq)(struct kvm_vcpu *vcpu); int (*get_irq)(struct kvm_vcpu *vcpu);
void (*set_irq)(struct kvm_vcpu *vcpu, int vec); void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code);
bool (*exception_injected)(struct kvm_vcpu *vcpu);
void (*inject_pending_irq)(struct kvm_vcpu *vcpu); void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
struct kvm_run *run); struct kvm_run *run);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
}; };
extern struct kvm_x86_ops *kvm_x86_ops; extern struct kvm_x86_ops *kvm_x86_ops;
/* The guest did something we don't support. */
#define pr_unimpl(vcpu, fmt, ...) \
do { \
if (printk_ratelimit()) \
printk(KERN_ERR "kvm: %i: cpu%i " fmt, \
current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
} while(0)
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
struct module *module);
void kvm_exit_x86(void);
int kvm_mmu_module_init(void); int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void); void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu); void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_zap_all(struct kvm *kvm);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
extern hpa_t bad_page_address;
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
enum emulation_result { enum emulation_result {
EMULATE_DONE, /* no further processing */ EMULATE_DONE, /* no further processing */
@ -556,8 +415,10 @@ enum emulation_result {
EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_FAIL, /* can't emulate this instruction */
}; };
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long cr2, u16 error_code); unsigned long cr2, u16 error_code, int emulation_type);
void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@ -572,7 +433,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
struct x86_emulate_ctxt; struct x86_emulate_ctxt;
int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port); int size, unsigned port);
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int down, int size, unsigned long count, int down,
@ -581,7 +442,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu); int emulate_clts(struct kvm_vcpu *vcpu);
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long *dest); unsigned long *dest);
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value); unsigned long value);
@ -597,15 +458,15 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
u32 error_code);
void fx_init(struct kvm_vcpu *vcpu); void fx_init(struct kvm_vcpu *vcpu);
void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
int emulator_read_std(unsigned long addr, int emulator_read_std(unsigned long addr,
void *val, void *val,
unsigned int bytes, unsigned int bytes,
struct kvm_vcpu *vcpu); struct kvm_vcpu *vcpu);
int emulator_write_emulated(unsigned long addr, int emulator_write_emulated(unsigned long addr,
@ -615,6 +476,7 @@ int emulator_write_emulated(unsigned long addr,
unsigned long segment_base(u16 selector); unsigned long segment_base(u16 selector);
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes); const u8 *new, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
@ -622,66 +484,14 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu);
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
static inline void kvm_guest_enter(void) int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
{
current->flags |= PF_VCPU;
}
static inline void kvm_guest_exit(void) int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);
{
current->flags &= ~PF_VCPU;
}
static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
u32 error_code) int complete_pio(struct kvm_vcpu *vcpu);
{
return vcpu->mmu.page_fault(vcpu, gva, error_code);
}
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
__kvm_mmu_free_some_pages(vcpu);
}
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
if (likely(vcpu->mmu.root_hpa != INVALID_PAGE))
return 0;
return kvm_mmu_load(vcpu);
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return vcpu->shadow_efer & EFER_LME;
#else
return 0;
#endif
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & X86_CR4_PAE;
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & X86_CR4_PSE;
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & X86_CR0_PG;
}
static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
{
return slot - kvm->memslots;
}
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{ {
@ -693,55 +503,55 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
static inline u16 read_fs(void) static inline u16 read_fs(void)
{ {
u16 seg; u16 seg;
asm ("mov %%fs, %0" : "=g"(seg)); asm("mov %%fs, %0" : "=g"(seg));
return seg; return seg;
} }
static inline u16 read_gs(void) static inline u16 read_gs(void)
{ {
u16 seg; u16 seg;
asm ("mov %%gs, %0" : "=g"(seg)); asm("mov %%gs, %0" : "=g"(seg));
return seg; return seg;
} }
static inline u16 read_ldt(void) static inline u16 read_ldt(void)
{ {
u16 ldt; u16 ldt;
asm ("sldt %0" : "=g"(ldt)); asm("sldt %0" : "=g"(ldt));
return ldt; return ldt;
} }
static inline void load_fs(u16 sel) static inline void load_fs(u16 sel)
{ {
asm ("mov %0, %%fs" : : "rm"(sel)); asm("mov %0, %%fs" : : "rm"(sel));
} }
static inline void load_gs(u16 sel) static inline void load_gs(u16 sel)
{ {
asm ("mov %0, %%gs" : : "rm"(sel)); asm("mov %0, %%gs" : : "rm"(sel));
} }
#ifndef load_ldt #ifndef load_ldt
static inline void load_ldt(u16 sel) static inline void load_ldt(u16 sel)
{ {
asm ("lldt %0" : : "rm"(sel)); asm("lldt %0" : : "rm"(sel));
} }
#endif #endif
static inline void get_idt(struct descriptor_table *table) static inline void get_idt(struct descriptor_table *table)
{ {
asm ("sidt %0" : "=m"(*table)); asm("sidt %0" : "=m"(*table));
} }
static inline void get_gdt(struct descriptor_table *table) static inline void get_gdt(struct descriptor_table *table)
{ {
asm ("sgdt %0" : "=m"(*table)); asm("sgdt %0" : "=m"(*table));
} }
static inline unsigned long read_tr_base(void) static inline unsigned long read_tr_base(void)
{ {
u16 tr; u16 tr;
asm ("str %0" : "=g"(tr)); asm("str %0" : "=g"(tr));
return segment_base(tr); return segment_base(tr);
} }
@ -757,17 +567,17 @@ static inline unsigned long read_msr(unsigned long msr)
static inline void fx_save(struct i387_fxsave_struct *image) static inline void fx_save(struct i387_fxsave_struct *image)
{ {
asm ("fxsave (%0)":: "r" (image)); asm("fxsave (%0)":: "r" (image));
} }
static inline void fx_restore(struct i387_fxsave_struct *image) static inline void fx_restore(struct i387_fxsave_struct *image)
{ {
asm ("fxrstor (%0)":: "r" (image)); asm("fxrstor (%0)":: "r" (image));
} }
static inline void fpu_init(void) static inline void fpu_init(void)
{ {
asm ("finit"); asm("finit");
} }
static inline u32 get_rdx_init_val(void) static inline u32 get_rdx_init_val(void)
@ -775,6 +585,11 @@ static inline u32 get_rdx_init_val(void)
return 0x600; /* P6 family */ return 0x600; /* P6 family */
} }
static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
{
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"

105
include/asm-x86/kvm_para.h Normal file
View File

@ -0,0 +1,105 @@
#ifndef __X86_KVM_PARA_H
#define __X86_KVM_PARA_H
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
/* This CPUID returns a feature bitmap in eax. Before enabling a particular
* paravirtualization, the appropriate feature bit should be checked.
*/
#define KVM_CPUID_FEATURES 0x40000001
#ifdef __KERNEL__
#include <asm/processor.h>
/* This instruction is vmcall. On non-VT architectures, it will generate a
* trap that we will then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the
 * vmmcall instruction. The hypervisor may replace it with something else but
 * only the instructions are guaranteed to be supported.
 *
 * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
 * The hypercall number should be placed in rax and the return value will be
 * placed in rax. No other registers will be clobbered unless explicitly
 * noted by the particular hypercall.
 */
/*
 * kvm_hypercall0 - issue a KVM hypercall that takes no arguments.
 * @nr: hypercall number, loaded into the "a" register (eax/rax).
 *
 * Returns whatever the host leaves in the "a" register.
 *
 * The "memory" clobber is required: the host may read or modify guest
 * memory while servicing the call, so the compiler must not keep memory
 * values cached in registers or move loads/stores across the instruction.
 */
static inline long kvm_hypercall0(unsigned int nr)
{
	long ret;

	asm volatile(KVM_HYPERCALL
		     : "=a"(ret)
		     : "a"(nr)
		     : "memory");
	return ret;
}
/*
 * kvm_hypercall1 - issue a KVM hypercall with one argument.
 * @nr: hypercall number, loaded into the "a" register (eax/rax).
 * @p1: first argument, loaded into the "b" register (ebx/rbx).
 *
 * Returns whatever the host leaves in the "a" register.
 *
 * The "memory" clobber is required: the host may read or modify guest
 * memory (for example through a pointer passed as an argument), so the
 * compiler must not cache memory in registers or reorder accesses across
 * the instruction.
 */
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
	long ret;

	asm volatile(KVM_HYPERCALL
		     : "=a"(ret)
		     : "a"(nr), "b"(p1)
		     : "memory");
	return ret;
}
/*
 * kvm_hypercall2 - issue a KVM hypercall with two arguments.
 * @nr: hypercall number, loaded into the "a" register (eax/rax).
 * @p1: first argument, loaded into the "b" register (ebx/rbx).
 * @p2: second argument, loaded into the "c" register (ecx/rcx).
 *
 * Returns whatever the host leaves in the "a" register.
 *
 * The "memory" clobber is required: the host may read or modify guest
 * memory while servicing the call, so the compiler must not cache memory
 * in registers or reorder accesses across the instruction.
 */
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
				  unsigned long p2)
{
	long ret;

	asm volatile(KVM_HYPERCALL
		     : "=a"(ret)
		     : "a"(nr), "b"(p1), "c"(p2)
		     : "memory");
	return ret;
}
/*
 * kvm_hypercall3 - issue a KVM hypercall with three arguments.
 * @nr: hypercall number, loaded into the "a" register (eax/rax).
 * @p1: first argument, loaded into the "b" register (ebx/rbx).
 * @p2: second argument, loaded into the "c" register (ecx/rcx).
 * @p3: third argument, loaded into the "d" register (edx/rdx).
 *
 * Returns whatever the host leaves in the "a" register.
 *
 * The "memory" clobber is required: the host may read or modify guest
 * memory while servicing the call, so the compiler must not cache memory
 * in registers or reorder accesses across the instruction.
 */
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
				  unsigned long p2, unsigned long p3)
{
	long ret;

	asm volatile(KVM_HYPERCALL
		     : "=a"(ret)
		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
		     : "memory");
	return ret;
}
/*
 * kvm_hypercall4 - issue a KVM hypercall with four arguments.
 * @nr: hypercall number, loaded into the "a" register (eax/rax).
 * @p1: first argument, loaded into the "b" register (ebx/rbx).
 * @p2: second argument, loaded into the "c" register (ecx/rcx).
 * @p3: third argument, loaded into the "d" register (edx/rdx).
 * @p4: fourth argument, loaded into the "S" register (esi/rsi).
 *
 * Returns whatever the host leaves in the "a" register.
 *
 * The "memory" clobber is required: the host may read or modify guest
 * memory while servicing the call, so the compiler must not cache memory
 * in registers or reorder accesses across the instruction.
 */
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
				  unsigned long p2, unsigned long p3,
				  unsigned long p4)
{
	long ret;

	asm volatile(KVM_HYPERCALL
		     : "=a"(ret)
		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
		     : "memory");
	return ret;
}
/*
 * kvm_para_available - detect whether we are running as a KVM guest.
 *
 * Queries CPUID leaf KVM_CPUID_SIGNATURE and assembles the bytes returned
 * in ebx, ecx and edx (in that order) into a NUL-terminated string.
 *
 * Returns 1 when that string equals "KVMKVMKVM", 0 otherwise.
 */
static inline int kvm_para_available(void)
{
	unsigned int regs[4];	/* eax, ebx, ecx, edx as filled in by cpuid() */
	char signature[13];

	cpuid(KVM_CPUID_SIGNATURE, &regs[0], &regs[1], &regs[2], &regs[3]);

	/* ebx, ecx and edx sit back to back in regs[1..3]: 12 bytes in all. */
	memcpy(signature, &regs[1], sizeof(signature) - 1);
	signature[sizeof(signature) - 1] = 0;

	return strcmp(signature, "KVMKVMKVM") == 0;
}
/*
 * kvm_arch_para_features - fetch the paravirtualization feature bitmap.
 *
 * Returns the eax value of CPUID leaf KVM_CPUID_FEATURES; each bit
 * advertises one feature and should be tested before that feature is used.
 */
static inline unsigned int kvm_arch_para_features(void)
{
	unsigned int features = cpuid_eax(KVM_CPUID_FEATURES);

	return features;
}
#endif
#endif

View File

@ -62,17 +62,6 @@ struct x86_emulate_ops {
int (*read_std)(unsigned long addr, void *val, int (*read_std)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu); unsigned int bytes, struct kvm_vcpu *vcpu);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for stack operations, and others.
* @addr: [IN ] Linear address to which to write.
* @val: [IN ] Value to write to memory (low-order bytes used as
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr, const void *val,
unsigned int bytes, struct kvm_vcpu *vcpu);
/* /*
* read_emulated: Read bytes from emulated/special memory area. * read_emulated: Read bytes from emulated/special memory area.
* @addr: [IN ] Linear address from which to read. * @addr: [IN ] Linear address from which to read.
@ -112,13 +101,50 @@ struct x86_emulate_ops {
}; };
/* Type, address-of, and value of an instruction's operand. */
struct operand {
enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
unsigned int bytes;
unsigned long val, orig_val, *ptr;
};
struct fetch_cache {
u8 data[15];
unsigned long start;
unsigned long end;
};
struct decode_cache {
u8 twobyte;
u8 b;
u8 lock_prefix;
u8 rep_prefix;
u8 op_bytes;
u8 ad_bytes;
u8 rex_prefix;
struct operand src;
struct operand dst;
unsigned long *override_base;
unsigned int d;
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */
u8 modrm;
u8 modrm_mod;
u8 modrm_reg;
u8 modrm_rm;
u8 use_modrm_ea;
unsigned long modrm_ea;
unsigned long modrm_val;
struct fetch_cache fetch;
};
struct x86_emulate_ctxt { struct x86_emulate_ctxt {
/* Register state before/after emulation. */ /* Register state before/after emulation. */
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
/* Linear faulting address (if emulating a page-faulting instruction). */ /* Linear faulting address (if emulating a page-faulting instruction). */
unsigned long eflags; unsigned long eflags;
unsigned long cr2;
/* Emulated execution mode, represented by an X86EMUL_MODE value. */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode; int mode;
@ -129,8 +155,16 @@ struct x86_emulate_ctxt {
unsigned long ss_base; unsigned long ss_base;
unsigned long gs_base; unsigned long gs_base;
unsigned long fs_base; unsigned long fs_base;
/* decode cache */
struct decode_cache decode;
}; };
/* Repeat String Operation Prefix */
#define REPE_PREFIX 1
#define REPNE_PREFIX 2
/* Execution mode, passed to the emulator. */ /* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */
#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
@ -144,12 +178,9 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif #endif
/* int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
* x86_emulate_memop: Emulate an instruction that faulted attempting to struct x86_emulate_ops *ops);
* read/write a 'special' memory area. int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
* Returns -1 on failure, 0 on success. struct x86_emulate_ops *ops);
*/
int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
#endif /* __X86_EMULATE_H__ */ #endif /* __X86_EMULATE_H__ */

View File

@ -100,7 +100,6 @@ header-y += iso_fs.h
header-y += ixjuser.h header-y += ixjuser.h
header-y += jffs2.h header-y += jffs2.h
header-y += keyctl.h header-y += keyctl.h
header-y += kvm.h
header-y += limits.h header-y += limits.h
header-y += lock_dlm_plock.h header-y += lock_dlm_plock.h
header-y += magic.h header-y += magic.h
@ -256,6 +255,7 @@ unifdef-y += kd.h
unifdef-y += kernelcapi.h unifdef-y += kernelcapi.h
unifdef-y += kernel.h unifdef-y += kernel.h
unifdef-y += keyboard.h unifdef-y += keyboard.h
unifdef-$(CONFIG_HAVE_KVM) += kvm.h
unifdef-y += llc.h unifdef-y += llc.h
unifdef-y += loop.h unifdef-y += loop.h
unifdef-y += lp.h unifdef-y += lp.h

View File

@ -9,12 +9,10 @@
#include <asm/types.h> #include <asm/types.h>
#include <linux/ioctl.h> #include <linux/ioctl.h>
#include <asm/kvm.h>
#define KVM_API_VERSION 12 #define KVM_API_VERSION 12
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
/* for KVM_CREATE_MEMORY_REGION */ /* for KVM_CREATE_MEMORY_REGION */
struct kvm_memory_region { struct kvm_memory_region {
__u32 slot; __u32 slot;
@ -23,16 +21,18 @@ struct kvm_memory_region {
__u64 memory_size; /* bytes */ __u64 memory_size; /* bytes */
}; };
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size; /* bytes */
__u64 userspace_addr; /* start of the userspace allocated memory */
};
/* for kvm_memory_region::flags */ /* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL #define KVM_MEM_LOG_DIRTY_PAGES 1UL
struct kvm_memory_alias {
__u32 slot; /* this has a different namespace than memory slots */
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 target_phys_addr;
};
/* for KVM_IRQ_LINE */ /* for KVM_IRQ_LINE */
struct kvm_irq_level { struct kvm_irq_level {
@ -45,62 +45,18 @@ struct kvm_irq_level {
__u32 level; __u32 level;
}; };
/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
struct kvm_pic_state {
__u8 last_irr; /* edge detection */
__u8 irr; /* interrupt request register */
__u8 imr; /* interrupt mask register */
__u8 isr; /* interrupt service register */
__u8 priority_add; /* highest irq priority */
__u8 irq_base;
__u8 read_reg_select;
__u8 poll;
__u8 special_mask;
__u8 init_state;
__u8 auto_eoi;
__u8 rotate_on_auto_eoi;
__u8 special_fully_nested_mode;
__u8 init4; /* true if 4 byte init */
__u8 elcr; /* PIIX edge/trigger selection */
__u8 elcr_mask;
};
#define KVM_IOAPIC_NUM_PINS 24
struct kvm_ioapic_state {
__u64 base_address;
__u32 ioregsel;
__u32 id;
__u32 irr;
__u32 pad;
union {
__u64 bits;
struct {
__u8 vector;
__u8 delivery_mode:3;
__u8 dest_mode:1;
__u8 delivery_status:1;
__u8 polarity:1;
__u8 remote_irr:1;
__u8 trig_mode:1;
__u8 mask:1;
__u8 reserve:7;
__u8 reserved[4];
__u8 dest_id;
} fields;
} redirtbl[KVM_IOAPIC_NUM_PINS];
};
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
struct kvm_irqchip { struct kvm_irqchip {
__u32 chip_id; __u32 chip_id;
__u32 pad; __u32 pad;
union { union {
char dummy[512]; /* reserving space */ char dummy[512]; /* reserving space */
#ifdef CONFIG_X86
struct kvm_pic_state pic; struct kvm_pic_state pic;
#endif
#if defined(CONFIG_X86) || defined(CONFIG_IA64)
struct kvm_ioapic_state ioapic; struct kvm_ioapic_state ioapic;
#endif
} chip; } chip;
}; };
@ -116,6 +72,7 @@ struct kvm_irqchip {
#define KVM_EXIT_FAIL_ENTRY 9 #define KVM_EXIT_FAIL_ENTRY 9
#define KVM_EXIT_INTR 10 #define KVM_EXIT_INTR 10
#define KVM_EXIT_SET_TPR 11 #define KVM_EXIT_SET_TPR 11
#define KVM_EXIT_TPR_ACCESS 12
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run { struct kvm_run {
@ -174,90 +131,17 @@ struct kvm_run {
__u32 longmode; __u32 longmode;
__u32 pad; __u32 pad;
} hypercall; } hypercall;
/* KVM_EXIT_TPR_ACCESS */
struct {
__u64 rip;
__u32 is_write;
__u32 pad;
} tpr_access;
/* Fix the size of the union. */ /* Fix the size of the union. */
char padding[256]; char padding[256];
}; };
}; };
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 rax, rbx, rcx, rdx;
__u64 rsi, rdi, rsp, rbp;
__u64 r8, r9, r10, r11;
__u64 r12, r13, r14, r15;
__u64 rip, rflags;
};
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
__u8 ftwx; /* in fxsave format */
__u8 pad1;
__u16 last_opcode;
__u64 last_ip;
__u64 last_dp;
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
};
/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
#define KVM_APIC_REG_SIZE 0x400
struct kvm_lapic_state {
char regs[KVM_APIC_REG_SIZE];
};
struct kvm_segment {
__u64 base;
__u32 limit;
__u16 selector;
__u8 type;
__u8 present, dpl, db, s, l, g, avl;
__u8 unusable;
__u8 padding;
};
struct kvm_dtable {
__u64 base;
__u16 limit;
__u16 padding[3];
};
/* for KVM_GET_SREGS and KVM_SET_SREGS */
struct kvm_sregs {
/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
struct kvm_segment cs, ds, es, fs, gs, ss;
struct kvm_segment tr, ldt;
struct kvm_dtable gdt, idt;
__u64 cr0, cr2, cr3, cr4, cr8;
__u64 efer;
__u64 apic_base;
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
struct kvm_msr_entry {
__u32 index;
__u32 reserved;
__u64 data;
};
/* for KVM_GET_MSRS and KVM_SET_MSRS */
struct kvm_msrs {
__u32 nmsrs; /* number of msrs in entries */
__u32 pad;
struct kvm_msr_entry entries[0];
};
/* for KVM_GET_MSR_INDEX_LIST */
struct kvm_msr_list {
__u32 nmsrs; /* number of msrs in entries */
__u32 indices[0];
};
/* for KVM_TRANSLATE */ /* for KVM_TRANSLATE */
struct kvm_translation { struct kvm_translation {
/* in */ /* in */
@ -302,28 +186,24 @@ struct kvm_dirty_log {
}; };
}; };
struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
};
/* for KVM_SET_CPUID */
struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
};
/* for KVM_SET_SIGNAL_MASK */ /* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask { struct kvm_signal_mask {
__u32 len; __u32 len;
__u8 sigset[0]; __u8 sigset[0];
}; };
/* for KVM_TPR_ACCESS_REPORTING */
struct kvm_tpr_access_ctl {
__u32 enabled;
__u32 flags;
__u32 reserved[8];
};
/* for KVM_SET_VAPIC_ADDR */
struct kvm_vapic_addr {
__u64 vapic_addr;
};
#define KVMIO 0xAE #define KVMIO 0xAE
/* /*
@ -347,11 +227,21 @@ struct kvm_signal_mask {
*/ */
#define KVM_CAP_IRQCHIP 0 #define KVM_CAP_IRQCHIP 0
#define KVM_CAP_HLT 1 #define KVM_CAP_HLT 1
#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
#define KVM_CAP_USER_MEMORY 3
#define KVM_CAP_SET_TSS_ADDR 4
#define KVM_CAP_EXT_CPUID 5
#define KVM_CAP_VAPIC 6
/* /*
* ioctls for VM fds * ioctls for VM fds
*/ */
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
/* /*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd. * a vcpu fd.
@ -359,6 +249,7 @@ struct kvm_signal_mask {
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
/* Device model IOC */ /* Device model IOC */
#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
@ -384,5 +275,11 @@ struct kvm_signal_mask {
#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) #define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state)
#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) #define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state)
#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2)
#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
/* Available with KVM_CAP_VAPIC */
#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
/* Available with KVM_CAP_VAPIC */
#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
#endif #endif

299
include/linux/kvm_host.h Normal file
View File

@ -0,0 +1,299 @@
#ifndef __KVM_HOST_H
#define __KVM_HOST_H
/*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#include <linux/types.h>
#include <linux/hardirq.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/preempt.h>
#include <asm/signal.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <asm/kvm_host.h>
#define KVM_MAX_VCPUS 4
#define KVM_MEMORY_SLOTS 8
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_PIO_PAGE_OFFSET 1
/*
* vcpu->requests bit members
*/
#define KVM_REQ_TLB_FLUSH 0
#define KVM_REQ_MIGRATE_TIMER 1
#define KVM_REQ_REPORT_TPR_ACCESS 2
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
/*
 * Guest debugging state; one instance is embedded in every struct kvm_vcpu
 * as its guest_debug member.
 */
struct kvm_guest_debug {
	int enabled;		/* presumably non-zero while guest debugging is on */
	unsigned long bp[4];	/* four breakpoint slots - NOTE(review): likely addresses; confirm */
	int singlestep;		/* presumably non-zero to request single-stepping */
};
/*
 * It would be nice to use something smarter than a linear search, TBD...
 * Thankfully we don't expect many devices to register (famous last words :),
 * so until then it will suffice. At least it's abstracted so we can change
 * it in one place.
 */
/*
 * A fixed-capacity table of I/O devices. struct kvm holds two of these,
 * one for MMIO (mmio_bus) and one for port I/O (pio_bus).
 */
struct kvm_io_bus {
	int dev_count;		/* presumably the number of used entries in devs[] */
#define NR_IOBUS_DEVS 6		/* capacity of devs[] */
	struct kvm_io_device *devs[NR_IOBUS_DEVS];
};
void kvm_io_bus_init(struct kvm_io_bus *bus);
void kvm_io_bus_destroy(struct kvm_io_bus *bus);
struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
/*
 * Architecture-independent state of one virtual CPU. Everything that is
 * specific to the host architecture lives in the embedded 'arch' member.
 */
struct kvm_vcpu {
	struct kvm *kvm;	/* presumably the VM this vcpu belongs to */
	struct preempt_notifier preempt_notifier;
	int vcpu_id;		/* printed as "cpu%i" by pr_unimpl() */
	struct mutex mutex;
	int cpu;
	struct kvm_run *run;
	int guest_mode;
	unsigned long requests;	/* bitmap indexed by the KVM_REQ_* constants */
	struct kvm_guest_debug guest_debug;
	int fpu_active;
	int guest_fpu_loaded;
	wait_queue_head_t wq;
	int sigset_active;
	sigset_t sigset;
	struct kvm_vcpu_stat stat;
	/* MMIO bookkeeping, only present when the kernel has CONFIG_HAS_IOMEM */
#ifdef CONFIG_HAS_IOMEM
	int mmio_needed;
	int mmio_read_completed;
	int mmio_is_write;
	int mmio_size;
	unsigned char mmio_data[8];
	gpa_t mmio_phys_addr;
#endif
	struct kvm_vcpu_arch arch;	/* architecture-specific vcpu state */
};
/*
 * One guest memory slot. struct kvm keeps an array of these (memslots[]),
 * and memslot_id() turns a pointer into that array back into an index.
 */
struct kvm_memory_slot {
	gfn_t base_gfn;			/* presumably the first guest frame number covered */
	unsigned long npages;		/* presumably the slot size in pages */
	unsigned long flags;
	unsigned long *rmap;
	unsigned long *dirty_bitmap;
	unsigned long userspace_addr;	/* NOTE(review): looks like the host userspace address backing the slot - confirm */
	int user_alloc;
};
/*
 * Architecture-independent state of one virtual machine. Architecture
 * specific state lives in the embedded 'arch' member.
 */
struct kvm {
	struct mutex lock; /* protects the vcpus array and APIC accesses */
	spinlock_t mmu_lock;
	struct mm_struct *mm; /* userspace tied to this vm */
	int nmemslots;
	/* room for the userspace-visible slots plus the private ones */
	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
					KVM_PRIVATE_MEM_SLOTS];
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];	/* entries may be NULL */
	struct list_head vm_list;
	struct file *filp;
	struct kvm_io_bus mmio_bus;
	struct kvm_io_bus pio_bus;
	struct kvm_vm_stat stat;
	struct kvm_arch arch;
};
/*
 * The guest did something we don't support.
 *
 * Logs a KERN_ERR message prefixed with "kvm: <tgid>: cpu<vcpu_id> ",
 * where tgid comes from current and vcpu_id from @vcpu. The message is
 * only emitted when printk_ratelimit() allows it. @fmt must be a string
 * literal because it is pasted onto the prefix.
 */
#define pr_unimpl(vcpu, fmt, ...) \
do { \
	if (printk_ratelimit()) \
		printk(KERN_ERR "kvm: %i: cpu%i " fmt, \
		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
} while (0)
/*
 * Debug-level logging helpers. kvm_printf() ignores its @kvm argument and
 * simply forwards the format and arguments to printk() at KERN_DEBUG;
 * vcpu_printf() forwards to kvm_printf() with the vcpu's VM.
 *
 * The macro argument is parenthesized so that an expression such as
 * "&vcpus[i]" or "cond ? a : b" can be passed safely.
 */
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf((vcpu)->kvm, fmt)
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
void decache_vcpus_on_cpu(int cpu);
int kvm_init(void *opaque, unsigned int vcpu_size,
struct module *module);
void kvm_exit(void);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
/*
 * is_error_hpa - test whether a host physical address carries the error flag.
 * @hpa: host physical address to inspect.
 *
 * Returns 1 when the most significant bit (HPA_ERR_MASK) is set, else 0.
 */
static inline int is_error_hpa(hpa_t hpa)
{
	return (hpa & HPA_ERR_MASK) != 0;
}
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
extern struct page *bad_page;
int is_error_page(struct page *page);
int kvm_is_error_hva(unsigned long addr);
int kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc);
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc);
int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc);
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
unsigned long len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
int offset, int len);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
unsigned long len);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
int kvm_dev_ioctl_check_extension(long ext);
int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log, int *is_dirty);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log);
int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct
kvm_userspace_memory_region *mem,
int user_alloc);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
void kvm_arch_destroy_vm(struct kvm *kvm);
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr);
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs);
int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
struct kvm_debug_guest *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
int kvm_arch_init(void *opaque);
void kvm_arch_exit(void);
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_arch_hardware_enable(void *garbage);
void kvm_arch_hardware_disable(void *garbage);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
void kvm_free_physmem(struct kvm *kvm);
struct kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
/*
 * kvm_guest_enter - mark the current task as about to run guest code.
 *
 * Calls account_system_vtime() for the current task first, then sets
 * PF_VCPU in current->flags.
 */
static inline void kvm_guest_enter(void)
{
	account_system_vtime(current);
	current->flags |= PF_VCPU;
}
/*
 * kvm_guest_exit - mark the current task as no longer running guest code.
 *
 * Calls account_system_vtime() for the current task first, then clears
 * PF_VCPU in current->flags. Counterpart of kvm_guest_enter().
 */
static inline void kvm_guest_exit(void)
{
	account_system_vtime(current);
	current->flags &= ~PF_VCPU;
}
static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
{
return slot - kvm->memslots;
}
/*
 * gfn_to_gpa - convert a guest frame number to a guest physical address.
 * @gfn: guest frame number.
 *
 * Returns @gfn shifted left by PAGE_SHIFT. The value is widened to gpa_t
 * before shifting so the shift is carried out in the wider type.
 */
static inline gpa_t gfn_to_gpa(gfn_t gfn)
{
	gpa_t gpa = gfn;

	gpa <<= PAGE_SHIFT;
	return gpa;
}
/*
 * kvm_migrate_apic_timer - request APIC timer migration for a vcpu.
 * @vcpu: vcpu whose request bitmap is updated.
 *
 * Only sets the KVM_REQ_MIGRATE_TIMER bit in vcpu->requests; the migration
 * itself is not performed here (presumably whoever consumes the request
 * bits does it later - verify against the caller).
 */
static inline void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
}
/* Kind tag stored in struct kvm_stats_debugfs_item::kind. */
enum kvm_stat_kind {
	KVM_STAT_VM,	/* presumably a per-VM statistic */
	KVM_STAT_VCPU,	/* presumably a per-vcpu statistic */
};
/*
 * Describes one statistic; the table of these is declared elsewhere as
 * debugfs_entries[].
 */
struct kvm_stats_debugfs_item {
	const char *name;
	int offset;		/* NOTE(review): looks like an offset into the VM or vcpu structure - confirm */
	enum kvm_stat_kind kind;
	struct dentry *dentry;
};
extern struct kvm_stats_debugfs_item debugfs_entries[];
#endif

View File

@ -2,72 +2,30 @@
#define __LINUX_KVM_PARA_H #define __LINUX_KVM_PARA_H
/* /*
* Guest OS interface for KVM paravirtualization * This header file provides a method for making a hypercall to the host
* * Architectures should define:
* Note: this interface is totally experimental, and is certain to change * - kvm_hypercall0, kvm_hypercall1...
* as we make progress. * - kvm_arch_para_features
* - kvm_para_available
*/ */
/* Return values for hypercalls */
#define KVM_ENOSYS 1000
#define KVM_HC_VAPIC_POLL_IRQ 1
/* /*
* Per-VCPU descriptor area shared between guest and host. Writable to * hypercalls use architecture specific
* both guest and host. Registered with the host by the guest when
* a guest acknowledges paravirtual mode.
*
* NOTE: all addresses are guest-physical addresses (gpa), to make it
* easier for the hypervisor to map between the various addresses.
*/ */
struct kvm_vcpu_para_state { #include <asm/kvm_para.h>
/*
* API version information for compatibility. If there's any support
* mismatch (too old host trying to execute too new guest) then
* the host will deny entry into paravirtual mode. Any other
* combination (new host + old guest and new host + new guest)
* is supposed to work - new host versions will support all old
* guest API versions.
*/
u32 guest_version;
u32 host_version;
u32 size;
u32 ret;
/* #ifdef __KERNEL__
* The address of the vm exit instruction (VMCALL or VMMCALL), static inline int kvm_para_has_feature(unsigned int feature)
* which the host will patch according to the CPU model the {
* VM runs on: if (kvm_arch_para_features() & (1UL << feature))
*/ return 1;
u64 hypercall_gpa; return 0;
}
#endif /* __KERNEL__ */
#endif /* __LINUX_KVM_PARA_H */
} __attribute__ ((aligned(PAGE_SIZE)));
#define KVM_PARA_API_VERSION 1
/*
* This is used for an RDMSR's ECX parameter to probe for a KVM host.
* Hopefully no CPU vendor will use up this number. This is placed well
* out of way of the typical space occupied by CPU vendors' MSR indices,
* and we think (or at least hope) it wont be occupied in the future
* either.
*/
#define MSR_KVM_API_MAGIC 0x87655678
#define KVM_EINVAL 1
/*
* Hypercall calling convention:
*
* Each hypercall may have 0-6 parameters.
*
* 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
*
* 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
* order: RDI, RSI, RDX, RCX, R8, R9.
*
* 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
* (the first 3 are according to the gcc regparm calling convention)
*
* No registers are clobbered by the hypercall, except that the
* return value is in RAX.
*/
#define __NR_hypercalls 0
#endif

54
include/linux/kvm_types.h Normal file
View File

@ -0,0 +1,54 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
#ifndef __KVM_TYPES_H__
#define __KVM_TYPES_H__
#include <asm/types.h>
/*
* Address types:
*
* gva - guest virtual address
* gpa - guest physical address
* gfn - guest frame number
* hva - host virtual address
* hpa - host physical address
* hfn - host frame number
*/
typedef unsigned long gva_t;
typedef u64 gpa_t;
typedef unsigned long gfn_t;
typedef unsigned long hva_t;
typedef u64 hpa_t;
typedef unsigned long hfn_t;
/*
 * Description of a port I/O request.
 * NOTE(review): field meanings below are inferred from names only; confirm
 * against the code that fills in and consumes this structure.
 */
struct kvm_pio_request {
	unsigned long count;
	int cur_count;
	struct page *guest_pages[2];	/* at most two guest pages are referenced */
	unsigned guest_page_offset;
	int in;				/* presumably non-zero for IN, zero for OUT */
	int port;
	int size;
	int string;			/* presumably non-zero for string I/O (INS/OUTS) */
	int down;
	int rep;			/* presumably non-zero when a REP prefix is used */
};
#endif /* __KVM_TYPES_H__ */

View File

@ -393,6 +393,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
destroy_context(mm); destroy_context(mm);
free_mm(mm); free_mm(mm);
} }
EXPORT_SYMBOL_GPL(__mmdrop);
/* /*
* Decrement the use count and release all resources for an mm. * Decrement the use count and release all resources for an mm.

View File

@ -26,7 +26,7 @@
* Based on Xen 3.1 code. * Based on Xen 3.1 code.
*/ */
#include "kvm.h" #include <linux/kvm_host.h>
#include <linux/kvm.h> #include <linux/kvm.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/highmem.h> #include <linux/highmem.h>
@ -34,14 +34,17 @@
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
#include <linux/io.h> #include <linux/io.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/current.h> #include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/io_apic.h> #include "ioapic.h"
#include "irq.h" #include "lapic.h"
/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#if 0
#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
#else
#define ioapic_debug(fmt, arg...) #define ioapic_debug(fmt, arg...)
#endif
static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq); static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@ -113,7 +116,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
default: default:
index = (ioapic->ioregsel - 0x10) >> 1; index = (ioapic->ioregsel - 0x10) >> 1;
ioapic_debug("change redir index %x val %x", index, val); ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS) if (index >= IOAPIC_NUM_PINS)
return; return;
if (ioapic->ioregsel & 1) { if (ioapic->ioregsel & 1) {
@ -131,16 +134,16 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
} }
static void ioapic_inj_irq(struct kvm_ioapic *ioapic, static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
struct kvm_lapic *target, struct kvm_vcpu *vcpu,
u8 vector, u8 trig_mode, u8 delivery_mode) u8 vector, u8 trig_mode, u8 delivery_mode)
{ {
ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode, ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
delivery_mode); delivery_mode);
ASSERT((delivery_mode == dest_Fixed) || ASSERT((delivery_mode == IOAPIC_FIXED) ||
(delivery_mode == dest_LowestPrio)); (delivery_mode == IOAPIC_LOWEST_PRIORITY));
kvm_apic_set_irq(target, vector, trig_mode); kvm_apic_set_irq(vcpu, vector, trig_mode);
} }
static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
@ -151,12 +154,12 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
struct kvm *kvm = ioapic->kvm; struct kvm *kvm = ioapic->kvm;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
ioapic_debug("dest %d dest_mode %d", dest, dest_mode); ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
if (dest_mode == 0) { /* Physical mode. */ if (dest_mode == 0) { /* Physical mode. */
if (dest == 0xFF) { /* Broadcast. */ if (dest == 0xFF) { /* Broadcast. */
for (i = 0; i < KVM_MAX_VCPUS; ++i) for (i = 0; i < KVM_MAX_VCPUS; ++i)
if (kvm->vcpus[i] && kvm->vcpus[i]->apic) if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
mask |= 1 << i; mask |= 1 << i;
return mask; return mask;
} }
@ -164,8 +167,8 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
vcpu = kvm->vcpus[i]; vcpu = kvm->vcpus[i];
if (!vcpu) if (!vcpu)
continue; continue;
if (kvm_apic_match_physical_addr(vcpu->apic, dest)) { if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
if (vcpu->apic) if (vcpu->arch.apic)
mask = 1 << i; mask = 1 << i;
break; break;
} }
@ -175,11 +178,11 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
vcpu = kvm->vcpus[i]; vcpu = kvm->vcpus[i];
if (!vcpu) if (!vcpu)
continue; continue;
if (vcpu->apic && if (vcpu->arch.apic &&
kvm_apic_match_logical_addr(vcpu->apic, dest)) kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
mask |= 1 << vcpu->vcpu_id; mask |= 1 << vcpu->vcpu_id;
} }
ioapic_debug("mask %x", mask); ioapic_debug("mask %x\n", mask);
return mask; return mask;
} }
@ -191,41 +194,39 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
u8 vector = ioapic->redirtbl[irq].fields.vector; u8 vector = ioapic->redirtbl[irq].fields.vector;
u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
u32 deliver_bitmask; u32 deliver_bitmask;
struct kvm_lapic *target;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
int vcpu_id; int vcpu_id;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x", "vector=%x trig_mode=%x\n",
dest, dest_mode, delivery_mode, vector, trig_mode); dest, dest_mode, delivery_mode, vector, trig_mode);
deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
if (!deliver_bitmask) { if (!deliver_bitmask) {
ioapic_debug("no target on destination"); ioapic_debug("no target on destination\n");
return; return;
} }
switch (delivery_mode) { switch (delivery_mode) {
case dest_LowestPrio: case IOAPIC_LOWEST_PRIORITY:
target = vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask); deliver_bitmask);
if (target != NULL) if (vcpu != NULL)
ioapic_inj_irq(ioapic, target, vector, ioapic_inj_irq(ioapic, vcpu, vector,
trig_mode, delivery_mode); trig_mode, delivery_mode);
else else
ioapic_debug("null round robin: " ioapic_debug("null lowest prio vcpu: "
"mask=%x vector=%x delivery_mode=%x", "mask=%x vector=%x delivery_mode=%x\n",
deliver_bitmask, vector, dest_LowestPrio); deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
break; break;
case dest_Fixed: case IOAPIC_FIXED:
for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
if (!(deliver_bitmask & (1 << vcpu_id))) if (!(deliver_bitmask & (1 << vcpu_id)))
continue; continue;
deliver_bitmask &= ~(1 << vcpu_id); deliver_bitmask &= ~(1 << vcpu_id);
vcpu = ioapic->kvm->vcpus[vcpu_id]; vcpu = ioapic->kvm->vcpus[vcpu_id];
if (vcpu) { if (vcpu) {
target = vcpu->apic; ioapic_inj_irq(ioapic, vcpu, vector,
ioapic_inj_irq(ioapic, target, vector,
trig_mode, delivery_mode); trig_mode, delivery_mode);
} }
} }
@ -271,7 +272,7 @@ static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
{ {
struct kvm_ioapic *ioapic = kvm->vioapic; struct kvm_ioapic *ioapic = kvm->arch.vioapic;
union ioapic_redir_entry *ent; union ioapic_redir_entry *ent;
int gsi; int gsi;
@ -304,7 +305,7 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
u32 result; u32 result;
ioapic_debug("addr %lx", (unsigned long)addr); ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */ ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff; addr &= 0xff;
@ -341,8 +342,8 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
u32 data; u32 data;
ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n", ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
addr, len, val); (void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */ ASSERT(!(addr & 0xf)); /* check alignment */
if (len == 4 || len == 8) if (len == 4 || len == 8)
data = *(u32 *) val; data = *(u32 *) val;
@ -360,24 +361,38 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
case IOAPIC_REG_WINDOW: case IOAPIC_REG_WINDOW:
ioapic_write_indirect(ioapic, data); ioapic_write_indirect(ioapic, data);
break; break;
#ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
kvm_ioapic_update_eoi(ioapic->kvm, data);
break;
#endif
default: default:
break; break;
} }
} }
/*
 * Return the virtual IOAPIC to its reset register state: the ID, IRR and
 * register selector are cleared, the MMIO base goes back to
 * IOAPIC_DEFAULT_BASE_ADDRESS, and the mask bit of every redirection-table
 * pin is set.  No other redirection-entry field is written here.
 */
void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
	int pin = 0;

	ioapic->id = 0;
	ioapic->irr = 0;
	ioapic->ioregsel = 0;
	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;

	while (pin < IOAPIC_NUM_PINS) {
		ioapic->redirtbl[pin].fields.mask = 1;
		pin++;
	}
}
int kvm_ioapic_init(struct kvm *kvm) int kvm_ioapic_init(struct kvm *kvm)
{ {
struct kvm_ioapic *ioapic; struct kvm_ioapic *ioapic;
int i;
ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
if (!ioapic) if (!ioapic)
return -ENOMEM; return -ENOMEM;
kvm->vioapic = ioapic; kvm->arch.vioapic = ioapic;
for (i = 0; i < IOAPIC_NUM_PINS; i++) kvm_ioapic_reset(ioapic);
ioapic->redirtbl[i].fields.mask = 1;
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->dev.read = ioapic_mmio_read; ioapic->dev.read = ioapic_mmio_read;
ioapic->dev.write = ioapic_mmio_write; ioapic->dev.write = ioapic_mmio_write;
ioapic->dev.in_range = ioapic_in_range; ioapic->dev.in_range = ioapic_in_range;

95
virt/kvm/ioapic.h Normal file
View File

@ -0,0 +1,95 @@
#ifndef __KVM_IO_APIC_H
#define __KVM_IO_APIC_H
#include <linux/kvm_host.h>
#include "iodev.h"
struct kvm;
struct kvm_vcpu;
#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
#define IOAPIC_EDGE_TRIG 0
#define IOAPIC_LEVEL_TRIG 1
#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_MEM_LENGTH 0x100
/* Direct registers. */
#define IOAPIC_REG_SELECT 0x00
#define IOAPIC_REG_WINDOW 0x10
#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
/* Indirect registers. */
#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
#define IOAPIC_REG_VERSION 0x01
#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
/* IOAPIC delivery modes. */
#define IOAPIC_FIXED 0x0
#define IOAPIC_LOWEST_PRIORITY 0x1
#define IOAPIC_PMI 0x2
#define IOAPIC_NMI 0x4
#define IOAPIC_INIT 0x5
#define IOAPIC_EXTINT 0x7
/*
 * State of one virtual IOAPIC.  kvm_ioapic_init() allocates an instance and
 * stores it in kvm->arch.vioapic.
 */
struct kvm_ioapic {
/* MMIO base; kvm_ioapic_reset() sets it to IOAPIC_DEFAULT_BASE_ADDRESS. */
u64 base_address;
/*
 * Selected indirect register.  In its default case ioapic_write_indirect()
 * maps this onto redirection entry (ioregsel - 0x10) >> 1.
 */
u32 ioregsel;
/* Cleared by kvm_ioapic_reset(); presumably the IOAPIC ID - TODO confirm. */
u32 id;
/*
 * Cleared by kvm_ioapic_reset(); presumably one pending-request bit per
 * input pin - TODO confirm against kvm_ioapic_set_irq().
 */
u32 irr;
/* Explicit padding; rounds the four u32 members up to 16 bytes. */
u32 pad;
/*
 * One redirection-table entry, accessible either as a raw 64-bit value
 * (bits) or field by field (fields).
 * NOTE(review): the field view only overlays bits exactly if the compiler
 * packs the u8 bitfields into single bytes - confirm for each target ABI.
 */
union ioapic_redir_entry {
u64 bits;
struct {
/* Vector number that ioapic_deliver() hands on to the local APIC code. */
u8 vector;
/* Compared against the IOAPIC_* delivery-mode constants (IOAPIC_FIXED...). */
u8 delivery_mode:3;
/*
 * Presumably the dest_mode given to ioapic_get_delivery_bitmask(), where
 * 0 selects physical addressing and non-zero logical matching - TODO confirm.
 */
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
/* Presumably IOAPIC_EDGE_TRIG or IOAPIC_LEVEL_TRIG - TODO confirm. */
u8 trig_mode:1;
/* Set to 1 for every pin by kvm_ioapic_reset(); presumably 1 = masked. */
u8 mask:1;
/* Filler: 7 bits plus 4 bytes, leaving dest_id as the final byte. */
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
} redirtbl[IOAPIC_NUM_PINS];
/* I/O device hooks; kvm_ioapic_init() installs read/write/in_range here. */
struct kvm_io_device dev;
/* VM this IOAPIC belongs to; used to reach the vcpus when delivering. */
struct kvm *kvm;
};
/*
 * ASSERT(x): debugging aid.  Without DEBUG it expands to an empty statement
 * and x is never evaluated.  With DEBUG, a false x logs the file, line and
 * expression text at KERN_EMERG and then calls BUG().
 */
#ifndef DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x)							\
do {									\
	if (!(x)) {							\
		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
		       __FILE__, __LINE__, #x);				\
		BUG();							\
	}								\
} while (0)
#endif
/* Return the IOAPIC instance stored in @kvm's arch state (arch.vioapic). */
static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
{
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;

	return ioapic;
}
#ifdef CONFIG_IA64
/*
 * Only defined by this header when CONFIG_IA64 is set.  It returns 1
 * unconditionally and never looks at @kvm.
 */
static inline int irqchip_in_kernel(struct kvm *kvm)
{
return 1;
}
#endif
/*
 * Pick the vcpu that should receive a lowest-priority interrupt for @vector
 * from the candidates set in @bitmap.  ioapic_deliver() checks the result
 * for NULL.  The selection policy is defined outside this header.
 */
struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
unsigned long bitmap);
/*
 * Process an end-of-interrupt for @vector on @kvm's IOAPIC.  On CONFIG_IA64
 * builds it is also called for MMIO writes to IOAPIC_REG_EOI.
 */
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
/*
 * Allocate the IOAPIC for @kvm, store it in kvm->arch.vioapic and reset it.
 * Returns -ENOMEM if the allocation fails.
 */
int kvm_ioapic_init(struct kvm *kvm);
/*
 * NOTE(review): presumably drives input pin @irq of @ioapic to @level; the
 * definition is not visible from here - confirm before relying on it.
 */
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
/*
 * Mask every redirection-table pin, restore the default MMIO base address
 * and clear the register selector, IRR and ID.
 */
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
#endif

63
virt/kvm/iodev.h Normal file
View File

@ -0,0 +1,63 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __KVM_IODEV_H__
#define __KVM_IODEV_H__
#include <linux/kvm_types.h>
/*
 * Callback table for one emulated I/O device.  The kvm_iodevice_*() inline
 * helpers in this header call through these pointers.
 */
struct kvm_io_device {
/* Handle a @len-byte read at @addr; @val is the caller's non-const buffer. */
void (*read)(struct kvm_io_device *this,
gpa_t addr,
int len,
void *val);
/* Handle a @len-byte write at @addr; @val points at the (const) data. */
void (*write)(struct kvm_io_device *this,
gpa_t addr,
int len,
const void *val);
/* Result is returned unchanged by kvm_iodevice_inrange(). */
int (*in_range)(struct kvm_io_device *this, gpa_t addr);
/* Optional: kvm_iodevice_destructor() skips the call when this is NULL. */
void (*destructor)(struct kvm_io_device *this);
/* Opaque per-device pointer; the IOAPIC handlers cast it to their state. */
void *private;
};
/*
 * Dispatch a read to @dev: @addr, @len and @val are passed unchanged to the
 * device's read callback.  The callback pointer is not checked, so
 * @dev->read must be set.
 */
static inline void kvm_iodevice_read(struct kvm_io_device *dev,
				     gpa_t addr, int len, void *val)
{
	(*dev->read)(dev, addr, len, val);
}
/*
 * Dispatch a write to @dev: @addr, @len and the const data pointer @val are
 * passed unchanged to the device's write callback.  The callback pointer is
 * not checked, so @dev->write must be set.
 */
static inline void kvm_iodevice_write(struct kvm_io_device *dev,
				      gpa_t addr, int len, const void *val)
{
	(*dev->write)(dev, addr, len, val);
}
/*
 * Ask @dev's in_range callback about @addr and return its answer unchanged.
 * The callback pointer is not checked, so @dev->in_range must be set.
 */
static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
{
	int claimed = dev->in_range(dev, addr);

	return claimed;
}
/*
 * Invoke @dev's destructor callback if one is installed; devices that leave
 * the pointer NULL are skipped.
 */
static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
	if (!dev->destructor)
		return;

	dev->destructor(dev);
}
#endif /* __KVM_IODEV_H__ */

1400
virt/kvm/kvm_main.c Normal file

File diff suppressed because it is too large Load Diff