Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (249 commits) KVM: Move apic timer migration away from critical section KVM: Put kvm_para.h include outside __KERNEL__ KVM: Fix unbounded preemption latency KVM: Initialize the mmu caches only after verifying cpu support KVM: MMU: Fix dirty page setting for pages removed from rmap KVM: Portability: Move kvm_fpu to asm-x86/kvm.h KVM: x86 emulator: Only allow VMCALL/VMMCALL trapped by #UD KVM: MMU: Merge shadow level check in FNAME(fetch) KVM: MMU: Move kvm_free_some_pages() into critical section KVM: MMU: Switch to mmu spinlock KVM: MMU: Avoid calling gfn_to_page() in mmu_set_spte() KVM: Add kvm_read_guest_atomic() KVM: MMU: Concurrent guest walkers KVM: Disable vapic support on Intel machines with FlexPriority KVM: Accelerated apic support KVM: local APIC TPR access reporting facility KVM: Print data for unimplemented wrmsr KVM: MMU: Add cache miss statistic KVM: MMU: Coalesce remote tlb flushes KVM: Expose ioapic to ia64 save/restore APIs ...
2008-01-31 09:30:10 +11:00 · 2008-01-31 09:30:10 +11:00 · 2c57ee6f92
commit 2c57ee6f92
parent f389e9fcec 2f52d58c92
41 changed files with 10688 additions and 8297 deletions
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@ -107,6 +107,7 @@ config ARCH_SUPPORTS_OPROFILE
 	bool
 	default y

+select HAVE_KVM

 config ZONE_DMA32
 	bool
@ -1598,4 +1599,6 @@ source "security/Kconfig"

 source "crypto/Kconfig"

+source "arch/x86/kvm/Kconfig"
+
 source "lib/Kconfig"
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@ -7,6 +7,8 @@ else
        KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif

+core-$(CONFIG_KVM) += arch/x86/kvm/
+
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@ -1,9 +1,12 @@
 #
 # KVM configuration
 #
+config HAVE_KVM
+       bool
+
 menuconfig VIRTUALIZATION
 	bool "Virtualization"
-	depends on X86
+	depends on HAVE_KVM || X86
 	default y
 	---help---
 	  Say Y here to get to see options for using your Linux host to run other
@ -16,7 +19,7 @@ if VIRTUALIZATION

 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on X86 && EXPERIMENTAL
+	depends on HAVE_KVM && EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@ -2,7 +2,11 @@
 # Makefile for Kernel-based Virtual Machine module
 #

-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
+
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@ -28,6 +28,8 @@
 #include <linux/mm.h>
 #include "irq.h"

+#include <linux/kvm_host.h>
+
 /*
 * set irq level. If an edge is detected, then the IRR is set to 1
 */
@ -181,10 +183,8 @@ int kvm_pic_read_irq(struct kvm_pic *s)
 	return intno;
 }

-static void pic_reset(void *opaque)
+void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-	struct kvm_kpic_state *s = opaque;
-
 	s->last_irr = 0;
 	s->irr = 0;
 	s->imr = 0;
@ -209,7 +209,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 	addr &= 1;
 	if (addr == 0) {
 		if (val & 0x10) {
-			pic_reset(s);	/* init */
+			kvm_pic_reset(s);	/* init */
 			/*
 			 * deassert a pending interrupt
 			 */
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@ -20,8 +20,8 @@
 */

 #include <linux/module.h>
+#include <linux/kvm_host.h>

-#include "kvm.h"
 #include "irq.h"

 /*
@ -63,26 +63,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 }
 EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);

-static void vcpu_kick_intr(void *info)
-{
-#ifdef DEBUG
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
-	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
-#endif
-}
-
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
-{
-	int ipi_pcpu = vcpu->cpu;
-
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
-		++vcpu->stat.halt_wakeup;
-	}
-	if (vcpu->guest_mode)
-		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
-}
-
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_apic_timer_irqs(vcpu);
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@ -0,0 +1,88 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <linux/mm_types.h>
+#include <linux/hrtimer.h>
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+typedef void irq_request_func(void *opaque, int level);
+
+struct kvm_kpic_state {
+	u8 last_irr;	/* edge detection */
+	u8 irr;		/* interrupt request register */
+	u8 imr;		/* interrupt mask register */
+	u8 isr;		/* interrupt service register */
+	u8 priority_add;	/* highest irq priority */
+	u8 irq_base;
+	u8 read_reg_select;
+	u8 poll;
+	u8 special_mask;
+	u8 init_state;
+	u8 auto_eoi;
+	u8 rotate_on_auto_eoi;
+	u8 special_fully_nested_mode;
+	u8 init4;		/* true if 4 byte init */
+	u8 elcr;		/* PIIX edge/trigger selection */
+	u8 elcr_mask;
+	struct kvm_pic *pics_state;
+};
+
+struct kvm_pic {
+	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
+	irq_request_func *irq_request;
+	void *irq_request_opaque;
+	int output;		/* intr from master PIC */
+	struct kvm_io_device dev;
+};
+
+struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_read_irq(struct kvm_pic *s);
+void kvm_pic_update_irq(struct kvm_pic *s);
+
+static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
+{
+	return kvm->arch.vpic;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return pic_irqchip(kvm) != NULL;
+}
+
+void kvm_pic_reset(struct kvm_kpic_state *s);
+
+void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+
+#endif
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@ -4,10 +4,10 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/kvm_host.h>
 #include <asm/msr.h>

 #include "svm.h"
-#include "kvm.h"

 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@ -17,7 +17,7 @@
 * the COPYING file in the top-level directory.
 */

-#include "kvm.h"
+#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@ -56,6 +56,7 @@

 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
+
 static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
 {
 	return *((u32 *) (apic->regs + reg_off));
@ -88,7 +89,7 @@ static inline void apic_clear_vector(int vec, void *bitmap)

 static inline int apic_hw_enabled(struct kvm_lapic *apic)
 {
-	return (apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE;
+	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
 }

 static inline int  apic_sw_enabled(struct kvm_lapic *apic)
@ -172,7 +173,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)

 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;

 	if (!apic)
@ -183,8 +184,10 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);

-int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
 {
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
 	if (!apic_test_and_set_irr(vec, apic)) {
 		/* a new pending irq is set in IRR */
 		if (trig)
@ -268,7 +271,7 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 			   int short_hand, int dest, int dest_mode)
 {
 	int result = 0;
-	struct kvm_lapic *target = vcpu->apic;
+	struct kvm_lapic *target = vcpu->arch.apic;

 	apic_debug("target %p, source %p, dest 0x%x, "
 		   "dest_mode 0x%x, short_hand 0x%x",
@ -335,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);

-		if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+		if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
 			kvm_vcpu_kick(vcpu);
-		else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) {
-			vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+		else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) {
+			vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
 			if (waitqueue_active(&vcpu->wq))
 				wake_up_interruptible(&vcpu->wq);
 		}
@ -359,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,

 	case APIC_DM_INIT:
 		if (level) {
-			if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
+			if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
 				printk(KERN_DEBUG
 				       "INIT on a runnable vcpu %d\n",
 				       vcpu->vcpu_id);
-			vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+			vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED;
 			kvm_vcpu_kick(vcpu);
 		} else {
 			printk(KERN_DEBUG
@ -376,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_STARTUP:
 		printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
 		       vcpu->vcpu_id, vector);
-		if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
-			vcpu->sipi_vector = vector;
-			vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+		if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+			vcpu->arch.sipi_vector = vector;
+			vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
 			if (waitqueue_active(&vcpu->wq))
 				wake_up_interruptible(&vcpu->wq);
 		}
@ -392,15 +395,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	return result;
 }

-struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
+static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 				       unsigned long bitmap)
 {
-	int vcpu_id;
 	int last;
 	int next;
-	struct kvm_lapic *apic;
+	struct kvm_lapic *apic = NULL;

-	last = kvm->round_robin_prev_vcpu;
+	last = kvm->arch.round_robin_prev_vcpu;
 	next = last;

 	do {
@ -408,25 +410,30 @@ struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
 			next = 0;
 		if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
 			continue;
-		apic = kvm->vcpus[next]->apic;
+		apic = kvm->vcpus[next]->arch.apic;
 		if (apic && apic_enabled(apic))
 			break;
 		apic = NULL;
 	} while (next != last);
-	kvm->round_robin_prev_vcpu = next;
+	kvm->arch.round_robin_prev_vcpu = next;

-	if (!apic) {
-		vcpu_id = ffs(bitmap) - 1;
-		if (vcpu_id < 0) {
-			vcpu_id = 0;
-			printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
-		}
-		apic = kvm->vcpus[vcpu_id]->apic;
-	}
+	if (!apic)
+		printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");

 	return apic;
 }

+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+		unsigned long bitmap)
+{
+	struct kvm_lapic *apic;
+
+	apic = kvm_apic_round_robin(kvm, vector, bitmap);
+	if (apic)
+		return apic->vcpu;
+	return NULL;
+}
+
 static void apic_set_eoi(struct kvm_lapic *apic)
 {
 	int vector = apic_find_highest_isr(apic);
@ -458,7 +465,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 	unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
 	unsigned int vector = icr_low & APIC_VECTOR_MASK;

-	struct kvm_lapic *target;
+	struct kvm_vcpu *target;
 	struct kvm_vcpu *vcpu;
 	unsigned long lpr_map = 0;
 	int i;
@ -474,20 +481,20 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		if (!vcpu)
 			continue;

-		if (vcpu->apic &&
+		if (vcpu->arch.apic &&
 		    apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
 			if (delivery_mode == APIC_DM_LOWEST)
 				set_bit(vcpu->vcpu_id, &lpr_map);
 			else
-				__apic_accept_irq(vcpu->apic, delivery_mode,
+				__apic_accept_irq(vcpu->arch.apic, delivery_mode,
 						  vector, level, trig_mode);
 		}
 	}

 	if (delivery_mode == APIC_DM_LOWEST) {
-		target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map);
+		target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
 		if (target != NULL)
-			__apic_accept_irq(target, delivery_mode,
+			__apic_accept_irq(target->arch.apic, delivery_mode,
 					  vector, level, trig_mode);
 	}
 }
@ -544,6 +551,23 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	return tmcct;
 }

+static void __report_tpr_access(struct kvm_lapic *apic, bool write)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	struct kvm_run *run = vcpu->run;
+
+	set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
+	kvm_x86_ops->cache_regs(vcpu);
+	run->tpr_access.rip = vcpu->arch.rip;
+	run->tpr_access.is_write = write;
+}
+
+static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
+{
+	if (apic->vcpu->arch.tpr_access_reporting)
+		__report_tpr_access(apic, write);
+}
+
 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 {
 	u32 val = 0;
@ -561,6 +585,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 		val = apic_get_tmcct(apic);
 		break;

+	case APIC_TASKPRI:
+		report_tpr_access(apic, false);
+		/* fall thru */
 	default:
 		apic_update_ppr(apic);
 		val = apic_get_reg(apic, offset);
@ -670,6 +697,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;

 	case APIC_TASKPRI:
+		report_tpr_access(apic, true);
 		apic_set_tpr(apic, val & 0xff);
 		break;

@ -762,19 +790,17 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
 	return ret;
 }

-void kvm_free_apic(struct kvm_lapic *apic)
+void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
-	if (!apic)
+	if (!vcpu->arch.apic)
 		return;

-	hrtimer_cancel(&apic->timer.dev);
+	hrtimer_cancel(&vcpu->arch.apic->timer.dev);

-	if (apic->regs_page) {
-		__free_page(apic->regs_page);
-		apic->regs_page = 0;
-	}
+	if (vcpu->arch.apic->regs_page)
+		__free_page(vcpu->arch.apic->regs_page);

-	kfree(apic);
+	kfree(vcpu->arch.apic);
 }

 /*
@ -785,16 +811,17 @@ void kvm_free_apic(struct kvm_lapic *apic)

 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;

 	if (!apic)
 		return;
-	apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
+	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
+		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }

 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 tpr;

 	if (!apic)
@ -807,29 +834,29 @@ EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);

 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;

 	if (!apic) {
 		value |= MSR_IA32_APICBASE_BSP;
-		vcpu->apic_base = value;
+		vcpu->arch.apic_base = value;
 		return;
 	}
 	if (apic->vcpu->vcpu_id)
 		value &= ~MSR_IA32_APICBASE_BSP;

-	vcpu->apic_base = value;
-	apic->base_address = apic->vcpu->apic_base &
+	vcpu->arch.apic_base = value;
+	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;

 	/* with FSB delivery interrupt, we can restart APIC functionality */
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
-		   "0x%lx.\n", apic->apic_base, apic->base_address);
+		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);

 }

 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
 {
-	return vcpu->apic_base;
+	return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_get_base);

@ -841,7 +868,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_debug("%s\n", __FUNCTION__);

 	ASSERT(vcpu);
-	apic = vcpu->apic;
+	apic = vcpu->arch.apic;
 	ASSERT(apic != NULL);

 	/* Stop the timer in case it's a reset to an active apic */
@ -872,19 +899,19 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	update_divide_count(apic);
 	atomic_set(&apic->timer.pending, 0);
 	if (vcpu->vcpu_id == 0)
-		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);

 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
 		   vcpu, kvm_apic_id(apic),
-		   vcpu->apic_base, apic->base_address);
+		   vcpu->arch.apic_base, apic->base_address);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reset);

 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	int ret = 0;

 	if (!apic)
@ -908,9 +935,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	wait_queue_head_t *q = &apic->vcpu->wq;

 	atomic_inc(&apic->timer.pending);
-	if (waitqueue_active(q))
-	{
-		apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
+	if (waitqueue_active(q)) {
+		apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
 	}
 	if (apic_lvtt_period(apic)) {
@ -956,13 +982,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	if (!apic)
 		goto nomem;

-	vcpu->apic = apic;
+	vcpu->arch.apic = apic;

 	apic->regs_page = alloc_page(GFP_KERNEL);
 	if (apic->regs_page == NULL) {
 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
 		       vcpu->vcpu_id);
-		goto nomem;
+		goto nomem_free_apic;
 	}
 	apic->regs = page_address(apic->regs_page);
 	memset(apic->regs, 0, PAGE_SIZE);
@ -971,7 +997,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	apic->timer.dev.function = apic_timer_fn;
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
-	vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
+	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;

 	kvm_lapic_reset(vcpu);
 	apic->dev.read = apic_mmio_read;
@ -980,15 +1006,16 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->dev.private = apic;

 	return 0;
+nomem_free_apic:
+	kfree(apic);
 nomem:
-	kvm_free_apic(apic);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(kvm_create_lapic);

 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;

 	if (!apic || !apic_enabled(apic))
@ -1004,11 +1031,11 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)

 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-	u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0);
+	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;

 	if (vcpu->vcpu_id == 0) {
-		if (!apic_hw_enabled(vcpu->apic))
+		if (!apic_hw_enabled(vcpu->arch.apic))
 			r = 1;
 		if ((lvt0 & APIC_LVT_MASKED) == 0 &&
 		    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@ -1019,7 +1046,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)

 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;

 	if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
 		atomic_read(&apic->timer.pending) > 0) {
@ -1030,7 +1057,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)

 void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;

 	if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
 		apic->timer.last_update = ktime_add_ns(
@ -1041,7 +1068,7 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;

 	if (vector == -1)
 		return -1;
@ -1054,9 +1081,9 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)

 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;

-	apic->base_address = vcpu->apic_base &
+	apic->base_address = vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 	apic_update_ppr(apic);
@ -1065,9 +1092,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	start_apic_timer(apic);
 }

-void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct hrtimer *timer;

 	if (!apic)
@ -1077,4 +1104,51 @@ void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 	if (hrtimer_cancel(timer))
 		hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
 }
-EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);
+
+void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
+{
+	u32 data;
+	void *vapic;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+		return;
+
+	vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
+	kunmap_atomic(vapic, KM_USER0);
+
+	apic_set_tpr(vcpu->arch.apic, data & 0xff);
+}
+
+void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
+{
+	u32 data, tpr;
+	int max_irr, max_isr;
+	struct kvm_lapic *apic;
+	void *vapic;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+		return;
+
+	apic = vcpu->arch.apic;
+	tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+	max_irr = apic_find_highest_irr(apic);
+	if (max_irr < 0)
+		max_irr = 0;
+	max_isr = apic_find_highest_isr(apic);
+	if (max_isr < 0)
+		max_isr = 0;
+	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
+
+	vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+	*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
+	kunmap_atomic(vapic, KM_USER0);
+}
+
+void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
+{
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return;
+
+	vcpu->arch.apic->vapic_addr = vapic_addr;
+}
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@ -0,0 +1,50 @@
+#ifndef __KVM_X86_LAPIC_H
+#define __KVM_X86_LAPIC_H
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+
+struct kvm_lapic {
+	unsigned long base_address;
+	struct kvm_io_device dev;
+	struct {
+		atomic_t pending;
+		s64 period;	/* unit: ns */
+		u32 divide_count;
+		ktime_t last_update;
+		struct hrtimer dev;
+	} timer;
+	struct kvm_vcpu *vcpu;
+	struct page *regs_page;
+	void *regs;
+	gpa_t vapic_addr;
+	struct page *vapic_page;
+};
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+void kvm_lapic_reset(struct kvm_vcpu *vcpu);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+
+void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
+void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
+void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
+
+#endif
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@ -0,0 +1,44 @@
+#ifndef __KVM_X86_MMU_H
+#define __KVM_X86_MMU_H
+
+#include <linux/kvm_host.h>
+
+static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+		__kvm_mmu_free_some_pages(vcpu);
+}
+
+static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
+{
+	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+		return 0;
+
+	return kvm_mmu_load(vcpu);
+}
+
+static inline int is_long_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	return vcpu->arch.shadow_efer & EFER_LME;
+#else
+	return 0;
+#endif
+}
+
+static inline int is_pae(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr4 & X86_CR4_PAE;
+}
+
+static inline int is_pse(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr4 & X86_CR4_PSE;
+}
+
+static inline int is_paging(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr0 & X86_CR0_PG;
+}
+
+#endif
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@ -0,0 +1,484 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * MMU support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Avi Kivity   <avi@qumranet.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * We need the mmu code to access both 32-bit and 64-bit guest ptes,
+ * so the code in this file is compiled twice, once per pte size.
+ */
+
+#if PTTYPE == 64
+	#define pt_element_t u64
+	#define guest_walker guest_walker64
+	#define FNAME(name) paging##64_##name
+	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
+	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
+	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
+	#define PT_LEVEL_BITS PT64_LEVEL_BITS
+	#ifdef CONFIG_X86_64
+	#define PT_MAX_FULL_LEVELS 4
+	#define CMPXCHG cmpxchg
+	#else
+	#define CMPXCHG cmpxchg64
+	#define PT_MAX_FULL_LEVELS 2
+	#endif
+#elif PTTYPE == 32
+	#define pt_element_t u32
+	#define guest_walker guest_walker32
+	#define FNAME(name) paging##32_##name
+	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
+	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
+	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
+	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
+	#define PT_LEVEL_BITS PT32_LEVEL_BITS
+	#define PT_MAX_FULL_LEVELS 2
+	#define CMPXCHG cmpxchg
+#else
+	#error Invalid PTTYPE value
+#endif
+
+#define gpte_to_gfn FNAME(gpte_to_gfn)
+#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)
+
+/*
+ * The guest_walker structure emulates the behavior of the hardware page
+ * table walker.
+ */
+struct guest_walker {
+	int level;
+	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
+	pt_element_t ptes[PT_MAX_FULL_LEVELS];
+	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	unsigned pt_access;
+	unsigned pte_access;
+	gfn_t gfn;
+	u32 error_code;
+};
+
+static gfn_t gpte_to_gfn(pt_element_t gpte)
+{
+	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
+static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
+{
+	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+}
+
+static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
+			 gfn_t table_gfn, unsigned index,
+			 pt_element_t orig_pte, pt_element_t new_pte)
+{
+	pt_element_t ret;
+	pt_element_t *table;
+	struct page *page;
+
+	page = gfn_to_page(kvm, table_gfn);
+	table = kmap_atomic(page, KM_USER0);
+
+	ret = CMPXCHG(&table[index], orig_pte, new_pte);
+
+	kunmap_atomic(table, KM_USER0);
+
+	kvm_release_page_dirty(page);
+
+	return (ret != orig_pte);
+}
+
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
+{
+	unsigned access;
+
+	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+#if PTTYPE == 64
+	if (is_nx(vcpu))
+		access &= ~(gpte >> PT64_NX_SHIFT);
+#endif
+	return access;
+}
+
+/*
+ * Fetch a guest pte for a guest virtual address
+ */
+static int FNAME(walk_addr)(struct guest_walker *walker,
+			    struct kvm_vcpu *vcpu, gva_t addr,
+			    int write_fault, int user_fault, int fetch_fault)
+{
+	pt_element_t pte;
+	gfn_t table_gfn;
+	unsigned index, pt_access, pte_access;
+	gpa_t pte_gpa;
+
+	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
+walk:
+	walker->level = vcpu->arch.mmu.root_level;
+	pte = vcpu->arch.cr3;
+#if PTTYPE == 64
+	if (!is_long_mode(vcpu)) {
+		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
+		if (!is_present_pte(pte))
+			goto not_present;
+		--walker->level;
+	}
+#endif
+	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
+	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+
+	pt_access = ACC_ALL;
+
+	for (;;) {
+		index = PT_INDEX(addr, walker->level);
+
+		table_gfn = gpte_to_gfn(pte);
+		pte_gpa = gfn_to_gpa(table_gfn);
+		pte_gpa += index * sizeof(pt_element_t);
+		walker->table_gfn[walker->level - 1] = table_gfn;
+		walker->pte_gpa[walker->level - 1] = pte_gpa;
+		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+			 walker->level - 1, table_gfn);
+
+		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+
+		if (!is_present_pte(pte))
+			goto not_present;
+
+		if (write_fault && !is_writeble_pte(pte))
+			if (user_fault || is_write_protection(vcpu))
+				goto access_error;
+
+		if (user_fault && !(pte & PT_USER_MASK))
+			goto access_error;
+
+#if PTTYPE == 64
+		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
+			goto access_error;
+#endif
+
+		if (!(pte & PT_ACCESSED_MASK)) {
+			mark_page_dirty(vcpu->kvm, table_gfn);
+			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
+			    index, pte, pte|PT_ACCESSED_MASK))
+				goto walk;
+			pte |= PT_ACCESSED_MASK;
+		}
+
+		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
+
+		walker->ptes[walker->level - 1] = pte;
+
+		if (walker->level == PT_PAGE_TABLE_LEVEL) {
+			walker->gfn = gpte_to_gfn(pte);
+			break;
+		}
+
+		if (walker->level == PT_DIRECTORY_LEVEL
+		    && (pte & PT_PAGE_SIZE_MASK)
+		    && (PTTYPE == 64 || is_pse(vcpu))) {
+			walker->gfn = gpte_to_gfn_pde(pte);
+			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
+			if (PTTYPE == 32 && is_cpuid_PSE36())
+				walker->gfn += pse36_gfn_delta(pte);
+			break;
+		}
+
+		pt_access = pte_access;
+		--walker->level;
+	}
+
+	if (write_fault && !is_dirty_pte(pte)) {
+		bool ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
+			    pte|PT_DIRTY_MASK);
+		if (ret)
+			goto walk;
+		pte |= PT_DIRTY_MASK;
+		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+		walker->ptes[walker->level - 1] = pte;
+	}
+
+	walker->pt_access = pt_access;
+	walker->pte_access = pte_access;
+	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
+		 __FUNCTION__, (u64)pte, pt_access, pte_access);
+	return 1;
+
+not_present:
+	walker->error_code = 0;
+	goto err;
+
+access_error:
+	walker->error_code = PFERR_PRESENT_MASK;
+
+err:
+	if (write_fault)
+		walker->error_code |= PFERR_WRITE_MASK;
+	if (user_fault)
+		walker->error_code |= PFERR_USER_MASK;
+	if (fetch_fault)
+		walker->error_code |= PFERR_FETCH_MASK;
+	return 0;
+}
+
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
+			      u64 *spte, const void *pte, int bytes,
+			      int offset_in_pte)
+{
+	pt_element_t gpte;
+	unsigned pte_access;
+	struct page *npage;
+
+	gpte = *(const pt_element_t *)pte;
+	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
+		if (!offset_in_pte && !is_present_pte(gpte))
+			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
+		return;
+	}
+	if (bytes < sizeof(pt_element_t))
+		return;
+	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
+	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
+	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
+		return;
+	npage = vcpu->arch.update_pte.page;
+	if (!npage)
+		return;
+	get_page(npage);
+	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
+		     gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage);
+}
+
+/*
+ * Fetch a shadow pte for a specific level in the paging hierarchy.
+ */
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+			 struct guest_walker *walker,
+			 int user_fault, int write_fault, int *ptwrite,
+			 struct page *page)
+{
+	hpa_t shadow_addr;
+	int level;
+	u64 *shadow_ent;
+	unsigned access = walker->pt_access;
+
+	if (!is_present_pte(walker->ptes[walker->level - 1]))
+		return NULL;
+
+	shadow_addr = vcpu->arch.mmu.root_hpa;
+	level = vcpu->arch.mmu.shadow_root_level;
+	if (level == PT32E_ROOT_LEVEL) {
+		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+		shadow_addr &= PT64_BASE_ADDR_MASK;
+		--level;
+	}
+
+	for (; ; level--) {
+		u32 index = SHADOW_PT_INDEX(addr, level);
+		struct kvm_mmu_page *shadow_page;
+		u64 shadow_pte;
+		int metaphysical;
+		gfn_t table_gfn;
+		bool new_page = 0;
+
+		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
+		if (level == PT_PAGE_TABLE_LEVEL)
+			break;
+		if (is_shadow_present_pte(*shadow_ent)) {
+			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
+			continue;
+		}
+
+		if (level - 1 == PT_PAGE_TABLE_LEVEL
+		    && walker->level == PT_DIRECTORY_LEVEL) {
+			metaphysical = 1;
+			if (!is_dirty_pte(walker->ptes[level - 1]))
+				access &= ~ACC_WRITE_MASK;
+			table_gfn = gpte_to_gfn(walker->ptes[level - 1]);
+		} else {
+			metaphysical = 0;
+			table_gfn = walker->table_gfn[level - 2];
+		}
+		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+					       metaphysical, access,
+					       shadow_ent, &new_page);
+		if (new_page && !metaphysical) {
+			int r;
+			pt_element_t curr_pte;
+			r = kvm_read_guest_atomic(vcpu->kvm,
+						  walker->pte_gpa[level - 2],
+						  &curr_pte, sizeof(curr_pte));
+			if (r || curr_pte != walker->ptes[level - 2]) {
+				kvm_release_page_clean(page);
+				return NULL;
+			}
+		}
+		shadow_addr = __pa(shadow_page->spt);
+		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
+			| PT_WRITABLE_MASK | PT_USER_MASK;
+		*shadow_ent = shadow_pte;
+	}
+
+	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
+		     user_fault, write_fault,
+		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
+		     ptwrite, walker->gfn, page);
+
+	return shadow_ent;
+}
+
+/*
+ * Page fault handler.  There are several causes for a page fault:
+ *   - there is no shadow pte for the guest pte
+ *   - write access through a shadow pte marked read only so that we can set
+ *     the dirty bit
+ *   - write access to a shadow pte marked read only so we can update the page
+ *     dirty bitmap, when userspace requests it
+ *   - mmio access; in this case we will never install a present shadow pte
+ *   - normal guest page fault due to the guest pte marked not present, not
+ *     writable, or not executable
+ *
+ *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
+ *           a negative value on error.
+ */
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
+			       u32 error_code)
+{
+	int write_fault = error_code & PFERR_WRITE_MASK;
+	int user_fault = error_code & PFERR_USER_MASK;
+	int fetch_fault = error_code & PFERR_FETCH_MASK;
+	struct guest_walker walker;
+	u64 *shadow_pte;
+	int write_pt = 0;
+	int r;
+	struct page *page;
+
+	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+	kvm_mmu_audit(vcpu, "pre page fault");
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	down_read(&current->mm->mmap_sem);
+	/*
+	 * Look up the shadow pte for the faulting address.
+	 */
+	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
+			     fetch_fault);
+
+	/*
+	 * The page is not mapped by the guest.  Let the guest handle it.
+	 */
+	if (!r) {
+		pgprintk("%s: guest page fault\n", __FUNCTION__);
+		inject_page_fault(vcpu, addr, walker.error_code);
+		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+		up_read(&current->mm->mmap_sem);
+		return 0;
+	}
+
+	page = gfn_to_page(vcpu->kvm, walker.gfn);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+				  &write_pt, page);
+	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
+		 shadow_pte, *shadow_pte, write_pt);
+
+	if (!write_pt)
+		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+
+	/*
+	 * mmio: emulate if accessible, otherwise its a guest fault.
+	 */
+	if (shadow_pte && is_io_pte(*shadow_pte)) {
+		spin_unlock(&vcpu->kvm->mmu_lock);
+		up_read(&current->mm->mmap_sem);
+		return 1;
+	}
+
+	++vcpu->stat.pf_fixed;
+	kvm_mmu_audit(vcpu, "post page fault (fixed)");
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	up_read(&current->mm->mmap_sem);
+
+	return write_pt;
+}
+
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
+{
+	struct guest_walker walker;
+	gpa_t gpa = UNMAPPED_GVA;
+	int r;
+
+	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
+
+	if (r) {
+		gpa = gfn_to_gpa(walker.gfn);
+		gpa |= vaddr & ~PAGE_MASK;
+	}
+
+	return gpa;
+}
+
+static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
+				 struct kvm_mmu_page *sp)
+{
+	int i, offset = 0, r = 0;
+	pt_element_t pt;
+
+	if (sp->role.metaphysical
+	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
+		nonpaging_prefetch_page(vcpu, sp);
+		return;
+	}
+
+	if (PTTYPE == 32)
+		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		gpa_t pte_gpa = gfn_to_gpa(sp->gfn);
+		pte_gpa += (i+offset) * sizeof(pt_element_t);
+
+		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt,
+					  sizeof(pt_element_t));
+		if (r || is_present_pte(pt))
+			sp->spt[i] = shadow_trap_nonpresent_pte;
+		else
+			sp->spt[i] = shadow_notrap_nonpresent_pte;
+	}
+}
+
+#undef pt_element_t
+#undef guest_walker
+#undef FNAME
+#undef PT_BASE_ADDR_MASK
+#undef PT_INDEX
+#undef SHADOW_PT_INDEX
+#undef PT_LEVEL_MASK
+#undef PT_DIR_BASE_ADDR_MASK
+#undef PT_LEVEL_BITS
+#undef PT_MAX_FULL_LEVELS
+#undef gpte_to_gfn
+#undef gpte_to_gfn_pde
+#undef CMPXCHG
--- a/arch/x86/kvm/segment_descriptor.h
+++ b/arch/x86/kvm/segment_descriptor.h
@ -1,3 +1,6 @@
+#ifndef __SEGMENT_DESCRIPTOR_H
+#define __SEGMENT_DESCRIPTOR_H
+
 struct segment_descriptor {
 	u16 limit_low;
 	u16 base_low;
@ -14,4 +17,13 @@ struct segment_descriptor {
 	u8  base_high;
 } __attribute__((packed));

+#ifdef CONFIG_X86_64
+/* LDT or TSS descriptor in the GDT. 16 bytes. */
+struct segment_descriptor_64 {
+	struct segment_descriptor s;
+	u32 base_higher;
+	u32 pad_zero;
+};

+#endif
+#endif
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@ -13,10 +13,11 @@
 * the COPYING file in the top-level directory.
 *
 */
+#include <linux/kvm_host.h>

 #include "kvm_svm.h"
-#include "x86_emulate.h"
 #include "irq.h"
+#include "mmu.h"

 #include <linux/module.h>
 #include <linux/kernel.h>
@ -42,9 +43,6 @@ MODULE_LICENSE("GPL");
 #define SEG_TYPE_LDT 2
 #define SEG_TYPE_BUSY_TSS16 3

-#define KVM_EFER_LMA (1 << 10)
-#define KVM_EFER_LME (1 << 8)
-
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_DEATURE_SVML (1 << 2)
@ -102,20 +100,20 @@ static inline u32 svm_has(u32 feat)

 static inline u8 pop_irq(struct kvm_vcpu *vcpu)
 {
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
+	int word_index = __ffs(vcpu->arch.irq_summary);
+	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
 	int irq = word_index * BITS_PER_LONG + bit_index;

-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
+	if (!vcpu->arch.irq_pending[word_index])
+		clear_bit(word_index, &vcpu->arch.irq_summary);
 	return irq;
 }

 static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 {
-	set_bit(irq, vcpu->irq_pending);
-	set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
+	set_bit(irq, vcpu->arch.irq_pending);
+	set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 }

 static inline void clgi(void)
@ -184,35 +182,30 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)

 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	if (!(efer & KVM_EFER_LMA))
-		efer &= ~KVM_EFER_LME;
+	if (!(efer & EFER_LMA))
+		efer &= ~EFER_LME;

 	to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
-	vcpu->shadow_efer = efer;
+	vcpu->arch.shadow_efer = efer;
 }

-static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
+static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
+				bool has_error_code, u32 error_code)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

-	svm->vmcb->control.event_inj =		SVM_EVTINJ_VALID |
-						SVM_EVTINJ_VALID_ERR |
-						SVM_EVTINJ_TYPE_EXEPT |
-						GP_VECTOR;
+	svm->vmcb->control.event_inj = nr
+		| SVM_EVTINJ_VALID
+		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
+		| SVM_EVTINJ_TYPE_EXEPT;
 	svm->vmcb->control.event_inj_err = error_code;
 }

-static void inject_ud(struct kvm_vcpu *vcpu)
+static bool svm_exception_injected(struct kvm_vcpu *vcpu)
 {
-	to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
-						SVM_EVTINJ_TYPE_EXEPT |
-						UD_VECTOR;
-}
+	struct vcpu_svm *svm = to_svm(vcpu);

-static int is_page_fault(uint32_t info)
-{
-	info &= SVM_EVTINJ_VEC_MASK | SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
-	return info == (PF_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT);
+	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
 }

 static int is_external_interrupt(u32 info)
@ -229,17 +222,16 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 		printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
 		return;
 	}
-	if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) {
+	if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
 		printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
 		       __FUNCTION__,
 		       svm->vmcb->save.rip,
 		       svm->next_rip);
-	}

-	vcpu->rip = svm->vmcb->save.rip = svm->next_rip;
+	vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip;
 	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;

-	vcpu->interrupt_window_open = 1;
+	vcpu->arch.interrupt_window_open = 1;
 }

 static int has_svm(void)
@ -312,7 +304,7 @@ static void svm_hardware_enable(void *garbage)
 	svm_data->next_asid = svm_data->max_asid + 1;
 	svm_features = cpuid_edx(SVM_CPUID_FUNC);

-	asm volatile ( "sgdt %0" : "=m"(gdt_descr) );
+	asm volatile ("sgdt %0" : "=m"(gdt_descr));
 	gdt = (struct desc_struct *)gdt_descr.address;
 	svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);

@ -458,11 +450,13 @@ static void init_vmcb(struct vmcb *vmcb)

 	control->intercept_cr_read = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
-					INTERCEPT_CR4_MASK;
+					INTERCEPT_CR4_MASK |
+					INTERCEPT_CR8_MASK;

 	control->intercept_cr_write = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
-					INTERCEPT_CR4_MASK;
+					INTERCEPT_CR4_MASK |
+					INTERCEPT_CR8_MASK;

 	control->intercept_dr_read = 	INTERCEPT_DR0_MASK |
 					INTERCEPT_DR1_MASK |
@ -476,7 +470,8 @@ static void init_vmcb(struct vmcb *vmcb)
 					INTERCEPT_DR5_MASK |
 					INTERCEPT_DR7_MASK;

-	control->intercept_exceptions = 1 << PF_VECTOR;
+	control->intercept_exceptions = (1 << PF_VECTOR) |
+					(1 << UD_VECTOR);


 	control->intercept = 	(1ULL << INTERCEPT_INTR) |
@ -543,8 +538,7 @@ static void init_vmcb(struct vmcb *vmcb)
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);

 	save->efer = MSR_EFER_SVME_MASK;
-
-        save->dr6 = 0xffff0ff0;
+	save->dr6 = 0xffff0ff0;
 	save->dr7 = 0x400;
 	save->rflags = 2;
 	save->rip = 0x0000fff0;
@ -558,7 +552,7 @@ static void init_vmcb(struct vmcb *vmcb)
 	/* rdx = ?? */
 }

-static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

@ -566,9 +560,11 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu)

 	if (vcpu->vcpu_id != 0) {
 		svm->vmcb->save.rip = 0;
-		svm->vmcb->save.cs.base = svm->vcpu.sipi_vector << 12;
-		svm->vmcb->save.cs.selector = svm->vcpu.sipi_vector << 8;
+		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
+		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
 	}
+
+	return 0;
 }

 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@ -587,12 +583,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_svm;

-	if (irqchip_in_kernel(kvm)) {
-		err = kvm_create_lapic(&svm->vcpu);
-		if (err < 0)
-			goto free_svm;
-	}
-
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
 		err = -ENOMEM;
@ -608,9 +598,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)

 	fx_init(&svm->vcpu);
 	svm->vcpu.fpu_active = 1;
-	svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (svm->vcpu.vcpu_id == 0)
-		svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP;
+		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;

 	return &svm->vcpu;

@ -644,7 +634,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * increasing TSC.
 		 */
 		rdtscll(tsc_this);
-		delta = vcpu->host_tsc - tsc_this;
+		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
 		kvm_migrate_apic_timer(vcpu);
@ -659,11 +649,11 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int i;

+	++vcpu->stat.host_state_reload;
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
 		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);

-	rdtscll(vcpu->host_tsc);
-	kvm_put_guest_fpu(vcpu);
+	rdtscll(vcpu->arch.host_tsc);
 }

 static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
@ -674,17 +664,17 @@ static void svm_cache_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

-	vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
-	vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
-	vcpu->rip = svm->vmcb->save.rip;
+	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+	vcpu->arch.rip = svm->vmcb->save.rip;
 }

 static void svm_decache_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
-	svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
-	svm->vmcb->save.rip = vcpu->rip;
+	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+	svm->vmcb->save.rip = vcpu->arch.rip;
 }

 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@ -782,24 +772,24 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	struct vcpu_svm *svm = to_svm(vcpu);

 #ifdef CONFIG_X86_64
-	if (vcpu->shadow_efer & KVM_EFER_LME) {
+	if (vcpu->arch.shadow_efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-			vcpu->shadow_efer |= KVM_EFER_LMA;
-			svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
+			vcpu->arch.shadow_efer |= EFER_LMA;
+			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
 		}

-		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) {
-			vcpu->shadow_efer &= ~KVM_EFER_LMA;
-			svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
+		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
+			vcpu->arch.shadow_efer &= ~EFER_LMA;
+			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
 		}
 	}
 #endif
-	if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
+	if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
 		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 		vcpu->fpu_active = 1;
 	}

-	vcpu->cr0 = cr0;
+	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
@ -807,7 +797,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)

 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       vcpu->cr4 = cr4;
+       vcpu->arch.cr4 = cr4;
       to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
 }

@ -912,7 +902,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 		svm->db_regs[dr] = value;
 		return;
 	case 4 ... 5:
-		if (vcpu->cr4 & X86_CR4_DE) {
+		if (vcpu->arch.cr4 & X86_CR4_DE) {
 			*exception = UD_VECTOR;
 			return;
 		}
@ -938,51 +928,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	struct kvm *kvm = svm->vcpu.kvm;
 	u64 fault_address;
 	u32 error_code;
-	enum emulation_result er;
-	int r;

 	if (!irqchip_in_kernel(kvm) &&
 		is_external_interrupt(exit_int_info))
 		push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);

-	mutex_lock(&kvm->lock);
-
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
-	r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
-	if (r < 0) {
-		mutex_unlock(&kvm->lock);
-		return r;
-	}
-	if (!r) {
-		mutex_unlock(&kvm->lock);
-		return 1;
-	}
-	er = emulate_instruction(&svm->vcpu, kvm_run, fault_address,
-				 error_code);
-	mutex_unlock(&kvm->lock);
+	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
+}

-	switch (er) {
-	case EMULATE_DONE:
-		return 1;
-	case EMULATE_DO_MMIO:
-		++svm->vcpu.stat.mmio_exits;
-		return 0;
-	case EMULATE_FAIL:
-		kvm_report_emulation_failure(&svm->vcpu, "pagetable");
-		break;
-	default:
-		BUG();
-	}
+static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	int er;

-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	return 0;
+	er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+	if (er != EMULATE_DONE)
+		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+	return 1;
 }

 static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-	if (!(svm->vcpu.cr0 & X86_CR0_TS))
+	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
 		svm->vmcb->save.cr0 &= ~X86_CR0_TS;
 	svm->vcpu.fpu_active = 1;

@ -1004,7 +973,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)

 static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 io_info = svm->vmcb->control.exit_info_1; //address size bug?
+	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
 	int size, down, in, string, rep;
 	unsigned port;

@ -1015,7 +984,8 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;

 	if (string) {
-		if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO)
+		if (emulate_instruction(&svm->vcpu,
+					kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
 			return 0;
 		return 1;
 	}
@ -1045,13 +1015,14 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	svm->next_rip = svm->vmcb->save.rip + 3;
 	skip_emulated_instruction(&svm->vcpu);
-	return kvm_hypercall(&svm->vcpu, kvm_run);
+	kvm_emulate_hypercall(&svm->vcpu);
+	return 1;
 }

 static int invalid_op_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
-	inject_ud(&svm->vcpu);
+	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
 }

@ -1073,11 +1044,20 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int emulate_on_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
-	if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE)
+	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
 		pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
 	return 1;
 }

+static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+	if (irqchip_in_kernel(svm->vcpu.kvm))
+		return 1;
+	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+	return 0;
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@ -1124,14 +1104,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)

 static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
+	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;

 	if (svm_get_msr(&svm->vcpu, ecx, &data))
-		svm_inject_gp(&svm->vcpu, 0);
+		kvm_inject_gp(&svm->vcpu, 0);
 	else {
 		svm->vmcb->save.rax = data & 0xffffffff;
-		svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32;
+		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
 		svm->next_rip = svm->vmcb->save.rip + 2;
 		skip_emulated_instruction(&svm->vcpu);
 	}
@ -1176,7 +1156,20 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	case MSR_IA32_SYSENTER_ESP:
 		svm->vmcb->save.sysenter_esp = data;
 		break;
+	case MSR_K7_EVNTSEL0:
+	case MSR_K7_EVNTSEL1:
+	case MSR_K7_EVNTSEL2:
+	case MSR_K7_EVNTSEL3:
+		/*
+		 * only support writing 0 to the performance counters for now
+		 * to make Windows happy. Should be replaced by a real
+		 * performance counter emulation later.
+		 */
+		if (data != 0)
+			goto unhandled;
+		break;
 	default:
+	unhandled:
 		return kvm_set_msr_common(vcpu, ecx, data);
 	}
 	return 0;
@ -1184,12 +1177,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)

 static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
+	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vmcb->save.rax & -1u)
-		| ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32);
+		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 	svm->next_rip = svm->vmcb->save.rip + 2;
 	if (svm_set_msr(&svm->vcpu, ecx, data))
-		svm_inject_gp(&svm->vcpu, 0);
+		kvm_inject_gp(&svm->vcpu, 0);
 	else
 		skip_emulated_instruction(&svm->vcpu);
 	return 1;
@ -1213,7 +1206,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	 * possible
 	 */
 	if (kvm_run->request_interrupt_window &&
-	    !svm->vcpu.irq_summary) {
+	    !svm->vcpu.arch.irq_summary) {
 		++svm->vcpu.stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@ -1227,10 +1220,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_READ_CR0]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR3]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR4]           		= emulate_on_interception,
+	[SVM_EXIT_READ_CR8]           		= emulate_on_interception,
 	/* for now: */
 	[SVM_EXIT_WRITE_CR0]          		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR3]          		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR4]          		= emulate_on_interception,
+	[SVM_EXIT_WRITE_CR8]          		= cr8_write_interception,
 	[SVM_EXIT_READ_DR0] 			= emulate_on_interception,
 	[SVM_EXIT_READ_DR1]			= emulate_on_interception,
 	[SVM_EXIT_READ_DR2]			= emulate_on_interception,
@ -1241,6 +1236,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
+	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR] 	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
 	[SVM_EXIT_INTR] 			= nop_on_interception,
@ -1293,7 +1289,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		       exit_code);

 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-	    || svm_exit_handlers[exit_code] == 0) {
+	    || !svm_exit_handlers[exit_code]) {
 		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
 		kvm_run->hw.hardware_exit_reason = exit_code;
 		return 0;
@ -1307,7 +1303,7 @@ static void reload_tss(struct kvm_vcpu *vcpu)
 	int cpu = raw_smp_processor_id();

 	struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
-	svm_data->tss_desc->type = 9; //available 32/64-bit TSS
+	svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
 	load_TR_desc();
 }

@ -1348,7 +1344,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	struct vmcb *vmcb = svm->vmcb;
 	int intr_vector = -1;

-	kvm_inject_pending_timer_irqs(vcpu);
 	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
 	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
 		intr_vector = vmcb->control.exit_int_info &
@ -1388,20 +1383,20 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
 		push_irq(&svm->vcpu, control->int_vector);
 	}

-	svm->vcpu.interrupt_window_open =
+	svm->vcpu.arch.interrupt_window_open =
 		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
 }

 static void svm_do_inject_vector(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	int word_index = __ffs(vcpu->irq_summary);
-	int bit_index = __ffs(vcpu->irq_pending[word_index]);
+	int word_index = __ffs(vcpu->arch.irq_summary);
+	int bit_index = __ffs(vcpu->arch.irq_pending[word_index]);
 	int irq = word_index * BITS_PER_LONG + bit_index;

-	clear_bit(bit_index, &vcpu->irq_pending[word_index]);
-	if (!vcpu->irq_pending[word_index])
-		clear_bit(word_index, &vcpu->irq_summary);
+	clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]);
+	if (!vcpu->arch.irq_pending[word_index])
+		clear_bit(word_index, &vcpu->arch.irq_summary);
 	svm_inject_irq(svm, irq);
 }

@ -1411,11 +1406,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb_control_area *control = &svm->vmcb->control;

-	svm->vcpu.interrupt_window_open =
+	svm->vcpu.arch.interrupt_window_open =
 		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
 		 (svm->vmcb->save.rflags & X86_EFLAGS_IF));

-	if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary)
+	if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@ -1424,13 +1419,18 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	/*
 	 * Interrupts blocked.  Wait for unblock.
 	 */
-	if (!svm->vcpu.interrupt_window_open &&
-	    (svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) {
+	if (!svm->vcpu.arch.interrupt_window_open &&
+	    (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
 		control->intercept |= 1ULL << INTERCEPT_VINTR;
-	} else
+	 else
 		control->intercept &= ~(1ULL << INTERCEPT_VINTR);
 }

+static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
+{
+	return 0;
+}
+
 static void save_db_regs(unsigned long *db_regs)
 {
 	asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
@ -1472,7 +1472,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	svm->host_cr2 = kvm_read_cr2();
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
-	svm->vmcb->save.cr2 = vcpu->cr2;
+	svm->vmcb->save.cr2 = vcpu->arch.cr2;

 	if (svm->vmcb->save.dr7 & 0xff) {
 		write_dr7(0);
@ -1486,13 +1486,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

 	asm volatile (
 #ifdef CONFIG_X86_64
-		"push %%rbx; push %%rcx; push %%rdx;"
-		"push %%rsi; push %%rdi; push %%rbp;"
-		"push %%r8;  push %%r9;  push %%r10; push %%r11;"
-		"push %%r12; push %%r13; push %%r14; push %%r15;"
+		"push %%rbp; \n\t"
 #else
-		"push %%ebx; push %%ecx; push %%edx;"
-		"push %%esi; push %%edi; push %%ebp;"
+		"push %%ebp; \n\t"
 #endif

 #ifdef CONFIG_X86_64
@ -1554,10 +1550,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"

-		"pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
-		"pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
-		"pop  %%rbp; pop  %%rdi; pop  %%rsi;"
-		"pop  %%rdx; pop  %%rcx; pop  %%rbx; \n\t"
+		"pop  %%rbp; \n\t"
 #else
 		"mov %%ebx, %c[rbx](%[svm]) \n\t"
 		"mov %%ecx, %c[rcx](%[svm]) \n\t"
@ -1566,34 +1559,40 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		"mov %%edi, %c[rdi](%[svm]) \n\t"
 		"mov %%ebp, %c[rbp](%[svm]) \n\t"

-		"pop  %%ebp; pop  %%edi; pop  %%esi;"
-		"pop  %%edx; pop  %%ecx; pop  %%ebx; \n\t"
+		"pop  %%ebp; \n\t"
 #endif
 		:
 		: [svm]"a"(svm),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
-		  [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])),
-		  [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])),
-		  [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])),
-		  [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])),
-		  [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])),
-		  [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP]))
+		  [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
+		  [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
+		  [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
+		  [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
+		  [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
+		  [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
 #ifdef CONFIG_X86_64
-		  ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])),
-		  [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])),
-		  [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])),
-		  [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])),
-		  [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])),
-		  [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])),
-		  [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])),
-		  [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15]))
+		  , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
+		  [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
+		  [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
+		  [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
+		  [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
+		  [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
+		  [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
+		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
 #endif
-		: "cc", "memory" );
+		: "cc", "memory"
+#ifdef CONFIG_X86_64
+		, "rbx", "rcx", "rdx", "rsi", "rdi"
+		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+		, "ebx", "ecx", "edx" , "esi", "edi"
+#endif
+		);

 	if ((svm->vmcb->save.dr7 & 0xff))
 		load_db_regs(svm->host_db_regs);

-	vcpu->cr2 = svm->vmcb->save.cr2;
+	vcpu->arch.cr2 = svm->vmcb->save.cr2;

 	write_dr6(svm->host_dr6);
 	write_dr7(svm->host_dr7);
@ -1627,34 +1626,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 	}
 }

-static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
-				  unsigned long  addr,
-				  uint32_t err_code)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	uint32_t exit_int_info = svm->vmcb->control.exit_int_info;
-
-	++vcpu->stat.pf_guest;
-
-	if (is_page_fault(exit_int_info)) {
-
-		svm->vmcb->control.event_inj_err = 0;
-		svm->vmcb->control.event_inj = 	SVM_EVTINJ_VALID |
-						SVM_EVTINJ_VALID_ERR |
-						SVM_EVTINJ_TYPE_EXEPT |
-						DF_VECTOR;
-		return;
-	}
-	vcpu->cr2 = addr;
-	svm->vmcb->save.cr2 = addr;
-	svm->vmcb->control.event_inj = 	SVM_EVTINJ_VALID |
-					SVM_EVTINJ_VALID_ERR |
-					SVM_EVTINJ_TYPE_EXEPT |
-					PF_VECTOR;
-	svm->vmcb->control.event_inj_err = err_code;
-}
-
-
 static int is_disabled(void)
 {
 	u64 vm_cr;
@ -1675,7 +1646,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[0] = 0x0f;
 	hypercall[1] = 0x01;
 	hypercall[2] = 0xd9;
-	hypercall[3] = 0xc3;
 }

 static void svm_check_processor_compat(void *rtn)
@ -1683,6 +1653,11 @@ static void svm_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 }

+static bool svm_cpu_has_accelerated_tpr(void)
+{
+	return false;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@ -1691,6 +1666,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.check_processor_compatibility = svm_check_processor_compat,
 	.hardware_enable = svm_hardware_enable,
 	.hardware_disable = svm_hardware_disable,
+	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,

 	.vcpu_create = svm_create_vcpu,
 	.vcpu_free = svm_free_vcpu,
@ -1725,9 +1701,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_rflags = svm_set_rflags,

 	.tlb_flush = svm_flush_tlb,
-	.inject_page_fault = svm_inject_page_fault,
-
-	.inject_gp = svm_inject_gp,

 	.run = svm_vcpu_run,
 	.handle_exit = handle_exit,
@ -1735,19 +1708,23 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.patch_hypercall = svm_patch_hypercall,
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
+	.queue_exception = svm_queue_exception,
+	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
+
+	.set_tss_addr = svm_set_tss_addr,
 };

 static int __init svm_init(void)
 {
-	return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm),
+	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
 			      THIS_MODULE);
 }

 static void __exit svm_exit(void)
 {
-	kvm_exit_x86();
+	kvm_exit();
 }

 module_init(svm_init)
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@ -204,6 +204,7 @@ struct __attribute__ ((__packed__)) vmcb {
 #define INTERCEPT_CR0_MASK 1
 #define INTERCEPT_CR3_MASK (1 << 3)
 #define INTERCEPT_CR4_MASK (1 << 4)
+#define INTERCEPT_CR8_MASK (1 << 8)

 #define INTERCEPT_DR0_MASK 1
 #define INTERCEPT_DR1_MASK (1 << 1)
@ -311,7 +312,7 @@ struct __attribute__ ((__packed__)) vmcb {

 #define SVM_EXIT_ERR		-1

-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP
+#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */

 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
 #define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@ -25,6 +25,9 @@
 *
 */

+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
 #define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
 #define CPU_BASED_USE_TSC_OFFSETING             0x00000008
 #define CPU_BASED_HLT_EXITING                   0x00000080
@ -42,6 +45,12 @@
 #define CPU_BASED_MONITOR_EXITING               0x20000000
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+

 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
@ -54,8 +63,6 @@
 #define VM_ENTRY_SMM                            0x00000400
 #define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800

-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-
 /* VMCS Encodings */
 enum vmcs_field {
 	GUEST_ES_SELECTOR               = 0x00000800,
@ -89,6 +96,8 @@ enum vmcs_field {
 	TSC_OFFSET_HIGH                 = 0x00002011,
 	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
 	VMCS_LINK_POINTER               = 0x00002800,
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
@ -214,6 +223,8 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_WBINVD		54

 /*
 * Interruption-information format
@ -230,13 +241,14 @@ enum vmcs_field {

 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
 #define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */

 /*
 * Exit Qualifications for MOV for Control Register Access
 */
-#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control register */
+#define CONTROL_REG_ACCESS_NUM          0x7     /* 2:0, number of control reg.*/
 #define CONTROL_REG_ACCESS_TYPE         0x30    /* 5:4, access type */
-#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose register */
+#define CONTROL_REG_ACCESS_REG          0xf00   /* 10:8, general purpose reg. */
 #define LMSW_SOURCE_DATA_SHIFT 16
 #define LMSW_SOURCE_DATA  (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
 #define REG_EAX                         (0 << 8)
@ -259,11 +271,11 @@ enum vmcs_field {
 /*
 * Exit Qualifications for MOV for Debug Register Access
 */
-#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug register */
+#define DEBUG_REG_ACCESS_NUM            0x7     /* 2:0, number of debug reg. */
 #define DEBUG_REG_ACCESS_TYPE           0x10    /* 4, direction of access */
 #define TYPE_MOV_TO_DR                  (0 << 4)
 #define TYPE_MOV_FROM_DR                (1 << 4)
-#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose register */
+#define DEBUG_REG_ACCESS_REG            0xf00   /* 11:8, general purpose reg. */


 /* segment AR */
@ -307,4 +319,6 @@ enum vmcs_field {
 #define MSR_IA32_FEATURE_CONTROL_LOCKED         0x1
 #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED  0x4

+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
+
 #endif
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@ -90,8 +90,6 @@ source "drivers/dca/Kconfig"

 source "drivers/auxdisplay/Kconfig"

-source "drivers/kvm/Kconfig"
-
 source "drivers/uio/Kconfig"

 source "drivers/virtio/Kconfig"
--- a/drivers/Makefile
+++ b/drivers/Makefile
@ -47,7 +47,6 @@ obj-$(CONFIG_SPI)		+= spi/
 obj-$(CONFIG_PCCARD)		+= pcmcia/
 obj-$(CONFIG_DIO)		+= dio/
 obj-$(CONFIG_SBUS)		+= sbus/
-obj-$(CONFIG_KVM)		+= kvm/
 obj-$(CONFIG_ZORRO)		+= zorro/
 obj-$(CONFIG_MAC)		+= macintosh/
 obj-$(CONFIG_ATA_OVER_ETH)	+= block/aoe/
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@ -1,165 +0,0 @@
-/*
- * irq.h: in kernel interrupt controller related definitions
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- * Authors:
- *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "kvm.h"
-
-typedef void irq_request_func(void *opaque, int level);
-
-struct kvm_kpic_state {
-	u8 last_irr;	/* edge detection */
-	u8 irr;		/* interrupt request register */
-	u8 imr;		/* interrupt mask register */
-	u8 isr;		/* interrupt service register */
-	u8 priority_add;	/* highest irq priority */
-	u8 irq_base;
-	u8 read_reg_select;
-	u8 poll;
-	u8 special_mask;
-	u8 init_state;
-	u8 auto_eoi;
-	u8 rotate_on_auto_eoi;
-	u8 special_fully_nested_mode;
-	u8 init4;		/* true if 4 byte init */
-	u8 elcr;		/* PIIX edge/trigger selection */
-	u8 elcr_mask;
-	struct kvm_pic *pics_state;
-};
-
-struct kvm_pic {
-	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
-	irq_request_func *irq_request;
-	void *irq_request_opaque;
-	int output;		/* intr from master PIC */
-	struct kvm_io_device dev;
-};
-
-struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
-int kvm_pic_read_irq(struct kvm_pic *s);
-int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
-void kvm_pic_update_irq(struct kvm_pic *s);
-
-#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
-#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
-#define IOAPIC_EDGE_TRIG  0
-#define IOAPIC_LEVEL_TRIG 1
-
-#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
-#define IOAPIC_MEM_LENGTH            0x100
-
-/* Direct registers. */
-#define IOAPIC_REG_SELECT  0x00
-#define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40	/* IA64 IOSAPIC only */
-
-/* Indirect registers. */
-#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
-#define IOAPIC_REG_VERSION 0x01
-#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */
-
-struct kvm_ioapic {
-	u64 base_address;
-	u32 ioregsel;
-	u32 id;
-	u32 irr;
-	u32 pad;
-	union ioapic_redir_entry {
-		u64 bits;
-		struct {
-			u8 vector;
-			u8 delivery_mode:3;
-			u8 dest_mode:1;
-			u8 delivery_status:1;
-			u8 polarity:1;
-			u8 remote_irr:1;
-			u8 trig_mode:1;
-			u8 mask:1;
-			u8 reserve:7;
-			u8 reserved[4];
-			u8 dest_id;
-		} fields;
-	} redirtbl[IOAPIC_NUM_PINS];
-	struct kvm_io_device dev;
-	struct kvm *kvm;
-};
-
-struct kvm_lapic {
-	unsigned long base_address;
-	struct kvm_io_device dev;
-	struct {
-		atomic_t pending;
-		s64 period;	/* unit: ns */
-		u32 divide_count;
-		ktime_t last_update;
-		struct hrtimer dev;
-	} timer;
-	struct kvm_vcpu *vcpu;
-	struct page *regs_page;
-	void *regs;
-};
-
-#ifdef DEBUG
-#define ASSERT(x)  							\
-do {									\
-	if (!(x)) {							\
-		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
-		       __FILE__, __LINE__, #x);				\
-		BUG();							\
-	}								\
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
-int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
-int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_lapic_reset(struct kvm_vcpu *vcpu);
-void kvm_free_apic(struct kvm_lapic *apic);
-u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
-void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
-void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
-struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-				       unsigned long bitmap);
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
-int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
-int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
-void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
-
-#endif
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@ -1,511 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * MMU support
- *
- * Copyright (C) 2006 Qumranet, Inc.
- *
- * Authors:
- *   Yaniv Kamay  <yaniv@qumranet.com>
- *   Avi Kivity   <avi@qumranet.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- */
-
-/*
- * We need the mmu code to access both 32-bit and 64-bit guest ptes,
- * so the code in this file is compiled twice, once per pte size.
- */
-
-#if PTTYPE == 64
-	#define pt_element_t u64
-	#define guest_walker guest_walker64
-	#define FNAME(name) paging##64_##name
-	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
-	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
-	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
-	#ifdef CONFIG_X86_64
-	#define PT_MAX_FULL_LEVELS 4
-	#else
-	#define PT_MAX_FULL_LEVELS 2
-	#endif
-#elif PTTYPE == 32
-	#define pt_element_t u32
-	#define guest_walker guest_walker32
-	#define FNAME(name) paging##32_##name
-	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
-	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
-	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
-	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
-	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
-	#define PT_MAX_FULL_LEVELS 2
-#else
-	#error Invalid PTTYPE value
-#endif
-
-/*
- * The guest_walker structure emulates the behavior of the hardware page
- * table walker.
- */
-struct guest_walker {
-	int level;
-	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-	pt_element_t *table;
-	pt_element_t pte;
-	pt_element_t *ptep;
-	struct page *page;
-	int index;
-	pt_element_t inherited_ar;
-	gfn_t gfn;
-	u32 error_code;
-};
-
-/*
- * Fetch a guest pte for a guest virtual address
- */
-static int FNAME(walk_addr)(struct guest_walker *walker,
-			    struct kvm_vcpu *vcpu, gva_t addr,
-			    int write_fault, int user_fault, int fetch_fault)
-{
-	hpa_t hpa;
-	struct kvm_memory_slot *slot;
-	pt_element_t *ptep;
-	pt_element_t root;
-	gfn_t table_gfn;
-
-	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
-	walker->level = vcpu->mmu.root_level;
-	walker->table = NULL;
-	walker->page = NULL;
-	walker->ptep = NULL;
-	root = vcpu->cr3;
-#if PTTYPE == 64
-	if (!is_long_mode(vcpu)) {
-		walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
-		root = *walker->ptep;
-		walker->pte = root;
-		if (!(root & PT_PRESENT_MASK))
-			goto not_present;
-		--walker->level;
-	}
-#endif
-	table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-	walker->table_gfn[walker->level - 1] = table_gfn;
-	pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-		 walker->level - 1, table_gfn);
-	slot = gfn_to_memslot(vcpu->kvm, table_gfn);
-	hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
-	walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
-	walker->table = kmap_atomic(walker->page, KM_USER0);
-
-	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
-
-	walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
-
-	for (;;) {
-		int index = PT_INDEX(addr, walker->level);
-		hpa_t paddr;
-
-		ptep = &walker->table[index];
-		walker->index = index;
-		ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
-		       ((unsigned long)ptep & PAGE_MASK));
-
-		if (!is_present_pte(*ptep))
-			goto not_present;
-
-		if (write_fault && !is_writeble_pte(*ptep))
-			if (user_fault || is_write_protection(vcpu))
-				goto access_error;
-
-		if (user_fault && !(*ptep & PT_USER_MASK))
-			goto access_error;
-
-#if PTTYPE == 64
-		if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
-			goto access_error;
-#endif
-
-		if (!(*ptep & PT_ACCESSED_MASK)) {
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			*ptep |= PT_ACCESSED_MASK;
-		}
-
-		if (walker->level == PT_PAGE_TABLE_LEVEL) {
-			walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-			break;
-		}
-
-		if (walker->level == PT_DIRECTORY_LEVEL
-		    && (*ptep & PT_PAGE_SIZE_MASK)
-		    && (PTTYPE == 64 || is_pse(vcpu))) {
-			walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-			break;
-		}
-
-		walker->inherited_ar &= walker->table[index];
-		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-		kunmap_atomic(walker->table, KM_USER0);
-		paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
-		walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
-		walker->table = kmap_atomic(walker->page, KM_USER0);
-		--walker->level;
-		walker->table_gfn[walker->level - 1 ] = table_gfn;
-		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-			 walker->level - 1, table_gfn);
-	}
-	walker->pte = *ptep;
-	if (walker->page)
-		walker->ptep = NULL;
-	if (walker->table)
-		kunmap_atomic(walker->table, KM_USER0);
-	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
-	return 1;
-
-not_present:
-	walker->error_code = 0;
-	goto err;
-
-access_error:
-	walker->error_code = PFERR_PRESENT_MASK;
-
-err:
-	if (write_fault)
-		walker->error_code |= PFERR_WRITE_MASK;
-	if (user_fault)
-		walker->error_code |= PFERR_USER_MASK;
-	if (fetch_fault)
-		walker->error_code |= PFERR_FETCH_MASK;
-	if (walker->table)
-		kunmap_atomic(walker->table, KM_USER0);
-	return 0;
-}
-
-static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
-					struct guest_walker *walker)
-{
-	mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
-}
-
-static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
-				  u64 *shadow_pte,
-				  gpa_t gaddr,
-				  pt_element_t gpte,
-				  u64 access_bits,
-				  int user_fault,
-				  int write_fault,
-				  int *ptwrite,
-				  struct guest_walker *walker,
-				  gfn_t gfn)
-{
-	hpa_t paddr;
-	int dirty = gpte & PT_DIRTY_MASK;
-	u64 spte = *shadow_pte;
-	int was_rmapped = is_rmap_pte(spte);
-
-	pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
-		 " user_fault %d gfn %lx\n",
-		 __FUNCTION__, spte, (u64)gpte, access_bits,
-		 write_fault, user_fault, gfn);
-
-	if (write_fault && !dirty) {
-		pt_element_t *guest_ent, *tmp = NULL;
-
-		if (walker->ptep)
-			guest_ent = walker->ptep;
-		else {
-			tmp = kmap_atomic(walker->page, KM_USER0);
-			guest_ent = &tmp[walker->index];
-		}
-
-		*guest_ent |= PT_DIRTY_MASK;
-		if (!walker->ptep)
-			kunmap_atomic(tmp, KM_USER0);
-		dirty = 1;
-		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
-	}
-
-	spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
-	spte |= gpte & PT64_NX_MASK;
-	if (!dirty)
-		access_bits &= ~PT_WRITABLE_MASK;
-
-	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
-	spte |= PT_PRESENT_MASK;
-	if (access_bits & PT_USER_MASK)
-		spte |= PT_USER_MASK;
-
-	if (is_error_hpa(paddr)) {
-		spte |= gaddr;
-		spte |= PT_SHADOW_IO_MARK;
-		spte &= ~PT_PRESENT_MASK;
-		set_shadow_pte(shadow_pte, spte);
-		return;
-	}
-
-	spte |= paddr;
-
-	if ((access_bits & PT_WRITABLE_MASK)
-	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
-
-		spte |= PT_WRITABLE_MASK;
-		if (user_fault) {
-			mmu_unshadow(vcpu, gfn);
-			goto unshadowed;
-		}
-
-		shadow = kvm_mmu_lookup_page(vcpu, gfn);
-		if (shadow) {
-			pgprintk("%s: found shadow page for %lx, marking ro\n",
-				 __FUNCTION__, gfn);
-			access_bits &= ~PT_WRITABLE_MASK;
-			if (is_writeble_pte(spte)) {
-				spte &= ~PT_WRITABLE_MASK;
-				kvm_x86_ops->tlb_flush(vcpu);
-			}
-			if (write_fault)
-				*ptwrite = 1;
-		}
-	}
-
-unshadowed:
-
-	if (access_bits & PT_WRITABLE_MASK)
-		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
-
-	set_shadow_pte(shadow_pte, spte);
-	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
-	if (!was_rmapped)
-		rmap_add(vcpu, shadow_pte);
-}
-
-static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-			   u64 *shadow_pte, u64 access_bits,
-			   int user_fault, int write_fault, int *ptwrite,
-			   struct guest_walker *walker, gfn_t gfn)
-{
-	access_bits &= gpte;
-	FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK,
-			      gpte, access_bits, user_fault, write_fault,
-			      ptwrite, walker, gfn);
-}
-
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
-			      u64 *spte, const void *pte, int bytes)
-{
-	pt_element_t gpte;
-
-	if (bytes < sizeof(pt_element_t))
-		return;
-	gpte = *(const pt_element_t *)pte;
-	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
-		return;
-	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
-	FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
-		       0, NULL, NULL,
-		       (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
-}
-
-static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
-			   u64 *shadow_pte, u64 access_bits,
-			   int user_fault, int write_fault, int *ptwrite,
-			   struct guest_walker *walker, gfn_t gfn)
-{
-	gpa_t gaddr;
-
-	access_bits &= gpde;
-	gaddr = (gpa_t)gfn << PAGE_SHIFT;
-	if (PTTYPE == 32 && is_cpuid_PSE36())
-		gaddr |= (gpde & PT32_DIR_PSE36_MASK) <<
-			(32 - PT32_DIR_PSE36_SHIFT);
-	FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
-			      gpde, access_bits, user_fault, write_fault,
-			      ptwrite, walker, gfn);
-}
-
-/*
- * Fetch a shadow pte for a specific level in the paging hierarchy.
- */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-			 struct guest_walker *walker,
-			 int user_fault, int write_fault, int *ptwrite)
-{
-	hpa_t shadow_addr;
-	int level;
-	u64 *shadow_ent;
-	u64 *prev_shadow_ent = NULL;
-
-	if (!is_present_pte(walker->pte))
-		return NULL;
-
-	shadow_addr = vcpu->mmu.root_hpa;
-	level = vcpu->mmu.shadow_root_level;
-	if (level == PT32E_ROOT_LEVEL) {
-		shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
-		shadow_addr &= PT64_BASE_ADDR_MASK;
-		--level;
-	}
-
-	for (; ; level--) {
-		u32 index = SHADOW_PT_INDEX(addr, level);
-		struct kvm_mmu_page *shadow_page;
-		u64 shadow_pte;
-		int metaphysical;
-		gfn_t table_gfn;
-		unsigned hugepage_access = 0;
-
-		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
-		if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
-			if (level == PT_PAGE_TABLE_LEVEL)
-				break;
-			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
-			prev_shadow_ent = shadow_ent;
-			continue;
-		}
-
-		if (level == PT_PAGE_TABLE_LEVEL)
-			break;
-
-		if (level - 1 == PT_PAGE_TABLE_LEVEL
-		    && walker->level == PT_DIRECTORY_LEVEL) {
-			metaphysical = 1;
-			hugepage_access = walker->pte;
-			hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
-			if (walker->pte & PT64_NX_MASK)
-				hugepage_access |= (1 << 2);
-			hugepage_access >>= PT_WRITABLE_SHIFT;
-			table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
-				>> PAGE_SHIFT;
-		} else {
-			metaphysical = 0;
-			table_gfn = walker->table_gfn[level - 2];
-		}
-		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-					       metaphysical, hugepage_access,
-					       shadow_ent);
-		shadow_addr = __pa(shadow_page->spt);
-		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
-			| PT_WRITABLE_MASK | PT_USER_MASK;
-		*shadow_ent = shadow_pte;
-		prev_shadow_ent = shadow_ent;
-	}
-
-	if (walker->level == PT_DIRECTORY_LEVEL) {
-		FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
-			       walker->inherited_ar, user_fault, write_fault,
-			       ptwrite, walker, walker->gfn);
-	} else {
-		ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
-		FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
-			       walker->inherited_ar, user_fault, write_fault,
-			       ptwrite, walker, walker->gfn);
-	}
-	return shadow_ent;
-}
-
-/*
- * Page fault handler.  There are several causes for a page fault:
- *   - there is no shadow pte for the guest pte
- *   - write access through a shadow pte marked read only so that we can set
- *     the dirty bit
- *   - write access to a shadow pte marked read only so we can update the page
- *     dirty bitmap, when userspace requests it
- *   - mmio access; in this case we will never install a present shadow pte
- *   - normal guest page fault due to the guest pte marked not present, not
- *     writable, or not executable
- *
- *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
- *           a negative value on error.
- */
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
-			       u32 error_code)
-{
-	int write_fault = error_code & PFERR_WRITE_MASK;
-	int user_fault = error_code & PFERR_USER_MASK;
-	int fetch_fault = error_code & PFERR_FETCH_MASK;
-	struct guest_walker walker;
-	u64 *shadow_pte;
-	int write_pt = 0;
-	int r;
-
-	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
-	kvm_mmu_audit(vcpu, "pre page fault");
-
-	r = mmu_topup_memory_caches(vcpu);
-	if (r)
-		return r;
-
-	/*
-	 * Look up the shadow pte for the faulting address.
-	 */
-	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
-			     fetch_fault);
-
-	/*
-	 * The page is not mapped by the guest.  Let the guest handle it.
-	 */
-	if (!r) {
-		pgprintk("%s: guest page fault\n", __FUNCTION__);
-		inject_page_fault(vcpu, addr, walker.error_code);
-		vcpu->last_pt_write_count = 0; /* reset fork detector */
-		return 0;
-	}
-
-	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  &write_pt);
-	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
-		 shadow_pte, *shadow_pte, write_pt);
-
-	if (!write_pt)
-		vcpu->last_pt_write_count = 0; /* reset fork detector */
-
-	/*
-	 * mmio: emulate if accessible, otherwise its a guest fault.
-	 */
-	if (is_io_pte(*shadow_pte))
-		return 1;
-
-	++vcpu->stat.pf_fixed;
-	kvm_mmu_audit(vcpu, "post page fault (fixed)");
-
-	return write_pt;
-}
-
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
-{
-	struct guest_walker walker;
-	gpa_t gpa = UNMAPPED_GVA;
-	int r;
-
-	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
-
-	if (r) {
-		gpa = (gpa_t)walker.gfn << PAGE_SHIFT;
-		gpa |= vaddr & ~PAGE_MASK;
-	}
-
-	return gpa;
-}
-
-#undef pt_element_t
-#undef guest_walker
-#undef FNAME
-#undef PT_BASE_ADDR_MASK
-#undef PT_INDEX
-#undef SHADOW_PT_INDEX
-#undef PT_LEVEL_MASK
-#undef PT_DIR_BASE_ADDR_MASK
-#undef PT_MAX_FULL_LEVELS
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@ -3,6 +3,7 @@ include include/asm-generic/Kbuild.asm
 header-y += boot.h
 header-y += bootparam.h
 header-y += debugreg.h
+header-y += kvm.h
 header-y += ldt.h
 header-y += msr-index.h
 header-y += prctl.h
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@ -0,0 +1,191 @@
+#ifndef __LINUX_KVM_X86_H
+#define __LINUX_KVM_X86_H
+
+/*
+ * KVM x86 specific structures and definitions
+ *
+ */
+
+#include <asm/types.h>
+#include <linux/ioctl.h>
+
+/* Architectural interrupt line count. */
+#define KVM_NR_INTERRUPTS 256
+
+struct kvm_memory_alias {
+	__u32 slot;  /* this has a different namespace than memory slots */
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size;
+	__u64 target_phys_addr;
+};
+
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
+struct kvm_pic_state {
+	__u8 last_irr;	/* edge detection */
+	__u8 irr;		/* interrupt request register */
+	__u8 imr;		/* interrupt mask register */
+	__u8 isr;		/* interrupt service register */
+	__u8 priority_add;	/* highest irq priority */
+	__u8 irq_base;
+	__u8 read_reg_select;
+	__u8 poll;
+	__u8 special_mask;
+	__u8 init_state;
+	__u8 auto_eoi;
+	__u8 rotate_on_auto_eoi;
+	__u8 special_fully_nested_mode;
+	__u8 init4;		/* true if 4 byte init */
+	__u8 elcr;		/* PIIX edge/trigger selection */
+	__u8 elcr_mask;
+};
+
+#define KVM_IOAPIC_NUM_PINS  24
+struct kvm_ioapic_state {
+	__u64 base_address;
+	__u32 ioregsel;
+	__u32 id;
+	__u32 irr;
+	__u32 pad;
+	union {
+		__u64 bits;
+		struct {
+			__u8 vector;
+			__u8 delivery_mode:3;
+			__u8 dest_mode:1;
+			__u8 delivery_status:1;
+			__u8 polarity:1;
+			__u8 remote_irr:1;
+			__u8 trig_mode:1;
+			__u8 mask:1;
+			__u8 reserve:7;
+			__u8 reserved[4];
+			__u8 dest_id;
+		} fields;
+	} redirtbl[KVM_IOAPIC_NUM_PINS];
+};
+
+#define KVM_IRQCHIP_PIC_MASTER   0
+#define KVM_IRQCHIP_PIC_SLAVE    1
+#define KVM_IRQCHIP_IOAPIC       2
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 rax, rbx, rcx, rdx;
+	__u64 rsi, rdi, rsp, rbp;
+	__u64 r8,  r9,  r10, r11;
+	__u64 r12, r13, r14, r15;
+	__u64 rip, rflags;
+};
+
+/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+	char regs[KVM_APIC_REG_SIZE];
+};
+
+struct kvm_segment {
+	__u64 base;
+	__u32 limit;
+	__u16 selector;
+	__u8  type;
+	__u8  present, dpl, db, s, l, g, avl;
+	__u8  unusable;
+	__u8  padding;
+};
+
+struct kvm_dtable {
+	__u64 base;
+	__u16 limit;
+	__u16 padding[3];
+};
+
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+	/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+struct kvm_msr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
+struct kvm_msrs {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
+
+	struct kvm_msr_entry entries[0];
+};
+
+/* for KVM_GET_MSR_INDEX_LIST */
+struct kvm_msr_list {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 indices[0];
+};
+
+
+struct kvm_cpuid_entry {
+	__u32 function;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry entries[0];
+};
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
+#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
+#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+
+/* for KVM_SET_CPUID2 */
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+#endif
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@ -1,23 +1,24 @@
-#ifndef __KVM_H
-#define __KVM_H
-
-/*
+#/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This header defines architecture specific interfaces, x86 version
+ *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
+ *
 */

+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+
 #include <linux/types.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
 #include <linux/mm.h>
-#include <linux/preempt.h>
-#include <asm/signal.h>

 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/desc.h>

 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@ -37,15 +38,8 @@
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)

-#define KVM_MAX_VCPUS 4
-#define KVM_ALIAS_SLOTS 4
-#define KVM_MEMORY_SLOTS 8
-#define KVM_NUM_MMU_PAGES 1024
-#define KVM_MIN_FREE_MMU_PAGES 5
-#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 40
-
 #define DE_VECTOR 0
+#define UD_VECTOR 6
 #define NM_VECTOR 7
 #define DF_VECTOR 8
 #define TS_VECTOR 10
@ -59,119 +53,20 @@

 #define IOPL_SHIFT 12

-#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_ALIAS_SLOTS 4

-/*
- * vcpu->requests bit members
- */
-#define KVM_TLB_FLUSH 0
+#define KVM_PERMILLE_MMU_PAGES 20
+#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_NUM_MMU_PAGES 1024
+#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_REFILL_PAGES 25
+#define KVM_MAX_CPUID_ENTRIES 40

-/*
- * Address types:
- *
- *  gva - guest virtual address
- *  gpa - guest physical address
- *  gfn - guest frame number
- *  hva - host virtual address
- *  hpa - host physical address
- *  hfn - host frame number
- */
-
-typedef unsigned long  gva_t;
-typedef u64            gpa_t;
-typedef unsigned long  gfn_t;
-
-typedef unsigned long  hva_t;
-typedef u64            hpa_t;
-typedef unsigned long  hfn_t;
-
-#define NR_PTE_CHAIN_ENTRIES 5
-
-struct kvm_pte_chain {
-	u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
-	struct hlist_node link;
-};
-
-/*
- * kvm_mmu_page_role, below, is defined as:
- *
- *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
- *   bits 4:7 - page table level for this shadow (1-4)
- *   bits 8:9 - page table quadrant for 2-level guests
- *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
- *   bits 17:19 - "access" - the user, writable, and nx bits of a huge page pde
- */
-union kvm_mmu_page_role {
-	unsigned word;
-	struct {
-		unsigned glevels : 4;
-		unsigned level : 4;
-		unsigned quadrant : 2;
-		unsigned pad_for_nice_hex_output : 6;
-		unsigned metaphysical : 1;
-		unsigned hugepage_access : 3;
-	};
-};
-
-struct kvm_mmu_page {
-	struct list_head link;
-	struct hlist_node hash_link;
-
-	/*
-	 * The following two entries are used to key the shadow page in the
-	 * hash table.
-	 */
-	gfn_t gfn;
-	union kvm_mmu_page_role role;
-
-	u64 *spt;
-	unsigned long slot_bitmap; /* One bit set per slot which has memory
-				    * in this shadow page.
-				    */
-	int multimapped;         /* More than one parent_pte? */
-	int root_count;          /* Currently serving as active root */
-	union {
-		u64 *parent_pte;               /* !multimapped */
-		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
-	};
-};
+extern spinlock_t kvm_lock;
+extern struct list_head vm_list;

 struct kvm_vcpu;
-extern struct kmem_cache *kvm_vcpu_cache;
-
-/*
- * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
- * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
- * mode.
- */
-struct kvm_mmu {
-	void (*new_cr3)(struct kvm_vcpu *vcpu);
-	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
-	void (*free)(struct kvm_vcpu *vcpu);
-	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
-	hpa_t root_hpa;
-	int root_level;
-	int shadow_root_level;
-
-	u64 *pae_root;
-};
-
-#define KVM_NR_MEM_OBJS 20
-
-struct kvm_mmu_memory_cache {
-	int nobjs;
-	void *objects[KVM_NR_MEM_OBJS];
-};
-
-/*
- * We don't want allocation failures within the mmu code, so we preallocate
- * enough memory for a single page fault in a cache.
- */
-struct kvm_guest_debug {
-	int enabled;
-	unsigned long bp[4];
-	int singlestep;
-};
+struct kvm;

 enum {
 	VCPU_REGS_RAX = 0,
@ -206,109 +101,94 @@ enum {
 	VCPU_SREG_LDTR,
 };

-struct kvm_pio_request {
-	unsigned long count;
-	int cur_count;
-	struct page *guest_pages[2];
-	unsigned guest_page_offset;
-	int in;
-	int port;
-	int size;
-	int string;
-	int down;
-	int rep;
-};
+#include <asm/kvm_x86_emulate.h>

-struct kvm_stat {
-	u32 pf_fixed;
-	u32 pf_guest;
-	u32 tlb_flush;
-	u32 invlpg;
-
-	u32 exits;
-	u32 io_exits;
-	u32 mmio_exits;
-	u32 signal_exits;
-	u32 irq_window_exits;
-	u32 halt_exits;
-	u32 halt_wakeup;
-	u32 request_irq_exits;
-	u32 irq_exits;
-	u32 light_exits;
-	u32 efer_reload;
-};
-
-struct kvm_io_device {
-	void (*read)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     void *val);
-	void (*write)(struct kvm_io_device *this,
-		      gpa_t addr,
-		      int len,
-		      const void *val);
-	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
-	void (*destructor)(struct kvm_io_device *this);
-
-	void             *private;
-};
-
-static inline void kvm_iodevice_read(struct kvm_io_device *dev,
-				     gpa_t addr,
-				     int len,
-				     void *val)
-{
-	dev->read(dev, addr, len, val);
-}
-
-static inline void kvm_iodevice_write(struct kvm_io_device *dev,
-				      gpa_t addr,
-				      int len,
-				      const void *val)
-{
-	dev->write(dev, addr, len, val);
-}
-
-static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
-{
-	return dev->in_range(dev, addr);
-}
-
-static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
-{
-	if (dev->destructor)
-		dev->destructor(dev);
-}
+#define KVM_NR_MEM_OBJS 40

 /*
- * It would be nice to use something smarter than a linear search, TBD...
- * Thankfully we dont expect many devices to register (famous last words :),
- * so until then it will suffice.  At least its abstracted so we can change
- * in one place.
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
 */
-struct kvm_io_bus {
-	int                   dev_count;
-#define NR_IOBUS_DEVS 6
-	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+struct kvm_mmu_memory_cache {
+	int nobjs;
+	void *objects[KVM_NR_MEM_OBJS];
 };

-void kvm_io_bus_init(struct kvm_io_bus *bus);
-void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
-void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev);
+#define NR_PTE_CHAIN_ENTRIES 5

-struct kvm_vcpu {
-	struct kvm *kvm;
-	struct preempt_notifier preempt_notifier;
-	int vcpu_id;
-	struct mutex mutex;
-	int   cpu;
+struct kvm_pte_chain {
+	u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
+	struct hlist_node link;
+};
+
+/*
+ * kvm_mmu_page_role, below, is defined as:
+ *
+ *   bits 0:3 - total guest paging levels (2-4, or zero for real mode)
+ *   bits 4:7 - page table level for this shadow (1-4)
+ *   bits 8:9 - page table quadrant for 2-level guests
+ *   bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ *   bits 17:19 - common access permissions for all ptes in this shadow page
+ */
+union kvm_mmu_page_role {
+	unsigned word;
+	struct {
+		unsigned glevels : 4;
+		unsigned level : 4;
+		unsigned quadrant : 2;
+		unsigned pad_for_nice_hex_output : 6;
+		unsigned metaphysical : 1;
+		unsigned access : 3;
+	};
+};
+
+struct kvm_mmu_page {
+	struct list_head link;
+	struct hlist_node hash_link;
+
+	/*
+	 * The following two entries are used to key the shadow page in the
+	 * hash table.
+	 */
+	gfn_t gfn;
+	union kvm_mmu_page_role role;
+
+	u64 *spt;
+	/* hold the gfn of each spte inside spt */
+	gfn_t *gfns;
+	unsigned long slot_bitmap; /* One bit set per slot which has memory
+				    * in this shadow page.
+				    */
+	int multimapped;         /* More than one parent_pte? */
+	int root_count;          /* Currently serving as active root */
+	union {
+		u64 *parent_pte;               /* !multimapped */
+		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
+	};
+};
+
+/*
+ * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
+ * 32-bit).  The kvm_mmu structure abstracts the details of the current mmu
+ * mode.
+ */
+struct kvm_mmu {
+	void (*new_cr3)(struct kvm_vcpu *vcpu);
+	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
+	void (*free)(struct kvm_vcpu *vcpu);
+	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+	void (*prefetch_page)(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *page);
+	hpa_t root_hpa;
+	int root_level;
+	int shadow_root_level;
+
+	u64 *pae_root;
+};
+
+struct kvm_vcpu_arch {
 	u64 host_tsc;
-	struct kvm_run *run;
 	int interrupt_window_open;
-	int guest_mode;
-	unsigned long requests;
 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
@ -317,9 +197,6 @@ struct kvm_vcpu {
 	unsigned long cr0;
 	unsigned long cr2;
 	unsigned long cr3;
-	gpa_t para_state_gpa;
-	struct page *para_state_page;
-	gpa_t hypercall_gpa;
 	unsigned long cr4;
 	unsigned long cr8;
 	u64 pdptrs[4]; /* pae */
@ -334,6 +211,7 @@ struct kvm_vcpu {
 	int mp_state;
 	int sipi_vector;
 	u64 ia32_misc_enable_msr;
+	bool tpr_access_reporting;

 	struct kvm_mmu mmu;

@ -344,29 +222,26 @@ struct kvm_vcpu {

 	gfn_t last_pt_write_gfn;
 	int   last_pt_write_count;
+	u64  *last_pte_updated;

-	struct kvm_guest_debug guest_debug;
+	struct {
+		gfn_t gfn;          /* presumed gfn during guest pte update */
+		struct page *page;  /* page corresponding to that gfn */
+	} update_pte;

 	struct i387_fxsave_struct host_fx_image;
 	struct i387_fxsave_struct guest_fx_image;
-	int fpu_active;
-	int guest_fpu_loaded;

-	int mmio_needed;
-	int mmio_read_completed;
-	int mmio_is_write;
-	int mmio_size;
-	unsigned char mmio_data[8];
-	gpa_t mmio_phys_addr;
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
-	wait_queue_head_t wq;

-	int sigset_active;
-	sigset_t sigset;
-
-	struct kvm_stat stat;
+	struct kvm_queued_exception {
+		bool pending;
+		bool has_error_code;
+		u8 nr;
+		u32 error_code;
+	} exception;

 	struct {
 		int active;
@ -381,7 +256,10 @@ struct kvm_vcpu {
 	int halt_request; /* real mode on Intel only */

 	int cpuid_nent;
-	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+	struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+	/* emulate context */
+
+	struct x86_emulate_ctxt emulate_ctxt;
 };

 struct kvm_mem_alias {
@ -390,51 +268,58 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };

-struct kvm_memory_slot {
-	gfn_t base_gfn;
-	unsigned long npages;
-	unsigned long flags;
-	struct page **phys_mem;
-	unsigned long *dirty_bitmap;
-};
-
-struct kvm {
-	struct mutex lock; /* protects everything except vcpus */
+struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
-	int nmemslots;
-	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
+
+	unsigned int n_free_mmu_pages;
+	unsigned int n_requested_mmu_pages;
+	unsigned int n_alloc_mmu_pages;
+	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
-	int n_free_mmu_pages;
-	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
-	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-	unsigned long rmap_overflow;
-	struct list_head vm_list;
-	struct file *filp;
-	struct kvm_io_bus mmio_bus;
-	struct kvm_io_bus pio_bus;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
+
 	int round_robin_prev_vcpu;
+	unsigned int tss_addr;
+	struct page *apic_access_page;
 };

-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
-	return kvm->vpic;
-}
+struct kvm_vm_stat {
+	u32 mmu_shadow_zapped;
+	u32 mmu_pte_write;
+	u32 mmu_pte_updated;
+	u32 mmu_pde_zapped;
+	u32 mmu_flooded;
+	u32 mmu_recycled;
+	u32 mmu_cache_miss;
+	u32 remote_tlb_flush;
+};

-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
-	return kvm->vioapic;
-}
+struct kvm_vcpu_stat {
+	u32 pf_fixed;
+	u32 pf_guest;
+	u32 tlb_flush;
+	u32 invlpg;

-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return pic_irqchip(kvm) != 0;
-}
+	u32 exits;
+	u32 io_exits;
+	u32 mmio_exits;
+	u32 signal_exits;
+	u32 irq_window_exits;
+	u32 halt_exits;
+	u32 halt_wakeup;
+	u32 request_irq_exits;
+	u32 irq_exits;
+	u32 host_state_reload;
+	u32 efer_reload;
+	u32 fpu_reload;
+	u32 insn_emulation;
+	u32 insn_emulation_fail;
+};

 struct descriptor_table {
 	u16 limit;
@ -449,11 +334,12 @@ struct kvm_x86_ops {
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
+	bool (*cpu_has_accelerated_tpr)(void);

 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
-	void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+	int (*vcpu_reset)(struct kvm_vcpu *vcpu);

 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@ -489,10 +375,6 @@ struct kvm_x86_ops {
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);

 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
-	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
-				  unsigned long addr, u32 err_code);
-
-	void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);

 	void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
@ -501,54 +383,31 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
+				bool has_error_code, u32 error_code);
+	bool (*exception_injected)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
+
+	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 };

 extern struct kvm_x86_ops *kvm_x86_ops;

-/* The guest did something we don't support. */
-#define pr_unimpl(vcpu, fmt, ...)					\
- do {									\
-	if (printk_ratelimit())						\
-		printk(KERN_ERR "kvm: %i: cpu%i " fmt,			\
-		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
- } while(0)
-
-#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
-#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
-
-int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
-void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
-
-int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
-		  struct module *module);
-void kvm_exit_x86(void);
-
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);

 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);

 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
-
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
-#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
-#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
-static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
-hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
-struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
-
-extern hpa_t bad_page_address;
-
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
-struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
-void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);

 enum emulation_result {
 	EMULATE_DONE,       /* no further processing */
@ -556,8 +415,10 @@ enum emulation_result {
 	EMULATE_FAIL,         /* can't emulate this instruction */
 };

+#define EMULTYPE_NO_DECODE	    (1 << 0)
+#define EMULTYPE_TRAP_UD	    (1 << 1)
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
-			unsigned long cr2, u16 error_code);
+			unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@ -572,7 +433,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);

 struct x86_emulate_ctxt;

-int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		     int size, unsigned port);
 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 			   int size, unsigned long count, int down,
@ -581,7 +442,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
 int emulate_clts(struct kvm_vcpu *vcpu);
-int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
 		    unsigned long *dest);
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
 		    unsigned long value);
@ -597,15 +458,15 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);

+void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
+void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
+			   u32 error_code);
+
 void fx_init(struct kvm_vcpu *vcpu);

-void kvm_resched(struct kvm_vcpu *vcpu);
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
-void kvm_flush_remote_tlbs(struct kvm *kvm);
-
 int emulator_read_std(unsigned long addr,
-                      void *val,
+		      void *val,
 		      unsigned int bytes,
 		      struct kvm_vcpu *vcpu);
 int emulator_write_emulated(unsigned long addr,
@ -615,6 +476,7 @@ int emulator_write_emulated(unsigned long addr,

 unsigned long segment_base(u16 selector);

+void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
@ -622,66 +484,14 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);

-int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);

-static inline void kvm_guest_enter(void)
-{
-	current->flags |= PF_VCPU;
-}
+int kvm_fix_hypercall(struct kvm_vcpu *vcpu);

-static inline void kvm_guest_exit(void)
-{
-	current->flags &= ~PF_VCPU;
-}
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code);

-static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-				     u32 error_code)
-{
-	return vcpu->mmu.page_fault(vcpu, gva, error_code);
-}
-
-static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
-{
-	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
-		__kvm_mmu_free_some_pages(vcpu);
-}
-
-static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
-{
-	if (likely(vcpu->mmu.root_hpa != INVALID_PAGE))
-		return 0;
-
-	return kvm_mmu_load(vcpu);
-}
-
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
-	return vcpu->shadow_efer & EFER_LME;
-#else
-	return 0;
-#endif
-}
-
-static inline int is_pae(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr4 & X86_CR4_PAE;
-}
-
-static inline int is_pse(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr4 & X86_CR4_PSE;
-}
-
-static inline int is_paging(struct kvm_vcpu *vcpu)
-{
-	return vcpu->cr0 & X86_CR0_PG;
-}
-
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	return slot - kvm->memslots;
-}
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
+int complete_pio(struct kvm_vcpu *vcpu);

 static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 {
@ -693,55 +503,55 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
 static inline u16 read_fs(void)
 {
 	u16 seg;
-	asm ("mov %%fs, %0" : "=g"(seg));
+	asm("mov %%fs, %0" : "=g"(seg));
 	return seg;
 }

 static inline u16 read_gs(void)
 {
 	u16 seg;
-	asm ("mov %%gs, %0" : "=g"(seg));
+	asm("mov %%gs, %0" : "=g"(seg));
 	return seg;
 }

 static inline u16 read_ldt(void)
 {
 	u16 ldt;
-	asm ("sldt %0" : "=g"(ldt));
+	asm("sldt %0" : "=g"(ldt));
 	return ldt;
 }

 static inline void load_fs(u16 sel)
 {
-	asm ("mov %0, %%fs" : : "rm"(sel));
+	asm("mov %0, %%fs" : : "rm"(sel));
 }

 static inline void load_gs(u16 sel)
 {
-	asm ("mov %0, %%gs" : : "rm"(sel));
+	asm("mov %0, %%gs" : : "rm"(sel));
 }

 #ifndef load_ldt
 static inline void load_ldt(u16 sel)
 {
-	asm ("lldt %0" : : "rm"(sel));
+	asm("lldt %0" : : "rm"(sel));
 }
 #endif

 static inline void get_idt(struct descriptor_table *table)
 {
-	asm ("sidt %0" : "=m"(*table));
+	asm("sidt %0" : "=m"(*table));
 }

 static inline void get_gdt(struct descriptor_table *table)
 {
-	asm ("sgdt %0" : "=m"(*table));
+	asm("sgdt %0" : "=m"(*table));
 }

 static inline unsigned long read_tr_base(void)
 {
 	u16 tr;
-	asm ("str %0" : "=g"(tr));
+	asm("str %0" : "=g"(tr));
 	return segment_base(tr);
 }

@ -757,17 +567,17 @@ static inline unsigned long read_msr(unsigned long msr)

 static inline void fx_save(struct i387_fxsave_struct *image)
 {
-	asm ("fxsave (%0)":: "r" (image));
+	asm("fxsave (%0)":: "r" (image));
 }

 static inline void fx_restore(struct i387_fxsave_struct *image)
 {
-	asm ("fxrstor (%0)":: "r" (image));
+	asm("fxrstor (%0)":: "r" (image));
 }

 static inline void fpu_init(void)
 {
-	asm ("finit");
+	asm("finit");
 }

 static inline u32 get_rdx_init_val(void)
@ -775,6 +585,11 @@ static inline u32 get_rdx_init_val(void)
 	return 0x600; /* P6 family */
 }

+static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
+{
+	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+}
+
 #define ASM_VMX_VMCLEAR_RAX       ".byte 0x66, 0x0f, 0xc7, 0x30"
 #define ASM_VMX_VMLAUNCH          ".byte 0x0f, 0x01, 0xc2"
 #define ASM_VMX_VMRESUME          ".byte 0x0f, 0x01, 0xc3"
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@ -0,0 +1,105 @@
+#ifndef __X86_KVM_PARA_H
+#define __X86_KVM_PARA_H
+
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
+ * should be used to determine that a VM is running under KVM.
+ */
+#define KVM_CPUID_SIGNATURE	0x40000000
+
+/* This CPUID returns a feature bitmap in eax.  Before enabling a particular
+ * paravirtualization, the appropriate feature bit should be checked.
+ */
+#define KVM_CPUID_FEATURES	0x40000001
+
+#ifdef __KERNEL__
+#include <asm/processor.h>
+
+/* This instruction is vmcall.  On non-VT architectures, it will generate a
+ * trap that we will then rewrite to the appropriate instruction.
+ */
+#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+
+/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
+ * instruction.  The hypervisor may replace it with something else but only the
+ * instructions are guaranteed to be supported.
+ *
+ * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+ * The hypercall number should be placed in rax and the return value will be
+ * placed in rax.  No other registers will be clobbered unless explicited
+ * noted by the particular hypercall.
+ */
+
+static inline long kvm_hypercall0(unsigned int nr)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr));
+	return ret;
+}
+
+static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1));
+	return ret;
+}
+
+static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
+				  unsigned long p2)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2));
+	return ret;
+}
+
+static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
+				  unsigned long p2, unsigned long p3)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3));
+	return ret;
+}
+
+static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
+				  unsigned long p2, unsigned long p3,
+				  unsigned long p4)
+{
+	long ret;
+	asm volatile(KVM_HYPERCALL
+		     : "=a"(ret)
+		     : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
+	return ret;
+}
+
+static inline int kvm_para_available(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	char signature[13];
+
+	cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+	memcpy(signature + 0, &ebx, 4);
+	memcpy(signature + 4, &ecx, 4);
+	memcpy(signature + 8, &edx, 4);
+	signature[12] = 0;
+
+	if (strcmp(signature, "KVMKVMKVM") == 0)
+		return 1;
+
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return cpuid_eax(KVM_CPUID_FEATURES);
+}
+
+#endif
+
+#endif
--- a/include/asm-x86/kvm_x86_emulate.h
+++ b/include/asm-x86/kvm_x86_emulate.h
@ -62,17 +62,6 @@ struct x86_emulate_ops {
 	int (*read_std)(unsigned long addr, void *val,
 			unsigned int bytes, struct kvm_vcpu *vcpu);

-	/*
-	 * write_std: Write bytes of standard (non-emulated/special) memory.
-	 *            Used for stack operations, and others.
-	 *  @addr:  [IN ] Linear address to which to write.
-	 *  @val:   [IN ] Value to write to memory (low-order bytes used as
-	 *                required).
-	 *  @bytes: [IN ] Number of bytes to write to memory.
-	 */
-	int (*write_std)(unsigned long addr, const void *val,
-			 unsigned int bytes, struct kvm_vcpu *vcpu);
-
 	/*
 	 * read_emulated: Read bytes from emulated/special memory area.
 	 *  @addr:  [IN ] Linear address from which to read.
@ -112,13 +101,50 @@ struct x86_emulate_ops {

 };

+/* Type, address-of, and value of an instruction's operand. */
+struct operand {
+	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+	unsigned int bytes;
+	unsigned long val, orig_val, *ptr;
+};
+
+struct fetch_cache {
+	u8 data[15];
+	unsigned long start;
+	unsigned long end;
+};
+
+struct decode_cache {
+	u8 twobyte;
+	u8 b;
+	u8 lock_prefix;
+	u8 rep_prefix;
+	u8 op_bytes;
+	u8 ad_bytes;
+	u8 rex_prefix;
+	struct operand src;
+	struct operand dst;
+	unsigned long *override_base;
+	unsigned int d;
+	unsigned long regs[NR_VCPU_REGS];
+	unsigned long eip;
+	/* modrm */
+	u8 modrm;
+	u8 modrm_mod;
+	u8 modrm_reg;
+	u8 modrm_rm;
+	u8 use_modrm_ea;
+	unsigned long modrm_ea;
+	unsigned long modrm_val;
+	struct fetch_cache fetch;
+};
+
 struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;

 	/* Linear faulting address (if emulating a page-faulting instruction). */
 	unsigned long eflags;
-	unsigned long cr2;

 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
@ -129,8 +155,16 @@ struct x86_emulate_ctxt {
 	unsigned long ss_base;
 	unsigned long gs_base;
 	unsigned long fs_base;
+
+	/* decode cache */
+
+	struct decode_cache decode;
 };

+/* Repeat String Operation Prefix */
+#define REPE_PREFIX  1
+#define REPNE_PREFIX    2
+
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
 #define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
@ -144,12 +178,9 @@ struct x86_emulate_ctxt {
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
 #endif

-/*
- * x86_emulate_memop: Emulate an instruction that faulted attempting to
- *                    read/write a 'special' memory area.
- * Returns -1 on failure, 0 on success.
- */
-int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
-		      struct x86_emulate_ops *ops);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
+		    struct x86_emulate_ops *ops);
+int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
+		     struct x86_emulate_ops *ops);

 #endif				/* __X86_EMULATE_H__ */
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@ -100,7 +100,6 @@ header-y += iso_fs.h
 header-y += ixjuser.h
 header-y += jffs2.h
 header-y += keyctl.h
-header-y += kvm.h
 header-y += limits.h
 header-y += lock_dlm_plock.h
 header-y += magic.h
@ -256,6 +255,7 @@ unifdef-y += kd.h
 unifdef-y += kernelcapi.h
 unifdef-y += kernel.h
 unifdef-y += keyboard.h
+unifdef-$(CONFIG_HAVE_KVM) += kvm.h
 unifdef-y += llc.h
 unifdef-y += loop.h
 unifdef-y += lp.h
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@ -9,12 +9,10 @@

 #include <asm/types.h>
 #include <linux/ioctl.h>
+#include <asm/kvm.h>

 #define KVM_API_VERSION 12

-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
 	__u32 slot;
@ -23,16 +21,18 @@ struct kvm_memory_region {
 	__u64 memory_size; /* bytes */
 };

+/* for KVM_SET_USER_MEMORY_REGION */
+struct kvm_userspace_memory_region {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+	__u64 userspace_addr; /* start of the userspace allocated memory */
+};
+
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL

-struct kvm_memory_alias {
-	__u32 slot;  /* this has a different namespace than memory slots */
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size;
-	__u64 target_phys_addr;
-};

 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@ -45,62 +45,18 @@ struct kvm_irq_level {
 	__u32 level;
 };

-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
-struct kvm_pic_state {
-	__u8 last_irr;	/* edge detection */
-	__u8 irr;		/* interrupt request register */
-	__u8 imr;		/* interrupt mask register */
-	__u8 isr;		/* interrupt service register */
-	__u8 priority_add;	/* highest irq priority */
-	__u8 irq_base;
-	__u8 read_reg_select;
-	__u8 poll;
-	__u8 special_mask;
-	__u8 init_state;
-	__u8 auto_eoi;
-	__u8 rotate_on_auto_eoi;
-	__u8 special_fully_nested_mode;
-	__u8 init4;		/* true if 4 byte init */
-	__u8 elcr;		/* PIIX edge/trigger selection */
-	__u8 elcr_mask;
-};
-
-#define KVM_IOAPIC_NUM_PINS  24
-struct kvm_ioapic_state {
-	__u64 base_address;
-	__u32 ioregsel;
-	__u32 id;
-	__u32 irr;
-	__u32 pad;
-	union {
-		__u64 bits;
-		struct {
-			__u8 vector;
-			__u8 delivery_mode:3;
-			__u8 dest_mode:1;
-			__u8 delivery_status:1;
-			__u8 polarity:1;
-			__u8 remote_irr:1;
-			__u8 trig_mode:1;
-			__u8 mask:1;
-			__u8 reserve:7;
-			__u8 reserved[4];
-			__u8 dest_id;
-		} fields;
-	} redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2

 struct kvm_irqchip {
 	__u32 chip_id;
 	__u32 pad;
        union {
 		char dummy[512];  /* reserving space */
+#ifdef CONFIG_X86
 		struct kvm_pic_state pic;
+#endif
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
 		struct kvm_ioapic_state ioapic;
+#endif
 	} chip;
 };

@ -116,6 +72,7 @@ struct kvm_irqchip {
 #define KVM_EXIT_FAIL_ENTRY       9
 #define KVM_EXIT_INTR             10
 #define KVM_EXIT_SET_TPR          11
+#define KVM_EXIT_TPR_ACCESS       12

 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@ -174,90 +131,17 @@ struct kvm_run {
 			__u32 longmode;
 			__u32 pad;
 		} hypercall;
+		/* KVM_EXIT_TPR_ACCESS */
+		struct {
+			__u64 rip;
+			__u32 is_write;
+			__u32 pad;
+		} tpr_access;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
 };

-/* for KVM_GET_REGS and KVM_SET_REGS */
-struct kvm_regs {
-	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
-	__u64 rax, rbx, rcx, rdx;
-	__u64 rsi, rdi, rsp, rbp;
-	__u64 r8,  r9,  r10, r11;
-	__u64 r12, r13, r14, r15;
-	__u64 rip, rflags;
-};
-
-/* for KVM_GET_FPU and KVM_SET_FPU */
-struct kvm_fpu {
-	__u8  fpr[8][16];
-	__u16 fcw;
-	__u16 fsw;
-	__u8  ftwx;  /* in fxsave format */
-	__u8  pad1;
-	__u16 last_opcode;
-	__u64 last_ip;
-	__u64 last_dp;
-	__u8  xmm[16][16];
-	__u32 mxcsr;
-	__u32 pad2;
-};
-
-/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
-#define KVM_APIC_REG_SIZE 0x400
-struct kvm_lapic_state {
-	char regs[KVM_APIC_REG_SIZE];
-};
-
-struct kvm_segment {
-	__u64 base;
-	__u32 limit;
-	__u16 selector;
-	__u8  type;
-	__u8  present, dpl, db, s, l, g, avl;
-	__u8  unusable;
-	__u8  padding;
-};
-
-struct kvm_dtable {
-	__u64 base;
-	__u16 limit;
-	__u16 padding[3];
-};
-
-/* for KVM_GET_SREGS and KVM_SET_SREGS */
-struct kvm_sregs {
-	/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
-	struct kvm_segment cs, ds, es, fs, gs, ss;
-	struct kvm_segment tr, ldt;
-	struct kvm_dtable gdt, idt;
-	__u64 cr0, cr2, cr3, cr4, cr8;
-	__u64 efer;
-	__u64 apic_base;
-	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
-};
-
-struct kvm_msr_entry {
-	__u32 index;
-	__u32 reserved;
-	__u64 data;
-};
-
-/* for KVM_GET_MSRS and KVM_SET_MSRS */
-struct kvm_msrs {
-	__u32 nmsrs; /* number of msrs in entries */
-	__u32 pad;
-
-	struct kvm_msr_entry entries[0];
-};
-
-/* for KVM_GET_MSR_INDEX_LIST */
-struct kvm_msr_list {
-	__u32 nmsrs; /* number of msrs in entries */
-	__u32 indices[0];
-};
-
 /* for KVM_TRANSLATE */
 struct kvm_translation {
 	/* in */
@ -302,28 +186,24 @@ struct kvm_dirty_log {
 	};
 };

-struct kvm_cpuid_entry {
-	__u32 function;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding;
-};
-
-/* for KVM_SET_CPUID */
-struct kvm_cpuid {
-	__u32 nent;
-	__u32 padding;
-	struct kvm_cpuid_entry entries[0];
-};
-
 /* for KVM_SET_SIGNAL_MASK */
 struct kvm_signal_mask {
 	__u32 len;
 	__u8  sigset[0];
 };

+/* for KVM_TPR_ACCESS_REPORTING */
+struct kvm_tpr_access_ctl {
+	__u32 enabled;
+	__u32 flags;
+	__u32 reserved[8];
+};
+
+/* for KVM_SET_VAPIC_ADDR */
+struct kvm_vapic_addr {
+	__u64 vapic_addr;
+};
+
 #define KVMIO 0xAE

 /*
@ -347,11 +227,21 @@ struct kvm_signal_mask {
 */
 #define KVM_CAP_IRQCHIP	  0
 #define KVM_CAP_HLT	  1
+#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
+#define KVM_CAP_USER_MEMORY 3
+#define KVM_CAP_SET_TSS_ADDR 4
+#define KVM_CAP_EXT_CPUID 5
+#define KVM_CAP_VAPIC 6

 /*
 * ioctls for VM fds
 */
 #define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 0x40, struct kvm_memory_region)
+#define KVM_SET_NR_MMU_PAGES      _IO(KVMIO, 0x44)
+#define KVM_GET_NR_MMU_PAGES      _IO(KVMIO, 0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
+					struct kvm_userspace_memory_region)
+#define KVM_SET_TSS_ADDR          _IO(KVMIO, 0x47)
 /*
 * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
 * a vcpu fd.
@ -359,6 +249,7 @@ struct kvm_signal_mask {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
@ -384,5 +275,11 @@ struct kvm_signal_mask {
 #define KVM_SET_FPU               _IOW(KVMIO,  0x8d, struct kvm_fpu)
 #define KVM_GET_LAPIC             _IOR(KVMIO,  0x8e, struct kvm_lapic_state)
 #define KVM_SET_LAPIC             _IOW(KVMIO,  0x8f, struct kvm_lapic_state)
+#define KVM_SET_CPUID2            _IOW(KVMIO,  0x90, struct kvm_cpuid2)
+#define KVM_GET_CPUID2            _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
+/* Available with KVM_CAP_VAPIC */
+#define KVM_TPR_ACCESS_REPORTING  _IOWR(KVMIO,  0x92, struct kvm_tpr_access_ctl)
+/* Available with KVM_CAP_VAPIC */
+#define KVM_SET_VAPIC_ADDR        _IOW(KVMIO,  0x93, struct kvm_vapic_addr)

 #endif
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@ -0,0 +1,299 @@
+#ifndef __KVM_HOST_H
+#define __KVM_HOST_H
+
+/*
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/hardirq.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/preempt.h>
+#include <asm/signal.h>
+
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+
+#include <linux/kvm_types.h>
+
+#include <asm/kvm_host.h>
+
+#define KVM_MAX_VCPUS 4
+#define KVM_MEMORY_SLOTS 8
+/* memory slots that does not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 4
+
+#define KVM_PIO_PAGE_OFFSET 1
+
+/*
+ * vcpu->requests bit members
+ */
+#define KVM_REQ_TLB_FLUSH          0
+#define KVM_REQ_MIGRATE_TIMER      1
+#define KVM_REQ_REPORT_TPR_ACCESS  2
+
+struct kvm_vcpu;
+extern struct kmem_cache *kvm_vcpu_cache;
+
+struct kvm_guest_debug {
+	int enabled;
+	unsigned long bp[4];
+	int singlestep;
+};
+
+/*
+ * It would be nice to use something smarter than a linear search, TBD...
+ * Thankfully we dont expect many devices to register (famous last words :),
+ * so until then it will suffice.  At least its abstracted so we can change
+ * in one place.
+ */
+struct kvm_io_bus {
+	int                   dev_count;
+#define NR_IOBUS_DEVS 6
+	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+};
+
+void kvm_io_bus_init(struct kvm_io_bus *bus);
+void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
+void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
+			     struct kvm_io_device *dev);
+
+struct kvm_vcpu {
+	struct kvm *kvm;
+	struct preempt_notifier preempt_notifier;
+	int vcpu_id;
+	struct mutex mutex;
+	int   cpu;
+	struct kvm_run *run;
+	int guest_mode;
+	unsigned long requests;
+	struct kvm_guest_debug guest_debug;
+	int fpu_active;
+	int guest_fpu_loaded;
+	wait_queue_head_t wq;
+	int sigset_active;
+	sigset_t sigset;
+	struct kvm_vcpu_stat stat;
+
+#ifdef CONFIG_HAS_IOMEM
+	int mmio_needed;
+	int mmio_read_completed;
+	int mmio_is_write;
+	int mmio_size;
+	unsigned char mmio_data[8];
+	gpa_t mmio_phys_addr;
+#endif
+
+	struct kvm_vcpu_arch arch;
+};
+
+struct kvm_memory_slot {
+	gfn_t base_gfn;
+	unsigned long npages;
+	unsigned long flags;
+	unsigned long *rmap;
+	unsigned long *dirty_bitmap;
+	unsigned long userspace_addr;
+	int user_alloc;
+};
+
+struct kvm {
+	struct mutex lock; /* protects the vcpus array and APIC accesses */
+	spinlock_t mmu_lock;
+	struct mm_struct *mm; /* userspace tied to this vm */
+	int nmemslots;
+	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
+					KVM_PRIVATE_MEM_SLOTS];
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+	struct list_head vm_list;
+	struct file *filp;
+	struct kvm_io_bus mmio_bus;
+	struct kvm_io_bus pio_bus;
+	struct kvm_vm_stat stat;
+	struct kvm_arch arch;
+};
+
+/* The guest did something we don't support. */
+#define pr_unimpl(vcpu, fmt, ...)					\
+ do {									\
+	if (printk_ratelimit())						\
+		printk(KERN_ERR "kvm: %i: cpu%i " fmt,			\
+		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
+ } while (0)
+
+#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
+#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
+
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+void vcpu_load(struct kvm_vcpu *vcpu);
+void vcpu_put(struct kvm_vcpu *vcpu);
+
+void decache_vcpus_on_cpu(int cpu);
+
+
+int kvm_init(void *opaque, unsigned int vcpu_size,
+		  struct module *module);
+void kvm_exit(void);
+
+#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
+#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
+static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
+
+extern struct page *bad_page;
+
+int is_error_page(struct page *page);
+int kvm_is_error_hva(unsigned long addr);
+int kvm_set_memory_region(struct kvm *kvm,
+			  struct kvm_userspace_memory_region *mem,
+			  int user_alloc);
+int __kvm_set_memory_region(struct kvm *kvm,
+			    struct kvm_userspace_memory_region *mem,
+			    int user_alloc);
+int kvm_arch_set_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc);
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+void kvm_release_page_clean(struct page *page);
+void kvm_release_page_dirty(struct page *page);
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+			int len);
+int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
+			  unsigned long len);
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
+			 int offset, int len);
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
+		    unsigned long len);
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
+struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+
+void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_resched(struct kvm_vcpu *vcpu);
+void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
+void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+void kvm_flush_remote_tlbs(struct kvm *kvm);
+
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg);
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg);
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
+
+int kvm_dev_ioctl_check_extension(long ext);
+
+int kvm_get_dirty_log(struct kvm *kvm,
+			struct kvm_dirty_log *log, int *is_dirty);
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+				struct kvm_dirty_log *log);
+
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+				   struct
+				   kvm_userspace_memory_region *mem,
+				   int user_alloc);
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg);
+void kvm_arch_destroy_vm(struct kvm *kvm);
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				    struct kvm_translation *tr);
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs);
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs);
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+				    struct kvm_debug_guest *dbg);
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
+
+int kvm_arch_init(void *opaque);
+void kvm_arch_exit(void);
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id);
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
+
+int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_arch_hardware_enable(void *garbage);
+void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_setup(void);
+void kvm_arch_hardware_unsetup(void);
+void kvm_arch_check_processor_compat(void *rtn);
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
+
+void kvm_free_physmem(struct kvm *kvm);
+
+struct  kvm *kvm_arch_create_vm(void);
+void kvm_arch_destroy_vm(struct kvm *kvm);
+
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+
+static inline void kvm_guest_enter(void)
+{
+	account_system_vtime(current);
+	current->flags |= PF_VCPU;
+}
+
+static inline void kvm_guest_exit(void)
+{
+	account_system_vtime(current);
+	current->flags &= ~PF_VCPU;
+}
+
+static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	return slot - kvm->memslots;
+}
+
+static inline gpa_t gfn_to_gpa(gfn_t gfn)
+{
+	return (gpa_t)gfn << PAGE_SHIFT;
+}
+
+static inline void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+{
+	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
+}
+
+enum kvm_stat_kind {
+	KVM_STAT_VM,
+	KVM_STAT_VCPU,
+};
+
+struct kvm_stats_debugfs_item {
+	const char *name;
+	int offset;
+	enum kvm_stat_kind kind;
+	struct dentry *dentry;
+};
+extern struct kvm_stats_debugfs_item debugfs_entries[];
+
+#endif
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@ -2,72 +2,30 @@
 #define __LINUX_KVM_PARA_H

 /*
- * Guest OS interface for KVM paravirtualization
- *
- * Note: this interface is totally experimental, and is certain to change
- *       as we make progress.
+ * This header file provides a method for making a hypercall to the host
+ * Architectures should define:
+ * - kvm_hypercall0, kvm_hypercall1...
+ * - kvm_arch_para_features
+ * - kvm_para_available
 */

+/* Return values for hypercalls */
+#define KVM_ENOSYS		1000
+
+#define KVM_HC_VAPIC_POLL_IRQ            1
+
 /*
- * Per-VCPU descriptor area shared between guest and host. Writable to
- * both guest and host. Registered with the host by the guest when
- * a guest acknowledges paravirtual mode.
- *
- * NOTE: all addresses are guest-physical addresses (gpa), to make it
- * easier for the hypervisor to map between the various addresses.
+ * hypercalls use architecture specific
 */
-struct kvm_vcpu_para_state {
-	/*
-	 * API version information for compatibility. If there's any support
-	 * mismatch (too old host trying to execute too new guest) then
-	 * the host will deny entry into paravirtual mode. Any other
-	 * combination (new host + old guest and new host + new guest)
-	 * is supposed to work - new host versions will support all old
-	 * guest API versions.
-	 */
-	u32 guest_version;
-	u32 host_version;
-	u32 size;
-	u32 ret;
+#include <asm/kvm_para.h>

-	/*
-	 * The address of the vm exit instruction (VMCALL or VMMCALL),
-	 * which the host will patch according to the CPU model the
-	 * VM runs on:
-	 */
-	u64 hypercall_gpa;
+#ifdef __KERNEL__
+static inline int kvm_para_has_feature(unsigned int feature)
+{
+	if (kvm_arch_para_features() & (1UL << feature))
+		return 1;
+	return 0;
+}
+#endif /* __KERNEL__ */
+#endif /* __LINUX_KVM_PARA_H */

-} __attribute__ ((aligned(PAGE_SIZE)));
-
-#define KVM_PARA_API_VERSION 1
-
-/*
- * This is used for an RDMSR's ECX parameter to probe for a KVM host.
- * Hopefully no CPU vendor will use up this number. This is placed well
- * out of way of the typical space occupied by CPU vendors' MSR indices,
- * and we think (or at least hope) it wont be occupied in the future
- * either.
- */
-#define MSR_KVM_API_MAGIC 0x87655678
-
-#define KVM_EINVAL 1
-
-/*
- * Hypercall calling convention:
- *
- * Each hypercall may have 0-6 parameters.
- *
- * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
- *
- * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
- * order: RDI, RSI, RDX, RCX, R8, R9.
- *
- * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
- * (the first 3 are according to the gcc regparm calling convention)
- *
- * No registers are clobbered by the hypercall, except that the
- * return value is in RAX.
- */
-#define __NR_hypercalls			0
-
-#endif
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@ -0,0 +1,54 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ */
+
+#ifndef __KVM_TYPES_H__
+#define __KVM_TYPES_H__
+
+#include <asm/types.h>
+
+/*
+ * Address types:
+ *
+ *  gva - guest virtual address
+ *  gpa - guest physical address
+ *  gfn - guest frame number
+ *  hva - host virtual address
+ *  hpa - host physical address
+ *  hfn - host frame number
+ */
+
+typedef unsigned long  gva_t;
+typedef u64            gpa_t;
+typedef unsigned long  gfn_t;
+
+typedef unsigned long  hva_t;
+typedef u64            hpa_t;
+typedef unsigned long  hfn_t;
+
+struct kvm_pio_request {
+	unsigned long count;
+	int cur_count;
+	struct page *guest_pages[2];
+	unsigned guest_page_offset;
+	int in;
+	int port;
+	int size;
+	int string;
+	int down;
+	int rep;
+};
+
+#endif /* __KVM_TYPES_H__ */
--- a/kernel/fork.c
+++ b/kernel/fork.c
@ -393,6 +393,7 @@ void fastcall __mmdrop(struct mm_struct *mm)
 	destroy_context(mm);
 	free_mm(mm);
 }
+EXPORT_SYMBOL_GPL(__mmdrop);

 /*
 * Decrement the use count and release all resources for an mm.
--- a/drivers/kvm/ioapic.c
+++ b/drivers/kvm/ioapic.c
@ -26,7 +26,7 @@
 *  Based on Xen 3.1 code.
 */

-#include "kvm.h"
+#include <linux/kvm_host.h>
 #include <linux/kvm.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
@ -34,14 +34,17 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 #include <asm/page.h>
 #include <asm/current.h>
-#include <asm/apicdef.h>
-#include <asm/io_apic.h>
-#include "irq.h"
-/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+
+#include "ioapic.h"
+#include "lapic.h"
+
+#if 0
+#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#else
 #define ioapic_debug(fmt, arg...)
+#endif
 static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);

 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@ -113,7 +116,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	default:
 		index = (ioapic->ioregsel - 0x10) >> 1;

-		ioapic_debug("change redir index %x val %x", index, val);
+		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 			return;
 		if (ioapic->ioregsel & 1) {
@ -131,16 +134,16 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 }

 static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
-			   struct kvm_lapic *target,
+			   struct kvm_vcpu *vcpu,
 			   u8 vector, u8 trig_mode, u8 delivery_mode)
 {
-	ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+	ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
 		     delivery_mode);

-	ASSERT((delivery_mode == dest_Fixed) ||
-	       (delivery_mode == dest_LowestPrio));
+	ASSERT((delivery_mode == IOAPIC_FIXED) ||
+	       (delivery_mode == IOAPIC_LOWEST_PRIORITY));

-	kvm_apic_set_irq(target, vector, trig_mode);
+	kvm_apic_set_irq(vcpu, vector, trig_mode);
 }

 static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
@ -151,12 +154,12 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 	struct kvm *kvm = ioapic->kvm;
 	struct kvm_vcpu *vcpu;

-	ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+	ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);

 	if (dest_mode == 0) {	/* Physical mode. */
 		if (dest == 0xFF) {	/* Broadcast. */
 			for (i = 0; i < KVM_MAX_VCPUS; ++i)
-				if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
+				if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic)
 					mask |= 1 << i;
 			return mask;
 		}
@ -164,8 +167,8 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 			vcpu = kvm->vcpus[i];
 			if (!vcpu)
 				continue;
-			if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
-				if (vcpu->apic)
+			if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) {
+				if (vcpu->arch.apic)
 					mask = 1 << i;
 				break;
 			}
@ -175,11 +178,11 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
 			vcpu = kvm->vcpus[i];
 			if (!vcpu)
 				continue;
-			if (vcpu->apic &&
-			    kvm_apic_match_logical_addr(vcpu->apic, dest))
+			if (vcpu->arch.apic &&
+			    kvm_apic_match_logical_addr(vcpu->arch.apic, dest))
 				mask |= 1 << vcpu->vcpu_id;
 		}
-	ioapic_debug("mask %x", mask);
+	ioapic_debug("mask %x\n", mask);
 	return mask;
 }

@ -191,41 +194,39 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 	u8 vector = ioapic->redirtbl[irq].fields.vector;
 	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
 	u32 deliver_bitmask;
-	struct kvm_lapic *target;
 	struct kvm_vcpu *vcpu;
 	int vcpu_id;

 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-		     "vector=%x trig_mode=%x",
+		     "vector=%x trig_mode=%x\n",
 		     dest, dest_mode, delivery_mode, vector, trig_mode);

 	deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
 	if (!deliver_bitmask) {
-		ioapic_debug("no target on destination");
+		ioapic_debug("no target on destination\n");
 		return;
 	}

 	switch (delivery_mode) {
-	case dest_LowestPrio:
-		target =
-		    kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
-		if (target != NULL)
-			ioapic_inj_irq(ioapic, target, vector,
+	case IOAPIC_LOWEST_PRIORITY:
+		vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+				deliver_bitmask);
+		if (vcpu != NULL)
+			ioapic_inj_irq(ioapic, vcpu, vector,
 				       trig_mode, delivery_mode);
 		else
-			ioapic_debug("null round robin: "
-				     "mask=%x vector=%x delivery_mode=%x",
-				     deliver_bitmask, vector, dest_LowestPrio);
+			ioapic_debug("null lowest prio vcpu: "
+				     "mask=%x vector=%x delivery_mode=%x\n",
+				     deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY);
 		break;
-	case dest_Fixed:
+	case IOAPIC_FIXED:
 		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
 			if (!(deliver_bitmask & (1 << vcpu_id)))
 				continue;
 			deliver_bitmask &= ~(1 << vcpu_id);
 			vcpu = ioapic->kvm->vcpus[vcpu_id];
 			if (vcpu) {
-				target = vcpu->apic;
-				ioapic_inj_irq(ioapic, target, vector,
+				ioapic_inj_irq(ioapic, vcpu, vector,
 					       trig_mode, delivery_mode);
 			}
 		}
@ -271,7 +272,7 @@ static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)

 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
 {
-	struct kvm_ioapic *ioapic = kvm->vioapic;
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 	union ioapic_redir_entry *ent;
 	int gsi;

@ -304,7 +305,7 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 	u32 result;

-	ioapic_debug("addr %lx", (unsigned long)addr);
+	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */

 	addr &= 0xff;
@ -341,8 +342,8 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
 	u32 data;

-	ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
-		     addr, len, val);
+	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
+		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
@ -360,24 +361,38 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	case IOAPIC_REG_WINDOW:
 		ioapic_write_indirect(ioapic, data);
 		break;
+#ifdef	CONFIG_IA64
+	case IOAPIC_REG_EOI:
+		kvm_ioapic_update_eoi(ioapic->kvm, data);
+		break;
+#endif

 	default:
 		break;
 	}
 }

+void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+{
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		ioapic->redirtbl[i].fields.mask = 1;
+	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+	ioapic->ioregsel = 0;
+	ioapic->irr = 0;
+	ioapic->id = 0;
+}
+
 int kvm_ioapic_init(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic;
-	int i;

 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
-	kvm->vioapic = ioapic;
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		ioapic->redirtbl[i].fields.mask = 1;
-	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+	kvm->arch.vioapic = ioapic;
+	kvm_ioapic_reset(ioapic);
 	ioapic->dev.read = ioapic_mmio_read;
 	ioapic->dev.write = ioapic_mmio_write;
 	ioapic->dev.in_range = ioapic_in_range;
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@ -0,0 +1,95 @@
+#ifndef __KVM_IO_APIC_H
+#define __KVM_IO_APIC_H
+
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
+#define IOAPIC_EDGE_TRIG  0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT  0x00
+#define IOAPIC_REG_WINDOW  0x10
+#define IOAPIC_REG_EOI     0x40	/* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID  0x02	/* x86 IOAPIC only */
+
+/*ioapic delivery mode*/
+#define	IOAPIC_FIXED			0x0
+#define	IOAPIC_LOWEST_PRIORITY		0x1
+#define	IOAPIC_PMI			0x2
+#define	IOAPIC_NMI			0x4
+#define	IOAPIC_INIT			0x5
+#define	IOAPIC_EXTINT			0x7
+
+struct kvm_ioapic {
+	u64 base_address;
+	u32 ioregsel;
+	u32 id;
+	u32 irr;
+	u32 pad;
+	union ioapic_redir_entry {
+		u64 bits;
+		struct {
+			u8 vector;
+			u8 delivery_mode:3;
+			u8 dest_mode:1;
+			u8 delivery_status:1;
+			u8 polarity:1;
+			u8 remote_irr:1;
+			u8 trig_mode:1;
+			u8 mask:1;
+			u8 reserve:7;
+			u8 reserved[4];
+			u8 dest_id;
+		} fields;
+	} redirtbl[IOAPIC_NUM_PINS];
+	struct kvm_io_device dev;
+	struct kvm *kvm;
+};
+
+#ifdef DEBUG
+#define ASSERT(x)  							\
+do {									\
+	if (!(x)) {							\
+		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
+		       __FILE__, __LINE__, #x);				\
+		BUG();							\
+	}								\
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+	return kvm->arch.vioapic;
+}
+
+#ifdef CONFIG_IA64
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;
+}
+#endif
+
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+				       unsigned long bitmap);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
+
+#endif
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@ -0,0 +1,63 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __KVM_IODEV_H__
+#define __KVM_IODEV_H__
+
+#include <linux/kvm_types.h>
+
+struct kvm_io_device {
+	void (*read)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     void *val);
+	void (*write)(struct kvm_io_device *this,
+		      gpa_t addr,
+		      int len,
+		      const void *val);
+	int (*in_range)(struct kvm_io_device *this, gpa_t addr);
+	void (*destructor)(struct kvm_io_device *this);
+
+	void             *private;
+};
+
+static inline void kvm_iodevice_read(struct kvm_io_device *dev,
+				     gpa_t addr,
+				     int len,
+				     void *val)
+{
+	dev->read(dev, addr, len, val);
+}
+
+static inline void kvm_iodevice_write(struct kvm_io_device *dev,
+				      gpa_t addr,
+				      int len,
+				      const void *val)
+{
+	dev->write(dev, addr, len, val);
+}
+
+static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
+{
+	return dev->in_range(dev, addr);
+}
+
+static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
+{
+	if (dev->destructor)
+		dev->destructor(dev);
+}
+
+#endif /* __KVM_IODEV_H__ */
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c