Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (80 commits)
  KVM: Use CPU_DYING for disabling virtualization
  KVM: Tune hotplug/suspend IPIs
  KVM: Keep track of which cpus have virtualization enabled
  SMP: Allow smp_call_function_single() to current cpu
  i386: Allow smp_call_function_single() to current cpu
  x86_64: Allow smp_call_function_single() to current cpu
  HOTPLUG: Adapt thermal throttle to CPU_DYING
  HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING
  HOTPLUG: Add CPU_DYING notifier
  KVM: Clean up #includes
  KVM: Remove kvmfs in favor of the anonymous inodes source
  KVM: SVM: Reliably detect if SVM was disabled by BIOS
  KVM: VMX: Remove unnecessary code in vmx_tlb_flush()
  KVM: MMU: Fix Wrong tlb flush order
  KVM: VMX: Reinitialize the real-mode tss when entering real mode
  KVM: Avoid useless memory write when possible
  KVM: Fix x86 emulator writeback
  KVM: Add support for in-kernel pio handlers
  KVM: VMX: Fix interrupt checking on lightweight exit
  KVM: Adds support for in-kernel mmio handlers
  ...
		| @@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | ||||
| 	int err; | ||||
|  | ||||
| 	sys_dev = get_cpu_sysdev(cpu); | ||||
| 	mutex_lock(&therm_cpu_lock); | ||||
| 	switch (action) { | ||||
| 	case CPU_ONLINE: | ||||
| 	case CPU_ONLINE_FROZEN: | ||||
| 		mutex_lock(&therm_cpu_lock); | ||||
| 		err = thermal_throttle_add_dev(sys_dev); | ||||
| 		mutex_unlock(&therm_cpu_lock); | ||||
| 		WARN_ON(err); | ||||
| 		break; | ||||
| 	case CPU_DEAD: | ||||
| 	case CPU_DEAD_FROZEN: | ||||
| 		mutex_lock(&therm_cpu_lock); | ||||
| 		thermal_throttle_remove_dev(sys_dev); | ||||
| 		mutex_unlock(&therm_cpu_lock); | ||||
| 		break; | ||||
| 	} | ||||
| 	mutex_unlock(&therm_cpu_lock); | ||||
| 	return NOTIFY_OK; | ||||
| } | ||||
|  | ||||
|   | ||||
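The hunk above widens the locking in the thermal throttle hotplug callback: therm_cpu_lock is now taken once around the whole switch instead of separately inside the CPU_ONLINE and CPU_DEAD arms, so any action handled later (such as the CPU_DYING transitions introduced elsewhere in this series) is covered by the same lock. A minimal user-space sketch of that take-the-lock-once dispatch pattern, with invented action names and printf stand-ins for the real add/remove helpers:

#include <pthread.h>
#include <stdio.h>

enum cpu_action { CPU_ONLINE_ACTION, CPU_DEAD_ACTION };  /* illustrative subset */

static pthread_mutex_t therm_lock = PTHREAD_MUTEX_INITIALIZER;

/* Take the lock once, around the whole switch, as the patch above does. */
static int throttle_callback(enum cpu_action action, int cpu)
{
	pthread_mutex_lock(&therm_lock);
	switch (action) {
	case CPU_ONLINE_ACTION:
		printf("add throttle device for cpu %d\n", cpu);
		break;
	case CPU_DEAD_ACTION:
		printf("remove throttle device for cpu %d\n", cpu);
		break;
	}
	pthread_mutex_unlock(&therm_lock);
	return 0;
}

int main(void)
{
	throttle_callback(CPU_ONLINE_ACTION, 0);
	throttle_callback(CPU_DEAD_ACTION, 0);
	return 0;
}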
| @@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic, | ||||
| EXPORT_SYMBOL(smp_call_function); | ||||
|  | ||||
| /** | ||||
|  * smp_call_function_single - Run a function on another CPU | ||||
|  * smp_call_function_single - Run a function on a specific CPU | ||||
|  * @cpu: The target CPU.  Cannot be the calling CPU. | ||||
|  * @func: The function to run. This must be fast and non-blocking. | ||||
|  * @info: An arbitrary pointer to pass to the function. | ||||
| @@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||||
| 	int ret; | ||||
| 	int me = get_cpu(); | ||||
| 	if (cpu == me) { | ||||
| 		WARN_ON(1); | ||||
| 		local_irq_disable(); | ||||
| 		func(info); | ||||
| 		local_irq_enable(); | ||||
| 		put_cpu(); | ||||
| 		return -EBUSY; | ||||
| 		return 0; | ||||
| 	} | ||||
|  | ||||
| 	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); | ||||
|   | ||||
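After this change, smp_call_function_single() on i386 accepts the calling CPU: instead of WARN_ON() plus -EBUSY it runs the function locally with interrupts disabled and returns 0, which is what lets KVM target "this" CPU from its hotplug and suspend paths. A hedged user-space sketch of just that local path (the cross-CPU IPI machinery is omitted and the function names are stand-ins, not the kernel API):

#include <stdio.h>

/* Toy stand-in: run func(info) for the given cpu.  After the patch above,
 * cpu == current cpu is legal and simply runs the function in place; only
 * that local path is modelled here. */
static int fake_smp_call_function_single(int cpu, void (*func)(void *),
					 void *info, int current_cpu)
{
	if (cpu == current_cpu) {
		func(info);	/* patched kernels run it locally, irqs off */
		return 0;	/* and report success instead of -EBUSY */
	}
	/* cross-CPU IPI path omitted in this sketch */
	return 0;
}

static void hardware_disable(void *unused)
{
	(void)unused;
	printf("virtualization disabled on this cpu\n");
}

int main(void)
{
	/* A caller may now name the CPU it is already running on. */
	return fake_smp_call_function_single(0, hardware_disable, NULL, 0);
}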
| @@ -357,7 +357,7 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * smp_call_function_single - Run a function on another CPU | ||||
|  * smp_call_function_single - Run a function on a specific CPU | ||||
|  * @func: The function to run. This must be fast and non-blocking. | ||||
|  * @info: An arbitrary pointer to pass to the function. | ||||
|  * @nonatomic: Currently unused. | ||||
| @@ -374,14 +374,18 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, | ||||
| { | ||||
| 	/* prevent preemption and reschedule on another processor */ | ||||
| 	int me = get_cpu(); | ||||
| 	if (cpu == me) { | ||||
| 		put_cpu(); | ||||
| 		return 0; | ||||
| 	} | ||||
|  | ||||
| 	/* Can deadlock when called with interrupts disabled */ | ||||
| 	WARN_ON(irqs_disabled()); | ||||
|  | ||||
| 	if (cpu == me) { | ||||
| 		local_irq_disable(); | ||||
| 		func(info); | ||||
| 		local_irq_enable(); | ||||
| 		put_cpu(); | ||||
| 		return 0; | ||||
| 	} | ||||
|  | ||||
| 	spin_lock_bh(&call_lock); | ||||
| 	__smp_call_function_single(cpu, func, info, nonatomic, wait); | ||||
| 	spin_unlock_bh(&call_lock); | ||||
|   | ||||
| @@ -1,12 +1,17 @@ | ||||
| # | ||||
| # KVM configuration | ||||
| # | ||||
| menu "Virtualization" | ||||
| menuconfig VIRTUALIZATION | ||||
| 	bool "Virtualization" | ||||
| 	depends on X86 | ||||
| 	default y | ||||
|  | ||||
| if VIRTUALIZATION | ||||
|  | ||||
| config KVM | ||||
| 	tristate "Kernel-based Virtual Machine (KVM) support" | ||||
| 	depends on X86 && EXPERIMENTAL | ||||
| 	depends on X86_CMPXCHG64 || 64BIT | ||||
| 	---help--- | ||||
| 	  Support hosting fully virtualized guest machines using hardware | ||||
| 	  virtualization extensions.  You will need a fairly recent | ||||
| @@ -35,4 +40,4 @@ config KVM_AMD | ||||
| 	  Provides support for KVM on AMD processors equipped with the AMD-V | ||||
| 	  (SVM) extensions. | ||||
|  | ||||
| endmenu | ||||
| endif # VIRTUALIZATION | ||||
|   | ||||
| @@ -10,6 +10,8 @@ | ||||
| #include <linux/list.h> | ||||
| #include <linux/mutex.h> | ||||
| #include <linux/spinlock.h> | ||||
| #include <linux/signal.h> | ||||
| #include <linux/sched.h> | ||||
| #include <linux/mm.h> | ||||
| #include <asm/signal.h> | ||||
|  | ||||
| @@ -18,6 +20,7 @@ | ||||
| #include <linux/kvm_para.h> | ||||
|  | ||||
| #define CR0_PE_MASK (1ULL << 0) | ||||
| #define CR0_MP_MASK (1ULL << 1) | ||||
| #define CR0_TS_MASK (1ULL << 3) | ||||
| #define CR0_NE_MASK (1ULL << 5) | ||||
| #define CR0_WP_MASK (1ULL << 16) | ||||
| @@ -42,7 +45,8 @@ | ||||
| 	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ | ||||
| 	 | CR0_NW_MASK | CR0_CD_MASK) | ||||
| #define KVM_VM_CR0_ALWAYS_ON \ | ||||
| 	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK) | ||||
| 	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \ | ||||
| 	 | CR0_MP_MASK) | ||||
| #define KVM_GUEST_CR4_MASK \ | ||||
| 	(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) | ||||
| #define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) | ||||
| @@ -51,10 +55,10 @@ | ||||
| #define INVALID_PAGE (~(hpa_t)0) | ||||
| #define UNMAPPED_GVA (~(gpa_t)0) | ||||
|  | ||||
| #define KVM_MAX_VCPUS 1 | ||||
| #define KVM_MAX_VCPUS 4 | ||||
| #define KVM_ALIAS_SLOTS 4 | ||||
| #define KVM_MEMORY_SLOTS 4 | ||||
| #define KVM_NUM_MMU_PAGES 256 | ||||
| #define KVM_NUM_MMU_PAGES 1024 | ||||
| #define KVM_MIN_FREE_MMU_PAGES 5 | ||||
| #define KVM_REFILL_PAGES 25 | ||||
| #define KVM_MAX_CPUID_ENTRIES 40 | ||||
| @@ -79,6 +83,11 @@ | ||||
|  | ||||
| #define KVM_PIO_PAGE_OFFSET 1 | ||||
|  | ||||
| /* | ||||
|  * vcpu->requests bit members | ||||
|  */ | ||||
| #define KVM_TLB_FLUSH 0 | ||||
|  | ||||
| /* | ||||
|  * Address types: | ||||
|  * | ||||
| @@ -137,7 +146,7 @@ struct kvm_mmu_page { | ||||
| 	gfn_t gfn; | ||||
| 	union kvm_mmu_page_role role; | ||||
|  | ||||
| 	hpa_t page_hpa; | ||||
| 	u64 *spt; | ||||
| 	unsigned long slot_bitmap; /* One bit set per slot which has memory | ||||
| 				    * in this shadow page. | ||||
| 				    */ | ||||
| @@ -232,6 +241,7 @@ struct kvm_pio_request { | ||||
| 	struct page *guest_pages[2]; | ||||
| 	unsigned guest_page_offset; | ||||
| 	int in; | ||||
| 	int port; | ||||
| 	int size; | ||||
| 	int string; | ||||
| 	int down; | ||||
| @@ -252,8 +262,70 @@ struct kvm_stat { | ||||
| 	u32 halt_exits; | ||||
| 	u32 request_irq_exits; | ||||
| 	u32 irq_exits; | ||||
| 	u32 light_exits; | ||||
| 	u32 efer_reload; | ||||
| }; | ||||
|  | ||||
| struct kvm_io_device { | ||||
| 	void (*read)(struct kvm_io_device *this, | ||||
| 		     gpa_t addr, | ||||
| 		     int len, | ||||
| 		     void *val); | ||||
| 	void (*write)(struct kvm_io_device *this, | ||||
| 		      gpa_t addr, | ||||
| 		      int len, | ||||
| 		      const void *val); | ||||
| 	int (*in_range)(struct kvm_io_device *this, gpa_t addr); | ||||
| 	void (*destructor)(struct kvm_io_device *this); | ||||
|  | ||||
| 	void             *private; | ||||
| }; | ||||
|  | ||||
| static inline void kvm_iodevice_read(struct kvm_io_device *dev, | ||||
| 				     gpa_t addr, | ||||
| 				     int len, | ||||
| 				     void *val) | ||||
| { | ||||
| 	dev->read(dev, addr, len, val); | ||||
| } | ||||
|  | ||||
| static inline void kvm_iodevice_write(struct kvm_io_device *dev, | ||||
| 				      gpa_t addr, | ||||
| 				      int len, | ||||
| 				      const void *val) | ||||
| { | ||||
| 	dev->write(dev, addr, len, val); | ||||
| } | ||||
|  | ||||
| static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr) | ||||
| { | ||||
| 	return dev->in_range(dev, addr); | ||||
| } | ||||
|  | ||||
| static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) | ||||
| { | ||||
| 	if (dev->destructor) | ||||
| 		dev->destructor(dev); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * It would be nice to use something smarter than a linear search, TBD... | ||||
|  * Thankfully we dont expect many devices to register (famous last words :), | ||||
|  * so until then it will suffice.  At least its abstracted so we can change | ||||
|  * in one place. | ||||
|  */ | ||||
| struct kvm_io_bus { | ||||
| 	int                   dev_count; | ||||
| #define NR_IOBUS_DEVS 6 | ||||
| 	struct kvm_io_device *devs[NR_IOBUS_DEVS]; | ||||
| }; | ||||
|  | ||||
| void kvm_io_bus_init(struct kvm_io_bus *bus); | ||||
| void kvm_io_bus_destroy(struct kvm_io_bus *bus); | ||||
| struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr); | ||||
| void kvm_io_bus_register_dev(struct kvm_io_bus *bus, | ||||
| 			     struct kvm_io_device *dev); | ||||
|  | ||||
| struct kvm_vcpu { | ||||
| 	struct kvm *kvm; | ||||
| 	union { | ||||
| @@ -266,6 +338,8 @@ struct kvm_vcpu { | ||||
| 	u64 host_tsc; | ||||
| 	struct kvm_run *run; | ||||
| 	int interrupt_window_open; | ||||
| 	int guest_mode; | ||||
| 	unsigned long requests; | ||||
| 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ | ||||
| #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) | ||||
| 	unsigned long irq_pending[NR_IRQ_WORDS]; | ||||
| @@ -285,15 +359,20 @@ struct kvm_vcpu { | ||||
| 	u64 apic_base; | ||||
| 	u64 ia32_misc_enable_msr; | ||||
| 	int nmsrs; | ||||
| 	int save_nmsrs; | ||||
| 	int msr_offset_efer; | ||||
| #ifdef CONFIG_X86_64 | ||||
| 	int msr_offset_kernel_gs_base; | ||||
| #endif | ||||
| 	struct vmx_msr_entry *guest_msrs; | ||||
| 	struct vmx_msr_entry *host_msrs; | ||||
|  | ||||
| 	struct list_head free_pages; | ||||
| 	struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES]; | ||||
| 	struct kvm_mmu mmu; | ||||
|  | ||||
| 	struct kvm_mmu_memory_cache mmu_pte_chain_cache; | ||||
| 	struct kvm_mmu_memory_cache mmu_rmap_desc_cache; | ||||
| 	struct kvm_mmu_memory_cache mmu_page_cache; | ||||
| 	struct kvm_mmu_memory_cache mmu_page_header_cache; | ||||
|  | ||||
| 	gfn_t last_pt_write_gfn; | ||||
| 	int   last_pt_write_count; | ||||
| @@ -305,6 +384,11 @@ struct kvm_vcpu { | ||||
| 	char *guest_fx_image; | ||||
| 	int fpu_active; | ||||
| 	int guest_fpu_loaded; | ||||
| 	struct vmx_host_state { | ||||
| 		int loaded; | ||||
| 		u16 fs_sel, gs_sel, ldt_sel; | ||||
| 		int fs_gs_ldt_reload_needed; | ||||
| 	} vmx_host_state; | ||||
|  | ||||
| 	int mmio_needed; | ||||
| 	int mmio_read_completed; | ||||
| @@ -331,6 +415,7 @@ struct kvm_vcpu { | ||||
| 			u32 ar; | ||||
| 		} tr, es, ds, fs, gs; | ||||
| 	} rmode; | ||||
| 	int halt_request; /* real mode on Intel only */ | ||||
|  | ||||
| 	int cpuid_nent; | ||||
| 	struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; | ||||
| @@ -362,12 +447,15 @@ struct kvm { | ||||
| 	struct list_head active_mmu_pages; | ||||
| 	int n_free_mmu_pages; | ||||
| 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | ||||
| 	int nvcpus; | ||||
| 	struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; | ||||
| 	int memory_config_version; | ||||
| 	int busy; | ||||
| 	unsigned long rmap_overflow; | ||||
| 	struct list_head vm_list; | ||||
| 	struct file *filp; | ||||
| 	struct kvm_io_bus mmio_bus; | ||||
| 	struct kvm_io_bus pio_bus; | ||||
| }; | ||||
|  | ||||
| struct descriptor_table { | ||||
| @@ -488,6 +576,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||||
| 		  int size, unsigned long count, int string, int down, | ||||
| 		  gva_t address, int rep, unsigned port); | ||||
| void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | ||||
| int kvm_emulate_halt(struct kvm_vcpu *vcpu); | ||||
| int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | ||||
| int emulate_clts(struct kvm_vcpu *vcpu); | ||||
| int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, | ||||
| @@ -511,6 +600,7 @@ void save_msrs(struct vmx_msr_entry *e, int n); | ||||
| void kvm_resched(struct kvm_vcpu *vcpu); | ||||
| void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | ||||
| void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | ||||
| void kvm_flush_remote_tlbs(struct kvm *kvm); | ||||
|  | ||||
| int kvm_read_guest(struct kvm_vcpu *vcpu, | ||||
| 	       gva_t addr, | ||||
| @@ -524,10 +614,12 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, | ||||
|  | ||||
| unsigned long segment_base(u16 selector); | ||||
|  | ||||
| void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); | ||||
| void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); | ||||
| void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | ||||
| 		       const u8 *old, const u8 *new, int bytes); | ||||
| int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | ||||
| void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | ||||
| int kvm_mmu_load(struct kvm_vcpu *vcpu); | ||||
| void kvm_mmu_unload(struct kvm_vcpu *vcpu); | ||||
|  | ||||
| int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); | ||||
|  | ||||
| @@ -539,6 +631,14 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, | ||||
| 	return vcpu->mmu.page_fault(vcpu, gva, error_code); | ||||
| } | ||||
|  | ||||
| static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (likely(vcpu->mmu.root_hpa != INVALID_PAGE)) | ||||
| 		return 0; | ||||
|  | ||||
| 	return kvm_mmu_load(vcpu); | ||||
| } | ||||
|  | ||||
| static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| #ifdef CONFIG_X86_64 | ||||
|   | ||||
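The kvm.h hunks above add the in-kernel I/O handler interface: a kvm_io_device supplies read/write/in_range callbacks, is attached to a per-VM bus with kvm_io_bus_register_dev(), and is later found by a linear scan in kvm_io_bus_find_dev(). A self-contained user-space sketch of that pattern follows; the names mirror the header but this is an illustration under simplified types, not the kernel implementation:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

typedef uint64_t gpa_t;

struct io_device {
	void (*read)(struct io_device *this, gpa_t addr, int len, void *val);
	void (*write)(struct io_device *this, gpa_t addr, int len, const void *val);
	int  (*in_range)(struct io_device *this, gpa_t addr);
	void *private;
};

#define NR_IOBUS_DEVS 6

struct io_bus {
	int dev_count;
	struct io_device *devs[NR_IOBUS_DEVS];
};

/* Linear search, as the "something smarter than a linear search" comment
 * in the header above anticipates. */
static struct io_device *io_bus_find_dev(struct io_bus *bus, gpa_t addr)
{
	for (int i = 0; i < bus->dev_count; i++)
		if (bus->devs[i]->in_range(bus->devs[i], addr))
			return bus->devs[i];
	return NULL;
}

/* No bounds check in this toy; the kernel version BUG()s past NR_IOBUS_DEVS. */
static void io_bus_register_dev(struct io_bus *bus, struct io_device *dev)
{
	bus->devs[bus->dev_count++] = dev;
}

/* A toy device backed by a single 32-bit register at address 0x1000. */
static uint32_t reg;

static int  toy_in_range(struct io_device *d, gpa_t a) { return a == 0x1000; }
static void toy_read(struct io_device *d, gpa_t a, int len, void *val)
{ memcpy(val, &reg, len); }
static void toy_write(struct io_device *d, gpa_t a, int len, const void *val)
{ memcpy(&reg, val, len); }

int main(void)
{
	struct io_bus bus = { 0 };
	struct io_device toy = {
		.read = toy_read, .write = toy_write,
		.in_range = toy_in_range, .private = NULL,
	};
	uint32_t v = 0xabcd;

	io_bus_register_dev(&bus, &toy);
	struct io_device *dev = io_bus_find_dev(&bus, 0x1000);
	if (dev) {
		dev->write(dev, 0x1000, sizeof(v), &v);
		dev->read(dev, 0x1000, sizeof(v), &v);
		printf("read back 0x%x\n", v);
	}
	return 0;
}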
| @@ -16,34 +16,33 @@ | ||||
|  */ | ||||
|  | ||||
| #include "kvm.h" | ||||
| #include "x86_emulate.h" | ||||
| #include "segment_descriptor.h" | ||||
|  | ||||
| #include <linux/kvm.h> | ||||
| #include <linux/module.h> | ||||
| #include <linux/errno.h> | ||||
| #include <linux/magic.h> | ||||
| #include <asm/processor.h> | ||||
| #include <linux/percpu.h> | ||||
| #include <linux/gfp.h> | ||||
| #include <asm/msr.h> | ||||
| #include <linux/mm.h> | ||||
| #include <linux/miscdevice.h> | ||||
| #include <linux/vmalloc.h> | ||||
| #include <asm/uaccess.h> | ||||
| #include <linux/reboot.h> | ||||
| #include <asm/io.h> | ||||
| #include <linux/debugfs.h> | ||||
| #include <linux/highmem.h> | ||||
| #include <linux/file.h> | ||||
| #include <asm/desc.h> | ||||
| #include <linux/sysdev.h> | ||||
| #include <linux/cpu.h> | ||||
| #include <linux/file.h> | ||||
| #include <linux/fs.h> | ||||
| #include <linux/mount.h> | ||||
| #include <linux/sched.h> | ||||
| #include <linux/cpumask.h> | ||||
| #include <linux/smp.h> | ||||
| #include <linux/anon_inodes.h> | ||||
|  | ||||
| #include "x86_emulate.h" | ||||
| #include "segment_descriptor.h" | ||||
| #include <asm/processor.h> | ||||
| #include <asm/msr.h> | ||||
| #include <asm/io.h> | ||||
| #include <asm/uaccess.h> | ||||
| #include <asm/desc.h> | ||||
|  | ||||
| MODULE_AUTHOR("Qumranet"); | ||||
| MODULE_LICENSE("GPL"); | ||||
| @@ -51,8 +50,12 @@ MODULE_LICENSE("GPL"); | ||||
| static DEFINE_SPINLOCK(kvm_lock); | ||||
| static LIST_HEAD(vm_list); | ||||
|  | ||||
| static cpumask_t cpus_hardware_enabled; | ||||
|  | ||||
| struct kvm_arch_ops *kvm_arch_ops; | ||||
|  | ||||
| static void hardware_disable(void *ignored); | ||||
|  | ||||
| #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) | ||||
|  | ||||
| static struct kvm_stats_debugfs_item { | ||||
| @@ -72,13 +75,13 @@ static struct kvm_stats_debugfs_item { | ||||
| 	{ "halt_exits", STAT_OFFSET(halt_exits) }, | ||||
| 	{ "request_irq", STAT_OFFSET(request_irq_exits) }, | ||||
| 	{ "irq_exits", STAT_OFFSET(irq_exits) }, | ||||
| 	{ "light_exits", STAT_OFFSET(light_exits) }, | ||||
| 	{ "efer_reload", STAT_OFFSET(efer_reload) }, | ||||
| 	{ NULL } | ||||
| }; | ||||
|  | ||||
| static struct dentry *debugfs_dir; | ||||
|  | ||||
| struct vfsmount *kvmfs_mnt; | ||||
|  | ||||
| #define MAX_IO_MSRS 256 | ||||
|  | ||||
| #define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL | ||||
| @@ -100,55 +103,6 @@ struct segment_descriptor_64 { | ||||
| static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | ||||
| 			   unsigned long arg); | ||||
|  | ||||
| static struct inode *kvmfs_inode(struct file_operations *fops) | ||||
| { | ||||
| 	int error = -ENOMEM; | ||||
| 	struct inode *inode = new_inode(kvmfs_mnt->mnt_sb); | ||||
|  | ||||
| 	if (!inode) | ||||
| 		goto eexit_1; | ||||
|  | ||||
| 	inode->i_fop = fops; | ||||
|  | ||||
| 	/* | ||||
| 	 * Mark the inode dirty from the very beginning, | ||||
| 	 * that way it will never be moved to the dirty | ||||
| 	 * list because mark_inode_dirty() will think | ||||
| 	 * that it already _is_ on the dirty list. | ||||
| 	 */ | ||||
| 	inode->i_state = I_DIRTY; | ||||
| 	inode->i_mode = S_IRUSR | S_IWUSR; | ||||
| 	inode->i_uid = current->fsuid; | ||||
| 	inode->i_gid = current->fsgid; | ||||
| 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||||
| 	return inode; | ||||
|  | ||||
| eexit_1: | ||||
| 	return ERR_PTR(error); | ||||
| } | ||||
|  | ||||
| static struct file *kvmfs_file(struct inode *inode, void *private_data) | ||||
| { | ||||
| 	struct file *file = get_empty_filp(); | ||||
|  | ||||
| 	if (!file) | ||||
| 		return ERR_PTR(-ENFILE); | ||||
|  | ||||
| 	file->f_path.mnt = mntget(kvmfs_mnt); | ||||
| 	file->f_path.dentry = d_alloc_anon(inode); | ||||
| 	if (!file->f_path.dentry) | ||||
| 		return ERR_PTR(-ENOMEM); | ||||
| 	file->f_mapping = inode->i_mapping; | ||||
|  | ||||
| 	file->f_pos = 0; | ||||
| 	file->f_flags = O_RDWR; | ||||
| 	file->f_op = inode->i_fop; | ||||
| 	file->f_mode = FMODE_READ | FMODE_WRITE; | ||||
| 	file->f_version = 0; | ||||
| 	file->private_data = private_data; | ||||
| 	return file; | ||||
| } | ||||
|  | ||||
| unsigned long segment_base(u16 selector) | ||||
| { | ||||
| 	struct descriptor_table gdt; | ||||
| @@ -307,6 +261,48 @@ static void vcpu_put(struct kvm_vcpu *vcpu) | ||||
| 	mutex_unlock(&vcpu->mutex); | ||||
| } | ||||
|  | ||||
| static void ack_flush(void *_completed) | ||||
| { | ||||
| 	atomic_t *completed = _completed; | ||||
|  | ||||
| 	atomic_inc(completed); | ||||
| } | ||||
|  | ||||
| void kvm_flush_remote_tlbs(struct kvm *kvm) | ||||
| { | ||||
| 	int i, cpu, needed; | ||||
| 	cpumask_t cpus; | ||||
| 	struct kvm_vcpu *vcpu; | ||||
| 	atomic_t completed; | ||||
|  | ||||
| 	atomic_set(&completed, 0); | ||||
| 	cpus_clear(cpus); | ||||
| 	needed = 0; | ||||
| 	for (i = 0; i < kvm->nvcpus; ++i) { | ||||
| 		vcpu = &kvm->vcpus[i]; | ||||
| 		if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) | ||||
| 			continue; | ||||
| 		cpu = vcpu->cpu; | ||||
| 		if (cpu != -1 && cpu != raw_smp_processor_id()) | ||||
| 			if (!cpu_isset(cpu, cpus)) { | ||||
| 				cpu_set(cpu, cpus); | ||||
| 				++needed; | ||||
| 			} | ||||
| 	} | ||||
|  | ||||
| 	/* | ||||
| 	 * We really want smp_call_function_mask() here.  But that's not | ||||
| 	 * available, so ipi all cpus in parallel and wait for them | ||||
| 	 * to complete. | ||||
| 	 */ | ||||
| 	for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus)) | ||||
| 		smp_call_function_single(cpu, ack_flush, &completed, 1, 0); | ||||
| 	while (atomic_read(&completed) != needed) { | ||||
| 		cpu_relax(); | ||||
| 		barrier(); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| static struct kvm *kvm_create_vm(void) | ||||
| { | ||||
| 	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | ||||
| @@ -315,8 +311,13 @@ static struct kvm *kvm_create_vm(void) | ||||
| 	if (!kvm) | ||||
| 		return ERR_PTR(-ENOMEM); | ||||
|  | ||||
| 	kvm_io_bus_init(&kvm->pio_bus); | ||||
| 	spin_lock_init(&kvm->lock); | ||||
| 	INIT_LIST_HEAD(&kvm->active_mmu_pages); | ||||
| 	spin_lock(&kvm_lock); | ||||
| 	list_add(&kvm->vm_list, &vm_list); | ||||
| 	spin_unlock(&kvm_lock); | ||||
| 	kvm_io_bus_init(&kvm->mmio_bus); | ||||
| 	for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||||
| 		struct kvm_vcpu *vcpu = &kvm->vcpus[i]; | ||||
|  | ||||
| @@ -324,10 +325,6 @@ static struct kvm *kvm_create_vm(void) | ||||
| 		vcpu->cpu = -1; | ||||
| 		vcpu->kvm = kvm; | ||||
| 		vcpu->mmu.root_hpa = INVALID_PAGE; | ||||
| 		INIT_LIST_HEAD(&vcpu->free_pages); | ||||
| 		spin_lock(&kvm_lock); | ||||
| 		list_add(&kvm->vm_list, &vm_list); | ||||
| 		spin_unlock(&kvm_lock); | ||||
| 	} | ||||
| 	return kvm; | ||||
| } | ||||
| @@ -380,6 +377,16 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) | ||||
| 		} | ||||
| } | ||||
|  | ||||
| static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (!vcpu->vmcs) | ||||
| 		return; | ||||
|  | ||||
| 	vcpu_load(vcpu); | ||||
| 	kvm_mmu_unload(vcpu); | ||||
| 	vcpu_put(vcpu); | ||||
| } | ||||
|  | ||||
| static void kvm_free_vcpu(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (!vcpu->vmcs) | ||||
| @@ -400,6 +407,11 @@ static void kvm_free_vcpus(struct kvm *kvm) | ||||
| { | ||||
| 	unsigned int i; | ||||
|  | ||||
| 	/* | ||||
| 	 * Unpin any mmu pages first. | ||||
| 	 */ | ||||
| 	for (i = 0; i < KVM_MAX_VCPUS; ++i) | ||||
| 		kvm_unload_vcpu_mmu(&kvm->vcpus[i]); | ||||
| 	for (i = 0; i < KVM_MAX_VCPUS; ++i) | ||||
| 		kvm_free_vcpu(&kvm->vcpus[i]); | ||||
| } | ||||
| @@ -414,6 +426,8 @@ static void kvm_destroy_vm(struct kvm *kvm) | ||||
| 	spin_lock(&kvm_lock); | ||||
| 	list_del(&kvm->vm_list); | ||||
| 	spin_unlock(&kvm_lock); | ||||
| 	kvm_io_bus_destroy(&kvm->pio_bus); | ||||
| 	kvm_io_bus_destroy(&kvm->mmio_bus); | ||||
| 	kvm_free_vcpus(kvm); | ||||
| 	kvm_free_physmem(kvm); | ||||
| 	kfree(kvm); | ||||
| @@ -969,7 +983,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page); | ||||
| void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | ||||
| { | ||||
| 	int i; | ||||
| 	struct kvm_memory_slot *memslot = NULL; | ||||
| 	struct kvm_memory_slot *memslot; | ||||
| 	unsigned long rel_gfn; | ||||
|  | ||||
| 	for (i = 0; i < kvm->nmemslots; ++i) { | ||||
| @@ -978,7 +992,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | ||||
| 		if (gfn >= memslot->base_gfn | ||||
| 		    && gfn < memslot->base_gfn + memslot->npages) { | ||||
|  | ||||
| 			if (!memslot || !memslot->dirty_bitmap) | ||||
| 			if (!memslot->dirty_bitmap) | ||||
| 				return; | ||||
|  | ||||
| 			rel_gfn = gfn - memslot->base_gfn; | ||||
| @@ -1037,12 +1051,31 @@ static int emulator_write_std(unsigned long addr, | ||||
| 	return X86EMUL_UNHANDLEABLE; | ||||
| } | ||||
|  | ||||
| static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, | ||||
| 						gpa_t addr) | ||||
| { | ||||
| 	/* | ||||
| 	 * Note that its important to have this wrapper function because | ||||
| 	 * in the very near future we will be checking for MMIOs against | ||||
| 	 * the LAPIC as well as the general MMIO bus | ||||
| 	 */ | ||||
| 	return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); | ||||
| } | ||||
|  | ||||
| static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, | ||||
| 					       gpa_t addr) | ||||
| { | ||||
| 	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); | ||||
| } | ||||
|  | ||||
| static int emulator_read_emulated(unsigned long addr, | ||||
| 				  void *val, | ||||
| 				  unsigned int bytes, | ||||
| 				  struct x86_emulate_ctxt *ctxt) | ||||
| { | ||||
| 	struct kvm_vcpu *vcpu = ctxt->vcpu; | ||||
| 	struct kvm_vcpu      *vcpu = ctxt->vcpu; | ||||
| 	struct kvm_io_device *mmio_dev; | ||||
| 	gpa_t                 gpa; | ||||
|  | ||||
| 	if (vcpu->mmio_read_completed) { | ||||
| 		memcpy(val, vcpu->mmio_data, bytes); | ||||
| @@ -1051,18 +1084,26 @@ static int emulator_read_emulated(unsigned long addr, | ||||
| 	} else if (emulator_read_std(addr, val, bytes, ctxt) | ||||
| 		   == X86EMUL_CONTINUE) | ||||
| 		return X86EMUL_CONTINUE; | ||||
| 	else { | ||||
| 		gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | ||||
|  | ||||
| 		if (gpa == UNMAPPED_GVA) | ||||
| 			return X86EMUL_PROPAGATE_FAULT; | ||||
| 		vcpu->mmio_needed = 1; | ||||
| 		vcpu->mmio_phys_addr = gpa; | ||||
| 		vcpu->mmio_size = bytes; | ||||
| 		vcpu->mmio_is_write = 0; | ||||
| 	gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | ||||
| 	if (gpa == UNMAPPED_GVA) | ||||
| 		return X86EMUL_PROPAGATE_FAULT; | ||||
|  | ||||
| 		return X86EMUL_UNHANDLEABLE; | ||||
| 	/* | ||||
| 	 * Is this MMIO handled locally? | ||||
| 	 */ | ||||
| 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); | ||||
| 	if (mmio_dev) { | ||||
| 		kvm_iodevice_read(mmio_dev, gpa, bytes, val); | ||||
| 		return X86EMUL_CONTINUE; | ||||
| 	} | ||||
|  | ||||
| 	vcpu->mmio_needed = 1; | ||||
| 	vcpu->mmio_phys_addr = gpa; | ||||
| 	vcpu->mmio_size = bytes; | ||||
| 	vcpu->mmio_is_write = 0; | ||||
|  | ||||
| 	return X86EMUL_UNHANDLEABLE; | ||||
| } | ||||
|  | ||||
| static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | ||||
| @@ -1070,18 +1111,20 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | ||||
| { | ||||
| 	struct page *page; | ||||
| 	void *virt; | ||||
| 	unsigned offset = offset_in_page(gpa); | ||||
|  | ||||
| 	if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) | ||||
| 		return 0; | ||||
| 	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||||
| 	if (!page) | ||||
| 		return 0; | ||||
| 	kvm_mmu_pre_write(vcpu, gpa, bytes); | ||||
| 	mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | ||||
| 	virt = kmap_atomic(page, KM_USER0); | ||||
| 	memcpy(virt + offset_in_page(gpa), val, bytes); | ||||
| 	if (memcmp(virt + offset_in_page(gpa), val, bytes)) { | ||||
| 		kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); | ||||
| 		memcpy(virt + offset_in_page(gpa), val, bytes); | ||||
| 	} | ||||
| 	kunmap_atomic(virt, KM_USER0); | ||||
| 	kvm_mmu_post_write(vcpu, gpa, bytes); | ||||
| 	return 1; | ||||
| } | ||||
|  | ||||
| @@ -1090,8 +1133,9 @@ static int emulator_write_emulated(unsigned long addr, | ||||
| 				   unsigned int bytes, | ||||
| 				   struct x86_emulate_ctxt *ctxt) | ||||
| { | ||||
| 	struct kvm_vcpu *vcpu = ctxt->vcpu; | ||||
| 	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | ||||
| 	struct kvm_vcpu      *vcpu = ctxt->vcpu; | ||||
| 	struct kvm_io_device *mmio_dev; | ||||
| 	gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); | ||||
|  | ||||
| 	if (gpa == UNMAPPED_GVA) { | ||||
| 		kvm_arch_ops->inject_page_fault(vcpu, addr, 2); | ||||
| @@ -1101,6 +1145,15 @@ static int emulator_write_emulated(unsigned long addr, | ||||
| 	if (emulator_write_phys(vcpu, gpa, val, bytes)) | ||||
| 		return X86EMUL_CONTINUE; | ||||
|  | ||||
| 	/* | ||||
| 	 * Is this MMIO handled locally? | ||||
| 	 */ | ||||
| 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); | ||||
| 	if (mmio_dev) { | ||||
| 		kvm_iodevice_write(mmio_dev, gpa, bytes, val); | ||||
| 		return X86EMUL_CONTINUE; | ||||
| 	} | ||||
|  | ||||
| 	vcpu->mmio_needed = 1; | ||||
| 	vcpu->mmio_phys_addr = gpa; | ||||
| 	vcpu->mmio_size = bytes; | ||||
| @@ -1269,6 +1322,17 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(emulate_instruction); | ||||
|  | ||||
| int kvm_emulate_halt(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	if (vcpu->irq_summary) | ||||
| 		return 1; | ||||
|  | ||||
| 	vcpu->run->exit_reason = KVM_EXIT_HLT; | ||||
| 	++vcpu->stat.halt_exits; | ||||
| 	return 0; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_emulate_halt); | ||||
|  | ||||
| int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) | ||||
| { | ||||
| 	unsigned long nr, a0, a1, a2, a3, a4, a5, ret; | ||||
| @@ -1469,6 +1533,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||||
| 	case MSR_IA32_MC0_MISC+16: | ||||
| 	case MSR_IA32_UCODE_REV: | ||||
| 	case MSR_IA32_PERF_STATUS: | ||||
| 	case MSR_IA32_EBL_CR_POWERON: | ||||
| 		/* MTRR registers */ | ||||
| 	case 0xfe: | ||||
| 	case 0x200 ... 0x2ff: | ||||
| @@ -1727,6 +1792,20 @@ static int complete_pio(struct kvm_vcpu *vcpu) | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	/* TODO: String I/O for in kernel device */ | ||||
|  | ||||
| 	if (vcpu->pio.in) | ||||
| 		kvm_iodevice_read(pio_dev, vcpu->pio.port, | ||||
| 				  vcpu->pio.size, | ||||
| 				  vcpu->pio_data); | ||||
| 	else | ||||
| 		kvm_iodevice_write(pio_dev, vcpu->pio.port, | ||||
| 				   vcpu->pio.size, | ||||
| 				   vcpu->pio_data); | ||||
| } | ||||
|  | ||||
| int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||||
| 		  int size, unsigned long count, int string, int down, | ||||
| 		  gva_t address, int rep, unsigned port) | ||||
| @@ -1735,6 +1814,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||||
| 	int i; | ||||
| 	int nr_pages = 1; | ||||
| 	struct page *page; | ||||
| 	struct kvm_io_device *pio_dev; | ||||
|  | ||||
| 	vcpu->run->exit_reason = KVM_EXIT_IO; | ||||
| 	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||||
| @@ -1746,17 +1826,27 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | ||||
| 	vcpu->pio.cur_count = count; | ||||
| 	vcpu->pio.size = size; | ||||
| 	vcpu->pio.in = in; | ||||
| 	vcpu->pio.port = port; | ||||
| 	vcpu->pio.string = string; | ||||
| 	vcpu->pio.down = down; | ||||
| 	vcpu->pio.guest_page_offset = offset_in_page(address); | ||||
| 	vcpu->pio.rep = rep; | ||||
|  | ||||
| 	pio_dev = vcpu_find_pio_dev(vcpu, port); | ||||
| 	if (!string) { | ||||
| 		kvm_arch_ops->cache_regs(vcpu); | ||||
| 		memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); | ||||
| 		kvm_arch_ops->decache_regs(vcpu); | ||||
| 		if (pio_dev) { | ||||
| 			kernel_pio(pio_dev, vcpu); | ||||
| 			complete_pio(vcpu); | ||||
| 			return 1; | ||||
| 		} | ||||
| 		return 0; | ||||
| 	} | ||||
| 	/* TODO: String I/O for in kernel device */ | ||||
| 	if (pio_dev) | ||||
| 		printk(KERN_ERR "kvm_setup_pio: no string io support\n"); | ||||
|  | ||||
| 	if (!count) { | ||||
| 		kvm_arch_ops->skip_emulated_instruction(vcpu); | ||||
| @@ -2273,34 +2363,12 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) | ||||
| 	struct inode *inode; | ||||
| 	struct file *file; | ||||
|  | ||||
| 	r = anon_inode_getfd(&fd, &inode, &file, | ||||
| 			     "kvm-vcpu", &kvm_vcpu_fops, vcpu); | ||||
| 	if (r) | ||||
| 		return r; | ||||
| 	atomic_inc(&vcpu->kvm->filp->f_count); | ||||
| 	inode = kvmfs_inode(&kvm_vcpu_fops); | ||||
| 	if (IS_ERR(inode)) { | ||||
| 		r = PTR_ERR(inode); | ||||
| 		goto out1; | ||||
| 	} | ||||
|  | ||||
| 	file = kvmfs_file(inode, vcpu); | ||||
| 	if (IS_ERR(file)) { | ||||
| 		r = PTR_ERR(file); | ||||
| 		goto out2; | ||||
| 	} | ||||
|  | ||||
| 	r = get_unused_fd(); | ||||
| 	if (r < 0) | ||||
| 		goto out3; | ||||
| 	fd = r; | ||||
| 	fd_install(fd, file); | ||||
|  | ||||
| 	return fd; | ||||
|  | ||||
| out3: | ||||
| 	fput(file); | ||||
| out2: | ||||
| 	iput(inode); | ||||
| out1: | ||||
| 	fput(vcpu->kvm->filp); | ||||
| 	return r; | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -2363,6 +2431,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | ||||
| 	if (r < 0) | ||||
| 		goto out_free_vcpus; | ||||
|  | ||||
| 	spin_lock(&kvm_lock); | ||||
| 	if (n >= kvm->nvcpus) | ||||
| 		kvm->nvcpus = n + 1; | ||||
| 	spin_unlock(&kvm_lock); | ||||
|  | ||||
| 	return r; | ||||
|  | ||||
| out_free_vcpus: | ||||
| @@ -2376,6 +2449,27 @@ out: | ||||
| 	return r; | ||||
| } | ||||
|  | ||||
| static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	u64 efer; | ||||
| 	int i; | ||||
| 	struct kvm_cpuid_entry *e, *entry; | ||||
|  | ||||
| 	rdmsrl(MSR_EFER, efer); | ||||
| 	entry = NULL; | ||||
| 	for (i = 0; i < vcpu->cpuid_nent; ++i) { | ||||
| 		e = &vcpu->cpuid_entries[i]; | ||||
| 		if (e->function == 0x80000001) { | ||||
| 			entry = e; | ||||
| 			break; | ||||
| 		} | ||||
| 	} | ||||
| 	if (entry && (entry->edx & EFER_NX) && !(efer & EFER_NX)) { | ||||
| 		entry->edx &= ~(1 << 20); | ||||
| 		printk(KERN_INFO ": guest NX capability removed\n"); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||||
| 				    struct kvm_cpuid *cpuid, | ||||
| 				    struct kvm_cpuid_entry __user *entries) | ||||
| @@ -2390,6 +2484,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | ||||
| 			   cpuid->nent * sizeof(struct kvm_cpuid_entry))) | ||||
| 		goto out; | ||||
| 	vcpu->cpuid_nent = cpuid->nent; | ||||
| 	cpuid_fix_nx_cap(vcpu); | ||||
| 	return 0; | ||||
|  | ||||
| out: | ||||
| @@ -2738,41 +2833,18 @@ static int kvm_dev_ioctl_create_vm(void) | ||||
| 	struct file *file; | ||||
| 	struct kvm *kvm; | ||||
|  | ||||
| 	inode = kvmfs_inode(&kvm_vm_fops); | ||||
| 	if (IS_ERR(inode)) { | ||||
| 		r = PTR_ERR(inode); | ||||
| 		goto out1; | ||||
| 	} | ||||
|  | ||||
| 	kvm = kvm_create_vm(); | ||||
| 	if (IS_ERR(kvm)) { | ||||
| 		r = PTR_ERR(kvm); | ||||
| 		goto out2; | ||||
| 	if (IS_ERR(kvm)) | ||||
| 		return PTR_ERR(kvm); | ||||
| 	r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm); | ||||
| 	if (r) { | ||||
| 		kvm_destroy_vm(kvm); | ||||
| 		return r; | ||||
| 	} | ||||
|  | ||||
| 	file = kvmfs_file(inode, kvm); | ||||
| 	if (IS_ERR(file)) { | ||||
| 		r = PTR_ERR(file); | ||||
| 		goto out3; | ||||
| 	} | ||||
| 	kvm->filp = file; | ||||
|  | ||||
| 	r = get_unused_fd(); | ||||
| 	if (r < 0) | ||||
| 		goto out4; | ||||
| 	fd = r; | ||||
| 	fd_install(fd, file); | ||||
|  | ||||
| 	return fd; | ||||
|  | ||||
| out4: | ||||
| 	fput(file); | ||||
| out3: | ||||
| 	kvm_destroy_vm(kvm); | ||||
| out2: | ||||
| 	iput(inode); | ||||
| out1: | ||||
| 	return r; | ||||
| } | ||||
|  | ||||
| static long kvm_dev_ioctl(struct file *filp, | ||||
| @@ -2862,7 +2934,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | ||||
| 		 * in vmx root mode. | ||||
| 		 */ | ||||
| 		printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | ||||
| 		on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); | ||||
| 		on_each_cpu(hardware_disable, NULL, 0, 1); | ||||
| 	} | ||||
| 	return NOTIFY_OK; | ||||
| } | ||||
| @@ -2905,33 +2977,88 @@ static void decache_vcpus_on_cpu(int cpu) | ||||
| 	spin_unlock(&kvm_lock); | ||||
| } | ||||
|  | ||||
| static void hardware_enable(void *junk) | ||||
| { | ||||
| 	int cpu = raw_smp_processor_id(); | ||||
|  | ||||
| 	if (cpu_isset(cpu, cpus_hardware_enabled)) | ||||
| 		return; | ||||
| 	cpu_set(cpu, cpus_hardware_enabled); | ||||
| 	kvm_arch_ops->hardware_enable(NULL); | ||||
| } | ||||
|  | ||||
| static void hardware_disable(void *junk) | ||||
| { | ||||
| 	int cpu = raw_smp_processor_id(); | ||||
|  | ||||
| 	if (!cpu_isset(cpu, cpus_hardware_enabled)) | ||||
| 		return; | ||||
| 	cpu_clear(cpu, cpus_hardware_enabled); | ||||
| 	decache_vcpus_on_cpu(cpu); | ||||
| 	kvm_arch_ops->hardware_disable(NULL); | ||||
| } | ||||
|  | ||||
| static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | ||||
| 			   void *v) | ||||
| { | ||||
| 	int cpu = (long)v; | ||||
|  | ||||
| 	switch (val) { | ||||
| 	case CPU_DOWN_PREPARE: | ||||
| 	case CPU_DOWN_PREPARE_FROZEN: | ||||
| 	case CPU_DYING: | ||||
| 	case CPU_DYING_FROZEN: | ||||
| 	case CPU_UP_CANCELED: | ||||
| 	case CPU_UP_CANCELED_FROZEN: | ||||
| 		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", | ||||
| 		       cpu); | ||||
| 		decache_vcpus_on_cpu(cpu); | ||||
| 		smp_call_function_single(cpu, kvm_arch_ops->hardware_disable, | ||||
| 					 NULL, 0, 1); | ||||
| 		smp_call_function_single(cpu, hardware_disable, NULL, 0, 1); | ||||
| 		break; | ||||
| 	case CPU_ONLINE: | ||||
| 	case CPU_ONLINE_FROZEN: | ||||
| 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", | ||||
| 		       cpu); | ||||
| 		smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, | ||||
| 					 NULL, 0, 1); | ||||
| 		smp_call_function_single(cpu, hardware_enable, NULL, 0, 1); | ||||
| 		break; | ||||
| 	} | ||||
| 	return NOTIFY_OK; | ||||
| } | ||||
|  | ||||
| void kvm_io_bus_init(struct kvm_io_bus *bus) | ||||
| { | ||||
| 	memset(bus, 0, sizeof(*bus)); | ||||
| } | ||||
|  | ||||
| void kvm_io_bus_destroy(struct kvm_io_bus *bus) | ||||
| { | ||||
| 	int i; | ||||
|  | ||||
| 	for (i = 0; i < bus->dev_count; i++) { | ||||
| 		struct kvm_io_device *pos = bus->devs[i]; | ||||
|  | ||||
| 		kvm_iodevice_destructor(pos); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr) | ||||
| { | ||||
| 	int i; | ||||
|  | ||||
| 	for (i = 0; i < bus->dev_count; i++) { | ||||
| 		struct kvm_io_device *pos = bus->devs[i]; | ||||
|  | ||||
| 		if (pos->in_range(pos, addr)) | ||||
| 			return pos; | ||||
| 	} | ||||
|  | ||||
| 	return NULL; | ||||
| } | ||||
|  | ||||
| void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) | ||||
| { | ||||
| 	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); | ||||
|  | ||||
| 	bus->devs[bus->dev_count++] = dev; | ||||
| } | ||||
|  | ||||
| static struct notifier_block kvm_cpu_notifier = { | ||||
| 	.notifier_call = kvm_cpu_hotplug, | ||||
| 	.priority = 20, /* must be > scheduler priority */ | ||||
| @@ -2983,14 +3110,13 @@ static void kvm_exit_debug(void) | ||||
|  | ||||
| static int kvm_suspend(struct sys_device *dev, pm_message_t state) | ||||
| { | ||||
| 	decache_vcpus_on_cpu(raw_smp_processor_id()); | ||||
| 	on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); | ||||
| 	hardware_disable(NULL); | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int kvm_resume(struct sys_device *dev) | ||||
| { | ||||
| 	on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); | ||||
| 	hardware_enable(NULL); | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| @@ -3007,18 +3133,6 @@ static struct sys_device kvm_sysdev = { | ||||
|  | ||||
| hpa_t bad_page_address; | ||||
|  | ||||
| static int kvmfs_get_sb(struct file_system_type *fs_type, int flags, | ||||
| 			const char *dev_name, void *data, struct vfsmount *mnt) | ||||
| { | ||||
| 	return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt); | ||||
| } | ||||
|  | ||||
| static struct file_system_type kvm_fs_type = { | ||||
| 	.name		= "kvmfs", | ||||
| 	.get_sb		= kvmfs_get_sb, | ||||
| 	.kill_sb	= kill_anon_super, | ||||
| }; | ||||
|  | ||||
| int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | ||||
| { | ||||
| 	int r; | ||||
| @@ -3043,7 +3157,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) | ||||
| 	if (r < 0) | ||||
| 		goto out; | ||||
|  | ||||
| 	on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1); | ||||
| 	on_each_cpu(hardware_enable, NULL, 0, 1); | ||||
| 	r = register_cpu_notifier(&kvm_cpu_notifier); | ||||
| 	if (r) | ||||
| 		goto out_free_1; | ||||
| @@ -3075,7 +3189,7 @@ out_free_2: | ||||
| 	unregister_reboot_notifier(&kvm_reboot_notifier); | ||||
| 	unregister_cpu_notifier(&kvm_cpu_notifier); | ||||
| out_free_1: | ||||
| 	on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); | ||||
| 	on_each_cpu(hardware_disable, NULL, 0, 1); | ||||
| 	kvm_arch_ops->hardware_unsetup(); | ||||
| out: | ||||
| 	kvm_arch_ops = NULL; | ||||
| @@ -3089,7 +3203,7 @@ void kvm_exit_arch(void) | ||||
| 	sysdev_class_unregister(&kvm_sysdev_class); | ||||
| 	unregister_reboot_notifier(&kvm_reboot_notifier); | ||||
| 	unregister_cpu_notifier(&kvm_cpu_notifier); | ||||
| 	on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1); | ||||
| 	on_each_cpu(hardware_disable, NULL, 0, 1); | ||||
| 	kvm_arch_ops->hardware_unsetup(); | ||||
| 	kvm_arch_ops = NULL; | ||||
| } | ||||
| @@ -3103,14 +3217,6 @@ static __init int kvm_init(void) | ||||
| 	if (r) | ||||
| 		goto out4; | ||||
|  | ||||
| 	r = register_filesystem(&kvm_fs_type); | ||||
| 	if (r) | ||||
| 		goto out3; | ||||
|  | ||||
| 	kvmfs_mnt = kern_mount(&kvm_fs_type); | ||||
| 	r = PTR_ERR(kvmfs_mnt); | ||||
| 	if (IS_ERR(kvmfs_mnt)) | ||||
| 		goto out2; | ||||
| 	kvm_init_debug(); | ||||
|  | ||||
| 	kvm_init_msr_list(); | ||||
| @@ -3127,10 +3233,6 @@ static __init int kvm_init(void) | ||||
|  | ||||
| out: | ||||
| 	kvm_exit_debug(); | ||||
| 	mntput(kvmfs_mnt); | ||||
| out2: | ||||
| 	unregister_filesystem(&kvm_fs_type); | ||||
| out3: | ||||
| 	kvm_mmu_module_exit(); | ||||
| out4: | ||||
| 	return r; | ||||
| @@ -3140,8 +3242,6 @@ static __exit void kvm_exit(void) | ||||
| { | ||||
| 	kvm_exit_debug(); | ||||
| 	__free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); | ||||
| 	mntput(kvmfs_mnt); | ||||
| 	unregister_filesystem(&kvm_fs_type); | ||||
| 	kvm_mmu_module_exit(); | ||||
| } | ||||
|  | ||||
|   | ||||
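In the kvm_main.c changes above, hardware_enable() and hardware_disable() consult the new cpus_hardware_enabled mask so that the hotplug, reboot, suspend and resume paths can all call them without double-enabling or double-disabling virtualization on a CPU. A user-space sketch of that idempotence guard, using a plain unsigned long in place of cpumask_t and printf in place of the arch hooks:

#include <stdio.h>

static unsigned long cpus_hardware_enabled;	/* one bit per cpu, sketch only */

static void arch_hardware_enable(int cpu)  { printf("VMXON on cpu %d\n", cpu); }
static void arch_hardware_disable(int cpu) { printf("VMXOFF on cpu %d\n", cpu); }

static void hardware_enable(int cpu)
{
	if (cpus_hardware_enabled & (1UL << cpu))
		return;				/* already enabled: do nothing */
	cpus_hardware_enabled |= 1UL << cpu;
	arch_hardware_enable(cpu);
}

static void hardware_disable(int cpu)
{
	if (!(cpus_hardware_enabled & (1UL << cpu)))
		return;				/* never enabled: do nothing */
	cpus_hardware_enabled &= ~(1UL << cpu);
	arch_hardware_disable(cpu);
}

int main(void)
{
	hardware_enable(0);
	hardware_enable(0);	/* second call is a no-op, like the code above */
	hardware_disable(0);
	hardware_disable(0);	/* also a no-op */
	return 0;
}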
| @@ -16,15 +16,18 @@ | ||||
|  * the COPYING file in the top-level directory. | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| #include "vmx.h" | ||||
| #include "kvm.h" | ||||
|  | ||||
| #include <linux/types.h> | ||||
| #include <linux/string.h> | ||||
| #include <asm/page.h> | ||||
| #include <linux/mm.h> | ||||
| #include <linux/highmem.h> | ||||
| #include <linux/module.h> | ||||
|  | ||||
| #include "vmx.h" | ||||
| #include "kvm.h" | ||||
| #include <asm/page.h> | ||||
| #include <asm/cmpxchg.h> | ||||
|  | ||||
| #undef MMU_DEBUG | ||||
|  | ||||
| @@ -90,25 +93,11 @@ static int dbg = 1; | ||||
| #define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) | ||||
|  | ||||
|  | ||||
| #define PT32_PTE_COPY_MASK \ | ||||
| 	(PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK) | ||||
|  | ||||
| #define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK) | ||||
|  | ||||
| #define PT_FIRST_AVAIL_BITS_SHIFT 9 | ||||
| #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | ||||
|  | ||||
| #define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||||
| #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||||
|  | ||||
| #define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1) | ||||
| #define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT) | ||||
|  | ||||
| #define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1) | ||||
| #define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT)) | ||||
|  | ||||
| #define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT) | ||||
|  | ||||
| #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||||
|  | ||||
| #define PT64_LEVEL_BITS 9 | ||||
| @@ -165,6 +154,8 @@ struct kvm_rmap_desc { | ||||
|  | ||||
| static struct kmem_cache *pte_chain_cache; | ||||
| static struct kmem_cache *rmap_desc_cache; | ||||
| static struct kmem_cache *mmu_page_cache; | ||||
| static struct kmem_cache *mmu_page_header_cache; | ||||
|  | ||||
| static int is_write_protection(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| @@ -202,6 +193,15 @@ static int is_rmap_pte(u64 pte) | ||||
| 		== (PT_WRITABLE_MASK | PT_PRESENT_MASK); | ||||
| } | ||||
|  | ||||
| static void set_shadow_pte(u64 *sptep, u64 spte) | ||||
| { | ||||
| #ifdef CONFIG_X86_64 | ||||
| 	set_64bit((unsigned long *)sptep, spte); | ||||
| #else | ||||
| 	set_64bit((unsigned long long *)sptep, spte); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | ||||
| 				  struct kmem_cache *base_cache, int min, | ||||
| 				  gfp_t gfp_flags) | ||||
| @@ -235,6 +235,14 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) | ||||
| 		goto out; | ||||
| 	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, | ||||
| 				   rmap_desc_cache, 1, gfp_flags); | ||||
| 	if (r) | ||||
| 		goto out; | ||||
| 	r = mmu_topup_memory_cache(&vcpu->mmu_page_cache, | ||||
| 				   mmu_page_cache, 4, gfp_flags); | ||||
| 	if (r) | ||||
| 		goto out; | ||||
| 	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, | ||||
| 				   mmu_page_header_cache, 4, gfp_flags); | ||||
| out: | ||||
| 	return r; | ||||
| } | ||||
| @@ -258,6 +266,8 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); | ||||
| 	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache); | ||||
| 	mmu_free_memory_cache(&vcpu->mmu_page_cache); | ||||
| 	mmu_free_memory_cache(&vcpu->mmu_page_header_cache); | ||||
| } | ||||
|  | ||||
| static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | ||||
| @@ -433,19 +443,18 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) | ||||
| 		BUG_ON(!(*spte & PT_WRITABLE_MASK)); | ||||
| 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | ||||
| 		rmap_remove(vcpu, spte); | ||||
| 		kvm_arch_ops->tlb_flush(vcpu); | ||||
| 		*spte &= ~(u64)PT_WRITABLE_MASK; | ||||
| 		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK); | ||||
| 		kvm_flush_remote_tlbs(vcpu->kvm); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| #ifdef MMU_DEBUG | ||||
| static int is_empty_shadow_page(hpa_t page_hpa) | ||||
| static int is_empty_shadow_page(u64 *spt) | ||||
| { | ||||
| 	u64 *pos; | ||||
| 	u64 *end; | ||||
|  | ||||
| 	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64); | ||||
| 		      pos != end; pos++) | ||||
| 	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) | ||||
| 		if (*pos != 0) { | ||||
| 			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__, | ||||
| 			       pos, *pos); | ||||
| @@ -455,13 +464,13 @@ static int is_empty_shadow_page(hpa_t page_hpa) | ||||
| } | ||||
| #endif | ||||
|  | ||||
| static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) | ||||
| static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, | ||||
| 			      struct kvm_mmu_page *page_head) | ||||
| { | ||||
| 	struct kvm_mmu_page *page_head = page_header(page_hpa); | ||||
|  | ||||
| 	ASSERT(is_empty_shadow_page(page_hpa)); | ||||
| 	page_head->page_hpa = page_hpa; | ||||
| 	list_move(&page_head->link, &vcpu->free_pages); | ||||
| 	ASSERT(is_empty_shadow_page(page_head->spt)); | ||||
| 	list_del(&page_head->link); | ||||
| 	mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt); | ||||
| 	mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head); | ||||
| 	++vcpu->kvm->n_free_mmu_pages; | ||||
| } | ||||
|  | ||||
| @@ -475,12 +484,15 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | ||||
| { | ||||
| 	struct kvm_mmu_page *page; | ||||
|  | ||||
| 	if (list_empty(&vcpu->free_pages)) | ||||
| 	if (!vcpu->kvm->n_free_mmu_pages) | ||||
| 		return NULL; | ||||
|  | ||||
| 	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); | ||||
| 	list_move(&page->link, &vcpu->kvm->active_mmu_pages); | ||||
| 	ASSERT(is_empty_shadow_page(page->page_hpa)); | ||||
| 	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache, | ||||
| 				      sizeof *page); | ||||
| 	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); | ||||
| 	set_page_private(virt_to_page(page->spt), (unsigned long)page); | ||||
| 	list_add(&page->link, &vcpu->kvm->active_mmu_pages); | ||||
| 	ASSERT(is_empty_shadow_page(page->spt)); | ||||
| 	page->slot_bitmap = 0; | ||||
| 	page->multimapped = 0; | ||||
| 	page->parent_pte = parent_pte; | ||||
| @@ -638,7 +650,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, | ||||
| 	u64 *pt; | ||||
| 	u64 ent; | ||||
|  | ||||
| 	pt = __va(page->page_hpa); | ||||
| 	pt = page->spt; | ||||
|  | ||||
| 	if (page->role.level == PT_PAGE_TABLE_LEVEL) { | ||||
| 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||||
| @@ -646,7 +658,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, | ||||
| 				rmap_remove(vcpu, &pt[i]); | ||||
| 			pt[i] = 0; | ||||
| 		} | ||||
| 		kvm_arch_ops->tlb_flush(vcpu); | ||||
| 		kvm_flush_remote_tlbs(vcpu->kvm); | ||||
| 		return; | ||||
| 	} | ||||
|  | ||||
| @@ -659,6 +671,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu, | ||||
| 		ent &= PT64_BASE_ADDR_MASK; | ||||
| 		mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]); | ||||
| 	} | ||||
| 	kvm_flush_remote_tlbs(vcpu->kvm); | ||||
| } | ||||
|  | ||||
| static void kvm_mmu_put_page(struct kvm_vcpu *vcpu, | ||||
| @@ -685,12 +698,12 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu, | ||||
| 		} | ||||
| 		BUG_ON(!parent_pte); | ||||
| 		kvm_mmu_put_page(vcpu, page, parent_pte); | ||||
| 		*parent_pte = 0; | ||||
| 		set_shadow_pte(parent_pte, 0); | ||||
| 	} | ||||
| 	kvm_mmu_page_unlink_children(vcpu, page); | ||||
| 	if (!page->root_count) { | ||||
| 		hlist_del(&page->hash_link); | ||||
| 		kvm_mmu_free_page(vcpu, page->page_hpa); | ||||
| 		kvm_mmu_free_page(vcpu, page); | ||||
| 	} else | ||||
| 		list_move(&page->link, &vcpu->kvm->active_mmu_pages); | ||||
| } | ||||
| @@ -717,6 +730,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn) | ||||
| 	return r; | ||||
| } | ||||
|  | ||||
| static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn) | ||||
| { | ||||
| 	struct kvm_mmu_page *page; | ||||
|  | ||||
| 	while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { | ||||
| 		pgprintk("%s: zap %lx %x\n", | ||||
| 			 __FUNCTION__, gfn, page->role.word); | ||||
| 		kvm_mmu_zap_page(vcpu, page); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) | ||||
| { | ||||
| 	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT)); | ||||
| @@ -805,7 +829,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p) | ||||
| 				return -ENOMEM; | ||||
| 			} | ||||
|  | ||||
| 			table[index] = new_table->page_hpa | PT_PRESENT_MASK | ||||
| 			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK | ||||
| 				| PT_WRITABLE_MASK | PT_USER_MASK; | ||||
| 		} | ||||
| 		table_addr = table[index] & PT64_BASE_ADDR_MASK; | ||||
| @@ -817,11 +841,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | ||||
| 	int i; | ||||
| 	struct kvm_mmu_page *page; | ||||
|  | ||||
| 	if (!VALID_PAGE(vcpu->mmu.root_hpa)) | ||||
| 		return; | ||||
| #ifdef CONFIG_X86_64 | ||||
| 	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) { | ||||
| 		hpa_t root = vcpu->mmu.root_hpa; | ||||
|  | ||||
| 		ASSERT(VALID_PAGE(root)); | ||||
| 		page = page_header(root); | ||||
| 		--page->root_count; | ||||
| 		vcpu->mmu.root_hpa = INVALID_PAGE; | ||||
| @@ -832,7 +857,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | ||||
| 		hpa_t root = vcpu->mmu.pae_root[i]; | ||||
|  | ||||
| 		if (root) { | ||||
| 			ASSERT(VALID_PAGE(root)); | ||||
| 			root &= PT64_BASE_ADDR_MASK; | ||||
| 			page = page_header(root); | ||||
| 			--page->root_count; | ||||
| @@ -857,7 +881,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | ||||
| 		ASSERT(!VALID_PAGE(root)); | ||||
| 		page = kvm_mmu_get_page(vcpu, root_gfn, 0, | ||||
| 					PT64_ROOT_LEVEL, 0, 0, NULL); | ||||
| 		root = page->page_hpa; | ||||
| 		root = __pa(page->spt); | ||||
| 		++page->root_count; | ||||
| 		vcpu->mmu.root_hpa = root; | ||||
| 		return; | ||||
| @@ -878,7 +902,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | ||||
| 		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | ||||
| 					PT32_ROOT_LEVEL, !is_paging(vcpu), | ||||
| 					0, NULL); | ||||
| 		root = page->page_hpa; | ||||
| 		root = __pa(page->spt); | ||||
| 		++page->root_count; | ||||
| 		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK; | ||||
| 	} | ||||
| @@ -928,9 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | ||||
| 	context->free = nonpaging_free; | ||||
| 	context->root_level = 0; | ||||
| 	context->shadow_root_level = PT32E_ROOT_LEVEL; | ||||
| 	mmu_alloc_roots(vcpu); | ||||
| 	ASSERT(VALID_PAGE(context->root_hpa)); | ||||
| 	kvm_arch_ops->set_cr3(vcpu, context->root_hpa); | ||||
| 	context->root_hpa = INVALID_PAGE; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| @@ -944,59 +966,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3); | ||||
| 	mmu_free_roots(vcpu); | ||||
| 	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES)) | ||||
| 		kvm_mmu_free_some_pages(vcpu); | ||||
| 	mmu_alloc_roots(vcpu); | ||||
| 	kvm_mmu_flush_tlb(vcpu); | ||||
| 	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); | ||||
| } | ||||
|  | ||||
| static inline void set_pte_common(struct kvm_vcpu *vcpu, | ||||
| 			     u64 *shadow_pte, | ||||
| 			     gpa_t gaddr, | ||||
| 			     int dirty, | ||||
| 			     u64 access_bits, | ||||
| 			     gfn_t gfn) | ||||
| { | ||||
| 	hpa_t paddr; | ||||
|  | ||||
| 	*shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET; | ||||
| 	if (!dirty) | ||||
| 		access_bits &= ~PT_WRITABLE_MASK; | ||||
|  | ||||
| 	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); | ||||
|  | ||||
| 	*shadow_pte |= access_bits; | ||||
|  | ||||
| 	if (is_error_hpa(paddr)) { | ||||
| 		*shadow_pte |= gaddr; | ||||
| 		*shadow_pte |= PT_SHADOW_IO_MARK; | ||||
| 		*shadow_pte &= ~PT_PRESENT_MASK; | ||||
| 		return; | ||||
| 	} | ||||
|  | ||||
| 	*shadow_pte |= paddr; | ||||
|  | ||||
| 	if (access_bits & PT_WRITABLE_MASK) { | ||||
| 		struct kvm_mmu_page *shadow; | ||||
|  | ||||
| 		shadow = kvm_mmu_lookup_page(vcpu, gfn); | ||||
| 		if (shadow) { | ||||
| 			pgprintk("%s: found shadow page for %lx, marking ro\n", | ||||
| 				 __FUNCTION__, gfn); | ||||
| 			access_bits &= ~PT_WRITABLE_MASK; | ||||
| 			if (is_writeble_pte(*shadow_pte)) { | ||||
| 				    *shadow_pte &= ~PT_WRITABLE_MASK; | ||||
| 				    kvm_arch_ops->tlb_flush(vcpu); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if (access_bits & PT_WRITABLE_MASK) | ||||
| 		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); | ||||
|  | ||||
| 	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); | ||||
| 	rmap_add(vcpu, shadow_pte); | ||||
| } | ||||
|  | ||||
| static void inject_page_fault(struct kvm_vcpu *vcpu, | ||||
| @@ -1006,23 +975,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, | ||||
| 	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); | ||||
| } | ||||
|  | ||||
| static inline int fix_read_pf(u64 *shadow_ent) | ||||
| { | ||||
| 	if ((*shadow_ent & PT_SHADOW_USER_MASK) && | ||||
| 	    !(*shadow_ent & PT_USER_MASK)) { | ||||
| 		/* | ||||
| 		 * If supervisor write protect is disabled, we shadow kernel | ||||
| 		 * pages as user pages so we can trap the write access. | ||||
| 		 */ | ||||
| 		*shadow_ent |= PT_USER_MASK; | ||||
| 		*shadow_ent &= ~PT_WRITABLE_MASK; | ||||
|  | ||||
| 		return 1; | ||||
|  | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static void paging_free(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	nonpaging_free(vcpu); | ||||
| @@ -1047,10 +999,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | ||||
| 	context->free = paging_free; | ||||
| 	context->root_level = level; | ||||
| 	context->shadow_root_level = level; | ||||
| 	mmu_alloc_roots(vcpu); | ||||
| 	ASSERT(VALID_PAGE(context->root_hpa)); | ||||
| 	kvm_arch_ops->set_cr3(vcpu, context->root_hpa | | ||||
| 		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); | ||||
| 	context->root_hpa = INVALID_PAGE; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| @@ -1069,10 +1018,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | ||||
| 	context->free = paging_free; | ||||
| 	context->root_level = PT32_ROOT_LEVEL; | ||||
| 	context->shadow_root_level = PT32E_ROOT_LEVEL; | ||||
| 	mmu_alloc_roots(vcpu); | ||||
| 	ASSERT(VALID_PAGE(context->root_hpa)); | ||||
| 	kvm_arch_ops->set_cr3(vcpu, context->root_hpa | | ||||
| 		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); | ||||
| 	context->root_hpa = INVALID_PAGE; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| @@ -1107,18 +1053,33 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | ||||
|  | ||||
| int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	int r; | ||||
|  | ||||
| 	destroy_kvm_mmu(vcpu); | ||||
| 	r = init_kvm_mmu(vcpu); | ||||
| 	if (r < 0) | ||||
| 		goto out; | ||||
| 	r = mmu_topup_memory_caches(vcpu); | ||||
| out: | ||||
| 	return r; | ||||
| 	return init_kvm_mmu(vcpu); | ||||
| } | ||||
|  | ||||
| static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, | ||||
| int kvm_mmu_load(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	int r; | ||||
|  | ||||
| 	spin_lock(&vcpu->kvm->lock); | ||||
| 	r = mmu_topup_memory_caches(vcpu); | ||||
| 	if (r) | ||||
| 		goto out; | ||||
| 	mmu_alloc_roots(vcpu); | ||||
| 	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); | ||||
| 	kvm_mmu_flush_tlb(vcpu); | ||||
| out: | ||||
| 	spin_unlock(&vcpu->kvm->lock); | ||||
| 	return r; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(kvm_mmu_load); | ||||
|  | ||||
| void kvm_mmu_unload(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	mmu_free_roots(vcpu); | ||||
| } | ||||
|  | ||||
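Root allocation now happens lazily: the paging64/paging32 context init above leaves root_hpa as INVALID_PAGE, and kvm_mmu_load() builds the roots and points hardware cr3 at them under kvm->lock. A minimal sketch of the caller-side reload helper this implies, assuming validity is tracked in mmu.root_hpa as shown (the helper name and exact check are assumptions, not copied from the diff):

	/* Sketch only: reload shadow page-table roots before guest entry.
	 * If the current root is still valid nothing needs to be done;
	 * otherwise rebuild the roots and reload cr3 via kvm_mmu_load().
	 */
	static inline int example_mmu_reload(struct kvm_vcpu *vcpu)
	{
		if (VALID_PAGE(vcpu->mmu.root_hpa))
			return 0;
		return kvm_mmu_load(vcpu);
	}

The svm_vcpu_run() hunk further down uses exactly this pattern, calling kvm_mmu_reload() at the top of the run loop and bailing out if it fails.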
| static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | ||||
| 				  struct kvm_mmu_page *page, | ||||
| 				  u64 *spte) | ||||
| { | ||||
| @@ -1135,9 +1096,25 @@ static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu, | ||||
| 		} | ||||
| 	} | ||||
| 	*spte = 0; | ||||
| 	kvm_flush_remote_tlbs(vcpu->kvm); | ||||
| } | ||||
|  | ||||
| void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | ||||
| static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | ||||
| 				  struct kvm_mmu_page *page, | ||||
| 				  u64 *spte, | ||||
| 				  const void *new, int bytes) | ||||
| { | ||||
| 	if (page->role.level != PT_PAGE_TABLE_LEVEL) | ||||
| 		return; | ||||
|  | ||||
| 	if (page->role.glevels == PT32_ROOT_LEVEL) | ||||
| 		paging32_update_pte(vcpu, page, spte, new, bytes); | ||||
| 	else | ||||
| 		paging64_update_pte(vcpu, page, spte, new, bytes); | ||||
| } | ||||
|  | ||||
| void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | ||||
| 		       const u8 *old, const u8 *new, int bytes) | ||||
| { | ||||
| 	gfn_t gfn = gpa >> PAGE_SHIFT; | ||||
| 	struct kvm_mmu_page *page; | ||||
| @@ -1149,6 +1126,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | ||||
| 	unsigned pte_size; | ||||
| 	unsigned page_offset; | ||||
| 	unsigned misaligned; | ||||
| 	unsigned quadrant; | ||||
| 	int level; | ||||
| 	int flooded = 0; | ||||
| 	int npte; | ||||
| @@ -1169,6 +1147,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | ||||
| 			continue; | ||||
| 		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | ||||
| 		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | ||||
| 		misaligned |= bytes < 4; | ||||
| 		if (misaligned || flooded) { | ||||
| 			/* | ||||
| 			 * Misaligned accesses are too much trouble to fix | ||||
| @@ -1200,21 +1179,20 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | ||||
| 				page_offset <<= 1; | ||||
| 				npte = 2; | ||||
| 			} | ||||
| 			quadrant = page_offset >> PAGE_SHIFT; | ||||
| 			page_offset &= ~PAGE_MASK; | ||||
| 			if (quadrant != page->role.quadrant) | ||||
| 				continue; | ||||
| 		} | ||||
| 		spte = __va(page->page_hpa); | ||||
| 		spte += page_offset / sizeof(*spte); | ||||
| 		spte = &page->spt[page_offset / sizeof(*spte)]; | ||||
| 		while (npte--) { | ||||
| 			mmu_pre_write_zap_pte(vcpu, page, spte); | ||||
| 			mmu_pte_write_zap_pte(vcpu, page, spte); | ||||
| 			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); | ||||
| 			++spte; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
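In the write-tracking loop above, a 32-bit guest page table (role.glevels == PT32_ROOT_LEVEL) holds 1024 4-byte entries, while a shadow page holds only 512 8-byte sptes, so each guest table is backed by two shadow pages told apart by role.quadrant. Doubling page_offset converts the byte offset of the written 4-byte guest entry into an 8-byte spte offset; whatever spills past PAGE_SIZE selects the quadrant. As a worked example (values chosen purely for illustration): a write to guest offset 0x804, i.e. entry 513, doubles to 0x1008, so quadrant = 0x1008 >> 12 = 1 and page_offset becomes 0x008 — spte index 1 in the quadrant-1 shadow page, which indeed shadows guest entry 512 + 1.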
| void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes) | ||||
| { | ||||
| } | ||||
|  | ||||
| int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | ||||
| { | ||||
| 	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva); | ||||
| @@ -1243,13 +1221,6 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) | ||||
| 				    struct kvm_mmu_page, link); | ||||
| 		kvm_mmu_zap_page(vcpu, page); | ||||
| 	} | ||||
| 	while (!list_empty(&vcpu->free_pages)) { | ||||
| 		page = list_entry(vcpu->free_pages.next, | ||||
| 				  struct kvm_mmu_page, link); | ||||
| 		list_del(&page->link); | ||||
| 		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT)); | ||||
| 		page->page_hpa = INVALID_PAGE; | ||||
| 	} | ||||
| 	free_page((unsigned long)vcpu->mmu.pae_root); | ||||
| } | ||||
|  | ||||
| @@ -1260,18 +1231,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | ||||
|  | ||||
| 	ASSERT(vcpu); | ||||
|  | ||||
| 	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) { | ||||
| 		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i]; | ||||
|  | ||||
| 		INIT_LIST_HEAD(&page_header->link); | ||||
| 		if ((page = alloc_page(GFP_KERNEL)) == NULL) | ||||
| 			goto error_1; | ||||
| 		set_page_private(page, (unsigned long)page_header); | ||||
| 		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; | ||||
| 		memset(__va(page_header->page_hpa), 0, PAGE_SIZE); | ||||
| 		list_add(&page_header->link, &vcpu->free_pages); | ||||
| 		++vcpu->kvm->n_free_mmu_pages; | ||||
| 	} | ||||
| 	vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES; | ||||
|  | ||||
| 	/* | ||||
| 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. | ||||
| @@ -1296,7 +1256,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	ASSERT(vcpu); | ||||
| 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); | ||||
| 	ASSERT(list_empty(&vcpu->free_pages)); | ||||
|  | ||||
| 	return alloc_mmu_pages(vcpu); | ||||
| } | ||||
| @@ -1305,7 +1264,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	ASSERT(vcpu); | ||||
| 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa)); | ||||
| 	ASSERT(!list_empty(&vcpu->free_pages)); | ||||
|  | ||||
| 	return init_kvm_mmu(vcpu); | ||||
| } | ||||
| @@ -1331,7 +1289,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot) | ||||
| 		if (!test_bit(slot, &page->slot_bitmap)) | ||||
| 			continue; | ||||
|  | ||||
| 		pt = __va(page->page_hpa); | ||||
| 		pt = page->spt; | ||||
| 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | ||||
| 			/* avoid RMW */ | ||||
| 			if (pt[i] & PT_WRITABLE_MASK) { | ||||
| @@ -1354,7 +1312,7 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu) | ||||
| 	} | ||||
|  | ||||
| 	mmu_free_memory_caches(vcpu); | ||||
| 	kvm_arch_ops->tlb_flush(vcpu); | ||||
| 	kvm_flush_remote_tlbs(vcpu->kvm); | ||||
| 	init_kvm_mmu(vcpu); | ||||
| } | ||||
|  | ||||
| @@ -1364,6 +1322,10 @@ void kvm_mmu_module_exit(void) | ||||
| 		kmem_cache_destroy(pte_chain_cache); | ||||
| 	if (rmap_desc_cache) | ||||
| 		kmem_cache_destroy(rmap_desc_cache); | ||||
| 	if (mmu_page_cache) | ||||
| 		kmem_cache_destroy(mmu_page_cache); | ||||
| 	if (mmu_page_header_cache) | ||||
| 		kmem_cache_destroy(mmu_page_header_cache); | ||||
| } | ||||
|  | ||||
| int kvm_mmu_module_init(void) | ||||
| @@ -1379,6 +1341,18 @@ int kvm_mmu_module_init(void) | ||||
| 	if (!rmap_desc_cache) | ||||
| 		goto nomem; | ||||
|  | ||||
| 	mmu_page_cache = kmem_cache_create("kvm_mmu_page", | ||||
| 					   PAGE_SIZE, | ||||
| 					   PAGE_SIZE, 0, NULL, NULL); | ||||
| 	if (!mmu_page_cache) | ||||
| 		goto nomem; | ||||
|  | ||||
| 	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", | ||||
| 						  sizeof(struct kvm_mmu_page), | ||||
| 						  0, 0, NULL, NULL); | ||||
| 	if (!mmu_page_header_cache) | ||||
| 		goto nomem; | ||||
|  | ||||
| 	return 0; | ||||
|  | ||||
| nomem: | ||||
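The module-init hunk above adds dedicated slab caches for shadow pages and their headers, which pairs with the mmu_topup_memory_caches() call moved into kvm_mmu_load(): memory is set aside ahead of time, outside the fault path. A rough sketch of what such a top-up step can look like, assuming a simple per-vcpu object pool (function and parameter names are illustrative, not taken from this merge):

	/* Sketch: refill a small object pool from a kmem cache while it is
	 * still safe to sleep; later consumers just pop objects from the
	 * pool instead of calling the allocator under the mmu lock.
	 */
	static int example_topup(struct kmem_cache *cache,
				 void **objs, int *nobjs, int min)
	{
		while (*nobjs < min) {
			void *obj = kmem_cache_alloc(cache, GFP_KERNEL);

			if (!obj)
				return -ENOMEM;
			objs[(*nobjs)++] = obj;
		}
		return 0;
	}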
| @@ -1482,7 +1456,7 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu) | ||||
| 	int i; | ||||
|  | ||||
| 	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) { | ||||
| 		u64 *pt = __va(page->page_hpa); | ||||
| 		u64 *pt = page->spt; | ||||
|  | ||||
| 		if (page->role.level != PT_PAGE_TABLE_LEVEL) | ||||
| 			continue; | ||||
|   | ||||
| @@ -31,7 +31,6 @@ | ||||
| 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||||
| 	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||||
| 	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | ||||
| 	#define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK | ||||
| 	#ifdef CONFIG_X86_64 | ||||
| 	#define PT_MAX_FULL_LEVELS 4 | ||||
| 	#else | ||||
| @@ -46,7 +45,6 @@ | ||||
| 	#define PT_INDEX(addr, level) PT32_INDEX(addr, level) | ||||
| 	#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||||
| 	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | ||||
| 	#define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK | ||||
| 	#define PT_MAX_FULL_LEVELS 2 | ||||
| #else | ||||
| 	#error Invalid PTTYPE value | ||||
| @@ -192,40 +190,143 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, | ||||
| 	mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]); | ||||
| } | ||||
|  | ||||
| static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, | ||||
| 			   u64 *shadow_pte, u64 access_bits, gfn_t gfn) | ||||
| static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, | ||||
| 				  u64 *shadow_pte, | ||||
| 				  gpa_t gaddr, | ||||
| 				  pt_element_t *gpte, | ||||
| 				  u64 access_bits, | ||||
| 				  int user_fault, | ||||
| 				  int write_fault, | ||||
| 				  int *ptwrite, | ||||
| 				  struct guest_walker *walker, | ||||
| 				  gfn_t gfn) | ||||
| { | ||||
| 	ASSERT(*shadow_pte == 0); | ||||
| 	access_bits &= guest_pte; | ||||
| 	*shadow_pte = (guest_pte & PT_PTE_COPY_MASK); | ||||
| 	set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, | ||||
| 		       guest_pte & PT_DIRTY_MASK, access_bits, gfn); | ||||
| 	hpa_t paddr; | ||||
| 	int dirty = *gpte & PT_DIRTY_MASK; | ||||
| 	u64 spte = *shadow_pte; | ||||
| 	int was_rmapped = is_rmap_pte(spte); | ||||
|  | ||||
| 	pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" | ||||
| 		 " user_fault %d gfn %lx\n", | ||||
| 		 __FUNCTION__, spte, (u64)*gpte, access_bits, | ||||
| 		 write_fault, user_fault, gfn); | ||||
|  | ||||
| 	if (write_fault && !dirty) { | ||||
| 		*gpte |= PT_DIRTY_MASK; | ||||
| 		dirty = 1; | ||||
| 		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||||
| 	} | ||||
|  | ||||
| 	spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; | ||||
| 	spte |= *gpte & PT64_NX_MASK; | ||||
| 	if (!dirty) | ||||
| 		access_bits &= ~PT_WRITABLE_MASK; | ||||
|  | ||||
| 	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK); | ||||
|  | ||||
| 	spte |= PT_PRESENT_MASK; | ||||
| 	if (access_bits & PT_USER_MASK) | ||||
| 		spte |= PT_USER_MASK; | ||||
|  | ||||
| 	if (is_error_hpa(paddr)) { | ||||
| 		spte |= gaddr; | ||||
| 		spte |= PT_SHADOW_IO_MARK; | ||||
| 		spte &= ~PT_PRESENT_MASK; | ||||
| 		set_shadow_pte(shadow_pte, spte); | ||||
| 		return; | ||||
| 	} | ||||
|  | ||||
| 	spte |= paddr; | ||||
|  | ||||
| 	if ((access_bits & PT_WRITABLE_MASK) | ||||
| 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | ||||
| 		struct kvm_mmu_page *shadow; | ||||
|  | ||||
| 		spte |= PT_WRITABLE_MASK; | ||||
| 		if (user_fault) { | ||||
| 			mmu_unshadow(vcpu, gfn); | ||||
| 			goto unshadowed; | ||||
| 		} | ||||
|  | ||||
| 		shadow = kvm_mmu_lookup_page(vcpu, gfn); | ||||
| 		if (shadow) { | ||||
| 			pgprintk("%s: found shadow page for %lx, marking ro\n", | ||||
| 				 __FUNCTION__, gfn); | ||||
| 			access_bits &= ~PT_WRITABLE_MASK; | ||||
| 			if (is_writeble_pte(spte)) { | ||||
| 				spte &= ~PT_WRITABLE_MASK; | ||||
| 				kvm_arch_ops->tlb_flush(vcpu); | ||||
| 			} | ||||
| 			if (write_fault) | ||||
| 				*ptwrite = 1; | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| unshadowed: | ||||
|  | ||||
| 	if (access_bits & PT_WRITABLE_MASK) | ||||
| 		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT); | ||||
|  | ||||
| 	set_shadow_pte(shadow_pte, spte); | ||||
| 	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); | ||||
| 	if (!was_rmapped) | ||||
| 		rmap_add(vcpu, shadow_pte); | ||||
| } | ||||
|  | ||||
| static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, | ||||
| 			   u64 *shadow_pte, u64 access_bits, gfn_t gfn) | ||||
| static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, | ||||
| 			   u64 *shadow_pte, u64 access_bits, | ||||
| 			   int user_fault, int write_fault, int *ptwrite, | ||||
| 			   struct guest_walker *walker, gfn_t gfn) | ||||
| { | ||||
| 	access_bits &= *gpte; | ||||
| 	FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, | ||||
| 			      gpte, access_bits, user_fault, write_fault, | ||||
| 			      ptwrite, walker, gfn); | ||||
| } | ||||
|  | ||||
| static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | ||||
| 			      u64 *spte, const void *pte, int bytes) | ||||
| { | ||||
| 	pt_element_t gpte; | ||||
|  | ||||
| 	if (bytes < sizeof(pt_element_t)) | ||||
| 		return; | ||||
| 	gpte = *(const pt_element_t *)pte; | ||||
| 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) | ||||
| 		return; | ||||
| 	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); | ||||
| 	FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, | ||||
| 		       0, NULL, NULL, | ||||
| 		       (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); | ||||
| } | ||||
|  | ||||
| static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, | ||||
| 			   u64 *shadow_pte, u64 access_bits, | ||||
| 			   int user_fault, int write_fault, int *ptwrite, | ||||
| 			   struct guest_walker *walker, gfn_t gfn) | ||||
| { | ||||
| 	gpa_t gaddr; | ||||
|  | ||||
| 	ASSERT(*shadow_pte == 0); | ||||
| 	access_bits &= guest_pde; | ||||
| 	access_bits &= *gpde; | ||||
| 	gaddr = (gpa_t)gfn << PAGE_SHIFT; | ||||
| 	if (PTTYPE == 32 && is_cpuid_PSE36()) | ||||
| 		gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << | ||||
| 		gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << | ||||
| 			(32 - PT32_DIR_PSE36_SHIFT); | ||||
| 	*shadow_pte = guest_pde & PT_PTE_COPY_MASK; | ||||
| 	set_pte_common(vcpu, shadow_pte, gaddr, | ||||
| 		       guest_pde & PT_DIRTY_MASK, access_bits, gfn); | ||||
| 	FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, | ||||
| 			      gpde, access_bits, user_fault, write_fault, | ||||
| 			      ptwrite, walker, gfn); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Fetch a shadow pte for a specific level in the paging hierarchy. | ||||
|  */ | ||||
| static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | ||||
| 			      struct guest_walker *walker) | ||||
| 			 struct guest_walker *walker, | ||||
| 			 int user_fault, int write_fault, int *ptwrite) | ||||
| { | ||||
| 	hpa_t shadow_addr; | ||||
| 	int level; | ||||
| 	u64 *shadow_ent; | ||||
| 	u64 *prev_shadow_ent = NULL; | ||||
| 	pt_element_t *guest_ent = walker->ptep; | ||||
|  | ||||
| @@ -242,37 +343,23 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | ||||
|  | ||||
| 	for (; ; level--) { | ||||
| 		u32 index = SHADOW_PT_INDEX(addr, level); | ||||
| 		u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index; | ||||
| 		struct kvm_mmu_page *shadow_page; | ||||
| 		u64 shadow_pte; | ||||
| 		int metaphysical; | ||||
| 		gfn_t table_gfn; | ||||
| 		unsigned hugepage_access = 0; | ||||
|  | ||||
| 		shadow_ent = ((u64 *)__va(shadow_addr)) + index; | ||||
| 		if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { | ||||
| 			if (level == PT_PAGE_TABLE_LEVEL) | ||||
| 				return shadow_ent; | ||||
| 				break; | ||||
| 			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | ||||
| 			prev_shadow_ent = shadow_ent; | ||||
| 			continue; | ||||
| 		} | ||||
|  | ||||
| 		if (level == PT_PAGE_TABLE_LEVEL) { | ||||
|  | ||||
| 			if (walker->level == PT_DIRECTORY_LEVEL) { | ||||
| 				if (prev_shadow_ent) | ||||
| 					*prev_shadow_ent |= PT_SHADOW_PS_MARK; | ||||
| 				FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, | ||||
| 					       walker->inherited_ar, | ||||
| 					       walker->gfn); | ||||
| 			} else { | ||||
| 				ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); | ||||
| 				FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, | ||||
| 					       walker->inherited_ar, | ||||
| 					       walker->gfn); | ||||
| 			} | ||||
| 			return shadow_ent; | ||||
| 		} | ||||
| 		if (level == PT_PAGE_TABLE_LEVEL) | ||||
| 			break; | ||||
|  | ||||
| 		if (level - 1 == PT_PAGE_TABLE_LEVEL | ||||
| 		    && walker->level == PT_DIRECTORY_LEVEL) { | ||||
| @@ -289,90 +376,24 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | ||||
| 		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | ||||
| 					       metaphysical, hugepage_access, | ||||
| 					       shadow_ent); | ||||
| 		shadow_addr = shadow_page->page_hpa; | ||||
| 		shadow_addr = __pa(shadow_page->spt); | ||||
| 		shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | ||||
| 			| PT_WRITABLE_MASK | PT_USER_MASK; | ||||
| 		*shadow_ent = shadow_pte; | ||||
| 		prev_shadow_ent = shadow_ent; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * The guest faulted for write.  We need to | ||||
|  * | ||||
|  * - check write permissions | ||||
|  * - update the guest pte dirty bit | ||||
|  * - update our own dirty page tracking structures | ||||
|  */ | ||||
| static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, | ||||
| 			       u64 *shadow_ent, | ||||
| 			       struct guest_walker *walker, | ||||
| 			       gva_t addr, | ||||
| 			       int user, | ||||
| 			       int *write_pt) | ||||
| { | ||||
| 	pt_element_t *guest_ent; | ||||
| 	int writable_shadow; | ||||
| 	gfn_t gfn; | ||||
| 	struct kvm_mmu_page *page; | ||||
|  | ||||
| 	if (is_writeble_pte(*shadow_ent)) | ||||
| 		return !user || (*shadow_ent & PT_USER_MASK); | ||||
|  | ||||
| 	writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK; | ||||
| 	if (user) { | ||||
| 		/* | ||||
| 		 * User mode access.  Fail if it's a kernel page or a read-only | ||||
| 		 * page. | ||||
| 		 */ | ||||
| 		if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow) | ||||
| 			return 0; | ||||
| 		ASSERT(*shadow_ent & PT_USER_MASK); | ||||
| 	} else | ||||
| 		/* | ||||
| 		 * Kernel mode access.  Fail if it's a read-only page and | ||||
| 		 * supervisor write protection is enabled. | ||||
| 		 */ | ||||
| 		if (!writable_shadow) { | ||||
| 			if (is_write_protection(vcpu)) | ||||
| 				return 0; | ||||
| 			*shadow_ent &= ~PT_USER_MASK; | ||||
| 		} | ||||
|  | ||||
| 	guest_ent = walker->ptep; | ||||
|  | ||||
| 	if (!is_present_pte(*guest_ent)) { | ||||
| 		*shadow_ent = 0; | ||||
| 		return 0; | ||||
| 	if (walker->level == PT_DIRECTORY_LEVEL) { | ||||
| 		FNAME(set_pde)(vcpu, guest_ent, shadow_ent, | ||||
| 			       walker->inherited_ar, user_fault, write_fault, | ||||
| 			       ptwrite, walker, walker->gfn); | ||||
| 	} else { | ||||
| 		ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); | ||||
| 		FNAME(set_pte)(vcpu, guest_ent, shadow_ent, | ||||
| 			       walker->inherited_ar, user_fault, write_fault, | ||||
| 			       ptwrite, walker, walker->gfn); | ||||
| 	} | ||||
|  | ||||
| 	gfn = walker->gfn; | ||||
|  | ||||
| 	if (user) { | ||||
| 		/* | ||||
| 		 * Usermode page faults won't be for page table updates. | ||||
| 		 */ | ||||
| 		while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) { | ||||
| 			pgprintk("%s: zap %lx %x\n", | ||||
| 				 __FUNCTION__, gfn, page->role.word); | ||||
| 			kvm_mmu_zap_page(vcpu, page); | ||||
| 		} | ||||
| 	} else if (kvm_mmu_lookup_page(vcpu, gfn)) { | ||||
| 		pgprintk("%s: found shadow page for %lx, marking ro\n", | ||||
| 			 __FUNCTION__, gfn); | ||||
| 		mark_page_dirty(vcpu->kvm, gfn); | ||||
| 		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||||
| 		*guest_ent |= PT_DIRTY_MASK; | ||||
| 		*write_pt = 1; | ||||
| 		return 0; | ||||
| 	} | ||||
| 	mark_page_dirty(vcpu->kvm, gfn); | ||||
| 	*shadow_ent |= PT_WRITABLE_MASK; | ||||
| 	FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); | ||||
| 	*guest_ent |= PT_DIRTY_MASK; | ||||
| 	rmap_add(vcpu, shadow_ent); | ||||
|  | ||||
| 	return 1; | ||||
| 	return shadow_ent; | ||||
| } | ||||
|  | ||||
| /* | ||||
| @@ -397,7 +418,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | ||||
| 	int fetch_fault = error_code & PFERR_FETCH_MASK; | ||||
| 	struct guest_walker walker; | ||||
| 	u64 *shadow_pte; | ||||
| 	int fixed; | ||||
| 	int write_pt = 0; | ||||
| 	int r; | ||||
|  | ||||
| @@ -421,27 +441,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | ||||
| 		pgprintk("%s: guest page fault\n", __FUNCTION__); | ||||
| 		inject_page_fault(vcpu, addr, walker.error_code); | ||||
| 		FNAME(release_walker)(&walker); | ||||
| 		vcpu->last_pt_write_count = 0; /* reset fork detector */ | ||||
| 		return 0; | ||||
| 	} | ||||
|  | ||||
| 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker); | ||||
| 	pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, | ||||
| 		 shadow_pte, *shadow_pte); | ||||
|  | ||||
| 	/* | ||||
| 	 * Update the shadow pte. | ||||
| 	 */ | ||||
| 	if (write_fault) | ||||
| 		fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, | ||||
| 					    user_fault, &write_pt); | ||||
| 	else | ||||
| 		fixed = fix_read_pf(shadow_pte); | ||||
|  | ||||
| 	pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__, | ||||
| 		 shadow_pte, *shadow_pte); | ||||
| 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | ||||
| 				  &write_pt); | ||||
| 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, | ||||
| 		 shadow_pte, *shadow_pte, write_pt); | ||||
|  | ||||
| 	FNAME(release_walker)(&walker); | ||||
|  | ||||
| 	if (!write_pt) | ||||
| 		vcpu->last_pt_write_count = 0; /* reset fork detector */ | ||||
|  | ||||
| 	/* | ||||
| 	 * mmio: emulate if accessible, otherwise its a guest fault. | ||||
| 	 */ | ||||
| @@ -478,7 +491,5 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | ||||
| #undef PT_INDEX | ||||
| #undef SHADOW_PT_INDEX | ||||
| #undef PT_LEVEL_MASK | ||||
| #undef PT_PTE_COPY_MASK | ||||
| #undef PT_NON_PTE_COPY_MASK | ||||
| #undef PT_DIR_BASE_ADDR_MASK | ||||
| #undef PT_MAX_FULL_LEVELS | ||||
|   | ||||
| @@ -14,16 +14,17 @@ | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| #include "kvm_svm.h" | ||||
| #include "x86_emulate.h" | ||||
|  | ||||
| #include <linux/module.h> | ||||
| #include <linux/kernel.h> | ||||
| #include <linux/vmalloc.h> | ||||
| #include <linux/highmem.h> | ||||
| #include <linux/profile.h> | ||||
| #include <linux/sched.h> | ||||
| #include <asm/desc.h> | ||||
|  | ||||
| #include "kvm_svm.h" | ||||
| #include "x86_emulate.h" | ||||
| #include <asm/desc.h> | ||||
|  | ||||
| MODULE_AUTHOR("Qumranet"); | ||||
| MODULE_LICENSE("GPL"); | ||||
| @@ -378,7 +379,7 @@ static __init int svm_hardware_setup(void) | ||||
| 	int cpu; | ||||
| 	struct page *iopm_pages; | ||||
| 	struct page *msrpm_pages; | ||||
| 	void *msrpm_va; | ||||
| 	void *iopm_va, *msrpm_va; | ||||
| 	int r; | ||||
|  | ||||
| 	kvm_emulator_want_group7_invlpg(); | ||||
| @@ -387,8 +388,10 @@ static __init int svm_hardware_setup(void) | ||||
|  | ||||
| 	if (!iopm_pages) | ||||
| 		return -ENOMEM; | ||||
| 	memset(page_address(iopm_pages), 0xff, | ||||
| 					PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | ||||
|  | ||||
| 	iopm_va = page_address(iopm_pages); | ||||
| 	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | ||||
| 	clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ | ||||
| 	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | ||||
|  | ||||
|  | ||||
| @@ -579,7 +582,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) | ||||
| 		goto out2; | ||||
|  | ||||
| 	vcpu->svm->vmcb = page_address(page); | ||||
| 	memset(vcpu->svm->vmcb, 0, PAGE_SIZE); | ||||
| 	clear_page(vcpu->svm->vmcb); | ||||
| 	vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; | ||||
| 	vcpu->svm->asid_generation = 0; | ||||
| 	memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); | ||||
| @@ -587,9 +590,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) | ||||
|  | ||||
| 	fx_init(vcpu); | ||||
| 	vcpu->fpu_active = 1; | ||||
| 	vcpu->apic_base = 0xfee00000 | | ||||
| 			/*for vcpu 0*/ MSR_IA32_APICBASE_BSP | | ||||
| 			MSR_IA32_APICBASE_ENABLE; | ||||
| 	vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | ||||
| 	if (vcpu == &vcpu->kvm->vcpus[0]) | ||||
| 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP; | ||||
|  | ||||
| 	return 0; | ||||
|  | ||||
| @@ -955,7 +958,7 @@ static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||||
| 	 * VMCB is undefined after a SHUTDOWN intercept | ||||
| 	 * so reinitialize it. | ||||
| 	 */ | ||||
| 	memset(vcpu->svm->vmcb, 0, PAGE_SIZE); | ||||
| 	clear_page(vcpu->svm->vmcb); | ||||
| 	init_vmcb(vcpu->svm->vmcb); | ||||
|  | ||||
| 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | ||||
| @@ -1113,12 +1116,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||||
| { | ||||
| 	vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; | ||||
| 	skip_emulated_instruction(vcpu); | ||||
| 	if (vcpu->irq_summary) | ||||
| 		return 1; | ||||
|  | ||||
| 	kvm_run->exit_reason = KVM_EXIT_HLT; | ||||
| 	++vcpu->stat.halt_exits; | ||||
| 	return 0; | ||||
| 	return kvm_emulate_halt(vcpu); | ||||
| } | ||||
|  | ||||
| static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||||
| @@ -1473,6 +1471,11 @@ static void load_db_regs(unsigned long *db_regs) | ||||
| 	asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3])); | ||||
| } | ||||
|  | ||||
| static void svm_flush_tlb(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	force_new_asid(vcpu); | ||||
| } | ||||
|  | ||||
| static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||||
| { | ||||
| 	u16 fs_selector; | ||||
| @@ -1481,11 +1484,20 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||||
| 	int r; | ||||
|  | ||||
| again: | ||||
| 	r = kvm_mmu_reload(vcpu); | ||||
| 	if (unlikely(r)) | ||||
| 		return r; | ||||
|  | ||||
| 	if (!vcpu->mmio_read_completed) | ||||
| 		do_interrupt_requests(vcpu, kvm_run); | ||||
|  | ||||
| 	clgi(); | ||||
|  | ||||
| 	vcpu->guest_mode = 1; | ||||
| 	if (vcpu->requests) | ||||
| 		if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) | ||||
| 		    svm_flush_tlb(vcpu); | ||||
|  | ||||
| 	pre_svm_run(vcpu); | ||||
|  | ||||
| 	save_host_msrs(vcpu); | ||||
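The svm_vcpu_run() changes above show the consumer side of a new deferred-flush scheme: guest_mode is raised around guest execution, and a pending KVM_TLB_FLUSH bit in vcpu->requests is turned into svm_flush_tlb() just before entry. A rough sketch of the producer side, assuming vcpu->cpu records the physical cpu the vcpu last ran on and using a no-op IPI purely to force a vmexit (both are assumptions; this excerpt does not show the producer):

	/* Sketch: ask a vcpu to flush its TLB at its next guest entry; if it
	 * is executing guest code right now, kick it so it notices soon.
	 */
	static void example_ipi_noop(void *info)
	{
	}

	static void example_request_tlb_flush(struct kvm_vcpu *vcpu)
	{
		set_bit(KVM_TLB_FLUSH, &vcpu->requests);
		if (vcpu->guest_mode)
			smp_call_function_single(vcpu->cpu, example_ipi_noop,
						 NULL, 0, 0);
	}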
| @@ -1617,6 +1629,8 @@ again: | ||||
| #endif | ||||
| 		: "cc", "memory" ); | ||||
|  | ||||
| 	vcpu->guest_mode = 0; | ||||
|  | ||||
| 	if (vcpu->fpu_active) { | ||||
| 		fx_save(vcpu->guest_fx_image); | ||||
| 		fx_restore(vcpu->host_fx_image); | ||||
| @@ -1681,11 +1695,6 @@ again: | ||||
| 	return r; | ||||
| } | ||||
|  | ||||
| static void svm_flush_tlb(struct kvm_vcpu *vcpu) | ||||
| { | ||||
| 	force_new_asid(vcpu); | ||||
| } | ||||
|  | ||||
| static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | ||||
| { | ||||
| 	vcpu->svm->vmcb->save.cr3 = root; | ||||
| @@ -1727,6 +1736,12 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, | ||||
|  | ||||
| static int is_disabled(void) | ||||
| { | ||||
| 	u64 vm_cr; | ||||
|  | ||||
| 	rdmsrl(MSR_VM_CR, vm_cr); | ||||
| 	if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) | ||||
| 		return 1; | ||||
|  | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -175,8 +175,11 @@ struct __attribute__ ((__packed__)) vmcb { | ||||
| #define SVM_CPUID_FUNC 0x8000000a | ||||
|  | ||||
| #define MSR_EFER_SVME_MASK (1ULL << 12) | ||||
| #define MSR_VM_CR       0xc0010114 | ||||
| #define MSR_VM_HSAVE_PA 0xc0010117ULL | ||||
|  | ||||
| #define SVM_VM_CR_SVM_DISABLE 4 | ||||
|  | ||||
| #define SVM_SELECTOR_S_SHIFT 4 | ||||
| #define SVM_SELECTOR_DPL_SHIFT 5 | ||||
| #define SVM_SELECTOR_P_SHIFT 7 | ||||
|   | ||||
										
											
File diff suppressed because it is too large.
							| @@ -98,8 +98,11 @@ static u8 opcode_table[256] = { | ||||
| 	0, 0, 0, 0, | ||||
| 	/* 0x40 - 0x4F */ | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0x50 - 0x5F */ | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0x50 - 0x57 */ | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0x58 - 0x5F */ | ||||
| 	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||||
| 	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||||
| 	/* 0x60 - 0x6F */ | ||||
| 	0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||||
| @@ -128,9 +131,9 @@ static u8 opcode_table[256] = { | ||||
| 	/* 0xB0 - 0xBF */ | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0xC0 - 0xC7 */ | ||||
| 	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, 0, | ||||
| 	0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov, | ||||
| 	    DstMem | SrcImm | ModRM | Mov, | ||||
| 	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, | ||||
| 	0, ImplicitOps, 0, 0, | ||||
| 	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | ||||
| 	/* 0xC8 - 0xCF */ | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0xD0 - 0xD7 */ | ||||
| @@ -143,7 +146,8 @@ static u8 opcode_table[256] = { | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0xF0 - 0xF7 */ | ||||
| 	0, 0, 0, 0, | ||||
| 	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | ||||
| 	ImplicitOps, 0, | ||||
| 	ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | ||||
| 	/* 0xF8 - 0xFF */ | ||||
| 	0, 0, 0, 0, | ||||
| 	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM | ||||
| @@ -152,7 +156,7 @@ static u8 opcode_table[256] = { | ||||
| static u16 twobyte_table[256] = { | ||||
| 	/* 0x00 - 0x0F */ | ||||
| 	0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, | ||||
| 	0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | ||||
| 	0, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | ||||
| 	/* 0x10 - 0x1F */ | ||||
| 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, | ||||
| 	/* 0x20 - 0x2F */ | ||||
| @@ -481,6 +485,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | ||||
| 	int mode = ctxt->mode; | ||||
| 	unsigned long modrm_ea; | ||||
| 	int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0; | ||||
| 	int no_wb = 0; | ||||
|  | ||||
| 	/* Shadow copy of register state. Committed on successful emulation. */ | ||||
| 	unsigned long _regs[NR_VCPU_REGS]; | ||||
| @@ -1047,7 +1052,7 @@ done_prefixes: | ||||
| 						      _regs[VCPU_REGS_RSP]), | ||||
| 				     &dst.val, dst.bytes, ctxt)) != 0) | ||||
| 				goto done; | ||||
| 			dst.val = dst.orig_val;	/* skanky: disable writeback */ | ||||
| 			no_wb = 1; | ||||
| 			break; | ||||
| 		default: | ||||
| 			goto cannot_emulate; | ||||
| @@ -1056,7 +1061,7 @@ done_prefixes: | ||||
| 	} | ||||
|  | ||||
| writeback: | ||||
| 	if ((d & Mov) || (dst.orig_val != dst.val)) { | ||||
| 	if (!no_wb) { | ||||
| 		switch (dst.type) { | ||||
| 		case OP_REG: | ||||
| 			/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ | ||||
| @@ -1149,6 +1154,23 @@ special_insn: | ||||
| 	case 0xae ... 0xaf:	/* scas */ | ||||
| 		DPRINTF("Urk! I don't handle SCAS.\n"); | ||||
| 		goto cannot_emulate; | ||||
| 	case 0xf4:              /* hlt */ | ||||
| 		ctxt->vcpu->halt_request = 1; | ||||
| 		goto done; | ||||
| 	case 0xc3: /* ret */ | ||||
| 		dst.ptr = &_eip; | ||||
| 		goto pop_instruction; | ||||
| 	case 0x58 ... 0x5f: /* pop reg */ | ||||
| 		dst.ptr = (unsigned long *)&_regs[b & 0x7]; | ||||
|  | ||||
| pop_instruction: | ||||
| 		if ((rc = ops->read_std(register_address(ctxt->ss_base, | ||||
| 			_regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0) | ||||
| 			goto done; | ||||
|  | ||||
| 		register_address_increment(_regs[VCPU_REGS_RSP], op_bytes); | ||||
| 		no_wb = 1; /* Disable writeback. */ | ||||
| 		break; | ||||
| 	} | ||||
| 	goto writeback; | ||||
|  | ||||
| @@ -1302,8 +1324,10 @@ twobyte_insn: | ||||
|  | ||||
| twobyte_special_insn: | ||||
| 	/* Disable writeback. */ | ||||
| 	dst.orig_val = dst.val; | ||||
| 	no_wb = 1; | ||||
| 	switch (b) { | ||||
| 	case 0x09:		/* wbinvd */ | ||||
| 		break; | ||||
| 	case 0x0d:		/* GrpP (prefetch) */ | ||||
| 	case 0x18:		/* Grp16 (prefetch/nop) */ | ||||
| 		break; | ||||
|   | ||||
| @@ -139,6 +139,7 @@ err_put_filp: | ||||
| 	put_filp(file); | ||||
| 	return error; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(anon_inode_getfd); | ||||
|  | ||||
| /* | ||||
|  * A single inode exists for all anon_inode files. Contrary to pipes, | ||||
|   | ||||
| @@ -13,7 +13,6 @@ | ||||
| #define HPFS_SUPER_MAGIC	0xf995e849 | ||||
| #define ISOFS_SUPER_MAGIC	0x9660 | ||||
| #define JFFS2_SUPER_MAGIC	0x72b6 | ||||
| #define KVMFS_SUPER_MAGIC	0x19700426 | ||||
| #define ANON_INODE_FS_MAGIC	0x09041934 | ||||
|  | ||||
| #define MINIX_SUPER_MAGIC	0x137F		/* original minix fs */ | ||||
|   | ||||
| @@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, | ||||
| #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */ | ||||
| #define CPU_LOCK_ACQUIRE	0x0008 /* Acquire all hotcpu locks */ | ||||
| #define CPU_LOCK_RELEASE	0x0009 /* Release all hotcpu locks */ | ||||
| #define CPU_DYING		0x000A /* CPU (unsigned)v not running any task, | ||||
| 				        * not handling interrupts, soon dead */ | ||||
|  | ||||
| /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend | ||||
|  * operation in progress | ||||
| @@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, | ||||
| #define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) | ||||
| #define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN) | ||||
| #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN) | ||||
| #define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN) | ||||
|  | ||||
| #endif /* __KERNEL__ */ | ||||
| #endif /* _LINUX_NOTIFIER_H */ | ||||
|   | ||||
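CPU_DYING and CPU_DYING_FROZEN are delivered on the CPU that is going down, out of the stop_machine context added to take_cpu_down() below, so a handler must not sleep or take mutexes in that phase. A minimal sketch of a callback that splits its work across the phases accordingly (the callback and both helpers are illustrative, not part of this merge):

	/* Sketch: non-blocking, on-cpu work goes in CPU_DYING; anything that
	 * may sleep waits for CPU_DEAD, which runs later in process context
	 * on another cpu.
	 */
	static int example_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
	{
		int cpu = (long)hcpu;

		switch (action) {
		case CPU_DYING:
		case CPU_DYING_FROZEN:
			example_disable_feature_on(cpu);	/* must not sleep */
			break;
		case CPU_DEAD:
		case CPU_DEAD_FROZEN:
			example_release_state_for(cpu);		/* may sleep */
			break;
		}
		return NOTIFY_OK;
	}

The cpuset hunk near the end takes the complementary approach: its hotplug work can block, so it simply returns NOTIFY_DONE for the new phases.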
| @@ -7,6 +7,7 @@ | ||||
|  */ | ||||
|  | ||||
| #include <linux/errno.h> | ||||
| #include <asm/system.h> | ||||
|  | ||||
| extern void cpu_idle(void); | ||||
|  | ||||
| @@ -102,7 +103,11 @@ static inline void smp_send_reschedule(int cpu) { } | ||||
| static inline int smp_call_function_single(int cpuid, void (*func) (void *info), | ||||
| 					   void *info, int retry, int wait) | ||||
| { | ||||
| 	return -EBUSY; | ||||
| 	WARN_ON(cpuid != 0); | ||||
| 	local_irq_disable(); | ||||
| 	func(info); | ||||
| 	local_irq_enable(); | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| #endif /* !SMP */ | ||||
|   | ||||
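With the !SMP stub above now executing the function locally instead of returning -EBUSY, callers no longer need to special-case "the target is the cpu I am already on". A small usage sketch (the counter and both functions are made up for illustration):

	/* Sketch: behaves the same on UP and SMP, and whether or not "cpu"
	 * is the caller's own cpu; func runs on the target with interrupts
	 * disabled, and 0 is returned on success.
	 */
	static void example_read_counter(void *info)
	{
		*(unsigned long *)info = 42;	/* stand-in for per-cpu work */
	}

	static int example_sample_cpu(int cpu, unsigned long *value)
	{
		/* nonatomic/retry = 0, wait = 1: block until func has run */
		return smp_call_function_single(cpu, example_read_counter,
						value, 0, 1);
	}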
							
								
								
									
kernel/cpu.c (16 lines changed)
							| @@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu) | ||||
| 	write_unlock_irq(&tasklist_lock); | ||||
| } | ||||
|  | ||||
| struct take_cpu_down_param { | ||||
| 	unsigned long mod; | ||||
| 	void *hcpu; | ||||
| }; | ||||
|  | ||||
| /* Take this CPU down. */ | ||||
| static int take_cpu_down(void *unused) | ||||
| static int take_cpu_down(void *_param) | ||||
| { | ||||
| 	struct take_cpu_down_param *param = _param; | ||||
| 	int err; | ||||
|  | ||||
| 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, | ||||
| 				param->hcpu); | ||||
| 	/* Ensure this CPU doesn't handle any more interrupts. */ | ||||
| 	err = __cpu_disable(); | ||||
| 	if (err < 0) | ||||
| @@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | ||||
| 	cpumask_t old_allowed, tmp; | ||||
| 	void *hcpu = (void *)(long)cpu; | ||||
| 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | ||||
| 	struct take_cpu_down_param tcd_param = { | ||||
| 		.mod = mod, | ||||
| 		.hcpu = hcpu, | ||||
| 	}; | ||||
|  | ||||
| 	if (num_online_cpus() == 1) | ||||
| 		return -EBUSY; | ||||
| @@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | ||||
| 	set_cpus_allowed(current, tmp); | ||||
|  | ||||
| 	mutex_lock(&cpu_bitmask_lock); | ||||
| 	p = __stop_machine_run(take_cpu_down, NULL, cpu); | ||||
| 	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); | ||||
| 	mutex_unlock(&cpu_bitmask_lock); | ||||
|  | ||||
| 	if (IS_ERR(p) || cpu_online(cpu)) { | ||||
|   | ||||
| @@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void) | ||||
| static int cpuset_handle_cpuhp(struct notifier_block *nb, | ||||
| 				unsigned long phase, void *cpu) | ||||
| { | ||||
| 	if (phase == CPU_DYING || phase == CPU_DYING_FROZEN) | ||||
| 		return NOTIFY_DONE; | ||||
|  | ||||
| 	common_cpu_mem_hotplug_unplug(); | ||||
| 	return 0; | ||||
| } | ||||
|   | ||||