From 2c59cad6941cb55990fa6e19d84ae027c46991ee Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Mar 2013 21:20:20 +0100 Subject: [PATCH 1/6] x86, Kconfig: Move PARAVIRT_DEBUG into the paravirt menu This should be under the PARAVIRT_GUEST menu. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1362428421-9244-2-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4f24f5b1218..f922a9b9a319 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -659,8 +659,6 @@ config PARAVIRT_SPINLOCKS config PARAVIRT_CLOCK bool -endif - config PARAVIRT_DEBUG bool "paravirt-ops debugging" depends on PARAVIRT && DEBUG_KERNEL @@ -668,6 +666,8 @@ config PARAVIRT_DEBUG Enable to debug paravirt_ops internals. Specifically, BUG if a paravirt_op is missing when it is called. +endif #PARAVIRT_GUEST + config NO_BOOTMEM def_bool y From 6276a074c6519946c527f03e2ab69770a62652d9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Mar 2013 21:20:21 +0100 Subject: [PATCH 2/6] x86: Make Linux guest support optional Put all config options needed to run Linux as a guest behind a CONFIG_HYPERVISOR_GUEST menu so that they don't get built-in by default but be selectable by the user. Also, make all units which depend on x86_hyper, depend on this new symbol so that compilation doesn't fail when CONFIG_HYPERVISOR_GUEST is disabled but those units assume its presence. Sort options in the new HYPERVISOR_GUEST menu, adapt config text and drop redundant select. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1362428421-9244-3-git-send-email-bp@alien8.de Cc: Dmitry Torokhov Cc: K. Y. Srinivasan Cc: Haiyang Zhang Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 89 ++++++++++++++++--------------- arch/x86/include/asm/hypervisor.h | 16 ++++-- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/lguest/Kconfig | 3 +- arch/x86/xen/Kconfig | 2 +- drivers/hv/Kconfig | 2 +- drivers/misc/Kconfig | 2 +- 7 files changed, 62 insertions(+), 55 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f922a9b9a319..e9e4623bd4da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -389,7 +389,7 @@ config X86_NUMACHIP config X86_VSMP bool "ScaleMP vSMP" - select PARAVIRT_GUEST + select HYPERVISOR_GUEST select PARAVIRT depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM @@ -596,44 +596,17 @@ config SCHED_OMIT_FRAME_POINTER If in doubt, say "Y". -menuconfig PARAVIRT_GUEST - bool "Paravirtualized guest support" +menuconfig HYPERVISOR_GUEST + bool "Linux guest support" ---help--- - Say Y here to get to see options related to running Linux under - various hypervisors. This option alone does not add any kernel code. + Say Y here to enable options for running Linux under various hyper- + visors. This option enables basic hypervisor detection and platform + setup. - If you say N, all options in this submenu will be skipped and disabled. + If you say N, all options in this submenu will be skipped and + disabled, and Linux guest support won't be built in. -if PARAVIRT_GUEST - -config PARAVIRT_TIME_ACCOUNTING - bool "Paravirtual steal time accounting" - select PARAVIRT - default n - ---help--- - Select this option to enable fine granularity task steal time - accounting. Time spent executing other tasks in parallel with - the current vCPU is discounted from the vCPU power. To account for - that, there can be a small performance impact. - - If in doubt, say N here. - -source "arch/x86/xen/Kconfig" - -config KVM_GUEST - bool "KVM Guest support (including kvmclock)" - select PARAVIRT - select PARAVIRT - select PARAVIRT_CLOCK - default y if PARAVIRT_GUEST - ---help--- - This option enables various optimizations for running under the KVM - hypervisor. It includes a paravirtualized clock, so that instead - of relying on a PIT (or probably other) emulation by the - underlying device model, the host provides the guest with - timing infrastructure such as time of day, and system time - -source "arch/x86/lguest/Kconfig" +if HYPERVISOR_GUEST config PARAVIRT bool "Enable paravirtualization code" @@ -643,6 +616,13 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_DEBUG + bool "paravirt-ops debugging" + depends on PARAVIRT && DEBUG_KERNEL + ---help--- + Enable to debug paravirt_ops internals. Specifically, BUG if + a paravirt_op is missing when it is called. + config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" depends on PARAVIRT && SMP @@ -656,17 +636,38 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer N. +source "arch/x86/xen/Kconfig" + +config KVM_GUEST + bool "KVM Guest support (including kvmclock)" + depends on PARAVIRT + select PARAVIRT_CLOCK + default y + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. It includes a paravirtualized clock, so that instead + of relying on a PIT (or probably other) emulation by the + underlying device model, the host provides the guest with + timing infrastructure such as time of day, and system time + +source "arch/x86/lguest/Kconfig" + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + default n + ---help--- + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config PARAVIRT_CLOCK bool -config PARAVIRT_DEBUG - bool "paravirt-ops debugging" - depends on PARAVIRT && DEBUG_KERNEL - ---help--- - Enable to debug paravirt_ops internals. Specifically, BUG if - a paravirt_op is missing when it is called. - -endif #PARAVIRT_GUEST +endif #HYPERVISOR_GUEST config NO_BOOTMEM def_bool y diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 86095ed14135..2d4b5e6107cd 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -20,13 +20,11 @@ #ifndef _ASM_X86_HYPERVISOR_H #define _ASM_X86_HYPERVISOR_H +#ifdef CONFIG_HYPERVISOR_GUEST + #include #include -extern void init_hypervisor(struct cpuinfo_x86 *c); -extern void init_hypervisor_platform(void); -extern bool hypervisor_x2apic_available(void); - /* * x86 hypervisor information */ @@ -55,4 +53,12 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; -#endif +extern void init_hypervisor(struct cpuinfo_x86 *c); +extern void init_hypervisor_platform(void); +extern bool hypervisor_x2apic_available(void); +#else +static inline void init_hypervisor(struct cpuinfo_x86 *c) { } +static inline void init_hypervisor_platform(void) { } +static inline bool hypervisor_x2apic_available(void) { return false; } +#endif /* CONFIG_HYPERVISOR_GUEST */ +#endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0e067d3d96c..5f81bcefbe14 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -14,7 +14,6 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o mshyperv.o obj-y += rdrand.o obj-y += match.o @@ -42,6 +41,8 @@ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o + quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 29043d2048a0..4a0890f815c4 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -1,7 +1,6 @@ config LGUEST_GUEST bool "Lguest guest support" - select PARAVIRT - depends on X86_32 + depends on X86_32 && PARAVIRT select TTY select VIRTUALIZATION select VIRTIO diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 131dacd2748a..1a3c76505649 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -4,7 +4,7 @@ config XEN bool "Xen guest support" - select PARAVIRT + depends on PARAVIRT select PARAVIRT_CLOCK select XEN_HAVE_PVMMU depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 64630f15f181..0403b51d20ba 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -2,7 +2,7 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on X86 && ACPI && PCI && X86_LOCAL_APIC + depends on X86 && ACPI && PCI && X86_LOCAL_APIC && HYPERVISOR_GUEST help Select this option to run Linux as a Hyper-V client operating system. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index e83fdfe0c8ca..891123e31932 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -418,7 +418,7 @@ config TI_DAC7512 config VMWARE_BALLOON tristate "VMware Balloon Driver" - depends on X86 + depends on X86 && HYPERVISOR_GUEST help This is VMware physical memory management driver which acts like a "balloon" that can be inflated to reclaim physical pages From e7a5cd063c7b4c58417f674821d63f5eb6747e37 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 5 Apr 2013 16:42:21 -0400 Subject: [PATCH 3/6] x86-64, gdt: Store/load GDT for ACPI S3 or hibernate/resume path is not needed. During the ACPI S3 resume path the trampoline code handles it already. During the ACPI S3 suspend phase (acpi_suspend_lowlevel) we set: early_gdt_descr.address = (..)get_cpu_gdt_table(smp_processor_id()); which is then used during the resume path and has the same exact value as what the store/load_gdt do with the saved_context (which is saved/restored via save/restore_processor_state()). The flow during resume is complex and for 64-bit kernels we use three GDTs - one early bootstrap GDT (wakeup_igdt) that we load to workaround broken BIOSes, an early Protected Mode to Long Mode transition one (tr_gdt), and the final one - early_gdt_descr (which points to the real GDT). The early ('wakeup_gdt') is loaded in 'trampoline_start' for working around broken BIOSes, and then when we end up in Protected Mode in the startup_32 (in trampoline_64.s, not head_32.s) we use the 'tr_gdt' (still in trampoline_64.s). This 'tr_gdt' has a a 32-bit code segment, 64-bit code segment with L=1, and a 32-bit data segment. Once we have transitioned from Protected Mode to Long Mode we then set the GDT to 'early_gdt_desc' and then via an iretq emerge in wakeup_long64 (set via 'initial_code' variable in acpi_suspend_lowlevel). In the wakeup_long64 we end up restoring the %rip (which is set to 'resume_point') and jump there. In 'resume_point' we call 'restore_processor_state' which does the load_gdt on the saved context. This load_gdt is redundant as the GDT loaded via early_gdt_desc is the same. Here is the call-chain: wakeup_start |- lgdtl wakeup_gdt [the work-around broken BIOSes] | \-- trampoline_start (trampoline_64.S) |- lgdtl tr_gdt | \-- startup_32 (trampoline_64.S) | \-- startup_64 (trampoline_64.S) | \-- secondary_startup_64 |- lgdtl early_gdt_desc | ... |- movq initial_code(%rip), %eax |-.. lretq \-- wakeup_64 |-- other registers are reloaded |-- call restore_processor_state The hibernate path is much simpler. During the saving of the hibernation image we call save_processor_state() and save the contents of that along with the rest of the kernel in the hibernation image destination. We save the EIP of 'restore_registers' (restore_jump_address) and cr3 (restore_cr3). During hibernate resume, the 'restore_registers' (via the 'restore_jump_address) in hibernate_asm_64.S is invoked which restores the contents of most registers. Naturally the resume path benefits from already being in 64-bit mode, so it does not have to load the GDT. It only reloads the cr3 (from restore_cr3) and continues on. Note that the restoration of the restore image page-tables is done prior to this. After the 'restore_registers' it returns and we end up called restore_processor_state() - where we reload the GDT. The reload of the GDT is not needed as bootup kernel has already loaded the GDT which is at the same physical location as the the restored kernel. Note that the hibernation path assumes the GDT is correct during its 'restore_registers'. The assumption in the code is that the restored image is the same as saved - meaning we are not trying to restore an different kernel in the virtual address space of a new kernel. Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1365194544-14648-2-git-send-email-konrad.wilk@oracle.com Cc: Rafael J. Wysocki Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/suspend_64.h | 3 --- arch/x86/power/cpu.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 09b0bf104156..97b84e08a211 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -25,9 +25,6 @@ struct saved_context { u64 misc_enable; bool misc_enable_saved; unsigned long efer; - u16 gdt_pad; - u16 gdt_limit; - unsigned long gdt_base; u16 idt_pad; u16 idt_limit; unsigned long idt_base; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 120cee1c3f8d..6bd94233669c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -65,7 +65,6 @@ static void __save_processor_state(struct saved_context *ctxt) store_idt(&ctxt->idt); #else /* CONFIG_X86_64 */ - store_gdt((struct desc_ptr *)&ctxt->gdt_limit); store_idt((struct desc_ptr *)&ctxt->idt_limit); #endif store_tr(ctxt->tr); @@ -186,7 +185,6 @@ static void __restore_processor_state(struct saved_context *ctxt) load_idt(&ctxt->idt); #else /* CONFIG_X86_64 */ - load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); load_idt((const struct desc_ptr *)&ctxt->idt_limit); #endif From 84e70971e67d97bc2db18a4e76d42846272a54bd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 5 Apr 2013 16:42:22 -0400 Subject: [PATCH 4/6] x86-32, gdt: Store/load GDT for ACPI S3 or hibernation/resume path is not needed During the ACPI S3 suspend, we store the GDT in the wakup_header (see wakeup_asm.s) field called 'pmode_gdt'. Which is then used during the resume path and has the same exact value as what the store/load_gdt do with the saved_context (which is saved/restored via save/restore_processor_state()). The flow during resume from ACPI S3 is simpler than the 64-bit counterpart. We only use the early bootstrap once (wakeup_gdt) and do various checks in real mode. After the checks are completed, we load the saved GDT ('pmode_gdt') and continue on with the resume (by heading to startup_32 in trampoline_32.S) - which quickly jumps to what was saved in 'pmode_entry' aka 'wakeup_pmode_return'. The 'wakeup_pmode_return' restores the GDT (saved_gdt) again (which was saved in do_suspend_lowlevel initially). After that it ends up calling the 'ret_point' which calls 'restore_processor_state()'. We have two opportunities to remove code where we restore the same GDT twice. Here is the call chain: wakeup_start |- lgdtl wakeup_gdt [the work-around broken BIOSes] | | - lgdtl pmode_gdt [the real one] | \-- startup_32 (in trampoline_32.S) \-- wakeup_pmode_return (in wakeup_32.S) |- lgdtl saved_gdt [the real one] \-- ret_point |.. |- call restore_processor_state The hibernate path is much simpler. During the saving of the hibernation image we call save_processor_state() and save the contents of that along with the rest of the kernel in the hibernation image destination. We save the EIP of 'restore_registers' (restore_jump_address) and cr3 (restore_cr3). During hibernate resume, the 'restore_registers' (via the 'restore_jump_address) in hibernate_asm_32.S is invoked which restores the contents of most registers. Naturally the resume path benefits from already being in 32-bit mode, so it does not have to reload the GDT. It only reloads the cr3 (from restore_cr3) and continues on. Note that the restoration of the restore image page-tables is done prior to this. After the 'restore_registers' it returns and we end up called restore_processor_state() - where we reload the GDT. The reload of the GDT is not needed as bootup kernel has already loaded the GDT which is at the same physical location as the the restored kernel. Note that the hibernation path assumes the GDT is correct during its 'restore_registers'. The assumption in the code is that the restored image is the same as saved - meaning we are not trying to restore an different kernel in the virtual address space of a new kernel. Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1365194544-14648-3-git-send-email-konrad.wilk@oracle.com Cc: Rafael J. Wysocki Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/suspend_32.h | 1 - arch/x86/kernel/acpi/wakeup_32.S | 3 --- arch/x86/power/cpu.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 487055c8c1aa..f6064b7385b0 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -15,7 +15,6 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; - struct desc_ptr gdt; struct desc_ptr idt; u16 ldt; u16 tss; diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 13ab720573e3..91adb1be24bc 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -18,7 +18,6 @@ wakeup_pmode_return: movw %ax, %gs # reload the gdt, as we need the full 32 bit address - lgdt saved_gdt lidt saved_idt lldt saved_ldt ljmp $(__KERNEL_CS), $1f @@ -44,7 +43,6 @@ bogus_magic: save_registers: - sgdt saved_gdt sidt saved_idt sldt saved_ldt str saved_tss @@ -93,7 +91,6 @@ ENTRY(saved_magic) .long 0 ENTRY(saved_eip) .long 0 # saved registers -saved_gdt: .long 0,0 saved_idt: .long 0,0 saved_ldt: .long 0 saved_tss: .long 0 diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 6bd94233669c..82c39c532349 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -61,7 +61,6 @@ static void __save_processor_state(struct saved_context *ctxt) * descriptor tables */ #ifdef CONFIG_X86_32 - store_gdt(&ctxt->gdt); store_idt(&ctxt->idt); #else /* CONFIG_X86_64 */ @@ -181,7 +180,6 @@ static void __restore_processor_state(struct saved_context *ctxt) * ltr is done i fix_processor_context(). */ #ifdef CONFIG_X86_32 - load_gdt(&ctxt->gdt); load_idt(&ctxt->idt); #else /* CONFIG_X86_64 */ From 357d122670937c35b33d99c46356ef2b63182a1f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 5 Apr 2013 16:42:23 -0400 Subject: [PATCH 5/6] x86, xen, gdt: Remove the pvops variant of store_gdt. The two use-cases where we needed to store the GDT were during ACPI S3 suspend and resume. As the patches: x86/gdt/i386: store/load GDT for ACPI S3 or hibernation/resume path is not needed x86/gdt/64-bit: store/load GDT for ACPI S3 or hibernate/resume path is not needed. have demonstrated - there are other mechanism by which the GDT is saved and reloaded during early resume path. Hence we do not need to worry about the pvops call-chain for saving the GDT and can and can eliminate it. The other areas where the store_gdt is used are never going to be hit when running under the pvops platforms. Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1365194544-14648-4-git-send-email-konrad.wilk@oracle.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 4 ---- arch/x86/include/asm/paravirt_types.h | 2 +- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/doublefault_32.c | 2 +- arch/x86/kernel/paravirt.c | 1 - arch/x86/kvm/vmx.c | 2 +- arch/x86/xen/enlighten.c | 1 - 7 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5edd1742cfd0..736512869950 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -262,10 +262,6 @@ static inline void set_ldt(const void *addr, unsigned entries) { PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); } -static inline void store_gdt(struct desc_ptr *dtr) -{ - PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); -} static inline void store_idt(struct desc_ptr *dtr) { PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 142236ed83af..b6c69a6917c8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -122,7 +122,7 @@ struct pv_cpu_ops { void (*load_tr_desc)(void); void (*load_gdt)(const struct desc_ptr *); void (*load_idt)(const struct desc_ptr *); - void (*store_gdt)(struct desc_ptr *); + /* store_gdt has been removed. */ void (*store_idt)(struct desc_ptr *); void (*set_ldt)(const void *desc, unsigned entries); unsigned long (*store_tr)(void); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 0532f5d6e4ef..b44577bc9744 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -46,7 +46,7 @@ int acpi_suspend_lowlevel(void) header->pmode_behavior = 0; #ifndef CONFIG_64BIT - store_gdt((struct desc_ptr *)&header->pmode_gdt); + native_store_gdt((struct desc_ptr *)&header->pmode_gdt); if (!rdmsr_safe(MSR_EFER, &header->pmode_efer_low, diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 37250fe490b1..155a13f33ed8 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -20,7 +20,7 @@ static void doublefault_fn(void) struct desc_ptr gdt_desc = {0, 0}; unsigned long gdt, tss; - store_gdt(&gdt_desc); + native_store_gdt(&gdt_desc); gdt = gdt_desc.address; printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 17fff18a1031..4ae3d2305db1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -360,7 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = { .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, .load_idt = native_load_idt, - .store_gdt = native_store_gdt, .store_idt = native_store_idt, .store_tr = native_store_tr, .load_tls = native_load_tls, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6667042714cc..867b81037f96 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2459,7 +2459,7 @@ static int hardware_enable(void *garbage) ept_sync_global(); } - store_gdt(&__get_cpu_var(host_gdt)); + native_store_gdt(&__get_cpu_var(host_gdt)); return 0; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8e1c7b95c3b..ddab5390d548 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1220,7 +1220,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .alloc_ldt = xen_alloc_ldt, .free_ldt = xen_free_ldt, - .store_gdt = native_store_gdt, .store_idt = native_store_idt, .store_tr = xen_store_tr, From 4d681be3c33dd74efffbe2a8f70634f7128602ec Mon Sep 17 00:00:00 2001 From: "konrad@kernel.org" Date: Fri, 5 Apr 2013 16:42:24 -0400 Subject: [PATCH 6/6] x86, wakeup, sleep: Use pvops functions for changing GDT entries We check the TSS descriptor before we try to dereference it. Also we document what the value '9' actually means using the AMD64 Architecture Programmer's Manual Volume 2, pg 90: "Hex value 9: Available 64-bit TSS" and pg 91: "The available 32-bit TSS (09h), which is redefined as the available 64-bit TSS." Without this, on Xen, where the GDT is available as R/O (to protect the hypervisor from the guest modifying it), we end up with a pagetable fault. Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1365194544-14648-5-git-send-email-konrad.wilk@oracle.com Cc: Rafael J. Wysocki Signed-off-by: H. Peter Anvin --- arch/x86/power/cpu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 82c39c532349..168da8429032 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -132,7 +132,10 @@ static void fix_processor_context(void) { int cpu = smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); - +#ifdef CONFIG_X86_64 + struct desc_struct *desc = get_cpu_gdt_table(cpu); + tss_desc tss; +#endif set_tss_desc(cpu, t); /* * This just modifies memory; should not be * necessary. But... This is necessary, because @@ -141,7 +144,9 @@ static void fix_processor_context(void) */ #ifdef CONFIG_X86_64 - get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); + tss.type = 0x9; /* The available 64-bit TSS (see AMD vol 2, pg 91 */ + write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); syscall_init(); /* This sets MSR_*STAR and related */ #endif