x86: move stack_canary into irq_stack
Impact: x86_64 percpu area layout change, irq_stack now at the beginning

Now that the PDA is empty except for the stack canary, it can be removed. The irqstack is moved to the start of the per-cpu section. If the stack protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack.

tj:
* updated subject
* dropped asm relocation of irq_stack_ptr
* updated comments a bit
* rebased on top of stack canary changes

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
8c7e58e690
commit
947e76cdc3
@ -17,9 +17,6 @@ struct x8664_pda {
|
|||||||
unsigned long unused4;
|
unsigned long unused4;
|
||||||
int unused5;
|
int unused5;
|
||||||
unsigned int unused6; /* 36 was cpunumber */
|
unsigned int unused6; /* 36 was cpunumber */
|
||||||
unsigned long stack_canary; /* 40 stack canary value */
|
|
||||||
/* gcc-ABI: this canary MUST be at
|
|
||||||
offset 40!!! */
|
|
||||||
short in_bootmem; /* pda lives in bootmem */
|
short in_bootmem; /* pda lives in bootmem */
|
||||||
} ____cacheline_aligned_in_smp;
|
} ____cacheline_aligned_in_smp;
|
||||||
|
|
||||||
|
@ -133,12 +133,6 @@ do { \
|
|||||||
/* We can use this directly for local CPU (faster). */
|
/* We can use this directly for local CPU (faster). */
|
||||||
DECLARE_PER_CPU(unsigned long, this_cpu_off);
|
DECLARE_PER_CPU(unsigned long, this_cpu_off);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
extern void load_pda_offset(int cpu);
|
|
||||||
#else
|
|
||||||
static inline void load_pda_offset(int cpu) { }
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* !__ASSEMBLY__ */
|
#endif /* !__ASSEMBLY__ */
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
|
@ -379,8 +379,29 @@ union thread_xstate {
|
|||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
||||||
|
|
||||||
DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
|
union irq_stack_union {
|
||||||
|
char irq_stack[IRQ_STACK_SIZE];
|
||||||
|
/*
|
||||||
|
* GCC hardcodes the stack canary as %gs:40. Since the
|
||||||
|
* irq_stack is the object at %gs:0, we reserve the bottom
|
||||||
|
* 48 bytes of the irq stack for the canary.
|
||||||
|
*/
|
||||||
|
struct {
|
||||||
|
char gs_base[40];
|
||||||
|
unsigned long stack_canary;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
|
||||||
DECLARE_PER_CPU(char *, irq_stack_ptr);
|
DECLARE_PER_CPU(char *, irq_stack_ptr);
|
||||||
|
|
||||||
|
static inline void load_gs_base(int cpu)
|
||||||
|
{
|
||||||
|
/* Memory clobbers used to order pda/percpu accesses */
|
||||||
|
mb();
|
||||||
|
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
|
||||||
|
mb();
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void print_cpu_info(struct cpuinfo_x86 *);
|
extern void print_cpu_info(struct cpuinfo_x86 *);
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
#define _ASM_STACKPROTECTOR_H 1
|
#define _ASM_STACKPROTECTOR_H 1
|
||||||
|
|
||||||
#include <asm/tsc.h>
|
#include <asm/tsc.h>
|
||||||
#include <asm/pda.h>
|
#include <asm/processor.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the stackprotector canary value.
|
* Initialize the stackprotector canary value.
|
||||||
@ -19,7 +19,7 @@ static __always_inline void boot_init_stack_canary(void)
|
|||||||
* Build time only check to make sure the stack_canary is at
|
* Build time only check to make sure the stack_canary is at
|
||||||
* offset 40 in the pda; this is a gcc ABI requirement
|
* offset 40 in irq_stack_union (%gs:40); this is a gcc ABI requirement
|
||||||
*/
|
*/
|
||||||
BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
|
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We both use the random pool and the current TSC as a source
|
* We both use the random pool and the current TSC as a source
|
||||||
@ -32,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void)
|
|||||||
canary += tsc + (tsc << 32UL);
|
canary += tsc + (tsc << 32UL);
|
||||||
|
|
||||||
current->stack_canary = canary;
|
current->stack_canary = canary;
|
||||||
write_pda(stack_canary, canary);
|
percpu_write(irq_stack_union.stack_canary, canary);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -89,10 +89,10 @@ do { \
|
|||||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||||
#define __switch_canary \
|
#define __switch_canary \
|
||||||
"movq %P[task_canary](%%rsi),%%r8\n\t" \
|
"movq %P[task_canary](%%rsi),%%r8\n\t" \
|
||||||
"movq %%r8,%%gs:%P[pda_canary]\n\t"
|
"movq %%r8,%%gs:%P[gs_canary]\n\t"
|
||||||
#define __switch_canary_param \
|
#define __switch_canary_param \
|
||||||
, [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \
|
, [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \
|
||||||
, [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))
|
, [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
|
||||||
#else /* CC_STACKPROTECTOR */
|
#else /* CC_STACKPROTECTOR */
|
||||||
#define __switch_canary
|
#define __switch_canary
|
||||||
#define __switch_canary_param
|
#define __switch_canary_param
|
||||||
|
@ -48,10 +48,6 @@ int main(void)
|
|||||||
#endif
|
#endif
|
||||||
BLANK();
|
BLANK();
|
||||||
#undef ENTRY
|
#undef ENTRY
|
||||||
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
|
|
||||||
DEFINE(pda_size, sizeof(struct x8664_pda));
|
|
||||||
BLANK();
|
|
||||||
#undef ENTRY
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
BLANK();
|
BLANK();
|
||||||
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
||||||
|
@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid);
|
|||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
||||||
|
|
||||||
DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
|
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||||
|
irq_stack_union) __aligned(PAGE_SIZE);
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
|
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
|
||||||
#else
|
#else
|
||||||
DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
||||||
per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
|
per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DEFINE_PER_CPU(unsigned long, kernel_stack) =
|
DEFINE_PER_CPU(unsigned long, kernel_stack) =
|
||||||
@ -960,7 +961,7 @@ void __cpuinit cpu_init(void)
|
|||||||
|
|
||||||
loadsegment(fs, 0);
|
loadsegment(fs, 0);
|
||||||
loadsegment(gs, 0);
|
loadsegment(gs, 0);
|
||||||
load_pda_offset(cpu);
|
load_gs_base(cpu);
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
if (cpu != 0 && percpu_read(node_number) == 0 &&
|
if (cpu != 0 && percpu_read(node_number) == 0 &&
|
||||||
|
@ -242,13 +242,10 @@ ENTRY(secondary_startup_64)
|
|||||||
|
|
||||||
/* Set up %gs.
|
/* Set up %gs.
|
||||||
*
|
*
|
||||||
* On SMP, %gs should point to the per-cpu area. For initial
|
* The base of %gs always points to the bottom of the irqstack
|
||||||
* boot, make %gs point to the init data section. For a
|
* union. If the stack protector canary is enabled, it is
|
||||||
* secondary CPU,initial_gs should be set to its pda address
|
* located at %gs:40. Note that, on SMP, the boot cpu uses the
|
||||||
* before the CPU runs this code.
|
* init data section until the per-cpu areas are set up.
|
||||||
*
|
|
||||||
* On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
|
|
||||||
* change.
|
|
||||||
*/
|
*/
|
||||||
movl $MSR_GS_BASE,%ecx
|
movl $MSR_GS_BASE,%ecx
|
||||||
movq initial_gs(%rip),%rax
|
movq initial_gs(%rip),%rax
|
||||||
@ -281,7 +278,7 @@ ENTRY(secondary_startup_64)
|
|||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
.quad __per_cpu_load
|
.quad __per_cpu_load
|
||||||
#else
|
#else
|
||||||
.quad PER_CPU_VAR(__pda)
|
.quad PER_CPU_VAR(irq_stack_union)
|
||||||
#endif
|
#endif
|
||||||
__FINITDATA
|
__FINITDATA
|
||||||
|
|
||||||
|
@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
|
|||||||
static inline void setup_node_to_cpumask_map(void) { }
|
static inline void setup_node_to_cpumask_map(void) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* Define load_pda_offset() and per-cpu __pda for x86_64.
|
|
||||||
* load_pda_offset() is responsible for loading the offset of pda into
|
|
||||||
* %gs.
|
|
||||||
*
|
|
||||||
* On SMP, pda offset also duals as percpu base address and thus it
|
|
||||||
* should be at the start of per-cpu area. To achieve this, it's
|
|
||||||
* preallocated in vmlinux_64.lds.S directly instead of using
|
|
||||||
* DEFINE_PER_CPU().
|
|
||||||
*/
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
void __cpuinit load_pda_offset(int cpu)
|
|
||||||
{
|
|
||||||
/* Memory clobbers used to order pda/percpu accesses */
|
|
||||||
mb();
|
|
||||||
wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
|
|
||||||
mb();
|
|
||||||
}
|
|
||||||
#ifndef CONFIG_SMP
|
|
||||||
DEFINE_PER_CPU(struct x8664_pda, __pda);
|
|
||||||
#endif
|
|
||||||
EXPORT_PER_CPU_SYMBOL(__pda);
|
|
||||||
#endif /* CONFIG_SMP && CONFIG_X86_64 */
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
|
|
||||||
/* correctly size the local cpu masks */
|
/* correctly size the local cpu masks */
|
||||||
@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
|
|||||||
per_cpu(cpu_number, cpu) = cpu;
|
per_cpu(cpu_number, cpu) = cpu;
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
per_cpu(irq_stack_ptr, cpu) =
|
per_cpu(irq_stack_ptr, cpu) =
|
||||||
(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
|
per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
|
||||||
/*
|
/*
|
||||||
* CPU0 modified pda in the init data area, reload pda
|
* Up to this point, CPU0 has been using the .data.init
|
||||||
* offset for CPU0 and clear the area for others.
|
* area. Reload the %gs base for CPU0.
|
||||||
*/
|
*/
|
||||||
if (cpu == 0)
|
if (cpu == 0)
|
||||||
load_pda_offset(0);
|
load_gs_base(cpu);
|
||||||
else
|
|
||||||
memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
||||||
|
@ -220,8 +220,7 @@ SECTIONS
|
|||||||
* so that it can be accessed as a percpu variable.
|
* so that it can be accessed as a percpu variable.
|
||||||
*/
|
*/
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
|
PERCPU_VADDR(0, :percpu)
|
||||||
per_cpu____pda = __per_cpu_start;
|
|
||||||
#else
|
#else
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(PAGE_SIZE)
|
||||||
#endif
|
#endif
|
||||||
@ -262,3 +261,8 @@ SECTIONS
|
|||||||
*/
|
*/
|
||||||
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
|
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
|
||||||
"kernel image bigger than KERNEL_IMAGE_SIZE")
|
"kernel image bigger than KERNEL_IMAGE_SIZE")
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
ASSERT((per_cpu__irq_stack_union == 0),
|
||||||
|
"irq_stack_union is not at start of per-cpu area");
|
||||||
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user