Merge branch 'core/percpu' into perfcounters/core
Conflicts: arch/x86/include/asm/hardirq_32.h arch/x86/include/asm/hardirq_64.h Semantic merge: arch/x86/include/asm/hardirq.h [ added apic_perf_irqs field. ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
commit
bfe2a3c3b5
@ -84,7 +84,7 @@ void build_cpu_to_node_map(void);
|
||||
.child = NULL, \
|
||||
.groups = NULL, \
|
||||
.min_interval = 8, \
|
||||
.max_interval = 8*(min(num_online_cpus(), 32)), \
|
||||
.max_interval = 8*(min(num_online_cpus(), 32U)), \
|
||||
.busy_factor = 64, \
|
||||
.imbalance_pct = 125, \
|
||||
.cache_nice_tries = 2, \
|
||||
|
@ -391,6 +391,13 @@ config X86_RDC321X
|
||||
as R-8610-(G).
|
||||
If you don't have one of these chips, you should say N here.
|
||||
|
||||
config X86_UV
|
||||
bool "SGI Ultraviolet"
|
||||
depends on X86_64
|
||||
help
|
||||
This option is needed in order to support SGI Ultraviolet systems.
|
||||
If you don't have one of these, you should say N here.
|
||||
|
||||
config SCHED_OMIT_FRAME_POINTER
|
||||
def_bool y
|
||||
prompt "Single-depth WCHAN output"
|
||||
@ -1341,13 +1348,17 @@ config SECCOMP
|
||||
|
||||
If unsure, say Y. Only embedded should say N here.
|
||||
|
||||
config CC_STACKPROTECTOR_ALL
|
||||
bool
|
||||
|
||||
config CC_STACKPROTECTOR
|
||||
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
|
||||
depends on X86_64 && EXPERIMENTAL && BROKEN
|
||||
depends on X86_64
|
||||
select CC_STACKPROTECTOR_ALL
|
||||
help
|
||||
This option turns on the -fstack-protector GCC feature. This
|
||||
feature puts, at the beginning of critical functions, a canary
|
||||
value on the stack just before the return address, and validates
|
||||
This option turns on the -fstack-protector GCC feature. This
|
||||
feature puts, at the beginning of functions, a canary value on
|
||||
the stack just before the return address, and validates
|
||||
the value just before actually returning. Stack based buffer
|
||||
overflows (that need to overwrite this return address) now also
|
||||
overwrite the canary, which gets detected and the attack is then
|
||||
@ -1355,15 +1366,8 @@ config CC_STACKPROTECTOR
|
||||
|
||||
This feature requires gcc version 4.2 or above, or a distribution
|
||||
gcc with the feature backported. Older versions are automatically
|
||||
detected and for those versions, this configuration option is ignored.
|
||||
|
||||
config CC_STACKPROTECTOR_ALL
|
||||
bool "Use stack-protector for all functions"
|
||||
depends on CC_STACKPROTECTOR
|
||||
help
|
||||
Normally, GCC only inserts the canary value protection for
|
||||
functions that use large-ish on-stack buffers. By enabling
|
||||
this option, GCC will be asked to do this for ALL functions.
|
||||
detected and for those versions, this configuration option is
|
||||
ignored. (and a warning is printed during bootup)
|
||||
|
||||
source kernel/Kconfig.hz
|
||||
|
||||
|
@ -292,25 +292,23 @@ config X86_CPU
|
||||
# Define implied options from the CPU selection here
|
||||
config X86_L1_CACHE_BYTES
|
||||
int
|
||||
default "128" if GENERIC_CPU || MPSC
|
||||
default "64" if MK8 || MCORE2
|
||||
depends on X86_64
|
||||
default "128" if MPSC
|
||||
default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32
|
||||
|
||||
config X86_INTERNODE_CACHE_BYTES
|
||||
int
|
||||
default "4096" if X86_VSMP
|
||||
default X86_L1_CACHE_BYTES if !X86_VSMP
|
||||
depends on X86_64
|
||||
|
||||
config X86_CMPXCHG
|
||||
def_bool X86_64 || (X86_32 && !M386)
|
||||
|
||||
config X86_L1_CACHE_SHIFT
|
||||
int
|
||||
default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC
|
||||
default "7" if MPENTIUM4 || MPSC
|
||||
default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
|
||||
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
|
||||
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
|
||||
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
|
||||
|
||||
config X86_XADD
|
||||
def_bool y
|
||||
|
@ -117,6 +117,7 @@ config DEBUG_RODATA
|
||||
config DEBUG_RODATA_TEST
|
||||
bool "Testcase for the DEBUG_RODATA feature"
|
||||
depends on DEBUG_RODATA
|
||||
default y
|
||||
help
|
||||
This option enables a testcase for the DEBUG_RODATA
|
||||
feature as well as for the change_page_attr() infrastructure.
|
||||
|
@ -73,7 +73,7 @@ else
|
||||
|
||||
stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
|
||||
stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
|
||||
"$(CC)" -fstack-protector )
|
||||
"$(CC)" "-fstack-protector -DGCC_HAS_SP" )
|
||||
stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
|
||||
"$(CC)" -fstack-protector-all )
|
||||
|
||||
|
@ -138,11 +138,4 @@ struct genapic {
|
||||
extern struct genapic *genapic;
|
||||
extern void es7000_update_genapic_to_cluster(void);
|
||||
|
||||
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
|
||||
#define get_uv_system_type() UV_NONE
|
||||
#define is_uv_system() 0
|
||||
#define uv_wakeup_secondary(a, b) 1
|
||||
#define uv_system_init() do {} while (0)
|
||||
|
||||
|
||||
#endif /* _ASM_X86_GENAPIC_32_H */
|
||||
|
@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
|
||||
extern int acpi_madt_oem_check(char *, char *);
|
||||
|
||||
extern void apic_send_IPI_self(int vector);
|
||||
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
|
||||
extern enum uv_system_type get_uv_system_type(void);
|
||||
extern int is_uv_system(void);
|
||||
|
||||
extern struct genapic apic_x2apic_uv_x;
|
||||
DECLARE_PER_CPU(int, x2apic_extra_bits);
|
||||
extern void uv_cpu_init(void);
|
||||
extern void uv_system_init(void);
|
||||
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
|
||||
|
||||
extern void setup_apic_routing(void);
|
||||
|
||||
|
@ -1,11 +1,53 @@
|
||||
#ifdef CONFIG_X86_32
|
||||
# include "hardirq_32.h"
|
||||
#else
|
||||
# include "hardirq_64.h"
|
||||
#ifndef _ASM_X86_HARDIRQ_H
|
||||
#define _ASM_X86_HARDIRQ_H
|
||||
|
||||
#include <linux/threads.h>
|
||||
#include <linux/irq.h>
|
||||
|
||||
typedef struct {
|
||||
unsigned int __softirq_pending;
|
||||
unsigned int __nmi_count; /* arch dependent */
|
||||
unsigned int irq0_irqs;
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
unsigned int apic_timer_irqs; /* arch dependent */
|
||||
unsigned int irq_spurious_count;
|
||||
#endif
|
||||
unsigned int apic_perf_irqs;
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned int irq_resched_count;
|
||||
unsigned int irq_call_count;
|
||||
unsigned int irq_tlb_count;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
unsigned int irq_thermal_count;
|
||||
# ifdef CONFIG_X86_64
|
||||
unsigned int irq_threshold_count;
|
||||
# endif
|
||||
#endif
|
||||
} ____cacheline_aligned irq_cpustat_t;
|
||||
|
||||
DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
|
||||
|
||||
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
|
||||
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
|
||||
|
||||
#define __ARCH_IRQ_STAT
|
||||
|
||||
#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
|
||||
|
||||
#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
|
||||
|
||||
#define __ARCH_SET_SOFTIRQ_PENDING
|
||||
|
||||
#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
|
||||
#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
|
||||
|
||||
extern void ack_bad_irq(unsigned int irq);
|
||||
|
||||
extern u64 arch_irq_stat_cpu(unsigned int cpu);
|
||||
#define arch_irq_stat_cpu arch_irq_stat_cpu
|
||||
|
||||
extern u64 arch_irq_stat(void);
|
||||
#define arch_irq_stat arch_irq_stat
|
||||
|
||||
#endif /* _ASM_X86_HARDIRQ_H */
|
||||
|
@ -1,34 +0,0 @@
|
||||
#ifndef _ASM_X86_HARDIRQ_32_H
|
||||
#define _ASM_X86_HARDIRQ_32_H
|
||||
|
||||
#include <linux/threads.h>
|
||||
#include <linux/irq.h>
|
||||
|
||||
typedef struct {
|
||||
unsigned int __softirq_pending;
|
||||
unsigned long idle_timestamp;
|
||||
unsigned int __nmi_count; /* arch dependent */
|
||||
unsigned int apic_timer_irqs; /* arch dependent */
|
||||
unsigned int apic_perf_irqs; /* arch dependent */
|
||||
unsigned int irq0_irqs;
|
||||
unsigned int irq_resched_count;
|
||||
unsigned int irq_call_count;
|
||||
unsigned int irq_tlb_count;
|
||||
unsigned int irq_thermal_count;
|
||||
unsigned int irq_spurious_count;
|
||||
} ____cacheline_aligned irq_cpustat_t;
|
||||
|
||||
DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
|
||||
|
||||
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
|
||||
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
|
||||
|
||||
#define __ARCH_IRQ_STAT
|
||||
#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
|
||||
|
||||
#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
|
||||
|
||||
void ack_bad_irq(unsigned int irq);
|
||||
#include <linux/irq_cpustat.h>
|
||||
|
||||
#endif /* _ASM_X86_HARDIRQ_32_H */
|
@ -1,40 +0,0 @@
|
||||
#ifndef _ASM_X86_HARDIRQ_64_H
|
||||
#define _ASM_X86_HARDIRQ_64_H
|
||||
|
||||
#include <linux/threads.h>
|
||||
#include <linux/irq.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
typedef struct {
|
||||
unsigned int __softirq_pending;
|
||||
unsigned int __nmi_count; /* arch dependent */
|
||||
unsigned int apic_timer_irqs; /* arch dependent */
|
||||
unsigned int apic_perf_irqs; /* arch dependent */
|
||||
unsigned int irq0_irqs;
|
||||
unsigned int irq_resched_count;
|
||||
unsigned int irq_call_count;
|
||||
unsigned int irq_tlb_count;
|
||||
unsigned int irq_thermal_count;
|
||||
unsigned int irq_spurious_count;
|
||||
unsigned int irq_threshold_count;
|
||||
} ____cacheline_aligned irq_cpustat_t;
|
||||
|
||||
DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
|
||||
|
||||
/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
|
||||
#define MAX_HARDIRQS_PER_CPU NR_VECTORS
|
||||
|
||||
#define __ARCH_IRQ_STAT 1
|
||||
|
||||
#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
|
||||
|
||||
#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
|
||||
|
||||
#define __ARCH_SET_SOFTIRQ_PENDING 1
|
||||
|
||||
#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
|
||||
#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
|
||||
|
||||
extern void ack_bad_irq(unsigned int irq);
|
||||
|
||||
#endif /* _ASM_X86_HARDIRQ_64_H */
|
@ -1,5 +1,31 @@
|
||||
#ifdef CONFIG_X86_32
|
||||
# include "irq_regs_32.h"
|
||||
#else
|
||||
# include "irq_regs_64.h"
|
||||
#endif
|
||||
/*
|
||||
* Per-cpu current frame pointer - the location of the last exception frame on
|
||||
* the stack, stored in the per-cpu area.
|
||||
*
|
||||
* Jeremy Fitzhardinge <jeremy@goop.org>
|
||||
*/
|
||||
#ifndef _ASM_X86_IRQ_REGS_H
|
||||
#define _ASM_X86_IRQ_REGS_H
|
||||
|
||||
#include <asm/percpu.h>
|
||||
|
||||
#define ARCH_HAS_OWN_IRQ_REGS
|
||||
|
||||
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
|
||||
|
||||
static inline struct pt_regs *get_irq_regs(void)
|
||||
{
|
||||
return percpu_read(irq_regs);
|
||||
}
|
||||
|
||||
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
|
||||
{
|
||||
struct pt_regs *old_regs;
|
||||
|
||||
old_regs = get_irq_regs();
|
||||
percpu_write(irq_regs, new_regs);
|
||||
|
||||
return old_regs;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_IRQ_REGS_32_H */
|
||||
|
@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Per-cpu current frame pointer - the location of the last exception frame on
|
||||
* the stack, stored in the per-cpu area.
|
||||
*
|
||||
* Jeremy Fitzhardinge <jeremy@goop.org>
|
||||
*/
|
||||
#ifndef _ASM_X86_IRQ_REGS_32_H
|
||||
#define _ASM_X86_IRQ_REGS_32_H
|
||||
|
||||
#include <asm/percpu.h>
|
||||
|
||||
#define ARCH_HAS_OWN_IRQ_REGS
|
||||
|
||||
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
|
||||
|
||||
static inline struct pt_regs *get_irq_regs(void)
|
||||
{
|
||||
return percpu_read(irq_regs);
|
||||
}
|
||||
|
||||
static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
|
||||
{
|
||||
struct pt_regs *old_regs;
|
||||
|
||||
old_regs = get_irq_regs();
|
||||
percpu_write(irq_regs, new_regs);
|
||||
|
||||
return old_regs;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_IRQ_REGS_32_H */
|
@ -1 +0,0 @@
|
||||
#include <asm-generic/irq_regs.h>
|
@ -49,31 +49,33 @@
|
||||
* some of the following vectors are 'rare', they are merged
|
||||
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
|
||||
* TLB, reschedule and local APIC vectors are performance-critical.
|
||||
*
|
||||
* Vectors 0xf0-0xfa are free (reserved for future Linux use).
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
|
||||
# define SPURIOUS_APIC_VECTOR 0xff
|
||||
# define ERROR_APIC_VECTOR 0xfe
|
||||
# define INVALIDATE_TLB_VECTOR 0xfd
|
||||
# define RESCHEDULE_VECTOR 0xfc
|
||||
# define CALL_FUNCTION_VECTOR 0xfb
|
||||
# define CALL_FUNCTION_SINGLE_VECTOR 0xfa
|
||||
# define THERMAL_APIC_VECTOR 0xf0
|
||||
# define RESCHEDULE_VECTOR 0xfd
|
||||
# define CALL_FUNCTION_VECTOR 0xfc
|
||||
# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
|
||||
# define THERMAL_APIC_VECTOR 0xfa
|
||||
/* 0xf8 - 0xf9 : free */
|
||||
# define INVALIDATE_TLB_VECTOR_END 0xf7
|
||||
# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
|
||||
|
||||
# define NUM_INVALIDATE_TLB_VECTORS 8
|
||||
|
||||
#else
|
||||
|
||||
#define SPURIOUS_APIC_VECTOR 0xff
|
||||
#define ERROR_APIC_VECTOR 0xfe
|
||||
#define RESCHEDULE_VECTOR 0xfd
|
||||
#define CALL_FUNCTION_VECTOR 0xfc
|
||||
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
|
||||
#define THERMAL_APIC_VECTOR 0xfa
|
||||
#define THRESHOLD_APIC_VECTOR 0xf9
|
||||
#define UV_BAU_MESSAGE 0xf8
|
||||
#define INVALIDATE_TLB_VECTOR_END 0xf7
|
||||
#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
|
||||
# define SPURIOUS_APIC_VECTOR 0xff
|
||||
# define ERROR_APIC_VECTOR 0xfe
|
||||
# define RESCHEDULE_VECTOR 0xfd
|
||||
# define CALL_FUNCTION_VECTOR 0xfc
|
||||
# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
|
||||
# define THERMAL_APIC_VECTOR 0xfa
|
||||
# define THRESHOLD_APIC_VECTOR 0xf9
|
||||
# define UV_BAU_MESSAGE 0xf8
|
||||
# define INVALIDATE_TLB_VECTOR_END 0xf7
|
||||
# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
|
||||
|
||||
#define NUM_INVALIDATE_TLB_VECTORS 8
|
||||
|
||||
|
@ -11,10 +11,26 @@
|
||||
*/
|
||||
#ifdef CONFIG_X86_SMP
|
||||
BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
|
||||
BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
|
||||
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
|
||||
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
|
||||
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
|
||||
|
||||
BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
|
||||
smp_invalidate_interrupt)
|
||||
BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
|
||||
smp_invalidate_interrupt)
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
|
||||
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
||||
void destroy_context(struct mm_struct *mm);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
# include "mmu_context_32.h"
|
||||
#else
|
||||
# include "mmu_context_64.h"
|
||||
|
||||
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
unsigned cpu = smp_processor_id();
|
||||
|
||||
if (likely(prev != next)) {
|
||||
/* stop flush ipis for the previous mm */
|
||||
cpu_clear(cpu, prev->cpu_vm_mask);
|
||||
#ifdef CONFIG_SMP
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
percpu_write(cpu_tlbstate.active_mm, next);
|
||||
#endif
|
||||
cpu_set(cpu, next->cpu_vm_mask);
|
||||
|
||||
/* Re-load page tables */
|
||||
load_cr3(next->pgd);
|
||||
|
||||
/*
|
||||
* load the LDT, if the LDT is different:
|
||||
*/
|
||||
if (unlikely(prev->context.ldt != next->context.ldt))
|
||||
load_LDT_nolock(&next->context);
|
||||
}
|
||||
#ifdef CONFIG_SMP
|
||||
else {
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
|
||||
|
||||
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
||||
/* We were in lazy tlb mode and leave_mm disabled
|
||||
* tlb flush IPI delivery. We must reload CR3
|
||||
* to make sure to use no freed page tables.
|
||||
*/
|
||||
load_cr3(next->pgd);
|
||||
load_LDT_nolock(&next->context);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define activate_mm(prev, next) \
|
||||
do { \
|
||||
@ -33,5 +76,17 @@ do { \
|
||||
switch_mm((prev), (next), NULL); \
|
||||
} while (0);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define deactivate_mm(tsk, mm) \
|
||||
do { \
|
||||
loadsegment(gs, 0); \
|
||||
} while (0)
|
||||
#else
|
||||
#define deactivate_mm(tsk, mm) \
|
||||
do { \
|
||||
load_gs_index(0); \
|
||||
loadsegment(fs, 0); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_MMU_CONTEXT_H */
|
||||
|
@ -1,55 +0,0 @@
|
||||
#ifndef _ASM_X86_MMU_CONTEXT_32_H
|
||||
#define _ASM_X86_MMU_CONTEXT_32_H
|
||||
|
||||
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void switch_mm(struct mm_struct *prev,
|
||||
struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
if (likely(prev != next)) {
|
||||
/* stop flush ipis for the previous mm */
|
||||
cpu_clear(cpu, prev->cpu_vm_mask);
|
||||
#ifdef CONFIG_SMP
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
percpu_write(cpu_tlbstate.active_mm, next);
|
||||
#endif
|
||||
cpu_set(cpu, next->cpu_vm_mask);
|
||||
|
||||
/* Re-load page tables */
|
||||
load_cr3(next->pgd);
|
||||
|
||||
/*
|
||||
* load the LDT, if the LDT is different:
|
||||
*/
|
||||
if (unlikely(prev->context.ldt != next->context.ldt))
|
||||
load_LDT_nolock(&next->context);
|
||||
}
|
||||
#ifdef CONFIG_SMP
|
||||
else {
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
|
||||
|
||||
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
||||
/* We were in lazy tlb mode and leave_mm disabled
|
||||
* tlb flush IPI delivery. We must reload %cr3.
|
||||
*/
|
||||
load_cr3(next->pgd);
|
||||
load_LDT_nolock(&next->context);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define deactivate_mm(tsk, mm) \
|
||||
asm("movl %0,%%gs": :"r" (0));
|
||||
|
||||
#endif /* _ASM_X86_MMU_CONTEXT_32_H */
|
@ -1,52 +0,0 @@
|
||||
#ifndef _ASM_X86_MMU_CONTEXT_64_H
|
||||
#define _ASM_X86_MMU_CONTEXT_64_H
|
||||
|
||||
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
unsigned cpu = smp_processor_id();
|
||||
if (likely(prev != next)) {
|
||||
/* stop flush ipis for the previous mm */
|
||||
cpu_clear(cpu, prev->cpu_vm_mask);
|
||||
#ifdef CONFIG_SMP
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
percpu_write(cpu_tlbstate.active_mm, next);
|
||||
#endif
|
||||
cpu_set(cpu, next->cpu_vm_mask);
|
||||
load_cr3(next->pgd);
|
||||
|
||||
if (unlikely(next->context.ldt != prev->context.ldt))
|
||||
load_LDT_nolock(&next->context);
|
||||
}
|
||||
#ifdef CONFIG_SMP
|
||||
else {
|
||||
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
||||
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
|
||||
|
||||
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
||||
/* We were in lazy tlb mode and leave_mm disabled
|
||||
* tlb flush IPI delivery. We must reload CR3
|
||||
* to make sure to use no freed page tables.
|
||||
*/
|
||||
load_cr3(next->pgd);
|
||||
load_LDT_nolock(&next->context);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define deactivate_mm(tsk, mm) \
|
||||
do { \
|
||||
load_gs_index(0); \
|
||||
asm volatile("movl %0,%%fs"::"r"(0)); \
|
||||
} while (0)
|
||||
|
||||
#endif /* _ASM_X86_MMU_CONTEXT_64_H */
|
@ -1,45 +0,0 @@
|
||||
#ifndef _ASM_X86_PDA_H
|
||||
#define _ASM_X86_PDA_H
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/cache.h>
|
||||
#include <linux/threads.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/percpu.h>
|
||||
|
||||
/* Per processor datastructure. %gs points to it while the kernel runs */
|
||||
struct x8664_pda {
|
||||
unsigned long unused1;
|
||||
unsigned long unused2;
|
||||
unsigned long unused3;
|
||||
unsigned long unused4;
|
||||
int unused5;
|
||||
unsigned int unused6; /* 36 was cpunumber */
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
unsigned long stack_canary; /* 40 stack canary value */
|
||||
/* gcc-ABI: this canary MUST be at
|
||||
offset 40!!! */
|
||||
#endif
|
||||
short in_bootmem; /* pda lives in bootmem */
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
DECLARE_PER_CPU(struct x8664_pda, __pda);
|
||||
extern void pda_init(int);
|
||||
|
||||
#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
|
||||
|
||||
#define read_pda(field) percpu_read(__pda.field)
|
||||
#define write_pda(field, val) percpu_write(__pda.field, val)
|
||||
#define add_pda(field, val) percpu_add(__pda.field, val)
|
||||
#define sub_pda(field, val) percpu_sub(__pda.field, val)
|
||||
#define or_pda(field, val) percpu_or(__pda.field, val)
|
||||
|
||||
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
|
||||
#define test_and_clear_bit_pda(bit, field) \
|
||||
x86_test_and_clear_bit_percpu(bit, __pda.field)
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_PDA_H */
|
@ -75,7 +75,7 @@ do { \
|
||||
case 8: \
|
||||
asm(op "q %1,"__percpu_arg(0) \
|
||||
: "+m" (var) \
|
||||
: "r" ((T__)val)); \
|
||||
: "re" ((T__)val)); \
|
||||
break; \
|
||||
default: __bad_percpu_size(); \
|
||||
} \
|
||||
@ -133,12 +133,6 @@ do { \
|
||||
/* We can use this directly for local CPU (faster). */
|
||||
DECLARE_PER_CPU(unsigned long, this_cpu_off);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
extern void load_pda_offset(int cpu);
|
||||
#else
|
||||
static inline void load_pda_offset(int cpu) { }
|
||||
#endif
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <asm/processor.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/threads.h>
|
||||
#include <asm/pda.h>
|
||||
|
||||
extern pud_t level3_kernel_pgt[512];
|
||||
extern pud_t level3_ident_pgt[512];
|
||||
|
@ -379,8 +379,29 @@ union thread_xstate {
|
||||
#ifdef CONFIG_X86_64
|
||||
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
||||
|
||||
DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
|
||||
union irq_stack_union {
|
||||
char irq_stack[IRQ_STACK_SIZE];
|
||||
/*
|
||||
* GCC hardcodes the stack canary as %gs:40. Since the
|
||||
* irq_stack is the object at %gs:0, we reserve the bottom
|
||||
* 48 bytes of the irq stack for the canary.
|
||||
*/
|
||||
struct {
|
||||
char gs_base[40];
|
||||
unsigned long stack_canary;
|
||||
};
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
|
||||
DECLARE_PER_CPU(char *, irq_stack_ptr);
|
||||
|
||||
static inline void load_gs_base(int cpu)
|
||||
{
|
||||
/* Memory clobbers used to order pda/percpu accesses */
|
||||
mb();
|
||||
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
|
||||
mb();
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void print_cpu_info(struct cpuinfo_x86 *);
|
||||
|
@ -15,7 +15,6 @@
|
||||
# include <asm/io_apic.h>
|
||||
# endif
|
||||
#endif
|
||||
#include <asm/pda.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/cpumask.h>
|
||||
|
||||
|
38
arch/x86/include/asm/stackprotector.h
Normal file
38
arch/x86/include/asm/stackprotector.h
Normal file
@ -0,0 +1,38 @@
|
||||
#ifndef _ASM_STACKPROTECTOR_H
|
||||
#define _ASM_STACKPROTECTOR_H 1
|
||||
|
||||
#include <asm/tsc.h>
|
||||
#include <asm/processor.h>
|
||||
|
||||
/*
|
||||
* Initialize the stackprotector canary value.
|
||||
*
|
||||
* NOTE: this must only be called from functions that never return,
|
||||
* and it must always be inlined.
|
||||
*/
|
||||
static __always_inline void boot_init_stack_canary(void)
|
||||
{
|
||||
u64 canary;
|
||||
u64 tsc;
|
||||
|
||||
/*
|
||||
* Build time only check to make sure the stack_canary is at
|
||||
* offset 40 in the pda; this is a gcc ABI requirement
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
|
||||
|
||||
/*
|
||||
* We both use the random pool and the current TSC as a source
|
||||
* of randomness. The TSC only matters for very early init,
|
||||
* there it already has some randomness on most systems. Later
|
||||
* on during the bootup the random pool has true entropy too.
|
||||
*/
|
||||
get_random_bytes(&canary, sizeof(canary));
|
||||
tsc = __native_read_tsc();
|
||||
canary += tsc + (tsc << 32UL);
|
||||
|
||||
current->stack_canary = canary;
|
||||
percpu_write(irq_stack_union.stack_canary, canary);
|
||||
}
|
||||
|
||||
#endif
|
@ -86,27 +86,44 @@ do { \
|
||||
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
|
||||
"r12", "r13", "r14", "r15"
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
#define __switch_canary \
|
||||
"movq %P[task_canary](%%rsi),%%r8\n\t" \
|
||||
"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
|
||||
#define __switch_canary_oparam \
|
||||
, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
|
||||
#define __switch_canary_iparam \
|
||||
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
|
||||
#else /* CC_STACKPROTECTOR */
|
||||
#define __switch_canary
|
||||
#define __switch_canary_oparam
|
||||
#define __switch_canary_iparam
|
||||
#endif /* CC_STACKPROTECTOR */
|
||||
|
||||
/* Save restore flags to clear handle leaking NT */
|
||||
#define switch_to(prev, next, last) \
|
||||
asm volatile(SAVE_CONTEXT \
|
||||
asm volatile(SAVE_CONTEXT \
|
||||
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
|
||||
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
|
||||
"call __switch_to\n\t" \
|
||||
".globl thread_return\n" \
|
||||
"thread_return:\n\t" \
|
||||
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
|
||||
__switch_canary \
|
||||
"movq %P[thread_info](%%rsi),%%r8\n\t" \
|
||||
LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
|
||||
"movq %%rax,%%rdi\n\t" \
|
||||
"jc ret_from_fork\n\t" \
|
||||
RESTORE_CONTEXT \
|
||||
: "=a" (last) \
|
||||
__switch_canary_oparam \
|
||||
: [next] "S" (next), [prev] "D" (prev), \
|
||||
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
|
||||
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
|
||||
[tif_fork] "i" (TIF_FORK), \
|
||||
[thread_info] "i" (offsetof(struct task_struct, stack)), \
|
||||
[current_task] "m" (per_cpu_var(current_task)) \
|
||||
__switch_canary_iparam \
|
||||
: "memory", "cc" __EXTRA_CLOBBER)
|
||||
#endif
|
||||
|
||||
|
@ -190,9 +190,20 @@ extern int __node_distance(int, int);
|
||||
|
||||
#else /* !CONFIG_NUMA */
|
||||
|
||||
#define numa_node_id() 0
|
||||
#define cpu_to_node(cpu) 0
|
||||
#define early_cpu_to_node(cpu) 0
|
||||
static inline int numa_node_id(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int cpu_to_node(int cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int early_cpu_to_node(int cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline const cpumask_t *cpumask_of_node(int node)
|
||||
{
|
||||
|
33
arch/x86/include/asm/uv/uv.h
Normal file
33
arch/x86/include/asm/uv/uv.h
Normal file
@ -0,0 +1,33 @@
|
||||
#ifndef _ASM_X86_UV_UV_H
|
||||
#define _ASM_X86_UV_UV_H
|
||||
|
||||
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
|
||||
|
||||
#ifdef CONFIG_X86_UV
|
||||
|
||||
extern enum uv_system_type get_uv_system_type(void);
|
||||
extern int is_uv_system(void);
|
||||
extern void uv_cpu_init(void);
|
||||
extern void uv_system_init(void);
|
||||
extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
|
||||
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
|
||||
struct mm_struct *mm,
|
||||
unsigned long va,
|
||||
unsigned int cpu);
|
||||
|
||||
#else /* X86_UV */
|
||||
|
||||
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
|
||||
static inline int is_uv_system(void) { return 0; }
|
||||
static inline void uv_cpu_init(void) { }
|
||||
static inline void uv_system_init(void) { }
|
||||
static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
|
||||
{ return 1; }
|
||||
static inline const struct cpumask *
|
||||
uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
|
||||
unsigned long va, unsigned int cpu)
|
||||
{ return cpumask; }
|
||||
|
||||
#endif /* X86_UV */
|
||||
|
||||
#endif /* _ASM_X86_UV_UV_H */
|
@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
|
||||
#define cpubit_isset(cpu, bau_local_cpumask) \
|
||||
test_bit((cpu), (bau_local_cpumask).bits)
|
||||
|
||||
extern int uv_flush_tlb_others(struct cpumask *,
|
||||
struct mm_struct *, unsigned long);
|
||||
extern void uv_bau_message_intr1(void);
|
||||
extern void uv_bau_timeout_intr1(void);
|
||||
|
||||
|
@ -23,6 +23,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
|
||||
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
|
||||
CFLAGS_hpet.o := $(nostackp)
|
||||
CFLAGS_tsc.o := $(nostackp)
|
||||
CFLAGS_paravirt.o := $(nostackp)
|
||||
|
||||
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
|
||||
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
|
||||
@ -57,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
|
||||
apm-y := apm_32.o
|
||||
obj-$(CONFIG_APM) += apm.o
|
||||
obj-$(CONFIG_X86_SMP) += smp.o
|
||||
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
|
||||
obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
|
||||
obj-$(CONFIG_X86_32_SMP) += smpcommon.o
|
||||
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
|
||||
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
|
||||
@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
|
||||
###
|
||||
# 64 bit specific files
|
||||
ifeq ($(CONFIG_X86_64),y)
|
||||
obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
|
||||
obj-y += bios_uv.o uv_irq.o uv_sysfs.o
|
||||
obj-y += genapic_64.o genapic_flat_64.o
|
||||
obj-y += genx2apic_cluster.o
|
||||
obj-y += genx2apic_phys.o
|
||||
obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
|
||||
obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
|
||||
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
|
||||
obj-$(CONFIG_AUDIT) += audit_64.o
|
||||
|
||||
|
@ -1132,7 +1132,9 @@ void __cpuinit setup_local_APIC(void)
|
||||
int i, j;
|
||||
|
||||
if (disable_apic) {
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
disable_ioapic_setup();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1844,6 +1846,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
|
||||
num_processors++;
|
||||
cpu = cpumask_next_zero(-1, cpu_present_mask);
|
||||
|
||||
if (version != apic_version[boot_cpu_physical_apicid])
|
||||
WARN_ONCE(1,
|
||||
"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
|
||||
apic_version[boot_cpu_physical_apicid], cpu, version);
|
||||
|
||||
physid_set(apicid, phys_cpu_present_map);
|
||||
if (apicid == boot_cpu_physical_apicid) {
|
||||
/*
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/kbuild.h>
|
||||
#include <asm/pda.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/thread_info.h>
|
||||
@ -48,10 +47,6 @@ int main(void)
|
||||
#endif
|
||||
BLANK();
|
||||
#undef ENTRY
|
||||
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
|
||||
DEFINE(pda_size, sizeof(struct x8664_pda));
|
||||
BLANK();
|
||||
#undef ENTRY
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
BLANK();
|
||||
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
||||
|
@ -29,9 +29,9 @@
|
||||
#include <asm/apic.h>
|
||||
#include <mach_apic.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/uv/uv.h>
|
||||
#endif
|
||||
|
||||
#include <asm/pda.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/desc.h>
|
||||
@ -65,23 +65,23 @@ cpumask_t cpu_sibling_setup_map;
|
||||
|
||||
static struct cpu_dev *this_cpu __cpuinitdata;
|
||||
|
||||
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
|
||||
#ifdef CONFIG_X86_64
|
||||
/* We need valid kernel segments for data and code in long mode too
|
||||
* IRET will check the segment types kkeil 2000/10/28
|
||||
* Also sysret mandates a special GDT layout
|
||||
*/
|
||||
/* The TLS descriptors are currently at a different place compared to i386.
|
||||
Hopefully nobody expects them at a fixed place (Wine?) */
|
||||
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
||||
/*
|
||||
* We need valid kernel segments for data and code in long mode too
|
||||
* IRET will check the segment types kkeil 2000/10/28
|
||||
* Also sysret mandates a special GDT layout
|
||||
*
|
||||
* The TLS descriptors are currently at a different place compared to i386.
|
||||
* Hopefully nobody expects them at a fixed place (Wine?)
|
||||
*/
|
||||
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
||||
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
||||
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
||||
} };
|
||||
#else
|
||||
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
|
||||
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
|
||||
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
|
||||
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
|
||||
@ -113,9 +113,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
|
||||
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
|
||||
|
||||
[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
|
||||
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
|
||||
} };
|
||||
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
|
||||
#endif
|
||||
} };
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -883,12 +883,13 @@ __setup("clearcpuid=", setup_disablecpuid);
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
||||
|
||||
DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
|
||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||
irq_stack_union) __aligned(PAGE_SIZE);
|
||||
#ifdef CONFIG_SMP
|
||||
DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
|
||||
#else
|
||||
DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
||||
per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
|
||||
per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
|
||||
#endif
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, kernel_stack) =
|
||||
@ -897,15 +898,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
|
||||
|
||||
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
|
||||
|
||||
void __cpuinit pda_init(int cpu)
|
||||
{
|
||||
/* Setup up data that may be needed in __get_free_pages early */
|
||||
loadsegment(fs, 0);
|
||||
loadsegment(gs, 0);
|
||||
|
||||
load_pda_offset(cpu);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
|
||||
__aligned(PAGE_SIZE);
|
||||
@ -969,9 +961,9 @@ void __cpuinit cpu_init(void)
|
||||
struct task_struct *me;
|
||||
int i;
|
||||
|
||||
/* CPU 0 is initialised in head64.c */
|
||||
if (cpu != 0)
|
||||
pda_init(cpu);
|
||||
loadsegment(fs, 0);
|
||||
loadsegment(gs, 0);
|
||||
load_gs_base(cpu);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
if (cpu != 0 && percpu_read(node_number) == 0 &&
|
||||
|
@ -145,13 +145,14 @@ typedef union {
|
||||
|
||||
struct drv_cmd {
|
||||
unsigned int type;
|
||||
cpumask_var_t mask;
|
||||
const struct cpumask *mask;
|
||||
drv_addr_union addr;
|
||||
u32 val;
|
||||
};
|
||||
|
||||
static void do_drv_read(struct drv_cmd *cmd)
|
||||
static long do_drv_read(void *_cmd)
|
||||
{
|
||||
struct drv_cmd *cmd = _cmd;
|
||||
u32 h;
|
||||
|
||||
switch (cmd->type) {
|
||||
@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void do_drv_write(struct drv_cmd *cmd)
|
||||
static long do_drv_write(void *_cmd)
|
||||
{
|
||||
struct drv_cmd *cmd = _cmd;
|
||||
u32 lo, hi;
|
||||
|
||||
switch (cmd->type) {
|
||||
@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void drv_read(struct drv_cmd *cmd)
|
||||
{
|
||||
cpumask_t saved_mask = current->cpus_allowed;
|
||||
cmd->val = 0;
|
||||
|
||||
set_cpus_allowed_ptr(current, cmd->mask);
|
||||
do_drv_read(cmd);
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
|
||||
}
|
||||
|
||||
static void drv_write(struct drv_cmd *cmd)
|
||||
{
|
||||
cpumask_t saved_mask = current->cpus_allowed;
|
||||
unsigned int i;
|
||||
|
||||
for_each_cpu(i, cmd->mask) {
|
||||
set_cpus_allowed_ptr(current, cpumask_of(i));
|
||||
do_drv_write(cmd);
|
||||
work_on_cpu(i, do_drv_write, cmd);
|
||||
}
|
||||
|
||||
set_cpus_allowed_ptr(current, &saved_mask);
|
||||
return;
|
||||
}
|
||||
|
||||
static u32 get_cur_val(const struct cpumask *mask)
|
||||
@ -235,6 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask)
|
||||
return 0;
|
||||
}
|
||||
|
||||
cmd.mask = mask;
|
||||
drv_read(&cmd);
|
||||
|
||||
dprintk("get_cur_val = %u\n", cmd.val);
|
||||
@ -366,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
|
||||
return freq;
|
||||
}
|
||||
|
||||
static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
|
||||
static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
|
||||
struct acpi_cpufreq_data *data)
|
||||
{
|
||||
unsigned int cur_freq;
|
||||
@ -401,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
|
||||
return -ENOMEM;
|
||||
|
||||
perf = data->acpi_data;
|
||||
result = cpufreq_frequency_table_target(policy,
|
||||
data->freq_table,
|
||||
@ -448,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
|
||||
|
||||
/* cpufreq holds the hotplug lock, so we are safe from here on */
|
||||
if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
|
||||
cpumask_and(cmd.mask, cpu_online_mask, policy->cpus);
|
||||
cmd.mask = policy->cpus;
|
||||
else
|
||||
cpumask_copy(cmd.mask, cpumask_of(policy->cpu));
|
||||
cmd.mask = cpumask_of(policy->cpu);
|
||||
|
||||
freqs.old = perf->states[perf->state].core_frequency * 1000;
|
||||
freqs.new = data->freq_table[next_state].frequency;
|
||||
@ -477,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
|
||||
perf->state = next_perf_state;
|
||||
|
||||
out:
|
||||
free_cpumask_var(cmd.mask);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/hw_irq.h>
|
||||
|
@ -366,10 +366,12 @@ void __init efi_init(void)
|
||||
SMBIOS_TABLE_GUID)) {
|
||||
efi.smbios = config_tables[i].table;
|
||||
printk(" SMBIOS=0x%lx ", config_tables[i].table);
|
||||
#ifdef CONFIG_X86_UV
|
||||
} else if (!efi_guidcmp(config_tables[i].guid,
|
||||
UV_SYSTEM_TABLE_GUID)) {
|
||||
efi.uv_systab = config_tables[i].table;
|
||||
printk(" UVsystab=0x%lx ", config_tables[i].table);
|
||||
#endif
|
||||
} else if (!efi_guidcmp(config_tables[i].guid,
|
||||
HCDP_TABLE_GUID)) {
|
||||
efi.hcdp = config_tables[i].table;
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <asm/proto.h>
|
||||
#include <asm/efi.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
||||
static pgd_t save_pgd __initdata;
|
||||
static unsigned long efi_flags __initdata;
|
||||
|
@ -672,7 +672,7 @@ common_interrupt:
|
||||
ENDPROC(common_interrupt)
|
||||
CFI_ENDPROC
|
||||
|
||||
#define BUILD_INTERRUPT(name, nr) \
|
||||
#define BUILD_INTERRUPT3(name, nr, fn) \
|
||||
ENTRY(name) \
|
||||
RING0_INT_FRAME; \
|
||||
pushl $~(nr); \
|
||||
@ -680,11 +680,13 @@ ENTRY(name) \
|
||||
SAVE_ALL; \
|
||||
TRACE_IRQS_OFF \
|
||||
movl %esp,%eax; \
|
||||
call smp_##name; \
|
||||
call fn; \
|
||||
jmp ret_from_intr; \
|
||||
CFI_ENDPROC; \
|
||||
ENDPROC(name)
|
||||
|
||||
#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
|
||||
|
||||
/* The include is where all of the SMP etc. interrupts come from */
|
||||
#include "entry_arch.h"
|
||||
|
||||
|
@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
|
||||
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_UV
|
||||
apicinterrupt UV_BAU_MESSAGE \
|
||||
uv_bau_message_intr1 uv_bau_message_interrupt
|
||||
#endif
|
||||
apicinterrupt LOCAL_TIMER_VECTOR \
|
||||
apic_timer_interrupt smp_apic_timer_interrupt
|
||||
|
||||
|
@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
|
||||
struct genapic __read_mostly *genapic = &apic_flat;
|
||||
|
||||
static struct genapic *apic_probe[] __initdata = {
|
||||
#ifdef CONFIG_X86_UV
|
||||
&apic_x2apic_uv_x,
|
||||
#endif
|
||||
&apic_x2apic_phys,
|
||||
&apic_x2apic_cluster,
|
||||
&apic_physflat,
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <asm/ipi.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/uv/uv.h>
|
||||
#include <asm/uv/uv_mmrs.h>
|
||||
#include <asm/uv/uv_hub.h>
|
||||
#include <asm/uv/bios.h>
|
||||
|
@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
|
||||
if (console_loglevel == 10)
|
||||
early_printk("Kernel alive\n");
|
||||
|
||||
pda_init(0);
|
||||
|
||||
x86_64_start_reservations(real_mode_data);
|
||||
}
|
||||
|
||||
|
@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
|
||||
ljmp $(__KERNEL_CS),$1f
|
||||
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
|
||||
movl %eax,%ss # after changing gdt.
|
||||
movl %eax,%fs # gets reset once there's real percpu
|
||||
|
||||
movl $(__USER_DS),%eax # DS/ES contains default USER segment
|
||||
movl %eax,%ds
|
||||
movl %eax,%es
|
||||
|
||||
movl $(__KERNEL_PERCPU), %eax
|
||||
movl %eax,%fs # set this cpu's percpu
|
||||
|
||||
xorl %eax,%eax # Clear GS and LDT
|
||||
movl %eax,%gs
|
||||
lldt %ax
|
||||
@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
|
||||
movb $1, ready
|
||||
cmpb $0,%cl # the first CPU calls start_kernel
|
||||
je 1f
|
||||
movl $(__KERNEL_PERCPU), %eax
|
||||
movl %eax,%fs # set this cpu's percpu
|
||||
movl (stack_start), %esp
|
||||
1:
|
||||
#endif /* CONFIG_SMP */
|
||||
|
@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* early_gdt_base should point to the gdt_page in static percpu init
|
||||
* data area. Computing this requires two symbols - __per_cpu_load
|
||||
* and per_cpu__gdt_page. As linker can't do no such relocation, do
|
||||
* it by hand. As early_gdt_descr is manipulated by C code for
|
||||
* secondary CPUs, this should be done only once for the boot CPU
|
||||
* when early_gdt_descr_base contains zero.
|
||||
* Fix up static pointers that need __per_cpu_load added. The assembler
|
||||
* is unable to do this directly. This is only needed for the boot cpu.
|
||||
* These values are set up with the correct base addresses by C code for
|
||||
* secondary cpus.
|
||||
*/
|
||||
movq early_gdt_descr_base(%rip), %rax
|
||||
testq %rax, %rax
|
||||
jnz 1f
|
||||
movq $__per_cpu_load, %rax
|
||||
addq $per_cpu__gdt_page, %rax
|
||||
movq %rax, early_gdt_descr_base(%rip)
|
||||
movq initial_gs(%rip), %rax
|
||||
cmpl $0, per_cpu__cpu_number(%rax)
|
||||
jne 1f
|
||||
addq %rax, early_gdt_descr_base(%rip)
|
||||
1:
|
||||
#endif
|
||||
/*
|
||||
@ -246,13 +242,10 @@ ENTRY(secondary_startup_64)
|
||||
|
||||
/* Set up %gs.
|
||||
*
|
||||
* On SMP, %gs should point to the per-cpu area. For initial
|
||||
* boot, make %gs point to the init data section. For a
|
||||
* secondary CPU,initial_gs should be set to its pda address
|
||||
* before the CPU runs this code.
|
||||
*
|
||||
* On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
|
||||
* change.
|
||||
* The base of %gs always points to the bottom of the irqstack
|
||||
* union. If the stack protector canary is enabled, it is
|
||||
* located at %gs:40. Note that, on SMP, the boot cpu uses
|
||||
* init data section till per cpu areas are set up.
|
||||
*/
|
||||
movl $MSR_GS_BASE,%ecx
|
||||
movq initial_gs(%rip),%rax
|
||||
@ -285,7 +278,7 @@ ENTRY(secondary_startup_64)
|
||||
#ifdef CONFIG_SMP
|
||||
.quad __per_cpu_load
|
||||
#else
|
||||
.quad PER_CPU_VAR(__pda)
|
||||
.quad PER_CPU_VAR(irq_stack_union)
|
||||
#endif
|
||||
__FINITDATA
|
||||
|
||||
@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
|
||||
.globl early_gdt_descr
|
||||
early_gdt_descr:
|
||||
.word GDT_ENTRIES*8-1
|
||||
#ifdef CONFIG_SMP
|
||||
early_gdt_descr_base:
|
||||
.quad 0x0000000000000000
|
||||
#else
|
||||
.quad per_cpu__gdt_page
|
||||
#endif
|
||||
|
||||
ENTRY(phys_base)
|
||||
/* This must match the first entry in level2_kernel_pgt */
|
||||
|
@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
|
||||
}
|
||||
#endif /* CONFIG_HT_IRQ */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_X86_UV
|
||||
/*
|
||||
* Re-target the irq to the specified CPU and enable the specified MMR located
|
||||
* on the specified blade to allow the sending of MSIs to the specified CPU.
|
||||
|
@ -18,10 +18,14 @@
|
||||
#include <linux/smp.h>
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
|
||||
EXPORT_PER_CPU_SYMBOL(irq_stat);
|
||||
|
||||
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
|
||||
EXPORT_PER_CPU_SYMBOL(irq_regs);
|
||||
|
||||
/*
|
||||
* Probabilistic stack overflow check:
|
||||
*
|
||||
|
@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
|
||||
*/
|
||||
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
|
||||
|
||||
/* IPI for invalidation */
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
|
||||
/* IPIs for invalidation */
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
|
||||
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
|
||||
|
||||
/* IPI for generic function call */
|
||||
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
|
||||
|
@ -108,7 +108,6 @@ void cpu_idle(void)
|
||||
play_dead();
|
||||
|
||||
local_irq_disable();
|
||||
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
|
||||
/* Don't trace irqs off for idle */
|
||||
stop_critical_timings();
|
||||
pm_idle();
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include <linux/stackprotector.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/sched.h>
|
||||
@ -46,7 +47,6 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/pda.h>
|
||||
#include <asm/prctl.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/proto.h>
|
||||
@ -117,6 +117,17 @@ static inline void play_dead(void)
|
||||
void cpu_idle(void)
|
||||
{
|
||||
current_thread_info()->status |= TS_POLLING;
|
||||
|
||||
/*
|
||||
* If we're the non-boot CPU, nothing set the PDA stack
|
||||
* canary up for us - and if we are the boot CPU we have
|
||||
* a 0 stack canary. This is a good place for updating
|
||||
* it, as we wont ever return from this function (so the
|
||||
* invalid canaries already on the stack wont ever
|
||||
* trigger):
|
||||
*/
|
||||
boot_init_stack_canary();
|
||||
|
||||
/* endless idle loop with no priority at all */
|
||||
while (1) {
|
||||
tick_nohz_stop_sched_tick(1);
|
||||
@ -626,14 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
percpu_write(kernel_stack,
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE - KERNEL_STACK_OFFSET);
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
write_pda(stack_canary, next_p->stack_canary);
|
||||
/*
|
||||
* Build time only check to make sure the stack_canary is at
|
||||
* offset 40 in the pda; this is a gcc ABI requirement
|
||||
*/
|
||||
BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Now maybe reload the debug registers and handle I/O bitmaps
|
||||
|
@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
|
||||
static inline void setup_node_to_cpumask_map(void) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Define load_pda_offset() and per-cpu __pda for x86_64.
|
||||
* load_pda_offset() is responsible for loading the offset of pda into
|
||||
* %gs.
|
||||
*
|
||||
* On SMP, pda offset also duals as percpu base address and thus it
|
||||
* should be at the start of per-cpu area. To achieve this, it's
|
||||
* preallocated in vmlinux_64.lds.S directly instead of using
|
||||
* DEFINE_PER_CPU().
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
void __cpuinit load_pda_offset(int cpu)
|
||||
{
|
||||
/* Memory clobbers used to order pda/percpu accesses */
|
||||
mb();
|
||||
wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
|
||||
mb();
|
||||
}
|
||||
#ifndef CONFIG_SMP
|
||||
DEFINE_PER_CPU(struct x8664_pda, __pda);
|
||||
#endif
|
||||
EXPORT_PER_CPU_SYMBOL(__pda);
|
||||
#endif /* CONFIG_SMP && CONFIG_X86_64 */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/* correctly size the local cpu masks */
|
||||
@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
|
||||
per_cpu(cpu_number, cpu) = cpu;
|
||||
#ifdef CONFIG_X86_64
|
||||
per_cpu(irq_stack_ptr, cpu) =
|
||||
(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
|
||||
per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
|
||||
/*
|
||||
* CPU0 modified pda in the init data area, reload pda
|
||||
* offset for CPU0 and clear the area for others.
|
||||
* Up to this point, CPU0 has been using .data.init
|
||||
* area. Reload %gs offset for CPU0.
|
||||
*/
|
||||
if (cpu == 0)
|
||||
load_pda_offset(0);
|
||||
else
|
||||
memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
|
||||
load_gs_base(cpu);
|
||||
#endif
|
||||
|
||||
DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
|
||||
|
@ -62,6 +62,7 @@
|
||||
#include <asm/vmi.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/uv/uv.h>
|
||||
#include <linux/mc146818rtc.h>
|
||||
|
||||
#include <mach_apic.h>
|
||||
|
@ -1,239 +0,0 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
|
||||
= { &init_mm, 0, };
|
||||
|
||||
/* must come after the send_IPI functions above for inlining */
|
||||
#include <mach_ipi.h>
|
||||
|
||||
/*
|
||||
* Smarter SMP flushing macros.
|
||||
* c/o Linus Torvalds.
|
||||
*
|
||||
* These mean you can really definitely utterly forget about
|
||||
* writing to user space from interrupts. (Its not allowed anyway).
|
||||
*
|
||||
* Optimizations Manfred Spraul <manfred@colorfullife.com>
|
||||
*/
|
||||
|
||||
static cpumask_var_t flush_cpumask;
|
||||
static struct mm_struct *flush_mm;
|
||||
static unsigned long flush_va;
|
||||
static DEFINE_SPINLOCK(tlbstate_lock);
|
||||
|
||||
/*
|
||||
* We cannot call mmdrop() because we are in interrupt context,
|
||||
* instead update mm->cpu_vm_mask.
|
||||
*
|
||||
* We need to reload %cr3 since the page tables may be going
|
||||
* away from under us..
|
||||
*/
|
||||
void leave_mm(int cpu)
|
||||
{
|
||||
BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
|
||||
cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
|
||||
load_cr3(swapper_pg_dir);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(leave_mm);
|
||||
|
||||
/*
|
||||
*
|
||||
* The flush IPI assumes that a thread switch happens in this order:
|
||||
* [cpu0: the cpu that switches]
|
||||
* 1) switch_mm() either 1a) or 1b)
|
||||
* 1a) thread switch to a different mm
|
||||
* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
|
||||
* Stop ipi delivery for the old mm. This is not synchronized with
|
||||
* the other cpus, but smp_invalidate_interrupt ignore flush ipis
|
||||
* for the wrong mm, and in the worst case we perform a superfluous
|
||||
* tlb flush.
|
||||
* 1a2) set cpu_tlbstate to TLBSTATE_OK
|
||||
* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
|
||||
* was in lazy tlb mode.
|
||||
* 1a3) update cpu_tlbstate[].active_mm
|
||||
* Now cpu0 accepts tlb flushes for the new mm.
|
||||
* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
|
||||
* Now the other cpus will send tlb flush ipis.
|
||||
* 1a4) change cr3.
|
||||
* 1b) thread switch without mm change
|
||||
* cpu_tlbstate[].active_mm is correct, cpu0 already handles
|
||||
* flush ipis.
|
||||
* 1b1) set cpu_tlbstate to TLBSTATE_OK
|
||||
* 1b2) test_and_set the cpu bit in cpu_vm_mask.
|
||||
* Atomically set the bit [other cpus will start sending flush ipis],
|
||||
* and test the bit.
|
||||
* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
|
||||
* 2) switch %%esp, ie current
|
||||
*
|
||||
* The interrupt must handle 2 special cases:
|
||||
* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
|
||||
* - the cpu performs speculative tlb reads, i.e. even if the cpu only
|
||||
* runs in kernel space, the cpu could load tlb entries for user space
|
||||
* pages.
|
||||
*
|
||||
* The good news is that cpu_tlbstate is local to each cpu, no
|
||||
* write/read ordering problems.
|
||||
*/
|
||||
|
||||
/*
|
||||
* TLB flush IPI:
|
||||
*
|
||||
* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
|
||||
* 2) Leave the mm if we are in the lazy tlb mode.
|
||||
*/
|
||||
|
||||
void smp_invalidate_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long cpu;
|
||||
|
||||
cpu = get_cpu();
|
||||
|
||||
if (!cpumask_test_cpu(cpu, flush_cpumask))
|
||||
goto out;
|
||||
/*
|
||||
* This was a BUG() but until someone can quote me the
|
||||
* line from the intel manual that guarantees an IPI to
|
||||
* multiple CPUs is retried _only_ on the erroring CPUs
|
||||
* its staying as a return
|
||||
*
|
||||
* BUG();
|
||||
*/
|
||||
|
||||
if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
|
||||
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
|
||||
if (flush_va == TLB_FLUSH_ALL)
|
||||
local_flush_tlb();
|
||||
else
|
||||
__flush_tlb_one(flush_va);
|
||||
} else
|
||||
leave_mm(cpu);
|
||||
}
|
||||
ack_APIC_irq();
|
||||
smp_mb__before_clear_bit();
|
||||
cpumask_clear_cpu(cpu, flush_cpumask);
|
||||
smp_mb__after_clear_bit();
|
||||
out:
|
||||
put_cpu_no_resched();
|
||||
inc_irq_stat(irq_tlb_count);
|
||||
}
|
||||
|
||||
void native_flush_tlb_others(const struct cpumask *cpumask,
|
||||
struct mm_struct *mm, unsigned long va)
|
||||
{
|
||||
/*
|
||||
* - mask must exist :)
|
||||
*/
|
||||
BUG_ON(cpumask_empty(cpumask));
|
||||
BUG_ON(!mm);
|
||||
|
||||
/*
|
||||
* i'm not happy about this global shared spinlock in the
|
||||
* MM hot path, but we'll see how contended it is.
|
||||
* AK: x86-64 has a faster method that could be ported.
|
||||
*/
|
||||
spin_lock(&tlbstate_lock);
|
||||
|
||||
cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/* If a CPU which we ran on has gone down, OK. */
|
||||
cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
|
||||
if (unlikely(cpumask_empty(flush_cpumask))) {
|
||||
spin_unlock(&tlbstate_lock);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
flush_mm = mm;
|
||||
flush_va = va;
|
||||
|
||||
/*
|
||||
* Make the above memory operations globally visible before
|
||||
* sending the IPI.
|
||||
*/
|
||||
smp_mb();
|
||||
/*
|
||||
* We have to send the IPI only to
|
||||
* CPUs affected.
|
||||
*/
|
||||
send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
|
||||
|
||||
while (!cpumask_empty(flush_cpumask))
|
||||
/* nothing. lockup detection does not belong here */
|
||||
cpu_relax();
|
||||
|
||||
flush_mm = NULL;
|
||||
flush_va = 0;
|
||||
spin_unlock(&tlbstate_lock);
|
||||
}
|
||||
|
||||
void flush_tlb_current_task(void)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
local_flush_tlb();
|
||||
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
|
||||
flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void flush_tlb_mm(struct mm_struct *mm)
|
||||
{
|
||||
|
||||
preempt_disable();
|
||||
|
||||
if (current->active_mm == mm) {
|
||||
if (current->mm)
|
||||
local_flush_tlb();
|
||||
else
|
||||
leave_mm(smp_processor_id());
|
||||
}
|
||||
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
|
||||
flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
|
||||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
if (current->active_mm == mm) {
|
||||
if (current->mm)
|
||||
__flush_tlb_one(va);
|
||||
else
|
||||
leave_mm(smp_processor_id());
|
||||
}
|
||||
|
||||
if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
|
||||
flush_tlb_others(&mm->cpu_vm_mask, mm, va);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(flush_tlb_page);
|
||||
|
||||
static void do_flush_tlb_all(void *info)
|
||||
{
|
||||
unsigned long cpu = smp_processor_id();
|
||||
|
||||
__flush_tlb_all();
|
||||
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
|
||||
leave_mm(cpu);
|
||||
}
|
||||
|
||||
void flush_tlb_all(void)
|
||||
{
|
||||
on_each_cpu(do_flush_tlb_all, NULL, 1);
|
||||
}
|
||||
|
||||
static int init_flush_cpumask(void)
|
||||
{
|
||||
alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
|
||||
return 0;
|
||||
}
|
||||
early_initcall(init_flush_cpumask);
|
@ -11,6 +11,7 @@
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/uv/uv.h>
|
||||
#include <asm/uv/uv_mmrs.h>
|
||||
#include <asm/uv/uv_hub.h>
|
||||
#include <asm/uv/uv_bau.h>
|
||||
@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
|
||||
*
|
||||
* Send a broadcast and wait for a broadcast message to complete.
|
||||
*
|
||||
* The cpumaskp mask contains the cpus the broadcast was sent to.
|
||||
* The flush_mask contains the cpus the broadcast was sent to.
|
||||
*
|
||||
* Returns 1 if all remote flushing was done. The mask is zeroed.
|
||||
* Returns 0 if some remote flushing remains to be done. The mask will have
|
||||
* some bits still set.
|
||||
* Returns NULL if all remote flushing was done. The mask is zeroed.
|
||||
* Returns @flush_mask if some remote flushing remains to be done. The
|
||||
* mask will have some bits still set.
|
||||
*/
|
||||
int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
|
||||
struct cpumask *cpumaskp)
|
||||
const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
|
||||
struct bau_desc *bau_desc,
|
||||
struct cpumask *flush_mask)
|
||||
{
|
||||
int completion_status = 0;
|
||||
int right_shift;
|
||||
@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
|
||||
* Success, so clear the remote cpu's from the mask so we don't
|
||||
* use the IPI method of shootdown on them.
|
||||
*/
|
||||
for_each_cpu(bit, cpumaskp) {
|
||||
for_each_cpu(bit, flush_mask) {
|
||||
blade = uv_cpu_to_blade_id(bit);
|
||||
if (blade == this_blade)
|
||||
continue;
|
||||
cpumask_clear_cpu(bit, cpumaskp);
|
||||
cpumask_clear_cpu(bit, flush_mask);
|
||||
}
|
||||
if (!cpumask_empty(cpumaskp))
|
||||
return 0;
|
||||
return 1;
|
||||
if (!cpumask_empty(flush_mask))
|
||||
return flush_mask;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* uv_flush_tlb_others - globally purge translation cache of a virtual
|
||||
* address or all TLB's
|
||||
* @cpumaskp: mask of all cpu's in which the address is to be removed
|
||||
* @cpumask: mask of all cpu's in which the address is to be removed
|
||||
* @mm: mm_struct containing virtual address range
|
||||
* @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
|
||||
* @cpu: the current cpu
|
||||
*
|
||||
* This is the entry point for initiating any UV global TLB shootdown.
|
||||
*
|
||||
* Purges the translation caches of all specified processors of the given
|
||||
* virtual address, or purges all TLB's on specified processors.
|
||||
*
|
||||
* The caller has derived the cpumaskp from the mm_struct and has subtracted
|
||||
* the local cpu from the mask. This function is called only if there
|
||||
* are bits set in the mask. (e.g. flush_tlb_page())
|
||||
* The caller has derived the cpumask from the mm_struct. This function
|
||||
* is called only if there are bits set in the mask. (e.g. flush_tlb_page())
|
||||
*
|
||||
* The cpumaskp is converted into a nodemask of the nodes containing
|
||||
* The cpumask is converted into a nodemask of the nodes containing
|
||||
* the cpus.
|
||||
*
|
||||
* Returns 1 if all remote flushing was done.
|
||||
* Returns 0 if some remote flushing remains to be done.
|
||||
* Note that this function should be called with preemption disabled.
|
||||
*
|
||||
* Returns NULL if all remote flushing was done.
|
||||
* Returns pointer to cpumask if some remote flushing remains to be
|
||||
* done. The returned pointer is valid till preemption is re-enabled.
|
||||
*/
|
||||
int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
|
||||
unsigned long va)
|
||||
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
|
||||
struct mm_struct *mm,
|
||||
unsigned long va, unsigned int cpu)
|
||||
{
|
||||
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
|
||||
struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
|
||||
int i;
|
||||
int bit;
|
||||
int blade;
|
||||
int cpu;
|
||||
int uv_cpu;
|
||||
int this_blade;
|
||||
int locals = 0;
|
||||
struct bau_desc *bau_desc;
|
||||
|
||||
cpu = uv_blade_processor_id();
|
||||
WARN_ON(!in_atomic());
|
||||
|
||||
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
|
||||
|
||||
uv_cpu = uv_blade_processor_id();
|
||||
this_blade = uv_numa_blade_id();
|
||||
bau_desc = __get_cpu_var(bau_control).descriptor_base;
|
||||
bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
|
||||
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
|
||||
|
||||
bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
|
||||
|
||||
i = 0;
|
||||
for_each_cpu(bit, cpumaskp) {
|
||||
for_each_cpu(bit, flush_mask) {
|
||||
blade = uv_cpu_to_blade_id(bit);
|
||||
BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
|
||||
if (blade == this_blade) {
|
||||
@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
|
||||
* no off_node flushing; return status for local node
|
||||
*/
|
||||
if (locals)
|
||||
return 0;
|
||||
return flush_mask;
|
||||
else
|
||||
return 1;
|
||||
return NULL;
|
||||
}
|
||||
__get_cpu_var(ptcstats).requestor++;
|
||||
__get_cpu_var(ptcstats).ntargeted += i;
|
||||
|
||||
bau_desc->payload.address = va;
|
||||
bau_desc->payload.sending_cpu = smp_processor_id();
|
||||
bau_desc->payload.sending_cpu = cpu;
|
||||
|
||||
return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
|
||||
return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -59,7 +59,6 @@
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/pda.h>
|
||||
#else
|
||||
#include <asm/processor-flags.h>
|
||||
#include <asm/arch_hooks.h>
|
||||
|
@ -220,8 +220,7 @@ SECTIONS
|
||||
* so that it can be accessed as a percpu variable.
|
||||
*/
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
|
||||
per_cpu____pda = __per_cpu_start;
|
||||
PERCPU_VADDR(0, :percpu)
|
||||
#else
|
||||
PERCPU(PAGE_SIZE)
|
||||
#endif
|
||||
@ -262,3 +261,8 @@ SECTIONS
|
||||
*/
|
||||
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
|
||||
"kernel image bigger than KERNEL_IMAGE_SIZE")
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
ASSERT((per_cpu__irq_stack_union == 0),
|
||||
"irq_stack_union is not at start of per-cpu area");
|
||||
#endif
|
||||
|
@ -1,6 +1,8 @@
|
||||
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
|
||||
pat.o pgtable.o gup.o
|
||||
|
||||
obj-$(CONFIG_X86_SMP) += tlb.o
|
||||
|
||||
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
|
||||
|
||||
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/magic.h>
|
||||
|
||||
#include <asm/system.h>
|
||||
#include <asm/desc.h>
|
||||
@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs)
|
||||
*
|
||||
* Opcode checker based on code by Richard Brunner
|
||||
*/
|
||||
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
|
||||
unsigned long error_code)
|
||||
static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long addr)
|
||||
{
|
||||
unsigned char *instr;
|
||||
int scan_more = 1;
|
||||
@ -409,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
|
||||
unsigned long error_code)
|
||||
static noinline void pgtable_bad(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
unsigned long flags = oops_begin();
|
||||
int sig = SIGKILL;
|
||||
struct task_struct *tsk;
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
|
||||
current->comm, address);
|
||||
tsk->comm, address);
|
||||
dump_pagetable(address);
|
||||
tsk = current;
|
||||
tsk->thread.cr2 = address;
|
||||
@ -429,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
|
||||
}
|
||||
#endif
|
||||
|
||||
static noinline void no_context(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
unsigned long *stackend;
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
unsigned long flags;
|
||||
int sig;
|
||||
#endif
|
||||
|
||||
/* Are we prepared to handle this kernel fault? */
|
||||
if (fixup_exception(regs))
|
||||
return;
|
||||
|
||||
/*
|
||||
* X86_32
|
||||
* Valid to do another page fault here, because if this fault
|
||||
* had been triggered by is_prefetch fixup_exception would have
|
||||
* handled it.
|
||||
*
|
||||
* X86_64
|
||||
* Hall of shame of CPU/BIOS bugs.
|
||||
*/
|
||||
if (is_prefetch(regs, error_code, address))
|
||||
return;
|
||||
|
||||
if (is_errata93(regs, address))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Oops. The kernel tried to access some bad page. We'll have to
|
||||
* terminate things with extreme prejudice.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
bust_spinlocks(1);
|
||||
#else
|
||||
flags = oops_begin();
|
||||
#endif
|
||||
|
||||
show_fault_oops(regs, error_code, address);
|
||||
|
||||
stackend = end_of_stack(tsk);
|
||||
if (*stackend != STACK_END_MAGIC)
|
||||
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
|
||||
|
||||
tsk->thread.cr2 = address;
|
||||
tsk->thread.trap_no = 14;
|
||||
tsk->thread.error_code = error_code;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
die("Oops", regs, error_code);
|
||||
bust_spinlocks(0);
|
||||
do_exit(SIGKILL);
|
||||
#else
|
||||
sig = SIGKILL;
|
||||
if (__die("Oops", regs, error_code))
|
||||
sig = 0;
|
||||
/* Executive summary in case the body of the oops scrolled away */
|
||||
printk(KERN_EMERG "CR2: %016lx\n", address);
|
||||
oops_end(flags, regs, sig);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void __bad_area_nosemaphore(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address,
|
||||
int si_code)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
/* User mode accesses just cause a SIGSEGV */
|
||||
if (error_code & PF_USER) {
|
||||
/*
|
||||
* It's possible to have interrupts off here.
|
||||
*/
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* Valid to do another page fault here because this one came
|
||||
* from user space.
|
||||
*/
|
||||
if (is_prefetch(regs, error_code, address))
|
||||
return;
|
||||
|
||||
if (is_errata100(regs, address))
|
||||
return;
|
||||
|
||||
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
||||
printk_ratelimit()) {
|
||||
printk(
|
||||
"%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
|
||||
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
||||
tsk->comm, task_pid_nr(tsk), address,
|
||||
(void *) regs->ip, (void *) regs->sp, error_code);
|
||||
print_vma_addr(" in ", regs->ip);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
tsk->thread.cr2 = address;
|
||||
/* Kernel addresses are always protection faults */
|
||||
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
|
||||
tsk->thread.trap_no = 14;
|
||||
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_f00f_bug(regs, address))
|
||||
return;
|
||||
|
||||
no_context(regs, error_code, address);
|
||||
}
|
||||
|
||||
static noinline void bad_area_nosemaphore(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
|
||||
}
|
||||
|
||||
static void __bad_area(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address,
|
||||
int si_code)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
|
||||
/*
|
||||
* Something tried to access memory that isn't in our memory map..
|
||||
* Fix it, but check if it's kernel or user first..
|
||||
*/
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
__bad_area_nosemaphore(regs, error_code, address, si_code);
|
||||
}
|
||||
|
||||
static noinline void bad_area(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
__bad_area(regs, error_code, address, SEGV_MAPERR);
|
||||
}
|
||||
|
||||
static noinline void bad_area_access_error(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
__bad_area(regs, error_code, address, SEGV_ACCERR);
|
||||
}
|
||||
|
||||
/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
|
||||
static void out_of_memory(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
/*
|
||||
* We ran out of memory, call the OOM killer, and return the userspace
|
||||
* (which will retry the fault, or kill us if we got oom-killed).
|
||||
*/
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
pagefault_out_of_memory();
|
||||
}
|
||||
|
||||
static void do_sigbus(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
struct mm_struct *mm = tsk->mm;
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
/* Kernel mode? Handle exceptions or die */
|
||||
if (!(error_code & PF_USER))
|
||||
no_context(regs, error_code, address);
|
||||
#ifdef CONFIG_X86_32
|
||||
/* User space => ok to do another page fault */
|
||||
if (is_prefetch(regs, error_code, address))
|
||||
return;
|
||||
#endif
|
||||
tsk->thread.cr2 = address;
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_no = 14;
|
||||
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
|
||||
}
|
||||
|
||||
static noinline void mm_fault_error(struct pt_regs *regs,
|
||||
unsigned long error_code, unsigned long address, unsigned int fault)
|
||||
{
|
||||
if (fault & VM_FAULT_OOM)
|
||||
out_of_memory(regs, error_code, address);
|
||||
else if (fault & VM_FAULT_SIGBUS)
|
||||
do_sigbus(regs, error_code, address);
|
||||
else
|
||||
BUG();
|
||||
}
|
||||
|
||||
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
|
||||
{
|
||||
if ((error_code & PF_WRITE) && !pte_write(*pte))
|
||||
@ -448,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
|
||||
* There are no security implications to leaving a stale TLB when
|
||||
* increasing the permissions on a page.
|
||||
*/
|
||||
static int spurious_fault(unsigned long address,
|
||||
unsigned long error_code)
|
||||
static noinline int spurious_fault(unsigned long error_code,
|
||||
unsigned long address)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
@ -494,7 +685,7 @@ static int spurious_fault(unsigned long address,
|
||||
*
|
||||
* This assumes no large pages in there.
|
||||
*/
|
||||
static int vmalloc_fault(unsigned long address)
|
||||
static noinline int vmalloc_fault(unsigned long address)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long pgd_paddr;
|
||||
@ -573,6 +764,25 @@ static int vmalloc_fault(unsigned long address)
|
||||
|
||||
int show_unhandled_signals = 1;
|
||||
|
||||
static inline int access_error(unsigned long error_code, int write,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
if (write) {
|
||||
/* write, present and write, not present */
|
||||
if (unlikely(!(vma->vm_flags & VM_WRITE)))
|
||||
return 1;
|
||||
} else if (unlikely(error_code & PF_PROT)) {
|
||||
/* read, present */
|
||||
return 1;
|
||||
} else {
|
||||
/* read, not present */
|
||||
if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine handles page faults. It determines the address,
|
||||
* and the problem, and then passes it off to one of the appropriate
|
||||
@ -583,16 +793,12 @@ asmlinkage
|
||||
#endif
|
||||
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
{
|
||||
unsigned long address;
|
||||
struct task_struct *tsk;
|
||||
struct mm_struct *mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long address;
|
||||
int write, si_code;
|
||||
int write;
|
||||
int fault;
|
||||
#ifdef CONFIG_X86_64
|
||||
unsigned long flags;
|
||||
int sig;
|
||||
#endif
|
||||
|
||||
tsk = current;
|
||||
mm = tsk->mm;
|
||||
@ -601,9 +807,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
/* get the address */
|
||||
address = read_cr2();
|
||||
|
||||
si_code = SEGV_MAPERR;
|
||||
|
||||
if (notify_page_fault(regs))
|
||||
if (unlikely(notify_page_fault(regs)))
|
||||
return;
|
||||
if (unlikely(kmmio_fault(regs, address)))
|
||||
return;
|
||||
@ -631,17 +835,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
return;
|
||||
|
||||
/* Can handle a stale RO->RW TLB */
|
||||
if (spurious_fault(address, error_code))
|
||||
if (spurious_fault(error_code, address))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Don't take the mm semaphore here. If we fixup a prefetch
|
||||
* fault we could otherwise deadlock.
|
||||
*/
|
||||
goto bad_area_nosemaphore;
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* It's safe to allow irq's after cr2 has been saved and the
|
||||
* vmalloc fault has been handled.
|
||||
@ -657,15 +861,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
if (unlikely(error_code & PF_RSVD))
|
||||
pgtable_bad(address, regs, error_code);
|
||||
pgtable_bad(regs, error_code, address);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we're in an interrupt, have no user context or are running in an
|
||||
* atomic region then we must not take the fault.
|
||||
*/
|
||||
if (unlikely(in_atomic() || !mm))
|
||||
goto bad_area_nosemaphore;
|
||||
if (unlikely(in_atomic() || !mm)) {
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* When running in the kernel we expect faults to occur only to
|
||||
@ -683,20 +889,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
* source. If this is invalid we can skip the address space check,
|
||||
* thus avoiding the deadlock.
|
||||
*/
|
||||
if (!down_read_trylock(&mm->mmap_sem)) {
|
||||
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
|
||||
if ((error_code & PF_USER) == 0 &&
|
||||
!search_exception_tables(regs->ip))
|
||||
goto bad_area_nosemaphore;
|
||||
!search_exception_tables(regs->ip)) {
|
||||
bad_area_nosemaphore(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
down_read(&mm->mmap_sem);
|
||||
}
|
||||
|
||||
vma = find_vma(mm, address);
|
||||
if (!vma)
|
||||
goto bad_area;
|
||||
if (vma->vm_start <= address)
|
||||
if (unlikely(!vma)) {
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
if (likely(vma->vm_start <= address))
|
||||
goto good_area;
|
||||
if (!(vma->vm_flags & VM_GROWSDOWN))
|
||||
goto bad_area;
|
||||
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
if (error_code & PF_USER) {
|
||||
/*
|
||||
* Accessing the stack below %sp is always a bug.
|
||||
@ -704,31 +916,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
||||
* and pusha to work. ("enter $65535,$31" pushes
|
||||
* 32 pointers and then decrements %sp by 65535.)
|
||||
*/
|
||||
if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
|
||||
goto bad_area;
|
||||
if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (expand_stack(vma, address))
|
||||
goto bad_area;
|
||||
/*
|
||||
* Ok, we have a good vm_area for this memory access, so
|
||||
* we can handle it..
|
||||
*/
|
||||
if (unlikely(expand_stack(vma, address))) {
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ok, we have a good vm_area for this memory access, so
|
||||
* we can handle it..
|
||||
*/
|
||||
good_area:
|
||||
si_code = SEGV_ACCERR;
|
||||
write = 0;
|
||||
switch (error_code & (PF_PROT|PF_WRITE)) {
|
||||
default: /* 3: write, present */
|
||||
/* fall through */
|
||||
case PF_WRITE: /* write, not present */
|
||||
if (!(vma->vm_flags & VM_WRITE))
|
||||
goto bad_area;
|
||||
write++;
|
||||
break;
|
||||
case PF_PROT: /* read, present */
|
||||
goto bad_area;
|
||||
case 0: /* read, not present */
|
||||
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
|
||||
goto bad_area;
|
||||
write = error_code & PF_WRITE;
|
||||
if (unlikely(access_error(error_code, write, vma))) {
|
||||
bad_area_access_error(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -738,11 +944,8 @@ good_area:
|
||||
*/
|
||||
fault = handle_mm_fault(mm, vma, address, write);
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
if (fault & VM_FAULT_OOM)
|
||||
goto out_of_memory;
|
||||
else if (fault & VM_FAULT_SIGBUS)
|
||||
goto do_sigbus;
|
||||
BUG();
|
||||
mm_fault_error(regs, error_code, address, fault);
|
||||
return;
|
||||
}
|
||||
if (fault & VM_FAULT_MAJOR)
|
||||
tsk->maj_flt++;
|
||||
@ -760,128 +963,6 @@ good_area:
|
||||
}
|
||||
#endif
|
||||
up_read(&mm->mmap_sem);
|
||||
return;
|
||||
|
||||
/*
|
||||
* Something tried to access memory that isn't in our memory map..
|
||||
* Fix it, but check if it's kernel or user first..
|
||||
*/
|
||||
bad_area:
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
bad_area_nosemaphore:
|
||||
/* User mode accesses just cause a SIGSEGV */
|
||||
if (error_code & PF_USER) {
|
||||
/*
|
||||
* It's possible to have interrupts off here.
|
||||
*/
|
||||
local_irq_enable();
|
||||
|
||||
/*
|
||||
* Valid to do another page fault here because this one came
|
||||
* from user space.
|
||||
*/
|
||||
if (is_prefetch(regs, address, error_code))
|
||||
return;
|
||||
|
||||
if (is_errata100(regs, address))
|
||||
return;
|
||||
|
||||
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
||||
printk_ratelimit()) {
|
||||
printk(
|
||||
"%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
|
||||
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
||||
tsk->comm, task_pid_nr(tsk), address,
|
||||
(void *) regs->ip, (void *) regs->sp, error_code);
|
||||
print_vma_addr(" in ", regs->ip);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
tsk->thread.cr2 = address;
|
||||
/* Kernel addresses are always protection faults */
|
||||
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
|
||||
tsk->thread.trap_no = 14;
|
||||
force_sig_info_fault(SIGSEGV, si_code, address, tsk);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_f00f_bug(regs, address))
|
||||
return;
|
||||
|
||||
no_context:
|
||||
/* Are we prepared to handle this kernel fault? */
|
||||
if (fixup_exception(regs))
|
||||
return;
|
||||
|
||||
/*
|
||||
* X86_32
|
||||
* Valid to do another page fault here, because if this fault
|
||||
* had been triggered by is_prefetch fixup_exception would have
|
||||
* handled it.
|
||||
*
|
||||
* X86_64
|
||||
* Hall of shame of CPU/BIOS bugs.
|
||||
*/
|
||||
if (is_prefetch(regs, address, error_code))
|
||||
return;
|
||||
|
||||
if (is_errata93(regs, address))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Oops. The kernel tried to access some bad page. We'll have to
|
||||
* terminate things with extreme prejudice.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
bust_spinlocks(1);
|
||||
#else
|
||||
flags = oops_begin();
|
||||
#endif
|
||||
|
||||
show_fault_oops(regs, error_code, address);
|
||||
|
||||
tsk->thread.cr2 = address;
|
||||
tsk->thread.trap_no = 14;
|
||||
tsk->thread.error_code = error_code;
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
die("Oops", regs, error_code);
|
||||
bust_spinlocks(0);
|
||||
do_exit(SIGKILL);
|
||||
#else
|
||||
sig = SIGKILL;
|
||||
if (__die("Oops", regs, error_code))
|
||||
sig = 0;
|
||||
/* Executive summary in case the body of the oops scrolled away */
|
||||
printk(KERN_EMERG "CR2: %016lx\n", address);
|
||||
oops_end(flags, regs, sig);
|
||||
#endif
|
||||
|
||||
out_of_memory:
|
||||
/*
|
||||
* We ran out of memory, call the OOM killer, and return the userspace
|
||||
* (which will retry the fault, or kill us if we got oom-killed).
|
||||
*/
|
||||
up_read(&mm->mmap_sem);
|
||||
pagefault_out_of_memory();
|
||||
return;
|
||||
|
||||
do_sigbus:
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
/* Kernel mode? Handle exceptions or die */
|
||||
if (!(error_code & PF_USER))
|
||||
goto no_context;
|
||||
#ifdef CONFIG_X86_32
|
||||
/* User space => ok to do another page fault */
|
||||
if (is_prefetch(regs, address, error_code))
|
||||
return;
|
||||
#endif
|
||||
tsk->thread.cr2 = address;
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_no = 14;
|
||||
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
|
||||
}
|
||||
|
||||
DEFINE_SPINLOCK(pgd_lock);
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <asm/numa.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/uv/uv.h>
|
||||
|
||||
int acpi_numa __initdata;
|
||||
|
||||
|
@ -1,22 +1,15 @@
|
||||
#include <linux/init.h>
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/mc146818rtc.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/mtrr.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/uv/uv_hub.h>
|
||||
#include <asm/uv/uv_bau.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/uv/uv.h>
|
||||
|
||||
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
|
||||
= { &init_mm, 0, };
|
||||
@ -36,7 +29,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
|
||||
* To avoid global state use 8 different call vectors.
|
||||
* Each CPU uses a specific vector to trigger flushes on other
|
||||
* CPUs. Depending on the received vector the target CPUs look into
|
||||
* the right per cpu variable for the flush data.
|
||||
* the right array slot for the flush data.
|
||||
*
|
||||
* With more than 8 CPUs they are hashed to the 8 available
|
||||
* vectors. The limited global vector space forces us to this right now.
|
||||
@ -51,13 +44,13 @@ union smp_flush_state {
|
||||
spinlock_t tlbstate_lock;
|
||||
DECLARE_BITMAP(flush_cpumask, NR_CPUS);
|
||||
};
|
||||
char pad[SMP_CACHE_BYTES];
|
||||
} ____cacheline_aligned;
|
||||
char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
|
||||
} ____cacheline_internodealigned_in_smp;
|
||||
|
||||
/* State is put into the per CPU data section, but padded
|
||||
to a full cache line because other CPUs can access it and we don't
|
||||
want false sharing in the per cpu data segment. */
|
||||
static DEFINE_PER_CPU(union smp_flush_state, flush_state);
|
||||
static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
|
||||
|
||||
/*
|
||||
* We cannot call mmdrop() because we are in interrupt context,
|
||||
@ -120,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
|
||||
* Interrupts are disabled.
|
||||
*/
|
||||
|
||||
asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
|
||||
/*
|
||||
* FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
|
||||
* but still used for documentation purpose but the usage is slightly
|
||||
* inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
|
||||
* entry calls in with the first parameter in %eax. Maybe define
|
||||
* intrlinkage?
|
||||
*/
|
||||
#ifdef CONFIG_X86_64
|
||||
asmlinkage
|
||||
#endif
|
||||
void smp_invalidate_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
int cpu;
|
||||
int sender;
|
||||
unsigned int cpu;
|
||||
unsigned int sender;
|
||||
union smp_flush_state *f;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
@ -132,7 +135,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
|
||||
* Use that to determine where the sender put the data.
|
||||
*/
|
||||
sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
|
||||
f = &per_cpu(flush_state, sender);
|
||||
f = &flush_state[sender];
|
||||
|
||||
if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
|
||||
goto out;
|
||||
@ -156,19 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
|
||||
}
|
||||
out:
|
||||
ack_APIC_irq();
|
||||
smp_mb__before_clear_bit();
|
||||
cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
|
||||
smp_mb__after_clear_bit();
|
||||
inc_irq_stat(irq_tlb_count);
|
||||
}
|
||||
|
||||
static void flush_tlb_others_ipi(const struct cpumask *cpumask,
|
||||
struct mm_struct *mm, unsigned long va)
|
||||
{
|
||||
int sender;
|
||||
unsigned int sender;
|
||||
union smp_flush_state *f;
|
||||
|
||||
/* Caller has disabled preemption */
|
||||
sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
|
||||
f = &per_cpu(flush_state, sender);
|
||||
f = &flush_state[sender];
|
||||
|
||||
/*
|
||||
* Could avoid this lock when
|
||||
@ -206,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
|
||||
struct mm_struct *mm, unsigned long va)
|
||||
{
|
||||
if (is_uv_system()) {
|
||||
/* FIXME: could be an percpu_alloc'd thing */
|
||||
static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
|
||||
struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
|
||||
unsigned int cpu;
|
||||
|
||||
cpumask_andnot(after_uv_flush, cpumask,
|
||||
cpumask_of(smp_processor_id()));
|
||||
if (!uv_flush_tlb_others(after_uv_flush, mm, va))
|
||||
flush_tlb_others_ipi(after_uv_flush, mm, va);
|
||||
|
||||
put_cpu_var(flush_tlb_uv_cpumask);
|
||||
cpu = get_cpu();
|
||||
cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
|
||||
if (cpumask)
|
||||
flush_tlb_others_ipi(cpumask, mm, va);
|
||||
put_cpu();
|
||||
return;
|
||||
}
|
||||
flush_tlb_others_ipi(cpumask, mm, va);
|
||||
@ -225,8 +227,8 @@ static int __cpuinit init_smp_flush(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
|
||||
for (i = 0; i < ARRAY_SIZE(flush_state); i++)
|
||||
spin_lock_init(&flush_state[i].tlbstate_lock);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
#ifdef CONFIG_X86_64
|
||||
/* Disable until direct per-cpu data access. */
|
||||
have_vcpu_info_placement = 0;
|
||||
pda_init(0);
|
||||
#endif
|
||||
|
||||
xen_smp_init();
|
||||
|
@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES
|
||||
config SGI_XP
|
||||
tristate "Support communication between SGI SSIs"
|
||||
depends on NET
|
||||
depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
|
||||
depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
|
||||
select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
|
||||
select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
|
||||
select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
|
||||
@ -197,7 +197,7 @@ config HP_ILO
|
||||
|
||||
config SGI_GRU
|
||||
tristate "SGI GRU driver"
|
||||
depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
|
||||
depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
|
||||
default n
|
||||
select MMU_NOTIFIER
|
||||
---help---
|
||||
|
@ -19,6 +19,8 @@
|
||||
#ifndef __GRU_H__
|
||||
#define __GRU_H__
|
||||
|
||||
#include <asm/uv/uv.h>
|
||||
|
||||
/*
|
||||
* GRU architectural definitions
|
||||
*/
|
||||
|
@ -15,6 +15,8 @@
|
||||
|
||||
#include <linux/mutex.h>
|
||||
|
||||
#include <asm/uv/uv.h>
|
||||
|
||||
#ifdef CONFIG_IA64
|
||||
#include <asm/system.h>
|
||||
#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
|
||||
|
@ -430,22 +430,10 @@
|
||||
*(.initcall7.init) \
|
||||
*(.initcall7s.init)
|
||||
|
||||
#define PERCPU_PROLOG(vaddr) \
|
||||
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
|
||||
- LOAD_OFFSET) { \
|
||||
VMLINUX_SYMBOL(__per_cpu_start) = .;
|
||||
|
||||
#define PERCPU_EPILOG(phdr) \
|
||||
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||
} phdr \
|
||||
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
|
||||
|
||||
/**
|
||||
* PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc
|
||||
* PERCPU_VADDR - define output section for percpu area
|
||||
* @vaddr: explicit base address (optional)
|
||||
* @phdr: destination PHDR (optional)
|
||||
* @prealloc: the size of prealloc area
|
||||
*
|
||||
* Macro which expands to output section for percpu area. If @vaddr
|
||||
* is not blank, it specifies explicit base address and all percpu
|
||||
@ -457,39 +445,23 @@
|
||||
* section in the linker script will go there too. @phdr should have
|
||||
* a leading colon.
|
||||
*
|
||||
* If @prealloc is non-zero, the specified number of bytes will be
|
||||
* reserved at the start of percpu area. As the prealloc area is
|
||||
* likely to break alignment, this macro puts areas in increasing
|
||||
* alignment order.
|
||||
*
|
||||
* This macro defines three symbols, __per_cpu_load, __per_cpu_start
|
||||
* and __per_cpu_end. The first one is the vaddr of loaded percpu
|
||||
* init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
|
||||
* end offset.
|
||||
*/
|
||||
#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \
|
||||
PERCPU_PROLOG(vaddr) \
|
||||
. += prealloc; \
|
||||
*(.data.percpu) \
|
||||
*(.data.percpu.shared_aligned) \
|
||||
*(.data.percpu.page_aligned) \
|
||||
PERCPU_EPILOG(segment)
|
||||
|
||||
/**
|
||||
* PERCPU_VADDR - define output section for percpu area
|
||||
* @vaddr: explicit base address (optional)
|
||||
* @phdr: destination PHDR (optional)
|
||||
*
|
||||
* Macro which expands to output section for percpu area. Mostly
|
||||
* identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
|
||||
* using slighly different layout.
|
||||
*/
|
||||
#define PERCPU_VADDR(vaddr, phdr) \
|
||||
PERCPU_PROLOG(vaddr) \
|
||||
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||
.data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
|
||||
- LOAD_OFFSET) { \
|
||||
VMLINUX_SYMBOL(__per_cpu_start) = .; \
|
||||
*(.data.percpu.first) \
|
||||
*(.data.percpu.page_aligned) \
|
||||
*(.data.percpu) \
|
||||
*(.data.percpu.shared_aligned) \
|
||||
PERCPU_EPILOG(phdr)
|
||||
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||
} phdr \
|
||||
. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
|
||||
|
||||
/**
|
||||
* PERCPU - define output section for percpu area, simple version
|
||||
|
@ -49,4 +49,5 @@
|
||||
#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
|
||||
#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA
|
||||
|
||||
#define STACK_END_MAGIC 0x57AC6E9D
|
||||
#endif /* __LINUX_MAGIC_H__ */
|
||||
|
@ -9,34 +9,39 @@
|
||||
#include <asm/percpu.h>
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#define DEFINE_PER_CPU(type, name) \
|
||||
__attribute__((__section__(".data.percpu"))) \
|
||||
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
|
||||
#define PER_CPU_BASE_SECTION ".data.percpu"
|
||||
|
||||
#ifdef MODULE
|
||||
#define SHARED_ALIGNED_SECTION ".data.percpu"
|
||||
#define PER_CPU_SHARED_ALIGNED_SECTION ""
|
||||
#else
|
||||
#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned"
|
||||
#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
|
||||
#endif
|
||||
#define PER_CPU_FIRST_SECTION ".first"
|
||||
|
||||
#else
|
||||
|
||||
#define PER_CPU_BASE_SECTION ".data"
|
||||
#define PER_CPU_SHARED_ALIGNED_SECTION ""
|
||||
#define PER_CPU_FIRST_SECTION ""
|
||||
|
||||
#endif
|
||||
|
||||
#define DEFINE_PER_CPU_SECTION(type, name, section) \
|
||||
__attribute__((__section__(PER_CPU_BASE_SECTION section))) \
|
||||
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
|
||||
|
||||
#define DEFINE_PER_CPU(type, name) \
|
||||
DEFINE_PER_CPU_SECTION(type, name, "")
|
||||
|
||||
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
|
||||
__attribute__((__section__(SHARED_ALIGNED_SECTION))) \
|
||||
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
|
||||
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
|
||||
____cacheline_aligned_in_smp
|
||||
|
||||
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
|
||||
__attribute__((__section__(".data.percpu.page_aligned"))) \
|
||||
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
|
||||
#else
|
||||
#define DEFINE_PER_CPU(type, name) \
|
||||
PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
|
||||
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
|
||||
DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
|
||||
|
||||
#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
|
||||
DEFINE_PER_CPU(type, name)
|
||||
|
||||
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
|
||||
DEFINE_PER_CPU(type, name)
|
||||
#endif
|
||||
#define DEFINE_PER_CPU_FIRST(type, name) \
|
||||
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
|
||||
|
||||
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
|
||||
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
|
||||
|
@ -1159,10 +1159,9 @@ struct task_struct {
|
||||
pid_t pid;
|
||||
pid_t tgid;
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
/* Canary value for the -fstack-protector gcc feature */
|
||||
unsigned long stack_canary;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pointers to (original) parent process, youngest child, younger sibling,
|
||||
* older sibling, respectively. (p->father can be replaced with
|
||||
@ -2069,6 +2068,19 @@ static inline int object_is_on_stack(void *obj)
|
||||
|
||||
extern void thread_info_cache_init(void);
|
||||
|
||||
#ifdef CONFIG_DEBUG_STACK_USAGE
|
||||
static inline unsigned long stack_not_used(struct task_struct *p)
|
||||
{
|
||||
unsigned long *n = end_of_stack(p);
|
||||
|
||||
do { /* Skip over canary */
|
||||
n++;
|
||||
} while (!*n);
|
||||
|
||||
return (unsigned long)n - (unsigned long)end_of_stack(p);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* set thread flags in other task's structures
|
||||
* - see asm/thread_info.h for TIF_xxxx flags available
|
||||
*/
|
||||
|
16
include/linux/stackprotector.h
Normal file
16
include/linux/stackprotector.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef _LINUX_STACKPROTECTOR_H
|
||||
#define _LINUX_STACKPROTECTOR_H 1
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/random.h>
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
# include <asm/stackprotector.h>
|
||||
#else
|
||||
static inline void boot_init_stack_canary(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -14,6 +14,7 @@
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/stackprotector.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/delay.h>
|
||||
@ -539,6 +540,12 @@ asmlinkage void __init start_kernel(void)
|
||||
*/
|
||||
lockdep_init();
|
||||
debug_objects_early_init();
|
||||
|
||||
/*
|
||||
* Set up the the initial canary ASAP:
|
||||
*/
|
||||
boot_init_stack_canary();
|
||||
|
||||
cgroup_init_early();
|
||||
|
||||
local_irq_disable();
|
||||
|
@ -980,12 +980,9 @@ static void check_stack_usage(void)
|
||||
{
|
||||
static DEFINE_SPINLOCK(low_water_lock);
|
||||
static int lowest_to_date = THREAD_SIZE;
|
||||
unsigned long *n = end_of_stack(current);
|
||||
unsigned long free;
|
||||
|
||||
while (*n == 0)
|
||||
n++;
|
||||
free = (unsigned long)n - (unsigned long)end_of_stack(current);
|
||||
free = stack_not_used(current);
|
||||
|
||||
if (free >= lowest_to_date)
|
||||
return;
|
||||
|
@ -61,6 +61,7 @@
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <trace/sched.h>
|
||||
#include <linux/magic.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
struct thread_info *ti;
|
||||
unsigned long *stackend;
|
||||
|
||||
int err;
|
||||
|
||||
prepare_to_copy(orig);
|
||||
@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
|
||||
goto out;
|
||||
|
||||
setup_thread_stack(tsk, orig);
|
||||
stackend = end_of_stack(tsk);
|
||||
*stackend = STACK_END_MAGIC; /* for overflow detection */
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
tsk->stack_canary = get_random_int();
|
||||
|
@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...)
|
||||
vsnprintf(buf, sizeof(buf), fmt, args);
|
||||
va_end(args);
|
||||
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
|
||||
#ifdef CONFIG_DEBUG_BUGVERBOSE
|
||||
dump_stack();
|
||||
#endif
|
||||
bust_spinlocks(0);
|
||||
|
||||
/*
|
||||
@ -355,15 +358,22 @@ EXPORT_SYMBOL(warn_slowpath);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
|
||||
#ifndef GCC_HAS_SP
|
||||
#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Called when gcc's -fstack-protector feature is used, and
|
||||
* gcc detects corruption of the on-stack canary value
|
||||
*/
|
||||
void __stack_chk_fail(void)
|
||||
{
|
||||
panic("stack-protector: Kernel stack is corrupted");
|
||||
panic("stack-protector: Kernel stack is corrupted in: %p\n",
|
||||
__builtin_return_address(0));
|
||||
}
|
||||
EXPORT_SYMBOL(__stack_chk_fail);
|
||||
|
||||
#endif
|
||||
|
||||
core_param(panic, panic_timeout, int, 0644);
|
||||
|
@ -6009,12 +6009,7 @@ void sched_show_task(struct task_struct *p)
|
||||
printk(KERN_CONT " %016lx ", thread_saved_pc(p));
|
||||
#endif
|
||||
#ifdef CONFIG_DEBUG_STACK_USAGE
|
||||
{
|
||||
unsigned long *n = end_of_stack(p);
|
||||
while (!*n)
|
||||
n++;
|
||||
free = (unsigned long)n - (unsigned long)end_of_stack(p);
|
||||
}
|
||||
free = stack_not_used(p);
|
||||
#endif
|
||||
printk(KERN_CONT "%5lu %5d %6d\n", free,
|
||||
task_pid_nr(p), task_pid_nr(p->real_parent));
|
||||
|
@ -971,6 +971,8 @@ undo:
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static struct workqueue_struct *work_on_cpu_wq __read_mostly;
|
||||
|
||||
struct work_for_cpu {
|
||||
struct work_struct work;
|
||||
long (*fn)(void *);
|
||||
@ -991,8 +993,8 @@ static void do_work_for_cpu(struct work_struct *w)
|
||||
* @fn: the function to run
|
||||
* @arg: the function arg
|
||||
*
|
||||
* This will return -EINVAL in the cpu is not online, or the return value
|
||||
* of @fn otherwise.
|
||||
* This will return the value @fn returns.
|
||||
* It is up to the caller to ensure that the cpu doesn't go offline.
|
||||
*/
|
||||
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
|
||||
{
|
||||
@ -1001,14 +1003,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
|
||||
INIT_WORK(&wfc.work, do_work_for_cpu);
|
||||
wfc.fn = fn;
|
||||
wfc.arg = arg;
|
||||
get_online_cpus();
|
||||
if (unlikely(!cpu_online(cpu)))
|
||||
wfc.ret = -EINVAL;
|
||||
else {
|
||||
schedule_work_on(cpu, &wfc.work);
|
||||
flush_work(&wfc.work);
|
||||
}
|
||||
put_online_cpus();
|
||||
queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
|
||||
flush_work(&wfc.work);
|
||||
|
||||
return wfc.ret;
|
||||
}
|
||||
@ -1025,4 +1021,8 @@ void __init init_workqueues(void)
|
||||
hotcpu_notifier(workqueue_cpu_callback, 0);
|
||||
keventd_wq = create_workqueue("events");
|
||||
BUG_ON(!keventd_wq);
|
||||
#ifdef CONFIG_SMP
|
||||
work_on_cpu_wq = create_workqueue("work_on_cpu");
|
||||
BUG_ON(!work_on_cpu_wq);
|
||||
#endif
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user