[SPARC64]: Fix TLB context allocation with SMT style shared TLBs.
The context allocation scheme we use depends upon there being a 1<-->1
mapping from cpu to physical TLB for correctness.  Chips like Niagara
break this assumption.

So what we do is notify all cpus with a cross call when the context
version number changes, and if necessary this makes them allocate a
valid context for the address space they are running at the time.

Stress tested with make -j1024, make -j2048, and make -j4096 kernel
builds on a 32-strand, 8 core, T2000 with 16GB of ram.

Signed-off-by: David S. Miller <davem@davemloft.net>
commit a0663a79ad
parent 074d82cf68
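For readers unfamiliar with the version-tagged context scheme the message
refers to, here is a minimal, self-contained user-space sketch of the idea:
each context number carries the global version in its upper bits, a wrap of
the number space bumps the version, and any address space still holding an
old-version context fails the validity check and must reallocate (which is
what the cross call forces each cpu to notice).  The names and constants
below (CTX_NR_BITS set to 4, ctx_valid(), get_new_context()) are simplified
stand-ins for the kernel's CTX_* macros and get_new_mmu_context(), not the
actual implementation.

/* Illustrative sketch only -- not the kernel code.  A tiny 4-bit
 * context number space makes the version roll-over easy to trigger.
 */
#include <stdio.h>

#define CTX_NR_BITS		4
#define CTX_NR_MASK		((1UL << CTX_NR_BITS) - 1)
#define CTX_FIRST_VERSION	(1UL << CTX_NR_BITS)
#define CTX_VERSION_MASK	(~CTX_NR_MASK)

/* Current version in the high bits, most recently handed out
 * context number in the low bits.
 */
static unsigned long tlb_context_cache = CTX_FIRST_VERSION;

/* A context is valid only while its version bits match the global version. */
static int ctx_valid(unsigned long ctx)
{
	return (ctx & CTX_VERSION_MASK) == (tlb_context_cache & CTX_VERSION_MASK);
}

/* Hand out the next context number; on wrap-around, bump the version and
 * report that every cpu must be told to revalidate its context.
 */
static unsigned long get_new_context(int *new_version)
{
	unsigned long nr = (tlb_context_cache + 1) & CTX_NR_MASK;

	*new_version = 0;
	if (nr == 0) {			/* number space exhausted */
		nr = 1;			/* context 0 stays reserved */
		tlb_context_cache += CTX_FIRST_VERSION;
		*new_version = 1;	/* caller would cross-call all cpus */
	}
	tlb_context_cache = (tlb_context_cache & CTX_VERSION_MASK) | nr;
	return tlb_context_cache;
}

int main(void)
{
	int new_version;
	unsigned long mm_ctx = get_new_context(&new_version);
	int i;

	/* Burn through the whole number space to force a roll-over. */
	for (i = 0; i < (1 << CTX_NR_BITS); i++) {
		get_new_context(&new_version);
		if (new_version)
			printf("version bump: notify every cpu\n");
	}

	/* The old context now fails the version check -- the condition the
	 * cross-call handler repairs by allocating a fresh context.
	 */
	printf("old ctx %#lx still valid? %d\n", mm_ctx, ctx_valid(mm_ctx));
	return 0;
}

In the kernel the same check is CTX_VALID(mm->context), the version bump
happens in get_new_mmu_context(), and the notification is
smp_new_mmu_context_version() cross-calling the other cpus, as the hunks
below show.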
@@ -885,26 +885,44 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 	put_cpu();
 }
 
+static void __smp_receive_signal_mask(cpumask_t mask)
+{
+	smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
+}
+
 void smp_receive_signal(int cpu)
 {
 	cpumask_t mask = cpumask_of_cpu(cpu);
 
-	if (cpu_online(cpu)) {
-		u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
-
-		if (tlb_type == spitfire)
-			spitfire_xcall_deliver(data0, 0, 0, mask);
-		else if (tlb_type == cheetah || tlb_type == cheetah_plus)
-			cheetah_xcall_deliver(data0, 0, 0, mask);
-		else if (tlb_type == hypervisor)
-			hypervisor_xcall_deliver(data0, 0, 0, mask);
-	}
+	if (cpu_online(cpu))
+		__smp_receive_signal_mask(mask);
 }
 
 void smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
-	/* Just return, rtrap takes care of the rest. */
+	struct mm_struct *mm;
+
 	clear_softint(1 << irq);
+
+	/* See if we need to allocate a new TLB context because
+	 * the version of the one we are using is now out of date.
+	 */
+	mm = current->active_mm;
+	if (likely(mm)) {
+		if (unlikely(!CTX_VALID(mm->context))) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&mm->context.lock, flags);
+			get_new_mmu_context(mm);
+			load_secondary_context(mm);
+			spin_unlock_irqrestore(&mm->context.lock, flags);
+		}
+	}
+}
+
+void smp_new_mmu_context_version(void)
+{
+	__smp_receive_signal_mask(cpu_online_map);
 }
 
 void smp_report_regs(void)
@@ -629,17 +629,20 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
  * let the user have CTX 0 (nucleus) or we ever use a CTX
  * version of zero (and thus NO_CONTEXT would not be caught
  * by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
  */
 void get_new_mmu_context(struct mm_struct *mm)
 {
 	unsigned long ctx, new_ctx;
 	unsigned long orig_pgsz_bits;
+	int new_version;
 
 	spin_lock(&ctx_alloc_lock);
 	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
 	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
 	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+	new_version = 0;
 	if (new_ctx >= (1 << CTX_NR_BITS)) {
 		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
 		if (new_ctx >= ctx) {
@@ -662,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
 				mmu_context_bmap[i + 2] = 0;
 				mmu_context_bmap[i + 3] = 0;
 			}
+			new_version = 1;
 			goto out;
 		}
 	}
@@ -671,6 +675,9 @@ out:
 	tlb_context_cache = new_ctx;
 	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
 	spin_unlock(&ctx_alloc_lock);
+
+	if (unlikely(new_version))
+		smp_new_mmu_context_version();
 }
 
 void sparc_ultra_dump_itlb(void)
@@ -102,6 +102,7 @@ extern void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte
 extern void tsb_flush(unsigned long ent, unsigned long tag);
 
 typedef struct {
+	spinlock_t lock;
 	unsigned long sparc64_ctx_val;
 	struct tsb *tsb;
 	unsigned long tsb_rss_limit;
@@ -19,6 +19,12 @@ extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
 extern void get_new_mmu_context(struct mm_struct *mm);
+#ifdef CONFIG_SMP
+extern void smp_new_mmu_context_version(void);
+#else
+#define smp_new_mmu_context_version() do { } while (0)
+#endif
+
 extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 extern void destroy_context(struct mm_struct *mm);
 
@@ -58,21 +64,17 @@ extern void smp_tsb_sync(struct mm_struct *mm);
 
 extern void __flush_tlb_mm(unsigned long, unsigned long);
 
-/* Switch the current MM context. */
+/* Switch the current MM context. Interrupts are disabled. */
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
 {
 	unsigned long ctx_valid;
 	int cpu;
 
-	/* Note: page_table_lock is used here to serialize switch_mm
-	 * and activate_mm, and their calls to get_new_mmu_context.
-	 * This use of page_table_lock is unrelated to its other uses.
-	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(&mm->context.lock);
 	ctx_valid = CTX_VALID(mm->context);
 	if (!ctx_valid)
 		get_new_mmu_context(mm);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(&mm->context.lock);
 
 	if (!ctx_valid || (old_mm != mm)) {
 		load_secondary_context(mm);
@@ -98,19 +100,16 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 /* Activate a new MM instance for the current task. */
 static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
 {
+	unsigned long flags;
 	int cpu;
 
-	/* Note: page_table_lock is used here to serialize switch_mm
-	 * and activate_mm, and their calls to get_new_mmu_context.
-	 * This use of page_table_lock is unrelated to its other uses.
-	 */
-	spin_lock(&mm->page_table_lock);
+	spin_lock_irqsave(&mm->context.lock, flags);
 	if (!CTX_VALID(mm->context))
 		get_new_mmu_context(mm);
 	cpu = smp_processor_id();
 	if (!cpu_isset(cpu, mm->cpu_vm_mask))
 		cpu_set(cpu, mm->cpu_vm_mask);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);