/*
 * This file contains the routines for handling the MMU on those
 * PowerPC implementations where the MMU is not using the hash
 * table, such as 8xx, 4xx, BookE's etc...
 *
 * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
 *                IBM Corp.
 *
 *  Derived from previous arch/powerpc/mm/mmu_context.c
 *  and arch/powerpc/include/asm/mmu_context.h
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 * TODO:
 *
 *   - The global context lock will not scale very well
 *   - The maps should be dynamically allocated to allow for processors
 *     that support more PID bits at runtime
 *   - Implement flush_tlb_mm() by making the context stale and picking
 *     a new one
 *   - More aggressively clear stale map bits and maybe find some way to
 *     also clear mm->cpu_vm_mask bits when processes are migrated
 */
//#define DEBUG_MAP_CONSISTENCY
//#define DEBUG_CLAMP_LAST_CONTEXT	31
//#define DEBUG_HARDER
/* We don't use DEBUG because it tends to be compiled in always nowadays
 * and this would generate way too much output
 */
#ifdef DEBUG_HARDER
#define pr_hard(args...)	printk(KERN_DEBUG args)
#define pr_hardcont(args...)	printk(KERN_CONT args)
#else
#define pr_hard(args...)	do { } while(0)
#define pr_hardcont(args...)	do { } while(0)
#endif
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
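/*
 * Context (PID) allocation state:
 *
 *  first_context, last_context: range of hardware context numbers
 *	supported by the MMU family we are running on
 *  next_context: where the next allocation search starts
 *  nr_free_contexts: how many context numbers are still unallocated
 *  context_map: bitmap of context numbers currently in use
 *  stale_map[cpu]: bitmap of context numbers whose TLB entries on that
 *	CPU may be stale and need flushing before the context is reused
 *  context_mm[id]: reverse map from context number to the owning mm
 *
 * The bitmaps and counters are manipulated under context_lock.
 */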
static unsigned int first_context, last_context;
static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
static unsigned long *stale_map[NR_CPUS];
static struct mm_struct **context_mm;
static DEFINE_SPINLOCK(context_lock);
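/* Size in bytes of a bitmap large enough to cover context IDs 0..last_context */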
#define CTX_MAP_SIZE	\
	(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
/* Steal a context from a task that has one at the moment.
 *
 * This is used when we are running out of available PID numbers
 * on the processors.
 *
 * This isn't an LRU system, it just frees up each context in
 * turn (sort-of pseudo-random replacement :).  This would be the
 * place to implement an LRU scheme if anyone was motivated to do it.
 *    -- paulus
 *
 * For context stealing, we use a slightly different approach for
 * SMP and UP. Basically, the UP one is simpler and doesn't use
 * the stale map as we can just flush the local CPU
 *  -- benh
 */
#ifdef CONFIG_SMP
static unsigned int steal_context_smp(unsigned int id)
{
	struct mm_struct *mm;
	unsigned int cpu, max, i;

	max = last_context - first_context;

	/* Attempt to free next_context first and then loop until we manage */
	while (max--) {
		/* Pick up the victim mm */
		mm = context_mm[id];

		/* We have a candidate victim, check if it's active, on SMP
		 * we cannot steal active contexts
		 */
		if (mm->context.active) {
			id++;
			if (id > last_context)
				id = first_context;
			continue;
		}
		pr_hardcont(" | steal %d from 0x%p", id, mm);

		/* Mark this mm as having no context anymore */
		mm->context.id = MMU_NO_CONTEXT;

		/* Mark it stale on all CPUs that used this mm. For threaded
		 * implementations, we set it on all threads on each core
		 * represented in the mask. A future implementation will use
		 * a core map instead but this will do for now.
		 */
		for_each_cpu(cpu, mm_cpumask(mm)) {
			for (i = cpu_first_thread_in_core(cpu);
			     i <= cpu_last_thread_in_core(cpu); i++)
				__set_bit(id, stale_map[i]);
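			/* Jump to the last thread of this core so that
			 * for_each_cpu() advances straight to the next core
			 */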
			cpu = i - 1;
		}
		return id;
	}

	/* This will happen if you have more CPUs than available contexts,
	 * all we can do here is wait a bit and try again
	 */
	spin_unlock(&context_lock);
	cpu_relax();
	spin_lock(&context_lock);

	/* This will cause the caller to try again */
	return MMU_NO_CONTEXT;
}
#endif  /* CONFIG_SMP */

/* Note that this will also be called on SMP if all other CPUs are
 * offlined, which means that it may be called for cpu != 0. For
 * this to work, we somewhat assume that CPUs that are onlined
 * come up with a fully clean TLB (or are cleaned when offlined)
 */
static unsigned int steal_context_up(unsigned int id)
{
	struct mm_struct *mm;
	int cpu = smp_processor_id();

	/* Pick up the victim mm */
	mm = context_mm[id];

	pr_hardcont(" | steal %d from 0x%p", id, mm);

	/* Flush the TLB for that context */
	local_flush_tlb_mm(mm);

	/* Mark this mm as having no context anymore */
	mm->context.id = MMU_NO_CONTEXT;

	/* XXX This clear should ultimately be part of local_flush_tlb_mm */
	__clear_bit(id, stale_map[cpu]);

	return id;
}
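/* Debug helper: verify that context_map, context_mm[] and the free/active
 * counters are consistent with each other (DEBUG_MAP_CONSISTENCY only)
 */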
#ifdef DEBUG_MAP_CONSISTENCY
static void context_check_map(void)
{
	unsigned int id, nrf, nact;

	nrf = nact = 0;
	for (id = first_context; id <= last_context; id++) {
		int used = test_bit(id, context_map);
		if (!used)
			nrf++;
		if (used != (context_mm[id] != NULL))
			pr_err("MMU: Context %d is %s and MM is %p !\n",
			       id, used ? "used" : "free", context_mm[id]);
		if (context_mm[id] != NULL)
			nact += context_mm[id]->context.active;
	}
	if (nrf != nr_free_contexts) {
		pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
		       nr_free_contexts, nrf);
		nr_free_contexts = nrf;
	}
	if (nact > num_online_cpus())
		pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
		       nact, num_online_cpus());
	if (first_context > 0 && !test_bit(0, context_map))
		pr_err("MMU: Context 0 has been freed !!!\n");
}
#else
static void context_check_map(void) { }
#endif
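/* Called at context switch time: make sure 'next' has a valid hardware
 * context (allocating or stealing one if needed), flush any TLB entries
 * that went stale for it on this CPU, then program the context into the MMU.
 */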
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	unsigned int i, id, cpu = smp_processor_id();
	unsigned long *map;

	/* No lockless fast path .. yet */
	spin_lock(&context_lock);

	pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
		cpu, next, next->context.active, next->context.id);

#ifdef CONFIG_SMP
	/* Mark us active and the previous one not anymore */
	next->context.active++;
	if (prev) {
		pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
		WARN_ON(prev->context.active < 1);
		prev->context.active--;
	}

 again:
#endif /* CONFIG_SMP */

	/* If we already have a valid assigned context, skip all that */
	id = next->context.id;
	if (likely(id != MMU_NO_CONTEXT)) {
#ifdef DEBUG_MAP_CONSISTENCY
		if (context_mm[id] != next)
			pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
			       next, id, id, context_mm[id]);
#endif
		goto ctxt_ok;
	}

	/* We really don't have a context, let's try to acquire one */
	id = next_context;
	if (id > last_context)
		id = first_context;
	map = context_map;

	/* No more free contexts, let's try to steal one */
	if (nr_free_contexts == 0) {
#ifdef CONFIG_SMP
		if (num_online_cpus() > 1) {
			id = steal_context_smp(id);
			if (id == MMU_NO_CONTEXT)
				goto again;
			goto stolen;
		}
#endif /* CONFIG_SMP */
		id = steal_context_up(id);
		goto stolen;
	}
	nr_free_contexts--;

	/* We know there's at least one free context, try to find it */
	while (__test_and_set_bit(id, map)) {
		id = find_next_zero_bit(map, last_context + 1, id);
		if (id > last_context)
			id = first_context;
	}
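	/* At this point 'id' is a free (or freshly stolen) context: bind it
	 * to 'next' in both directions and start the next search after it.
	 */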
 stolen:
	next_context = id + 1;
	context_mm[id] = next;
	next->context.id = id;
	pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);

	context_check_map();
 ctxt_ok:

	/* If that context got marked stale on this CPU, then flush the
	 * local TLB for it and unmark it before we use it
	 */
	if (test_bit(id, stale_map[cpu])) {
		pr_hardcont(" | stale flush %d [%d..%d]",
			    id, cpu_first_thread_in_core(cpu),
			    cpu_last_thread_in_core(cpu));

		local_flush_tlb_mm(next);

		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
		for (i = cpu_first_thread_in_core(cpu);
		     i <= cpu_last_thread_in_core(cpu); i++) {
			__clear_bit(id, stale_map[i]);
		}
	}

	/* Flick the MMU and release lock */
	pr_hardcont(" -> %d\n", id);
	set_context(id, next->pgd);
	spin_unlock(&context_lock);
}
/*
 * Set up the context for a new address space.
 */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
	pr_hard("initing context for mm @%p\n", mm);

	mm->context.id = MMU_NO_CONTEXT;
	mm->context.active = 0;

	return 0;
}
/*
 * We're finished using the context for an address space.
 */
void destroy_context(struct mm_struct *mm)
{
	unsigned long flags;
	unsigned int id;

	if (mm->context.id == MMU_NO_CONTEXT)
		return;

	WARN_ON(mm->context.active != 0);

	spin_lock_irqsave(&context_lock, flags);
	id = mm->context.id;
	if (id != MMU_NO_CONTEXT) {
		__clear_bit(id, context_map);
		mm->context.id = MMU_NO_CONTEXT;
#ifdef DEBUG_MAP_CONSISTENCY
		mm->context.active = 0;
#endif
		context_mm[id] = NULL;
		nr_free_contexts++;
	}
	spin_unlock_irqrestore(&context_lock, flags);
}
#ifdef CONFIG_SMP
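/* CPU hotplug callback: secondary CPUs get their stale_map allocated when
 * they come online and freed when they die; CPU 0's map is allocated at
 * boot and kept around forever.
 */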
static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
					    unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned int)(long)hcpu;
#ifdef CONFIG_HOTPLUG_CPU
	struct task_struct *p;
#endif

	/* We don't touch CPU 0 map, it's allocated at boot and kept
	 * around forever
	 */
	if (cpu == 0)
		return NOTIFY_OK;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
		stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
		kfree(stale_map[cpu]);
		stale_map[cpu] = NULL;

		/* We also clear the cpu_vm_mask bits of CPUs going away */
		read_lock(&tasklist_lock);
		for_each_process(p) {
			if (p->mm)
				cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
		}
		read_unlock(&tasklist_lock);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata mmu_context_cpu_nb = {
	.notifier_call	= mmu_context_cpu_notify,
};

#endif /* CONFIG_SMP */
/*
 * Initialize the context management stuff.
 */
void __init mmu_context_init(void)
{
	/* Mark init_mm as being active on all possible CPUs since
	 * we'll get called with prev == init_mm the first time
	 * we schedule on a given CPU
	 */
	init_mm.context.active = NR_CPUS;

	/*
	 *   The MPC8xx has only 16 contexts.  We rotate through them on each
	 * task switch.  A better way would be to keep track of tasks that
	 * own contexts, and implement an LRU usage.  That way very active
	 * tasks don't always have to pay the TLB reload overhead.  The
	 * kernel pages are mapped shared, so the kernel can run on behalf
	 * of any task that makes a kernel entry.  Shared does not mean they
	 * are not protected, just that the ASID comparison is not performed.
	 *      -- Dan
	 *
	 * The IBM4xx has 256 contexts, so we can just rotate through these
	 * as a way of "switching" contexts.  If the TID of the TLB is zero,
	 * the PID/TID comparison is disabled, so we can use a TID of zero
	 * to represent all kernel pages as shared among all contexts.
	 *      -- Dan
	 */
	if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
		first_context = 0;
		last_context = 15;
	} else {
		first_context = 1;
		last_context = 255;
	}

#ifdef DEBUG_CLAMP_LAST_CONTEXT
	last_context = DEBUG_CLAMP_LAST_CONTEXT;
#endif
	/*
	 * Allocate the maps used by context management
	 */
	context_map = alloc_bootmem(CTX_MAP_SIZE);
	context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
	stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);

#ifdef CONFIG_SMP
	register_cpu_notifier(&mmu_context_cpu_nb);
#endif

	printk(KERN_INFO
	       "MMU: Allocated %zu bytes of context maps for %d contexts\n",
	       2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
	       last_context - first_context + 1);

	/*
	 * Some processors have too few contexts to reserve one for
	 * init_mm, and require using context 0 for a normal task.
	 * Other processors reserve the use of context zero for the kernel.
	 * This code assumes first_context < 32.
	 */
	context_map[0] = (1 << first_context) - 1;
	next_context = first_context;
	nr_free_contexts = last_context - first_context + 1;
}