/*
 *	Intel SMP support routines.
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
 *	(c) 2002, 2003 Andi Kleen, SuSE Labs.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/interrupt.h>

#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mach_apic.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
#include <asm/idle.h>

/*
 *	Smarter SMP flushing macros.
 *		c/o Linus Torvalds.
 *
 *	These mean you can really definitely utterly forget about
 *	writing to user space from interrupts. (It's not allowed anyway.)
 *
 *	Optimizations Manfred Spraul <manfred@colorfullife.com>
 *
 *	More scalable flush, from Andi Kleen
 *
 *	To avoid global state use 8 different call vectors.
 *	Each CPU uses a specific vector to trigger flushes on other
 *	CPUs. Depending on the received vector the target CPUs look into
 *	the right per cpu variable for the flush data.
 *
 *	With more than 8 CPUs they are hashed to the 8 available
 *	vectors. The limited global vector space forces us to this right now.
 *	In future when interrupts are split into per CPU domains this could be
 *	fixed, at the cost of triggering multiple IPIs in some cases.
 */
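
/*
 * Illustrative sketch of the vector hashing described above (the real
 * computation lives in native_flush_tlb_others() and
 * smp_invalidate_interrupt() below): the sending CPU is hashed onto one of
 * the NUM_INVALIDATE_TLB_VECTORS (8) slots, and the IPI vector it uses tells
 * the receivers which per cpu flush_state to read:
 *
 *	sender side:
 *		sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
 *		send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 *	receiver side:
 *		sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
 *		f = &per_cpu(flush_state, sender);
 */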

union smp_flush_state {
	struct {
		cpumask_t flush_cpumask;
		struct mm_struct *flush_mm;
		unsigned long flush_va;
		spinlock_t tlbstate_lock;
	};
	char pad[SMP_CACHE_BYTES];
} ____cacheline_aligned;

/* State is put into the per CPU data section, but padded
   to a full cache line because other CPUs can access it and we don't
   want false sharing in the per cpu data segment. */
static DEFINE_PER_CPU(union smp_flush_state, flush_state);

/*
 * We cannot call mmdrop() because we are in interrupt context,
 * instead update mm->cpu_vm_mask.
 */
void leave_mm(int cpu)
{
	if (read_pda(mmu_state) == TLBSTATE_OK)
		BUG();
	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
	load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);

/*
 *
 * The flush IPI assumes that a thread switch happens in this order:
 * [cpu0: the cpu that switches]
 * 1) switch_mm() either 1a) or 1b)
 * 1a) thread switch to a different mm
 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
 *	Stop ipi delivery for the old mm. This is not synchronized with
 *	the other cpus, but smp_invalidate_interrupt ignores flush ipis
 *	for the wrong mm, and in the worst case we perform a superfluous
 *	tlb flush.
 * 1a2) set cpu mmu_state to TLBSTATE_OK
 *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
 *	was in lazy tlb mode.
 * 1a3) update cpu active_mm
 *	Now cpu0 accepts tlb flushes for the new mm.
 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
 *	Now the other cpus will send tlb flush ipis.
 * 1a5) change cr3.
 * 1b) thread switch without mm change
 *	cpu active_mm is correct, cpu0 already handles
 *	flush ipis.
 * 1b1) set cpu mmu_state to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush ipis],
 *	and test the bit.
 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
 * 2) switch %%esp, ie. current
 *
 * The interrupt must handle 2 special cases:
 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
 *   runs in kernel space, the cpu could load tlb entries for user space
 *   pages.
 *
 * The good news is that cpu mmu_state is local to each cpu, no
 * write/read ordering problems.
 */

/*
 * TLB flush IPI:
 *
 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
 * 2) Leave the mm if we are in the lazy tlb mode.
 *
 * Interrupts are disabled.
 */
asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
{
	int cpu;
	int sender;
	union smp_flush_state *f;

	cpu = smp_processor_id();
	/*
	 * orig_rax contains the negated interrupt vector.
	 * Use that to determine where the sender put the data.
	 */
	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
	f = &per_cpu(flush_state, sender);

	if (!cpu_isset(cpu, f->flush_cpumask))
		goto out;
	/*
	 * This was a BUG() but until someone can quote me the
	 * line from the intel manual that guarantees an IPI to
	 * multiple CPUs is retried _only_ on the erroring CPUs
	 * it's staying as a return
	 *
	 * BUG();
	 */

	if (f->flush_mm == read_pda(active_mm)) {
		if (read_pda(mmu_state) == TLBSTATE_OK) {
			if (f->flush_va == TLB_FLUSH_ALL)
				local_flush_tlb();
			else
				__flush_tlb_one(f->flush_va);
		} else
			leave_mm(cpu);
	}
out:
	ack_APIC_irq();
	cpu_clear(cpu, f->flush_cpumask);
	add_pda(irq_tlb_count, 1);
}

void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
			     unsigned long va)
{
	int sender;
	union smp_flush_state *f;
	cpumask_t cpumask = *cpumaskp;

	/* Caller has disabled preemption */
	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
	f = &per_cpu(flush_state, sender);

	/*
	 * Could avoid this lock when
	 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
	 * probably not worth checking this for a cache-hot lock.
	 */
	spin_lock(&f->tlbstate_lock);

	f->flush_mm = mm;
	f->flush_va = va;
	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);

	/*
	 * We have to send the IPI only to
	 * CPUs affected.
	 */
	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);

	while (!cpus_empty(f->flush_cpumask))
		cpu_relax();

	f->flush_mm = NULL;
	f->flush_va = 0;
	spin_unlock(&f->tlbstate_lock);
}

int __cpuinit init_smp_flush(void)
{
	int i;

	for_each_cpu_mask(i, cpu_possible_map) {
		spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
	}
	return 0;
}
core_initcall(init_smp_flush);

void flush_tlb_current_task(void)
{
	struct mm_struct *mm = current->mm;
	cpumask_t cpu_mask;

	preempt_disable();
	cpu_mask = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), cpu_mask);

	local_flush_tlb();
	if (!cpus_empty(cpu_mask))
		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
	preempt_enable();
}

void flush_tlb_mm(struct mm_struct *mm)
{
	cpumask_t cpu_mask;

	preempt_disable();
	cpu_mask = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), cpu_mask);

	if (current->active_mm == mm) {
		if (current->mm)
			local_flush_tlb();
		else
			leave_mm(smp_processor_id());
	}
	if (!cpus_empty(cpu_mask))
		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);

	preempt_enable();
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
	struct mm_struct *mm = vma->vm_mm;
	cpumask_t cpu_mask;

	preempt_disable();
	cpu_mask = mm->cpu_vm_mask;
	cpu_clear(smp_processor_id(), cpu_mask);

	if (current->active_mm == mm) {
		if (current->mm)
			__flush_tlb_one(va);
		else
			leave_mm(smp_processor_id());
	}

	if (!cpus_empty(cpu_mask))
		flush_tlb_others(cpu_mask, mm, va);

	preempt_enable();
}

static void do_flush_tlb_all(void *info)
{
	unsigned long cpu = smp_processor_id();

	__flush_tlb_all();
	if (read_pda(mmu_state) == TLBSTATE_LAZY)
		leave_mm(cpu);
}

void flush_tlb_all(void)
{
	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void smp_send_reschedule(int cpu)
{
	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t started;
	atomic_t finished;
	int wait;
};

static struct call_data_struct *call_data;

void lock_ipi_call_lock(void)
{
	spin_lock_irq(&call_lock);
}

void unlock_ipi_call_lock(void)
{
	spin_unlock_irq(&call_lock);
}

/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * of the system defined in the mask.
 */
static int __smp_call_function_mask(cpumask_t mask,
				    void (*func)(void *), void *info,
				    int wait)
{
	struct call_data_struct data;
	cpumask_t allbutself;
	int cpus;

	allbutself = cpu_online_map;
	cpu_clear(smp_processor_id(), allbutself);

	cpus_and(mask, mask, allbutself);
	cpus = cpus_weight(mask);

	if (!cpus)
		return 0;

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	wmb();

	/* Send a message to other CPUs */
	if (cpus_equal(mask, allbutself))
		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
	else
		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (!wait)
		return 0;

	while (atomic_read(&data.finished) != cpus)
		cpu_relax();

	return 0;
}

/**
 * smp_call_function_mask(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on.  Must not include the current cpu.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function_mask(cpumask_t mask,
			   void (*func)(void *), void *info,
			   int wait)
{
	int ret;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	spin_lock(&call_lock);
	ret = __smp_call_function_mask(mask, func, info, wait);
	spin_unlock(&call_lock);
	return ret;
}
EXPORT_SYMBOL(smp_call_function_mask);
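
/*
 * Minimal usage sketch for smp_call_function_mask(). The callback, the
 * counter and the target cpu number below are hypothetical; they only
 * illustrate the calling convention (process context, interrupts enabled,
 * @func fast and non-blocking):
 *
 *	static atomic_t example_hits;			// hypothetical
 *
 *	static void example_count(void *info)		// runs on each target cpu
 *	{
 *		atomic_inc(&example_hits);
 *	}
 *
 *	// run on one hypothetical remote cpu and wait for completion
 *	smp_call_function_mask(cpumask_of_cpu(1), example_count, NULL, 1);
 */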

/*
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run @func on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: Currently unused.
 * @wait: If true, wait until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>
 * or is or has executed.
 */
int smp_call_function_single(int cpu, void (*func)(void *info), void *info,
			     int nonatomic, int wait)
{
	/* prevent preemption and reschedule on another processor */
	int ret, me = get_cpu();

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	if (cpu == me) {
		local_irq_disable();
		func(info);
		local_irq_enable();
		put_cpu();
		return 0;
	}

	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);

	put_cpu();
	return ret;
}
EXPORT_SYMBOL(smp_call_function_single);
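
/*
 * Minimal usage sketch for smp_call_function_single(). example_poke(), the
 * argument and the cpu number are hypothetical; they only show the calling
 * convention (process context, interrupts enabled):
 *
 *	static void example_poke(void *info)	// runs on the chosen cpu
 *	{
 *		pr_debug("got %d\n", *(int *)info);
 *	}
 *
 *	int arg = 42;
 *	// run example_poke(&arg) on cpu 2 and wait for it to finish
 *	smp_call_function_single(2, example_poke, &arg, 0, 1);
 */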

/*
 * smp_call_function - run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: currently unused.
 * @wait: If true, wait (atomically) until function has completed on other
 *        CPUs.
 *
 * Returns 0 on success, else a negative status code. Does not return until
 * remote CPUs are nearly ready to execute func or are or have executed.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 * Actually there are a few legal cases, like panic.
 */
int smp_call_function(void (*func)(void *info), void *info, int nonatomic,
		      int wait)
{
	return smp_call_function_mask(cpu_online_map, func, info, wait);
}
EXPORT_SYMBOL(smp_call_function);
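
/*
 * Minimal usage sketch for smp_call_function(). example_sync() is
 * hypothetical; note that the function runs on all *other* online cpus,
 * so call it locally as well if the current cpu must participate:
 *
 *	static void example_sync(void *info)	// must be fast, non-blocking
 *	{
 *		sync_core();
 *	}
 *
 *	smp_call_function(example_sync, NULL, 0, 1);
 *	example_sync(NULL);			// run on this cpu too
 */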

static void stop_this_cpu(void *dummy)
{
	local_irq_disable();
	/*
	 * Remove this CPU:
	 */
	cpu_clear(smp_processor_id(), cpu_online_map);
	disable_local_APIC();
	for (;;)
		halt();
}

void smp_send_stop(void)
{
	int nolock;
	unsigned long flags;

	if (reboot_force)
		return;

	/* Don't deadlock on the call lock in panic */
	nolock = !spin_trylock(&call_lock);
	local_irq_save(flags);
	__smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
	if (!nolock)
		spin_unlock(&call_lock);
	disable_local_APIC();
	local_irq_restore(flags);
}

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
asmlinkage void smp_reschedule_interrupt(void)
{
	ack_APIC_irq();
	add_pda(irq_resched_count, 1);
}

asmlinkage void smp_call_function_interrupt(void)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	ack_APIC_irq();
	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(&call_data->started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	exit_idle();
	irq_enter();
	(*func)(info);
	add_pda(irq_call_count, 1);
	irq_exit();
	if (wait) {
		mb();
		atomic_inc(&call_data->finished);
	}
}