2005-04-17 02:20:36 +04:00
/*
* Intel SMP support routines .
*
* ( c ) 1995 Alan Cox , Building # 3 < alan @ redhat . com >
* ( c ) 1998 - 99 , 2000 Ingo Molnar < mingo @ redhat . com >
* ( c ) 2002 , 2003 Andi Kleen , SuSE Labs .
*
* This code is released under the GNU General Public License version 2 or
* later .
*/
# include <linux/init.h>
# include <linux/mm.h>
# include <linux/delay.h>
# include <linux/spinlock.h>
# include <linux/smp_lock.h>
# include <linux/smp.h>
# include <linux/kernel_stat.h>
# include <linux/mc146818rtc.h>
# include <linux/interrupt.h>
# include <asm/mtrr.h>
# include <asm/pgalloc.h>
# include <asm/tlbflush.h>
# include <asm/mach_apic.h>
# include <asm/mmu_context.h>
# include <asm/proto.h>
2005-04-17 02:25:19 +04:00
# include <asm/apicdef.h>
2006-01-12 00:44:36 +03:00
# include <asm/idle.h>
2005-04-17 02:20:36 +04:00
/*
* Smarter SMP flushing macros .
* c / o Linus Torvalds .
*
* These mean you can really definitely utterly forget about
* writing to user space from interrupts. (It's not allowed anyway).
*
* Optimizations Manfred Spraul < manfred @ colorfullife . com >
2005-09-12 20:49:24 +04:00
*
* More scalable flush , from Andi Kleen
*
* To avoid global state use 8 different call vectors .
* Each CPU uses a specific vector to trigger flushes on other
* CPUs . Depending on the received vector the target CPUs look into
* the right per cpu variable for the flush data .
*
* With more than 8 CPUs they are hashed to the 8 available
* vectors . The limited global vector space forces us to this right now .
* In future when interrupts are split into per CPU domains this could be
* fixed , at the cost of triggering multiple IPIs in some cases .
2005-04-17 02:20:36 +04:00
*/
2005-09-12 20:49:24 +04:00
union smp_flush_state {
struct {
cpumask_t flush_cpumask ;
struct mm_struct * flush_mm ;
unsigned long flush_va ;
2005-04-17 02:20:36 +04:00
# define FLUSH_ALL -1ULL
2005-09-12 20:49:24 +04:00
spinlock_t tlbstate_lock ;
} ;
char pad [ SMP_CACHE_BYTES ] ;
} ____cacheline_aligned ;
/*
 * One flush_state slot per CPU, placed in the per-CPU data section. The
 * union is padded to a full cache line because other CPUs can access it
 * and we don't want false sharing in the per-CPU data segment.
 */
static DEFINE_PER_CPU ( union smp_flush_state , flush_state ) ;
2005-04-17 02:20:36 +04:00
/*
* We cannot call mmdrop ( ) because we are in interrupt context ,
* instead update mm - > cpu_vm_mask .
*/
2005-09-12 20:49:24 +04:00
static inline void leave_mm ( int cpu )
2005-04-17 02:20:36 +04:00
{
if ( read_pda ( mmu_state ) = = TLBSTATE_OK )
BUG ( ) ;
clear_bit ( cpu , & read_pda ( active_mm ) - > cpu_vm_mask ) ;
load_cr3 ( swapper_pg_dir ) ;
}
/*
*
* The flush IPI assumes that a thread switch happens in this order :
* [ cpu0 : the cpu that switches ]
* 1 ) switch_mm ( ) either 1 a ) or 1 b )
* 1 a ) thread switch to a different mm
* 1 a1 ) clear_bit ( cpu , & old_mm - > cpu_vm_mask ) ;
* Stop ipi delivery for the old mm. This is not synchronized with
* the other cpus, but smp_invalidate_interrupt ignores flush ipis
* for the wrong mm, and in the worst case we perform a superfluous
* tlb flush.
* 1 a2 ) set cpu mmu_state to TLBSTATE_OK
* Now the smp_invalidate_interrupt won ' t call leave_mm if cpu0
* was in lazy tlb mode .
* 1 a3 ) update cpu active_mm
* Now cpu0 accepts tlb flushes for the new mm .
* 1 a4 ) set_bit ( cpu , & new_mm - > cpu_vm_mask ) ;
* Now the other cpus will send tlb flush ipis .
* 1a5) change cr3.
* 1 b ) thread switch without mm change
* cpu active_mm is correct , cpu0 already handles
* flush ipis .
* 1 b1 ) set cpu mmu_state to TLBSTATE_OK
* 1 b2 ) test_and_set the cpu bit in cpu_vm_mask .
* Atomically set the bit [ other cpus will start sending flush ipis ] ,
* and test the bit .
* 1 b3 ) if the bit was 0 : leave_mm was called , flush the tlb .
* 2 ) switch % % esp , ie current
*
* The interrupt must handle 2 special cases :
* - cr3 is changed before % % esp , ie . it cannot use current - > { active_ , } mm .
* - the cpu performs speculative tlb reads , i . e . even if the cpu only
* runs in kernel space , the cpu could load tlb entries for user space
* pages .
*
* The good news is that cpu mmu_state is local to each cpu , no
* write / read ordering problems .
*/
/*
* TLB flush IPI :
*
* 1 ) Flush the tlb entries if the cpu uses the mm that ' s being flushed .
* 2 ) Leave the mm if we are in the lazy tlb mode .
2005-09-12 20:49:24 +04:00
*
* Interrupts are disabled .
2005-04-17 02:20:36 +04:00
*/
2005-09-12 20:49:24 +04:00
asmlinkage void smp_invalidate_interrupt ( struct pt_regs * regs )
2005-04-17 02:20:36 +04:00
{
2005-09-12 20:49:24 +04:00
int cpu ;
int sender ;
union smp_flush_state * f ;
2005-04-17 02:20:36 +04:00
2005-09-12 20:49:24 +04:00
cpu = smp_processor_id ( ) ;
/*
* orig_rax contains the interrupt vector - 256.
* Use that to determine where the sender put the data .
*/
sender = regs - > orig_rax + 256 - INVALIDATE_TLB_VECTOR_START ;
f = & per_cpu ( flush_state , sender ) ;
2005-04-17 02:20:36 +04:00
2005-09-12 20:49:24 +04:00
if ( ! cpu_isset ( cpu , f - > flush_cpumask ) )
2005-04-17 02:20:36 +04:00
goto out ;
/*
* This was a BUG ( ) but until someone can quote me the
* line from the intel manual that guarantees an IPI to
* multiple CPUs is retried _only_ on the erroring CPUs
* its staying as a return
*
* BUG ( ) ;
*/
2005-09-12 20:49:24 +04:00
if ( f - > flush_mm = = read_pda ( active_mm ) ) {
2005-04-17 02:20:36 +04:00
if ( read_pda ( mmu_state ) = = TLBSTATE_OK ) {
2005-09-12 20:49:24 +04:00
if ( f - > flush_va = = FLUSH_ALL )
2005-04-17 02:20:36 +04:00
local_flush_tlb ( ) ;
else
2005-09-12 20:49:24 +04:00
__flush_tlb_one ( f - > flush_va ) ;
2005-04-17 02:20:36 +04:00
} else
leave_mm ( cpu ) ;
}
2005-07-29 08:15:22 +04:00
out :
2005-04-17 02:20:36 +04:00
ack_APIC_irq ( ) ;
2005-09-12 20:49:24 +04:00
cpu_clear ( cpu , f - > flush_cpumask ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Ask the CPUs in @cpumask to flush @va (or everything, for FLUSH_ALL)
 * for @mm, and spin until all of them have cleared their bit in the
 * request mask.
 *
 * The request slot and IPI vector are chosen from the calling CPU's id
 * modulo NUM_INVALIDATE_TLB_VECTORS.
 */
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
			     unsigned long va)
{
	union smp_flush_state *req;
	int slot;

	/* Caller has disabled preemption */
	slot = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
	req = &per_cpu(flush_state, slot);

	/*
	 * Could avoid this lock when
	 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
	 * probably not worth checking this for a cache-hot lock.
	 */
	spin_lock(&req->tlbstate_lock);

	/* Publish the request before kicking the targets. */
	req->flush_mm = mm;
	req->flush_va = va;
	cpus_or(req->flush_cpumask, cpumask, req->flush_cpumask);

	/* We have to send the IPI only to CPUs affected. */
	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + slot);

	/* Each target clears its own bit once it has handled the IPI. */
	while (!cpus_empty(req->flush_cpumask))
		cpu_relax();

	req->flush_mm = NULL;
	req->flush_va = 0;
	spin_unlock(&req->tlbstate_lock);
}
2005-09-12 20:49:24 +04:00
/*
 * Initialise the tlbstate_lock of every possible CPU's flush_state slot.
 * Registered as a core initcall; always returns 0.
 */
int __cpuinit init_smp_flush(void)
{
	int cpu;

	for_each_cpu_mask(cpu, cpu_possible_map)
		spin_lock_init(&per_cpu(flush_state, cpu).tlbstate_lock);

	return 0;
}
core_initcall(init_smp_flush);
2005-04-17 02:20:36 +04:00
void flush_tlb_current_task ( void )
{
struct mm_struct * mm = current - > mm ;
cpumask_t cpu_mask ;
preempt_disable ( ) ;
cpu_mask = mm - > cpu_vm_mask ;
cpu_clear ( smp_processor_id ( ) , cpu_mask ) ;
local_flush_tlb ( ) ;
if ( ! cpus_empty ( cpu_mask ) )
flush_tlb_others ( cpu_mask , mm , FLUSH_ALL ) ;
preempt_enable ( ) ;
}
void flush_tlb_mm ( struct mm_struct * mm )
{
cpumask_t cpu_mask ;
preempt_disable ( ) ;
cpu_mask = mm - > cpu_vm_mask ;
cpu_clear ( smp_processor_id ( ) , cpu_mask ) ;
if ( current - > active_mm = = mm ) {
if ( current - > mm )
local_flush_tlb ( ) ;
else
leave_mm ( smp_processor_id ( ) ) ;
}
if ( ! cpus_empty ( cpu_mask ) )
flush_tlb_others ( cpu_mask , mm , FLUSH_ALL ) ;
preempt_enable ( ) ;
}
void flush_tlb_page ( struct vm_area_struct * vma , unsigned long va )
{
struct mm_struct * mm = vma - > vm_mm ;
cpumask_t cpu_mask ;
preempt_disable ( ) ;
cpu_mask = mm - > cpu_vm_mask ;
cpu_clear ( smp_processor_id ( ) , cpu_mask ) ;
if ( current - > active_mm = = mm ) {
if ( current - > mm )
__flush_tlb_one ( va ) ;
else
leave_mm ( smp_processor_id ( ) ) ;
}
if ( ! cpus_empty ( cpu_mask ) )
flush_tlb_others ( cpu_mask , mm , va ) ;
preempt_enable ( ) ;
}
/*
 * Per-CPU worker for flush_tlb_all(): flush everything on this CPU and,
 * if the CPU is in TLBSTATE_LAZY, also leave the mm. @info is unused.
 */
static void do_flush_tlb_all(void *info)
{
	unsigned long me = smp_processor_id();

	__flush_tlb_all();

	if (read_pda(mmu_state) != TLBSTATE_LAZY)
		return;

	leave_mm(me);
}
/*
 * Run do_flush_tlb_all() through on_each_cpu().
 * NOTE(review): the two trailing 1s are presumably on_each_cpu()'s
 * retry and wait flags -- confirm against its prototype.
 */
void flush_tlb_all ( void )
{
on_each_cpu ( do_flush_tlb_all , NULL , 1 , 1 ) ;
}
/*
* this function sends a ' reschedule ' IPI to another CPU .
* it goes straight through and wastes no time serializing
* anything . Worst case is that we lose a reschedule . . .
*/
void smp_send_reschedule ( int cpu )
{
send_IPI_mask ( cpumask_of_cpu ( cpu ) , RESCHEDULE_VECTOR ) ;
}
/*
* Structure and data for smp_call_function ( ) . This is designed to minimise
* static memory requirements . It also looks cleaner .
*/
/* Held by the initiating CPU around a cross-CPU function call. */
static DEFINE_SPINLOCK ( call_lock ) ;
/*
 * Description of one cross-CPU call request. The initiator builds it on
 * its own stack and publishes it through call_data.
 */
struct call_data_struct {
/* function each target CPU runs, and the argument passed to it */
void ( * func ) ( void * info ) ;
void * info ;
/* incremented by each target once it has copied func/info/wait */
atomic_t started ;
/* incremented by each target after func returned; only used if wait */
atomic_t finished ;
/* non-zero: the initiator spins until all targets have finished */
int wait ;
} ;
/* Request currently being delivered; read by the call-function IPI handler. */
static struct call_data_struct * call_data ;
2005-06-26 01:55:02 +04:00
/* Take call_lock with spin_lock_irq(); paired with unlock_ipi_call_lock(). */
void lock_ipi_call_lock ( void )
{
spin_lock_irq ( & call_lock ) ;
}
/* Release call_lock with spin_unlock_irq(); paired with lock_ipi_call_lock(). */
void unlock_ipi_call_lock ( void )
{
spin_unlock_irq ( & call_lock ) ;
}
2005-07-30 01:03:29 +04:00
/*
* this function sends a ' generic call function ' IPI to one other CPU
* in the system .
2005-09-12 20:49:24 +04:00
*
* cpu is a standard Linux logical CPU number .
2005-07-30 01:03:29 +04:00
*/
2005-09-12 20:49:24 +04:00
static void
__smp_call_function_single ( int cpu , void ( * func ) ( void * info ) , void * info ,
2005-07-30 01:03:29 +04:00
int nonatomic , int wait )
{
struct call_data_struct data ;
int cpus = 1 ;
data . func = func ;
data . info = info ;
atomic_set ( & data . started , 0 ) ;
data . wait = wait ;
if ( wait )
atomic_set ( & data . finished , 0 ) ;
call_data = & data ;
wmb ( ) ;
/* Send a message to all other CPUs and wait for them to respond */
send_IPI_mask ( cpumask_of_cpu ( cpu ) , CALL_FUNCTION_VECTOR ) ;
/* Wait for response */
while ( atomic_read ( & data . started ) ! = cpus )
cpu_relax ( ) ;
if ( ! wait )
return ;
while ( atomic_read ( & data . finished ) ! = cpus )
cpu_relax ( ) ;
}
/*
* smp_call_function_single - Run a function on another CPU
* @cpu: The logical number of the CPU to run on; must not be the calling CPU.
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
* @nonatomic: Currently unused.
* @wait: If true, wait until the function has completed on the other CPU.
*
* Returns 0 on success, else a negative status code (-EBUSY if @cpu is the
* calling CPU).
*
* Does not return until the remote CPU is nearly ready to execute <func>,
* or is executing it, or has executed it.
*/
int smp_call_function_single(int cpu, void (*func)(void *info), void *info,
			     int nonatomic, int wait)
{
	int ret = 0;
	/* prevent preemption and reschedule on another processor */
	int me = get_cpu();

	if (cpu != me) {
		spin_lock_bh(&call_lock);
		__smp_call_function_single(cpu, func, info, nonatomic, wait);
		spin_unlock_bh(&call_lock);
	} else {
		/* Calling ourselves through an IPI is not supported. */
		WARN_ON(1);
		ret = -EBUSY;
	}

	put_cpu();
	return ret;
}
2005-04-17 02:20:36 +04:00
/*
* this function sends a ' generic call function ' IPI to all other CPUs
* in the system .
*/
static void __smp_call_function ( void ( * func ) ( void * info ) , void * info ,
int nonatomic , int wait )
{
struct call_data_struct data ;
int cpus = num_online_cpus ( ) - 1 ;
if ( ! cpus )
return ;
data . func = func ;
data . info = info ;
atomic_set ( & data . started , 0 ) ;
data . wait = wait ;
if ( wait )
atomic_set ( & data . finished , 0 ) ;
call_data = & data ;
wmb ( ) ;
/* Send a message to all other CPUs and wait for them to respond */
send_IPI_allbutself ( CALL_FUNCTION_VECTOR ) ;
/* Wait for response */
while ( atomic_read ( & data . started ) ! = cpus )
cpu_relax ( ) ;
if ( ! wait )
return ;
while ( atomic_read ( & data . finished ) ! = cpus )
cpu_relax ( ) ;
}
/*
* smp_call_function - run a function on all other CPUs .
* @ func : The function to run . This must be fast and non - blocking .
* @ info : An arbitrary pointer to pass to the function .
* @ nonatomic : currently unused .
* @ wait : If true , wait ( atomically ) until function has completed on other
* CPUs .
*
* Returns 0 on success , else a negative status code . Does not return until
* remote CPUs are nearly ready to execute func or are or have executed .
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler .
* Actually there are a few legal cases , like panic .
*/
/*
 * Locked wrapper around __smp_call_function(): call_lock is held for the
 * whole request. Always returns 0.
 */
int smp_call_function ( void ( * func ) ( void * info ) , void * info , int nonatomic ,
int wait )
{
spin_lock ( & call_lock ) ;
__smp_call_function ( func , info , nonatomic , wait ) ;
spin_unlock ( & call_lock ) ;
return 0 ;
}
void smp_stop_cpu ( void )
{
2005-11-05 19:25:54 +03:00
unsigned long flags ;
2005-04-17 02:20:36 +04:00
/*
* Remove this CPU :
*/
cpu_clear ( smp_processor_id ( ) , cpu_online_map ) ;
2005-11-05 19:25:54 +03:00
local_irq_save ( flags ) ;
2005-04-17 02:20:36 +04:00
disable_local_APIC ( ) ;
2005-11-05 19:25:54 +03:00
local_irq_restore ( flags ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Call-function callback used by smp_send_stop(): stop this CPU and then
 * halt forever. Never returns. @dummy is unused.
 */
static void smp_really_stop_cpu(void *dummy)
{
	smp_stop_cpu();

	while (1)
		asm("hlt");
}
/*
 * Stop all other CPUs and disable the local APIC of the calling CPU.
 *
 * Does nothing when reboot_force is set. Every other online CPU is sent
 * smp_really_stop_cpu() without waiting for completion.
 *
 * The local APIC is disabled with interrupts off, and the caller's
 * interrupt state is restored afterwards (same pattern as
 * smp_stop_cpu()). Interrupts are therefore no longer force-enabled when
 * the caller entered with them disabled.
 */
void smp_send_stop(void)
{
	int nolock;
	unsigned long flags;

	if (reboot_force)
		return;

	/*
	 * Don't deadlock on the call lock in panic: if it cannot be taken,
	 * carry on without it, because we have panicked anyway.
	 */
	nolock = !spin_trylock(&call_lock);

	__smp_call_function(smp_really_stop_cpu, NULL, 0, 0);

	if (!nolock)
		spin_unlock(&call_lock);

	local_irq_save(flags);
	disable_local_APIC();
	local_irq_restore(flags);
}
/*
* Reschedule call back . Nothing to do ,
* all the work is done automatically when
* we return from the interrupt .
*/
asmlinkage void smp_reschedule_interrupt ( void )
{
/* Acknowledging the interrupt is the only thing this handler does. */
ack_APIC_irq ( ) ;
}
/*
 * Handler for CALL_FUNCTION_VECTOR.
 *
 * Copies the request out of call_data, acknowledges the interrupt, tells
 * the initiator the request was picked up, runs the function between
 * irq_enter() and irq_exit(), and reports completion when the initiator
 * asked to wait.
 */
asmlinkage void smp_call_function_interrupt(void)
{
	/* Snapshot the request before signalling "started" (see below). */
	void (*fn)(void *info) = call_data->func;
	void *arg = call_data->info;
	int must_report = call_data->wait;

	ack_APIC_irq();

	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(&call_data->started);

	/*
	 * At this point the info structure may be out of scope unless
	 * wait == 1, so only the local copies are used from here on.
	 */
	exit_idle();
	irq_enter();
	fn(arg);
	irq_exit();

	if (must_report) {
		mb();
		atomic_inc(&call_data->finished);
	}
}
2005-04-17 02:25:19 +04:00
/*
 * Work out the logical CPU number of the calling CPU from its hardware
 * APIC id, by looking it up in x86_cpu_to_apicid[].
 *
 * Returns 0 when the APIC is disabled or no table entry matches;
 * otherwise the index of the matching entry.
 */
int safe_smp_processor_id(void)
{
	int apicid, i;

	if (disable_apic)
		return 0;

	apicid = hard_smp_processor_id();

	/*
	 * Fast path: identity mapping. The hardware APIC id is not bounded
	 * by NR_CPUS, so range-check it before using it as a table index.
	 */
	if (apicid >= 0 && apicid < NR_CPUS &&
	    x86_cpu_to_apicid[apicid] == apicid)
		return apicid;

	for (i = 0; i < NR_CPUS; ++i) {
		if (x86_cpu_to_apicid[i] == apicid)
			return i;
	}

	/*
	 * No matching entry in x86_cpu_to_apicid? Either no MPS|ACPI,
	 * or called too early (entry 0 still BAD_APICID). Either way, we
	 * must be CPU 0. Any other miss should not happen and is also
	 * reported as CPU 0.
	 */
	return 0;
}