/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/bootmem.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>

#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>

extern void calibrate_delay(void);

/* Please don't make this stuff initdata!!!  --DaveM */
static unsigned char boot_cpu_id;

cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
static cpumask_t smp_commenced_mask;
static cpumask_t cpu_callout_map;

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			seq_printf(m,
				   "CPU%d:\t\tonline\n", i);
	}
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for (i = 0; i < NR_CPUS; i++)
		if (cpu_online(i))
			seq_printf(m,
				   "Cpu%dBogo\t: %lu.%02lu\n"
				   "Cpu%dClkTck\t: %016lx\n",
				   i, cpu_data(i).udelay_val / (500000/HZ),
				   (cpu_data(i).udelay_val / (5000/HZ)) % 100,
				   i, cpu_data(i).clock_tick);
}

void __init smp_store_cpu_info(int id)
{
	int cpu_node;

	/* multiplier and counter set by
	   smp_setup_percpu_timer()  */
	cpu_data(id).udelay_val = loops_per_jiffy;

	cpu_find_by_mid(id, &cpu_node);
	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
						     "clock-frequency", 0);

	cpu_data(id).idle_volume = 1;

	cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
						      16 * 1024);
	cpu_data(id).dcache_line_size =
		prom_getintdefault(cpu_node, "dcache-line-size", 32);
	cpu_data(id).icache_size = prom_getintdefault(cpu_node, "icache-size",
						      16 * 1024);
	cpu_data(id).icache_line_size =
		prom_getintdefault(cpu_node, "icache-line-size", 32);
	cpu_data(id).ecache_size = prom_getintdefault(cpu_node, "ecache-size",
						      4 * 1024 * 1024);
	cpu_data(id).ecache_line_size =
		prom_getintdefault(cpu_node, "ecache-line-size", 64);

	printk("CPU[%d]: Caches "
	       "D[sz(%d):line_sz(%d)] "
	       "I[sz(%d):line_sz(%d)] "
	       "E[sz(%d):line_sz(%d)]\n",
	       id,
	       cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
	       cpu_data(id).icache_size, cpu_data(id).icache_line_size,
	       cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
}

static void smp_setup_percpu_timer ( void ) ;
static volatile unsigned long callin_flag = 0 ;
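/* Entry point for a freshly started secondary cpu, reached via the
 * sparc64_cpu_startup trampoline.  It registers per-cpu state,
 * calibrates the cpu, raises callin_flag for the master, and then
 * spins until the master sets our bit in smp_commenced_mask.
 */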
void __init smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	__local_per_cpu_offset = __per_cpu_offset(cpuid);

	if (tlb_type == hypervisor) {
		sun4v_register_fault_status();
		sun4v_ktsb_register();
	}

	__flush_tlb_all();

	smp_setup_percpu_timer();

	if (cheetah_pcache_forced_on)
		cheetah_enable_pcache();

	local_irq_enable();

	calibrate_delay();
	smp_store_cpu_info(cpuid);
	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush  %%g6" : : : "memory");

	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */
	current_thread_info()->new_child = 0;

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	while (!cpu_isset(cpuid, smp_commenced_mask))
		rmb();

	cpu_set(cpuid, cpu_online_map);

	/* idle thread is expected to have preempt disabled */
	preempt_disable();
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

static unsigned long current_tick_offset __read_mostly;

/* This tick register synchronization scheme is taken entirely from
 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
 *
 * The only change I've made is to rework it so that the master
 * initiates the synchronization instead of the slave.  -DaveM
 */
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES / sizeof(unsigned long))

#define NUM_ROUNDS	64	/* magic value */
#define NUM_ITERS	5	/* likewise */

static DEFINE_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

#define DEBUG_TICK_SYNC	0

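/* Runs on the cpu being synchronized: each iteration it samples its
 * own tick, pokes the master via go[MASTER], and waits for the
 * master's tick value to appear in go[SLAVE].  The iteration with the
 * smallest round trip wins, and the returned delta is the master's
 * timestamp measured against the midpoint of that round trip.
 */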
static inline long get_delta(long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	unsigned long i;

	for (i = 0; i < NUM_ITERS; i++) {
		t0 = tick_ops->get_tick();
		go[MASTER] = 1;
		membar_storeload();
		while (!(tm = go[SLAVE]))
			rmb();
		go[SLAVE] = 0;
		wmb();
		t1 = tick_ops->get_tick();

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* average best_t0 and best_t1 without overflow: */
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
}

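/* Client side of the tick synchronization, run on the cpu whose tick
 * register is being adjusted.  It repeatedly measures its offset from
 * the master with get_delta() and steps its own tick register until
 * the measured delta reaches zero.
 */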
void smp_synchronize_tick_client(void)
{
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_TICK_SYNC
	struct {
		long rt;	/* roundtrip time */
		long master;	/* master's timestamp */
		long diff;	/* difference between midpoint and master's timestamp */
		long lat;	/* estimate of itc adjustment latency */
	} t[NUM_ROUNDS];
#endif

	go[MASTER] = 1;

	while (go[MASTER])
		rmb();

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS; i++) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;	/* let's lock on to this... */
				bound = rt;
			}

			if (!done) {
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				tick_ops->add_tick(adj, current_tick_offset);
			}
#if DEBUG_TICK_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	local_irq_restore(flags);

#if DEBUG_TICK_SYNC
	for (i = 0; i < NUM_ROUNDS; i++)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU (last diff %ld cycles, "
	       "maxerr %lu cycles)\n", smp_processor_id(), delta, rt);
}

static void smp_start_sync_tick_client(int cpu);

static void smp_synchronize_one_tick(int cpu)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	smp_start_sync_tick_client(cpu);

	/* wait for client to be ready */
	while (!go[MASTER])
		rmb();

	/* now let the client proceed into his loop */
	go[MASTER] = 0;
	membar_storeload();

	spin_lock_irqsave(&itc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS * NUM_ITERS; i++) {
			while (!go[MASTER])
				rmb();
			go[MASTER] = 0;
			wmb();
			go[SLAVE] = tick_ops->get_tick();
			membar_storeload();
		}
	}
	spin_unlock_irqrestore(&itc_sync_lock, flags);
}

extern unsigned long sparc64_cpu_startup;

/* The OBP cpu startup callback truncates the 3rd arg cookie to
 * 32-bits (I think) so to be safe we have it read the pointer
 * contained here so we work on >4GB machines. -DaveM
 */
static struct thread_info *cpu_new_thread = NULL;

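/* Fork an idle task for the new cpu, publish its thread_info through
 * cpu_new_thread for the startup trampoline, kick the cpu via OBP,
 * and then poll callin_flag until the new cpu reports in (or we give
 * up and declare it stuck).
 */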
static int __devinit smp_boot_one_cpu(unsigned int cpu)
{
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);
	struct task_struct *p;
	int timeout, ret, cpu_node;

	p = fork_idle(cpu);
	callin_flag = 0;
	cpu_new_thread = task_thread_info(p);
	cpu_set(cpu, cpu_callout_map);

	cpu_find_by_mid(cpu, &cpu_node);
	prom_startcpu(cpu_node, entry, cookie);

	for (timeout = 0; timeout < 5000000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}

	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		cpu_clear(cpu, cpu_callout_map);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	return ret;
}

static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* map to real upaid */
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	/* Ok, this is the real Spitfire Errata #54.
	 * One must read back from a UDB internal register
	 * after writes to the UDB interrupt dispatch, but
	 * before the membar Sync for that write.
	 * So we use the high UDB control register (ASI 0x7f,
	 * ADDR 0x20) for the dummy read. -DaveM
	 */
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr	%1, %2, %%pstate\n\t"
	"stxa	%4, [%0] %3\n\t"
	"stxa	%5, [%0+%8] %3\n\t"
	"add	%0, %8, %0\n\t"
	"stxa	%6, [%0+%8] %3\n\t"
	"membar	#Sync\n\t"
	"stxa	%%g0, [%7] %3\n\t"
	"membar	#Sync\n\t"
	"mov	0x20, %%g1\n\t"
	"ldxa	[%%g1] 0x7f, %%g0\n\t"
	"membar	#Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016lx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	for_each_cpu_mask(i, mask)
		spitfire_xcall_helper(data0, data1, data2, pstate, i);
}

/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 * packet, but we have no use for that.  However we do take advantage of
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 */
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	u64 pstate, ver;
	int nack_busy_id, is_jbus;

	if (cpus_empty(mask))
		return;

	/* Unfortunately, someone at Sun had the brilliant idea to make the
	 * busy/nack fields hard-coded by ITID number for this Ultra-III
	 * derivative processor.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
		   (ver >> 32) == __SERRANO_ID);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	/* Setup the dispatch data registers. */
	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
			     "stxa	%1, [%4] %6\n\t"
			     "stxa	%2, [%5] %6\n\t"
			     "membar	#Sync\n\t"
			     : /* no outputs */
			     : "r" (data0), "r" (data1), "r" (data2),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	{
		int i;

		for_each_cpu_mask(i, mask) {
			u64 target = (i << 14) | 0x70;

			if (!is_jbus)
				target |= (nack_busy_id << 24);
			__asm__ __volatile__(
				"stxa	%%g0, [%0] %1\n\t"
				"membar	#Sync\n\t"
				: /* no outputs */
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
		}
	}

	/* Now, poll for completion. */
	{
		u64 dispatch_stat;
		long stuck;

		stuck = 100000 * nack_busy_id;
		do {
			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (dispatch_stat == 0UL) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & 0x5555555555555555UL);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if ((dispatch_stat & ~(0x5555555555555555UL)) == 0) {
			/* Busy bits will not clear, continue instead
			 * of freezing up on this cpu.
			 */
			printk("CPU[%d]: mondo stuckage result[%016lx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			/* Delay some random time with interrupts enabled
			 * to prevent deadlock.
			 */
			udelay(2 * nack_busy_id);

			/* Clear out the mask bits for cpus which did not
			 * NACK us.
			 */
			for_each_cpu_mask(i, mask) {
				u64 check_mask;

				if (is_jbus)
					check_mask = (0x2UL << (2*i));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_clear(i, mask);
				this_busy_nack += 2;
			}

			goto retry;
		}
	}
}

#if 0
/* Multi-cpu list version.  */
static int init_cpu_list(u16 *list, cpumask_t mask)
{
	int i, cnt;

	cnt = 0;
	for_each_cpu_mask(i, mask)
		list[cnt++] = i;

	return cnt;
}

static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask)
{
	int i;

	for (i = 0; i < orig_cnt; i++) {
		if (list[i] == 0xffff)
			cpu_clear(i, mask);
	}

	return init_cpu_list(list, mask);
}

static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	int this_cpu = get_cpu();
	struct trap_per_cpu *tb = &trap_block[this_cpu];
	u64 *mondo = __va(tb->cpu_mondo_block_pa);
	u16 *cpu_list = __va(tb->cpu_list_pa);
	int cnt, retries;

	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	retries = 0;
	cnt = init_cpu_list(cpu_list, mask);
	do {
		register unsigned long func __asm__("%o5");
		register unsigned long arg0 __asm__("%o0");
		register unsigned long arg1 __asm__("%o1");
		register unsigned long arg2 __asm__("%o2");

		func = HV_FAST_CPU_MONDO_SEND;
		arg0 = cnt;
		arg1 = tb->cpu_list_pa;
		arg2 = tb->cpu_mondo_block_pa;

		__asm__ __volatile__("ta	%8"
				     : "=&r" (func), "=&r" (arg0),
				       "=&r" (arg1), "=&r" (arg2)
				     : "0" (func), "1" (arg0),
				       "2" (arg1), "3" (arg2),
				       "i" (HV_FAST_TRAP)
				     : "memory");
		if (likely(arg0 == HV_EOK))
			break;

		if (unlikely(++retries > 100)) {
			printk("CPU[%d]: sun4v mondo error %lu\n",
			       this_cpu, func);
			break;
		}

		cnt = update_cpu_list(cpu_list, cnt, mask);

		udelay(2 * cnt);
	} while (1);

	put_cpu();
}
#else
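/* Deliver the mondo to each cpu in the mask via the sun4v
 * cpu_mondo_send hypervisor call.  The mondo payload and the
 * (single entry) cpu list live in this cpu's trap_block and are
 * handed to the hypervisor by physical address; failed sends are
 * retried a bounded number of times.
 */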
/* Single-cpu list version.  */
static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
	int this_cpu = get_cpu();
	struct trap_per_cpu *tb = &trap_block[this_cpu];
	u64 *mondo = __va(tb->cpu_mondo_block_pa);
	u16 *cpu_list = __va(tb->cpu_list_pa);
	int i;

	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	for_each_cpu_mask(i, mask) {
		int retries = 0;

		do {
			register unsigned long func __asm__("%o5");
			register unsigned long arg0 __asm__("%o0");
			register unsigned long arg1 __asm__("%o1");
			register unsigned long arg2 __asm__("%o2");

			cpu_list[0] = i;
			func = HV_FAST_CPU_MONDO_SEND;
			arg0 = 1;
			arg1 = tb->cpu_list_pa;
			arg2 = tb->cpu_mondo_block_pa;

			__asm__ __volatile__("ta	%8"
					     : "=&r" (func), "=&r" (arg0),
					       "=&r" (arg1), "=&r" (arg2)
					     : "0" (func), "1" (arg0),
					       "2" (arg1), "3" (arg2),
					       "i" (HV_FAST_TRAP)
					     : "memory");
			if (likely(arg0 == HV_EOK))
				break;

			if (unlikely(++retries > 100)) {
				printk("CPU[%d]: sun4v mondo error %lu\n",
				       this_cpu, func);
				break;
			}

			udelay(2 * i);
		} while (1);
	}

	put_cpu();
}
#endif

/* Send cross call to all processors mentioned in MASK
 * except self.
 */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, cpumask_t mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
	int this_cpu = get_cpu();

	cpus_and(mask, mask, cpu_online_map);
	cpu_clear(this_cpu, mask);

	if (tlb_type == spitfire)
		spitfire_xcall_deliver(data0, data1, data2, mask);
	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
		cheetah_xcall_deliver(data0, data1, data2, mask);
	else
		hypervisor_xcall_deliver(data0, data1, data2, mask);
	/* NOTE: Caller runs local copy on master. */

	put_cpu();
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	cpumask_t mask = cpumask_of_cpu(cpu);

	smp_cross_call_masked(&xcall_sync_tick,
			      0, 0, 0, mask);
}

/* Send cross call to all processors except self. */
#define smp_cross_call(func, ctx, data1, data2) \
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map)

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t finished;
	int wait;
};

static DEFINE_SPINLOCK(call_lock);

static struct call_data_struct *call_data;

extern unsigned long xcall_call_function;

/*
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
static int smp_call_function_mask(void (*func)(void *info), void *info,
				  int nonatomic, int wait, cpumask_t mask)
{
	struct call_data_struct data;
	int cpus = cpus_weight(mask) - 1;
	long timeout;

	if (!cpus)
		return 0;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.finished, 0);
	data.wait = wait;

	spin_lock(&call_lock);

	call_data = &data;

	smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);

	/*
	 * Wait for other cpus to complete function or at
	 * least snap the call data.
	 */
	timeout = 1000000;
	while (atomic_read(&data.finished) != cpus) {
		if (--timeout <= 0)
			goto out_timeout;
		barrier();
		udelay(1);
	}

	spin_unlock(&call_lock);

	return 0;

out_timeout:
	spin_unlock(&call_lock);
	printk("XCALL: Remote cpus not responding, ncpus=%ld finished=%ld\n",
	       (long) num_online_cpus() - 1L,
	       (long) atomic_read(&data.finished));
	return 0;
}

int smp_call_function(void (*func)(void *info), void *info,
		      int nonatomic, int wait)
{
	return smp_call_function_mask(func, info, nonatomic, wait,
				      cpu_online_map);
}

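/* Target side of a function-call cross call.  The softint handler
 * runs the requested function and bumps the finished counter; when
 * the initiator asked to wait, the counter is only bumped after the
 * function has completed.
 */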
void smp_call_function_client(int irq, struct pt_regs *regs)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;

	clear_softint(1 << irq);
	if (call_data->wait) {
		/* let initiator proceed only after completion */
		func(info);
		atomic_inc(&call_data->finished);
	} else {
		/* let initiator proceed after getting data */
		atomic_inc(&call_data->finished);
		func(info);
	}
}

static void tsb_sync(void *info)
{
	struct mm_struct *mm = info;

	if (current->active_mm == mm)
		tsb_context_switch(mm);
}

void smp_tsb_sync(struct mm_struct *mm)
{
	smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
}

extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

static __inline__ void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

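/* Flush the D-cache copy of a page that is cached on a particular
 * cpu: either flush locally or cross call the owning cpu.  This is a
 * nop on hypervisor (sun4v) platforms, which never need it.
 */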
void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	cpumask_t mask = cpumask_of_cpu(cpu);
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

	this_cpu = get_cpu();

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0;

		if (tlb_type == spitfire) {
			data0 =
				((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
			spitfire_xcall_deliver(data0,
					       __pa(pg_addr),
					       (u64) pg_addr,
					       mask);
		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 =
				((u64)&xcall_flush_dcache_page_cheetah);
			cheetah_xcall_deliver(data0,
					      __pa(pg_addr),
					      0, mask);
#endif
		}
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}

	put_cpu();
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr = page_address(page);
	cpumask_t mask = cpu_online_map;
	u64 data0;
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

	this_cpu = get_cpu();

	cpu_clear(this_cpu, mask);

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	if (cpus_empty(mask))
		goto flush_self;
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
		spitfire_xcall_deliver(data0,
				       __pa(pg_addr),
				       (u64) pg_addr,
				       mask);
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
		cheetah_xcall_deliver(data0,
				      __pa(pg_addr),
				      0, mask);
#endif
	}
#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes_xcall);
#endif
 flush_self:
	__local_flush_dcache_page(page);

	put_cpu();
}

void smp_receive_signal(int cpu)
{
	cpumask_t mask = cpumask_of_cpu(cpu);

	if (cpu_online(cpu)) {
		u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);

		if (tlb_type == spitfire)
			spitfire_xcall_deliver(data0, 0, 0, mask);
		else if (tlb_type == cheetah || tlb_type == cheetah_plus)
			cheetah_xcall_deliver(data0, 0, 0, mask);
		else if (tlb_type == hypervisor)
			hypervisor_xcall_deliver(data0, 0, 0, mask);
	}
}

void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	/* Just return, rtrap takes care of the rest. */
	clear_softint(1 << irq);
}

void smp_report_regs(void)
{
	smp_cross_call(&xcall_report_regs, 0, 0, 0);
}

/* We know that the window frames of the user have been flushed
 * to the stack before we get here because all callers of us
 * are flush_tlb_*() routines, and these run after flush_cache_*()
 * which performs the flushw.
 *
 * The SMP TLB coherency scheme we use works as follows:
 *
 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 *    space has (potentially) executed on, this is the heuristic
 *    we use to avoid doing cross calls.
 *
 *    Also, for flushing from kswapd and also for clones, we
 *    use cpu_vm_mask as the list of cpus to make run the TLB.
 *
 * 2) TLB context numbers are shared globally across all processors
 *    in the system, this allows us to play several games to avoid
 *    cross calls.
 *
 *    One invariant is that when a cpu switches to a process, and
 *    that process's tsk->active_mm->cpu_vm_mask does not have the
 *    current cpu's bit set, that tlb context is flushed locally.
 *
 *    If the address space is non-shared (ie. mm->count == 1) we avoid
 *    cross calls when we want to flush the currently running process's
 *    tlb state.  This is done by clearing all cpu bits except the current
 *    processor's in current->active_mm->cpu_vm_mask and performing the
 *    flush locally only.  This will force any subsequent cpus which run
 *    this task to flush the context from the local tlb if the process
 *    migrates to another cpu (again).
 *
 * 3) For shared address spaces (threads) and swapping we bite the
 *    bullet for most cases and perform the cross call (but only to
 *    the cpus listed in cpu_vm_mask).
 *
 * The performance gain from "optimizing" away the cross call for threads is
 * questionable (in theory the big win for threads is the massive sharing of
 * address space state across processors).
 */

/* This currently is only used by the hugetlb arch pre-fault
 * hook on UltraSPARC-III+ and later when changing the pagesize
 * bits of the context register for an address space.
 */
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (atomic_read(&mm->mm_users) == 1) {
		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
		goto local_flush_and_out;
	}

	smp_cross_call_masked(&xcall_flush_tlb_mm,
			      ctx, 0, 0,
			      mm->cpu_vm_mask);

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);

	put_cpu();
}

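/* Flush a batch of pending user TLB entries.  As with
 * smp_flush_tlb_mm(), the cross call is skipped when we are the only
 * user of the address space; otherwise only the cpus in
 * mm->cpu_vm_mask are targeted, and the local flush always runs.
 */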
void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1)
		mm->cpu_vm_mask = cpumask_of_cpu(cpu);
	else
		smp_cross_call_masked(&xcall_flush_tlb_pending,
				      ctx, nr, (unsigned long) vaddrs,
				      mm->cpu_vm_mask);

	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end    = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

/* CPU capture. */
/* #define CAPTURE_DEBUG */
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

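/* Corral all other cpus into smp_penguin_jailcell() so the PROM can
 * be entered safely.  Capture requests nest via smp_capture_depth;
 * only the outermost smp_capture()/smp_release() pair sends the cross
 * call and waits for every online cpu to check in.
 */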
void smp_capture(void)
{
	int result = atomic_add_ret(1, &smp_capture_depth);

	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		membar_storestore_loadstore();
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			rmb();
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar_storeload_storestore();
		atomic_dec(&smp_capture_registry);
	}
}

/* Imprisoned penguins run with %pil == 15, but PSTATE_IE set, so they
 * can service tlb flush xcalls...
 */
extern void prom_world(int);

void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar_storeload_storestore();
	while (penguins_are_doing_time)
		rmb();
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

#define prof_multiplier(__cpu)		cpu_data(__cpu).multiplier
#define prof_counter(__cpu)		cpu_data(__cpu).counter

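/* Per-cpu tick interrupt handler.  On each tick it rearms the compare
 * register for the next interval and, on the boot cpu, also drives
 * the global timer tick.  The loop re-runs the work if rearming took
 * so long that the new compare value is already in the past.
 */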
void smp_percpu_timer_interrupt(struct pt_regs *regs)
{
	unsigned long compare, tick, pstate;
	int cpu = smp_processor_id();
	int user = user_mode(regs);

	/*
	 * Check for level 14 softint.
	 */
	{
		unsigned long tick_mask = tick_ops->softint_mask;

		if (!(get_softint() & tick_mask)) {
			extern void handler_irq(int, struct pt_regs *);

			handler_irq(14, regs);
			return;
		}
		clear_softint(tick_mask);
	}

	do {
		profile_tick(CPU_PROFILING, regs);
		if (!--prof_counter(cpu)) {
			irq_enter();

			if (cpu == boot_cpu_id) {
				kstat_this_cpu.irqs[0]++;
				timer_tick_interrupt(regs);
			}

			update_process_times(user);

			irq_exit();

			prof_counter(cpu) = prof_multiplier(cpu);
		}

		/* Guarantee that the following sequences execute
		 * uninterrupted.
		 */
		__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
				     "wrpr	%0, %1, %%pstate"
				     : "=r" (pstate)
				     : "i" (PSTATE_IE));

		compare = tick_ops->add_compare(current_tick_offset);
		tick = tick_ops->get_tick();

		/* Restore PSTATE_IE. */
		__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
				     : /* no outputs */
				     : "r" (pstate));
	} while (time_after_eq(tick, compare));
}

static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	prof_counter(cpu) = prof_multiplier(cpu) = 1;

	/* Guarantee that the following sequences execute
	 * uninterrupted.
	 */
	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
			     "wrpr	%0, %1, %%pstate"
			     : "=r" (pstate)
			     : "i" (PSTATE_IE));

	tick_ops->init_tick(current_tick_offset);

	/* Restore PSTATE_IE. */
	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
			     : /* no outputs */
			     : "r" (pstate));
}

void __init smp_tick_init(void)
{
	boot_cpu_id = hard_smp_processor_id();
	current_tick_offset = timer_tick_offset;

	cpu_set(boot_cpu_id, cpu_online_map);
	prof_counter(boot_cpu_id) = prof_multiplier(boot_cpu_id) = 1;
}

/* /proc/profile writes can call this, don't __init it please. */
static DEFINE_SPINLOCK(prof_setup_lock);

int setup_profiling_timer(unsigned int multiplier)
{
	unsigned long flags;
	int i;

	if ((!multiplier) || (timer_tick_offset / multiplier) < 1000)
		return -EINVAL;

	spin_lock_irqsave(&prof_setup_lock, flags);
	for (i = 0; i < NR_CPUS; i++)
		prof_multiplier(i) = multiplier;
	current_tick_offset = (timer_tick_offset / multiplier);
	spin_unlock_irqrestore(&prof_setup_lock, flags);

	return 0;
}

/* Constrain the number of cpus to max_cpus.  */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	if (num_possible_cpus() > max_cpus) {
		int instance, mid;

		instance = 0;
		while (!cpu_find_by_instance(instance, NULL, &mid)) {
			if (mid != boot_cpu_id) {
				cpu_clear(mid, phys_cpu_present_map);
				if (num_possible_cpus() <= max_cpus)
					break;
			}
			instance++;
		}
	}

	smp_store_cpu_info(boot_cpu_id);
}

/* Set this up early so that things like the scheduler can init
 * properly.  We use the same cpu mask for both the present and
 * possible cpu map.
 */
void __init smp_setup_cpu_possible_map(void)
{
	int instance, mid;

	instance = 0;
	while (!cpu_find_by_instance(instance, NULL, &mid)) {
		if (mid < NR_CPUS)
			cpu_set(mid, phys_cpu_present_map);
		instance++;
	}
}

void __devinit smp_prepare_boot_cpu(void)
{
	int cpu = hard_smp_processor_id();

	if (cpu >= NR_CPUS) {
		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
		prom_halt();
	}

	current_thread_info()->cpu = cpu;
	__local_per_cpu_offset = __per_cpu_offset(cpu);

	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), phys_cpu_present_map);
}

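/* Bring one secondary cpu online.  After a successful boot we let it
 * past smp_commenced_mask, wait for it to mark itself online and
 * then, except on sun4v (where %tick/%stick cannot be written),
 * synchronize its tick register with the master.
 */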
int __devinit __cpu_up(unsigned int cpu)
{
	int ret = smp_boot_one_cpu(cpu);

	if (!ret) {
		cpu_set(cpu, smp_commenced_mask);
		while (!cpu_isset(cpu, cpu_online_map))
			mb();
		if (!cpu_isset(cpu, cpu_online_map)) {
			ret = -ENODEV;
		} else {
			/* On SUN4V, writes to %tick and %stick are
			 * not allowed.
			 */
			if (tlb_type != hypervisor)
				smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
	unsigned long bogosum = 0;
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			bogosum += cpu_data(i).udelay_val;
	}
	printk("Total of %ld processors activated "
	       "(%lu.%02lu BogoMIPS).\n",
	       (long) num_online_cpus(),
	       bogosum/(500000/HZ),
	       (bogosum/(5000/HZ))%100);
}

void smp_send_reschedule(int cpu)
{
	smp_receive_signal(cpu);
}

/* This is a nop because we capture all other cpus
 * anyways when making the PROM active.
 */
void smp_send_stop(void)
{
}

unsigned long __per_cpu_base __read_mostly;
unsigned long __per_cpu_shift __read_mostly;

EXPORT_SYMBOL(__per_cpu_base);
EXPORT_SYMBOL(__per_cpu_shift);

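/* Carve out one per-cpu copy of the percpu data section from bootmem.
 * The per-copy size is rounded up to a power of two so that
 * __per_cpu_offset() can locate a cpu's copy by shifting the cpu id
 * rather than multiplying.
 */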
void __init setup_per_cpu_areas(void)
{
	unsigned long goal, size, i;
	char *ptr;

	/* Copy section for each CPU (we discard the original) */
	goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
#ifdef CONFIG_MODULES
	if (goal < PERCPU_ENOUGH_ROOM)
		goal = PERCPU_ENOUGH_ROOM;
#endif
	__per_cpu_shift = 0;
	for (size = 1UL; size < goal; size <<= 1UL)
		__per_cpu_shift++;

	ptr = alloc_bootmem(size * NR_CPUS);

	__per_cpu_base = ptr - __per_cpu_start;

	for (i = 0; i < NR_CPUS; i++, ptr += size)
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}