/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <asm/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	struct list_head	works;		/* list of pending works */
	struct task_struct	*thread;	/* stopper thread */
	bool			enabled;	/* is this stopper enabled? */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
				struct cpu_stop_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(stopper->thread);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

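/*
 * Usage sketch (illustrative only; the callback name is hypothetical).
 * @fn runs with the target cpu monopolized and must not sleep:
 *
 *	static int reset_local_counter(void *arg)
 *	{
 *		*(unsigned long *)arg = 0;
 *		return 0;
 *	}
 *
 *	err = stop_one_cpu(cpu, reset_local_counter, &counter);
 */
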
/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}

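/*
 * Usage sketch (illustrative only; names are hypothetical).  Because the
 * call returns without waiting, @work_buf must stay untouched until the
 * stopper picks it up, e.g. by living in a per-cpu variable:
 *
 *	static DEFINE_PER_CPU(struct cpu_stop_work, poke_work);
 *
 *	stop_one_cpu_nowait(cpu, poke_cpu_fn, NULL,
 *			    &per_cpu(poke_work, cpu));
 */
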
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

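/*
 * __stop_cpus() reuses the static per-cpu works above, so its callers
 * (stop_cpus() and try_stop_cpus()) serialize on stop_cpus_mutex.
 */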
int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_work *work;
	struct cpu_stop_done done;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = &done;
	}
	cpu_stop_init_done(&done, cpumask_weight(cpumask));

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
				    &per_cpu(stop_cpus_work, cpu));
	preempt_enable();

	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

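/*
 * Usage sketch (illustrative only; names are hypothetical).  Since
 * stop_cpus() calls are serialized, @fn may wait for its peers to enter
 * before doing the real work:
 *
 *	static int sync_state_on_cpu(void *arg)
 *	{
 *		return 0;
 *	}
 *
 *	ret = stop_cpus(cpu_online_mask, sync_state_on_cpu, &ctx);
 */
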
/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

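/*
 * Usage sketch (illustrative only).  Callers that must not block behind
 * another user of the facility can back off on -EAGAIN:
 *
 *	ret = try_stop_cpus(cpu_online_mask, fn, arg);
 *	if (ret == -EAGAIN)
 *		return ret;
 */
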
static int cpu_stopper_thread(void *data)
{
	struct cpu_stopper *stopper = data;
	struct cpu_stop_work *work;
	int ret;

repeat:
	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */

	if (kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN];

		__set_current_state(TASK_RUNNING);

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
	} else
		schedule();

	goto repeat;
}

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;
	struct task_struct *p;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		BUG_ON(stopper->thread || stopper->enabled ||
		       !list_empty(&stopper->works));
		p = kthread_create(cpu_stopper_thread, stopper, "stopper/%d",
				   cpu);
		if (IS_ERR(p))
			return NOTIFY_BAD;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		get_task_struct(p);
		stopper->thread = p;
		break;

	case CPU_ONLINE:
		kthread_bind(stopper->thread, cpu);
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(stopper->thread);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		/* kill the stopper */
		kthread_stop(stopper->thread);
		/* drain remaining works */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(stopper->thread);
		stopper->thread = NULL;
		break;
#endif
	}

	return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};

static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err == NOTIFY_BAD);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	return 0;
}
early_initcall(cpu_stop_init);

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};

static enum stopmachine_state state;

struct stop_machine_data {
	int (*fn)(void *);
	void *data;
	int fnret;
};

/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
static unsigned int num_threads;
static atomic_t thread_ack;
static DEFINE_MUTEX(lock);
/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
static DEFINE_MUTEX(setup_lock);
/* Users of stop_machine. */
static int refcount;
static struct workqueue_struct *stop_machine_wq;
static struct stop_machine_data active, idle;
static const struct cpumask *active_cpus;
static void __percpu *stop_machine_work;

static void set_state(enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&thread_ack, num_threads);
	smp_wmb();
	state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(void)
{
	if (atomic_dec_and_test(&thread_ack))
		set_state(state + 1);
}

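/*
 * set_state()/ack_state() form a lockstep barrier: each cpu running
 * stop_cpu() below acks every state, and only the last ack advances the
 * machine through
 *
 *	STOPMACHINE_PREPARE -> STOPMACHINE_DISABLE_IRQ ->
 *	STOPMACHINE_RUN -> STOPMACHINE_EXIT
 *
 * so no cpu runs the payload before every participant has disabled
 * interrupts, and none leaves before all have finished.
 */
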
/* This is the actual function which stops the CPU.  It runs
 * in the context of a dedicated stopmachine workqueue. */
static void stop_cpu(struct work_struct *unused)
{
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	struct stop_machine_data *smdata = &idle;
	int cpu = smp_processor_id();
	int err;

	if (!active_cpus) {
		if (cpu == cpumask_first(cpu_online_mask))
			smdata = &active;
	} else {
		if (cpumask_test_cpu(cpu, active_cpus))
			smdata = &active;
	}
	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (state != curstate) {
			curstate = state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				/* On multiple CPUs only a single error code
				 * is needed to tell that something failed. */
				err = smdata->fn(smdata->data);
				if (err)
					smdata->fnret = err;
				break;
			default:
				break;
			}
			ack_state();
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_enable();
}

/* Callback for CPUs which aren't supposed to do anything. */
static int chill(void *unused)
{
	return 0;
}

int stop_machine_create(void)
{
	mutex_lock(&setup_lock);
	if (refcount)
		goto done;
	stop_machine_wq = create_rt_workqueue("kstop");
	if (!stop_machine_wq)
		goto err_out;
	stop_machine_work = alloc_percpu(struct work_struct);
	if (!stop_machine_work)
		goto err_out;
done:
	refcount++;
	mutex_unlock(&setup_lock);
	return 0;

err_out:
	if (stop_machine_wq)
		destroy_workqueue(stop_machine_wq);
	mutex_unlock(&setup_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(stop_machine_create);

void stop_machine_destroy(void)
{
	mutex_lock(&setup_lock);
	refcount--;
	if (refcount)
		goto done;
	destroy_workqueue(stop_machine_wq);
	free_percpu(stop_machine_work);
done:
	mutex_unlock(&setup_lock);
}
EXPORT_SYMBOL_GPL(stop_machine_destroy);

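/*
 * Usage sketch (illustrative only).  stop_machine_create() lets a caller
 * pin the "kstop" workqueue and per-cpu works ahead of time, so a later
 * stop_machine() call on the same refcount cannot fail on allocation:
 *
 *	ret = stop_machine_create();
 *	if (ret)
 *		return ret;
 *	...
 *	stop_machine_destroy();
 */
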
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct work_struct *sm_work;
	int i, ret;

	/* Set up initial state. */
	mutex_lock(&lock);
	num_threads = num_online_cpus();
	active_cpus = cpus;
	active.fn = fn;
	active.data = data;
	active.fnret = 0;
	idle.fn = chill;
	idle.data = NULL;

	set_state(STOPMACHINE_PREPARE);

	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
	 * doesn't hit this CPU until we're ready. */
	get_cpu();
	for_each_online_cpu(i) {
		sm_work = per_cpu_ptr(stop_machine_work, i);
		INIT_WORK(sm_work, stop_cpu);
		queue_work_on(i, stop_machine_wq, sm_work);
	}
	/* This will release the thread on our CPU. */
	put_cpu();
	flush_workqueue(stop_machine_wq);
	ret = active.fnret;
	mutex_unlock(&lock);
	return ret;
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	ret = stop_machine_create();
	if (ret)
		return ret;
	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	stop_machine_destroy();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
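
/*
 * Usage sketch (illustrative only; the callback name is hypothetical).
 * Every online cpu spins with interrupts disabled while @fn runs, so
 * keep @fn short and non-sleeping:
 *
 *	static int apply_patch(void *addr)
 *	{
 *		return 0;
 *	}
 *
 *	ret = stop_machine(apply_patch, addr, NULL);
 */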