2005-04-17 02:20:36 +04:00
/* Kernel thread helper functions.
* Copyright ( C ) 2004 IBM Corporation , Rusty Russell .
*
2007-05-09 13:34:32 +04:00
* Creation is done via kthreadd , so that we get a clean environment
2005-04-17 02:20:36 +04:00
* even if we're invoked from userspace (think modprobe, hotplug cpu,
* etc.).
*/
# include <linux/sched.h>
# include <linux/kthread.h>
# include <linux/completion.h>
# include <linux/err.h>
cpuset,mm: update tasks' mems_allowed in time
Fix allocating page cache/slab object on the unallowed node when memory
spread is set by updating tasks' mems_allowed after its cpuset's mems is
changed.
In order to update tasks' mems_allowed in time, we must modify the memory
policy code, because the memory policy was originally applied only in the
process's own context. After applying this patch, one task directly
manipulates another's mems_allowed, and we use alloc_lock in the
task_struct to protect mems_allowed and memory policy of the task.
In the fast path, however, we do not take the lock to protect them, because
adding a lock there may lead to a performance regression. Without a lock, the
task might see no nodes at all while the cpuset's mems_allowed is being
changed to some non-overlapping set. To avoid that, we first set all newly
allowed nodes and only then clear the newly disallowed ones.
[lee.schermerhorn@hp.com:
The rework of mpol_new() to extract the adjusting of the node mask to
apply cpuset and mpol flags "context" breaks set_mempolicy() and mbind()
with MPOL_PREFERRED and a NULL nodemask--i.e., explicit local
allocation. Fix this by adding the check for MPOL_PREFERRED and empty
node mask to mpol_new_mempolicy().
Remove the now unneeded 'nodes = NULL' from mpol_new().
Note that mpol_new_mempolicy() is always called with a non-NULL
'nodes' parameter now that it has been removed from mpol_new().
Therefore, we don't need to test nodes for NULL before testing it for
'empty'. However, just to be extra paranoid, add a VM_BUG_ON() to
verify this assumption.]
[lee.schermerhorn@hp.com:
I don't think the function name 'mpol_new_mempolicy' is descriptive
enough to differentiate it from mpol_new().
This function applies cpuset set context, usually constraining nodes
to those allowed by the cpuset. However, when the 'RELATIVE_NODES flag
is set, it also translates the nodes. So I settled on
'mpol_set_nodemask()', because the comment block for mpol_new() mentions
that we need to call this function to "set nodes".
Some additional minor line length, whitespace and typo cleanup.]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Paul Menage <menage@google.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-06-17 02:31:49 +04:00
# include <linux/cpuset.h>
2005-04-17 02:20:36 +04:00
# include <linux/unistd.h>
# include <linux/file.h>
# include <linux/module.h>
2006-03-23 14:00:24 +03:00
# include <linux/mutex.h>
2010-06-29 12:07:09 +04:00
# include <linux/slab.h>
# include <linux/freezer.h>
2009-04-15 03:39:12 +04:00
# include <trace/events/sched.h>
2005-04-17 02:20:36 +04:00
2007-05-09 13:34:32 +04:00
/*
 * Queue of pending creation requests.  kthread_create() appends to the
 * list and kthreadd() removes entries; both do so under the lock.
 */
static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);

/*
 * The task that kthread_create() wakes to service the queue.  It is not
 * assigned anywhere in the code visible here.
 */
struct task_struct *kthreadd_task;
2005-04-17 02:20:36 +04:00
struct kthread_create_info
{
2007-05-09 13:34:32 +04:00
/* Information passed to kthread() from kthreadd. */
2005-04-17 02:20:36 +04:00
int ( * threadfn ) ( void * data ) ;
void * data ;
2007-05-09 13:34:32 +04:00
/* Result passed back to kthread_create() from kthreadd. */
2005-04-17 02:20:36 +04:00
struct task_struct * result ;
struct completion done ;
2006-11-22 17:55:48 +03:00
2007-05-09 13:34:32 +04:00
struct list_head list ;
2005-04-17 02:20:36 +04:00
} ;
2009-06-18 03:27:45 +04:00
struct kthread {
int should_stop ;
2010-06-29 12:07:09 +04:00
void * data ;
2009-06-18 03:27:45 +04:00
struct completion exited ;
2005-04-17 02:20:36 +04:00
} ;
2009-06-18 03:27:45 +04:00
/* Recover the struct kthread of @tsk from its vfork_done pointer. */
#define to_kthread(tsk)	\
	container_of((tsk)->vfork_done, struct kthread, exited)
2005-04-17 02:20:36 +04:00
2006-06-25 16:49:19 +04:00
/**
* kthread_should_stop - should this kthread return now ?
*
2007-02-10 12:45:59 +03:00
* When someone calls kthread_stop ( ) on your kthread , it will be woken
2006-06-25 16:49:19 +04:00
* and this will return true . You should then return , and your return
* value will be passed through to kthread_stop ( ) .
*/
2005-04-17 02:20:36 +04:00
int kthread_should_stop ( void )
{
2009-06-18 03:27:45 +04:00
return to_kthread ( current ) - > should_stop ;
2005-04-17 02:20:36 +04:00
}
EXPORT_SYMBOL ( kthread_should_stop ) ;
2010-06-29 12:07:09 +04:00
/**
* kthread_data - return data value specified on kthread creation
* @ task : kthread task in question
*
* Return the data value specified when kthread @ task was created .
* The caller is responsible for ensuring the validity of @ task when
* calling this function .
*/
void * kthread_data ( struct task_struct * task )
{
return to_kthread ( task ) - > data ;
}
2005-04-17 02:20:36 +04:00
static int kthread ( void * _create )
{
2009-06-18 03:27:45 +04:00
/* Copy data: it's on kthread's stack */
2005-04-17 02:20:36 +04:00
struct kthread_create_info * create = _create ;
2009-06-18 03:27:45 +04:00
int ( * threadfn ) ( void * data ) = create - > threadfn ;
void * data = create - > data ;
struct kthread self ;
int ret ;
2005-04-17 02:20:36 +04:00
2009-06-18 03:27:45 +04:00
self . should_stop = 0 ;
2010-06-29 12:07:09 +04:00
self . data = data ;
2009-06-18 03:27:45 +04:00
init_completion ( & self . exited ) ;
current - > vfork_done = & self . exited ;
2005-04-17 02:20:36 +04:00
/* OK, tell user we're spawned, wait for stop or wakeup */
2007-05-24 00:57:27 +04:00
__set_current_state ( TASK_UNINTERRUPTIBLE ) ;
2009-04-09 19:50:35 +04:00
create - > result = current ;
2009-06-18 03:27:43 +04:00
complete ( & create - > done ) ;
2005-04-17 02:20:36 +04:00
schedule ( ) ;
2009-06-18 03:27:45 +04:00
ret = - EINTR ;
if ( ! self . should_stop )
2005-04-17 02:20:36 +04:00
ret = threadfn ( data ) ;
2009-06-18 03:27:45 +04:00
/* we can't just return, we must preserve "self" on stack */
do_exit ( ret ) ;
2005-04-17 02:20:36 +04:00
}
2007-05-09 13:34:32 +04:00
static void create_kthread ( struct kthread_create_info * create )
2005-04-17 02:20:36 +04:00
{
int pid ;
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread ( kthread , create , CLONE_FS | CLONE_FILES | SIGCHLD ) ;
2009-06-18 03:27:43 +04:00
if ( pid < 0 ) {
2005-04-17 02:20:36 +04:00
create - > result = ERR_PTR ( pid ) ;
2009-06-18 03:27:43 +04:00
complete ( & create - > done ) ;
}
2005-04-17 02:20:36 +04:00
}
2006-06-25 16:49:19 +04:00
/**
* kthread_create - create a kthread .
* @ threadfn : the function to run until signal_pending ( current ) .
* @ data : data ptr for @ threadfn .
* @ namefmt : printf - style name for the thread .
*
* Description : This helper function creates and names a kernel
* thread . The thread will be stopped : use wake_up_process ( ) to start
2010-02-09 07:07:40 +03:00
* it . See also kthread_run ( ) .
2006-06-25 16:49:19 +04:00
*
* When woken , the thread will run @ threadfn ( ) with @ data as its
2007-02-10 12:45:59 +03:00
* argument . @ threadfn ( ) can either call do_exit ( ) directly if it is a
2006-06-25 16:49:19 +04:00
* standalone thread for which noone will call kthread_stop ( ) , or
* return when ' kthread_should_stop ( ) ' is true ( which means
* kthread_stop ( ) has been called ) . The return value should be zero
* or a negative error number ; it will be passed to kthread_stop ( ) .
*
* Returns a task_struct or ERR_PTR ( - ENOMEM ) .
*/
2005-04-17 02:20:36 +04:00
struct task_struct * kthread_create ( int ( * threadfn ) ( void * data ) ,
void * data ,
const char namefmt [ ] ,
. . . )
{
struct kthread_create_info create ;
create . threadfn = threadfn ;
create . data = data ;
init_completion ( & create . done ) ;
2007-05-09 13:34:32 +04:00
spin_lock ( & kthread_create_lock ) ;
list_add_tail ( & create . list , & kthread_create_list ) ;
spin_unlock ( & kthread_create_lock ) ;
2008-04-29 11:59:23 +04:00
wake_up_process ( kthreadd_task ) ;
2007-05-09 13:34:32 +04:00
wait_for_completion ( & create . done ) ;
2005-04-17 02:20:36 +04:00
if ( ! IS_ERR ( create . result ) ) {
2009-04-09 19:50:36 +04:00
struct sched_param param = { . sched_priority = 0 } ;
2005-04-17 02:20:36 +04:00
va_list args ;
2009-04-09 19:50:36 +04:00
2005-04-17 02:20:36 +04:00
va_start ( args , namefmt ) ;
vsnprintf ( create . result - > comm , sizeof ( create . result - > comm ) ,
namefmt , args ) ;
va_end ( args ) ;
2009-04-09 19:50:36 +04:00
/*
* root may have changed our ( kthreadd ' s ) priority or CPU mask .
* The kernel thread should not inherit these properties .
*/
sched_setscheduler_nocheck ( create . result , SCHED_NORMAL , & param ) ;
set_cpus_allowed_ptr ( create . result , cpu_all_mask ) ;
2005-04-17 02:20:36 +04:00
}
return create . result ;
}
EXPORT_SYMBOL ( kthread_create ) ;
2009-12-16 20:04:39 +03:00
/**
* kthread_bind - bind a just - created kthread to a cpu .
* @ p : thread created by kthread_create ( ) .
* @ cpu : cpu ( might not be online , must be possible ) for @ k to run on .
*
* Description : This function is equivalent to set_cpus_allowed ( ) ,
* except that @ cpu doesn ' t need to be online , and the thread must be
* stopped ( i . e . , just returned from kthread_create ( ) ) .
*/
void kthread_bind ( struct task_struct * p , unsigned int cpu )
{
/* Must have done schedule() in kthread() before we set_task_cpu */
if ( ! wait_task_inactive ( p , TASK_UNINTERRUPTIBLE ) ) {
WARN_ON ( 1 ) ;
return ;
}
p - > cpus_allowed = cpumask_of_cpu ( cpu ) ;
p - > rt . nr_cpus_allowed = 1 ;
p - > flags | = PF_THREAD_BOUND ;
}
EXPORT_SYMBOL ( kthread_bind ) ;
2006-06-25 16:49:19 +04:00
/**
* kthread_stop - stop a thread created by kthread_create ( ) .
* @ k : thread created by kthread_create ( ) .
*
* Sets kthread_should_stop ( ) for @ k to return true , wakes it , and
2009-06-19 04:51:13 +04:00
* waits for it to exit . This can also be called after kthread_create ( )
* instead of calling wake_up_process ( ) : the thread will exit without
* calling threadfn ( ) .
*
* If threadfn ( ) may call do_exit ( ) itself , the caller must ensure
* task_struct can ' t go away .
2006-06-25 16:49:19 +04:00
*
* Returns the result of threadfn ( ) , or % - EINTR if wake_up_process ( )
* was never called .
*/
2005-04-17 02:20:36 +04:00
int kthread_stop ( struct task_struct * k )
{
2009-06-18 03:27:45 +04:00
struct kthread * kthread ;
2005-04-17 02:20:36 +04:00
int ret ;
tracing, sched: LTTng instrumentation - scheduler
Instrument the scheduler activity (sched_switch, migration, wakeups,
wait for a task, signal delivery) and process/thread
creation/destruction (fork, exit, kthread stop). Actually, kthread
creation is not instrumented in this patch because it is architecture
dependent. It allows to connect tracers such as ftrace which detects
scheduling latencies, good/bad scheduler decisions. Tools like LTTng can
export this scheduler information along with instrumentation of the rest
of the kernel activity to perform post-mortem analysis on the scheduler
activity.
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added. See the "Tracepoints" patch header for
performance result detail.
Changelog :
- Change instrumentation location and parameter to match ftrace
instrumentation, previously done with kernel markers.
[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 20:16:17 +04:00
trace_sched_kthread_stop ( k ) ;
2009-06-18 03:27:45 +04:00
get_task_struct ( k ) ;
tracing, sched: LTTng instrumentation - scheduler
Instrument the scheduler activity (sched_switch, migration, wakeups,
wait for a task, signal delivery) and process/thread
creation/destruction (fork, exit, kthread stop). Actually, kthread
creation is not instrumented in this patch because it is architecture
dependent. It allows to connect tracers such as ftrace which detects
scheduling latencies, good/bad scheduler decisions. Tools like LTTng can
export this scheduler information along with instrumentation of the rest
of the kernel activity to perform post-mortem analysis on the scheduler
activity.
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added. See the "Tracepoints" patch header for
performance result detail.
Changelog :
- Change instrumentation location and parameter to match ftrace
instrumentation, previously done with kernel markers.
[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 20:16:17 +04:00
2009-06-18 03:27:45 +04:00
kthread = to_kthread ( k ) ;
barrier ( ) ; /* it might have exited */
if ( k - > vfork_done ! = NULL ) {
kthread - > should_stop = 1 ;
wake_up_process ( k ) ;
wait_for_completion ( & kthread - > exited ) ;
}
ret = k - > exit_code ;
2005-04-17 02:20:36 +04:00
put_task_struct ( k ) ;
tracing, sched: LTTng instrumentation - scheduler
Instrument the scheduler activity (sched_switch, migration, wakeups,
wait for a task, signal delivery) and process/thread
creation/destruction (fork, exit, kthread stop). Actually, kthread
creation is not instrumented in this patch because it is architecture
dependent. It allows to connect tracers such as ftrace which detects
scheduling latencies, good/bad scheduler decisions. Tools like LTTng can
export this scheduler information along with instrumentation of the rest
of the kernel activity to perform post-mortem analysis on the scheduler
activity.
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added. See the "Tracepoints" patch header for
performance result detail.
Changelog :
- Change instrumentation location and parameter to match ftrace
instrumentation, previously done with kernel markers.
[ mingo@elte.hu: conflict resolutions ]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 20:16:17 +04:00
trace_sched_kthread_stop_ret ( ret ) ;
2005-04-17 02:20:36 +04:00
return ret ;
}
2006-07-14 11:24:05 +04:00
EXPORT_SYMBOL ( kthread_stop ) ;
2005-04-17 02:20:36 +04:00
2007-07-31 11:39:16 +04:00
int kthreadd ( void * unused )
2005-04-17 02:20:36 +04:00
{
2007-05-09 13:34:32 +04:00
struct task_struct * tsk = current ;
2005-04-17 02:20:36 +04:00
2007-07-31 11:39:16 +04:00
/* Setup a clean context for our children to inherit. */
2007-05-09 13:34:32 +04:00
set_task_comm ( tsk , " kthreadd " ) ;
2007-05-09 13:34:37 +04:00
ignore_signals ( tsk ) ;
2009-03-31 08:05:10 +04:00
set_cpus_allowed_ptr ( tsk , cpu_all_mask ) ;
2010-03-23 23:35:34 +03:00
set_mems_allowed ( node_states [ N_HIGH_MEMORY ] ) ;
2007-05-09 13:34:32 +04:00
2008-06-12 00:04:29 +04:00
current - > flags | = PF_NOFREEZE | PF_FREEZER_NOSIG ;
2007-05-09 13:34:32 +04:00
for ( ; ; ) {
set_current_state ( TASK_INTERRUPTIBLE ) ;
if ( list_empty ( & kthread_create_list ) )
schedule ( ) ;
__set_current_state ( TASK_RUNNING ) ;
spin_lock ( & kthread_create_lock ) ;
while ( ! list_empty ( & kthread_create_list ) ) {
struct kthread_create_info * create ;
create = list_entry ( kthread_create_list . next ,
struct kthread_create_info , list ) ;
list_del_init ( & create - > list ) ;
spin_unlock ( & kthread_create_lock ) ;
create_kthread ( create ) ;
spin_lock ( & kthread_create_lock ) ;
}
spin_unlock ( & kthread_create_lock ) ;
}
return 0 ;
}
2010-06-29 12:07:09 +04:00
/**
* kthread_worker_fn - kthread function to process kthread_worker
* @ worker_ptr : pointer to initialized kthread_worker
*
* This function can be used as @ threadfn to kthread_create ( ) or
* kthread_run ( ) with @ worker_ptr argument pointing to an initialized
* kthread_worker . The started kthread will process work_list until
* the it is stopped with kthread_stop ( ) . A kthread can also call
* this function directly after extra initialization .
*
* Different kthreads can be used for the same kthread_worker as long
* as there ' s only one kthread attached to it at any given time . A
* kthread_worker without an attached kthread simply collects queued
* kthread_works .
*/
int kthread_worker_fn ( void * worker_ptr )
{
struct kthread_worker * worker = worker_ptr ;
struct kthread_work * work ;
WARN_ON ( worker - > task ) ;
worker - > task = current ;
repeat :
set_current_state ( TASK_INTERRUPTIBLE ) ; /* mb paired w/ kthread_stop */
if ( kthread_should_stop ( ) ) {
__set_current_state ( TASK_RUNNING ) ;
spin_lock_irq ( & worker - > lock ) ;
worker - > task = NULL ;
spin_unlock_irq ( & worker - > lock ) ;
return 0 ;
}
work = NULL ;
spin_lock_irq ( & worker - > lock ) ;
if ( ! list_empty ( & worker - > work_list ) ) {
work = list_first_entry ( & worker - > work_list ,
struct kthread_work , node ) ;
list_del_init ( & work - > node ) ;
}
spin_unlock_irq ( & worker - > lock ) ;
if ( work ) {
__set_current_state ( TASK_RUNNING ) ;
work - > func ( work ) ;
smp_wmb ( ) ; /* wmb worker-b0 paired with flush-b1 */
work - > done_seq = work - > queue_seq ;
smp_mb ( ) ; /* mb worker-b1 paired with flush-b0 */
if ( atomic_read ( & work - > flushing ) )
wake_up_all ( & work - > done ) ;
} else if ( ! freezing ( current ) )
schedule ( ) ;
try_to_freeze ( ) ;
goto repeat ;
}
EXPORT_SYMBOL_GPL ( kthread_worker_fn ) ;
/**
* queue_kthread_work - queue a kthread_work
* @ worker : target kthread_worker
* @ work : kthread_work to queue
*
* Queue @ work to work processor @ task for async execution . @ task
* must have been created with kthread_worker_create ( ) . Returns % true
* if @ work was successfully queued , % false if it was already pending .
*/
bool queue_kthread_work ( struct kthread_worker * worker ,
struct kthread_work * work )
{
bool ret = false ;
unsigned long flags ;
spin_lock_irqsave ( & worker - > lock , flags ) ;
if ( list_empty ( & work - > node ) ) {
list_add_tail ( & work - > node , & worker - > work_list ) ;
work - > queue_seq + + ;
if ( likely ( worker - > task ) )
wake_up_process ( worker - > task ) ;
ret = true ;
}
spin_unlock_irqrestore ( & worker - > lock , flags ) ;
return ret ;
}
EXPORT_SYMBOL_GPL ( queue_kthread_work ) ;
/**
 * flush_kthread_work - flush a kthread_work
 * @work: work to flush
 *
 * If @work is queued or executing, wait for it to finish execution.
 *
 * The wait is expressed through sequence numbers: @work->queue_seq is
 * sampled on entry and the caller sleeps on @work->done until
 * @work->done_seq has caught up with that sample.  While waiting,
 * @work->flushing is held elevated; kthread_worker_fn() checks that
 * counter to decide whether to wake up waiters on @work->done.
 */
void flush_kthread_work ( struct kthread_work * work )
{
/* Sequence number this flush has to see completed. */
int seq = work - > queue_seq ;
/* Announce a waiter so that the worker issues the wake-up. */
atomic_inc ( & work - > flushing ) ;
/*
 * mb flush-b0 paired with worker-b1, to make sure either
 * worker sees the above increment or we see done_seq update.
 */
smp_mb__after_atomic_inc ( ) ;
/* A - B <= 0 tests whether B is in front of A regardless of overflow */
wait_event ( work - > done , seq - work - > done_seq < = 0 ) ;
atomic_dec ( & work - > flushing ) ;
/*
 * rmb flush-b1 paired with worker-b0, to make sure our caller
 * sees every change made by work->func().
 */
smp_mb__after_atomic_dec ( ) ;
}
EXPORT_SYMBOL_GPL ( flush_kthread_work ) ;
/*
 * Marker work used by flush_kthread_worker(): a kthread_work paired with
 * the completion that its handler signals.
 */
struct kthread_flush_work {
	struct kthread_work work;	/* queued on the worker being flushed */
	struct completion done;		/* completed by kthread_flush_work_fn() */
};
static void kthread_flush_work_fn ( struct kthread_work * work )
{
struct kthread_flush_work * fwork =
container_of ( work , struct kthread_flush_work , work ) ;
complete ( & fwork - > done ) ;
}
/**
* flush_kthread_worker - flush all current works on a kthread_worker
* @ worker : worker to flush
*
* Wait until all currently executing or pending works on @ worker are
* finished .
*/
void flush_kthread_worker ( struct kthread_worker * worker )
{
struct kthread_flush_work fwork = {
KTHREAD_WORK_INIT ( fwork . work , kthread_flush_work_fn ) ,
COMPLETION_INITIALIZER_ONSTACK ( fwork . done ) ,
} ;
queue_kthread_work ( worker , & fwork . work ) ;
wait_for_completion ( & fwork . done ) ;
}
EXPORT_SYMBOL_GPL ( flush_kthread_worker ) ;