2005-04-16 15:20:36 -07:00
/*
* linux / kernel / exit . c
*
* Copyright ( C ) 1991 , 1992 Linus Torvalds
*/
# include <linux/config.h>
# include <linux/mm.h>
# include <linux/slab.h>
# include <linux/interrupt.h>
# include <linux/smp_lock.h>
# include <linux/module.h>
2006-01-11 12:17:46 -08:00
# include <linux/capability.h>
2005-04-16 15:20:36 -07:00
# include <linux/completion.h>
# include <linux/personality.h>
# include <linux/tty.h>
# include <linux/namespace.h>
# include <linux/key.h>
# include <linux/security.h>
# include <linux/cpu.h>
# include <linux/acct.h>
# include <linux/file.h>
# include <linux/binfmts.h>
# include <linux/ptrace.h>
# include <linux/profile.h>
# include <linux/mount.h>
# include <linux/proc_fs.h>
# include <linux/mempolicy.h>
# include <linux/cpuset.h>
# include <linux/syscalls.h>
2005-05-01 08:59:14 -07:00
# include <linux/signal.h>
2006-03-28 16:11:18 -08:00
# include <linux/posix-timers.h>
2005-11-07 00:59:16 -08:00
# include <linux/cn_proc.h>
2006-01-09 15:59:21 -08:00
# include <linux/mutex.h>
2006-03-27 01:16:22 -08:00
# include <linux/futex.h>
2006-03-27 01:16:24 -08:00
# include <linux/compat.h>
2006-04-11 13:52:07 +02:00
# include <linux/pipe_fs_i.h>
2006-03-29 20:30:19 -05:00
# include <linux/audit.h> /* for audit_free() */
2005-04-16 15:20:36 -07:00
# include <asm/uaccess.h>
# include <asm/unistd.h>
# include <asm/pgtable.h>
# include <asm/mmu_context.h>
/* Declared here; not referenced in the visible part of this file. */
extern void sem_exit ( void ) ;
/*
 * Task that inherits orphaned children (see forget_original_parent() and
 * reparent_to_init()); do_exit() panics if this task itself exits.
 */
extern struct task_struct * child_reaper ;
/* Called by wait_noreap_copyout() with a user-space rusage pointer. */
int getrusage ( struct task_struct * , int , struct rusage __user * ) ;
2005-05-01 08:59:29 -07:00
/* Forward declaration: daemonize() calls exit_mm() before its definition. */
static void exit_mm ( struct task_struct * tsk ) ;
2005-04-16 15:20:36 -07:00
static void __unhash_process ( struct task_struct * p )
{
nr_threads - - ;
detach_pid ( p , PIDTYPE_PID ) ;
if ( thread_group_leader ( p ) ) {
detach_pid ( p , PIDTYPE_PGID ) ;
detach_pid ( p , PIDTYPE_SID ) ;
2006-03-28 16:11:06 -08:00
2006-04-18 22:20:16 -07:00
list_del_rcu ( & p - > tasks ) ;
2006-03-28 16:11:07 -08:00
__get_cpu_var ( process_counts ) - - ;
2005-04-16 15:20:36 -07:00
}
2006-03-28 16:11:25 -08:00
list_del_rcu ( & p - > thread_group ) ;
2006-03-28 16:11:06 -08:00
remove_parent ( p ) ;
2005-04-16 15:20:36 -07:00
}
2006-03-28 16:11:18 -08:00
/*
 * Detach @tsk from its signal_struct and sighand_struct and unhash it.
 *
 * This function expects the tasklist_lock write-locked.
 *
 * The local 'sig' doubles as a marker: it stays non-NULL only when this
 * task dropped the last reference on the signal_struct, in which case the
 * shared pending queue is flushed and the structure cleaned up at the end.
 */
static void __exit_signal ( struct task_struct * tsk )
{
struct signal_struct * sig = tsk - > signal ;
struct sighand_struct * sighand ;
BUG_ON ( ! sig ) ;
BUG_ON ( ! atomic_read ( & sig - > count ) ) ;
/* sighand is fetched with rcu_dereference(), hence the RCU read section. */
rcu_read_lock ( ) ;
sighand = rcu_dereference ( tsk - > sighand ) ;
spin_lock ( & sighand - > siglock ) ;
posix_cpu_timers_exit ( tsk ) ;
if ( atomic_dec_and_test ( & sig - > count ) )
posix_cpu_timers_exit_group ( tsk ) ;
else {
/*
 * If there is any task waiting for the group exit
 * then notify it:
 */
if ( sig - > group_exit_task & & atomic_read ( & sig - > count ) = = sig - > notify_count ) {
wake_up_process ( sig - > group_exit_task ) ;
sig - > group_exit_task = NULL ;
}
/* Do not leave curr_target pointing at the departing thread. */
if ( tsk = = sig - > curr_target )
sig - > curr_target = next_thread ( tsk ) ;
/*
 * Accumulate here the counters for all threads but the
 * group leader as they die, so they can be added into
 * the process-wide totals when those are taken.
 * The group leader stays around as a zombie as long
 * as there are other threads.  When it gets reaped,
 * the exit.c code will add its counts into these totals.
 * We won't ever get here for the group leader, since it
 * will have been the last reference on the signal_struct.
 */
sig - > utime = cputime_add ( sig - > utime , tsk - > utime ) ;
sig - > stime = cputime_add ( sig - > stime , tsk - > stime ) ;
sig - > min_flt + = tsk - > min_flt ;
sig - > maj_flt + = tsk - > maj_flt ;
sig - > nvcsw + = tsk - > nvcsw ;
sig - > nivcsw + = tsk - > nivcsw ;
sig - > sched_time + = tsk - > sched_time ;
sig = NULL ; /* Marker for below. */
}
2006-03-28 16:11:20 -08:00
__unhash_process ( tsk ) ;
2006-03-28 16:11:18 -08:00
tsk - > signal = NULL ;
2006-03-28 16:11:27 -08:00
tsk - > sighand = NULL ;
2006-03-28 16:11:18 -08:00
spin_unlock ( & sighand - > siglock ) ;
rcu_read_unlock ( ) ;
2006-03-28 16:11:27 -08:00
/* Uses the local copy: tsk->sighand was cleared above. */
__cleanup_sighand ( sighand ) ;
2006-03-28 16:11:18 -08:00
clear_tsk_thread_flag ( tsk , TIF_SIGPENDING ) ;
flush_sigqueue ( & tsk - > pending ) ;
/* Non-NULL only if we held the last reference (see marker above). */
if ( sig ) {
flush_sigqueue ( & sig - > shared_pending ) ;
__cleanup_signal ( sig ) ;
}
}
2006-03-31 02:31:37 -08:00
/*
 * RCU callback queued by release_task(): drop a reference on the
 * task_struct that embeds @rhp as its ->rcu member.
 */
static void delayed_put_task_struct(struct rcu_head *rhp)
{
	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);

	put_task_struct(tsk);
}
2005-04-16 15:20:36 -07:00
/*
 * Release a dead task @p: unhash its /proc entry, unlink ptrace state,
 * detach its signal state via __exit_signal(), and queue the task_struct
 * reference drop through call_rcu().
 *
 * If @p was the last non-leader thread and its zombie leader turns out to
 * be self-reaping after do_notify_parent() (exit_signal == -1), the whole
 * sequence is repeated for the leader.
 */
void release_task ( struct task_struct * p )
{
int zap_leader ;
task_t * leader ;
struct dentry * proc_dentry ;
2006-03-28 16:11:11 -08:00
repeat :
2005-04-16 15:20:36 -07:00
atomic_dec ( & p - > user - > processes ) ;
/* Lock order: p->proc_lock first, then tasklist_lock (write, irqs off). */
spin_lock ( & p - > proc_lock ) ;
proc_dentry = proc_pid_unhash ( p ) ;
write_lock_irq ( & tasklist_lock ) ;
2006-03-28 16:11:11 -08:00
ptrace_unlink ( p ) ;
2005-04-16 15:20:36 -07:00
BUG_ON ( ! list_empty ( & p - > ptrace_list ) | | ! list_empty ( & p - > ptrace_children ) ) ;
__exit_signal ( p ) ;
2006-03-28 16:11:19 -08:00
2005-04-16 15:20:36 -07:00
/*
 * If we are the last non-leader member of the thread
 * group, and the leader is zombie, then notify the
 * group leader's parent process (if it wants notification).
 */
zap_leader = 0 ;
leader = p - > group_leader ;
if ( leader ! = p & & thread_group_empty ( leader ) & & leader - > exit_state = = EXIT_ZOMBIE ) {
BUG_ON ( leader - > exit_signal = = - 1 ) ;
do_notify_parent ( leader , leader - > exit_signal ) ;
/*
 * If we were the last child thread and the leader has
 * exited already, and the leader's parent ignores SIGCHLD,
 * then we are the one who should release the leader.
 *
 * do_notify_parent() will have marked it self-reaping in
 * that case.
 */
zap_leader = ( leader - > exit_signal = = - 1 ) ;
}
sched_exit ( p ) ;
write_unlock_irq ( & tasklist_lock ) ;
spin_unlock ( & p - > proc_lock ) ;
/* The dentry returned by proc_pid_unhash() is flushed after the locks are dropped. */
proc_pid_flush ( proc_dentry ) ;
release_thread ( p ) ;
2006-03-31 02:31:37 -08:00
/* The reference is dropped from an RCU callback, not directly here. */
call_rcu ( & p - > rcu , delayed_put_task_struct ) ;
2005-04-16 15:20:36 -07:00
/* Only meaningful when zap_leader is set: go round again for the leader. */
p = leader ;
if ( unlikely ( zap_leader ) )
goto repeat ;
}
/*
* This checks not only the pgrp , but falls back on the pid if no
* satisfactory pgrp is found . I dunno - gdb doesn ' t work correctly
* without this . . .
*/
int session_of_pgrp ( int pgrp )
{
struct task_struct * p ;
int sid = - 1 ;
read_lock ( & tasklist_lock ) ;
do_each_task_pid ( pgrp , PIDTYPE_PGID , p ) {
if ( p - > signal - > session > 0 ) {
sid = p - > signal - > session ;
goto out ;
}
} while_each_task_pid ( pgrp , PIDTYPE_PGID , p ) ;
p = find_task_by_pid ( pgrp ) ;
if ( p )
sid = p - > signal - > session ;
out :
read_unlock ( & tasklist_lock ) ;
return sid ;
}
/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * @ignored_task is skipped during the scan, as are members that have an
 * exit_state set or whose real parent has pid 1.  The group is not
 * orphaned if any remaining member has a real parent in a different
 * process group but the same session.
 *
 * Takes no lock itself; the callers in this file hold tasklist_lock.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
{
	struct task_struct *p;
	struct task_struct *parent;
	int orphaned = 1;

	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
		if (p == ignored_task || p->exit_state)
			continue;

		parent = p->real_parent;
		if (parent->pid == 1)
			continue;

		if (process_group(parent) != pgrp &&
		    parent->signal->session == p->signal->session) {
			orphaned = 0;
			break;
		}
	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);

	return orphaned;	/* (sighing) "Often!" */
}
/*
 * Locked wrapper around will_become_orphaned_pgrp(): scans @pgrp with
 * tasklist_lock held for reading and no task excluded.
 */
int is_orphaned_pgrp(int pgrp)
{
	int orphaned;

	read_lock(&tasklist_lock);
	orphaned = will_become_orphaned_pgrp(pgrp, NULL);
	read_unlock(&tasklist_lock);

	return orphaned;
}
2006-01-14 13:20:43 -08:00
static int has_stopped_jobs ( int pgrp )
2005-04-16 15:20:36 -07:00
{
int retval = 0 ;
struct task_struct * p ;
do_each_task_pid ( pgrp , PIDTYPE_PGID , p ) {
if ( p - > state ! = TASK_STOPPED )
continue ;
/* If p is stopped by a debugger on a signal that won't
stop it , then don ' t count p as stopped . This isn ' t
perfect but it ' s a good approximation . */
if ( unlikely ( p - > ptrace )
& & p - > exit_code ! = SIGSTOP
& & p - > exit_code ! = SIGTSTP
& & p - > exit_code ! = SIGTTOU
& & p - > exit_code ! = SIGTTIN )
continue ;
retval = 1 ;
break ;
} while_each_task_pid ( pgrp , PIDTYPE_PGID , p ) ;
return retval ;
}
/**
2005-05-01 08:59:25 -07:00
* reparent_to_init - Reparent the calling kernel thread to the init task .
2005-04-16 15:20:36 -07:00
*
* If a kernel thread is launched as a result of a system call , or if
* it ever exits , it should generally reparent itself to init so that
* it is correctly cleaned up on exit .
*
* The various task state such as scheduling policy and priority may have
* been inherited from a user process , so we reset them to sane values here .
*
* NOTE that reparent_to_init ( ) gives the caller full capabilities .
*/
2006-01-14 13:20:43 -08:00
static void reparent_to_init ( void )
2005-04-16 15:20:36 -07:00
{
write_lock_irq ( & tasklist_lock ) ;
ptrace_unlink ( current ) ;
/* Reparent to init */
2006-03-28 16:11:05 -08:00
remove_parent ( current ) ;
2005-04-16 15:20:36 -07:00
current - > parent = child_reaper ;
current - > real_parent = child_reaper ;
2006-03-28 16:11:05 -08:00
add_parent ( current ) ;
2005-04-16 15:20:36 -07:00
/* Set the exit signal to SIGCHLD so we signal init on exit */
current - > exit_signal = SIGCHLD ;
2006-01-14 13:20:41 -08:00
if ( ( current - > policy = = SCHED_NORMAL | |
current - > policy = = SCHED_BATCH )
& & ( task_nice ( current ) < 0 ) )
2005-04-16 15:20:36 -07:00
set_user_nice ( current , 0 ) ;
/* cpus_allowed? */
/* rt_priority? */
/* signals? */
security_task_reparent_to_init ( current ) ;
memcpy ( current - > signal - > rlim , init_task . signal - > rlim ,
sizeof ( current - > signal - > rlim ) ) ;
atomic_inc ( & ( INIT_USER - > __count ) ) ;
write_unlock_irq ( & tasklist_lock ) ;
switch_uid ( INIT_USER ) ;
}
/*
 * Move the current thread group's leader to session @session and process
 * group @pgrp, re-hashing each pid link only when the value changes.
 *
 * Takes no lock itself; set_special_pids() calls it with tasklist_lock
 * write-locked.
 */
void __set_special_pids(pid_t session, pid_t pgrp)
{
	struct task_struct *leader = current->group_leader;

	if (leader->signal->session != session) {
		detach_pid(leader, PIDTYPE_SID);
		leader->signal->session = session;
		attach_pid(leader, PIDTYPE_SID, session);
	}

	if (process_group(leader) != pgrp) {
		detach_pid(leader, PIDTYPE_PGID);
		leader->signal->pgrp = pgrp;
		attach_pid(leader, PIDTYPE_PGID, pgrp);
	}
}
/*
 * Locked variant of __set_special_pids(): performs the update with
 * tasklist_lock write-locked and interrupts disabled.
 */
void set_special_pids(pid_t session, pid_t pgrp)
{
	write_lock_irq(&tasklist_lock);
	__set_special_pids(session, pgrp);
	write_unlock_irq(&tasklist_lock);
}
/*
* Let kernel threads use this to say that they
* allow a certain signal ( since daemonize ( ) will
* have disabled all of them by default ) .
*/
int allow_signal ( int sig )
{
2005-05-01 08:59:14 -07:00
if ( ! valid_signal ( sig ) | | sig < 1 )
2005-04-16 15:20:36 -07:00
return - EINVAL ;
spin_lock_irq ( & current - > sighand - > siglock ) ;
sigdelset ( & current - > blocked , sig ) ;
if ( ! current - > mm ) {
/* Kernel threads handle their own signals.
Let the signal code know it ' ll be handled , so
that they don ' t get converted to SIGKILL or
just silently dropped */
current - > sighand - > action [ ( sig ) - 1 ] . sa . sa_handler = ( void __user * ) 2 ;
}
recalc_sigpending ( ) ;
spin_unlock_irq ( & current - > sighand - > siglock ) ;
return 0 ;
}
EXPORT_SYMBOL ( allow_signal ) ;
int disallow_signal ( int sig )
{
2005-05-01 08:59:14 -07:00
if ( ! valid_signal ( sig ) | | sig < 1 )
2005-04-16 15:20:36 -07:00
return - EINVAL ;
spin_lock_irq ( & current - > sighand - > siglock ) ;
sigaddset ( & current - > blocked , sig ) ;
recalc_sigpending ( ) ;
spin_unlock_irq ( & current - > sighand - > siglock ) ;
return 0 ;
}
EXPORT_SYMBOL ( disallow_signal ) ;
/*
* Put all the gunge required to become a kernel thread without
* attached user resources in one place where it belongs .
*/
void daemonize ( const char * name , . . . )
{
va_list args ;
struct fs_struct * fs ;
sigset_t blocked ;
va_start ( args , name ) ;
vsnprintf ( current - > comm , sizeof ( current - > comm ) , name , args ) ;
va_end ( args ) ;
/*
* If we were started as result of loading a module , close all of the
* user space pages . We don ' t need them , and if we didn ' t close them
* they would be locked into memory .
*/
exit_mm ( current ) ;
set_special_pids ( 1 , 1 ) ;
2006-03-23 03:00:31 -08:00
mutex_lock ( & tty_mutex ) ;
2005-04-16 15:20:36 -07:00
current - > signal - > tty = NULL ;
2006-03-23 03:00:31 -08:00
mutex_unlock ( & tty_mutex ) ;
2005-04-16 15:20:36 -07:00
/* Block and flush all signals */
sigfillset ( & blocked ) ;
sigprocmask ( SIG_BLOCK , & blocked , NULL ) ;
flush_signals ( current ) ;
/* Become as one with the init task */
exit_fs ( current ) ; /* current->fs->count--; */
fs = init_task . fs ;
current - > fs = fs ;
atomic_inc ( & fs - > count ) ;
2006-02-18 18:12:43 +01:00
exit_namespace ( current ) ;
current - > namespace = init_task . namespace ;
get_namespace ( current - > namespace ) ;
2005-04-16 15:20:36 -07:00
exit_files ( current ) ;
current - > files = init_task . files ;
atomic_inc ( & current - > files - > count ) ;
reparent_to_init ( ) ;
}
EXPORT_SYMBOL ( daemonize ) ;
2006-01-14 13:20:43 -08:00
static void close_files ( struct files_struct * files )
2005-04-16 15:20:36 -07:00
{
int i , j ;
2005-09-09 13:04:10 -07:00
struct fdtable * fdt ;
2005-04-16 15:20:36 -07:00
j = 0 ;
2005-09-16 19:28:13 -07:00
/*
* It is safe to dereference the fd table without RCU or
* - > file_lock because this is the last reference to the
* files structure .
*/
2005-09-09 13:04:10 -07:00
fdt = files_fdtable ( files ) ;
2005-04-16 15:20:36 -07:00
for ( ; ; ) {
unsigned long set ;
i = j * __NFDBITS ;
2005-09-09 13:04:10 -07:00
if ( i > = fdt - > max_fdset | | i > = fdt - > max_fds )
2005-04-16 15:20:36 -07:00
break ;
2005-09-09 13:04:10 -07:00
set = fdt - > open_fds - > fds_bits [ j + + ] ;
2005-04-16 15:20:36 -07:00
while ( set ) {
if ( set & 1 ) {
2005-09-09 13:04:10 -07:00
struct file * file = xchg ( & fdt - > fd [ i ] , NULL ) ;
2005-04-16 15:20:36 -07:00
if ( file )
filp_close ( file , files ) ;
}
i + + ;
set > > = 1 ;
}
}
}
struct files_struct * get_files_struct ( struct task_struct * task )
{
struct files_struct * files ;
task_lock ( task ) ;
files = task - > files ;
if ( files )
atomic_inc ( & files - > count ) ;
task_unlock ( task ) ;
return files ;
}
/*
 * Drop a reference on @files.  When the count reaches zero, close all
 * remaining files and free the table.
 */
void fastcall put_files_struct(struct files_struct *files)
{
	struct fdtable *fdt;

	if (!atomic_dec_and_test(&files->count))
		return;

	close_files(files);

	/*
	 * Free the fd and fdset arrays if we expanded them.
	 * If the fdtable was embedded, pass files for freeing
	 * at the end of the RCU grace period.  Otherwise,
	 * you can free files immediately.
	 */
	fdt = files_fdtable(files);
	if (fdt != &files->fdtab)
		kmem_cache_free(files_cachep, files);
	else
		fdt->free_files = files;
	free_fdtable(fdt);
}

EXPORT_SYMBOL(put_files_struct);
static inline void __exit_files ( struct task_struct * tsk )
{
struct files_struct * files = tsk - > files ;
if ( files ) {
task_lock ( tsk ) ;
tsk - > files = NULL ;
task_unlock ( tsk ) ;
put_files_struct ( files ) ;
}
}
/* Out-of-line entry point for __exit_files(). */
void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}
static inline void __put_fs_struct ( struct fs_struct * fs )
{
/* No need to hold fs->lock if we are killing it */
if ( atomic_dec_and_test ( & fs - > count ) ) {
dput ( fs - > root ) ;
mntput ( fs - > rootmnt ) ;
dput ( fs - > pwd ) ;
mntput ( fs - > pwdmnt ) ;
if ( fs - > altroot ) {
dput ( fs - > altroot ) ;
mntput ( fs - > altrootmnt ) ;
}
kmem_cache_free ( fs_cachep , fs ) ;
}
}
/* Out-of-line entry point for __put_fs_struct(). */
void put_fs_struct(struct fs_struct *fs)
{
	__put_fs_struct(fs);
}
static inline void __exit_fs ( struct task_struct * tsk )
{
struct fs_struct * fs = tsk - > fs ;
if ( fs ) {
task_lock ( tsk ) ;
tsk - > fs = NULL ;
task_unlock ( tsk ) ;
__put_fs_struct ( fs ) ;
}
}
/* Out-of-line, exported entry point for __exit_fs(). */
void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}

EXPORT_SYMBOL_GPL(exit_fs);
/*
 * Turn us into a lazy TLB process if we
 * aren't already.
 *
 * Drops @tsk's reference on its mm: tsk->mm is cleared, the task enters
 * lazy-TLB mode on the old mm, and the user reference is released with
 * mmput().  Returns early, after mm_release(), when the task has no mm.
 */
2005-05-01 08:59:29 -07:00
static void exit_mm ( struct task_struct * tsk )
2005-04-16 15:20:36 -07:00
{
struct mm_struct * mm = tsk - > mm ;
/* Called unconditionally, i.e. also when mm is NULL. */
mm_release ( tsk , mm ) ;
if ( ! mm )
return ;
/*
 * Serialize with any possible pending coredump.
 * We must hold mmap_sem around checking core_waiters
 * and clearing tsk->mm.  The core-inducing thread
 * will increment core_waiters for each thread in the
 * group with ->mm != NULL.
 */
down_read ( & mm - > mmap_sem ) ;
if ( mm - > core_waiters ) {
/*
 * Swap the read lock for the write lock to decrement core_waiters;
 * the thread that brings it to zero completes core_startup_done.
 * Then wait, with mmap_sem dropped, for core_done.
 */
up_read ( & mm - > mmap_sem ) ;
down_write ( & mm - > mmap_sem ) ;
if ( ! - - mm - > core_waiters )
complete ( mm - > core_startup_done ) ;
up_write ( & mm - > mmap_sem ) ;
wait_for_completion ( & mm - > core_done ) ;
down_read ( & mm - > mmap_sem ) ;
}
/* Extra mm_count reference taken before tsk->mm is cleared below. */
atomic_inc ( & mm - > mm_count ) ;
if ( mm ! = tsk - > active_mm ) BUG ( ) ;
/* more a memory barrier than a real lock */
task_lock ( tsk ) ;
tsk - > mm = NULL ;
up_read ( & mm - > mmap_sem ) ;
enter_lazy_tlb ( mm , current ) ;
task_unlock ( tsk ) ;
mmput ( mm ) ;
}
2006-03-28 16:11:04 -08:00
/*
 * Record @reaper as the new real parent of @p.  Only ->real_parent is
 * updated here; relinking is left to the caller.
 */
static inline void choose_new_parent(task_t *p, task_t *reaper)
{
	/*
	 * Make sure we're not reparenting to ourselves and that
	 * the parent is not a zombie.
	 */
	BUG_ON(p == reaper || reaper->exit_state);

	p->real_parent = reaper;
}
2006-01-14 13:20:43 -08:00
/*
 * Finish handing child @p over from its exiting parent @father.
 *
 * The caller has already set p->real_parent (see choose_new_parent()).
 * @traced is non-zero when @p was found on @father's ptrace_children
 * list, i.e. another task is tracing it; in that case the ptrace links
 * are preserved.  Otherwise @p is relinked under its new real parent and
 * any ptrace state is dropped.  Finally the process-group orphan check
 * ("case ii") is applied to the child's process group.
 */
static void reparent_thread ( task_t * p , task_t * father , int traced )
2005-04-16 15:20:36 -07:00
{
/* We don't want people slaying init. */
if ( p - > exit_signal ! = - 1 )
p - > exit_signal = SIGCHLD ;
if ( p - > pdeath_signal )
/* We already hold the tasklist_lock here. */
2005-10-30 15:03:44 -08:00
group_send_sig_info ( p - > pdeath_signal , SEND_SIG_NOINFO , p ) ;
2005-04-16 15:20:36 -07:00
/* Move the child from its dying parent to the new one. */
if ( unlikely ( traced ) ) {
/* Preserve ptrace links if someone else is tracing this child. */
list_del_init ( & p - > ptrace_list ) ;
if ( p - > parent ! = p - > real_parent )
list_add ( & p - > ptrace_list , & p - > real_parent - > ptrace_children ) ;
} else {
/* If this child is being traced, then we're the one tracing it
 * anyway, so let go of it.
 */
p - > ptrace = 0 ;
2006-03-28 16:11:09 -08:00
remove_parent ( p ) ;
2005-04-16 15:20:36 -07:00
p - > parent = p - > real_parent ;
2006-03-28 16:11:09 -08:00
add_parent ( p ) ;
2005-04-16 15:20:36 -07:00
/* If we'd notified the old parent about this child's death,
 * also notify the new parent.
 */
if ( p - > exit_state = = EXIT_ZOMBIE & & p - > exit_signal ! = - 1 & &
thread_group_empty ( p ) )
do_notify_parent ( p , p - > exit_signal ) ;
else if ( p - > state = = TASK_TRACED ) {
/*
 * If it was at a trace stop, turn it into
 * a normal stop since it's no longer being
 * traced.
 */
ptrace_untrace ( p ) ;
}
}
/*
 * process group orphan check
 * Case ii: Our child is in a different pgrp
 * than we are, and it was the only connection
 * outside, so the child pgrp is now orphaned.
 */
if ( ( process_group ( p ) ! = process_group ( father ) ) & &
( p - > signal - > session = = father - > signal - > session ) ) {
int pgrp = process_group ( p ) ;
/* A newly orphaned group with stopped jobs gets SIGHUP, then SIGCONT. */
if ( will_become_orphaned_pgrp ( pgrp , NULL ) & & has_stopped_jobs ( pgrp ) ) {
2005-10-30 15:03:44 -08:00
__kill_pg_info ( SIGHUP , SEND_SIG_PRIV , pgrp ) ;
__kill_pg_info ( SIGCONT , SEND_SIG_PRIV , pgrp ) ;
2005-04-16 15:20:36 -07:00
}
}
}
/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our thread
 * group, and if no such member exists, give it to
 * the global child reaper process (ie "init").
 *
 * Ptraced zombie children with exit_signal == -1 are queued on
 * @to_release (linked through ->ptrace_list) for the caller to release.
 * exit_notify() calls this with tasklist_lock write-locked.
 */
2006-01-14 13:20:43 -08:00
static void forget_original_parent ( struct task_struct * father ,
2005-04-16 15:20:36 -07:00
struct list_head * to_release )
{
struct task_struct * p , * reaper = father ;
struct list_head * _p , * _n ;
/*
 * Pick the reaper: the next thread in the group without an exit_state
 * set, or child_reaper once the walk comes back round to father.
 */
do {
reaper = next_thread ( reaper ) ;
if ( reaper = = father ) {
reaper = child_reaper ;
break ;
}
} while ( reaper - > exit_state ) ;
/*
 * There are only two places where our children can be:
 *
 * - in our child list
 * - in our ptraced child list
 *
 * Search them and reparent children.
 */
list_for_each_safe ( _p , _n , & father - > children ) {
int ptrace ;
p = list_entry ( _p , struct task_struct , sibling ) ;
/* Sampled now: reparent_thread() below may clear p->ptrace. */
ptrace = p - > ptrace ;
/* if father isn't the real parent, then ptrace must be enabled */
BUG_ON ( father ! = p - > real_parent & & ! ptrace ) ;
if ( father = = p - > real_parent ) {
/* we are the real father: reparent to the chosen reaper */
2006-03-28 16:11:04 -08:00
choose_new_parent ( p , reaper ) ;
2005-04-16 15:20:36 -07:00
reparent_thread ( p , father , 0 ) ;
} else {
/* reparent ptraced task to its real parent */
__ptrace_unlink ( p ) ;
if ( p - > exit_state = = EXIT_ZOMBIE & & p - > exit_signal ! = - 1 & &
thread_group_empty ( p ) )
do_notify_parent ( p , p - > exit_signal ) ;
}
/*
 * If the ptraced child is a zombie with exit_signal == -1
 * we must collect it before we exit, or it will remain a
 * zombie forever, since we prevented it from self-reaping
 * while it was being traced by us, to be able to see it in wait4.
 */
if ( unlikely ( ptrace & & p - > exit_state = = EXIT_ZOMBIE & & p - > exit_signal = = - 1 ) )
list_add ( & p - > ptrace_list , to_release ) ;
}
/* Entries here are linked through ->ptrace_list and reparented with traced = 1. */
list_for_each_safe ( _p , _n , & father - > ptrace_children ) {
p = list_entry ( _p , struct task_struct , ptrace_list ) ;
2006-03-28 16:11:04 -08:00
choose_new_parent ( p , reaper ) ;
2005-04-16 15:20:36 -07:00
reparent_thread ( p , father , 1 ) ;
}
}
/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us.
 *
 * Called from do_exit().  Reparents our children, performs the
 * orphaned-process-group check for our own group, notifies the parent
 * (or tracer), and sets tsk->exit_state to EXIT_ZOMBIE or EXIT_DEAD.
 * A task that ends up EXIT_DEAD is released here, as are any ptraced
 * zombies collected by forget_original_parent().
 */
static void exit_notify ( struct task_struct * tsk )
{
int state ;
struct task_struct * t ;
struct list_head ptrace_dead , * _p , * _n ;
if ( signal_pending ( tsk ) & & ! ( tsk - > signal - > flags & SIGNAL_GROUP_EXIT )
& & ! thread_group_empty ( tsk ) ) {
/*
 * This occurs when there was a race between our exit
 * syscall and a group signal choosing us as the one to
 * wake up.  It could be that we are the only thread
 * alerted to check for pending signals, but another thread
 * should be woken now to take the signal since we will not.
 * Now we'll wake all the threads in the group just to make
 * sure someone gets all the pending signals.
 */
read_lock ( & tasklist_lock ) ;
spin_lock_irq ( & tsk - > sighand - > siglock ) ;
for ( t = next_thread ( tsk ) ; t ! = tsk ; t = next_thread ( t ) )
if ( ! signal_pending ( t ) & & ! ( t - > flags & PF_EXITING ) ) {
recalc_sigpending_tsk ( t ) ;
if ( signal_pending ( t ) )
signal_wake_up ( t , 0 ) ;
}
spin_unlock_irq ( & tsk - > sighand - > siglock ) ;
read_unlock ( & tasklist_lock ) ;
}
write_lock_irq ( & tasklist_lock ) ;
/*
 * This does two things:
 *
 * A.  Make init inherit all the child processes
 * B.  Check to see if any process groups have become orphaned
 *	as a result of our exiting, and if they have any stopped
 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 */
INIT_LIST_HEAD ( & ptrace_dead ) ;
forget_original_parent ( tsk , & ptrace_dead ) ;
BUG_ON ( ! list_empty ( & tsk - > children ) ) ;
BUG_ON ( ! list_empty ( & tsk - > ptrace_children ) ) ;
/*
 * Check to see if any process groups have become orphaned
 * as a result of our exiting, and if they have any stopped
 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 *
 * Case i: Our father is in a different pgrp than we are
 * and we were the only connection outside, so our pgrp
 * is about to become orphaned.
 */
t = tsk - > real_parent ;
if ( ( process_group ( t ) ! = process_group ( tsk ) ) & &
( t - > signal - > session = = tsk - > signal - > session ) & &
will_become_orphaned_pgrp ( process_group ( tsk ) , tsk ) & &
has_stopped_jobs ( process_group ( tsk ) ) ) {
2005-10-30 15:03:44 -08:00
__kill_pg_info ( SIGHUP , SEND_SIG_PRIV , process_group ( tsk ) ) ;
__kill_pg_info ( SIGCONT , SEND_SIG_PRIV , process_group ( tsk ) ) ;
2005-04-16 15:20:36 -07:00
}
/* Let father know we died
 *
 * Thread signals are configurable, but you aren't going to use
 * that to send signals to arbitrary processes.
 * That stops right now.
 *
 * If the parent exec id doesn't match the exec id we saved
 * when we started then we know the parent has changed security
 * domain.
 *
 * If our self_exec id doesn't match our parent_exec_id then
 * we have changed execution domain as these two values started
 * the same after a fork.
 *
 */
if ( tsk - > exit_signal ! = SIGCHLD & & tsk - > exit_signal ! = - 1 & &
( tsk - > parent_exec_id ! = t - > self_exec_id | |
tsk - > self_exec_id ! = tsk - > parent_exec_id )
& & ! capable ( CAP_KILL ) )
tsk - > exit_signal = SIGCHLD ;
/* If something other than our normal parent is ptracing us, then
 * send it a SIGCHLD instead of honoring exit_signal.  exit_signal
 * only has special meaning to our real parent.
 */
if ( tsk - > exit_signal ! = - 1 & & thread_group_empty ( tsk ) ) {
int signal = tsk - > parent = = tsk - > real_parent ? tsk - > exit_signal : SIGCHLD ;
do_notify_parent ( tsk , signal ) ;
} else if ( tsk - > ptrace ) {
do_notify_parent ( tsk , SIGCHLD ) ;
}
/*
 * Zombie by default.  A task with exit_signal == -1 goes straight to
 * EXIT_DEAD unless it is ptraced by a parent that is not itself in a
 * group exit.
 */
state = EXIT_ZOMBIE ;
if ( tsk - > exit_signal = = - 1 & &
( likely ( tsk - > ptrace = = 0 ) | |
unlikely ( tsk - > parent - > signal - > flags & SIGNAL_GROUP_EXIT ) ) )
state = EXIT_DEAD ;
tsk - > exit_state = state ;
write_unlock_irq ( & tasklist_lock ) ;
/* Release the ptraced zombies collected above, now that the lock is dropped. */
list_for_each_safe ( _p , _n , & ptrace_dead ) {
list_del_init ( _p ) ;
t = list_entry ( _p , struct task_struct , ptrace_list ) ;
release_task ( t ) ;
}
/* If the process is dead, release it - nobody will wait for it */
if ( state = = EXIT_DEAD )
release_task ( tsk ) ;
}
fastcall NORET_TYPE void do_exit ( long code )
{
struct task_struct * tsk = current ;
int group_dead ;
profile_task_exit ( tsk ) ;
2005-06-27 10:55:12 +02:00
WARN_ON ( atomic_read ( & tsk - > fs_excl ) ) ;
2005-04-16 15:20:36 -07:00
if ( unlikely ( in_interrupt ( ) ) )
panic ( " Aiee, killing interrupt handler! " ) ;
if ( unlikely ( ! tsk - > pid ) )
panic ( " Attempted to kill the idle task! " ) ;
2006-03-28 16:10:58 -08:00
if ( unlikely ( tsk = = child_reaper ) )
2005-04-16 15:20:36 -07:00
panic ( " Attempted to kill init! " ) ;
if ( unlikely ( current - > ptrace & PT_TRACE_EXIT ) ) {
current - > ptrace_message = code ;
ptrace_notify ( ( PTRACE_EVENT_EXIT < < 8 ) | SIGTRAP ) ;
}
2005-06-23 00:09:13 -07:00
/*
* We ' re taking recursive faults here in do_exit . Safest is to just
* leave this task alone and wait for reboot .
*/
if ( unlikely ( tsk - > flags & PF_EXITING ) ) {
printk ( KERN_ALERT
" Fixing recursive fault but reboot is needed! \n " ) ;
2006-02-28 12:51:55 -05:00
if ( tsk - > io_context )
exit_io_context ( ) ;
2005-06-23 00:09:13 -07:00
set_current_state ( TASK_UNINTERRUPTIBLE ) ;
schedule ( ) ;
}
2005-04-16 15:20:36 -07:00
tsk - > flags | = PF_EXITING ;
2005-10-27 09:07:33 -07:00
/*
* Make sure we don ' t try to process any timer firings
* while we are already exiting .
*/
tsk - > it_virt_expires = cputime_zero ;
tsk - > it_prof_expires = cputime_zero ;
tsk - > it_sched_expires = 0 ;
2005-04-16 15:20:36 -07:00
if ( unlikely ( in_atomic ( ) ) )
printk ( KERN_INFO " note: %s[%d] exited with preempt_count %d \n " ,
current - > comm , current - > pid ,
preempt_count ( ) ) ;
acct_update_integrals ( tsk ) ;
[PATCH] mm: update_hiwaters just in time
update_mem_hiwater has attracted various criticisms, in particular from those
concerned with mm scalability. Originally it was called whenever rss or
total_vm got raised. Then many of those callsites were replaced by a timer
tick call from account_system_time. Now Frank van Maarseveen reports that to
be found inadequate. How about this? Works for Frank.
Replace update_mem_hiwater, a poor combination of two unrelated ops, by macros
update_hiwater_rss and update_hiwater_vm. Don't attempt to keep
mm->hiwater_rss up to date at timer tick, nor every time we raise rss (usually
by 1): those are hot paths. Do the opposite, update only when about to lower
rss (usually by many), or just before final accounting in do_exit. Handle
mm->hiwater_vm in the same way, though it's much less of an issue. Demand
that whoever collects these hiwater statistics do the work of taking the
maximum with rss or total_vm.
And there has been no collector of these hiwater statistics in the tree. The
new convention needs an example, so match Frank's usage by adding a VmPeak
line above VmSize to /proc/<pid>/status, and also a VmHWM line above VmRSS
(High-Water-Mark or High-Water-Memory).
There was a particular anomaly during mremap move, that hiwater_vm might be
captured too high. A fleeting such anomaly remains, but it's quickly
corrected now, whereas before it would stick.
What locking? None: if the app is racy then these statistics will be racy,
it's not worth any overhead to make them exact. But whenever it suits,
hiwater_vm is updated under exclusive mmap_sem, and hiwater_rss under
page_table_lock (for now) or with preemption disabled (later on): without
going to any trouble, minimize the time between reading current values and
updating, to minimize those occasions when a racing thread bumps a count up
and back down in between.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-10-29 18:16:18 -07:00
if ( tsk - > mm ) {
update_hiwater_rss ( tsk - > mm ) ;
update_hiwater_vm ( tsk - > mm ) ;
}
2005-04-16 15:20:36 -07:00
group_dead = atomic_dec_and_test ( & tsk - > signal - > live ) ;
2005-08-04 16:49:32 -07:00
if ( group_dead ) {
2006-01-09 20:52:34 -08:00
hrtimer_cancel ( & tsk - > signal - > real_timer ) ;
2005-10-21 15:03:29 -07:00
exit_itimers ( tsk - > signal ) ;
2005-04-16 15:20:36 -07:00
acct_process ( code ) ;
2005-08-04 16:49:32 -07:00
}
2006-03-27 01:16:22 -08:00
if ( unlikely ( tsk - > robust_list ) )
exit_robust_list ( tsk ) ;
2006-03-27 01:16:24 -08:00
# ifdef CONFIG_COMPAT
if ( unlikely ( tsk - > compat_robust_list ) )
compat_exit_robust_list ( tsk ) ;
# endif
2006-03-29 20:30:19 -05:00
if ( unlikely ( tsk - > audit_context ) )
audit_free ( tsk ) ;
2005-04-16 15:20:36 -07:00
exit_mm ( tsk ) ;
exit_sem ( tsk ) ;
__exit_files ( tsk ) ;
__exit_fs ( tsk ) ;
exit_namespace ( tsk ) ;
exit_thread ( ) ;
cpuset_exit ( tsk ) ;
exit_keys ( tsk ) ;
if ( group_dead & & tsk - > signal - > leader )
disassociate_ctty ( 1 ) ;
2005-11-13 16:06:55 -08:00
module_put ( task_thread_info ( tsk ) - > exec_domain - > module ) ;
2005-04-16 15:20:36 -07:00
if ( tsk - > binfmt )
module_put ( tsk - > binfmt - > module ) ;
tsk - > exit_code = code ;
2005-11-07 00:59:16 -08:00
proc_exit_connector ( tsk ) ;
2005-04-16 15:20:36 -07:00
exit_notify ( tsk ) ;
# ifdef CONFIG_NUMA
mpol_free ( tsk - > mempolicy ) ;
tsk - > mempolicy = NULL ;
# endif
2006-01-09 15:59:21 -08:00
/*
* If DEBUG_MUTEXES is on , make sure we are holding no locks :
*/
mutex_debug_check_no_locks_held ( tsk ) ;
2005-04-16 15:20:36 -07:00
2006-02-28 12:51:55 -05:00
if ( tsk - > io_context )
exit_io_context ( ) ;
2006-04-11 13:52:07 +02:00
if ( tsk - > splice_pipe )
__free_pipe_info ( tsk - > splice_pipe ) ;
2005-10-30 15:02:47 -08:00
/* PF_DEAD causes final put_task_struct after we schedule. */
preempt_disable ( ) ;
BUG_ON ( tsk - > flags & PF_DEAD ) ;
tsk - > flags | = PF_DEAD ;
2005-04-16 15:20:36 -07:00
schedule ( ) ;
BUG ( ) ;
/* Avoid "noreturn function does return". */
for ( ; ; ) ;
}
2005-04-23 00:08:00 -07:00
EXPORT_SYMBOL_GPL ( do_exit ) ;
2005-04-16 15:20:36 -07:00
NORET_TYPE void complete_and_exit ( struct completion * comp , long code )
{
if ( comp )
complete ( comp ) ;
do_exit ( code ) ;
}
EXPORT_SYMBOL ( complete_and_exit ) ;
/*
 * exit(2) entry point: the low 8 bits of @error_code, shifted left by
 * 8, are passed to do_exit() as the exit code.  Never returns.
 */
asmlinkage long sys_exit(int error_code)
{
	do_exit((error_code & 0xff) << 8);
}
/*
* Take down every thread in the group . This is called by fatal signals
* as well as by sys_exit_group ( below ) .
*/
NORET_TYPE void
do_group_exit ( int exit_code )
{
BUG_ON ( exit_code & 0x80 ) ; /* core dumps don't get here */
if ( current - > signal - > flags & SIGNAL_GROUP_EXIT )
exit_code = current - > signal - > group_exit_code ;
else if ( ! thread_group_empty ( current ) ) {
struct signal_struct * const sig = current - > signal ;
struct sighand_struct * const sighand = current - > sighand ;
spin_lock_irq ( & sighand - > siglock ) ;
if ( sig - > flags & SIGNAL_GROUP_EXIT )
/* Another thread got here before we took the lock. */
exit_code = sig - > group_exit_code ;
else {
sig - > group_exit_code = exit_code ;
zap_other_threads ( current ) ;
}
spin_unlock_irq ( & sighand - > siglock ) ;
}
do_exit ( exit_code ) ;
/* NOTREACHED */
}
/*
 * exit_group() system call: this kills every thread in the thread group.
 * Note that any externally wait4()-ing process will get the correct exit
 * code - even if this thread is not the thread group leader.
 *
 * Only the low eight bits of @error_code are kept; they are moved into
 * bits 8-15 of the code passed to do_group_exit().
 */
asmlinkage void sys_exit_group(int error_code)
{
	const int status = error_code & 0xff;

	do_group_exit(status << 8);
}
/*
 * Decide whether child @p matches what the waiter asked for.
 *
 * @pid selects the children of interest:
 *	> 0	only the task whose pid equals @pid
 *	  0	any task in the caller's process group
 *	 -1	any task
 *	< -1	any task in process group -@pid
 *
 * Returns 0 when @p must be ignored, 1 when it is eligible, and 2 when
 * it belongs to another thread group and delay_group_leader() holds for
 * it.  Note that the "2" answer is given before security_task_wait()
 * is consulted.
 */
static int eligible_child(pid_t pid, int options, task_t *p)
{
	/* Apply the pid selector. */
	if (pid > 0 && p->pid != pid)
		return 0;
	if (pid == 0 && process_group(p) != process_group(current))
		return 0;
	if (pid < -1 && process_group(p) != -pid)
		return 0;

	/*
	 * Do not consider detached threads that are
	 * not ptraced:
	 */
	if (p->exit_signal == -1 && !p->ptrace)
		return 0;

	/*
	 * Wait for all children (clone and not) if __WALL is set;
	 * otherwise, wait for clone children *only* if __WCLONE is
	 * set; otherwise, wait for non-clone children *only*.  (Note:
	 * A "clone" child here is one that reports to its parent
	 * using a signal other than SIGCHLD.)
	 */
	if (!(options & __WALL)) {
		const int is_clone_child = (p->exit_signal != SIGCHLD);
		const int wants_clone = ((options & __WCLONE) != 0);

		if (is_clone_child != wants_clone)
			return 0;
	}

	/*
	 * Do not consider thread group leaders that are
	 * in a non-empty thread group:
	 */
	if (current->tgid != p->tgid && delay_group_leader(p))
		return 2;

	return security_task_wait(p) ? 0 : 1;
}
/*
 * Copy the result of a wait out to user space and drop the caller's
 * reference on @p.
 *
 * @rusagep is optional; when non-NULL it is filled in by getrusage().
 * The reference on @p is dropped right after that, whether or not
 * getrusage() succeeded.  @infop is written unconditionally, one field
 * at a time, stopping at the first failing put_user().
 *
 * Returns @pid on success, otherwise the first error encountered.
 */
static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
			       int why, int status,
			       struct siginfo __user *infop,
			       struct rusage __user *rusagep)
{
	int err = 0;

	if (rusagep)
		err = getrusage(p, RUSAGE_BOTH, rusagep);
	put_task_struct(p);
	if (err)
		return err;

	err = put_user(SIGCHLD, &infop->si_signo);
	if (err)
		return err;

	err = put_user(0, &infop->si_errno);
	if (err)
		return err;

	err = put_user((short)why, &infop->si_code);
	if (err)
		return err;

	err = put_user(pid, &infop->si_pid);
	if (err)
		return err;

	err = put_user(uid, &infop->si_uid);
	if (err)
		return err;

	err = put_user(status, &infop->si_status);
	if (err)
		return err;

	return pid;
}
/*
 * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 *
 * @p:         candidate child.
 * @noreap:    nonzero to report the child's status without reaping it.
 * @infop:     user siginfo to fill in.  The reaping path skips the siginfo
 *             writes when this is NULL; the @noreap path hands it to
 *             wait_noreap_copyout() without a NULL check.
 * @stat_addr: user int that receives the wait status, or NULL.
 * @ru:        user rusage filled in through getrusage(), or NULL.
 *
 * A nonzero return is p->pid on success, otherwise the error returned by
 * getrusage() or put_user().
 */
static int wait_task_zombie ( task_t * p , int noreap ,
struct siginfo __user * infop ,
int __user * stat_addr , struct rusage __user * ru )
{
unsigned long state ;
int retval ;
int status ;
/* Report-only path: exit_state is never written in this branch. */
if ( unlikely ( noreap ) ) {
pid_t pid = p - > pid ;
uid_t uid = p - > uid ;
int exit_code = p - > exit_code ;
/* Note: this "status" shadows the function-level variable. */
int why , status ;
if ( unlikely ( p - > exit_state ! = EXIT_ZOMBIE ) )
return 0 ;
if ( unlikely ( p - > exit_signal = = - 1 & & p - > ptrace = = 0 ) )
return 0 ;
/*
 * Take a reference before dropping tasklist_lock;
 * wait_noreap_copyout() is the one that drops it again.
 */
get_task_struct ( p ) ;
read_unlock ( & tasklist_lock ) ;
/*
 * Low seven bits clear: CLD_EXITED, status is exit_code >> 8.
 * Otherwise bit 0x80 selects CLD_DUMPED over CLD_KILLED and the
 * low seven bits are reported as the status.
 */
if ( ( exit_code & 0x7f ) = = 0 ) {
why = CLD_EXITED ;
status = exit_code > > 8 ;
} else {
why = ( exit_code & 0x80 ) ? CLD_DUMPED : CLD_KILLED ;
status = exit_code & 0x7f ;
}
return wait_noreap_copyout ( p , pid , uid , why ,
status , infop , ru ) ;
}
/*
 * Try to move the task's state to DEAD;
 * only one thread is allowed to do this:
 */
state = xchg ( & p - > exit_state , EXIT_DEAD ) ;
if ( state ! = EXIT_ZOMBIE ) {
/* Losing the race means somebody else already set EXIT_DEAD. */
BUG_ON ( state ! = EXIT_DEAD ) ;
return 0 ;
}
if ( unlikely ( p - > exit_signal = = - 1 & & p - > ptrace = = 0 ) ) {
/*
 * This can only happen in a race with a ptraced thread
 * dying on another processor.
 *
 * NOTE(review): exit_state was already switched to EXIT_DEAD by
 * the xchg above and is not restored on this path.
 */
return 0 ;
}
if ( likely ( p - > real_parent = = p - > parent ) & & likely ( p - > signal ) ) {
2006-01-09 20:54:39 -08:00
struct signal_struct * psig ;
struct signal_struct * sig ;
2005-04-16 15:20:36 -07:00
/*
 * The resource counters for the group leader are in its
 * own task_struct.  Those for dead threads in the group
 * are in its signal_struct, as are those for the child
 * processes it has previously reaped.  All these
 * accumulate in the parent's signal_struct c* fields.
 *
 * We don't bother to take a lock here to protect these
 * p->signal fields, because they are only touched by
 * __exit_signal, which runs with tasklist_lock
 * write-locked anyway, and so is excluded here.  We do
 * need to protect the access to p->parent->signal fields,
 * as other threads in the parent group can be right
 * here reaping other children at the same time.
 */
spin_lock_irq ( & p - > parent - > sighand - > siglock ) ;
2006-01-09 20:54:39 -08:00
psig = p - > parent - > signal ;
sig = p - > signal ;
psig - > cutime =
cputime_add ( psig - > cutime ,
2005-04-16 15:20:36 -07:00
cputime_add ( p - > utime ,
2006-01-09 20:54:39 -08:00
cputime_add ( sig - > utime ,
sig - > cutime ) ) ) ;
psig - > cstime =
cputime_add ( psig - > cstime ,
2005-04-16 15:20:36 -07:00
cputime_add ( p - > stime ,
2006-01-09 20:54:39 -08:00
cputime_add ( sig - > stime ,
sig - > cstime ) ) ) ;
psig - > cmin_flt + =
p - > min_flt + sig - > min_flt + sig - > cmin_flt ;
psig - > cmaj_flt + =
p - > maj_flt + sig - > maj_flt + sig - > cmaj_flt ;
psig - > cnvcsw + =
p - > nvcsw + sig - > nvcsw + sig - > cnvcsw ;
psig - > cnivcsw + =
p - > nivcsw + sig - > nivcsw + sig - > cnivcsw ;
2005-04-16 15:20:36 -07:00
spin_unlock_irq ( & p - > parent - > sighand - > siglock ) ;
}
/*
 * Now we are sure this task is interesting, and no other
 * thread can reap it because we set its state to EXIT_DEAD.
 */
read_unlock ( & tasklist_lock ) ;
retval = ru ? getrusage ( p , RUSAGE_BOTH , ru ) : 0 ;
/*
 * The group exit code wins when the signal_struct is flagged
 * SIGNAL_GROUP_EXIT.
 *
 * NOTE(review): p->signal is dereferenced here without the NULL
 * check that guards the accounting step above.
 */
status = ( p - > signal - > flags & SIGNAL_GROUP_EXIT )
? p - > signal - > group_exit_code : p - > exit_code ;
/* Each copy-out below runs only while no earlier step has failed. */
if ( ! retval & & stat_addr )
retval = put_user ( status , stat_addr ) ;
if ( ! retval & & infop )
retval = put_user ( SIGCHLD , & infop - > si_signo ) ;
if ( ! retval & & infop )
retval = put_user ( 0 , & infop - > si_errno ) ;
if ( ! retval & & infop ) {
int why ;
/* Same decoding as the noreap path; "status" is rewritten in place. */
if ( ( status & 0x7f ) = = 0 ) {
why = CLD_EXITED ;
status > > = 8 ;
} else {
why = ( status & 0x80 ) ? CLD_DUMPED : CLD_KILLED ;
status & = 0x7f ;
}
retval = put_user ( ( short ) why , & infop - > si_code ) ;
if ( ! retval )
retval = put_user ( status , & infop - > si_status ) ;
}
if ( ! retval & & infop )
retval = put_user ( p - > pid , & infop - > si_pid ) ;
if ( ! retval & & infop )
retval = put_user ( p - > uid , & infop - > si_uid ) ;
/*
 * A copy to user space failed: put the task back into EXIT_ZOMBIE
 * and return the error without releasing the task.
 */
if ( retval ) {
// TODO: is this safe?
p - > exit_state = EXIT_ZOMBIE ;
return retval ;
}
retval = p - > pid ;
/*
 * parent differs from real_parent: unlink the ptrace relationship
 * and, unless the task is detached, notify the parent instead of
 * releasing the task here.
 */
if ( p - > real_parent ! = p - > parent ) {
write_lock_irq ( & tasklist_lock ) ;
/* Double-check with lock held. */
if ( p - > real_parent ! = p - > parent ) {
__ptrace_unlink ( p ) ;
// TODO: is this safe?
p - > exit_state = EXIT_ZOMBIE ;
/*
 * If this is not a detached task, notify the parent.
 * If it's still not detached after that, don't release
 * it now.
 */
if ( p - > exit_signal ! = - 1 ) {
do_notify_parent ( p , p - > exit_signal ) ;
if ( p - > exit_signal ! = - 1 )
p = NULL ;
}
}
write_unlock_irq ( & tasklist_lock ) ;
}
/* p was cleared above when the task must not be released here. */
if ( p ! = NULL )
release_task ( p ) ;
/* A zero return would wrongly tell the caller the lock is still held. */
BUG_ON ( ! retval ) ;
return retval ;
}
/*
 * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 *
 * @p:                    candidate child.
 * @delayed_group_leader: nonzero when the caller found @p to be a delayed
 *                        group leader (do_wait passes "ret == 2").
 * @noreap:               nonzero to report without clearing p->exit_code.
 * @infop:                user siginfo to fill in.  The normal path skips
 *                        the siginfo writes when this is NULL; the @noreap
 *                        path hands it to wait_noreap_copyout() unchecked.
 * @stat_addr:            user int that receives the wait status, or NULL.
 * @ru:                   user rusage filled in through getrusage(), or NULL.
 *
 * A nonzero return is one of: p->pid on success, -EAGAIN when the task
 * changed under us and the caller must restart its scan, or the error
 * returned by getrusage() or put_user().
 */
static int wait_task_stopped ( task_t * p , int delayed_group_leader , int noreap ,
struct siginfo __user * infop ,
int __user * stat_addr , struct rusage __user * ru )
{
int retval , exit_code ;
/* Nothing to report while exit_code is zero. */
if ( ! p - > exit_code )
return 0 ;
if ( delayed_group_leader & & ! ( p - > ptrace & PT_PTRACED ) & &
p - > signal & & p - > signal - > group_stop_count > 0 )
/*
 * A group stop is in progress and this is the group leader.
 * We won't report until all threads have stopped.
 */
return 0 ;
/*
 * Now we are pretty sure this task is interesting.
 * Make sure it doesn't get reaped out from under us while we
 * give up the lock and then examine it below.  We don't want to
 * keep holding onto the tasklist_lock while we call getrusage and
 * possibly take page faults for user memory.
 */
get_task_struct ( p ) ;
read_unlock ( & tasklist_lock ) ;
/* Report-only path: p->exit_code is read but left untouched. */
if ( unlikely ( noreap ) ) {
pid_t pid = p - > pid ;
uid_t uid = p - > uid ;
int why = ( p - > ptrace & PT_PTRACED ) ? CLD_TRAPPED : CLD_STOPPED ;
exit_code = p - > exit_code ;
/*
 * exit_code is re-read now that the lock is gone; bail out if it
 * has been cleared meanwhile or the task is in TASK_TRACED.
 */
if ( unlikely ( ! exit_code ) | |
2005-10-01 11:04:18 -07:00
unlikely ( p - > state & TASK_TRACED ) )
2005-04-16 15:20:36 -07:00
goto bail_ref ;
/* wait_noreap_copyout() drops the reference taken above. */
return wait_noreap_copyout ( p , pid , uid ,
why , ( exit_code < < 8 ) | 0x7f ,
infop , ru ) ;
}
write_lock_irq ( & tasklist_lock ) ;
/*
 * This uses xchg to be atomic with the thread resuming and setting
 * it.  It must also be done with the write lock held to prevent a
 * race with the EXIT_ZOMBIE case.
 */
exit_code = xchg ( & p - > exit_code , 0 ) ;
if ( unlikely ( p - > exit_state ) ) {
/*
 * The task resumed and then died.  Let the next iteration
 * catch it in EXIT_ZOMBIE.  Note that exit_code might
 * already be zero here if it resumed and did _exit(0).
 * The task itself is dead and won't touch exit_code again;
 * other processors in this function are locked out.
 */
p - > exit_code = exit_code ;
exit_code = 0 ;
}
if ( unlikely ( exit_code = = 0 ) ) {
/*
 * Another thread in this function got to it first, or it
 * resumed, or it resumed and then died.
 */
write_unlock_irq ( & tasklist_lock ) ;
/* The noreap path jumps here too; it holds no tasklist_lock. */
bail_ref :
put_task_struct ( p ) ;
/*
 * We are returning to the wait loop without having successfully
 * removed the process and having released the lock.  We cannot
 * continue, since the "p" task pointer is potentially stale.
 *
 * Return -EAGAIN, and do_wait() will restart the loop from the
 * beginning.  Do _not_ re-acquire the lock.
 */
return - EAGAIN ;
}
/* move to end of parent's list to avoid starvation */
remove_parent ( p ) ;
2006-03-28 16:11:05 -08:00
add_parent ( p ) ;
2005-04-16 15:20:36 -07:00
write_unlock_irq ( & tasklist_lock ) ;
/* Each copy-out below runs only while no earlier step has failed. */
retval = ru ? getrusage ( p , RUSAGE_BOTH , ru ) : 0 ;
if ( ! retval & & stat_addr )
retval = put_user ( ( exit_code < < 8 ) | 0x7f , stat_addr ) ;
if ( ! retval & & infop )
retval = put_user ( SIGCHLD , & infop - > si_signo ) ;
if ( ! retval & & infop )
retval = put_user ( 0 , & infop - > si_errno ) ;
if ( ! retval & & infop )
retval = put_user ( ( short ) ( ( p - > ptrace & PT_PTRACED )
? CLD_TRAPPED : CLD_STOPPED ) ,
& infop - > si_code ) ;
if ( ! retval & & infop )
retval = put_user ( exit_code , & infop - > si_status ) ;
if ( ! retval & & infop )
retval = put_user ( p - > pid , & infop - > si_pid ) ;
if ( ! retval & & infop )
retval = put_user ( p - > uid , & infop - > si_uid ) ;
/* p->pid is read before the reference is dropped. */
if ( ! retval )
retval = p - > pid ;
put_task_struct ( p ) ;
/* A zero return would wrongly tell the caller the lock is still held. */
BUG_ON ( ! retval ) ;
return retval ;
}
/*
 * Handle do_wait work for one task in a live, non-stopped state.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 *
 * @p:         candidate child.
 * @noreap:    nonzero to leave SIGNAL_STOP_CONTINUED set, so the event
 *             can be reported again.
 * @infop:     user siginfo to fill in, or NULL to report via @stat_addr.
 * @stat_addr: user int that receives 0xffff when @infop is NULL; may
 *             itself be NULL.
 * @ru:        user rusage filled in through getrusage(), or NULL.
 *
 * A nonzero return is the child's pid on success, otherwise the error
 * returned by getrusage() or put_user().
 */
static int wait_task_continued(task_t *p, int noreap,
			       struct siginfo __user *infop,
			       int __user *stat_addr, struct rusage __user *ru)
{
	int retval;
	pid_t pid;
	uid_t uid;

	if (unlikely(!p->signal))
		return 0;

	/* Cheap unlocked test first; confirmed under siglock below. */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
		return 0;

	spin_lock_irq(&p->sighand->siglock);
	/* Re-check with the lock held. */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
		spin_unlock_irq(&p->sighand->siglock);
		return 0;
	}
	if (!noreap)
		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
	spin_unlock_irq(&p->sighand->siglock);

	/*
	 * Snapshot what we report and pin the task before giving up
	 * tasklist_lock.
	 */
	pid = p->pid;
	uid = p->uid;
	get_task_struct(p);
	read_unlock(&tasklist_lock);

	if (!infop) {
		retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
		put_task_struct(p);
		if (!retval && stat_addr)
			retval = put_user(0xffff, stat_addr);
		/*
		 * Our reference on p is gone and tasklist_lock is not
		 * held, so p must not be touched any more: return the
		 * pid saved above rather than reading p->pid.
		 */
		if (!retval)
			retval = pid;
	} else {
		/* wait_noreap_copyout() drops the reference for us. */
		retval = wait_noreap_copyout(p, pid, uid,
					     CLD_CONTINUED, SIGCONT,
					     infop, ru);
		BUG_ON(retval == 0);
	}

	return retval;
}
static inline int my_ptrace_child ( struct task_struct * p )
{
if ( ! ( p - > ptrace & PT_PTRACED ) )
return 0 ;
if ( ! ( p - > ptrace & PT_ATTACHED ) )
return 1 ;
/*
* This child was PTRACE_ATTACH ' d . We should be seeing it only if
* we are the attacher . If we are the real parent , this is a race
* inside ptrace_attach . It is waiting for the tasklist_lock ,
* which we have to switch the parent links , but has already set
* the flags in p - > ptrace .
*/
return ( p - > parent ! = p - > real_parent ) ;
}
/*
 * Common implementation behind the wait family of system calls: scan the
 * children (and ptrace children) of every thread in the caller's thread
 * group for one that matches, sleeping until something changes unless
 * WNOHANG is given.
 *
 * @pid:       child selector, interpreted by eligible_child().
 * @options:   W* option bits.
 * @infop:     user siginfo to fill in, or NULL.
 * @stat_addr: user int that receives the wait status, or NULL.
 * @ru:        user rusage to fill in, or NULL.
 *
 * Returns the value produced by the wait_task_*() helper that reported a
 * child (turned into 0 when @infop is non-NULL and the value is
 * positive), 0 for WNOHANG with a candidate child but nothing to report,
 * -ERESTARTSYS when a signal is pending, -ECHILD when no child could
 * ever match, or a put_user() error from clearing @infop.
 */
static long do_wait ( pid_t pid , int options , struct siginfo __user * infop ,
int __user * stat_addr , struct rusage __user * ru )
{
DECLARE_WAITQUEUE ( wait , current ) ;
struct task_struct * tsk ;
int flag , retval ;
/* Queue ourselves before scanning; removed again at "end". */
add_wait_queue ( & current - > signal - > wait_chldexit , & wait ) ;
repeat :
/*
 * We will set this flag if we see any child that might later
 * match our criteria, even if we are not able to reap it yet.
 */
flag = 0 ;
current - > state = TASK_INTERRUPTIBLE ;
read_lock ( & tasklist_lock ) ;
tsk = current ;
/* Walk every thread of our group, starting with ourselves. */
do {
struct task_struct * p ;
struct list_head * _p ;
int ret ;
list_for_each ( _p , & tsk - > children ) {
p = list_entry ( _p , struct task_struct , sibling ) ;
/* ret: 0 = skip, 1 = eligible, 2 = eligible but not releasable yet. */
ret = eligible_child ( pid , options , p ) ;
if ( ! ret )
continue ;
switch ( p - > state ) {
case TASK_TRACED :
2005-10-30 15:02:50 -08:00
/*
 * When we hit the race with PTRACE_ATTACH,
 * we will not report this child.  But the
 * race means it has not yet been moved to
 * our ptrace_children list, so we need to
 * set the flag here to avoid a spurious ECHILD
 * when the race happens with the only child.
 */
flag = 1 ;
2005-04-16 15:20:36 -07:00
if ( ! my_ptrace_child ( p ) )
continue ;
/*FALLTHROUGH*/
case TASK_STOPPED :
/*
 * It's stopped now, so it might later
 * continue, exit, or stop again.
 */
flag = 1 ;
if ( ! ( options & WUNTRACED ) & &
! my_ptrace_child ( p ) )
continue ;
retval = wait_task_stopped ( p , ret = = 2 ,
( options & WNOWAIT ) ,
infop ,
stat_addr , ru ) ;
/* -EAGAIN: the lock was dropped and p may be stale; rescan. */
if ( retval = = - EAGAIN )
goto repeat ;
if ( retval ! = 0 ) /* He released the lock. */
goto end ;
break ;
default :
// case EXIT_DEAD:
if ( p - > exit_state = = EXIT_DEAD )
continue ;
// case EXIT_ZOMBIE:
if ( p - > exit_state = = EXIT_ZOMBIE ) {
/*
 * Eligible but we cannot release
 * it yet:
 */
if ( ret = = 2 )
goto check_continued ;
if ( ! likely ( options & WEXITED ) )
continue ;
retval = wait_task_zombie (
p , ( options & WNOWAIT ) ,
infop , stat_addr , ru ) ;
/* He released the lock. */
if ( retval ! = 0 )
goto end ;
break ;
}
check_continued :
/*
 * It's running now, so it might later
 * exit, stop, or stop and then continue.
 */
flag = 1 ;
if ( ! unlikely ( options & WCONTINUED ) )
continue ;
retval = wait_task_continued (
p , ( options & WNOWAIT ) ,
infop , stat_addr , ru ) ;
if ( retval ! = 0 ) /* He released the lock. */
goto end ;
break ;
}
}
/*
 * Nothing seen among the children yet: an eligible entry on the
 * ptrace_children list is enough to keep us waiting.
 */
if ( ! flag ) {
list_for_each ( _p , & tsk - > ptrace_children ) {
p = list_entry ( _p , struct task_struct ,
ptrace_list ) ;
if ( ! eligible_child ( pid , options , p ) )
continue ;
flag = 1 ;
break ;
}
}
/* __WNOTHREAD: look only at the calling thread's own lists. */
if ( options & __WNOTHREAD )
break ;
tsk = next_thread ( tsk ) ;
if ( tsk - > signal ! = current - > signal )
BUG ( ) ;
} while ( tsk ! = current ) ;
read_unlock ( & tasklist_lock ) ;
/* Something may match later: return for WNOHANG, otherwise sleep and rescan. */
if ( flag ) {
retval = 0 ;
if ( options & WNOHANG )
goto end ;
retval = - ERESTARTSYS ;
if ( signal_pending ( current ) )
goto end ;
schedule ( ) ;
goto repeat ;
}
retval = - ECHILD ;
end :
current - > state = TASK_RUNNING ;
remove_wait_queue ( & current - > signal - > wait_chldexit , & wait ) ;
if ( infop ) {
/* With a siginfo the positive result is reported as plain success. */
if ( retval > 0 )
retval = 0 ;
else {
/*
 * For a WNOHANG return, clear out all the fields
 * we would set so the user can easily tell the
 * difference.
 *
 * A negative retval skips every write below and is
 * returned unchanged.
 */
if ( ! retval )
retval = put_user ( 0 , & infop - > si_signo ) ;
if ( ! retval )
retval = put_user ( 0 , & infop - > si_errno ) ;
if ( ! retval )
retval = put_user ( 0 , & infop - > si_code ) ;
if ( ! retval )
retval = put_user ( 0 , & infop - > si_pid ) ;
if ( ! retval )
retval = put_user ( 0 , & infop - > si_uid ) ;
if ( ! retval )
retval = put_user ( 0 , & infop - > si_status ) ;
}
}
return retval ;
}
/*
 * waitid() system call.
 *
 * Rejects with -EINVAL any @options bit outside WNOHANG, WNOWAIT,
 * WEXITED, WSTOPPED and WCONTINUED, and any call that selects none of
 * WEXITED, WSTOPPED or WCONTINUED.  @which/@pid are translated into the
 * pid selector used by do_wait():
 *	P_ALL	-> -1
 *	P_PID	-> @pid   (must be > 0)
 *	P_PGID	-> -@pid  (must be > 0)
 * Any other @which is -EINVAL.  The result of do_wait() is returned.
 */
asmlinkage long sys_waitid(int which, pid_t pid,
			   struct siginfo __user *infop, int options,
			   struct rusage __user *ru)
{
	long ret;

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WSTOPPED | WCONTINUED))
		return -EINVAL;
	if ((options & (WEXITED | WSTOPPED | WCONTINUED)) == 0)
		return -EINVAL;

	if (which == P_ALL) {
		pid = -1;
	} else if (which == P_PID || which == P_PGID) {
		if (pid <= 0)
			return -EINVAL;
		if (which == P_PGID)
			pid = -pid;
	} else {
		return -EINVAL;
	}

	ret = do_wait(pid, options, infop, NULL, ru);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(ret);
	return ret;
}
/*
 * wait4() system call.
 *
 * Returns -EINVAL when @options carries any bit outside WNOHANG,
 * WUNTRACED, WCONTINUED, __WNOTHREAD, __WCLONE and __WALL.  Otherwise
 * forwards to do_wait() with WEXITED added and no siginfo buffer.
 */
asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
			  int options, struct rusage __user *ru)
{
	long ret;

	if (options & ~(WNOHANG | WUNTRACED | WCONTINUED |
			__WNOTHREAD | __WCLONE | __WALL))
		return -EINVAL;

	options |= WEXITED;
	ret = do_wait(pid, options, NULL, stat_addr, ru);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(ret);
	return ret;
}
#ifdef __ARCH_WANT_SYS_WAITPID

/*
 * sys_waitpid() remains for compatibility.  waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 *
 * This is sys_wait4() with no rusage buffer.
 */
asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
{
	struct rusage __user *const no_rusage = NULL;

	return sys_wait4(pid, stat_addr, options, no_rusage);
}

#endif