2005-04-17 02:20:36 +04:00
/*
* linux / fs / exec . c
*
* Copyright ( C ) 1991 , 1992 Linus Torvalds
*/
/*
* # ! - checking implemented by tytso .
*/
/*
* Demand - loading implemented 01.12 .91 - no need to read anything but
* the header into memory . The inode of the executable is put into
* " current->executable " , and page faults do the actual loading . Clean .
*
* Once more I can proudly say that linux stood up to being changed : it
* was less than 2 hours work to get demand - loading completely implemented .
*
* Demand loading changed July 1993 by Eric Youngdale . Use mmap instead ,
* current - > executable is only used by the procfs . This allows a dispatch
* table to check for several different types of binary formats . We keep
* trying until we recognize the file or we run out of supported binary
* formats .
*/
# include <linux/slab.h>
# include <linux/file.h>
# include <linux/mman.h>
# include <linux/a.out.h>
# include <linux/stat.h>
# include <linux/fcntl.h>
# include <linux/smp_lock.h>
# include <linux/init.h>
# include <linux/pagemap.h>
# include <linux/highmem.h>
# include <linux/spinlock.h>
# include <linux/key.h>
# include <linux/personality.h>
# include <linux/binfmts.h>
# include <linux/swap.h>
# include <linux/utsname.h>
2006-12-08 13:38:01 +03:00
# include <linux/pid_namespace.h>
2005-04-17 02:20:36 +04:00
# include <linux/module.h>
# include <linux/namei.h>
# include <linux/proc_fs.h>
# include <linux/ptrace.h>
# include <linux/mount.h>
# include <linux/security.h>
# include <linux/syscalls.h>
# include <linux/rmap.h>
2006-10-01 10:28:59 +04:00
# include <linux/tsacct_kern.h>
2005-11-07 11:59:16 +03:00
# include <linux/cn_proc.h>
2006-04-26 22:04:08 +04:00
# include <linux/audit.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# include <asm/mmu_context.h>
# ifdef CONFIG_KMOD
# include <linux/kmod.h>
# endif
/* NOTE(review): not referenced in this part of the file; presumably a core-dump naming knob - confirm. */
int core_uses_pid ;
2006-10-01 10:29:28 +04:00
/* Core dump file name pattern; the buffer is 128 bytes. */
char core_pattern [ 128 ] = " core " ;
2005-06-23 11:09:43 +04:00
/*
 * Value copied into mm->dumpable by flush_old_exec() when the exec'ing task's
 * effective and real ids differ, or when the exec changes credentials.
 */
int suid_dumpable = 0 ;
EXPORT_SYMBOL ( suid_dumpable ) ;
2005-04-17 02:20:36 +04:00
/* The maximal length of core_pattern (declared above) is also specified in sysctl.c */
/* Head of the singly linked list of registered binary format handlers. */
static struct linux_binfmt * formats ;
/* Guards the 'formats' list: writers register/unregister, readers walk it. */
static DEFINE_RWLOCK ( binfmt_lock ) ;
int register_binfmt ( struct linux_binfmt * fmt )
{
struct linux_binfmt * * tmp = & formats ;
if ( ! fmt )
return - EINVAL ;
if ( fmt - > next )
return - EBUSY ;
write_lock ( & binfmt_lock ) ;
while ( * tmp ) {
if ( fmt = = * tmp ) {
write_unlock ( & binfmt_lock ) ;
return - EBUSY ;
}
tmp = & ( * tmp ) - > next ;
}
fmt - > next = formats ;
formats = fmt ;
write_unlock ( & binfmt_lock ) ;
return 0 ;
}
EXPORT_SYMBOL ( register_binfmt ) ;
int unregister_binfmt ( struct linux_binfmt * fmt )
{
struct linux_binfmt * * tmp = & formats ;
write_lock ( & binfmt_lock ) ;
while ( * tmp ) {
if ( fmt = = * tmp ) {
* tmp = fmt - > next ;
write_unlock ( & binfmt_lock ) ;
return 0 ;
}
tmp = & ( * tmp ) - > next ;
}
write_unlock ( & binfmt_lock ) ;
return - EINVAL ;
}
EXPORT_SYMBOL ( unregister_binfmt ) ;
static inline void put_binfmt ( struct linux_binfmt * fmt )
{
module_put ( fmt - > module ) ;
}
/*
* Note that a shared library must be both readable and executable due to
* security reasons .
*
* Also note that the load address is taken from the file itself .
*/
/*
 * sys_uselib() - load a shared library through the registered binary formats.
 * @library: user-space path of the library.
 *
 * The path must name a regular file that the caller may both read and
 * execute. Each registered format providing ->load_shlib is tried in turn
 * until one returns something other than -ENOEXEC.
 *
 * Returns the handler's result, -EINVAL for a non-regular file, -ENOEXEC
 * when no handler accepts the file, or the lookup/permission/open error.
 */
asmlinkage long sys_uselib(const char __user *library)
{
	struct nameidata nd;
	struct linux_binfmt *fmt;
	struct file *file;
	int error;

	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd,
					FMODE_READ | FMODE_EXEC);
	if (error)
		return error;

	if (!S_ISREG(nd.dentry->d_inode->i_mode)) {
		error = -EINVAL;
		goto release;
	}

	error = vfs_permission(&nd, MAY_READ | MAY_EXEC);
	if (error)
		goto release;

	/*
	 * NOTE(review): as before, nd is not released on this failure path;
	 * presumably nameidata_to_filp() consumes it - confirm.
	 */
	file = nameidata_to_filp(&nd, O_RDONLY);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		return error;
	}

	error = -ENOEXEC;
	if (!file->f_op)
		goto put_file;

	read_lock(&binfmt_lock);
	for (fmt = formats; fmt; fmt = fmt->next) {
		if (!fmt->load_shlib || !try_module_get(fmt->module))
			continue;
		/* The handler runs with the list lock dropped. */
		read_unlock(&binfmt_lock);
		error = fmt->load_shlib(file);
		read_lock(&binfmt_lock);
		put_binfmt(fmt);
		if (error != -ENOEXEC)
			break;
	}
	read_unlock(&binfmt_lock);

put_file:
	fput(file);
	return error;

release:
	release_open_intent(&nd);
	path_release(&nd);
	return error;
}
/*
* count ( ) counts the number of strings in array ARGV .
*/
/*
 * Count the entries of the NULL-terminated user-space pointer array @argv.
 *
 * Returns the number of entries (0 when @argv itself is NULL), -EFAULT when
 * an entry cannot be read, or -E2BIG once more than @max entries are seen.
 */
static int count(char __user * __user *argv, int max)
{
	int n = 0;

	if (argv == NULL)
		return 0;

	while (1) {
		char __user *entry;

		if (get_user(entry, argv))
			return -EFAULT;
		if (!entry)
			return n;

		argv++;
		n++;
		if (n > max)
			return -E2BIG;
		cond_resched();
	}
}
/*
* ' copy_strings ( ) ' copies argument / environment strings from user
* memory to free pages in kernel mem . These are in a format ready
* to be put directly into the top of new user memory .
*/
2005-05-06 03:16:09 +04:00
/*
 * Copy @argc strings from the user array @argv into the argument pages of
 * @bprm. Strings are taken last-to-first and packed downwards from bprm->p,
 * which is lowered by each string's length (terminator included).
 *
 * Pages are allocated on demand and any part of a fresh page that is not
 * overwritten by this copy is zeroed.
 *
 * Returns 0 on success, -EFAULT on a bad user pointer or zero length,
 * -E2BIG when a string does not fit below bprm->p, -ENOMEM when a page
 * cannot be allocated.
 */
static int copy_strings(int argc, char __user * __user *argv,
			struct linux_binprm *bprm)
{
	struct page *mapped = NULL;
	char *kaddr = NULL;
	int ret;

	while (argc-- > 0) {
		char __user *str;
		unsigned long pos;
		int len;

		if (get_user(str, argv + argc)) {
			ret = -EFAULT;
			goto out;
		}
		len = strnlen_user(str, bprm->p);
		if (!len) {
			ret = -EFAULT;
			goto out;
		}

		if (bprm->p < len) {
			ret = -E2BIG;
			goto out;
		}

		bprm->p -= len;
		/* XXX: add architecture specific overflow check here. */
		pos = bprm->p;

		while (len > 0) {
			int offset = pos % PAGE_SIZE;
			int i = pos / PAGE_SIZE;
			struct page *page = bprm->page[i];
			int fresh = 0;
			int chunk, err;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				bprm->page[i] = page;
				if (!page) {
					ret = -ENOMEM;
					goto out;
				}
				fresh = 1;
			}

			/* Keep at most one page mapped at a time. */
			if (page != mapped) {
				if (mapped)
					kunmap(mapped);
				mapped = page;
				kaddr = kmap(mapped);
			}

			/* Zero the head of a fresh page ... */
			if (fresh && offset)
				memset(kaddr, 0, offset);

			chunk = PAGE_SIZE - offset;
			if (chunk > len) {
				chunk = len;
				/* ... and the tail the string does not reach. */
				if (fresh)
					memset(kaddr + offset + len, 0,
					       PAGE_SIZE - offset - len);
			}

			err = copy_from_user(kaddr + offset, str, chunk);
			if (err) {
				ret = -EFAULT;
				goto out;
			}

			pos += chunk;
			str += chunk;
			len -= chunk;
		}
	}
	ret = 0;
out:
	if (mapped)
		kunmap(mapped);
	return ret;
}
/*
* Like copy_strings , but get argv and its values from kernel memory .
*/
/*
 * Kernel-memory variant of copy_strings(): the address limit is switched to
 * KERNEL_DS for the duration of the copy and restored afterwards.
 */
int copy_strings_kernel(int argc, char **argv, struct linux_binprm *bprm)
{
	mm_segment_t saved_fs = get_fs();
	int ret;

	set_fs(KERNEL_DS);
	ret = copy_strings(argc, (char __user * __user *)argv, bprm);
	set_fs(saved_fs);

	return ret;
}
EXPORT_SYMBOL ( copy_strings_kernel ) ;
# ifdef CONFIG_MMU
/*
* This routine is used to map in a page into an address space : needed by
* execve ( ) for the initial stack and environment pages .
*
* vma - > vm_mm - > mmap_sem is held for writing .
*/
void install_arg_page ( struct vm_area_struct * vma ,
struct page * page , unsigned long address )
{
struct mm_struct * mm = vma - > vm_mm ;
pte_t * pte ;
2005-10-30 04:16:23 +03:00
spinlock_t * ptl ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( anon_vma_prepare ( vma ) ) )
2005-10-30 04:16:23 +03:00
goto out ;
2005-04-17 02:20:36 +04:00
flush_dcache_page ( page ) ;
2005-11-30 01:03:14 +03:00
pte = get_locked_pte ( mm , address , & ptl ) ;
2005-04-17 02:20:36 +04:00
if ( ! pte )
goto out ;
if ( ! pte_none ( * pte ) ) {
2005-10-30 04:16:23 +03:00
pte_unmap_unlock ( pte , ptl ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
2005-10-30 04:16:05 +03:00
inc_mm_counter ( mm , anon_rss ) ;
2005-04-17 02:20:36 +04:00
lru_cache_add_active ( page ) ;
set_pte_at ( mm , address , pte , pte_mkdirty ( pte_mkwrite ( mk_pte (
page , vma - > vm_page_prot ) ) ) ) ;
2006-01-06 11:11:12 +03:00
page_add_new_anon_rmap ( page , vma , address ) ;
2005-10-30 04:16:23 +03:00
pte_unmap_unlock ( pte , ptl ) ;
2005-04-17 02:20:36 +04:00
/* no need for flush_tlb */
return ;
out :
__free_page ( page ) ;
force_sig ( SIGKILL , current ) ;
}
# define EXTRA_STACK_VM_PAGES 20 /* random */
int setup_arg_pages ( struct linux_binprm * bprm ,
unsigned long stack_top ,
int executable_stack )
{
unsigned long stack_base ;
struct vm_area_struct * mpnt ;
struct mm_struct * mm = current - > mm ;
int i , ret ;
long arg_size ;
# ifdef CONFIG_STACK_GROWSUP
/* Move the argument and environment strings to the bottom of the
* stack space .
*/
int offset , j ;
char * to , * from ;
/* Start by shifting all the pages down */
i = 0 ;
for ( j = 0 ; j < MAX_ARG_PAGES ; j + + ) {
struct page * page = bprm - > page [ j ] ;
if ( ! page )
continue ;
bprm - > page [ i + + ] = page ;
}
/* Now move them within their pages */
offset = bprm - > p % PAGE_SIZE ;
to = kmap ( bprm - > page [ 0 ] ) ;
for ( j = 1 ; j < i ; j + + ) {
memmove ( to , to + offset , PAGE_SIZE - offset ) ;
from = kmap ( bprm - > page [ j ] ) ;
memcpy ( to + PAGE_SIZE - offset , from , offset ) ;
kunmap ( bprm - > page [ j - 1 ] ) ;
to = from ;
}
memmove ( to , to + offset , PAGE_SIZE - offset ) ;
kunmap ( bprm - > page [ j - 1 ] ) ;
/* Limit stack size to 1GB */
stack_base = current - > signal - > rlim [ RLIMIT_STACK ] . rlim_max ;
if ( stack_base > ( 1 < < 30 ) )
stack_base = 1 < < 30 ;
stack_base = PAGE_ALIGN ( stack_top - stack_base ) ;
/* Adjust bprm->p to point to the end of the strings. */
bprm - > p = stack_base + PAGE_SIZE * i - offset ;
mm - > arg_start = stack_base ;
arg_size = i < < PAGE_SHIFT ;
/* zero pages that were copied above */
while ( i < MAX_ARG_PAGES )
bprm - > page [ i + + ] = NULL ;
# else
stack_base = arch_align_stack ( stack_top - MAX_ARG_PAGES * PAGE_SIZE ) ;
stack_base = PAGE_ALIGN ( stack_base ) ;
bprm - > p + = stack_base ;
mm - > arg_start = bprm - > p ;
arg_size = stack_top - ( PAGE_MASK & ( unsigned long ) mm - > arg_start ) ;
# endif
arg_size + = EXTRA_STACK_VM_PAGES * PAGE_SIZE ;
if ( bprm - > loader )
bprm - > loader + = stack_base ;
bprm - > exec + = stack_base ;
2007-02-10 12:45:03 +03:00
mpnt = kmem_cache_zalloc ( vm_area_cachep , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! mpnt )
return - ENOMEM ;
down_write ( & mm - > mmap_sem ) ;
{
mpnt - > vm_mm = mm ;
# ifdef CONFIG_STACK_GROWSUP
mpnt - > vm_start = stack_base ;
mpnt - > vm_end = stack_base + arg_size ;
# else
mpnt - > vm_end = stack_top ;
mpnt - > vm_start = mpnt - > vm_end - arg_size ;
# endif
/* Adjust stack execute permissions; explicitly enable
* for EXSTACK_ENABLE_X , disable for EXSTACK_DISABLE_X
* and leave alone ( arch default ) otherwise . */
if ( unlikely ( executable_stack = = EXSTACK_ENABLE_X ) )
mpnt - > vm_flags = VM_STACK_FLAGS | VM_EXEC ;
else if ( executable_stack = = EXSTACK_DISABLE_X )
mpnt - > vm_flags = VM_STACK_FLAGS & ~ VM_EXEC ;
else
mpnt - > vm_flags = VM_STACK_FLAGS ;
mpnt - > vm_flags | = mm - > def_flags ;
mpnt - > vm_page_prot = protection_map [ mpnt - > vm_flags & 0x7 ] ;
if ( ( ret = insert_vm_struct ( mm , mpnt ) ) ) {
up_write ( & mm - > mmap_sem ) ;
kmem_cache_free ( vm_area_cachep , mpnt ) ;
return ret ;
}
mm - > stack_vm = mm - > total_vm = vma_pages ( mpnt ) ;
}
for ( i = 0 ; i < MAX_ARG_PAGES ; i + + ) {
struct page * page = bprm - > page [ i ] ;
if ( page ) {
bprm - > page [ i ] = NULL ;
install_arg_page ( mpnt , page , stack_base ) ;
}
stack_base + = PAGE_SIZE ;
}
up_write ( & mm - > mmap_sem ) ;
return 0 ;
}
EXPORT_SYMBOL ( setup_arg_pages ) ;
# define free_arg_pages(bprm) do { } while (0)
# else
static inline void free_arg_pages ( struct linux_binprm * bprm )
{
int i ;
for ( i = 0 ; i < MAX_ARG_PAGES ; i + + ) {
if ( bprm - > page [ i ] )
__free_page ( bprm - > page [ i ] ) ;
bprm - > page [ i ] = NULL ;
}
}
# endif /* CONFIG_MMU */
struct file * open_exec ( const char * name )
{
struct nameidata nd ;
int err ;
struct file * file ;
2006-03-25 14:07:01 +03:00
err = path_lookup_open ( AT_FDCWD , name , LOOKUP_FOLLOW , & nd , FMODE_READ | FMODE_EXEC ) ;
2005-04-17 02:20:36 +04:00
file = ERR_PTR ( err ) ;
if ( ! err ) {
struct inode * inode = nd . dentry - > d_inode ;
file = ERR_PTR ( - EACCES ) ;
if ( ! ( nd . mnt - > mnt_flags & MNT_NOEXEC ) & &
S_ISREG ( inode - > i_mode ) ) {
2005-11-09 08:35:04 +03:00
int err = vfs_permission ( & nd , MAY_EXEC ) ;
2005-04-17 02:20:36 +04:00
file = ERR_PTR ( err ) ;
if ( ! err ) {
2005-10-19 01:20:16 +04:00
file = nameidata_to_filp ( & nd , O_RDONLY ) ;
2005-04-17 02:20:36 +04:00
if ( ! IS_ERR ( file ) ) {
err = deny_write_access ( file ) ;
if ( err ) {
fput ( file ) ;
file = ERR_PTR ( err ) ;
}
}
out :
return file ;
}
}
2005-10-19 01:20:16 +04:00
release_open_intent ( & nd ) ;
2005-04-17 02:20:36 +04:00
path_release ( & nd ) ;
}
goto out ;
}
EXPORT_SYMBOL ( open_exec ) ;
/*
 * Read @count bytes at @offset of @file into the kernel buffer @addr.
 * Returns whatever vfs_read() returns.
 */
int kernel_read(struct file *file, unsigned long offset,
		char *addr, unsigned long count)
{
	mm_segment_t saved_fs = get_fs();
	loff_t pos = offset;
	int nread;

	set_fs(get_ds());
	/* The cast to a user pointer is valid due to the set_fs() */
	nread = vfs_read(file, (void __user *)addr, count, &pos);
	set_fs(saved_fs);

	return nread;
}
EXPORT_SYMBOL ( kernel_read ) ;
static int exec_mmap ( struct mm_struct * mm )
{
struct task_struct * tsk ;
struct mm_struct * old_mm , * active_mm ;
/* Notify parent that we're no longer interested in the old VM */
tsk = current ;
old_mm = current - > mm ;
mm_release ( tsk , old_mm ) ;
if ( old_mm ) {
/*
* Make sure that if there is a core dump in progress
* for the old mm , we get out and die instead of going
* through with the exec . We must hold mmap_sem around
* checking core_waiters and changing tsk - > mm . The
* core - inducing thread will increment core_waiters for
* each thread whose - > mm = = old_mm .
*/
down_read ( & old_mm - > mmap_sem ) ;
if ( unlikely ( old_mm - > core_waiters ) ) {
up_read ( & old_mm - > mmap_sem ) ;
return - EINTR ;
}
}
task_lock ( tsk ) ;
active_mm = tsk - > active_mm ;
tsk - > mm = mm ;
tsk - > active_mm = mm ;
activate_mm ( active_mm , mm ) ;
task_unlock ( tsk ) ;
arch_pick_mmap_layout ( mm ) ;
if ( old_mm ) {
up_read ( & old_mm - > mmap_sem ) ;
2006-04-01 03:13:38 +04:00
BUG_ON ( active_mm ! = old_mm ) ;
2005-04-17 02:20:36 +04:00
mmput ( old_mm ) ;
return 0 ;
}
mmdrop ( active_mm ) ;
return 0 ;
}
/*
* This function makes sure the current process has its own signal table ,
* so that flush_signal_handlers can later reset the handlers without
* disturbing other processes . ( Other processes might share the signal
* table via the CLONE_SIGHAND option to clone ( ) . )
*/
2006-01-15 00:20:43 +03:00
/*
 * de_thread() - make @tsk the sole member and leader of its thread group,
 * with a signal handler table of its own.
 *
 * Returns 0 on success, -ENOMEM when a replacement sighand_struct cannot be
 * allocated, and -EAGAIN when SIGNAL_GROUP_EXIT is already set on the
 * group's signal struct.
 */
static int de_thread ( struct task_struct * tsk )
2005-04-17 02:20:36 +04:00
{
struct signal_struct * sig = tsk - > signal ;
struct sighand_struct * newsighand , * oldsighand = tsk - > sighand ;
spinlock_t * lock = & oldsighand - > siglock ;
2005-11-07 21:12:43 +03:00
/* Old group leader to release at the end; stays NULL when tsk already leads. */
struct task_struct * leader = NULL ;
2005-04-17 02:20:36 +04:00
int count ;
/*
* If we don't share sighandlers, then we aren't sharing anything
* and we can just re-use it all.
*/
if ( atomic_read ( & oldsighand - > count ) < = 1 ) {
BUG_ON ( atomic_read ( & sig - > count ) ! = 1 ) ;
exit_itimers ( sig ) ;
return 0 ;
}
/* Allocated up front; freed again below if it turns out not to be needed. */
newsighand = kmem_cache_alloc ( sighand_cachep , GFP_KERNEL ) ;
if ( ! newsighand )
return - ENOMEM ;
2006-09-27 12:51:13 +04:00
if ( thread_group_empty ( tsk ) )
2005-04-17 02:20:36 +04:00
goto no_thread_group ;
/*
* Kill all other threads in the thread group.
* We must hold tasklist_lock to call zap_other_threads.
*/
read_lock ( & tasklist_lock ) ;
spin_lock_irq ( lock ) ;
if ( sig - > flags & SIGNAL_GROUP_EXIT ) {
/*
* Another group action in progress, just
* return so that the signal is processed.
*/
spin_unlock_irq ( lock ) ;
read_unlock ( & tasklist_lock ) ;
kmem_cache_free ( sighand_cachep , newsighand ) ;
return - EAGAIN ;
}
2006-03-29 04:10:59 +04:00
/*
* child_reaper ignores SIGKILL, change it now.
* Reparenting needs write_lock on tasklist_lock,
* so it is safe to do it under read_lock.
*/
2006-12-08 13:38:01 +03:00
if ( unlikely ( tsk - > group_leader = = child_reaper ( tsk ) ) )
tsk - > nsproxy - > pid_ns - > child_reaper = tsk ;
2006-03-29 04:10:59 +04:00
2006-09-27 12:51:13 +04:00
zap_other_threads ( tsk ) ;
2005-04-17 02:20:36 +04:00
read_unlock ( & tasklist_lock ) ;
/*
* Account for the thread group leader hanging around:
* wait until sig->count drops to 1 when tsk is the leader, or to 2
* (tsk plus the old leader) when it is not.
*/
2005-10-31 02:01:37 +03:00
count = 1 ;
2006-09-27 12:51:13 +04:00
if ( ! thread_group_leader ( tsk ) ) {
2005-10-31 02:01:37 +03:00
count = 2 ;
2005-07-13 00:58:27 +04:00
/*
* The SIGALRM timer survives the exec, but needs to point
* at us as the new group leader now. We have a race with
* a timer firing now getting the old leader, so we need to
* synchronize with any firing (the code below does this by
* cancelling and, if it was pending, restarting the hrtimer)
* before we can safely let the old group leader die.
*/
2006-09-27 12:51:13 +04:00
sig - > tsk = tsk ;
2005-10-31 02:02:17 +03:00
/* The siglock is dropped around the timer cancel/restart. */
spin_unlock_irq ( lock ) ;
2006-01-10 07:52:34 +03:00
if ( hrtimer_cancel ( & sig - > real_timer ) )
hrtimer_restart ( & sig - > real_timer ) ;
2005-10-31 02:02:17 +03:00
spin_lock_irq ( lock ) ;
2005-07-13 00:58:27 +04:00
}
2005-04-17 02:20:36 +04:00
/* Sleep until the other threads are gone; siglock is dropped while scheduled out. */
while ( atomic_read ( & sig - > count ) > count ) {
2006-09-27 12:51:13 +04:00
sig - > group_exit_task = tsk ;
2005-04-17 02:20:36 +04:00
sig - > notify_count = count ;
__set_current_state ( TASK_UNINTERRUPTIBLE ) ;
spin_unlock_irq ( lock ) ;
schedule ( ) ;
spin_lock_irq ( lock ) ;
}
sig - > group_exit_task = NULL ;
sig - > notify_count = 0 ;
spin_unlock_irq ( lock ) ;
/*
* At this point all other threads have exited, all we have to
* do is to wait for the thread group leader to become inactive,
* and to assume its PID:
*/
2006-09-27 12:51:13 +04:00
if ( ! thread_group_leader ( tsk ) ) {
2005-04-17 02:20:36 +04:00
/*
* Wait for the thread group leader to be a zombie.
* It should already be zombie at this point, most
* of the time.
*/
2006-09-27 12:51:13 +04:00
leader = tsk - > group_leader ;
2005-04-17 02:20:36 +04:00
while ( leader - > exit_state ! = EXIT_ZOMBIE )
yield ( ) ;
2006-04-11 09:54:16 +04:00
/*
* The only record we have of the real-time age of a
* process, regardless of execs it's done, is start_time.
* All the past CPU time is accumulated in signal_struct
* from sister threads now dead. But in this non-leader
* exec, nothing survives from the original leader thread,
* whose birth marks the true age of this process now.
* When we take on its identity by switching to its PID, we
* also take its birthdate (always earlier than our own).
*/
2006-09-27 12:51:13 +04:00
tsk - > start_time = leader - > start_time ;
2006-04-11 09:54:16 +04:00
2005-04-17 02:20:36 +04:00
write_lock_irq ( & tasklist_lock ) ;
2006-09-27 12:51:13 +04:00
BUG_ON ( leader - > tgid ! = tsk - > tgid ) ;
BUG_ON ( tsk - > pid = = tsk - > tgid ) ;
2005-04-17 02:20:36 +04:00
/*
* An exec() starts a new thread group with the
* TGID of the previous thread group. Rehash the
* two threads with a switched PID, and release
* the former thread group leader:
*/
2006-03-29 04:11:03 +04:00
/* Become a process group leader with the old leader's pid.
* The old leader becomes a thread of this thread group.
* Note: The old leader also uses this pid until release_task
* is called. Odd but simple and correct.
*/
2006-09-27 12:51:13 +04:00
detach_pid ( tsk , PIDTYPE_PID ) ;
tsk - > pid = leader - > pid ;
attach_pid ( tsk , PIDTYPE_PID , tsk - > pid ) ;
transfer_pid ( leader , tsk , PIDTYPE_PGID ) ;
transfer_pid ( leader , tsk , PIDTYPE_SID ) ;
list_replace_rcu ( & leader - > tasks , & tsk - > tasks ) ;
2005-04-17 02:20:36 +04:00
2006-09-27 12:51:13 +04:00
tsk - > group_leader = tsk ;
leader - > group_leader = tsk ;
2006-04-11 03:16:49 +04:00
2006-09-27 12:51:13 +04:00
tsk - > exit_signal = SIGCHLD ;
2005-11-24 00:37:43 +03:00
BUG_ON ( leader - > exit_state ! = EXIT_ZOMBIE ) ;
leader - > exit_state = EXIT_DEAD ;
2005-04-17 02:20:36 +04:00
write_unlock_irq ( & tasklist_lock ) ;
}
/*
* There may be one thread left which is just exiting,
* but it's safe to stop telling the group to kill themselves.
*/
sig - > flags = 0 ;
no_thread_group :
exit_itimers ( sig ) ;
2005-11-07 21:12:43 +03:00
/* Only set when tsk took over from a former leader above. */
if ( leader )
release_task ( leader ) ;
BUG_ON ( atomic_read ( & sig - > count ) ! = 1 ) ;
2005-04-17 02:20:36 +04:00
if ( atomic_read ( & oldsighand - > count ) = = 1 ) {
/*
* Now that we nuked the rest of the thread group,
* it turns out we are not sharing sighand any more either.
* So we can just keep it.
*/
kmem_cache_free ( sighand_cachep , newsighand ) ;
} else {
/*
* Move our state over to newsighand and switch it in.
*/
atomic_set ( & newsighand - > count , 1 ) ;
memcpy ( newsighand - > action , oldsighand - > action ,
sizeof ( newsighand - > action ) ) ;
/* Lock order for the switch: tasklist_lock, old siglock, then new siglock. */
write_lock_irq ( & tasklist_lock ) ;
spin_lock ( & oldsighand - > siglock ) ;
2006-08-27 12:23:57 +04:00
spin_lock_nested ( & newsighand - > siglock , SINGLE_DEPTH_NESTING ) ;
2005-04-17 02:20:36 +04:00
2006-09-27 12:51:13 +04:00
rcu_assign_pointer ( tsk - > sighand , newsighand ) ;
2005-04-17 02:20:36 +04:00
recalc_sigpending ( ) ;
spin_unlock ( & newsighand - > siglock ) ;
spin_unlock ( & oldsighand - > siglock ) ;
write_unlock_irq ( & tasklist_lock ) ;
/* Drop our reference on the old table; free it if it was the last one. */
if ( atomic_dec_and_test ( & oldsighand - > count ) )
2006-03-29 04:11:12 +04:00
kmem_cache_free ( sighand_cachep , oldsighand ) ;
2005-04-17 02:20:36 +04:00
}
2006-09-27 12:51:13 +04:00
BUG_ON ( ! thread_group_leader ( tsk ) ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
/*
* These functions flush out all traces of the currently running executable
* so that a new one can be started
*/
2006-01-15 00:20:43 +03:00
static void flush_old_files ( struct files_struct * files )
2005-04-17 02:20:36 +04:00
{
long j = - 1 ;
2005-09-10 00:04:10 +04:00
struct fdtable * fdt ;
2005-04-17 02:20:36 +04:00
spin_lock ( & files - > file_lock ) ;
for ( ; ; ) {
unsigned long set , i ;
j + + ;
i = j * __NFDBITS ;
2005-09-10 00:04:10 +04:00
fdt = files_fdtable ( files ) ;
2006-12-10 13:21:12 +03:00
if ( i > = fdt - > max_fds )
2005-04-17 02:20:36 +04:00
break ;
2005-09-10 00:04:10 +04:00
set = fdt - > close_on_exec - > fds_bits [ j ] ;
2005-04-17 02:20:36 +04:00
if ( ! set )
continue ;
2005-09-10 00:04:10 +04:00
fdt - > close_on_exec - > fds_bits [ j ] = 0 ;
2005-04-17 02:20:36 +04:00
spin_unlock ( & files - > file_lock ) ;
for ( ; set ; i + + , set > > = 1 ) {
if ( set & 1 ) {
sys_close ( i ) ;
}
}
spin_lock ( & files - > file_lock ) ;
}
spin_unlock ( & files - > file_lock ) ;
}
void get_task_comm ( char * buf , struct task_struct * tsk )
{
/* buf must be at least sizeof(tsk->comm) in size */
task_lock ( tsk ) ;
strncpy ( buf , tsk - > comm , sizeof ( tsk - > comm ) ) ;
task_unlock ( tsk ) ;
}
void set_task_comm ( struct task_struct * tsk , char * buf )
{
task_lock ( tsk ) ;
strlcpy ( tsk - > comm , buf , sizeof ( tsk - > comm ) ) ;
task_unlock ( tsk ) ;
}
/*
 * flush_old_exec() - detach the current task from its old program image
 * and switch it to the address space prepared in @bprm.
 *
 * Returns 0 on success or the error from de_thread(), unshare_files() or
 * exec_mmap(). When exec_mmap() fails the saved files_struct is put back
 * with reset_files_struct() before returning.
 */
int flush_old_exec ( struct linux_binprm * bprm )
{
char * name ;
int i , ch , retval ;
struct files_struct * files ;
/* Scratch buffer for the new command name, same size as current->comm. */
char tcomm [ sizeof ( current - > comm ) ] ;
/*
* Make sure we have a private signal table and that
* we are unassociated from the previous thread group.
*/
retval = de_thread ( current ) ;
if ( retval )
goto out ;
/*
* Make sure we have private file handles. Ask the
* fork helper to do the work for us and the exit
* helper to do the cleanup of the old one.
*/
files = current - > files ; /* refcounted so safe to hold */
retval = unshare_files ( ) ;
if ( retval )
goto out ;
/*
* Release all of the old mmap stuff
*/
retval = exec_mmap ( bprm - > mm ) ;
if ( retval )
goto mmap_failed ;
bprm - > mm = NULL ; /* We're using it now */
/* This is the point of no return */
put_files_struct ( files ) ;
/* Reset the alternate signal stack settings. */
current - > sas_ss_sp = current - > sas_ss_size = 0 ;
/* Dumpable only when effective and real ids match; otherwise use suid_dumpable. */
if ( current - > euid = = current - > uid & & current - > egid = = current - > gid )
current - > mm - > dumpable = 1 ;
2005-06-23 11:09:43 +04:00
else
current - > mm - > dumpable = suid_dumpable ;
2005-04-17 02:20:36 +04:00
name = bprm - > filename ;
2005-05-06 03:16:12 +04:00
/* Copies the binary name from after last slash */
2005-04-17 02:20:36 +04:00
for ( i = 0 ; ( ch = * ( name + + ) ) ! = ' \0 ' ; ) {
if ( ch = = ' / ' )
2005-05-06 03:16:12 +04:00
i = 0 ; /* overwrite what we wrote */
2005-04-17 02:20:36 +04:00
else
/* Characters beyond the buffer size are silently dropped. */
if ( i < ( sizeof ( tcomm ) - 1 ) )
tcomm [ i + + ] = ch ;
}
tcomm [ i ] = ' \0 ' ;
set_task_comm ( current , tcomm ) ;
current - > flags & = ~ PF_RANDOMIZE ;
flush_thread ( ) ;
2006-03-01 03:59:19 +03:00
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
* some architectures like powerpc
*/
current - > mm - > task_size = TASK_SIZE ;
2005-04-17 02:20:36 +04:00
/*
* Credentials change, the binary is not readable by us, or the
* interpreter asked for it: switch keys and fall back to suid_dumpable.
*/
if ( bprm - > e_uid ! = current - > euid | | bprm - > e_gid ! = current - > egid | |
2005-11-09 08:35:04 +03:00
file_permission ( bprm - > file , MAY_READ ) | |
2005-04-17 02:20:36 +04:00
( bprm - > interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ) ) {
suid_keys ( current ) ;
2005-06-23 11:09:43 +04:00
current - > mm - > dumpable = suid_dumpable ;
2005-04-17 02:20:36 +04:00
}
/* An exec changes our domain. We are no longer part of the thread
group */
current - > self_exec_id + + ;
flush_signal_handlers ( current , 0 ) ;
/* Close the descriptors marked close-on-exec. */
flush_old_files ( current - > files ) ;
return 0 ;
mmap_failed :
2006-09-29 13:00:05 +04:00
reset_files_struct ( current , files ) ;
2005-04-17 02:20:36 +04:00
out :
return retval ;
}
EXPORT_SYMBOL ( flush_old_exec ) ;
/*
* Fill the binprm structure from the inode .
* Check permissions , then read the first 128 ( BINPRM_BUF_SIZE ) bytes
*/
int prepare_binprm ( struct linux_binprm * bprm )
{
int mode ;
2006-12-08 13:36:35 +03:00
struct inode * inode = bprm - > file - > f_path . dentry - > d_inode ;
2005-04-17 02:20:36 +04:00
int retval ;
mode = inode - > i_mode ;
if ( bprm - > file - > f_op = = NULL )
return - EACCES ;
bprm - > e_uid = current - > euid ;
bprm - > e_gid = current - > egid ;
2006-12-08 13:36:35 +03:00
if ( ! ( bprm - > file - > f_path . mnt - > mnt_flags & MNT_NOSUID ) ) {
2005-04-17 02:20:36 +04:00
/* Set-uid? */
if ( mode & S_ISUID ) {
current - > personality & = ~ PER_CLEAR_ON_SETID ;
bprm - > e_uid = inode - > i_uid ;
}
/* Set-gid? */
/*
* If setgid is set but no group execute bit then this
* is a candidate for mandatory locking , not a setgid
* executable .
*/
if ( ( mode & ( S_ISGID | S_IXGRP ) ) = = ( S_ISGID | S_IXGRP ) ) {
current - > personality & = ~ PER_CLEAR_ON_SETID ;
bprm - > e_gid = inode - > i_gid ;
}
}
/* fill in binprm security blob */
retval = security_bprm_set ( bprm ) ;
if ( retval )
return retval ;
memset ( bprm - > buf , 0 , BINPRM_BUF_SIZE ) ;
return kernel_read ( bprm - > file , 0 , bprm - > buf , BINPRM_BUF_SIZE ) ;
}
EXPORT_SYMBOL ( prepare_binprm ) ;
2006-01-15 00:20:43 +03:00
static int unsafe_exec ( struct task_struct * p )
2005-04-17 02:20:36 +04:00
{
int unsafe = 0 ;
if ( p - > ptrace & PT_PTRACED ) {
if ( p - > ptrace & PT_PTRACE_CAP )
unsafe | = LSM_UNSAFE_PTRACE_CAP ;
else
unsafe | = LSM_UNSAFE_PTRACE ;
}
if ( atomic_read ( & p - > fs - > count ) > 1 | |
atomic_read ( & p - > files - > count ) > 1 | |
atomic_read ( & p - > sighand - > count ) > 1 )
unsafe | = LSM_UNSAFE_SHARE ;
return unsafe ;
}
void compute_creds ( struct linux_binprm * bprm )
{
int unsafe ;
if ( bprm - > e_uid ! = current - > uid )
suid_keys ( current ) ;
exec_keys ( current ) ;
task_lock ( current ) ;
unsafe = unsafe_exec ( current ) ;
security_bprm_apply_creds ( bprm , unsafe ) ;
task_unlock ( current ) ;
security_bprm_post_apply_creds ( bprm ) ;
}
EXPORT_SYMBOL ( compute_creds ) ;
2007-05-08 11:25:16 +04:00
/*
* Arguments are ' \0 ' separated strings found at the location bprm - > p
* points to ; chop off the first by relocating bprm - > p to right after
* the first ' \0 ' encountered .
*/
2005-04-17 02:20:36 +04:00
/*
 * remove_arg_zero() - drop the first argument string from @bprm.
 *
 * Advances bprm->p past the first NUL byte found at or after it, frees any
 * argument page that was walked to its end, and decrements bprm->argc.
 * Does nothing when bprm->argc is zero.
 */
void remove_arg_zero ( struct linux_binprm * bprm )
{
if ( bprm - > argc ) {
2007-05-08 11:25:16 +04:00
/* Last byte examined; the outer loop ends once it is NUL. */
char ch ;
do {
unsigned long offset ;
unsigned long index ;
char * kaddr ;
struct page * page ;
2005-04-17 02:20:36 +04:00
2007-05-08 11:25:16 +04:00
/* Split bprm->p into a page index and an offset within that page. */
offset = bprm - > p & ~ PAGE_MASK ;
index = bprm - > p > > PAGE_SHIFT ;
2005-04-17 02:20:36 +04:00
2007-05-08 11:25:16 +04:00
page = bprm - > page [ index ] ;
2005-04-17 02:20:36 +04:00
kaddr = kmap_atomic ( page , KM_USER0 ) ;
2007-05-08 11:25:16 +04:00
/* run through page until we reach end or find NUL */
do {
ch = * ( kaddr + offset ) ;
/* discard that character... */
bprm - > p + + ;
offset + + ;
} while ( offset < PAGE_SIZE & & ch ! = ' \0 ' ) ;
kunmap_atomic ( kaddr , KM_USER0 ) ;
/* free the old page (only when it was consumed up to its last byte) */
if ( offset = = PAGE_SIZE ) {
__free_page ( page ) ;
bprm - > page [ index ] = NULL ;
}
} while ( ch ! = ' \0 ' ) ;
2005-04-17 02:20:36 +04:00
bprm - > argc - - ;
}
}
EXPORT_SYMBOL ( remove_arg_zero ) ;
/*
* cycle the list of binary formats handler , until one recognizes the image
*/
/*
 * search_binary_handler() - offer @bprm to each registered binary format
 * until one of them loads it.
 *
 * Returns the handler's non-negative result on success. Otherwise returns
 * the error from the security or audit check, the last handler error, or
 * -ENOENT when no handler was tried. With CONFIG_KMOD, a first pass that
 * ends in -ENOEXEC on a header whose first four bytes are not all
 * printable requests a "binfmt-XXXX" module and makes one more pass.
 */
int search_binary_handler ( struct linux_binprm * bprm , struct pt_regs * regs )
{
int try , retval ;
struct linux_binfmt * fmt ;
# ifdef __alpha__
/* handle /sbin/loader.. */
{
struct exec * eh = ( struct exec * ) bprm - > buf ;
if ( ! bprm - > loader & & eh - > fh . f_magic = = 0x183 & &
( eh - > fh . f_flags & 0x3000 ) = = 0x3000 )
{
struct file * file ;
unsigned long loader ;
/* Swap the original binary for the loader image. */
allow_write_access ( bprm - > file ) ;
fput ( bprm - > file ) ;
bprm - > file = NULL ;
loader = PAGE_SIZE * MAX_ARG_PAGES - sizeof ( void * ) ;
file = open_exec ( " /sbin/loader " ) ;
retval = PTR_ERR ( file ) ;
if ( IS_ERR ( file ) )
return retval ;
/* Remember if the application is TASO. */
bprm - > sh_bang = eh - > ah . entry < 0x100000000UL ;
bprm - > file = file ;
bprm - > loader = loader ;
retval = prepare_binprm ( bprm ) ;
if ( retval < 0 )
return retval ;
/* should call search_binary_handler recursively here,
but it does not matter */
}
}
# endif
retval = security_bprm_check ( bprm ) ;
if ( retval )
return retval ;
/* kernel module loader fixup */
/* so we don't try to run modprobe in kernel space. */
set_fs ( USER_DS ) ;
2006-04-26 22:04:08 +04:00
retval = audit_bprm ( bprm ) ;
if ( retval )
return retval ;
2005-04-17 02:20:36 +04:00
retval = - ENOENT ;
/* At most two passes over the list; the second follows a module request. */
for ( try = 0 ; try < 2 ; try + + ) {
read_lock ( & binfmt_lock ) ;
for ( fmt = formats ; fmt ; fmt = fmt - > next ) {
int ( * fn ) ( struct linux_binprm * , struct pt_regs * ) = fmt - > load_binary ;
if ( ! fn )
continue ;
if ( ! try_module_get ( fmt - > module ) )
continue ;
/* The handler is called with the list lock dropped. */
read_unlock ( & binfmt_lock ) ;
retval = fn ( bprm , regs ) ;
if ( retval > = 0 ) {
/* Loaded: release the handler and the file, then report the exec. */
put_binfmt ( fmt ) ;
allow_write_access ( bprm - > file ) ;
if ( bprm - > file )
fput ( bprm - > file ) ;
bprm - > file = NULL ;
current - > did_exec = 1 ;
2005-11-07 11:59:16 +03:00
proc_exec_connector ( current ) ;
2005-04-17 02:20:36 +04:00
return retval ;
}
read_lock ( & binfmt_lock ) ;
put_binfmt ( fmt ) ;
/* Stop on a real error, or once bprm->mm is gone (flush_old_exec() clears it). */
if ( retval ! = - ENOEXEC | | bprm - > mm = = NULL )
break ;
if ( ! bprm - > file ) {
read_unlock ( & binfmt_lock ) ;
return retval ;
}
}
read_unlock ( & binfmt_lock ) ;
if ( retval ! = - ENOEXEC | | bprm - > mm = = NULL ) {
break ;
# ifdef CONFIG_KMOD
} else {
# define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
/* A header starting with four printable bytes gets no module request. */
if ( printable ( bprm - > buf [ 0 ] ) & &
printable ( bprm - > buf [ 1 ] ) & &
printable ( bprm - > buf [ 2 ] ) & &
printable ( bprm - > buf [ 3 ] ) )
break ; /* -ENOEXEC */
/* The module name is built from the 16-bit value at buf[2..3]. */
request_module ( " binfmt-%04x " , * ( unsigned short * ) ( & bprm - > buf [ 2 ] ) ) ;
# endif
}
}
return retval ;
}
EXPORT_SYMBOL ( search_binary_handler ) ;
/*
* sys_execve ( ) executes a new program .
*/
/*
 * do_execve() - execute a new program.
 * @filename: kernel-space path of the executable.
 * @argv:     user-space NULL-terminated argument vector.
 * @envp:     user-space NULL-terminated environment vector.
 * @regs:     register state handed on to the binary format handler.
 *
 * Builds a linux_binprm (file, fresh mm, argument pages holding the
 * filename, environment and arguments) and hands it to
 * search_binary_handler().
 *
 * Returns the handler's non-negative result on success or a negative
 * errno; on failure everything acquired here is released again.
 */
int do_execve(char *filename,
	      char __user * __user *argv,
	      char __user * __user *envp,
	      struct pt_regs *regs)
{
	struct linux_binprm *bprm;
	struct file *file;
	int retval;
	int i;

	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
	if (!bprm)
		return -ENOMEM;

	file = open_exec(filename);
	if (IS_ERR(file)) {
		retval = PTR_ERR(file);
		goto out_kfree;
	}

	sched_exec();

	/* Strings are packed downwards from the top of the arg area. */
	bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
	bprm->file = file;
	bprm->filename = filename;
	bprm->interp = filename;

	bprm->mm = mm_alloc();
	if (!bprm->mm) {
		retval = -ENOMEM;
		goto out_file;
	}

	retval = init_new_context(current, bprm->mm);
	if (retval < 0)
		goto out_mm;

	bprm->argc = count(argv, bprm->p / sizeof(void *));
	retval = bprm->argc;
	if (retval < 0)
		goto out_mm;

	bprm->envc = count(envp, bprm->p / sizeof(void *));
	retval = bprm->envc;
	if (retval < 0)
		goto out_mm;

	retval = security_bprm_alloc(bprm);
	if (retval)
		goto out;

	retval = prepare_binprm(bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings_kernel(1, &bprm->filename, bprm);
	if (retval < 0)
		goto out;

	bprm->exec = bprm->p;

	retval = copy_strings(bprm->envc, envp, bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings(bprm->argc, argv, bprm);
	if (retval < 0)
		goto out;

	retval = search_binary_handler(bprm, regs);
	if (retval < 0)
		goto out;

	/* execve success */
	free_arg_pages(bprm);
	security_bprm_free(bprm);
	acct_update_integrals(current);
	kfree(bprm);
	return retval;

out:
	/* Something went wrong: free the argument pages still held. */
	for (i = 0; i < MAX_ARG_PAGES; i++) {
		struct page *page = bprm->page[i];

		if (page)
			__free_page(page);
	}

	if (bprm->security)
		security_bprm_free(bprm);

out_mm:
	if (bprm->mm)
		mmdrop(bprm->mm);

out_file:
	if (bprm->file) {
		allow_write_access(bprm->file);
		fput(bprm->file);
	}

out_kfree:
	kfree(bprm);
	return retval;
}
int set_binfmt ( struct linux_binfmt * new )
{
struct linux_binfmt * old = current - > binfmt ;
if ( new ) {
if ( ! try_module_get ( new - > module ) )
return - 1 ;
}
current - > binfmt = new ;
if ( old )
module_put ( old - > module ) ;
return 0 ;
}
EXPORT_SYMBOL ( set_binfmt ) ;
# define CORENAME_MAX_SIZE 64
/* format_corename will inspect the pattern parameter, and output a
* name into corename , which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator .
*/
2007-04-17 09:53:13 +04:00
static int format_corename ( char * corename , const char * pattern , long signr )
2005-04-17 02:20:36 +04:00
{
const char * pat_ptr = pattern ;
char * out_ptr = corename ;
char * const out_end = corename + CORENAME_MAX_SIZE ;
int rc ;
int pid_in_pattern = 0 ;
2007-04-17 09:53:13 +04:00
int ispipe = 0 ;
if ( * pattern = = ' | ' )
ispipe = 1 ;
2005-04-17 02:20:36 +04:00
/* Repeat as long as we have more pattern to process and more output
space */
while ( * pat_ptr ) {
if ( * pat_ptr ! = ' % ' ) {
if ( out_ptr = = out_end )
goto out ;
* out_ptr + + = * pat_ptr + + ;
} else {
switch ( * + + pat_ptr ) {
case 0 :
goto out ;
/* Double percent, output one percent */
case ' % ' :
if ( out_ptr = = out_end )
goto out ;
* out_ptr + + = ' % ' ;
break ;
/* pid */
case ' p ' :
pid_in_pattern = 1 ;
rc = snprintf ( out_ptr , out_end - out_ptr ,
" %d " , current - > tgid ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
/* uid */
case ' u ' :
rc = snprintf ( out_ptr , out_end - out_ptr ,
" %d " , current - > uid ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
/* gid */
case ' g ' :
rc = snprintf ( out_ptr , out_end - out_ptr ,
" %d " , current - > gid ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
/* signal that caused the coredump */
case ' s ' :
rc = snprintf ( out_ptr , out_end - out_ptr ,
" %ld " , signr ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
/* UNIX time of coredump */
case ' t ' : {
struct timeval tv ;
do_gettimeofday ( & tv ) ;
rc = snprintf ( out_ptr , out_end - out_ptr ,
" %lu " , tv . tv_sec ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
}
/* hostname */
case ' h ' :
down_read ( & uts_sem ) ;
rc = snprintf ( out_ptr , out_end - out_ptr ,
2006-10-02 13:18:11 +04:00
" %s " , utsname ( ) - > nodename ) ;
2005-04-17 02:20:36 +04:00
up_read ( & uts_sem ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
/* executable */
case ' e ' :
rc = snprintf ( out_ptr , out_end - out_ptr ,
" %s " , current - > comm ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
break ;
default :
break ;
}
+ + pat_ptr ;
}
}
/* Backward compatibility with core_uses_pid:
*
* If core_pattern does not include a % p ( as is the default )
* and core_uses_pid is set , then . % pid will be appended to
2007-04-17 09:53:13 +04:00
* the filename . Do not do this for piped commands . */
if ( ! ispipe & & ! pid_in_pattern
2005-04-17 02:20:36 +04:00
& & ( core_uses_pid | | atomic_read ( & current - > mm - > mm_users ) ! = 1 ) ) {
rc = snprintf ( out_ptr , out_end - out_ptr ,
" .%d " , current - > tgid ) ;
if ( rc > out_end - out_ptr )
goto out ;
out_ptr + = rc ;
}
2007-04-17 09:53:13 +04:00
out :
2005-04-17 02:20:36 +04:00
* out_ptr = 0 ;
2007-04-17 09:53:13 +04:00
return ispipe ;
2005-04-17 02:20:36 +04:00
}
2006-06-26 11:26:07 +04:00
static void zap_process ( struct task_struct * start )
2006-06-26 11:26:05 +04:00
{
struct task_struct * t ;
2006-06-26 11:26:06 +04:00
2006-06-26 11:26:07 +04:00
start - > signal - > flags = SIGNAL_GROUP_EXIT ;
start - > signal - > group_stop_count = 0 ;
2006-06-26 11:26:05 +04:00
t = start ;
do {
if ( t ! = current & & t - > mm ) {
t - > mm - > core_waiters + + ;
2006-06-26 11:26:06 +04:00
sigaddset ( & t - > pending . signal , SIGKILL ) ;
signal_wake_up ( t , 1 ) ;
2006-06-26 11:26:05 +04:00
}
} while ( ( t = next_thread ( t ) ) ! = start ) ;
}
2006-06-26 11:26:08 +04:00
static inline int zap_threads ( struct task_struct * tsk , struct mm_struct * mm ,
int exit_code )
2005-04-17 02:20:36 +04:00
{
struct task_struct * g , * p ;
2006-06-26 11:26:09 +04:00
unsigned long flags ;
2006-06-26 11:26:08 +04:00
int err = - EAGAIN ;
spin_lock_irq ( & tsk - > sighand - > siglock ) ;
if ( ! ( tsk - > signal - > flags & SIGNAL_GROUP_EXIT ) ) {
tsk - > signal - > group_exit_code = exit_code ;
2006-06-26 11:26:09 +04:00
zap_process ( tsk ) ;
2006-06-26 11:26:08 +04:00
err = 0 ;
2005-04-17 02:20:36 +04:00
}
2006-06-26 11:26:08 +04:00
spin_unlock_irq ( & tsk - > sighand - > siglock ) ;
if ( err )
return err ;
2005-04-17 02:20:36 +04:00
2006-06-26 11:26:09 +04:00
if ( atomic_read ( & mm - > mm_users ) = = mm - > core_waiters + 1 )
goto done ;
2006-06-26 11:26:08 +04:00
rcu_read_lock ( ) ;
2006-06-26 11:26:05 +04:00
for_each_process ( g ) {
2006-06-26 11:26:09 +04:00
if ( g = = tsk - > group_leader )
continue ;
2006-06-26 11:26:05 +04:00
p = g ;
do {
if ( p - > mm ) {
2006-06-26 11:26:09 +04:00
if ( p - > mm = = mm ) {
/*
* p - > sighand can ' t disappear , but
* may be changed by de_thread ( )
*/
lock_task_sighand ( p , & flags ) ;
2006-06-26 11:26:07 +04:00
zap_process ( p ) ;
2006-06-26 11:26:09 +04:00
unlock_task_sighand ( p , & flags ) ;
}
2006-06-26 11:26:05 +04:00
break ;
}
} while ( ( p = next_thread ( p ) ) ! = g ) ;
}
2006-06-26 11:26:08 +04:00
rcu_read_unlock ( ) ;
2006-06-26 11:26:09 +04:00
done :
2006-06-26 11:26:08 +04:00
return mm - > core_waiters ;
2005-04-17 02:20:36 +04:00
}
2006-06-26 11:26:08 +04:00
static int coredump_wait ( int exit_code )
2005-04-17 02:20:36 +04:00
{
2006-06-26 11:26:08 +04:00
struct task_struct * tsk = current ;
struct mm_struct * mm = tsk - > mm ;
struct completion startup_done ;
struct completion * vfork_done ;
2005-10-31 02:02:47 +03:00
int core_waiters ;
2005-04-17 02:20:36 +04:00
2006-06-26 11:26:08 +04:00
init_completion ( & mm - > core_done ) ;
init_completion ( & startup_done ) ;
2005-04-17 02:20:36 +04:00
mm - > core_startup_done = & startup_done ;
2006-06-26 11:26:08 +04:00
core_waiters = zap_threads ( tsk , mm , exit_code ) ;
2005-10-31 02:02:47 +03:00
up_write ( & mm - > mmap_sem ) ;
2006-06-26 11:26:08 +04:00
if ( unlikely ( core_waiters < 0 ) )
goto fail ;
/*
* Make sure nobody is waiting for us to release the VM ,
* otherwise we can deadlock when we wait on each other
*/
vfork_done = tsk - > vfork_done ;
if ( vfork_done ) {
tsk - > vfork_done = NULL ;
complete ( vfork_done ) ;
}
2005-10-31 02:02:47 +03:00
if ( core_waiters )
2005-04-17 02:20:36 +04:00
wait_for_completion ( & startup_done ) ;
2006-06-26 11:26:08 +04:00
fail :
2005-04-17 02:20:36 +04:00
BUG_ON ( mm - > core_waiters ) ;
2006-06-26 11:26:08 +04:00
return core_waiters ;
2005-04-17 02:20:36 +04:00
}
int do_coredump ( long signr , int exit_code , struct pt_regs * regs )
{
char corename [ CORENAME_MAX_SIZE + 1 ] ;
struct mm_struct * mm = current - > mm ;
struct linux_binfmt * binfmt ;
struct inode * inode ;
struct file * file ;
int retval = 0 ;
2005-06-23 11:09:43 +04:00
int fsuid = current - > fsuid ;
int flag = 0 ;
2006-10-01 10:29:28 +04:00
int ispipe = 0 ;
2005-04-17 02:20:36 +04:00
binfmt = current - > binfmt ;
if ( ! binfmt | | ! binfmt - > core_dump )
goto fail ;
down_write ( & mm - > mmap_sem ) ;
if ( ! mm - > dumpable ) {
up_write ( & mm - > mmap_sem ) ;
goto fail ;
}
2005-06-23 11:09:43 +04:00
/*
* We cannot trust fsuid as being the " true " uid of the
* process nor do we know its entire history . We only know it
* was tainted so we dump it as root in mode 2.
*/
if ( mm - > dumpable = = 2 ) { /* Setuid core dump mode */
flag = O_EXCL ; /* Stop rewrite attacks */
current - > fsuid = 0 ; /* Dump root private */
}
2005-04-17 02:20:36 +04:00
mm - > dumpable = 0 ;
2005-10-31 02:02:54 +03:00
2006-06-26 11:26:08 +04:00
retval = coredump_wait ( exit_code ) ;
if ( retval < 0 )
2005-10-31 02:02:54 +03:00
goto fail ;
2005-04-17 02:20:36 +04:00
/*
* Clear any false indication of pending signals that might
* be seen by the filesystem code called to write the core file .
*/
clear_thread_flag ( TIF_SIGPENDING ) ;
if ( current - > signal - > rlim [ RLIMIT_CORE ] . rlim_cur < binfmt - > min_coredump )
goto fail_unlock ;
/*
* lock_kernel ( ) because format_corename ( ) is controlled by sysctl , which
* uses lock_kernel ( )
*/
lock_kernel ( ) ;
2007-04-17 09:53:13 +04:00
ispipe = format_corename ( corename , core_pattern , signr ) ;
2005-04-17 02:20:36 +04:00
unlock_kernel ( ) ;
2007-04-17 09:53:13 +04:00
if ( ispipe ) {
2006-10-01 10:29:28 +04:00
/* SIGPIPE can happen, but it's just never processed */
if ( call_usermodehelper_pipe ( corename + 1 , NULL , NULL , & file ) ) {
printk ( KERN_INFO " Core dump to %s pipe failed \n " ,
corename ) ;
goto fail_unlock ;
}
} else
file = filp_open ( corename ,
2006-12-07 07:40:39 +03:00
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag ,
0600 ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( file ) )
goto fail_unlock ;
2006-12-08 13:36:35 +03:00
inode = file - > f_path . dentry - > d_inode ;
2005-04-17 02:20:36 +04:00
if ( inode - > i_nlink > 1 )
goto close_fail ; /* multiple links - don't dump */
2006-12-08 13:36:35 +03:00
if ( ! ispipe & & d_unhashed ( file - > f_path . dentry ) )
2005-04-17 02:20:36 +04:00
goto close_fail ;
2006-10-01 10:29:28 +04:00
/* AK: actually i see no reason to not allow this for named pipes etc.,
but keep the previous behaviour for now . */
if ( ! ispipe & & ! S_ISREG ( inode - > i_mode ) )
2005-04-17 02:20:36 +04:00
goto close_fail ;
if ( ! file - > f_op )
goto close_fail ;
if ( ! file - > f_op - > write )
goto close_fail ;
2006-12-08 13:36:35 +03:00
if ( ! ispipe & & do_truncate ( file - > f_path . dentry , 0 , 0 , file ) ! = 0 )
2005-04-17 02:20:36 +04:00
goto close_fail ;
retval = binfmt - > core_dump ( signr , regs , file ) ;
if ( retval )
current - > signal - > group_exit_code | = 0x80 ;
close_fail :
filp_close ( file , NULL ) ;
fail_unlock :
2005-06-23 11:09:43 +04:00
current - > fsuid = fsuid ;
2005-04-17 02:20:36 +04:00
complete_all ( & mm - > core_done ) ;
fail :
return retval ;
}