2005-04-17 02:20:36 +04:00
/*
* linux / kernel / acct . c
*
* BSD Process Accounting for Linux
*
* Author : Marco van Wieringen < mvw @ planets . elm . net >
*
* Some code based on ideas and code from :
* Thomas K . Dyas < tdyas @ eden . rutgers . edu >
*
* This file implements BSD - style process accounting . Whenever any
* process exits , an accounting record of type " struct acct " is
* written to the file specified with the acct ( ) system call . It is
* up to user - level programs to do useful things with the accounting
* log . The kernel just provides the raw accounting information .
*
* ( C ) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B . V .
*
* Plugged two leaks . 1 ) It didn ' t return acct_file into the free_filps if
* the file happened to be read - only . 2 ) If the accounting was suspended
* due to the lack of space it happily allowed to reopen it and completely
* lost the old acct_file . 3 / 10 / 98 , Al Viro .
*
* Now we silently close acct_file on attempt to reopen . Cleaned sys_acct ( ) .
* XTerms and EMACS are manifestations of pure evil . 21 / 10 / 98 , AV .
*
* Fixed a nasty interaction with sys_umount(). If the accounting
* was suspended we failed to stop it on umount(). Messy.
* Another one : remount to readonly didn ' t stop accounting .
* Question : what should we do if we have CAP_SYS_ADMIN but not
* CAP_SYS_PACCT ? Current code does the following : umount returns - EBUSY
* unless we are messing with the root . In that case we are getting a
* real mess with do_remount_sb ( ) . 9 / 11 / 98 , AV .
*
* Fixed a bunch of races ( and pair of leaks ) . Probably not the best way ,
* but this one obviously doesn ' t introduce deadlocks . Later . BTW , found
* one race ( and leak ) in BSD implementation .
* OK , that ' s better . ANOTHER race and leak in BSD variant . There always
* is one more bug . . . 10 / 11 / 98 , AV .
*
* Oh , fsck . . . Oopsable SMP race in do_process_acct ( ) - we must hold
* - > mmap_sem to walk the vma list of current - > mm . Nasty , since it leaks
* a struct file opened for write . Fixed . 2 / 6 / 2000 , AV .
*/
# include <linux/mm.h>
# include <linux/slab.h>
# include <linux/acct.h>
2006-01-11 23:17:46 +03:00
# include <linux/capability.h>
2005-04-17 02:20:36 +04:00
# include <linux/file.h>
# include <linux/tty.h>
# include <linux/security.h>
# include <linux/vfs.h>
# include <linux/jiffies.h>
# include <linux/times.h>
# include <linux/syscalls.h>
2005-11-08 01:13:39 +03:00
# include <linux/mount.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# include <asm/div64.h>
# include <linux/blkdev.h> /* sector_div */
2008-03-24 22:29:53 +03:00
# include <linux/pid_namespace.h>
2005-04-17 02:20:36 +04:00
/*
 * Free-space thresholds that pause and restart process accounting, plus
 * the interval between two free-space checks.
 * Turned into sysctl-controllable parameters. AV, 12/11/98
 *
 * Slot 0: resume threshold, slot 1: suspend threshold (both in percent of
 * the filesystem's blocks), slot 2: seconds between checks.
 */
int acct_parm[3] = {
	[0] = 4,
	[1] = 2,
	[2] = 30,
};
#define RESUME		(acct_parm[0])	/* >foo% free space - resume */
#define SUSPEND		(acct_parm[1])	/* <foo% free space - suspend */
#define ACCT_TIMEOUT	(acct_parm[2])	/* foo second timeout between checks */
/*
 * External references and all of the globals.
 *
 * do_acct_process() is defined further down but is already needed by
 * acct_file_reopen(), hence the forward declaration.
 */
static void do_acct_process(struct bsd_acct_struct *acct,
			    struct pid_namespace *ns, struct file *file);
2005-04-17 02:20:36 +04:00
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock . This primes
* the cache line to have the data after getting the lock .
*/
2008-07-25 12:48:42 +04:00
struct bsd_acct_struct {
2011-12-09 05:08:42 +04:00
int active ;
unsigned long needcheck ;
2005-04-17 02:20:36 +04:00
struct file * file ;
2008-03-24 22:29:53 +03:00
struct pid_namespace * ns ;
2008-07-25 12:48:47 +04:00
struct list_head list ;
2005-04-17 02:20:36 +04:00
} ;
2008-07-25 12:48:45 +04:00
/* Serialises updates to every bsd_acct_struct and to acct_list. */
static DEFINE_SPINLOCK(acct_lock);
/* All bsd_acct_struct instances that currently have an accounting file. */
static LIST_HEAD(acct_list);
2008-07-25 12:48:45 +04:00
2005-04-17 02:20:36 +04:00
/*
* Check the amount of free space and suspend / resume accordingly .
*/
2008-07-25 12:48:46 +04:00
static int check_free_space ( struct bsd_acct_struct * acct , struct file * file )
2005-04-17 02:20:36 +04:00
{
struct kstatfs sbuf ;
int res ;
int act ;
2011-12-09 05:08:42 +04:00
u64 resume ;
u64 suspend ;
2005-04-17 02:20:36 +04:00
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2008-07-25 12:48:46 +04:00
res = acct - > active ;
2011-12-09 05:08:42 +04:00
if ( ! file | | time_is_before_jiffies ( acct - > needcheck ) )
2005-04-17 02:20:36 +04:00
goto out ;
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
/* May block */
2010-07-07 20:53:11 +04:00
if ( vfs_statfs ( & file - > f_path , & sbuf ) )
2005-04-17 02:20:36 +04:00
return res ;
suspend = sbuf . f_blocks * SUSPEND ;
resume = sbuf . f_blocks * RESUME ;
2011-12-09 05:08:42 +04:00
do_div ( suspend , 100 ) ;
do_div ( resume , 100 ) ;
2005-04-17 02:20:36 +04:00
if ( sbuf . f_bavail < = suspend )
act = - 1 ;
else if ( sbuf . f_bavail > = resume )
act = 1 ;
else
act = 0 ;
/*
2008-07-25 12:48:46 +04:00
* If some joker switched acct - > file under us we ' ld better be
2005-04-17 02:20:36 +04:00
* silent and _not_ touch anything .
*/
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2008-07-25 12:48:46 +04:00
if ( file ! = acct - > file ) {
2005-04-17 02:20:36 +04:00
if ( act )
res = act > 0 ;
goto out ;
}
2008-07-25 12:48:46 +04:00
if ( acct - > active ) {
2005-04-17 02:20:36 +04:00
if ( act < 0 ) {
2008-07-25 12:48:46 +04:00
acct - > active = 0 ;
2005-04-17 02:20:36 +04:00
printk ( KERN_INFO " Process accounting paused \n " ) ;
}
} else {
if ( act > 0 ) {
2008-07-25 12:48:46 +04:00
acct - > active = 1 ;
2005-04-17 02:20:36 +04:00
printk ( KERN_INFO " Process accounting resumed \n " ) ;
}
}
2011-12-09 05:08:42 +04:00
acct - > needcheck = jiffies + ACCT_TIMEOUT * HZ ;
2008-07-25 12:48:46 +04:00
res = acct - > active ;
2005-04-17 02:20:36 +04:00
out :
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
return res ;
}
/*
2005-09-10 11:26:39 +04:00
* Close the old accounting file ( if currently open ) and then replace
2005-04-17 02:20:36 +04:00
* it with file ( if non - NULL ) .
*
2008-07-25 12:48:45 +04:00
* NOTE : acct_lock MUST be held on entry and exit .
2005-04-17 02:20:36 +04:00
*/
2008-07-25 12:48:47 +04:00
static void acct_file_reopen ( struct bsd_acct_struct * acct , struct file * file ,
struct pid_namespace * ns )
2005-04-17 02:20:36 +04:00
{
struct file * old_acct = NULL ;
2008-03-24 22:29:53 +03:00
struct pid_namespace * old_ns = NULL ;
2005-04-17 02:20:36 +04:00
2008-07-25 12:48:46 +04:00
if ( acct - > file ) {
old_acct = acct - > file ;
old_ns = acct - > ns ;
acct - > active = 0 ;
acct - > file = NULL ;
2008-07-25 12:48:47 +04:00
acct - > ns = NULL ;
2008-07-25 12:48:47 +04:00
list_del ( & acct - > list ) ;
2005-04-17 02:20:36 +04:00
}
if ( file ) {
2008-07-25 12:48:46 +04:00
acct - > file = file ;
2008-07-25 12:48:47 +04:00
acct - > ns = ns ;
2011-12-09 05:08:42 +04:00
acct - > needcheck = jiffies + ACCT_TIMEOUT * HZ ;
2008-07-25 12:48:46 +04:00
acct - > active = 1 ;
2008-07-25 12:48:47 +04:00
list_add ( & acct - > list , & acct_list ) ;
2005-04-17 02:20:36 +04:00
}
if ( old_acct ) {
2006-12-08 13:36:43 +03:00
mnt_unpin ( old_acct - > f_path . mnt ) ;
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2008-07-25 12:48:46 +04:00
do_acct_process ( acct , old_ns , old_acct ) ;
2005-04-17 02:20:36 +04:00
filp_close ( old_acct , NULL ) ;
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
}
}
2005-11-08 01:13:39 +03:00
static int acct_on ( char * name )
{
struct file * file ;
2009-06-30 22:41:34 +04:00
struct vfsmount * mnt ;
2008-07-25 12:48:47 +04:00
struct pid_namespace * ns ;
struct bsd_acct_struct * acct = NULL ;
2005-11-08 01:13:39 +03:00
/* Difference from BSD - they don't do O_APPEND */
file = filp_open ( name , O_WRONLY | O_APPEND | O_LARGEFILE , 0 ) ;
if ( IS_ERR ( file ) )
return PTR_ERR ( file ) ;
2006-12-08 13:36:43 +03:00
if ( ! S_ISREG ( file - > f_path . dentry - > d_inode - > i_mode ) ) {
2005-11-08 01:13:39 +03:00
filp_close ( file , NULL ) ;
return - EACCES ;
}
if ( ! file - > f_op - > write ) {
filp_close ( file , NULL ) ;
return - EIO ;
}
2008-07-25 12:48:47 +04:00
ns = task_active_pid_ns ( current ) ;
if ( ns - > bacct = = NULL ) {
acct = kzalloc ( sizeof ( struct bsd_acct_struct ) , GFP_KERNEL ) ;
if ( acct = = NULL ) {
filp_close ( file , NULL ) ;
return - ENOMEM ;
}
}
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2008-07-25 12:48:47 +04:00
if ( ns - > bacct = = NULL ) {
ns - > bacct = acct ;
acct = NULL ;
}
2009-06-30 22:41:34 +04:00
mnt = file - > f_path . mnt ;
mnt_pin ( mnt ) ;
2008-07-25 12:48:47 +04:00
acct_file_reopen ( ns - > bacct , file , ns ) ;
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2005-11-08 01:13:39 +03:00
2009-06-30 22:41:34 +04:00
mntput ( mnt ) ; /* it's pinned, now give up active reference */
2008-07-25 12:48:47 +04:00
kfree ( acct ) ;
2005-11-08 01:13:39 +03:00
return 0 ;
}
2005-09-10 11:26:39 +04:00
/**
* sys_acct - enable / disable process accounting
* @ name : file name for accounting records or NULL to shutdown accounting
*
* Returns 0 for success or negative errno values for failure .
*
* sys_acct ( ) is the only system call needed to implement process
* accounting . It takes the name of the file where accounting records
* should be written . If the filename is NULL , accounting will be
* shutdown .
2005-04-17 02:20:36 +04:00
*/
2009-01-14 16:14:06 +03:00
SYSCALL_DEFINE1 ( acct , const char __user * , name )
2005-04-17 02:20:36 +04:00
{
2010-04-07 23:15:25 +04:00
int error = 0 ;
2005-04-17 02:20:36 +04:00
if ( ! capable ( CAP_SYS_PACCT ) )
return - EPERM ;
if ( name ) {
2005-11-08 01:13:39 +03:00
char * tmp = getname ( name ) ;
if ( IS_ERR ( tmp ) )
2005-04-17 02:20:36 +04:00
return ( PTR_ERR ( tmp ) ) ;
2005-11-08 01:13:39 +03:00
error = acct_on ( tmp ) ;
2005-04-17 02:20:36 +04:00
putname ( tmp ) ;
2005-11-08 01:13:39 +03:00
} else {
2008-07-25 12:48:47 +04:00
struct bsd_acct_struct * acct ;
acct = task_active_pid_ns ( current ) - > bacct ;
if ( acct = = NULL )
return 0 ;
2010-04-07 23:15:25 +04:00
spin_lock ( & acct_lock ) ;
acct_file_reopen ( acct , NULL , NULL ) ;
spin_unlock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
}
2010-04-07 23:15:25 +04:00
2005-11-08 01:13:39 +03:00
return error ;
}
2005-04-17 02:20:36 +04:00
2005-11-08 01:13:39 +03:00
/**
* acct_auto_close - turn off a filesystem ' s accounting if it is on
* @ m : vfsmount being shut down
*
* If the accounting is turned on for a file in the subtree pointed to
* to by m , turn accounting off . Done when m is about to die .
*/
void acct_auto_close_mnt ( struct vfsmount * m )
{
2008-07-25 12:48:47 +04:00
struct bsd_acct_struct * acct ;
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2008-07-25 12:48:47 +04:00
restart :
list_for_each_entry ( acct , & acct_list , list )
if ( acct - > file & & acct - > file - > f_path . mnt = = m ) {
acct_file_reopen ( acct , NULL , NULL ) ;
goto restart ;
}
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
}
2005-09-10 11:26:39 +04:00
/**
* acct_auto_close - turn off a filesystem ' s accounting if it is on
* @ sb : super block for the filesystem
*
* If the accounting is turned on for a file in the filesystem pointed
* to by sb , turn accounting off .
2005-04-17 02:20:36 +04:00
*/
void acct_auto_close ( struct super_block * sb )
{
2008-07-25 12:48:47 +04:00
struct bsd_acct_struct * acct ;
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2008-07-25 12:48:47 +04:00
restart :
list_for_each_entry ( acct , & acct_list , list )
2011-12-08 03:16:57 +04:00
if ( acct - > file & & acct - > file - > f_path . dentry - > d_sb = = sb ) {
2008-07-25 12:48:47 +04:00
acct_file_reopen ( acct , NULL , NULL ) ;
goto restart ;
}
2008-07-25 12:48:47 +04:00
spin_unlock ( & acct_lock ) ;
}
void acct_exit_ns ( struct pid_namespace * ns )
{
2010-05-12 01:06:56 +04:00
struct bsd_acct_struct * acct = ns - > bacct ;
2008-07-25 12:48:47 +04:00
2010-05-12 01:06:56 +04:00
if ( acct = = NULL )
return ;
2008-07-25 12:48:47 +04:00
2010-05-12 01:06:56 +04:00
spin_lock ( & acct_lock ) ;
if ( acct - > file ! = NULL )
acct_file_reopen ( acct , NULL , NULL ) ;
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2010-05-12 01:06:56 +04:00
kfree ( acct ) ;
2005-04-17 02:20:36 +04:00
}
/*
 *  encode an unsigned long into a comp_t
 *
 *  This routine has been adopted from the encode_comp_t() function in
 *  the kern_acct.c file of the FreeBSD operating system. The encoding
 *  is a 13-bit fraction with a 3-bit (base 8) exponent.
 *
 *  Values too large for that format (the exponent would need more than
 *  3 bits) are reported as the largest comp_t instead of being silently
 *  truncated into an unrelated number.
 */

#define	MANTSIZE	13			/* 13 bit mantissa. */
#define	EXPSIZE		3			/* Base 8 (3 bit) exponent. */
#define	MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */

static comp_t encode_comp_t(unsigned long value)
{
	int exp, rnd;

	exp = rnd = 0;
	while (value > MAXFRACT) {
		rnd = value & (1 << (EXPSIZE - 1));	/* Round up? */
		value >>= EXPSIZE;	/* Base 8 exponent == 3 bit shift. */
		exp++;
	}

	/*
	 * If we need to round up, do it (and handle overflow correctly).
	 */
	if (rnd && (++value > MAXFRACT)) {
		value >>= EXPSIZE;
		exp++;
	}

	/*
	 * The exponent no longer fits into the bits left above the
	 * mantissa: saturate rather than let "exp << MANTSIZE" spill out
	 * of comp_t.
	 */
	if (exp > (((comp_t) ~0U) >> MANTSIZE))
		return (comp_t) ~0U;

	/*
	 * Clean it up and polish it off.
	 */
	exp <<= MANTSIZE;		/* Shift the exponent into place */
	exp += value;			/* and add on the mantissa. */
	return exp;
}
#if ACCT_VERSION==1 || ACCT_VERSION==2
/*
 * encode an u64 into a comp2_t (24 bits)
 *
 * Format: 5 bit base 2 exponent, 20 bits mantissa.
 * The leading bit of the mantissa is not stored, but implied for
 * non-zero exponents.
 * Largest encodable value is 50 bits.
 */

#define MANTSIZE2	20			/* 20 bit mantissa. */
#define EXPSIZE2	5			/* 5 bit base 2 exponent. */
#define MAXFRACT2	((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */
#define MAXEXP2		((1 <<EXPSIZE2) - 1)    /* Maximum exponent. */

static comp2_t encode_comp2_t(u64 value)
{
	int exponent;
	int round_up = 0;

	/* A value that needs the (implied) top mantissa bit gets exponent 1. */
	exponent = (value > (MAXFRACT2 >> 1));

	/* Halve until it fits, remembering the last bit shifted out. */
	for (; value > MAXFRACT2; value >>= 1, exponent++)
		round_up = value & 1;

	/*
	 * If we need to round up, do it (and handle overflow correctly).
	 */
	if (round_up) {
		value++;
		if (value > MAXFRACT2) {
			value >>= 1;
			exponent++;
		}
	}

	/* Overflow. Return largest representable number instead. */
	if (exponent > MAXEXP2)
		return (1ul << (MANTSIZE2 + EXPSIZE2 - 1)) - 1;

	/* Drop the implied leading bit and place the exponent above it. */
	return (value & (MAXFRACT2 >> 1)) | (exponent << (MANTSIZE2 - 1));
}
#endif
#if ACCT_VERSION==3
/*
 * encode an u64 into a 32 bit IEEE float
 *
 * The value is shifted left until bit 63 is set while the exponent counts
 * down from 190; the 23 bits below the leading one become the mantissa.
 * Zero is encoded as 0.
 */
static u32 encode_float(u64 value)
{
	unsigned exponent;
	unsigned mantissa;

	if (value == 0)
		return 0;

	/* Normalise: stop as soon as the top bit is set. */
	for (exponent = 190; (s64)value > 0; exponent--)
		value <<= 1;

	mantissa = (u32)(value >> 40) & 0x7fffffu;
	return mantissa | (exponent << 23);
}
#endif
/*
* Write an accounting entry for an exiting process
*
* The acct_process ( ) call is the workhorse of the process
* accounting system . The struct acct is built here and then written
* into the accounting file . This function should only be called from
2007-11-26 23:21:49 +03:00
* do_exit ( ) or when switching to a different output file .
2005-04-17 02:20:36 +04:00
*/
/*
* do_acct_process does all actual work . Caller holds the reference to file .
*/
2008-07-25 12:48:46 +04:00
static void do_acct_process ( struct bsd_acct_struct * acct ,
struct pid_namespace * ns , struct file * file )
2005-04-17 02:20:36 +04:00
{
2006-06-25 16:49:24 +04:00
struct pacct_struct * pacct = & current - > signal - > pacct ;
2005-04-17 02:20:36 +04:00
acct_t ac ;
mm_segment_t fs ;
unsigned long flim ;
u64 elapsed ;
u64 run_time ;
struct timespec uptime ;
2006-12-08 13:36:04 +03:00
struct tty_struct * tty ;
2009-08-21 01:39:52 +04:00
const struct cred * orig_cred ;
/* Perform file operations on behalf of whoever enabled accounting */
orig_cred = override_creds ( file - > f_cred ) ;
2005-04-17 02:20:36 +04:00
/*
* First check to see if there is enough free_space to continue
* the process accounting system .
*/
2008-07-25 12:48:46 +04:00
if ( ! check_free_space ( acct , file ) )
2009-08-21 01:39:52 +04:00
goto out ;
2005-04-17 02:20:36 +04:00
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions .
*/
2011-12-09 05:08:42 +04:00
memset ( & ac , 0 , sizeof ( acct_t ) ) ;
2005-04-17 02:20:36 +04:00
ac . ac_version = ACCT_VERSION | ACCT_BYTEORDER ;
strlcpy ( ac . ac_comm , current - > comm , sizeof ( ac . ac_comm ) ) ;
/* calculate run_time in nsec*/
do_posix_clock_monotonic_gettime ( & uptime ) ;
run_time = ( u64 ) uptime . tv_sec * NSEC_PER_SEC + uptime . tv_nsec ;
[PATCH] Fix pacct bug in multithreading case.
I noticed a bug on the process accounting facility. In multi-threading
process, some data would be recorded incorrectly when the group_leader dies
earlier than one or more threads. The attached patch fixes this problem.
See below. 'bugacct' is a test program that create a worker thread after 4
seconds sleeping, then the group_leader dies soon. The worker thread
consume CPU/Memory for 6 seconds, then exit. We can estimate 10 seconds as
etime and 6 seconds as stime + utime. This is a sample program which the
group_leader dies earlier than other threads.
The results of same binary execution on different kernel are below.
-- accounted records --------------------
| btime | utime | stime | etime | minflt | majflt | comm |
original | 13:16:40 | 0.00 | 0.00 | 6.10 | 171 | 0 | bugacct |
patched | 13:20:21 | 5.83 | 0.18 | 10.03 | 32776 | 0 | bugacct |
(*) bugacct allocates 128MB memory, thus 128MB / 4KB = 32768 of minflt is
appropriate.
-- Test results in original kernel ------
$ date; time -p ./bugacct
Tue Mar 28 13:16:36 JST 2006 <- But pacct said btime is 13:16:40
real 10.11 <- But pacct said etime is 6.10
user 5.96 <- But pacct said utime is 0.00
sys 0.14 <- But pacct said stime is 0.00
$
-- Test results in patched kernel -------
$ date; time -p ./bugacct
Tue Mar 28 13:20:21 JST 2006
real 10.04
user 5.83
sys 0.19
$
In the original 2.6.16 kernel, pacct records btime, utime, stime, etime and
minflt incorrectly. In my opinion, this problem is caused by an assumption
that group_leader dies last.
The following section calculates process running time for etime and btime.
But it means running time of the thread that dies last, not process. The
start_time of the first thread in the process (group_leader) should be
reduced from uptime to calculate etime and btime correctly.
---- do_acct_process() in kernel/acct.c:
/* calculate run_time in nsec*/
do_posix_clock_monotonic_gettime(&uptime);
run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec;
run_time -= (u64)current->start_time.tv_sec*NSEC_PER_SEC
+ current->start_time.tv_nsec;
----
The following section calculates stime and utime of the process.
But it might count the utime and stime of the group_leader duplicatly
and ignore the utime and stime of the thread dies last, when one or
more threads remain after group_leader dead.
The ac_utime should be calculated as the sum of the signal->utime
and utime of the thread dies last. The ac_stime should be done also.
---- do_acct_process() in kernel/acct.c:
jiffies = cputime_to_jiffies(cputime_add(current->group_leader->utime,
current->signal->utime));
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(jiffies));
jiffies = cputime_to_jiffies(cputime_add(current->group_leader->stime,
current->signal->stime));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(jiffies));
----
The part of the minflt/majflt calculation has same problem.
This patch solves those problems, I think.
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-31 14:30:45 +04:00
run_time - = ( u64 ) current - > group_leader - > start_time . tv_sec * NSEC_PER_SEC
+ current - > group_leader - > start_time . tv_nsec ;
2005-04-17 02:20:36 +04:00
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ ( run_time ) ;
# if ACCT_VERSION==3
ac . ac_etime = encode_float ( elapsed ) ;
# else
ac . ac_etime = encode_comp_t ( elapsed < ( unsigned long ) - 1l ?
( unsigned long ) elapsed : ( unsigned long ) - 1l ) ;
# endif
# if ACCT_VERSION==1 || ACCT_VERSION==2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t ( elapsed ) ;
ac . ac_etime_hi = etime > > 16 ;
ac . ac_etime_lo = ( u16 ) etime ;
}
# endif
do_div ( elapsed , AHZ ) ;
2007-07-25 04:47:43 +04:00
ac . ac_btime = get_seconds ( ) - elapsed ;
2005-04-17 02:20:36 +04:00
/* we really need to bite the bullet and change layout */
2009-12-15 04:57:34 +03:00
ac . ac_uid = orig_cred - > uid ;
ac . ac_gid = orig_cred - > gid ;
2005-04-17 02:20:36 +04:00
# if ACCT_VERSION==2
ac . ac_ahz = AHZ ;
# endif
# if ACCT_VERSION==1 || ACCT_VERSION==2
/* backward-compatible 16 bit fields */
2008-11-14 02:39:12 +03:00
ac . ac_uid16 = ac . ac_uid ;
ac . ac_gid16 = ac . ac_gid ;
2005-04-17 02:20:36 +04:00
# endif
# if ACCT_VERSION==3
2008-03-24 22:29:53 +03:00
ac . ac_pid = task_tgid_nr_ns ( current , ns ) ;
2008-03-24 22:29:52 +03:00
rcu_read_lock ( ) ;
2008-03-24 22:29:53 +03:00
ac . ac_ppid = task_tgid_nr_ns ( rcu_dereference ( current - > real_parent ) , ns ) ;
2008-03-24 22:29:52 +03:00
rcu_read_unlock ( ) ;
2005-04-17 02:20:36 +04:00
# endif
2006-07-14 11:24:18 +04:00
spin_lock_irq ( & current - > sighand - > siglock ) ;
2008-10-13 13:40:53 +04:00
tty = current - > signal - > tty ; /* Safe as we hold the siglock */
2006-12-08 13:36:07 +03:00
ac . ac_tty = tty ? old_encode_dev ( tty_devnum ( tty ) ) : 0 ;
2006-06-25 16:49:26 +04:00
ac . ac_utime = encode_comp_t ( jiffies_to_AHZ ( cputime_to_jiffies ( pacct - > ac_utime ) ) ) ;
ac . ac_stime = encode_comp_t ( jiffies_to_AHZ ( cputime_to_jiffies ( pacct - > ac_stime ) ) ) ;
2006-06-25 16:49:25 +04:00
ac . ac_flag = pacct - > ac_flag ;
2006-06-25 16:49:24 +04:00
ac . ac_mem = encode_comp_t ( pacct - > ac_mem ) ;
2006-06-25 16:49:26 +04:00
ac . ac_minflt = encode_comp_t ( pacct - > ac_minflt ) ;
ac . ac_majflt = encode_comp_t ( pacct - > ac_majflt ) ;
2006-06-25 16:49:25 +04:00
ac . ac_exitcode = pacct - > ac_exitcode ;
2006-07-14 11:24:18 +04:00
spin_unlock_irq ( & current - > sighand - > siglock ) ;
2005-04-17 02:20:36 +04:00
ac . ac_io = encode_comp_t ( 0 /* current->io_usage */ ) ; /* %% */
ac . ac_rw = encode_comp_t ( ac . ac_io / 1024 ) ;
ac . ac_swaps = encode_comp_t ( 0 ) ;
/*
2007-10-18 14:06:04 +04:00
* Kernel segment override to datasegment and write it
* to the accounting file .
*/
2005-04-17 02:20:36 +04:00
fs = get_fs ( ) ;
set_fs ( KERNEL_DS ) ;
/*
2007-10-18 14:06:04 +04:00
* Accounting records are not subject to resource limits .
*/
2005-04-17 02:20:36 +04:00
flim = current - > signal - > rlim [ RLIMIT_FSIZE ] . rlim_cur ;
current - > signal - > rlim [ RLIMIT_FSIZE ] . rlim_cur = RLIM_INFINITY ;
file - > f_op - > write ( file , ( char * ) & ac ,
sizeof ( acct_t ) , & file - > f_pos ) ;
current - > signal - > rlim [ RLIMIT_FSIZE ] . rlim_cur = flim ;
set_fs ( fs ) ;
2009-08-21 01:39:52 +04:00
out :
revert_creds ( orig_cred ) ;
2005-04-17 02:20:36 +04:00
}
2006-06-25 16:49:24 +04:00
/**
* acct_collect - collect accounting information into pacct_struct
2006-06-25 16:49:25 +04:00
* @ exitcode : task exit code
* @ group_dead : not 0 , if this thread is the last one in the process .
2006-06-25 16:49:24 +04:00
*/
2006-06-25 16:49:25 +04:00
void acct_collect ( long exitcode , int group_dead )
2006-06-25 16:49:24 +04:00
{
struct pacct_struct * pacct = & current - > signal - > pacct ;
unsigned long vsize = 0 ;
2006-06-25 16:49:25 +04:00
if ( group_dead & & current - > mm ) {
2006-06-25 16:49:24 +04:00
struct vm_area_struct * vma ;
down_read ( & current - > mm - > mmap_sem ) ;
vma = current - > mm - > mmap ;
while ( vma ) {
vsize + = vma - > vm_end - vma - > vm_start ;
vma = vma - > vm_next ;
}
up_read ( & current - > mm - > mmap_sem ) ;
}
2006-06-25 16:49:26 +04:00
spin_lock_irq ( & current - > sighand - > siglock ) ;
2006-06-25 16:49:25 +04:00
if ( group_dead )
pacct - > ac_mem = vsize / 1024 ;
if ( thread_group_leader ( current ) ) {
pacct - > ac_exitcode = exitcode ;
if ( current - > flags & PF_FORKNOEXEC )
pacct - > ac_flag | = AFORK ;
}
if ( current - > flags & PF_SUPERPRIV )
pacct - > ac_flag | = ASU ;
if ( current - > flags & PF_DUMPCORE )
pacct - > ac_flag | = ACORE ;
if ( current - > flags & PF_SIGNALED )
pacct - > ac_flag | = AXSIG ;
2011-12-15 17:56:09 +04:00
pacct - > ac_utime + = current - > utime ;
pacct - > ac_stime + = current - > stime ;
2006-06-25 16:49:26 +04:00
pacct - > ac_minflt + = current - > min_flt ;
pacct - > ac_majflt + = current - > maj_flt ;
spin_unlock_irq ( & current - > sighand - > siglock ) ;
2006-06-25 16:49:24 +04:00
}
2008-07-25 12:48:48 +04:00
static void acct_process_in_ns ( struct pid_namespace * ns )
2005-04-17 02:20:36 +04:00
{
struct file * file = NULL ;
2008-07-25 12:48:47 +04:00
struct bsd_acct_struct * acct ;
2005-04-17 02:20:36 +04:00
2008-07-25 12:48:47 +04:00
acct = ns - > bacct ;
2005-04-17 02:20:36 +04:00
/*
* accelerate the common fastpath :
*/
2008-07-25 12:48:47 +04:00
if ( ! acct | | ! acct - > file )
2005-04-17 02:20:36 +04:00
return ;
2008-07-25 12:48:45 +04:00
spin_lock ( & acct_lock ) ;
2008-07-25 12:48:47 +04:00
file = acct - > file ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( ! file ) ) {
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
return ;
}
get_file ( file ) ;
2008-07-25 12:48:45 +04:00
spin_unlock ( & acct_lock ) ;
2005-04-17 02:20:36 +04:00
2008-07-25 12:48:47 +04:00
do_acct_process ( acct , ns , file ) ;
2005-04-17 02:20:36 +04:00
fput ( file ) ;
}
2008-07-25 12:48:48 +04:00
/**
2008-07-25 12:48:49 +04:00
* acct_process - now just a wrapper around acct_process_in_ns ,
* which in turn is a wrapper around do_acct_process .
2008-07-25 12:48:48 +04:00
*
* handles process accounting for an exiting task
*/
void acct_process ( void )
{
struct pid_namespace * ns ;
2008-07-25 12:48:49 +04:00
/*
* This loop is safe lockless , since current is still
* alive and holds its namespace , which in turn holds
* its parent .
*/
2008-07-25 12:48:48 +04:00
for ( ns = task_active_pid_ns ( current ) ; ns ! = NULL ; ns = ns - > parent )
acct_process_in_ns ( ns ) ;
}