2005-04-16 15:20:36 -07:00
/*
* proc / fs / generic . c - - - generic routines for the proc - fs
*
* This file contains generic proc - fs routines for handling
* directories and files .
*
* Copyright ( C ) 1991 , 1992 Linus Torvalds .
* Copyright ( C ) 1997 Theodore Ts ' o
*/
# include <linux/errno.h>
# include <linux/time.h>
# include <linux/proc_fs.h>
# include <linux/stat.h>
# include <linux/module.h>
# include <linux/mount.h>
# include <linux/smp_lock.h>
# include <linux/init.h>
# include <linux/idr.h>
# include <linux/namei.h>
# include <linux/bitops.h>
2006-03-26 01:36:55 -08:00
# include <linux/spinlock.h>
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, a system call is done on a /proc file, a
method supplied by the module is called, and the module disappears meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend it to directories if needed.
Directory creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell at me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-15 23:39:00 -07:00
# include <linux/completion.h>
2005-04-16 15:20:36 -07:00
# include <asm/uaccess.h>
2006-01-08 01:04:16 -08:00
# include "internal.h"
2005-04-16 15:20:36 -07:00
/*
 * Forward declarations: these three handlers are referenced by the
 * proc_file_operations initializer before their definitions appear.
 */
static ssize_t proc_file_read ( struct file * file , char __user * buf ,
size_t nbytes , loff_t * ppos ) ;
static ssize_t proc_file_write ( struct file * file , const char __user * buffer ,
size_t count , loff_t * ppos ) ;
static loff_t proc_file_lseek ( struct file * , loff_t , int ) ;
2006-03-26 01:36:55 -08:00
/*
 * Taken in this file whenever the ->subdir / ->next sibling lists of
 * struct proc_dir_entry are walked or modified.
 */
DEFINE_SPINLOCK ( proc_subdir_lock ) ;
2007-02-14 00:34:12 -08:00
static int proc_match ( int len , const char * name , struct proc_dir_entry * de )
2005-04-16 15:20:36 -07:00
{
if ( de - > namelen ! = len )
return 0 ;
return ! memcmp ( name , de - > name , len ) ;
}
2007-02-12 00:55:34 -08:00
static const struct file_operations proc_file_operations = {
2005-04-16 15:20:36 -07:00
. llseek = proc_file_lseek ,
. read = proc_file_read ,
. write = proc_file_write ,
} ;
/* buffer size is one page but our output routines use some slack for overruns */
#define PROC_BLOCK_SIZE	(PAGE_SIZE - 1024)

/*
 * read() implementation for legacy /proc files.
 *
 * One page is allocated as a bounce buffer and the entry's ->get_info or
 * ->read_proc callback is invoked repeatedly, at most PROC_BLOCK_SIZE
 * bytes per call, until the request is satisfied, the callback reports
 * end of file, or an error occurs.  Each chunk is copied to user space
 * and *ppos is advanced according to the convention the callback used
 * (see the comment in the loop).
 *
 * Returns the number of bytes copied, 0 at end of file or when the
 * position is beyond MAX_NON_LFS, -ENOMEM if the page cannot be
 * allocated, and the callback's error or -EFAULT when nothing could be
 * copied before the failure.
 */
static ssize_t
proc_file_read(struct file *file, char __user *buf, size_t nbytes,
	       loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct proc_dir_entry *dp;
	unsigned long long pos;
	ssize_t retval = 0;
	ssize_t n, count;
	char *page, *start;
	int eof = 0;

	/*
	 * Gaah, please just use "seq_file" instead. The legacy /proc
	 * interfaces cut loff_t down to off_t for reads, and ignore
	 * the offset entirely for writes..
	 */
	pos = *ppos;
	if (pos > MAX_NON_LFS)
		return 0;
	if (nbytes > MAX_NON_LFS - pos)
		nbytes = MAX_NON_LFS - pos;

	dp = PDE(inode);
	page = (char *)__get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	while (nbytes > 0 && !eof) {
		const char *src;

		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
		start = NULL;

		if (dp->get_info) {
			/* Handle old net routines */
			n = dp->get_info(page, &start, *ppos, count);
			if (n < count)
				eof = 1;
		} else if (dp->read_proc) {
			/*
			 * How to be a proc read function
			 * ------------------------------
			 * Prototype:
			 *    int f(char *buffer, char **start, off_t offset,
			 *          int count, int *peof, void *dat)
			 *
			 * Assume that the buffer is "count" bytes in size.
			 *
			 * If you know you have supplied all the data you
			 * have, set *peof.
			 *
			 * You have three ways to return data:
			 * 0) Leave *start = NULL.  (This is the default.)
			 *    Put the data of the requested offset at that
			 *    offset within the buffer.  Return the number (n)
			 *    of bytes there are from the beginning of the
			 *    buffer up to the last byte of data.  If the
			 *    number of supplied bytes (= n - offset) is
			 *    greater than zero and you didn't signal eof
			 *    and the reader is prepared to take more data
			 *    you will be called again with the requested
			 *    offset advanced by the number of bytes
			 *    absorbed.  This interface is useful for files
			 *    no larger than the buffer.
			 * 1) Set *start = an unsigned long value less than
			 *    the buffer address but greater than zero.
			 *    Put the data of the requested offset at the
			 *    beginning of the buffer.  Return the number of
			 *    bytes of data placed there.  If this number is
			 *    greater than zero and you didn't signal eof
			 *    and the reader is prepared to take more data
			 *    you will be called again with the requested
			 *    offset advanced by *start.  This interface is
			 *    useful when you have a large file consisting
			 *    of a series of blocks which you want to count
			 *    and return as wholes.
			 *    (Hack by Paul.Russell@rustcorp.com.au)
			 * 2) Set *start = an address within the buffer.
			 *    Put the data of the requested offset at *start.
			 *    Return the number of bytes of data placed there.
			 *    If this number is greater than zero and you
			 *    didn't signal eof and the reader is prepared to
			 *    take more data you will be called again with the
			 *    requested offset advanced by the number of bytes
			 *    absorbed.
			 */
			n = dp->read_proc(page, &start, *ppos,
					  count, &eof, dp->data);
		} else {
			/* Entry has no read callback at all. */
			break;
		}

		if (n == 0)	/* end of file */
			break;
		if (n < 0) {	/* error */
			if (retval == 0)
				retval = n;
			break;
		}

		if (!start) {
			/* Convention 0: data sits at its file offset in the page. */
			if (n > PAGE_SIZE) {
				printk(KERN_ERR
				       "proc_file_read: Apparent buffer overflow!\n");
				n = PAGE_SIZE;
			}
			n -= *ppos;
			if (n <= 0)
				break;
			if (n > count)
				n = count;
			start = page + *ppos;
		} else if (start < page) {
			/* Convention 1: *start is a record count, data at page start. */
			if (n > PAGE_SIZE) {
				printk(KERN_ERR
				       "proc_file_read: Apparent buffer overflow!\n");
				n = PAGE_SIZE;
			}
			if (n > count) {
				/*
				 * Don't reduce n because doing so might
				 * cut off part of a data block.
				 */
				printk(KERN_WARNING
				       "proc_file_read: Read count exceeded\n");
			}
		} else {
			/* Convention 2: start >= page, data lives at *start. */
			unsigned long startoff = (unsigned long)(start - page);

			if (n > (PAGE_SIZE - startoff)) {
				printk(KERN_ERR
				       "proc_file_read: Apparent buffer overflow!\n");
				n = PAGE_SIZE - startoff;
			}
			if (n > count)
				n = count;
		}

		/* copy_to_user() returns the number of bytes it could NOT copy. */
		src = start < page ? page : start;
		n -= copy_to_user(buf, src, n);
		if (n == 0) {
			if (retval == 0)
				retval = -EFAULT;
			break;
		}

		/* Convention 1 advances by the callback-supplied count, not by bytes. */
		if (start < page)
			*ppos += (unsigned long)start;
		else
			*ppos += n;
		nbytes -= n;
		buf += n;
		retval += n;
	}

	free_page((unsigned long)page);
	return retval;
}
/*
 * write() for legacy /proc files: hand the user buffer to the entry's
 * ->write_proc callback together with the entry's ->data.
 * Returns the callback's result, or -EIO when the entry has no
 * ->write_proc.  @ppos is not passed on to the callback.
 */
static ssize_t
proc_file_write(struct file *file, const char __user *buffer,
		size_t count, loff_t *ppos)
{
	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);

	if (dp->write_proc) {
		/* FIXME: does this routine need ppos?  probably... */
		return dp->write_proc(file, buffer, count, dp->data);
	}
	return -EIO;
}
/*
 * llseek() for legacy /proc files.
 *
 * Only @orig 0 (absolute) and 1 (relative to the current position) are
 * accepted.  The resulting offset must lie in [0, MAX_NON_LFS]; otherwise
 * the file position is left untouched and -EINVAL is returned.
 * On success the new position is stored in file->f_pos and returned.
 */
static loff_t
proc_file_lseek(struct file *file, loff_t offset, int orig)
{
	if (orig != 0 && orig != 1)
		return -EINVAL;

	if (orig == 1)
		offset += file->f_pos;

	if (offset < 0 || offset > MAX_NON_LFS)
		return -EINVAL;

	file->f_pos = offset;
	return offset;
}
static int proc_notify_change ( struct dentry * dentry , struct iattr * iattr )
{
struct inode * inode = dentry - > d_inode ;
struct proc_dir_entry * de = PDE ( inode ) ;
int error ;
error = inode_change_ok ( inode , iattr ) ;
if ( error )
goto out ;
error = inode_setattr ( inode , iattr ) ;
if ( error )
goto out ;
de - > uid = inode - > i_uid ;
de - > gid = inode - > i_gid ;
de - > mode = inode - > i_mode ;
out :
return error ;
}
2005-09-06 15:17:18 -07:00
static int proc_getattr ( struct vfsmount * mnt , struct dentry * dentry ,
struct kstat * stat )
{
struct inode * inode = dentry - > d_inode ;
struct proc_dir_entry * de = PROC_I ( inode ) - > pde ;
if ( de & & de - > nlink )
inode - > i_nlink = de - > nlink ;
generic_fillattr ( inode , stat ) ;
return 0 ;
}
2007-02-12 00:55:40 -08:00
static const struct inode_operations proc_file_inode_operations = {
2005-04-16 15:20:36 -07:00
. setattr = proc_notify_change ,
} ;
/*
* This function parses a name such as " tty/driver/serial " , and
* returns the struct proc_dir_entry for " /proc/tty/driver " , and
* returns " serial " in residual .
*/
static int xlate_proc_name ( const char * name ,
struct proc_dir_entry * * ret , const char * * residual )
{
const char * cp = name , * next ;
struct proc_dir_entry * de ;
int len ;
2006-03-26 01:36:55 -08:00
int rtn = 0 ;
2005-04-16 15:20:36 -07:00
2006-03-26 01:36:55 -08:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
de = & proc_root ;
while ( 1 ) {
next = strchr ( cp , ' / ' ) ;
if ( ! next )
break ;
len = next - cp ;
for ( de = de - > subdir ; de ; de = de - > next ) {
if ( proc_match ( len , cp , de ) )
break ;
}
2006-03-26 01:36:55 -08:00
if ( ! de ) {
rtn = - ENOENT ;
goto out ;
}
2005-04-16 15:20:36 -07:00
cp + = len + 1 ;
}
* residual = cp ;
* ret = de ;
2006-03-26 01:36:55 -08:00
out :
spin_unlock ( & proc_subdir_lock ) ;
return rtn ;
2005-04-16 15:20:36 -07:00
}
/* IDR from which the ids behind dynamic /proc inode numbers are allocated. */
static DEFINE_IDR ( proc_inum_idr ) ;
static DEFINE_SPINLOCK ( proc_inum_lock ) ; /* protects the above */
/*
 * Base added to an allocated id to form the inode number; free_proc_entry()
 * treats inode numbers below this value as not dynamically allocated.
 */
# define PROC_DYNAMIC_FIRST 0xF0000000UL
/*
* Return an inode number between PROC_DYNAMIC_FIRST and
* 0xffffffff , or zero on failure .
*/
static unsigned int get_inode_number ( void )
{
int i , inum = 0 ;
int error ;
retry :
if ( idr_pre_get ( & proc_inum_idr , GFP_KERNEL ) = = 0 )
return 0 ;
spin_lock ( & proc_inum_lock ) ;
error = idr_get_new ( & proc_inum_idr , NULL , & i ) ;
spin_unlock ( & proc_inum_lock ) ;
if ( error = = - EAGAIN )
goto retry ;
else if ( error )
return 0 ;
inum = ( i & MAX_ID_MASK ) + PROC_DYNAMIC_FIRST ;
/* inum will never be more than 0xf0ffffff, so no check
* for overflow .
*/
return inum ;
}
/*
 * Give the id behind a dynamic inode number back to proc_inum_idr.
 * @inum must have been produced by get_inode_number().
 */
static void release_inode_number(unsigned int inum)
{
	/*
	 * Undo the PROC_DYNAMIC_FIRST offset; the bits outside MAX_ID_MASK
	 * are set before the id is handed to idr_remove().
	 * NOTE(review): presumably this matches what idr_get_new() returned
	 * before masking -- confirm against the idr implementation.
	 */
	int idr_id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;

	spin_lock(&proc_inum_lock);
	idr_remove(&proc_inum_idr, idr_id);
	spin_unlock(&proc_inum_lock);
}
[PATCH] Fix up symlink function pointers
This fixes up the symlink functions for the calling convention change:
* afs, autofs4, befs, devfs, freevxfs, jffs2, jfs, ncpfs, procfs,
smbfs, sysvfs, ufs, xfs - prototype change for ->follow_link()
* befs, smbfs, xfs - same for ->put_link()
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-08-20 00:17:39 +01:00
static void * proc_follow_link ( struct dentry * dentry , struct nameidata * nd )
2005-04-16 15:20:36 -07:00
{
nd_set_link ( nd , PDE ( dentry - > d_inode ) - > data ) ;
[PATCH] Fix up symlink function pointers
This fixes up the symlink functions for the calling convention change:
* afs, autofs4, befs, devfs, freevxfs, jffs2, jfs, ncpfs, procfs,
smbfs, sysvfs, ufs, xfs - prototype change for ->follow_link()
* befs, smbfs, xfs - same for ->put_link()
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-08-20 00:17:39 +01:00
return NULL ;
2005-04-16 15:20:36 -07:00
}
2007-02-12 00:55:40 -08:00
static const struct inode_operations proc_link_inode_operations = {
2005-04-16 15:20:36 -07:00
. readlink = generic_readlink ,
. follow_link = proc_follow_link ,
} ;
/*
 * Entries in /proc are volatile, so unused dentries should not be kept
 * around: unconditionally answer 1 from ->d_delete.  This could be made
 * smarter, e.g. with a "volatile" flag in the inode telling which
 * dentries are worth keeping.
 */
static int proc_delete_dentry(struct dentry *dentry)
{
	return 1;
}
/*
 * Dentry methods attached by proc_lookup(); proc_kill_inodes() also uses
 * the address of this table to recognise dentries created here.
 */
static struct dentry_operations proc_dentry_operations = {
	.d_delete	= proc_delete_dentry,
};
/*
* Don ' t create negative dentries here , return - ENOENT by hand
* instead .
*/
struct dentry * proc_lookup ( struct inode * dir , struct dentry * dentry , struct nameidata * nd )
{
struct inode * inode = NULL ;
struct proc_dir_entry * de ;
int error = - ENOENT ;
lock_kernel ( ) ;
2006-03-26 01:36:55 -08:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
de = PDE ( dir ) ;
if ( de ) {
for ( de = de - > subdir ; de ; de = de - > next ) {
if ( de - > namelen ! = dentry - > d_name . len )
continue ;
if ( ! memcmp ( dentry - > d_name . name , de - > name , de - > namelen ) ) {
unsigned int ino = de - > low_ino ;
2007-05-08 00:25:45 -07:00
de_get ( de ) ;
2006-03-26 01:36:55 -08:00
spin_unlock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
error = - EINVAL ;
inode = proc_get_inode ( dir - > i_sb , ino , de ) ;
2006-03-26 01:36:55 -08:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
break ;
}
}
}
2006-03-26 01:36:55 -08:00
spin_unlock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
unlock_kernel ( ) ;
if ( inode ) {
dentry - > d_op = & proc_dentry_operations ;
d_add ( dentry , inode ) ;
return NULL ;
}
2007-05-08 00:25:45 -07:00
de_put ( de ) ;
2005-04-16 15:20:36 -07:00
return ERR_PTR ( error ) ;
}
/*
 * This returns non-zero if at EOF, so that the /proc
 * root directory can use this and check if it should
 * continue with the <pid> entries..
 *
 * Note that the VFS-layer doesn't care about the return
 * value of the readdir() call, as long as it's non-negative
 * for success..
 *
 * Positions 0 and 1 are "." and ".."; position N >= 2 is the (N-2)th
 * child on the directory's ->subdir list.  Returns 1 once the list has
 * been exhausted, 0 when @filldir stopped early, and -EINVAL when the
 * inode has no proc_dir_entry.
 */
int proc_readdir(struct file *filp,
		 void *dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct proc_dir_entry *de;
	unsigned int ino;
	int pos;
	int ret = 0;

	lock_kernel();

	ino = inode->i_ino;
	de = PDE(inode);
	if (!de) {
		ret = -EINVAL;
		goto out;
	}

	pos = filp->f_pos;
	if (pos == 0) {
		if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
			goto out;
		pos++;
		filp->f_pos++;
	}
	if (pos == 1) {
		if (filldir(dirent, "..", 2, pos,
			    parent_ino(filp->f_path.dentry),
			    DT_DIR) < 0)
			goto out;
		pos++;
		filp->f_pos++;
	}

	spin_lock(&proc_subdir_lock);

	/* Skip the children that were already reported by earlier calls. */
	de = de->subdir;
	for (pos -= 2; de && pos; pos--)
		de = de->next;

	while (de) {
		struct proc_dir_entry *next;

		/*
		 * filldir passes info to user space, so it is called with the
		 * spinlock dropped; the reference keeps the entry alive.
		 */
		de_get(de);
		spin_unlock(&proc_subdir_lock);
		if (filldir(dirent, de->name, de->namelen, filp->f_pos,
			    de->low_ino, de->mode >> 12) < 0) {
			de_put(de);
			goto out;
		}
		spin_lock(&proc_subdir_lock);
		filp->f_pos++;
		next = de->next;
		de_put(de);
		de = next;
	}
	spin_unlock(&proc_subdir_lock);
	ret = 1;
out:
	unlock_kernel();
	return ret;
}
/*
* These are the generic / proc directory operations . They
* use the in - memory " struct proc_dir_entry " tree to parse
* the / proc directory .
*/
2007-02-12 00:55:34 -08:00
static const struct file_operations proc_dir_operations = {
2005-04-16 15:20:36 -07:00
. read = generic_read_dir ,
. readdir = proc_readdir ,
} ;
/*
* proc directories can do almost nothing . .
*/
2007-02-12 00:55:40 -08:00
static const struct inode_operations proc_dir_inode_operations = {
2005-04-16 15:20:36 -07:00
. lookup = proc_lookup ,
2005-09-06 15:17:18 -07:00
. getattr = proc_getattr ,
2005-04-16 15:20:36 -07:00
. setattr = proc_notify_change ,
} ;
static int proc_register ( struct proc_dir_entry * dir , struct proc_dir_entry * dp )
{
unsigned int i ;
i = get_inode_number ( ) ;
if ( i = = 0 )
return - EAGAIN ;
dp - > low_ino = i ;
2006-03-26 01:36:55 -08:00
2005-04-16 15:20:36 -07:00
if ( S_ISDIR ( dp - > mode ) ) {
if ( dp - > proc_iops = = NULL ) {
dp - > proc_fops = & proc_dir_operations ;
dp - > proc_iops = & proc_dir_inode_operations ;
}
dir - > nlink + + ;
} else if ( S_ISLNK ( dp - > mode ) ) {
if ( dp - > proc_iops = = NULL )
dp - > proc_iops = & proc_link_inode_operations ;
} else if ( S_ISREG ( dp - > mode ) ) {
if ( dp - > proc_fops = = NULL )
dp - > proc_fops = & proc_file_operations ;
if ( dp - > proc_iops = = NULL )
dp - > proc_iops = & proc_file_inode_operations ;
}
2007-07-15 23:40:09 -07:00
spin_lock ( & proc_subdir_lock ) ;
dp - > next = dir - > subdir ;
dp - > parent = dir ;
dir - > subdir = dp ;
spin_unlock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
/*
* Kill an inode that got unregistered . .
*/
static void proc_kill_inodes ( struct proc_dir_entry * de )
{
struct list_head * p ;
struct super_block * sb = proc_mnt - > mnt_sb ;
/*
* Actually it ' s a partial revoke ( ) .
*/
file_list_lock ( ) ;
list_for_each ( p , & sb - > s_files ) {
2005-10-30 15:02:16 -08:00
struct file * filp = list_entry ( p , struct file , f_u . fu_list ) ;
2006-12-08 02:36:36 -08:00
struct dentry * dentry = filp - > f_path . dentry ;
2005-04-16 15:20:36 -07:00
struct inode * inode ;
2006-03-28 01:56:41 -08:00
const struct file_operations * fops ;
2005-04-16 15:20:36 -07:00
if ( dentry - > d_op ! = & proc_dentry_operations )
continue ;
inode = dentry - > d_inode ;
if ( PDE ( inode ) ! = de )
continue ;
fops = filp - > f_op ;
filp - > f_op = NULL ;
fops_put ( fops ) ;
}
file_list_unlock ( ) ;
}
/*
 * Allocate and initialise (but do not register) a proc_dir_entry.
 *
 * @parent: in/out.  When *parent is NULL, @name is resolved with
 *          xlate_proc_name() and *parent is set to the directory that
 *          should contain the new entry.
 * @name:   entry name; may contain a '/'-separated path only when
 *          *parent is NULL on entry.
 * @mode:   stored in ent->mode as given.
 * @nlink:  stored in ent->nlink as given.
 *
 * The name is stored in the same allocation, directly behind the
 * structure.  Returns the new entry, or NULL when the name is invalid
 * (NULL, empty, empty final component such as "dir/", unresolvable
 * directory, or a '/' left in the final component) or when memory
 * allocation fails.
 */
static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
					  const char *name,
					  mode_t mode,
					  nlink_t nlink)
{
	struct proc_dir_entry *ent;
	const char *fn = name;
	int len;

	/* make sure name is valid */
	if (!name || !*name)
		return NULL;

	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
		return NULL;

	/* At this point there must not be any '/' characters beyond *fn */
	if (strchr(fn, '/'))
		return NULL;

	/*
	 * A trailing '/' in the path leaves an empty final component;
	 * refuse it rather than creating an entry with no name.
	 */
	len = strlen(fn);
	if (len == 0)
		return NULL;

	/* Zeroed allocation: every field not set below starts out as 0/NULL. */
	ent = kzalloc(sizeof(*ent) + len + 1, GFP_KERNEL);
	if (!ent)
		return NULL;

	memcpy(((char *)ent) + sizeof(*ent), fn, len + 1);
	ent->name = ((char *)ent) + sizeof(*ent);
	ent->namelen = len;
	ent->mode = mode;
	ent->nlink = nlink;
	/* pde_users and pde_unload_completion are already zero from kzalloc(). */
	spin_lock_init(&ent->pde_unload_lock);

	return ent;
}
struct proc_dir_entry * proc_symlink ( const char * name ,
struct proc_dir_entry * parent , const char * dest )
{
struct proc_dir_entry * ent ;
ent = proc_create ( & parent , name ,
( S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO ) , 1 ) ;
if ( ent ) {
ent - > data = kmalloc ( ( ent - > size = strlen ( dest ) ) + 1 , GFP_KERNEL ) ;
if ( ent - > data ) {
strcpy ( ( char * ) ent - > data , dest ) ;
if ( proc_register ( parent , ent ) < 0 ) {
kfree ( ent - > data ) ;
kfree ( ent ) ;
ent = NULL ;
}
} else {
kfree ( ent ) ;
ent = NULL ;
}
}
return ent ;
}
/*
 * Create and register a directory @name under @parent with permission
 * bits @mode (S_IFDIR is added here) and an initial link count of 2.
 * Returns the new entry, or NULL on failure.
 */
struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
		struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent = proc_create(&parent, name, S_IFDIR | mode, 2);

	if (!ent)
		return NULL;

	if (proc_register(parent, ent) < 0) {
		kfree(ent);
		return NULL;
	}
	return ent;
}
struct proc_dir_entry * proc_mkdir ( const char * name ,
struct proc_dir_entry * parent )
{
return proc_mkdir_mode ( name , S_IRUGO | S_IXUGO , parent ) ;
}
/*
 * Create and register a /proc entry @name under @parent.
 *
 * Missing pieces of @mode are defaulted: a mode without a file type
 * becomes a regular file; a mode without any permission bits becomes
 * world-readable (plus world-searchable for directories).  Directories
 * start with a link count of 2, everything else with 1.
 * Returns the new entry, or NULL on failure.
 */
struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
					 struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent;
	nlink_t nlink;

	if (S_ISDIR(mode)) {
		if (!(mode & S_IALLUGO))
			mode |= S_IRUGO | S_IXUGO;
		nlink = 2;
	} else {
		if (!(mode & S_IFMT))
			mode |= S_IFREG;
		if (!(mode & S_IALLUGO))
			mode |= S_IRUGO;
		nlink = 1;
	}

	ent = proc_create(&parent, name, mode, nlink);
	if (!ent)
		return NULL;

	if (proc_register(parent, ent) < 0) {
		kfree(ent);
		return NULL;
	}
	return ent;
}
void free_proc_entry ( struct proc_dir_entry * de )
{
unsigned int ino = de - > low_ino ;
if ( ino < PROC_DYNAMIC_FIRST )
return ;
release_inode_number ( ino ) ;
if ( S_ISLNK ( de - > mode ) & & de - > data )
kfree ( de - > data ) ;
kfree ( de ) ;
}
/*
* Remove a / proc entry and free it if it ' s not currently in use .
* If it is in use , we set the ' deleted ' flag .
*/
void remove_proc_entry ( const char * name , struct proc_dir_entry * parent )
{
struct proc_dir_entry * * p ;
struct proc_dir_entry * de ;
const char * fn = name ;
int len ;
if ( ! parent & & xlate_proc_name ( name , & parent , & fn ) ! = 0 )
goto out ;
len = strlen ( fn ) ;
2006-03-26 01:36:55 -08:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
for ( p = & parent - > subdir ; * p ; p = & ( * p ) - > next ) {
if ( ! proc_match ( len , fn , * p ) )
continue ;
de = * p ;
* p = de - > next ;
de - > next = NULL ;
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-15 23:39:00 -07:00
spin_lock ( & de - > pde_unload_lock ) ;
/*
* Stop accepting new callers into module . If you ' re
* dynamically allocating - > proc_fops , save a pointer somewhere .
*/
de - > proc_fops = NULL ;
/* Wait until all existing callers into module are done. */
if ( de - > pde_users > 0 ) {
DECLARE_COMPLETION_ONSTACK ( c ) ;
if ( ! de - > pde_unload_completion )
de - > pde_unload_completion = & c ;
spin_unlock ( & de - > pde_unload_lock ) ;
spin_unlock ( & proc_subdir_lock ) ;
wait_for_completion ( de - > pde_unload_completion ) ;
spin_lock ( & proc_subdir_lock ) ;
goto continue_removing ;
}
spin_unlock ( & de - > pde_unload_lock ) ;
continue_removing :
2005-04-16 15:20:36 -07:00
if ( S_ISDIR ( de - > mode ) )
parent - > nlink - - ;
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-15 23:39:00 -07:00
if ( ! S_ISREG ( de - > mode ) )
proc_kill_inodes ( de ) ;
2005-04-16 15:20:36 -07:00
de - > nlink = 0 ;
WARN_ON ( de - > subdir ) ;
if ( ! atomic_read ( & de - > count ) )
free_proc_entry ( de ) ;
else {
de - > deleted = 1 ;
printk ( " remove_proc_entry: %s/%s busy, count=%d \n " ,
parent - > name , de - > name , atomic_read ( & de - > count ) ) ;
}
break ;
}
2006-03-26 01:36:55 -08:00
spin_unlock ( & proc_subdir_lock ) ;
2005-04-16 15:20:36 -07:00
out :
return ;
}