2005-04-17 02:20:36 +04:00
/*
 * proc/fs/generic.c --- generic routines for the proc-fs
 *
 * This file contains generic proc-fs routines for handling
 * directories and files.
 *
 * Copyright (C) 1991, 1992 Linus Torvalds.
 * Copyright (C) 1997 Theodore Ts'o
 */
# include <linux/errno.h>
# include <linux/time.h>
# include <linux/proc_fs.h>
# include <linux/stat.h>
# include <linux/module.h>
# include <linux/mount.h>
# include <linux/init.h>
# include <linux/idr.h>
# include <linux/namei.h>
# include <linux/bitops.h>
2006-03-26 13:36:55 +04:00
# include <linux/spinlock.h>
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, a system call is done on a /proc file, a
method supplied by the module is called, and the module disappears meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 10:39:00 +04:00
# include <linux/completion.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
2006-01-08 12:04:16 +03:00
# include "internal.h"
2006-03-26 13:36:55 +04:00
/*
 * Spinlock held in this file around every walk or modification of the
 * proc_dir_entry ->subdir / ->next lists.
 */
DEFINE_SPINLOCK ( proc_subdir_lock ) ;
2007-02-14 11:34:12 +03:00
/*
 * Compare a name of explicit length against the name of a proc_dir_entry.
 *
 * Returns 1 when de's namelen equals len and the first len bytes of both
 * names are identical, 0 otherwise. name need not be NUL-terminated at len.
 */
static int proc_match ( int len , const char * name , struct proc_dir_entry * de )
2005-04-17 02:20:36 +04:00
{
/* Length check first: it also guarantees memcmp() stays inside de's name. */
if ( de - > namelen ! = len )
return 0 ;
return ! memcmp ( name , de - > name , len ) ;
}
/* buffer size is one page but our output routines use some slack for overruns */
# define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
/*
 * Legacy read path for entries that supply ->read_proc.
 *
 * Calls dp->read_proc repeatedly into one freshly allocated page and copies
 * what it produced to the user buffer, asking for at most PROC_BLOCK_SIZE
 * bytes per call, until nbytes is used up, the callback sets eof, or the
 * callback returns 0.
 *
 * Returns the number of bytes copied to buf. When nothing was copied the
 * result is the callback's negative return value, -EFAULT if copy_to_user()
 * delivered nothing, -ENOMEM if the page could not be allocated, or 0
 * (this includes *ppos beyond MAX_NON_LFS and entries without ->read_proc).
 */
static ssize_t
2009-02-20 17:04:33 +03:00
__proc_file_read ( struct file * file , char __user * buf , size_t nbytes ,
2005-04-17 02:20:36 +04:00
loff_t * ppos )
{
2006-12-08 13:36:36 +03:00
struct inode * inode = file - > f_path . dentry - > d_inode ;
2005-04-17 02:20:36 +04:00
char * page ;
ssize_t retval = 0 ;
int eof = 0 ;
ssize_t n , count ;
char * start ;
struct proc_dir_entry * dp ;
2005-12-30 19:39:10 +03:00
unsigned long long pos ;
/*
* Gaah , please just use " seq_file " instead . The legacy / proc
* interfaces cut loff_t down to off_t for reads , and ignore
* the offset entirely for writes . .
*/
pos = * ppos ;
if ( pos > MAX_NON_LFS )
return 0 ;
/* Clamp the request so the offset can never advance past MAX_NON_LFS. */
if ( nbytes > MAX_NON_LFS - pos )
nbytes = MAX_NON_LFS - pos ;
2005-04-17 02:20:36 +04:00
dp = PDE ( inode ) ;
2007-10-16 12:25:52 +04:00
/* One scratch page, reused by every read_proc call in the loop below. */
if ( ! ( page = ( char * ) __get_free_page ( GFP_TEMPORARY ) ) )
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
while ( ( nbytes > 0 ) & & ! eof ) {
count = min_t ( size_t , PROC_BLOCK_SIZE , nbytes ) ;
start = NULL ;
2008-04-29 12:01:58 +04:00
if ( dp - > read_proc ) {
2005-04-17 02:20:36 +04:00
/*
* How to be a proc read function
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* Prototype :
* int f ( char * buffer , char * * start , off_t offset ,
* int count , int * peof , void * dat )
*
* Assume that the buffer is " count " bytes in size .
*
* If you know you have supplied all the data you
* have , set * peof .
*
* You have three ways to return data :
* 0 ) Leave * start = NULL . ( This is the default . )
* Put the data of the requested offset at that
* offset within the buffer . Return the number ( n )
* of bytes there are from the beginning of the
* buffer up to the last byte of data . If the
* number of supplied bytes ( = n - offset ) is
* greater than zero and you didn ' t signal eof
* and the reader is prepared to take more data
* you will be called again with the requested
* offset advanced by the number of bytes
* absorbed . This interface is useful for files
* no larger than the buffer .
* 1 ) Set * start = an unsigned long value less than
* the buffer address but greater than zero .
* Put the data of the requested offset at the
* beginning of the buffer . Return the number of
* bytes of data placed there . If this number is
* greater than zero and you didn ' t signal eof
* and the reader is prepared to take more data
* you will be called again with the requested
* offset advanced by * start . This interface is
* useful when you have a large file consisting
* of a series of blocks which you want to count
* and return as wholes .
* ( Hack by Paul . Russell @ rustcorp . com . au )
* 2 ) Set * start = an address within the buffer .
* Put the data of the requested offset at * start .
* Return the number of bytes of data placed there .
* If this number is greater than zero and you
* didn ' t signal eof and the reader is prepared to
* take more data you will be called again with the
* requested offset advanced by the number of bytes
* absorbed .
*/
n = dp - > read_proc ( page , & start , * ppos ,
count , & eof , dp - > data ) ;
} else
/* No ->read_proc hook: nothing to read, retval stays 0. */
break ;
if ( n = = 0 ) /* end of file */
break ;
if ( n < 0 ) { /* error */
/* Report the error only if no data has been handed out yet. */
if ( retval = = 0 )
retval = n ;
break ;
}
/* Method 0: the data lives at offset *ppos inside the page. */
if ( start = = NULL ) {
if ( n > PAGE_SIZE ) {
printk ( KERN_ERR
" proc_file_read: Apparent buffer overflow! \n " ) ;
n = PAGE_SIZE ;
}
/* n counted from the page start; turn it into bytes available at *ppos. */
n - = * ppos ;
if ( n < = 0 )
break ;
if ( n > count )
n = count ;
start = page + * ppos ;
/* Method 1: start is a small integer, the data is at the page start. */
} else if ( start < page ) {
if ( n > PAGE_SIZE ) {
printk ( KERN_ERR
" proc_file_read: Apparent buffer overflow! \n " ) ;
n = PAGE_SIZE ;
}
if ( n > count ) {
/*
* Don ' t reduce n because doing so might
* cut off part of a data block .
*/
printk ( KERN_WARNING
" proc_file_read: Read count exceeded \n " ) ;
}
} else /* start >= page */ {
/* Method 2: start points at the data inside the page. */
unsigned long startoff = ( unsigned long ) ( start - page ) ;
if ( n > ( PAGE_SIZE - startoff ) ) {
printk ( KERN_ERR
" proc_file_read: Apparent buffer overflow! \n " ) ;
n = PAGE_SIZE - startoff ;
}
if ( n > count )
n = count ;
}
/*
 * copy_to_user() reports the bytes it could not copy, so after the
 * subtraction n is the number of bytes actually delivered.
 */
n - = copy_to_user ( buf , start < page ? page : start , n ) ;
if ( n = = 0 ) {
if ( retval = = 0 )
retval = - EFAULT ;
break ;
}
/* Method 1 advances the offset by the value of start, the others by n. */
* ppos + = start < page ? ( unsigned long ) start : n ;
nbytes - = n ;
buf + = n ;
retval + = n ;
}
free_page ( ( unsigned long ) page ) ;
return retval ;
}
2009-02-20 17:04:33 +03:00
/*
 * ->read hook for legacy /proc files.
 *
 * Registers the caller as a user of the entry (pde_users) under
 * pde_unload_lock, runs __proc_file_read(), then drops the user count again
 * through pde_users_dec().
 *
 * Returns whatever __proc_file_read() returns, or -EIO when the entry's
 * ->proc_fops is already NULL at the time of the call.
 */
static ssize_t
proc_file_read(struct file *file, char __user *buf, size_t nbytes,
	       loff_t *ppos)
{
	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
	ssize_t rv;
	int usable;

	/* Check and pin in one critical section so both happen atomically. */
	spin_lock(&pde->pde_unload_lock);
	usable = (pde->proc_fops != NULL);
	if (usable)
		pde->pde_users++;
	spin_unlock(&pde->pde_unload_lock);

	if (!usable)
		return -EIO;

	rv = __proc_file_read(file, buf, nbytes, ppos);
	pde_users_dec(pde);
	return rv;
}
2005-04-17 02:20:36 +04:00
/*
 * ->write hook for legacy /proc files.
 *
 * Forwards the user buffer to the entry's ->write_proc callback while the
 * entry is counted as in use (pde_users). *ppos is not passed on.
 *
 * Returns the callback's result, or -EIO when the entry has no ->write_proc
 * or its ->proc_fops is already NULL.
 */
static ssize_t
proc_file_write ( struct file * file , const char __user * buffer ,
size_t count , loff_t * ppos )
{
2009-02-20 17:04:33 +03:00
struct proc_dir_entry * pde = PDE ( file - > f_path . dentry - > d_inode ) ;
ssize_t rv = - EIO ;
if ( pde - > write_proc ) {
/* Check ->proc_fops and take the user count under the same lock. */
spin_lock ( & pde - > pde_unload_lock ) ;
if ( ! pde - > proc_fops ) {
spin_unlock ( & pde - > pde_unload_lock ) ;
return rv ;
}
pde - > pde_users + + ;
spin_unlock ( & pde - > pde_unload_lock ) ;
2005-04-17 02:20:36 +04:00
2009-02-20 17:04:33 +03:00
/* FIXME: does this routine need ppos? probably... */
rv = pde - > write_proc ( file , buffer , count , pde - > data ) ;
/* Drop the user count taken above. */
pde_users_dec ( pde ) ;
}
return rv ;
2005-04-17 02:20:36 +04:00
}
/*
 * ->llseek hook for legacy /proc files.
 *
 * orig 0 sets the position to offset, orig 1 sets it relative to the
 * current file->f_pos. Any other orig value, a negative result, or a
 * result above MAX_NON_LFS leaves f_pos untouched and returns -EINVAL.
 * On success the new position is stored in file->f_pos and returned.
 */
static loff_t
proc_file_lseek ( struct file * file , loff_t offset , int orig )
{
2005-12-30 19:39:10 +03:00
loff_t retval = - EINVAL ;
switch ( orig ) {
case 1 :
/* Relative seek: convert to an absolute offset, then share case 0. */
offset + = file - > f_pos ;
/* fallthrough */
case 0 :
if ( offset < 0 | | offset > MAX_NON_LFS )
break ;
file - > f_pos = retval = offset ;
}
return retval ;
2005-04-17 02:20:36 +04:00
}
2008-02-08 15:18:27 +03:00
/*
 * File operations for legacy read_proc/write_proc entries. proc_register()
 * installs this table on regular files that were created without their own
 * ->proc_fops.
 */
static const struct file_operations proc_file_operations = {
	.read		= proc_file_read,
	.write		= proc_file_write,
	.llseek		= proc_file_lseek,
};
2005-04-17 02:20:36 +04:00
static int proc_notify_change ( struct dentry * dentry , struct iattr * iattr )
{
struct inode * inode = dentry - > d_inode ;
struct proc_dir_entry * de = PDE ( inode ) ;
int error ;
error = inode_change_ok ( inode , iattr ) ;
if ( error )
goto out ;
error = inode_setattr ( inode , iattr ) ;
if ( error )
goto out ;
de - > uid = inode - > i_uid ;
de - > gid = inode - > i_gid ;
de - > mode = inode - > i_mode ;
out :
return error ;
}
2005-09-07 02:17:18 +04:00
static int proc_getattr ( struct vfsmount * mnt , struct dentry * dentry ,
struct kstat * stat )
{
struct inode * inode = dentry - > d_inode ;
struct proc_dir_entry * de = PROC_I ( inode ) - > pde ;
if ( de & & de - > nlink )
inode - > i_nlink = de - > nlink ;
generic_fillattr ( inode , stat ) ;
return 0 ;
}
2007-02-12 11:55:40 +03:00
/*
 * Inode operations installed by proc_register() on regular files that were
 * created without their own ->proc_iops: only setattr is provided.
 */
static const struct inode_operations proc_file_inode_operations = {
2005-04-17 02:20:36 +04:00
. setattr = proc_notify_change ,
} ;
/*
* This function parses a name such as " tty/driver/serial " , and
* returns the struct proc_dir_entry for " /proc/tty/driver " , and
* returns " serial " in residual .
*/
/*
 * Parameters:
 *   name     - '/'-separated path, resolved relative to *ret.
 *   ret      - in: directory to start from, or NULL to start at proc_root;
 *              out: the directory that holds the last path component.
 *   residual - out: pointer into name at the last path component.
 *
 * Returns 0 on success. Returns -ENOENT when an intermediate component is
 * not found; *ret and *residual are then left unmodified. The whole walk
 * runs under proc_subdir_lock.
 */
static int xlate_proc_name ( const char * name ,
struct proc_dir_entry * * ret , const char * * residual )
{
const char * cp = name , * next ;
struct proc_dir_entry * de ;
int len ;
2006-03-26 13:36:55 +04:00
int rtn = 0 ;
2005-04-17 02:20:36 +04:00
proc: less special case in xlate code
If valid "parent" is passed to proc_create/remove_proc_entry(), then name of
PDE should consist of only one path component, otherwise creation or or
removal will fail. However, if NULL is passed as parent then create/remove
accept full path as a argument. This is arbitrary restriction -- all
infrastructure is in place.
So, patch allows the following to succeed:
create_proc_entry("foo/bar", 0, pde_baz);
remove_proc_entry("baz/foo/bar", &proc_root);
Also makes the following to behave identically:
create_proc_entry("foo/bar", 0, NULL);
create_proc_entry("foo/bar", 0, &proc_root);
Discrepancy noticed by Den Lunev (IIRC).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 12:01:40 +04:00
de = * ret ;
if ( ! de )
de = & proc_root ;
2006-03-26 13:36:55 +04:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
/* Descend one directory per '/' found; stop at the last component. */
while ( 1 ) {
next = strchr ( cp , ' / ' ) ;
if ( ! next )
break ;
len = next - cp ;
/* Look for a child of de whose name is the len bytes at cp. */
for ( de = de - > subdir ; de ; de = de - > next ) {
if ( proc_match ( len , cp , de ) )
break ;
}
2006-03-26 13:36:55 +04:00
if ( ! de ) {
rtn = - ENOENT ;
goto out ;
}
2005-04-17 02:20:36 +04:00
/* Skip the matched component and its trailing '/'. */
cp + = len + 1 ;
}
* residual = cp ;
* ret = de ;
2006-03-26 13:36:55 +04:00
out :
spin_unlock ( & proc_subdir_lock ) ;
return rtn ;
2005-04-17 02:20:36 +04:00
}
2008-07-26 11:21:37 +04:00
/* ID allocator backing the dynamic /proc inode numbers handed out below. */
static DEFINE_IDA ( proc_inum_ida ) ;
2005-04-17 02:20:36 +04:00
static DEFINE_SPINLOCK ( proc_inum_lock ) ; /* protects the above */
2008-07-26 11:18:28 +04:00
/* Base added to an allocated id to form the inode number. */
# define PROC_DYNAMIC_FIRST 0xF0000000U
2005-04-17 02:20:36 +04:00
/*
* Return an inode number between PROC_DYNAMIC_FIRST and
* 0xffffffff , or zero on failure .
2009-01-13 13:53:48 +03:00
*
* Current inode allocations in the proc-fs (hex-numbers):
*
* 00000000		reserved
* 00000001-00000fff	static entries	(goners)
*      001		root-ino
*
* 00001000-00001fff	unused
* 0001xxxx-7fffxxxx	pid-dir entries for pid 1-7fff
* 80000000-efffffff	unused
* f0000000-ffffffff	dynamic entries
*
* Goal:
*	Once we split the thing into several virtual filesystems,
*	we will get rid of magical ranges (and this comment, BTW).
2005-04-17 02:20:36 +04:00
*/
/*
 * Allocate an id from proc_inum_ida and turn it into an inode number by
 * adding PROC_DYNAMIC_FIRST. Returns 0 when ida_pre_get() reports failure,
 * when ida_get_new() fails with anything but -EAGAIN, or when the id would
 * push the result past UINT_MAX.
 */
static unsigned int get_inode_number ( void )
{
2008-07-26 11:18:28 +04:00
unsigned int i ;
2005-04-17 02:20:36 +04:00
int error ;
retry :
2008-07-26 11:21:37 +04:00
/* Called outside the spinlock; it is passed GFP_KERNEL. */
if ( ida_pre_get ( & proc_inum_ida , GFP_KERNEL ) = = 0 )
2005-04-17 02:20:36 +04:00
return 0 ;
spin_lock ( & proc_inum_lock ) ;
2008-07-26 11:21:37 +04:00
error = ida_get_new ( & proc_inum_ida , & i ) ;
2005-04-17 02:20:36 +04:00
spin_unlock ( & proc_inum_lock ) ;
/* -EAGAIN: start over from ida_pre_get(). */
if ( error = = - EAGAIN )
goto retry ;
else if ( error )
return 0 ;
2008-07-26 11:18:28 +04:00
/* Give the id back if PROC_DYNAMIC_FIRST + i would not fit in 32 bits. */
if ( i > UINT_MAX - PROC_DYNAMIC_FIRST ) {
spin_lock ( & proc_inum_lock ) ;
2008-07-26 11:21:37 +04:00
ida_remove ( & proc_inum_ida , i ) ;
2008-07-26 11:18:28 +04:00
spin_unlock ( & proc_inum_lock ) ;
2008-08-02 07:30:48 +04:00
return 0 ;
2008-07-26 11:18:28 +04:00
}
return PROC_DYNAMIC_FIRST + i ;
2005-04-17 02:20:36 +04:00
}
/*
 * Return an inode number obtained from get_inode_number() to the allocator.
 * inum is converted back to the underlying id by subtracting
 * PROC_DYNAMIC_FIRST; the removal happens under proc_inum_lock.
 */
static void release_inode_number ( unsigned int inum )
{
spin_lock ( & proc_inum_lock ) ;
2008-07-26 11:21:37 +04:00
ida_remove ( & proc_inum_ida , inum - PROC_DYNAMIC_FIRST ) ;
2005-04-17 02:20:36 +04:00
spin_unlock ( & proc_inum_lock ) ;
}
[PATCH] Fix up symlink function pointers
This fixes up the symlink functions for the calling convention change:
* afs, autofs4, befs, devfs, freevxfs, jffs2, jfs, ncpfs, procfs,
smbfs, sysvfs, ufs, xfs - prototype change for ->follow_link()
* befs, smbfs, xfs - same for ->put_link()
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-08-20 03:17:39 +04:00
/*
 * ->follow_link hook for /proc symlinks: hands the entry's ->data pointer
 * to nd_set_link() as the link target and returns NULL.
 */
static void * proc_follow_link ( struct dentry * dentry , struct nameidata * nd )
2005-04-17 02:20:36 +04:00
{
nd_set_link ( nd , PDE ( dentry - > d_inode ) - > data ) ;
[PATCH] Fix up symlink function pointers
This fixes up the symlink functions for the calling convention change:
* afs, autofs4, befs, devfs, freevxfs, jffs2, jfs, ncpfs, procfs,
smbfs, sysvfs, ufs, xfs - prototype change for ->follow_link()
* befs, smbfs, xfs - same for ->put_link()
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-08-20 03:17:39 +04:00
return NULL ;
2005-04-17 02:20:36 +04:00
}
2007-02-12 11:55:40 +03:00
/*
 * Inode operations installed by proc_register() on symlink entries that
 * were created without their own ->proc_iops.
 */
static const struct inode_operations proc_link_inode_operations = {
2005-04-17 02:20:36 +04:00
. readlink = generic_readlink ,
. follow_link = proc_follow_link ,
} ;
/*
* As some entries in / proc are volatile , we want to
* get rid of unused dentries . This could be made
* smarter : we could keep a " volatile " flag in the
* inode to indicate which ones to keep .
*/
/* ->d_delete hook for /proc dentries; the dentry is not inspected. */
static int proc_delete_dentry(struct dentry *dentry)
{
	/* Unconditionally 1, so no /proc dentry is ever singled out to keep. */
	return 1;
}
2009-02-20 08:58:47 +03:00
/* Dentry operations that proc_lookup_de() attaches to every dentry it adds. */
static const struct dentry_operations proc_dentry_operations =
2005-04-17 02:20:36 +04:00
{
. d_delete = proc_delete_dentry ,
} ;
/*
* Don ' t create negative dentries here , return - ENOENT by hand
* instead .
*/
[NET]: Make /proc/net a symlink on /proc/self/net (v3)
Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.
The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.
The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.
# ls -l /proc/net
lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.
Changes from v2:
* Fixed discrepancy of /proc/net nlink count and selinux labeling
screwup pointed out by Stephen.
To get the correct nlink count the ->getattr callback for /proc/net
is overridden to read one from the net->proc_net entry.
To make selinux still work the net->proc_net entry is initialized
properly, i.e. with the "net" name and the proc_net parent.
Selinux fixes are
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Changes from v1:
* Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-03-07 22:08:40 +03:00
/*
 * Look up dentry's name among the children of the directory entry de.
 *
 * On a match, a reference is taken on the child, an inode is obtained with
 * proc_get_inode() and bound to dentry via d_add(); the result is NULL.
 * Otherwise an ERR_PTR is returned: -ENOENT when no child has that name,
 * -EINVAL when a child matched but proc_get_inode() returned no inode.
 */
struct dentry * proc_lookup_de ( struct proc_dir_entry * de , struct inode * dir ,
struct dentry * dentry )
2005-04-17 02:20:36 +04:00
{
struct inode * inode = NULL ;
int error = - ENOENT ;
2006-03-26 13:36:55 +04:00
spin_lock ( & proc_subdir_lock ) ;
2008-04-29 12:01:41 +04:00
for ( de = de - > subdir ; de ; de = de - > next ) {
if ( de - > namelen ! = dentry - > d_name . len )
continue ;
if ( ! memcmp ( dentry - > d_name . name , de - > name , de - > namelen ) ) {
unsigned int ino ;
ino = de - > low_ino ;
/* Take the reference before the lock is dropped for proc_get_inode(). */
de_get ( de ) ;
spin_unlock ( & proc_subdir_lock ) ;
error = - EINVAL ;
inode = proc_get_inode ( dir - > i_sb , ino , de ) ;
/* The lock is already released on this path. */
goto out_unlock ;
2005-04-17 02:20:36 +04:00
}
}
2006-03-26 13:36:55 +04:00
spin_unlock ( & proc_subdir_lock ) ;
2008-02-08 15:18:27 +03:00
out_unlock :
2005-04-17 02:20:36 +04:00
if ( inode ) {
dentry - > d_op = & proc_dentry_operations ;
d_add ( dentry , inode ) ;
return NULL ;
}
2008-04-29 12:01:41 +04:00
/*
 * de is non-NULL here only when a child matched but no inode was
 * obtained: drop the reference taken above. After an unsuccessful
 * search the loop has left de == NULL.
 */
if ( de )
de_put ( de ) ;
2005-04-17 02:20:36 +04:00
return ERR_PTR ( error ) ;
}
[NET]: Make /proc/net a symlink on /proc/self/net (v3)
Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.
The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.
The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.
# ls -l /proc/net
lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.
Changes from v2:
* Fixed discrepancy of /proc/net nlink count and selinux labeling
screwup pointed out by Stephen.
To get the correct nlink count the ->getattr callback for /proc/net
is overridden to read one from the net->proc_net entry.
To make selinux still work the net->proc_net entry is initialized
properly, i.e. with the "net" name and the proc_net parent.
Selinux fixes are
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Changes from v1:
* Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-03-07 22:08:40 +03:00
/*
 * ->lookup hook for generic /proc directories: resolves dentry among the
 * children of the proc_dir_entry behind dir. nd is not used.
 * Returns whatever proc_lookup_de() returns.
 */
struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
			   struct nameidata *nd)
{
	struct proc_dir_entry *parent = PDE(dir);

	return proc_lookup_de(parent, dir, dentry);
}
2005-04-17 02:20:36 +04:00
/*
* This returns non - zero if at EOF , so that the / proc
* root directory can use this and check if it should
* continue with the < pid > entries . .
*
* Note that the VFS - layer doesn ' t care about the return
* value of the readdir ( ) call , as long as it ' s non - negative
* for success . .
*/
[NET]: Make /proc/net a symlink on /proc/self/net (v3)
Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.
The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.
The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.
# ls -l /proc/net
lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.
Changes from v2:
* Fixed discrepancy of /proc/net nlink count and selinux labeling
screwup pointed out by Stephen.
To get the correct nlink count the ->getattr callback for /proc/net
is overridden to read one from the net->proc_net entry.
To make selinux still work the net->proc_net entry is initialized
properly, i.e. with the "net" name and the proc_net parent.
Selinux fixes are
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Changes from v1:
* Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-03-07 22:08:40 +03:00
/*
 * Emit the directory entries of de through filldir, resuming at
 * filp->f_pos. Position 0 is ".", position 1 is "..", and position k >= 2
 * is child number k - 2 in de's ->subdir list.
 *
 * Returns 1 once the end of the list has been reached and 0 when filldir
 * returned a negative value before that. filp->f_pos is advanced by one
 * for every entry that filldir accepted.
 */
int proc_readdir_de ( struct proc_dir_entry * de , struct file * filp , void * dirent ,
filldir_t filldir )
2005-04-17 02:20:36 +04:00
{
unsigned int ino ;
int i ;
2006-12-08 13:36:36 +03:00
struct inode * inode = filp - > f_path . dentry - > d_inode ;
2005-04-17 02:20:36 +04:00
int ret = 0 ;
ino = inode - > i_ino ;
i = filp - > f_pos ;
switch ( i ) {
case 0 :
if ( filldir ( dirent , " . " , 1 , i , ino , DT_DIR ) < 0 )
goto out ;
i + + ;
filp - > f_pos + + ;
/* fall through */
case 1 :
if ( filldir ( dirent , " .. " , 2 , i ,
2006-12-08 13:36:36 +03:00
parent_ino ( filp - > f_path . dentry ) ,
2005-04-17 02:20:36 +04:00
DT_DIR ) < 0 )
goto out ;
i + + ;
filp - > f_pos + + ;
/* fall through */
default :
2006-03-26 13:36:55 +04:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
de = de - > subdir ;
/* Convert the file position into an index into the child list. */
i - = 2 ;
/* Skip the children that earlier calls already reported. */
for ( ; ; ) {
if ( ! de ) {
ret = 1 ;
2006-03-26 13:36:55 +04:00
spin_unlock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
if ( ! i )
break ;
de = de - > next ;
i - - ;
}
do {
2007-05-08 11:25:47 +04:00
struct proc_dir_entry * next ;
2006-03-26 13:36:55 +04:00
/* filldir passes info to user space */
2007-05-08 11:25:47 +04:00
/* Hold a reference on de while the lock is dropped around filldir. */
de_get ( de ) ;
2006-03-26 13:36:55 +04:00
spin_unlock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
/* The mode shifted right by 12 is passed as the entry type argument. */
if ( filldir ( dirent , de - > name , de - > namelen , filp - > f_pos ,
2007-05-08 11:25:47 +04:00
de - > low_ino , de - > mode > > 12 ) < 0 ) {
de_put ( de ) ;
2005-04-17 02:20:36 +04:00
goto out ;
2007-05-08 11:25:47 +04:00
}
2006-03-26 13:36:55 +04:00
spin_lock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
filp - > f_pos + + ;
2007-05-08 11:25:47 +04:00
/* Read ->next under the lock before the reference is dropped. */
next = de - > next ;
de_put ( de ) ;
de = next ;
2005-04-17 02:20:36 +04:00
} while ( de ) ;
2006-03-26 13:36:55 +04:00
spin_unlock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
}
ret = 1 ;
proc: stop using BKL
There are four BKL users in proc: de_put(), proc_lookup_de(),
proc_readdir_de(), proc_root_readdir(),
1) de_put()
-----------
de_put() is classic atomic_dec_and_test() refcount wrapper -- no BKL
needed. BKL doesn't matter to possible refcount leak as well.
2) proc_lookup_de()
-------------------
Walking PDE list is protected by proc_subdir_lock(), proc_get_inode() is
potentially blocking, all callers of proc_lookup_de() eventually end up
from ->lookup hooks which is protected by directory's ->i_mutex -- BKL
doesn't protect anything.
3) proc_readdir_de()
--------------------
"." and ".." part doesn't need BKL, walking PDE list is under
proc_subdir_lock, calling filldir callback is potentially blocking
because it writes to luserspace. All proc_readdir_de() callers
eventually come from ->readdir hook which is under directory's
->i_mutex -- BKL doesn't protect anything.
4) proc_root_readdir_de()
-------------------------
proc_root_readdir_de is ->readdir hook, see (3).
Since readdir hooks doesn't use BKL anymore, switch to
generic_file_llseek, since it also takes directory's i_mutex.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
2008-10-27 22:48:36 +03:00
out :
2005-04-17 02:20:36 +04:00
return ret ;
}
[NET]: Make /proc/net a symlink on /proc/self/net (v3)
Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.
The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.
The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.
# ls -l /proc/net
lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.
Changes from v2:
* Fixed discrepancy of /proc/net nlink count and selinux labeling
screwup pointed out by Stephen.
To get the correct nlink count the ->getattr callback for /proc/net
is overridden to read one from the net->proc_net entry.
To make selinux still work the net->proc_net entry is initialized
properly, i.e. with the "net" name and the proc_net parent.
Selinux fixes are
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Changes from v1:
* Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2008-03-07 22:08:40 +03:00
/*
 * ->readdir hook for generic /proc directories: lists the children of the
 * proc_dir_entry behind the open directory.
 * Returns whatever proc_readdir_de() returns.
 */
int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct proc_dir_entry *de = PDE(filp->f_path.dentry->d_inode);

	return proc_readdir_de(de, filp, dirent, filldir);
}
2005-04-17 02:20:36 +04:00
/*
* These are the generic / proc directory operations . They
* use the in - memory " struct proc_dir_entry " tree to parse
* the / proc directory .
*/
2007-02-12 11:55:34 +03:00
/*
 * File operations installed by proc_register() on directories that were
 * created without their own ->proc_iops.
 */
static const struct file_operations proc_dir_operations = {
proc: stop using BKL
There are four BKL users in proc: de_put(), proc_lookup_de(),
proc_readdir_de(), proc_root_readdir(),
1) de_put()
-----------
de_put() is classic atomic_dec_and_test() refcount wrapper -- no BKL
needed. BKL doesn't matter to possible refcount leak as well.
2) proc_lookup_de()
-------------------
Walking PDE list is protected by proc_subdir_lock(), proc_get_inode() is
potentially blocking, all callers of proc_lookup_de() eventually end up
from ->lookup hooks which is protected by directory's ->i_mutex -- BKL
doesn't protect anything.
3) proc_readdir_de()
--------------------
"." and ".." part doesn't need BKL, walking PDE list is under
proc_subdir_lock, calling filldir callback is potentially blocking
because it writes to luserspace. All proc_readdir_de() callers
eventually come from ->readdir hook which is under directory's
->i_mutex -- BKL doesn't protect anything.
4) proc_root_readdir_de()
-------------------------
proc_root_readdir_de is ->readdir hook, see (3).
Since readdir hooks doesn't use BKL anymore, switch to
generic_file_llseek, since it also takes directory's i_mutex.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
2008-10-27 22:48:36 +03:00
. llseek = generic_file_llseek ,
2005-04-17 02:20:36 +04:00
. read = generic_read_dir ,
. readdir = proc_readdir ,
} ;
/*
* proc directories can do almost nothing . .
*/
2007-02-12 11:55:40 +03:00
/*
 * Inode operations installed by proc_register() on directories that were
 * created without their own ->proc_iops.
 */
static const struct inode_operations proc_dir_inode_operations = {
2005-04-17 02:20:36 +04:00
. lookup = proc_lookup ,
2005-09-07 02:17:18 +04:00
. getattr = proc_getattr ,
2005-04-17 02:20:36 +04:00
. setattr = proc_notify_change ,
} ;
/*
 * Link the prepared entry dp into directory dir.
 *
 * Assigns dp an inode number, fills in default file/inode operations
 * according to dp->mode where the creator left them NULL, and pushes dp
 * onto the head of dir's ->subdir list under proc_subdir_lock.
 *
 * Returns 0 on success, or -EAGAIN when no inode number could be obtained.
 * A name that already exists in dir only triggers a WARN; dp is linked in
 * regardless.
 */
static int proc_register ( struct proc_dir_entry * dir , struct proc_dir_entry * dp )
{
unsigned int i ;
2008-02-08 15:18:29 +03:00
struct proc_dir_entry * tmp ;
2005-04-17 02:20:36 +04:00
i = get_inode_number ( ) ;
if ( i = = 0 )
return - EAGAIN ;
dp - > low_ino = i ;
2006-03-26 13:36:55 +04:00
2005-04-17 02:20:36 +04:00
if ( S_ISDIR ( dp - > mode ) ) {
/* For directories both tables are defaulted together, keyed on ->proc_iops. */
if ( dp - > proc_iops = = NULL ) {
dp - > proc_fops = & proc_dir_operations ;
dp - > proc_iops = & proc_dir_inode_operations ;
}
/* A new subdirectory adds one link to its parent. */
dir - > nlink + + ;
} else if ( S_ISLNK ( dp - > mode ) ) {
if ( dp - > proc_iops = = NULL )
dp - > proc_iops = & proc_link_inode_operations ;
} else if ( S_ISREG ( dp - > mode ) ) {
if ( dp - > proc_fops = = NULL )
dp - > proc_fops = & proc_file_operations ;
if ( dp - > proc_iops = = NULL )
dp - > proc_iops = & proc_file_inode_operations ;
}
2007-07-16 10:40:09 +04:00
spin_lock ( & proc_subdir_lock ) ;
2008-02-08 15:18:29 +03:00
/* Duplicate check: warn once about the first sibling with the same name. */
for ( tmp = dir - > subdir ; tmp ; tmp = tmp - > next )
if ( strcmp ( tmp - > name , dp - > name ) = = 0 ) {
2008-09-14 06:51:30 +04:00
WARN ( 1 , KERN_WARNING " proc_dir_entry '%s/%s' already registered \n " ,
2008-09-13 13:33:06 +04:00
dir - > name , dp - > name ) ;
2008-02-08 15:18:29 +03:00
break ;
}
2007-07-16 10:40:09 +04:00
/* Insert at the head of the parent's child list. */
dp - > next = dir - > subdir ;
dp - > parent = dir ;
dir - > subdir = dp ;
spin_unlock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c10818b8>] proc_reg_read+0x60/0x73
[<c1081858>] proc_reg_read+0x0/0x73
[<c105a34f>] vfs_read+0x6c/0x8b
[<c105a6f3>] sys_read+0x3c/0x63
[<c10025f2>] sysenter_past_esp+0x5f/0xa5
[<c10697a7>] destroy_inode+0x24/0x33
=======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 15:18:37 +03:00
static struct proc_dir_entry * __proc_create ( struct proc_dir_entry * * parent ,
2005-04-17 02:20:36 +04:00
const char * name ,
mode_t mode ,
nlink_t nlink )
{
struct proc_dir_entry * ent = NULL ;
const char * fn = name ;
int len ;
/* make sure name is valid */
if ( ! name | | ! strlen ( name ) ) goto out ;
proc: less special case in xlate code
If valid "parent" is passed to proc_create/remove_proc_entry(), then name of
PDE should consist of only one path component, otherwise creation or or
removal will fail. However, if NULL is passed as parent then create/remove
accept full path as a argument. This is arbitrary restriction -- all
infrastructure is in place.
So, patch allows the following to succeed:
create_proc_entry("foo/bar", 0, pde_baz);
remove_proc_entry("baz/foo/bar", &proc_root);
Also makes the following to behave identically:
create_proc_entry("foo/bar", 0, NULL);
create_proc_entry("foo/bar", 0, &proc_root);
Discrepancy noticed by Den Lunev (IIRC).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 12:01:40 +04:00
if ( xlate_proc_name ( name , parent , & fn ) ! = 0 )
2005-04-17 02:20:36 +04:00
goto out ;
/* At this point there must not be any '/' characters beyond *fn */
if ( strchr ( fn , ' / ' ) )
goto out ;
len = strlen ( fn ) ;
ent = kmalloc ( sizeof ( struct proc_dir_entry ) + len + 1 , GFP_KERNEL ) ;
if ( ! ent ) goto out ;
memset ( ent , 0 , sizeof ( struct proc_dir_entry ) ) ;
memcpy ( ( ( char * ) ent ) + sizeof ( struct proc_dir_entry ) , fn , len + 1 ) ;
ent - > name = ( ( char * ) ent ) + sizeof ( * ent ) ;
ent - > namelen = len ;
ent - > mode = mode ;
ent - > nlink = nlink ;
proc: fix proc_dir_entry refcounting
Creating PDEs with refcount 0 and "deleted" flag has problems (see below).
Switch to usual scheme:
* PDE is created with refcount 1
* every de_get does +1
* every de_put() and remove_proc_entry() do -1
* once refcount reaches 0, PDE is freed.
This elegantly fixes at least two following races (both observed) without
introducing new locks, without abusing old locks, without spreading
lock_kernel():
1) PDE leak
remove_proc_entry de_put
----------------- ------
[refcnt = 1]
if (atomic_read(&de->count) == 0)
if (atomic_dec_and_test(&de->count))
if (de->deleted)
/* also not taken! */
free_proc_entry(de);
else
de->deleted = 1;
[refcount=0, deleted=1]
2) use after free
remove_proc_entry de_put
----------------- ------
[refcnt = 1]
if (atomic_dec_and_test(&de->count))
if (atomic_read(&de->count) == 0)
free_proc_entry(de);
/* boom! */
if (de->deleted)
free_proc_entry(de);
BUG: unable to handle kernel paging request at virtual address 6b6b6b6b
printing eip: c10acdda *pdpt = 00000000338f8001 *pde = 0000000000000000
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 23161, comm: cat Not tainted (2.6.24-rc2-8c0863403f109a43d7000b4646da4818220d501f #4)
EIP: 0060:[<c10acdda>] EFLAGS: 00210097 CPU: 1
EIP is at strnlen+0x6/0x18
EAX: 6b6b6b6b EBX: 6b6b6b6b ECX: 6b6b6b6b EDX: fffffffe
ESI: c128fa3b EDI: f380bf34 EBP: ffffffff ESP: f380be44
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 23161, ti=f380b000 task=f38f2570 task.ti=f380b000)
Stack: c10ac4f0 00000278 c12ce000 f43cd2a8 00000163 00000000 7da86067 00000400
c128fa20 00896b18 f38325a8 c128fe20 ffffffff 00000000 c11f291e 00000400
f75be300 c128fa20 f769c9a0 c10ac779 f380bf34 f7bfee70 c1018e6b f380bf34
Call Trace:
[<c10ac4f0>] vsnprintf+0x2ad/0x49b
[<c10ac779>] vscnprintf+0x14/0x1f
[<c1018e6b>] vprintk+0xc5/0x2f9
[<c10379f1>] handle_fasteoi_irq+0x0/0xab
[<c1004f44>] do_IRQ+0x9f/0xb7
[<c117db3b>] preempt_schedule_irq+0x3f/0x5b
[<c100264e>] need_resched+0x1f/0x21
[<c10190ba>] printk+0x1b/0x1f
[<c107c8ad>] de_put+0x3d/0x50
[<c107c8f8>] proc_delete_inode+0x38/0x41
[<c107c8c0>] proc_delete_inode+0x0/0x41
[<c1066298>] generic_delete_inode+0x5e/0xc6
[<c1065aa9>] iput+0x60/0x62
[<c1063c8e>] d_kill+0x2d/0x46
[<c1063fa9>] dput+0xdc/0xe4
[<c10571a1>] __fput+0xb0/0xcd
[<c1054e49>] filp_close+0x48/0x4f
[<c1055ee9>] sys_close+0x67/0xa5
[<c10026b6>] sysenter_past_esp+0x5f/0x85
=======================
Code: c9 74 0c f2 ae 74 05 bf 01 00 00 00 4f 89 fa 5f 89 d0 c3 85 c9 57 89 c7 89 d0 74 05 f2 ae 75 01 4f 89 f8 5f c3 89 c1 89 c8 eb 06 <80> 38 00 74 07 40 4a 83 fa ff 75 f4 29 c8 c3 90 90 90 57 83 c9
EIP: [<c10acdda>] strnlen+0x6/0x18 SS:ESP 0068:f380be44
Also, remove broken usage of ->deleted from reiserfs: if sget() succeeds,
module is already pinned and remove_proc_entry() can't happen => nobody
can mark PDE deleted.
Dummy proc root in netns code is not marked with refcount 1. AFAICS, we
never get it, it's just for proper /proc/net removal. I double checked
CLONE_NETNS continues to work.
Patch survives many hours of modprobe/rmmod/cat loops without new bugs
which can be attributed to refcounting.
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-12-05 10:45:28 +03:00
atomic_set ( & ent - > count , 1 ) ;
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 10:39:00 +04:00
ent - > pde_users = 0 ;
spin_lock_init ( & ent - > pde_unload_lock ) ;
ent - > pde_unload_completion = NULL ;
2008-07-25 12:48:29 +04:00
INIT_LIST_HEAD ( & ent - > pde_openers ) ;
2005-04-17 02:20:36 +04:00
out :
return ent ;
}
struct proc_dir_entry * proc_symlink ( const char * name ,
struct proc_dir_entry * parent , const char * dest )
{
struct proc_dir_entry * ent ;
proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c10818b8>] proc_reg_read+0x60/0x73
[<c1081858>] proc_reg_read+0x0/0x73
[<c105a34f>] vfs_read+0x6c/0x8b
[<c105a6f3>] sys_read+0x3c/0x63
[<c10025f2>] sysenter_past_esp+0x5f/0xa5
[<c10697a7>] destroy_inode+0x24/0x33
=======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 15:18:37 +03:00
ent = __proc_create ( & parent , name ,
2005-04-17 02:20:36 +04:00
( S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO ) , 1 ) ;
if ( ent ) {
ent - > data = kmalloc ( ( ent - > size = strlen ( dest ) ) + 1 , GFP_KERNEL ) ;
if ( ent - > data ) {
strcpy ( ( char * ) ent - > data , dest ) ;
if ( proc_register ( parent , ent ) < 0 ) {
kfree ( ent - > data ) ;
kfree ( ent ) ;
ent = NULL ;
}
} else {
kfree ( ent ) ;
ent = NULL ;
}
}
return ent ;
}
struct proc_dir_entry * proc_mkdir_mode ( const char * name , mode_t mode ,
struct proc_dir_entry * parent )
{
struct proc_dir_entry * ent ;
proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c10818b8>] proc_reg_read+0x60/0x73
[<c1081858>] proc_reg_read+0x0/0x73
[<c105a34f>] vfs_read+0x6c/0x8b
[<c105a6f3>] sys_read+0x3c/0x63
[<c10025f2>] sysenter_past_esp+0x5f/0xa5
[<c10697a7>] destroy_inode+0x24/0x33
=======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 15:18:37 +03:00
ent = __proc_create ( & parent , name , S_IFDIR | mode , 2 ) ;
2005-04-17 02:20:36 +04:00
if ( ent ) {
if ( proc_register ( parent , ent ) < 0 ) {
kfree ( ent ) ;
ent = NULL ;
}
}
return ent ;
}
2008-05-02 15:12:41 +04:00
struct proc_dir_entry * proc_net_mkdir ( struct net * net , const char * name ,
struct proc_dir_entry * parent )
{
struct proc_dir_entry * ent ;
ent = __proc_create ( & parent , name , S_IFDIR | S_IRUGO | S_IXUGO , 2 ) ;
if ( ent ) {
ent - > data = net ;
if ( proc_register ( parent , ent ) < 0 ) {
kfree ( ent ) ;
ent = NULL ;
}
}
return ent ;
}
EXPORT_SYMBOL_GPL ( proc_net_mkdir ) ;
2005-04-17 02:20:36 +04:00
/*
 * Create a directory called @name under @parent with the default
 * permissions S_IRUGO | S_IXUGO.  Thin wrapper around proc_mkdir_mode().
 */
struct proc_dir_entry *proc_mkdir(const char *name,
		struct proc_dir_entry *parent)
{
	const mode_t default_perms = S_IRUGO | S_IXUGO;

	return proc_mkdir_mode(name, default_perms, parent);
}
struct proc_dir_entry * create_proc_entry ( const char * name , mode_t mode ,
struct proc_dir_entry * parent )
{
struct proc_dir_entry * ent ;
nlink_t nlink ;
if ( S_ISDIR ( mode ) ) {
if ( ( mode & S_IALLUGO ) = = 0 )
mode | = S_IRUGO | S_IXUGO ;
nlink = 2 ;
} else {
if ( ( mode & S_IFMT ) = = 0 )
mode | = S_IFREG ;
if ( ( mode & S_IALLUGO ) = = 0 )
mode | = S_IRUGO ;
nlink = 1 ;
}
proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c10818b8>] proc_reg_read+0x60/0x73
[<c1081858>] proc_reg_read+0x0/0x73
[<c105a34f>] vfs_read+0x6c/0x8b
[<c105a6f3>] sys_read+0x3c/0x63
[<c10025f2>] sysenter_past_esp+0x5f/0xa5
[<c10697a7>] destroy_inode+0x24/0x33
=======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 15:18:37 +03:00
ent = __proc_create ( & parent , name , mode , nlink ) ;
2005-04-17 02:20:36 +04:00
if ( ent ) {
if ( proc_register ( parent , ent ) < 0 ) {
kfree ( ent ) ;
ent = NULL ;
}
}
return ent ;
}
proc: introduce proc_create_data to setup de->data
This set of patches fixes a proc ->open'less usage due to ->proc_fops flip in
the most part of the kernel code. The original OOPS is described in the
commit 2d3a4e3666325a9709cc8ea2e88151394e8f20fc:
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
In addition to this, proc_create_data is introduced to fix reading from
proc without PDE->data. The race is basically the same as above.
create_proc_entries is replaced in the entire kernel code as new method
is also simply better.
This patch:
The problem is the same as for de->proc_fops. Right now PDE becomes visible
without data set. So, the entry could be looked up without data. This, in
most cases, will simply OOPS.
proc_create_data call is created to address this issue. proc_create now
becomes a wrapper around it.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Chris Mason <chris.mason@oracle.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Grant Grundler <grundler@parisc-linux.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Karsten Keil <kkeil@suse.de>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Osterlund <petero2@telia.com>
Cc: Pierre Peiffer <peifferp@gmail.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 12:02:00 +04:00
struct proc_dir_entry * proc_create_data ( const char * name , mode_t mode ,
struct proc_dir_entry * parent ,
const struct file_operations * proc_fops ,
void * data )
proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c10818b8>] proc_reg_read+0x60/0x73
[<c1081858>] proc_reg_read+0x0/0x73
[<c105a34f>] vfs_read+0x6c/0x8b
[<c105a6f3>] sys_read+0x3c/0x63
[<c10025f2>] sysenter_past_esp+0x5f/0xa5
[<c10697a7>] destroy_inode+0x24/0x33
=======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 15:18:37 +03:00
{
struct proc_dir_entry * pde ;
nlink_t nlink ;
if ( S_ISDIR ( mode ) ) {
if ( ( mode & S_IALLUGO ) = = 0 )
mode | = S_IRUGO | S_IXUGO ;
nlink = 2 ;
} else {
if ( ( mode & S_IFMT ) = = 0 )
mode | = S_IFREG ;
if ( ( mode & S_IALLUGO ) = = 0 )
mode | = S_IRUGO ;
nlink = 1 ;
}
pde = __proc_create ( & parent , name , mode , nlink ) ;
if ( ! pde )
goto out ;
pde - > proc_fops = proc_fops ;
proc: introduce proc_create_data to setup de->data
This set of patches fixes an proc ->open'less usage due to ->proc_fops flip in
the most part of the kernel code. The original OOPS is described in the
commit 2d3a4e3666325a9709cc8ea2e88151394e8f20fc:
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
In addition to this, proc_create_data is introduced to fix reading from
proc without PDE->data. The race is basically the same as above.
create_proc_entries is replaced in the entire kernel code as new method
is also simply better.
This patch:
The problem is the same as for de->proc_fops. Right now PDE becomes visible
without data set. So, the entry could be looked up without data. This, in
most cases, will simply OOPS.
proc_create_data call is created to address this issue. proc_create now
becomes a wrapper around it.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Chris Mason <chris.mason@oracle.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Grant Grundler <grundler@parisc-linux.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Haavard Skinnemoen <hskinnemoen@atmel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Karsten Keil <kkeil@suse.de>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Len Brown <lenb@kernel.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Neil Brown <neilb@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Osterlund <petero2@telia.com>
Cc: Pierre Peiffer <peifferp@gmail.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 12:02:00 +04:00
pde - > data = data ;
proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:
pde = create_proc_entry("foo", 0, NULL);
if (pde)
pde->proc_fops = &foo_proc_fops;
Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
possible to ->read without ->open (see one class of oopses below).
The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:
pde = proc_create("foo", 0, NULL, &foo_proc_fops);
if (!pde)
return -ENOMEM;
Fix most networking users for a start.
In the long run, create_proc_entry() for regular files will go.
BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c106f7ce>] seq_read+0x24/0x28a
[<c106f7aa>] seq_read+0x0/0x28a
[<c10818b8>] proc_reg_read+0x60/0x73
[<c1081858>] proc_reg_read+0x0/0x73
[<c105a34f>] vfs_read+0x6c/0x8b
[<c105a6f3>] sys_read+0x3c/0x63
[<c10025f2>] sysenter_past_esp+0x5f/0xa5
[<c10697a7>] destroy_inode+0x24/0x33
=======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 15:18:37 +03:00
if ( proc_register ( parent , pde ) < 0 )
goto out_free ;
return pde ;
out_free :
kfree ( pde ) ;
out :
return NULL ;
}
2005-04-17 02:20:36 +04:00
void free_proc_entry ( struct proc_dir_entry * de )
{
unsigned int ino = de - > low_ino ;
if ( ino < PROC_DYNAMIC_FIRST )
return ;
release_inode_number ( ino ) ;
2008-02-08 15:18:28 +03:00
if ( S_ISLNK ( de - > mode ) )
2005-04-17 02:20:36 +04:00
kfree ( de - > data ) ;
kfree ( de ) ;
}
/*
 * remove_proc_entry - unlink a /proc entry from its parent and drop it.
 * @name:   name of the entry; handed to xlate_proc_name() together with
 *          @parent to obtain the directory to search and the final
 *          component to match
 * @parent: directory to start from; xlate_proc_name() receives its address
 *          and may replace it
 *
 * Steps, in order:
 *  1. Unlink the first child of the resolved parent that matches the final
 *     component, under proc_subdir_lock. Returns silently when the name
 *     cannot be translated or when no child matches.
 *  2. Clear ->proc_fops under ->pde_unload_lock and, if ->pde_users is
 *     greater than zero, wait on ->pde_unload_completion.
 *  3. Call ->release for every opener still queued on ->pde_openers and
 *     free the opener record.
 *  4. Adjust link counts, warn if the entry still has children, then drop
 *     one reference on ->count; free_proc_entry() is called only when that
 *     reference was the last one.
 */
void remove_proc_entry ( const char * name , struct proc_dir_entry * parent )
{
struct proc_dir_entry * * p ;
2008-04-29 12:01:39 +04:00
struct proc_dir_entry * de = NULL ;
2005-04-17 02:20:36 +04:00
const char * fn = name ;
int len ;
proc: less special case in xlate code
If valid "parent" is passed to proc_create/remove_proc_entry(), then name of
PDE should consist of only one path component, otherwise creation or or
removal will fail. However, if NULL is passed as parent then create/remove
accept full path as a argument. This is arbitrary restriction -- all
infrastructure is in place.
So, patch allows the following to succeed:
create_proc_entry("foo/bar", 0, pde_baz);
remove_proc_entry("baz/foo/bar", &proc_root);
Also makes the following to behave identically:
create_proc_entry("foo/bar", 0, NULL);
create_proc_entry("foo/bar", 0, &proc_root);
Discrepancy noticed by Den Lunev (IIRC).
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-29 12:01:40 +04:00
/*
 * Translate name into the directory to search (parent) and the final
 * component (fn). A non-zero result means the translation failed and
 * there is nothing to remove.
 */
if ( xlate_proc_name ( name , & parent , & fn ) ! = 0 )
2008-04-29 12:01:39 +04:00
return ;
2005-04-17 02:20:36 +04:00
len = strlen ( fn ) ;
2006-03-26 13:36:55 +04:00
/*
 * Walk the ->next chain hanging off parent->subdir with a pointer to the
 * link itself, so the first matching child can be spliced out in place.
 * The walk and the unlink both happen under proc_subdir_lock.
 */
spin_lock ( & proc_subdir_lock ) ;
2005-04-17 02:20:36 +04:00
for ( p = & parent - > subdir ; * p ; p = & ( * p ) - > next ) {
2008-04-29 12:01:39 +04:00
if ( proc_match ( len , fn , * p ) ) {
de = * p ;
* p = de - > next ;
de - > next = NULL ;
break ;
}
}
spin_unlock ( & proc_subdir_lock ) ;
/* No child matched: return without touching anything else. */
if ( ! de )
return ;
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 10:39:00 +04:00
2008-04-29 12:01:39 +04:00
spin_lock ( & de - > pde_unload_lock ) ;
/*
* Stop accepting new callers into module . If you ' re
* dynamically allocating - > proc_fops , save a pointer somewhere .
*/
de - > proc_fops = NULL ;
/* Wait until all existing callers into module are done. */
if ( de - > pde_users > 0 ) {
DECLARE_COMPLETION_ONSTACK ( c ) ;
/*
 * Keep a completion that is already installed; otherwise publish the
 * on-stack one declared above.
 * NOTE(review): the code that signals this completion is not part of
 * this function - confirm against the path that decrements pde_users.
 */
if ( ! de - > pde_unload_completion )
de - > pde_unload_completion = & c ;
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 10:39:00 +04:00
spin_unlock ( & de - > pde_unload_lock ) ;
2008-04-29 12:01:39 +04:00
wait_for_completion ( de - > pde_unload_completion ) ;
goto continue_removing ;
}
spin_unlock ( & de - > pde_unload_lock ) ;
Fix rmmod/read/write races in /proc entries
Fix following races:
===========================================
1. Write via ->write_proc sleeps in copy_from_user(). Module disappears
meanwhile. Or, more generically, system call done on /proc file, method
supplied by module is called, module dissapeares meanwhile.
pde = create_proc_entry()
if (!pde)
return -ENOMEM;
pde->write_proc = ...
open
write
copy_from_user
pde = create_proc_entry();
if (!pde) {
remove_proc_entry();
return -ENOMEM;
/* module unloaded */
}
*boom*
==========================================
2. bogo-revoke aka proc_kill_inodes()
remove_proc_entry vfs_read
proc_kill_inodes [check ->f_op validness]
[check ->f_op->read validness]
[verify_area, security permissions checks]
->f_op = NULL;
if (file->f_op->read)
/* ->f_op dereference, boom */
NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's
see how this scheme behaves, then extend if needed for directories.
Directories creators in /proc only set ->owner for them, so proxying for
directories may be unneeded.
NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write,
->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release.
If your in-tree module uses something else, yell on me. Full audit pending.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 10:39:00 +04:00
continue_removing :
2008-07-25 12:48:29 +04:00
/*
 * Drain ->pde_openers: each opener is detached from the list while the
 * lock is held, then its ->release is called and the record is freed
 * with the lock dropped (presumably because ->release may sleep - TODO
 * confirm). The lock is retaken before the list is examined again.
 */
spin_lock ( & de - > pde_unload_lock ) ;
while ( ! list_empty ( & de - > pde_openers ) ) {
struct pde_opener * pdeo ;
pdeo = list_first_entry ( & de - > pde_openers , struct pde_opener , lh ) ;
list_del ( & pdeo - > lh ) ;
spin_unlock ( & de - > pde_unload_lock ) ;
pdeo - > release ( pdeo - > inode , pdeo - > file ) ;
kfree ( pdeo ) ;
spin_lock ( & de - > pde_unload_lock ) ;
}
spin_unlock ( & de - > pde_unload_lock ) ;
2008-04-29 12:01:39 +04:00
/* Removing a directory takes one link away from its parent. */
if ( S_ISDIR ( de - > mode ) )
parent - > nlink - - ;
de - > nlink = 0 ;
2008-07-26 06:45:41 +04:00
/*
 * Children still attached to the removed entry are not cleaned up by
 * this function; the warning names the first one.
 */
WARN ( de - > subdir , KERN_WARNING " %s: removing non-empty directory "
2008-04-29 12:01:39 +04:00
" '%s/%s', leaking at least '%s' \n " , __func__ ,
de - > parent - > name , de - > name , de - > subdir - > name ) ;
/* Drop one reference on ->count; free the entry only if it was the last. */
if ( atomic_dec_and_test ( & de - > count ) )
free_proc_entry ( de ) ;
2005-04-17 02:20:36 +04:00
}