/*
 *  linux/fs/file_table.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/config.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/eventpoll.h>
#include <linux/rcupdate.h>
#include <linux/mount.h>
#include <linux/capability.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>

#include <asm/atomic.h>

/* sysctl tunables... */
struct files_stat_struct files_stat = {
	.max_files = NR_FILE
};

/* public. Not pretty! */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);

static struct percpu_counter nr_files __cacheline_aligned_in_smp;

static inline void file_free_rcu(struct rcu_head *head)
{
	struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
	kmem_cache_free(filp_cachep, f);
}

static inline void file_free(struct file *f)
{
	percpu_counter_dec(&nr_files);
	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
}

/*
 * Return the total number of open files in the system
 */
static int get_nr_files(void)
{
	return percpu_counter_read_positive(&nr_files);
}

/*
 * Return the maximum number of open files in the system
 */
int get_max_files(void)
{
	return files_stat.max_files;
}
EXPORT_SYMBOL_GPL(get_max_files);

/*
 * Handle nr_files sysctl
 */
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
int proc_nr_files(ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	files_stat.nr_files = get_nr_files();
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}
#else
int proc_nr_files(ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
#endif
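/*
 * proc_nr_files() is referenced from the sysctl tables in kernel/sysctl.c.
 * Roughly (an illustrative sketch only; see kernel/sysctl.c for the real
 * entry and field values), it is hooked up as the read-only "file-nr"
 * handler:
 *
 *	{
 *		.ctl_name	= FS_NRFILE,
 *		.procname	= "file-nr",
 *		.data		= &files_stat,
 *		.maxlen		= 3*sizeof(int),
 *		.mode		= 0444,
 *		.proc_handler	= &proc_nr_files,
 *	},
 */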
/* Find an unused file structure and return a pointer to it.
 * Returns NULL, if there are no more free file structures or
 * we run out of memory.
 */
struct file *get_empty_filp(void)
{
	struct task_struct *tsk;
	static int old_max;
	struct file *f;

	/*
	 * Privileged users can go above max_files
	 */
	if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
		/*
		 * percpu_counters are inaccurate.  Do an expensive check before
		 * we go and fail.
		 */
		if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
			goto over;
	}

	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
	if (f == NULL)
		goto fail;

	percpu_counter_inc(&nr_files);
	memset(f, 0, sizeof(*f));
	if (security_file_alloc(f))
		goto fail_sec;

	tsk = current;
	INIT_LIST_HEAD(&f->f_u.fu_list);
	atomic_set(&f->f_count, 1);
	rwlock_init(&f->f_owner.lock);
	f->f_uid = tsk->fsuid;
	f->f_gid = tsk->fsgid;
	eventpoll_init_file(f);
	/* f->f_version: 0 */
	return f;

over:
	/* Ran out of filps - report that */
	if (get_nr_files() > old_max) {
		printk(KERN_INFO "VFS: file-max limit %d reached\n",
					get_max_files());
		old_max = get_nr_files();
	}
	goto fail;

fail_sec:
	file_free(f);
fail:
	return NULL;
}

EXPORT_SYMBOL(get_empty_filp);
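/*
 * Typical use is roughly what callers such as dentry_open() do: allocate a
 * filp, fill it in, and dispose of it with put_filp() if it never becomes a
 * fully opened file:
 *
 *	struct file *f = get_empty_filp();
 *
 *	if (f == NULL)
 *		return ERR_PTR(-ENFILE);
 *	f->f_flags = flags;
 *	...
 *	put_filp(f);	(only on an error path before the file is installed)
 */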
void fastcall fput(struct file *file)
{
	if (atomic_dec_and_test(&file->f_count))
		__fput(file);
}

EXPORT_SYMBOL(fput);
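/*
 * fget()/fput() are the usual way to pin a struct file from a file
 * descriptor for the duration of a system call, e.g.:
 *
 *	struct file *file = fget(fd);
 *
 *	if (!file)
 *		return -EBADF;
 *	... use file ...
 *	fput(file);
 */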
/* __fput is called from task context when aio completion releases the last
 * use of a struct file *.  Do not use otherwise.
 */
void fastcall __fput(struct file *file)
{
	struct dentry *dentry = file->f_dentry;
	struct vfsmount *mnt = file->f_vfsmnt;
	struct inode *inode = dentry->d_inode;

	might_sleep();

	fsnotify_close(file);
	/*
	 * The function eventpoll_release() should be the first called
	 * in the file cleanup chain.
	 */
	eventpoll_release(file);
	locks_remove_flock(file);

	if (file->f_op && file->f_op->release)
		file->f_op->release(inode, file);
	security_file_free(file);
	if (unlikely(inode->i_cdev != NULL))
		cdev_put(inode->i_cdev);
	fops_put(file->f_op);
	if (file->f_mode & FMODE_WRITE)
		put_write_access(inode);
	file_kill(file);
	file->f_dentry = NULL;
	file->f_vfsmnt = NULL;
	file_free(file);
	dput(dentry);
	mntput(mnt);
}
struct file fastcall *fget(unsigned int fd)
{
	struct file *file;
	struct files_struct *files = current->files;

	rcu_read_lock();
	file = fcheck_files(files, fd);
	if (file) {
		if (!atomic_inc_not_zero(&file->f_count)) {
			/* File object ref couldn't be taken */
			rcu_read_unlock();
			return NULL;
		}
	}
	rcu_read_unlock();

	return file;
}

EXPORT_SYMBOL(fget);
/*
 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
 * You can use this only if it is guaranteed that the current task already
 * holds a refcnt to that file. That check has to be done at fget() only
 * and a flag is returned to be passed to the corresponding fput_light().
 * There must not be a cloning between an fget_light/fput_light pair.
 */
struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
{
	struct file *file;
	struct files_struct *files = current->files;

	*fput_needed = 0;
	if (likely((atomic_read(&files->count) == 1))) {
		file = fcheck_files(files, fd);
	} else {
		rcu_read_lock();
		file = fcheck_files(files, fd);
		if (file) {
			if (atomic_inc_not_zero(&file->f_count))
				*fput_needed = 1;
			else
				/* Didn't get the reference, someone's freed */
				file = NULL;
		}
		rcu_read_unlock();
	}

	return file;
}
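/*
 * fget_light() must always be paired with fput_light(), threading the
 * fput_needed flag through, e.g.:
 *
 *	int fput_needed;
 *	struct file *file = fget_light(fd, &fput_needed);
 *
 *	if (!file)
 *		return -EBADF;
 *	... use file ...
 *	fput_light(file, fput_needed);
 */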
void put_filp(struct file *file)
{
	if (atomic_dec_and_test(&file->f_count)) {
		security_file_free(file);
		file_kill(file);
		file_free(file);
	}
}

void file_move(struct file *file, struct list_head *list)
{
	if (!list)
		return;
	file_list_lock();
	list_move(&file->f_u.fu_list, list);
	file_list_unlock();
}
void file_kill(struct file *file)
{
	if (!list_empty(&file->f_u.fu_list)) {
		file_list_lock();
		list_del_init(&file->f_u.fu_list);
		file_list_unlock();
	}
}

int fs_may_remount_ro(struct super_block *sb)
{
	struct list_head *p;

	/* Check that no files are currently opened for writing. */
	file_list_lock();
	list_for_each(p, &sb->s_files) {
		struct file *file = list_entry(p, struct file, f_u.fu_list);
		struct inode *inode = file->f_dentry->d_inode;

		/* File with pending delete? */
		if (inode->i_nlink == 0)
			goto too_bad;

		/* Writeable file? */
		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
			goto too_bad;
	}
	file_list_unlock();
	return 1; /* Tis' cool bro. */
too_bad:
	file_list_unlock();
	return 0;
}
void __init files_init(unsigned long mempages)
{
	int n;

	/* One file with associated inode and dcache is very roughly 1K.
	 * Per default don't use more than 10% of our memory for files.
	 */
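	/*
	 * E.g. with 4K pages and 128MB of memory: mempages = 32768, so
	 * n = 32768 * (4096/1024) / 10 = 13107 files.  Anything smaller
	 * than NR_FILE is rounded up to NR_FILE below.
	 */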
	n = (mempages * (PAGE_SIZE / 1024)) / 10;
	files_stat.max_files = n;
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;
	files_defer_init();
	percpu_counter_init(&nr_files, 0);
}