2005-04-17 02:20:36 +04:00
/*
* linux / fs / file_table . c
*
* Copyright ( C ) 1991 , 1992 Linus Torvalds
* Copyright ( C ) 1997 David S . Miller ( davem @ caip . rutgers . edu )
*/
# include <linux/string.h>
# include <linux/slab.h>
# include <linux/file.h>
# include <linux/init.h>
# include <linux/module.h>
# include <linux/smp_lock.h>
# include <linux/fs.h>
# include <linux/security.h>
# include <linux/eventpoll.h>
2005-09-10 00:04:13 +04:00
# include <linux/rcupdate.h>
2005-04-17 02:20:36 +04:00
# include <linux/mount.h>
# include <linux/cdev.h>
[PATCH] inotify
inotify is intended to correct the deficiencies of dnotify, particularly
its inability to scale and its terrible user interface:
* dnotify requires the opening of one fd per each directory
that you intend to watch. This quickly results in too many
open files and pins removable media, preventing unmount.
* dnotify is directory-based. You only learn about changes to
directories. Sure, a change to a file in a directory affects
the directory, but you are then forced to keep a cache of
stat structures.
* dnotify's interface to user-space is awful. Signals?
inotify provides a more usable, simple, powerful solution to file change
notification:
* inotify's interface is a system call that returns a fd, not SIGIO.
You get a single fd, which is select()-able.
* inotify has an event that says "the filesystem that the item
you were watching is on was unmounted."
* inotify can watch directories or files.
Inotify is currently used by Beagle (a desktop search infrastructure),
Gamin (a FAM replacement), and other projects.
See Documentation/filesystems/inotify.txt.
Signed-off-by: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-13 01:06:03 +04:00
# include <linux/fsnotify.h>
2005-04-17 02:20:36 +04:00
/* sysctl tunables... */
/*
 * File-table statistics.  max_files starts out as NR_FILE; files_init()
 * later replaces it with a memory-derived value, never going below NR_FILE.
 */
struct files_stat_struct files_stat = {
. max_files = NR_FILE
} ;
EXPORT_SYMBOL ( files_stat ) ; /* Needed by unix.o */
/* public. Not pretty! */
/* NOTE(review): presumably the lock taken by file_list_lock()/file_list_unlock() - confirm in the header. */
__cacheline_aligned_in_smp DEFINE_SPINLOCK ( files_lock ) ;
/* Serialises the files_stat.nr_files updates done by filp_ctor()/filp_dtor(). */
static DEFINE_SPINLOCK ( filp_count_lock ) ;
/* slab constructors and destructors are called from arbitrary
* context and must be fully threaded - use a local spinlock
* to protect files_stat . nr_files
*/
/*
 * Slab constructor hook for file objects.
 *
 * Bumps files_stat.nr_files, but only when @cflags has
 * SLAB_CTOR_CONSTRUCTOR set and SLAB_CTOR_VERIFY clear; any other
 * combination leaves the counter untouched.  @objp and @cachep are unused.
 */
void filp_ctor(void *objp, struct kmem_cache_s *cachep, unsigned long cflags)
{
	unsigned long irq_state;

	/* Nothing to account for unless this is a plain construction call. */
	if ((cflags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) !=
	    SLAB_CTOR_CONSTRUCTOR)
		return;

	/* The counter is shared, so update it under filp_count_lock. */
	spin_lock_irqsave(&filp_count_lock, irq_state);
	files_stat.nr_files += 1;
	spin_unlock_irqrestore(&filp_count_lock, irq_state);
}
/*
 * Slab destructor hook for file objects.
 *
 * Unconditionally decrements files_stat.nr_files under filp_count_lock.
 * None of the arguments (@objp, @cachep, @dflags) is examined.
 */
void filp_dtor(void *objp, struct kmem_cache_s *cachep, unsigned long dflags)
{
	unsigned long irq_state;

	spin_lock_irqsave(&filp_count_lock, irq_state);
	files_stat.nr_files -= 1;
	spin_unlock_irqrestore(&filp_count_lock, irq_state);
}
2005-09-10 00:04:13 +04:00
/*
 * Callback handed to call_rcu() by file_free().
 *
 * @head is the f_u.fu_rcuhead member embedded in a struct file; the
 * enclosing file is recovered with container_of() and given back to
 * filp_cachep.
 */
static inline void file_free_rcu ( struct rcu_head * head )
2005-04-17 02:20:36 +04:00
{
2005-10-31 02:02:16 +03:00
struct file * f = container_of ( head , struct file , f_u . fu_rcuhead ) ;
2005-04-17 02:20:36 +04:00
kmem_cache_free ( filp_cachep , f ) ;
}
2005-09-10 00:04:13 +04:00
/*
 * Release a struct file.
 *
 * The object is not freed here: its embedded rcu head is queued with
 * call_rcu(), and file_free_rcu() performs the kmem_cache_free() when
 * the callback runs.
 */
static inline void file_free ( struct file * f )
{
2005-10-31 02:02:16 +03:00
call_rcu ( & f - > f_u . fu_rcuhead , file_free_rcu ) ;
2005-09-10 00:04:13 +04:00
}
2005-04-17 02:20:36 +04:00
/* Find an unused file structure and return a pointer to it.
 *
 * Returns NULL when:
 *   - nr_files has reached max_files and the caller lacks CAP_SYS_ADMIN,
 *   - the allocation from filp_cachep fails, or
 *   - security_file_alloc() returns non-zero.
 *
 * On success the returned file has been zeroed, has f_count set to 1,
 * carries the current task's fsuid/fsgid in f_uid/f_gid, and has an
 * initialised (empty) f_u.fu_list.
 */
struct file * get_empty_filp ( void )
{
2005-06-23 11:09:50 +04:00
/* nr_files value at the last "limit reached" report; limits how often the message is printed. */
static int old_max ;
2005-04-17 02:20:36 +04:00
struct file * f ;
/*
 * Privileged users can go above max_files
 */
2005-06-23 11:09:50 +04:00
if ( files_stat . nr_files > = files_stat . max_files & &
! capable ( CAP_SYS_ADMIN ) )
goto over ;
f = kmem_cache_alloc ( filp_cachep , GFP_KERNEL ) ;
if ( f = = NULL )
goto fail ;
/* Start from an all-zero object before any field is set up. */
memset ( f , 0 , sizeof ( * f ) ) ;
if ( security_file_alloc ( f ) )
goto fail_sec ;
2005-04-17 02:20:36 +04:00
2005-06-23 11:09:50 +04:00
eventpoll_init_file ( f ) ;
/* The caller receives the one and only reference. */
atomic_set ( & f - > f_count , 1 ) ;
f - > f_uid = current - > fsuid ;
f - > f_gid = current - > fsgid ;
rwlock_init ( & f - > f_owner . lock ) ;
/* f->f_version: 0 */
2005-10-31 02:02:16 +03:00
INIT_LIST_HEAD ( & f - > f_u . fu_list ) ;
2005-06-23 11:09:50 +04:00
return f ;
over :
2005-04-17 02:20:36 +04:00
/* Ran out of filps - report that */
2005-06-23 11:09:50 +04:00
/* Only report when nr_files exceeds the value recorded at the previous report. */
if ( files_stat . nr_files > old_max ) {
2005-04-17 02:20:36 +04:00
printk ( KERN_INFO " VFS: file-max limit %d reached \n " ,
files_stat . max_files ) ;
2005-06-23 11:09:50 +04:00
old_max = files_stat . nr_files ;
2005-04-17 02:20:36 +04:00
}
2005-06-23 11:09:50 +04:00
goto fail ;
fail_sec :
/* The security hook failed: hand the object to file_free(), which frees it via call_rcu(). */
file_free ( f ) ;
2005-04-17 02:20:36 +04:00
fail :
return NULL ;
}
EXPORT_SYMBOL ( get_empty_filp ) ;
/*
 * Drop one reference to @file.
 *
 * When rcuref_dec_and_test() on f_count returns true, the file is torn
 * down through __fput(); otherwise nothing else happens.
 */
void fastcall fput ( struct file * file )
{
2005-09-10 00:04:13 +04:00
if ( rcuref_dec_and_test ( & file - > f_count ) )
2005-04-17 02:20:36 +04:00
__fput ( file ) ;
}
EXPORT_SYMBOL ( fput ) ;
/* __fput is called from task context when aio completion releases the
 * last use of a struct file *.  Do not use otherwise.
 *
 * It is also the teardown path taken by fput() once the reference count
 * test succeeds.  The function may sleep (see might_sleep() below).
 */
void fastcall __fput ( struct file * file )
{
/*
 * Take local copies up front: f_dentry and f_vfsmnt are cleared further
 * down, before the final dput()/mntput() on these saved pointers.
 */
struct dentry * dentry = file - > f_dentry ;
struct vfsmount * mnt = file - > f_vfsmnt ;
struct inode * inode = dentry - > d_inode ;
might_sleep ( ) ;
[PATCH] inotify
inotify is intended to correct the deficiencies of dnotify, particularly
its inability to scale and its terrible user interface:
* dnotify requires the opening of one fd per each directory
that you intend to watch. This quickly results in too many
open files and pins removable media, preventing unmount.
* dnotify is directory-based. You only learn about changes to
directories. Sure, a change to a file in a directory affects
the directory, but you are then forced to keep a cache of
stat structures.
* dnotify's interface to user-space is awful. Signals?
inotify provides a more usable, simple, powerful solution to file change
notification:
* inotify's interface is a system call that returns a fd, not SIGIO.
You get a single fd, which is select()-able.
* inotify has an event that says "the filesystem that the item
you were watching is on was unmounted."
* inotify can watch directories or files.
Inotify is currently used by Beagle (a desktop search infrastructure),
Gamin (a FAM replacement), and other projects.
See Documentation/filesystems/inotify.txt.
Signed-off-by: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-13 01:06:03 +04:00
/* Notify fsnotify of the close before any teardown starts. */
fsnotify_close ( file ) ;
2005-04-17 02:20:36 +04:00
/*
 * The function eventpoll_release() should be the first called
 * in the file cleanup chain.
 */
eventpoll_release ( file ) ;
locks_remove_flock ( file ) ;
/* The release hook is optional: call it only if f_op and ->release are set. */
if ( file - > f_op & & file - > f_op - > release )
file - > f_op - > release ( inode , file ) ;
security_file_free ( file ) ;
if ( unlikely ( inode - > i_cdev ! = NULL ) )
cdev_put ( inode - > i_cdev ) ;
fops_put ( file - > f_op ) ;
/* Files opened with FMODE_WRITE give their write access on the inode back. */
if ( file - > f_mode & FMODE_WRITE )
put_write_access ( inode ) ;
/* Unlink the file from whatever list it is on before it is freed. */
file_kill ( file ) ;
file - > f_dentry = NULL ;
file - > f_vfsmnt = NULL ;
/* file_free() queues the object for freeing via call_rcu(). */
file_free ( file ) ;
/* Drop the dentry and mount through the copies saved at entry. */
dput ( dentry ) ;
mntput ( mnt ) ;
}
/*
 * Look up @fd in the current task's file table and take a reference on
 * the file found there.
 *
 * Returns the file with its f_count raised, or NULL when fcheck_files()
 * yields no file for @fd or when rcuref_inc_lf() fails to take the
 * reference.  The lookup and the increment both happen inside an RCU
 * read-side section.
 */
struct file fastcall * fget ( unsigned int fd )
{
struct file * file ;
struct files_struct * files = current - > files ;
2005-09-10 00:04:13 +04:00
rcu_read_lock ( ) ;
2005-04-17 02:20:36 +04:00
file = fcheck_files ( files , fd ) ;
2005-09-10 00:04:13 +04:00
if ( file ) {
if ( ! rcuref_inc_lf ( & file - > f_count ) ) {
/* File object ref couldn't be taken */
rcu_read_unlock ( ) ;
return NULL ;
}
}
rcu_read_unlock ( ) ;
2005-04-17 02:20:36 +04:00
return file ;
}
EXPORT_SYMBOL ( fget ) ;
/*
 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
 * You can use this only if it is guaranteed that the current task already
 * holds a refcnt to that file.  That check has to be done at fget() only
 * and a flag is returned to be passed to the corresponding fput_light().
 * There must not be a cloning between an fget_light/fput_light pair.
 *
 * @fd:          descriptor to look up in current->files.
 * @fput_needed: set to 1 when a reference was taken here, 0 otherwise.
 *
 * Returns the file, or NULL if the lookup finds nothing or the reference
 * could not be taken.
 */
struct file fastcall * fget_light ( unsigned int fd , int * fput_needed )
{
struct file * file ;
struct files_struct * files = current - > files ;
* fput_needed = 0 ;
/* Fast path: files->count is 1, so no reference is taken and *fput_needed stays 0. */
if ( likely ( ( atomic_read ( & files - > count ) = = 1 ) ) ) {
file = fcheck_files ( files , fd ) ;
} else {
2005-09-10 00:04:13 +04:00
/* Table has other users: look up and take a reference inside an RCU read-side section. */
rcu_read_lock ( ) ;
2005-04-17 02:20:36 +04:00
file = fcheck_files ( files , fd ) ;
if ( file ) {
2005-09-10 00:04:13 +04:00
if ( rcuref_inc_lf ( & file - > f_count ) )
* fput_needed = 1 ;
else
/* Didn't get the reference, someone's freed */
file = NULL ;
2005-04-17 02:20:36 +04:00
}
2005-09-10 00:04:13 +04:00
rcu_read_unlock ( ) ;
2005-04-17 02:20:36 +04:00
}
2005-09-10 00:04:13 +04:00
2005-04-17 02:20:36 +04:00
return file ;
}
/*
 * Drop one reference to @file.
 *
 * Once rcuref_dec_and_test() on f_count returns true, the security state
 * is freed, the file is unlinked from its list and then released through
 * file_free().  Unlike __fput(), nothing here touches f_op, the dentry or
 * the mount.
 */
void put_filp ( struct file * file )
{
2005-09-10 00:04:13 +04:00
if ( rcuref_dec_and_test ( & file - > f_count ) ) {
2005-04-17 02:20:36 +04:00
security_file_free ( file ) ;
file_kill ( file ) ;
file_free ( file ) ;
}
}
/*
 * Move @file (through its f_u.fu_list link) onto @list while holding the
 * file list lock.  A NULL @list makes the call a no-op.
 */
void file_move ( struct file * file , struct list_head * list )
{
if ( ! list )
return ;
file_list_lock ( ) ;
2005-10-31 02:02:16 +03:00
list_move ( & file - > f_u . fu_list , list ) ;
2005-04-17 02:20:36 +04:00
file_list_unlock ( ) ;
}
/*
 * Unlink @file from the list its f_u.fu_list link is on and leave the
 * link re-initialised (list_del_init()).
 *
 * The list_empty() test is made before the lock is taken, so a file that
 * is on no list never touches the file list lock.
 */
void file_kill ( struct file * file )
{
2005-10-31 02:02:16 +03:00
if ( ! list_empty ( & file - > f_u . fu_list ) ) {
2005-04-17 02:20:36 +04:00
file_list_lock ( ) ;
2005-10-31 02:02:16 +03:00
list_del_init ( & file - > f_u . fu_list ) ;
2005-04-17 02:20:36 +04:00
file_list_unlock ( ) ;
}
}
/*
 * Decide whether the open files on @sb allow a read-only remount.
 *
 * Walks sb->s_files under the file list lock.  Returns 0 as soon as a
 * file is found whose inode has i_nlink == 0, or which is a regular file
 * opened with FMODE_WRITE; returns 1 if the whole list passes.
 */
int fs_may_remount_ro ( struct super_block * sb )
{
struct list_head * p ;
/* Check that no files are currently opened for writing. */
file_list_lock ( ) ;
list_for_each ( p , & sb - > s_files ) {
2005-10-31 02:02:16 +03:00
struct file * file = list_entry ( p , struct file , f_u . fu_list ) ;
2005-04-17 02:20:36 +04:00
struct inode * inode = file - > f_dentry - > d_inode ;
/* File with pending delete? */
if ( inode - > i_nlink = = 0 )
goto too_bad ;
/* Writeable file? */
if ( S_ISREG ( inode - > i_mode ) & & ( file - > f_mode & FMODE_WRITE ) )
goto too_bad ;
}
file_list_unlock ( ) ;
return 1 ; /* Tis' cool bro. */
too_bad :
/* Both exits release the file list lock before returning. */
file_list_unlock ( ) ;
return 0 ;
}
/*
 * Boot-time setup: derive files_stat.max_files from @mempages (a page
 * count, scaled by PAGE_SIZE / 1024 and divided by 10), never letting it
 * drop below NR_FILE, then call files_defer_init().
 */
void __init files_init ( unsigned long mempages )
{
/*
 * NOTE(review): n is an int while mempages is unsigned long, so the
 * computed value is narrowed on assignment - confirm this cannot
 * truncate on the memory sizes this code is expected to see.
 */
int n ;
/* One file with associated inode and dcache is very roughly 1K.
 * By default don't use more than 10% of our memory for files.
 */
n = ( mempages * ( PAGE_SIZE / 1024 ) ) / 10 ;
files_stat . max_files = n ;
/* Keep at least the compile-time default. */
if ( files_stat . max_files < NR_FILE )
files_stat . max_files = NR_FILE ;
2005-09-10 00:04:13 +04:00
files_defer_init ( ) ;
2005-04-17 02:20:36 +04:00
}