linux/fs/file_table.c

/*
 *  linux/fs/file_table.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
 */

#include <linux/string.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/eventpoll.h>
#include <linux/mount.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>

/* sysctl tunables... */
struct files_stat_struct files_stat = {
	.max_files = NR_FILE
};

EXPORT_SYMBOL(files_stat); /* Needed by unix.o */

/* public. Not pretty! */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);

static DEFINE_SPINLOCK(filp_count_lock);

/* slab constructors and destructors are called from arbitrary
 * context and must be fully threaded - use a local spinlock
 * to protect files_stat.nr_files
 */
void filp_ctor(void * objp, struct kmem_cache_s *cachep, unsigned long cflags)
{
	if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		unsigned long flags;
		spin_lock_irqsave(&filp_count_lock, flags);
		files_stat.nr_files++;
		spin_unlock_irqrestore(&filp_count_lock, flags);
	}
}

void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
{
	unsigned long flags;
	spin_lock_irqsave(&filp_count_lock, flags);
	files_stat.nr_files--;
	spin_unlock_irqrestore(&filp_count_lock, flags);
}

static inline void file_free(struct file *f)
{
	kmem_cache_free(filp_cachep, f);
}

/* Find an unused file structure and return a pointer to it.
 * Returns NULL, if there are no more free file structures or
 * we run out of memory.
 */
struct file *get_empty_filp(void)
{
	static int old_max;
	struct file * f;

	/*
	 * Privileged users can go above max_files
	 */
	if (files_stat.nr_files >= files_stat.max_files &&
				!capable(CAP_SYS_ADMIN))
		goto over;

	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
	if (f == NULL)
		goto fail;

	memset(f, 0, sizeof(*f));
	if (security_file_alloc(f))
		goto fail_sec;

	eventpoll_init_file(f);
	atomic_set(&f->f_count, 1);
	f->f_uid = current->fsuid;
	f->f_gid = current->fsgid;
	rwlock_init(&f->f_owner.lock);
	/* f->f_version: 0 */
	INIT_LIST_HEAD(&f->f_list);
	return f;

over:
	/* Ran out of filps - report that */
	if (files_stat.nr_files > old_max) {
		printk(KERN_INFO "VFS: file-max limit %d reached\n",
					files_stat.max_files);
		old_max = files_stat.nr_files;
	}
	goto fail;

fail_sec:
	file_free(f);
fail:
	return NULL;
}

EXPORT_SYMBOL(get_empty_filp);

void fastcall fput(struct file *file)
{
	if (atomic_dec_and_test(&file->f_count))
		__fput(file);
}

EXPORT_SYMBOL(fput);

/* __fput is called from task context when aio completion releases the last
 * last use of a struct file *.  Do not use otherwise.
 */
void fastcall __fput(struct file *file)
{
	struct dentry *dentry = file->f_dentry;
	struct vfsmount *mnt = file->f_vfsmnt;
	struct inode *inode = dentry->d_inode;

	might_sleep();

	fsnotify_close(file);
	/*
	 * The function eventpoll_release() should be the first called
	 * in the file cleanup chain.
	 */
	eventpoll_release(file);
	locks_remove_flock(file);

	if (file->f_op && file->f_op->release)
		file->f_op->release(inode, file);
	security_file_free(file);
	if (unlikely(inode->i_cdev != NULL))
		cdev_put(inode->i_cdev);
	fops_put(file->f_op);
	if (file->f_mode & FMODE_WRITE)
		put_write_access(inode);
	file_kill(file);
	file->f_dentry = NULL;
	file->f_vfsmnt = NULL;
	file_free(file);
	dput(dentry);
	mntput(mnt);
}

struct file fastcall *fget(unsigned int fd)
{
	struct file *file;
	struct files_struct *files = current->files;

	spin_lock(&files->file_lock);
	file = fcheck_files(files, fd);
	if (file)
		get_file(file);
	spin_unlock(&files->file_lock);
	return file;
}

EXPORT_SYMBOL(fget);

/*
 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 
 * You can use this only if it is guranteed that the current task already 
 * holds a refcnt to that file. That check has to be done at fget() only
 * and a flag is returned to be passed to the corresponding fput_light().
 * There must not be a cloning between an fget_light/fput_light pair.
 */
struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
{
	struct file *file;
	struct files_struct *files = current->files;

	*fput_needed = 0;
	if (likely((atomic_read(&files->count) == 1))) {
		file = fcheck_files(files, fd);
	} else {
		spin_lock(&files->file_lock);
		file = fcheck_files(files, fd);
		if (file) {
			get_file(file);
			*fput_needed = 1;
		}
		spin_unlock(&files->file_lock);
	}
	return file;
}


void put_filp(struct file *file)
{
	if (atomic_dec_and_test(&file->f_count)) {
		security_file_free(file);
		file_kill(file);
		file_free(file);
	}
}

void file_move(struct file *file, struct list_head *list)
{
	if (!list)
		return;
	file_list_lock();
	list_move(&file->f_list, list);
	file_list_unlock();
}

void file_kill(struct file *file)
{
	if (!list_empty(&file->f_list)) {
		file_list_lock();
		list_del_init(&file->f_list);
		file_list_unlock();
	}
}

int fs_may_remount_ro(struct super_block *sb)
{
	struct list_head *p;

	/* Check that no files are currently opened for writing. */
	file_list_lock();
	list_for_each(p, &sb->s_files) {
		struct file *file = list_entry(p, struct file, f_list);
		struct inode *inode = file->f_dentry->d_inode;

		/* File with pending delete? */
		if (inode->i_nlink == 0)
			goto too_bad;

		/* Writeable file? */
		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
			goto too_bad;
	}
	file_list_unlock();
	return 1; /* Tis' cool bro. */
too_bad:
	file_list_unlock();
	return 0;
}

void __init files_init(unsigned long mempages)
{ 
	int n; 
	/* One file with associated inode and dcache is very roughly 1K. 
	 * Per default don't use more than 10% of our memory for files. 
	 */ 

	n = (mempages * (PAGE_SIZE / 1024)) / 10;
	files_stat.max_files = n; 
	if (files_stat.max_files < NR_FILE)
		files_stat.max_files = NR_FILE;
}
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`/*`
			`* linux/fs/file_table.c`
			`*`
			`* Copyright (C) 1991, 1992 Linus Torvalds`
			`* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)`
			`*/`

			`#include <linux/string.h>`
			`#include <linux/slab.h>`
			`#include <linux/file.h>`
			`#include <linux/init.h>`
			`#include <linux/module.h>`
			`#include <linux/smp_lock.h>`
			`#include <linux/fs.h>`
			`#include <linux/security.h>`
			`#include <linux/eventpoll.h>`
			`#include <linux/mount.h>`
			`#include <linux/cdev.h>`
[PATCH] inotify inotify is intended to correct the deficiencies of dnotify, particularly its inability to scale and its terrible user interface: * dnotify requires the opening of one fd per each directory that you intend to watch. This quickly results in too many open files and pins removable media, preventing unmount. * dnotify is directory-based. You only learn about changes to directories. Sure, a change to a file in a directory affects the directory, but you are then forced to keep a cache of stat structures. * dnotify's interface to user-space is awful. Signals? inotify provides a more usable, simple, powerful solution to file change notification: * inotify's interface is a system call that returns a fd, not SIGIO. You get a single fd, which is select()-able. * inotify has an event that says "the filesystem that the item you were watching is on was unmounted." * inotify can watch directories or files. Inotify is currently used by Beagle (a desktop search infrastructure), Gamin (a FAM replacement), and other projects. See Documentation/filesystems/inotify.txt. Signed-off-by: Robert Love <rml@novell.com> Cc: John McCutchan <ttb@tentacle.dhs.org> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-07-13 01:06:03 +04:00			`#include <linux/fsnotify.h>`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
			`/* sysctl tunables... */`
			`struct files_stat_struct files_stat = {`
			`.max_files = NR_FILE`
			`};`

			`EXPORT_SYMBOL(files_stat); /* Needed by unix.o */`

			`/* public. Not pretty! */`
			`__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);`

			`static DEFINE_SPINLOCK(filp_count_lock);`

			`/* slab constructors and destructors are called from arbitrary`
			`* context and must be fully threaded - use a local spinlock`
			`* to protect files_stat.nr_files`
			`*/`
			`void filp_ctor(void * objp, struct kmem_cache_s *cachep, unsigned long cflags)`
			`{`
			`if ((cflags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==`
			`SLAB_CTOR_CONSTRUCTOR) {`
			`unsigned long flags;`
			`spin_lock_irqsave(&filp_count_lock, flags);`
			`files_stat.nr_files++;`
			`spin_unlock_irqrestore(&filp_count_lock, flags);`
			`}`
			`}`

			`void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)`
			`{`
			`unsigned long flags;`
			`spin_lock_irqsave(&filp_count_lock, flags);`
			`files_stat.nr_files--;`
			`spin_unlock_irqrestore(&filp_count_lock, flags);`
			`}`

			`static inline void file_free(struct file *f)`
			`{`
			`kmem_cache_free(filp_cachep, f);`
			`}`

			`/* Find an unused file structure and return a pointer to it.`
			`* Returns NULL, if there are no more free file structures or`
			`* we run out of memory.`
			`*/`
			`struct file *get_empty_filp(void)`
			`{`
[PATCH] Fix of bogus file max limit messages This patch fixes incorrect and bogus kernel messages that file-max limit reached when the allocation fails Signed-Off-By: Kirill Korotaev <dev@sw.ru> Signed-Off-By: Denis Lunev <den@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-23 11:09:50 +04:00			`static int old_max;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`struct file * f;`

			`/*`
			`* Privileged users can go above max_files`
			`*/`
[PATCH] Fix of bogus file max limit messages This patch fixes incorrect and bogus kernel messages that file-max limit reached when the allocation fails Signed-Off-By: Kirill Korotaev <dev@sw.ru> Signed-Off-By: Denis Lunev <den@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-23 11:09:50 +04:00			`if (files_stat.nr_files >= files_stat.max_files &&`
			`!capable(CAP_SYS_ADMIN))`
			`goto over;`

			`f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);`
			`if (f == NULL)`
			`goto fail;`

			`memset(f, 0, sizeof(*f));`
			`if (security_file_alloc(f))`
			`goto fail_sec;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00
[PATCH] Fix of bogus file max limit messages This patch fixes incorrect and bogus kernel messages that file-max limit reached when the allocation fails Signed-Off-By: Kirill Korotaev <dev@sw.ru> Signed-Off-By: Denis Lunev <den@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-23 11:09:50 +04:00			`eventpoll_init_file(f);`
			`atomic_set(&f->f_count, 1);`
			`f->f_uid = current->fsuid;`
			`f->f_gid = current->fsgid;`
			`rwlock_init(&f->f_owner.lock);`
			`/* f->f_version: 0 */`
			`INIT_LIST_HEAD(&f->f_list);`
			`return f;`

			`over:`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`/* Ran out of filps - report that */`
[PATCH] Fix of bogus file max limit messages This patch fixes incorrect and bogus kernel messages that file-max limit reached when the allocation fails Signed-Off-By: Kirill Korotaev <dev@sw.ru> Signed-Off-By: Denis Lunev <den@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-23 11:09:50 +04:00			`if (files_stat.nr_files > old_max) {`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`printk(KERN_INFO "VFS: file-max limit %d reached\n",`
			`files_stat.max_files);`
[PATCH] Fix of bogus file max limit messages This patch fixes incorrect and bogus kernel messages that file-max limit reached when the allocation fails Signed-Off-By: Kirill Korotaev <dev@sw.ru> Signed-Off-By: Denis Lunev <den@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-23 11:09:50 +04:00			`old_max = files_stat.nr_files;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`}`
[PATCH] Fix of bogus file max limit messages This patch fixes incorrect and bogus kernel messages that file-max limit reached when the allocation fails Signed-Off-By: Kirill Korotaev <dev@sw.ru> Signed-Off-By: Denis Lunev <den@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-23 11:09:50 +04:00			`goto fail;`

			`fail_sec:`
			`file_free(f);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`fail:`
			`return NULL;`
			`}`

			`EXPORT_SYMBOL(get_empty_filp);`

			`void fastcall fput(struct file *file)`
			`{`
			`if (atomic_dec_and_test(&file->f_count))`
			`__fput(file);`
			`}`

			`EXPORT_SYMBOL(fput);`

			`/* __fput is called from task context when aio completion releases the last`
			`* last use of a struct file *. Do not use otherwise.`
			`*/`
			`void fastcall __fput(struct file *file)`
			`{`
			`struct dentry *dentry = file->f_dentry;`
			`struct vfsmount *mnt = file->f_vfsmnt;`
			`struct inode *inode = dentry->d_inode;`

			`might_sleep();`
[PATCH] inotify inotify is intended to correct the deficiencies of dnotify, particularly its inability to scale and its terrible user interface: * dnotify requires the opening of one fd per each directory that you intend to watch. This quickly results in too many open files and pins removable media, preventing unmount. * dnotify is directory-based. You only learn about changes to directories. Sure, a change to a file in a directory affects the directory, but you are then forced to keep a cache of stat structures. * dnotify's interface to user-space is awful. Signals? inotify provides a more usable, simple, powerful solution to file change notification: * inotify's interface is a system call that returns a fd, not SIGIO. You get a single fd, which is select()-able. * inotify has an event that says "the filesystem that the item you were watching is on was unmounted." * inotify can watch directories or files. Inotify is currently used by Beagle (a desktop search infrastructure), Gamin (a FAM replacement), and other projects. See Documentation/filesystems/inotify.txt. Signed-off-by: Robert Love <rml@novell.com> Cc: John McCutchan <ttb@tentacle.dhs.org> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-07-13 01:06:03 +04:00
			`fsnotify_close(file);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-17 02:20:36 +04:00			`/*`
			`* The function eventpoll_release() should be the first called`
			`* in the file cleanup chain.`
			`*/`
			`eventpoll_release(file);`
			`locks_remove_flock(file);`

			`if (file->f_op && file->f_op->release)`
			`file->f_op->release(inode, file);`
			`security_file_free(file);`
			`if (unlikely(inode->i_cdev != NULL))`
			`cdev_put(inode->i_cdev);`
			`fops_put(file->f_op);`
			`if (file->f_mode & FMODE_WRITE)`
			`put_write_access(inode);`
			`file_kill(file);`
			`file->f_dentry = NULL;`
			`file->f_vfsmnt = NULL;`
			`file_free(file);`
			`dput(dentry);`
			`mntput(mnt);`
			`}`

			`struct file fastcall *fget(unsigned int fd)`
			`{`
			`struct file *file;`
			`struct files_struct *files = current->files;`

			`spin_lock(&files->file_lock);`
			`file = fcheck_files(files, fd);`
			`if (file)`
			`get_file(file);`
			`spin_unlock(&files->file_lock);`
			`return file;`
			`}`

			`EXPORT_SYMBOL(fget);`

			`/*`
			`* Lightweight file lookup - no refcnt increment if fd table isn't shared.`
			`* You can use this only if it is guranteed that the current task already`
			`* holds a refcnt to that file. That check has to be done at fget() only`
			`* and a flag is returned to be passed to the corresponding fput_light().`
			`* There must not be a cloning between an fget_light/fput_light pair.`
			`*/`
			`struct file fastcall fget_light(unsigned int fd, int fput_needed)`
			`{`
			`struct file *file;`
			`struct files_struct *files = current->files;`

			`*fput_needed = 0;`
			`if (likely((atomic_read(&files->count) == 1))) {`
			`file = fcheck_files(files, fd);`
			`} else {`
			`spin_lock(&files->file_lock);`
			`file = fcheck_files(files, fd);`
			`if (file) {`
			`get_file(file);`
			`*fput_needed = 1;`
			`}`
			`spin_unlock(&files->file_lock);`
			`}`
			`return file;`
			`}`


			`void put_filp(struct file *file)`
			`{`
			`if (atomic_dec_and_test(&file->f_count)) {`
			`security_file_free(file);`
			`file_kill(file);`
			`file_free(file);`
			`}`
			`}`

			`void file_move(struct file file, struct list_head list)`
			`{`
			`if (!list)`
			`return;`
			`file_list_lock();`
			`list_move(&file->f_list, list);`
			`file_list_unlock();`
			`}`

			`void file_kill(struct file *file)`
			`{`
			`if (!list_empty(&file->f_list)) {`
			`file_list_lock();`
			`list_del_init(&file->f_list);`
			`file_list_unlock();`
			`}`
			`}`

			`int fs_may_remount_ro(struct super_block *sb)`
			`{`
			`struct list_head *p;`

			`/* Check that no files are currently opened for writing. */`
			`file_list_lock();`
			`list_for_each(p, &sb->s_files) {`
			`struct file *file = list_entry(p, struct file, f_list);`
			`struct inode *inode = file->f_dentry->d_inode;`

			`/* File with pending delete? */`
			`if (inode->i_nlink == 0)`
			`goto too_bad;`

			`/* Writeable file? */`
			`if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))`
			`goto too_bad;`
			`}`
			`file_list_unlock();`
			`return 1; /* Tis' cool bro. */`
			`too_bad:`
			`file_list_unlock();`
			`return 0;`
			`}`

			`void __init files_init(unsigned long mempages)`
			`{`
			`int n;`
			`/* One file with associated inode and dcache is very roughly 1K.`
			`* Per default don't use more than 10% of our memory for files.`
			`*/`

			`n = (mempages * (PAGE_SIZE / 1024)) / 10;`
			`files_stat.max_files = n;`
			`if (files_stat.max_files < NR_FILE)`
			`files_stat.max_files = NR_FILE;`
			`}`