Replace the fd_sets in struct fdtable with an array of unsigned longs
Replace the fd_sets in struct fdtable with an array of unsigned longs and then use the standard non-atomic bit operations rather than the FD_* macros.

This:

(1) Removes the abuses of struct fd_set:

    (a) Since we don't want to allocate a full fd_set the vast majority of the time, we actually, in effect, just allocate a just-big-enough array of unsigned longs and cast it to an fd_set type - so why bother with the fd_set at all?

    (b) Some places outside of the core fdtable handling code (such as SELinux) want to look inside the array of unsigned longs hidden inside the fd_set struct for more efficient iteration over the entire set.

(2) Eliminates the use of FD_*() macros in the kernel completely.

(3) Permits the __FD_*() macros to be deleted entirely where not exposed to userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
1dce27c5aa
commit
1fd36adcd9
@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files)
|
|||||||
fdt = files_fdtable(files);
|
fdt = files_fdtable(files);
|
||||||
if (i >= fdt->max_fds)
|
if (i >= fdt->max_fds)
|
||||||
break;
|
break;
|
||||||
set = fdt->close_on_exec->fds_bits[j];
|
set = fdt->close_on_exec[j];
|
||||||
if (!set)
|
if (!set)
|
||||||
continue;
|
continue;
|
||||||
fdt->close_on_exec->fds_bits[j] = 0;
|
fdt->close_on_exec[j] = 0;
|
||||||
spin_unlock(&files->file_lock);
|
spin_unlock(&files->file_lock);
|
||||||
for ( ; set ; i++,set >>= 1) {
|
for ( ; set ; i++,set >>= 1) {
|
||||||
if (set & 1) {
|
if (set & 1) {
|
||||||
|
46
fs/file.c
46
fs/file.c
@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
|
|||||||
*/
|
*/
|
||||||
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
|
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
|
||||||
|
|
||||||
static void *alloc_fdmem(unsigned int size)
|
static void *alloc_fdmem(size_t size)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Very large allocations can stress page reclaim, so fall back to
|
* Very large allocations can stress page reclaim, so fall back to
|
||||||
@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
|
|||||||
static struct fdtable * alloc_fdtable(unsigned int nr)
|
static struct fdtable * alloc_fdtable(unsigned int nr)
|
||||||
{
|
{
|
||||||
struct fdtable *fdt;
|
struct fdtable *fdt;
|
||||||
char *data;
|
void *data;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Figure out how many fds we actually want to support in this fdtable.
|
* Figure out how many fds we actually want to support in this fdtable.
|
||||||
@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
|
|||||||
data = alloc_fdmem(nr * sizeof(struct file *));
|
data = alloc_fdmem(nr * sizeof(struct file *));
|
||||||
if (!data)
|
if (!data)
|
||||||
goto out_fdt;
|
goto out_fdt;
|
||||||
fdt->fd = (struct file **)data;
|
fdt->fd = data;
|
||||||
data = alloc_fdmem(max_t(unsigned int,
|
|
||||||
|
data = alloc_fdmem(max_t(size_t,
|
||||||
2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
|
2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
|
||||||
if (!data)
|
if (!data)
|
||||||
goto out_arr;
|
goto out_arr;
|
||||||
fdt->open_fds = (fd_set *)data;
|
fdt->open_fds = data;
|
||||||
data += nr / BITS_PER_BYTE;
|
data += nr / BITS_PER_BYTE;
|
||||||
fdt->close_on_exec = (fd_set *)data;
|
fdt->close_on_exec = data;
|
||||||
fdt->next = NULL;
|
fdt->next = NULL;
|
||||||
|
|
||||||
return fdt;
|
return fdt;
|
||||||
@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* Find the last open fd */
|
/* Find the last open fd */
|
||||||
for (i = size/(8*sizeof(long)); i > 0; ) {
|
for (i = size / BITS_PER_LONG; i > 0; ) {
|
||||||
if (fdt->open_fds->fds_bits[--i])
|
if (fdt->open_fds[--i])
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
i = (i+1) * 8 * sizeof(long);
|
i = (i + 1) * BITS_PER_LONG;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
|||||||
newf->next_fd = 0;
|
newf->next_fd = 0;
|
||||||
new_fdt = &newf->fdtab;
|
new_fdt = &newf->fdtab;
|
||||||
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
||||||
new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
|
new_fdt->close_on_exec = newf->close_on_exec_init;
|
||||||
new_fdt->open_fds = (fd_set *)&newf->open_fds_init;
|
new_fdt->open_fds = newf->open_fds_init;
|
||||||
new_fdt->fd = &newf->fd_array[0];
|
new_fdt->fd = &newf->fd_array[0];
|
||||||
new_fdt->next = NULL;
|
new_fdt->next = NULL;
|
||||||
|
|
||||||
@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
|||||||
old_fds = old_fdt->fd;
|
old_fds = old_fdt->fd;
|
||||||
new_fds = new_fdt->fd;
|
new_fds = new_fdt->fd;
|
||||||
|
|
||||||
memcpy(new_fdt->open_fds->fds_bits,
|
memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
|
||||||
old_fdt->open_fds->fds_bits, open_files/8);
|
memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
|
||||||
memcpy(new_fdt->close_on_exec->fds_bits,
|
|
||||||
old_fdt->close_on_exec->fds_bits, open_files/8);
|
|
||||||
|
|
||||||
for (i = open_files; i != 0; i--) {
|
for (i = open_files; i != 0; i--) {
|
||||||
struct file *f = *old_fds++;
|
struct file *f = *old_fds++;
|
||||||
@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
|||||||
memset(new_fds, 0, size);
|
memset(new_fds, 0, size);
|
||||||
|
|
||||||
if (new_fdt->max_fds > open_files) {
|
if (new_fdt->max_fds > open_files) {
|
||||||
int left = (new_fdt->max_fds-open_files)/8;
|
int left = (new_fdt->max_fds - open_files) / 8;
|
||||||
int start = open_files / (8 * sizeof(unsigned long));
|
int start = open_files / BITS_PER_LONG;
|
||||||
|
|
||||||
memset(&new_fdt->open_fds->fds_bits[start], 0, left);
|
memset(&new_fdt->open_fds[start], 0, left);
|
||||||
memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
|
memset(&new_fdt->close_on_exec[start], 0, left);
|
||||||
}
|
}
|
||||||
|
|
||||||
rcu_assign_pointer(newf->fdt, new_fdt);
|
rcu_assign_pointer(newf->fdt, new_fdt);
|
||||||
@ -419,8 +418,8 @@ struct files_struct init_files = {
|
|||||||
.fdtab = {
|
.fdtab = {
|
||||||
.max_fds = NR_OPEN_DEFAULT,
|
.max_fds = NR_OPEN_DEFAULT,
|
||||||
.fd = &init_files.fd_array[0],
|
.fd = &init_files.fd_array[0],
|
||||||
.close_on_exec = (fd_set *)&init_files.close_on_exec_init,
|
.close_on_exec = init_files.close_on_exec_init,
|
||||||
.open_fds = (fd_set *)&init_files.open_fds_init,
|
.open_fds = init_files.open_fds_init,
|
||||||
},
|
},
|
||||||
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
|
.file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
|
||||||
};
|
};
|
||||||
@ -443,8 +442,7 @@ repeat:
|
|||||||
fd = files->next_fd;
|
fd = files->next_fd;
|
||||||
|
|
||||||
if (fd < fdt->max_fds)
|
if (fd < fdt->max_fds)
|
||||||
fd = find_next_zero_bit(fdt->open_fds->fds_bits,
|
fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
|
||||||
fdt->max_fds, fd);
|
|
||||||
|
|
||||||
error = expand_files(files, fd);
|
error = expand_files(files, fd);
|
||||||
if (error < 0)
|
if (error < 0)
|
||||||
|
@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
|
|||||||
set = ~(~0UL << (n & (__NFDBITS-1)));
|
set = ~(~0UL << (n & (__NFDBITS-1)));
|
||||||
n /= __NFDBITS;
|
n /= __NFDBITS;
|
||||||
fdt = files_fdtable(current->files);
|
fdt = files_fdtable(current->files);
|
||||||
open_fds = fdt->open_fds->fds_bits+n;
|
open_fds = fdt->open_fds + n;
|
||||||
max = 0;
|
max = 0;
|
||||||
if (set) {
|
if (set) {
|
||||||
set &= BITS(fds, n);
|
set &= BITS(fds, n);
|
||||||
|
@ -21,51 +21,43 @@
|
|||||||
*/
|
*/
|
||||||
#define NR_OPEN_DEFAULT BITS_PER_LONG
|
#define NR_OPEN_DEFAULT BITS_PER_LONG
|
||||||
|
|
||||||
/*
|
|
||||||
* The embedded_fd_set is a small fd_set,
|
|
||||||
* suitable for most tasks (which open <= BITS_PER_LONG files)
|
|
||||||
*/
|
|
||||||
struct embedded_fd_set {
|
|
||||||
unsigned long fds_bits[1];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct fdtable {
|
struct fdtable {
|
||||||
unsigned int max_fds;
|
unsigned int max_fds;
|
||||||
struct file __rcu **fd; /* current fd array */
|
struct file __rcu **fd; /* current fd array */
|
||||||
fd_set *close_on_exec;
|
unsigned long *close_on_exec;
|
||||||
fd_set *open_fds;
|
unsigned long *open_fds;
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
struct fdtable *next;
|
struct fdtable *next;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
|
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
FD_SET(fd, fdt->close_on_exec);
|
__set_bit(fd, fdt->close_on_exec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
|
static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
FD_CLR(fd, fdt->close_on_exec);
|
__clear_bit(fd, fdt->close_on_exec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool close_on_exec(int fd, const struct fdtable *fdt)
|
static inline bool close_on_exec(int fd, const struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
return FD_ISSET(fd, fdt->close_on_exec);
|
return test_bit(fd, fdt->close_on_exec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __set_open_fd(int fd, struct fdtable *fdt)
|
static inline void __set_open_fd(int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
FD_SET(fd, fdt->open_fds);
|
__set_bit(fd, fdt->open_fds);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __clear_open_fd(int fd, struct fdtable *fdt)
|
static inline void __clear_open_fd(int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
FD_CLR(fd, fdt->open_fds);
|
__clear_bit(fd, fdt->open_fds);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool fd_is_open(int fd, const struct fdtable *fdt)
|
static inline bool fd_is_open(int fd, const struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
return FD_ISSET(fd, fdt->open_fds);
|
return test_bit(fd, fdt->open_fds);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -83,8 +75,8 @@ struct files_struct {
|
|||||||
*/
|
*/
|
||||||
spinlock_t file_lock ____cacheline_aligned_in_smp;
|
spinlock_t file_lock ____cacheline_aligned_in_smp;
|
||||||
int next_fd;
|
int next_fd;
|
||||||
struct embedded_fd_set close_on_exec_init;
|
unsigned long close_on_exec_init[1];
|
||||||
struct embedded_fd_set open_fds_init;
|
unsigned long open_fds_init[1];
|
||||||
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
|
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -473,7 +473,7 @@ static void close_files(struct files_struct * files)
|
|||||||
i = j * __NFDBITS;
|
i = j * __NFDBITS;
|
||||||
if (i >= fdt->max_fds)
|
if (i >= fdt->max_fds)
|
||||||
break;
|
break;
|
||||||
set = fdt->open_fds->fds_bits[j++];
|
set = fdt->open_fds[j++];
|
||||||
while (set) {
|
while (set) {
|
||||||
if (set & 1) {
|
if (set & 1) {
|
||||||
struct file * file = xchg(&fdt->fd[i], NULL);
|
struct file * file = xchg(&fdt->fd[i], NULL);
|
||||||
|
@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
|
|||||||
fdt = files_fdtable(files);
|
fdt = files_fdtable(files);
|
||||||
if (i >= fdt->max_fds)
|
if (i >= fdt->max_fds)
|
||||||
break;
|
break;
|
||||||
set = fdt->open_fds->fds_bits[j];
|
set = fdt->open_fds[j];
|
||||||
if (!set)
|
if (!set)
|
||||||
continue;
|
continue;
|
||||||
spin_unlock(&files->file_lock);
|
spin_unlock(&files->file_lock);
|
||||||
|
Loading…
Reference in New Issue
Block a user