8a23eb804c
This has been discussed several times, and now filesystem people are talking about doing it individually at the filesystem layer, so head that off at the pass and just do it in getdents{64}(). This is partially based on a patch by Jann Horn, but checks for NUL bytes as well, and somewhat simplified. There's also commentary about how it might be better if invalid names due to filesystem corruption don't cause an immediate failure, but only an error at the end of the readdir(), so that people can still see the filenames that are ok. There's also been discussion about just how much POSIX strictly speaking requires this since it's about filesystem corruption. It's really more "protect user space from bad behavior" as pointed out by Jann. But since Eric Biederman looked up the POSIX wording, here it is for context: "From readdir: The readdir() function shall return a pointer to a structure representing the directory entry at the current position in the directory stream specified by the argument dirp, and position the directory stream at the next entry. It shall return a null pointer upon reaching the end of the directory stream. The structure dirent defined in the <dirent.h> header describes a directory entry. From definitions: 3.129 Directory Entry (or Link) An object that associates a filename with a file. Several directory entries can associate names with the same file. ... 3.169 Filename A name consisting of 1 to {NAME_MAX} bytes used to name a file. The characters composing the name may be selected from the set of all character values excluding the slash character and the null byte. The filenames dot and dot-dot have special meaning. A filename is sometimes referred to as a 'pathname component'." Note that I didn't bother adding the checks to any legacy interfaces that nobody uses. Also note that if this ends up being noticeable as a performance regression, we can fix that to do a much more optimized model that checks for both NUL and '/' at the same time one word at a time. We haven't really tended to optimize 'memchr()', and it only checks for one pattern at a time anyway, and we really _should_ check for NUL too (but see the comment about "soft errors" in the code about why it currently only checks for '/') See the CONFIG_DCACHE_WORD_ACCESS case of hash_name() for how the name lookup code looks for pathname terminating characters in parallel. Link: https://lore.kernel.org/lkml/20190118161440.220134-2-jannh@google.com/ Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Jann Horn <jannh@google.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
597 lines
15 KiB
C
597 lines
15 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* linux/fs/readdir.c
|
|
*
|
|
* Copyright (C) 1995 Linus Torvalds
|
|
*/
|
|
|
|
#include <linux/stddef.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/time.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/stat.h>
|
|
#include <linux/file.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/fsnotify.h>
|
|
#include <linux/dirent.h>
|
|
#include <linux/security.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/unistd.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/unaligned.h>
|
|
|
|
/*
|
|
* Note the "unsafe_put_user() semantics: we goto a
|
|
* label for errors.
|
|
*
|
|
* Also note how we use a "while()" loop here, even though
|
|
* only the biggest size needs to loop. The compiler (well,
|
|
* at least gcc) is smart enough to turn the smaller sizes
|
|
* into just if-statements, and this way we don't need to
|
|
* care whether 'u64' or 'u32' is the biggest size.
|
|
*/
|
|
#define unsafe_copy_loop(dst, src, len, type, label) \
|
|
while (len >= sizeof(type)) { \
|
|
unsafe_put_user(get_unaligned((type *)src), \
|
|
(type __user *)dst, label); \
|
|
dst += sizeof(type); \
|
|
src += sizeof(type); \
|
|
len -= sizeof(type); \
|
|
}
|
|
|
|
/*
|
|
* We avoid doing 64-bit copies on 32-bit architectures. They
|
|
* might be better, but the component names are mostly small,
|
|
* and the 64-bit cases can end up being much more complex and
|
|
* put much more register pressure on the code, so it's likely
|
|
* not worth the pain of unaligned accesses etc.
|
|
*
|
|
* So limit the copies to "unsigned long" size. I did verify
|
|
* that at least the x86-32 case is ok without this limiting,
|
|
* but I worry about random other legacy 32-bit cases that
|
|
* might not do as well.
|
|
*/
|
|
#define unsafe_copy_type(dst, src, len, type, label) do { \
|
|
if (sizeof(type) <= sizeof(unsigned long)) \
|
|
unsafe_copy_loop(dst, src, len, type, label); \
|
|
} while (0)
|
|
|
|
/*
|
|
* Copy the dirent name to user space, and NUL-terminate
|
|
* it. This should not be a function call, since we're doing
|
|
* the copy inside a "user_access_begin/end()" section.
|
|
*/
|
|
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
|
|
char __user *dst = (_dst); \
|
|
const char *src = (_src); \
|
|
size_t len = (_len); \
|
|
unsafe_copy_type(dst, src, len, u64, label); \
|
|
unsafe_copy_type(dst, src, len, u32, label); \
|
|
unsafe_copy_type(dst, src, len, u16, label); \
|
|
unsafe_copy_type(dst, src, len, u8, label); \
|
|
unsafe_put_user(0, dst, label); \
|
|
} while (0)
|
|
|
|
|
|
int iterate_dir(struct file *file, struct dir_context *ctx)
|
|
{
|
|
struct inode *inode = file_inode(file);
|
|
bool shared = false;
|
|
int res = -ENOTDIR;
|
|
if (file->f_op->iterate_shared)
|
|
shared = true;
|
|
else if (!file->f_op->iterate)
|
|
goto out;
|
|
|
|
res = security_file_permission(file, MAY_READ);
|
|
if (res)
|
|
goto out;
|
|
|
|
if (shared)
|
|
res = down_read_killable(&inode->i_rwsem);
|
|
else
|
|
res = down_write_killable(&inode->i_rwsem);
|
|
if (res)
|
|
goto out;
|
|
|
|
res = -ENOENT;
|
|
if (!IS_DEADDIR(inode)) {
|
|
ctx->pos = file->f_pos;
|
|
if (shared)
|
|
res = file->f_op->iterate_shared(file, ctx);
|
|
else
|
|
res = file->f_op->iterate(file, ctx);
|
|
file->f_pos = ctx->pos;
|
|
fsnotify_access(file);
|
|
file_accessed(file);
|
|
}
|
|
if (shared)
|
|
inode_unlock_shared(inode);
|
|
else
|
|
inode_unlock(inode);
|
|
out:
|
|
return res;
|
|
}
|
|
EXPORT_SYMBOL(iterate_dir);
|
|
|
|
/*
|
|
* POSIX says that a dirent name cannot contain NULL or a '/'.
|
|
*
|
|
* It's not 100% clear what we should really do in this case.
|
|
* The filesystem is clearly corrupted, but returning a hard
|
|
* error means that you now don't see any of the other names
|
|
* either, so that isn't a perfect alternative.
|
|
*
|
|
* And if you return an error, what error do you use? Several
|
|
* filesystems seem to have decided on EUCLEAN being the error
|
|
* code for EFSCORRUPTED, and that may be the error to use. Or
|
|
* just EIO, which is perhaps more obvious to users.
|
|
*
|
|
* In order to see the other file names in the directory, the
|
|
* caller might want to make this a "soft" error: skip the
|
|
* entry, and return the error at the end instead.
|
|
*
|
|
* Note that this should likely do a "memchr(name, 0, len)"
|
|
* check too, since that would be filesystem corruption as
|
|
* well. However, that case can't actually confuse user space,
|
|
* which has to do a strlen() on the name anyway to find the
|
|
* filename length, and the above "soft error" worry means
|
|
* that it's probably better left alone until we have that
|
|
* issue clarified.
|
|
*/
|
|
static int verify_dirent_name(const char *name, int len)
|
|
{
|
|
if (WARN_ON_ONCE(!len))
|
|
return -EIO;
|
|
if (WARN_ON_ONCE(memchr(name, '/', len)))
|
|
return -EIO;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Traditional linux readdir() handling..
|
|
*
|
|
* "count=1" is a special case, meaning that the buffer is one
|
|
* dirent-structure in size and that the code can't handle more
|
|
* anyway. Thus the special "fillonedir()" function for that
|
|
* case (the low-level handlers don't need to care about this).
|
|
*/
|
|
|
|
#ifdef __ARCH_WANT_OLD_READDIR
|
|
|
|
struct old_linux_dirent {
|
|
unsigned long d_ino;
|
|
unsigned long d_offset;
|
|
unsigned short d_namlen;
|
|
char d_name[1];
|
|
};
|
|
|
|
struct readdir_callback {
|
|
struct dir_context ctx;
|
|
struct old_linux_dirent __user * dirent;
|
|
int result;
|
|
};
|
|
|
|
static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
|
|
loff_t offset, u64 ino, unsigned int d_type)
|
|
{
|
|
struct readdir_callback *buf =
|
|
container_of(ctx, struct readdir_callback, ctx);
|
|
struct old_linux_dirent __user * dirent;
|
|
unsigned long d_ino;
|
|
|
|
if (buf->result)
|
|
return -EINVAL;
|
|
d_ino = ino;
|
|
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
|
|
buf->result = -EOVERFLOW;
|
|
return -EOVERFLOW;
|
|
}
|
|
buf->result++;
|
|
dirent = buf->dirent;
|
|
if (!access_ok(dirent,
|
|
(unsigned long)(dirent->d_name + namlen + 1) -
|
|
(unsigned long)dirent))
|
|
goto efault;
|
|
if ( __put_user(d_ino, &dirent->d_ino) ||
|
|
__put_user(offset, &dirent->d_offset) ||
|
|
__put_user(namlen, &dirent->d_namlen) ||
|
|
__copy_to_user(dirent->d_name, name, namlen) ||
|
|
__put_user(0, dirent->d_name + namlen))
|
|
goto efault;
|
|
return 0;
|
|
efault:
|
|
buf->result = -EFAULT;
|
|
return -EFAULT;
|
|
}
|
|
|
|
SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
|
|
struct old_linux_dirent __user *, dirent, unsigned int, count)
|
|
{
|
|
int error;
|
|
struct fd f = fdget_pos(fd);
|
|
struct readdir_callback buf = {
|
|
.ctx.actor = fillonedir,
|
|
.dirent = dirent
|
|
};
|
|
|
|
if (!f.file)
|
|
return -EBADF;
|
|
|
|
error = iterate_dir(f.file, &buf.ctx);
|
|
if (buf.result)
|
|
error = buf.result;
|
|
|
|
fdput_pos(f);
|
|
return error;
|
|
}
|
|
|
|
#endif /* __ARCH_WANT_OLD_READDIR */
|
|
|
|
/*
|
|
* New, all-improved, singing, dancing, iBCS2-compliant getdents()
|
|
* interface.
|
|
*/
|
|
struct linux_dirent {
|
|
unsigned long d_ino;
|
|
unsigned long d_off;
|
|
unsigned short d_reclen;
|
|
char d_name[1];
|
|
};
|
|
|
|
struct getdents_callback {
|
|
struct dir_context ctx;
|
|
struct linux_dirent __user * current_dir;
|
|
struct linux_dirent __user * previous;
|
|
int count;
|
|
int error;
|
|
};
|
|
|
|
static int filldir(struct dir_context *ctx, const char *name, int namlen,
|
|
loff_t offset, u64 ino, unsigned int d_type)
|
|
{
|
|
struct linux_dirent __user * dirent;
|
|
struct getdents_callback *buf =
|
|
container_of(ctx, struct getdents_callback, ctx);
|
|
unsigned long d_ino;
|
|
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
|
|
sizeof(long));
|
|
|
|
buf->error = verify_dirent_name(name, namlen);
|
|
if (unlikely(buf->error))
|
|
return buf->error;
|
|
buf->error = -EINVAL; /* only used if we fail.. */
|
|
if (reclen > buf->count)
|
|
return -EINVAL;
|
|
d_ino = ino;
|
|
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
|
|
buf->error = -EOVERFLOW;
|
|
return -EOVERFLOW;
|
|
}
|
|
dirent = buf->previous;
|
|
if (dirent && signal_pending(current))
|
|
return -EINTR;
|
|
|
|
/*
|
|
* Note! This range-checks 'previous' (which may be NULL).
|
|
* The real range was checked in getdents
|
|
*/
|
|
if (!user_access_begin(dirent, sizeof(*dirent)))
|
|
goto efault;
|
|
if (dirent)
|
|
unsafe_put_user(offset, &dirent->d_off, efault_end);
|
|
dirent = buf->current_dir;
|
|
unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
|
|
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
|
|
unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
|
|
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
|
|
user_access_end();
|
|
|
|
buf->previous = dirent;
|
|
dirent = (void __user *)dirent + reclen;
|
|
buf->current_dir = dirent;
|
|
buf->count -= reclen;
|
|
return 0;
|
|
efault_end:
|
|
user_access_end();
|
|
efault:
|
|
buf->error = -EFAULT;
|
|
return -EFAULT;
|
|
}
|
|
|
|
SYSCALL_DEFINE3(getdents, unsigned int, fd,
|
|
struct linux_dirent __user *, dirent, unsigned int, count)
|
|
{
|
|
struct fd f;
|
|
struct linux_dirent __user * lastdirent;
|
|
struct getdents_callback buf = {
|
|
.ctx.actor = filldir,
|
|
.count = count,
|
|
.current_dir = dirent
|
|
};
|
|
int error;
|
|
|
|
if (!access_ok(dirent, count))
|
|
return -EFAULT;
|
|
|
|
f = fdget_pos(fd);
|
|
if (!f.file)
|
|
return -EBADF;
|
|
|
|
error = iterate_dir(f.file, &buf.ctx);
|
|
if (error >= 0)
|
|
error = buf.error;
|
|
lastdirent = buf.previous;
|
|
if (lastdirent) {
|
|
if (put_user(buf.ctx.pos, &lastdirent->d_off))
|
|
error = -EFAULT;
|
|
else
|
|
error = count - buf.count;
|
|
}
|
|
fdput_pos(f);
|
|
return error;
|
|
}
|
|
|
|
struct getdents_callback64 {
|
|
struct dir_context ctx;
|
|
struct linux_dirent64 __user * current_dir;
|
|
struct linux_dirent64 __user * previous;
|
|
int count;
|
|
int error;
|
|
};
|
|
|
|
static int filldir64(struct dir_context *ctx, const char *name, int namlen,
|
|
loff_t offset, u64 ino, unsigned int d_type)
|
|
{
|
|
struct linux_dirent64 __user *dirent;
|
|
struct getdents_callback64 *buf =
|
|
container_of(ctx, struct getdents_callback64, ctx);
|
|
int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
|
|
sizeof(u64));
|
|
|
|
buf->error = verify_dirent_name(name, namlen);
|
|
if (unlikely(buf->error))
|
|
return buf->error;
|
|
buf->error = -EINVAL; /* only used if we fail.. */
|
|
if (reclen > buf->count)
|
|
return -EINVAL;
|
|
dirent = buf->previous;
|
|
if (dirent && signal_pending(current))
|
|
return -EINTR;
|
|
|
|
/*
|
|
* Note! This range-checks 'previous' (which may be NULL).
|
|
* The real range was checked in getdents
|
|
*/
|
|
if (!user_access_begin(dirent, sizeof(*dirent)))
|
|
goto efault;
|
|
if (dirent)
|
|
unsafe_put_user(offset, &dirent->d_off, efault_end);
|
|
dirent = buf->current_dir;
|
|
unsafe_put_user(ino, &dirent->d_ino, efault_end);
|
|
unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
|
|
unsafe_put_user(d_type, &dirent->d_type, efault_end);
|
|
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
|
|
user_access_end();
|
|
|
|
buf->previous = dirent;
|
|
dirent = (void __user *)dirent + reclen;
|
|
buf->current_dir = dirent;
|
|
buf->count -= reclen;
|
|
return 0;
|
|
efault_end:
|
|
user_access_end();
|
|
efault:
|
|
buf->error = -EFAULT;
|
|
return -EFAULT;
|
|
}
|
|
|
|
int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent,
|
|
unsigned int count)
|
|
{
|
|
struct fd f;
|
|
struct linux_dirent64 __user * lastdirent;
|
|
struct getdents_callback64 buf = {
|
|
.ctx.actor = filldir64,
|
|
.count = count,
|
|
.current_dir = dirent
|
|
};
|
|
int error;
|
|
|
|
if (!access_ok(dirent, count))
|
|
return -EFAULT;
|
|
|
|
f = fdget_pos(fd);
|
|
if (!f.file)
|
|
return -EBADF;
|
|
|
|
error = iterate_dir(f.file, &buf.ctx);
|
|
if (error >= 0)
|
|
error = buf.error;
|
|
lastdirent = buf.previous;
|
|
if (lastdirent) {
|
|
typeof(lastdirent->d_off) d_off = buf.ctx.pos;
|
|
if (__put_user(d_off, &lastdirent->d_off))
|
|
error = -EFAULT;
|
|
else
|
|
error = count - buf.count;
|
|
}
|
|
fdput_pos(f);
|
|
return error;
|
|
}
|
|
|
|
|
|
SYSCALL_DEFINE3(getdents64, unsigned int, fd,
|
|
struct linux_dirent64 __user *, dirent, unsigned int, count)
|
|
{
|
|
return ksys_getdents64(fd, dirent, count);
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
|
|
struct compat_old_linux_dirent {
|
|
compat_ulong_t d_ino;
|
|
compat_ulong_t d_offset;
|
|
unsigned short d_namlen;
|
|
char d_name[1];
|
|
};
|
|
|
|
struct compat_readdir_callback {
|
|
struct dir_context ctx;
|
|
struct compat_old_linux_dirent __user *dirent;
|
|
int result;
|
|
};
|
|
|
|
static int compat_fillonedir(struct dir_context *ctx, const char *name,
|
|
int namlen, loff_t offset, u64 ino,
|
|
unsigned int d_type)
|
|
{
|
|
struct compat_readdir_callback *buf =
|
|
container_of(ctx, struct compat_readdir_callback, ctx);
|
|
struct compat_old_linux_dirent __user *dirent;
|
|
compat_ulong_t d_ino;
|
|
|
|
if (buf->result)
|
|
return -EINVAL;
|
|
d_ino = ino;
|
|
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
|
|
buf->result = -EOVERFLOW;
|
|
return -EOVERFLOW;
|
|
}
|
|
buf->result++;
|
|
dirent = buf->dirent;
|
|
if (!access_ok(dirent,
|
|
(unsigned long)(dirent->d_name + namlen + 1) -
|
|
(unsigned long)dirent))
|
|
goto efault;
|
|
if ( __put_user(d_ino, &dirent->d_ino) ||
|
|
__put_user(offset, &dirent->d_offset) ||
|
|
__put_user(namlen, &dirent->d_namlen) ||
|
|
__copy_to_user(dirent->d_name, name, namlen) ||
|
|
__put_user(0, dirent->d_name + namlen))
|
|
goto efault;
|
|
return 0;
|
|
efault:
|
|
buf->result = -EFAULT;
|
|
return -EFAULT;
|
|
}
|
|
|
|
COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
|
|
struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
|
|
{
|
|
int error;
|
|
struct fd f = fdget_pos(fd);
|
|
struct compat_readdir_callback buf = {
|
|
.ctx.actor = compat_fillonedir,
|
|
.dirent = dirent
|
|
};
|
|
|
|
if (!f.file)
|
|
return -EBADF;
|
|
|
|
error = iterate_dir(f.file, &buf.ctx);
|
|
if (buf.result)
|
|
error = buf.result;
|
|
|
|
fdput_pos(f);
|
|
return error;
|
|
}
|
|
|
|
struct compat_linux_dirent {
|
|
compat_ulong_t d_ino;
|
|
compat_ulong_t d_off;
|
|
unsigned short d_reclen;
|
|
char d_name[1];
|
|
};
|
|
|
|
struct compat_getdents_callback {
|
|
struct dir_context ctx;
|
|
struct compat_linux_dirent __user *current_dir;
|
|
struct compat_linux_dirent __user *previous;
|
|
int count;
|
|
int error;
|
|
};
|
|
|
|
static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
|
|
loff_t offset, u64 ino, unsigned int d_type)
|
|
{
|
|
struct compat_linux_dirent __user * dirent;
|
|
struct compat_getdents_callback *buf =
|
|
container_of(ctx, struct compat_getdents_callback, ctx);
|
|
compat_ulong_t d_ino;
|
|
int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
|
|
namlen + 2, sizeof(compat_long_t));
|
|
|
|
buf->error = -EINVAL; /* only used if we fail.. */
|
|
if (reclen > buf->count)
|
|
return -EINVAL;
|
|
d_ino = ino;
|
|
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
|
|
buf->error = -EOVERFLOW;
|
|
return -EOVERFLOW;
|
|
}
|
|
dirent = buf->previous;
|
|
if (dirent) {
|
|
if (signal_pending(current))
|
|
return -EINTR;
|
|
if (__put_user(offset, &dirent->d_off))
|
|
goto efault;
|
|
}
|
|
dirent = buf->current_dir;
|
|
if (__put_user(d_ino, &dirent->d_ino))
|
|
goto efault;
|
|
if (__put_user(reclen, &dirent->d_reclen))
|
|
goto efault;
|
|
if (copy_to_user(dirent->d_name, name, namlen))
|
|
goto efault;
|
|
if (__put_user(0, dirent->d_name + namlen))
|
|
goto efault;
|
|
if (__put_user(d_type, (char __user *) dirent + reclen - 1))
|
|
goto efault;
|
|
buf->previous = dirent;
|
|
dirent = (void __user *)dirent + reclen;
|
|
buf->current_dir = dirent;
|
|
buf->count -= reclen;
|
|
return 0;
|
|
efault:
|
|
buf->error = -EFAULT;
|
|
return -EFAULT;
|
|
}
|
|
|
|
COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
|
|
struct compat_linux_dirent __user *, dirent, unsigned int, count)
|
|
{
|
|
struct fd f;
|
|
struct compat_linux_dirent __user * lastdirent;
|
|
struct compat_getdents_callback buf = {
|
|
.ctx.actor = compat_filldir,
|
|
.current_dir = dirent,
|
|
.count = count
|
|
};
|
|
int error;
|
|
|
|
if (!access_ok(dirent, count))
|
|
return -EFAULT;
|
|
|
|
f = fdget_pos(fd);
|
|
if (!f.file)
|
|
return -EBADF;
|
|
|
|
error = iterate_dir(f.file, &buf.ctx);
|
|
if (error >= 0)
|
|
error = buf.error;
|
|
lastdirent = buf.previous;
|
|
if (lastdirent) {
|
|
if (put_user(buf.ctx.pos, &lastdirent->d_off))
|
|
error = -EFAULT;
|
|
else
|
|
error = count - buf.count;
|
|
}
|
|
fdput_pos(f);
|
|
return error;
|
|
}
|
|
#endif
|