linux/fs/ksmbd/vfs.c

1914 lines
44 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2016 Namjae Jeon <linkinjeon@kernel.org>
* Copyright (C) 2018 Samsung Electronics Co., Ltd.
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/uaccess.h>
#include <linux/backing-dev.h>
#include <linux/writeback.h>
#include <linux/xattr.h>
#include <linux/falloc.h>
#include <linux/fsnotify.h>
#include <linux/dcache.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched/xacct.h>
#include <linux/crc32c.h>
#include "../internal.h" /* for vfs_path_lookup */
#include "glob.h"
#include "oplock.h"
#include "connection.h"
#include "vfs.h"
#include "vfs_cache.h"
#include "smbacl.h"
#include "ndr.h"
#include "auth.h"
#include "misc.h"
#include "smb_common.h"
#include "mgmt/share_config.h"
#include "mgmt/tree_connect.h"
#include "mgmt/user_session.h"
#include "mgmt/user_config.h"
static char *extract_last_component(char *path)
{
char *p = strrchr(path, '/');
if (p && p[1] != '\0') {
*p = '\0';
p++;
} else {
p = NULL;
}
return p;
}
static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
struct inode *parent_inode,
struct inode *inode)
{
if (!test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_INHERIT_OWNER))
return;
i_uid_write(inode, i_uid_read(parent_inode));
}
/**
* ksmbd_vfs_lock_parent() - lock parent dentry if it is stable
*
* the parent dentry got by dget_parent or @parent could be
* unstable, we try to lock a parent inode and lookup the
* child dentry again.
*
* the reference count of @parent isn't incremented.
*/
int ksmbd_vfs_lock_parent(struct mnt_idmap *idmap, struct dentry *parent,
ksmbd: fix lookup on idmapped mounts It's great that the new in-kernel ksmbd server will support idmapped mounts out of the box! However, lookup is currently broken. Lookup helpers such as lookup_one_len() call inode_permission() internally to ensure that the caller is privileged over the inode of the base dentry they are trying to lookup under. So the permission checking here is currently wrong. Linux v5.15 will gain a new lookup helper lookup_one() that does take idmappings into account. I've added it as part of my patch series to make btrfs support idmapped mounts. The new helper is in linux-next as part of David's (Sterba) btrfs for-next branch as commit c972214c133b ("namei: add mapping aware lookup helper"). I've said it before during one of my first reviews: I would very much recommend adding fstests to [1]. It already seems to have very rudimentary cifs support. There is a completely generic idmapped mount testsuite that supports idmapped mounts. [1]: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/ Cc: Colin Ian King <colin.king@canonical.com> Cc: Steve French <stfrench@microsoft.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Hyunchul Lee <hyc.lee@gmail.com> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: David Sterba <dsterba@suse.com> Cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org> Signed-off-by: Steve French <stfrench@microsoft.com>
2021-08-23 17:13:47 +02:00
struct dentry *child)
{
struct dentry *dentry;
int ret = 0;
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
dentry = lookup_one(idmap, child->d_name.name, parent,
ksmbd: fix lookup on idmapped mounts It's great that the new in-kernel ksmbd server will support idmapped mounts out of the box! However, lookup is currently broken. Lookup helpers such as lookup_one_len() call inode_permission() internally to ensure that the caller is privileged over the inode of the base dentry they are trying to lookup under. So the permission checking here is currently wrong. Linux v5.15 will gain a new lookup helper lookup_one() that does take idmappings into account. I've added it as part of my patch series to make btrfs support idmapped mounts. The new helper is in linux-next as part of David's (Sterba) btrfs for-next branch as commit c972214c133b ("namei: add mapping aware lookup helper"). I've said it before during one of my first reviews: I would very much recommend adding fstests to [1]. It already seems to have very rudimentary cifs support. There is a completely generic idmapped mount testsuite that supports idmapped mounts. [1]: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/ Cc: Colin Ian King <colin.king@canonical.com> Cc: Steve French <stfrench@microsoft.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Hyunchul Lee <hyc.lee@gmail.com> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: David Sterba <dsterba@suse.com> Cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org> Signed-off-by: Steve French <stfrench@microsoft.com>
2021-08-23 17:13:47 +02:00
child->d_name.len);
if (IS_ERR(dentry)) {
ret = PTR_ERR(dentry);
goto out_err;
}
if (dentry != child) {
ret = -ESTALE;
dput(dentry);
goto out_err;
}
dput(dentry);
return 0;
out_err:
inode_unlock(d_inode(parent));
return ret;
}
int ksmbd_vfs_may_delete(struct mnt_idmap *idmap,
struct dentry *dentry)
{
struct dentry *parent;
int ret;
parent = dget_parent(dentry);
ret = ksmbd_vfs_lock_parent(idmap, parent, dentry);
if (ret) {
dput(parent);
return ret;
}
ret = inode_permission(idmap, d_inode(parent),
MAY_EXEC | MAY_WRITE);
inode_unlock(d_inode(parent));
dput(parent);
return ret;
}
int ksmbd_vfs_query_maximal_access(struct mnt_idmap *idmap,
struct dentry *dentry, __le32 *daccess)
{
struct dentry *parent;
int ret = 0;
*daccess = cpu_to_le32(FILE_READ_ATTRIBUTES | READ_CONTROL);
if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_WRITE))
*daccess |= cpu_to_le32(WRITE_DAC | WRITE_OWNER | SYNCHRONIZE |
FILE_WRITE_DATA | FILE_APPEND_DATA |
FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES |
FILE_DELETE_CHILD);
if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_READ))
*daccess |= FILE_READ_DATA_LE | FILE_READ_EA_LE;
if (!inode_permission(idmap, d_inode(dentry), MAY_OPEN | MAY_EXEC))
*daccess |= FILE_EXECUTE_LE;
parent = dget_parent(dentry);
ret = ksmbd_vfs_lock_parent(idmap, parent, dentry);
if (ret) {
dput(parent);
return ret;
}
if (!inode_permission(idmap, d_inode(parent), MAY_EXEC | MAY_WRITE))
*daccess |= FILE_DELETE_LE;
inode_unlock(d_inode(parent));
dput(parent);
return ret;
}
/**
* ksmbd_vfs_create() - vfs helper for smb create file
* @work: work
* @name: file name that is relative to share
* @mode: file create mode
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
{
struct path path;
struct dentry *dentry;
int err;
dentry = ksmbd_vfs_kern_path_create(work, name,
LOOKUP_NO_SYMLINKS, &path);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
if (err != -ENOENT)
pr_err("path create failed for %s, err %d\n",
name, err);
return err;
}
mode |= S_IFREG;
err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry),
dentry, mode, true);
if (!err) {
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry),
d_inode(dentry));
} else {
pr_err("File(%s): creation failed (err:%d)\n", name, err);
}
done_path_create(&path, dentry);
return err;
}
/**
* ksmbd_vfs_mkdir() - vfs helper for smb create directory
* @work: work
* @name: directory name that is relative to share
* @mode: directory create mode
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
{
struct mnt_idmap *idmap;
struct path path;
struct dentry *dentry;
int err;
dentry = ksmbd_vfs_kern_path_create(work, name,
LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY,
&path);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
if (err != -EEXIST)
ksmbd_debug(VFS, "path create failed for %s, err %d\n",
name, err);
return err;
}
idmap = mnt_idmap(path.mnt);
mode |= S_IFDIR;
err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
if (err) {
goto out;
} else if (d_unhashed(dentry)) {
struct dentry *d;
d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent,
ksmbd: fix lookup on idmapped mounts It's great that the new in-kernel ksmbd server will support idmapped mounts out of the box! However, lookup is currently broken. Lookup helpers such as lookup_one_len() call inode_permission() internally to ensure that the caller is privileged over the inode of the base dentry they are trying to lookup under. So the permission checking here is currently wrong. Linux v5.15 will gain a new lookup helper lookup_one() that does take idmappings into account. I've added it as part of my patch series to make btrfs support idmapped mounts. The new helper is in linux-next as part of David's (Sterba) btrfs for-next branch as commit c972214c133b ("namei: add mapping aware lookup helper"). I've said it before during one of my first reviews: I would very much recommend adding fstests to [1]. It already seems to have very rudimentary cifs support. There is a completely generic idmapped mount testsuite that supports idmapped mounts. [1]: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/ Cc: Colin Ian King <colin.king@canonical.com> Cc: Steve French <stfrench@microsoft.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Hyunchul Lee <hyc.lee@gmail.com> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: David Sterba <dsterba@suse.com> Cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org> Signed-off-by: Steve French <stfrench@microsoft.com>
2021-08-23 17:13:47 +02:00
dentry->d_name.len);
if (IS_ERR(d)) {
err = PTR_ERR(d);
goto out;
}
if (unlikely(d_is_negative(d))) {
dput(d);
err = -ENOENT;
goto out;
}
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
dput(d);
}
out:
done_path_create(&path, dentry);
if (err)
pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
return err;
}
static ssize_t ksmbd_vfs_getcasexattr(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len, char **attr_value)
{
char *name, *xattr_list = NULL;
ssize_t value_len = -ENOENT, xattr_list_len;
xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
if (xattr_list_len <= 0)
goto out;
for (name = xattr_list; name - xattr_list < xattr_list_len;
name += strlen(name) + 1) {
ksmbd_debug(VFS, "%s, len %zd\n", name, strlen(name));
if (strncasecmp(attr_name, name, attr_name_len))
continue;
value_len = ksmbd_vfs_getxattr(idmap,
dentry,
name,
attr_value);
if (value_len < 0)
pr_err("failed to get xattr in file\n");
break;
}
out:
kvfree(xattr_list);
return value_len;
}
static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos,
size_t count)
{
ssize_t v_len;
char *stream_buf = NULL;
ksmbd_debug(VFS, "read stream data pos : %llu, count : %zd\n",
*pos, count);
v_len = ksmbd_vfs_getcasexattr(file_mnt_idmap(fp->filp),
fp->filp->f_path.dentry,
fp->stream.name,
fp->stream.size,
&stream_buf);
if ((int)v_len <= 0)
return (int)v_len;
if (v_len <= *pos) {
count = -EINVAL;
goto free_buf;
}
if (v_len - *pos < count)
count = v_len - *pos;
memcpy(buf, &stream_buf[*pos], count);
free_buf:
kvfree(stream_buf);
return count;
}
/**
* check_lock_range() - vfs helper for smb byte range file locking
* @filp: the file to apply the lock to
* @start: lock start byte offset
* @end: lock end byte offset
* @type: byte range type read/write
*
* Return: 0 on success, otherwise error
*/
static int check_lock_range(struct file *filp, loff_t start, loff_t end,
unsigned char type)
{
struct file_lock *flock;
struct file_lock_context *ctx = locks_inode_context(file_inode(filp));
int error = 0;
if (!ctx || list_empty_careful(&ctx->flc_posix))
return 0;
spin_lock(&ctx->flc_lock);
list_for_each_entry(flock, &ctx->flc_posix, fl_list) {
/* check conflict locks */
if (flock->fl_end >= start && end >= flock->fl_start) {
if (flock->fl_type == F_RDLCK) {
if (type == WRITE) {
pr_err("not allow write by shared lock\n");
error = 1;
goto out;
}
} else if (flock->fl_type == F_WRLCK) {
/* check owner in lock */
if (flock->fl_file != filp) {
error = 1;
pr_err("not allow rw access by exclusive lock from other opens\n");
goto out;
}
}
}
}
out:
spin_unlock(&ctx->flc_lock);
return error;
}
/**
* ksmbd_vfs_read() - vfs helper for smb file read
* @work: smb work
* @fid: file id of open file
* @count: read byte count
* @pos: file pos
*
* Return: number of read bytes on success, otherwise error
*/
int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count,
loff_t *pos)
{
struct file *filp = fp->filp;
ssize_t nbytes = 0;
char *rbuf = work->aux_payload_buf;
struct inode *inode = file_inode(filp);
if (S_ISDIR(inode->i_mode))
return -EISDIR;
if (unlikely(count == 0))
return 0;
if (work->conn->connection_type) {
if (!(fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
pr_err("no right to read(%pD)\n", fp->filp);
return -EACCES;
}
}
if (ksmbd_stream_fd(fp))
return ksmbd_vfs_stream_read(fp, rbuf, pos, count);
if (!work->tcon->posix_extensions) {
int ret;
ret = check_lock_range(filp, *pos, *pos + count - 1, READ);
if (ret) {
pr_err("unable to read due to lock\n");
return -EAGAIN;
}
}
nbytes = kernel_read(filp, rbuf, count, pos);
if (nbytes < 0) {
pr_err("smb read failed, err = %zd\n", nbytes);
return nbytes;
}
filp->f_pos = *pos;
return nbytes;
}
static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
size_t count)
{
char *stream_buf = NULL, *wbuf;
struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
size_t size, v_len;
int err = 0;
ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
*pos, count);
size = *pos + count;
if (size > XATTR_SIZE_MAX) {
size = XATTR_SIZE_MAX;
count = (*pos + count) - XATTR_SIZE_MAX;
}
v_len = ksmbd_vfs_getcasexattr(idmap,
fp->filp->f_path.dentry,
fp->stream.name,
fp->stream.size,
&stream_buf);
if ((int)v_len < 0) {
pr_err("not found stream in xattr : %zd\n", v_len);
err = (int)v_len;
goto out;
}
if (v_len < size) {
wbuf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
if (!wbuf) {
err = -ENOMEM;
goto out;
}
if (v_len > 0)
memcpy(wbuf, stream_buf, v_len);
kvfree(stream_buf);
stream_buf = wbuf;
}
memcpy(&stream_buf[*pos], buf, count);
err = ksmbd_vfs_setxattr(idmap,
fp->filp->f_path.dentry,
fp->stream.name,
(void *)stream_buf,
size,
0);
if (err < 0)
goto out;
fp->filp->f_pos = *pos;
err = 0;
out:
kvfree(stream_buf);
return err;
}
/**
* ksmbd_vfs_write() - vfs helper for smb file write
* @work: work
* @fid: file id of open file
* @buf: buf containing data for writing
* @count: read byte count
* @pos: file pos
* @sync: fsync after write
* @written: number of bytes written
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp,
char *buf, size_t count, loff_t *pos, bool sync,
ssize_t *written)
{
struct file *filp;
loff_t offset = *pos;
int err = 0;
if (work->conn->connection_type) {
if (!(fp->daccess & FILE_WRITE_DATA_LE)) {
pr_err("no right to write(%pD)\n", fp->filp);
err = -EACCES;
goto out;
}
}
filp = fp->filp;
if (ksmbd_stream_fd(fp)) {
err = ksmbd_vfs_stream_write(fp, buf, pos, count);
if (!err)
*written = count;
goto out;
}
if (!work->tcon->posix_extensions) {
err = check_lock_range(filp, *pos, *pos + count - 1, WRITE);
if (err) {
pr_err("unable to write due to lock\n");
err = -EAGAIN;
goto out;
}
}
/* Do we need to break any of a levelII oplock? */
smb_break_all_levII_oplock(work, fp, 1);
err = kernel_write(filp, buf, count, pos);
if (err < 0) {
ksmbd_debug(VFS, "smb write failed, err = %d\n", err);
goto out;
}
filp->f_pos = *pos;
*written = err;
err = 0;
if (sync) {
err = vfs_fsync_range(filp, offset, offset + *written, 0);
if (err < 0)
pr_err("fsync failed for filename = %pD, err = %d\n",
fp->filp, err);
}
out:
return err;
}
/**
* ksmbd_vfs_getattr() - vfs helper for smb getattr
* @work: work
* @fid: file id of open file
* @attrs: inode attributes
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_getattr(const struct path *path, struct kstat *stat)
{
int err;
err = vfs_getattr(path, stat, STATX_BTIME, AT_STATX_SYNC_AS_STAT);
if (err)
pr_err("getattr failed, err %d\n", err);
return err;
}
/**
* ksmbd_vfs_fsync() - vfs helper for smb fsync
* @work: work
* @fid: file id of open file
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
{
struct ksmbd_file *fp;
int err;
fp = ksmbd_lookup_fd_slow(work, fid, p_id);
if (!fp) {
pr_err("failed to get filp for fid %llu\n", fid);
return -ENOENT;
}
err = vfs_fsync(fp->filp, 0);
if (err < 0)
pr_err("smb fsync failed, err = %d\n", err);
ksmbd_fd_put(work, fp);
return err;
}
/**
* ksmbd_vfs_remove_file() - vfs helper for smb rmdir or unlink
* @name: directory or file name that is relative to share
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
{
struct mnt_idmap *idmap;
struct path path;
struct dentry *parent;
int err;
if (ksmbd_override_fsids(work))
return -ENOMEM;
err = ksmbd_vfs_kern_path(work, name, LOOKUP_NO_SYMLINKS, &path, false);
if (err) {
ksmbd_debug(VFS, "can't get %s, err %d\n", name, err);
ksmbd_revert_fsids(work);
return err;
}
idmap = mnt_idmap(path.mnt);
parent = dget_parent(path.dentry);
err = ksmbd_vfs_lock_parent(idmap, parent, path.dentry);
if (err) {
dput(parent);
path_put(&path);
ksmbd_revert_fsids(work);
return err;
}
if (!d_inode(path.dentry)->i_nlink) {
err = -ENOENT;
goto out_err;
}
if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
err = vfs_rmdir(idmap, d_inode(parent), path.dentry);
if (err && err != -ENOTEMPTY)
ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
err);
} else {
err = vfs_unlink(idmap, d_inode(parent), path.dentry, NULL);
if (err)
ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
err);
}
out_err:
inode_unlock(d_inode(parent));
dput(parent);
path_put(&path);
ksmbd_revert_fsids(work);
return err;
}
/**
* ksmbd_vfs_link() - vfs helper for creating smb hardlink
* @oldname: source file name
* @newname: hardlink name that is relative to share
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
const char *newname)
{
struct path oldpath, newpath;
struct dentry *dentry;
int err;
if (ksmbd_override_fsids(work))
return -ENOMEM;
err = kern_path(oldname, LOOKUP_NO_SYMLINKS, &oldpath);
if (err) {
pr_err("cannot get linux path for %s, err = %d\n",
oldname, err);
goto out1;
}
dentry = ksmbd_vfs_kern_path_create(work, newname,
LOOKUP_NO_SYMLINKS | LOOKUP_REVAL,
&newpath);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
pr_err("path create err for %s, err %d\n", newname, err);
goto out2;
}
err = -EXDEV;
if (oldpath.mnt != newpath.mnt) {
pr_err("vfs_link failed err %d\n", err);
goto out3;
}
err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt),
d_inode(newpath.dentry),
dentry, NULL);
if (err)
ksmbd_debug(VFS, "vfs_link failed err %d\n", err);
out3:
done_path_create(&newpath, dentry);
out2:
path_put(&oldpath);
out1:
ksmbd_revert_fsids(work);
return err;
}
static int ksmbd_validate_entry_in_use(struct dentry *src_dent)
{
struct dentry *dst_dent;
spin_lock(&src_dent->d_lock);
list_for_each_entry(dst_dent, &src_dent->d_subdirs, d_child) {
struct ksmbd_file *child_fp;
if (d_really_is_negative(dst_dent))
continue;
child_fp = ksmbd_lookup_fd_inode(d_inode(dst_dent));
if (child_fp) {
spin_unlock(&src_dent->d_lock);
ksmbd_debug(VFS, "Forbid rename, sub file/dir is in use\n");
return -EACCES;
}
}
spin_unlock(&src_dent->d_lock);
return 0;
}
static int __ksmbd_vfs_rename(struct ksmbd_work *work,
struct mnt_idmap *src_idmap,
struct dentry *src_dent_parent,
struct dentry *src_dent,
struct mnt_idmap *dst_idmap,
struct dentry *dst_dent_parent,
struct dentry *trap_dent,
char *dst_name)
{
struct dentry *dst_dent;
int err;
if (!work->tcon->posix_extensions) {
err = ksmbd_validate_entry_in_use(src_dent);
if (err)
return err;
}
if (d_really_is_negative(src_dent_parent))
return -ENOENT;
if (d_really_is_negative(dst_dent_parent))
return -ENOENT;
if (d_really_is_negative(src_dent))
return -ENOENT;
if (src_dent == trap_dent)
return -EINVAL;
if (ksmbd_override_fsids(work))
return -ENOMEM;
dst_dent = lookup_one(dst_idmap, dst_name,
dst_dent_parent, strlen(dst_name));
err = PTR_ERR(dst_dent);
if (IS_ERR(dst_dent)) {
pr_err("lookup failed %s [%d]\n", dst_name, err);
goto out;
}
err = -ENOTEMPTY;
if (dst_dent != trap_dent && !d_really_is_positive(dst_dent)) {
struct renamedata rd = {
.old_mnt_idmap = src_idmap,
.old_dir = d_inode(src_dent_parent),
.old_dentry = src_dent,
.new_mnt_idmap = dst_idmap,
.new_dir = d_inode(dst_dent_parent),
.new_dentry = dst_dent,
};
err = vfs_rename(&rd);
}
if (err)
pr_err("vfs_rename failed err %d\n", err);
if (dst_dent)
dput(dst_dent);
out:
ksmbd_revert_fsids(work);
return err;
}
int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
char *newname)
{
struct mnt_idmap *idmap;
struct path dst_path;
struct dentry *src_dent_parent, *dst_dent_parent;
struct dentry *src_dent, *trap_dent, *src_child;
char *dst_name;
int err;
dst_name = extract_last_component(newname);
if (!dst_name) {
dst_name = newname;
newname = "";
}
src_dent_parent = dget_parent(fp->filp->f_path.dentry);
src_dent = fp->filp->f_path.dentry;
err = ksmbd_vfs_kern_path(work, newname,
LOOKUP_NO_SYMLINKS | LOOKUP_DIRECTORY,
&dst_path, false);
if (err) {
ksmbd_debug(VFS, "Cannot get path for %s [%d]\n", newname, err);
goto out;
}
dst_dent_parent = dst_path.dentry;
trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
dget(src_dent);
dget(dst_dent_parent);
idmap = file_mnt_idmap(fp->filp);
src_child = lookup_one(idmap, src_dent->d_name.name, src_dent_parent,
ksmbd: fix lookup on idmapped mounts It's great that the new in-kernel ksmbd server will support idmapped mounts out of the box! However, lookup is currently broken. Lookup helpers such as lookup_one_len() call inode_permission() internally to ensure that the caller is privileged over the inode of the base dentry they are trying to lookup under. So the permission checking here is currently wrong. Linux v5.15 will gain a new lookup helper lookup_one() that does take idmappings into account. I've added it as part of my patch series to make btrfs support idmapped mounts. The new helper is in linux-next as part of David's (Sterba) btrfs for-next branch as commit c972214c133b ("namei: add mapping aware lookup helper"). I've said it before during one of my first reviews: I would very much recommend adding fstests to [1]. It already seems to have very rudimentary cifs support. There is a completely generic idmapped mount testsuite that supports idmapped mounts. [1]: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/ Cc: Colin Ian King <colin.king@canonical.com> Cc: Steve French <stfrench@microsoft.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Namjae Jeon <namjae.jeon@samsung.com> Cc: Hyunchul Lee <hyc.lee@gmail.com> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Cc: David Sterba <dsterba@suse.com> Cc: linux-cifs@vger.kernel.org Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org> Signed-off-by: Steve French <stfrench@microsoft.com>
2021-08-23 17:13:47 +02:00
src_dent->d_name.len);
if (IS_ERR(src_child)) {
err = PTR_ERR(src_child);
goto out_lock;
}
if (src_child != src_dent) {
err = -ESTALE;
dput(src_child);
goto out_lock;
}
dput(src_child);
err = __ksmbd_vfs_rename(work,
idmap,
src_dent_parent,
src_dent,
mnt_idmap(dst_path.mnt),
dst_dent_parent,
trap_dent,
dst_name);
out_lock:
dput(src_dent);
dput(dst_dent_parent);
unlock_rename(src_dent_parent, dst_dent_parent);
path_put(&dst_path);
out:
dput(src_dent_parent);
return err;
}
/**
* ksmbd_vfs_truncate() - vfs helper for smb file truncate
* @work: work
* @fid: file id of old file
* @size: truncate to given size
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_truncate(struct ksmbd_work *work,
struct ksmbd_file *fp, loff_t size)
{
int err = 0;
struct file *filp;
filp = fp->filp;
/* Do we need to break any of a levelII oplock? */
smb_break_all_levII_oplock(work, fp, 1);
if (!work->tcon->posix_extensions) {
struct inode *inode = file_inode(filp);
if (size < inode->i_size) {
err = check_lock_range(filp, size,
inode->i_size - 1, WRITE);
} else {
err = check_lock_range(filp, inode->i_size,
size - 1, WRITE);
}
if (err) {
pr_err("failed due to lock\n");
return -EAGAIN;
}
}
err = vfs_truncate(&filp->f_path, size);
if (err)
pr_err("truncate failed, err %d\n", err);
return err;
}
/**
* ksmbd_vfs_listxattr() - vfs helper for smb list extended attributes
* @dentry: dentry of file for listing xattrs
* @list: destination buffer
* @size: destination buffer length
*
* Return: xattr list length on success, otherwise error
*/
ssize_t ksmbd_vfs_listxattr(struct dentry *dentry, char **list)
{
ssize_t size;
char *vlist = NULL;
size = vfs_listxattr(dentry, NULL, 0);
if (size <= 0)
return size;
vlist = kvmalloc(size, GFP_KERNEL | __GFP_ZERO);
if (!vlist)
return -ENOMEM;
*list = vlist;
size = vfs_listxattr(dentry, vlist, size);
if (size < 0) {
ksmbd_debug(VFS, "listxattr failed\n");
kvfree(vlist);
*list = NULL;
}
return size;
}
static ssize_t ksmbd_vfs_xattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *xattr_name)
{
return vfs_getxattr(idmap, dentry, xattr_name, NULL, 0);
}
/**
* ksmbd_vfs_getxattr() - vfs helper for smb get extended attributes value
* @idmap: idmap
* @dentry: dentry of file for getting xattrs
* @xattr_name: name of xattr name to query
* @xattr_buf: destination buffer xattr value
*
* Return: read xattr value length on success, otherwise error
*/
ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
struct dentry *dentry,
char *xattr_name, char **xattr_buf)
{
ssize_t xattr_len;
char *buf;
*xattr_buf = NULL;
xattr_len = ksmbd_vfs_xattr_len(idmap, dentry, xattr_name);
if (xattr_len < 0)
return xattr_len;
buf = kmalloc(xattr_len + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
xattr_len = vfs_getxattr(idmap, dentry, xattr_name,
(void *)buf, xattr_len);
if (xattr_len > 0)
*xattr_buf = buf;
else
kfree(buf);
return xattr_len;
}
/**
* ksmbd_vfs_setxattr() - vfs helper for smb set extended attributes value
* @idmap: idmap of the relevant mount
* @dentry: dentry to set XATTR at
* @attr_name: xattr name for setxattr
* @attr_value: xattr value to set
* @attr_size: size of xattr value
* @flags: destination buffer length
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
struct dentry *dentry, const char *attr_name,
acl: move idmapped mount fixup into vfs_{g,s}etxattr() This cycle we added support for mounting overlayfs on top of idmapped mounts. Recently I've started looking into potential corner cases when trying to add additional tests and I noticed that reporting for POSIX ACLs is currently wrong when using idmapped layers with overlayfs mounted on top of it. I'm going to give a rather detailed explanation to both the origin of the problem and the solution. Let's assume the user creates the following directory layout and they have a rootfs /var/lib/lxc/c1/rootfs. The files in this rootfs are owned as you would expect files on your host system to be owned. For example, ~/.bashrc for your regular user would be owned by 1000:1000 and /root/.bashrc would be owned by 0:0. IOW, this is just regular boring filesystem tree on an ext4 or xfs filesystem. The user chooses to set POSIX ACLs using the setfacl binary granting the user with uid 4 read, write, and execute permissions for their .bashrc file: setfacl -m u:4:rwx /var/lib/lxc/c2/rootfs/home/ubuntu/.bashrc Now they to expose the whole rootfs to a container using an idmapped mount. So they first create: mkdir -pv /vol/contpool/{ctrover,merge,lowermap,overmap} mkdir -pv /vol/contpool/ctrover/{over,work} chown 10000000:10000000 /vol/contpool/ctrover/{over,work} The user now creates an idmapped mount for the rootfs: mount-idmapped/mount-idmapped --map-mount=b:0:10000000:65536 \ /var/lib/lxc/c2/rootfs \ /vol/contpool/lowermap This for example makes it so that /var/lib/lxc/c2/rootfs/home/ubuntu/.bashrc which is owned by uid and gid 1000 as being owned by uid and gid 10001000 at /vol/contpool/lowermap/home/ubuntu/.bashrc. Assume the user wants to expose these idmapped mounts through an overlayfs mount to a container. mount -t overlay overlay \ -o lowerdir=/vol/contpool/lowermap, \ upperdir=/vol/contpool/overmap/over, \ workdir=/vol/contpool/overmap/work \ /vol/contpool/merge The user can do this in two ways: (1) Mount overlayfs in the initial user namespace and expose it to the container. (2) Mount overlayfs on top of the idmapped mounts inside of the container's user namespace. Let's assume the user chooses the (1) option and mounts overlayfs on the host and then changes into a container which uses the idmapping 0:10000000:65536 which is the same used for the two idmapped mounts. Now the user tries to retrieve the POSIX ACLs using the getfacl command getfacl -n /vol/contpool/lowermap/home/ubuntu/.bashrc and to their surprise they see: # file: vol/contpool/merge/home/ubuntu/.bashrc # owner: 1000 # group: 1000 user::rw- user:4294967295:rwx group::r-- mask::rwx other::r-- indicating the the uid wasn't correctly translated according to the idmapped mount. The problem is how we currently translate POSIX ACLs. Let's inspect the callchain in this example: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:0:4k /* initial idmapping */ sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get == ovl_posix_acl_xattr_get() | -> ovl_xattr_get() | -> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get() /* lower filesystem callback */ |> posix_acl_fix_xattr_to_user() { 4 = make_kuid(&init_user_ns, 4); 4 = mapped_kuid_fs(&init_user_ns /* no idmapped mount */, 4); /* FAILURE */ -1 = from_kuid(0:10000000:65536 /* caller's idmapping */, 4); } If the user chooses to use option (2) and mounts overlayfs on top of idmapped mounts inside the container things don't look that much better: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:10000000:65536 sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get == ovl_posix_acl_xattr_get() | -> ovl_xattr_get() | -> vfs_getxattr() | -> __vfs_getxattr() | -> handler->get() /* lower filesystem callback */ |> posix_acl_fix_xattr_to_user() { 4 = make_kuid(&init_user_ns, 4); 4 = mapped_kuid_fs(&init_user_ns, 4); /* FAILURE */ -1 = from_kuid(0:10000000:65536 /* caller's idmapping */, 4); } As is easily seen the problem arises because the idmapping of the lower mount isn't taken into account as all of this happens in do_gexattr(). But do_getxattr() is always called on an overlayfs mount and inode and thus cannot possible take the idmapping of the lower layers into account. This problem is similar for fscaps but there the translation happens as part of vfs_getxattr() already. Let's walk through an fscaps overlayfs callchain: setcap 'cap_net_raw+ep' /var/lib/lxc/c2/rootfs/home/ubuntu/.bashrc The expected outcome here is that we'll receive the cap_net_raw capability as we are able to map the uid associated with the fscap to 0 within our container. IOW, we want to see 0 as the result of the idmapping translations. If the user chooses option (1) we get the following callchain for fscaps: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:0:4k /* initial idmapping */ sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() -> vfs_getxattr() -> xattr_getsecurity() -> security_inode_getsecurity() ________________________________ -> cap_inode_getsecurity() | | { V | 10000000 = make_kuid(0:0:4k /* overlayfs idmapping */, 10000000); | 10000000 = mapped_kuid_fs(0:0:4k /* no idmapped mount */, 10000000); | /* Expected result is 0 and thus that we own the fscap. */ | 0 = from_kuid(0:10000000:65536 /* caller's idmapping */, 10000000); | } | -> vfs_getxattr_alloc() | -> handler->get == ovl_other_xattr_get() | -> vfs_getxattr() | -> xattr_getsecurity() | -> security_inode_getsecurity() | -> cap_inode_getsecurity() | { | 0 = make_kuid(0:0:4k /* lower s_user_ns */, 0); | 10000000 = mapped_kuid_fs(0:10000000:65536 /* idmapped mount */, 0); | 10000000 = from_kuid(0:0:4k /* overlayfs idmapping */, 10000000); | |____________________________________________________________________| } -> vfs_getxattr_alloc() -> handler->get == /* lower filesystem callback */ And if the user chooses option (2) we get: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:10000000:65536 sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() -> vfs_getxattr() -> xattr_getsecurity() -> security_inode_getsecurity() _______________________________ -> cap_inode_getsecurity() | | { V | 10000000 = make_kuid(0:10000000:65536 /* overlayfs idmapping */, 0); | 10000000 = mapped_kuid_fs(0:0:4k /* no idmapped mount */, 10000000); | /* Expected result is 0 and thus that we own the fscap. */ | 0 = from_kuid(0:10000000:65536 /* caller's idmapping */, 10000000); | } | -> vfs_getxattr_alloc() | -> handler->get == ovl_other_xattr_get() | |-> vfs_getxattr() | -> xattr_getsecurity() | -> security_inode_getsecurity() | -> cap_inode_getsecurity() | { | 0 = make_kuid(0:0:4k /* lower s_user_ns */, 0); | 10000000 = mapped_kuid_fs(0:10000000:65536 /* idmapped mount */, 0); | 0 = from_kuid(0:10000000:65536 /* overlayfs idmapping */, 10000000); | |____________________________________________________________________| } -> vfs_getxattr_alloc() -> handler->get == /* lower filesystem callback */ We can see how the translation happens correctly in those cases as the conversion happens within the vfs_getxattr() helper. For POSIX ACLs we need to do something similar. However, in contrast to fscaps we cannot apply the fix directly to the kernel internal posix acl data structure as this would alter the cached values and would also require a rework of how we currently deal with POSIX ACLs in general which almost never take the filesystem idmapping into account (the noteable exception being FUSE but even there the implementation is special) and instead retrieve the raw values based on the initial idmapping. The correct values are then generated right before returning to userspace. The fix for this is to move taking the mount's idmapping into account directly in vfs_getxattr() instead of having it be part of posix_acl_fix_xattr_to_user(). To this end we split out two small and unexported helpers posix_acl_getxattr_idmapped_mnt() and posix_acl_setxattr_idmapped_mnt(). The former to be called in vfs_getxattr() and the latter to be called in vfs_setxattr(). Let's go back to the original example. Assume the user chose option (1) and mounted overlayfs on top of idmapped mounts on the host: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:0:4k /* initial idmapping */ sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | |> __vfs_getxattr() | | -> handler->get == ovl_posix_acl_xattr_get() | | -> ovl_xattr_get() | | -> vfs_getxattr() | | |> __vfs_getxattr() | | | -> handler->get() /* lower filesystem callback */ | | |> posix_acl_getxattr_idmapped_mnt() | | { | | 4 = make_kuid(&init_user_ns, 4); | | 10000004 = mapped_kuid_fs(0:10000000:65536 /* lower idmapped mount */, 4); | | 10000004 = from_kuid(&init_user_ns, 10000004); | | |_______________________ | | } | | | | | |> posix_acl_getxattr_idmapped_mnt() | | { | | V | 10000004 = make_kuid(&init_user_ns, 10000004); | 10000004 = mapped_kuid_fs(&init_user_ns /* no idmapped mount */, 10000004); | 10000004 = from_kuid(&init_user_ns, 10000004); | } |_________________________________________________ | | | | |> posix_acl_fix_xattr_to_user() | { V 10000004 = make_kuid(0:0:4k /* init_user_ns */, 10000004); /* SUCCESS */ 4 = from_kuid(0:10000000:65536 /* caller's idmapping */, 10000004); } And similarly if the user chooses option (1) and mounted overayfs on top of idmapped mounts inside the container: idmapped mount /vol/contpool/merge: 0:10000000:65536 caller's idmapping: 0:10000000:65536 overlayfs idmapping (ofs->creator_cred): 0:10000000:65536 sys_getxattr() -> path_getxattr() -> getxattr() -> do_getxattr() |> vfs_getxattr() | |> __vfs_getxattr() | | -> handler->get == ovl_posix_acl_xattr_get() | | -> ovl_xattr_get() | | -> vfs_getxattr() | | |> __vfs_getxattr() | | | -> handler->get() /* lower filesystem callback */ | | |> posix_acl_getxattr_idmapped_mnt() | | { | | 4 = make_kuid(&init_user_ns, 4); | | 10000004 = mapped_kuid_fs(0:10000000:65536 /* lower idmapped mount */, 4); | | 10000004 = from_kuid(&init_user_ns, 10000004); | | |_______________________ | | } | | | | | |> posix_acl_getxattr_idmapped_mnt() | | { V | 10000004 = make_kuid(&init_user_ns, 10000004); | 10000004 = mapped_kuid_fs(&init_user_ns /* no idmapped mount */, 10000004); | 10000004 = from_kuid(0(&init_user_ns, 10000004); | |_________________________________________________ | } | | | |> posix_acl_fix_xattr_to_user() | { V 10000004 = make_kuid(0:0:4k /* init_user_ns */, 10000004); /* SUCCESS */ 4 = from_kuid(0:10000000:65536 /* caller's idmappings */, 10000004); } The last remaining problem we need to fix here is ovl_get_acl(). During ovl_permission() overlayfs will call: ovl_permission() -> generic_permission() -> acl_permission_check() -> check_acl() -> get_acl() -> inode->i_op->get_acl() == ovl_get_acl() > get_acl() /* on the underlying filesystem) ->inode->i_op->get_acl() == /*lower filesystem callback */ -> posix_acl_permission() passing through the get_acl request to the underlying filesystem. This will retrieve the acls stored in the lower filesystem without taking the idmapping of the underlying mount into account as this would mean altering the cached values for the lower filesystem. So we block using ACLs for now until we decided on a nice way to fix this. Note this limitation both in the documentation and in the code. The most straightforward solution would be to have ovl_get_acl() simply duplicate the ACLs, update the values according to the idmapped mount and return it to acl_permission_check() so it can be used in posix_acl_permission() forgetting them afterwards. This is a bit heavy handed but fairly straightforward otherwise. Link: https://github.com/brauner/mount-idmapped/issues/9 Link: https://lore.kernel.org/r/20220708090134.385160-2-brauner@kernel.org Cc: Seth Forshee <sforshee@digitalocean.com> Cc: Amir Goldstein <amir73il@gmail.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Aleksa Sarai <cyphar@cyphar.com> Cc: Miklos Szeredi <mszeredi@redhat.com> Cc: linux-unionfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Seth Forshee <sforshee@digitalocean.com> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-07-06 18:30:59 +02:00
void *attr_value, size_t attr_size, int flags)
{
int err;
err = vfs_setxattr(idmap,
dentry,
attr_name,
attr_value,
attr_size,
flags);
if (err)
ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
return err;
}
/**
* ksmbd_vfs_set_fadvise() - convert smb IO caching options to linux options
* @filp: file pointer for IO
* @options: smb IO options
*/
void ksmbd_vfs_set_fadvise(struct file *filp, __le32 option)
{
struct address_space *mapping;
mapping = filp->f_mapping;
if (!option || !mapping)
return;
if (option & FILE_WRITE_THROUGH_LE) {
filp->f_flags |= O_SYNC;
} else if (option & FILE_SEQUENTIAL_ONLY_LE) {
filp->f_ra.ra_pages = inode_to_bdi(mapping->host)->ra_pages * 2;
spin_lock(&filp->f_lock);
filp->f_mode &= ~FMODE_RANDOM;
spin_unlock(&filp->f_lock);
} else if (option & FILE_RANDOM_ACCESS_LE) {
spin_lock(&filp->f_lock);
filp->f_mode |= FMODE_RANDOM;
spin_unlock(&filp->f_lock);
}
}
int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
loff_t off, loff_t len)
{
smb_break_all_levII_oplock(work, fp, 1);
if (fp->f_ci->m_fattr & FILE_ATTRIBUTE_SPARSE_FILE_LE)
return vfs_fallocate(fp->filp,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
off, len);
return vfs_fallocate(fp->filp,
FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
off, len);
}
int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
struct file_allocated_range_buffer *ranges,
unsigned int in_count, unsigned int *out_count)
{
struct file *f = fp->filp;
struct inode *inode = file_inode(fp->filp);
loff_t maxbytes = (u64)inode->i_sb->s_maxbytes, end;
loff_t extent_start, extent_end;
int ret = 0;
if (start > maxbytes)
return -EFBIG;
if (!in_count)
return 0;
/*
* Shrink request scope to what the fs can actually handle.
*/
if (length > maxbytes || (maxbytes - length) < start)
length = maxbytes - start;
if (start + length > inode->i_size)
length = inode->i_size - start;
*out_count = 0;
end = start + length;
while (start < end && *out_count < in_count) {
extent_start = vfs_llseek(f, start, SEEK_DATA);
if (extent_start < 0) {
if (extent_start != -ENXIO)
ret = (int)extent_start;
break;
}
if (extent_start >= end)
break;
extent_end = vfs_llseek(f, extent_start, SEEK_HOLE);
if (extent_end < 0) {
if (extent_end != -ENXIO)
ret = (int)extent_end;
break;
} else if (extent_start >= extent_end) {
break;
}
ranges[*out_count].file_offset = cpu_to_le64(extent_start);
ranges[(*out_count)++].length =
cpu_to_le64(min(extent_end, end) - extent_start);
start = extent_end;
}
return ret;
}
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name)
{
return vfs_removexattr(idmap, dentry, attr_name);
}
int ksmbd_vfs_unlink(struct mnt_idmap *idmap,
struct dentry *dir, struct dentry *dentry)
{
int err = 0;
err = ksmbd_vfs_lock_parent(idmap, dir, dentry);
if (err)
return err;
dget(dentry);
if (S_ISDIR(d_inode(dentry)->i_mode))
err = vfs_rmdir(idmap, d_inode(dir), dentry);
else
err = vfs_unlink(idmap, d_inode(dir), dentry, NULL);
dput(dentry);
inode_unlock(d_inode(dir));
if (err)
ksmbd_debug(VFS, "failed to delete, err %d\n", err);
return err;
}
static bool __dir_empty(struct dir_context *ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct ksmbd_readdir_data *buf;
buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
buf->dirent_count++;
return buf->dirent_count <= 2;
}
/**
* ksmbd_vfs_empty_dir() - check for empty directory
* @fp: ksmbd file pointer
*
* Return: true if directory empty, otherwise false
*/
int ksmbd_vfs_empty_dir(struct ksmbd_file *fp)
{
int err;
struct ksmbd_readdir_data readdir_data;
memset(&readdir_data, 0, sizeof(struct ksmbd_readdir_data));
set_ctx_actor(&readdir_data.ctx, __dir_empty);
readdir_data.dirent_count = 0;
err = iterate_dir(fp->filp, &readdir_data.ctx);
if (readdir_data.dirent_count > 2)
err = -ENOTEMPTY;
else
err = 0;
return err;
}
static bool __caseless_lookup(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int d_type)
{
struct ksmbd_readdir_data *buf;
int cmp = -EINVAL;
buf = container_of(ctx, struct ksmbd_readdir_data, ctx);
if (buf->used != namlen)
return true;
if (IS_ENABLED(CONFIG_UNICODE) && buf->um) {
const struct qstr q_buf = {.name = buf->private,
.len = buf->used};
const struct qstr q_name = {.name = name,
.len = namlen};
cmp = utf8_strncasecmp(buf->um, &q_buf, &q_name);
}
if (cmp < 0)
cmp = strncasecmp((char *)buf->private, name, namlen);
if (!cmp) {
memcpy((char *)buf->private, name, namlen);
buf->dirent_count = 1;
return false;
}
return true;
}
/**
* ksmbd_vfs_lookup_in_dir() - lookup a file in a directory
* @dir: path info
* @name: filename to lookup
* @namelen: filename length
*
* Return: 0 on success, otherwise error
*/
static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
size_t namelen, struct unicode_map *um)
{
int ret;
struct file *dfilp;
int flags = O_RDONLY | O_LARGEFILE;
struct ksmbd_readdir_data readdir_data = {
.ctx.actor = __caseless_lookup,
.private = name,
.used = namelen,
.dirent_count = 0,
.um = um,
};
dfilp = dentry_open(dir, flags, current_cred());
if (IS_ERR(dfilp))
return PTR_ERR(dfilp);
ret = iterate_dir(dfilp, &readdir_data.ctx);
if (readdir_data.dirent_count > 0)
ret = 0;
fput(dfilp);
return ret;
}
/**
* ksmbd_vfs_kern_path() - lookup a file and get path info
* @name: file path that is relative to share
* @flags: lookup flags
* @path: if lookup succeed, return path info
* @caseless: caseless filename lookup
*
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_kern_path(struct ksmbd_work *work, char *name,
unsigned int flags, struct path *path, bool caseless)
{
struct ksmbd_share_config *share_conf = work->tcon->share_conf;
int err;
flags |= LOOKUP_BENEATH;
err = vfs_path_lookup(share_conf->vfs_path.dentry,
share_conf->vfs_path.mnt,
name,
flags,
path);
if (!err)
return 0;
if (caseless) {
char *filepath;
struct path parent;
size_t path_len, remain_len;
filepath = kstrdup(name, GFP_KERNEL);
if (!filepath)
return -ENOMEM;
path_len = strlen(filepath);
remain_len = path_len;
parent = share_conf->vfs_path;
path_get(&parent);
while (d_can_lookup(parent.dentry)) {
char *filename = filepath + path_len - remain_len;
char *next = strchrnul(filename, '/');
size_t filename_len = next - filename;
bool is_last = !next[0];
if (filename_len == 0)
break;
err = ksmbd_vfs_lookup_in_dir(&parent, filename,
filename_len,
work->conn->um);
path_put(&parent);
if (err)
goto out;
next[0] = '\0';
err = vfs_path_lookup(share_conf->vfs_path.dentry,
share_conf->vfs_path.mnt,
filepath,
flags,
&parent);
if (err)
goto out;
else if (is_last) {
*path = parent;
goto out;
}
next[0] = '/';
remain_len -= filename_len + 1;
}
path_put(&parent);
err = -EINVAL;
out:
kfree(filepath);
}
return err;
}
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
struct path *path)
{
char *abs_name;
struct dentry *dent;
abs_name = convert_to_unix_name(work->tcon->share_conf, name);
if (!abs_name)
return ERR_PTR(-ENOMEM);
dent = kern_path_create(AT_FDCWD, abs_name, path, flags);
kfree(abs_name);
return dent;
}
int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
struct dentry *dentry)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
ksmbd_debug(SMB, "empty xattr in the file\n");
goto out;
}
for (name = xattr_list; name - xattr_list < xattr_list_len;
name += strlen(name) + 1) {
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
err = vfs_remove_acl(idmap, dentry, name);
if (err)
ksmbd_debug(SMB,
"remove acl xattr failed : %s\n", name);
}
}
out:
kvfree(xattr_list);
return err;
}
int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
struct dentry *dentry)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
ksmbd_debug(SMB, "empty xattr in the file\n");
goto out;
}
for (name = xattr_list; name - xattr_list < xattr_list_len;
name += strlen(name) + 1) {
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
err = ksmbd_vfs_remove_xattr(idmap, dentry, name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
}
}
out:
kvfree(xattr_list);
return err;
}
static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct mnt_idmap *idmap,
struct inode *inode,
int acl_type)
{
struct xattr_smb_acl *smb_acl = NULL;
struct posix_acl *posix_acls;
struct posix_acl_entry *pa_entry;
struct xattr_acl_entry *xa_entry;
int i;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
return NULL;
fs: rename current get acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. The current inode operation for getting posix acls takes an inode argument but various filesystems (e.g., 9p, cifs, overlayfs) need access to the dentry. In contrast to the ->set_acl() inode operation we cannot simply extend ->get_acl() to take a dentry argument. The ->get_acl() inode operation is called from: acl_permission_check() -> check_acl() -> get_acl() which is part of generic_permission() which in turn is part of inode_permission(). Both generic_permission() and inode_permission() are called in the ->permission() handler of various filesystems (e.g., overlayfs). So simply passing a dentry argument to ->get_acl() would amount to also having to pass a dentry argument to ->permission(). We should avoid this unnecessary change. So instead of extending the existing inode operation rename it from ->get_acl() to ->get_inode_acl() and add a ->get_acl() method later that passes a dentry argument and which filesystems that need access to the dentry can implement instead of ->get_inode_acl(). Filesystems like cifs which allow setting and getting posix acls but not using them for permission checking during lookup can simply not implement ->get_inode_acl(). This is intended to be a non-functional change. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Suggested-by/Inspired-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-22 17:17:00 +02:00
posix_acls = get_inode_acl(inode, acl_type);
if (!posix_acls)
return NULL;
smb_acl = kzalloc(sizeof(struct xattr_smb_acl) +
sizeof(struct xattr_acl_entry) * posix_acls->a_count,
GFP_KERNEL);
if (!smb_acl)
goto out;
smb_acl->count = posix_acls->a_count;
pa_entry = posix_acls->a_entries;
xa_entry = smb_acl->entries;
for (i = 0; i < posix_acls->a_count; i++, pa_entry++, xa_entry++) {
switch (pa_entry->e_tag) {
case ACL_USER:
xa_entry->type = SMB_ACL_USER;
xa_entry->uid = posix_acl_uid_translate(idmap, pa_entry);
break;
case ACL_USER_OBJ:
xa_entry->type = SMB_ACL_USER_OBJ;
break;
case ACL_GROUP:
xa_entry->type = SMB_ACL_GROUP;
xa_entry->gid = posix_acl_gid_translate(idmap, pa_entry);
break;
case ACL_GROUP_OBJ:
xa_entry->type = SMB_ACL_GROUP_OBJ;
break;
case ACL_OTHER:
xa_entry->type = SMB_ACL_OTHER;
break;
case ACL_MASK:
xa_entry->type = SMB_ACL_MASK;
break;
default:
pr_err("unknown type : 0x%x\n", pa_entry->e_tag);
goto out;
}
if (pa_entry->e_perm & ACL_READ)
xa_entry->perm |= SMB_ACL_READ;
if (pa_entry->e_perm & ACL_WRITE)
xa_entry->perm |= SMB_ACL_WRITE;
if (pa_entry->e_perm & ACL_EXECUTE)
xa_entry->perm |= SMB_ACL_EXECUTE;
}
out:
posix_acl_release(posix_acls);
return smb_acl;
}
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd *pntsd, int len)
{
int rc;
struct ndr sd_ndr = {0}, acl_ndr = {0};
struct xattr_ntacl acl = {0};
struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL;
struct inode *inode = d_inode(dentry);
acl.version = 4;
acl.hash_type = XATTR_SD_HASH_TYPE_SHA256;
acl.current_time = ksmbd_UnixTimeToNT(current_time(inode));
memcpy(acl.desc, "posix_acl", 9);
acl.desc_len = 10;
pntsd->osidoffset =
cpu_to_le32(le32_to_cpu(pntsd->osidoffset) + NDR_NTSD_OFFSETOF);
pntsd->gsidoffset =
cpu_to_le32(le32_to_cpu(pntsd->gsidoffset) + NDR_NTSD_OFFSETOF);
pntsd->dacloffset =
cpu_to_le32(le32_to_cpu(pntsd->dacloffset) + NDR_NTSD_OFFSETOF);
acl.sd_buf = (char *)pntsd;
acl.sd_size = len;
rc = ksmbd_gen_sd_hash(conn, acl.sd_buf, acl.sd_size, acl.hash);
if (rc) {
pr_err("failed to generate hash for ndr acl\n");
return rc;
}
smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_ACCESS);
if (S_ISDIR(inode->i_mode))
def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_DEFAULT);
rc = ndr_encode_posix_acl(&acl_ndr, idmap, inode,
smb_acl, def_smb_acl);
if (rc) {
pr_err("failed to encode ndr to posix acl\n");
goto out;
}
rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset,
acl.posix_acl_hash);
if (rc) {
pr_err("failed to generate hash for ndr acl\n");
goto out;
}
rc = ndr_encode_v4_ntacl(&sd_ndr, &acl);
if (rc) {
pr_err("failed to encode ndr to posix acl\n");
goto out;
}
rc = ksmbd_vfs_setxattr(idmap, dentry,
XATTR_NAME_SD, sd_ndr.data,
sd_ndr.offset, 0);
if (rc < 0)
pr_err("Failed to store XATTR ntacl :%d\n", rc);
kfree(sd_ndr.data);
out:
kfree(acl_ndr.data);
kfree(smb_acl);
kfree(def_smb_acl);
return rc;
}
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd **pntsd)
{
int rc;
struct ndr n;
struct inode *inode = d_inode(dentry);
struct ndr acl_ndr = {0};
struct xattr_ntacl acl;
struct xattr_smb_acl *smb_acl = NULL, *def_smb_acl = NULL;
__u8 cmp_hash[XATTR_SD_HASH_SIZE] = {0};
rc = ksmbd_vfs_getxattr(idmap, dentry, XATTR_NAME_SD, &n.data);
if (rc <= 0)
return rc;
n.length = rc;
rc = ndr_decode_v4_ntacl(&n, &acl);
if (rc)
goto free_n_data;
smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_ACCESS);
if (S_ISDIR(inode->i_mode))
def_smb_acl = ksmbd_vfs_make_xattr_posix_acl(idmap, inode,
ACL_TYPE_DEFAULT);
rc = ndr_encode_posix_acl(&acl_ndr, idmap, inode, smb_acl,
def_smb_acl);
if (rc) {
pr_err("failed to encode ndr to posix acl\n");
goto out_free;
}
rc = ksmbd_gen_sd_hash(conn, acl_ndr.data, acl_ndr.offset, cmp_hash);
if (rc) {
pr_err("failed to generate hash for ndr acl\n");
goto out_free;
}
if (memcmp(cmp_hash, acl.posix_acl_hash, XATTR_SD_HASH_SIZE)) {
pr_err("hash value diff\n");
rc = -EINVAL;
goto out_free;
}
*pntsd = acl.sd_buf;
ksmbd: fix heap-based overflow in set_ntacl_dacl() The testcase use SMB2_SET_INFO_HE command to set a malformed file attribute under the label `security.NTACL`. SMB2_QUERY_INFO_HE command in testcase trigger the following overflow. [ 4712.003781] ================================================================== [ 4712.003790] BUG: KASAN: slab-out-of-bounds in build_sec_desc+0x842/0x1dd0 [ksmbd] [ 4712.003807] Write of size 1060 at addr ffff88801e34c068 by task kworker/0:0/4190 [ 4712.003813] CPU: 0 PID: 4190 Comm: kworker/0:0 Not tainted 5.19.0-rc5 #1 [ 4712.003850] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] [ 4712.003867] Call Trace: [ 4712.003870] <TASK> [ 4712.003873] dump_stack_lvl+0x49/0x5f [ 4712.003935] print_report.cold+0x5e/0x5cf [ 4712.003972] ? ksmbd_vfs_get_sd_xattr+0x16d/0x500 [ksmbd] [ 4712.003984] ? cmp_map_id+0x200/0x200 [ 4712.003988] ? build_sec_desc+0x842/0x1dd0 [ksmbd] [ 4712.004000] kasan_report+0xaa/0x120 [ 4712.004045] ? build_sec_desc+0x842/0x1dd0 [ksmbd] [ 4712.004056] kasan_check_range+0x100/0x1e0 [ 4712.004060] memcpy+0x3c/0x60 [ 4712.004064] build_sec_desc+0x842/0x1dd0 [ksmbd] [ 4712.004076] ? parse_sec_desc+0x580/0x580 [ksmbd] [ 4712.004088] ? ksmbd_acls_fattr+0x281/0x410 [ksmbd] [ 4712.004099] smb2_query_info+0xa8f/0x6110 [ksmbd] [ 4712.004111] ? psi_group_change+0x856/0xd70 [ 4712.004148] ? update_load_avg+0x1c3/0x1af0 [ 4712.004152] ? asym_cpu_capacity_scan+0x5d0/0x5d0 [ 4712.004157] ? xas_load+0x23/0x300 [ 4712.004162] ? smb2_query_dir+0x1530/0x1530 [ksmbd] [ 4712.004173] ? _raw_spin_lock_bh+0xe0/0xe0 [ 4712.004179] handle_ksmbd_work+0x30e/0x1020 [ksmbd] [ 4712.004192] process_one_work+0x778/0x11c0 [ 4712.004227] ? _raw_spin_lock_irq+0x8e/0xe0 [ 4712.004231] worker_thread+0x544/0x1180 [ 4712.004234] ? __cpuidle_text_end+0x4/0x4 [ 4712.004239] kthread+0x282/0x320 [ 4712.004243] ? process_one_work+0x11c0/0x11c0 [ 4712.004246] ? kthread_complete_and_exit+0x30/0x30 [ 4712.004282] ret_from_fork+0x1f/0x30 This patch add the buffer validation for security descriptor that is stored by malformed SMB2_SET_INFO_HE command. and allocate large response buffer about SMB2_O_INFO_SECURITY file info class. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17771 Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com> Signed-off-by: Namjae Jeon <linkinjeon@kernel.org> Signed-off-by: Steve French <stfrench@microsoft.com>
2022-08-02 07:28:51 +09:00
if (acl.sd_size < sizeof(struct smb_ntsd)) {
pr_err("sd size is invalid\n");
goto out_free;
}
(*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
NDR_NTSD_OFFSETOF);
(*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
NDR_NTSD_OFFSETOF);
(*pntsd)->dacloffset = cpu_to_le32(le32_to_cpu((*pntsd)->dacloffset) -
NDR_NTSD_OFFSETOF);
rc = acl.sd_size;
out_free:
kfree(acl_ndr.data);
kfree(smb_acl);
kfree(def_smb_acl);
if (rc < 0) {
kfree(acl.sd_buf);
*pntsd = NULL;
}
free_n_data:
kfree(n.data);
return rc;
}
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da)
{
struct ndr n;
int err;
err = ndr_encode_dos_attr(&n, da);
if (err)
return err;
err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE,
(void *)n.data, n.offset, 0);
if (err)
ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
kfree(n.data);
return err;
}
int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da)
{
struct ndr n;
int err;
err = ksmbd_vfs_getxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE,
(char **)&n.data);
if (err > 0) {
n.length = err;
if (ndr_decode_dos_attr(&n, da))
err = -EINVAL;
kfree(n.data);
} else {
ksmbd_debug(SMB, "failed to load dos attribute in xattr\n");
}
return err;
}
/**
* ksmbd_vfs_init_kstat() - convert unix stat information to smb stat format
* @p: destination buffer
* @ksmbd_kstat: ksmbd kstat wrapper
*/
void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat)
{
struct file_directory_info *info = (struct file_directory_info *)(*p);
struct kstat *kstat = ksmbd_kstat->kstat;
u64 time;
info->FileIndex = 0;
info->CreationTime = cpu_to_le64(ksmbd_kstat->create_time);
time = ksmbd_UnixTimeToNT(kstat->atime);
info->LastAccessTime = cpu_to_le64(time);
time = ksmbd_UnixTimeToNT(kstat->mtime);
info->LastWriteTime = cpu_to_le64(time);
time = ksmbd_UnixTimeToNT(kstat->ctime);
info->ChangeTime = cpu_to_le64(time);
if (ksmbd_kstat->file_attributes & FILE_ATTRIBUTE_DIRECTORY_LE) {
info->EndOfFile = 0;
info->AllocationSize = 0;
} else {
info->EndOfFile = cpu_to_le64(kstat->size);
info->AllocationSize = cpu_to_le64(kstat->blocks << 9);
}
info->ExtFileAttributes = ksmbd_kstat->file_attributes;
return info;
}
int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
struct mnt_idmap *idmap,
struct dentry *dentry,
struct ksmbd_kstat *ksmbd_kstat)
{
u64 time;
int rc;
generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat);
time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
ksmbd_kstat->create_time = time;
/*
* set default value for the case that store dos attributes is not yes
* or that acl is disable in server's filesystem and the config is yes.
*/
if (S_ISDIR(ksmbd_kstat->kstat->mode))
ksmbd_kstat->file_attributes = FILE_ATTRIBUTE_DIRECTORY_LE;
else
ksmbd_kstat->file_attributes = FILE_ATTRIBUTE_ARCHIVE_LE;
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
struct xattr_dos_attrib da;
rc = ksmbd_vfs_get_dos_attrib_xattr(idmap, dentry, &da);
if (rc > 0) {
ksmbd_kstat->file_attributes = cpu_to_le32(da.attr);
ksmbd_kstat->create_time = da.create_time;
} else {
ksmbd_debug(VFS, "fail to load dos attribute.\n");
}
}
return 0;
}
ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len)
{
char *name, *xattr_list = NULL;
ssize_t value_len = -ENOENT, xattr_list_len;
xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
if (xattr_list_len <= 0)
goto out;
for (name = xattr_list; name - xattr_list < xattr_list_len;
name += strlen(name) + 1) {
ksmbd_debug(VFS, "%s, len %zd\n", name, strlen(name));
if (strncasecmp(attr_name, name, attr_name_len))
continue;
value_len = ksmbd_vfs_xattr_len(idmap, dentry, name);
break;
}
out:
kvfree(xattr_list);
return value_len;
}
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type)
{
char *type, *buf;
if (s_type == DIR_STREAM)
type = ":$INDEX_ALLOCATION";
else
type = ":$DATA";
buf = kasprintf(GFP_KERNEL, "%s%s%s",
XATTR_NAME_STREAM, stream_name, type);
if (!buf)
return -ENOMEM;
*xattr_stream_name = buf;
*xattr_stream_name_size = strlen(buf) + 1;
return 0;
}
int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
struct ksmbd_file *src_fp,
struct ksmbd_file *dst_fp,
struct srv_copychunk *chunks,
unsigned int chunk_count,
unsigned int *chunk_count_written,
unsigned int *chunk_size_written,
loff_t *total_size_written)
{
unsigned int i;
loff_t src_off, dst_off, src_file_size;
size_t len;
int ret;
*chunk_count_written = 0;
*chunk_size_written = 0;
*total_size_written = 0;
if (!(src_fp->daccess & (FILE_READ_DATA_LE | FILE_EXECUTE_LE))) {
pr_err("no right to read(%pD)\n", src_fp->filp);
return -EACCES;
}
if (!(dst_fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) {
pr_err("no right to write(%pD)\n", dst_fp->filp);
return -EACCES;
}
if (ksmbd_stream_fd(src_fp) || ksmbd_stream_fd(dst_fp))
return -EBADF;
smb_break_all_levII_oplock(work, dst_fp, 1);
if (!work->tcon->posix_extensions) {
for (i = 0; i < chunk_count; i++) {
src_off = le64_to_cpu(chunks[i].SourceOffset);
dst_off = le64_to_cpu(chunks[i].TargetOffset);
len = le32_to_cpu(chunks[i].Length);
if (check_lock_range(src_fp->filp, src_off,
src_off + len - 1, READ))
return -EAGAIN;
if (check_lock_range(dst_fp->filp, dst_off,
dst_off + len - 1, WRITE))
return -EAGAIN;
}
}
src_file_size = i_size_read(file_inode(src_fp->filp));
for (i = 0; i < chunk_count; i++) {
src_off = le64_to_cpu(chunks[i].SourceOffset);
dst_off = le64_to_cpu(chunks[i].TargetOffset);
len = le32_to_cpu(chunks[i].Length);
if (src_off + len > src_file_size)
return -E2BIG;
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len, 0);
vfs: fix copy_file_range() regression in cross-fs copies A regression has been reported by Nicolas Boichat, found while using the copy_file_range syscall to copy a tracefs file. Before commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") the kernel would return -EXDEV to userspace when trying to copy a file across different filesystems. After this commit, the syscall doesn't fail anymore and instead returns zero (zero bytes copied), as this file's content is generated on-the-fly and thus reports a size of zero. Another regression has been reported by He Zhe - the assertion of WARN_ON_ONCE(ret == -EOPNOTSUPP) can be triggered from userspace when copying from a sysfs file whose read operation may return -EOPNOTSUPP. Since we do not have test coverage for copy_file_range() between any two types of filesystems, the best way to avoid these sort of issues in the future is for the kernel to be more picky about filesystems that are allowed to do copy_file_range(). This patch restores some cross-filesystem copy restrictions that existed prior to commit 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices"), namely, cross-sb copy is not allowed for filesystems that do not implement ->copy_file_range(). Filesystems that do implement ->copy_file_range() have full control of the result - if this method returns an error, the error is returned to the user. Before this change this was only true for fs that did not implement the ->remap_file_range() operation (i.e. nfsv3). Filesystems that do not implement ->copy_file_range() still fall-back to the generic_copy_file_range() implementation when the copy is within the same sb. This helps the kernel can maintain a more consistent story about which filesystems support copy_file_range(). nfsd and ksmbd servers are modified to fall-back to the generic_copy_file_range() implementation in case vfs_copy_file_range() fails with -EOPNOTSUPP or -EXDEV, which preserves behavior of server-side-copy. fall-back to generic_copy_file_range() is not implemented for the smb operation FSCTL_DUPLICATE_EXTENTS_TO_FILE, which is arguably a correct change of behavior. Fixes: 5dae222a5ff0 ("vfs: allow copy_file_range to copy across devices") Link: https://lore.kernel.org/linux-fsdevel/20210212044405.4120619-1-drinkcat@chromium.org/ Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/ Link: https://lore.kernel.org/linux-fsdevel/20210630161320.29006-1-lhenriques@suse.de/ Reported-by: Nicolas Boichat <drinkcat@chromium.org> Reported-by: kernel test robot <oliver.sang@intel.com> Signed-off-by: Luis Henriques <lhenriques@suse.de> Fixes: 64bf5ff58dff ("vfs: no fallback for ->copy_file_range") Link: https://lore.kernel.org/linux-fsdevel/20f17f64-88cb-4e80-07c1-85cb96c83619@windriver.com/ Reported-by: He Zhe <zhe.he@windriver.com> Tested-by: Namjae Jeon <linkinjeon@kernel.org> Tested-by: Luis Henriques <lhenriques@suse.de> Signed-off-by: Amir Goldstein <amir73il@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-06-30 22:58:49 +03:00
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len,
COPY_FILE_SPLICE);
if (ret < 0)
return ret;
*chunk_count_written += 1;
*total_size_written += ret;
}
return 0;
}
void ksmbd_vfs_posix_lock_wait(struct file_lock *flock)
{
wait_event(flock->fl_wait, !flock->fl_blocker);
}
int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout)
{
return wait_event_interruptible_timeout(flock->fl_wait,
!flock->fl_blocker,
timeout);
}
void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
{
locks_delete_block(flock);
}
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
fs: pass dentry to set acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. Since some filesystem rely on the dentry being available to them when setting posix acls (e.g., 9p and cifs) they cannot rely on set acl inode operation. But since ->set_acl() is required in order to use the generic posix acl xattr handlers filesystems that do not implement this inode operation cannot use the handler and need to implement their own dedicated posix acl handlers. Update the ->set_acl() inode method to take a dentry argument. This allows all filesystems to rely on ->set_acl(). As far as I can tell all codepaths can be switched to rely on the dentry instead of just the inode. Note that the original motivation for passing the dentry separate from the inode instead of just the dentry in the xattr handlers was because of security modules that call security_d_instantiate(). This hook is called during d_instantiate_new(), d_add(), __d_instantiate_anon(), and d_splice_alias() to initialize the inode's security context and possibly to set security.* xattrs. Since this only affects security.* xattrs this is completely irrelevant for posix acls. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-23 10:29:39 +02:00
struct dentry *dentry)
{
struct posix_acl_state acl_state;
struct posix_acl *acls;
fs: pass dentry to set acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. Since some filesystem rely on the dentry being available to them when setting posix acls (e.g., 9p and cifs) they cannot rely on set acl inode operation. But since ->set_acl() is required in order to use the generic posix acl xattr handlers filesystems that do not implement this inode operation cannot use the handler and need to implement their own dedicated posix acl handlers. Update the ->set_acl() inode method to take a dentry argument. This allows all filesystems to rely on ->set_acl(). As far as I can tell all codepaths can be switched to rely on the dentry instead of just the inode. Note that the original motivation for passing the dentry separate from the inode instead of just the dentry in the xattr handlers was because of security modules that call security_d_instantiate(). This hook is called during d_instantiate_new(), d_add(), __d_instantiate_anon(), and d_splice_alias() to initialize the inode's security context and possibly to set security.* xattrs. Since this only affects security.* xattrs this is completely irrelevant for posix acls. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-23 10:29:39 +02:00
struct inode *inode = d_inode(dentry);
int rc;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
return -EOPNOTSUPP;
ksmbd_debug(SMB, "Set posix acls\n");
rc = init_acl_state(&acl_state, 1);
if (rc)
return rc;
/* Set default owner group */
acl_state.owner.allow = (inode->i_mode & 0700) >> 6;
acl_state.group.allow = (inode->i_mode & 0070) >> 3;
acl_state.other.allow = inode->i_mode & 0007;
acl_state.users->aces[acl_state.users->n].uid = inode->i_uid;
acl_state.users->aces[acl_state.users->n++].perms.allow =
acl_state.owner.allow;
acl_state.groups->aces[acl_state.groups->n].gid = inode->i_gid;
acl_state.groups->aces[acl_state.groups->n++].perms.allow =
acl_state.group.allow;
acl_state.mask.allow = 0x07;
acls = posix_acl_alloc(6, GFP_KERNEL);
if (!acls) {
free_acl_state(&acl_state);
return -ENOMEM;
}
posix_state_to_acl(&acl_state, acls->a_entries);
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
rc);
else if (S_ISDIR(inode->i_mode)) {
posix_state_to_acl(&acl_state, acls->a_entries);
rc = set_posix_acl(idmap, dentry, ACL_TYPE_DEFAULT, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
free_acl_state(&acl_state);
posix_acl_release(acls);
return rc;
}
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
fs: pass dentry to set acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. Since some filesystem rely on the dentry being available to them when setting posix acls (e.g., 9p and cifs) they cannot rely on set acl inode operation. But since ->set_acl() is required in order to use the generic posix acl xattr handlers filesystems that do not implement this inode operation cannot use the handler and need to implement their own dedicated posix acl handlers. Update the ->set_acl() inode method to take a dentry argument. This allows all filesystems to rely on ->set_acl(). As far as I can tell all codepaths can be switched to rely on the dentry instead of just the inode. Note that the original motivation for passing the dentry separate from the inode instead of just the dentry in the xattr handlers was because of security modules that call security_d_instantiate(). This hook is called during d_instantiate_new(), d_add(), __d_instantiate_anon(), and d_splice_alias() to initialize the inode's security context and possibly to set security.* xattrs. Since this only affects security.* xattrs this is completely irrelevant for posix acls. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-23 10:29:39 +02:00
struct dentry *dentry, struct inode *parent_inode)
{
struct posix_acl *acls;
struct posix_acl_entry *pace;
fs: pass dentry to set acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. Since some filesystem rely on the dentry being available to them when setting posix acls (e.g., 9p and cifs) they cannot rely on set acl inode operation. But since ->set_acl() is required in order to use the generic posix acl xattr handlers filesystems that do not implement this inode operation cannot use the handler and need to implement their own dedicated posix acl handlers. Update the ->set_acl() inode method to take a dentry argument. This allows all filesystems to rely on ->set_acl(). As far as I can tell all codepaths can be switched to rely on the dentry instead of just the inode. Note that the original motivation for passing the dentry separate from the inode instead of just the dentry in the xattr handlers was because of security modules that call security_d_instantiate(). This hook is called during d_instantiate_new(), d_add(), __d_instantiate_anon(), and d_splice_alias() to initialize the inode's security context and possibly to set security.* xattrs. Since this only affects security.* xattrs this is completely irrelevant for posix acls. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-23 10:29:39 +02:00
struct inode *inode = d_inode(dentry);
int rc, i;
if (!IS_ENABLED(CONFIG_FS_POSIX_ACL))
return -EOPNOTSUPP;
fs: rename current get acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. The current inode operation for getting posix acls takes an inode argument but various filesystems (e.g., 9p, cifs, overlayfs) need access to the dentry. In contrast to the ->set_acl() inode operation we cannot simply extend ->get_acl() to take a dentry argument. The ->get_acl() inode operation is called from: acl_permission_check() -> check_acl() -> get_acl() which is part of generic_permission() which in turn is part of inode_permission(). Both generic_permission() and inode_permission() are called in the ->permission() handler of various filesystems (e.g., overlayfs). So simply passing a dentry argument to ->get_acl() would amount to also having to pass a dentry argument to ->permission(). We should avoid this unnecessary change. So instead of extending the existing inode operation rename it from ->get_acl() to ->get_inode_acl() and add a ->get_acl() method later that passes a dentry argument and which filesystems that need access to the dentry can implement instead of ->get_inode_acl(). Filesystems like cifs which allow setting and getting posix acls but not using them for permission checking during lookup can simply not implement ->get_inode_acl(). This is intended to be a non-functional change. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Suggested-by/Inspired-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-22 17:17:00 +02:00
acls = get_inode_acl(parent_inode, ACL_TYPE_DEFAULT);
if (!acls)
return -ENOENT;
pace = acls->a_entries;
for (i = 0; i < acls->a_count; i++, pace++) {
if (pace->e_tag == ACL_MASK) {
pace->e_perm = 0x07;
break;
}
}
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
rc);
if (S_ISDIR(inode->i_mode)) {
rc = set_posix_acl(idmap, dentry, ACL_TYPE_DEFAULT,
acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
posix_acl_release(acls);
return rc;
}