fuse update for 6.9
-----BEGIN PGP SIGNATURE-----

iHUEABYKAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCZfLjeQAKCRDh3BK/laaZ
PBYQAQDqYZzq91Kn5jdvjaSd+6I/+x7MDLOIP5hPX0HJLuBxWAEAqENoo4Of0GTC
ltW7DKrQy9E3CMp6VKSLVJPN4BYP9gk=
=GvOE
-----END PGP SIGNATURE-----

Merge tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Add passthrough mode for regular file I/O. This allows performing
   read and write (also via memory maps) on a backing file without
   incurring the overhead of roundtrips to userspace. For now this is
   only allowed to privileged servers, but this limitation will go away
   in the future (Amir Goldstein)

 - Fix interaction of direct I/O mode with memory maps (Bernd Schubert)

 - Export filesystem tags through sysfs for virtiofs (Stefan Hajnoczi)

 - Allow resending queued requests for server crash recovery (Zhao Chen)

 - Misc fixes and cleanups

* tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (38 commits)
  fuse: get rid of ff->readdir.lock
  fuse: remove unneeded lock which protecting update of congestion_threshold
  fuse: Fix missing FOLL_PIN for direct-io
  fuse: remove an unnecessary if statement
  fuse: Track process write operations in both direct and writethrough modes
  fuse: Use the high bit of request ID for indicating resend requests
  fuse: Introduce a new notification type for resend pending requests
  fuse: add support for explicit export disabling
  fuse: __kuid_val/__kgid_val helpers in fuse_fill_attr_from_inode()
  fuse: fix typo for fuse_permission comment
  fuse: Convert fuse_writepage_locked to take a folio
  fuse: Remove fuse_writepage
  virtio_fs: remove duplicate check if queue is broken
  fuse: use FUSE_ROOT_ID in fuse_get_root_inode()
  fuse: don't unhash root
  fuse: fix root lookup with nonzero generation
  fuse: replace remaining make_bad_inode() with fuse_make_bad()
  virtiofs: drop __exit from virtio_fs_sysfs_exit()
  fuse: implement passthrough for mmap
  fuse: implement splice read/write passthrough
  ...
This commit is contained in: commit 6ce8b2ce0d
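Editor's note: the passthrough feature in this pull is driven from userspace through new /dev/fuse ioctls (see the fs/fuse/dev.c hunks below). The following sketch is illustrative and not part of this commit; it assumes the v6.9 UAPI in <linux/fuse.h> (struct fuse_backing_map, FUSE_DEV_IOC_BACKING_OPEN/FUSE_DEV_IOC_BACKING_CLOSE, FOPEN_PASSTHROUGH, and the backing_id member of struct fuse_open_out), uses hypothetical devfd/backing_fd descriptors, and omits error handling. A real server must also negotiate FUSE_PASSTHROUGH in FUSE_INIT and, for now, be privileged.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/fuse.h>

/*
 * Illustrative only: register a backing file once, then answer FUSE_OPEN
 * with FOPEN_PASSTHROUGH so the kernel performs read/write/mmap directly
 * on the backing file.  devfd is the server's /dev/fuse fd, backing_fd an
 * already-open fd on the backing file (both assumed to exist).
 */
static int reply_open_passthrough(int devfd, int backing_fd,
                                  struct fuse_open_out *outarg)
{
        struct fuse_backing_map map = { .fd = backing_fd };
        /* on success the ioctl returns a non-negative backing id */
        int backing_id = ioctl(devfd, FUSE_DEV_IOC_BACKING_OPEN, &map);

        if (backing_id < 0)
                return -1;

        outarg->open_flags |= FOPEN_PASSTHROUGH;
        outarg->backing_id = backing_id;
        /* when no longer needed: ioctl(devfd, FUSE_DEV_IOC_BACKING_CLOSE, &backing_id) */
        return 0;
}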
Documentation/ABI/testing/sysfs-fs-virtiofs | 11 (new file)
@@ -0,0 +1,11 @@
+What:		/sys/fs/virtiofs/<n>/tag
+Date:		Feb 2024
+Contact:	virtio-fs@lists.linux.dev
+Description:
+		[RO] The mount "tag" that can be used to mount this filesystem.
+
+What:		/sys/fs/virtiofs/<n>/device
+Date:		Feb 2024
+Contact:	virtio-fs@lists.linux.dev
+Description:
+		Symlink to the virtio device that exports this filesystem.
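Editor's note (not part of the diff): the tag exposed above is the source device name used when mounting a virtiofs filesystem, i.e. the equivalent of "mount -t virtiofs <tag> /mnt". A minimal sketch, with illustrative arguments:

#include <sys/mount.h>

/* Mount a virtiofs filesystem by the tag read from /sys/fs/virtiofs/<n>/tag. */
static int mount_virtiofs_by_tag(const char *tag, const char *mountpoint)
{
        return mount(tag, mountpoint, "virtiofs", 0, NULL);
}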
fs/fuse/Kconfig

@@ -52,3 +52,14 @@ config FUSE_DAX
 
 	  If you want to allow mounting a Virtio Filesystem with the "dax"
 	  option, answer Y.
+
+config FUSE_PASSTHROUGH
+	bool "FUSE passthrough operations support"
+	default y
+	depends on FUSE_FS
+	select FS_STACK
+	help
+	  This allows bypassing FUSE server by mapping specific FUSE operations
+	  to be performed directly on a backing file.
+
+	  If you want to allow passthrough operations, answer Y.
fs/fuse/Makefile

@@ -8,6 +8,8 @@ obj-$(CONFIG_CUSE) += cuse.o
 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
 
 fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y += iomode.o
 fuse-$(CONFIG_FUSE_DAX) += dax.o
+fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
 
 virtiofs-y := virtio_fs.o
fs/fuse/control.c

@@ -174,11 +174,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
 	if (!fc)
 		goto out;
 
-	down_read(&fc->killsb);
-	spin_lock(&fc->bg_lock);
-	fc->congestion_threshold = val;
-	spin_unlock(&fc->bg_lock);
-	up_read(&fc->killsb);
+	WRITE_ONCE(fc->congestion_threshold, val);
 	fuse_conn_put(fc);
 out:
 	return ret;
fs/fuse/dev.c | 126
@@ -1775,6 +1775,61 @@ copy_finish:
 	return err;
 }
 
+/*
+ * Resending all processing queue requests.
+ *
+ * During a FUSE daemon panics and failover, it is possible for some inflight
+ * requests to be lost and never returned. As a result, applications awaiting
+ * replies would become stuck forever. To address this, we can use notification
+ * to trigger resending of these pending requests to the FUSE daemon, ensuring
+ * they are properly processed again.
+ *
+ * Please note that this strategy is applicable only to idempotent requests or
+ * if the FUSE daemon takes careful measures to avoid processing duplicated
+ * non-idempotent requests.
+ */
+static void fuse_resend(struct fuse_conn *fc)
+{
+	struct fuse_dev *fud;
+	struct fuse_req *req, *next;
+	struct fuse_iqueue *fiq = &fc->iq;
+	LIST_HEAD(to_queue);
+	unsigned int i;
+
+	spin_lock(&fc->lock);
+	if (!fc->connected) {
+		spin_unlock(&fc->lock);
+		return;
+	}
+
+	list_for_each_entry(fud, &fc->devices, entry) {
+		struct fuse_pqueue *fpq = &fud->pq;
+
+		spin_lock(&fpq->lock);
+		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+			list_splice_tail_init(&fpq->processing[i], &to_queue);
+		spin_unlock(&fpq->lock);
+	}
+	spin_unlock(&fc->lock);
+
+	list_for_each_entry_safe(req, next, &to_queue, list) {
+		__set_bit(FR_PENDING, &req->flags);
+		/* mark the request as resend request */
+		req->in.h.unique |= FUSE_UNIQUE_RESEND;
+	}
+
+	spin_lock(&fiq->lock);
+	/* iq and pq requests are both oldest to newest */
+	list_splice(&to_queue, &fiq->pending);
+	fiq->ops->wake_pending_and_unlock(fiq);
+}
+
+static int fuse_notify_resend(struct fuse_conn *fc)
+{
+	fuse_resend(fc);
+	return 0;
+}
+
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 		       unsigned int size, struct fuse_copy_state *cs)
 {
@@ -1800,6 +1855,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_DELETE:
 		return fuse_notify_delete(fc, size, cs);
 
+	case FUSE_NOTIFY_RESEND:
+		return fuse_notify_resend(fc);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
@@ -2251,17 +2309,14 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
 	return 0;
 }
 
-static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
-			   unsigned long arg)
+static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
 {
 	int res;
 	int oldfd;
 	struct fuse_dev *fud = NULL;
 	struct fd f;
 
-	switch (cmd) {
-	case FUSE_DEV_IOC_CLONE:
-		if (get_user(oldfd, (__u32 __user *)arg))
+	if (get_user(oldfd, argp))
 		return -EFAULT;
 
 	f = fdget(oldfd);
@@ -2281,15 +2336,66 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 		res = fuse_device_clone(fud->fc, file);
 		mutex_unlock(&fuse_mutex);
 	}
 
 	fdput(f);
-		break;
-	default:
-		res = -ENOTTY;
-		break;
-	}
 	return res;
 }
+
+static long fuse_dev_ioctl_backing_open(struct file *file,
+					struct fuse_backing_map __user *argp)
+{
+	struct fuse_dev *fud = fuse_get_dev(file);
+	struct fuse_backing_map map;
+
+	if (!fud)
+		return -EPERM;
+
+	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&map, argp, sizeof(map)))
+		return -EFAULT;
+
+	return fuse_backing_open(fud->fc, &map);
+}
+
+static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
+{
+	struct fuse_dev *fud = fuse_get_dev(file);
+	int backing_id;
+
+	if (!fud)
+		return -EPERM;
+
+	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+		return -EOPNOTSUPP;
+
+	if (get_user(backing_id, argp))
+		return -EFAULT;
+
+	return fuse_backing_close(fud->fc, backing_id);
+}
+
+static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
+			   unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case FUSE_DEV_IOC_CLONE:
+		return fuse_dev_ioctl_clone(file, argp);
+
+	case FUSE_DEV_IOC_BACKING_OPEN:
+		return fuse_dev_ioctl_backing_open(file, argp);
+
+	case FUSE_DEV_IOC_BACKING_CLOSE:
+		return fuse_dev_ioctl_backing_close(file, argp);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
 const struct file_operations fuse_dev_operations = {
 	.owner		= THIS_MODULE,
 	.open		= fuse_dev_open,
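Editor's note: a hedged sketch of how a server would use the resend machinery added above. It assumes the standard notification mechanism (a bare struct fuse_out_header written to /dev/fuse with unique == 0 and the notification code in the error field) and that FUSE_NOTIFY_RESEND carries no payload; devfd is a hypothetical /dev/fuse descriptor.

#include <unistd.h>
#include <linux/fuse.h>

/*
 * After daemon failover, ask the kernel to move all requests from the
 * processing queues back to the pending queue.  Resent requests come back
 * with FUSE_UNIQUE_RESEND set in the request ID, letting the server detect
 * potential duplicates of non-idempotent operations.
 */
static int trigger_resend(int devfd)
{
        struct fuse_out_header oh = {
                .len   = sizeof(oh),
                .error = FUSE_NOTIFY_RESEND,  /* unique == 0 marks a notification */
        };

        return write(devfd, &oh, sizeof(oh)) == sizeof(oh) ? 0 : -1;
}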
fs/fuse/dir.c

@@ -391,6 +391,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
 	err = -EIO;
 	if (fuse_invalid_attr(&outarg->attr))
 		goto out_put_forget;
+	if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
+		pr_warn_once("root generation should be zero\n");
+		outarg->generation = 0;
+	}
 
 	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
 			   &outarg->attr, ATTR_TIMEOUT(outarg),
@@ -615,7 +619,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	FUSE_ARGS(args);
 	struct fuse_forget_link *forget;
 	struct fuse_create_in inarg;
-	struct fuse_open_out outopen;
+	struct fuse_open_out *outopenp;
 	struct fuse_entry_out outentry;
 	struct fuse_inode *fi;
 	struct fuse_file *ff;
@@ -630,7 +634,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 		goto out_err;
 
 	err = -ENOMEM;
-	ff = fuse_file_alloc(fm);
+	ff = fuse_file_alloc(fm, true);
 	if (!ff)
 		goto out_put_forget_req;
 
@@ -659,8 +663,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	args.out_numargs = 2;
 	args.out_args[0].size = sizeof(outentry);
 	args.out_args[0].value = &outentry;
-	args.out_args[1].size = sizeof(outopen);
-	args.out_args[1].value = &outopen;
+	/* Store outarg for fuse_finish_open() */
+	outopenp = &ff->args->open_outarg;
+	args.out_args[1].size = sizeof(*outopenp);
+	args.out_args[1].value = outopenp;
 
 	err = get_create_ext(&args, dir, entry, mode);
 	if (err)
@@ -676,9 +682,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	    fuse_invalid_attr(&outentry.attr))
 		goto out_free_ff;
 
-	ff->fh = outopen.fh;
+	ff->fh = outopenp->fh;
 	ff->nodeid = outentry.nodeid;
-	ff->open_flags = outopen.open_flags;
+	ff->open_flags = outopenp->open_flags;
 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
 			  &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
 	if (!inode) {
@@ -692,13 +698,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_dir_changed(dir);
-	err = finish_open(file, entry, generic_file_open);
+	err = generic_file_open(inode, file);
+	if (!err) {
+		file->private_data = ff;
+		err = finish_open(file, entry, fuse_finish_open);
+	}
 	if (err) {
 		fi = get_fuse_inode(inode);
 		fuse_sync_release(fi, ff, flags);
 	} else {
-		file->private_data = ff;
-		fuse_finish_open(inode, file);
 		if (fm->fc->atomic_o_trunc && trunc)
 			truncate_pagecache(inode, 0);
 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
@@ -1210,7 +1218,7 @@ static int fuse_do_statx(struct inode *inode, struct file *file,
 	if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
 	    ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
 					 inode_wrong_type(inode, sx->mode)))) {
-		make_bad_inode(inode);
+		fuse_make_bad(inode);
 		return -EIO;
 	}
 
@@ -1485,7 +1493,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
  *
  * 1) Local access checking ('default_permissions' mount option) based
  *    on file mode.  This is the plain old disk filesystem permission
- *    modell.
+ *    model.
  *
  * 2) "Remote" access checking, where server is responsible for
  *    checking permission in each inode operation.  An exception to this
@@ -1630,7 +1638,30 @@ out_err:
 
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
-	return fuse_open_common(inode, file, true);
+	struct fuse_mount *fm = get_fuse_mount(inode);
+	int err;
+
+	if (fuse_is_bad(inode))
+		return -EIO;
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	err = fuse_do_open(fm, get_node_id(inode), file, true);
+	if (!err) {
+		struct fuse_file *ff = file->private_data;
+
+		/*
+		 * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
+		 * directories for backward compatibility, though it's unlikely
+		 * to be useful.
+		 */
+		if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
+			nonseekable_open(inode, file);
+	}
+
+	return err;
 }
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
fs/fuse/file.c | 437
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/filelock.h>
 #include <linux/splice.h>
+#include <linux/task_io_accounting_ops.h>
 
 static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
 			  unsigned int open_flags, int opcode,
@@ -50,13 +51,7 @@ static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
 	return fuse_simple_request(fm, &args);
 }
 
-struct fuse_release_args {
-	struct fuse_args args;
-	struct fuse_release_in inarg;
-	struct inode *inode;
-};
-
-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release)
 {
 	struct fuse_file *ff;
 
@@ -65,15 +60,15 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
 		return NULL;
 
 	ff->fm = fm;
-	ff->release_args = kzalloc(sizeof(*ff->release_args),
-				   GFP_KERNEL_ACCOUNT);
-	if (!ff->release_args) {
+	if (release) {
+		ff->args = kzalloc(sizeof(*ff->args), GFP_KERNEL_ACCOUNT);
+		if (!ff->args) {
 			kfree(ff);
 			return NULL;
+		}
 	}
 
 	INIT_LIST_HEAD(&ff->write_entry);
-	mutex_init(&ff->readdir.lock);
 	refcount_set(&ff->count, 1);
 	RB_CLEAR_NODE(&ff->polled_node);
 	init_waitqueue_head(&ff->poll_wait);
@@ -85,8 +80,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
 
 void fuse_file_free(struct fuse_file *ff)
 {
-	kfree(ff->release_args);
-	mutex_destroy(&ff->readdir.lock);
+	kfree(ff->args);
 	kfree(ff);
 }
 
@@ -105,14 +99,17 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
 	kfree(ra);
 }
 
-static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
 {
 	if (refcount_dec_and_test(&ff->count)) {
-		struct fuse_args *args = &ff->release_args->args;
+		struct fuse_release_args *ra = &ff->args->release_args;
+		struct fuse_args *args = (ra ? &ra->args : NULL);
 
-		if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
-			/* Do nothing when client does not implement 'open' */
-			fuse_release_end(ff->fm, args, 0);
+		if (ra && ra->inode)
+			fuse_file_io_release(ff, ra->inode);
+
+		if (!args) {
+			/* Do nothing when server does not implement 'open' */
 		} else if (sync) {
 			fuse_simple_request(ff->fm, args);
 			fuse_release_end(ff->fm, args, 0);
@@ -132,27 +129,31 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 	struct fuse_conn *fc = fm->fc;
 	struct fuse_file *ff;
 	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+	bool open = isdir ? !fc->no_opendir : !fc->no_open;
 
-	ff = fuse_file_alloc(fm);
+	ff = fuse_file_alloc(fm, open);
 	if (!ff)
 		return ERR_PTR(-ENOMEM);
 
 	ff->fh = 0;
 	/* Default for no-open */
 	ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
-	if (isdir ? !fc->no_opendir : !fc->no_open) {
-		struct fuse_open_out outarg;
+	if (open) {
+		/* Store outarg for fuse_finish_open() */
+		struct fuse_open_out *outargp = &ff->args->open_outarg;
 		int err;
 
-		err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
+		err = fuse_send_open(fm, nodeid, open_flags, opcode, outargp);
 		if (!err) {
-			ff->fh = outarg.fh;
-			ff->open_flags = outarg.open_flags;
+			ff->fh = outargp->fh;
+			ff->open_flags = outargp->open_flags;
 		} else if (err != -ENOSYS) {
 			fuse_file_free(ff);
 			return ERR_PTR(err);
 		} else {
+			/* No release needed */
+			kfree(ff->args);
+			ff->args = NULL;
 			if (isdir)
 				fc->no_opendir = 1;
 			else
@@ -195,17 +196,30 @@ static void fuse_link_write_file(struct file *file)
 	spin_unlock(&fi->lock);
 }
 
-void fuse_finish_open(struct inode *inode, struct file *file)
+int fuse_finish_open(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	int err;
+
+	err = fuse_file_io_open(file, inode);
+	if (err)
+		return err;
 
 	if (ff->open_flags & FOPEN_STREAM)
 		stream_open(inode, file);
 	else if (ff->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
 
-	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+		fuse_link_write_file(file);
+
+	return 0;
+}
+
+static void fuse_truncate_update_attr(struct inode *inode, struct file *file)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
 
 	spin_lock(&fi->lock);
@@ -214,21 +228,18 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 	spin_unlock(&fi->lock);
 	file_update_time(file);
 	fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
-	}
-
-	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
-		fuse_link_write_file(file);
 }
 
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+static int fuse_open(struct inode *inode, struct file *file)
 {
 	struct fuse_mount *fm = get_fuse_mount(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = fm->fc;
+	struct fuse_file *ff;
 	int err;
-	bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
-			  fc->atomic_o_trunc &&
-			  fc->writeback_cache;
-	bool dax_truncate = (file->f_flags & O_TRUNC) &&
-			  fc->atomic_o_trunc && FUSE_IS_DAX(inode);
+	bool is_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc;
+	bool is_wb_truncate = is_truncate && fc->writeback_cache;
+	bool dax_truncate = is_truncate && FUSE_IS_DAX(inode);
 
 	if (fuse_is_bad(inode))
 		return -EIO;
@@ -250,16 +261,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 	if (is_wb_truncate || dax_truncate)
 		fuse_set_nowrite(inode);
 
-	err = fuse_do_open(fm, get_node_id(inode), file, isdir);
-	if (!err)
-		fuse_finish_open(inode, file);
+	err = fuse_do_open(fm, get_node_id(inode), file, false);
+	if (!err) {
+		ff = file->private_data;
+		err = fuse_finish_open(inode, file);
+		if (err)
+			fuse_sync_release(fi, ff, file->f_flags);
+		else if (is_truncate)
+			fuse_truncate_update_attr(inode, file);
+	}
 
 	if (is_wb_truncate || dax_truncate)
 		fuse_release_nowrite(inode);
 	if (!err) {
-		struct fuse_file *ff = file->private_data;
-
-		if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+		if (is_truncate)
 			truncate_pagecache(inode, 0);
 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
 			invalidate_inode_pages2(inode->i_mapping);
@@ -274,10 +289,13 @@ out_inode_unlock:
 }
 
 static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
-				 unsigned int flags, int opcode)
+				 unsigned int flags, int opcode, bool sync)
 {
 	struct fuse_conn *fc = ff->fm->fc;
-	struct fuse_release_args *ra = ff->release_args;
+	struct fuse_release_args *ra = &ff->args->release_args;
+
+	if (fuse_file_passthrough(ff))
+		fuse_passthrough_release(ff, fuse_inode_backing(fi));
 
 	/* Inode is NULL on error path of fuse_create_open() */
 	if (likely(fi)) {
@@ -292,6 +310,11 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
 
 	wake_up_interruptible_all(&ff->poll_wait);
 
+	if (!ra)
+		return;
+
+	/* ff->args was used for open outarg */
+	memset(ff->args, 0, sizeof(*ff->args));
 	ra->inarg.fh = ff->fh;
 	ra->inarg.flags = flags;
 	ra->args.in_numargs = 1;
@@ -301,23 +324,28 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
 	ra->args.nodeid = ff->nodeid;
 	ra->args.force = true;
 	ra->args.nocreds = true;
+
+	/*
+	 * Hold inode until release is finished.
+	 * From fuse_sync_release() the refcount is 1 and everything's
+	 * synchronous, so we are fine with not doing igrab() here.
+	 */
+	ra->inode = sync ? NULL : igrab(&fi->inode);
 }
 
 void fuse_file_release(struct inode *inode, struct fuse_file *ff,
 		       unsigned int open_flags, fl_owner_t id, bool isdir)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	struct fuse_release_args *ra = ff->release_args;
+	struct fuse_release_args *ra = &ff->args->release_args;
 	int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
 
-	fuse_prepare_release(fi, ff, open_flags, opcode);
+	fuse_prepare_release(fi, ff, open_flags, opcode, false);
 
-	if (ff->flock) {
+	if (ra && ff->flock) {
 		ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
 		ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id);
 	}
-	/* Hold inode until release is finished */
-	ra->inode = igrab(inode);
 
 	/*
 	 * Normally this will send the RELEASE request, however if
@@ -328,7 +356,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
 	 * synchronous RELEASE is allowed (and desirable) in this case
 	 * because the server can be trusted not to screw up.
 	 */
-	fuse_file_put(ff, ff->fm->fc->destroy, isdir);
+	fuse_file_put(ff, ff->fm->fc->destroy);
 }
 
 void fuse_release_common(struct file *file, bool isdir)
@@ -337,11 +365,6 @@ void fuse_release_common(struct file *file, bool isdir)
 			   (fl_owner_t) file, isdir);
 }
 
-static int fuse_open(struct inode *inode, struct file *file)
-{
-	return fuse_open_common(inode, file, false);
-}
-
 static int fuse_release(struct inode *inode, struct file *file)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -363,12 +386,8 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
 			unsigned int flags)
 {
 	WARN_ON(refcount_read(&ff->count) > 1);
-	fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
-	/*
-	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
-	 * synchronous, we are fine with not doing igrab() here"
-	 */
-	fuse_file_put(ff, true, false);
+	fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true);
+	fuse_file_put(ff, true);
 }
 EXPORT_SYMBOL_GPL(fuse_sync_release);
 
@@ -634,7 +653,8 @@ static void fuse_release_user_pages(struct fuse_args_pages *ap,
 	for (i = 0; i < ap->num_pages; i++) {
 		if (should_dirty)
 			set_page_dirty_lock(ap->pages[i]);
-		put_page(ap->pages[i]);
+		if (ap->args.is_pinned)
+			unpin_user_page(ap->pages[i]);
 	}
 }
 
@@ -925,7 +945,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
 		put_page(page);
 	}
 	if (ia->ff)
-		fuse_file_put(ia->ff, false, false);
+		fuse_file_put(ia->ff, false);
 
 	fuse_io_free(ia);
 }
@@ -1299,13 +1319,93 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
 	return res;
 }
 
+static bool fuse_io_past_eof(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+
+	return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
+}
+
+/*
+ * @return true if an exclusive lock for direct IO writes is needed
+ */
+static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	/* Server side has to advise that it supports parallel dio writes. */
+	if (!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES))
+		return true;
+
+	/*
+	 * Append will need to know the eventual EOF - always needs an
+	 * exclusive lock.
+	 */
+	if (iocb->ki_flags & IOCB_APPEND)
+		return true;
+
+	/* shared locks are not allowed with parallel page cache IO */
+	if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
+		return false;
+
+	/* Parallel dio beyond EOF is not supported, at least for now. */
+	if (fuse_io_past_eof(iocb, from))
+		return true;
+
+	return false;
+}
+
+static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
+			  bool *exclusive)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct fuse_file *ff = iocb->ki_filp->private_data;
+
+	*exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
+	if (*exclusive) {
+		inode_lock(inode);
+	} else {
+		inode_lock_shared(inode);
+		/*
+		 * New parallal dio allowed only if inode is not in caching
+		 * mode and denies new opens in caching mode. This check
+		 * should be performed only after taking shared inode lock.
+		 * Previous past eof check was without inode lock and might
+		 * have raced, so check it again.
+		 */
+		if (fuse_io_past_eof(iocb, from) ||
+		    fuse_file_uncached_io_start(inode, ff, NULL) != 0) {
+			inode_unlock_shared(inode);
+			inode_lock(inode);
+			*exclusive = true;
+		}
+	}
+}
+
+static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct fuse_file *ff = iocb->ki_filp->private_data;
+
+	if (exclusive) {
+		inode_unlock(inode);
+	} else {
+		/* Allow opens in caching mode after last parallel dio end */
+		fuse_file_uncached_io_end(inode, ff);
+		inode_unlock_shared(inode);
+	}
+}
+
 static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	ssize_t written = 0;
 	struct inode *inode = mapping->host;
-	ssize_t err;
+	ssize_t err, count;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
 	if (fc->writeback_cache) {
@@ -1327,10 +1427,12 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 writethrough:
 	inode_lock(inode);
 
-	err = generic_write_checks(iocb, from);
+	err = count = generic_write_checks(iocb, from);
 	if (err <= 0)
 		goto out;
 
+	task_io_account_write(count);
+
 	err = file_remove_privs(file);
 	if (err)
 		goto out;
@@ -1392,10 +1494,13 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
 		unsigned npages;
 		size_t start;
-		ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
+		struct page **pt_pages;
+
+		pt_pages = &ap->pages[ap->num_pages];
+		ret = iov_iter_extract_pages(ii, &pt_pages,
 					  *nbytesp - nbytes,
 					  max_pages - ap->num_pages,
-					  &start);
+					  0, &start);
 		if (ret < 0)
 			break;
 
@@ -1412,6 +1517,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 			(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
 	}
 
+	ap->args.is_pinned = iov_iter_extract_will_pin(ii);
 	ap->args.user_pages = true;
 	if (write)
 		ap->args.in_pages = true;
@@ -1558,51 +1664,17 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return res;
 }
 
-static bool fuse_direct_write_extending_i_size(struct kiocb *iocb,
-					       struct iov_iter *iter)
-{
-	struct inode *inode = file_inode(iocb->ki_filp);
-
-	return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
-}
-
 static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
-	struct file *file = iocb->ki_filp;
-	struct fuse_file *ff = file->private_data;
 	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
 	ssize_t res;
-	bool exclusive_lock =
-		!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
-		get_fuse_conn(inode)->direct_io_allow_mmap ||
-		iocb->ki_flags & IOCB_APPEND ||
-		fuse_direct_write_extending_i_size(iocb, from);
-
-	/*
-	 * Take exclusive lock if
-	 * - Parallel direct writes are disabled - a user space decision
-	 * - Parallel direct writes are enabled and i_size is being extended.
-	 * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP).
-	 *   This might not be needed at all, but needs further investigation.
-	 */
-	if (exclusive_lock)
-		inode_lock(inode);
-	else {
-		inode_lock_shared(inode);
-
-		/* A race with truncate might have come up as the decision for
-		 * the lock type was done without holding the lock, check again.
-		 */
-		if (fuse_direct_write_extending_i_size(iocb, from)) {
-			inode_unlock_shared(inode);
-			inode_lock(inode);
-			exclusive_lock = true;
-		}
-	}
+	bool exclusive;
 
+	fuse_dio_lock(iocb, from, &exclusive);
 	res = generic_write_checks(iocb, from);
 	if (res > 0) {
 		task_io_account_write(res);
 		if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
 			res = fuse_direct_IO(iocb, from);
 		} else {
@@ -1611,10 +1683,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			fuse_write_update_attr(inode, iocb->ki_pos, res);
 		}
 	}
-	if (exclusive_lock)
-		inode_unlock(inode);
-	else
-		inode_unlock_shared(inode);
+	fuse_dio_unlock(iocb, exclusive);
 
 	return res;
 }
@@ -1631,10 +1700,13 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (FUSE_IS_DAX(inode))
 		return fuse_dax_read_iter(iocb, to);
 
-	if (!(ff->open_flags & FOPEN_DIRECT_IO))
-		return fuse_cache_read_iter(iocb, to);
-	else
+	/* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+	if (ff->open_flags & FOPEN_DIRECT_IO)
 		return fuse_direct_read_iter(iocb, to);
+	else if (fuse_file_passthrough(ff))
+		return fuse_passthrough_read_iter(iocb, to);
+	else
+		return fuse_cache_read_iter(iocb, to);
 }
 
 static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1649,10 +1721,38 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (FUSE_IS_DAX(inode))
 		return fuse_dax_write_iter(iocb, from);
 
-	if (!(ff->open_flags & FOPEN_DIRECT_IO))
-		return fuse_cache_write_iter(iocb, from);
-	else
+	/* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+	if (ff->open_flags & FOPEN_DIRECT_IO)
 		return fuse_direct_write_iter(iocb, from);
+	else if (fuse_file_passthrough(ff))
+		return fuse_passthrough_write_iter(iocb, from);
+	else
+		return fuse_cache_write_iter(iocb, from);
+}
+
+static ssize_t fuse_splice_read(struct file *in, loff_t *ppos,
+				struct pipe_inode_info *pipe, size_t len,
+				unsigned int flags)
+{
+	struct fuse_file *ff = in->private_data;
+
+	/* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+	if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+		return fuse_passthrough_splice_read(in, ppos, pipe, len, flags);
+	else
+		return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
+static ssize_t fuse_splice_write(struct pipe_inode_info *pipe, struct file *out,
+				 loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct fuse_file *ff = out->private_data;
+
+	/* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+	if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+		return fuse_passthrough_splice_write(pipe, out, ppos, len, flags);
+	else
+		return iter_file_splice_write(pipe, out, ppos, len, flags);
+}
+
 static void fuse_writepage_free(struct fuse_writepage_args *wpa)
@@ -1667,7 +1767,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
 		__free_page(ap->pages[i]);
 
 	if (wpa->ia.ff)
-		fuse_file_put(wpa->ia.ff, false, false);
+		fuse_file_put(wpa->ia.ff, false);
 
 	kfree(ap->pages);
 	kfree(wpa);
@@ -1909,7 +2009,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
 	ff = __fuse_write_file_get(fi);
 	err = fuse_flush_times(inode, ff);
 	if (ff)
-		fuse_file_put(ff, false, false);
+		fuse_file_put(ff, false);
 
 	return err;
 }
@@ -1947,26 +2047,26 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
 	rcu_read_unlock();
 }
 
-static int fuse_writepage_locked(struct page *page)
+static int fuse_writepage_locked(struct folio *folio)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = folio->mapping;
 	struct inode *inode = mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_writepage_args *wpa;
 	struct fuse_args_pages *ap;
-	struct page *tmp_page;
+	struct folio *tmp_folio;
 	int error = -ENOMEM;
 
-	set_page_writeback(page);
+	folio_start_writeback(folio);
 
 	wpa = fuse_writepage_args_alloc();
 	if (!wpa)
 		goto err;
 	ap = &wpa->ia.ap;
 
-	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
-	if (!tmp_page)
+	tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
+	if (!tmp_folio)
 		goto err_free;
 
 	error = -EIO;
@@ -1975,21 +2075,21 @@ static int fuse_writepage_locked(struct folio *folio)
 		goto err_nofile;
 
 	fuse_writepage_add_to_bucket(fc, wpa);
-	fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
+	fuse_write_args_fill(&wpa->ia, wpa->ia.ff, folio_pos(folio), 0);
 
-	copy_highpage(tmp_page, page);
+	folio_copy(tmp_folio, folio);
 	wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
 	wpa->next = NULL;
 	ap->args.in_pages = true;
 	ap->num_pages = 1;
-	ap->pages[0] = tmp_page;
+	ap->pages[0] = &tmp_folio->page;
 	ap->descs[0].offset = 0;
 	ap->descs[0].length = PAGE_SIZE;
 	ap->args.end = fuse_writepage_end;
 	wpa->inode = inode;
 
 	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
-	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
 
 	spin_lock(&fi->lock);
 	tree_insert(&fi->writepages, wpa);
@@ -1997,48 +2097,20 @@ static int fuse_writepage_locked(struct folio *folio)
 	fuse_flush_writepages(inode);
 	spin_unlock(&fi->lock);
 
-	end_page_writeback(page);
+	folio_end_writeback(folio);
 
 	return 0;
 
 err_nofile:
-	__free_page(tmp_page);
+	folio_put(tmp_folio);
 err_free:
 	kfree(wpa);
 err:
-	mapping_set_error(page->mapping, error);
-	end_page_writeback(page);
+	mapping_set_error(folio->mapping, error);
+	folio_end_writeback(folio);
 	return error;
 }
 
-static int fuse_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
-	int err;
-
-	if (fuse_page_is_writeback(page->mapping->host, page->index)) {
-		/*
-		 * ->writepages() should be called for sync() and friends.  We
-		 * should only get here on direct reclaim and then we are
-		 * allowed to skip a page which is already in flight
-		 */
-		WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
-	}
-
-	if (wbc->sync_mode == WB_SYNC_NONE &&
-	    fc->num_background >= fc->congestion_threshold)
-		return AOP_WRITEPAGE_ACTIVATE;
-
-	err = fuse_writepage_locked(page);
-	unlock_page(page);
-
-	return err;
-}
-
 struct fuse_fill_wb_data {
 	struct fuse_writepage_args *wpa;
 	struct fuse_file *ff;
@@ -2307,7 +2379,7 @@ static int fuse_writepages(struct address_space *mapping,
 		fuse_writepages_send(&data);
 	}
 	if (data.ff)
-		fuse_file_put(data.ff, false, false);
+		fuse_file_put(data.ff, false);
 
 	kfree(data.orig_pages);
 out:
@@ -2401,7 +2473,7 @@ static int fuse_launder_folio(struct folio *folio)
 
 		/* Serialize with pending writeback for the same page */
 		fuse_wait_on_page_writeback(inode, folio->index);
-		err = fuse_writepage_locked(&folio->page);
+		err = fuse_writepage_locked(folio);
 		if (!err)
 			fuse_wait_on_page_writeback(inode, folio->index);
 	}
@@ -2462,13 +2534,30 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fm->fc;
+	struct inode *inode = file_inode(file);
+	int rc;
 
 	/* DAX mmap is superior to direct_io mmap */
-	if (FUSE_IS_DAX(file_inode(file)))
+	if (FUSE_IS_DAX(inode))
 		return fuse_dax_mmap(file, vma);
 
+	/*
+	 * If inode is in passthrough io mode, because it has some file open
+	 * in passthrough mode, either mmap to backing file or fail mmap,
+	 * because mixing cached mmap and passthrough io mode is not allowed.
+	 */
+	if (fuse_file_passthrough(ff))
+		return fuse_passthrough_mmap(file, vma);
+	else if (fuse_inode_backing(get_fuse_inode(inode)))
+		return -ENODEV;
+
+	/*
+	 * FOPEN_DIRECT_IO handling is special compared to O_DIRECT,
+	 * as does not allow MAP_SHARED mmap without FUSE_DIRECT_IO_ALLOW_MMAP.
+	 */
 	if (ff->open_flags & FOPEN_DIRECT_IO) {
-		/* Can't provide the coherency needed for MAP_SHARED
+		/*
+		 * Can't provide the coherency needed for MAP_SHARED
 		 * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set.
 		 */
 		if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap)
@@ -2476,9 +2565,21 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 		invalidate_inode_pages2(file->f_mapping);
 
 		if (!(vma->vm_flags & VM_MAYSHARE)) {
 			/* MAP_PRIVATE */
 			return generic_file_mmap(file, vma);
 		}
+
+		/*
+		 * First mmap of direct_io file enters caching inode io mode.
+		 * Also waits for parallel dio writers to go into serial mode
+		 * (exclusive instead of shared lock).
+		 */
+		rc = fuse_file_cached_io_start(inode, ff);
+		if (rc)
+			return rc;
 	}
 
 	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
 		fuse_link_write_file(file);
 
@@ -2580,10 +2681,6 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
 		return -ENOLCK;
 	}
 
-	/* Unlock on close is handled by the flush method */
-	if ((fl->c.flc_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
-		return 0;
-
 	fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
 	err = fuse_simple_request(fm, &args);
 
@@ -3213,8 +3310,8 @@ static const struct file_operations fuse_file_operations = {
 	.lock		= fuse_file_lock,
 	.get_unmapped_area = thp_get_unmapped_area,
 	.flock		= fuse_file_flock,
-	.splice_read	= filemap_splice_read,
-	.splice_write	= iter_file_splice_write,
+	.splice_read	= fuse_splice_read,
+	.splice_write	= fuse_splice_write,
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
 	.poll		= fuse_file_poll,
@@ -3225,10 +3322,10 @@ static const struct file_operations fuse_file_operations = {
 static const struct address_space_operations fuse_file_aops  = {
 	.read_folio	= fuse_read_folio,
 	.readahead	= fuse_readahead,
-	.writepage	= fuse_writepage,
 	.writepages	= fuse_writepages,
 	.launder_folio	= fuse_launder_folio,
 	.dirty_folio	= filemap_dirty_folio,
+	.migrate_folio	= filemap_migrate_folio,
 	.bmap		= fuse_bmap,
 	.direct_IO	= fuse_direct_IO,
 	.write_begin	= fuse_write_begin,
@@ -3245,7 +3342,9 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
 	fi->writectr = 0;
+	fi->iocachectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
+	init_waitqueue_head(&fi->direct_io_waitq);
 	fi->writepages = RB_ROOT;
 
 	if (IS_ENABLED(CONFIG_FUSE_DAX))
fs/fuse/fuse_i.h | 153
@@ -76,6 +76,16 @@ struct fuse_submount_lookup {
 	struct fuse_forget_link *forget;
 };
 
+/** Container for data related to mapping to backing file */
+struct fuse_backing {
+	struct file *file;
+	struct cred *cred;
+
+	/** refcount */
+	refcount_t count;
+	struct rcu_head rcu;
+};
+
 /** FUSE inode */
 struct fuse_inode {
 	/** Inode data */
@@ -111,7 +121,7 @@ struct fuse_inode {
 	u64 attr_version;
 
 	union {
-		/* Write related fields (regular file only) */
+		/* read/write io cache (regular file only) */
 		struct {
 			/* Files usable in writepage.  Protected by fi->lock */
 			struct list_head write_files;
@@ -123,9 +133,15 @@ struct fuse_inode {
 			 * (FUSE_NOWRITE) means more writes are blocked */
 			int writectr;
 
+			/** Number of files/maps using page cache */
+			int iocachectr;
+
 			/* Waitq for writepage completion */
 			wait_queue_head_t page_waitq;
 
+			/* waitq for direct-io completion */
+			wait_queue_head_t direct_io_waitq;
+
 			/* List of writepage requestst (pending or sent) */
 			struct rb_root writepages;
 		};
@@ -173,6 +189,10 @@ struct fuse_inode {
 #endif
 	/** Submount specific lookup tracking */
 	struct fuse_submount_lookup *submount_lookup;
+#ifdef CONFIG_FUSE_PASSTHROUGH
+	/** Reference to backing file in passthrough mode */
+	struct fuse_backing *fb;
+#endif
 };
 
 /** FUSE inode state bits */
@@ -187,19 +207,21 @@ enum {
 	FUSE_I_BAD,
 	/* Has btime */
 	FUSE_I_BTIME,
+	/* Wants or already has page cache IO */
+	FUSE_I_CACHE_IO_MODE,
 };
 
 struct fuse_conn;
 struct fuse_mount;
-struct fuse_release_args;
+union fuse_file_args;
 
 /** FUSE specific file data */
 struct fuse_file {
 	/** Fuse connection for this file */
 	struct fuse_mount *fm;
 
-	/* Argument space reserved for release */
-	struct fuse_release_args *release_args;
+	/* Argument space reserved for open/release */
+	union fuse_file_args *args;
 
 	/** Kernel file handle guaranteed to be unique */
 	u64 kh;
@@ -221,12 +243,6 @@ struct fuse_file {
 
 	/* Readdir related */
 	struct {
-		/*
-		 * Protects below fields against (crazy) parallel readdir on
-		 * same open file.  Uncontended in the normal case.
-		 */
-		struct mutex lock;
-
 		/* Dir stream position */
 		loff_t pos;
 
@@ -244,6 +260,15 @@ struct fuse_file {
 	/** Wait queue head for poll */
 	wait_queue_head_t poll_wait;
 
+	/** Does file hold a fi->iocachectr refcount? */
+	enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode;
+
+#ifdef CONFIG_FUSE_PASSTHROUGH
+	/** Reference to backing file in passthrough mode */
+	struct file *passthrough;
+	const struct cred *cred;
+#endif
+
 	/** Has flock been performed on this file? */
 	bool flock:1;
 };
@@ -283,6 +308,7 @@ struct fuse_args {
 	bool page_replace:1;
 	bool may_block:1;
 	bool is_ext:1;
+	bool is_pinned:1;
 	struct fuse_in_arg in_args[3];
 	struct fuse_arg out_args[2];
 	void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
@@ -295,6 +321,19 @@ struct fuse_args_pages {
 	unsigned int num_pages;
 };
 
+struct fuse_release_args {
+	struct fuse_args args;
+	struct fuse_release_in inarg;
+	struct inode *inode;
+};
+
+union fuse_file_args {
+	/* Used during open() */
+	struct fuse_open_out open_outarg;
+	/* Used during release() */
+	struct fuse_release_args release_args;
+};
+
 #define FUSE_ARGS(args) struct fuse_args args = {}
 
 /** The request IO state (for asynchronous processing) */
@@ -818,6 +857,12 @@ struct fuse_conn {
 	/* Is statx not implemented by fs? */
 	unsigned int no_statx:1;
 
+	/** Passthrough support for read/write IO */
+	unsigned int passthrough:1;
+
+	/** Maximum stack depth for passthrough backing files */
+	int max_stack_depth;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
@@ -867,6 +912,11 @@ struct fuse_conn {
 
 	/* New writepages go into this bucket */
 	struct fuse_sync_bucket __rcu *curr_bucket;
+
+#ifdef CONFIG_FUSE_PASSTHROUGH
+	/** IDR for backing files ids */
+	struct idr backing_files_map;
+#endif
 };
 
 /*
@@ -940,7 +990,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation,
 
 static inline void fuse_make_bad(struct inode *inode)
 {
-	remove_inode_hash(inode);
 	set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
 }
 
@@ -1032,14 +1081,9 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
 			 size_t count, int opcode);
 
 
-/**
- * Send OPEN or OPENDIR request
- */
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
-
-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm);
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release);
 void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file);
+int fuse_finish_open(struct inode *inode, struct file *file);
 
 void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
 		       unsigned int flags);
@@ -1349,11 +1393,82 @@ int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
 int fuse_fileattr_set(struct mnt_idmap *idmap,
 		      struct dentry *dentry, struct fileattr *fa);
 
-/* file.c */
+/* iomode.c */
+int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff);
+int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb);
+void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff);
+
+int fuse_file_io_open(struct file *file, struct inode *inode);
+void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
+
+/* file.c */
 struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 				 unsigned int open_flags, bool isdir);
 void fuse_file_release(struct inode *inode, struct fuse_file *ff,
 		       unsigned int open_flags, fl_owner_t id, bool isdir);
 
+/* passthrough.c */
+static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+	return READ_ONCE(fi->fb);
+#else
+	return NULL;
+#endif
+}
+
+static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi,
+							  struct fuse_backing *fb)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+	return xchg(&fi->fb, fb);
+#else
+	return NULL;
+#endif
+}
+
+#ifdef CONFIG_FUSE_PASSTHROUGH
+struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
+void fuse_backing_put(struct fuse_backing *fb);
+#else
+
+static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
+{
+	return NULL;
+}
+
+static inline void fuse_backing_put(struct fuse_backing *fb)
+{
+}
+#endif
+
+void fuse_backing_files_init(struct fuse_conn *fc);
+void fuse_backing_files_free(struct fuse_conn *fc);
+int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map);
+int fuse_backing_close(struct fuse_conn *fc, int backing_id);
+
+struct fuse_backing *fuse_passthrough_open(struct file *file,
+					   struct inode *inode,
+					   int backing_id);
+void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb);
+
+static inline struct file *fuse_file_passthrough(struct fuse_file *ff)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+	return ff->passthrough;
+#else
+	return NULL;
+#endif
+}
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
+				     struct pipe_inode_info *pipe,
+				     size_t len, unsigned int flags);
+ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
+				      struct file *out, loff_t *ppos,
+				      size_t len, unsigned int flags);
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
+
 #endif /* _FS_FUSE_I_H */
fs/fuse/inode.c

@@ -111,6 +111,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
 		goto out_free_forget;
 
+	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+		fuse_inode_backing_set(fi, NULL);
+
 	return &fi->inode;
 
 out_free_forget:
@@ -129,6 +132,9 @@ static void fuse_free_inode(struct inode *inode)
 #ifdef CONFIG_FUSE_DAX
 	kfree(fi->dax);
 #endif
+	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+		fuse_backing_put(fuse_inode_backing(fi));
+
 	kmem_cache_free(fuse_inode_cachep, fi);
 }
 
@@ -469,9 +475,12 @@ retry:
 	} else if (fuse_stale_inode(inode, generation, attr)) {
 		/* nodeid was reused, any I/O on the old inode should fail */
 		fuse_make_bad(inode);
+		if (inode != d_inode(sb->s_root)) {
+			remove_inode_hash(inode);
 			iput(inode);
 			goto retry;
+		}
 	}
 	fi = get_fuse_inode(inode);
 	spin_lock(&fi->lock);
 	fi->nlookup++;
@@ -924,6 +933,9 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
 	fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
 
+	if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+		fuse_backing_files_init(fc);
+
 	INIT_LIST_HEAD(&fc->mounts);
 	list_add(&fm->fc_entry, &fc->mounts);
 	fm->fc = fc;
@@ -954,6 +966,8 @@ void fuse_conn_put(struct fuse_conn *fc)
 			WARN_ON(atomic_read(&bucket->count) != 1);
 			kfree(bucket);
 		}
+		if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+			fuse_backing_files_free(fc);
 		call_rcu(&fc->rcu, delayed_release);
 	}
 }
@@ -974,7 +988,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 	attr.mode = mode;
 	attr.ino = FUSE_ROOT_ID;
 	attr.nlink = 1;
-	return fuse_iget(sb, 1, 0, &attr, 0, 0);
+	return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
 }
 
 struct fuse_inode_handle {
@@ -1117,6 +1131,11 @@ static struct dentry *fuse_get_parent(struct dentry *child)
 	return parent;
 }
 
+/* only for fid encoding; no support for file handle */
+static const struct export_operations fuse_export_fid_operations = {
+	.encode_fh	= fuse_encode_fh,
+};
+
 static const struct export_operations fuse_export_operations = {
 	.fh_to_dentry	= fuse_fh_to_dentry,
 	.fh_to_parent	= fuse_fh_to_parent,
@ -1291,6 +1310,26 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
|
||||
fc->create_supp_group = 1;
|
||||
if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
|
||||
fc->direct_io_allow_mmap = 1;
|
||||
/*
|
||||
* max_stack_depth is the max stack depth of FUSE fs,
|
||||
* so it has to be at least 1 to support passthrough
|
||||
* to backing files.
|
||||
*
|
||||
* with max_stack_depth > 1, the backing files can be
|
||||
* on a stacked fs (e.g. overlayfs) themselves and with
|
||||
* max_stack_depth == 1, FUSE fs can be stacked as the
|
||||
* underlying fs of a stacked fs (e.g. overlayfs).
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
|
||||
(flags & FUSE_PASSTHROUGH) &&
|
||||
arg->max_stack_depth > 0 &&
|
||||
arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) {
|
||||
fc->passthrough = 1;
|
||||
fc->max_stack_depth = arg->max_stack_depth;
|
||||
fm->sb->s_stack_depth = arg->max_stack_depth;
|
||||
}
|
||||
if (flags & FUSE_NO_EXPORT_SUPPORT)
|
||||
fm->sb->s_export_op = &fuse_export_fid_operations;
|
||||
} else {
|
||||
ra_pages = fc->max_read / PAGE_SIZE;
|
||||
fc->no_lock = 1;
|
||||
@ -1337,7 +1376,8 @@ void fuse_send_init(struct fuse_mount *fm)
|
||||
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
|
||||
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
|
||||
FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
|
||||
FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP;
|
||||
FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
|
||||
FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND;
|
||||
#ifdef CONFIG_FUSE_DAX
|
||||
if (fm->fc->dax)
|
||||
flags |= FUSE_MAP_ALIGNMENT;
|
||||
@ -1346,6 +1386,8 @@ void fuse_send_init(struct fuse_mount *fm)
|
||||
#endif
|
||||
if (fm->fc->auto_submounts)
|
||||
flags |= FUSE_SUBMOUNTS;
|
||||
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
|
||||
flags |= FUSE_PASSTHROUGH;
|
||||
|
||||
ia->in.flags = flags;
|
||||
ia->in.flags2 = flags >> 32;
|
||||
@ -1496,8 +1538,8 @@ static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
|
||||
.ctimensec = ctime.tv_nsec,
|
||||
.mode = fi->inode.i_mode,
|
||||
.nlink = fi->inode.i_nlink,
|
||||
.uid = fi->inode.i_uid.val,
|
||||
.gid = fi->inode.i_gid.val,
|
||||
.uid = __kuid_val(fi->inode.i_uid),
|
||||
.gid = __kgid_val(fi->inode.i_gid),
|
||||
.rdev = fi->inode.i_rdev,
|
||||
.blksize = 1u << fi->inode.i_blkbits,
|
||||
};
|
||||
@ -1534,6 +1576,7 @@ static int fuse_fill_super_submount(struct super_block *sb,
|
||||
sb->s_bdi = bdi_get(parent_sb->s_bdi);
|
||||
|
||||
sb->s_xattr = parent_sb->s_xattr;
|
||||
sb->s_export_op = parent_sb->s_export_op;
|
||||
sb->s_time_gran = parent_sb->s_time_gran;
|
||||
sb->s_blocksize = parent_sb->s_blocksize;
|
||||
sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
|
||||
|
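
The negotiation above is driven from the server side: a passthrough-capable server keeps FUSE_PASSTHROUGH set in its INIT reply and fills the new max_stack_depth field. A minimal sketch of that reply, assuming a hypothetical low-level server that talks to /dev/fuse directly; fuse_reply() stands in for the server's own reply helper, and full flag negotiation is elided:

#include <stdint.h>
#include <linux/fuse.h>

static void reply_init(int dev_fd, uint64_t unique)
{
	struct fuse_init_out out = {
		.major = FUSE_KERNEL_VERSION,
		.minor = FUSE_KERNEL_MINOR_VERSION,
		/* FUSE_PASSTHROUGH is bit 37, i.e. bit 5 of flags2 */
		.flags2 = FUSE_PASSTHROUGH >> 32,
		/* depth 1: backing files may not be on a stacked fs */
		.max_stack_depth = 1,
	};

	fuse_reply(dev_fd, unique, &out, sizeof(out));	/* hypothetical helper */
}
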
254
fs/fuse/iomode.c
Normal file
@ -0,0 +1,254 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE inode io modes.
 *
 * Copyright (c) 2024 CTERA Networks.
 */

#include "fuse_i.h"

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/fs.h>

/*
 * Return true if need to wait for new opens in caching mode.
 */
static inline bool fuse_is_io_cache_wait(struct fuse_inode *fi)
{
	return READ_ONCE(fi->iocachectr) < 0 && !fuse_inode_backing(fi);
}

/*
 * Start cached io mode.
 *
 * Blocks new parallel dio writes and waits for the in-progress parallel dio
 * writes to complete.
 */
int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	/* There are no io modes if server does not implement open */
	if (!ff->args)
		return 0;

	spin_lock(&fi->lock);
	/*
	 * Setting the bit advises new direct-io writes to use an exclusive
	 * lock - without it the wait below might be forever.
	 */
	while (fuse_is_io_cache_wait(fi)) {
		set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
		spin_unlock(&fi->lock);
		wait_event(fi->direct_io_waitq, !fuse_is_io_cache_wait(fi));
		spin_lock(&fi->lock);
	}

	/*
	 * Check if inode entered passthrough io mode while waiting for parallel
	 * dio write completion.
	 */
	if (fuse_inode_backing(fi)) {
		clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
		spin_unlock(&fi->lock);
		return -ETXTBSY;
	}

	WARN_ON(ff->iomode == IOM_UNCACHED);
	if (ff->iomode == IOM_NONE) {
		ff->iomode = IOM_CACHED;
		if (fi->iocachectr == 0)
			set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
		fi->iocachectr++;
	}
	spin_unlock(&fi->lock);
	return 0;
}

static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
	WARN_ON(fi->iocachectr <= 0);
	WARN_ON(ff->iomode != IOM_CACHED);
	ff->iomode = IOM_NONE;
	fi->iocachectr--;
	if (fi->iocachectr == 0)
		clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
	spin_unlock(&fi->lock);
}

/* Start strictly uncached io mode where cache access is not allowed */
int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff,
				struct fuse_backing *fb)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_backing *oldfb;
	int err = 0;

	spin_lock(&fi->lock);
	/* deny conflicting backing files on same fuse inode */
	oldfb = fuse_inode_backing(fi);
	if (oldfb && oldfb != fb) {
		err = -EBUSY;
		goto unlock;
	}
	if (fi->iocachectr > 0) {
		err = -ETXTBSY;
		goto unlock;
	}
	WARN_ON(ff->iomode != IOM_NONE);
	fi->iocachectr--;
	ff->iomode = IOM_UNCACHED;

	/* fuse inode holds a single refcount of backing file */
	if (!oldfb) {
		oldfb = fuse_inode_backing_set(fi, fb);
		WARN_ON_ONCE(oldfb != NULL);
	} else {
		fuse_backing_put(fb);
	}
unlock:
	spin_unlock(&fi->lock);
	return err;
}

void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_backing *oldfb = NULL;

	spin_lock(&fi->lock);
	WARN_ON(fi->iocachectr >= 0);
	WARN_ON(ff->iomode != IOM_UNCACHED);
	ff->iomode = IOM_NONE;
	fi->iocachectr++;
	if (!fi->iocachectr) {
		wake_up(&fi->direct_io_waitq);
		oldfb = fuse_inode_backing_set(fi, NULL);
	}
	spin_unlock(&fi->lock);
	if (oldfb)
		fuse_backing_put(oldfb);
}

/*
 * Open flags that are allowed in combination with FOPEN_PASSTHROUGH.
 * A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write
 * operations go directly to the server, but mmap is done on the backing file.
 * FOPEN_PASSTHROUGH mode should not co-exist with any users of the fuse inode
 * page cache, so FOPEN_KEEP_CACHE is a strange and undesired combination.
 */
#define FOPEN_PASSTHROUGH_MASK \
	(FOPEN_PASSTHROUGH | FOPEN_DIRECT_IO | FOPEN_PARALLEL_DIRECT_WRITES | \
	 FOPEN_NOFLUSH)

static int fuse_file_passthrough_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_backing *fb;
	int err;

	/* Check allowed conditions for file open in passthrough mode */
	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) || !fc->passthrough ||
	    (ff->open_flags & ~FOPEN_PASSTHROUGH_MASK))
		return -EINVAL;

	fb = fuse_passthrough_open(file, inode,
				   ff->args->open_outarg.backing_id);
	if (IS_ERR(fb))
		return PTR_ERR(fb);

	/* First passthrough file open denies caching inode io mode */
	err = fuse_file_uncached_io_start(inode, ff, fb);
	if (!err)
		return 0;

	fuse_passthrough_release(ff, fb);
	fuse_backing_put(fb);

	return err;
}

/* Request access to submit new io to inode via open file */
int fuse_file_io_open(struct file *file, struct inode *inode)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err;

	/*
	 * io modes are not relevant with DAX and with server that does not
	 * implement open.
	 */
	if (FUSE_IS_DAX(inode) || !ff->args)
		return 0;

	/*
	 * Server is expected to use FOPEN_PASSTHROUGH for all opens of an inode
	 * which is already open for passthrough.
	 */
	err = -EINVAL;
	if (fuse_inode_backing(fi) && !(ff->open_flags & FOPEN_PASSTHROUGH))
		goto fail;

	/*
	 * FOPEN_PARALLEL_DIRECT_WRITES requires FOPEN_DIRECT_IO.
	 */
	if (!(ff->open_flags & FOPEN_DIRECT_IO))
		ff->open_flags &= ~FOPEN_PARALLEL_DIRECT_WRITES;

	/*
	 * First passthrough file open denies caching inode io mode.
	 * First caching file open enters caching inode io mode.
	 *
	 * Note that if user opens a file with O_DIRECT, but server did
	 * not specify FOPEN_DIRECT_IO, a later fcntl() could remove O_DIRECT,
	 * so we put the inode in caching mode to prevent parallel dio.
	 */
	if ((ff->open_flags & FOPEN_DIRECT_IO) &&
	    !(ff->open_flags & FOPEN_PASSTHROUGH))
		return 0;

	if (ff->open_flags & FOPEN_PASSTHROUGH)
		err = fuse_file_passthrough_open(inode, file);
	else
		err = fuse_file_cached_io_start(inode, ff);
	if (err)
		goto fail;

	return 0;

fail:
	pr_debug("failed to open file in requested io mode (open_flags=0x%x, err=%i).\n",
		 ff->open_flags, err);
	/*
	 * The file open mode determines the inode io mode.
	 * Using incorrect open mode is a server mistake, which results in
	 * user visible failure of open() with EIO error.
	 */
	return -EIO;
}

/* No more pending io and no new io possible to inode via open/mmapped file */
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
{
	/*
	 * Last parallel dio close allows caching inode io mode.
	 * Last caching file close exits caching inode io mode.
	 */
	switch (ff->iomode) {
	case IOM_NONE:
		/* Nothing to do */
		break;
	case IOM_UNCACHED:
		fuse_file_uncached_io_end(inode, ff);
		break;
	case IOM_CACHED:
		fuse_file_cached_io_end(inode, ff);
		break;
	}
}
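
The iocachectr field used throughout this file is a signed counter on the fuse inode: positive values count open files in caching mode, negative values count open files in uncached (passthrough or parallel-dio) mode, and the two modes exclude each other. A userspace toy model of that exclusion, with the kernel's wait-for-dio-completion path collapsed into an immediate failure for brevity:

#include <assert.h>
#include <errno.h>

static int iocachectr;			/* models fi->iocachectr */

static int cached_io_start(void)	/* models fuse_file_cached_io_start() */
{
	if (iocachectr < 0)
		return -ETXTBSY;	/* inode is in uncached io mode */
	iocachectr++;
	return 0;
}

static int uncached_io_start(void)	/* models fuse_file_uncached_io_start() */
{
	if (iocachectr > 0)
		return -ETXTBSY;	/* inode is in caching io mode */
	iocachectr--;
	return 0;
}

int main(void)
{
	assert(cached_io_start() == 0);			/* first caching open */
	assert(uncached_io_start() == -ETXTBSY);	/* passthrough denied */
	iocachectr--;					/* last caching release */
	assert(uncached_io_start() == 0);		/* now allowed */
	return 0;
}
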
355
fs/fuse/passthrough.c
Normal file
@ -0,0 +1,355 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * FUSE passthrough to backing file.
 *
 * Copyright (c) 2023 CTERA Networks.
 */

#include "fuse_i.h"

#include <linux/file.h>
#include <linux/backing-file.h>
#include <linux/splice.h>

static void fuse_file_accessed(struct file *file)
{
	struct inode *inode = file_inode(file);

	fuse_invalidate_atime(inode);
}

static void fuse_file_modified(struct file *file)
{
	struct inode *inode = file_inode(file);

	fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
}

ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct fuse_file *ff = file->private_data;
	struct file *backing_file = fuse_file_passthrough(ff);
	size_t count = iov_iter_count(iter);
	ssize_t ret;
	struct backing_file_ctx ctx = {
		.cred = ff->cred,
		.user_file = file,
		.accessed = fuse_file_accessed,
	};

	pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu\n", __func__,
		 backing_file, iocb->ki_pos, count);

	if (!count)
		return 0;

	ret = backing_file_read_iter(backing_file, iter, iocb, iocb->ki_flags,
				     &ctx);

	return ret;
}

ssize_t fuse_passthrough_write_iter(struct kiocb *iocb,
				    struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct fuse_file *ff = file->private_data;
	struct file *backing_file = fuse_file_passthrough(ff);
	size_t count = iov_iter_count(iter);
	ssize_t ret;
	struct backing_file_ctx ctx = {
		.cred = ff->cred,
		.user_file = file,
		.end_write = fuse_file_modified,
	};

	pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu\n", __func__,
		 backing_file, iocb->ki_pos, count);

	if (!count)
		return 0;

	inode_lock(inode);
	ret = backing_file_write_iter(backing_file, iter, iocb, iocb->ki_flags,
				      &ctx);
	inode_unlock(inode);

	return ret;
}

ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
				     struct pipe_inode_info *pipe,
				     size_t len, unsigned int flags)
{
	struct fuse_file *ff = in->private_data;
	struct file *backing_file = fuse_file_passthrough(ff);
	struct backing_file_ctx ctx = {
		.cred = ff->cred,
		.user_file = in,
		.accessed = fuse_file_accessed,
	};

	pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu, flags=0x%x\n", __func__,
		 backing_file, ppos ? *ppos : 0, len, flags);

	return backing_file_splice_read(backing_file, ppos, pipe, len, flags,
					&ctx);
}

ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	struct fuse_file *ff = out->private_data;
	struct file *backing_file = fuse_file_passthrough(ff);
	struct inode *inode = file_inode(out);
	ssize_t ret;
	struct backing_file_ctx ctx = {
		.cred = ff->cred,
		.user_file = out,
		.end_write = fuse_file_modified,
	};

	pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu, flags=0x%x\n", __func__,
		 backing_file, ppos ? *ppos : 0, len, flags);

	inode_lock(inode);
	ret = backing_file_splice_write(pipe, backing_file, ppos, len, flags,
					&ctx);
	inode_unlock(inode);

	return ret;
}

ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct fuse_file *ff = file->private_data;
	struct file *backing_file = fuse_file_passthrough(ff);
	struct backing_file_ctx ctx = {
		.cred = ff->cred,
		.user_file = file,
		.accessed = fuse_file_accessed,
	};

	pr_debug("%s: backing_file=0x%p, start=%lu, end=%lu\n", __func__,
		 backing_file, vma->vm_start, vma->vm_end);

	return backing_file_mmap(backing_file, vma, &ctx);
}

struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
{
	if (fb && refcount_inc_not_zero(&fb->count))
		return fb;
	return NULL;
}

static void fuse_backing_free(struct fuse_backing *fb)
{
	pr_debug("%s: fb=0x%p\n", __func__, fb);

	if (fb->file)
		fput(fb->file);
	put_cred(fb->cred);
	kfree_rcu(fb, rcu);
}

void fuse_backing_put(struct fuse_backing *fb)
{
	if (fb && refcount_dec_and_test(&fb->count))
		fuse_backing_free(fb);
}

void fuse_backing_files_init(struct fuse_conn *fc)
{
	idr_init(&fc->backing_files_map);
}

static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock(&fc->lock);
	/* FIXME: xarray might be space inefficient */
	id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
	spin_unlock(&fc->lock);
	idr_preload_end();

	WARN_ON_ONCE(id == 0);
	return id;
}

static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc,
						   int id)
{
	struct fuse_backing *fb;

	spin_lock(&fc->lock);
	fb = idr_remove(&fc->backing_files_map, id);
	spin_unlock(&fc->lock);

	return fb;
}

static int fuse_backing_id_free(int id, void *p, void *data)
{
	struct fuse_backing *fb = p;

	WARN_ON_ONCE(refcount_read(&fb->count) != 1);
	fuse_backing_free(fb);
	return 0;
}

void fuse_backing_files_free(struct fuse_conn *fc)
{
	idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
	idr_destroy(&fc->backing_files_map);
}

int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
{
	struct file *file;
	struct super_block *backing_sb;
	struct fuse_backing *fb = NULL;
	int res;

	pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);

	/* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
	res = -EPERM;
	if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
		goto out;

	res = -EINVAL;
	if (map->flags)
		goto out;

	file = fget(map->fd);
	res = -EBADF;
	if (!file)
		goto out;

	res = -EOPNOTSUPP;
	if (!file->f_op->read_iter || !file->f_op->write_iter)
		goto out_fput;

	backing_sb = file_inode(file)->i_sb;
	res = -ELOOP;
	if (backing_sb->s_stack_depth >= fc->max_stack_depth)
		goto out_fput;

	fb = kmalloc(sizeof(struct fuse_backing), GFP_KERNEL);
	res = -ENOMEM;
	if (!fb)
		goto out_fput;

	fb->file = file;
	fb->cred = prepare_creds();
	refcount_set(&fb->count, 1);

	res = fuse_backing_id_alloc(fc, fb);
	if (res < 0) {
		fuse_backing_free(fb);
		fb = NULL;
	}

out:
	pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res);

	return res;

out_fput:
	fput(file);
	goto out;
}

int fuse_backing_close(struct fuse_conn *fc, int backing_id)
{
	struct fuse_backing *fb = NULL;
	int err;

	pr_debug("%s: backing_id=%d\n", __func__, backing_id);

	/* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
	err = -EPERM;
	if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
		goto out;

	err = -EINVAL;
	if (backing_id <= 0)
		goto out;

	err = -ENOENT;
	fb = fuse_backing_id_remove(fc, backing_id);
	if (!fb)
		goto out;

	fuse_backing_put(fb);
	err = 0;
out:
	pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err);

	return err;
}

/*
 * Setup passthrough to a backing file.
 *
 * Returns an fb object with elevated refcount to be stored in fuse inode.
 */
struct fuse_backing *fuse_passthrough_open(struct file *file,
					   struct inode *inode,
					   int backing_id)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = ff->fm->fc;
	struct fuse_backing *fb = NULL;
	struct file *backing_file;
	int err;

	err = -EINVAL;
	if (backing_id <= 0)
		goto out;

	rcu_read_lock();
	fb = idr_find(&fc->backing_files_map, backing_id);
	fb = fuse_backing_get(fb);
	rcu_read_unlock();

	err = -ENOENT;
	if (!fb)
		goto out;

	/* Allocate backing file per fuse file to store fuse path */
	backing_file = backing_file_open(&file->f_path, file->f_flags,
					 &fb->file->f_path, fb->cred);
	err = PTR_ERR(backing_file);
	if (IS_ERR(backing_file)) {
		fuse_backing_put(fb);
		goto out;
	}

	err = 0;
	ff->passthrough = backing_file;
	ff->cred = get_cred(fb->cred);
out:
	pr_debug("%s: backing_id=%d, fb=0x%p, backing_file=0x%p, err=%i\n", __func__,
		 backing_id, fb, ff->passthrough, err);

	return err ? ERR_PTR(err) : fb;
}

void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb)
{
	pr_debug("%s: fb=0x%p, backing_file=0x%p\n", __func__,
		 fb, ff->passthrough);

	fput(ff->passthrough);
	ff->passthrough = NULL;
	put_cred(ff->cred);
	ff->cred = NULL;
}
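
For orientation, the server side of this flow can be sketched as follows, assuming a hypothetical low-level FUSE server running with CAP_SYS_ADMIN: register a backing file with the connection via the new device ioctl, then answer FUSE_OPEN with FOPEN_PASSTHROUGH and the returned backing_id (error handling trimmed; the ioctl and structs are from the uapi hunk below):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fuse.h>

static int open_passthrough(int fuse_dev_fd, const char *backing_path,
			    struct fuse_open_out *outarg)
{
	struct fuse_backing_map map = {
		.fd = open(backing_path, O_RDWR),
	};
	int backing_id;

	if (map.fd < 0)
		return -1;

	/* FUSE_DEV_IOC_BACKING_OPEN returns a positive backing_id */
	backing_id = ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);
	close(map.fd);	/* fuse_backing_open() took its own reference */
	if (backing_id < 0)
		return -1;

	outarg->open_flags |= FOPEN_PASSTHROUGH;
	outarg->backing_id = backing_id;
	return 0;
}

The same backing_id may be handed out for any number of opens of the inode; fuse_backing_open() pins the file with fget(), so the server can close its own descriptor immediately and release the id later with FUSE_DEV_IOC_BACKING_CLOSE.
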
@ -592,15 +592,11 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
	if (fuse_is_bad(inode))
		return -EIO;

	mutex_lock(&ff->readdir.lock);

	err = UNCACHED;
	if (ff->open_flags & FOPEN_CACHE_DIR)
		err = fuse_readdir_cached(file, ctx);
	if (err == UNCACHED)
		err = fuse_readdir_uncached(file, ctx);

	mutex_unlock(&ff->readdir.lock);

	return err;
}
@ -32,6 +32,9 @@
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

/* The /sys/fs/virtiofs/ kset */
static struct kset *virtio_fs_kset;

enum {
	VQ_HIPRIO,
	VQ_REQUEST
@ -56,7 +59,7 @@ struct virtio_fs_vq {

/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;
	struct kobject kobj;
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;
	struct virtio_fs_vq *vqs;
@ -162,18 +165,40 @@ static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
		complete(&fsvq->in_flight_zero);
}

static void release_virtio_fs_obj(struct kref *ref)
static ssize_t tag_show(struct kobject *kobj,
			struct kobj_attribute *attr, char *buf)
{
	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
	struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	return sysfs_emit(buf, "%s\n", fs->tag);
}

static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);

static struct attribute *virtio_fs_attrs[] = {
	&virtio_fs_tag_attr.attr,
	NULL
};
ATTRIBUTE_GROUPS(virtio_fs);

static void virtio_fs_ktype_release(struct kobject *kobj)
{
	struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);

	kfree(vfs->vqs);
	kfree(vfs);
}

static const struct kobj_type virtio_fs_ktype = {
	.release = virtio_fs_ktype_release,
	.sysfs_ops = &kobj_sysfs_ops,
	.default_groups = virtio_fs_groups,
};

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	kref_put(&fs->refcount, release_virtio_fs_obj);
	kobject_put(&fs->kobj);
}

static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
@ -244,25 +269,46 @@ static void virtio_fs_start_all_queues(struct virtio_fs *fs)
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
static int virtio_fs_add_instance(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	struct virtio_fs *fs2;
	bool duplicate = false;
	int ret;

	mutex_lock(&virtio_fs_mutex);

	list_for_each_entry(fs2, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, fs2->tag) == 0)
			duplicate = true;
		if (strcmp(fs->tag, fs2->tag) == 0) {
			mutex_unlock(&virtio_fs_mutex);
			return -EEXIST;
		}
	}

	/* Use the virtio_device's index as a unique identifier, there is no
	 * need to allocate our own identifiers because the virtio_fs instance
	 * is only visible to userspace as long as the underlying virtio_device
	 * exists.
	 */
	fs->kobj.kset = virtio_fs_kset;
	ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
	if (ret < 0) {
		mutex_unlock(&virtio_fs_mutex);
		return ret;
	}

	ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
	if (ret < 0) {
		kobject_del(&fs->kobj);
		mutex_unlock(&virtio_fs_mutex);
		return ret;
	}

	if (!duplicate)
		list_add_tail(&fs->list, &virtio_fs_instances);

	mutex_unlock(&virtio_fs_mutex);

	if (duplicate)
		return -EEXIST;
	kobject_uevent(&fs->kobj, KOBJ_ADD);

	return 0;
}

@ -275,7 +321,7 @@ static struct virtio_fs *virtio_fs_find_instance(const char *tag)

	list_for_each_entry(fs, &virtio_fs_instances, list) {
		if (strcmp(fs->tag, tag) == 0) {
			kref_get(&fs->refcount);
			kobject_get(&fs->kobj);
			goto found;
		}
	}
@ -324,6 +370,16 @@ static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';

	/* While the VIRTIO specification allows any character, newlines are
	 * awkward on mount(8) command-lines and cause problems in the sysfs
	 * "tag" attr and uevent TAG= properties. Forbid them.
	 */
	if (strchr(fs->tag, '\n')) {
		dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
		return -EINVAL;
	}

	return 0;
}

@ -346,7 +402,7 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work)
			kfree(req);
			dec_in_flight_req(fsvq);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);
}

@ -628,7 +684,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	} while (!virtqueue_enable_cb(vq));
	spin_unlock(&fsvq->lock);

	/* End requests */
@ -872,7 +928,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	kref_init(&fs->refcount);
	kobject_init(&fs->kobj, &virtio_fs_ktype);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
@ -894,7 +950,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
	 */
	virtio_device_ready(vdev);

	ret = virtio_fs_add_instance(fs);
	ret = virtio_fs_add_instance(vdev, fs);
	if (ret < 0)
		goto out_vqs;

@ -903,11 +959,10 @@ static int virtio_fs_probe(struct virtio_device *vdev)
out_vqs:
	virtio_reset_device(vdev);
	virtio_fs_cleanup_vqs(vdev);
	kfree(fs->vqs);

out:
	vdev->priv = NULL;
	kfree(fs);
	kobject_put(&fs->kobj);
	return ret;
}

@ -931,6 +986,8 @@ static void virtio_fs_remove(struct virtio_device *vdev)
	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	sysfs_remove_link(&fs->kobj, "device");
	kobject_del(&fs->kobj);
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues_locked(fs);
	virtio_reset_device(vdev);
@ -1517,21 +1574,56 @@ static struct file_system_type virtio_fs_type = {
	.kill_sb	= virtio_kill_sb,
};

static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
{
	const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);

	add_uevent_var(env, "TAG=%s", fs->tag);
	return 0;
}

static const struct kset_uevent_ops virtio_fs_uevent_ops = {
	.uevent = virtio_fs_uevent,
};

static int __init virtio_fs_sysfs_init(void)
{
	virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
					     fs_kobj);
	if (!virtio_fs_kset)
		return -ENOMEM;
	return 0;
}

static void virtio_fs_sysfs_exit(void)
{
	kset_unregister(virtio_fs_kset);
	virtio_fs_kset = NULL;
}

static int __init virtio_fs_init(void)
{
	int ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	ret = virtio_fs_sysfs_init();
	if (ret < 0)
		return ret;

	ret = register_virtio_driver(&virtio_fs_driver);
	if (ret < 0)
		goto sysfs_exit;

	ret = register_filesystem(&virtio_fs_type);
	if (ret < 0) {
		unregister_virtio_driver(&virtio_fs_driver);
		return ret;
	}
	if (ret < 0)
		goto unregister_virtio_driver;

	return 0;

unregister_virtio_driver:
	unregister_virtio_driver(&virtio_fs_driver);
sysfs_exit:
	virtio_fs_sysfs_exit();
	return ret;
}
module_init(virtio_fs_init);

@ -1539,6 +1631,7 @@ static void __exit virtio_fs_exit(void)
{
	unregister_filesystem(&virtio_fs_type);
	unregister_virtio_driver(&virtio_fs_driver);
	virtio_fs_sysfs_exit();
}
module_exit(virtio_fs_exit);
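
The net effect of the sysfs plumbing above is one tag attribute per device. A small userspace check, assuming a virtiofs instance whose virtio device index is 0 (the ABI itself is documented in the sysfs-fs-virtiofs file earlier in this diff):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char tag[64];
	FILE *f = fopen("/sys/fs/virtiofs/0/tag", "r");

	if (!f) {
		perror("/sys/fs/virtiofs/0/tag");
		return 1;
	}
	if (fgets(tag, sizeof(tag), f)) {
		tag[strcspn(tag, "\n")] = '\0';
		printf("mount -t virtiofs %s /mnt\n", tag);
	}
	fclose(f);
	return 0;
}
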
@ -211,6 +211,12 @@
 * 7.39
 *  - add FUSE_DIRECT_IO_ALLOW_MMAP
 *  - add FUSE_STATX and related structures
 *
 * 7.40
 *  - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag
 *  - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag
 *  - add FUSE_NO_EXPORT_SUPPORT init flag
 *  - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag
 */

#ifndef _LINUX_FUSE_H
@ -246,7 +252,7 @@
#define FUSE_KERNEL_VERSION 7

/** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 39
#define FUSE_KERNEL_MINOR_VERSION 40

/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@ -353,6 +359,7 @@ struct fuse_file_lock {
 * FOPEN_STREAM: the file is stream-like (no file position at all)
 * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
 * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
 * FOPEN_PASSTHROUGH: passthrough read/write io for this open file
 */
#define FOPEN_DIRECT_IO		(1 << 0)
#define FOPEN_KEEP_CACHE	(1 << 1)
@ -361,6 +368,7 @@ struct fuse_file_lock {
#define FOPEN_STREAM		(1 << 4)
#define FOPEN_NOFLUSH		(1 << 5)
#define FOPEN_PARALLEL_DIRECT_WRITES	(1 << 6)
#define FOPEN_PASSTHROUGH	(1 << 7)

/**
 * INIT request/reply flags
@ -410,6 +418,9 @@ struct fuse_file_lock {
 *			symlink and mknod (single group that matches parent)
 * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
 * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode.
 * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support
 * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
 *		    of the request ID indicates resend requests
 */
#define FUSE_ASYNC_READ		(1 << 0)
#define FUSE_POSIX_LOCKS	(1 << 1)
@ -449,6 +460,9 @@ struct fuse_file_lock {
#define FUSE_CREATE_SUPP_GROUP	(1ULL << 34)
#define FUSE_HAS_EXPIRE_ONLY	(1ULL << 35)
#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36)
#define FUSE_PASSTHROUGH	(1ULL << 37)
#define FUSE_NO_EXPORT_SUPPORT	(1ULL << 38)
#define FUSE_HAS_RESEND		(1ULL << 39)

/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
#define FUSE_DIRECT_IO_RELAX	FUSE_DIRECT_IO_ALLOW_MMAP
@ -635,6 +649,7 @@ enum fuse_notify_code {
	FUSE_NOTIFY_STORE = 4,
	FUSE_NOTIFY_RETRIEVE = 5,
	FUSE_NOTIFY_DELETE = 6,
	FUSE_NOTIFY_RESEND = 7,
	FUSE_NOTIFY_CODE_MAX,
};

@ -761,7 +776,7 @@ struct fuse_create_in {
struct fuse_open_out {
	uint64_t	fh;
	uint32_t	open_flags;
	uint32_t	padding;
	int32_t		backing_id;
};

struct fuse_release_in {
@ -877,7 +892,8 @@ struct fuse_init_out {
	uint16_t	max_pages;
	uint16_t	map_alignment;
	uint32_t	flags2;
	uint32_t	unused[7];
	uint32_t	max_stack_depth;
	uint32_t	unused[6];
};

#define CUSE_INIT_INFO_MAX 4096
@ -960,6 +976,14 @@ struct fuse_fallocate_in {
	uint32_t	padding;
};

/**
 * FUSE request unique ID flag
 *
 * Indicates whether this is a resend request. The receiver should handle this
 * request accordingly.
 */
#define FUSE_UNIQUE_RESEND (1ULL << 63)

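A sketch of how a server's dispatcher might use this flag after a FUSE_NOTIFY_RESEND, assuming it tracks in-flight requests by their original unique id; already_in_flight() and handle_request() are hypothetical hooks named for illustration only:

#include <stdbool.h>
#include <stdint.h>
#include <linux/fuse.h>

bool already_in_flight(uint64_t unique);	/* hypothetical */
void handle_request(const struct fuse_in_header *in);	/* hypothetical */

static void dispatch(const struct fuse_in_header *in)
{
	/* the high bit marks a resend; the low bits are the original id */
	bool resent = in->unique & FUSE_UNIQUE_RESEND;
	uint64_t unique = in->unique & ~FUSE_UNIQUE_RESEND;

	if (resent && already_in_flight(unique))
		return;	/* duplicate of a request still being served */

	handle_request(in);
}
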
struct fuse_in_header {
	uint32_t	len;
	uint32_t	opcode;
@ -1049,9 +1073,18 @@ struct fuse_notify_retrieve_in {
	uint64_t	dummy4;
};

struct fuse_backing_map {
	int32_t		fd;
	uint32_t	flags;
	uint64_t	padding;
};

/* Device ioctls: */
#define FUSE_DEV_IOC_MAGIC		229
#define FUSE_DEV_IOC_CLONE		_IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
#define FUSE_DEV_IOC_BACKING_OPEN	_IOW(FUSE_DEV_IOC_MAGIC, 1, \
					     struct fuse_backing_map)
#define FUSE_DEV_IOC_BACKING_CLOSE	_IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t)
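
And the counterpart to the open sketch shown earlier: how a server might retire a backing id it no longer hands out, using the close ioctl just defined (note the argument is a pointer to a uint32_t):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/fuse.h>

static int close_backing(int fuse_dev_fd, int backing_id)
{
	uint32_t id = backing_id;

	/* drops the connection's reference; existing passthrough opens
	 * keep their own reference until release */
	return ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_CLOSE, &id);
}
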

struct fuse_lseek_in {
	uint64_t	fh;