2019-05-20 20:08:01 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2018-11-04 11:19:03 +03:00
/* Provide a way to create a superblock configuration context within the kernel
* that allows a superblock to be set up prior to mounting .
*
* Copyright ( C ) 2017 Red Hat , Inc . All Rights Reserved .
* Written by David Howells ( dhowells @ redhat . com )
*/
# define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that filesystems such as NFS, which do a mount within
a mount, can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
# include <linux/module.h>
2018-11-04 11:19:03 +03:00
# include <linux/fs_context.h>
2018-11-02 02:07:25 +03:00
# include <linux/fs_parser.h>
2018-11-04 11:19:03 +03:00
# include <linux/fs.h>
# include <linux/mount.h>
# include <linux/nsproxy.h>
# include <linux/slab.h>
# include <linux/magic.h>
# include <linux/security.h>
# include <linux/mnt_namespace.h>
# include <linux/pid_namespace.h>
# include <linux/user_namespace.h>
# include <net/net_namespace.h>
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that filesystems such as NFS, which do a mount within
a mount, can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
# include <asm/sections.h>
2018-11-04 11:19:03 +03:00
# include "mount.h"
# include "internal.h"
2018-11-02 02:07:25 +03:00
/*
 * Parameter-delivery state of a legacy filesystem context; a value of this
 * type is stored in legacy_fs_context::param_type.
 *
 * NOTE(review): the per-value notes below are inferred from the enumerator
 * names only - the code that sets and tests them is outside this part of the
 * file, so confirm against it.
 */
enum legacy_fs_param {
/* presumably: no parameters have been supplied yet */
LEGACY_FS_UNSET_PARAMS ,
/* presumably: parameters arrived as one monolithic data blob */
LEGACY_FS_MONOLITHIC_PARAMS ,
/* presumably: parameters arrived one at a time */
LEGACY_FS_INDIVIDUAL_PARAMS ,
} ;
2018-11-04 11:19:03 +03:00
/*
 * Per-context state for the legacy parameter path.
 *
 * NOTE(review): how this is attached to a struct fs_context is not visible in
 * this part of the file; presumably legacy_init_fs_context() allocates it.
 */
struct legacy_fs_context {
char * legacy_data ; /* Data page for legacy filesystems */
/* presumably the number of bytes of legacy_data in use - TODO confirm */
size_t data_size ;
2018-11-02 02:07:25 +03:00
/* Which kind of parameter delivery has been used (see enum legacy_fs_param) */
enum legacy_fs_param param_type ;
2018-11-04 11:19:03 +03:00
} ;
/*
 * Forward declaration: alloc_fs_context() falls back to this initialiser when
 * the filesystem type does not provide its own ->init_fs_context().
 */
static int legacy_init_fs_context ( struct fs_context * fc ) ;
2018-11-02 02:07:25 +03:00
/*
 * Option names that turn a superblock flag on.  vfs_parse_sb_flag() looks the
 * key up here and, on a match, ORs the SB_* value into both fc->sb_flags and
 * fc->sb_flags_mask.
 *
 * NOTE(review): the trailing empty entry is presumably the end-of-table
 * marker expected by lookup_constant() - confirm against its definition.
 */
static const struct constant_table common_set_sb_flag [ ] = {
{ " dirsync " , SB_DIRSYNC } ,
{ " lazytime " , SB_LAZYTIME } ,
{ " mand " , SB_MANDLOCK } ,
{ " ro " , SB_RDONLY } ,
{ " sync " , SB_SYNCHRONOUS } ,
2019-12-16 21:45:41 +03:00
{ } ,
2018-11-02 02:07:25 +03:00
} ;
/*
 * Option names that turn a superblock flag off.  vfs_parse_sb_flag() looks
 * the key up here and, on a match, clears the SB_* value in fc->sb_flags
 * while still recording it in fc->sb_flags_mask.
 *
 * NOTE(review): the trailing empty entry is presumably the end-of-table
 * marker expected by lookup_constant() - confirm against its definition.
 */
static const struct constant_table common_clear_sb_flag [ ] = {
{ " async " , SB_SYNCHRONOUS } ,
{ " nolazytime " , SB_LAZYTIME } ,
{ " nomand " , SB_MANDLOCK } ,
{ " rw " , SB_RDONLY } ,
2019-12-16 21:45:41 +03:00
{ } ,
2018-11-02 02:07:25 +03:00
} ;
/*
* Check for a common mount option that manipulates s_flags .
*/
static int vfs_parse_sb_flag ( struct fs_context * fc , const char * key )
{
unsigned int token ;
token = lookup_constant ( common_set_sb_flag , key , 0 ) ;
if ( token ) {
fc - > sb_flags | = token ;
fc - > sb_flags_mask | = token ;
return 0 ;
}
token = lookup_constant ( common_clear_sb_flag , key , 0 ) ;
if ( token ) {
fc - > sb_flags & = ~ token ;
fc - > sb_flags_mask | = token ;
return 0 ;
}
return - ENOPARAM ;
}
2021-07-14 16:47:50 +03:00
/**
 * vfs_parse_fs_param_source - Handle setting "source" via parameter
 * @fc: The filesystem context to modify
 * @param: The parameter
 *
 * This is a simple helper for filesystems to verify that the "source" they
 * accept is sane: it must be a string parameter and may only be given once.
 *
 * On success the string is moved from @param into @fc->source and
 * @param->string is set to NULL, so the context now owns the string.
 *
 * Returns 0 on success, -ENOPARAM if this is not the "source" parameter, and
 * -EINVAL otherwise.  In the event of failure, supplementary error information
 * is logged.
 */
int vfs_parse_fs_param_source ( struct fs_context * fc , struct fs_parameter * param )
{
/* Not ours: let the caller try something else. */
if ( strcmp ( param - > key , " source " ) ! = 0 )
return - ENOPARAM ;
if ( param - > type ! = fs_value_is_string )
return invalf ( fc , " Non-string source " ) ;
/* A source has already been recorded in this context. */
if ( fc - > source )
return invalf ( fc , " Multiple sources " ) ;
/* Steal the string rather than copying it. */
fc - > source = param - > string ;
param - > string = NULL ;
return 0 ;
}
EXPORT_SYMBOL ( vfs_parse_fs_param_source ) ;
2018-11-02 02:07:25 +03:00
/**
* vfs_parse_fs_param - Add a single parameter to a superblock config
* @ fc : The filesystem context to modify
* @ param : The parameter
*
* A single mount option in string form is applied to the filesystem context
* being set up . Certain standard options ( for example " ro " ) are translated
* into flag bits without going to the filesystem . The active security module
* is allowed to observe and poach options . Any other options are passed over
* to the filesystem to parse .
*
* This may be called multiple times for a context .
*
* Returns 0 on success and a negative error code on failure . In the event of
* failure , supplementary error information may have been set .
*/
int vfs_parse_fs_param ( struct fs_context * fc , struct fs_parameter * param )
{
int ret ;
if ( ! param - > key )
return invalf ( fc , " Unnamed parameter \n " ) ;
ret = vfs_parse_sb_flag ( fc , param - > key ) ;
if ( ret ! = - ENOPARAM )
return ret ;
ret = security_fs_context_parse_param ( fc , param ) ;
if ( ret ! = - ENOPARAM )
/* Param belongs to the LSM or is disallowed by the LSM; so
* don ' t pass to the FS .
*/
return ret ;
if ( fc - > ops - > parse_param ) {
ret = fc - > ops - > parse_param ( fc , param ) ;
if ( ret ! = - ENOPARAM )
return ret ;
}
/* If the filesystem doesn't take any arguments, give it the
* default handling of source .
*/
2021-07-14 16:47:50 +03:00
ret = vfs_parse_fs_param_source ( fc , param ) ;
if ( ret ! = - ENOPARAM )
return ret ;
2018-11-02 02:07:25 +03:00
return invalf ( fc , " %s: Unknown parameter '%s' " ,
fc - > fs_type - > name , param - > key ) ;
}
EXPORT_SYMBOL ( vfs_parse_fs_param ) ;
/**
 * vfs_parse_fs_string - Convenience function to just parse a string.
 * @fc: The filesystem context to modify
 * @key: The parameter name
 * @value: The parameter value, or NULL for a valueless (flag) parameter
 * @v_size: The number of bytes of @value to use
 *
 * Wrap @key and @value in a struct fs_parameter and hand it to
 * vfs_parse_fs_param().  When @value is given, a NUL-terminated copy of its
 * first @v_size bytes is made for the duration of the call; whatever is still
 * attached to the parameter afterwards is freed here.
 *
 * Returns -ENOMEM if the copy cannot be allocated, otherwise whatever
 * vfs_parse_fs_param() returns.
 */
int vfs_parse_fs_string(struct fs_context *fc, const char *key,
			const char *value, size_t v_size)
{
	struct fs_parameter param = {
		.key	= key,
		.type	= value ? fs_value_is_string : fs_value_is_flag,
		.size	= v_size,
	};
	int ret;

	if (value) {
		param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
		if (!param.string)
			return -ENOMEM;
	}

	ret = vfs_parse_fs_param(fc, &param);
	/* The parser may have taken the string, leaving NULL behind. */
	kfree(param.string);
	return ret;
}
EXPORT_SYMBOL(vfs_parse_fs_string);
/**
 * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
 * @fc: The superblock configuration to fill in.
 * @data: The data to parse
 *
 * Parse a blob of data that's in key[=val][,key[=val]]* form.  The LSM gets
 * first pick of the options, then each remaining item is passed to
 * vfs_parse_fs_string().  The blob is modified in place while it is split up.
 *
 * NOTE(review): this is presumably meant to be used as a filesystem's
 * monolithic-data parsing operation; the operation's name is not visible in
 * this part of the file - confirm.
 *
 * Returns 0 if @data is NULL or everything parsed, otherwise the error from
 * security_sb_eat_lsm_opts() or from the first vfs_parse_fs_string() call
 * that failed.
 */
int generic_parse_monolithic ( struct fs_context * fc , void * data )
{
char * options = data , * key ;
int ret = 0 ;
/* No data at all is not an error. */
if ( ! options )
return 0 ;
ret = security_sb_eat_lsm_opts ( options , & fc - > security ) ;
if ( ret )
return ret ;
while ( ( key = strsep ( & options , " , " ) ) ! = NULL ) {
/* Skip empty items. */
if ( * key ) {
size_t v_len = 0 ;
char * value = strchr ( key , ' = ' ) ;
if ( value ) {
/* An item with nothing before the separator has no key; ignore it. */
if ( value = = key )
continue ;
/* Terminate the key and step on to the value. */
* value + + = 0 ;
v_len = strlen ( value ) ;
}
/* value is NULL here for a bare key with no separator. */
ret = vfs_parse_fs_string ( fc , key , value , v_len ) ;
if ( ret < 0 )
break ;
}
}
return ret ;
}
EXPORT_SYMBOL ( generic_parse_monolithic ) ;
2018-11-04 11:19:03 +03:00
/**
* alloc_fs_context - Create a filesystem context .
* @ fs_type : The filesystem type .
* @ reference : The dentry from which this one derives ( or NULL )
* @ sb_flags : Filesystem / superblock flags ( SB_ * )
* @ sb_flags_mask : Applicable members of @ sb_flags
* @ purpose : The purpose that this configuration shall be used for .
*
* Open a filesystem and create a mount context . The mount context is
* initialised with the supplied flags and , if a submount / automount from
* another superblock ( referred to by @ reference ) is supplied , may have
* parameters such as namespaces copied across from that superblock .
*/
static struct fs_context * alloc_fs_context ( struct file_system_type * fs_type ,
struct dentry * reference ,
unsigned int sb_flags ,
unsigned int sb_flags_mask ,
enum fs_context_purpose purpose )
{
2018-12-24 02:55:56 +03:00
int ( * init_fs_context ) ( struct fs_context * ) ;
2018-11-04 11:19:03 +03:00
struct fs_context * fc ;
int ret = - ENOMEM ;
memcg: charge fs_context and legacy_fs_context
This patch adds accounting flags to fs_context and legacy_fs_context
allocation sites so that kernel could correctly charge these objects.
We have written a PoC to demonstrate the effect of the missing-charging
bugs. The PoC takes around 1,200MB unaccounted memory, while it is
charged for only 362MB memory usage. We evaluate the PoC on QEMU x86_64
v5.2.90 + Linux kernel v5.10.19 + Debian buster. All the limitations
including ulimits and sysctl variables are set as default. Specifically,
the hard NOFILE limit and nr_open in sysctl are both 1,048,576.
/*------------------------- POC code ----------------------------*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/file.h>
#include <time.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>
#include <linux/mount.h>
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
#define STACK_SIZE (8 * 1024)
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
static inline int fsopen(const char *fs_name, unsigned int flags)
{
return syscall(__NR_fsopen, fs_name, flags);
}
static char thread_stack[512][STACK_SIZE];
int thread_fn(void* arg)
{
for (int i = 0; i< 800000; ++i) {
int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);
if (fsfd == -1) {
errExit("fsopen");
}
}
while(1);
return 0;
}
int main(int argc, char *argv[]) {
int thread_pid;
for (int i = 0; i < 1; ++i) {
thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \
SIGCHLD, NULL);
}
while(1);
return 0;
}
/*-------------------------- end --------------------------------*/
Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com
Signed-off-by: Yutian Yang <nglaive@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <shenwenbo@zju.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-09-03 00:55:07 +03:00
fc = kzalloc ( sizeof ( struct fs_context ) , GFP_KERNEL_ACCOUNT ) ;
2018-11-04 11:19:03 +03:00
if ( ! fc )
return ERR_PTR ( - ENOMEM ) ;
fc - > purpose = purpose ;
fc - > sb_flags = sb_flags ;
fc - > sb_flags_mask = sb_flags_mask ;
fc - > fs_type = get_filesystem ( fs_type ) ;
fc - > cred = get_current_cred ( ) ;
fc - > net_ns = get_net ( current - > nsproxy - > net_ns ) ;
2019-12-21 08:16:49 +03:00
fc - > log . prefix = fs_type - > name ;
2018-11-04 11:19:03 +03:00
vfs: syscall: Add fsopen() to prepare for superblock creation
Provide an fsopen() system call that starts the process of preparing to
create a superblock that will then be mountable, using an fd as a context
handle. fsopen() is given the name of the filesystem that will be used:
int mfd = fsopen(const char *fsname, unsigned int flags);
where flags can be 0 or FSOPEN_CLOEXEC.
For example:
sfd = fsopen("ext4", FSOPEN_CLOEXEC);
fsconfig(sfd, FSCONFIG_SET_PATH, "source", "/dev/sda1", AT_FDCWD);
fsconfig(sfd, FSCONFIG_SET_FLAG, "noatime", NULL, 0);
fsconfig(sfd, FSCONFIG_SET_FLAG, "acl", NULL, 0);
fsconfig(sfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0);
fsconfig(sfd, FSCONFIG_SET_STRING, "sb", "1", 0);
fsconfig(sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
fsinfo(sfd, NULL, ...); // query new superblock attributes
mfd = fsmount(sfd, FSMOUNT_CLOEXEC, MS_RELATIME);
move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
sfd = fsopen("afs", -1);
fsconfig(fd, FSCONFIG_SET_STRING, "source",
"#grand.central.org:root.cell", 0);
fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
mfd = fsmount(sfd, 0, MS_NODEV);
move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
If an error is reported at any step, an error message may be available to be
read() back (ENODATA will be reported if there isn't an error available) in
the form:
"e <subsys>:<problem>"
"e SELinux:Mount on mountpoint not permitted"
Once fsmount() has been called, further fsconfig() calls will incur EBUSY,
even if the fsmount() fails. read() is still possible to retrieve error
information.
The fsopen() syscall creates a mount context and hangs it of the fd that it
returns.
Netlink is not used because it is optional and would make the core VFS
dependent on the networking layer and also potentially add network
namespace issues.
Note that, for the moment, the caller must have SYS_CAP_ADMIN to use
fsopen().
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-api@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:33:31 +03:00
mutex_init ( & fc - > uapi_mutex ) ;
2018-11-04 11:19:03 +03:00
switch ( purpose ) {
case FS_CONTEXT_FOR_MOUNT :
fc - > user_ns = get_user_ns ( fc - > cred - > user_ns ) ;
break ;
2018-12-24 00:25:31 +03:00
case FS_CONTEXT_FOR_SUBMOUNT :
fc - > user_ns = get_user_ns ( reference - > d_sb - > s_user_ns ) ;
break ;
2018-11-04 17:28:36 +03:00
case FS_CONTEXT_FOR_RECONFIGURE :
atomic_inc ( & reference - > d_sb - > s_active ) ;
vfs: set fs_context::user_ns for reconfigure
fs_context::user_ns is used by fuse_parse_param(), even during remount,
so it needs to be set to the existing value for reconfigure.
Reproducer:
#include <fcntl.h>
#include <sys/mount.h>
int main()
{
char opts[128];
int fd = open("/dev/fuse", O_RDWR);
sprintf(opts, "fd=%d,rootmode=040000,user_id=0,group_id=0", fd);
mkdir("mnt", 0777);
mount("foo", "mnt", "fuse.foo", 0, opts);
mount("foo", "mnt", "fuse.foo", MS_REMOUNT, opts);
}
Crash:
BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] SMP
CPU: 0 PID: 129 Comm: syz_make_kuid Not tainted 5.3.0-rc5-next-20190821 #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-20181126_142135-anatol 04/01/2014
RIP: 0010:map_id_range_down+0xb/0xc0 kernel/user_namespace.c:291
[...]
Call Trace:
map_id_down kernel/user_namespace.c:312 [inline]
make_kuid+0xe/0x10 kernel/user_namespace.c:389
fuse_parse_param+0x116/0x210 fs/fuse/inode.c:523
vfs_parse_fs_param+0xdb/0x1b0 fs/fs_context.c:145
vfs_parse_fs_string+0x6a/0xa0 fs/fs_context.c:188
generic_parse_monolithic+0x85/0xc0 fs/fs_context.c:228
parse_monolithic_mount_data+0x1b/0x20 fs/fs_context.c:708
do_remount fs/namespace.c:2525 [inline]
do_mount+0x39a/0xa60 fs/namespace.c:3107
ksys_mount+0x7d/0xd0 fs/namespace.c:3325
__do_sys_mount fs/namespace.c:3339 [inline]
__se_sys_mount fs/namespace.c:3336 [inline]
__x64_sys_mount+0x20/0x30 fs/namespace.c:3336
do_syscall_64+0x4a/0x1a0 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Reported-by: syzbot+7d6a57304857423318a5@syzkaller.appspotmail.com
Fixes: 408cbe695350 ("vfs: Convert fuse to use the new mount API")
Cc: David Howells <dhowells@redhat.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2019-08-22 08:16:33 +03:00
fc - > user_ns = get_user_ns ( reference - > d_sb - > s_user_ns ) ;
2018-11-04 17:28:36 +03:00
fc - > root = dget ( reference ) ;
break ;
2018-11-04 11:19:03 +03:00
}
2018-12-24 02:55:56 +03:00
/* TODO: Make all filesystems support this unconditionally */
init_fs_context = fc - > fs_type - > init_fs_context ;
if ( ! init_fs_context )
init_fs_context = legacy_init_fs_context ;
ret = init_fs_context ( fc ) ;
2018-11-04 11:19:03 +03:00
if ( ret < 0 )
goto err_fc ;
fc - > need_free = true ;
return fc ;
err_fc :
put_fs_context ( fc ) ;
return ERR_PTR ( ret ) ;
}
/**
 * fs_context_for_mount - Create a filesystem context for a new mount
 * @fs_type: The filesystem type.
 * @sb_flags: Filesystem/superblock flags (SB_*)
 *
 * Calls alloc_fs_context() with no reference dentry, an empty flags mask and
 * FS_CONTEXT_FOR_MOUNT as the purpose.
 *
 * Returns whatever alloc_fs_context() returns: the new context or an
 * ERR_PTR().
 */
struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
					unsigned int sb_flags)
{
	struct dentry *no_reference = NULL;
	unsigned int no_mask = 0;

	return alloc_fs_context(fs_type, no_reference, sb_flags, no_mask,
				FS_CONTEXT_FOR_MOUNT);
}
EXPORT_SYMBOL(fs_context_for_mount);
2018-11-04 17:28:36 +03:00
struct fs_context * fs_context_for_reconfigure ( struct dentry * dentry ,
unsigned int sb_flags ,
unsigned int sb_flags_mask )
{
return alloc_fs_context ( dentry - > d_sb - > s_type , dentry , sb_flags ,
sb_flags_mask , FS_CONTEXT_FOR_RECONFIGURE ) ;
}
EXPORT_SYMBOL ( fs_context_for_reconfigure ) ;
2018-12-24 00:25:31 +03:00
/**
 * fs_context_for_submount - Create a filesystem context for a submount
 * @type: The filesystem type.
 * @reference: The dentry from which the new mount derives
 *
 * Calls alloc_fs_context() with no superblock flags, an empty flags mask and
 * FS_CONTEXT_FOR_SUBMOUNT as the purpose; the user namespace is then taken
 * from @reference's superblock.
 *
 * Returns whatever alloc_fs_context() returns: the new context or an
 * ERR_PTR().
 */
struct fs_context *fs_context_for_submount(struct file_system_type *type,
					   struct dentry *reference)
{
	unsigned int no_flags = 0;
	unsigned int no_mask = 0;

	return alloc_fs_context(type, reference, no_flags, no_mask,
				FS_CONTEXT_FOR_SUBMOUNT);
}
EXPORT_SYMBOL(fs_context_for_submount);
2018-12-20 23:04:50 +03:00
void fc_drop_locked ( struct fs_context * fc )
{
struct super_block * sb = fc - > root - > d_sb ;
dput ( fc - > root ) ;
fc - > root = NULL ;
deactivate_locked_super ( sb ) ;
}
2018-11-04 11:19:03 +03:00
/*
 * Forward declaration.
 * NOTE(review): neither the definition nor the users are in this part of the
 * file; presumably it releases the legacy_fs_context state - confirm.
 */
static void legacy_fs_context_free ( struct fs_context * fc ) ;
2018-11-04 17:28:36 +03:00
2018-12-24 00:02:47 +03:00
/**
* vfs_dup_fc_config : Duplicate a filesystem context .
* @ src_fc : The context to copy .
*/
struct fs_context * vfs_dup_fs_context ( struct fs_context * src_fc )
{
struct fs_context * fc ;
int ret ;
if ( ! src_fc - > ops - > dup )
return ERR_PTR ( - EOPNOTSUPP ) ;
fc = kmemdup ( src_fc , sizeof ( struct fs_context ) , GFP_KERNEL ) ;
if ( ! fc )
return ERR_PTR ( - ENOMEM ) ;
vfs: syscall: Add fsopen() to prepare for superblock creation
Provide an fsopen() system call that starts the process of preparing to
create a superblock that will then be mountable, using an fd as a context
handle. fsopen() is given the name of the filesystem that will be used:
int mfd = fsopen(const char *fsname, unsigned int flags);
where flags can be 0 or FSOPEN_CLOEXEC.
For example:
sfd = fsopen("ext4", FSOPEN_CLOEXEC);
fsconfig(sfd, FSCONFIG_SET_PATH, "source", "/dev/sda1", AT_FDCWD);
fsconfig(sfd, FSCONFIG_SET_FLAG, "noatime", NULL, 0);
fsconfig(sfd, FSCONFIG_SET_FLAG, "acl", NULL, 0);
fsconfig(sfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0);
fsconfig(sfd, FSCONFIG_SET_STRING, "sb", "1", 0);
fsconfig(sfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
fsinfo(sfd, NULL, ...); // query new superblock attributes
mfd = fsmount(sfd, FSMOUNT_CLOEXEC, MS_RELATIME);
move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
sfd = fsopen("afs", -1);
fsconfig(fd, FSCONFIG_SET_STRING, "source",
"#grand.central.org:root.cell", 0);
fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
mfd = fsmount(sfd, 0, MS_NODEV);
move_mount(mfd, "", sfd, AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
If an error is reported at any step, an error message may be available to be
read() back (ENODATA will be reported if there isn't an error available) in
the form:
"e <subsys>:<problem>"
"e SELinux:Mount on mountpoint not permitted"
Once fsmount() has been called, further fsconfig() calls will incur EBUSY,
even if the fsmount() fails. read() is still possible to retrieve error
information.
The fsopen() syscall creates a mount context and hangs it of the fd that it
returns.
Netlink is not used because it is optional and would make the core VFS
dependent on the networking layer and also potentially add network
namespace issues.
Note that, for the moment, the caller must have SYS_CAP_ADMIN to use
fsopen().
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-api@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:33:31 +03:00
mutex_init ( & fc - > uapi_mutex ) ;
2018-12-24 00:02:47 +03:00
fc - > fs_private = NULL ;
fc - > s_fs_info = NULL ;
fc - > source = NULL ;
fc - > security = NULL ;
get_filesystem ( fc - > fs_type ) ;
get_net ( fc - > net_ns ) ;
get_user_ns ( fc - > user_ns ) ;
get_cred ( fc - > cred ) ;
2019-12-21 08:16:49 +03:00
if ( fc - > log . log )
refcount_inc ( & fc - > log . log - > usage ) ;
2018-12-24 00:02:47 +03:00
/* Can't call put until we've called ->dup */
ret = fc - > ops - > dup ( fc , src_fc ) ;
if ( ret < 0 )
goto err_fc ;
ret = security_fs_context_dup ( fc , src_fc ) ;
if ( ret < 0 )
goto err_fc ;
return fc ;
err_fc :
put_fs_context ( fc ) ;
return ERR_PTR ( ret ) ;
}
EXPORT_SYMBOL ( vfs_dup_fs_context ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not be copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type. The messages will only be good till the fs_type is
released.
Note that the logging object will be shared between duplicated fs_context
structures. This is so that filesystems such as NFS, which do a mount within
a mount, can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:07:26 +03:00
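/**
 * logfc - Log a message to a filesystem context log
 * @log: The log to add the message to, or NULL to print it with printk()
 * @prefix: Optional text put in front of the message, followed by ": "
 *          (may be NULL)
 * @level: The message level character: 'w' for a warning, 'e' for an error;
 *         anything else is printed at notice level when @log is NULL
 * @fmt: The format of the message, followed by its arguments
 */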
2019-12-21 06:10:36 +03:00
void logfc ( struct fc_log * log , const char * prefix , char level , const char * fmt , . . . )
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type. The messages will only be good till the fs_type is
released.
Note that the logging object will be shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:07:26 +03:00
{
va_list va ;
2019-12-21 06:10:36 +03:00
struct va_format vaf = { . fmt = fmt , . va = & va } ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type. The messages will only be good till the fs_type is
released.
Note that the logging object will be shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:07:26 +03:00
va_start ( va , fmt ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
if ( ! log ) {
2019-12-21 06:10:36 +03:00
switch ( level ) {
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
case ' w ' :
2019-12-21 06:10:36 +03:00
printk ( KERN_WARNING " %s%s%pV \n " , prefix ? prefix : " " ,
prefix ? " : " : " " , & vaf ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
break ;
case ' e ' :
2019-12-21 06:10:36 +03:00
printk ( KERN_ERR " %s%s%pV \n " , prefix ? prefix : " " ,
prefix ? " : " : " " , & vaf ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
break ;
default :
2019-12-21 06:10:36 +03:00
printk ( KERN_NOTICE " %s%s%pV \n " , prefix ? prefix : " " ,
prefix ? " : " : " " , & vaf ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
break ;
}
} else {
unsigned int logsize = ARRAY_SIZE ( log - > buffer ) ;
u8 index ;
2019-12-21 06:10:36 +03:00
char * q = kasprintf ( GFP_KERNEL , " %c %s%s%pV \n " , level ,
prefix ? prefix : " " ,
prefix ? " : " : " " , & vaf ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
index = log - > head & ( logsize - 1 ) ;
BUILD_BUG_ON ( sizeof ( log - > head ) ! = sizeof ( u8 ) | |
sizeof ( log - > tail ) ! = sizeof ( u8 ) ) ;
if ( ( u8 ) ( log - > head - log - > tail ) = = logsize ) {
/* The buffer is full, discard the oldest message */
if ( log - > need_free & ( 1 < < index ) )
kfree ( log - > buffer [ index ] ) ;
log - > tail + + ;
}
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not be copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type. The messages will only be good till the fs_type is
released.
Note that the logging object will be shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:07:26 +03:00
2019-12-21 06:10:36 +03:00
log - > buffer [ index ] = q ? q : " OOM: Can't store error string " ;
if ( q )
log - > need_free | = 1 < < index ;
else
log - > need_free & = ~ ( 1 < < index ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
log - > head + + ;
}
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not be copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type. The messages will only be good till the fs_type is
released.
Note that the logging object will be shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:07:26 +03:00
va_end ( va ) ;
}
EXPORT_SYMBOL ( logfc ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
/*
* Free a logging structure .
*/
static void put_fc_log ( struct fs_context * fc )
{
2019-12-21 08:16:49 +03:00
struct fc_log * log = fc - > log . log ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
int i ;
if ( log ) {
if ( refcount_dec_and_test ( & log - > usage ) ) {
2019-12-21 08:16:49 +03:00
fc - > log . log = NULL ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
for ( i = 0 ; i < = 7 ; i + + )
if ( log - > need_free & ( 1 < < i ) )
kfree ( log - > buffer [ i ] ) ;
kfree ( log ) ;
}
}
}
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not be copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type. The messages will only be good till the fs_type is
released.
Note that the logging object will be shared between duplicated fs_context
structures. This is so that filesystems such as NFS, which do a mount within
a mount, can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:07:26 +03:00
2018-11-04 11:19:03 +03:00
/**
* put_fs_context - Dispose of a superblock configuration context .
* @ fc : The context to dispose of .
*/
void put_fs_context ( struct fs_context * fc )
{
struct super_block * sb ;
if ( fc - > root ) {
sb = fc - > root - > d_sb ;
dput ( fc - > root ) ;
fc - > root = NULL ;
deactivate_super ( sb ) ;
}
2018-12-24 02:55:56 +03:00
if ( fc - > need_free & & fc - > ops & & fc - > ops - > free )
fc - > ops - > free ( fc ) ;
2018-11-04 11:19:03 +03:00
security_free_mnt_opts ( & fc - > security ) ;
2018-11-04 17:28:36 +03:00
put_net ( fc - > net_ns ) ;
2018-11-04 11:19:03 +03:00
put_user_ns ( fc - > user_ns ) ;
put_cred ( fc - > cred ) ;
vfs: Implement logging through fs_context
Implement the ability for filesystems to log error, warning and
informational messages through the fs_context. These can be extracted by
userspace by reading from an fd created by fsopen().
Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".
Inside the kernel, formatted messages are malloc'd but unformatted messages
are not copied if they're either in the core .rodata section or in the
.rodata section of the filesystem module pinned by fs_context::fs_type.
The messages are only good till the fs_type is released.
Note that the logging object is shared between duplicated fs_context
structures. This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.
Five logging functions are provided for this:
(1) void logfc(struct fs_context *fc, const char *fmt, ...);
This logs a message into the context. If the buffer is full, the
earliest message is discarded.
(2) void errorf(fc, fmt, ...);
This wraps logfc() to log an error.
(3) void invalf(fc, fmt, ...);
This wraps errorf() and returns -EINVAL for convenience.
(4) void warnf(fc, fmt, ...);
This wraps logfc() to log a warning.
(5) void infof(fc, fmt, ...);
This wraps logfc() to log an informational message.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:34:29 +03:00
put_fc_log ( fc ) ;
2018-11-04 11:19:03 +03:00
put_filesystem ( fc - > fs_type ) ;
kfree ( fc - > source ) ;
kfree ( fc ) ;
}
EXPORT_SYMBOL ( put_fs_context ) ;
/*
* Free the config for a filesystem that doesn ' t support fs_context .
*/
static void legacy_fs_context_free ( struct fs_context * fc )
{
2018-11-02 02:07:25 +03:00
struct legacy_fs_context * ctx = fc - > fs_private ;
if ( ctx ) {
if ( ctx - > param_type = = LEGACY_FS_INDIVIDUAL_PARAMS )
kfree ( ctx - > legacy_data ) ;
kfree ( ctx ) ;
}
}
2018-12-24 00:02:47 +03:00
/*
* Duplicate a legacy config .
*/
static int legacy_fs_context_dup ( struct fs_context * fc , struct fs_context * src_fc )
{
struct legacy_fs_context * ctx ;
struct legacy_fs_context * src_ctx = src_fc - > fs_private ;
ctx = kmemdup ( src_ctx , sizeof ( * src_ctx ) , GFP_KERNEL ) ;
if ( ! ctx )
return - ENOMEM ;
if ( ctx - > param_type = = LEGACY_FS_INDIVIDUAL_PARAMS ) {
ctx - > legacy_data = kmemdup ( src_ctx - > legacy_data ,
src_ctx - > data_size , GFP_KERNEL ) ;
if ( ! ctx - > legacy_data ) {
kfree ( ctx ) ;
return - ENOMEM ;
}
}
fc - > fs_private = ctx ;
return 0 ;
}
2018-11-02 02:07:25 +03:00
/*
 * Add a parameter to a legacy config.  We build up a comma-separated list of
 * options in ctx->legacy_data, one "key" or "key=value" entry per call.
 *
 * Returns 0 on success, -ENOMEM if the option buffer cannot be allocated,
 * whatever invalf() yields for a rejected parameter, or the result of
 * vfs_parse_fs_param_source() when that is anything other than -ENOPARAM.
 */
static int legacy_parse_param ( struct fs_context * fc , struct fs_parameter * param )
{
struct legacy_fs_context * ctx = fc - > fs_private ;
unsigned int size = ctx - > data_size ;
size_t len = 0 ;
2021-07-14 16:47:50 +03:00
int ret ;
2018-11-02 02:07:25 +03:00
2021-07-14 16:47:50 +03:00
/* Give the generic source handler first refusal on this parameter. */
ret = vfs_parse_fs_param_source ( fc , param ) ;
if ( ret ! = - ENOPARAM )
return ret ;
2018-11-02 02:07:25 +03:00
/* Once monolithic data has been set, individual options are refused. */
if ( ctx - > param_type = = LEGACY_FS_MONOLITHIC_PARAMS )
return invalf ( fc , " VFS: Legacy: Can't mix monolithic and individual options " ) ;
/*
 * Work out how many bytes this option adds: the key, plus one extra byte
 * and the value when the parameter is a string.
 */
switch ( param - > type ) {
case fs_value_is_string :
len = 1 + param - > size ;
2020-08-24 01:36:59 +03:00
fallthrough ;
2018-11-02 02:07:25 +03:00
case fs_value_is_flag :
len + = strlen ( param - > key ) ;
break ;
default :
return invalf ( fc , " VFS: Legacy: Parameter type for '%s' not supported " ,
param - > key ) ;
}
2022-01-18 10:06:04 +03:00
/*
 * The buffer is PAGE_SIZE bytes (see the allocation below).  The extra 2
 * leaves room for the separator and the terminator that are written
 * further down.
 */
if ( size + len + 2 > PAGE_SIZE )
2018-11-02 02:07:25 +03:00
return invalf ( fc , " VFS: Legacy: Cumulative options too large " ) ;
/* Neither the key nor a string value may contain the separator. */
if ( strchr ( param - > key , ' , ' ) | |
( param - > type = = fs_value_is_string & &
memchr ( param - > string , ' , ' , param - > size ) ) )
return invalf ( fc , " VFS: Legacy: Option '%s' contained comma " ,
param - > key ) ;
/* The option buffer is allocated lazily, on the first option stored. */
if ( ! ctx - > legacy_data ) {
ctx - > legacy_data = kmalloc ( PAGE_SIZE , GFP_KERNEL ) ;
if ( ! ctx - > legacy_data )
return - ENOMEM ;
}
2023-06-07 20:28:48 +03:00
/* Only emit a separator when something is already in the buffer. */
if ( size )
ctx - > legacy_data [ size + + ] = ' , ' ;
2018-11-02 02:07:25 +03:00
len = strlen ( param - > key ) ;
memcpy ( ctx - > legacy_data + size , param - > key , len ) ;
size + = len ;
if ( param - > type = = fs_value_is_string ) {
ctx - > legacy_data [ size + + ] = ' = ' ;
memcpy ( ctx - > legacy_data + size , param - > string , param - > size ) ;
size + = param - > size ;
}
/* Keep the accumulated list terminated; data_size excludes the terminator. */
ctx - > legacy_data [ size ] = ' \0 ' ;
ctx - > data_size = size ;
ctx - > param_type = LEGACY_FS_INDIVIDUAL_PARAMS ;
return 0 ;
2018-11-04 11:19:03 +03:00
}
/*
 * Add monolithic mount data.
 *
 * Records @data as the context's legacy_data without copying it and marks
 * the context as holding monolithic parameters.  Fails with -EINVAL if any
 * parameters have already been set on this context.  Otherwise returns 0
 * when @data is NULL or the filesystem type has FS_BINARY_MOUNTDATA set, and
 * the result of security_sb_eat_lsm_opts() in all other cases.
 */
static int legacy_parse_monolithic ( struct fs_context * fc , void * data )
{
struct legacy_fs_context * ctx = fc - > fs_private ;
2018-11-02 02:07:25 +03:00
/* Monolithic data is only accepted on a context with nothing set yet. */
if ( ctx - > param_type ! = LEGACY_FS_UNSET_PARAMS ) {
pr_warn ( " VFS: Can't mix monolithic and individual options \n " ) ;
return - EINVAL ;
}
2018-11-04 11:19:03 +03:00
ctx - > legacy_data = data ;
2018-11-02 02:07:25 +03:00
ctx - > param_type = LEGACY_FS_MONOLITHIC_PARAMS ;
2018-11-04 11:19:03 +03:00
if ( ! ctx - > legacy_data )
return 0 ;
2018-11-02 02:07:25 +03:00
2018-11-04 11:19:03 +03:00
/* Data flagged as binary is not passed to the LSM option parser. */
if ( fc - > fs_type - > fs_flags & FS_BINARY_MOUNTDATA )
return 0 ;
return security_sb_eat_lsm_opts ( ctx - > legacy_data , & fc - > security ) ;
}
/*
* Get a mountable root with the legacy mount command .
*/
2018-12-24 02:55:56 +03:00
static int legacy_get_tree ( struct fs_context * fc )
2018-11-04 11:19:03 +03:00
{
struct legacy_fs_context * ctx = fc - > fs_private ;
struct super_block * sb ;
struct dentry * root ;
root = fc - > fs_type - > mount ( fc - > fs_type , fc - > sb_flags ,
fc - > source , ctx - > legacy_data ) ;
if ( IS_ERR ( root ) )
return PTR_ERR ( root ) ;
sb = root - > d_sb ;
BUG_ON ( ! sb ) ;
fc - > root = root ;
return 0 ;
}
2018-11-04 17:28:36 +03:00
/*
* Handle remount .
*/
2018-12-24 02:55:56 +03:00
static int legacy_reconfigure ( struct fs_context * fc )
2018-11-04 17:28:36 +03:00
{
struct legacy_fs_context * ctx = fc - > fs_private ;
struct super_block * sb = fc - > root - > d_sb ;
if ( ! sb - > s_op - > remount_fs )
return 0 ;
return sb - > s_op - > remount_fs ( sb , & fc - > sb_flags ,
ctx ? ctx - > legacy_data : NULL ) ;
}
2018-12-24 02:55:56 +03:00
const struct fs_context_operations legacy_fs_context_ops = {
. free = legacy_fs_context_free ,
2018-12-24 00:02:47 +03:00
. dup = legacy_fs_context_dup ,
2018-11-02 02:07:25 +03:00
. parse_param = legacy_parse_param ,
2018-12-24 02:55:56 +03:00
. parse_monolithic = legacy_parse_monolithic ,
. get_tree = legacy_get_tree ,
. reconfigure = legacy_reconfigure ,
} ;
2018-11-04 11:19:03 +03:00
/*
* Initialise a legacy context for a filesystem that doesn ' t support
* fs_context .
*/
static int legacy_init_fs_context ( struct fs_context * fc )
{
memcg: charge fs_context and legacy_fs_context
This patch adds accounting flags to fs_context and legacy_fs_context
allocation sites so that kernel could correctly charge these objects.
We have written a PoC to demonstrate the effect of the missing-charging
bugs. The PoC takes around 1,200MB unaccounted memory, while it is
charged for only 362MB memory usage. We evaluate the PoC on QEMU x86_64
v5.2.90 + Linux kernel v5.10.19 + Debian buster. All the limitations
including ulimits and sysctl variables are set as default. Specifically,
the hard NOFILE limit and nr_open in sysctl are both 1,048,576.
/*------------------------- POC code ----------------------------*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/file.h>
#include <time.h>
#include <sys/wait.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sched.h>
#include <fcntl.h>
#include <linux/mount.h>
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
#define STACK_SIZE (8 * 1024)
#ifndef __NR_fsopen
#define __NR_fsopen 430
#endif
static inline int fsopen(const char *fs_name, unsigned int flags)
{
return syscall(__NR_fsopen, fs_name, flags);
}
static char thread_stack[512][STACK_SIZE];
int thread_fn(void* arg)
{
for (int i = 0; i< 800000; ++i) {
int fsfd = fsopen("nfs", FSOPEN_CLOEXEC);
if (fsfd == -1) {
errExit("fsopen");
}
}
while(1);
return 0;
}
int main(int argc, char *argv[]) {
int thread_pid;
for (int i = 0; i < 1; ++i) {
thread_pid = clone(thread_fn, thread_stack[i] + STACK_SIZE, \
SIGCHLD, NULL);
}
while(1);
return 0;
}
/*-------------------------- end --------------------------------*/
Link: https://lkml.kernel.org/r/1626517201-24086-1-git-send-email-nglaive@gmail.com
Signed-off-by: Yutian Yang <nglaive@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <shenwenbo@zju.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-09-03 00:55:07 +03:00
fc - > fs_private = kzalloc ( sizeof ( struct legacy_fs_context ) , GFP_KERNEL_ACCOUNT ) ;
2018-11-04 11:19:03 +03:00
if ( ! fc - > fs_private )
return - ENOMEM ;
2018-12-24 02:55:56 +03:00
fc - > ops = & legacy_fs_context_ops ;
2018-11-04 11:19:03 +03:00
return 0 ;
}
int parse_monolithic_mount_data ( struct fs_context * fc , void * data )
{
2018-12-24 02:55:56 +03:00
int ( * monolithic_mount_data ) ( struct fs_context * , void * ) ;
2018-11-02 02:07:25 +03:00
2018-12-24 02:55:56 +03:00
monolithic_mount_data = fc - > ops - > parse_monolithic ;
2018-11-02 02:07:25 +03:00
if ( ! monolithic_mount_data )
monolithic_mount_data = generic_parse_monolithic ;
2018-12-24 02:55:56 +03:00
return monolithic_mount_data ( fc , data ) ;
2018-11-04 11:19:03 +03:00
}
vfs: syscall: Add fsconfig() for configuring and managing a context
Add a syscall for configuring a filesystem creation context and triggering
actions upon it, to be used in conjunction with fsopen, fspick and fsmount.
long fsconfig(int fs_fd, unsigned int cmd, const char *key,
const void *value, int aux);
Where fs_fd indicates the context, cmd indicates the action to take, key
indicates the parameter name for parameter-setting actions and, if needed,
value points to a buffer containing the value and aux can give more
information for the value.
The following command IDs are proposed:
(*) FSCONFIG_SET_FLAG: No value is specified. The parameter must be
boolean in nature. The key may be prefixed with "no" to invert the
setting. value must be NULL and aux must be 0.
(*) FSCONFIG_SET_STRING: A string value is specified. The parameter can
be expecting boolean, integer, string or take a path. A conversion to
an appropriate type will be attempted (which may include looking up as
a path). value points to a NUL-terminated string and aux must be 0.
(*) FSCONFIG_SET_BINARY: A binary blob is specified. value points to
the blob and aux indicates its size. The parameter must be expecting
a blob.
(*) FSCONFIG_SET_PATH: A non-empty path is specified. The parameter must
be expecting a path object. value points to a NUL-terminated string
that is the path and aux is a file descriptor at which to start a
relative lookup or AT_FDCWD.
(*) FSCONFIG_SET_PATH_EMPTY: As fsconfig_set_path, but with AT_EMPTY_PATH
implied.
(*) FSCONFIG_SET_FD: An open file descriptor is specified. value must
be NULL and aux indicates the file descriptor.
(*) FSCONFIG_CMD_CREATE: Trigger superblock creation.
(*) FSCONFIG_CMD_RECONFIGURE: Trigger superblock reconfiguration.
For the "set" command IDs, the idea is that the file_system_type will point
to a list of parameters and the types of value that those parameters expect
to take. The core code can then do the parse and argument conversion and
then give the LSM and FS a cooked option or array of options to use.
Source specification is also done the same way, using special keys
"source", "source1", "source2", etc..
[!] Note that, for the moment, the key and value are just glued back
together and handed to the filesystem. Every filesystem that uses options
uses match_token() and co. to do this, and this will need to be changed -
but not all at once.
Example usage:
fd = fsopen("ext4", FSOPEN_CLOEXEC);
fsconfig(fd, fsconfig_set_path, "source", "/dev/sda1", AT_FDCWD);
fsconfig(fd, fsconfig_set_path_empty, "journal_path", "", journal_fd);
fsconfig(fd, fsconfig_set_fd, "journal_fd", "", journal_fd);
fsconfig(fd, fsconfig_set_flag, "user_xattr", NULL, 0);
fsconfig(fd, fsconfig_set_flag, "noacl", NULL, 0);
fsconfig(fd, fsconfig_set_string, "sb", "1", 0);
fsconfig(fd, fsconfig_set_string, "errors", "continue", 0);
fsconfig(fd, fsconfig_set_string, "data", "journal", 0);
fsconfig(fd, fsconfig_set_string, "context", "unconfined_u:...", 0);
fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0);
mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC);
or:
fd = fsopen("ext4", FSOPEN_CLOEXEC);
fsconfig(fd, fsconfig_set_string, "source", "/dev/sda1", 0);
fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0);
mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC);
or:
fd = fsopen("afs", FSOPEN_CLOEXEC);
fsconfig(fd, fsconfig_set_string, "source", "#grand.central.org:root.cell", 0);
fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0);
mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC);
or:
fd = fsopen("jffs2", FSOPEN_CLOEXEC);
fsconfig(fd, fsconfig_set_string, "source", "mtd0", 0);
fsconfig(fd, fsconfig_cmd_create, NULL, NULL, 0);
mfd = fsmount(fd, FSMOUNT_CLOEXEC, MS_NOEXEC);
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-api@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2018-11-02 02:36:09 +03:00
/*
* Clean up a context after performing an action on it and put it into a state
* from where it can be used to reconfigure a superblock .
*
* Note that here we do only the parts that can ' t fail ; the rest is in
* finish_clean_context ( ) below and in between those fs_context is marked
* FS_CONTEXT_AWAITING_RECONF . The reason for splitup is that after
* successful mount or remount we need to report success to userland .
* Trying to do full reinit ( for the sake of possible subsequent remount )
* and failing to allocate memory would ' ve put us into a nasty situation .
* So here we only discard the old state and reinitialization is left
* until we actually try to reconfigure .
*/
void vfs_clean_context ( struct fs_context * fc )
{
if ( fc - > need_free & & fc - > ops & & fc - > ops - > free )
fc - > ops - > free ( fc ) ;
fc - > need_free = false ;
fc - > fs_private = NULL ;
fc - > s_fs_info = NULL ;
fc - > sb_flags = 0 ;
security_free_mnt_opts ( & fc - > security ) ;
kfree ( fc - > source ) ;
fc - > source = NULL ;
fc - > purpose = FS_CONTEXT_FOR_RECONFIGURE ;
fc - > phase = FS_CONTEXT_AWAITING_RECONF ;
}
int finish_clean_context ( struct fs_context * fc )
{
int error ;
if ( fc - > phase ! = FS_CONTEXT_AWAITING_RECONF )
return 0 ;
if ( fc - > fs_type - > init_fs_context )
error = fc - > fs_type - > init_fs_context ( fc ) ;
else
error = legacy_init_fs_context ( fc ) ;
if ( unlikely ( error ) ) {
fc - > phase = FS_CONTEXT_FAILED ;
return error ;
}
fc - > need_free = true ;
fc - > phase = FS_CONTEXT_RECONF_PARAMS ;
return 0 ;
}