Merge branch 'work.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull ipc namespace update from Al Viro:
 "Rik's patches reducing the amount of synchronize_rcu() triggered by
  ipc namespace destruction.

  I've some pending stuff reducing that on the normal umount side, but
  it's nowhere near ready and Rik's stuff shouldn't be held back due to
  conflicts - I'll just redo the parts of my series that stray into
  ipc/*"

* 'work.namespace' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ipc,namespace: batch free ipc_namespace structures
  ipc,namespace: make ipc namespace allocation wait for pending free
This commit is contained in:
Linus Torvalds 2023-02-24 19:20:07 -08:00
commit 3df88c6a17
5 changed files with 41 additions and 20 deletions

View File

@ -1283,6 +1283,17 @@ struct vfsmount *mntget(struct vfsmount *mnt)
} }
EXPORT_SYMBOL(mntget); EXPORT_SYMBOL(mntget);
/*
 * mnt_make_shortterm - make a mount point inaccessible to new lookups.
 * @mnt: mount to update; a NULL pointer is accepted and ignored.
 *
 * Clears the mnt_ns pointer of the mount that backs @mnt.  Users that
 * looked the mount up earlier may still be active, so the caller MUST
 * WAIT for an RCU grace period before destroying the mount point.
 */
void mnt_make_shortterm(struct vfsmount *mnt)
{
	/* Nothing to do when no mount was supplied. */
	if (!mnt)
		return;

	real_mount(mnt)->mnt_ns = NULL;
}
/** /**
* path_is_mountpoint() - Check if path is a mount in the current namespace. * path_is_mountpoint() - Check if path is a mount in the current namespace.
* @path: path to check * @path: path to check
@ -4459,8 +4470,8 @@ EXPORT_SYMBOL_GPL(kern_mount);
void kern_unmount(struct vfsmount *mnt) void kern_unmount(struct vfsmount *mnt)
{ {
/* release long term mount so mount point can be released */ /* release long term mount so mount point can be released */
if (!IS_ERR_OR_NULL(mnt)) { if (!IS_ERR(mnt)) {
real_mount(mnt)->mnt_ns = NULL; mnt_make_shortterm(mnt);
synchronize_rcu(); /* yecchhh... */ synchronize_rcu(); /* yecchhh... */
mntput(mnt); mntput(mnt);
} }
@ -4472,8 +4483,7 @@ void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
unsigned int i; unsigned int i;
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
if (mnt[i]) mnt_make_shortterm(mnt[i]);
real_mount(mnt[i])->mnt_ns = NULL;
synchronize_rcu_expedited(); synchronize_rcu_expedited();
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
mntput(mnt[i]); mntput(mnt[i]);

View File

@ -86,6 +86,7 @@ extern void mnt_drop_write(struct vfsmount *mnt);
extern void mnt_drop_write_file(struct file *file); extern void mnt_drop_write_file(struct file *file);
extern void mntput(struct vfsmount *mnt); extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt);
extern void mnt_make_shortterm(struct vfsmount *mnt);
extern struct vfsmount *mnt_clone_internal(const struct path *path); extern struct vfsmount *mnt_clone_internal(const struct path *path);
extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool __mnt_is_readonly(struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt);

View File

@ -1709,11 +1709,6 @@ void mq_clear_sbinfo(struct ipc_namespace *ns)
ns->mq_mnt->mnt_sb->s_fs_info = NULL; ns->mq_mnt->mnt_sb->s_fs_info = NULL;
} }
void mq_put_mnt(struct ipc_namespace *ns)
{
kern_unmount(ns->mq_mnt);
}
static int __init init_mqueue_fs(void) static int __init init_mqueue_fs(void)
{ {
int error; int error;

View File

@ -19,6 +19,12 @@
#include "util.h" #include "util.h"
/*
 * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
 *
 * free_ipc() is only prototyped here; the work item is declared ahead of
 * its handler's definition so that create_ipc_ns() can flush_work() it
 * when inc_ipc_namespaces() fails, and retry once pending frees finished.
 */
static void free_ipc(struct work_struct *unused);
static DECLARE_WORK(free_ipc_work, free_ipc);
static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns) static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
{ {
return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES); return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
@ -37,9 +43,18 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
int err; int err;
err = -ENOSPC; err = -ENOSPC;
again:
ucounts = inc_ipc_namespaces(user_ns); ucounts = inc_ipc_namespaces(user_ns);
if (!ucounts) if (!ucounts) {
/*
* IPC namespaces are freed asynchronously, by free_ipc_work.
* If frees were pending, flush_work will wait, and
* return true. Fail the allocation if no frees are pending.
*/
if (flush_work(&free_ipc_work))
goto again;
goto fail; goto fail;
}
err = -ENOMEM; err = -ENOMEM;
ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT); ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT);
@ -130,10 +145,11 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
static void free_ipc_ns(struct ipc_namespace *ns) static void free_ipc_ns(struct ipc_namespace *ns)
{ {
/* mq_put_mnt() waits for a grace period as kern_unmount() /*
* uses synchronize_rcu(). * Caller needs to wait for an RCU grace period to have passed
* after making the mount point inaccessible to new accesses.
*/ */
mq_put_mnt(ns); mntput(ns->mq_mnt);
sem_exit_ns(ns); sem_exit_ns(ns);
msg_exit_ns(ns); msg_exit_ns(ns);
shm_exit_ns(ns); shm_exit_ns(ns);
@ -153,15 +169,16 @@ static void free_ipc(struct work_struct *unused)
struct llist_node *node = llist_del_all(&free_ipc_list); struct llist_node *node = llist_del_all(&free_ipc_list);
struct ipc_namespace *n, *t; struct ipc_namespace *n, *t;
llist_for_each_entry_safe(n, t, node, mnt_llist)
mnt_make_shortterm(n->mq_mnt);
/* Wait for any last users to have gone away. */
synchronize_rcu();
llist_for_each_entry_safe(n, t, node, mnt_llist) llist_for_each_entry_safe(n, t, node, mnt_llist)
free_ipc_ns(n); free_ipc_ns(n);
} }
/*
* The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
*/
static DECLARE_WORK(free_ipc_work, free_ipc);
/* /*
* put_ipc_ns - drop a reference to an ipc namespace. * put_ipc_ns - drop a reference to an ipc namespace.
* @ns: the namespace to put * @ns: the namespace to put

View File

@ -56,10 +56,8 @@ struct pid_namespace;
#ifdef CONFIG_POSIX_MQUEUE #ifdef CONFIG_POSIX_MQUEUE
extern void mq_clear_sbinfo(struct ipc_namespace *ns); extern void mq_clear_sbinfo(struct ipc_namespace *ns);
extern void mq_put_mnt(struct ipc_namespace *ns);
#else #else
static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { } static inline void mq_clear_sbinfo(struct ipc_namespace *ns) { }
static inline void mq_put_mnt(struct ipc_namespace *ns) { }
#endif #endif
#ifdef CONFIG_SYSVIPC #ifdef CONFIG_SYSVIPC