f2a8d52e0a
Add a simple struct nsset. It holds all necessary pieces to switch to a new set of namespaces without leaving a task in a half-switched state, which we will make use of in the next patch. This patch switches the existing setns logic over without causing a change in setns() behavior. This brings setns() closer to how unshare() works. The prepare_ns() function is responsible for preparing all necessary information. This has two reasons. First, it minimizes dependencies between individual namespaces, i.e. all install handlers can expect that all fields are properly initialized, independent of the order in which they are called. Second, this makes the code easier to maintain and easier to follow if it needs to be changed. The prepare_ns() helper will only be switched over to use a flags argument in the next patch. Here it will still use nstype as a simple integer argument, which was argued would be clearer. I'm not particularly opinionated about whether it really helps or not. The struct nsset itself already contains the flags field since its name already indicates that it can contain information required by different namespaces. None of this should have functional consequences. Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Reviewed-by: Serge Hallyn <serge@hallyn.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Serge Hallyn <serge@hallyn.com> Cc: Jann Horn <jannh@google.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Aleksa Sarai <cyphar@cyphar.com> Link: https://lore.kernel.org/r/20200505140432.181565-2-christian.brauner@ubuntu.com
158 lines
3.4 KiB
C
158 lines
3.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include "cgroup-internal.h"
|
|
|
|
#include <linux/sched/task.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/proc_ns.h>
|
|
|
|
|
|
/* cgroup namespaces */
|
|
|
|
static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
|
|
{
|
|
return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
|
|
}
|
|
|
|
static void dec_cgroup_namespaces(struct ucounts *ucounts)
|
|
{
|
|
dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
|
|
}
|
|
|
|
static struct cgroup_namespace *alloc_cgroup_ns(void)
|
|
{
|
|
struct cgroup_namespace *new_ns;
|
|
int ret;
|
|
|
|
new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
|
|
if (!new_ns)
|
|
return ERR_PTR(-ENOMEM);
|
|
ret = ns_alloc_inum(&new_ns->ns);
|
|
if (ret) {
|
|
kfree(new_ns);
|
|
return ERR_PTR(ret);
|
|
}
|
|
refcount_set(&new_ns->count, 1);
|
|
new_ns->ns.ops = &cgroupns_operations;
|
|
return new_ns;
|
|
}
|
|
|
|
void free_cgroup_ns(struct cgroup_namespace *ns)
|
|
{
|
|
put_css_set(ns->root_cset);
|
|
dec_cgroup_namespaces(ns->ucounts);
|
|
put_user_ns(ns->user_ns);
|
|
ns_free_inum(&ns->ns);
|
|
kfree(ns);
|
|
}
|
|
EXPORT_SYMBOL(free_cgroup_ns);
|
|
|
|
struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
|
|
struct user_namespace *user_ns,
|
|
struct cgroup_namespace *old_ns)
|
|
{
|
|
struct cgroup_namespace *new_ns;
|
|
struct ucounts *ucounts;
|
|
struct css_set *cset;
|
|
|
|
BUG_ON(!old_ns);
|
|
|
|
if (!(flags & CLONE_NEWCGROUP)) {
|
|
get_cgroup_ns(old_ns);
|
|
return old_ns;
|
|
}
|
|
|
|
/* Allow only sysadmin to create cgroup namespace. */
|
|
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
|
|
return ERR_PTR(-EPERM);
|
|
|
|
ucounts = inc_cgroup_namespaces(user_ns);
|
|
if (!ucounts)
|
|
return ERR_PTR(-ENOSPC);
|
|
|
|
/* It is not safe to take cgroup_mutex here */
|
|
spin_lock_irq(&css_set_lock);
|
|
cset = task_css_set(current);
|
|
get_css_set(cset);
|
|
spin_unlock_irq(&css_set_lock);
|
|
|
|
new_ns = alloc_cgroup_ns();
|
|
if (IS_ERR(new_ns)) {
|
|
put_css_set(cset);
|
|
dec_cgroup_namespaces(ucounts);
|
|
return new_ns;
|
|
}
|
|
|
|
new_ns->user_ns = get_user_ns(user_ns);
|
|
new_ns->ucounts = ucounts;
|
|
new_ns->root_cset = cset;
|
|
|
|
return new_ns;
|
|
}
|
|
|
|
static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
|
|
{
|
|
return container_of(ns, struct cgroup_namespace, ns);
|
|
}
|
|
|
|
static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
|
|
{
|
|
struct nsproxy *nsproxy = nsset->nsproxy;
|
|
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
|
|
|
|
if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
|
|
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
/* Don't need to do anything if we are attaching to our own cgroupns. */
|
|
if (cgroup_ns == nsproxy->cgroup_ns)
|
|
return 0;
|
|
|
|
get_cgroup_ns(cgroup_ns);
|
|
put_cgroup_ns(nsproxy->cgroup_ns);
|
|
nsproxy->cgroup_ns = cgroup_ns;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct ns_common *cgroupns_get(struct task_struct *task)
|
|
{
|
|
struct cgroup_namespace *ns = NULL;
|
|
struct nsproxy *nsproxy;
|
|
|
|
task_lock(task);
|
|
nsproxy = task->nsproxy;
|
|
if (nsproxy) {
|
|
ns = nsproxy->cgroup_ns;
|
|
get_cgroup_ns(ns);
|
|
}
|
|
task_unlock(task);
|
|
|
|
return ns ? &ns->ns : NULL;
|
|
}
|
|
|
|
/* Drop a reference on the cgroup namespace that embeds @ns. */
static void cgroupns_put(struct ns_common *ns)
{
	struct cgroup_namespace *cgns = to_cg_ns(ns);

	put_cgroup_ns(cgns);
}
|
|
|
|
static struct user_namespace *cgroupns_owner(struct ns_common *ns)
|
|
{
|
|
return to_cg_ns(ns)->user_ns;
|
|
}
|
|
|
|
const struct proc_ns_operations cgroupns_operations = {
|
|
.name = "cgroup",
|
|
.type = CLONE_NEWCGROUP,
|
|
.get = cgroupns_get,
|
|
.put = cgroupns_put,
|
|
.install = cgroupns_install,
|
|
.owner = cgroupns_owner,
|
|
};
|
|
|
|
/*
 * Init hook registered through subsys_initcall().  It performs no work
 * and always reports success.
 */
static __init int cgroup_namespaces_init(void)
{
	/* Nothing to set up. */
	return 0;
}
subsys_initcall(cgroup_namespaces_init);
|