pid namespaces: destroy pid namespace on init's death
Terminate all processes in a namespace when the reaper of the namespace is exiting. We do this by walking the pidmap of the namespace and sending SIGKILL to all processes. Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Acked-by: Pavel Emelyanov <xemul@openvz.org> Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Paul Menage <menage@google.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
0fbc26a6cf
commit
3eb07c8c8a
@ -125,6 +125,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
|
|||||||
|
|
||||||
extern struct pid *alloc_pid(struct pid_namespace *ns);
|
extern struct pid *alloc_pid(struct pid_namespace *ns);
|
||||||
extern void FASTCALL(free_pid(struct pid *pid));
|
extern void FASTCALL(free_pid(struct pid *pid));
|
||||||
|
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the helpers to get the pid's id seen from different namespaces
|
* the helpers to get the pid's id seen from different namespaces
|
||||||
|
@ -879,7 +879,32 @@ static inline void exit_child_reaper(struct task_struct *tsk)
|
|||||||
if (likely(tsk->group_leader != task_child_reaper(tsk)))
|
if (likely(tsk->group_leader != task_child_reaper(tsk)))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
panic("Attempted to kill init!");
|
if (tsk->nsproxy->pid_ns == &init_pid_ns)
|
||||||
|
panic("Attempted to kill init!");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @tsk is the last thread in the 'cgroup-init' and is exiting.
|
||||||
|
* Terminate all remaining processes in the namespace and reap them
|
||||||
|
* before exiting @tsk.
|
||||||
|
*
|
||||||
|
* Note that @tsk (last thread of cgroup-init) may not necessarily
|
||||||
|
* be the child-reaper (i.e main thread of cgroup-init) of the
|
||||||
|
* namespace i.e the child_reaper may have already exited.
|
||||||
|
*
|
||||||
|
* Even after a child_reaper exits, we let it inherit orphaned children,
|
||||||
|
* because, pid_ns->child_reaper remains valid as long as there is
|
||||||
|
* at least one living sub-thread in the cgroup init.
|
||||||
|
|
||||||
|
* This living sub-thread of the cgroup-init will be notified when
|
||||||
|
* a child inherited by the 'child-reaper' exits (do_notify_parent()
|
||||||
|
* uses __group_send_sig_info()). Further, when reaping child processes,
|
||||||
|
* do_wait() iterates over children of all living sub threads.
|
||||||
|
|
||||||
|
* i.e even though 'child_reaper' thread is listed as the parent of the
|
||||||
|
* orphaned children, any living sub-thread in the cgroup-init can
|
||||||
|
* perform the role of the child_reaper.
|
||||||
|
*/
|
||||||
|
zap_pid_ns_processes(tsk->nsproxy->pid_ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
fastcall NORET_TYPE void do_exit(long code)
|
fastcall NORET_TYPE void do_exit(long code)
|
||||||
|
38
kernel/pid.c
38
kernel/pid.c
@ -34,6 +34,7 @@
|
|||||||
#include <linux/hash.h>
|
#include <linux/hash.h>
|
||||||
#include <linux/pid_namespace.h>
|
#include <linux/pid_namespace.h>
|
||||||
#include <linux/init_task.h>
|
#include <linux/init_task.h>
|
||||||
|
#include <linux/syscalls.h>
|
||||||
|
|
||||||
#define pid_hashfn(nr, ns) \
|
#define pid_hashfn(nr, ns) \
|
||||||
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
|
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
|
||||||
@ -567,6 +568,43 @@ void free_pid_ns(struct kref *kref)
|
|||||||
put_pid_ns(parent);
|
put_pid_ns(parent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void zap_pid_ns_processes(struct pid_namespace *pid_ns)
|
||||||
|
{
|
||||||
|
int nr;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The last thread in the cgroup-init thread group is terminating.
|
||||||
|
* Find remaining pid_ts in the namespace, signal and wait for them
|
||||||
|
* to exit.
|
||||||
|
*
|
||||||
|
* Note: This signals each threads in the namespace - even those that
|
||||||
|
* belong to the same thread group, To avoid this, we would have
|
||||||
|
* to walk the entire tasklist looking a processes in this
|
||||||
|
* namespace, but that could be unnecessarily expensive if the
|
||||||
|
* pid namespace has just a few processes. Or we need to
|
||||||
|
* maintain a tasklist for each pid namespace.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
read_lock(&tasklist_lock);
|
||||||
|
nr = next_pidmap(pid_ns, 1);
|
||||||
|
while (nr > 0) {
|
||||||
|
kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
|
||||||
|
nr = next_pidmap(pid_ns, nr);
|
||||||
|
}
|
||||||
|
read_unlock(&tasklist_lock);
|
||||||
|
|
||||||
|
do {
|
||||||
|
clear_thread_flag(TIF_SIGPENDING);
|
||||||
|
rc = sys_wait4(-1, NULL, __WALL, NULL);
|
||||||
|
} while (rc != -ECHILD);
|
||||||
|
|
||||||
|
|
||||||
|
/* Child reaper for the pid namespace is going away */
|
||||||
|
pid_ns->child_reaper = NULL;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The pid hash table is scaled according to the amount of memory in the
|
* The pid hash table is scaled according to the amount of memory in the
|
||||||
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
|
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
|
||||||
|
Loading…
Reference in New Issue
Block a user