From 30193fe817d262bd64b9a271534792046f19d7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 19 Apr 2022 12:44:26 +0200 Subject: [PATCH] manager: prohibit clone3() in seccomp filters RestrictNamespaces should block clone3() like flatpak: https://github.com/flatpak/flatpak/commit/a10f52a7565c549612c92b8e736a6698a53db330 clone3() passes arguments in a structure referenced by a pointer, so we can't filter on the flags as with clone(). Let's disallow the whole function call. --- src/shared/seccomp-util.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 50ad6bc2449..49044a45aec 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1230,6 +1230,21 @@ int seccomp_restrict_namespaces(unsigned long retain) { if (r < 0) return r; + /* We cannot filter on individual flags to clone3(), so we need to disable the + * syscall altogether. ENOSYS is used instead of EPERM, so that glibc and other + * users shall fall back to clone(), as if on an older kernel. + * + * Cf. https://github.com/flatpak/flatpak/commit/a10f52a7565c549612c92b8e736a6698a53db330, + * https://github.com/moby/moby/issues/42680. */ + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(ENOSYS), + SCMP_SYS(clone3), + 0); + if (r < 0) + log_debug_errno(r, "Failed to add clone3() rule for architecture %s, ignoring: %m", seccomp_arch_to_string(arch)); + if ((retain & NAMESPACE_FLAGS_ALL) == 0) /* If every single kind of namespace shall be prohibited, then let's block the whole setns() syscall * altogether. */