From 9e6e543c173460f394ea13c9b2aa572aef1f6833 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 18:59:59 +0100 Subject: [PATCH 1/9] seccomp: add debug messages to seccomp_protect_hostname() --- src/shared/seccomp-util.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index ba3f433106..9a8a4cf0ab 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1776,16 +1776,20 @@ int seccomp_protect_hostname(void) { SCMP_ACT_ERRNO(EPERM), SCMP_SYS(sethostname), 0); - if (r < 0) + if (r < 0) { + log_debug_errno(r, "Failed to add sethostname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); continue; + } r = seccomp_rule_add_exact( seccomp, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(setdomainname), 0); - if (r < 0) + if (r < 0) { + log_debug_errno(r, "Failed to add setdomainname() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); continue; + } r = seccomp_load(seccomp); if (IN_SET(r, -EPERM, -EACCES)) From 3c27973b13724ede05a06a5d346a569794cda433 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 19:00:28 +0100 Subject: [PATCH 2/9] seccomp: introduce seccomp_restrict_suid_sgid() for blocking chmod() for suid/sgid files --- src/shared/seccomp-util.c | 132 ++++++++++++++++++++++++++++++++++++++ src/shared/seccomp-util.h | 1 + 2 files changed, 133 insertions(+) diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 9a8a4cf0ab..7a179998bd 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1,12 +1,14 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #include +#include #include #include #include #include #include #include +#include #include "af-list.h" #include "alloc-util.h" @@ -1800,3 +1802,133 @@ int seccomp_protect_hostname(void) { return 0; } + +int seccomp_restrict_suid_sgid(void) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + /* Checks the mode_t parameter of the following system calls: + * + * → chmod() + fchmod() + fchmodat() + * → open() + creat() + openat() + * → mkdir() + mkdirat() + * → mknod() + mknodat() + */ + + for (unsigned bit = 0; bit < 2; bit ++) { + /* Block S_ISUID in the first iteration, S_ISGID in the second */ + mode_t m = bit == 0 ? S_ISUID : S_ISGID; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(chmod), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(fchmod), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(fchmodat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mkdir), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mkdirat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mknod), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(mknodat), + 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(open), + 2, + SCMP_A1(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT), + SCMP_A2(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(openat), + 2, + SCMP_A2(SCMP_CMP_MASKED_EQ, O_CREAT, O_CREAT), + SCMP_A3(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(creat), + 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, m, m)); + if (r < 0) + break; + } + if (r < 0) { + log_debug_errno(r, "Failed to add suid/sgid rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (IN_SET(r, -EPERM, -EACCES)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to apply suid/sgid restrictions for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index 477400237b..31c6c211fd 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -86,6 +86,7 @@ int seccomp_restrict_realtime(void); int seccomp_memory_deny_write_execute(void); int seccomp_lock_personality(unsigned long personality); int seccomp_protect_hostname(void); +int seccomp_restrict_suid_sgid(void); extern const uint32_t seccomp_local_archs[]; From 167fc10cb352b04d442c9010dab4f8dc24219749 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 19:01:37 +0100 Subject: [PATCH 3/9] test: add test case for restrict_suid_sgid() --- src/test/test-seccomp.c | 208 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c index 14b37eed2c..8efbecbeff 100644 --- a/src/test/test-seccomp.c +++ b/src/test/test-seccomp.c @@ -19,10 +19,12 @@ #include "nulstr-util.h" #include "process-util.h" #include "raw-clone.h" +#include "rm-rf.h" #include "seccomp-util.h" #include "set.h" #include "string-util.h" #include "tests.h" +#include "tmpfile-util.h" #include "virt.h" #if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__) @@ -759,6 +761,211 @@ static void test_lock_personality(void) { assert_se(wait_for_terminate_and_check("lockpersonalityseccomp", pid, WAIT_LOG) == EXIT_SUCCESS); } +static int real_open(const char *path, int flags, mode_t mode) { + /* glibc internally calls openat() when open() is requested. Let's hence define our own wrapper for + * testing purposes that calls the real syscall. */ + + return (int) syscall(SYS_open, path, flags, mode); +} + +static void test_restrict_suid_sgid(void) { + pid_t pid; + + log_info("/* %s */", __func__); + + if (!is_seccomp_available()) { + log_notice("Seccomp not available, skipping %s", __func__); + return; + } + if (geteuid() != 0) { + log_notice("Not root, skipping %s", __func__); + return; + } + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { + char path[] = "/tmp/suidsgidXXXXXX", dir[] = "/tmp/suidsgiddirXXXXXX"; + int fd = -1, k = -1; + const char *z; + + fd = mkostemp_safe(path); + assert_se(fd >= 0); + + assert_se(mkdtemp(dir)); + z = strjoina(dir, "/test"); + + assert_se(chmod(path, 0755 | S_ISUID) >= 0); + assert_se(chmod(path, 0755 | S_ISGID) >= 0); + assert_se(chmod(path, 0755 | S_ISGID | S_ISUID) >= 0); + assert_se(chmod(path, 0755) >= 0); + + assert_se(fchmod(fd, 0755 | S_ISUID) >= 0); + assert_se(fchmod(fd, 0755 | S_ISGID) >= 0); + assert_se(fchmod(fd, 0755 | S_ISGID | S_ISUID) >= 0); + assert_se(fchmod(fd, 0755) >= 0); + + assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISUID, 0) >= 0); + assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID, 0) >= 0); + assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID | S_ISUID, 0) >= 0); + assert_se(fchmodat(AT_FDCWD, path, 0755, 0) >= 0); + + k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = creat(z, 0644 | S_ISUID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = creat(z, 0644 | S_ISGID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = creat(z, 0644 | S_ISUID | S_ISGID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = creat(z, 0644); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + assert_se(mkdir(z, 0755 | S_ISUID) >= 0); + assert_se(rmdir(z) >= 0); + assert_se(mkdir(z, 0755 | S_ISGID) >= 0); + assert_se(rmdir(z) >= 0); + assert_se(mkdir(z, 0755 | S_ISUID | S_ISGID) >= 0); + assert_se(rmdir(z) >= 0); + assert_se(mkdir(z, 0755) >= 0); + assert_se(rmdir(z) >= 0); + + assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID) >= 0); + assert_se(rmdir(z) >= 0); + assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISGID) >= 0); + assert_se(rmdir(z) >= 0); + assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID | S_ISGID) >= 0); + assert_se(rmdir(z) >= 0); + assert_se(mkdirat(AT_FDCWD, z, 0755) >= 0); + assert_se(rmdir(z) >= 0); + + assert_se(mknod(z, S_IFREG | 0755 | S_ISUID, 0) >= 0); + assert_se(unlink(z) >= 0); + assert_se(mknod(z, S_IFREG | 0755 | S_ISGID, 0) >= 0); + assert_se(unlink(z) >= 0); + assert_se(mknod(z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) >= 0); + assert_se(unlink(z) >= 0); + assert_se(mknod(z, S_IFREG | 0755, 0) >= 0); + assert_se(unlink(z) >= 0); + + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID, 0) >= 0); + assert_se(unlink(z) >= 0); + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISGID, 0) >= 0); + assert_se(unlink(z) >= 0); + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) >= 0); + assert_se(unlink(z) >= 0); + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755, 0) >= 0); + assert_se(unlink(z) >= 0); + + assert_se(seccomp_restrict_suid_sgid() >= 0); + + assert_se(chmod(path, 0775 | S_ISUID) < 0 && errno == EPERM); + assert_se(chmod(path, 0775 | S_ISGID) < 0 && errno == EPERM); + assert_se(chmod(path, 0775 | S_ISGID | S_ISUID) < 0 && errno == EPERM); + assert_se(chmod(path, 0775) >= 0); + + assert_se(fchmod(fd, 0775 | S_ISUID) < 0 && errno == EPERM); + assert_se(fchmod(fd, 0775 | S_ISGID) < 0 && errno == EPERM); + assert_se(fchmod(fd, 0775 | S_ISGID | S_ISUID) < 0 && errno == EPERM); + assert_se(fchmod(fd, 0775) >= 0); + + assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISUID, 0) < 0 && errno == EPERM); + assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID, 0) < 0 && errno == EPERM); + assert_se(fchmodat(AT_FDCWD, path, 0755 | S_ISGID | S_ISUID, 0) < 0 && errno == EPERM); + assert_se(fchmodat(AT_FDCWD, path, 0755, 0) >= 0); + + assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID) < 0 && errno == EPERM); + assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID) < 0 && errno == EPERM); + assert_se(real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM); + k = real_open(z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + assert_se(creat(z, 0644 | S_ISUID) < 0 && errno == EPERM); + assert_se(creat(z, 0644 | S_ISGID) < 0 && errno == EPERM); + assert_se(creat(z, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM); + k = creat(z, 0644); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID) < 0 && errno == EPERM); + assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISGID) < 0 && errno == EPERM); + assert_se(openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644 | S_ISUID | S_ISGID) < 0 && errno == EPERM); + k = openat(AT_FDCWD, z, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL, 0644); + k = safe_close(k); + assert_se(unlink(z) >= 0); + + assert_se(mkdir(z, 0755 | S_ISUID) < 0 && errno == EPERM); + assert_se(mkdir(z, 0755 | S_ISGID) < 0 && errno == EPERM); + assert_se(mkdir(z, 0755 | S_ISUID | S_ISGID) < 0 && errno == EPERM); + assert_se(mkdir(z, 0755) >= 0); + assert_se(rmdir(z) >= 0); + + assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID) < 0 && errno == EPERM); + assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISGID) < 0 && errno == EPERM); + assert_se(mkdirat(AT_FDCWD, z, 0755 | S_ISUID | S_ISGID) < 0 && errno == EPERM); + assert_se(mkdirat(AT_FDCWD, z, 0755) >= 0); + assert_se(rmdir(z) >= 0); + + assert_se(mknod(z, S_IFREG | 0755 | S_ISUID, 0) < 0 && errno == EPERM); + assert_se(mknod(z, S_IFREG | 0755 | S_ISGID, 0) < 0 && errno == EPERM); + assert_se(mknod(z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) < 0 && errno == EPERM); + assert_se(mknod(z, S_IFREG | 0755, 0) >= 0); + assert_se(unlink(z) >= 0); + + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID, 0) < 0 && errno == EPERM); + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISGID, 0) < 0 && errno == EPERM); + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755 | S_ISUID | S_ISGID, 0) < 0 && errno == EPERM); + assert_se(mknodat(AT_FDCWD, z, S_IFREG | 0755, 0) >= 0); + assert_se(unlink(z) >= 0); + + assert_se(unlink(path) >= 0); + assert_se(rm_rf(dir, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); + + _exit(EXIT_SUCCESS); + } + + assert_se(wait_for_terminate_and_check("suidsgidseccomp", pid, WAIT_LOG) == EXIT_SUCCESS); +} + int main(int argc, char *argv[]) { test_setup_logging(LOG_DEBUG); @@ -776,6 +983,7 @@ int main(int argc, char *argv[]) { test_restrict_archs(); test_load_syscall_filter_set_raw(); test_lock_personality(); + test_restrict_suid_sgid(); return 0; } From f69567cbe26d09eac9d387c0be0fc32c65a83ada Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 19:09:09 +0100 Subject: [PATCH 4/9] core: expose SUID/SGID restriction as new unit setting RestrictSUIDSGID= --- src/core/dbus-execute.c | 4 ++++ src/core/execute.c | 22 +++++++++++++++++++++ src/core/execute.h | 1 + src/core/load-fragment-gperf.gperf.m4 | 2 ++ src/shared/bus-unit-util.c | 12 +++++------ test/fuzz/fuzz-unit-file/directives.service | 1 + 6 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index d894dbdd7b..653847acad 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -771,6 +771,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("ConfigurationDirectory", "as", NULL, offsetof(ExecContext, directories[EXEC_DIRECTORY_CONFIGURATION].paths), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MemoryDenyWriteExecute", "b", bus_property_get_bool, offsetof(ExecContext, memory_deny_write_execute), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictRealtime", "b", bus_property_get_bool, offsetof(ExecContext, restrict_realtime), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RestrictSUIDSGID", "b", bus_property_get_bool, offsetof(ExecContext, restrict_suid_sgid), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictNamespaces", "t", bus_property_get_ulong, offsetof(ExecContext, restrict_namespaces), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("BindPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -1128,6 +1129,9 @@ int bus_exec_context_set_transient_property( if (streq(name, "RestrictRealtime")) return bus_set_transient_bool(u, name, &c->restrict_realtime, message, flags, error); + if (streq(name, "RestrictSUIDSGID")) + return bus_set_transient_bool(u, name, &c->restrict_suid_sgid, message, flags, error); + if (streq(name, "DynamicUser")) return bus_set_transient_bool(u, name, &c->dynamic_user, message, flags, error); diff --git a/src/core/execute.c b/src/core/execute.c index a74967c4d3..5e1a74d0bc 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1404,6 +1404,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) { return context_has_address_families(c) || c->memory_deny_write_execute || c->restrict_realtime || + c->restrict_suid_sgid || exec_context_restrict_namespaces_set(c) || c->protect_kernel_tunables || c->protect_kernel_modules || @@ -1509,6 +1510,19 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) { return seccomp_restrict_realtime(); } +static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) { + assert(u); + assert(c); + + if (!c->restrict_suid_sgid) + return 0; + + if (skip_seccomp_unavailable(u, "RestrictSUIDSGID=")) + return 0; + + return seccomp_restrict_suid_sgid(); +} + static int apply_protect_sysctl(const Unit *u, const ExecContext *c) { assert(u); assert(c); @@ -3567,6 +3581,12 @@ static int exec_child( return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m"); } + r = apply_restrict_suid_sgid(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m"); + } + r = apply_restrict_namespaces(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; @@ -4218,6 +4238,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { "%sIgnoreSIGPIPE: %s\n" "%sMemoryDenyWriteExecute: %s\n" "%sRestrictRealtime: %s\n" + "%sRestrictSUIDSGID: %s\n" "%sKeyringMode: %s\n" "%sProtectHostname: %s\n", prefix, c->umask, @@ -4237,6 +4258,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->ignore_sigpipe), prefix, yes_no(c->memory_deny_write_execute), prefix, yes_no(c->restrict_realtime), + prefix, yes_no(c->restrict_suid_sgid), prefix, exec_keyring_mode_to_string(c->keyring_mode), prefix, yes_no(c->protect_hostname)); diff --git a/src/core/execute.h b/src/core/execute.h index b612a10e2e..23bf3b546a 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -261,6 +261,7 @@ struct ExecContext { bool memory_deny_write_execute; bool restrict_realtime; + bool restrict_suid_sgid; bool lock_personality; unsigned long personality; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index a2ed7f2abe..73b368e702 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -74,6 +74,7 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0, $1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute) $1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context) $1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime) +$1.RestrictSUIDSGID, config_parse_bool, 0, offsetof($1, exec_context.restrict_suid_sgid) $1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context) $1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)', `$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 @@ -82,6 +83,7 @@ $1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CO $1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 +$1.RestrictSUIDSGID, config_parse_warn_compat, DISABLED_CONFIGURATION 0 $1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0 $1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0') $1.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index b27227aeb6..b42a9e5c5b 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -746,12 +746,12 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con return bus_append_string(m, field, eq); if (STR_IN_SET(field, - "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", - "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", - "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix", - "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC", - "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", - "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality", "ProtectHostname")) + "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "PrivateTmp", + "PrivateDevices", "PrivateNetwork", "PrivateUsers", "PrivateMounts", + "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime", + "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectKernelModules", + "ProtectControlGroups", "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality", + "ProtectHostname", "RestrictSUIDSGID")) return bus_append_parse_boolean(m, field, eq); diff --git a/test/fuzz/fuzz-unit-file/directives.service b/test/fuzz/fuzz-unit-file/directives.service index 2fdfd51d60..86e59184d7 100644 --- a/test/fuzz/fuzz-unit-file/directives.service +++ b/test/fuzz/fuzz-unit-file/directives.service @@ -851,6 +851,7 @@ ReserveVT= RestrictAddressFamilies= RestrictNamespaces= RestrictRealtime= +RestrictSUIDSGID= RuntimeDirectory= RuntimeDirectoryMode= RuntimeDirectoryPreserve= From 9d880b70ba5c6ca83c82952f4c90e86e56c7b70c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 19:20:35 +0100 Subject: [PATCH 5/9] analyze: check for RestrictSUIDSGID= in "systemd-analyze security" And let's give it a heigh weight, since it pretty much can be used for bad things only. --- src/analyze/analyze-security.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c index a978ed6da8..b1b88855af 100644 --- a/src/analyze/analyze-security.c +++ b/src/analyze/analyze-security.c @@ -75,6 +75,7 @@ struct security_info { uint64_t restrict_namespaces; bool restrict_realtime; + bool restrict_suid_sgid; char *root_directory; char *root_image; @@ -1148,6 +1149,16 @@ static const struct security_assessor security_assessor_table[] = { .assess = assess_bool, .offset = offsetof(struct security_info, restrict_realtime), }, + { + .id = "RestrictSUIDSGID=", + .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictSUIDSGID=", + .description_good = "SUID/SGID file creation by service is restricted", + .description_bad = "Service may create SUID/SGID files", + .weight = 1000, + .range = 1, + .assess = assess_bool, + .offset = offsetof(struct security_info, restrict_suid_sgid), + }, { .id = "RestrictNamespaces=~CLONE_NEWUSER", .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RestrictNamespaces=", @@ -1881,6 +1892,7 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_ { "RestrictAddressFamilies", "(bas)", property_read_restrict_address_families, 0 }, { "RestrictNamespaces", "t", NULL, offsetof(struct security_info, restrict_namespaces) }, { "RestrictRealtime", "b", NULL, offsetof(struct security_info, restrict_realtime) }, + { "RestrictSUIDSGID", "b", NULL, offsetof(struct security_info, restrict_suid_sgid) }, { "RootDirectory", "s", NULL, offsetof(struct security_info, root_directory) }, { "RootImage", "s", NULL, offsetof(struct security_info, root_image) }, { "SupplementaryGroups", "as", NULL, offsetof(struct security_info, supplementary_groups) }, From 7445db6eb70e8d5989f481d0c5a08ace7047ae5b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 19:45:32 +0100 Subject: [PATCH 6/9] man: document the new RestrictSUIDSGID= setting --- docs/TRANSIENT-SETTINGS.md | 1 + man/systemd.exec.xml | 41 +++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/docs/TRANSIENT-SETTINGS.md b/docs/TRANSIENT-SETTINGS.md index 343df66754..f081fdb2ce 100644 --- a/docs/TRANSIENT-SETTINGS.md +++ b/docs/TRANSIENT-SETTINGS.md @@ -155,6 +155,7 @@ All execution-related settings are available for transient units. ✓ MemoryDenyWriteExecute= ✓ RestrictNamespaces= ✓ RestrictRealtime= +✓ RestrictSUIDSGID= ✓ RestrictAddressFamilies= ✓ LockPersonality= ✓ LimitCPU= diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index f8c46a2995..46f2d856e0 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -379,18 +379,19 @@ CapabilityBoundingSet=~CAP_B CAP_C NoNewPrivileges= - Takes a boolean argument. If true, ensures that the service process and all its children can - never gain new privileges through execve() (e.g. via setuid or setgid bits, or filesystem - capabilities). This is the simplest and most effective way to ensure that a process and its children can never - elevate privileges again. Defaults to false, but certain settings override this and ignore the value of this - setting. This is the case when SystemCallFilter=, - SystemCallArchitectures=, RestrictAddressFamilies=, - RestrictNamespaces=, PrivateDevices=, - ProtectKernelTunables=, ProtectKernelModules=, - MemoryDenyWriteExecute=, RestrictRealtime=, or - LockPersonality= are specified. Note that even if this setting is overridden by them, - systemctl show shows the original value of this setting. Also see - No New Privileges + Takes a boolean argument. If true, ensures that the service process and all its + children can never gain new privileges through execve() (e.g. via setuid or + setgid bits, or filesystem capabilities). This is the simplest and most effective way to ensure that + a process and its children can never elevate privileges again. Defaults to false, but certain + settings override this and ignore the value of this setting. This is the case when + SystemCallFilter=, SystemCallArchitectures=, + RestrictAddressFamilies=, RestrictNamespaces=, + PrivateDevices=, ProtectKernelTunables=, + ProtectKernelModules=, MemoryDenyWriteExecute=, + RestrictRealtime=, RestrictSUIDSGID= or + LockPersonality= are specified. Note that even if this setting is overridden by + them, systemctl show shows the original value of this setting. Also see No New Privileges Flag. @@ -1392,6 +1393,22 @@ RestrictNamespaces=~cgroup net that actually require them. Defaults to off. + + RestrictSUIDSGID= + + Takes a boolean argument. If set, any attempts to set the set-user-ID (SUID) or + set-group-ID (SGID) bits on files or directories will be denied (for details on these bits see + inode7). If + running in user mode, or in system mode, but without the CAP_SYS_ADMIN + capability (e.g. setting User=), NoNewPrivileges=yes is + implied. As the SUID/SGID bits are mechanisms to elevate privileges, and allows users to acquire the + identity of other users, it is recommended to restrict creation of SUID/SGID files to the few + programs that actually require them. Note that this restricts marking of any type of file system + object with these bits, including both regular files and directories (where the SGID is a different + meaning than for files, see documentation). Defaults to off. + + RemoveIPC= From 62aa29247c3d74bcec0607c347f2be23cd90675d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 19:52:20 +0100 Subject: [PATCH 7/9] units: turn on RestrictSUIDSGID= in most of our long-running daemons --- units/systemd-coredump@.service.in | 1 + units/systemd-hostnamed.service.in | 1 + units/systemd-journal-remote.service.in | 1 + units/systemd-journald.service.in | 1 + units/systemd-localed.service.in | 1 + units/systemd-logind.service.in | 1 + units/systemd-networkd.service.in | 1 + units/systemd-resolved.service.in | 1 + units/systemd-timedated.service.in | 1 + units/systemd-timesyncd.service.in | 1 + units/systemd-udevd.service.in | 3 ++- 11 files changed, 12 insertions(+), 1 deletion(-) diff --git a/units/systemd-coredump@.service.in b/units/systemd-coredump@.service.in index f6166fa11c..afb2ab9d17 100644 --- a/units/systemd-coredump@.service.in +++ b/units/systemd-coredump@.service.in @@ -36,6 +36,7 @@ ProtectSystem=strict RestrictAddressFamilies=AF_UNIX RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes RuntimeMaxSec=5min StateDirectory=systemd/coredump SystemCallArchitectures=native diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in index 9c925e80d9..b4f606cf78 100644 --- a/units/systemd-hostnamed.service.in +++ b/units/systemd-hostnamed.service.in @@ -32,6 +32,7 @@ ReadWritePaths=/etc RestrictAddressFamilies=AF_UNIX RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes SystemCallArchitectures=native SystemCallErrorNumber=EPERM SystemCallFilter=@system-service sethostname diff --git a/units/systemd-journal-remote.service.in b/units/systemd-journal-remote.service.in index 71727295c3..dd6322e62c 100644 --- a/units/systemd-journal-remote.service.in +++ b/units/systemd-journal-remote.service.in @@ -30,6 +30,7 @@ ProtectSystem=strict RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes SystemCallArchitectures=native User=systemd-journal-remote WatchdogSec=3min diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in index 4684f095c0..fab405502a 100644 --- a/units/systemd-journald.service.in +++ b/units/systemd-journald.service.in @@ -28,6 +28,7 @@ RestartSec=0 RestrictAddressFamilies=AF_UNIX AF_NETLINK RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes Sockets=systemd-journald.socket systemd-journald-dev-log.socket systemd-journald-audit.socket StandardOutput=null SystemCallArchitectures=native diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in index a64e7e79a8..7bca34409a 100644 --- a/units/systemd-localed.service.in +++ b/units/systemd-localed.service.in @@ -33,6 +33,7 @@ ReadWritePaths=/etc RestrictAddressFamilies=AF_UNIX RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes SystemCallArchitectures=native SystemCallErrorNumber=EPERM SystemCallFilter=@system-service diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in index 9c8938ec4a..3eef95c661 100644 --- a/units/systemd-logind.service.in +++ b/units/systemd-logind.service.in @@ -40,6 +40,7 @@ RestartSec=0 RestrictAddressFamilies=AF_UNIX AF_NETLINK RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes RuntimeDirectory=systemd/sessions systemd/seats systemd/users systemd/inhibit systemd/shutdown RuntimeDirectoryPreserve=yes SystemCallArchitectures=native diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in index 472ef045de..2c74da6f1e 100644 --- a/units/systemd-networkd.service.in +++ b/units/systemd-networkd.service.in @@ -34,6 +34,7 @@ RestartSec=0 RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 AF_PACKET RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes RuntimeDirectory=systemd/netif RuntimeDirectoryPreserve=yes SystemCallArchitectures=native diff --git a/units/systemd-resolved.service.in b/units/systemd-resolved.service.in index 3144b70063..eee5d5ea8f 100644 --- a/units/systemd-resolved.service.in +++ b/units/systemd-resolved.service.in @@ -38,6 +38,7 @@ RestartSec=0 RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes RuntimeDirectory=systemd/resolve RuntimeDirectoryPreserve=yes SystemCallArchitectures=native diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in index 46ee8c894d..df546f471f 100644 --- a/units/systemd-timedated.service.in +++ b/units/systemd-timedated.service.in @@ -31,6 +31,7 @@ ReadWritePaths=/etc RestrictAddressFamilies=AF_UNIX RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes SystemCallArchitectures=native SystemCallErrorNumber=EPERM SystemCallFilter=@system-service @clock diff --git a/units/systemd-timesyncd.service.in b/units/systemd-timesyncd.service.in index 5313a90c30..6512531e1c 100644 --- a/units/systemd-timesyncd.service.in +++ b/units/systemd-timesyncd.service.in @@ -38,6 +38,7 @@ RestartSec=0 RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 RestrictNamespaces=yes RestrictRealtime=yes +RestrictSUIDSGID=yes RuntimeDirectory=systemd/timesync StateDirectory=systemd/timesync SystemCallArchitectures=native diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in index fb98ca4d43..e8a76cc018 100644 --- a/units/systemd-udevd.service.in +++ b/units/systemd-udevd.service.in @@ -28,8 +28,9 @@ TasksMax=infinity PrivateMounts=yes ProtectHostname=yes MemoryDenyWriteExecute=yes -RestrictRealtime=yes RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 +RestrictRealtime=yes +RestrictSUIDSGID=yes SystemCallFilter=@system-service @module @raw-io SystemCallErrorNumber=EPERM SystemCallArchitectures=native From bf65b7e0c9fc215897b676ab9a7c9d1c688143ba Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 20:19:38 +0100 Subject: [PATCH 8/9] core: imply NNP and SUID/SGID restriction for DynamicUser=yes service Let's be safe, rather than sorry. This way DynamicUser=yes services can neither take benefit of, nor create SUID/SGID binaries. Given that DynamicUser= is a recent addition only we should be able to get away with turning this on, even though this is strictly speaking a binary compatibility breakage. --- NEWS | 15 +++++++++++++++ man/systemd.exec.xml | 16 ++++++++++------ src/core/unit.c | 10 ++++++++-- units/systemd-journal-gatewayd.service.in | 1 - units/systemd-journal-upload.service.in | 1 - 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/NEWS b/NEWS index e0829002b9..603aa63550 100644 --- a/NEWS +++ b/NEWS @@ -201,6 +201,21 @@ CHANGES WITH 242 in spe: done anymore, and instead calling `systemctl preset-all` is recommended after the first installation of systemd. + * A new boolean sandboxing option RestrictSUIDSGID= has been added that + is built on seccomp. When turned on creation of SUID/SGID files is + prohibited. + + * The NoNewPrivileges= and the new RestrictSUIDSGID= options are now + implied if DynamicUser= is turned on for a service. This hardens + these services, so that they neither can benefit from nor create + SUID/SGID executables. This is a minor compatibility breakage, given + that when DynamicUser= was first introduced SUID/SGID behaviour was + unaffected. However, the security benefit of these two options is + substantial, and the setting is still relatively new, hence we opted + to make it mandatory for services with dynamic users. + + … + CHANGES WITH 241: * The default locale can now be configured at compile time. Otherwise, diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 46f2d856e0..688147ea32 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -252,7 +252,9 @@ of the service, and hence the lifetime of the dynamic user/group. Since /tmp and /var/tmp are usually the only world-writable directories on a system this ensures that a unit making use of dynamic user/group allocation cannot leave files around after unit - termination. Moreover ProtectSystem=strict and + termination. Furthermore NoNewPrivileges= and RestrictSUIDSGID= + are implicitly enabled to ensure that processes invoked cannot take benefit or create SUID/SGID files + or directories. Moreover ProtectSystem=strict and ProtectHome=read-only are implied, thus prohibiting the service to write to arbitrary file system locations. In order to allow the service to write to certain directories, they have to be whitelisted using ReadWritePaths=, but care must be taken so that @@ -388,11 +390,12 @@ CapabilityBoundingSet=~CAP_B CAP_C RestrictAddressFamilies=, RestrictNamespaces=, PrivateDevices=, ProtectKernelTunables=, ProtectKernelModules=, MemoryDenyWriteExecute=, - RestrictRealtime=, RestrictSUIDSGID= or - LockPersonality= are specified. Note that even if this setting is overridden by - them, systemctl show shows the original value of this setting. Also see RestrictRealtime=, RestrictSUIDSGID=, + DynamicUser= or LockPersonality= are specified. Note that even + if this setting is overridden by them, systemctl show shows the original value of + this setting. Also see No New Privileges - Flag. + Flag. @@ -1406,7 +1409,8 @@ RestrictNamespaces=~cgroup net identity of other users, it is recommended to restrict creation of SUID/SGID files to the few programs that actually require them. Note that this restricts marking of any type of file system object with these bits, including both regular files and directories (where the SGID is a different - meaning than for files, see documentation). Defaults to off. + meaning than for files, see documentation). This option is implied if DynamicUser= + is enabled. Defaults to off. diff --git a/src/core/unit.c b/src/core/unit.c index 35c268cd3b..2cde494a7e 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4088,14 +4088,20 @@ int unit_patch_contexts(Unit *u) { return -ENOMEM; } - /* If the dynamic user option is on, let's make sure that the unit can't leave its UID/GID - * around in the file system or on IPC objects. Hence enforce a strict sandbox. */ + /* If the dynamic user option is on, let's make sure that the unit can't leave its + * UID/GID around in the file system or on IPC objects. Hence enforce a strict + * sandbox. */ ec->private_tmp = true; ec->remove_ipc = true; ec->protect_system = PROTECT_SYSTEM_STRICT; if (ec->protect_home == PROTECT_HOME_NO) ec->protect_home = PROTECT_HOME_READ_ONLY; + + /* Make sure this service can neither benefit from SUID/SGID binaries nor create + * them. */ + ec->no_new_privileges = true; + ec->restrict_suid_sgid = true; } } diff --git a/units/systemd-journal-gatewayd.service.in b/units/systemd-journal-gatewayd.service.in index 0f16ae4ccb..50f774512b 100644 --- a/units/systemd-journal-gatewayd.service.in +++ b/units/systemd-journal-gatewayd.service.in @@ -17,7 +17,6 @@ DynamicUser=yes ExecStart=@rootlibexecdir@/systemd-journal-gatewayd LockPersonality=yes MemoryDenyWriteExecute=yes -NoNewPrivileges=yes PrivateDevices=yes PrivateNetwork=yes ProtectControlGroups=yes diff --git a/units/systemd-journal-upload.service.in b/units/systemd-journal-upload.service.in index 10e4d657d3..e3800473ec 100644 --- a/units/systemd-journal-upload.service.in +++ b/units/systemd-journal-upload.service.in @@ -18,7 +18,6 @@ DynamicUser=yes ExecStart=@rootlibexecdir@/systemd-journal-upload --save-state LockPersonality=yes MemoryDenyWriteExecute=yes -NoNewPrivileges=yes PrivateDevices=yes ProtectControlGroups=yes ProtectHome=yes From 6d85ba729949c00e4723b9517f804b9016ede6e9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 20 Mar 2019 21:00:17 +0100 Subject: [PATCH 9/9] update TODO --- TODO | 3 --- 1 file changed, 3 deletions(-) diff --git a/TODO b/TODO index 010c2ad41d..663e4fedd8 100644 --- a/TODO +++ b/TODO @@ -32,9 +32,6 @@ Features: - Make sure resume= and resume_offset= on the kernel cmdline always take precedence -* maybe add a seccomp-based high-level filter that blocks creation of suid/sgid - files. - * make MAINPID= message reception checks even stricter: if service uses User=, then check sending UID and ignore message if it doesn't match the user or root.