diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index bb38ea24674..fd47b0a20a0 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1607,22 +1607,20 @@
MemoryDenyWriteExecute=
Takes a boolean argument. If set, attempts to create memory mappings that are writable and
- executable at the same time, or to change existing memory mappings to become executable, or mapping shared memory
- segments as executable are prohibited.
- Specifically, a system call filter is added that rejects
- mmap2
- system calls with both PROT_EXEC and PROT_WRITE set,
- mprotect2
- system calls with PROT_EXEC set and
- shmat2
- system calls with SHM_EXEC set. Note that this option is incompatible with programs
- that generate program code dynamically at runtime, such as JIT execution engines, or programs compiled making
- use of the code "trampoline" feature of various C compilers. This option improves service security, as it makes
- harder for software exploits to change running code dynamically.
- If running in user mode, or in system mode, but without the CAP_SYS_ADMIN
- capability (e.g. setting User=), NoNewPrivileges=yes
- is implied.
-
+ executable at the same time, or to change existing memory mappings to become executable, or mapping shared
+ memory segments as executable are prohibited. Specifically, a system call filter is added that rejects
+ mmap2 system calls with both
+ PROT_EXEC and PROT_WRITE set,
+ mprotect2 system calls with
+ PROT_EXEC set and
+ shmat2 system calls with
+ SHM_EXEC set. Note that this option is incompatible with programs that generate program
+ code dynamically at runtime, such as JIT execution engines, or programs compiled making use of the code
+ "trampoline" feature of various C compilers. This option improves service security, as it makes harder for
+ software exploits to change running code dynamically. Note that this feature is fully available on x86-64, and
+ partially on x86. Specifically, the shmat() protection is not available on x86. If running
+ in user mode, or in system mode, but without the CAP_SYS_ADMIN capability (e.g. setting
+ User=), NoNewPrivileges=yes is implied.
diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c
index 609e0619af9..44706669b4f 100644
--- a/src/shared/seccomp-util.c
+++ b/src/shared/seccomp-util.c
@@ -1086,27 +1086,81 @@ int seccomp_restrict_realtime(void) {
}
int seccomp_memory_deny_write_execute(void) {
+
uint32_t arch;
int r;
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
+ int filter_syscall = 0, block_syscall = 0, shmat_syscall = 0;
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
+ switch (arch) {
+
+ case SCMP_ARCH_X86:
+ filter_syscall = SCMP_SYS(mmap2);
+ block_syscall = SCMP_SYS(mmap);
+
+ /* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We
+ * ignore that here, which means there's still a way to get writable/executable memory, if an
+ * IPC key is mapped like this on i386. That's a pity, but no total loss. */
+ break;
+
+ case SCMP_ARCH_X86_64:
+ case SCMP_ARCH_X32:
+ filter_syscall = SCMP_SYS(mmap);
+ shmat_syscall = SCMP_SYS(shmat);
+ break;
+
+ /* Please add more definitions here, if you port systemd to other architectures! */
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#warning "Consider adding the right mmap() syscall definitions here!"
+#endif
+ }
+
+ /* Can't filter mmap() on this arch, then skip it */
+ if (filter_syscall == 0)
+ continue;
+
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
if (r < 0)
return r;
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(mmap),
- 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
- if (r < 0) {
- log_debug_errno(r, "Failed to add mmap() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
- continue;
+ if (filter_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ filter_syscall,
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, filter_syscall);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ continue;
+ }
+ }
+
+ if (block_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ block_syscall,
+ 0);
+ if (r < 0) {
+ _cleanup_free_ char *n = NULL;
+
+ n = seccomp_syscall_resolve_num_arch(arch, block_syscall);
+ log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
+ strna(n),
+ seccomp_arch_to_string(arch));
+ continue;
+ }
}
r = seccomp_rule_add_exact(
@@ -1120,15 +1174,17 @@ int seccomp_memory_deny_write_execute(void) {
continue;
}
- r = seccomp_rule_add_exact(
- seccomp,
- SCMP_ACT_ERRNO(EPERM),
- SCMP_SYS(shmat),
- 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
- if (r < 0) {
- log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
- continue;
+ if (shmat_syscall != 0) {
+ r = seccomp_rule_add_exact(
+ seccomp,
+ SCMP_ACT_ERRNO(EPERM),
+ SCMP_SYS(shmat),
+ 1,
+ SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
+ if (r < 0) {
+ log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
+ continue;
+ }
}
r = seccomp_load(seccomp);
diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h
index 2563fcd38a5..bfbfb5ab3d7 100644
--- a/src/shared/seccomp-util.h
+++ b/src/shared/seccomp-util.h
@@ -84,6 +84,13 @@ int seccomp_memory_deny_write_execute(void);
#define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
#endif
+/* mmap() blocking is only available on some archs for now */
+#if defined(__x86_64__) || defined(__i386__)
+#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 0
+#else
+#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
+#endif
+
extern const uint32_t seccomp_local_archs[];
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c
index 54e7947c2f1..36592388105 100644
--- a/src/test/test-seccomp.c
+++ b/src/test/test-seccomp.c
@@ -384,11 +384,21 @@ static void test_memory_deny_write_execute(void) {
assert_se(p != MAP_FAILED);
assert_se(munmap(p, page_size()) >= 0);
- seccomp_memory_deny_write_execute();
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+ assert_se(seccomp_memory_deny_write_execute() >= 0);
+
+#if SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN
+ p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
+ assert_se(p != MAP_FAILED);
+ assert_se(munmap(p, page_size()) >= 0);
+#else
p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
assert_se(p == MAP_FAILED);
assert_se(errno == EPERM);
+#endif
p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
assert_se(p != MAP_FAILED);