mirror of
https://github.com/systemd/systemd.git
synced 2025-01-11 09:18:07 +03:00
Merge pull request #5893 from keszybz/memorydenywriteexecute
Add support for more arches for MemoryDenyWriteExecute
This commit is contained in:
commit
271312e37b
@ -792,43 +792,10 @@ int seccomp_restrict_namespaces(unsigned long retain) {
|
||||
|
||||
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
|
||||
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
||||
int clone_reversed_order = -1;
|
||||
unsigned i;
|
||||
|
||||
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
|
||||
|
||||
switch (arch) {
|
||||
|
||||
case SCMP_ARCH_X86_64:
|
||||
case SCMP_ARCH_X86:
|
||||
case SCMP_ARCH_X32:
|
||||
case SCMP_ARCH_PPC64:
|
||||
case SCMP_ARCH_PPC64LE:
|
||||
case SCMP_ARCH_MIPS:
|
||||
case SCMP_ARCH_MIPSEL:
|
||||
case SCMP_ARCH_MIPS64:
|
||||
case SCMP_ARCH_MIPSEL64:
|
||||
case SCMP_ARCH_MIPS64N32:
|
||||
case SCMP_ARCH_MIPSEL64N32:
|
||||
clone_reversed_order = 0;
|
||||
break;
|
||||
|
||||
case SCMP_ARCH_S390:
|
||||
case SCMP_ARCH_S390X:
|
||||
/* On s390/s390x the first two parameters to clone are switched */
|
||||
clone_reversed_order = 1;
|
||||
break;
|
||||
|
||||
/* Please add more definitions here, if you port systemd to other architectures! */
|
||||
|
||||
#if SECCOMP_RESTRICT_NAMESPACES_BROKEN
|
||||
# warning "Consider adding the right clone() syscall definitions here!"
|
||||
#endif
|
||||
}
|
||||
|
||||
if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */
|
||||
continue;
|
||||
|
||||
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -877,7 +844,8 @@ int seccomp_restrict_namespaces(unsigned long retain) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (clone_reversed_order == 0)
|
||||
/* On s390/s390x the first two parameters to clone are switched */
|
||||
if (!IN_SET(arch, SCMP_ARCH_S390, SCMP_ARCH_S390X))
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
@ -972,16 +940,16 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist) {
|
||||
case SCMP_ARCH_X32:
|
||||
case SCMP_ARCH_ARM:
|
||||
case SCMP_ARCH_AARCH64:
|
||||
case SCMP_ARCH_PPC64:
|
||||
case SCMP_ARCH_PPC64LE:
|
||||
/* These we know we support (i.e. are the ones that do not use socketcall()) */
|
||||
supported = true;
|
||||
break;
|
||||
|
||||
case SCMP_ARCH_X86:
|
||||
case SCMP_ARCH_S390:
|
||||
case SCMP_ARCH_S390X:
|
||||
case SCMP_ARCH_PPC:
|
||||
case SCMP_ARCH_PPC64:
|
||||
case SCMP_ARCH_PPC64LE:
|
||||
case SCMP_ARCH_X86:
|
||||
default:
|
||||
/* These we either know we don't support (i.e. are the ones that do use socketcall()), or we
|
||||
* don't know */
|
||||
@ -1192,6 +1160,37 @@ int seccomp_restrict_realtime(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Adds one rule to the filter 'seccomp' that makes syscall number 'nr' fail with EPERM.
 * 'arg_cnt' and 'arg' are forwarded unchanged to seccomp_rule_add_exact().
 *
 * If adding the rule fails, a debug message naming the syscall (resolved for 'arch') and the
 * architecture is logged. The failure is not handled any further here.
 *
 * Returns the value returned by seccomp_rule_add_exact(); a negative value is treated as failure. */
static int add_seccomp_syscall_filter(scmp_filter_ctx seccomp,
|
||||
uint32_t arch,
|
||||
int nr,
|
||||
unsigned int arg_cnt,
|
||||
const struct scmp_arg_cmp arg) {
|
||||
int r;
|
||||
|
||||
r = seccomp_rule_add_exact(seccomp, SCMP_ACT_ERRNO(EPERM), nr, arg_cnt, arg);
|
||||
if (r < 0) {
|
||||
/* NOTE(review): _cleanup_free_ presumably releases 'n' when it goes out of scope — confirm. */
_cleanup_free_ char *n = NULL;
|
||||
|
||||
/* Look up the syscall name for this architecture, only needed for the log message. */
n = seccomp_syscall_resolve_num_arch(arch, nr);
|
||||
log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
|
||||
/* NOTE(review): strna() presumably substitutes a placeholder if the lookup returned NULL — confirm. */
strna(n),
|
||||
seccomp_arch_to_string(arch));
|
||||
}
|
||||
|
||||
/* Returned as-is, for both success and failure. */
return r;
|
||||
}
|
||||
|
||||
/* For known architectures, assert whether the shm*() syscalls are defined (SCMP_SYS() > 0) or not (SCMP_SYS() < 0). */
|
||||
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
|
||||
assert_cc(SCMP_SYS(shmget) > 0);
|
||||
assert_cc(SCMP_SYS(shmat) > 0);
|
||||
assert_cc(SCMP_SYS(shmdt) > 0);
|
||||
#elif defined(__i386__) || defined(__powerpc64__)
|
||||
assert_cc(SCMP_SYS(shmget) < 0);
|
||||
assert_cc(SCMP_SYS(shmat) < 0);
|
||||
assert_cc(SCMP_SYS(shmdt) < 0);
|
||||
#endif
|
||||
|
||||
int seccomp_memory_deny_write_execute(void) {
|
||||
|
||||
uint32_t arch;
|
||||
@ -1208,21 +1207,36 @@ int seccomp_memory_deny_write_execute(void) {
|
||||
case SCMP_ARCH_X86:
|
||||
filter_syscall = SCMP_SYS(mmap2);
|
||||
block_syscall = SCMP_SYS(mmap);
|
||||
break;
|
||||
|
||||
/* Note that shmat() isn't available on i386, where the call is multiplexed through ipc(). We
|
||||
* ignore that here, which means there's still a way to get writable/executable memory, if an
|
||||
* IPC key is mapped like this on i386. That's a pity, but no total loss. */
|
||||
case SCMP_ARCH_PPC64:
|
||||
case SCMP_ARCH_PPC64LE:
|
||||
filter_syscall = SCMP_SYS(mmap);
|
||||
|
||||
/* Note that shmat() isn't available, and the call is multiplexed through ipc().
|
||||
* We ignore that here, which means there's still a way to get writable/executable
|
||||
* memory, if an IPC key is mapped like this. That's a pity, but no total loss. */
|
||||
|
||||
break;
|
||||
|
||||
case SCMP_ARCH_AARCH64:
|
||||
block_syscall = SCMP_SYS(mmap);
|
||||
/* fall through */
|
||||
|
||||
case SCMP_ARCH_ARM:
|
||||
filter_syscall = SCMP_SYS(mmap2); /* arm has only mmap2 */
|
||||
shmat_syscall = SCMP_SYS(shmat);
|
||||
break;
|
||||
|
||||
case SCMP_ARCH_X86_64:
|
||||
case SCMP_ARCH_X32:
|
||||
filter_syscall = SCMP_SYS(mmap);
|
||||
filter_syscall = SCMP_SYS(mmap); /* amd64 and x32 have only mmap */
|
||||
shmat_syscall = SCMP_SYS(shmat);
|
||||
break;
|
||||
|
||||
/* Please add more definitions here, if you port systemd to other architectures! */
|
||||
|
||||
#if !defined(__i386__) && !defined(__x86_64__)
|
||||
#if !defined(__i386__) && !defined(__x86_64__) && !defined(__powerpc64__) && !defined(__arm__) && !defined(__aarch64__)
|
||||
#warning "Consider adding the right mmap() syscall definitions here!"
|
||||
#endif
|
||||
}
|
||||
@ -1235,63 +1249,30 @@ int seccomp_memory_deny_write_execute(void) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (filter_syscall != 0) {
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
filter_syscall,
|
||||
1,
|
||||
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
|
||||
if (r < 0) {
|
||||
_cleanup_free_ char *n = NULL;
|
||||
|
||||
n = seccomp_syscall_resolve_num_arch(arch, filter_syscall);
|
||||
log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
|
||||
strna(n),
|
||||
seccomp_arch_to_string(arch));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
r = add_seccomp_syscall_filter(seccomp, arch, filter_syscall,
|
||||
1,
|
||||
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC|PROT_WRITE, PROT_EXEC|PROT_WRITE));
|
||||
if (r < 0)
|
||||
continue;
|
||||
|
||||
if (block_syscall != 0) {
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
block_syscall,
|
||||
0);
|
||||
if (r < 0) {
|
||||
_cleanup_free_ char *n = NULL;
|
||||
|
||||
n = seccomp_syscall_resolve_num_arch(arch, block_syscall);
|
||||
log_debug_errno(r, "Failed to add %s() rule for architecture %s, skipping: %m",
|
||||
strna(n),
|
||||
seccomp_arch_to_string(arch));
|
||||
r = add_seccomp_syscall_filter(seccomp, arch, block_syscall, 0, (const struct scmp_arg_cmp){} );
|
||||
if (r < 0)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
SCMP_SYS(mprotect),
|
||||
1,
|
||||
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to add mprotect() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
|
||||
r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(mprotect),
|
||||
1,
|
||||
SCMP_A2(SCMP_CMP_MASKED_EQ, PROT_EXEC, PROT_EXEC));
|
||||
if (r < 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (shmat_syscall != 0) {
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
SCMP_SYS(shmat),
|
||||
1,
|
||||
SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to add shmat() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
|
||||
r = add_seccomp_syscall_filter(seccomp, arch, SCMP_SYS(shmat),
|
||||
1,
|
||||
SCMP_A2(SCMP_CMP_MASKED_EQ, SHM_EXEC, SHM_EXEC));
|
||||
if (r < 0)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
r = seccomp_load(seccomp);
|
||||
|
@ -76,28 +76,6 @@ int seccomp_restrict_address_families(Set *address_families, bool whitelist);
|
||||
int seccomp_restrict_realtime(void);
|
||||
int seccomp_memory_deny_write_execute(void);
|
||||
|
||||
#if defined(__i386__) || defined(__s390x__) || defined(__s390__) || defined(__powerpc64__) || defined(__powerpc__) || defined (__mips__)
|
||||
/* On these archs, socket() is implemented via the socketcall() syscall multiplexer, and we can't restrict it hence via
|
||||
* seccomp */
|
||||
#define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
|
||||
#else
|
||||
#define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
|
||||
#endif
|
||||
|
||||
/* mmap() blocking is only available on some archs for now */
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 0
|
||||
#else
|
||||
#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
|
||||
#endif
|
||||
|
||||
/* we don't know the right order of the clone() parameters except for these archs, for now */
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__) || defined(__s390__) || defined(__powerpc64__) || defined(__mips__)
|
||||
#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 0
|
||||
#else
|
||||
#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 1
|
||||
#endif
|
||||
|
||||
extern const uint32_t seccomp_local_archs[];
|
||||
|
||||
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "string-util.h"
|
||||
#include "util.h"
|
||||
|
||||
_unused_ \
|
||||
_unused_
|
||||
static const struct af_name* lookup_af(register const char *str, register GPERF_LEN_TYPE len);
|
||||
|
||||
#include "af-from-name.h"
|
||||
|
@ -21,8 +21,10 @@
|
||||
#include <stdlib.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/poll.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "fd-util.h"
|
||||
@ -37,6 +39,15 @@
|
||||
#include "util.h"
|
||||
#include "virt.h"
|
||||
|
||||
#if SCMP_SYS(socket) < 0 || defined(__i386__) || defined(__s390x__) || defined(__s390__)
|
||||
/* On these archs, socket() is implemented via the socketcall() syscall multiplexer,
|
||||
* and we can't restrict it hence via seccomp. */
|
||||
# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 1
|
||||
#else
|
||||
# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
|
||||
#endif
|
||||
|
||||
|
||||
static void test_seccomp_arch_to_string(void) {
|
||||
uint32_t a, b;
|
||||
const char *name;
|
||||
@ -158,8 +169,6 @@ static void test_restrict_namespace(void) {
|
||||
assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
|
||||
assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
|
||||
|
||||
#if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0
|
||||
|
||||
if (!is_seccomp_available())
|
||||
return;
|
||||
if (geteuid() != 0)
|
||||
@ -218,7 +227,6 @@ static void test_restrict_namespace(void) {
|
||||
}
|
||||
|
||||
assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void test_protect_sysctl(void) {
|
||||
@ -286,12 +294,12 @@ static void test_restrict_address_families(void) {
|
||||
assert_se(fd >= 0);
|
||||
safe_close(fd);
|
||||
|
||||
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
|
||||
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
||||
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
|
||||
assert_se(fd >= 0);
|
||||
safe_close(fd);
|
||||
#else
|
||||
assert_se(socket(AF_UNIX, SOCK_DGRAM, 0) < 0);
|
||||
assert_se(fd < 0);
|
||||
assert_se(errno == EAFNOSUPPORT);
|
||||
#endif
|
||||
|
||||
@ -309,19 +317,21 @@ static void test_restrict_address_families(void) {
|
||||
assert_se(fd >= 0);
|
||||
safe_close(fd);
|
||||
|
||||
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
|
||||
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
||||
assert_se(fd >= 0);
|
||||
safe_close(fd);
|
||||
|
||||
fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
|
||||
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
|
||||
assert_se(fd >= 0);
|
||||
safe_close(fd);
|
||||
#else
|
||||
assert_se(socket(AF_UNIX, SOCK_DGRAM, 0) < 0);
|
||||
assert_se(fd < 0);
|
||||
assert_se(errno == EAFNOSUPPORT);
|
||||
#endif
|
||||
|
||||
assert_se(socket(AF_NETLINK, SOCK_DGRAM, 0) < 0);
|
||||
fd = socket(AF_NETLINK, SOCK_DGRAM, 0);
|
||||
#if SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN
|
||||
assert_se(fd >= 0);
|
||||
safe_close(fd);
|
||||
#else
|
||||
assert_se(fd < 0);
|
||||
assert_se(errno == EAFNOSUPPORT);
|
||||
#endif
|
||||
|
||||
@ -369,7 +379,7 @@ static void test_restrict_realtime(void) {
|
||||
assert_se(wait_for_terminate_and_warn("realtimeseccomp", pid, true) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
static void test_memory_deny_write_execute(void) {
|
||||
static void test_memory_deny_write_execute_mmap(void) {
|
||||
pid_t pid;
|
||||
|
||||
if (!is_seccomp_available())
|
||||
@ -393,14 +403,13 @@ static void test_memory_deny_write_execute(void) {
|
||||
|
||||
assert_se(seccomp_memory_deny_write_execute() >= 0);
|
||||
|
||||
#if SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN
|
||||
p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
|
||||
assert_se(p != MAP_FAILED);
|
||||
assert_se(munmap(p, page_size()) >= 0);
|
||||
#else
|
||||
p = mmap(NULL, page_size(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc64__) || defined(__arm__) || defined(__aarch64__)
|
||||
assert_se(p == MAP_FAILED);
|
||||
assert_se(errno == EPERM);
|
||||
#else /* unknown architectures */
|
||||
assert_se(p != MAP_FAILED);
|
||||
assert_se(munmap(p, page_size()) >= 0);
|
||||
#endif
|
||||
|
||||
p = mmap(NULL, page_size(), PROT_WRITE|PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
|
||||
@ -410,7 +419,54 @@ static void test_memory_deny_write_execute(void) {
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
assert_se(wait_for_terminate_and_warn("memoryseccomp", pid, true) == EXIT_SUCCESS);
|
||||
assert_se(wait_for_terminate_and_warn("memoryseccomp-mmap", pid, true) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/* Tests the effect of seccomp_memory_deny_write_execute() on shmat().
 *
 * A forked child attaches a shared memory segment with and without SHM_EXEC, installs the filter,
 * and then attaches again. On x86_64, arm and aarch64 the SHM_EXEC attach is expected to fail with
 * EPERM afterwards; on all other architectures it is expected to keep working. Attaching without
 * SHM_EXEC must work both before and after the filter is installed.
 *
 * The test returns without doing anything if seccomp is unavailable or if not running as root. */
static void test_memory_deny_write_execute_shmat(void) {
|
||||
int shmid;
|
||||
pid_t pid;
|
||||
|
||||
if (!is_seccomp_available())
|
||||
return;
|
||||
if (geteuid() != 0)
|
||||
return;
|
||||
|
||||
/* Create the segment in the parent so the child can refer to it by id.
 * NOTE(review): the flags argument is 0, i.e. no permission bits; presumably this only works
 * because the test runs as root — confirm.
 * NOTE(review): nothing visible here removes the segment again (no shmctl(IPC_RMID)), so each
 * run appears to leave a SysV shm segment behind — confirm and consider cleaning up. */
shmid = shmget(IPC_PRIVATE, page_size(), 0);
|
||||
assert_se(shmid >= 0);
|
||||
|
||||
pid = fork();
|
||||
assert_se(pid >= 0);
|
||||
|
||||
if (pid == 0) {
|
||||
void *p;
|
||||
|
||||
/* Before the filter is installed: a plain attach works... */
p = shmat(shmid, NULL, 0);
|
||||
assert_se(p != MAP_FAILED);
|
||||
assert_se(shmdt(p) == 0);
|
||||
|
||||
/* ...and so does an executable attach. */
p = shmat(shmid, NULL, SHM_EXEC);
|
||||
assert_se(p != MAP_FAILED);
|
||||
assert_se(shmdt(p) == 0);
|
||||
|
||||
/* Install the filter in the child. */
assert_se(seccomp_memory_deny_write_execute() >= 0);
|
||||
|
||||
/* After the filter is installed: the outcome of an executable attach depends on the architecture. */
p = shmat(shmid, NULL, SHM_EXEC);
|
||||
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
|
||||
assert_se(p == MAP_FAILED);
|
||||
assert_se(errno == EPERM);
|
||||
#else /* __i386__, __powerpc64__, and "unknown" architectures */
|
||||
assert_se(p != MAP_FAILED);
|
||||
assert_se(shmdt(p) == 0);
|
||||
#endif
|
||||
|
||||
/* A non-executable attach must still work after the filter is installed. */
p = shmat(shmid, NULL, 0);
|
||||
assert_se(p != MAP_FAILED);
|
||||
assert_se(shmdt(p) == 0);
|
||||
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/* The parent requires the child to have exited with EXIT_SUCCESS. */
assert_se(wait_for_terminate_and_warn("memoryseccomp-shmat", pid, true) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
static void test_restrict_archs(void) {
|
||||
@ -509,7 +565,8 @@ int main(int argc, char *argv[]) {
|
||||
test_protect_sysctl();
|
||||
test_restrict_address_families();
|
||||
test_restrict_realtime();
|
||||
test_memory_deny_write_execute();
|
||||
test_memory_deny_write_execute_mmap();
|
||||
test_memory_deny_write_execute_shmat();
|
||||
test_restrict_archs();
|
||||
test_load_syscall_filter_set_raw();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user