mirror of
https://github.com/systemd/systemd.git
synced 2025-01-05 13:18:06 +03:00
Merge pull request #5270 from poettering/seccomp-namespace-fix
swap seccomp filter params on s390
This commit is contained in:
commit
52a4aafb4d
2
TODO
2
TODO
@ -24,6 +24,8 @@ Janitorial Clean-ups:
|
||||
|
||||
Features:
|
||||
|
||||
* set SystemCallArchitectures=native on all our services
|
||||
|
||||
* maybe add a call sd_journal_set_block_timeout() or so that sets SO_SNDTIMEO for
|
||||
the sd-journal logging socket, and, if the timeout is set to 0, sets
|
||||
O_NONBLOCK on it. That way people can control if and when to block for
|
||||
|
@ -1554,11 +1554,10 @@
|
||||
<citerefentry><refentrytitle>setns</refentrytitle><manvolnum>2</manvolnum></citerefentry> system calls, taking
|
||||
the specified flags parameters into account. Note that — if this option is used — in addition to restricting
|
||||
creation and switching of the specified types of namespaces (or all of them, if true), access to the
|
||||
<function>setns()</function> system call with a zero flags parameter is prohibited.
|
||||
If running in user mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant>
|
||||
capability (e.g. setting <varname>User=</varname>), <varname>NoNewPrivileges=yes</varname>
|
||||
is implied.
|
||||
</para></listitem>
|
||||
<function>setns()</function> system call with a zero flags parameter is prohibited. This setting is only
|
||||
supported on x86, x86-64, s390 and s390x, and enforces no restrictions on other architectures. If running in user
|
||||
mode, or in system mode, but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting
|
||||
<varname>User=</varname>), <varname>NoNewPrivileges=yes</varname> is implied.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -47,8 +47,8 @@
|
||||
static inline int raw_clone(unsigned long flags) {
|
||||
assert((flags & (CLONE_VM|CLONE_PARENT_SETTID|CLONE_CHILD_SETTID|
|
||||
CLONE_CHILD_CLEARTID|CLONE_SETTLS)) == 0);
|
||||
#if defined(__s390__) || defined(__CRIS__)
|
||||
/* On s390 and cris the order of the first and second arguments
|
||||
#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
|
||||
/* On s390/s390x and cris the order of the first and second arguments
|
||||
* of the raw clone() system call is reversed. */
|
||||
return (int) syscall(__NR_clone, NULL, flags);
|
||||
#elif defined(__sparc__) && defined(__arch64__)
|
||||
|
@ -750,10 +750,35 @@ int seccomp_restrict_namespaces(unsigned long retain) {
|
||||
|
||||
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
|
||||
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
||||
int clone_reversed_order = -1;
|
||||
unsigned i;
|
||||
|
||||
log_debug("Operating on architecture: %s", seccomp_arch_to_string(arch));
|
||||
|
||||
switch (arch) {
|
||||
|
||||
case SCMP_ARCH_X86_64:
|
||||
case SCMP_ARCH_X86:
|
||||
case SCMP_ARCH_X32:
|
||||
clone_reversed_order = 0;
|
||||
break;
|
||||
|
||||
case SCMP_ARCH_S390:
|
||||
case SCMP_ARCH_S390X:
|
||||
/* On s390/s390x the first two parameters to clone are switched */
|
||||
clone_reversed_order = 1;
|
||||
break;
|
||||
|
||||
/* Please add more definitions here, if you port systemd to other architectures! */
|
||||
|
||||
#if !defined(__i386__) && !defined(__x86_64__) && !defined(__s390__) && !defined(__s390x__)
|
||||
#warning "Consider adding the right clone() syscall definitions here!"
|
||||
#endif
|
||||
}
|
||||
|
||||
if (clone_reversed_order < 0) /* we don't know the right order, let's ignore this arch... */
|
||||
continue;
|
||||
|
||||
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -802,12 +827,20 @@ int seccomp_restrict_namespaces(unsigned long retain) {
|
||||
break;
|
||||
}
|
||||
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
SCMP_SYS(clone),
|
||||
1,
|
||||
SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
|
||||
if (clone_reversed_order == 0)
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
SCMP_SYS(clone),
|
||||
1,
|
||||
SCMP_A0(SCMP_CMP_MASKED_EQ, f, f));
|
||||
else
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
SCMP_SYS(clone),
|
||||
1,
|
||||
SCMP_A1(SCMP_CMP_MASKED_EQ, f, f));
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to add clone() rule for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
|
||||
break;
|
||||
|
@ -91,6 +91,13 @@ int seccomp_memory_deny_write_execute(void);
|
||||
#define SECCOMP_MEMORY_DENY_WRITE_EXECUTE_BROKEN 1
|
||||
#endif
|
||||
|
||||
/* We only know the right order of the clone() parameters on the architectures listed below, for now; on all others namespace restriction is flagged as broken. */
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(__s390x__) || defined(__s390__)
|
||||
#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 0
|
||||
#else
|
||||
#define SECCOMP_RESTRICT_NAMESPACES_BROKEN 1
|
||||
#endif
|
||||
|
||||
extern const uint32_t seccomp_local_archs[];
|
||||
|
||||
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
|
||||
|
@ -158,6 +158,8 @@ static void test_restrict_namespace(void) {
|
||||
assert_se(streq(s, "cgroup ipc net mnt pid user uts"));
|
||||
assert_se(namespace_flag_from_string_many(s, &ul) == 0 && ul == NAMESPACE_FLAGS_ALL);
|
||||
|
||||
#if SECCOMP_RESTRICT_NAMESPACES_BROKEN == 0
|
||||
|
||||
if (!is_seccomp_available())
|
||||
return;
|
||||
if (geteuid() != 0)
|
||||
@ -216,6 +218,7 @@ static void test_restrict_namespace(void) {
|
||||
}
|
||||
|
||||
assert_se(wait_for_terminate_and_warn("nsseccomp", pid, true) == EXIT_SUCCESS);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void test_protect_sysctl(void) {
|
||||
|
Loading…
Reference in New Issue
Block a user