mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-02-26 09:57:26 +03:00
seccomp: don't install filters for archs that can't use syscalls
When seccomp_restrict_archs is called, architectures that are blocked are replaced by the SECCOMP_LOCAL_ARCH_BLOCKED marker so that they are not disabled again and filters are not installed for them. This can make some service that use SystemCallArchitecture= and SystemCallFilter= start faster.
This commit is contained in:
parent
104fc4be11
commit
6597686865
4
TODO
4
TODO
@ -135,10 +135,6 @@ Features:
|
|||||||
o move into separate libsystemd-shared-iptables.so .so
|
o move into separate libsystemd-shared-iptables.so .so
|
||||||
- iptables-libs (only used by nspawn + networkd)
|
- iptables-libs (only used by nspawn + networkd)
|
||||||
|
|
||||||
* seccomp: when SystemCallArchitectures=native is set then don't install any
|
|
||||||
other seccomp filters for any of the other archs, in order to reduce the
|
|
||||||
number of seccomp filters we install needlessly.
|
|
||||||
|
|
||||||
* seccomp: maybe use seccomp_merge() to merge our filters per-arch if we can.
|
* seccomp: maybe use seccomp_merge() to merge our filters per-arch if we can.
|
||||||
Apparently kernel performance is much better with fewer larger seccomp
|
Apparently kernel performance is much better with fewer larger seccomp
|
||||||
filters than with more smaller seccomp filters.
|
filters than with more smaller seccomp filters.
|
||||||
|
@ -23,7 +23,8 @@
|
|||||||
#include "string-util.h"
|
#include "string-util.h"
|
||||||
#include "strv.h"
|
#include "strv.h"
|
||||||
|
|
||||||
const uint32_t seccomp_local_archs[] = {
|
/* This array will be modified at runtime as seccomp_restrict_archs is called. */
|
||||||
|
uint32_t seccomp_local_archs[] = {
|
||||||
|
|
||||||
/* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */
|
/* Note: always list the native arch we are compiled as last, so that users can deny-list seccomp(), but our own calls to it still succeed */
|
||||||
|
|
||||||
@ -94,7 +95,7 @@ const uint32_t seccomp_local_archs[] = {
|
|||||||
#elif defined(__s390__)
|
#elif defined(__s390__)
|
||||||
SCMP_ARCH_S390,
|
SCMP_ARCH_S390,
|
||||||
#endif
|
#endif
|
||||||
(uint32_t) -1
|
SECCOMP_LOCAL_ARCH_END
|
||||||
};
|
};
|
||||||
|
|
||||||
const char* seccomp_arch_to_string(uint32_t c) {
|
const char* seccomp_arch_to_string(uint32_t c) {
|
||||||
@ -1758,8 +1759,8 @@ int seccomp_memory_deny_write_execute(void) {
|
|||||||
|
|
||||||
int seccomp_restrict_archs(Set *archs) {
|
int seccomp_restrict_archs(Set *archs) {
|
||||||
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
||||||
void *id;
|
|
||||||
int r;
|
int r;
|
||||||
|
bool blocked_new = false;
|
||||||
|
|
||||||
/* This installs a filter with no rules, but that restricts the system call architectures to the specified
|
/* This installs a filter with no rules, but that restricts the system call architectures to the specified
|
||||||
* list.
|
* list.
|
||||||
@ -1775,24 +1776,36 @@ int seccomp_restrict_archs(Set *archs) {
|
|||||||
if (!seccomp)
|
if (!seccomp)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
SET_FOREACH(id, archs) {
|
for (unsigned i = 0; seccomp_local_archs[i] != SECCOMP_LOCAL_ARCH_END; ++i) {
|
||||||
r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
|
uint32_t arch = seccomp_local_archs[i];
|
||||||
if (r < 0 && r != -EEXIST)
|
|
||||||
return r;
|
/* That architecture might have already been blocked by a previous call to seccomp_restrict_archs. */
|
||||||
|
if (arch == SECCOMP_LOCAL_ARCH_BLOCKED)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bool block = !set_contains(archs, UINT32_TO_PTR(arch + 1));
|
||||||
|
|
||||||
|
/* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32
|
||||||
|
* x32 syscalls should basically match x86-64 for everything except the pointer type.
|
||||||
|
* The important thing is that you can block the old 32-bit x86 syscalls.
|
||||||
|
* https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
|
||||||
|
if (block && arch == SCMP_ARCH_X86_64 && seccomp_arch_native() == SCMP_ARCH_X32)
|
||||||
|
block = !set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1));
|
||||||
|
|
||||||
|
if (block) {
|
||||||
|
seccomp_local_archs[i] = SECCOMP_LOCAL_ARCH_BLOCKED;
|
||||||
|
blocked_new = true;
|
||||||
|
} else {
|
||||||
|
r = seccomp_arch_add(seccomp, arch);
|
||||||
|
if (r < 0 && r != -EEXIST)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The vdso for x32 assumes that x86-64 syscalls are available. Let's allow them, since x32
|
/* All architectures that will be blocked by the seccomp program were
|
||||||
* x32 syscalls should basically match x86-64 for everything except the pointer type.
|
* already blocked. */
|
||||||
* The important thing is that you can block the old 32-bit x86 syscalls.
|
if (!blocked_new)
|
||||||
* https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=850047 */
|
return 0;
|
||||||
|
|
||||||
if (seccomp_arch_native() == SCMP_ARCH_X32 ||
|
|
||||||
set_contains(archs, UINT32_TO_PTR(SCMP_ARCH_X32 + 1))) {
|
|
||||||
|
|
||||||
r = seccomp_arch_add(seccomp, SCMP_ARCH_X86_64);
|
|
||||||
if (r < 0 && r != -EEXIST)
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
@ -100,12 +100,20 @@ int seccomp_lock_personality(unsigned long personality);
|
|||||||
int seccomp_protect_hostname(void);
|
int seccomp_protect_hostname(void);
|
||||||
int seccomp_restrict_suid_sgid(void);
|
int seccomp_restrict_suid_sgid(void);
|
||||||
|
|
||||||
extern const uint32_t seccomp_local_archs[];
|
extern uint32_t seccomp_local_archs[];
|
||||||
|
|
||||||
|
#define SECCOMP_LOCAL_ARCH_END UINT32_MAX
|
||||||
|
|
||||||
|
/* Note: 0 is safe to use here because although SCMP_ARCH_NATIVE is 0, it would
|
||||||
|
* never be in the seccomp_local_archs array anyway so we can use it as a
|
||||||
|
* marker. */
|
||||||
|
#define SECCOMP_LOCAL_ARCH_BLOCKED 0
|
||||||
|
|
||||||
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
|
#define SECCOMP_FOREACH_LOCAL_ARCH(arch) \
|
||||||
for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \
|
for (unsigned _i = ({ (arch) = seccomp_local_archs[0]; 0; }); \
|
||||||
seccomp_local_archs[_i] != (uint32_t) -1; \
|
(arch) != SECCOMP_LOCAL_ARCH_END; \
|
||||||
(arch) = seccomp_local_archs[++_i])
|
(arch) = seccomp_local_archs[++_i]) \
|
||||||
|
if ((arch) != SECCOMP_LOCAL_ARCH_BLOCKED)
|
||||||
|
|
||||||
/* EACCES: does not have the CAP_SYS_ADMIN or no_new_privs == 1
|
/* EACCES: does not have the CAP_SYS_ADMIN or no_new_privs == 1
|
||||||
* ENOMEM: out of memory, failed to allocate space for a libseccomp structure, or would exceed a defined constant
|
* ENOMEM: out of memory, failed to allocate space for a libseccomp structure, or would exceed a defined constant
|
||||||
|
Loading…
x
Reference in New Issue
Block a user