mirror of
https://github.com/systemd/systemd.git
synced 2025-03-21 02:50:18 +03:00
Merge pull request #6585 from poettering/seccomp-lock-personality
Seccomp lock personality
This commit is contained in:
commit
40cdf0c962
@ -1653,6 +1653,18 @@
|
||||
personality of the host system's kernel.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>LockPersonality=</varname></term>
|
||||
|
||||
<listitem><para>Locks down the <citerefentry
|
||||
project='man-pages'><refentrytitle>personality</refentrytitle><manvolnum>2</manvolnum></citerefentry> system
|
||||
call so that the kernel execution domain may not be changed from the default or the personality selected with the
|
||||
<varname>Personality=</varname> directive. This may be useful to improve security, because odd personality
|
||||
emulations may be poorly tested and a source of vulnerabilities. If running in user mode, or in system mode, but
|
||||
without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=</varname>),
|
||||
<varname>NoNewPrivileges=yes</varname> is implied.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>RuntimeDirectory=</varname></term>
|
||||
|
||||
|
@ -904,6 +904,25 @@ const char* personality_to_string(unsigned long p) {
|
||||
return architecture_to_string(architecture);
|
||||
}
|
||||
|
||||
int opinionated_personality(unsigned long *ret) {
|
||||
int current;
|
||||
|
||||
/* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
|
||||
* opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
|
||||
* two most relevant personalities: PER_LINUX and PER_LINUX32. */
|
||||
|
||||
current = personality(PERSONALITY_INVALID);
|
||||
if (current < 0)
|
||||
return -errno;
|
||||
|
||||
if (((unsigned long) current & 0xffff) == PER_LINUX32)
|
||||
*ret = PER_LINUX32;
|
||||
else
|
||||
*ret = PER_LINUX;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void valgrind_summary_hack(void) {
|
||||
#ifdef HAVE_VALGRIND_VALGRIND_H
|
||||
if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
|
||||
|
@ -91,6 +91,8 @@ bool oom_score_adjust_is_valid(int oa);
|
||||
unsigned long personality_from_string(const char *p);
|
||||
const char *personality_to_string(unsigned long);
|
||||
|
||||
/* Stores either PER_LINUX32 or PER_LINUX in *ret, depending on the calling process' current personality, and
 * returns 0. Returns a negative value if the current personality cannot be queried. */
int opinionated_personality(unsigned long *ret);
|
||||
|
||||
int ioprio_class_to_string_alloc(int i, char **s);
|
||||
int ioprio_class_from_string(const char *s);
|
||||
|
||||
|
@ -853,6 +853,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
@ -1695,7 +1696,7 @@ int bus_exec_context_set_transient_property(
|
||||
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
|
||||
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
|
||||
"ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
|
||||
"CPUSchedulingResetOnFork", "NonBlocking")) {
|
||||
"CPUSchedulingResetOnFork", "NonBlocking", "LockPersonality")) {
|
||||
int b;
|
||||
|
||||
r = sd_bus_message_read(message, "b", &b);
|
||||
@ -1743,6 +1744,8 @@ int bus_exec_context_set_transient_property(
|
||||
c->cpu_sched_reset_on_fork = b;
|
||||
else if (streq(name, "NonBlocking"))
|
||||
c->non_blocking = b;
|
||||
else if (streq(name, "LockPersonality"))
|
||||
c->lock_personality = b;
|
||||
|
||||
unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
|
||||
}
|
||||
|
@ -1296,7 +1296,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
|
||||
c->protect_kernel_modules ||
|
||||
c->private_devices ||
|
||||
context_has_syscall_filters(c) ||
|
||||
!set_isempty(c->syscall_archs);
|
||||
!set_isempty(c->syscall_archs) ||
|
||||
c->lock_personality;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SECCOMP
|
||||
@ -1455,6 +1456,32 @@ static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
|
||||
return seccomp_restrict_namespaces(c->restrict_namespaces);
|
||||
}
|
||||
|
||||
static int apply_lock_personality(const Unit* u, const ExecContext *c) {
|
||||
unsigned long personality;
|
||||
int r;
|
||||
|
||||
assert(u);
|
||||
assert(c);
|
||||
|
||||
if (!c->lock_personality)
|
||||
return 0;
|
||||
|
||||
if (skip_seccomp_unavailable(u, "LockPersonality="))
|
||||
return 0;
|
||||
|
||||
personality = c->personality;
|
||||
|
||||
/* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
|
||||
if (personality == PERSONALITY_INVALID) {
|
||||
|
||||
r = opinionated_personality(&personality);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return seccomp_lock_personality(personality);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void do_idle_pipe_dance(int idle_pipe[4]) {
|
||||
@ -2972,6 +2999,13 @@ static int exec_child(
|
||||
return r;
|
||||
}
|
||||
|
||||
r = apply_lock_personality(unit, context);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_SECCOMP;
|
||||
*error_message = strdup("Failed to lock personalities");
|
||||
return r;
|
||||
}
|
||||
|
||||
/* This really should remain the last step before the execve(), to make sure our own code is unaffected
|
||||
* by the filter as little as possible. */
|
||||
r = apply_syscall_filter(unit, context, needs_ambient_hack);
|
||||
@ -3733,6 +3767,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
|
||||
"%sPersonality: %s\n",
|
||||
prefix, strna(personality_to_string(c->personality)));
|
||||
|
||||
fprintf(f,
|
||||
"%sLockPersonality: %s\n",
|
||||
prefix, yes_no(c->lock_personality));
|
||||
|
||||
if (c->syscall_filter) {
|
||||
#ifdef HAVE_SECCOMP
|
||||
Iterator j;
|
||||
|
@ -227,6 +227,7 @@ struct ExecContext {
|
||||
bool same_pgrp;
|
||||
|
||||
unsigned long personality;
|
||||
bool lock_personality;
|
||||
|
||||
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
|
||||
|
||||
|
@ -60,14 +60,16 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0,
|
||||
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
|
||||
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
|
||||
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
|
||||
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
|
||||
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
|
||||
$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
|
||||
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
|
||||
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
|
||||
$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
|
||||
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
|
||||
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
|
||||
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)
|
||||
|
@ -214,7 +214,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
|
||||
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
|
||||
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
|
||||
"ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
|
||||
"CPUSchedulingResetOnFork")) {
|
||||
"CPUSchedulingResetOnFork", "LockPersonality")) {
|
||||
|
||||
r = parse_boolean(eq);
|
||||
if (r < 0)
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "alloc-util.h"
|
||||
#include "macro.h"
|
||||
#include "nsflags.h"
|
||||
#include "process-util.h"
|
||||
#include "seccomp-util.h"
|
||||
#include "set.h"
|
||||
#include "string-util.h"
|
||||
@ -1402,3 +1403,36 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int seccomp_lock_personality(unsigned long personality) {
|
||||
uint32_t arch;
|
||||
int r;
|
||||
|
||||
if (personality >= PERSONALITY_INVALID)
|
||||
return -EINVAL;
|
||||
|
||||
SECCOMP_FOREACH_LOCAL_ARCH(arch) {
|
||||
_cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL;
|
||||
|
||||
r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = seccomp_rule_add_exact(
|
||||
seccomp,
|
||||
SCMP_ACT_ERRNO(EPERM),
|
||||
SCMP_SYS(personality),
|
||||
1,
|
||||
SCMP_A0(SCMP_CMP_NE, personality));
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = seccomp_load(seccomp);
|
||||
if (IN_SET(r, -EPERM, -EACCES))
|
||||
return r;
|
||||
if (r < 0)
|
||||
log_debug_errno(r, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -78,6 +78,7 @@ int seccomp_protect_sysctl(void);
|
||||
int seccomp_restrict_address_families(Set *address_families, bool whitelist);
|
||||
int seccomp_restrict_realtime(void);
|
||||
int seccomp_memory_deny_write_execute(void);
|
||||
/* Makes personality() calls whose argument differs from the specified value fail with EPERM. Returns -EINVAL if the
 * specified value is PERSONALITY_INVALID or larger. */
int seccomp_lock_personality(unsigned long personality);
|
||||
|
||||
extern const uint32_t seccomp_local_archs[];
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/personality.h>
|
||||
#include <sys/poll.h>
|
||||
#include <sys/shm.h>
|
||||
#include <sys/types.h>
|
||||
@ -47,7 +48,6 @@
|
||||
# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
|
||||
#endif
|
||||
|
||||
|
||||
static void test_seccomp_arch_to_string(void) {
|
||||
uint32_t a, b;
|
||||
const char *name;
|
||||
@ -565,6 +565,70 @@ static void test_load_syscall_filter_set_raw(void) {
|
||||
assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
static void test_lock_personality(void) {
|
||||
unsigned long current;
|
||||
pid_t pid;
|
||||
|
||||
if (!is_seccomp_available())
|
||||
return;
|
||||
if (geteuid() != 0)
|
||||
return;
|
||||
|
||||
assert_se(opinionated_personality(¤t) >= 0);
|
||||
|
||||
log_info("current personality=%lu", current);
|
||||
|
||||
pid = fork();
|
||||
assert_se(pid >= 0);
|
||||
|
||||
if (pid == 0) {
|
||||
assert_se(seccomp_lock_personality(current) >= 0);
|
||||
|
||||
assert_se((unsigned long) personality(current) == current);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_LINUX | MMAP_PAGE_ZERO) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_LINUX | READ_IMPLIES_EXEC) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_LINUX_32BIT) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_SVR4) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_BSD) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(current == PER_LINUX ? PER_LINUX32 : PER_LINUX) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_LINUX32_3GB) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PER_UW7) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(0x42) == -1 && errno == EPERM);
|
||||
|
||||
errno = EUCLEAN;
|
||||
assert_se(personality(PERSONALITY_INVALID) == -1 && errno == EPERM); /* maybe remove this later */
|
||||
|
||||
assert_se((unsigned long) personality(current) == current);
|
||||
_exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
log_set_max_level(LOG_DEBUG);
|
||||
@ -581,6 +645,7 @@ int main(int argc, char *argv[]) {
|
||||
test_memory_deny_write_execute_shmat();
|
||||
test_restrict_archs();
|
||||
test_load_syscall_filter_set_raw();
|
||||
test_lock_personality();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user