1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-21 02:50:18 +03:00

Merge pull request #6585 from poettering/seccomp-lock-personality

Seccomp lock personality
This commit is contained in:
Lennart Poettering 2017-08-29 18:58:56 +02:00 committed by GitHub
commit 40cdf0c962
11 changed files with 183 additions and 6 deletions

View File

@ -1653,6 +1653,18 @@
personality of the host system's kernel.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>LockPersonality=</varname></term>
<listitem><para>Locks down the <citerefentry
project='man-pages'><refentrytitle>personality</refentrytitle><manvolnum>2</manvolnum></citerefentry> system
call so that the kernel execution domain may not be changed from the default or from the personality selected with
the <varname>Personality=</varname> directive. This may be useful to improve security, because odd personality
emulations may be poorly tested and may be a source of vulnerabilities. If running in user mode, or in system mode
but without the <constant>CAP_SYS_ADMIN</constant> capability (e.g. when setting <varname>User=</varname>),
<varname>NoNewPrivileges=yes</varname> is implied.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>RuntimeDirectory=</varname></term>

View File

@ -904,6 +904,25 @@ const char* personality_to_string(unsigned long p) {
return architecture_to_string(architecture);
}
int opinionated_personality(unsigned long *ret) {
        int current;

        /* Queries the current personality and stores a simplified version of it in *ret. Returns 0 on success, or
         * a negative errno-style value if the personality cannot be determined, in which case *ret is left
         * untouched. This function is a bit opinionated though, and ignores all the finer-grained bits and exotic
         * personalities, only distinguishing the two most relevant personalities: PER_LINUX and PER_LINUX32. */

        /* Clear errno before the call: a failure is not necessarily reported through errno, it may also be handed
         * back directly as a negative errno-like return value. Without the reset, "-errno" could evaluate to 0
         * (or to some stale, unrelated error), and we would report success without ever having written *ret. */
        errno = 0;

        current = personality(PERSONALITY_INVALID);
        if (current < 0)
                return errno > 0 ? -errno : current;

        /* Only the low 16 bits are compared; everything above is deliberately ignored here. */
        if (((unsigned long) current & 0xffff) == PER_LINUX32)
                *ret = PER_LINUX32;
        else
                *ret = PER_LINUX;

        return 0;
}
void valgrind_summary_hack(void) {
#ifdef HAVE_VALGRIND_VALGRIND_H
if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {

View File

@ -91,6 +91,8 @@ bool oom_score_adjust_is_valid(int oa);
unsigned long personality_from_string(const char *p);
const char *personality_to_string(unsigned long);
int opinionated_personality(unsigned long *ret);
int ioprio_class_to_string_alloc(int i, char **s);
int ioprio_class_from_string(const char *s);

View File

@ -853,6 +853,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST),
@ -1695,7 +1696,7 @@ int bus_exec_context_set_transient_property(
"NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute",
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
"ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
"CPUSchedulingResetOnFork", "NonBlocking")) {
"CPUSchedulingResetOnFork", "NonBlocking", "LockPersonality")) {
int b;
r = sd_bus_message_read(message, "b", &b);
@ -1743,6 +1744,8 @@ int bus_exec_context_set_transient_property(
c->cpu_sched_reset_on_fork = b;
else if (streq(name, "NonBlocking"))
c->non_blocking = b;
else if (streq(name, "LockPersonality"))
c->lock_personality = b;
unit_write_drop_in_private_format(u, mode, name, "%s=%s", name, yes_no(b));
}

View File

@ -1296,7 +1296,8 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
c->protect_kernel_modules ||
c->private_devices ||
context_has_syscall_filters(c) ||
!set_isempty(c->syscall_archs);
!set_isempty(c->syscall_archs) ||
c->lock_personality;
}
#ifdef HAVE_SECCOMP
@ -1455,6 +1456,32 @@ static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
return seccomp_restrict_namespaces(c->restrict_namespaces);
}
static int apply_lock_personality(const Unit* u, const ExecContext *c) {
        unsigned long domain;
        int r;

        /* Implements LockPersonality=: installs a seccomp filter that pins the personality to the one configured
         * in the execution context. Returns 0 if nothing had to be done or the lock was installed, and a negative
         * errno-style value on failure. */

        assert(u);
        assert(c);

        /* Nothing to do if the lock was not requested; the seccomp availability check is only consulted when the
         * lock was actually asked for. */
        if (!c->lock_personality || skip_seccomp_unavailable(u, "LockPersonality="))
                return 0;

        if (c->personality != PERSONALITY_INVALID)
                domain = c->personality;
        else {
                /* No personality was configured explicitly: lock to whatever is in effect right now, reduced to
                 * either PER_LINUX or PER_LINUX32. */
                r = opinionated_personality(&domain);
                if (r < 0)
                        return r;
        }

        return seccomp_lock_personality(domain);
}
#endif
static void do_idle_pipe_dance(int idle_pipe[4]) {
@ -2972,6 +2999,13 @@ static int exec_child(
return r;
}
r = apply_lock_personality(unit, context);
if (r < 0) {
*exit_status = EXIT_SECCOMP;
*error_message = strdup("Failed to lock personalities");
return r;
}
/* This really should remain the last step before the execve(), to make sure our own code is unaffected
* by the filter as little as possible. */
r = apply_syscall_filter(unit, context, needs_ambient_hack);
@ -3733,6 +3767,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPersonality: %s\n",
prefix, strna(personality_to_string(c->personality)));
fprintf(f,
"%sLockPersonality: %s\n",
prefix, yes_no(c->lock_personality));
if (c->syscall_filter) {
#ifdef HAVE_SECCOMP
Iterator j;

View File

@ -227,6 +227,7 @@ struct ExecContext {
bool same_pgrp;
unsigned long personality;
bool lock_personality;
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */

View File

@ -60,14 +60,16 @@ $1.SystemCallErrorNumber, config_parse_syscall_errno, 0,
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)',
$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)
$1.LockPersonality, config_parse_bool, 0, offsetof($1, exec_context.lock_personality)',
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.RestrictAddressFamilies, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.LockPersonality, config_parse_warn_compat, DISABLED_CONFIGURATION, 0')
$1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit)
$1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit)
$1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit)

View File

@ -214,7 +214,7 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
"ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
"CPUSchedulingResetOnFork")) {
"CPUSchedulingResetOnFork", "LockPersonality")) {
r = parse_boolean(eq);
if (r < 0)

View File

@ -29,6 +29,7 @@
#include "alloc-util.h"
#include "macro.h"
#include "nsflags.h"
#include "process-util.h"
#include "seccomp-util.h"
#include "set.h"
#include "string-util.h"
@ -1402,3 +1403,36 @@ int seccomp_filter_set_add(Set *filter, bool add, const SyscallFilterSet *set) {
return 0;
}
int seccomp_lock_personality(unsigned long personality) {
        uint32_t arch;

        /* Installs, for each local architecture, a seccomp filter that allows everything except personality()
         * calls whose first argument differs from the specified personality; those fail with EPERM. Returns 0 on
         * success (including when loading was skipped for some architecture), -EINVAL if the personality value is
         * out of range, or another negative errno-style value on fatal errors. */

        if (personality >= PERSONALITY_INVALID)
                return -EINVAL;

        SECCOMP_FOREACH_LOCAL_ARCH(arch) {
                _cleanup_(seccomp_releasep) scmp_filter_ctx ctx = NULL;
                int rc;

                rc = seccomp_init_for_arch(&ctx, arch, SCMP_ACT_ALLOW);
                if (rc < 0)
                        return rc;

                /* One rule, one argument comparison: deny whenever arg0 is not the locked personality. */
                rc = seccomp_rule_add_exact(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(personality), 1, SCMP_A0(SCMP_CMP_NE, personality));
                if (rc < 0)
                        return rc;

                rc = seccomp_load(ctx);
                if (rc >= 0)
                        continue;

                /* Permission problems are fatal, any other load failure merely skips this architecture. */
                if (IN_SET(rc, -EPERM, -EACCES))
                        return rc;

                log_debug_errno(rc, "Failed to enable personality lock for architecture %s, skipping: %m", seccomp_arch_to_string(arch));
        }

        return 0;
}

View File

@ -78,6 +78,7 @@ int seccomp_protect_sysctl(void);
int seccomp_restrict_address_families(Set *address_families, bool whitelist);
int seccomp_restrict_realtime(void);
int seccomp_memory_deny_write_execute(void);
int seccomp_lock_personality(unsigned long personality);
extern const uint32_t seccomp_local_archs[];

View File

@ -21,6 +21,7 @@
#include <stdlib.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/poll.h>
#include <sys/shm.h>
#include <sys/types.h>
@ -47,7 +48,6 @@
# define SECCOMP_RESTRICT_ADDRESS_FAMILIES_BROKEN 0
#endif
static void test_seccomp_arch_to_string(void) {
uint32_t a, b;
const char *name;
@ -565,6 +565,70 @@ static void test_load_syscall_filter_set_raw(void) {
assert_se(wait_for_terminate_and_warn("syscallrawseccomp", pid, true) == EXIT_SUCCESS);
}
/* Checks seccomp_lock_personality(): in a forked child the personality is locked to the current (opinionated)
 * one, then re-selecting that same personality must keep working while every other value passed to
 * personality() must fail with EPERM. The parent only waits for the child and expects a clean exit. */
static void test_lock_personality(void) {
unsigned long current;
pid_t pid;
/* The test is silently skipped when seccomp is unavailable or when not running as root. */
if (!is_seccomp_available())
return;
if (geteuid() != 0)
return;
assert_se(opinionated_personality(&current) >= 0);
log_info("current personality=%lu", current);
/* The lock is installed in a child process, so that it only affects that child. */
pid = fork();
assert_se(pid >= 0);
if (pid == 0) {
assert_se(seccomp_lock_personality(current) >= 0);
/* Selecting the locked personality itself is still permitted. */
assert_se((unsigned long) personality(current) == current);
/* Before each denied call errno is preset to an unrelated value (EUCLEAN), so that the subsequent check for
 * EPERM can only pass if the call itself set it. */
errno = EUCLEAN;
assert_se(personality(PER_LINUX | ADDR_NO_RANDOMIZE) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_LINUX | MMAP_PAGE_ZERO) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_LINUX | ADDR_COMPAT_LAYOUT) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_LINUX | READ_IMPLIES_EXEC) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_LINUX_32BIT) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_SVR4) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_BSD) == -1 && errno == EPERM);
errno = EUCLEAN;
/* Whichever of PER_LINUX/PER_LINUX32 is not the locked one must be refused as well. */
assert_se(personality(current == PER_LINUX ? PER_LINUX32 : PER_LINUX) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_LINUX32_3GB) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PER_UW7) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(0x42) == -1 && errno == EPERM);
errno = EUCLEAN;
assert_se(personality(PERSONALITY_INVALID) == -1 && errno == EPERM); /* maybe remove this later */
/* After all the refused attempts the locked personality must still be selectable and unchanged. */
assert_se((unsigned long) personality(current) == current);
_exit(EXIT_SUCCESS);
}
assert_se(wait_for_terminate_and_warn("lockpersonalityseccomp", pid, true) == EXIT_SUCCESS);
}
int main(int argc, char *argv[]) {
log_set_max_level(LOG_DEBUG);
@ -581,6 +645,7 @@ int main(int argc, char *argv[]) {
test_memory_deny_write_execute_shmat();
test_restrict_archs();
test_load_syscall_filter_set_raw();
test_lock_personality();
return 0;
}