1
0
mirror of https://github.com/systemd/systemd.git synced 2024-10-27 10:25:37 +03:00

core: add new ReadOnlySystem= and ProtectedHome= settings for service units

ReadOnlySystem= uses fs namespaces to mount /usr and /boot read-only for
a service.

ProtectedHome= uses fs namespaces to mount /home and /run/user
inaccessible or read-only for a service.

This patch also enables these settings for all our long-running services.

Together they should be a good building block for a minimal service
sandbox, removing the ability for services to modify the operating
system or access the user's private data.
This commit is contained in:
Lennart Poettering 2014-06-03 23:41:44 +02:00
parent 85b5673b33
commit 417116f234
21 changed files with 187 additions and 4 deletions

View File

@ -764,7 +764,7 @@
capability sets as documented in capability sets as documented in
<citerefentry><refentrytitle>cap_from_text</refentrytitle><manvolnum>3</manvolnum></citerefentry>. <citerefentry><refentrytitle>cap_from_text</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
Note that these capability sets are Note that these capability sets are
usually influenced by the capabilities usually influenced (and filtered) by the capabilities
attached to the executed file. Due to attached to the executed file. Due to
that that
<varname>CapabilityBoundingSet=</varname> <varname>CapabilityBoundingSet=</varname>
@ -934,6 +934,63 @@
accessible).</para></listitem> accessible).</para></listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><varname>ReadOnlySystem=</varname></term>
<listitem><para>Takes a boolean
argument. If true, mounts the
<filename>/usr</filename> and
<filename>/boot</filename> directories
read-only for processes invoked by
this unit. This setting ensures that
any modification of the vendor
supplied operating system is
prohibited for the service. It is
recommended to enable this setting for
all long-running services, unless they
are involved with system updates or
need to modify the operating system in
other ways. Note however, that
processes retaining the CAP_SYS_ADMIN
capability can undo the effect of this
setting. This setting is hence
particularly useful for daemons which
have this capability removed, for
example with
<varname>CapabilityBoundingSet=</varname>. Defaults
to off.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ProtectedHome=</varname></term>
<listitem><para>Takes a boolean
argument or
<literal>read-only</literal>. If true,
the directories
<filename>/home</filename> and
<filename>/run/user</filename> are
made inaccessible and empty for
processes invoked by this unit. If set
to <literal>read-only</literal> the
two directories are made read-only
instead. It is recommended to enable
this setting for all long-running
services (in particular network-facing
ones), to ensure they cannot get access
to private user data, unless the
services actually require access to
the user's private data. Note however,
that processes retaining the
CAP_SYS_ADMIN capability can undo the
effect of this setting. This setting
is hence particularly useful for
daemons which have this capability
removed, for example with
<varname>CapabilityBoundingSet=</varname>. Defaults
to off.</para></listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><varname>MountFlags=</varname></term> <term><varname>MountFlags=</varname></term>
@ -968,6 +1025,8 @@
namespace related options namespace related options
(<varname>PrivateTmp=</varname>, (<varname>PrivateTmp=</varname>,
<varname>PrivateDevices=</varname>, <varname>PrivateDevices=</varname>,
<varname>ReadOnlySystem=</varname>,
<varname>ProtectedHome=</varname>,
<varname>ReadOnlyDirectories=</varname>, <varname>ReadOnlyDirectories=</varname>,
<varname>InaccessibleDirectories=</varname> <varname>InaccessibleDirectories=</varname>
and and

View File

@ -35,6 +35,7 @@
#include "capability.h" #include "capability.h"
#include "env-util.h" #include "env-util.h"
#include "af-list.h" #include "af-list.h"
#include "namespace.h"
#ifdef HAVE_SECCOMP #ifdef HAVE_SECCOMP
#include "seccomp-util.h" #include "seccomp-util.h"
@ -44,6 +45,8 @@ BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutp
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput);
static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_protected_home, protected_home, ProtectedHome);
static int property_get_environment_files( static int property_get_environment_files(
sd_bus *bus, sd_bus *bus,
const char *path, const char *path,
@ -626,6 +629,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectedHome", "s", bus_property_get_protected_home, offsetof(ExecContext, protected_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ReadOnlySystem", "b", bus_property_get_bool, offsetof(ExecContext, read_only_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST),

View File

@ -1569,7 +1569,9 @@ int exec_spawn(ExecCommand *command,
!strv_isempty(context->inaccessible_dirs) || !strv_isempty(context->inaccessible_dirs) ||
context->mount_flags != 0 || context->mount_flags != 0 ||
(context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) || (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
context->private_devices) { context->private_devices ||
context->read_only_system ||
context->protected_home != PROTECTED_HOME_NO) {
char *tmp = NULL, *var = NULL; char *tmp = NULL, *var = NULL;
@ -1593,8 +1595,9 @@ int exec_spawn(ExecCommand *command,
tmp, tmp,
var, var,
context->private_devices, context->private_devices,
context->protected_home,
context->read_only_system,
context->mount_flags); context->mount_flags);
if (err < 0) { if (err < 0) {
r = EXIT_NAMESPACE; r = EXIT_NAMESPACE;
goto fail_child; goto fail_child;
@ -2111,6 +2114,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
"%sPrivateTmp: %s\n" "%sPrivateTmp: %s\n"
"%sPrivateNetwork: %s\n" "%sPrivateNetwork: %s\n"
"%sPrivateDevices: %s\n" "%sPrivateDevices: %s\n"
"%sProtectedHome: %s\n"
"%sReadOnlySystem: %s\n"
"%sIgnoreSIGPIPE: %s\n", "%sIgnoreSIGPIPE: %s\n",
prefix, c->umask, prefix, c->umask,
prefix, c->working_directory ? c->working_directory : "/", prefix, c->working_directory ? c->working_directory : "/",
@ -2119,6 +2124,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->private_tmp), prefix, yes_no(c->private_tmp),
prefix, yes_no(c->private_network), prefix, yes_no(c->private_network),
prefix, yes_no(c->private_devices), prefix, yes_no(c->private_devices),
prefix, protected_home_to_string(c->protected_home),
prefix, yes_no(c->read_only_system),
prefix, yes_no(c->ignore_sigpipe)); prefix, yes_no(c->ignore_sigpipe));
STRV_FOREACH(e, c->environment) STRV_FOREACH(e, c->environment)

View File

@ -39,6 +39,7 @@ typedef struct ExecRuntime ExecRuntime;
#include "set.h" #include "set.h"
#include "fdset.h" #include "fdset.h"
#include "missing.h" #include "missing.h"
#include "namespace.h"
typedef enum ExecInput { typedef enum ExecInput {
EXEC_INPUT_NULL, EXEC_INPUT_NULL,
@ -156,6 +157,8 @@ struct ExecContext {
bool private_tmp; bool private_tmp;
bool private_network; bool private_network;
bool private_devices; bool private_devices;
bool read_only_system;
ProtectedHome protected_home;
bool no_new_privileges; bool no_new_privileges;

View File

@ -80,6 +80,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv, 0,
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
$1.ReadOnlySystem, config_parse_bool, 0, offsetof($1, exec_context.read_only_system)
$1.ProtectedHome, config_parse_protected_home, 0, offsetof($1, exec_context)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context) $1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context)
$1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality) $1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality)
$1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.runtime_directory_mode) $1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.runtime_directory_mode)

View File

@ -3044,6 +3044,49 @@ int config_parse_no_new_privileges(
return 0; return 0;
} }
/* Config parser callback for the ProtectedHome= setting.
 *
 * Accepts any boolean spelling understood by parse_boolean(), or one of the
 * names understood by protected_home_from_string(), and stores the result in
 * the protected_home field of the ExecContext passed in via 'data'.
 *
 * Always returns 0: an unparsable value is logged and ignored, leaving the
 * previously configured value untouched. */
int config_parse_protected_home(
                const char* unit,
                const char *filename,
                unsigned line,
                const char *section,
                unsigned section_line,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        ExecContext *context = data;
        ProtectedHome mode;
        int b;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        /* The enum is meant to be a superset of booleans, so try the
         * boolean interpretation first and only then the enum names. */
        b = parse_boolean(rvalue);
        if (b >= 0) {
                context->protected_home = b > 0 ? PROTECTED_HOME_YES : PROTECTED_HOME_NO;
                return 0;
        }

        mode = protected_home_from_string(rvalue);
        if (mode < 0) {
                /* NOTE(review): the negated lookup result is used as the error
                 * code here; if the lookup fails with -1 this is 1, which is
                 * not a meaningful errno for a parse failure -- confirm. */
                log_syntax(unit, LOG_ERR, filename, line, -mode, "Failed to parse protected home value, ignoring: %s", rvalue);
                return 0;
        }

        context->protected_home = mode;
        return 0;
}
#define FOLLOW_MAX 8 #define FOLLOW_MAX 8
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) { static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {

View File

@ -97,6 +97,7 @@ int config_parse_set_status(const char *unit, const char *filename, unsigned lin
int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
int config_parse_protected_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
/* gperf prototypes */ /* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);

View File

@ -331,6 +331,8 @@ int setup_namespace(
char* tmp_dir, char* tmp_dir,
char* var_tmp_dir, char* var_tmp_dir,
bool private_dev, bool private_dev,
ProtectedHome protected_home,
bool read_only_system,
unsigned mount_flags) { unsigned mount_flags) {
BindMount *m, *mounts = NULL; BindMount *m, *mounts = NULL;
@ -347,7 +349,9 @@ int setup_namespace(
strv_length(read_write_dirs) + strv_length(read_write_dirs) +
strv_length(read_only_dirs) + strv_length(read_only_dirs) +
strv_length(inaccessible_dirs) + strv_length(inaccessible_dirs) +
private_dev; private_dev +
(protected_home != PROTECTED_HOME_NO ? 2 : 0) +
(read_only_system ? 2 : 0);
if (n > 0) { if (n > 0) {
m = mounts = (BindMount *) alloca(n * sizeof(BindMount)); m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
@ -381,6 +385,18 @@ int setup_namespace(
m++; m++;
} }
if (protected_home != PROTECTED_HOME_NO) {
r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
if (r < 0)
return r;
}
if (read_only_system) {
r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY);
if (r < 0)
return r;
}
assert(mounts + n == m); assert(mounts + n == m);
qsort(mounts, n, sizeof(BindMount), mount_path_compare); qsort(mounts, n, sizeof(BindMount), mount_path_compare);
@ -581,3 +597,11 @@ fail:
return r; return r;
} }
/* String names of the ProtectedHome enum values, indexed by enum value.
 * "no" and "yes" match the boolean spellings of the two boolean-like values. */
static const char *const protected_home_table[_PROTECTED_HOME_MAX] = {
[PROTECTED_HOME_NO] = "no",
[PROTECTED_HOME_YES] = "yes",
[PROTECTED_HOME_READ_ONLY] = "read-only",
};
/* Presumably expands to the protected_home_to_string() and
 * protected_home_from_string() lookup functions built on the table above;
 * the macro definition is not visible here -- TODO confirm. */
DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome);

View File

@ -23,12 +23,24 @@
#include <stdbool.h> #include <stdbool.h>
#include "macro.h"
/* Modes of the ProtectedHome= setting, as consumed by setup_namespace(). */
typedef enum ProtectedHome {
/* Setting is off: no extra mounts for /home and /run/user. */
PROTECTED_HOME_NO,
/* /home and /run/user are mounted as inaccessible. */
PROTECTED_HOME_YES,
/* /home and /run/user are mounted read-only. */
PROTECTED_HOME_READ_ONLY,
/* Number of valid values; used to size the string lookup table. */
_PROTECTED_HOME_MAX,
/* Negative sentinel outside the valid range. */
_PROTECTED_HOME_INVALID = -1
} ProtectedHome;
int setup_namespace(char **read_write_dirs, int setup_namespace(char **read_write_dirs,
char **read_only_dirs, char **read_only_dirs,
char **inaccessible_dirs, char **inaccessible_dirs,
char *tmp_dir, char *tmp_dir,
char *var_tmp_dir, char *var_tmp_dir,
bool private_dev, bool private_dev,
ProtectedHome protected_home,
bool read_only_system,
unsigned mount_flags); unsigned mount_flags);
int setup_tmp_dirs(const char *id, int setup_tmp_dirs(const char *id,
@ -36,3 +48,6 @@ int setup_tmp_dirs(const char *id,
char **var_tmp_dir); char **var_tmp_dir);
int setup_netns(int netns_storage_socket[2]); int setup_netns(int netns_storage_socket[2]);
const char* protected_home_to_string(ProtectedHome p) _const_;
ProtectedHome protected_home_from_string(const char *s) _pure_;

View File

@ -60,6 +60,8 @@ int main(int argc, char *argv[]) {
tmp_dir, tmp_dir,
var_tmp_dir, var_tmp_dir,
true, true,
PROTECTED_HOME_NO,
false,
0); 0);
if (r < 0) { if (r < 0) {
log_error("Failed to setup namespace: %s", strerror(-r)); log_error("Failed to setup namespace: %s", strerror(-r));

View File

@ -18,3 +18,5 @@ WatchdogSec=1min
PrivateTmp=yes PrivateTmp=yes
PrivateDevices=yes PrivateDevices=yes
PrivateNetwork=yes PrivateNetwork=yes
ReadOnlySystem=yes
ProtectedHome=yes

View File

@ -17,6 +17,8 @@ SupplementaryGroups=systemd-journal
PrivateTmp=yes PrivateTmp=yes
PrivateDevices=yes PrivateDevices=yes
PrivateNetwork=yes PrivateNetwork=yes
ReadOnlySystem=yes
ProtectedHome=yes
[Install] [Install]
Also=systemd-journal-gatewayd.socket Also=systemd-journal-gatewayd.socket

View File

@ -20,6 +20,8 @@ RestartSec=0
NotifyAccess=all NotifyAccess=all
StandardOutput=null StandardOutput=null
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID
ReadOnlySystem=yes
ProtectedHome=yes
WatchdogSec=1min WatchdogSec=1min
# Increase the default a bit in order to allow many simultaneous # Increase the default a bit in order to allow many simultaneous

View File

@ -18,3 +18,5 @@ WatchdogSec=1min
PrivateTmp=yes PrivateTmp=yes
PrivateDevices=yes PrivateDevices=yes
PrivateNetwork=yes PrivateNetwork=yes
ReadOnlySystem=yes
ProtectedHome=yes

View File

@ -25,6 +25,8 @@ RestartSec=0
BusName=org.freedesktop.login1 BusName=org.freedesktop.login1
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG
WatchdogSec=1min WatchdogSec=1min
ReadOnlySystem=yes
ProtectedHome=yes
# Increase the default a bit in order to allow many simultaneous # Increase the default a bit in order to allow many simultaneous
# logins since we keep one fd open per session. # logins since we keep one fd open per session.

View File

@ -20,3 +20,5 @@ WatchdogSec=1min
PrivateTmp=yes PrivateTmp=yes
PrivateDevices=yes PrivateDevices=yes
PrivateNetwork=yes PrivateNetwork=yes
ReadOnlySystem=yes
ProtectedHome=yes

View File

@ -20,6 +20,8 @@ Restart=always
RestartSec=0 RestartSec=0
ExecStart=@rootlibexecdir@/systemd-networkd ExecStart=@rootlibexecdir@/systemd-networkd
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
ReadOnlySystem=yes
ProtectedHome=yes
WatchdogSec=1min WatchdogSec=1min
[Install] [Install]

View File

@ -16,6 +16,8 @@ Restart=always
RestartSec=0 RestartSec=0
ExecStart=@rootlibexecdir@/systemd-resolved ExecStart=@rootlibexecdir@/systemd-resolved
CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
ReadOnlySystem=yes
ProtectedHome=yes
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View File

@ -16,3 +16,5 @@ BusName=org.freedesktop.timedate1
CapabilityBoundingSet=CAP_SYS_TIME CapabilityBoundingSet=CAP_SYS_TIME
WatchdogSec=1min WatchdogSec=1min
PrivateTmp=yes PrivateTmp=yes
ReadOnlySystem=yes
ProtectedHome=yes

View File

@ -23,6 +23,8 @@ ExecStart=@rootlibexecdir@/systemd-timesyncd
CapabilityBoundingSet=CAP_SYS_TIME CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CapabilityBoundingSet=CAP_SYS_TIME CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
PrivateTmp=yes PrivateTmp=yes
PrivateDevices=yes PrivateDevices=yes
ReadOnlySystem=yes
ProtectedHome=yes
WatchdogSec=1min WatchdogSec=1min
[Install] [Install]

View File

@ -22,3 +22,5 @@ Restart=always
RestartSec=0 RestartSec=0
ExecStart=@rootlibexecdir@/systemd-udevd ExecStart=@rootlibexecdir@/systemd-udevd
MountFlags=slave MountFlags=slave
ReadOnlySystem=yes
ProtectedHome=yes