mirror of
https://github.com/systemd/systemd.git
synced 2025-03-28 02:50:16 +03:00
core: add new ReadOnlySystem= and ProtectedHome= settings for service units
ReadOnlySystem= uses fs namespaces to mount /usr and /boot read-only for a service. ProtectedHome= uses fs namespaces to mount /home and /run/user inaccessible or read-only for a service. This patch also enables these settings for all our long-running services. Together they should be a good building block for a minimal service sandbox, removing the ability for services to modify the operating system or access the user's private data.
This commit is contained in:
parent
85b5673b33
commit
417116f234
@ -764,7 +764,7 @@
|
||||
capability sets as documented in
|
||||
<citerefentry><refentrytitle>cap_from_text</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
|
||||
Note that these capability sets are
|
||||
usually influenced by the capabilities
|
||||
usually influenced (and filtered) by the capabilities
|
||||
attached to the executed file. Due to
|
||||
that
|
||||
<varname>CapabilityBoundingSet=</varname>
|
||||
@ -934,6 +934,63 @@
|
||||
accessible).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>ReadOnlySystem=</varname></term>
|
||||
|
||||
<listitem><para>Takes a boolean
|
||||
argument. If true, mounts the
|
||||
<filename>/usr</filename> and
|
||||
<filename>/boot</filename> directories
|
||||
read-only for processes invoked by
|
||||
this unit. This setting ensures that
|
||||
any modification of the vendor
|
||||
supplied operating system is
|
||||
prohibited for the service. It is
|
||||
recommended to enable this setting for
|
||||
all long-running services, unless they
|
||||
are involved with system updates or
|
||||
need to modify the operating system in
|
||||
other ways. Note however, that
|
||||
processes retaining the CAP_SYS_ADMIN
|
||||
capability can undo the effect of this
|
||||
setting. This setting is hence
|
||||
particularly useful for daemons which
|
||||
have this capability removed, for
|
||||
example with
|
||||
<varname>CapabilityBoundingSet=</varname>. Defaults
|
||||
to off.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>ProtectedHome=</varname></term>
|
||||
|
||||
<listitem><para>Takes a boolean
|
||||
argument or
|
||||
<literal>read-only</literal>. If true,
|
||||
the directories
|
||||
<filename>/home</filename> and
|
||||
<filename>/run/user</filename> are
|
||||
made inaccessible and empty for
|
||||
processes invoked by this unit. If set
|
||||
to <literal>read-only</literal> the
|
||||
two directories are made read-only
|
||||
instead. It is recommended to enable
|
||||
this setting for all long-running
|
||||
services (in particular network-facing
|
||||
ones), to ensure they cannot get access
|
||||
to private user data, unless the
|
||||
services actually require access to
|
||||
the user's private data. Note however,
|
||||
that processes retaining the
|
||||
CAP_SYS_ADMIN capability can undo the
|
||||
effect of this setting. This setting
|
||||
is hence particularly useful for
|
||||
daemons which have this capability
|
||||
removed, for example with
|
||||
<varname>CapabilityBoundingSet=</varname>. Defaults
|
||||
to off.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>MountFlags=</varname></term>
|
||||
|
||||
@ -968,6 +1025,8 @@
|
||||
namespace related options
|
||||
(<varname>PrivateTmp=</varname>,
|
||||
<varname>PrivateDevices=</varname>,
|
||||
<varname>ReadOnlySystem=</varname>,
|
||||
<varname>ProtectedHome=</varname>,
|
||||
<varname>ReadOnlyDirectories=</varname>,
|
||||
<varname>InaccessibleDirectories=</varname>
|
||||
and
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "capability.h"
|
||||
#include "env-util.h"
|
||||
#include "af-list.h"
|
||||
#include "namespace.h"
|
||||
|
||||
#ifdef HAVE_SECCOMP
|
||||
#include "seccomp-util.h"
|
||||
@ -44,6 +45,8 @@ BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutp
|
||||
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput);
|
||||
|
||||
static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_protected_home, protected_home, ProtectedHome);
|
||||
|
||||
static int property_get_environment_files(
|
||||
sd_bus *bus,
|
||||
const char *path,
|
||||
@ -626,6 +629,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProtectedHome", "s", bus_property_get_protected_home, offsetof(ExecContext, protected_home), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ReadOnlySystem", "b", bus_property_get_bool, offsetof(ExecContext, read_only_system), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
|
@ -1569,7 +1569,9 @@ int exec_spawn(ExecCommand *command,
|
||||
!strv_isempty(context->inaccessible_dirs) ||
|
||||
context->mount_flags != 0 ||
|
||||
(context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
|
||||
context->private_devices) {
|
||||
context->private_devices ||
|
||||
context->read_only_system ||
|
||||
context->protected_home != PROTECTED_HOME_NO) {
|
||||
|
||||
char *tmp = NULL, *var = NULL;
|
||||
|
||||
@ -1593,8 +1595,9 @@ int exec_spawn(ExecCommand *command,
|
||||
tmp,
|
||||
var,
|
||||
context->private_devices,
|
||||
context->protected_home,
|
||||
context->read_only_system,
|
||||
context->mount_flags);
|
||||
|
||||
if (err < 0) {
|
||||
r = EXIT_NAMESPACE;
|
||||
goto fail_child;
|
||||
@ -2111,6 +2114,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
|
||||
"%sPrivateTmp: %s\n"
|
||||
"%sPrivateNetwork: %s\n"
|
||||
"%sPrivateDevices: %s\n"
|
||||
"%sProtectedHome: %s\n"
|
||||
"%sReadOnlySystem: %s\n"
|
||||
"%sIgnoreSIGPIPE: %s\n",
|
||||
prefix, c->umask,
|
||||
prefix, c->working_directory ? c->working_directory : "/",
|
||||
@ -2119,6 +2124,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
|
||||
prefix, yes_no(c->private_tmp),
|
||||
prefix, yes_no(c->private_network),
|
||||
prefix, yes_no(c->private_devices),
|
||||
prefix, protected_home_to_string(c->protected_home),
|
||||
prefix, yes_no(c->read_only_system),
|
||||
prefix, yes_no(c->ignore_sigpipe));
|
||||
|
||||
STRV_FOREACH(e, c->environment)
|
||||
|
@ -39,6 +39,7 @@ typedef struct ExecRuntime ExecRuntime;
|
||||
#include "set.h"
|
||||
#include "fdset.h"
|
||||
#include "missing.h"
|
||||
#include "namespace.h"
|
||||
|
||||
typedef enum ExecInput {
|
||||
EXEC_INPUT_NULL,
|
||||
@ -156,6 +157,8 @@ struct ExecContext {
|
||||
bool private_tmp;
|
||||
bool private_network;
|
||||
bool private_devices;
|
||||
bool read_only_system;
|
||||
ProtectedHome protected_home;
|
||||
|
||||
bool no_new_privileges;
|
||||
|
||||
|
@ -80,6 +80,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv, 0,
|
||||
$1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp)
|
||||
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
|
||||
$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices)
|
||||
$1.ReadOnlySystem, config_parse_bool, 0, offsetof($1, exec_context.read_only_system)
|
||||
$1.ProtectedHome, config_parse_protected_home, 0, offsetof($1, exec_context)
|
||||
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context)
|
||||
$1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality)
|
||||
$1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.runtime_directory_mode)
|
||||
|
@ -3044,6 +3044,49 @@ int config_parse_no_new_privileges(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Config parser callback for the ProtectedHome= setting.
 *
 * "data" is used as the ExecContext whose protected_home field is updated.
 * rvalue is first handed to parse_boolean(): a positive result selects
 * PROTECTED_HOME_YES and zero selects PROTECTED_HOME_NO. Any other result
 * falls back to protected_home_from_string(). If that lookup also fails,
 * the problem is logged via log_syntax() and the assignment is ignored,
 * leaving protected_home untouched.
 *
 * Always returns 0, so an unparsable value never aborts unit loading. */
int config_parse_protected_home(
|
||||
const char* unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
ExecContext *c = data;
|
||||
int k;
|
||||
|
||||
assert(filename);
|
||||
assert(lvalue);
|
||||
assert(rvalue);
|
||||
assert(data);
|
||||
|
||||
/* Our enum shall be a superset of booleans, hence first try
|
||||
* to parse as boolean, and then as enum */
|
||||
|
||||
k = parse_boolean(rvalue);
|
||||
if (k > 0)
|
||||
c->protected_home = PROTECTED_HOME_YES;
|
||||
else if (k == 0)
|
||||
c->protected_home = PROTECTED_HOME_NO;
|
||||
else {
|
||||
ProtectedHome h;
|
||||
|
||||
h = protected_home_from_string(rvalue);
|
||||
if (h < 0){
|
||||
/* Report EINVAL: h is a negative ProtectedHome value, not a
 * negative errno, so negating it would log an unrelated code. */
log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse protected home value, ignoring: %s", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
c->protected_home = h;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define FOLLOW_MAX 8
|
||||
|
||||
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
|
||||
|
@ -97,6 +97,7 @@ int config_parse_set_status(const char *unit, const char *filename, unsigned lin
|
||||
int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||
int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||
int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||
int config_parse_protected_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
|
||||
|
||||
/* gperf prototypes */
|
||||
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
|
||||
|
@ -331,6 +331,8 @@ int setup_namespace(
|
||||
char* tmp_dir,
|
||||
char* var_tmp_dir,
|
||||
bool private_dev,
|
||||
ProtectedHome protected_home,
|
||||
bool read_only_system,
|
||||
unsigned mount_flags) {
|
||||
|
||||
BindMount *m, *mounts = NULL;
|
||||
@ -347,7 +349,9 @@ int setup_namespace(
|
||||
strv_length(read_write_dirs) +
|
||||
strv_length(read_only_dirs) +
|
||||
strv_length(inaccessible_dirs) +
|
||||
private_dev;
|
||||
private_dev +
|
||||
(protected_home != PROTECTED_HOME_NO ? 2 : 0) +
|
||||
(read_only_system ? 2 : 0);
|
||||
|
||||
if (n > 0) {
|
||||
m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
|
||||
@ -381,6 +385,18 @@ int setup_namespace(
|
||||
m++;
|
||||
}
|
||||
|
||||
if (protected_home != PROTECTED_HOME_NO) {
|
||||
r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (read_only_system) {
|
||||
r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
assert(mounts + n == m);
|
||||
|
||||
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
|
||||
@ -581,3 +597,11 @@ fail:
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Textual names of the ProtectedHome values, indexed by enum value.
 * "yes" and "no" are spelled like booleans; "read-only" is the one
 * enum-only spelling. */
static const char *const protected_home_table[_PROTECTED_HOME_MAX] = {
|
||||
[PROTECTED_HOME_NO] = "no",
|
||||
[PROTECTED_HOME_YES] = "yes",
|
||||
[PROTECTED_HOME_READ_ONLY] = "read-only",
|
||||
};
|
||||
|
||||
/* NOTE(review): presumably expands to the protected_home_to_string() and
 * protected_home_from_string() helpers declared in namespace.h, backed by
 * the table above -- confirm against the macro definition. */
DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome);
|
||||
|
@ -23,12 +23,24 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "macro.h"
|
||||
|
||||
/* Mode of the ProtectedHome= setting, consumed by setup_namespace(). */
typedef enum ProtectedHome {
|
||||
/* No extra mounts are set up for /home and /run/user. */
PROTECTED_HOME_NO,
|
||||
/* /home and /run/user are mounted inaccessible. */
PROTECTED_HOME_YES,
|
||||
/* /home and /run/user are mounted read-only. */
PROTECTED_HOME_READ_ONLY,
|
||||
/* Number of valid values; sizes the string lookup table. */
_PROTECTED_HOME_MAX,
|
||||
/* Negative sentinel; callers test lookup results with "< 0". */
_PROTECTED_HOME_INVALID = -1
|
||||
} ProtectedHome;
|
||||
|
||||
/* Sets up the mount namespace for a service.
 *
 * protected_home: any value other than PROTECTED_HOME_NO adds mounts for
 *     "-/home" and "-/run/user", read-only for PROTECTED_HOME_READ_ONLY
 *     and inaccessible otherwise.
 * read_only_system: if true, adds read-only mounts for "/usr" and "-/boot".
 *
 * Returns a negative value on failure (callers check "r < 0").
 * NOTE(review): the "-" path prefix presumably marks the path as optional;
 * confirm against append_mounts(). */
int setup_namespace(char **read_write_dirs,
|
||||
char **read_only_dirs,
|
||||
char **inaccessible_dirs,
|
||||
char *tmp_dir,
|
||||
char *var_tmp_dir,
|
||||
bool private_dev,
|
||||
ProtectedHome protected_home,
|
||||
bool read_only_system,
|
||||
unsigned mount_flags);
|
||||
|
||||
int setup_tmp_dirs(const char *id,
|
||||
@ -36,3 +48,6 @@ int setup_tmp_dirs(const char *id,
|
||||
char **var_tmp_dir);
|
||||
|
||||
int setup_netns(int netns_storage_socket[2]);
|
||||
|
||||
const char* protected_home_to_string(ProtectedHome p) _const_;
|
||||
ProtectedHome protected_home_from_string(const char *s) _pure_;
|
||||
|
@ -60,6 +60,8 @@ int main(int argc, char *argv[]) {
|
||||
tmp_dir,
|
||||
var_tmp_dir,
|
||||
true,
|
||||
PROTECTED_HOME_NO,
|
||||
false,
|
||||
0);
|
||||
if (r < 0) {
|
||||
log_error("Failed to setup namespace: %s", strerror(-r));
|
||||
|
@ -18,3 +18,5 @@ WatchdogSec=1min
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
@ -17,6 +17,8 @@ SupplementaryGroups=systemd-journal
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
||||
[Install]
|
||||
Also=systemd-journal-gatewayd.socket
|
||||
|
@ -20,6 +20,8 @@ RestartSec=0
|
||||
NotifyAccess=all
|
||||
StandardOutput=null
|
||||
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
WatchdogSec=1min
|
||||
|
||||
# Increase the default a bit in order to allow many simultaneous
|
||||
|
@ -18,3 +18,5 @@ WatchdogSec=1min
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
@ -25,6 +25,8 @@ RestartSec=0
|
||||
BusName=org.freedesktop.login1
|
||||
CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG
|
||||
WatchdogSec=1min
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
||||
# Increase the default a bit in order to allow many simultaneous
|
||||
# logins since we keep one fd open per session.
|
||||
|
@ -20,3 +20,5 @@ WatchdogSec=1min
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
PrivateNetwork=yes
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
@ -20,6 +20,8 @@ Restart=always
|
||||
RestartSec=0
|
||||
ExecStart=@rootlibexecdir@/systemd-networkd
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
WatchdogSec=1min
|
||||
|
||||
[Install]
|
||||
|
@ -16,6 +16,8 @@ Restart=always
|
||||
RestartSec=0
|
||||
ExecStart=@rootlibexecdir@/systemd-resolved
|
||||
CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
@ -16,3 +16,5 @@ BusName=org.freedesktop.timedate1
|
||||
CapabilityBoundingSet=CAP_SYS_TIME
|
||||
WatchdogSec=1min
|
||||
PrivateTmp=yes
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
@ -23,6 +23,8 @@ ExecStart=@rootlibexecdir@/systemd-timesyncd
|
||||
CapabilityBoundingSet=CAP_SYS_TIME CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
|
||||
PrivateTmp=yes
|
||||
PrivateDevices=yes
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
WatchdogSec=1min
|
||||
|
||||
[Install]
|
||||
|
@ -22,3 +22,5 @@ Restart=always
|
||||
RestartSec=0
|
||||
ExecStart=@rootlibexecdir@/systemd-udevd
|
||||
MountFlags=slave
|
||||
ReadOnlySystem=yes
|
||||
ProtectedHome=yes
|
||||
|
Loading…
x
Reference in New Issue
Block a user