diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 51f873f8cd9..6b4875f0423 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -695,16 +695,25 @@ CapabilityBoundingSet=~CAP_B CAP_C
setgid bits, or filesystem capabilities). This is the simplest and most effective way to ensure that
a process and its children can never elevate privileges again. Defaults to false, but certain
settings override this and ignore the value of this setting. This is the case when
- SystemCallFilter=, SystemCallArchitectures=,
- RestrictAddressFamilies=, RestrictNamespaces=,
- PrivateDevices=, ProtectKernelTunables=,
- ProtectKernelModules=, ProtectKernelLogs=,
- ProtectClock=, MemoryDenyWriteExecute=,
- RestrictRealtime=, RestrictSUIDSGID=, DynamicUser=
- or LockPersonality= are specified. Note that even if this setting is overridden by them,
- systemctl show shows the original value of this setting.
- Also see No New Privileges
- Flag.
+ DynamicUser=,
+ LockPersonality=,
+ MemoryDenyWriteExecute=,
+ PrivateDevices=,
+ ProtectClock=,
+ ProtectHostname=,
+ ProtectKernelLogs=,
+ ProtectKernelModules=,
+ ProtectKernelTunables=,
+ RestrictAddressFamilies=,
+ RestrictNamespaces=,
+ RestrictRealtime=,
+ RestrictSUIDSGID=,
+ SystemCallArchitectures=,
+ SystemCallFilter=, or
+ SystemCallLog= are specified. Note that even when one of these settings overrides
+ it, systemctl show still reports the originally configured value of this setting. Also see
+ No New
+ Privileges Flag.
@@ -1537,14 +1546,14 @@ BindReadOnlyPaths=/var/lib/systemd
unit (see above), and set DevicePolicy=closed (see
systemd.resource-control5
for details). Note that using this setting will disconnect propagation of mounts from the service to the host
- (propagation in the opposite direction continues to work). This means that this setting may not be used for
+ (propagation in the opposite direction continues to work). This means that this setting may not be used for
services which shall be able to install mount points in the main mount namespace. The new
/dev/ will be mounted read-only and 'noexec'. The latter may break old programs which try
to set up executable memory by using
mmap2 of
/dev/zero instead of using MAP_ANON. For this setting the same
restrictions regarding mount propagation and privileges apply as for ReadOnlyPaths= and
- related calls, see above. If turned on and if running in user mode, or in system mode, but without the
+ related calls, see above. If turned on and if running in user mode, or in system mode, but without the
CAP_SYS_ADMIN capability (e.g. setting User=),
NoNewPrivileges=yes is implied.
@@ -1697,6 +1706,10 @@ BindReadOnlyPaths=/var/lib/systemd
the system into the service, it is hence not suitable for services that need to take notice of system
hostname changes dynamically.
+ If this setting is on, but the unit doesn't have the CAP_SYS_ADMIN
+ capability (e.g. services for which User= is set),
+ NoNewPrivileges=yes is implied.
+
@@ -1710,7 +1723,9 @@ BindReadOnlyPaths=/var/lib/systemd
clock, and DeviceAllow=char-rtc r is implied. This ensures /dev/rtc0,
/dev/rtc1, etc. are made read-only to the service. See
systemd.resource-control5
- for the details about DeviceAllow=.
+ for the details about DeviceAllow=. If this setting is on, but the unit
+ doesn't have the CAP_SYS_ADMIN capability (e.g. services for which
+ User= is set), NoNewPrivileges=yes is implied.
@@ -1727,13 +1742,14 @@ BindReadOnlyPaths=/var/lib/systemd
sysctl.d5 mechanism. Few
services need to write to these at runtime; it is hence recommended to turn this on for most services. For this
setting the same restrictions regarding mount propagation and privileges apply as for
- ReadOnlyPaths= and related calls, see above. Defaults to off. If turned on and if running
- in user mode, or in system mode, but without the CAP_SYS_ADMIN capability (e.g. services
- for which User= is set), NoNewPrivileges=yes is implied. Note that this
- option does not prevent indirect changes to kernel tunables effected by IPC calls to other processes. However,
- InaccessiblePaths= may be used to make relevant IPC file system objects inaccessible. If
- ProtectKernelTunables= is set, MountAPIVFS=yes is
- implied.
+ ReadOnlyPaths= and related calls, see above. Defaults to off. If this
+ setting is on, but the unit doesn't have the CAP_SYS_ADMIN capability
+ (e.g. services for which User= is set),
+ NoNewPrivileges=yes is implied. Note that this option does not prevent
+ indirect changes to kernel tunables effected by IPC calls to other processes. However,
+ InaccessiblePaths= may be used to make relevant IPC file system objects
+ inaccessible. If ProtectKernelTunables= is set,
+ MountAPIVFS=yes is implied.
@@ -1752,9 +1768,9 @@ BindReadOnlyPaths=/var/lib/systemd
both privileged and unprivileged. To disable module auto-load feature please see
sysctl.d5
kernel.modules_disabled mechanism and
- /proc/sys/kernel/modules_disabled documentation. If turned on and if running in user
- mode, or in system mode, but without the CAP_SYS_ADMIN capability (e.g. setting
- User=), NoNewPrivileges=yes is implied.
+ /proc/sys/kernel/modules_disabled documentation. If this setting is on,
+ but the unit doesn't have the CAP_SYS_ADMIN capability (e.g. services for
+ which User= is set), NoNewPrivileges=yes is implied.
@@ -1770,7 +1786,10 @@ BindReadOnlyPaths=/var/lib/systemd
system call (not to be confused with the libc API
syslog3
for userspace logging). The kernel exposes its log buffer to userspace via /dev/kmsg and
- /proc/kmsg. If enabled, these are made inaccessible to all the processes in the unit.
+ /proc/kmsg. If enabled, these are made inaccessible to all the processes in the unit.
+ If this setting is on, but the unit doesn't have the CAP_SYS_ADMIN
+ capability (e.g. services for which User= is set),
+ NoNewPrivileges=yes is implied.
@@ -1810,7 +1829,7 @@ BindReadOnlyPaths=/var/lib/systemd
restrictions of this option. Specifically, it is recommended to combine this option with
SystemCallArchitectures=native or similar. If running in user mode, or in system
mode, but without the CAP_SYS_ADMIN capability (e.g. setting
- User=nobody), NoNewPrivileges=yes is implied. By default, no
+ User=), NoNewPrivileges=yes is implied. By default, no
restrictions apply, all address families are accessible to processes. If assigned the empty string,
any previous address family restriction changes are undone. This setting does not affect commands
prefixed with +.
@@ -2040,7 +2059,7 @@ RestrictNamespaces=~cgroup net
explicitly specify killing. This value takes precedence over the one given in
SystemCallErrorNumber=, see below. If running in user mode, or in system mode,
but without the CAP_SYS_ADMIN capability (e.g. setting
- User=nobody), NoNewPrivileges=yes is implied. This feature
+ User=), NoNewPrivileges=yes is implied. This feature
makes use of the Secure Computing Mode 2 interfaces of the kernel ('seccomp filtering') and is useful
for enforcing a minimal sandboxing environment. Note that the execve(),
exit(), exit_group(), getrlimit(),
@@ -2262,7 +2281,7 @@ SystemCallErrorNumber=EPERM
the special identifier native. The special identifier native
implicitly maps to the native architecture of the system (or more precisely: to the architecture the system
manager is compiled for). If running in user mode, or in system mode, but without the
- CAP_SYS_ADMIN capability (e.g. setting User=nobody),
+ CAP_SYS_ADMIN capability (e.g. setting User=),
NoNewPrivileges=yes is implied. By default, this option is set to the empty list, i.e. no
filtering is applied.
@@ -2291,7 +2310,7 @@ SystemCallErrorNumber=EPERM
system calls executed by the unit processes for the listed ones will be logged. If the first
character of the list is ~, the effect is inverted: all system calls except the
listed system calls will be logged. If running in user mode, or in system mode, but without the
- CAP_SYS_ADMIN capability (e.g. setting User=nobody),
+ CAP_SYS_ADMIN capability (e.g. setting User=),
NoNewPrivileges=yes is implied. This feature makes use of the Secure Computing
Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for auditing or setting up a
minimal sandboxing environment. This option may be specified more than once, in which case the filter
diff --git a/src/core/execute.c b/src/core/execute.c
index 158172bd263..35aea2f8301 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1430,21 +1430,21 @@ static bool context_has_no_new_privileges(const ExecContext *c) {
return false;
/* We need NNP if we have any form of seccomp and are unprivileged */
- return context_has_address_families(c) ||
+ return c->lock_personality ||
c->memory_deny_write_execute ||
- c->restrict_realtime ||
- c->restrict_suid_sgid ||
- exec_context_restrict_namespaces_set(c) ||
+ c->private_devices ||
c->protect_clock ||
+ c->protect_hostname ||
c->protect_kernel_tunables ||
c->protect_kernel_modules ||
c->protect_kernel_logs ||
- c->private_devices ||
- context_has_syscall_filters(c) ||
- context_has_syscall_logs(c) ||
+ context_has_address_families(c) ||
+ exec_context_restrict_namespaces_set(c) ||
+ c->restrict_realtime ||
+ c->restrict_suid_sgid ||
!set_isempty(c->syscall_archs) ||
- c->lock_personality ||
- c->protect_hostname;
+ context_has_syscall_filters(c) ||
+ context_has_syscall_logs(c);
}
static bool exec_context_has_credentials(const ExecContext *context) {