mirror of
https://github.com/systemd/systemd.git
synced 2024-12-22 17:35:35 +03:00
Merge pull request #4450 from poettering/seccompfixes
Various seccomp fixes and NEWS update.
This commit is contained in:
commit
8d3eafa161
11
Makefile.am
11
Makefile.am
@ -1558,6 +1558,11 @@ tests += \
|
|||||||
test-acl-util
|
test-acl-util
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if HAVE_SECCOMP
|
||||||
|
tests += \
|
||||||
|
test-seccomp
|
||||||
|
endif
|
||||||
|
|
||||||
EXTRA_DIST += \
|
EXTRA_DIST += \
|
||||||
test/a.service \
|
test/a.service \
|
||||||
test/basic.target \
|
test/basic.target \
|
||||||
@ -2026,6 +2031,12 @@ test_acl_util_SOURCES = \
|
|||||||
test_acl_util_LDADD = \
|
test_acl_util_LDADD = \
|
||||||
libsystemd-shared.la
|
libsystemd-shared.la
|
||||||
|
|
||||||
|
test_seccomp_SOURCES = \
|
||||||
|
src/test/test-seccomp.c
|
||||||
|
|
||||||
|
test_seccomp_LDADD = \
|
||||||
|
libsystemd-shared.la
|
||||||
|
|
||||||
test_namespace_LDADD = \
|
test_namespace_LDADD = \
|
||||||
libcore.la
|
libcore.la
|
||||||
|
|
||||||
|
97
NEWS
97
NEWS
@ -35,14 +35,14 @@ CHANGES WITH 232 in spe
|
|||||||
ProtectSystem=strict enabled, so they are not able to make any
|
ProtectSystem=strict enabled, so they are not able to make any
|
||||||
permanent modifications to the system.
|
permanent modifications to the system.
|
||||||
|
|
||||||
The nss-systemd module also always resolves root and nobody, making
|
* The nss-systemd module also always resolves root and nobody, making
|
||||||
it possible to have no /etc/passwd or /etc/group files in minimal
|
it possible to have no /etc/passwd or /etc/group files in minimal
|
||||||
container systems.
|
container or chroot environments.
|
||||||
|
|
||||||
* Services may be started with their own user namespace using the new
|
* Services may be started with their own user namespace using the new
|
||||||
PrivateUsers= option. Only root, nobody, and the uid/gid under which
|
boolean PrivateUsers= option. Only root, nobody, and the uid/gid
|
||||||
the service is running are mapped. All other users are mapped to
|
under which the service is running are mapped. All other users are
|
||||||
nobody.
|
mapped to nobody.
|
||||||
|
|
||||||
* Support for the cgroup namespace has been added to systemd-nspawn. If
|
* Support for the cgroup namespace has been added to systemd-nspawn. If
|
||||||
supported by kernel, the container system started by systemd-nspawn
|
supported by kernel, the container system started by systemd-nspawn
|
||||||
@ -57,12 +57,22 @@ CHANGES WITH 232 in spe
|
|||||||
options. This controller requires out-of-tree patches for the kernel
|
options. This controller requires out-of-tree patches for the kernel
|
||||||
and the support is provisional.
|
and the support is provisional.
|
||||||
|
|
||||||
* .automount units may now be transient.
|
* Mount and automount units may now be created transiently
|
||||||
|
(i.e. dynamically at runtime via the bus API, instead of requiring
|
||||||
|
unit files in the file system).
|
||||||
|
|
||||||
* systemd-mount is a new tool which wraps mount(8) to pull in
|
* systemd-mount is a new tool which may mount file systems – much like
|
||||||
additional dependencies through transient .mount and .automount
|
mount(8), optionally pulling in additional dependencies through
|
||||||
units. For example, this automatically runs fsck on the block device
|
transient .mount and .automount units. For example, this tool
|
||||||
before mounting, and allows the automount logic to be used.
|
automatically runs fsck on a backing block device before mounting,
|
||||||
|
and allows the automount logic to be used dynamically from the
|
||||||
|
command line for establishing mount points. This tool is particularly
|
||||||
|
useful when dealing with removable media, as it will ensure fsck is
|
||||||
|
run – if necessary – before the first access and that the file system
|
||||||
|
is quickly unmounted after each access by utilizing the automount
|
||||||
|
logic. This maximizes the chance that the file system on the
|
||||||
|
removable media stays in a clean state, and if it isn't in a clean
|
||||||
|
state is fixed automatically.
|
||||||
|
|
||||||
* LazyUnmount=yes option for mount units has been added to expose the
|
* LazyUnmount=yes option for mount units has been added to expose the
|
||||||
umount --lazy option. Similarly, ForceUnmount=yes exposes the --force
|
umount --lazy option. Similarly, ForceUnmount=yes exposes the --force
|
||||||
@ -75,6 +85,12 @@ CHANGES WITH 232 in spe
|
|||||||
mount the EFI partition on systems where /boot is used for something
|
mount the EFI partition on systems where /boot is used for something
|
||||||
else.
|
else.
|
||||||
|
|
||||||
|
* When operating on GPT disk images for containers, systemd-nspawn will
|
||||||
|
now mount the ESP to /boot or /efi according to the same rules as PID
|
||||||
|
1 running on a host. This allows tools like "bootctl" to operate
|
||||||
|
correctly within such containers, in order to make container images
|
||||||
|
bootable on physical systems.
|
||||||
|
|
||||||
* disk/by-id and disk/by-path symlinks are now created for NVMe drives.
|
* disk/by-id and disk/by-path symlinks are now created for NVMe drives.
|
||||||
|
|
||||||
* Two new user session targets have been added to support running
|
* Two new user session targets have been added to support running
|
||||||
@ -95,7 +111,7 @@ CHANGES WITH 232 in spe
|
|||||||
the top of the process hierarchy (which is usually the init process
|
the top of the process hierarchy (which is usually the init process
|
||||||
of the container).
|
of the container).
|
||||||
|
|
||||||
* systemd-journal-gatewayd learned the --directory option to serve
|
* systemd-journal-gatewayd learned the --directory= option to serve
|
||||||
files from the specified location.
|
files from the specified location.
|
||||||
|
|
||||||
* journalctl --root=… can be used to peruse the journal in the
|
* journalctl --root=… can be used to peruse the journal in the
|
||||||
@ -112,23 +128,26 @@ CHANGES WITH 232 in spe
|
|||||||
a click rate that is different than the one for the vertical wheel.
|
a click rate that is different than the one for the vertical wheel.
|
||||||
|
|
||||||
* systemd-run gained a new --wait option that makes service execution
|
* systemd-run gained a new --wait option that makes service execution
|
||||||
synchronous.
|
synchronous. (Specifically, the command will not return until the
|
||||||
|
specified service binary exited.)
|
||||||
|
|
||||||
systemctl gained a new --wait option that causes the start command to
|
* systemctl gained a new --wait option that causes the start command to
|
||||||
wait until the units being started have terminated again.
|
wait until the units being started have terminated again.
|
||||||
|
|
||||||
* A new journal output mode "short-full" has been added which uses
|
* A new journal output mode "short-full" has been added which displays
|
||||||
timestamps with abbreviated English day names and adds a timezone
|
timestamps with abbreviated English day names and adds a timezone
|
||||||
suffix. Those timestamps include more information and can be parsed
|
suffix. Those timestamps include more information than the default
|
||||||
by journalctl.
|
"short" output mode, and can be passed directly to journalctl's
|
||||||
|
--since= and --until= options.
|
||||||
|
|
||||||
* /etc/resolv.conf will be bind-mounted into containers started by
|
* /etc/resolv.conf will be bind-mounted into containers started by
|
||||||
systemd-nspawn, if possible, so any changes to resolv.conf contents
|
systemd-nspawn, if possible, so any changes to resolv.conf contents
|
||||||
are automatically propagated to the container.
|
are automatically propagated to the container.
|
||||||
|
|
||||||
* The number of instances for socket-activated services originating
|
* The number of instances for socket-activated services originating
|
||||||
from a single IP can be limited with MaxConnectionsPerSource=,
|
from a single IP address can be limited with
|
||||||
extending the existing setting of MaxConnections.
|
MaxConnectionsPerSource=, extending the existing setting of
|
||||||
|
MaxConnections=.
|
||||||
|
|
||||||
* systemd-networkd gained support for vcan ("Virtual CAN") interface
|
* systemd-networkd gained support for vcan ("Virtual CAN") interface
|
||||||
configuration.
|
configuration.
|
||||||
@ -143,21 +162,23 @@ CHANGES WITH 232 in spe
|
|||||||
GenericReceiveOffload=, LargeReceiveOffload= options in the
|
GenericReceiveOffload=, LargeReceiveOffload= options in the
|
||||||
[Link] section of .link files.
|
[Link] section of .link files.
|
||||||
|
|
||||||
Spanning Tree Protocol enablement, Priority, Aging Time, and the
|
* The Spanning Tree Protocol, Priority, Aging Time, and the Default
|
||||||
Default Port VLAN ID can be configured for bridge devices using the
|
Port VLAN ID can be configured for bridge devices using the new STP=,
|
||||||
new STP=, Priority=, AgeingTimeSec=, and DefaultPVID= settings in the
|
Priority=, AgeingTimeSec=, and DefaultPVID= settings in the [Bridge]
|
||||||
[Bridge] section of .netdev files.
|
section of .netdev files.
|
||||||
|
|
||||||
The route table to which routes received over DHCP or RA should be
|
* The route table to which routes received over DHCP or RA should be
|
||||||
added can be configured with the new RouteTable= option in the [DHCP]
|
added can be configured with the new RouteTable= option in the [DHCP]
|
||||||
and [IPv6AcceptRA] sections of .network files.
|
and [IPv6AcceptRA] sections of .network files.
|
||||||
|
|
||||||
Address Resolution Protocol can be disabled on links managed by
|
* The Address Resolution Protocol can be disabled on links managed by
|
||||||
systemd-networkd using the ARP=no setting in the [Link] section of
|
systemd-networkd using the ARP=no setting in the [Link] section of
|
||||||
.network files.
|
.network files.
|
||||||
|
|
||||||
* $SERVICE_RESULT, $EXIT_CODE, $EXIT_STATUS are set for ExecStop= and
|
* New environment variables $SERVICE_RESULT, $EXIT_CODE and
|
||||||
ExecStopPost= commands.
|
$EXIT_STATUS are set for ExecStop= and ExecStopPost= commands, and
|
||||||
|
encode information about the result and exit codes of the current
|
||||||
|
service runtime cycle.
|
||||||
|
|
||||||
* systemd-sysctl will now configure kernel parameters in the order
|
* systemd-sysctl will now configure kernel parameters in the order
|
||||||
they occur in the configuration files. This matches what sysctl
|
they occur in the configuration files. This matches what sysctl
|
||||||
@ -184,6 +205,30 @@ CHANGES WITH 232 in spe
|
|||||||
$SYSTEMD_NSPAWN_SHARE_NS_UTS may be used to control the unsharing of
|
$SYSTEMD_NSPAWN_SHARE_NS_UTS may be used to control the unsharing of
|
||||||
individual namespaces.
|
individual namespaces.
|
||||||
|
|
||||||
|
* "machinectl list" now shows the IP address of running containers in
|
||||||
|
the output, as well as OS release information.
|
||||||
|
|
||||||
|
* "loginctl list" now shows the TTY of each session in the output.
|
||||||
|
|
||||||
|
* sd-bus gained new API calls sd_bus_track_set_recursive(),
|
||||||
|
sd_bus_track_get_recursive(), sd_bus_track_count_name(),
|
||||||
|
sd_bus_track_count_sender(). They permit usage of sd_bus_track peer
|
||||||
|
tracking objects in a "recursive" mode, where a single client can be
|
||||||
|
counted multiple times, if it takes multiple references.
|
||||||
|
|
||||||
|
* sd-bus gained new API calls sd_bus_set_exit_on_disconnect() and
|
||||||
|
sd_bus_get_exit_on_disconnect(). They may be used to make a
|
||||||
|
process using sd-bus automatically exit if the bus connection is
|
||||||
|
severed.
|
||||||
|
|
||||||
|
* Bus clients of the service manager may now "pin" loaded units into
|
||||||
|
memory, by taking an explicit reference on them. This is useful to
|
||||||
|
ensure the client can retrieve runtime data about the service even
|
||||||
|
after the service completed execution. Taking such a reference is
|
||||||
|
available only for privileged clients and should be helpful to watch
|
||||||
|
running services in a race-free manner, and in particular collect
|
||||||
|
information about exit statuses and results.
|
||||||
|
|
||||||
CHANGES WITH 231:
|
CHANGES WITH 231:
|
||||||
|
|
||||||
* In service units the various ExecXYZ= settings have been extended
|
* In service units the various ExecXYZ= settings have been extended
|
||||||
|
@ -1185,18 +1185,19 @@ static void rename_process_from_path(const char *path) {
|
|||||||
#ifdef HAVE_SECCOMP
|
#ifdef HAVE_SECCOMP
|
||||||
|
|
||||||
static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
|
static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
|
||||||
if (!is_seccomp_available()) {
|
|
||||||
log_open();
|
if (is_seccomp_available())
|
||||||
log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
|
return false;
|
||||||
log_close();
|
|
||||||
return true;
|
log_open();
|
||||||
}
|
log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
|
||||||
return false;
|
log_close();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int apply_seccomp(const Unit* u, const ExecContext *c) {
|
static int apply_seccomp(const Unit* u, const ExecContext *c) {
|
||||||
uint32_t negative_action, action;
|
uint32_t negative_action, action;
|
||||||
scmp_filter_ctx *seccomp;
|
scmp_filter_ctx seccomp;
|
||||||
Iterator i;
|
Iterator i;
|
||||||
void *id;
|
void *id;
|
||||||
int r;
|
int r;
|
||||||
@ -1247,7 +1248,7 @@ finish:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int apply_address_families(const Unit* u, const ExecContext *c) {
|
static int apply_address_families(const Unit* u, const ExecContext *c) {
|
||||||
scmp_filter_ctx *seccomp;
|
scmp_filter_ctx seccomp;
|
||||||
Iterator i;
|
Iterator i;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
@ -1256,13 +1257,9 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
|
|||||||
if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
|
if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
|
||||||
if (!seccomp)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
return r;
|
||||||
|
|
||||||
if (c->address_families_whitelist) {
|
if (c->address_families_whitelist) {
|
||||||
int af, first = 0, last = 0;
|
int af, first = 0, last = 0;
|
||||||
@ -1359,10 +1356,6 @@ static int apply_address_families(const Unit* u, const ExecContext *c) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
r = seccomp_load(seccomp);
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
@ -1371,7 +1364,7 @@ finish:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
|
static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
|
||||||
scmp_filter_ctx *seccomp;
|
scmp_filter_ctx seccomp;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
assert(c);
|
assert(c);
|
||||||
@ -1379,13 +1372,9 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
|
|||||||
if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
|
if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
|
||||||
if (!seccomp)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
return r;
|
||||||
|
|
||||||
r = seccomp_rule_add(
|
r = seccomp_rule_add(
|
||||||
seccomp,
|
seccomp,
|
||||||
@ -1405,10 +1394,6 @@ static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c)
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
goto finish;
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
r = seccomp_load(seccomp);
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
@ -1423,7 +1408,7 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
|
|||||||
SCHED_IDLE,
|
SCHED_IDLE,
|
||||||
};
|
};
|
||||||
|
|
||||||
scmp_filter_ctx *seccomp;
|
scmp_filter_ctx seccomp;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
int r, p, max_policy = 0;
|
int r, p, max_policy = 0;
|
||||||
|
|
||||||
@ -1432,13 +1417,9 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
|
|||||||
if (skip_seccomp_unavailable(u, "RestrictRealtime="))
|
if (skip_seccomp_unavailable(u, "RestrictRealtime="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
|
||||||
if (!seccomp)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
return r;
|
||||||
|
|
||||||
/* Determine the highest policy constant we want to allow */
|
/* Determine the highest policy constant we want to allow */
|
||||||
for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
|
for (i = 0; i < ELEMENTSOF(permitted_policies); i++)
|
||||||
@ -1482,10 +1463,6 @@ static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
goto finish;
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
r = seccomp_load(seccomp);
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
@ -1494,7 +1471,7 @@ finish:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
|
static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
|
||||||
scmp_filter_ctx *seccomp;
|
scmp_filter_ctx seccomp;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
assert(c);
|
assert(c);
|
||||||
@ -1505,13 +1482,9 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
|
|||||||
if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
|
if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
|
||||||
if (!seccomp)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
return r;
|
||||||
|
|
||||||
r = seccomp_rule_add(
|
r = seccomp_rule_add(
|
||||||
seccomp,
|
seccomp,
|
||||||
@ -1521,10 +1494,6 @@ static int apply_protect_sysctl(Unit *u, const ExecContext *c) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
goto finish;
|
goto finish;
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
r = seccomp_load(seccomp);
|
||||||
|
|
||||||
finish:
|
finish:
|
||||||
@ -1533,56 +1502,17 @@ finish:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
|
static int apply_protect_kernel_modules(Unit *u, const ExecContext *c) {
|
||||||
static const int module_syscalls[] = {
|
|
||||||
SCMP_SYS(delete_module),
|
|
||||||
SCMP_SYS(finit_module),
|
|
||||||
SCMP_SYS(init_module),
|
|
||||||
};
|
|
||||||
|
|
||||||
scmp_filter_ctx *seccomp;
|
|
||||||
unsigned i;
|
|
||||||
int r;
|
|
||||||
|
|
||||||
assert(c);
|
assert(c);
|
||||||
|
|
||||||
/* Turn of module syscalls on ProtectKernelModules=yes */
|
/* Turn off module syscalls on ProtectKernelModules=yes */
|
||||||
|
|
||||||
if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
|
if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
|
||||||
if (!seccomp)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
for (i = 0; i < ELEMENTSOF(module_syscalls); i++) {
|
|
||||||
r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM),
|
|
||||||
module_syscalls[i], 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
|
||||||
|
|
||||||
finish:
|
|
||||||
seccomp_release(seccomp);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int apply_private_devices(Unit *u, const ExecContext *c) {
|
static int apply_private_devices(Unit *u, const ExecContext *c) {
|
||||||
const SystemCallFilterSet *set;
|
|
||||||
scmp_filter_ctx *seccomp;
|
|
||||||
const char *sys;
|
|
||||||
bool syscalls_found = false;
|
|
||||||
int r;
|
|
||||||
|
|
||||||
assert(c);
|
assert(c);
|
||||||
|
|
||||||
/* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
|
/* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
|
||||||
@ -1590,61 +1520,7 @@ static int apply_private_devices(Unit *u, const ExecContext *c) {
|
|||||||
if (skip_seccomp_unavailable(u, "PrivateDevices="))
|
if (skip_seccomp_unavailable(u, "PrivateDevices="))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
return seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
|
||||||
if (!seccomp)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
for (set = syscall_filter_sets; set->set_name; set++)
|
|
||||||
if (streq(set->set_name, "@raw-io")) {
|
|
||||||
syscalls_found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We should never fail here */
|
|
||||||
if (!syscalls_found) {
|
|
||||||
r = -EOPNOTSUPP;
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
NULSTR_FOREACH(sys, set->value) {
|
|
||||||
int id;
|
|
||||||
bool add = true;
|
|
||||||
|
|
||||||
#ifndef __NR_s390_pci_mmio_read
|
|
||||||
if (streq(sys, "s390_pci_mmio_read"))
|
|
||||||
add = false;
|
|
||||||
#endif
|
|
||||||
#ifndef __NR_s390_pci_mmio_write
|
|
||||||
if (streq(sys, "s390_pci_mmio_write"))
|
|
||||||
add = false;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!add)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
id = seccomp_syscall_resolve_name(sys);
|
|
||||||
|
|
||||||
r = seccomp_rule_add(
|
|
||||||
seccomp,
|
|
||||||
SCMP_ACT_ERRNO(EPERM),
|
|
||||||
id, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0)
|
|
||||||
goto finish;
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
|
||||||
|
|
||||||
finish:
|
|
||||||
seccomp_release(seccomp);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -1890,9 +1766,9 @@ static int setup_private_users(uid_t uid, gid_t gid) {
|
|||||||
asprintf(&uid_map,
|
asprintf(&uid_map,
|
||||||
"0 0 1\n" /* Map root → root */
|
"0 0 1\n" /* Map root → root */
|
||||||
UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
|
UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
|
||||||
uid, uid); /* The case where the above is the same */
|
uid, uid);
|
||||||
else
|
else
|
||||||
uid_map = strdup("0 0 1\n");
|
uid_map = strdup("0 0 1\n"); /* The case where the above is the same */
|
||||||
if (!uid_map)
|
if (!uid_map)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
@ -2618,6 +2618,7 @@ int config_parse_documentation(const char *unit,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SECCOMP
|
#ifdef HAVE_SECCOMP
|
||||||
|
|
||||||
static int syscall_filter_parse_one(
|
static int syscall_filter_parse_one(
|
||||||
const char *unit,
|
const char *unit,
|
||||||
const char *filename,
|
const char *filename,
|
||||||
@ -2628,27 +2629,29 @@ static int syscall_filter_parse_one(
|
|||||||
bool warn) {
|
bool warn) {
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
if (*t == '@') {
|
if (t[0] == '@') {
|
||||||
const SystemCallFilterSet *set;
|
const SyscallFilterSet *set;
|
||||||
|
const char *i;
|
||||||
|
|
||||||
for (set = syscall_filter_sets; set->set_name; set++)
|
set = syscall_filter_set_find(t);
|
||||||
if (streq(set->set_name, t)) {
|
if (!set) {
|
||||||
const char *sys;
|
if (warn)
|
||||||
|
log_syntax(unit, LOG_WARNING, filename, line, 0, "Don't know system call group, ignoring: %s", t);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
NULSTR_FOREACH(sys, set->value) {
|
NULSTR_FOREACH(i, set->value) {
|
||||||
r = syscall_filter_parse_one(unit, filename, line, c, invert, sys, false);
|
r = syscall_filter_parse_one(unit, filename, line, c, invert, i, false);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
int id;
|
int id;
|
||||||
|
|
||||||
id = seccomp_syscall_resolve_name(t);
|
id = seccomp_syscall_resolve_name(t);
|
||||||
if (id == __NR_SCMP_ERROR) {
|
if (id == __NR_SCMP_ERROR) {
|
||||||
if (warn)
|
if (warn)
|
||||||
log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
|
log_syntax(unit, LOG_WARNING, filename, line, 0, "Failed to parse system call, ignoring: %s", t);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2662,8 +2665,9 @@ static int syscall_filter_parse_one(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_oom();
|
return log_oom();
|
||||||
} else
|
} else
|
||||||
set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
|
(void) set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2682,8 +2686,7 @@ int config_parse_syscall_filter(
|
|||||||
ExecContext *c = data;
|
ExecContext *c = data;
|
||||||
Unit *u = userdata;
|
Unit *u = userdata;
|
||||||
bool invert = false;
|
bool invert = false;
|
||||||
const char *word, *state;
|
const char *p;
|
||||||
size_t l;
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
assert(filename);
|
assert(filename);
|
||||||
@ -2722,19 +2725,24 @@ int config_parse_syscall_filter(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FOREACH_WORD_QUOTED(word, l, rvalue, state) {
|
p = rvalue;
|
||||||
_cleanup_free_ char *t = NULL;
|
for (;;) {
|
||||||
|
_cleanup_free_ char *word = NULL;
|
||||||
|
|
||||||
t = strndup(word, l);
|
r = extract_first_word(&p, &word, NULL, 0);
|
||||||
if (!t)
|
if (r == 0)
|
||||||
|
break;
|
||||||
|
if (r == -ENOMEM)
|
||||||
return log_oom();
|
return log_oom();
|
||||||
|
if (r < 0) {
|
||||||
|
log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
r = syscall_filter_parse_one(unit, filename, line, c, invert, t, true);
|
r = syscall_filter_parse_one(unit, filename, line, c, invert, word, true);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
if (!isempty(state))
|
|
||||||
log_syntax(unit, LOG_ERR, filename, line, 0, "Trailing garbage, ignoring.");
|
|
||||||
|
|
||||||
/* Turn on NNP, but only if it wasn't configured explicitly
|
/* Turn on NNP, but only if it wasn't configured explicitly
|
||||||
* before, and only if we are in user mode. */
|
* before, and only if we are in user mode. */
|
||||||
|
@ -135,15 +135,9 @@ int setup_seccomp(uint64_t cap_list_retain) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
seccomp = seccomp_init(SCMP_ACT_ALLOW);
|
r = seccomp_init_conservative(&seccomp, SCMP_ACT_ALLOW);
|
||||||
if (!seccomp)
|
if (r < 0)
|
||||||
return log_oom();
|
return log_error_errno(r, "Failed to allocate seccomp object: %m");
|
||||||
|
|
||||||
r = seccomp_add_secondary_archs(seccomp);
|
|
||||||
if (r < 0) {
|
|
||||||
log_error_errno(r, "Failed to add secondary archs to seccomp filter: %m");
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain);
|
r = seccomp_add_default_syscall_filter(seccomp, cap_list_retain);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
@ -171,12 +165,6 @@ int setup_seccomp(uint64_t cap_list_retain) {
|
|||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
|
||||||
if (r < 0) {
|
|
||||||
log_error_errno(r, "Failed to unset NO_NEW_PRIVS: %m");
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = seccomp_load(seccomp);
|
r = seccomp_load(seccomp);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
log_error_errno(r, "Failed to install seccomp audit filter: %m");
|
log_error_errno(r, "Failed to install seccomp audit filter: %m");
|
||||||
|
@ -329,9 +329,9 @@ static int condition_test_needs_update(Condition *c) {
|
|||||||
uint64_t timestamp;
|
uint64_t timestamp;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
r = parse_env_file(p, NULL, "TimestampNSec", &timestamp_str, NULL);
|
r = parse_env_file(p, NULL, "TIMESTAMP_NSEC", &timestamp_str, NULL);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
log_error_errno(-r, "Failed to parse timestamp file '%s', using mtime: %m", p);
|
log_error_errno(r, "Failed to parse timestamp file '%s', using mtime: %m", p);
|
||||||
return true;
|
return true;
|
||||||
} else if (r == 0) {
|
} else if (r == 0) {
|
||||||
log_debug("No data in timestamp file '%s', using mtime", p);
|
log_debug("No data in timestamp file '%s', using mtime", p);
|
||||||
@ -340,12 +340,11 @@ static int condition_test_needs_update(Condition *c) {
|
|||||||
|
|
||||||
r = safe_atou64(timestamp_str, &timestamp);
|
r = safe_atou64(timestamp_str, &timestamp);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
log_error_errno(-r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m",
|
log_error_errno(r, "Failed to parse timestamp value '%s' in file '%s', using mtime: %m", timestamp_str, p);
|
||||||
timestamp_str, p);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
other.st_mtim.tv_nsec = timestamp % NSEC_PER_SEC;
|
timespec_store(&other.st_mtim, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
|
return usr.st_mtim.tv_nsec > other.st_mtim.tv_nsec;
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include "macro.h"
|
#include "macro.h"
|
||||||
#include "seccomp-util.h"
|
#include "seccomp-util.h"
|
||||||
#include "string-util.h"
|
#include "string-util.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
const char* seccomp_arch_to_string(uint32_t c) {
|
const char* seccomp_arch_to_string(uint32_t c) {
|
||||||
|
|
||||||
@ -73,7 +74,34 @@ int seccomp_arch_from_string(const char *n, uint32_t *ret) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
|
int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action) {
|
||||||
|
scmp_filter_ctx seccomp;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
/* Much like seccomp_init(), but tries to be a bit more conservative in its defaults: all secondary archs are
|
||||||
|
* added by default, and NNP is turned off. */
|
||||||
|
|
||||||
|
seccomp = seccomp_init(default_action);
|
||||||
|
if (!seccomp)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
r = seccomp_add_secondary_archs(seccomp);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
*ret = seccomp;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
finish:
|
||||||
|
seccomp_release(seccomp);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
int seccomp_add_secondary_archs(scmp_filter_ctx c) {
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__)
|
#if defined(__i386__) || defined(__x86_64__)
|
||||||
int r;
|
int r;
|
||||||
@ -110,7 +138,6 @@ int seccomp_add_secondary_archs(scmp_filter_ctx *c) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_basic_seccomp_available(void) {
|
static bool is_basic_seccomp_available(void) {
|
||||||
@ -132,28 +159,30 @@ bool is_seccomp_available(void) {
|
|||||||
return cached_enabled;
|
return cached_enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
const SystemCallFilterSet syscall_filter_sets[] = {
|
const SyscallFilterSet syscall_filter_sets[_SYSCALL_FILTER_SET_MAX] = {
|
||||||
{
|
[SYSCALL_FILTER_SET_CLOCK] = {
|
||||||
/* Clock */
|
/* Clock */
|
||||||
.set_name = "@clock",
|
.name = "@clock",
|
||||||
.value =
|
.value =
|
||||||
"adjtimex\0"
|
"adjtimex\0"
|
||||||
"clock_adjtime\0"
|
"clock_adjtime\0"
|
||||||
"clock_settime\0"
|
"clock_settime\0"
|
||||||
"settimeofday\0"
|
"settimeofday\0"
|
||||||
"stime\0"
|
"stime\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_CPU_EMULATION] = {
|
||||||
/* CPU emulation calls */
|
/* CPU emulation calls */
|
||||||
.set_name = "@cpu-emulation",
|
.name = "@cpu-emulation",
|
||||||
.value =
|
.value =
|
||||||
"modify_ldt\0"
|
"modify_ldt\0"
|
||||||
"subpage_prot\0"
|
"subpage_prot\0"
|
||||||
"switch_endian\0"
|
"switch_endian\0"
|
||||||
"vm86\0"
|
"vm86\0"
|
||||||
"vm86old\0"
|
"vm86old\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_DEBUG] = {
|
||||||
/* Debugging/Performance Monitoring/Tracing */
|
/* Debugging/Performance Monitoring/Tracing */
|
||||||
.set_name = "@debug",
|
.name = "@debug",
|
||||||
.value =
|
.value =
|
||||||
"lookup_dcookie\0"
|
"lookup_dcookie\0"
|
||||||
"perf_event_open\0"
|
"perf_event_open\0"
|
||||||
@ -161,11 +190,14 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"process_vm_writev\0"
|
"process_vm_writev\0"
|
||||||
"ptrace\0"
|
"ptrace\0"
|
||||||
"rtas\0"
|
"rtas\0"
|
||||||
|
#ifdef __NR_s390_runtime_instr
|
||||||
"s390_runtime_instr\0"
|
"s390_runtime_instr\0"
|
||||||
|
#endif
|
||||||
"sys_debug_setcontext\0"
|
"sys_debug_setcontext\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_DEFAULT] = {
|
||||||
/* Default list */
|
/* Default list */
|
||||||
.set_name = "@default",
|
.name = "@default",
|
||||||
.value =
|
.value =
|
||||||
"execve\0"
|
"execve\0"
|
||||||
"exit\0"
|
"exit\0"
|
||||||
@ -173,9 +205,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"getrlimit\0" /* make sure processes can query stack size and such */
|
"getrlimit\0" /* make sure processes can query stack size and such */
|
||||||
"rt_sigreturn\0"
|
"rt_sigreturn\0"
|
||||||
"sigreturn\0"
|
"sigreturn\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_IO_EVENT] = {
|
||||||
/* Event loop use */
|
/* Event loop use */
|
||||||
.set_name = "@io-event",
|
.name = "@io-event",
|
||||||
.value =
|
.value =
|
||||||
"_newselect\0"
|
"_newselect\0"
|
||||||
"epoll_create1\0"
|
"epoll_create1\0"
|
||||||
@ -191,9 +224,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"ppoll\0"
|
"ppoll\0"
|
||||||
"pselect6\0"
|
"pselect6\0"
|
||||||
"select\0"
|
"select\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_IPC] = {
|
||||||
/* Message queues, SYSV IPC or other IPC: unusual */
|
/* Message queues, SYSV IPC or other IPC: unusual */
|
||||||
.set_name = "@ipc",
|
.name = "@ipc",
|
||||||
.value = "ipc\0"
|
.value = "ipc\0"
|
||||||
"mq_getsetattr\0"
|
"mq_getsetattr\0"
|
||||||
"mq_notify\0"
|
"mq_notify\0"
|
||||||
@ -215,33 +249,36 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"shmctl\0"
|
"shmctl\0"
|
||||||
"shmdt\0"
|
"shmdt\0"
|
||||||
"shmget\0"
|
"shmget\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_KEYRING] = {
|
||||||
/* Keyring */
|
/* Keyring */
|
||||||
.set_name = "@keyring",
|
.name = "@keyring",
|
||||||
.value =
|
.value =
|
||||||
"add_key\0"
|
"add_key\0"
|
||||||
"keyctl\0"
|
"keyctl\0"
|
||||||
"request_key\0"
|
"request_key\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_MODULE] = {
|
||||||
/* Kernel module control */
|
/* Kernel module control */
|
||||||
.set_name = "@module",
|
.name = "@module",
|
||||||
.value =
|
.value =
|
||||||
"delete_module\0"
|
"delete_module\0"
|
||||||
"finit_module\0"
|
"finit_module\0"
|
||||||
"init_module\0"
|
"init_module\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_MOUNT] = {
|
||||||
/* Mounting */
|
/* Mounting */
|
||||||
.set_name = "@mount",
|
.name = "@mount",
|
||||||
.value =
|
.value =
|
||||||
"chroot\0"
|
"chroot\0"
|
||||||
"mount\0"
|
"mount\0"
|
||||||
"oldumount\0"
|
|
||||||
"pivot_root\0"
|
"pivot_root\0"
|
||||||
"umount2\0"
|
"umount2\0"
|
||||||
"umount\0"
|
"umount\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_NETWORK_IO] = {
|
||||||
/* Network or Unix socket IO, should not be needed if not network facing */
|
/* Network or Unix socket IO, should not be needed if not network facing */
|
||||||
.set_name = "@network-io",
|
.name = "@network-io",
|
||||||
.value =
|
.value =
|
||||||
"accept4\0"
|
"accept4\0"
|
||||||
"accept\0"
|
"accept\0"
|
||||||
@ -264,9 +301,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"socket\0"
|
"socket\0"
|
||||||
"socketcall\0"
|
"socketcall\0"
|
||||||
"socketpair\0"
|
"socketpair\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_OBSOLETE] = {
|
||||||
/* Unusual, obsolete or unimplemented, some unknown even to libseccomp */
|
/* Unusual, obsolete or unimplemented, some unknown even to libseccomp */
|
||||||
.set_name = "@obsolete",
|
.name = "@obsolete",
|
||||||
.value =
|
.value =
|
||||||
"_sysctl\0"
|
"_sysctl\0"
|
||||||
"afs_syscall\0"
|
"afs_syscall\0"
|
||||||
@ -292,9 +330,10 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"uselib\0"
|
"uselib\0"
|
||||||
"ustat\0"
|
"ustat\0"
|
||||||
"vserver\0"
|
"vserver\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_PRIVILEGED] = {
|
||||||
/* Nice grab-bag of all system calls which need superuser capabilities */
|
/* Nice grab-bag of all system calls which need superuser capabilities */
|
||||||
.set_name = "@privileged",
|
.name = "@privileged",
|
||||||
.value =
|
.value =
|
||||||
"@clock\0"
|
"@clock\0"
|
||||||
"@module\0"
|
"@module\0"
|
||||||
@ -331,11 +370,12 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"setuid\0"
|
"setuid\0"
|
||||||
"swapoff\0"
|
"swapoff\0"
|
||||||
"swapon\0"
|
"swapon\0"
|
||||||
"sysctl\0"
|
"_sysctl\0"
|
||||||
"vhangup\0"
|
"vhangup\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_PROCESS] = {
|
||||||
/* Process control, execution, namespaces */
|
/* Process control, execution, namespaces */
|
||||||
.set_name = "@process",
|
.name = "@process",
|
||||||
.value =
|
.value =
|
||||||
"arch_prctl\0"
|
"arch_prctl\0"
|
||||||
"clone\0"
|
"clone\0"
|
||||||
@ -349,19 +389,90 @@ const SystemCallFilterSet syscall_filter_sets[] = {
|
|||||||
"tkill\0"
|
"tkill\0"
|
||||||
"unshare\0"
|
"unshare\0"
|
||||||
"vfork\0"
|
"vfork\0"
|
||||||
}, {
|
},
|
||||||
|
[SYSCALL_FILTER_SET_RAW_IO] = {
|
||||||
/* Raw I/O ports */
|
/* Raw I/O ports */
|
||||||
.set_name = "@raw-io",
|
.name = "@raw-io",
|
||||||
.value =
|
.value =
|
||||||
"ioperm\0"
|
"ioperm\0"
|
||||||
"iopl\0"
|
"iopl\0"
|
||||||
"pciconfig_iobase\0"
|
"pciconfig_iobase\0"
|
||||||
"pciconfig_read\0"
|
"pciconfig_read\0"
|
||||||
"pciconfig_write\0"
|
"pciconfig_write\0"
|
||||||
|
#ifdef __NR_s390_pci_mmio_read
|
||||||
"s390_pci_mmio_read\0"
|
"s390_pci_mmio_read\0"
|
||||||
|
#endif
|
||||||
|
#ifdef __NR_s390_pci_mmio_write
|
||||||
"s390_pci_mmio_write\0"
|
"s390_pci_mmio_write\0"
|
||||||
}, {
|
#endif
|
||||||
.set_name = NULL,
|
},
|
||||||
.value = NULL
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const SyscallFilterSet *syscall_filter_set_find(const char *name) {
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
if (isempty(name) || name[0] != '@')
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++)
|
||||||
|
if (streq(syscall_filter_sets[i].name, name))
|
||||||
|
return syscall_filter_sets + i;
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action) {
|
||||||
|
const char *sys;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(seccomp);
|
||||||
|
assert(set);
|
||||||
|
|
||||||
|
NULSTR_FOREACH(sys, set->value) {
|
||||||
|
int id;
|
||||||
|
|
||||||
|
if (sys[0] == '@') {
|
||||||
|
const SyscallFilterSet *other;
|
||||||
|
|
||||||
|
other = syscall_filter_set_find(sys);
|
||||||
|
if (!other)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
r = seccomp_add_syscall_filter_set(seccomp, other, action);
|
||||||
|
} else {
|
||||||
|
id = seccomp_syscall_resolve_name(sys);
|
||||||
|
if (id == __NR_SCMP_ERROR)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
r = seccomp_rule_add(seccomp, action, id, 0);
|
||||||
|
}
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action) {
|
||||||
|
scmp_filter_ctx seccomp;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(set);
|
||||||
|
|
||||||
|
/* The one-stop solution: allocate a seccomp object, add a filter to it, and apply it */
|
||||||
|
|
||||||
|
r = seccomp_init_conservative(&seccomp, default_action);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
r = seccomp_add_syscall_filter_set(seccomp, set, action);
|
||||||
|
if (r < 0)
|
||||||
|
goto finish;
|
||||||
|
|
||||||
|
r = seccomp_load(seccomp);
|
||||||
|
|
||||||
|
finish:
|
||||||
|
seccomp_release(seccomp);
|
||||||
|
return r;
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -20,18 +20,45 @@
|
|||||||
***/
|
***/
|
||||||
|
|
||||||
#include <seccomp.h>
|
#include <seccomp.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
const char* seccomp_arch_to_string(uint32_t c);
|
const char* seccomp_arch_to_string(uint32_t c);
|
||||||
int seccomp_arch_from_string(const char *n, uint32_t *ret);
|
int seccomp_arch_from_string(const char *n, uint32_t *ret);
|
||||||
|
|
||||||
int seccomp_add_secondary_archs(scmp_filter_ctx *c);
|
int seccomp_init_conservative(scmp_filter_ctx *ret, uint32_t default_action);
|
||||||
|
|
||||||
|
int seccomp_add_secondary_archs(scmp_filter_ctx c);
|
||||||
|
|
||||||
bool is_seccomp_available(void);
|
bool is_seccomp_available(void);
|
||||||
|
|
||||||
typedef struct SystemCallFilterSet {
|
typedef struct SyscallFilterSet {
|
||||||
const char *set_name;
|
const char *name;
|
||||||
const char *value;
|
const char *value;
|
||||||
} SystemCallFilterSet;
|
} SyscallFilterSet;
|
||||||
|
|
||||||
extern const SystemCallFilterSet syscall_filter_sets[];
|
enum {
|
||||||
|
SYSCALL_FILTER_SET_CLOCK,
|
||||||
|
SYSCALL_FILTER_SET_CPU_EMULATION,
|
||||||
|
SYSCALL_FILTER_SET_DEBUG,
|
||||||
|
SYSCALL_FILTER_SET_DEFAULT,
|
||||||
|
SYSCALL_FILTER_SET_IO_EVENT,
|
||||||
|
SYSCALL_FILTER_SET_IPC,
|
||||||
|
SYSCALL_FILTER_SET_KEYRING,
|
||||||
|
SYSCALL_FILTER_SET_MODULE,
|
||||||
|
SYSCALL_FILTER_SET_MOUNT,
|
||||||
|
SYSCALL_FILTER_SET_NETWORK_IO,
|
||||||
|
SYSCALL_FILTER_SET_OBSOLETE,
|
||||||
|
SYSCALL_FILTER_SET_PRIVILEGED,
|
||||||
|
SYSCALL_FILTER_SET_PROCESS,
|
||||||
|
SYSCALL_FILTER_SET_RAW_IO,
|
||||||
|
_SYSCALL_FILTER_SET_MAX
|
||||||
|
};
|
||||||
|
|
||||||
|
extern const SyscallFilterSet syscall_filter_sets[];
|
||||||
|
|
||||||
|
const SyscallFilterSet *syscall_filter_set_find(const char *name);
|
||||||
|
|
||||||
|
int seccomp_add_syscall_filter_set(scmp_filter_ctx seccomp, const SyscallFilterSet *set, uint32_t action);
|
||||||
|
|
||||||
|
int seccomp_load_filter_set(uint32_t default_action, const SyscallFilterSet *set, uint32_t action);
|
||||||
|
103
src/test/test-seccomp.c
Normal file
103
src/test/test-seccomp.c
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
/***
|
||||||
|
This file is part of systemd.
|
||||||
|
|
||||||
|
Copyright 2016 Lennart Poettering
|
||||||
|
|
||||||
|
systemd is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2.1 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
systemd is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
***/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/eventfd.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "fd-util.h"
|
||||||
|
#include "macro.h"
|
||||||
|
#include "process-util.h"
|
||||||
|
#include "seccomp-util.h"
|
||||||
|
|
||||||
|
static void test_seccomp_arch_to_string(void) {
|
||||||
|
uint32_t a, b;
|
||||||
|
const char *name;
|
||||||
|
|
||||||
|
a = seccomp_arch_native();
|
||||||
|
assert_se(a > 0);
|
||||||
|
name = seccomp_arch_to_string(a);
|
||||||
|
assert_se(name);
|
||||||
|
assert_se(seccomp_arch_from_string(name, &b) >= 0);
|
||||||
|
assert_se(a == b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_syscall_filter_set_find(void) {
|
||||||
|
assert_se(!syscall_filter_set_find(NULL));
|
||||||
|
assert_se(!syscall_filter_set_find(""));
|
||||||
|
assert_se(!syscall_filter_set_find("quux"));
|
||||||
|
assert_se(!syscall_filter_set_find("@quux"));
|
||||||
|
|
||||||
|
assert_se(syscall_filter_set_find("@clock") == syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK);
|
||||||
|
assert_se(syscall_filter_set_find("@default") == syscall_filter_sets + SYSCALL_FILTER_SET_DEFAULT);
|
||||||
|
assert_se(syscall_filter_set_find("@raw-io") == syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_filter_sets(void) {
|
||||||
|
unsigned i;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (!is_seccomp_available())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (geteuid() != 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < _SYSCALL_FILTER_SET_MAX; i++) {
|
||||||
|
pid_t pid;
|
||||||
|
|
||||||
|
log_info("Testing %s", syscall_filter_sets[i].name);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
assert_se(pid >= 0);
|
||||||
|
|
||||||
|
if (pid == 0) { /* Child? */
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
if (i == SYSCALL_FILTER_SET_DEFAULT) /* if we look at the default set, whitelist instead of blacklist */
|
||||||
|
r = seccomp_load_filter_set(SCMP_ACT_ERRNO(EPERM), syscall_filter_sets + i, SCMP_ACT_ALLOW);
|
||||||
|
else
|
||||||
|
r = seccomp_load_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + i, SCMP_ACT_ERRNO(EPERM));
|
||||||
|
if (r < 0)
|
||||||
|
_exit(EXIT_FAILURE);
|
||||||
|
|
||||||
|
/* Test the syscall filter with one random system call */
|
||||||
|
fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
|
||||||
|
if (IN_SET(i, SYSCALL_FILTER_SET_IO_EVENT, SYSCALL_FILTER_SET_DEFAULT))
|
||||||
|
assert_se(fd < 0 && errno == EPERM);
|
||||||
|
else {
|
||||||
|
assert_se(fd >= 0);
|
||||||
|
safe_close(fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
_exit(EXIT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_se(wait_for_terminate_and_warn(syscall_filter_sets[i].name, pid, true) == EXIT_SUCCESS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
|
test_seccomp_arch_to_string();
|
||||||
|
test_syscall_filter_set_find();
|
||||||
|
test_filter_sets();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -18,6 +18,7 @@
|
|||||||
***/
|
***/
|
||||||
|
|
||||||
#include "fd-util.h"
|
#include "fd-util.h"
|
||||||
|
#include "fileio.h"
|
||||||
#include "io-util.h"
|
#include "io-util.h"
|
||||||
#include "selinux-util.h"
|
#include "selinux-util.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
@ -32,8 +33,8 @@ static int apply_timestamp(const char *path, struct timespec *ts) {
|
|||||||
*ts,
|
*ts,
|
||||||
*ts
|
*ts
|
||||||
};
|
};
|
||||||
int fd = -1;
|
|
||||||
_cleanup_fclose_ FILE *f = NULL;
|
_cleanup_fclose_ FILE *f = NULL;
|
||||||
|
int fd = -1;
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
assert(path);
|
assert(path);
|
||||||
@ -59,18 +60,20 @@ static int apply_timestamp(const char *path, struct timespec *ts) {
|
|||||||
return log_error_errno(errno, "Failed to create/open timestamp file %s: %m", path);
|
return log_error_errno(errno, "Failed to create/open timestamp file %s: %m", path);
|
||||||
}
|
}
|
||||||
|
|
||||||
f = fdopen(fd, "w");
|
f = fdopen(fd, "we");
|
||||||
if (!f) {
|
if (!f) {
|
||||||
safe_close(fd);
|
safe_close(fd);
|
||||||
return log_error_errno(errno, "Failed to fdopen() timestamp file %s: %m", path);
|
return log_error_errno(errno, "Failed to fdopen() timestamp file %s: %m", path);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) fprintf(f,
|
(void) fprintf(f,
|
||||||
"%s"
|
MESSAGE
|
||||||
"TimestampNSec=" NSEC_FMT "\n",
|
"TIMESTAMP_NSEC=" NSEC_FMT "\n",
|
||||||
MESSAGE, timespec_load_nsec(ts));
|
timespec_load_nsec(ts));
|
||||||
|
|
||||||
fflush(f);
|
r = fflush_and_check(f);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to write timestamp file: %m");
|
||||||
|
|
||||||
if (futimens(fd, twice) < 0)
|
if (futimens(fd, twice) < 0)
|
||||||
return log_error_errno(errno, "Failed to update timestamp on %s: %m", path);
|
return log_error_errno(errno, "Failed to update timestamp on %s: %m", path);
|
||||||
|
Loading…
Reference in New Issue
Block a user