mirror of
https://github.com/systemd/systemd.git
synced 2024-11-01 00:51:24 +03:00
core: use LSM BPF functions to implement RestrictFileSystems=
It attaches the LSM BPF program when the system manager starts up. It populates the BPF hash-of-maps map when services that have RestrictFileSystems= set are started, and it cleans up that map when the unit cgroup is pruned. To pass the file descriptor of the BPF map, we add it to the keep_fds array.
This commit is contained in:
parent
184b4f78cf
commit
b1994387d3
@ -33,6 +33,9 @@ typedef enum CGroupController {
|
||||
CGROUP_CONTROLLER_BPF_FOREIGN,
|
||||
CGROUP_CONTROLLER_BPF_SOCKET_BIND,
|
||||
CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
|
||||
/* The BPF hook implementing RestrictFileSystems= is not defined here.
|
||||
* It's applied as late as possible in exec_child() so we don't block
|
||||
* our own unit setup code. */
|
||||
|
||||
_CGROUP_CONTROLLER_MAX,
|
||||
_CGROUP_CONTROLLER_INVALID = -EINVAL,
|
||||
|
@ -37,6 +37,12 @@
|
||||
#include "string-util.h"
|
||||
#include "virt.h"
|
||||
|
||||
#if BPF_FRAMEWORK
|
||||
#include "bpf-dlopen.h"
|
||||
#include "bpf-link.h"
|
||||
#include "bpf/restrict_fs/restrict-fs-skel.h"
|
||||
#endif
|
||||
|
||||
#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
|
||||
|
||||
/* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
|
||||
@ -2736,6 +2742,10 @@ void unit_prune_cgroup(Unit *u) {
|
||||
|
||||
(void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
|
||||
|
||||
#if BPF_FRAMEWORK
|
||||
(void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
|
||||
#endif
|
||||
|
||||
is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
|
||||
|
||||
r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "bpf-lsm.h"
|
||||
#include "cgroup-util.h"
|
||||
#include "cpu-set-util.h"
|
||||
#include "list.h"
|
||||
|
@ -41,6 +41,7 @@
|
||||
#endif
|
||||
#include "async.h"
|
||||
#include "barrier.h"
|
||||
#include "bpf-lsm.h"
|
||||
#include "cap-list.h"
|
||||
#include "capability-util.h"
|
||||
#include "cgroup-setup.h"
|
||||
@ -1685,6 +1686,29 @@ static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
|
||||
return seccomp_restrict_namespaces(c->restrict_namespaces);
|
||||
}
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
/* Tells the caller whether an LSM-BPF-based setting has to be skipped.
 *
 * Returns false when lsm_bpf_supported() reports support. Otherwise logs a debug message for unit 'u'
 * naming the skipped setting ('msg') and returns true. */
static bool skip_lsm_bpf_unsupported(const Unit* u, const char* msg) {
        if (!lsm_bpf_supported()) {
                log_unit_debug(u, "LSM BPF not supported, skipping %s", msg);
                return true;
        }

        return false;
}
|
||||
|
||||
/* Applies the RestrictFileSystems= setting of execution context 'c' to unit 'u'.
 *
 * Returns 0 without doing anything when the setting is not configured or when LSM BPF is not
 * supported; otherwise returns whatever lsm_bpf_unit_restrict_filesystems() returns. */
static int apply_restrict_filesystems(Unit *u, const ExecContext *c) {
        assert(u);
        assert(c);

        /* Short-circuit evaluation keeps the original order: the support check (and its debug log
         * message) only happens when the setting is actually configured. */
        if (!exec_context_restrict_filesystems_set(c) ||
            skip_lsm_bpf_unsupported(u, "RestrictFileSystems="))
                return 0;

        return lsm_bpf_unit_restrict_filesystems(u, c->restrict_filesystems, c->restrict_filesystems_allow_list);
}
|
||||
#endif
|
||||
|
||||
static int apply_lock_personality(const Unit* u, const ExecContext *c) {
|
||||
unsigned long personality;
|
||||
int r;
|
||||
@ -3813,7 +3837,7 @@ static int exec_child(
|
||||
/* In case anything used libc syslog(), close this here, too */
|
||||
closelog();
|
||||
|
||||
int keep_fds[n_fds + 2];
|
||||
int keep_fds[n_fds + 3];
|
||||
memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
|
||||
n_keep_fds = n_fds;
|
||||
|
||||
@ -3823,6 +3847,24 @@ static int exec_child(
|
||||
return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
|
||||
}
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
/* Add the RestrictFileSystems= BPF map fd to keep_fds so that it is not closed by the
 * close_remaining_fds() call that follows. */
if (MANAGER_IS_SYSTEM(unit->manager) && lsm_bpf_supported()) {
        int bpf_map_fd;

        bpf_map_fd = lsm_bpf_map_restrict_fs_fd(unit);
        if (bpf_map_fd < 0) {
                *exit_status = EXIT_FDS;
                /* The negative return value is the error to report; 'r' still holds the result
                 * of an earlier call and does not describe this failure. */
                return log_unit_error_errno(unit, bpf_map_fd, "Failed to get restrict filesystems BPF map fd: %m");
        }

        r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, bpf_map_fd, &bpf_map_fd);
        if (r < 0) {
                *exit_status = EXIT_FDS;
                return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
        }
}
|
||||
#endif
|
||||
|
||||
r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, keep_fds, n_keep_fds);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_FDS;
|
||||
@ -4682,6 +4724,15 @@ static int exec_child(
|
||||
return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
r = apply_restrict_filesystems(unit, context);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_BPF;
|
||||
return log_unit_error_errno(unit, r, "Failed to restrict filesystems: %m");
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
if (!strv_isempty(context->unset_environment)) {
|
||||
@ -4967,6 +5018,8 @@ void exec_context_done(ExecContext *c) {
|
||||
c->apparmor_profile = mfree(c->apparmor_profile);
|
||||
c->smack_process_label = mfree(c->smack_process_label);
|
||||
|
||||
c->restrict_filesystems = set_free(c->restrict_filesystems);
|
||||
|
||||
c->syscall_filter = hashmap_free(c->syscall_filter);
|
||||
c->syscall_archs = set_free(c->syscall_archs);
|
||||
c->address_families = set_free(c->address_families);
|
||||
@ -5734,6 +5787,12 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
|
||||
prefix, strna(s));
|
||||
}
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
/* Dump every configured RestrictFileSystems= entry, one per line.
 * NOTE(review): assumes the set stores the filesystem-name strings directly, so the iterator
 * itself is the string and must not be dereferenced again before being passed to %s; confirm
 * against the code that populates restrict_filesystems. */
if (exec_context_restrict_filesystems_set(c)) {
        char *fs;

        SET_FOREACH(fs, c->restrict_filesystems)
                fprintf(f, "%sRestrictFileSystems: %s\n", prefix, fs);
}
|
||||
#endif
|
||||
|
||||
if (c->network_namespace_path)
|
||||
fprintf(f,
|
||||
"%sNetworkNamespacePath: %s\n",
|
||||
|
@ -314,6 +314,9 @@ struct ExecContext {
|
||||
|
||||
unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
|
||||
|
||||
Set *restrict_filesystems;
|
||||
bool restrict_filesystems_allow_list:1;
|
||||
|
||||
Hashmap *syscall_filter;
|
||||
Set *syscall_archs;
|
||||
int syscall_errno;
|
||||
@ -342,6 +345,13 @@ static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
|
||||
return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
|
||||
}
|
||||
|
||||
static inline bool exec_context_restrict_filesystems_set(const ExecContext *c) {
|
||||
assert(c);
|
||||
|
||||
return c->restrict_filesystems_allow_list ||
|
||||
!set_isempty(c->restrict_filesystems);
|
||||
}
|
||||
|
||||
static inline bool exec_context_with_rootfs(const ExecContext *c) {
|
||||
assert(c);
|
||||
|
||||
|
@ -22,6 +22,9 @@
|
||||
#include "alloc-util.h"
|
||||
#include "apparmor-setup.h"
|
||||
#include "architecture.h"
|
||||
#if HAVE_LIBBPF
|
||||
#include "bpf-lsm.h"
|
||||
#endif
|
||||
#include "build.h"
|
||||
#include "bus-error.h"
|
||||
#include "bus-util.h"
|
||||
|
@ -930,6 +930,14 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
|
||||
r = manager_setup_sigchld_event_source(m);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
#if HAVE_LIBBPF
|
||||
if (MANAGER_IS_SYSTEM(m) && lsm_bpf_supported()) {
|
||||
r = lsm_bpf_setup(m);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (test_run_flags == 0) {
|
||||
@ -1535,6 +1543,10 @@ Manager* manager_free(Manager *m) {
|
||||
m->prefix[dt] = mfree(m->prefix[dt]);
|
||||
free(m->received_credentials);
|
||||
|
||||
#if BPF_FRAMEWORK
|
||||
lsm_bpf_destroy(m->restrict_fs);
|
||||
#endif
|
||||
|
||||
return mfree(m);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user