From b1994387d3cb50b212fc4815941a8ff40d60cd85 Mon Sep 17 00:00:00 2001
From: Iago Lopez Galeiras
Date: Tue, 5 Oct 2021 13:18:49 +0200
Subject: [PATCH] core: use LSM BPF functions to implement RestrictFileSystems=

It attaches the LSM BPF program when the system manager starts up.
It populates the hash of maps BPF map when services that have
RestrictFileSystems= set start. It cleans up the hash of maps when the
unit cgroup is pruned.

To pass the file descriptor of the BPF map we add it to the keep_fds
array in exec_child(), so that close_remaining_fds() leaves it open. The
array grows by one slot to make room for it.
---
 src/basic/cgroup-util.h |  3 ++
 src/core/cgroup.c       | 10 +++++++
 src/core/cgroup.h       |  1 +
 src/core/execute.c      | 63 ++++++++++++++++++++++++++++++++++++++++-
 src/core/execute.h      | 10 +++++++
 src/core/main.c         |  3 ++
 src/core/manager.c      | 12 ++++++++
 7 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index eec13e18f17..43801ee0f44 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -33,6 +33,9 @@ typedef enum CGroupController {
         CGROUP_CONTROLLER_BPF_FOREIGN,
         CGROUP_CONTROLLER_BPF_SOCKET_BIND,
         CGROUP_CONTROLLER_BPF_RESTRICT_NETWORK_INTERFACES,
+        /* The BPF hook implementing RestrictFileSystems= is not defined here.
+         * It's applied as late as possible in exec_child() so we don't block
+         * our own unit setup code. */
 
         _CGROUP_CONTROLLER_MAX,
         _CGROUP_CONTROLLER_INVALID = -EINVAL,
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 15592596222..2b15310191b 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -37,6 +37,12 @@
 #include "string-util.h"
 #include "virt.h"
 
+#if BPF_FRAMEWORK
+#include "bpf-dlopen.h"
+#include "bpf-link.h"
+#include "bpf/restrict_fs/restrict-fs-skel.h"
+#endif
+
 #define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
 
 /* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
@@ -2736,6 +2742,10 @@ void unit_prune_cgroup(Unit *u) {
 
         (void) unit_get_cpu_usage(u, NULL); /* Cache the last CPU usage value before we destroy the cgroup */
 
+#if BPF_FRAMEWORK
+        (void) lsm_bpf_cleanup(u); /* Remove cgroup from the global LSM BPF map */
+#endif
+
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 8795f2724eb..4413eeaaa0a 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -3,6 +3,7 @@
 
 #include <stdbool.h>
 
+#include "bpf-lsm.h"
 #include "cgroup-util.h"
 #include "cpu-set-util.h"
 #include "list.h"
diff --git a/src/core/execute.c b/src/core/execute.c
index d68e31eb7dd..6397bab315a 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -41,6 +41,7 @@
 #endif
 #include "async.h"
 #include "barrier.h"
+#include "bpf-lsm.h"
 #include "cap-list.h"
 #include "capability-util.h"
 #include "cgroup-setup.h"
@@ -1685,6 +1686,29 @@ static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
         return seccomp_restrict_namespaces(c->restrict_namespaces);
 }
 
+#if HAVE_LIBBPF
+static bool skip_lsm_bpf_unsupported(const Unit* u, const char* msg) {
+        if (lsm_bpf_supported())
+                return false;
+
+        log_unit_debug(u, "LSM BPF not supported, skipping %s", msg);
+        return true;
+}
+
+static int apply_restrict_filesystems(Unit *u, const ExecContext *c) {
+        assert(u);
+        assert(c);
+
+        if (!exec_context_restrict_filesystems_set(c))
+                return 0;
+
+        if (skip_lsm_bpf_unsupported(u, "RestrictFileSystems="))
+                return 0;
+
+        return lsm_bpf_unit_restrict_filesystems(u, c->restrict_filesystems, c->restrict_filesystems_allow_list);
+}
+#endif
+
 static int apply_lock_personality(const Unit* u, const ExecContext *c) {
         unsigned long personality;
         int r;
@@ -3813,7 +3837,7 @@ static int exec_child(
         /* In case anything used libc syslog(), close this here, too */
         closelog();
 
-        int keep_fds[n_fds + 2];
+        int keep_fds[n_fds + 3];
         memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
         n_keep_fds = n_fds;
 
@@ -3823,6 +3847,24 @@ static int exec_child(
                 return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
         }
 
+#if HAVE_LIBBPF
+        if (MANAGER_IS_SYSTEM(unit->manager) && lsm_bpf_supported()) {
+                int bpf_map_fd = -1;
+
+                bpf_map_fd = lsm_bpf_map_restrict_fs_fd(unit);
+                if (bpf_map_fd < 0) {
+                        *exit_status = EXIT_FDS;
+                        return log_unit_error_errno(unit, bpf_map_fd, "Failed to get restrict filesystems BPF map fd: %m");
+                }
+
+                r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, bpf_map_fd, &bpf_map_fd);
+                if (r < 0) {
+                        *exit_status = EXIT_FDS;
+                        return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
+                }
+        }
+#endif
+
         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, keep_fds, n_keep_fds);
         if (r < 0) {
                 *exit_status = EXIT_FDS;
@@ -4682,6 +4724,15 @@ static int exec_child(
                         return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
                 }
 #endif
+
+#if HAVE_LIBBPF
+                r = apply_restrict_filesystems(unit, context);
+                if (r < 0) {
+                        *exit_status = EXIT_BPF;
+                        return log_unit_error_errno(unit, r, "Failed to restrict filesystems: %m");
+                }
+#endif
+
         }
 
         if (!strv_isempty(context->unset_environment)) {
@@ -4967,6 +5018,8 @@ void exec_context_done(ExecContext *c) {
         c->apparmor_profile = mfree(c->apparmor_profile);
         c->smack_process_label = mfree(c->smack_process_label);
 
+        c->restrict_filesystems = set_free(c->restrict_filesystems);
+
         c->syscall_filter = hashmap_free(c->syscall_filter);
         c->syscall_archs = set_free(c->syscall_archs);
         c->address_families = set_free(c->address_families);
@@ -5734,6 +5787,14 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                         prefix, strna(s));
         }
 
+#if HAVE_LIBBPF
+        if (exec_context_restrict_filesystems_set(c)) {
+                char *fs;
+                SET_FOREACH(fs, c->restrict_filesystems)
+                        fprintf(f, "%sRestrictFileSystems: %s\n", prefix, fs);
+        }
+#endif
+
         if (c->network_namespace_path)
                 fprintf(f,
                         "%sNetworkNamespacePath: %s\n",
diff --git a/src/core/execute.h b/src/core/execute.h
index 64a38b2d26a..560dcbcc5eb 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -314,6 +314,9 @@ struct ExecContext {
 
         unsigned long restrict_namespaces; /* The CLONE_NEWxyz flags permitted to the unit's processes */
 
+        Set *restrict_filesystems;
+        bool restrict_filesystems_allow_list:1;
+
         Hashmap *syscall_filter;
         Set *syscall_archs;
         int syscall_errno;
@@ -342,6 +345,13 @@ static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
         return (c->restrict_namespaces & NAMESPACE_FLAGS_ALL) != NAMESPACE_FLAGS_ALL;
 }
 
+static inline bool exec_context_restrict_filesystems_set(const ExecContext *c) {
+        assert(c);
+
+        return c->restrict_filesystems_allow_list ||
+          !set_isempty(c->restrict_filesystems);
+}
+
 static inline bool exec_context_with_rootfs(const ExecContext *c) {
         assert(c);
 
diff --git a/src/core/main.c b/src/core/main.c
index 059ba6dd493..62f39c7378f 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -22,6 +22,9 @@
 #include "alloc-util.h"
 #include "apparmor-setup.h"
 #include "architecture.h"
+#if HAVE_LIBBPF
+#include "bpf-lsm.h"
+#endif
 #include "build.h"
 #include "bus-error.h"
 #include "bus-util.h"
diff --git a/src/core/manager.c b/src/core/manager.c
index 0b2e29ae148..6bcb6bd1535 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -930,6 +930,14 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
                 r = manager_setup_sigchld_event_source(m);
                 if (r < 0)
                         return r;
+
+#if HAVE_LIBBPF
+                if (MANAGER_IS_SYSTEM(m) && lsm_bpf_supported()) {
+                        r = lsm_bpf_setup(m);
+                        if (r < 0)
+                                return r;
+                }
+#endif
         }
 
         if (test_run_flags == 0) {
@@ -1535,6 +1543,10 @@ Manager* manager_free(Manager *m) {
                 m->prefix[dt] = mfree(m->prefix[dt]);
         free(m->received_credentials);
 
+#if BPF_FRAMEWORK
+        lsm_bpf_destroy(m->restrict_fs);
+#endif
+
         return mfree(m);
 }
 