From cbc056c81967edd0ba8f0f1d49a13414e7e9630b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 27 Jun 2020 13:23:08 +0200 Subject: [PATCH 1/2] core: wrap some long lines and other formatting changes --- src/core/namespace.c | 116 ++++++++++++++++++++++++++++--------------- src/core/service.c | 6 +-- 2 files changed, 80 insertions(+), 42 deletions(-) diff --git a/src/core/namespace.c b/src/core/namespace.c index b2bbcf58f2d..56f19c738c3 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -649,13 +649,15 @@ add_symlink: return 0; /* Create symlinks like /dev/char/1:9 → ../urandom */ - if (asprintf(&sl, "%s/dev/%s/%u:%u", temporary_mount, S_ISCHR(st.st_mode) ? "char" : "block", major(st.st_rdev), minor(st.st_rdev)) < 0) + if (asprintf(&sl, "%s/dev/%s/%u:%u", + temporary_mount, + S_ISCHR(st.st_mode) ? "char" : "block", + major(st.st_rdev), minor(st.st_rdev)) < 0) return log_oom(); (void) mkdir_parents(sl, 0755); t = strjoina("../", bn); - if (symlink(t, sl) < 0) log_debug_errno(errno, "Failed to symlink '%s' to '%s', ignoring: %m", t, sl); @@ -935,7 +937,8 @@ static int apply_mount( if (errno == ENOENT && m->ignore) return 0; - return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m)); + return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", + mount_entry_path(m)); } if (geteuid() == 0) @@ -962,8 +965,10 @@ static int apply_mount( if (r == -ENOENT && m->ignore) return 0; if (r < 0) - return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", mount_entry_path(m)); - if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY bit for the mount point if needed. */ + return log_debug_errno(r, "Failed to determine whether %s is already a mount point: %m", + mount_entry_path(m)); + if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY + * bit for the mount point if needed. */ return 0; /* This isn't a mount point yet, let's make it one. */ what = mount_entry_path(m); @@ -976,9 +981,9 @@ static int apply_mount( case BIND_MOUNT_RECURSIVE: { _cleanup_free_ char *chased = NULL; - /* Since mount() will always follow symlinks we chase the symlinks on our own first. Note that bind - * mount source paths are always relative to the host root, hence we pass NULL as root directory to - * chase_symlinks() here. */ + /* Since mount() will always follow symlinks we chase the symlinks on our own first. Note + * that bind mount source paths are always relative to the host root, hence we pass NULL as + * root directory to chase_symlinks() here. */ r = chase_symlinks(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased, NULL); if (r == -ENOENT && m->ignore) { @@ -1031,7 +1036,8 @@ static int apply_mount( if (r == -ENOENT && make) { struct stat st; - /* Hmm, either the source or the destination are missing. Let's see if we can create the destination, then try again */ + /* Hmm, either the source or the destination are missing. Let's see if we can create + the destination, then try again. */ if (stat(what, &st) < 0) log_error_errno(errno, "Mount point source '%s' is not accessible: %m", what); @@ -1046,7 +1052,8 @@ static int apply_mount( q = touch(mount_entry_path(m)); if (q < 0) - log_error_errno(q, "Failed to create destination mount point node '%s': %m", mount_entry_path(m)); + log_error_errno(q, "Failed to create destination mount point node '%s': %m", + mount_entry_path(m)); else try_again = true; } @@ -1302,16 +1309,35 @@ int setup_namespace( if (r < 0) return log_debug_errno(r, "Failed to create loop device for root image: %m"); - r = verity_metadata_load(root_image, root_hash_path, root_hash ? NULL : &root_hash_decoded, root_hash ? NULL : &root_hash_size, root_verity ? NULL : &verity_data, root_hash_sig || root_hash_sig_path ? NULL : &hash_sig_path); + r = verity_metadata_load(root_image, + root_hash_path, + root_hash ? NULL : &root_hash_decoded, + root_hash ? NULL : &root_hash_size, + root_verity ? NULL : &verity_data, + root_hash_sig || root_hash_sig_path ? NULL : &hash_sig_path); if (r < 0) return log_debug_errno(r, "Failed to load root hash: %m"); dissect_image_flags |= root_verity || verity_data ? DISSECT_IMAGE_NO_PARTITION_TABLE : 0; - r = dissect_image(loop_device->fd, root_hash ?: root_hash_decoded, root_hash_size, root_verity ?: verity_data, dissect_image_flags, &dissected_image); + r = dissect_image(loop_device->fd, + root_hash ?: root_hash_decoded, + root_hash_size, + root_verity ?: verity_data, + dissect_image_flags, + &dissected_image); if (r < 0) return log_debug_errno(r, "Failed to dissect image: %m"); - r = dissected_image_decrypt(dissected_image, NULL, root_hash ?: root_hash_decoded, root_hash_size, root_verity ?: verity_data, root_hash_sig_path ?: hash_sig_path, root_hash_sig, root_hash_sig_size, dissect_image_flags, &decrypted_image); + r = dissected_image_decrypt(dissected_image, + NULL, + root_hash ?: root_hash_decoded, + root_hash_size, + root_verity ?: verity_data, + root_hash_sig_path ?: hash_sig_path, + root_hash_sig, + root_hash_sig_size, + dissect_image_flags, + &decrypted_image); if (r < 0) return log_debug_errno(r, "Failed to decrypt dissected image: %m"); } @@ -1396,19 +1422,28 @@ int setup_namespace( } if (ns_info->protect_kernel_tunables) { - r = append_static_mounts(&m, protect_kernel_tunables_table, ELEMENTSOF(protect_kernel_tunables_table), ns_info->ignore_protect_paths); + r = append_static_mounts(&m, + protect_kernel_tunables_table, + ELEMENTSOF(protect_kernel_tunables_table), + ns_info->ignore_protect_paths); if (r < 0) goto finish; } if (ns_info->protect_kernel_modules) { - r = append_static_mounts(&m, protect_kernel_modules_table, ELEMENTSOF(protect_kernel_modules_table), ns_info->ignore_protect_paths); + r = append_static_mounts(&m, + protect_kernel_modules_table, + ELEMENTSOF(protect_kernel_modules_table), + ns_info->ignore_protect_paths); if (r < 0) goto finish; } if (ns_info->protect_kernel_logs) { - r = append_static_mounts(&m, protect_kernel_logs_table, ELEMENTSOF(protect_kernel_logs_table), ns_info->ignore_protect_paths); + r = append_static_mounts(&m, + protect_kernel_logs_table, + ELEMENTSOF(protect_kernel_logs_table), + ns_info->ignore_protect_paths); if (r < 0) goto finish; } @@ -1429,7 +1464,10 @@ int setup_namespace( goto finish; if (namespace_info_mount_apivfs(ns_info)) { - r = append_static_mounts(&m, apivfs_table, ELEMENTSOF(apivfs_table), ns_info->ignore_protect_paths); + r = append_static_mounts(&m, + apivfs_table, + ELEMENTSOF(apivfs_table), + ns_info->ignore_protect_paths); if (r < 0) goto finish; } @@ -1477,10 +1515,10 @@ int setup_namespace( if (unshare(CLONE_NEWNS) < 0) { r = log_debug_errno(errno, "Failed to unshare the mount namespace: %m"); if (IN_SET(r, -EACCES, -EPERM, -EOPNOTSUPP, -ENOSYS)) - /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter in place - * that doesn't allow us to create namespaces (or a missing cap), then propagate a recognizable - * error back, which the caller can use to detect this case (and only this) and optionally - * continue without namespacing applied. */ + /* If the kernel doesn't support namespaces, or when there's a MAC or seccomp filter + * in place that doesn't allow us to create namespaces (or a missing cap), then + * propagate a recognizable error back, which the caller can use to detect this case + * (and only this) and optionally continue without namespacing applied. */ r = -ENOANO; goto finish; @@ -1544,8 +1582,8 @@ int setup_namespace( _cleanup_free_ char **deny_list = NULL; size_t j; - /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of /proc. - * For example, this is the case with the option: 'InaccessiblePaths=/proc' */ + /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of + * /proc. For example, this is the case with the option: 'InaccessiblePaths=/proc'. */ proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); if (!proc_self_mountinfo) { r = log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m"); @@ -1570,10 +1608,10 @@ int setup_namespace( goto finish; } if (r == 0) { - /* We hit a symlinked mount point. The entry got rewritten and might point to a - * very different place now. Let's normalize the changed list, and start from - * the beginning. After all to mount the entry at the new location we might - * need some other mounts first */ + /* We hit a symlinked mount point. The entry got rewritten and might + * point to a very different place now. Let's normalize the changed + * list, and start from the beginning. After all to mount the entry + * at the new location we might need some other mounts first */ again = true; break; } @@ -1978,31 +2016,31 @@ bool ns_type_supported(NamespaceType type) { } static const char *const protect_home_table[_PROTECT_HOME_MAX] = { - [PROTECT_HOME_NO] = "no", - [PROTECT_HOME_YES] = "yes", + [PROTECT_HOME_NO] = "no", + [PROTECT_HOME_YES] = "yes", [PROTECT_HOME_READ_ONLY] = "read-only", - [PROTECT_HOME_TMPFS] = "tmpfs", + [PROTECT_HOME_TMPFS] = "tmpfs", }; DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_home, ProtectHome, PROTECT_HOME_YES); static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = { - [PROTECT_SYSTEM_NO] = "no", - [PROTECT_SYSTEM_YES] = "yes", - [PROTECT_SYSTEM_FULL] = "full", + [PROTECT_SYSTEM_NO] = "no", + [PROTECT_SYSTEM_YES] = "yes", + [PROTECT_SYSTEM_FULL] = "full", [PROTECT_SYSTEM_STRICT] = "strict", }; DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(protect_system, ProtectSystem, PROTECT_SYSTEM_YES); static const char* const namespace_type_table[] = { - [NAMESPACE_MOUNT] = "mnt", + [NAMESPACE_MOUNT] = "mnt", [NAMESPACE_CGROUP] = "cgroup", - [NAMESPACE_UTS] = "uts", - [NAMESPACE_IPC] = "ipc", - [NAMESPACE_USER] = "user", - [NAMESPACE_PID] = "pid", - [NAMESPACE_NET] = "net", + [NAMESPACE_UTS] = "uts", + [NAMESPACE_IPC] = "ipc", + [NAMESPACE_USER] = "user", + [NAMESPACE_PID] = "pid", + [NAMESPACE_NET] = "net", }; DEFINE_STRING_TABLE_LOOKUP(namespace_type, NamespaceType); diff --git a/src/core/service.c b/src/core/service.c index 3daf21296ae..4813ce938d8 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -2132,9 +2132,9 @@ static void service_enter_start(Service *s) { if (!c) { if (s->type != SERVICE_ONESHOT) { - /* There's no command line configured for the main command? Hmm, that is strange. This can only - * happen if the configuration changes at runtime. In this case, let's enter a failure - * state. */ + /* There's no command line configured for the main command? Hmm, that is strange. + * This can only happen if the configuration changes at runtime. In this case, + * let's enter a failure state. */ log_unit_error(UNIT(s), "There's no 'start' task anymore we could start."); r = -ENXIO; goto fail; From 56a13a495ce82d431bc68e0939b76373c879bed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sun, 28 Jun 2020 19:54:49 +0200 Subject: [PATCH 2/2] pid1: create ro private tmp dirs when /tmp or /var/tmp is read-only Read-only /var/tmp is more likely, because it's backed by a real device. /tmp is (by default) backed by tmpfs, but it doesn't have to be. In both cases the same consideration applies. If we boot with read-only /var/tmp, any unit with PrivateTmp=yes would fail because we cannot create the subdir under /var/tmp to mount the private directory. But many services actually don't require /var/tmp (either because they only use it occasionally, or because they only use /tmp, or even because they don't use the temporary directories at all, and PrivateTmp=yes is used to isolate them from the rest of the system). To handle both cases let's create a read-only directory under /run/systemd and mount it as the private /tmp or /var/tmp. (Read-only to not fool the service into dumping too much data in /run.) $ sudo systemd-run -t -p PrivateTmp=yes bash Running as unit: run-u14.service Press ^] three times within 1s to disconnect TTY. [root@workstation /]# ls -l /tmp/ total 0 [root@workstation /]# ls -l /var/tmp/ total 0 [root@workstation /]# touch /tmp/f [root@workstation /]# touch /var/tmp/f touch: cannot touch '/var/tmp/f': Read-only file system This commit has more changes than I like to put in one commit, but it's touching all the same paths so it's hard to split. exec_runtime_make() was using the wrong cleanup function, so the directory would be left behind on error. --- src/core/execute.c | 145 ++++++++++++++++++-------------------- src/core/execute.h | 2 +- src/core/manager.c | 2 +- src/core/namespace.c | 84 +++++++++++++++------- src/core/namespace.h | 12 ++++ src/test/test-namespace.c | 56 +++++++++------ 6 files changed, 175 insertions(+), 126 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 078aa144180..3c17c6b5b08 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2592,7 +2592,7 @@ static int apply_mount_namespace( char **error_path) { _cleanup_strv_free_ char **empty_directories = NULL; - char *tmp = NULL, *var = NULL; + const char *tmp_dir = NULL, *var_tmp_dir = NULL; const char *root_dir = NULL, *root_image = NULL; NamespaceInfo ns_info; bool needs_sandboxing; @@ -2617,13 +2617,19 @@ static int apply_mount_namespace( if (needs_sandboxing) { /* The runtime struct only contains the parent of the private /tmp, * which is non-accessible to world users. Inside of it there's a /tmp - * that is sticky, and that's the one we want to use here. */ + * that is sticky, and that's the one we want to use here. + * This does not apply when we are using /run/systemd/empty as fallback. */ if (context->private_tmp && runtime) { - if (runtime->tmp_dir) - tmp = strjoina(runtime->tmp_dir, "/tmp"); - if (runtime->var_tmp_dir) - var = strjoina(runtime->var_tmp_dir, "/tmp"); + if (streq_ptr(runtime->tmp_dir, RUN_SYSTEMD_EMPTY)) + tmp_dir = runtime->tmp_dir; + else if (runtime->tmp_dir) + tmp_dir = strjoina(runtime->tmp_dir, "/tmp"); + + if (streq_ptr(runtime->var_tmp_dir, RUN_SYSTEMD_EMPTY)) + var_tmp_dir = runtime->var_tmp_dir; + else if (runtime->tmp_dir) + var_tmp_dir = strjoina(runtime->var_tmp_dir, "/tmp"); } ns_info = (NamespaceInfo) { @@ -2661,8 +2667,8 @@ static int apply_mount_namespace( n_bind_mounts, context->temporary_filesystems, context->n_temporary_filesystems, - tmp, - var, + tmp_dir, + var_tmp_dir, context->log_namespace, needs_sandboxing ? context->protect_home : PROTECT_HOME_NO, needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO, @@ -5347,28 +5353,25 @@ static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) { (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id); /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */ - if (destroy && rt->tmp_dir) { + + if (destroy && rt->tmp_dir && !streq(rt->tmp_dir, RUN_SYSTEMD_EMPTY)) { log_debug("Spawning thread to nuke %s", rt->tmp_dir); r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir); - if (r < 0) { + if (r < 0) log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir); - free(rt->tmp_dir); - } - - rt->tmp_dir = NULL; + else + rt->tmp_dir = NULL; } - if (destroy && rt->var_tmp_dir) { + if (destroy && rt->var_tmp_dir && !streq(rt->var_tmp_dir, RUN_SYSTEMD_EMPTY)) { log_debug("Spawning thread to nuke %s", rt->var_tmp_dir); r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir); - if (r < 0) { + if (r < 0) log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir); - free(rt->var_tmp_dir); - } - - rt->var_tmp_dir = NULL; + else + rt->var_tmp_dir = NULL; } rt->id = mfree(rt->id); @@ -5382,16 +5385,22 @@ static void exec_runtime_freep(ExecRuntime **rt) { (void) exec_runtime_free(*rt, false); } -static int exec_runtime_allocate(ExecRuntime **ret) { +static int exec_runtime_allocate(ExecRuntime **ret, const char *id) { + _cleanup_free_ char *id_copy = NULL; ExecRuntime *n; assert(ret); + id_copy = strdup(id); + if (!id_copy) + return -ENOMEM; + n = new(ExecRuntime, 1); if (!n) return -ENOMEM; *n = (ExecRuntime) { + .id = TAKE_PTR(id_copy), .netns_storage_socket = { -1, -1 }, }; @@ -5402,9 +5411,9 @@ static int exec_runtime_allocate(ExecRuntime **ret) { static int exec_runtime_add( Manager *m, const char *id, - const char *tmp_dir, - const char *var_tmp_dir, - const int netns_storage_socket[2], + char **tmp_dir, + char **var_tmp_dir, + int netns_storage_socket[2], ExecRuntime **ret) { _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL; @@ -5413,51 +5422,40 @@ static int exec_runtime_add( assert(m); assert(id); + /* tmp_dir, var_tmp_dir, netns_storage_socket fds are donated on success */ + r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops); if (r < 0) return r; - r = exec_runtime_allocate(&rt); + r = exec_runtime_allocate(&rt, id); if (r < 0) return r; - rt->id = strdup(id); - if (!rt->id) - return -ENOMEM; - - if (tmp_dir) { - rt->tmp_dir = strdup(tmp_dir); - if (!rt->tmp_dir) - return -ENOMEM; - - /* When tmp_dir is set, then we require var_tmp_dir is also set. */ - assert(var_tmp_dir); - rt->var_tmp_dir = strdup(var_tmp_dir); - if (!rt->var_tmp_dir) - return -ENOMEM; - } - - if (netns_storage_socket) { - rt->netns_storage_socket[0] = netns_storage_socket[0]; - rt->netns_storage_socket[1] = netns_storage_socket[1]; - } - r = hashmap_put(m->exec_runtime_by_id, rt->id, rt); if (r < 0) return r; + assert(!!rt->tmp_dir == !!rt->var_tmp_dir); /* We require both to be set together */ + rt->tmp_dir = TAKE_PTR(*tmp_dir); + rt->var_tmp_dir = TAKE_PTR(*var_tmp_dir); + + if (netns_storage_socket) { + rt->netns_storage_socket[0] = TAKE_FD(netns_storage_socket[0]); + rt->netns_storage_socket[1] = TAKE_FD(netns_storage_socket[1]); + } + rt->manager = m; if (ret) *ret = rt; - /* do not remove created ExecRuntime object when the operation succeeds. */ - rt = NULL; + TAKE_PTR(rt); return 0; } static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) { - _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL; + _cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL; _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 }; int r; @@ -5483,12 +5481,10 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E return -errno; } - r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret); + r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ret); if (r < 0) return r; - /* Avoid cleanup */ - netns_storage_socket[0] = netns_storage_socket[1] = -1; return 1; } @@ -5606,14 +5602,10 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, rt = hashmap_get(u->manager->exec_runtime_by_id, u->id); if (!rt) { - r = exec_runtime_allocate(&rt_create); + r = exec_runtime_allocate(&rt_create, u->id); if (r < 0) return log_oom(); - rt_create->id = strdup(u->id); - if (!rt_create->id) - return log_oom(); - rt = rt_create; } @@ -5670,15 +5662,16 @@ int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, rt_create->manager = u->manager; /* Avoid cleanup */ - rt_create = NULL; + TAKE_PTR(rt_create); } return 1; } -void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { - char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL; - int r, fd0 = -1, fd1 = -1; +int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { + _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL; + char *id = NULL; + int r, fdpair[] = {-1, -1}; const char *p, *v = value; size_t n; @@ -5695,7 +5688,9 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { v = startswith(p, "tmp-dir="); if (v) { n = strcspn(v, " "); - tmp_dir = strndupa(v, n); + tmp_dir = strndup(v, n); + if (!tmp_dir) + return log_oom(); if (v[n] != ' ') goto finalize; p = v + n + 1; @@ -5704,7 +5699,9 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { v = startswith(p, "var-tmp-dir="); if (v) { n = strcspn(v, " "); - var_tmp_dir = strndupa(v, n); + var_tmp_dir = strndup(v, n); + if (!var_tmp_dir) + return log_oom(); if (v[n] != ' ') goto finalize; p = v + n + 1; @@ -5716,11 +5713,9 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { n = strcspn(v, " "); buf = strndupa(v, n); - if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) { - log_debug("Unable to process exec-runtime netns fd specification."); - return; - } - fd0 = fdset_remove(fds, fd0); + if (safe_atoi(buf, &fdpair[0]) < 0 || !fdset_contains(fds, fdpair[0])) + return log_debug("Unable to process exec-runtime netns fd specification."); + fdpair[0] = fdset_remove(fds, fdpair[0]); if (v[n] != ' ') goto finalize; p = v + n + 1; @@ -5732,18 +5727,16 @@ void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { n = strcspn(v, " "); buf = strndupa(v, n); - if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) { - log_debug("Unable to process exec-runtime netns fd specification."); - return; - } - fd1 = fdset_remove(fds, fd1); + if (safe_atoi(buf, &fdpair[1]) < 0 || !fdset_contains(fds, fdpair[1])) + return log_debug("Unable to process exec-runtime netns fd specification."); + fdpair[1] = fdset_remove(fds, fdpair[1]); } finalize: - - r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL); + r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, fdpair, NULL); if (r < 0) - log_debug_errno(r, "Failed to add exec-runtime: %m"); + return log_debug_errno(r, "Failed to add exec-runtime: %m"); + return 0; } void exec_runtime_vacuum(Manager *m) { diff --git a/src/core/execute.h b/src/core/execute.h index 6bd71c17404..fc7bc5c24b2 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -405,7 +405,7 @@ ExecRuntime *exec_runtime_unref(ExecRuntime *r, bool destroy); int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds); int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds); -void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds); +int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds); void exec_runtime_vacuum(Manager *m); void exec_params_clear(ExecParameters *p); diff --git a/src/core/manager.c b/src/core/manager.c index 743ef6b4fc3..54c6321870f 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3685,7 +3685,7 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { else if ((val = startswith(l, "destroy-ipc-gid="))) manager_deserialize_gid_refs_one(m, val); else if ((val = startswith(l, "exec-runtime="))) - exec_runtime_deserialize_one(m, val, fds); + (void) exec_runtime_deserialize_one(m, val, fds); else if ((val = startswith(l, "subscribed="))) { if (strv_extend(&m->deserialized_subscribed, val) < 0) diff --git a/src/core/namespace.c b/src/core/namespace.c index 56f19c738c3..a17b89ecf11 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -43,6 +43,7 @@ typedef enum MountMode { BIND_MOUNT, BIND_MOUNT_RECURSIVE, PRIVATE_TMP, + PRIVATE_TMP_READONLY, PRIVATE_DEV, BIND_DEV, EMPTY_DIR, @@ -221,7 +222,7 @@ static const char *mount_entry_path(const MountEntry *p) { static bool mount_entry_read_only(const MountEntry *p) { assert(p); - return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE); + return p->read_only || IN_SET(p->mode, READONLY, INACCESSIBLE, PRIVATE_TMP_READONLY); } static const char *mount_entry_source(const MountEntry *p) { @@ -1007,6 +1008,7 @@ static int apply_mount( return mount_tmpfs(m); case PRIVATE_TMP: + case PRIVATE_TMP_READONLY: what = mount_entry_source(m); make = true; break; @@ -1398,17 +1400,21 @@ int setup_namespace( goto finish; if (tmp_dir) { + bool ro = streq(tmp_dir, RUN_SYSTEMD_EMPTY); + *(m++) = (MountEntry) { .path_const = "/tmp", - .mode = PRIVATE_TMP, + .mode = ro ? PRIVATE_TMP_READONLY : PRIVATE_TMP, .source_const = tmp_dir, }; } if (var_tmp_dir) { + bool ro = streq(var_tmp_dir, RUN_SYSTEMD_EMPTY); + *(m++) = (MountEntry) { .path_const = "/var/tmp", - .mode = PRIVATE_TMP, + .mode = ro ? PRIVATE_TMP_READONLY : PRIVATE_TMP, .source_const = var_tmp_dir, }; } @@ -1815,10 +1821,28 @@ static int make_tmp_prefix(const char *prefix) { } -static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { +static int make_tmp_subdir(const char *parent, char **ret) { + _cleanup_free_ char *y = NULL; + + RUN_WITH_UMASK(0000) { + y = strjoin(parent, "/tmp"); + if (!y) + return -ENOMEM; + + if (mkdir(y, 0777 | S_ISVTX) < 0) + return -errno; + } + + if (ret) + *ret = TAKE_PTR(y); + return 0; +} + +static int setup_one_tmp_dir(const char *id, const char *prefix, char **path, char **tmp_path) { _cleanup_free_ char *x = NULL; char bid[SD_ID128_STRING_MAX]; sd_id128_t boot_id; + bool rw = true; int r; assert(id); @@ -1841,49 +1865,55 @@ static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) { return r; RUN_WITH_UMASK(0077) - if (!mkdtemp(x)) - return -errno; + if (!mkdtemp(x)) { + if (errno == EROFS || ERRNO_IS_DISK_SPACE(errno)) + rw = false; + else + return -errno; + } - RUN_WITH_UMASK(0000) { - char *y; + if (rw) { + r = make_tmp_subdir(x, tmp_path); + if (r < 0) + return r; + } else { + /* Trouble: we failed to create the directory. Instead of failing, let's simulate /tmp being + * read-only. This way the service will get the EROFS result as if it was writing to the real + * file system. */ + r = mkdir_p(RUN_SYSTEMD_EMPTY, 0500); + if (r < 0) + return r; - y = strjoina(x, "/tmp"); - - if (mkdir(y, 0777 | S_ISVTX) < 0) - return -errno; + x = strdup(RUN_SYSTEMD_EMPTY); + if (!x) + return -ENOMEM; } *path = TAKE_PTR(x); - return 0; } int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) { - char *a, *b; + _cleanup_(namespace_cleanup_tmpdirp) char *a = NULL; + _cleanup_(rmdir_and_freep) char *a_tmp = NULL; + char *b; int r; assert(id); assert(tmp_dir); assert(var_tmp_dir); - r = setup_one_tmp_dir(id, "/tmp", &a); + r = setup_one_tmp_dir(id, "/tmp", &a, &a_tmp); if (r < 0) return r; - r = setup_one_tmp_dir(id, "/var/tmp", &b); - if (r < 0) { - char *t; - - t = strjoina(a, "/tmp"); - (void) rmdir(t); - (void) rmdir(a); - - free(a); + r = setup_one_tmp_dir(id, "/var/tmp", &b, NULL); + if (r < 0) return r; - } - *tmp_dir = a; - *var_tmp_dir = b; + a_tmp = mfree(a_tmp); /* avoid rmdir */ + *tmp_dir = TAKE_PTR(a); + *var_tmp_dir = TAKE_PTR(b); return 0; } diff --git a/src/core/namespace.h b/src/core/namespace.h index b04b9b442ea..b182223bd41 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -12,7 +12,9 @@ typedef struct TemporaryFileSystem TemporaryFileSystem; #include #include "dissect-image.h" +#include "fs-util.h" #include "macro.h" +#include "string-util.h" typedef enum ProtectHome { PROTECT_HOME_NO, @@ -98,6 +100,16 @@ int setup_namespace( DissectImageFlags dissected_image_flags, char **error_path); +#define RUN_SYSTEMD_EMPTY "/run/systemd/empty" + +static inline void namespace_cleanup_tmpdir(char *p) { + PROTECT_ERRNO; + if (!streq_ptr(p, RUN_SYSTEMD_EMPTY)) + (void) rmdir(p); + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, namespace_cleanup_tmpdir); + int setup_tmp_dirs( const char *id, char **tmp_dir, diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index ad135e146fe..a7ad4828370 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -14,14 +14,25 @@ #include "util.h" #include "virt.h" +static void test_namespace_cleanup_tmpdir(void) { + { + _cleanup_(namespace_cleanup_tmpdirp) char *dir; + assert_se(dir = strdup(RUN_SYSTEMD_EMPTY)); + } + + { + _cleanup_(namespace_cleanup_tmpdirp) char *dir; + assert_se(dir = strdup("/tmp/systemd-test-namespace.XXXXXX")); + assert_se(mkdtemp(dir)); + } +} + static void test_tmpdir(const char *id, const char *A, const char *B) { _cleanup_free_ char *a, *b; struct stat x, y; char *c, *d; assert_se(setup_tmp_dirs(id, &a, &b) == 0); - assert_se(startswith(a, A)); - assert_se(startswith(b, B)); assert_se(stat(a, &x) >= 0); assert_se(stat(b, &y) >= 0); @@ -29,26 +40,27 @@ static void test_tmpdir(const char *id, const char *A, const char *B) { assert_se(S_ISDIR(x.st_mode)); assert_se(S_ISDIR(y.st_mode)); - assert_se((x.st_mode & 01777) == 0700); - assert_se((y.st_mode & 01777) == 0700); + if (!streq(a, RUN_SYSTEMD_EMPTY)) { + assert_se(startswith(a, A)); + assert_se((x.st_mode & 01777) == 0700); + c = strjoina(a, "/tmp"); + assert_se(stat(c, &x) >= 0); + assert_se(S_ISDIR(x.st_mode)); + assert_se((x.st_mode & 01777) == 01777); + assert_se(rmdir(c) >= 0); + assert_se(rmdir(a) >= 0); + } - c = strjoina(a, "/tmp"); - d = strjoina(b, "/tmp"); - - assert_se(stat(c, &x) >= 0); - assert_se(stat(d, &y) >= 0); - - assert_se(S_ISDIR(x.st_mode)); - assert_se(S_ISDIR(y.st_mode)); - - assert_se((x.st_mode & 01777) == 01777); - assert_se((y.st_mode & 01777) == 01777); - - assert_se(rmdir(c) >= 0); - assert_se(rmdir(d) >= 0); - - assert_se(rmdir(a) >= 0); - assert_se(rmdir(b) >= 0); + if (!streq(b, RUN_SYSTEMD_EMPTY)) { + assert_se(startswith(b, B)); + assert_se((y.st_mode & 01777) == 0700); + d = strjoina(b, "/tmp"); + assert_se(stat(d, &y) >= 0); + assert_se(S_ISDIR(y.st_mode)); + assert_se((y.st_mode & 01777) == 01777); + assert_se(rmdir(d) >= 0); + assert_se(rmdir(b) >= 0); + } } static void test_netns(void) { @@ -180,6 +192,8 @@ int main(int argc, char *argv[]) { test_setup_logging(LOG_INFO); + test_namespace_cleanup_tmpdir(); + if (!have_namespaces()) { log_tests_skipped("Don't have namespace support"); return EXIT_TEST_SKIP;