diff --git a/NEWS b/NEWS index bdac561bc0b..c7ee3dbb40e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,15 @@ systemd System and Service Manager +CHANGES WITH 248: + + * The MountAPIVFS= service file setting now additionally mounts a tmpfs + on /run/ if it is not already a mount point. A writable /run/ has always + been a requirement for a functioning system, but this was not + guaranteed when using a read-only image. + Users can always specify BindPaths= or InaccessiblePaths= as overrides, + and they will take precedence. If the host's root mount point is used, + there is no change in behaviour. + CHANGES WITH 247: * KERNEL API INCOMPATIBILITY: Linux 4.14 introduced two new uevents diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 7328f9b9659..568839e0d9e 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -275,11 +275,12 @@ MountAPIVFS= Takes a boolean argument. If on, a private mount namespace for the unit's processes is created - and the API file systems /proc/, /sys/, and /dev/ - are mounted inside of it, unless they are already mounted. Note that this option has no effect unless used in - conjunction with RootDirectory=/RootImage= as these three mounts are + and the API file systems /proc/, /sys/, /dev/ and + /run/ (as an empty tmpfs) are mounted inside of it, unless they are + already mounted. Note that this option has no effect unless used in conjunction with + RootDirectory=/RootImage= as these four mounts are generally mounted in the host anyway, and unless the root directory is changed, the private mount namespace - will be a 1:1 copy of the host's, and include these three mounts. Note that the /dev/ file + will be a 1:1 copy of the host's, and include these four mounts. Note that the /dev/ file system of the host is bind mounted if this option is used without PrivateDevices=. To run the service with a private, minimal version of /dev/, combine this option with PrivateDevices=. 
diff --git a/src/core/namespace.c b/src/core/namespace.c index e32336a7ff0..73a8fa73a4d 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -51,6 +51,7 @@ typedef enum MountMode { EMPTY_DIR, SYSFS, PROCFS, + RUN, READONLY, READWRITE, TMPFS, @@ -76,12 +77,13 @@ typedef struct MountEntry { LIST_HEAD(MountOptions, image_options); } MountEntry; -/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted +/* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into it, but only as a fallback if the user hasn't mounted * something there already. These mounts are hence overridden by any other explicitly configured mounts. */ static const MountEntry apivfs_table[] = { { "/proc", PROCFS, false }, { "/dev", BIND_DEV, false }, { "/sys", SYSFS, false }, + { "/run", RUN, false, .options_const = "mode=755" TMPFS_LIMITS_RUN, .flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME }, }; /* ProtectKernelTunables= option and the related filesystem APIs */ @@ -945,6 +947,20 @@ static int mount_tmpfs(const MountEntry *m) { return 1; } +static int mount_run(const MountEntry *m) { + int r; + + assert(m); + + r = path_is_mount_point(mount_entry_path(m), NULL, 0); + if (r < 0 && r != -ENOENT) /* -ENOENT is deliberately not treated as fatal: execution falls through to mount_tmpfs() below */ + return log_debug_errno(r, "Unable to determine whether /run is already mounted: %m"); + if (r > 0) /* /run is already a mount point: make this a no-op and leave the existing mount in place */ + return 0; + + return mount_tmpfs(m); +} + static int mount_images(const MountEntry *m) { _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; @@ -1170,6 +1186,9 @@ static int apply_mount( case PROCFS: return mount_procfs(m, ns_info); + case RUN: + return mount_run(m); + case MOUNT_IMAGES: return mount_images(m);