diff --git a/NEWS b/NEWS
index bdac561bc0b..c7ee3dbb40e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,15 @@
systemd System and Service Manager
+CHANGES WITH 248:
+
+ * The MountAPIVFS= service file setting now additionally mounts a tmpfs
+ on /run/ if it is not already a mount point. A writable /run/ has always
+ been a requirement for a functioning system, but this was not
+ guaranteed when using a read-only image.
+ Users can always specify BindPaths= or InaccessiblePaths= as overrides,
+ and they will take precedence. If the host's root mount point is used,
+ there is no change in behaviour.
+
CHANGES WITH 247:
* KERNEL API INCOMPATIBILITY: Linux 4.14 introduced two new uevents
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 7328f9b9659..568839e0d9e 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -275,11 +275,12 @@
MountAPIVFS=
Takes a boolean argument. If on, a private mount namespace for the unit's processes is created
- and the API file systems /proc/, /sys/, and /dev/
- are mounted inside of it, unless they are already mounted. Note that this option has no effect unless used in
- conjunction with RootDirectory=/RootImage= as these three mounts are
+ and the API file systems /proc/, /sys/, /dev/ and
+ /run/ (as an empty tmpfs) are mounted inside of it, unless they are
+ already mounted. Note that this option has no effect unless used in conjunction with
+ RootDirectory=/RootImage= as these four mounts are
generally mounted in the host anyway, and unless the root directory is changed, the private mount namespace
- will be a 1:1 copy of the host's, and include these three mounts. Note that the /dev/ file
+ will be a 1:1 copy of the host's, and include these four mounts. Note that the /dev/ file
system of the host is bind mounted if this option is used without PrivateDevices=. To run
the service with a private, minimal version of /dev/, combine this option with
PrivateDevices=.
diff --git a/src/core/namespace.c b/src/core/namespace.c
index e32336a7ff0..73a8fa73a4d 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -51,6 +51,7 @@ typedef enum MountMode {
EMPTY_DIR,
SYSFS,
PROCFS,
+ RUN,
READONLY,
READWRITE,
TMPFS,
@@ -76,12 +77,13 @@ typedef struct MountEntry {
LIST_HEAD(MountOptions, image_options);
} MountEntry;
-/* If MountAPIVFS= is used, let's mount /sys and /proc into the it, but only as a fallback if the user hasn't mounted
+/* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into it, but only as a fallback if the user hasn't mounted
* something there already. These mounts are hence overridden by any other explicitly configured mounts. */
static const MountEntry apivfs_table[] = {
{ "/proc", PROCFS, false },
{ "/dev", BIND_DEV, false },
{ "/sys", SYSFS, false },
+ { "/run", RUN, false, .options_const = "mode=755" TMPFS_LIMITS_RUN, .flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME }, /* RUN entries are dispatched to mount_run(), which mounts a tmpfs with these options/flags only if the path is not already a mount point */
};
/* ProtectKernelTunables= option and the related filesystem APIs */
@@ -945,6 +947,20 @@ static int mount_tmpfs(const MountEntry *m) {
return 1;
}
+static int mount_run(const MountEntry *m) {
+        int mounted;
+
+        assert(m);
+
+        /* This mount is only a fallback: probe the target first so that an existing mount is never
+         * shadowed by a fresh tmpfs. */
+        mounted = path_is_mount_point(mount_entry_path(m), NULL, 0);
+        if (mounted > 0)
+                return 0; /* already a mount point, nothing to do */
+        if (mounted < 0 && mounted != -ENOENT) /* -ENOENT is deliberately not treated as a failure */
+                return log_debug_errno(mounted, "Unable to determine whether /run is already mounted: %m");
+
+        /* Not a mount point (or the check returned -ENOENT): hand over to the generic tmpfs setup. */
+        return mount_tmpfs(m);
+}
+
static int mount_images(const MountEntry *m) {
_cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
_cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL;
@@ -1170,6 +1186,9 @@ static int apply_mount(
case PROCFS:
return mount_procfs(m, ns_info);
+ case RUN:
+ return mount_run(m);
+
case MOUNT_IMAGES:
return mount_images(m);