1
0
mirror of https://github.com/systemd/systemd.git synced 2025-03-10 16:58:28 +03:00
systemd/units/systemd-mountfsd.service.in
Lennart Poettering d6f8e1ae87 mntfsd: add api to mount dirs for containers
systemd-mountfsd so far provided a MountImage() API call for mounting a
disk image and returning a set of mount fds. This complements the API
with a new MountDirectory() API call, that operates on a directory
instead of an image file. Now, what makes this interesting is that it
applies an idmapping from the foreign UID range to the provided target
userns – and in which case unpriveleged operation is allowed (well,
under some conditions: in particular the client must own a parent dir of
the provided path).

This allows container managers to run fully unprivileged from
directories – as long as those directories are owned by the foreign UID
range. Basic operation is like this:

1. acquire a transient userns from systemd-nsresourced with 64K users
2. ask systemd-mountfsd for an idmapped mount of the container dir
   matching that userns
3. join the userns and bind the mount fd as root.

Note that we have to drop various sandboxing knobs from the mountfsd
service file for this to work, since the kernel's security checks that
try to ensure than an obstructed /proc/ cannot be circumvented via
mounting a new procfs will otherwise prohibit mountfsd to duplicate the
mounts properly.
2025-01-23 21:48:02 +01:00

42 lines
1.3 KiB
SYSTEMD

# SPDX-License-Identifier: LGPL-2.1-or-later
#
# This file is part of systemd.
#
# systemd is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
[Unit]
Description=DDI File System Mounter
Documentation=man:systemd-mountfsd.service(8)
Wants=modprobe@loop.service modprobe@dm_mod.service
Requires=systemd-mountfsd.socket
After=systemd-mountfsd.socket modprobe@loop.service modprobe@dm_mod.service
Conflicts=shutdown.target
Before=sysinit.target shutdown.target
DefaultDependencies=no
[Service]
#CapabilityBoundingSet=CAP_DAC_READ_SEARCH CAP_SYS_RESOURCE CAP_BPF CAP_PERFMON CAP_SETGID CAP_SETUID
ExecStart={{LIBEXECDIR}}/systemd-mountfsd
IPAddressDeny=any
LimitNOFILE={{HIGH_RLIMIT_NOFILE}}
LockPersonality=yes
MemoryDenyWriteExecute=yes
NoNewPrivileges=yes
ProtectHostname=yes
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
RestrictRealtime=yes
RestrictSUIDSGID=yes
SystemCallArchitectures=native
SystemCallErrorNumber=EPERM
SystemCallFilter=@system-service @mount
Type=notify
NotifyAccess=all
FileDescriptorStoreMax=4096
{{SERVICE_WATCHDOG}}
[Install]
Also=systemd-mountfsd.socket