mirror of
https://github.com/systemd/systemd.git
synced 2025-03-10 16:58:28 +03:00
systemd-mountfsd so far provided a MountImage() API call for mounting a disk image and returning a set of mount fds. This complements the API with a new MountDirectory() API call, that operates on a directory instead of an image file. Now, what makes this interesting is that it applies an idmapping from the foreign UID range to the provided target userns – in which case unprivileged operation is allowed (well, under some conditions: in particular the client must own a parent dir of the provided path). This allows container managers to run fully unprivileged from directories – as long as those directories are owned by the foreign UID range. Basic operation is like this: 1. acquire a transient userns from systemd-nsresourced with 64K users 2. ask systemd-mountfsd for an idmapped mount of the container dir matching that userns 3. join the userns and bind the mount fd as root. Note that we have to drop various sandboxing knobs from the mountfsd service file for this to work, since the kernel's security checks that try to ensure that an obstructed /proc/ cannot be circumvented via mounting a new procfs will otherwise prohibit mountfsd from duplicating the mounts properly.
42 lines
1.3 KiB
SYSTEMD
42 lines
1.3 KiB
SYSTEMD
# SPDX-License-Identifier: LGPL-2.1-or-later
|
|
#
|
|
# This file is part of systemd.
|
|
#
|
|
# systemd is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU Lesser General Public License as published by
|
|
# the Free Software Foundation; either version 2.1 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# Unit-level identity, dependencies and ordering for the systemd-mountfsd service.
[Unit]
|
|
Description=DDI File System Mounter
|
|
Documentation=man:systemd-mountfsd.service(8)
|
|
# Weak (Wants=) dependency on the modprobe@ instances for loop and dm_mod;
# the After= line below additionally orders this service behind them.
Wants=modprobe@loop.service modprobe@dm_mod.service
|
|
# Hard (Requires=) dependency on the companion socket unit.
Requires=systemd-mountfsd.socket
|
|
After=systemd-mountfsd.socket modprobe@loop.service modprobe@dm_mod.service
|
|
# DefaultDependencies=no (below) turns off the implicit dependencies, so the
# shutdown conflict and the ordering before sysinit.target/shutdown.target
# are spelled out explicitly here.
Conflicts=shutdown.target
|
|
Before=sysinit.target shutdown.target
|
|
DefaultDependencies=no
|
|
|
|
# Execution and sandboxing settings for the systemd-mountfsd daemon.
# NOTE(review): the {{...}} tokens look like template placeholders that are
# filled in when the final unit file is generated - confirm against the build.
[Service]
|
|
# NOTE(review): the capability bounding set is commented out, so this file does
# not restrict capabilities. The reason is not recorded here - confirm before
# re-enabling or removing the line.
#CapabilityBoundingSet=CAP_DAC_READ_SEARCH CAP_SYS_RESOURCE CAP_BPF CAP_PERFMON CAP_SETGID CAP_SETUID
|
|
ExecStart={{LIBEXECDIR}}/systemd-mountfsd
|
|
# All IP traffic is denied, even though AF_INET/AF_INET6 are still listed in
# RestrictAddressFamilies= further down.
IPAddressDeny=any
|
|
LimitNOFILE={{HIGH_RLIMIT_NOFILE}}
|
|
LockPersonality=yes
|
|
MemoryDenyWriteExecute=yes
|
|
NoNewPrivileges=yes
|
|
ProtectHostname=yes
|
|
# Allow-list of socket address families the service may use.
RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6
|
|
RestrictRealtime=yes
|
|
RestrictSUIDSGID=yes
|
|
SystemCallArchitectures=native
|
|
# System calls outside the SystemCallFilter= allow-list fail with EPERM.
SystemCallErrorNumber=EPERM
|
|
# Allow-list: the @system-service and @mount system call groups.
SystemCallFilter=@system-service @mount
|
|
# Readiness is reported via the notification protocol (Type=notify);
# NotifyAccess=all accepts notification messages from any process of the service.
Type=notify
|
|
NotifyAccess=all
|
|
# Upper bound on file descriptors this service may park in the service
# manager's file descriptor store.
FileDescriptorStoreMax=4096
|
|
# NOTE(review): placeholder; presumably expands to the watchdog setting(s) - confirm.
{{SERVICE_WATCHDOG}}
|
|
|
|
# Installation hints: enabling or disabling this unit also acts on the
# companion socket unit listed in Also=.
[Install]
|
|
Also=systemd-mountfsd.socket
|