diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml
index 19c7b26bcee..94c8e7a2dd3 100644
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -1100,7 +1100,29 @@ BindReadOnlyPaths=/var/lib/systemd
Note that the implementation of this setting might be impossible (for example if network namespaces are
not available), and the unit should be written in a way that does not solely rely on this setting for
- security.
+ security.
+
+ When this option is used on a socket unit any sockets bound on behalf of this unit will be
+ bound within a private network namespace. This may be combined with
+ JoinsNamespaceOf= to listen on sockets inside of network namespaces of other
+ services.
+
+
+
+ NetworkNamespacePath=
+
+ Takes an absolute file system path referring to a Linux network namespace
+ pseudo-file (i.e. a file like /proc/$PID/ns/net or a bind mount or symlink to
+ one). When set the invoked processes are added to the network namespace referenced by that path. The
+ path has to point to a valid namespace file at the moment the processes are forked off. If this
+ option is used PrivateNetwork= has no effect. If this option is used together with
+ JoinsNamespaceOf= then it only has an effect if this unit is started before any of
+ the listed units that have PrivateNetwork= or
+ NetworkNamespacePath= configured, as otherwise the network namespace of those
+ units is reused.
+
+ When this option is used on a socket unit any sockets bound on behalf of this unit will be
+ bound within the specified network namespace.
diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml
index 82c63e1609d..14418c359f4 100644
--- a/man/systemd.unit.xml
+++ b/man/systemd.unit.xml
@@ -728,23 +728,18 @@
JoinsNamespaceOf=
- For units that start processes (such as
- service units), lists one or more other units whose network
- and/or temporary file namespace to join. This only applies to
- unit types which support the
- PrivateNetwork= and
+ For units that start processes (such as service units), lists one or more other units
+ whose network and/or temporary file namespace to join. This only applies to unit types which support
+ the PrivateNetwork=, NetworkNamespacePath= and
PrivateTmp= directives (see
- systemd.exec5
- for details). If a unit that has this setting set is started,
- its processes will see the same /tmp,
- /var/tmp and network namespace as one
- listed unit that is started. If multiple listed units are
- already started, it is not defined which namespace is joined.
- Note that this setting only has an effect if
- PrivateNetwork= and/or
- PrivateTmp= is enabled for both the unit
- that joins the namespace and the unit whose namespace is
- joined.
+ systemd.exec5 for
+ details). If a unit that has this setting set is started, its processes will see the same
+ /tmp, /var/tmp and network namespace as one listed unit
+ that is started. If multiple listed units are already started, it is not defined which namespace is
+ joined. Note that this setting only has an effect if
+ PrivateNetwork=/NetworkNamespacePath= and/or
+ PrivateTmp= is enabled for both the unit that joins the namespace and the unit
+ whose namespace is joined.
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index f22bf4a371c..0b28643e791 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -778,6 +778,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
/* Obsolete/redundant properties: */
SD_BUS_PROPERTY("Capabilities", "s", property_get_empty_string, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
@@ -1217,6 +1218,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "MountFlags"))
return bus_set_transient_mount_flags(u, name, &c->mount_flags, message, flags, error);
+ if (streq(name, "NetworkNamespacePath"))
+ return bus_set_transient_path(u, name, &c->network_namespace_path, message, flags, error);
+
if (streq(name, "SupplementaryGroups")) {
_cleanup_strv_free_ char **l = NULL;
char **p;
diff --git a/src/core/execute.c b/src/core/execute.c
index 3f784dd1797..c6fd82bbf3e 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -3062,6 +3062,14 @@ static int exec_child(
}
}
+ if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
+ r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
+ if (r < 0) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
+ }
+ }
+
r = setup_input(context, params, socket_fd, named_iofds);
if (r < 0) {
*exit_status = EXIT_STDIN;
@@ -3272,13 +3280,17 @@ static int exec_child(
}
}
- if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
+ if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
+
if (ns_type_supported(NAMESPACE_NET)) {
r = setup_netns(runtime->netns_storage_socket);
if (r < 0) {
*exit_status = EXIT_NETWORK;
return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
}
+ } else if (context->network_namespace_path) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
} else
log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
}
@@ -3879,6 +3891,8 @@ void exec_context_done(ExecContext *c) {
c->stdin_data = mfree(c->stdin_data);
c->stdin_data_size = 0;
+
+ c->network_namespace_path = mfree(c->network_namespace_path);
}
int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
@@ -4556,6 +4570,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, s);
}
+ if (c->network_namespace_path)
+ fprintf(f,
+ "%sNetworkNamespacePath: %s\n",
+ prefix, c->network_namespace_path);
+
if (c->syscall_errno > 0) {
const char *errno_name;
@@ -4855,18 +4874,23 @@ static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
}
static void exec_runtime_freep(ExecRuntime **rt) {
- if (*rt)
- (void) exec_runtime_free(*rt, false);
+ (void) exec_runtime_free(*rt, false);
}
-static int exec_runtime_allocate(ExecRuntime **rt) {
- assert(rt);
+static int exec_runtime_allocate(ExecRuntime **ret) {
+ ExecRuntime *n;
- *rt = new0(ExecRuntime, 1);
- if (!*rt)
+ assert(ret);
+
+ n = new(ExecRuntime, 1);
+ if (!n)
return -ENOMEM;
- (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
+ *n = (ExecRuntime) {
+ .netns_storage_socket = { -1, -1 },
+ };
+
+ *ret = n;
return 0;
}
@@ -4929,7 +4953,7 @@ static int exec_runtime_add(
static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
_cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
- _cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
+ _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
int r;
assert(m);
@@ -4937,7 +4961,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
assert(id);
/* It is not necessary to create ExecRuntime object. */
- if (!c->private_network && !c->private_tmp)
+ if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
return 0;
if (c->private_tmp) {
@@ -4946,7 +4970,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
return r;
}
- if (c->private_network) {
+ if (c->private_network || c->network_namespace_path) {
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
return -errno;
}
@@ -4956,8 +4980,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
return r;
/* Avoid cleanup */
- netns_storage_socket[0] = -1;
- netns_storage_socket[1] = -1;
+ netns_storage_socket[0] = netns_storage_socket[1] = -1;
return 1;
}
diff --git a/src/core/execute.h b/src/core/execute.h
index 4b5b2d98cef..df6dd9f3886 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -279,6 +279,8 @@ struct ExecContext {
bool nice_set:1;
bool ioprio_set:1;
bool cpu_sched_set:1;
+
+ char *network_namespace_path;
};
static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index 2ac822ef4b2..c7c097d0a4a 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -114,6 +114,7 @@ $1.PrivateDevices, config_parse_bool, 0,
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
+$1.NetworkNamespacePath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.network_namespace_path)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index d482c40c240..02ac49d02cf 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -1661,14 +1661,14 @@ int setup_netns(int netns_storage_socket[static 2]) {
netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
if (netns == -EAGAIN) {
- /* Nothing stored yet, so let's create a new namespace */
+ /* Nothing stored yet, so let's create a new namespace. */
if (unshare(CLONE_NEWNET) < 0) {
r = -errno;
goto fail;
}
- loopback_setup();
+ (void) loopback_setup();
netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
if (netns < 0) {
@@ -1703,6 +1703,59 @@ fail:
return r;
}
+int open_netns_path(int netns_storage_socket[static 2], const char *path) {
+ _cleanup_close_ int netns = -1;
+ int q, r;
+
+ assert(netns_storage_socket);
+ assert(netns_storage_socket[0] >= 0);
+ assert(netns_storage_socket[1] >= 0);
+ assert(path);
+
+ /* If the storage socket doesn't contain a netns fd yet, open 'path', check that it refers to a netns
+ * and store the fd in the socket. Call this before setup_netns(), which otherwise allocates a new
+ * anonymous netns. Returns 1 if 'path' was opened, 0 if an fd was already stored, < 0 on error. */
+
+ if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
+ return -errno;
+
+ netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
+ if (netns == -EAGAIN) {
+ /* Nothing is stored in the socket yet, so open the namespace file given by 'path'. */
+
+ netns = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (netns < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ r = fd_is_network_ns(netns);
+ if (r == 0) { /* Definitely not a netns: refuse early with -EINVAL. */
+ r = -EINVAL;
+ goto fail;
+ }
+ if (r < 0 && r != -EUCLEAN) /* -EUCLEAN means "we don't know"; tolerate it and carry on */
+ goto fail;
+
+ r = 1;
+
+ } else if (netns < 0) {
+ r = netns;
+ goto fail;
+ } else
+ r = 0; /* An fd was already stored; it is put back into the socket below */
+
+ q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT); /* store the fd (back) in the socket */
+ if (q < 0) {
+ r = q;
+ goto fail;
+ }
+
+fail: /* also reached on success: the lock is released on every path past lockf() */
+ (void) lockf(netns_storage_socket[0], F_ULOCK, 0);
+ return r;
+}
+
bool ns_type_supported(NamespaceType type) {
const char *t, *ns_proc;
diff --git a/src/core/namespace.h b/src/core/namespace.h
index ab3983f790c..cd1e8b77bb2 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -93,6 +93,7 @@ int setup_tmp_dirs(
char **var_tmp_dir);
int setup_netns(int netns_storage_socket[static 2]);
+int open_netns_path(int netns_storage_socket[static 2], const char *path);
const char* protect_home_to_string(ProtectHome p) _const_;
ProtectHome protect_home_from_string(const char *s) _pure_;
diff --git a/src/core/socket.c b/src/core/socket.c
index af95e9027e1..3b60914c867 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1473,6 +1473,25 @@ static int socket_address_listen_do(
log_unit_error_errno(u, error, fmt, strna(_t)); \
})
+static int fork_needed(const SocketAddress *address, const ExecContext *context) {
+ int r;
+
+ assert(address);
+ assert(context);
+
+ /* Check if the cgroup or netns handling is needed for this address. Returns > 0 if so, 0 if not, < 0 on error. */
+
+ if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) {
+ r = bpf_firewall_supported();
+ if (r < 0)
+ return r;
+ if (r != BPF_FIREWALL_UNSUPPORTED) /* Anything but "unsupported": BPF firewalling may apply to this IPv4/IPv6 socket */
+ return true;
+ }
+
+ return context->private_network || context->network_namespace_path; /* a configured netns needs it too */
+}
+
static int socket_address_listen_in_cgroup(
Socket *s,
const SocketAddress *address,
@@ -1485,18 +1504,34 @@ static int socket_address_listen_in_cgroup(
assert(s);
assert(address);
- /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
- * in which the socket is actually created. This way we ensure the socket is actually properly attached to the
- * unit's cgroup for the purpose of BPF filtering and such. */
+ /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the
+ * socket's cgroup and network namespace in which the socket is actually created. This way we ensure
+ * the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
+ * such. */
- if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
- goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
-
- r = bpf_firewall_supported();
+ r = fork_needed(address, &s->exec_context);
if (r < 0)
return r;
- if (r == BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
- goto shortcut;
+ if (r == 0) {
+ /* Shortcut things... */
+ fd = socket_address_listen_do(s, address, label);
+ if (fd < 0)
+ return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
+
+ return fd;
+ }
+
+ r = unit_setup_exec_runtime(UNIT(s));
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed acquire runtime: %m");
+
+ if (s->exec_context.network_namespace_path &&
+ s->exec_runtime &&
+ s->exec_runtime->netns_storage_socket[0] >= 0) {
+ r = open_netns_path(s->exec_runtime->netns_storage_socket, s->exec_context.network_namespace_path);
+ if (r < 0)
+ return log_unit_error_errno(UNIT(s), r, "Failed to open network namespace path %s: %m", s->exec_context.network_namespace_path);
+ }
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
@@ -1509,6 +1544,23 @@ static int socket_address_listen_in_cgroup(
pair[0] = safe_close(pair[0]);
+ if ((s->exec_context.private_network || s->exec_context.network_namespace_path) &&
+ s->exec_runtime &&
+ s->exec_runtime->netns_storage_socket[0] >= 0) {
+
+ if (ns_type_supported(NAMESPACE_NET)) {
+ r = setup_netns(s->exec_runtime->netns_storage_socket);
+ if (r < 0) {
+ log_unit_error_errno(UNIT(s), r, "Failed to join network namespace: %m");
+ _exit(EXIT_NETWORK);
+ }
+ } else if (s->exec_context.network_namespace_path) {
+ log_unit_error(UNIT(s), "Network namespace path configured but network namespaces not supported.");
+ _exit(EXIT_NETWORK);
+ } else
+ log_unit_warning(UNIT(s), "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
+ }
+
fd = socket_address_listen_do(s, address, label);
if (fd < 0) {
log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
@@ -1538,13 +1590,6 @@ static int socket_address_listen_in_cgroup(
return log_address_error_errno(UNIT(s), address, fd, "Failed to receive listening socket (%s): %m");
return fd;
-
-shortcut:
- fd = socket_address_listen_do(s, address, label);
- if (fd < 0)
- return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
-
- return fd;
}
DEFINE_TRIVIAL_CLEANUP_FUNC(Socket *, socket_close_fds);
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index dff87f565ed..3ea1bd29c90 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -744,7 +744,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"UtmpIdentifier", "UtmpMode", "PAMName", "TTYPath",
"WorkingDirectory", "RootDirectory", "SyslogIdentifier",
"ProtectSystem", "ProtectHome", "SELinuxContext", "RootImage",
- "RuntimeDirectoryPreserve", "Personality", "KeyringMode"))
+ "RuntimeDirectoryPreserve", "Personality", "KeyringMode", "NetworkNamespacePath"))
return bus_append_string(m, field, eq);