mirror of
https://github.com/systemd/systemd.git
synced 2024-12-23 21:35:11 +03:00
Merge pull request #11927 from poettering/network-namespace-path
Add NetworkNamespacePath= to unit files
This commit is contained in:
commit
fb6692ed33
@ -1100,7 +1100,29 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
|
||||
|
||||
<para>Note that the implementation of this setting might be impossible (for example if network namespaces are
|
||||
not available), and the unit should be written in a way that does not solely rely on this setting for
|
||||
security.</para></listitem>
|
||||
security.</para>
|
||||
|
||||
<para>When this option is used on a socket unit any sockets bound on behalf of this unit will be
|
||||
bound within a private network namespace. This may be combined with
|
||||
<varname>JoinsNamespaceOf=</varname> to listen on sockets inside of network namespaces of other
|
||||
services.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NetworkNamespacePath=</varname></term>
|
||||
|
||||
<listitem><para>Takes an absolute file system path referring to a Linux network namespace
|
||||
pseudo-file (i.e. a file like <filename>/proc/$PID/ns/net</filename> or a bind mount or symlink to
|
||||
one). When set the invoked processes are added to the network namespace referenced by that path. The
|
||||
path has to point to a valid namespace file at the moment the processes are forked off. If this
|
||||
option is used <varname>PrivateNetwork=</varname> has no effect. If this option is used together with
|
||||
<varname>JoinsNamespaceOf=</varname> then it only has an effect if this unit is started before any of
|
||||
the listed units that have <varname>PrivateNetwork=</varname> or
|
||||
<varname>NetworkNamespacePath=</varname> configured, as otherwise the network namespace of those
|
||||
units is reused.</para>
|
||||
|
||||
<para>When this option is used on a socket unit any sockets bound on behalf of this unit will be
|
||||
bound within the specified network namespace.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -728,23 +728,18 @@
|
||||
<varlistentry>
|
||||
<term><varname>JoinsNamespaceOf=</varname></term>
|
||||
|
||||
<listitem><para>For units that start processes (such as
|
||||
service units), lists one or more other units whose network
|
||||
and/or temporary file namespace to join. This only applies to
|
||||
unit types which support the
|
||||
<varname>PrivateNetwork=</varname> and
|
||||
<listitem><para>For units that start processes (such as service units), lists one or more other units
|
||||
whose network and/or temporary file namespace to join. This only applies to unit types which support
|
||||
the <varname>PrivateNetwork=</varname>, <varname>NetworkNamespacePath=</varname> and
|
||||
<varname>PrivateTmp=</varname> directives (see
|
||||
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>
|
||||
for details). If a unit that has this setting set is started,
|
||||
its processes will see the same <filename>/tmp</filename>,
|
||||
<filename>/var/tmp</filename> and network namespace as one
|
||||
listed unit that is started. If multiple listed units are
|
||||
already started, it is not defined which namespace is joined.
|
||||
Note that this setting only has an effect if
|
||||
<varname>PrivateNetwork=</varname> and/or
|
||||
<varname>PrivateTmp=</varname> is enabled for both the unit
|
||||
that joins the namespace and the unit whose namespace is
|
||||
joined.</para></listitem>
|
||||
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
|
||||
details). If a unit that has this setting set is started, its processes will see the same
|
||||
<filename>/tmp</filename>, <filename>/var/tmp</filename> and network namespace as one listed unit
|
||||
that is started. If multiple listed units are already started, it is not defined which namespace is
|
||||
joined. Note that this setting only has an effect if
|
||||
<varname>PrivateNetwork=</varname>/<varname>NetworkNamespacePath=</varname> and/or
|
||||
<varname>PrivateTmp=</varname> is enabled for both the unit that joins the namespace and the unit
|
||||
whose namespace is joined.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
|
@ -778,6 +778,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
|
||||
SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
|
||||
|
||||
/* Obsolete/redundant properties: */
|
||||
SD_BUS_PROPERTY("Capabilities", "s", property_get_empty_string, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
|
||||
@ -1217,6 +1218,9 @@ int bus_exec_context_set_transient_property(
|
||||
if (streq(name, "MountFlags"))
|
||||
return bus_set_transient_mount_flags(u, name, &c->mount_flags, message, flags, error);
|
||||
|
||||
if (streq(name, "NetworkNamespacePath"))
|
||||
return bus_set_transient_path(u, name, &c->network_namespace_path, message, flags, error);
|
||||
|
||||
if (streq(name, "SupplementaryGroups")) {
|
||||
_cleanup_strv_free_ char **l = NULL;
|
||||
char **p;
|
||||
|
@ -3062,6 +3062,14 @@ static int exec_child(
|
||||
}
|
||||
}
|
||||
|
||||
if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
|
||||
r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_NETWORK;
|
||||
return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
|
||||
}
|
||||
}
|
||||
|
||||
r = setup_input(context, params, socket_fd, named_iofds);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_STDIN;
|
||||
@ -3272,13 +3280,17 @@ static int exec_child(
|
||||
}
|
||||
}
|
||||
|
||||
if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
|
||||
if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
|
||||
|
||||
if (ns_type_supported(NAMESPACE_NET)) {
|
||||
r = setup_netns(runtime->netns_storage_socket);
|
||||
if (r < 0) {
|
||||
*exit_status = EXIT_NETWORK;
|
||||
return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
|
||||
}
|
||||
} else if (context->network_namespace_path) {
|
||||
*exit_status = EXIT_NETWORK;
|
||||
return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
|
||||
} else
|
||||
log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
|
||||
}
|
||||
@ -3879,6 +3891,8 @@ void exec_context_done(ExecContext *c) {
|
||||
|
||||
c->stdin_data = mfree(c->stdin_data);
|
||||
c->stdin_data_size = 0;
|
||||
|
||||
c->network_namespace_path = mfree(c->network_namespace_path);
|
||||
}
|
||||
|
||||
int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
|
||||
@ -4556,6 +4570,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
|
||||
prefix, s);
|
||||
}
|
||||
|
||||
if (c->network_namespace_path)
|
||||
fprintf(f,
|
||||
"%sNetworkNamespacePath: %s\n",
|
||||
prefix, c->network_namespace_path);
|
||||
|
||||
if (c->syscall_errno > 0) {
|
||||
const char *errno_name;
|
||||
|
||||
@ -4855,18 +4874,23 @@ static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
|
||||
}
|
||||
|
||||
static void exec_runtime_freep(ExecRuntime **rt) {
|
||||
if (*rt)
|
||||
(void) exec_runtime_free(*rt, false);
|
||||
(void) exec_runtime_free(*rt, false);
|
||||
}
|
||||
|
||||
static int exec_runtime_allocate(ExecRuntime **rt) {
|
||||
assert(rt);
|
||||
static int exec_runtime_allocate(ExecRuntime **ret) {
|
||||
ExecRuntime *n;
|
||||
|
||||
*rt = new0(ExecRuntime, 1);
|
||||
if (!*rt)
|
||||
assert(ret);
|
||||
|
||||
n = new(ExecRuntime, 1);
|
||||
if (!n)
|
||||
return -ENOMEM;
|
||||
|
||||
(*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
|
||||
*n = (ExecRuntime) {
|
||||
.netns_storage_socket = { -1, -1 },
|
||||
};
|
||||
|
||||
*ret = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -4929,7 +4953,7 @@ static int exec_runtime_add(
|
||||
|
||||
static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
|
||||
_cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
|
||||
_cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
|
||||
_cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
|
||||
int r;
|
||||
|
||||
assert(m);
|
||||
@ -4937,7 +4961,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
|
||||
assert(id);
|
||||
|
||||
/* It is not necessary to create ExecRuntime object. */
|
||||
if (!c->private_network && !c->private_tmp)
|
||||
if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
|
||||
return 0;
|
||||
|
||||
if (c->private_tmp) {
|
||||
@ -4946,7 +4970,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
|
||||
return r;
|
||||
}
|
||||
|
||||
if (c->private_network) {
|
||||
if (c->private_network || c->network_namespace_path) {
|
||||
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
|
||||
return -errno;
|
||||
}
|
||||
@ -4956,8 +4980,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
|
||||
return r;
|
||||
|
||||
/* Avoid cleanup */
|
||||
netns_storage_socket[0] = -1;
|
||||
netns_storage_socket[1] = -1;
|
||||
netns_storage_socket[0] = netns_storage_socket[1] = -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -279,6 +279,8 @@ struct ExecContext {
|
||||
bool nice_set:1;
|
||||
bool ioprio_set:1;
|
||||
bool cpu_sched_set:1;
|
||||
|
||||
char *network_namespace_path;
|
||||
};
|
||||
|
||||
static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {
|
||||
|
@ -114,6 +114,7 @@ $1.PrivateDevices, config_parse_bool, 0,
|
||||
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
|
||||
$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
|
||||
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
|
||||
$1.NetworkNamespacePath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.network_namespace_path)
|
||||
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
|
||||
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
|
||||
$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)
|
||||
|
@ -1661,14 +1661,14 @@ int setup_netns(int netns_storage_socket[static 2]) {
|
||||
|
||||
netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
|
||||
if (netns == -EAGAIN) {
|
||||
/* Nothing stored yet, so let's create a new namespace */
|
||||
/* Nothing stored yet, so let's create a new namespace. */
|
||||
|
||||
if (unshare(CLONE_NEWNET) < 0) {
|
||||
r = -errno;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
loopback_setup();
|
||||
(void) loopback_setup();
|
||||
|
||||
netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
|
||||
if (netns < 0) {
|
||||
@ -1703,6 +1703,59 @@ fail:
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Makes sure the netns storage socket pair holds a network namespace fd, opening 'path' if it is empty.
 *
 * netns_storage_socket: socket pair used as storage; [0] is locked and read from, [1] is written to.
 * path: file system path that is opened and checked with fd_is_network_ns() if nothing is stored yet.
 *
 * Returns 1 if the namespace fd was freshly opened from 'path' and stored, 0 if an fd was already
 * stored (it is put back unchanged, 'path' is not looked at), and a negative errno-style value on
 * failure (-EINVAL if 'path' does not refer to a network namespace). */
int open_netns_path(int netns_storage_socket[static 2], const char *path) {
|
||||
_cleanup_close_ int netns = -1;
|
||||
int q, r;
|
||||
|
||||
assert(netns_storage_socket);
|
||||
assert(netns_storage_socket[0] >= 0);
|
||||
assert(netns_storage_socket[1] >= 0);
|
||||
assert(path);
|
||||
|
||||
/* If the storage socket doesn't contain a netns fd yet, open one via the file system and store it in
|
||||
* it. This is supposed to be called ahead of time, i.e. before setup_netns() which will allocate a
|
||||
* new anonymous netns if needed. */
|
||||
|
||||
if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0) /* Lock is held until the 'fail' label below */
|
||||
return -errno;
|
||||
|
||||
netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
|
||||
if (netns == -EAGAIN) {
|
||||
/* Nothing stored yet. Open the file from the file system. */
|
||||
|
||||
netns = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
|
||||
if (netns < 0) {
|
||||
r = -errno;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
r = fd_is_network_ns(netns);
|
||||
if (r == 0) { /* Not a netns? Refuse early. */
|
||||
r = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
if (r < 0 && r != -EUCLEAN) /* EUCLEAN: we don't know, so the fd is accepted anyway */
|
||||
goto fail;
|
||||
|
||||
r = 1;
|
||||
|
||||
} else if (netns < 0) {
|
||||
r = netns;
|
||||
goto fail;
|
||||
} else
|
||||
r = 0; /* Already allocated */
|
||||
|
||||
/* In both successful cases the fd goes (back) into the storage socket. */
q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT);
|
||||
if (q < 0) {
|
||||
r = q;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fail: /* Also reached on success: the lock is always released before returning r. */
|
||||
(void) lockf(netns_storage_socket[0], F_ULOCK, 0);
|
||||
return r;
|
||||
}
|
||||
|
||||
bool ns_type_supported(NamespaceType type) {
|
||||
const char *t, *ns_proc;
|
||||
|
||||
|
@ -93,6 +93,7 @@ int setup_tmp_dirs(
|
||||
char **var_tmp_dir);
|
||||
|
||||
int setup_netns(int netns_storage_socket[static 2]);
|
||||
int open_netns_path(int netns_storage_socket[static 2], const char *path);
|
||||
|
||||
const char* protect_home_to_string(ProtectHome p) _const_;
|
||||
ProtectHome protect_home_from_string(const char *s) _pure_;
|
||||
|
@ -1473,6 +1473,25 @@ static int socket_address_listen_do(
|
||||
log_unit_error_errno(u, error, fmt, strna(_t)); \
|
||||
})
|
||||
|
||||
/* Decides whether the listening socket for 'address' has to be created in a forked-off helper process.
 *
 * Returns a negative value if bpf_firewall_supported() fails, true if the address is AF_INET/AF_INET6
 * and BPF firewalling is not reported as unsupported, and otherwise whether the context has
 * private_network or network_namespace_path set. */
static int fork_needed(const SocketAddress *address, const ExecContext *context) {
|
||||
int r;
|
||||
|
||||
assert(address);
|
||||
assert(context);
|
||||
|
||||
/* Check if we need to do the cgroup or netns stuff. If not we can do things much simpler. */
|
||||
|
||||
if (IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6)) {
|
||||
r = bpf_firewall_supported();
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r != BPF_FIREWALL_UNSUPPORTED) /* BPF firewalling is not ruled out, so fork; only if it isn't supported anyway is there no point in this forking complexity */
|
||||
return true;
|
||||
}
|
||||
|
||||
return context->private_network || context->network_namespace_path;
|
||||
}
|
||||
|
||||
static int socket_address_listen_in_cgroup(
|
||||
Socket *s,
|
||||
const SocketAddress *address,
|
||||
@ -1485,18 +1504,34 @@ static int socket_address_listen_in_cgroup(
|
||||
assert(s);
|
||||
assert(address);
|
||||
|
||||
/* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
|
||||
* in which the socket is actually created. This way we ensure the socket is actually properly attached to the
|
||||
* unit's cgroup for the purpose of BPF filtering and such. */
|
||||
/* This is a wrapper around socket_address_listen(), that forks off a helper process inside the
|
||||
* socket's cgroup and network namespace in which the socket is actually created. This way we ensure
|
||||
* the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
|
||||
* such. */
|
||||
|
||||
if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
|
||||
goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
|
||||
|
||||
r = bpf_firewall_supported();
|
||||
r = fork_needed(address, &s->exec_context);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r == BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
|
||||
goto shortcut;
|
||||
if (r == 0) {
|
||||
/* Shortcut things... */
|
||||
fd = socket_address_listen_do(s, address, label);
|
||||
if (fd < 0)
|
||||
return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
r = unit_setup_exec_runtime(UNIT(s));
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(UNIT(s), r, "Failed acquire runtime: %m");
|
||||
|
||||
if (s->exec_context.network_namespace_path &&
|
||||
s->exec_runtime &&
|
||||
s->exec_runtime->netns_storage_socket[0] >= 0) {
|
||||
r = open_netns_path(s->exec_runtime->netns_storage_socket, s->exec_context.network_namespace_path);
|
||||
if (r < 0)
|
||||
return log_unit_error_errno(UNIT(s), r, "Failed to open network namespace path %s: %m", s->exec_context.network_namespace_path);
|
||||
}
|
||||
|
||||
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
|
||||
return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
|
||||
@ -1509,6 +1544,23 @@ static int socket_address_listen_in_cgroup(
|
||||
|
||||
pair[0] = safe_close(pair[0]);
|
||||
|
||||
if ((s->exec_context.private_network || s->exec_context.network_namespace_path) &&
|
||||
s->exec_runtime &&
|
||||
s->exec_runtime->netns_storage_socket[0] >= 0) {
|
||||
|
||||
if (ns_type_supported(NAMESPACE_NET)) {
|
||||
r = setup_netns(s->exec_runtime->netns_storage_socket);
|
||||
if (r < 0) {
|
||||
log_unit_error_errno(UNIT(s), r, "Failed to join network namespace: %m");
|
||||
_exit(EXIT_NETWORK);
|
||||
}
|
||||
} else if (s->exec_context.network_namespace_path) {
|
||||
log_unit_error(UNIT(s), "Network namespace path configured but network namespaces not supported.");
|
||||
_exit(EXIT_NETWORK);
|
||||
} else
|
||||
log_unit_warning(UNIT(s), "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
|
||||
}
|
||||
|
||||
fd = socket_address_listen_do(s, address, label);
|
||||
if (fd < 0) {
|
||||
log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
|
||||
@ -1538,13 +1590,6 @@ static int socket_address_listen_in_cgroup(
|
||||
return log_address_error_errno(UNIT(s), address, fd, "Failed to receive listening socket (%s): %m");
|
||||
|
||||
return fd;
|
||||
|
||||
shortcut:
|
||||
fd = socket_address_listen_do(s, address, label);
|
||||
if (fd < 0)
|
||||
return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(Socket *, socket_close_fds);
|
||||
|
@ -744,7 +744,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
|
||||
"UtmpIdentifier", "UtmpMode", "PAMName", "TTYPath",
|
||||
"WorkingDirectory", "RootDirectory", "SyslogIdentifier",
|
||||
"ProtectSystem", "ProtectHome", "SELinuxContext", "RootImage",
|
||||
"RuntimeDirectoryPreserve", "Personality", "KeyringMode"))
|
||||
"RuntimeDirectoryPreserve", "Personality", "KeyringMode", "NetworkNamespacePath"))
|
||||
|
||||
return bus_append_string(m, field, eq);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user