mirror of
https://github.com/systemd/systemd.git
synced 2024-10-30 23:21:22 +03:00
nspawn: add support for executing OCI runtime bundles with nspawn
This is a pretty large patch, and adds support for OCI runtime bundles to nspawn. A new switch --oci-bundle= is added that takes a path to an OCI bundle. The JSON file included therein is read similarly to a .nspawn settings file, however with a different feature set. Implementation-wise this mostly extends the pre-existing Settings object to carry additional properties for OCI. However, OCI supports some concepts .nspawn files did not support yet, which this patch also adds: 1. Support for "masking" files and directories. This functionality is now also available via the new --inaccessible= cmdline switch, and Inaccessible= in .nspawn files. 2. Support for mounting arbitrary file systems. (not exposed through nspawn cmdline nor .nspawn files, because probably not a good idea) 3. Ability to configure the console settings for a container. This functionality is now also available on the nspawn cmdline in the new --console= switch (not added to .nspawn for now, as it is something specific to the invocation really, not a property of the container) 4. Console width/height configuration. Not exposed through .nspawn/cmdline, but this may be controlled through $COLUMNS and $LINES like in most other UNIX tools. 5. UID/GID configuration by raw numbers. (not exposed in .nspawn and on the cmdline, since containers likely have different user tables, and the existing --user= switch appears to be the better option) 6. OCI hook commands (not exposed in .nspawn/cmdline, as very specific to OCI) 7. Creation of additional device nodes in /dev. Most likely not a good idea, hence not exposed in .nspawn/cmdline. There's already --bind= to achieve the same, which is the better alternative. 8. Explicit syscall filters. This is not a good idea, due to the skewed arch support, hence not exposed through .nspawn/cmdline. 9. Configuration of some sysctls on a whitelist. Questionable, not supported in .nspawn/cmdline for now. 10. Configuration of all 5 types of capabilities. 
Not a useful concept, since the kernel will reduce the caps on execve() anyway. Hence not exposed through .nspawn/cmdline, as this is not very useful. Note that this only implements the OCI runtime logic itself. It does not provide a runc-compatible command line tool. This is left for a later PR. Only with that in place can tools such as "buildah" use the OCI support in nspawn as a drop-in replacement. Currently still missing is OCI hook support, but it's already parsed and everything, and should be easy to add. Other than that, OCI is implemented pretty comprehensively. There's a list of incompatibilities in the nspawn-oci.c file. In a later PR I'd like to convert this into proper markdown and add it to the documentation directory.
This commit is contained in:
parent
5ef4cb7ad0
commit
de40a3037a
@ -10,6 +10,8 @@ libnspawn_core_sources = files('''
|
||||
nspawn-mount.h
|
||||
nspawn-network.c
|
||||
nspawn-network.h
|
||||
nspawn-oci.c
|
||||
nspawn-oci.h
|
||||
nspawn-patch-uid.c
|
||||
nspawn-patch-uid.h
|
||||
nspawn-register.c
|
||||
|
@ -62,6 +62,7 @@ Files.Volatile, config_parse_volatile_mode, 0, of
|
||||
Files.Bind, config_parse_bind, 0, 0
|
||||
Files.BindReadOnly, config_parse_bind, 1, 0
|
||||
Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0
|
||||
Files.Inaccessible, config_parse_inaccessible, 0, 0
|
||||
Files.Overlay, config_parse_overlay, 0, 0
|
||||
Files.OverlayReadOnly, config_parse_overlay, 1, 0
|
||||
Files.PrivateUsersChown, config_parse_tristate, 0, offsetof(Settings, userns_chown)
|
||||
|
@ -65,6 +65,7 @@ void custom_mount_free_all(CustomMount *l, size_t n) {
|
||||
}
|
||||
|
||||
strv_free(m->lower);
|
||||
free(m->type_argument);
|
||||
}
|
||||
|
||||
free(l);
|
||||
@ -116,6 +117,13 @@ int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
|
||||
for (i = 0; i < n; i++) {
|
||||
CustomMount *m = l + i;
|
||||
|
||||
/* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we mount
|
||||
* already in the outer child, so that the mounts are already established before CLONE_NEWPID and in
|
||||
* particular CLONE_NEWUSER. This also means any custom mounts below /proc also need to be mounted in
|
||||
* the inner child, not the outer one. Determine this here. */
|
||||
m->in_userns = path_startswith(m->destination, "/proc");
|
||||
|
||||
if (m->type == CUSTOM_MOUNT_BIND) {
|
||||
if (m->source) {
|
||||
char *s;
|
||||
|
||||
@ -143,6 +151,7 @@ int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
|
||||
if (mkdir(m->source, 0755) < 0)
|
||||
return log_error_errno(errno, "Failed to create %s: %m", m->source);
|
||||
}
|
||||
}
|
||||
|
||||
if (m->type == CUSTOM_MOUNT_OVERLAY) {
|
||||
char **j;
|
||||
@ -223,6 +232,7 @@ int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only)
|
||||
m->destination = TAKE_PTR(destination);
|
||||
m->read_only = read_only;
|
||||
m->options = TAKE_PTR(opts);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -327,6 +337,29 @@ int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_onl
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Registers the path 's' as a CUSTOM_MOUNT_INACCESSIBLE entry in the custom mount list '*l' (whose
 * element count is tracked in '*n').
 *
 * Returns 0 on success, -EINVAL if 's' is not an absolute path, and -ENOMEM if either the path copy or
 * the new list entry cannot be allocated. */
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s) {
        _cleanup_free_ char *destination = NULL;
        CustomMount *entry;

        assert(l);
        assert(n);
        assert(s);

        if (!path_is_absolute(s))
                return -EINVAL;

        /* Duplicate the string before asking for a new list entry, so that a failed copy never
         * leaves a half-initialized entry behind. */
        destination = strdup(s);
        if (!destination)
                return -ENOMEM;

        entry = custom_mount_add(l, n, CUSTOM_MOUNT_INACCESSIBLE);
        if (!entry)
                return -ENOMEM;

        /* Hand the copy over to the entry; TAKE_PTR() keeps the _cleanup_free_ handler from
         * releasing it when we return. */
        entry->destination = TAKE_PTR(destination);

        return 0;
}
|
||||
|
||||
int tmpfs_patch_options(
|
||||
const char *options,
|
||||
uid_t uid_shift,
|
||||
@ -494,9 +527,9 @@ int mount_all(const char *dest,
|
||||
uid_t uid_shift,
|
||||
const char *selinux_apifs_context) {
|
||||
|
||||
#define PROC_INACCESSIBLE(path) \
|
||||
{ NULL, (path), NULL, NULL, MS_BIND, \
|
||||
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_INACCESSIBLE_REG }, /* Bind mount first ... */ \
|
||||
#define PROC_INACCESSIBLE_REG(path) \
|
||||
{ "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
|
||||
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
|
||||
{ NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
|
||||
MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
|
||||
|
||||
@ -531,11 +564,11 @@ int mount_all(const char *dest,
|
||||
|
||||
/* Make these files inaccessible to container payloads: they potentially leak information about kernel
|
||||
* internals or the host's execution environment to the container */
|
||||
PROC_INACCESSIBLE("/proc/kallsyms"),
|
||||
PROC_INACCESSIBLE("/proc/kcore"),
|
||||
PROC_INACCESSIBLE("/proc/keys"),
|
||||
PROC_INACCESSIBLE("/proc/sysrq-trigger"),
|
||||
PROC_INACCESSIBLE("/proc/timer_list"),
|
||||
PROC_INACCESSIBLE_REG("/proc/kallsyms"),
|
||||
PROC_INACCESSIBLE_REG("/proc/kcore"),
|
||||
PROC_INACCESSIBLE_REG("/proc/keys"),
|
||||
PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
|
||||
PROC_INACCESSIBLE_REG("/proc/timer_list"),
|
||||
|
||||
/* Make these directories read-only to container payloads: they show hardware information, and in some
|
||||
* cases contain tunables the container really shouldn't have access to. */
|
||||
@ -573,7 +606,6 @@ int mount_all(const char *dest,
|
||||
#endif
|
||||
};
|
||||
|
||||
_cleanup_(unlink_and_freep) char *inaccessible = NULL;
|
||||
bool use_userns = (mount_settings & MOUNT_USE_USERNS);
|
||||
bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
|
||||
bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
|
||||
@ -584,7 +616,7 @@ int mount_all(const char *dest,
|
||||
|
||||
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
|
||||
_cleanup_free_ char *where = NULL, *options = NULL;
|
||||
const char *o, *what;
|
||||
const char *o;
|
||||
bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
|
||||
|
||||
if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
|
||||
@ -603,33 +635,14 @@ int mount_all(const char *dest,
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
|
||||
|
||||
if (mount_table[k].mount_settings & MOUNT_INACCESSIBLE_REG) {
|
||||
|
||||
if (!inaccessible) {
|
||||
_cleanup_free_ char *np = NULL;
|
||||
|
||||
r = tempfn_random_child(NULL, "inaccessible", &np);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to generate inaccessible file node path: %m");
|
||||
|
||||
r = touch_file(np, false, USEC_INFINITY, UID_INVALID, GID_INVALID, 0000);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create inaccessible file node '%s': %m", np);
|
||||
|
||||
inaccessible = TAKE_PTR(np);
|
||||
}
|
||||
|
||||
what = inaccessible;
|
||||
} else
|
||||
what = mount_table[k].what;
|
||||
|
||||
/* Skip this entry if it is not a remount. */
|
||||
if (mount_table[k].what) {
|
||||
r = path_is_mount_point(where, NULL, 0);
|
||||
if (r < 0 && r != -ENOENT)
|
||||
return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
|
||||
|
||||
/* Skip this entry if it is not a remount. */
|
||||
if (what && r > 0)
|
||||
if (r > 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
|
||||
if (r < 0 && r != -EEXIST) {
|
||||
@ -654,7 +667,7 @@ int mount_all(const char *dest,
|
||||
}
|
||||
|
||||
r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
|
||||
what,
|
||||
mount_table[k].what,
|
||||
where,
|
||||
mount_table[k].type,
|
||||
mount_table[k].flags,
|
||||
@ -667,7 +680,6 @@ int mount_all(const char *dest,
|
||||
}
|
||||
|
||||
static int mount_bind(const char *dest, CustomMount *m) {
|
||||
|
||||
_cleanup_free_ char *where = NULL;
|
||||
struct stat source_st, dest_st;
|
||||
int r;
|
||||
@ -711,7 +723,6 @@ static int mount_bind(const char *dest, CustomMount *m) {
|
||||
r = touch(where);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create mount point %s: %m", where);
|
||||
|
||||
}
|
||||
|
||||
r = mount_verbose(LOG_ERR, m->source, where, NULL, MS_BIND | MS_REC, m->options);
|
||||
@ -773,7 +784,6 @@ static char *joined_and_escaped_lower_dirs(char **lower) {
|
||||
}
|
||||
|
||||
static int mount_overlay(const char *dest, CustomMount *m) {
|
||||
|
||||
_cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
|
||||
const char *options;
|
||||
int r;
|
||||
@ -815,11 +825,59 @@ static int mount_overlay(const char *dest, CustomMount *m) {
|
||||
return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
|
||||
}
|
||||
|
||||
/* Masks the file system object m->destination (resolved below the root 'dest') by bind mounting an
 * inaccessible node of the matching file type over it and then remounting that bind mount read-only.
 *
 * If m->graceful is set, failures are logged at debug level only and 0 is returned; otherwise they are
 * logged as errors and the negative errno-style code is returned. Returns 0 on success. */
static int mount_inaccessible(const char *dest, CustomMount *m) {
        _cleanup_free_ char *where = NULL;
        const char *source;
        struct stat st;
        int r;

        assert(dest);
        assert(m);

        r = chase_symlinks_and_stat(m->destination, dest, CHASE_PREFIX_ROOT, &where, &st);
        if (r < 0) {
                log_full_errno(m->graceful ? LOG_DEBUG : LOG_ERR, r, "Failed to resolve %s/%s: %m", dest, m->destination);
                return m->graceful ? 0 : r;
        }

        /* Pick the inaccessible node whose file type matches the object we are about to cover. */
        assert_se(source = mode_to_inaccessible_node(st.st_mode));

        r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL);
        if (r < 0)
                return m->graceful ? 0 : r;

        r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
        if (r < 0) {
                /* The bind mount above succeeded but could not be made read-only. Do not leave this
                 * half-configured mount behind (in graceful mode we would even report success for
                 * it): take it down again, on a best-effort basis. */
                (void) umount_verbose(where);
                return m->graceful ? 0 : r;
        }

        return 0;
}
|
||||
|
||||
/* Mounts m->source on m->destination (resolved below the root 'dest'), passing m->type_argument as the
 * file system type and m->options as the mount options. The mount point is created as a directory if
 * it does not exist yet.
 *
 * Returns the result of mount_verbose(), or a negative errno-style code if resolving or creating the
 * mount point fails. */
static int mount_arbitrary(const char *dest, CustomMount *m) {
        _cleanup_free_ char *target = NULL;
        int found, k;

        assert(dest);
        assert(m);

        found = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &target);
        if (found < 0)
                return log_error_errno(found, "Failed to resolve %s/%s: %m", dest, m->destination);

        if (found == 0) {
                /* The destination does not exist yet, so create it as a directory to mount on. */
                k = mkdir_p_label(target, 0755);
                if (k < 0)
                        return log_error_errno(k, "Creating mount point for mount %s failed: %m", target);
        }

        return mount_verbose(LOG_ERR, m->source, target, m->type_argument, 0, m->options);
}
|
||||
|
||||
int mount_custom(
|
||||
const char *dest,
|
||||
CustomMount *mounts, size_t n,
|
||||
bool userns, uid_t uid_shift, uid_t uid_range,
|
||||
const char *selinux_apifs_context) {
|
||||
const char *selinux_apifs_context,
|
||||
bool in_userns) {
|
||||
|
||||
size_t i;
|
||||
int r;
|
||||
@ -829,6 +887,9 @@ int mount_custom(
|
||||
for (i = 0; i < n; i++) {
|
||||
CustomMount *m = mounts + i;
|
||||
|
||||
if (m->in_userns != in_userns)
|
||||
continue;
|
||||
|
||||
switch (m->type) {
|
||||
|
||||
case CUSTOM_MOUNT_BIND:
|
||||
@ -843,6 +904,14 @@ int mount_custom(
|
||||
r = mount_overlay(dest, m);
|
||||
break;
|
||||
|
||||
case CUSTOM_MOUNT_INACCESSIBLE:
|
||||
r = mount_inaccessible(dest, m);
|
||||
break;
|
||||
|
||||
case CUSTOM_MOUNT_ARBITRARY:
|
||||
r = mount_arbitrary(dest, m);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert_not_reached("Unknown custom mount type");
|
||||
}
|
||||
|
@ -13,14 +13,15 @@ typedef enum MountSettingsMask {
|
||||
MOUNT_APPLY_APIVFS_RO = 1 << 3, /* if set, /proc/sys, and /sys will be mounted read-only, otherwise read-write. */
|
||||
MOUNT_APPLY_APIVFS_NETNS = 1 << 4, /* if set, /proc/sys/net will be mounted read-write.
|
||||
Works only if MOUNT_APPLY_APIVFS_RO is also set. */
|
||||
MOUNT_INACCESSIBLE_REG = 1 << 5, /* if set, create an inaccessible regular file first and use as bind mount source */
|
||||
MOUNT_APPLY_TMPFS_TMP = 1 << 6, /* if set, /tmp will be mounted as tmpfs */
|
||||
MOUNT_APPLY_TMPFS_TMP = 1 << 5, /* if set, /tmp will be mounted as tmpfs */
|
||||
} MountSettingsMask;
|
||||
|
||||
typedef enum CustomMountType {
|
||||
CUSTOM_MOUNT_BIND,
|
||||
CUSTOM_MOUNT_TMPFS,
|
||||
CUSTOM_MOUNT_OVERLAY,
|
||||
CUSTOM_MOUNT_INACCESSIBLE,
|
||||
CUSTOM_MOUNT_ARBITRARY,
|
||||
_CUSTOM_MOUNT_TYPE_MAX,
|
||||
_CUSTOM_MOUNT_TYPE_INVALID = -1
|
||||
} CustomMountType;
|
||||
@ -34,6 +35,9 @@ typedef struct CustomMount {
|
||||
char *work_dir;
|
||||
char **lower;
|
||||
char *rm_rf_tmpdir;
|
||||
char *type_argument; /* only for CUSTOM_MOUNT_ARBITRARY */
|
||||
bool graceful;
|
||||
bool in_userns;
|
||||
} CustomMount;
|
||||
|
||||
CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t);
|
||||
@ -43,11 +47,12 @@ int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n);
|
||||
int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
|
||||
int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s);
|
||||
int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only);
|
||||
int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s);
|
||||
|
||||
int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context);
|
||||
int mount_sysfs(const char *dest, MountSettingsMask mount_settings);
|
||||
|
||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
||||
int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, bool in_userns);
|
||||
|
||||
int setup_volatile_mode(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
|
||||
|
||||
|
2352
src/nspawn/nspawn-oci.c
Normal file
2352
src/nspawn/nspawn-oci.c
Normal file
File diff suppressed because it is too large
Load Diff
6
src/nspawn/nspawn-oci.h
Normal file
6
src/nspawn/nspawn-oci.h
Normal file
@ -0,0 +1,6 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1+ */
|
||||
#pragma once
|
||||
|
||||
#include "nspawn-settings.h"
|
||||
|
||||
int oci_load(FILE *f, const char *path, Settings **ret);
|
@ -112,6 +112,7 @@ int register_machine(
|
||||
unsigned n_mounts,
|
||||
int kill_signal,
|
||||
char **properties,
|
||||
sd_bus_message *properties_message,
|
||||
bool keep_unit,
|
||||
const char *service) {
|
||||
|
||||
@ -185,6 +186,12 @@ int register_machine(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (properties_message) {
|
||||
r = sd_bus_message_copy(m, properties_message, true);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
r = bus_append_unit_property_assignment_many(m, UNIT_SERVICE, properties);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -235,7 +242,8 @@ int allocate_scope(
|
||||
CustomMount *mounts,
|
||||
unsigned n_mounts,
|
||||
int kill_signal,
|
||||
char **properties) {
|
||||
char **properties,
|
||||
sd_bus_message *properties_message) {
|
||||
|
||||
_cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL, *reply = NULL;
|
||||
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
|
||||
@ -289,6 +297,12 @@ int allocate_scope(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (properties_message) {
|
||||
r = sd_bus_message_copy(m, properties_message, true);
|
||||
if (r < 0)
|
||||
return bus_log_create_error(r);
|
||||
}
|
||||
|
||||
r = append_machine_properties(
|
||||
m,
|
||||
mounts,
|
||||
|
@ -7,8 +7,8 @@
|
||||
|
||||
#include "nspawn-mount.h"
|
||||
|
||||
int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, bool keep_unit, const char *service);
|
||||
int register_machine(sd_bus *bus, const char *machine_name, pid_t pid, const char *directory, sd_id128_t uuid, int local_ifindex, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message, bool keep_unit, const char *service);
|
||||
int terminate_machine(sd_bus *bus, const char *machine_name);
|
||||
|
||||
int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties);
|
||||
int allocate_scope(sd_bus *bus, const char *machine_name, pid_t pid, const char *slice, CustomMount *mounts, unsigned n_mounts, int kill_signal, char **properties, sd_bus_message *properties_message);
|
||||
int terminate_scope(sd_bus *bus, const char *machine_name);
|
||||
|
@ -17,6 +17,50 @@
|
||||
#include "user-util.h"
|
||||
#include "util.h"
|
||||
|
||||
Settings *settings_new(void) {
|
||||
Settings *s;
|
||||
|
||||
s = new(Settings, 1);
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
*s = (Settings) {
|
||||
.start_mode = _START_MODE_INVALID,
|
||||
.personality = PERSONALITY_INVALID,
|
||||
|
||||
.resolv_conf = _RESOLV_CONF_MODE_INVALID,
|
||||
.link_journal = _LINK_JOURNAL_INVALID,
|
||||
.timezone = _TIMEZONE_MODE_INVALID,
|
||||
|
||||
.userns_mode = _USER_NAMESPACE_MODE_INVALID,
|
||||
.userns_chown = -1,
|
||||
.uid_shift = UID_INVALID,
|
||||
.uid_range = UID_INVALID,
|
||||
|
||||
.no_new_privileges = -1,
|
||||
|
||||
.read_only = -1,
|
||||
.volatile_mode = _VOLATILE_MODE_INVALID,
|
||||
|
||||
.private_network = -1,
|
||||
.network_veth = -1,
|
||||
|
||||
.full_capabilities = CAPABILITY_QUINTET_NULL,
|
||||
|
||||
.uid = UID_INVALID,
|
||||
.gid = GID_INVALID,
|
||||
|
||||
.console_mode = _CONSOLE_MODE_INVALID,
|
||||
.console_width = (unsigned) -1,
|
||||
.console_height = (unsigned) -1,
|
||||
|
||||
.clone_ns_flags = (unsigned long) -1,
|
||||
.use_cgns = -1,
|
||||
};
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
_cleanup_(settings_freep) Settings *s = NULL;
|
||||
int r;
|
||||
@ -24,27 +68,10 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
assert(path);
|
||||
assert(ret);
|
||||
|
||||
s = new0(Settings, 1);
|
||||
s = settings_new();
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
s->start_mode = _START_MODE_INVALID;
|
||||
s->personality = PERSONALITY_INVALID;
|
||||
s->userns_mode = _USER_NAMESPACE_MODE_INVALID;
|
||||
s->resolv_conf = _RESOLV_CONF_MODE_INVALID;
|
||||
s->link_journal = _LINK_JOURNAL_INVALID;
|
||||
s->timezone = _TIMEZONE_MODE_INVALID;
|
||||
s->uid_shift = UID_INVALID;
|
||||
s->uid_range = UID_INVALID;
|
||||
s->no_new_privileges = -1;
|
||||
|
||||
s->read_only = -1;
|
||||
s->volatile_mode = _VOLATILE_MODE_INVALID;
|
||||
s->userns_chown = -1;
|
||||
|
||||
s->private_network = -1;
|
||||
s->network_veth = -1;
|
||||
|
||||
r = config_parse(NULL, path, f,
|
||||
"Exec\0"
|
||||
"Network\0"
|
||||
@ -66,12 +93,33 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
|
||||
s->userns_mode = USER_NAMESPACE_NO;
|
||||
|
||||
*ret = TAKE_PTR(s);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Settings* settings_free(Settings *s) {
|
||||
/* Releases an array of 'n' OCI hooks: for each hook its path, argument vector and environment vector
 * are freed, then the array 'h' itself. 'h' may be NULL only if 'n' is zero. */
static void free_oci_hooks(OciHook *h, size_t n) {
        size_t idx = 0;

        assert(h || n == 0);

        while (idx < n) {
                OciHook *hook = h + idx++;

                free(hook->path);
                strv_free(hook->args);
                strv_free(hook->env);
        }

        free(h);
}
|
||||
|
||||
/* Releases an array of 'n' device node descriptions: the path of every entry is freed, followed by the
 * array 'node' itself. Callers must not free 'node' again afterwards. 'node' may be NULL only if 'n'
 * is zero. */
void device_node_free_many(DeviceNode *node, size_t n) {
        size_t i;

        /* Same precondition as free_oci_hooks(): a NULL array is only valid if it is empty. */
        assert(node || n == 0);

        for (i = 0; i < n; i++)
                free(node[i].path);

        free(node);
}
|
||||
|
||||
Settings* settings_free(Settings *s) {
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
@ -96,6 +144,28 @@ Settings* settings_free(Settings *s) {
|
||||
expose_port_free_all(s->expose_ports);
|
||||
|
||||
custom_mount_free_all(s->custom_mounts, s->n_custom_mounts);
|
||||
|
||||
free(s->bundle);
|
||||
free(s->root);
|
||||
|
||||
free_oci_hooks(s->oci_hooks_prestart, s->n_oci_hooks_prestart);
|
||||
free_oci_hooks(s->oci_hooks_poststart, s->n_oci_hooks_poststart);
|
||||
free_oci_hooks(s->oci_hooks_poststop, s->n_oci_hooks_poststop);
|
||||
|
||||
free(s->slice);
|
||||
sd_bus_message_unref(s->properties);
|
||||
|
||||
free(s->supplementary_gids);
|
||||
/* device_node_free_many() frees every node's path and then the array itself, hence the array must
 * not be passed to free() a second time here. */
device_node_free_many(s->extra_nodes, s->n_extra_nodes);
|
||||
free(s->network_namespace_path);
|
||||
|
||||
strv_free(s->sysctl);
|
||||
|
||||
#if HAVE_SECCOMP
|
||||
seccomp_release(s->seccomp);
|
||||
#endif
|
||||
|
||||
return mfree(s);
|
||||
}
|
||||
|
||||
@ -122,6 +192,26 @@ bool settings_network_veth(Settings *s) {
|
||||
s->network_zone;
|
||||
}
|
||||
|
||||
/* Makes sure s->properties refers to a bus message: if none has been allocated yet, a new method call
 * message is created on the default system bus connection and stored there. Does nothing if a message
 * is already present.
 *
 * Returns 0 on success, or the negative error code of the failing sd-bus call. */
int settings_allocate_properties(Settings *s) {
        _cleanup_(sd_bus_unrefp) sd_bus *system_bus = NULL;
        int k;

        assert(s);

        if (!s->properties) {
                k = sd_bus_default_system(&system_bus);
                if (k < 0)
                        return k;

                k = sd_bus_message_new(system_bus, &s->properties, SD_BUS_MESSAGE_METHOD_CALL);
                if (k < 0)
                        return k;
        }

        return 0;
}
|
||||
|
||||
DEFINE_CONFIG_PARSE_ENUM(config_parse_volatile_mode, volatile_mode, VolatileMode, "Failed to parse volatile mode");
|
||||
|
||||
int config_parse_expose_port(
|
||||
@ -315,6 +405,34 @@ int config_parse_tmpfs(
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_inaccessible(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
unsigned line,
|
||||
const char *section,
|
||||
unsigned section_line,
|
||||
const char *lvalue,
|
||||
int ltype,
|
||||
const char *rvalue,
|
||||
void *data,
|
||||
void *userdata) {
|
||||
|
||||
Settings *settings = data;
|
||||
int r;
|
||||
|
||||
assert(filename);
|
||||
assert(lvalue);
|
||||
assert(rvalue);
|
||||
|
||||
r = inaccessible_mount_parse(&settings->custom_mounts, &settings->n_custom_mounts, rvalue);
|
||||
if (r < 0) {
|
||||
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid inaccessible file system specification %s: %m", rvalue);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int config_parse_overlay(
|
||||
const char *unit,
|
||||
const char *filename,
|
||||
|
@ -4,8 +4,14 @@
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if HAVE_SECCOMP
|
||||
#include <seccomp.h>
|
||||
#endif
|
||||
|
||||
#include "sd-bus.h"
|
||||
#include "sd-id128.h"
|
||||
|
||||
#include "capability-util.h"
|
||||
#include "conf-parser.h"
|
||||
#include "macro.h"
|
||||
#include "missing_resource.h"
|
||||
@ -60,6 +66,15 @@ typedef enum TimezoneMode {
|
||||
_TIMEZONE_MODE_INVALID = -1
|
||||
} TimezoneMode;
|
||||
|
||||
typedef enum ConsoleMode {
|
||||
CONSOLE_INTERACTIVE,
|
||||
CONSOLE_READ_ONLY,
|
||||
CONSOLE_PASSIVE,
|
||||
CONSOLE_PIPE,
|
||||
_CONSOLE_MODE_MAX,
|
||||
_CONSOLE_MODE_INVALID = -1,
|
||||
} ConsoleMode;
|
||||
|
||||
typedef enum SettingsMask {
|
||||
SETTING_START_MODE = UINT64_C(1) << 0,
|
||||
SETTING_ENVIRONMENT = UINT64_C(1) << 1,
|
||||
@ -86,9 +101,14 @@ typedef enum SettingsMask {
|
||||
SETTING_LINK_JOURNAL = UINT64_C(1) << 22,
|
||||
SETTING_TIMEZONE = UINT64_C(1) << 23,
|
||||
SETTING_EPHEMERAL = UINT64_C(1) << 24,
|
||||
SETTING_RLIMIT_FIRST = UINT64_C(1) << 25, /* we define one bit per resource limit here */
|
||||
SETTING_RLIMIT_LAST = UINT64_C(1) << (25 + _RLIMIT_MAX - 1),
|
||||
_SETTINGS_MASK_ALL = (UINT64_C(1) << (25 + _RLIMIT_MAX)) -1,
|
||||
SETTING_SLICE = UINT64_C(1) << 25,
|
||||
SETTING_DIRECTORY = UINT64_C(1) << 26,
|
||||
SETTING_USE_CGNS = UINT64_C(1) << 27,
|
||||
SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28,
|
||||
SETTING_CONSOLE_MODE = UINT64_C(1) << 29,
|
||||
SETTING_RLIMIT_FIRST = UINT64_C(1) << 30, /* we define one bit per resource limit here */
|
||||
SETTING_RLIMIT_LAST = UINT64_C(1) << (30 + _RLIMIT_MAX - 1),
|
||||
_SETTINGS_MASK_ALL = (UINT64_C(1) << (30 + _RLIMIT_MAX)) -1,
|
||||
_SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
|
||||
} SettingsMask;
|
||||
|
||||
@ -101,6 +121,22 @@ assert_cc(sizeof(SettingsMask) == 8);
|
||||
assert_cc(sizeof(SETTING_RLIMIT_FIRST) == 8);
|
||||
assert_cc(sizeof(SETTING_RLIMIT_LAST) == 8);
|
||||
|
||||
typedef struct DeviceNode {
|
||||
char *path;
|
||||
unsigned major;
|
||||
unsigned minor;
|
||||
mode_t mode;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
} DeviceNode;
|
||||
|
||||
typedef struct OciHook {
|
||||
char *path;
|
||||
char **args;
|
||||
char **env;
|
||||
usec_t timeout;
|
||||
} OciHook;
|
||||
|
||||
typedef struct Settings {
|
||||
/* [Run] */
|
||||
StartMode start_mode;
|
||||
@ -150,13 +186,39 @@ typedef struct Settings {
|
||||
char **network_ipvlan;
|
||||
char **network_veth_extra;
|
||||
ExposePort *expose_ports;
|
||||
|
||||
/* Additional fields, that are specific to OCI runtime case */
|
||||
char *bundle;
|
||||
char *root;
|
||||
OciHook *oci_hooks_prestart, *oci_hooks_poststart, *oci_hooks_poststop;
|
||||
size_t n_oci_hooks_prestart, n_oci_hooks_poststart, n_oci_hooks_poststop;
|
||||
char *slice;
|
||||
sd_bus_message *properties;
|
||||
CapabilityQuintet full_capabilities;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
gid_t *supplementary_gids;
|
||||
size_t n_supplementary_gids;
|
||||
unsigned console_width, console_height;
|
||||
ConsoleMode console_mode;
|
||||
DeviceNode *extra_nodes;
|
||||
size_t n_extra_nodes;
|
||||
unsigned long clone_ns_flags;
|
||||
char *network_namespace_path;
|
||||
int use_cgns;
|
||||
char **sysctl;
|
||||
#if HAVE_SECCOMP
|
||||
scmp_filter_ctx seccomp;
|
||||
#endif
|
||||
} Settings;
|
||||
|
||||
Settings *settings_new(void);
|
||||
int settings_load(FILE *f, const char *path, Settings **ret);
|
||||
Settings* settings_free(Settings *s);
|
||||
|
||||
bool settings_network_veth(Settings *s);
|
||||
bool settings_private_network(Settings *s);
|
||||
int settings_allocate_properties(Settings *s);
|
||||
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(Settings*, settings_free);
|
||||
|
||||
@ -170,6 +232,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_pivot_root);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_bind);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_tmpfs);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_overlay);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_inaccessible);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_veth_extra);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_network_zone);
|
||||
CONFIG_PARSER_PROTOTYPE(config_parse_boot);
|
||||
@ -190,3 +253,5 @@ const char *timezone_mode_to_string(TimezoneMode a) _const_;
|
||||
TimezoneMode timezone_mode_from_string(const char *s) _pure_;
|
||||
|
||||
int parse_link_journal(const char *s, LinkJournal *ret_mode, bool *ret_try);
|
||||
|
||||
void device_node_free_many(DeviceNode *node, size_t n);
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user