mirror of
https://github.com/systemd/systemd.git
synced 2025-03-25 18:50:18 +03:00
nspawn: introduce --notify-ready=[no|yes] (#3474)
This the patch implements a notificaiton mechanism from the init process in the container to systemd-nspawn. The switch --notify-ready=yes configures systemd-nspawn to wait the "READY=1" message from the init process in the container to send its own to systemd. --notify-ready=no is equivalent to the previous behavior before this patch, systemd-nspawn notifies systemd with a "READY=1" message when the container is created. This notificaiton mechanism uses socket file with path relative to the contanier "/run/systemd/nspawn/notify". The default values it --notify-ready=no. It is also possible to configure this mechanism from the .nspawn files using NotifyReady. This parameter takes the same options of the command line switch. Before this patch, systemd-nspawn notifies "ready" after the inner child was created, regardless the status of the service running inside it. Now, with --notify-ready=yes, systemd-nspawn notifies when the service is ready. This is really useful when there are dependencies between different contaniers. Fixes https://github.com/systemd/systemd/issues/1369 Based on the work from https://github.com/systemd/systemd/pull/3022 Testing: Boot a OS inside a container with systemd-nspawn. Note: modify the commands accordingly with your filesystem. 1. Create a filesystem where you can boot an OS. 2. sudo systemd-nspawn -D ${HOME}/distros/fedora-23/ sh 2.1. Create the unit file /etc/systemd/system/sleep.service inside the container (You can use the example below) 2.2. systemdctl enable sleep 2.3 exit 3. sudo systemd-run --service-type=notify --unit=notify-test ${HOME}/systemd/systemd-nspawn --notify-ready=yes -D ${HOME}/distros/fedora-23/ -b 4. In a different shell run "systemctl status notify-test" When using --notify-ready=yes the service status is "activating" for 20 seconds before being set to "active (running)". Instead, using --notify-ready=no the service status is marked "active (running)" quickly, without waiting for the 20 seconds. This patch was also test with --private-users=yes, you can test it just adding it at the end of the command at point 3. ------ sleep.service ------ [Unit] Description=sleep After=network.target [Service] Type=oneshot ExecStart=/bin/sleep 20 [Install] WantedBy=multi-user.target ------------ end ------------
This commit is contained in:
parent
1edce01965
commit
9c1e04d0fa
@ -980,6 +980,19 @@
|
||||
effect.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>--notify-ready=</varname></term>
|
||||
|
||||
<listitem><para>Configures support for notifications from the container's init process.
|
||||
<varname>--notify-ready=</varname> takes a boolean (<option>no</option> and <option>yes</option>).
|
||||
With option <option>no</option> systemd-nspawn notifies systemd
|
||||
with a <literal>READY=1</literal> message when the init process is created.
|
||||
With option <option>yes</option> systemd-nspawn waits for the
|
||||
<literal>READY=1</literal> message from the init process in the container
|
||||
before sending its own to systemd. For more details about notifications
|
||||
see <citerefentry><refentrytitle>sd_notify</refentrytitle><manvolnum>3</manvolnum></citerefentry>).</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<xi:include href="standard-options.xml" xpointer="help" />
|
||||
<xi:include href="standard-options.xml" xpointer="version" />
|
||||
</variablelist>
|
||||
|
@ -259,6 +259,15 @@
|
||||
<option>--private-users=</option> command line switch, and takes the same options. This option is privileged
|
||||
(see above). </para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><varname>NotifyReady=</varname></term>
|
||||
|
||||
<listitem><para>Configures support for notifications from the container's init process.
|
||||
This is equivalent to use <option>--notify-ready=</option> command line switch,
|
||||
and takes the same options. See <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry>
|
||||
for details about the specific options supported.</para></listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
@ -27,6 +27,7 @@ Exec.Personality, config_parse_personality, 0, offsetof(Settings,
|
||||
Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
|
||||
Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
|
||||
Exec.PrivateUsers, config_parse_private_users, 0, 0
|
||||
Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready)
|
||||
Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
|
||||
Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
|
||||
Files.Bind, config_parse_bind, 0, 0
|
||||
|
@ -56,7 +56,8 @@ typedef enum SettingsMask {
|
||||
SETTING_CUSTOM_MOUNTS = 1 << 11,
|
||||
SETTING_WORKING_DIRECTORY = 1 << 12,
|
||||
SETTING_USERNS = 1 << 13,
|
||||
_SETTINGS_MASK_ALL = (1 << 14) -1
|
||||
SETTING_NOTIFY_READY = 1 << 14,
|
||||
_SETTINGS_MASK_ALL = (1 << 15) -1
|
||||
} SettingsMask;
|
||||
|
||||
typedef struct Settings {
|
||||
@ -73,6 +74,7 @@ typedef struct Settings {
|
||||
char *working_directory;
|
||||
UserNamespaceMode userns_mode;
|
||||
uid_t uid_shift, uid_range;
|
||||
bool notify_ready;
|
||||
|
||||
/* [Image] */
|
||||
int read_only;
|
||||
|
@ -104,6 +104,10 @@
|
||||
* UID range here */
|
||||
#define UID_SHIFT_PICK_MIN ((uid_t) UINT32_C(0x00080000))
|
||||
#define UID_SHIFT_PICK_MAX ((uid_t) UINT32_C(0x6FFF0000))
|
||||
/* nspawn is listening on the socket at the path in the constant nspawn_notify_socket_path
|
||||
* nspawn_notify_socket_path is relative to the container
|
||||
* the init process in the container pid can send messages to nspawn following the sd_notify(3) protocol */
|
||||
#define NSPAWN_NOTIFY_SOCKET_PATH "/run/systemd/nspawn/notify"
|
||||
|
||||
typedef enum ContainerStatus {
|
||||
CONTAINER_TERMINATED,
|
||||
@ -187,6 +191,7 @@ static SettingsMask arg_settings_mask = 0;
|
||||
static int arg_settings_trusted = -1;
|
||||
static char **arg_parameters = NULL;
|
||||
static const char *arg_container_service_name = "systemd-nspawn";
|
||||
static bool arg_notify_ready = false;
|
||||
|
||||
static void help(void) {
|
||||
printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
|
||||
@ -267,6 +272,8 @@ static void help(void) {
|
||||
" the service unit nspawn is running in\n"
|
||||
" --volatile[=MODE] Run the system in volatile mode\n"
|
||||
" --settings=BOOLEAN Load additional settings from .nspawn file\n"
|
||||
" --notify-ready=BOOLEAN Receive notifications from the container's init process,\n"
|
||||
" accepted values: yes and no\n"
|
||||
, program_invocation_short_name);
|
||||
}
|
||||
|
||||
@ -367,6 +374,7 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
ARG_SETTINGS,
|
||||
ARG_CHDIR,
|
||||
ARG_PRIVATE_USERS_CHOWN,
|
||||
ARG_NOTIFY_READY,
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
@ -415,6 +423,7 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
|
||||
{ "settings", required_argument, NULL, ARG_SETTINGS },
|
||||
{ "chdir", required_argument, NULL, ARG_CHDIR },
|
||||
{ "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
|
||||
{}
|
||||
};
|
||||
|
||||
@ -987,6 +996,16 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
arg_settings_mask |= SETTING_WORKING_DIRECTORY;
|
||||
break;
|
||||
|
||||
case ARG_NOTIFY_READY:
|
||||
r = parse_boolean(optarg);
|
||||
if (r < 0) {
|
||||
log_error("%s is not a valid notify mode. Valid modes are: yes, no, and ready.", optarg);
|
||||
return -EINVAL;
|
||||
}
|
||||
arg_notify_ready = r;
|
||||
arg_settings_mask |= SETTING_NOTIFY_READY;
|
||||
break;
|
||||
|
||||
case '?':
|
||||
return -EINVAL;
|
||||
|
||||
@ -2529,6 +2548,7 @@ static int inner_child(
|
||||
NULL, /* container_uuid */
|
||||
NULL, /* LISTEN_FDS */
|
||||
NULL, /* LISTEN_PID */
|
||||
NULL, /* NOTIFY_SOCKET */
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -2656,6 +2676,8 @@ static int inner_child(
|
||||
(asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0))
|
||||
return log_oom();
|
||||
}
|
||||
if (asprintf((char **)(envp + n_env++), "NOTIFY_SOCKET=%s", NSPAWN_NOTIFY_SOCKET_PATH) < 0)
|
||||
return log_oom();
|
||||
|
||||
env_use = strv_env_merge(2, envp, arg_setenv);
|
||||
if (!env_use)
|
||||
@ -2725,6 +2747,37 @@ static int inner_child(
|
||||
return log_error_errno(r, "execv() failed: %m");
|
||||
}
|
||||
|
||||
static int setup_sd_notify_child(void) {
|
||||
static const int one = 1;
|
||||
int fd = -1;
|
||||
union sockaddr_union sa = {
|
||||
.sa.sa_family = AF_UNIX,
|
||||
};
|
||||
int r;
|
||||
|
||||
fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
|
||||
if (fd < 0)
|
||||
return log_error_errno(errno, "Failed to allocate notification socket: %m");
|
||||
|
||||
(void) mkdir_parents(NSPAWN_NOTIFY_SOCKET_PATH, 0755);
|
||||
(void) unlink(NSPAWN_NOTIFY_SOCKET_PATH);
|
||||
|
||||
strncpy(sa.un.sun_path, NSPAWN_NOTIFY_SOCKET_PATH, sizeof(sa.un.sun_path)-1);
|
||||
r = bind(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
|
||||
if (r < 0) {
|
||||
safe_close(fd);
|
||||
return log_error_errno(errno, "bind(%s) failed: %m", sa.un.sun_path);
|
||||
}
|
||||
|
||||
r = setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
|
||||
if (r < 0) {
|
||||
safe_close(fd);
|
||||
return log_error_errno(errno, "SO_PASSCRED failed: %m");
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
static int outer_child(
|
||||
Barrier *barrier,
|
||||
const char *directory,
|
||||
@ -2736,6 +2789,7 @@ static int outer_child(
|
||||
bool secondary,
|
||||
int pid_socket,
|
||||
int uuid_socket,
|
||||
int notify_socket,
|
||||
int kmsg_socket,
|
||||
int rtnl_socket,
|
||||
int uid_shift_socket,
|
||||
@ -2744,12 +2798,14 @@ static int outer_child(
|
||||
pid_t pid;
|
||||
ssize_t l;
|
||||
int r;
|
||||
_cleanup_close_ int fd = -1;
|
||||
|
||||
assert(barrier);
|
||||
assert(directory);
|
||||
assert(console);
|
||||
assert(pid_socket >= 0);
|
||||
assert(uuid_socket >= 0);
|
||||
assert(notify_socket >= 0);
|
||||
assert(kmsg_socket >= 0);
|
||||
|
||||
cg_unified_flush();
|
||||
@ -2936,6 +2992,10 @@ static int outer_child(
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to move root directory: %m");
|
||||
|
||||
fd = setup_sd_notify_child();
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
pid = raw_clone(SIGCHLD|CLONE_NEWNS|
|
||||
(arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS) |
|
||||
(arg_private_network ? CLONE_NEWNET : 0) |
|
||||
@ -2945,6 +3005,7 @@ static int outer_child(
|
||||
if (pid == 0) {
|
||||
pid_socket = safe_close(pid_socket);
|
||||
uuid_socket = safe_close(uuid_socket);
|
||||
notify_socket = safe_close(notify_socket);
|
||||
uid_shift_socket = safe_close(uid_shift_socket);
|
||||
|
||||
/* The inner child has all namespaces that are
|
||||
@ -2974,8 +3035,13 @@ static int outer_child(
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
l = send_one_fd(notify_socket, fd, 0);
|
||||
if (l < 0)
|
||||
return log_error_errno(errno, "Failed to send notify fd: %m");
|
||||
|
||||
pid_socket = safe_close(pid_socket);
|
||||
uuid_socket = safe_close(uuid_socket);
|
||||
notify_socket = safe_close(notify_socket);
|
||||
kmsg_socket = safe_close(kmsg_socket);
|
||||
rtnl_socket = safe_close(rtnl_socket);
|
||||
|
||||
@ -3058,6 +3124,96 @@ static int setup_uid_map(pid_t pid) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nspawn_dispatch_notify_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
|
||||
_cleanup_fdset_free_ FDSet *fds = NULL;
|
||||
char buf[NOTIFY_BUFFER_MAX+1];
|
||||
char *p = NULL;
|
||||
struct iovec iovec = {
|
||||
.iov_base = buf,
|
||||
.iov_len = sizeof(buf)-1,
|
||||
};
|
||||
union {
|
||||
struct cmsghdr cmsghdr;
|
||||
uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
|
||||
CMSG_SPACE(sizeof(int) * NOTIFY_FD_MAX)];
|
||||
} control = {};
|
||||
struct msghdr msghdr = {
|
||||
.msg_iov = &iovec,
|
||||
.msg_iovlen = 1,
|
||||
.msg_control = &control,
|
||||
.msg_controllen = sizeof(control),
|
||||
};
|
||||
struct cmsghdr *cmsg;
|
||||
struct ucred *ucred = NULL;
|
||||
ssize_t n;
|
||||
pid_t inner_child_pid;
|
||||
_cleanup_strv_free_ char **tags = NULL;
|
||||
|
||||
assert(userdata);
|
||||
|
||||
inner_child_pid = PTR_TO_PID(userdata);
|
||||
|
||||
if (revents != EPOLLIN) {
|
||||
log_warning("Got unexpected poll event for notify fd.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN || errno == EINTR)
|
||||
return 0;
|
||||
|
||||
return log_warning_errno(errno, "Couldn't read notification socket: %m");
|
||||
}
|
||||
cmsg_close_all(&msghdr);
|
||||
|
||||
CMSG_FOREACH(cmsg, &msghdr) {
|
||||
if (cmsg->cmsg_level == SOL_SOCKET &&
|
||||
cmsg->cmsg_type == SCM_CREDENTIALS &&
|
||||
cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
|
||||
|
||||
ucred = (struct ucred*) CMSG_DATA(cmsg);
|
||||
}
|
||||
}
|
||||
|
||||
if (!ucred || ucred->pid != inner_child_pid) {
|
||||
log_warning("Received notify message without valid credentials. Ignoring.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((size_t) n >= sizeof(buf)) {
|
||||
log_warning("Received notify message exceeded maximum size. Ignoring.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
buf[n] = 0;
|
||||
tags = strv_split(buf, "\n\r");
|
||||
if (!tags)
|
||||
return log_oom();
|
||||
|
||||
if (strv_find(tags, "READY=1"))
|
||||
sd_notifyf(false, "READY=1\n");
|
||||
|
||||
p = strv_find_startswith(tags, "STATUS=");
|
||||
if (p)
|
||||
sd_notifyf(false, "STATUS=Container running: %s", p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_sd_notify_parent(sd_event *event, int fd, pid_t *inner_child_pid) {
|
||||
int r;
|
||||
sd_event_source *notify_event_source;
|
||||
|
||||
r = sd_event_add_io(event, ¬ify_event_source, fd, EPOLLIN, nspawn_dispatch_notify_fd, inner_child_pid);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to allocate notify event source: %m");
|
||||
|
||||
(void) sd_event_source_set_description(notify_event_source, "nspawn-notify");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int load_settings(void) {
|
||||
_cleanup_(settings_freep) Settings *settings = NULL;
|
||||
_cleanup_fclose_ FILE *f = NULL;
|
||||
@ -3286,6 +3442,9 @@ static int load_settings(void) {
|
||||
}
|
||||
}
|
||||
|
||||
if ((arg_settings_mask & SETTING_NOTIFY_READY) == 0)
|
||||
arg_notify_ready = settings->notify_ready;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3536,7 +3695,9 @@ int main(int argc, char *argv[]) {
|
||||
rtnl_socket_pair[2] = { -1, -1 },
|
||||
pid_socket_pair[2] = { -1, -1 },
|
||||
uuid_socket_pair[2] = { -1, -1 },
|
||||
notify_socket_pair[2] = { -1, -1 },
|
||||
uid_shift_socket_pair[2] = { -1, -1 };
|
||||
_cleanup_close_ int notify_socket= -1;
|
||||
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
|
||||
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
|
||||
_cleanup_(pty_forward_freep) PTYForward *forward = NULL;
|
||||
@ -3587,6 +3748,11 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, notify_socket_pair) < 0) {
|
||||
r = log_error_errno(errno, "Failed to create notify socket pair: %m");
|
||||
goto finish;
|
||||
}
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO)
|
||||
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, uid_shift_socket_pair) < 0) {
|
||||
r = log_error_errno(errno, "Failed to create uid shift socket pair: %m");
|
||||
@ -3628,6 +3794,7 @@ int main(int argc, char *argv[]) {
|
||||
rtnl_socket_pair[0] = safe_close(rtnl_socket_pair[0]);
|
||||
pid_socket_pair[0] = safe_close(pid_socket_pair[0]);
|
||||
uuid_socket_pair[0] = safe_close(uuid_socket_pair[0]);
|
||||
notify_socket_pair[0] = safe_close(notify_socket_pair[0]);
|
||||
uid_shift_socket_pair[0] = safe_close(uid_shift_socket_pair[0]);
|
||||
|
||||
(void) reset_all_signal_handlers();
|
||||
@ -3643,6 +3810,7 @@ int main(int argc, char *argv[]) {
|
||||
secondary,
|
||||
pid_socket_pair[1],
|
||||
uuid_socket_pair[1],
|
||||
notify_socket_pair[1],
|
||||
kmsg_socket_pair[1],
|
||||
rtnl_socket_pair[1],
|
||||
uid_shift_socket_pair[1],
|
||||
@ -3661,6 +3829,7 @@ int main(int argc, char *argv[]) {
|
||||
rtnl_socket_pair[1] = safe_close(rtnl_socket_pair[1]);
|
||||
pid_socket_pair[1] = safe_close(pid_socket_pair[1]);
|
||||
uuid_socket_pair[1] = safe_close(uuid_socket_pair[1]);
|
||||
notify_socket_pair[1] = safe_close(notify_socket_pair[1]);
|
||||
uid_shift_socket_pair[1] = safe_close(uid_shift_socket_pair[1]);
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO) {
|
||||
@ -3734,6 +3903,13 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
/* We also retrieve the socket used for notifications generated by outer child */
|
||||
notify_socket = receive_one_fd(notify_socket_pair[0], 0);
|
||||
if (notify_socket < 0) {
|
||||
r = log_error_errno(errno, "Failed to receive notification socket from the outer child: %m");
|
||||
goto finish;
|
||||
}
|
||||
|
||||
log_debug("Init process invoked as PID " PID_FMT, pid);
|
||||
|
||||
if (arg_userns_mode != USER_NAMESPACE_NO) {
|
||||
@ -3848,6 +4024,16 @@ int main(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
r = sd_event_new(&event);
|
||||
if (r < 0) {
|
||||
log_error_errno(r, "Failed to get default event source: %m");
|
||||
goto finish;
|
||||
}
|
||||
|
||||
r = setup_sd_notify_parent(event, notify_socket, PID_TO_PTR(pid));
|
||||
if (r < 0)
|
||||
goto finish;
|
||||
|
||||
/* Let the child know that we are ready and wait that the child is completely ready now. */
|
||||
if (!barrier_place_and_sync(&barrier)) { /* #4 */
|
||||
log_error("Child died too early.");
|
||||
@ -3860,15 +4046,10 @@ int main(int argc, char *argv[]) {
|
||||
etc_passwd_lock = safe_close(etc_passwd_lock);
|
||||
|
||||
sd_notifyf(false,
|
||||
"READY=1\n"
|
||||
"STATUS=Container running.\n"
|
||||
"X_NSPAWN_LEADER_PID=" PID_FMT, pid);
|
||||
|
||||
r = sd_event_new(&event);
|
||||
if (r < 0) {
|
||||
log_error_errno(r, "Failed to get default event source: %m");
|
||||
goto finish;
|
||||
}
|
||||
if (!arg_notify_ready)
|
||||
sd_notify(false, "READY=1\n");
|
||||
|
||||
if (arg_kill_signal > 0) {
|
||||
/* Try to kill the init system on SIGINT or SIGTERM */
|
||||
|
Loading…
x
Reference in New Issue
Block a user