From 68d9aa7ede2e56e9d29af389598d0bcfdee3c163 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sat, 26 Oct 2024 01:15:38 +0200 Subject: [PATCH 1/4] shared/fdset: minor modernization --- src/shared/fdset.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/shared/fdset.c b/src/shared/fdset.c index b3d3cfa784d..42092ada258 100644 --- a/src/shared/fdset.c +++ b/src/shared/fdset.c @@ -22,7 +22,7 @@ #define MAKE_SET(s) ((Set*) s) #define MAKE_FDSET(s) ((FDSet*) s) -FDSet *fdset_new(void) { +FDSet* fdset_new(void) { return MAKE_FDSET(set_new(NULL)); } @@ -52,12 +52,20 @@ int fdset_new_array(FDSet **ret, const int fds[], size_t n_fds) { return 0; } -void fdset_close(FDSet *s, bool async) { +int fdset_steal_first(FDSet *fds) { void *p; - while ((p = set_steal_first(MAKE_SET(s)))) { - int fd = PTR_TO_FD(p); + p = set_steal_first(MAKE_SET(fds)); + if (!p) + return -ENOENT; + return PTR_TO_FD(p); +} + +void fdset_close(FDSet *fds, bool async) { + int fd; + + while ((fd = fdset_steal_first(fds)) >= 0) { /* Valgrind's fd might have ended up in this set here, due to fdset_new_fill(). We'll ignore * all failures here, so that the EBADFD that valgrind will return us on close() doesn't * influence us */ @@ -144,7 +152,7 @@ bool fdset_contains(FDSet *s, int fd) { return false; } - return !!set_get(MAKE_SET(s), FD_TO_PTR(fd)); + return set_contains(MAKE_SET(s), FD_TO_PTR(fd)); } int fdset_remove(FDSet *s, int fd) { @@ -230,13 +238,13 @@ int fdset_new_fill( } int fdset_cloexec(FDSet *fds, bool b) { - void *p; int r; assert(fds); - SET_FOREACH(p, MAKE_SET(fds)) { - r = fd_cloexec(PTR_TO_FD(p), b); + int fd; + FDSET_FOREACH(fd, fds) { + r = fd_cloexec(fd, b); if (r < 0) return r; } @@ -269,7 +277,6 @@ int fdset_new_listen_fds(FDSet **ret, bool unset) { int fdset_to_array(FDSet *fds, int **ret) { unsigned j = 0, m; - void *e; int *a; assert(ret); @@ -286,8 +293,9 @@ int fdset_to_array(FDSet *fds, int **ret) { if (!a) return -ENOMEM; - SET_FOREACH(e, MAKE_SET(fds)) - a[j++] = PTR_TO_FD(e); + int fd; + FDSET_FOREACH(fd, fds) + a[j++] = fd; assert(j == m); @@ -322,13 +330,3 @@ int fdset_iterate(FDSet *s, Iterator *i) { return PTR_TO_FD(p); } - -int fdset_steal_first(FDSet *fds) { - void *p; - - p = set_steal_first(MAKE_SET(fds)); - if (!p) - return -ENOENT; - - return PTR_TO_FD(p); -} From 695323d90a6a097bb08c85c518d10b7601807fe8 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sat, 26 Oct 2024 00:49:18 +0200 Subject: [PATCH 2/4] core/service: support sd_notify() MAINPIDFD=1 and MAINPIDFDID= These serve as race-free alternatives for MAINPID= notification. --- TODO | 3 +- man/sd_notify.xml | 27 ++++++++++- src/core/service.c | 116 ++++++++++++++++++++++++++++++++++----------- 3 files changed, 115 insertions(+), 31 deletions(-) diff --git a/TODO b/TODO index 70e088210a1..286a09de86f 100644 --- a/TODO +++ b/TODO @@ -131,8 +131,7 @@ Features: * $LISTEN_PID, $MAINPID and $SYSTEMD_EXECPID env vars that the service manager sets should be augmented with $LISTEN_PIDFDID, $MAINPIDFDID and - $SYSTEMD_EXECPIDFD (and similar for other env vars we might send). Also, - MAINPID= in sd_notify() should be augmented with MAINPIDFDID=, and so on. + $SYSTEMD_EXECPIDFD (and similar for other env vars we might send). * port copy.c over to use LabelOps for all labelling. diff --git a/man/sd_notify.xml b/man/sd_notify.xml index f04251bd197..5e6c9d6dbcd 100644 --- a/man/sd_notify.xml +++ b/man/sd_notify.xml @@ -291,12 +291,35 @@ MAINPID=… - The main process ID (PID) of the service, in case the service manager did not fork - off the process itself. Example: MAINPID=4711. + Change the main process ID (PID) of the service. This is especially useful in the case + where the real main process isn't directly forked off by the service manager. + Example: MAINPID=4711. + + MAINPIDFDID=… + + The pidfd inode number of the new main process (specified through MAINPID=). + This information can be acquired through + fstat2 + on the pidfd and is used to identify the process in a race-free fashion. Alternatively, + a pidfd can be sent directly to the service manager (see MAINPIDFD=1 below). + + + + + + MAINPIDFD=1 + + Similar to MAINPID= with MAINPIDFDID=, but + the process is referenced directly by the pidfd passed to the service manager. This is useful + if pidfd id is not supported on the system. Exactly one fd is expected for this notification. + + + + WATCHDOG=1 diff --git a/src/core/service.c b/src/core/service.c index 1ac158f1eab..f48e15c2ebc 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -4540,6 +4540,72 @@ static bool service_notify_message_authorized(Service *s, PidRef *pid) { } } +static int service_notify_message_parse_new_pid( + Unit *u, + char * const *tags, + FDSet *fds, + PidRef *ret) { + + _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; + const char *e; + int r; + + assert(u); + assert(ret); + + /* MAINPIDFD=1 always takes precedence */ + if (strv_contains(tags, "MAINPIDFD=1")) { + unsigned n_fds = fdset_size(fds); + if (n_fds != 1) + return log_unit_warning_errno(u, SYNTHETIC_ERRNO(EINVAL), + "Got MAINPIDFD=1 with %s fd, ignoring.", n_fds == 0 ? "no" : "more than one"); + + r = pidref_set_pidfd_consume(&pidref, ASSERT_FD(fdset_steal_first(fds))); + if (r < 0) + return log_unit_warning_errno(u, r, "Failed to create reference to received new main pidfd: %m"); + + goto finish; + } + + e = strv_find_startswith(tags, "MAINPID="); + if (!e) { + *ret = PIDREF_NULL; + return 0; + } + + r = pidref_set_pidstr(&pidref, e); + if (r < 0) + return log_unit_warning_errno(u, r, "Failed to parse MAINPID=%s field in notification message, ignoring: %m", e); + + e = strv_find_startswith(tags, "MAINPIDFDID="); + if (!e) + goto finish; + + uint64_t pidfd_id; + + r = safe_atou64(e, &pidfd_id); + if (r < 0) + return log_unit_warning_errno(u, r, "Failed to parse MAINPIDFDID= in notification message, refusing: %s", e); + + r = pidref_acquire_pidfd_id(&pidref); + if (r < 0) { + if (!ERRNO_IS_NEG_NOT_SUPPORTED(r)) + log_unit_warning_errno(u, r, + "Failed to acquire pidfd id of process " PID_FMT ", not validating MAINPIDFDID=%" PRIu64 ": %m", + pidref.pid, pidfd_id); + goto finish; + } + + if (pidref.fd_id != pidfd_id) + return log_unit_warning_errno(u, SYNTHETIC_ERRNO(ESRCH), + "PIDFD ID of process " PID_FMT " (%" PRIu64 ") mismatches with received MAINPIDFDID=%" PRIu64 ", not changing main PID.", + pidref.pid, pidref.fd_id, pidfd_id); + +finish: + *ret = TAKE_PIDREF(pidref); + return 1; +} + static void service_notify_message( Unit *u, PidRef *pidref, @@ -4565,38 +4631,34 @@ static void service_notify_message( bool notify_dbus = false; const char *e; - /* Interpret MAINPID= */ - e = strv_find_startswith(tags, "MAINPID="); - if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, - SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, - SERVICE_STOP, SERVICE_STOP_SIGTERM)) { + /* Interpret MAINPID= (+ MAINPIDFDID=) / MAINPIDFD=1 */ + _cleanup_(pidref_done) PidRef new_main_pid = PIDREF_NULL; - _cleanup_(pidref_done) PidRef new_main_pid = PIDREF_NULL; + r = service_notify_message_parse_new_pid(u, tags, fds, &new_main_pid); + if (r > 0 && + IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, + SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, + SERVICE_STOP, SERVICE_STOP_SIGTERM) && + (!s->main_pid_known || !pidref_equal(&new_main_pid, &s->main_pid))) { - r = pidref_set_pidstr(&new_main_pid, e); - if (r < 0) - log_unit_warning_errno(u, r, "Failed to parse MAINPID=%s field in notification message, ignoring: %m", e); - else if (!s->main_pid_known || !pidref_equal(&new_main_pid, &s->main_pid)) { + r = service_is_suitable_main_pid(s, &new_main_pid, LOG_WARNING); + if (r == 0) { + /* The new main PID is a bit suspicious, which is OK if the sender is privileged. */ - r = service_is_suitable_main_pid(s, &new_main_pid, LOG_WARNING); - if (r == 0) { - /* The new main PID is a bit suspicious, which is OK if the sender is privileged. */ + if (ucred->uid == 0) { + log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid.pid); + r = 1; + } else + log_unit_warning(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid.pid); + } + if (r > 0) { + (void) service_set_main_pidref(s, TAKE_PIDREF(new_main_pid), /* start_timestamp = */ NULL); - if (ucred->uid == 0) { - log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid.pid); - r = 1; - } else - log_unit_warning(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid.pid); - } - if (r > 0) { - (void) service_set_main_pidref(s, TAKE_PIDREF(new_main_pid), /* start_timestamp = */ NULL); + r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false); + if (r < 0) + log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", s->main_pid.pid); - r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false); - if (r < 0) - log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", s->main_pid.pid); - - notify_dbus = true; - } + notify_dbus = true; } } From e2037d07c061174ed824f83bc4fe5336b8e76ada Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sat, 26 Oct 2024 01:51:18 +0200 Subject: [PATCH 3/4] notify: send MAINPIDFDID= for --pid= too if available --- src/notify/notify.c | 74 +++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/src/notify/notify.c b/src/notify/notify.c index 32bd6e6a7ae..8937457ec99 100644 --- a/src/notify/notify.c +++ b/src/notify/notify.c @@ -28,7 +28,7 @@ static bool arg_ready = false; static bool arg_reloading = false; static bool arg_stopping = false; -static pid_t arg_pid = 0; +static PidRef arg_pid = PIDREF_NULL; static const char *arg_status = NULL; static bool arg_booted = false; static uid_t arg_uid = UID_INVALID; @@ -39,6 +39,7 @@ static char **arg_exec = NULL; static FDSet *arg_fds = NULL; static char *arg_fdname = NULL; +STATIC_DESTRUCTOR_REGISTER(arg_pid, pidref_done); STATIC_DESTRUCTOR_REGISTER(arg_env, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_exec, strv_freep); STATIC_DESTRUCTOR_REGISTER(arg_fds, fdset_freep); @@ -99,16 +100,24 @@ static pid_t manager_pid(void) { return pid; } -static pid_t pid_parent_if_possible(void) { - pid_t parent_pid = getppid(); +static int pidref_parent_if_applicable(PidRef *ret) { + _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; + int r; + + assert(ret); + + r = pidref_set_parent(&pidref); + if (r < 0) + return log_debug_errno(r, "Failed to create reference to our parent process: %m"); /* Don't send from PID 1 or the service manager's PID (which might be distinct from 1, if we are a * --user service). That'd just be confusing for the service manager. */ - if (parent_pid <= 1 || - parent_pid == manager_pid()) - return getpid_cached(); + if (pidref.pid <= 1 || + pidref.pid == manager_pid()) + return pidref_set_self(ret); - return parent_pid; + *ret = TAKE_PIDREF(pidref); + return 0; } static int parse_argv(int argc, char *argv[]) { @@ -175,17 +184,18 @@ static int parse_argv(int argc, char *argv[]) { break; case ARG_PID: + pidref_done(&arg_pid); + if (isempty(optarg) || streq(optarg, "auto")) - arg_pid = pid_parent_if_possible(); + r = pidref_parent_if_applicable(&arg_pid); else if (streq(optarg, "parent")) - arg_pid = getppid(); + r = pidref_set_parent(&arg_pid); else if (streq(optarg, "self")) - arg_pid = getpid_cached(); - else { - r = parse_pid(optarg, &arg_pid); - if (r < 0) - return log_error_errno(r, "Failed to parse PID %s.", optarg); - } + r = pidref_set_self(&arg_pid); + else + r = pidref_set_pidstr(&arg_pid, optarg); + if (r < 0) + return log_error_errno(r, "Failed to refer to --pid='%s': %m", optarg); break; @@ -276,7 +286,7 @@ static int parse_argv(int argc, char *argv[]) { if (arg_fdname && fdset_isempty(arg_fds)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No file descriptors passed, but --fdname= set, refusing."); - bool have_env = arg_ready || arg_stopping || arg_reloading || arg_status || arg_pid > 0 || !fdset_isempty(arg_fds); + bool have_env = arg_ready || arg_stopping || arg_reloading || arg_status || pidref_is_set(&arg_pid) || !fdset_isempty(arg_fds); size_t n_arg_env; if (do_exec) { @@ -326,9 +336,10 @@ static int parse_argv(int argc, char *argv[]) { } static int run(int argc, char* argv[]) { - _cleanup_free_ char *status = NULL, *cpid = NULL, *msg = NULL, *monotonic_usec = NULL, *fdn = NULL; + _cleanup_free_ char *status = NULL, *main_pid = NULL, *main_pidfd_id = NULL, *msg = NULL, + *monotonic_usec = NULL, *fdn = NULL; _cleanup_strv_free_ char **final_env = NULL; - const char *our_env[9]; + const char *our_env[10]; size_t i = 0; int r; @@ -371,11 +382,22 @@ static int run(int argc, char* argv[]) { our_env[i++] = status; } - if (arg_pid > 0) { - if (asprintf(&cpid, "MAINPID="PID_FMT, arg_pid) < 0) + if (pidref_is_set(&arg_pid)) { + if (asprintf(&main_pid, "MAINPID="PID_FMT, arg_pid.pid) < 0) return log_oom(); - our_env[i++] = cpid; + our_env[i++] = main_pid; + + r = pidref_acquire_pidfd_id(&arg_pid); + if (r < 0) + log_debug_errno(r, "Unable to acquire pidfd id of new main pid " PID_FMT ", ignoring: %m", + arg_pid.pid); + else { + if (asprintf(&main_pidfd_id, "MAINPIDFDID=%" PRIu64, arg_pid.fd_id) < 0) + return log_oom(); + + our_env[i++] = main_pidfd_id; + } } if (!fdset_isempty(arg_fds)) { @@ -415,11 +437,11 @@ static int run(int argc, char* argv[]) { /* If --pid= is explicitly specified, use it as source pid. Otherwise, pretend the message originates * from our parent, i.e. --pid=auto */ - if (arg_pid <= 0) - arg_pid = pid_parent_if_possible(); + if (!pidref_is_set(&arg_pid)) + (void) pidref_parent_if_applicable(&arg_pid); if (fdset_isempty(arg_fds)) - r = sd_pid_notify(arg_pid, /* unset_environment= */ false, msg); + r = sd_pid_notify(arg_pid.pid, /* unset_environment= */ false, msg); else { _cleanup_free_ int *a = NULL; int k; @@ -428,7 +450,7 @@ static int run(int argc, char* argv[]) { if (k < 0) return log_error_errno(k, "Failed to convert file descriptor set to array: %m"); - r = sd_pid_notify_with_fds(arg_pid, /* unset_environment= */ false, msg, a, k); + r = sd_pid_notify_with_fds(arg_pid.pid, /* unset_environment= */ false, msg, a, k); } if (r < 0) @@ -440,7 +462,7 @@ static int run(int argc, char* argv[]) { arg_fds = fdset_free(arg_fds); /* Close before we execute anything */ if (!arg_no_block) { - r = sd_pid_notify_barrier(arg_pid, /* unset_environment= */ false, 5 * USEC_PER_SEC); + r = sd_pid_notify_barrier(arg_pid.pid, /* unset_environment= */ false, 5 * USEC_PER_SEC); if (r < 0) return log_error_errno(r, "Failed to invoke barrier: %m"); if (r == 0) From c3ecb747f1e35f609f15fc94ad4d5e5ca0bda4a2 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Tue, 29 Oct 2024 18:35:50 +0100 Subject: [PATCH 4/4] TEST-80-NOTIFYACCESS: don't specify --pid= if MAINPID= is provided explicitly Otherwise, with recent additions, the MAINPIDFDID= generated by systemd-notify would mismatch with overridden MAINPID=. --- test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh b/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh index 47333ef0deb..c3a6a3982d6 100755 --- a/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh +++ b/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh @@ -39,7 +39,7 @@ sync_in b sync_in d # Move main process back to toplevel - systemd-notify --pid=parent "MAINPID=$$" + systemd-notify "MAINPID=$$" # Should be dropped again systemd-notify --status="BOGUS2" --pid=parent