diff --git a/TODO b/TODO
index 70e088210a1..286a09de86f 100644
--- a/TODO
+++ b/TODO
@@ -131,8 +131,7 @@ Features:
* $LISTEN_PID, $MAINPID and $SYSTEMD_EXECPID env vars that the service manager
sets should be augmented with $LISTEN_PIDFDID, $MAINPIDFDID and
- $SYSTEMD_EXECPIDFD (and similar for other env vars we might send). Also,
- MAINPID= in sd_notify() should be augmented with MAINPIDFDID=, and so on.
+ $SYSTEMD_EXECPIDFD (and similar for other env vars we might send).
* port copy.c over to use LabelOps for all labelling.
diff --git a/man/sd_notify.xml b/man/sd_notify.xml
index f04251bd197..5e6c9d6dbcd 100644
--- a/man/sd_notify.xml
+++ b/man/sd_notify.xml
@@ -291,12 +291,35 @@
MAINPID=…
- The main process ID (PID) of the service, in case the service manager did not fork
- off the process itself. Example: MAINPID=4711.
+ Change the main process ID (PID) of the service. This is especially useful in the case
+ where the real main process isn't directly forked off by the service manager.
+ Example: MAINPID=4711.
+
+ MAINPIDFDID=…
+
+ The pidfd inode number of the new main process (specified through MAINPID=).
+ This information can be acquired through
+ fstat2
+ on the pidfd and is used to identify the process in a race-free fashion. Alternatively,
+ a pidfd can be sent directly to the service manager (see MAINPIDFD=1 below).
+
+
+
+
+
+ MAINPIDFD=1
+
+ Similar to MAINPID= with MAINPIDFDID=, but
+ the process is referenced directly by the pidfd passed to the service manager. This is useful
+ if pidfd id is not supported on the system. Exactly one fd is expected for this notification.
+
+
+
+
WATCHDOG=1
diff --git a/src/core/service.c b/src/core/service.c
index 1ac158f1eab..f48e15c2ebc 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -4540,6 +4540,72 @@ static bool service_notify_message_authorized(Service *s, PidRef *pid) {
}
}
+static int service_notify_message_parse_new_pid(
+ Unit *u,
+ char * const *tags,
+ FDSet *fds,
+ PidRef *ret) {
+
+ _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+ const char *e;
+ int r;
+
+ assert(u);
+ assert(ret);
+
+ /* MAINPIDFD=1 always takes precedence */
+ if (strv_contains(tags, "MAINPIDFD=1")) {
+ unsigned n_fds = fdset_size(fds);
+ if (n_fds != 1)
+ return log_unit_warning_errno(u, SYNTHETIC_ERRNO(EINVAL),
+ "Got MAINPIDFD=1 with %s fd, ignoring.", n_fds == 0 ? "no" : "more than one");
+
+ r = pidref_set_pidfd_consume(&pidref, ASSERT_FD(fdset_steal_first(fds)));
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Failed to create reference to received new main pidfd: %m");
+
+ goto finish;
+ }
+
+ e = strv_find_startswith(tags, "MAINPID=");
+ if (!e) {
+ *ret = PIDREF_NULL;
+ return 0;
+ }
+
+ r = pidref_set_pidstr(&pidref, e);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Failed to parse MAINPID=%s field in notification message, ignoring: %m", e);
+
+ e = strv_find_startswith(tags, "MAINPIDFDID=");
+ if (!e)
+ goto finish;
+
+ uint64_t pidfd_id;
+
+ r = safe_atou64(e, &pidfd_id);
+ if (r < 0)
+ return log_unit_warning_errno(u, r, "Failed to parse MAINPIDFDID= in notification message, refusing: %s", e);
+
+ r = pidref_acquire_pidfd_id(&pidref);
+ if (r < 0) {
+ if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
+ log_unit_warning_errno(u, r,
+ "Failed to acquire pidfd id of process " PID_FMT ", not validating MAINPIDFDID=%" PRIu64 ": %m",
+ pidref.pid, pidfd_id);
+ goto finish;
+ }
+
+ if (pidref.fd_id != pidfd_id)
+ return log_unit_warning_errno(u, SYNTHETIC_ERRNO(ESRCH),
+ "PIDFD ID of process " PID_FMT " (%" PRIu64 ") mismatches with received MAINPIDFDID=%" PRIu64 ", not changing main PID.",
+ pidref.pid, pidref.fd_id, pidfd_id);
+
+finish:
+ *ret = TAKE_PIDREF(pidref);
+ return 1;
+}
+
static void service_notify_message(
Unit *u,
PidRef *pidref,
@@ -4565,38 +4631,34 @@ static void service_notify_message(
bool notify_dbus = false;
const char *e;
- /* Interpret MAINPID= */
- e = strv_find_startswith(tags, "MAINPID=");
- if (e && IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING,
- SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
- SERVICE_STOP, SERVICE_STOP_SIGTERM)) {
+ /* Interpret MAINPID= (+ MAINPIDFDID=) / MAINPIDFD=1 */
+ _cleanup_(pidref_done) PidRef new_main_pid = PIDREF_NULL;
- _cleanup_(pidref_done) PidRef new_main_pid = PIDREF_NULL;
+ r = service_notify_message_parse_new_pid(u, tags, fds, &new_main_pid);
+ if (r > 0 &&
+ IN_SET(s->state, SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING,
+ SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY,
+ SERVICE_STOP, SERVICE_STOP_SIGTERM) &&
+ (!s->main_pid_known || !pidref_equal(&new_main_pid, &s->main_pid))) {
- r = pidref_set_pidstr(&new_main_pid, e);
- if (r < 0)
- log_unit_warning_errno(u, r, "Failed to parse MAINPID=%s field in notification message, ignoring: %m", e);
- else if (!s->main_pid_known || !pidref_equal(&new_main_pid, &s->main_pid)) {
+ r = service_is_suitable_main_pid(s, &new_main_pid, LOG_WARNING);
+ if (r == 0) {
+ /* The new main PID is a bit suspicious, which is OK if the sender is privileged. */
- r = service_is_suitable_main_pid(s, &new_main_pid, LOG_WARNING);
- if (r == 0) {
- /* The new main PID is a bit suspicious, which is OK if the sender is privileged. */
+ if (ucred->uid == 0) {
+ log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid.pid);
+ r = 1;
+ } else
+ log_unit_warning(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid.pid);
+ }
+ if (r > 0) {
+ (void) service_set_main_pidref(s, TAKE_PIDREF(new_main_pid), /* start_timestamp = */ NULL);
- if (ucred->uid == 0) {
- log_unit_debug(u, "New main PID "PID_FMT" does not belong to service, but we'll accept it as the request to change it came from a privileged process.", new_main_pid.pid);
- r = 1;
- } else
- log_unit_warning(u, "New main PID "PID_FMT" does not belong to service, refusing.", new_main_pid.pid);
- }
- if (r > 0) {
- (void) service_set_main_pidref(s, TAKE_PIDREF(new_main_pid), /* start_timestamp = */ NULL);
+ r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false);
+ if (r < 0)
+ log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", s->main_pid.pid);
- r = unit_watch_pidref(UNIT(s), &s->main_pid, /* exclusive= */ false);
- if (r < 0)
- log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", s->main_pid.pid);
-
- notify_dbus = true;
- }
+ notify_dbus = true;
}
}
diff --git a/src/notify/notify.c b/src/notify/notify.c
index 32bd6e6a7ae..8937457ec99 100644
--- a/src/notify/notify.c
+++ b/src/notify/notify.c
@@ -28,7 +28,7 @@
static bool arg_ready = false;
static bool arg_reloading = false;
static bool arg_stopping = false;
-static pid_t arg_pid = 0;
+static PidRef arg_pid = PIDREF_NULL;
static const char *arg_status = NULL;
static bool arg_booted = false;
static uid_t arg_uid = UID_INVALID;
@@ -39,6 +39,7 @@ static char **arg_exec = NULL;
static FDSet *arg_fds = NULL;
static char *arg_fdname = NULL;
+STATIC_DESTRUCTOR_REGISTER(arg_pid, pidref_done);
STATIC_DESTRUCTOR_REGISTER(arg_env, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_exec, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_fds, fdset_freep);
@@ -99,16 +100,24 @@ static pid_t manager_pid(void) {
return pid;
}
-static pid_t pid_parent_if_possible(void) {
- pid_t parent_pid = getppid();
+static int pidref_parent_if_applicable(PidRef *ret) {
+ _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+ int r;
+
+ assert(ret);
+
+ r = pidref_set_parent(&pidref);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create reference to our parent process: %m");
/* Don't send from PID 1 or the service manager's PID (which might be distinct from 1, if we are a
* --user service). That'd just be confusing for the service manager. */
- if (parent_pid <= 1 ||
- parent_pid == manager_pid())
- return getpid_cached();
+ if (pidref.pid <= 1 ||
+ pidref.pid == manager_pid())
+ return pidref_set_self(ret);
- return parent_pid;
+ *ret = TAKE_PIDREF(pidref);
+ return 0;
}
static int parse_argv(int argc, char *argv[]) {
@@ -175,17 +184,18 @@ static int parse_argv(int argc, char *argv[]) {
break;
case ARG_PID:
+ pidref_done(&arg_pid);
+
if (isempty(optarg) || streq(optarg, "auto"))
- arg_pid = pid_parent_if_possible();
+ r = pidref_parent_if_applicable(&arg_pid);
else if (streq(optarg, "parent"))
- arg_pid = getppid();
+ r = pidref_set_parent(&arg_pid);
else if (streq(optarg, "self"))
- arg_pid = getpid_cached();
- else {
- r = parse_pid(optarg, &arg_pid);
- if (r < 0)
- return log_error_errno(r, "Failed to parse PID %s.", optarg);
- }
+ r = pidref_set_self(&arg_pid);
+ else
+ r = pidref_set_pidstr(&arg_pid, optarg);
+ if (r < 0)
+ return log_error_errno(r, "Failed to refer to --pid='%s': %m", optarg);
break;
@@ -276,7 +286,7 @@ static int parse_argv(int argc, char *argv[]) {
if (arg_fdname && fdset_isempty(arg_fds))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "No file descriptors passed, but --fdname= set, refusing.");
- bool have_env = arg_ready || arg_stopping || arg_reloading || arg_status || arg_pid > 0 || !fdset_isempty(arg_fds);
+ bool have_env = arg_ready || arg_stopping || arg_reloading || arg_status || pidref_is_set(&arg_pid) || !fdset_isempty(arg_fds);
size_t n_arg_env;
if (do_exec) {
@@ -326,9 +336,10 @@ static int parse_argv(int argc, char *argv[]) {
}
static int run(int argc, char* argv[]) {
- _cleanup_free_ char *status = NULL, *cpid = NULL, *msg = NULL, *monotonic_usec = NULL, *fdn = NULL;
+ _cleanup_free_ char *status = NULL, *main_pid = NULL, *main_pidfd_id = NULL, *msg = NULL,
+ *monotonic_usec = NULL, *fdn = NULL;
_cleanup_strv_free_ char **final_env = NULL;
- const char *our_env[9];
+ const char *our_env[10];
size_t i = 0;
int r;
@@ -371,11 +382,22 @@ static int run(int argc, char* argv[]) {
our_env[i++] = status;
}
- if (arg_pid > 0) {
- if (asprintf(&cpid, "MAINPID="PID_FMT, arg_pid) < 0)
+ if (pidref_is_set(&arg_pid)) {
+ if (asprintf(&main_pid, "MAINPID="PID_FMT, arg_pid.pid) < 0)
return log_oom();
- our_env[i++] = cpid;
+ our_env[i++] = main_pid;
+
+ r = pidref_acquire_pidfd_id(&arg_pid);
+ if (r < 0)
+ log_debug_errno(r, "Unable to acquire pidfd id of new main pid " PID_FMT ", ignoring: %m",
+ arg_pid.pid);
+ else {
+ if (asprintf(&main_pidfd_id, "MAINPIDFDID=%" PRIu64, arg_pid.fd_id) < 0)
+ return log_oom();
+
+ our_env[i++] = main_pidfd_id;
+ }
}
if (!fdset_isempty(arg_fds)) {
@@ -415,11 +437,11 @@ static int run(int argc, char* argv[]) {
/* If --pid= is explicitly specified, use it as source pid. Otherwise, pretend the message originates
* from our parent, i.e. --pid=auto */
- if (arg_pid <= 0)
- arg_pid = pid_parent_if_possible();
+ if (!pidref_is_set(&arg_pid))
+ (void) pidref_parent_if_applicable(&arg_pid);
if (fdset_isempty(arg_fds))
- r = sd_pid_notify(arg_pid, /* unset_environment= */ false, msg);
+ r = sd_pid_notify(arg_pid.pid, /* unset_environment= */ false, msg);
else {
_cleanup_free_ int *a = NULL;
int k;
@@ -428,7 +450,7 @@ static int run(int argc, char* argv[]) {
if (k < 0)
return log_error_errno(k, "Failed to convert file descriptor set to array: %m");
- r = sd_pid_notify_with_fds(arg_pid, /* unset_environment= */ false, msg, a, k);
+ r = sd_pid_notify_with_fds(arg_pid.pid, /* unset_environment= */ false, msg, a, k);
}
if (r < 0)
@@ -440,7 +462,7 @@ static int run(int argc, char* argv[]) {
arg_fds = fdset_free(arg_fds); /* Close before we execute anything */
if (!arg_no_block) {
- r = sd_pid_notify_barrier(arg_pid, /* unset_environment= */ false, 5 * USEC_PER_SEC);
+ r = sd_pid_notify_barrier(arg_pid.pid, /* unset_environment= */ false, 5 * USEC_PER_SEC);
if (r < 0)
return log_error_errno(r, "Failed to invoke barrier: %m");
if (r == 0)
diff --git a/src/shared/fdset.c b/src/shared/fdset.c
index b3d3cfa784d..42092ada258 100644
--- a/src/shared/fdset.c
+++ b/src/shared/fdset.c
@@ -22,7 +22,7 @@
#define MAKE_SET(s) ((Set*) s)
#define MAKE_FDSET(s) ((FDSet*) s)
-FDSet *fdset_new(void) {
+FDSet* fdset_new(void) {
return MAKE_FDSET(set_new(NULL));
}
@@ -52,12 +52,20 @@ int fdset_new_array(FDSet **ret, const int fds[], size_t n_fds) {
return 0;
}
-void fdset_close(FDSet *s, bool async) {
+int fdset_steal_first(FDSet *fds) {
void *p;
- while ((p = set_steal_first(MAKE_SET(s)))) {
- int fd = PTR_TO_FD(p);
+ p = set_steal_first(MAKE_SET(fds));
+ if (!p)
+ return -ENOENT;
+ return PTR_TO_FD(p);
+}
+
+void fdset_close(FDSet *fds, bool async) {
+ int fd;
+
+ while ((fd = fdset_steal_first(fds)) >= 0) {
/* Valgrind's fd might have ended up in this set here, due to fdset_new_fill(). We'll ignore
* all failures here, so that the EBADFD that valgrind will return us on close() doesn't
* influence us */
@@ -144,7 +152,7 @@ bool fdset_contains(FDSet *s, int fd) {
return false;
}
- return !!set_get(MAKE_SET(s), FD_TO_PTR(fd));
+ return set_contains(MAKE_SET(s), FD_TO_PTR(fd));
}
int fdset_remove(FDSet *s, int fd) {
@@ -230,13 +238,13 @@ int fdset_new_fill(
}
int fdset_cloexec(FDSet *fds, bool b) {
- void *p;
int r;
assert(fds);
- SET_FOREACH(p, MAKE_SET(fds)) {
- r = fd_cloexec(PTR_TO_FD(p), b);
+ int fd;
+ FDSET_FOREACH(fd, fds) {
+ r = fd_cloexec(fd, b);
if (r < 0)
return r;
}
@@ -269,7 +277,6 @@ int fdset_new_listen_fds(FDSet **ret, bool unset) {
int fdset_to_array(FDSet *fds, int **ret) {
unsigned j = 0, m;
- void *e;
int *a;
assert(ret);
@@ -286,8 +293,9 @@ int fdset_to_array(FDSet *fds, int **ret) {
if (!a)
return -ENOMEM;
- SET_FOREACH(e, MAKE_SET(fds))
- a[j++] = PTR_TO_FD(e);
+ int fd;
+ FDSET_FOREACH(fd, fds)
+ a[j++] = fd;
assert(j == m);
@@ -322,13 +330,3 @@ int fdset_iterate(FDSet *s, Iterator *i) {
return PTR_TO_FD(p);
}
-
-int fdset_steal_first(FDSet *fds) {
- void *p;
-
- p = set_steal_first(MAKE_SET(fds));
- if (!p)
- return -ENOENT;
-
- return PTR_TO_FD(p);
-}
diff --git a/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh b/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh
index 47333ef0deb..c3a6a3982d6 100755
--- a/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh
+++ b/test/TEST-80-NOTIFYACCESS/TEST-80-NOTIFYACCESS.units/test.sh
@@ -39,7 +39,7 @@ sync_in b
sync_in d
# Move main process back to toplevel
- systemd-notify --pid=parent "MAINPID=$$"
+ systemd-notify "MAINPID=$$"
# Should be dropped again
systemd-notify --status="BOGUS2" --pid=parent