mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-02-01 05:47:04 +03:00
udev: run the main process, workers, and spawned commands in /udev subcgroup
Also enable cgroup delegation for udevd. Processes invoked through ExecReload= are then assigned to the .control subcgroup, so they are not killed by cg_kill(). Fixes #16867 and #22686.
This commit is contained in:
parent
4267084642
commit
a1f4fd3876
@ -28,6 +28,7 @@
|
||||
#include "sd-event.h"
|
||||
|
||||
#include "alloc-util.h"
|
||||
#include "cgroup-setup.h"
|
||||
#include "cgroup-util.h"
|
||||
#include "cpu-set-util.h"
|
||||
#include "dev-setup.h"
|
||||
@ -48,6 +49,7 @@
|
||||
#include "mkdir.h"
|
||||
#include "netlink-util.h"
|
||||
#include "parse-util.h"
|
||||
#include "path-util.h"
|
||||
#include "pretty-print.h"
|
||||
#include "proc-cmdline.h"
|
||||
#include "process-util.h"
|
||||
@ -85,7 +87,7 @@ typedef struct Manager {
|
||||
sd_event *event;
|
||||
Hashmap *workers;
|
||||
LIST_HEAD(Event, events);
|
||||
const char *cgroup;
|
||||
char *cgroup;
|
||||
pid_t pid; /* the process that originally allocated the manager object */
|
||||
int log_level;
|
||||
|
||||
@ -238,6 +240,7 @@ static Manager* manager_free(Manager *manager) {
|
||||
safe_close(manager->inotify_fd);
|
||||
safe_close_pair(manager->worker_watch);
|
||||
|
||||
free(manager->cgroup);
|
||||
return mfree(manager);
|
||||
}
|
||||
|
||||
@ -1722,12 +1725,63 @@ static int parse_argv(int argc, char *argv[]) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
|
||||
static int create_subcgroup(char **ret) {
|
||||
_cleanup_free_ char *cgroup = NULL, *subcgroup = NULL;
|
||||
int r;
|
||||
|
||||
if (getppid() != 1)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Not invoked by PID1.");
|
||||
|
||||
r = sd_booted();
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to check if systemd is running: %m");
|
||||
if (r == 0)
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "systemd is not running.");
|
||||
|
||||
/* Get our own cgroup, we regularly kill everything udev has left behind.
|
||||
* We only do this on systemd systems, and only if we are directly spawned
|
||||
* by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
|
||||
|
||||
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
|
||||
if (r < 0) {
|
||||
if (IN_SET(r, -ENOENT, -ENOMEDIUM))
|
||||
return log_debug_errno(r, "Dedicated cgroup not found: %m");
|
||||
return log_debug_errno(r, "Failed to get cgroup: %m");
|
||||
}
|
||||
|
||||
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, cgroup, "trusted.delegate");
|
||||
if (IN_SET(r, 0, -ENODATA))
|
||||
return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "The cgroup %s is not delegated to us.", cgroup);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to read trusted.delegate attribute: %m");
|
||||
|
||||
/* We are invoked with our own delegated cgroup tree, let's move us one level down, so that we
|
||||
* don't collide with the "no processes in inner nodes" rule of cgroups, when the service
|
||||
* manager invokes the ExecReload= job in the .control/ subcgroup. */
|
||||
|
||||
subcgroup = path_join(cgroup, "/udev");
|
||||
if (!subcgroup)
|
||||
return log_oom_debug();
|
||||
|
||||
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup, 0);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to create %s subcgroup: %m", subcgroup);
|
||||
|
||||
log_debug("Created %s subcgroup.", subcgroup);
|
||||
if (ret)
|
||||
*ret = TAKE_PTR(subcgroup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent) {
|
||||
_cleanup_(manager_freep) Manager *manager = NULL;
|
||||
_cleanup_free_ char *cgroup = NULL;
|
||||
int r;
|
||||
|
||||
assert(ret);
|
||||
|
||||
(void) create_subcgroup(&cgroup);
|
||||
|
||||
manager = new(Manager, 1);
|
||||
if (!manager)
|
||||
return log_oom();
|
||||
@ -1735,7 +1789,7 @@ static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cg
|
||||
*manager = (Manager) {
|
||||
.inotify_fd = -1,
|
||||
.worker_watch = { -1, -1 },
|
||||
.cgroup = cgroup,
|
||||
.cgroup = TAKE_PTR(cgroup),
|
||||
};
|
||||
|
||||
r = udev_ctrl_new_from_fd(&manager->ctrl, fd_ctrl);
|
||||
@ -1880,7 +1934,6 @@ static int main_loop(Manager *manager) {
|
||||
}
|
||||
|
||||
int run_udevd(int argc, char *argv[]) {
|
||||
_cleanup_free_ char *cgroup = NULL;
|
||||
_cleanup_(manager_freep) Manager *manager = NULL;
|
||||
int fd_ctrl = -1, fd_uevent = -1;
|
||||
int r;
|
||||
@ -1937,24 +1990,11 @@ int run_udevd(int argc, char *argv[]) {
|
||||
if (r < 0 && r != -EEXIST)
|
||||
return log_error_errno(r, "Failed to create /run/udev: %m");
|
||||
|
||||
if (getppid() == 1 && sd_booted() > 0) {
|
||||
/* Get our own cgroup, we regularly kill everything udev has left behind.
|
||||
* We only do this on systemd systems, and only if we are directly spawned
|
||||
* by PID1. Otherwise we are not guaranteed to have a dedicated cgroup. */
|
||||
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
|
||||
if (r < 0) {
|
||||
if (IN_SET(r, -ENOENT, -ENOMEDIUM))
|
||||
log_debug_errno(r, "Dedicated cgroup not found: %m");
|
||||
else
|
||||
log_warning_errno(r, "Failed to get cgroup: %m");
|
||||
}
|
||||
}
|
||||
|
||||
r = listen_fds(&fd_ctrl, &fd_uevent);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to listen on fds: %m");
|
||||
|
||||
r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
|
||||
r = manager_new(&manager, fd_ctrl, fd_uevent);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create manager: %m");
|
||||
|
||||
|
@ -16,6 +16,7 @@ Before=sysinit.target
|
||||
ConditionPathIsReadWrite=/sys
|
||||
|
||||
[Service]
|
||||
Delegate=pids
|
||||
DeviceAllow=block-* rwm
|
||||
DeviceAllow=char-* rwm
|
||||
Type=notify
|
||||
|
Loading…
x
Reference in New Issue
Block a user