1
0
mirror of https://github.com/systemd/systemd.git synced 2025-02-21 05:57:34 +03:00

core/oomd: Use oom-kill ServiceResult for oomd

To notify the user of kill events from systemd-oomd, we now use
`SERVICE_FAILURE_OOM_KILL` as the failure result.

`unit_check_oomd_kill` now calls `notify_cgroup_oom` to
update the service result to `oom-kill`.

We add a new xattr `user.oomd_ooms` to keep track of the OOM kills
initiated by systemd-oomd. This helps us resolve a race between sending
SIGKILL to processes and checking for OOM kill status from the xattr.

Related to: 
This commit is contained in:
Nishal Kulkarni 2022-03-14 00:35:18 +05:30
parent f7829525be
commit 38c41427c7
8 changed files with 42 additions and 15 deletions

@ -3041,7 +3041,7 @@ int unit_check_oomd_kill(Unit *u) {
else if (r == 0)
return 0;
r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value);
r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_ooms", &value);
if (r < 0 && r != -ENODATA)
return r;
@ -3057,11 +3057,25 @@ int unit_check_oomd_kill(Unit *u) {
if (!increased)
return 0;
n = 0;
value = mfree(value);
r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.oomd_kill", &value);
if (r >= 0 && !isempty(value))
(void) safe_atou64(value, &n);
if (n > 0)
log_unit_struct(u, LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n));
LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n),
"N_PROCESSES=%" PRIu64, n);
else
log_unit_struct(u, LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "systemd-oomd killed some process(es) in this unit."));
unit_notify_cgroup_oom(u, /* ManagedOOM= */ true);
return 1;
}
@ -3097,8 +3111,7 @@ int unit_check_oom(Unit *u) {
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer."));
if (UNIT_VTABLE(u)->notify_cgroup_oom)
UNIT_VTABLE(u)->notify_cgroup_oom(u);
unit_notify_cgroup_oom(u, /* ManagedOOM= */ false);
return 1;
}

@ -2644,9 +2644,7 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
* We only do this for the cgroup the PID belonged to. */
(void) unit_check_oom(u1);
/* This only logs for now. In the future when the interface for kills/notifications
* is more stable we can extend service results table similar to how kernel oom kills
* are managed. */
/* We check if systemd-oomd performed a kill so that we log and notify appropriately */
(void) unit_check_oomd_kill(u1);
manager_invoke_sigchld_event(m, u1, &si);

@ -60,9 +60,9 @@ typedef enum StatusType {
} StatusType;
/* Policy describing how a unit reacts after one of its processes has been OOM-killed: leave it at
 * that, stop the unit, or kill the remaining processes and stop the unit.
 * NOTE(review): each enumerator appears twice below, apparently because this view shows the pre-change
 * and post-change lines of the diff together — confirm against the real header, which should only
 * carry one set (the one mentioning systemd-oomd). */
typedef enum OOMPolicy {
OOM_CONTINUE, /* The kernel kills the process it wants to kill, and that's it */
OOM_STOP, /* The kernel kills the process it wants to kill, and we stop the unit */
OOM_KILL, /* The kernel kills the process it wants to kill, and all others in the unit, and we stop the unit */
OOM_CONTINUE, /* The kernel or systemd-oomd kills the process it wants to kill, and that's it */
OOM_STOP, /* The kernel or systemd-oomd kills the process it wants to kill, and we stop the unit */
OOM_KILL, /* The kernel or systemd-oomd kills the process it wants to kill, and all others in the unit, and we stop the unit */
_OOM_POLICY_MAX,
_OOM_POLICY_INVALID = -EINVAL,
} OOMPolicy;

@ -3404,10 +3404,13 @@ static void service_notify_cgroup_empty_event(Unit *u) {
}
}
static void service_notify_cgroup_oom_event(Unit *u) {
static void service_notify_cgroup_oom_event(Unit *u, bool managed_oom) {
Service *s = SERVICE(u);
log_unit_debug(u, "Process of control group was killed by the OOM killer.");
if (managed_oom)
log_unit_debug(u, "Process(es) of control group were killed by systemd-oomd.");
else
log_unit_debug(u, "Process of control group was killed by the OOM killer.");
if (s->oom_policy == OOM_CONTINUE)
return;

@ -75,7 +75,7 @@ typedef enum ServiceResult {
SERVICE_FAILURE_CORE_DUMP,
SERVICE_FAILURE_WATCHDOG,
SERVICE_FAILURE_START_LIMIT_HIT,
SERVICE_FAILURE_OOM_KILL,
SERVICE_FAILURE_OOM_KILL, /* OOM Kill by the Kernel or systemd-oomd */
SERVICE_SKIP_CONDITION,
_SERVICE_RESULT_MAX,
_SERVICE_RESULT_INVALID = -EINVAL,

@ -3801,6 +3801,13 @@ int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error) {
return UNIT_VTABLE(u)->kill(u, w, signo, error);
}
/* Forwards an OOM kill notification to the unit type's notify_cgroup_oom() hook, if the type
 * provides one; unit types without the hook are silently ignored.
 *
 * managed_oom is passed through unchanged to the hook. The callers visible here pass true when the
 * kill was detected via systemd-oomd and false when it was detected via the kernel OOM killer. */
void unit_notify_cgroup_oom(Unit *u, bool managed_oom) {
        assert(u);

        /* Nothing to do for unit types that do not implement the hook. */
        if (!UNIT_VTABLE(u)->notify_cgroup_oom)
                return;

        UNIT_VTABLE(u)->notify_cgroup_oom(u, managed_oom);
}
static Set *unit_pid_set(pid_t main_pid, pid_t control_pid) {
_cleanup_set_free_ Set *pid_set = NULL;
int r;

@ -285,7 +285,7 @@ typedef struct Unit {
nsec_t cpu_usage_base;
nsec_t cpu_usage_last; /* the most recently read value */
/* The current counter of processes sent SIGKILL by systemd-oomd */
/* The current counter of OOM kills initiated by systemd-oomd */
uint64_t managed_oom_kill_last;
/* The current counter of the oom_kill field in the memory.events cgroup attribute */
@ -596,7 +596,7 @@ typedef struct UnitVTable {
void (*notify_cgroup_empty)(Unit *u);
/* Called whenever an OOM kill event on this unit was seen */
void (*notify_cgroup_oom)(Unit *u);
void (*notify_cgroup_oom)(Unit *u, bool managed_oom);
/* Called whenever a process of this unit sends us a message */
void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds);
@ -811,6 +811,8 @@ int unit_reload(Unit *u);
int unit_kill(Unit *u, KillWho w, int signo, sd_bus_error *error);
int unit_kill_common(Unit *u, KillWho who, int signo, pid_t main_pid, pid_t control_pid, sd_bus_error *error);
void unit_notify_cgroup_oom(Unit *u, bool managed_oom);
typedef enum UnitNotifyFlags {
UNIT_NOTIFY_RELOAD_FAILURE = 1 << 0,
UNIT_NOTIFY_WILL_AUTO_RESTART = 1 << 1,

@ -192,6 +192,10 @@ int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run) {
if (!pids_killed)
return -ENOMEM;
r = increment_oomd_xattr(path, "user.oomd_ooms", 1);
if (r < 0)
log_debug_errno(r, "Failed to set user.oomd_ooms before kill: %m");
if (recurse)
r = cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, path, SIGKILL, CGROUP_IGNORE_SELF, pids_killed, log_kill, NULL);
else