1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-23 21:35:11 +03:00

oomd: loosen the restriction on ManagedOOMPreference

The ManagedOOMPreference property is only honored on cgroups which are
owned by root. This precludes anyone from setting ManagedOOMPreference
on cgroups managed by user managers.

Loosen this restriction in the following way: when processing a
monitored cgroup for kill candidates, honor the ManagedOOMPreference
setting if the monitored cgroup and cgroup candidate are owned by the
same user. This allows unprivileged users to configure
ManagedOOMPreference on their cgroups without affecting the kill
priority of ancestor cgroups.

N.B. that since swap kill operates globally to kill the largest
candidate, it is not appropriate to apply this logic to the swap kill
scenario. Therefore, the existing restriction on ManagedOOMPreference
will remain when calculating candidates for swap kill.

Add a new function, oomd_fetch_cgroup_oom_preference, to assist with
this new logic. To simplify things, move the `user.oomd_{avoid,omit}`
xattr reads to this function so that the xattr reads and uid checks are
performed all at once.
This commit is contained in:
Nick Rosbrook 2022-07-19 11:49:06 -04:00
parent b1cd8ed420
commit 652a4efb66
2 changed files with 61 additions and 19 deletions

View File

@ -14,6 +14,7 @@
#include "sort-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "user-util.h"
DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
oomd_cgroup_ctx_hash_ops,
@ -142,10 +143,53 @@ bool oomd_swap_free_below(const OomdSystemContext *ctx, int threshold_permyriad)
return (ctx->swap_total - ctx->swap_used) < swap_threshold;
}
int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix) {
uid_t uid, prefix_uid;
int r;
assert(ctx);
prefix = empty_to_root(prefix);
if (!path_startswith(ctx->path, prefix))
return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
"%s is not a descendant of %s", ctx->path, prefix);
r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, ctx->path, &uid);
if (r < 0)
return log_debug_errno(r, "Failed to get owner/group from %s: %m", ctx->path);
r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, prefix, &prefix_uid);
if (r < 0)
return log_debug_errno(r, "Failed to get owner/group from %s: %m", ctx->path);
if (uid == prefix_uid) {
/* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used
* as an optional feature of systemd-oomd (and the system might not even support them). */
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_avoid");
if (r == -ENOMEM)
return log_oom_debug();
if (r < 0 && r != -ENODATA)
log_debug_errno(r, "Failed to get xattr user.oomd_avoid, ignoring: %m");
ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_AVOID : ctx->preference;
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, ctx->path, "user.oomd_omit");
if (r == -ENOMEM)
return log_oom_debug();
if (r < 0 && r != -ENODATA)
log_debug_errno(r, "Failed to get xattr user.oomd_omit, ignoring: %m");
ctx->preference = r > 0 ? MANAGED_OOM_PREFERENCE_OMIT : ctx->preference;
} else
ctx->preference = MANAGED_OOM_PREFERENCE_NONE;
return 0;
}
int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret) {
_cleanup_free_ OomdCGroupContext **sorted = NULL;
OomdCGroupContext *item;
size_t k = 0;
int r;
assert(h);
assert(compare_func);
@ -157,7 +201,14 @@ int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const cha
HASHMAP_FOREACH(item, h) {
/* Skip over cgroups that are not valid candidates or are explicitly marked for omission */
if ((item->path && prefix && !path_startswith(item->path, prefix)) || item->preference == MANAGED_OOM_PREFERENCE_OMIT)
if (item->path && prefix && !path_startswith(item->path, prefix))
continue;
r = oomd_fetch_cgroup_oom_preference(item, prefix);
if (r == -ENOMEM)
return r;
if (item->preference == MANAGED_OOM_PREFERENCE_OMIT)
continue;
sorted[k++] = item;
@ -334,7 +385,6 @@ int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
_cleanup_(oomd_cgroup_context_freep) OomdCGroupContext *ctx = NULL;
_cleanup_free_ char *p = NULL, *val = NULL;
bool is_root;
uid_t uid;
int r;
assert(path);
@ -355,23 +405,6 @@ int oomd_cgroup_context_acquire(const char *path, OomdCGroupContext **ret) {
if (r < 0)
return log_debug_errno(r, "Error parsing memory pressure from %s: %m", p);
r = cg_get_owner(SYSTEMD_CGROUP_CONTROLLER, path, &uid);
if (r < 0)
log_debug_errno(r, "Failed to get owner/group from %s: %m", path);
else if (uid == 0) {
/* Ignore most errors when reading the xattr since it is usually unset and cgroup xattrs are only used
* as an optional feature of systemd-oomd (and the system might not even support them). */
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_avoid");
if (r == -ENOMEM)
return r;
ctx->preference = r == 1 ? MANAGED_OOM_PREFERENCE_AVOID : ctx->preference;
r = cg_get_xattr_bool(SYSTEMD_CGROUP_CONTROLLER, path, "user.oomd_omit");
if (r == -ENOMEM)
return r;
ctx->preference = r == 1 ? MANAGED_OOM_PREFERENCE_OMIT : ctx->preference;
}
if (is_root) {
r = procfs_memory_get_used(&ctx->current_memory_usage);
if (r < 0)

View File

@ -109,6 +109,15 @@ static inline int compare_swap_usage(OomdCGroupContext * const *c1, OomdCGroupCo
* Returns the number of sorted items; negative on error. */
int oomd_sort_cgroup_contexts(Hashmap *h, oomd_compare_t compare_func, const char *prefix, OomdCGroupContext ***ret);
/* If the cgroups represented by `ctx` and `prefix` are owned by the same user,
* then set `ctx->preference` using the `user.oomd_avoid` and `user.oomd_omit`
* xattrs. Otherwise, set `ctx->preference` to MANAGED_OOM_PREFERENCE_NONE.
*
* If `prefix` is NULL or the empty string, it is treated as root. If `prefix`
* does not specify an ancestor cgroup of `ctx`, -EINVAL is returned. Returns
* negative on all other errors. */
int oomd_fetch_cgroup_oom_preference(OomdCGroupContext *ctx, const char *prefix);
/* Returns a negative value on error, 0 if no processes were killed, or 1 if processes were killed. */
int oomd_cgroup_kill(const char *path, bool recurse, bool dry_run);