1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-25 01:34:28 +03:00

Merge pull request #10507 from cdown/cpu_acct

cgroup v2: Don't require CPU controller for CPU accounting in 4.15+
This commit is contained in:
Lennart Poettering 2018-11-19 10:57:48 +01:00 committed by GitHub
commit 2b38a8ea80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 120 additions and 38 deletions

View File

@ -307,8 +307,9 @@
<varname>TasksAccounting=</varname>, <varname>IOAccounting=</varname> and <varname>IPAccounting=</varname>. See <varname>TasksAccounting=</varname>, <varname>IOAccounting=</varname> and <varname>IPAccounting=</varname>. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry> <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to yes, for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to yes,
<varname>DefaultMemoryAccounting=</varname> to &MEMORY_ACCOUNTING_DEFAULT;, the other four settings to <varname>DefaultMemoryAccounting=</varname> to &MEMORY_ACCOUNTING_DEFAULT;. <varname>DefaultCPUAccounting=</varname>
no.</para></listitem> defaults to yes if enabling CPU accounting doesn't require the CPU controller to be enabled (Linux 4.15+ using the
unified hierarchy for resource control), otherwise it defaults to no. The other three settings default to no.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>

View File

@ -12,6 +12,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/statfs.h> #include <sys/statfs.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/utsname.h>
#include <sys/xattr.h> #include <sys/xattr.h>
#include <unistd.h> #include <unistd.h>
@ -2822,3 +2823,54 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
}; };
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController); DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
/* Returns the mask of cgroup controllers that have to be enabled in order to do CPU accounting on this
 * system. A return value of 0 means no controller is required at all. The result is determined on first
 * successful use and cached in a function-local static afterwards. */
CGroupMask get_cpu_accounting_mask(void) {
        static CGroupMask needed_mask = (CGroupMask) -1;
        int r;

        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
         * provided externally from the CPU controller, which means we don't
         * need to enable the CPU controller just to get metrics. This is good,
         * because enabling the CPU controller comes at a minor performance
         * hit, especially when it's propagated deep into large hierarchies.
         * There's also no separate CPU accounting controller available within
         * a unified hierarchy.
         *
         * This combination of factors results in the desired cgroup mask to
         * enable for CPU accounting varying as follows:
         *
         *                    +---------------------+---------------------+
         *                    |     Linux ≥4.15     |     Linux <4.15     |
         *    +---------------+---------------------+---------------------+
         *    | Unified       | nothing             | CGROUP_MASK_CPU     |
         *    +---------------+---------------------+---------------------+
         *    | Hybrid/Legacy | CGROUP_MASK_CPUACCT | CGROUP_MASK_CPUACCT |
         *    +---------------+---------------------+---------------------+
         *
         * We check kernel version here instead of manually checking whether
         * cpu.stat is present for every cgroup, as that check in itself would
         * already be fairly expensive.
         *
         * Kernels where this patch has been backported will therefore have the
         * CPU controller enabled unnecessarily. This is more expensive than
         * necessary, but harmless.
         */

        /* (CGroupMask) -1 is the "not determined yet" sentinel */
        if (needed_mask != (CGroupMask) -1)
                return needed_mask;

        r = cg_all_unified();
        if (r < 0)
                /* cg_all_unified() reports failure as a negative value. Don't mistake that for "unified"
                 * (which could yield an empty mask and make accounting look free). Fall back to the
                 * dedicated accounting controller, and don't cache, so that a later call may retry. */
                return CGROUP_MASK_CPUACCT;

        if (r > 0) {
                struct utsname u;

                assert_se(uname(&u) >= 0);

                if (str_verscmp(u.release, "4.15") < 0)
                        needed_mask = CGROUP_MASK_CPU;
                else
                        needed_mask = 0;
        } else
                needed_mask = CGROUP_MASK_CPUACCT;

        return needed_mask;
}
bool cpu_accounting_is_cheap(void) {
return get_cpu_accounting_mask() == 0;
}

View File

@ -69,6 +69,9 @@ static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
return mask; return mask;
} }
CGroupMask get_cpu_accounting_mask(void);
bool cpu_accounting_is_cheap(void);
/* Special values for all weight knobs on unified hierarchy */ /* Special values for all weight knobs on unified hierarchy */
#define CGROUP_WEIGHT_INVALID ((uint64_t) -1) #define CGROUP_WEIGHT_INVALID ((uint64_t) -1)
#define CGROUP_WEIGHT_MIN UINT64_C(1) #define CGROUP_WEIGHT_MIN UINT64_C(1)

View File

@ -231,7 +231,7 @@ static int process(
if (g->n_tasks > 0) if (g->n_tasks > 0)
g->n_tasks_valid = true; g->n_tasks_valid = true;
} else if (STR_IN_SET(controller, "cpu", "cpuacct")) { } else if (STR_IN_SET(controller, "cpu", "cpuacct") || cpu_accounting_is_cheap()) {
_cleanup_free_ char *p = NULL, *v = NULL; _cleanup_free_ char *p = NULL, *v = NULL;
uint64_t new_usage; uint64_t new_usage;
nsec_t timestamp; nsec_t timestamp;

View File

@ -1178,7 +1178,7 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
/* Figure out which controllers we need, based on the cgroup context object */ /* Figure out which controllers we need, based on the cgroup context object */
if (c->cpu_accounting) if (c->cpu_accounting)
mask |= CGROUP_MASK_CPUACCT; mask |= get_cpu_accounting_mask();
if (cgroup_context_has_cpu_weight(c) || if (cgroup_context_has_cpu_weight(c) ||
cgroup_context_has_cpu_shares(c) || cgroup_context_has_cpu_shares(c) ||
@ -2617,13 +2617,15 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
r = cg_all_unified(); r = cg_all_unified();
if (r < 0) if (r < 0)
return r; return r;
/* Requisite controllers for CPU accounting are not enabled */
if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
return -ENODATA;
if (r > 0) { if (r > 0) {
_cleanup_free_ char *val = NULL; _cleanup_free_ char *val = NULL;
uint64_t us; uint64_t us;
if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
return -ENODATA;
r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val); r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
if (r < 0) if (r < 0)
return r; return r;
@ -2636,9 +2638,6 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
ns = us * NSEC_PER_USEC; ns = us * NSEC_PER_USEC;
} else { } else {
if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
return -ENODATA;
r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
if (r == -ENOENT) if (r == -ENOENT)
return -ENODATA; return -ENODATA;

View File

@ -603,7 +603,7 @@ int bus_cgroup_set_property(
flags |= UNIT_PRIVATE; flags |= UNIT_PRIVATE;
if (streq(name, "CPUAccounting")) if (streq(name, "CPUAccounting"))
return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU, message, flags, error); return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error);
if (streq(name, "CPUWeight")) if (streq(name, "CPUWeight"))
return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error); return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error);

View File

@ -28,6 +28,7 @@
#include "bus-error.h" #include "bus-error.h"
#include "bus-util.h" #include "bus-util.h"
#include "capability-util.h" #include "capability-util.h"
#include "cgroup-util.h"
#include "clock-util.h" #include "clock-util.h"
#include "conf-parser.h" #include "conf-parser.h"
#include "cpu-set-util.h" #include "cpu-set-util.h"
@ -119,7 +120,7 @@ static nsec_t arg_timer_slack_nsec = NSEC_INFINITY;
static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE; static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE;
static Set* arg_syscall_archs = NULL; static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL; static FILE* arg_serialization = NULL;
static bool arg_default_cpu_accounting = false; static int arg_default_cpu_accounting = -1;
static bool arg_default_io_accounting = false; static bool arg_default_io_accounting = false;
static bool arg_default_ip_accounting = false; static bool arg_default_ip_accounting = false;
static bool arg_default_blockio_accounting = false; static bool arg_default_blockio_accounting = false;
@ -702,7 +703,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit }, { "Manager", "DefaultLimitNICE", config_parse_rlimit, RLIMIT_NICE, arg_default_rlimit },
{ "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit }, { "Manager", "DefaultLimitRTPRIO", config_parse_rlimit, RLIMIT_RTPRIO, arg_default_rlimit },
{ "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit }, { "Manager", "DefaultLimitRTTIME", config_parse_rlimit, RLIMIT_RTTIME, arg_default_rlimit },
{ "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting }, { "Manager", "DefaultCPUAccounting", config_parse_tristate, 0, &arg_default_cpu_accounting },
{ "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting }, { "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
{ "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting }, { "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting }, { "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
@ -751,7 +752,14 @@ static void set_manager_defaults(Manager *m) {
m->default_restart_usec = arg_default_restart_usec; m->default_restart_usec = arg_default_restart_usec;
m->default_start_limit_interval = arg_default_start_limit_interval; m->default_start_limit_interval = arg_default_start_limit_interval;
m->default_start_limit_burst = arg_default_start_limit_burst; m->default_start_limit_burst = arg_default_start_limit_burst;
m->default_cpu_accounting = arg_default_cpu_accounting;
/* On 4.15+ with unified hierarchy, CPU accounting is essentially free as it doesn't require the CPU
* controller to be enabled, so the default is to enable it unless we got told otherwise. */
if (arg_default_cpu_accounting >= 0)
m->default_cpu_accounting = arg_default_cpu_accounting;
else
m->default_cpu_accounting = cpu_accounting_is_cheap();
m->default_io_accounting = arg_default_io_accounting; m->default_io_accounting = arg_default_io_accounting;
m->default_ip_accounting = arg_default_ip_accounting; m->default_ip_accounting = arg_default_ip_accounting;
m->default_blockio_accounting = arg_default_blockio_accounting; m->default_blockio_accounting = arg_default_blockio_accounting;

View File

@ -2,6 +2,8 @@
#include <stdio.h> #include <stdio.h>
#include "cgroup.h"
#include "cgroup-util.h"
#include "macro.h" #include "macro.h"
#include "manager.h" #include "manager.h"
#include "rm-rf.h" #include "rm-rf.h"
@ -10,11 +12,27 @@
#include "tests.h" #include "tests.h"
#include "unit.h" #include "unit.h"
/* Logs the two masks via log_cgroup_mask() and then asserts that they are equal.
 *
 * The expansion is a single statement (do/while(0)), so the macro is safe in unbraced if/else bodies.
 * Both arguments are evaluated exactly once and captured in locals, so expressions with side effects
 * or low-precedence operators (e.g. "a & b") behave as expected. */
#define ASSERT_CGROUP_MASK(got, expected)                               \
        do {                                                            \
                CGroupMask _mask_got = (got);                           \
                CGroupMask _mask_expected = (expected);                 \
                log_cgroup_mask(_mask_got, _mask_expected);             \
                assert_se(_mask_got == _mask_expected);                 \
        } while (0)

/* Same as ASSERT_CGROUP_MASK(), but passes the expected mask through CGROUP_MASK_EXTEND_JOINED() first. */
#define ASSERT_CGROUP_MASK_JOINED(got, expected) ASSERT_CGROUP_MASK(got, CGROUP_MASK_EXTEND_JOINED(expected))
/* Test helper: converts both masks to strings with cg_mask_to_string() and logs them at info level,
 * the expected mask first, then the mask actually obtained. A failing conversion trips assert_se(). */
static void log_cgroup_mask(CGroupMask got, CGroupMask expected) {
        _cleanup_free_ char *expected_str = NULL;
        _cleanup_free_ char *got_str = NULL;
        int r;

        r = cg_mask_to_string(expected, &expected_str);
        assert_se(r >= 0);
        log_info("Expected mask: %s\n", expected_str);

        r = cg_mask_to_string(got, &got_str);
        assert_se(r >= 0);
        log_info("Got mask: %s\n", got_str);
}
static int test_cgroup_mask(void) { static int test_cgroup_mask(void) {
_cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL; _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
_cleanup_(manager_freep) Manager *m = NULL; _cleanup_(manager_freep) Manager *m = NULL;
Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep; Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep;
int r; int r;
CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
r = enter_cgroup_subroot(); r = enter_cgroup_subroot();
if (r == -ENOMEDIUM) if (r == -ENOMEDIUM)
@ -57,36 +75,36 @@ static int test_cgroup_mask(void) {
root = UNIT_DEREF(parent->slice); root = UNIT_DEREF(parent->slice);
/* Verify per-unit cgroups settings. */ /* Verify per-unit cgroups settings. */
assert_se(unit_get_own_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT)); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(son), CGROUP_MASK_CPU);
assert_se(unit_get_own_mask(daughter) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(daughter), cpu_accounting_mask);
assert_se(unit_get_own_mask(grandchild) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(grandchild), 0);
assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY);
assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
assert_se(unit_get_own_mask(root) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0);
/* Verify aggregation of member masks */ /* Verify aggregation of member masks */
assert_se(unit_get_members_mask(son) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(son), 0);
assert_se(unit_get_members_mask(daughter) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(daughter), 0);
assert_se(unit_get_members_mask(grandchild) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0);
assert_se(unit_get_members_mask(parent_deep) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0);
assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
/* Verify aggregation of sibling masks. */ /* Verify aggregation of sibling masks. */
assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(son), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(daughter), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
assert_se(unit_get_siblings_mask(grandchild) == 0); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(grandchild), 0);
assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
/* Verify aggregation of target masks. */ /* Verify aggregation of target masks. */
assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); ASSERT_CGROUP_MASK(unit_get_target_mask(son), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); ASSERT_CGROUP_MASK(unit_get_target_mask(daughter), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
assert_se(unit_get_target_mask(grandchild) == 0); ASSERT_CGROUP_MASK(unit_get_target_mask(grandchild), 0);
assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); ASSERT_CGROUP_MASK(unit_get_target_mask(parent_deep), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); ASSERT_CGROUP_MASK(unit_get_target_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
return 0; return 0;
} }

View File

@ -5,3 +5,4 @@ Description=Daughter Service
Slice=parent.slice Slice=parent.slice
Type=oneshot Type=oneshot
ExecStart=/bin/true ExecStart=/bin/true
CPUAccounting=true