Tejun Heo bf35a7879f selftests: cgroup: Test open-time cgroup namespace usage for migration checks
When a task is writing to an fd opened by a different task, the perm check
should use the cgroup namespace of the latter task. Add a test for it.

Tested-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2022-01-06 11:02:29 -10:00

889 lines
18 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#define _GNU_SOURCE
#include <linux/limits.h>
#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
#include <pthread.h>
#include "../kselftest.h"
#include "cgroup_util.h"
static int touch_anon(char *buf, size_t size)
{
int fd;
char *pos = buf;
fd = open("/dev/urandom", O_RDONLY);
if (fd < 0)
return -1;
while (size > 0) {
ssize_t ret = read(fd, pos, size);
if (ret < 0) {
if (errno != EINTR) {
close(fd);
return -1;
}
} else {
pos += ret;
size -= ret;
}
}
close(fd);
return 0;
}
static int alloc_and_touch_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
size_t size = (size_t)arg;
void *buf;
buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
0, 0);
if (buf == MAP_FAILED)
return -1;
if (touch_anon((char *)buf, size)) {
munmap(buf, size);
return -1;
}
while (getppid() == ppid)
sleep(1);
munmap(buf, size);
return 0;
}
/*
* Create a child process that allocates and touches 100MB, then waits to be
* killed. Wait until the child is attached to the cgroup, kill all processes
* in that cgroup and wait until "cgroup.procs" is empty. At this point try to
* destroy the empty cgroup. The test helps detect race conditions between
* dying processes leaving the cgroup and cgroup destruction path.
*/
static int test_cgcore_destroy(const char *root)
{
int ret = KSFT_FAIL;
char *cg_test = NULL;
int child_pid;
char buf[PAGE_SIZE];
cg_test = cg_name(root, "cg_test");
if (!cg_test)
goto cleanup;
for (int i = 0; i < 10; i++) {
if (cg_create(cg_test))
goto cleanup;
child_pid = cg_run_nowait(cg_test, alloc_and_touch_anon_noexit,
(void *) MB(100));
if (child_pid < 0)
goto cleanup;
/* wait for the child to enter cgroup */
if (cg_wait_for_proc_count(cg_test, 1))
goto cleanup;
if (cg_killall(cg_test))
goto cleanup;
/* wait for cgroup to be empty */
while (1) {
if (cg_read(cg_test, "cgroup.procs", buf, sizeof(buf)))
goto cleanup;
if (buf[0] == '\0')
break;
usleep(1000);
}
if (rmdir(cg_test))
goto cleanup;
if (waitpid(child_pid, NULL, 0) < 0)
goto cleanup;
}
ret = KSFT_PASS;
cleanup:
if (cg_test)
cg_destroy(cg_test);
free(cg_test);
return ret;
}
/*
* A(0) - B(0) - C(1)
* \ D(0)
*
* A, B and C's "populated" fields would be 1 while D's 0.
* test that after the one process in C is moved to root,
* A,B and C's "populated" fields would flip to "0" and file
* modified events will be generated on the
* "cgroup.events" files of both cgroups.
*/
static int test_cgcore_populated(const char *root)
{
int ret = KSFT_FAIL;
int err;
char *cg_test_a = NULL, *cg_test_b = NULL;
char *cg_test_c = NULL, *cg_test_d = NULL;
int cgroup_fd = -EBADF;
pid_t pid;
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");
if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
goto cleanup;
if (cg_create(cg_test_a))
goto cleanup;
if (cg_create(cg_test_b))
goto cleanup;
if (cg_create(cg_test_c))
goto cleanup;
if (cg_create(cg_test_d))
goto cleanup;
if (cg_enter_current(cg_test_c))
goto cleanup;
if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
goto cleanup;
if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
goto cleanup;
if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
goto cleanup;
if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
goto cleanup;
if (cg_enter_current(root))
goto cleanup;
if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
goto cleanup;
if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
goto cleanup;
if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
goto cleanup;
if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
goto cleanup;
/* Test that we can directly clone into a new cgroup. */
cgroup_fd = dirfd_open_opath(cg_test_d);
if (cgroup_fd < 0)
goto cleanup;
pid = clone_into_cgroup(cgroup_fd);
if (pid < 0) {
if (errno == ENOSYS)
goto cleanup_pass;
goto cleanup;
}
if (pid == 0) {
if (raise(SIGSTOP))
exit(EXIT_FAILURE);
exit(EXIT_SUCCESS);
}
err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n");
(void)clone_reap(pid, WSTOPPED);
(void)kill(pid, SIGCONT);
(void)clone_reap(pid, WEXITED);
if (err)
goto cleanup;
if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
goto cleanup;
/* Remove cgroup. */
if (cg_test_d) {
cg_destroy(cg_test_d);
free(cg_test_d);
cg_test_d = NULL;
}
pid = clone_into_cgroup(cgroup_fd);
if (pid < 0)
goto cleanup_pass;
if (pid == 0)
exit(EXIT_SUCCESS);
(void)clone_reap(pid, WEXITED);
goto cleanup;
cleanup_pass:
ret = KSFT_PASS;
cleanup:
if (cg_test_d)
cg_destroy(cg_test_d);
if (cg_test_c)
cg_destroy(cg_test_c);
if (cg_test_b)
cg_destroy(cg_test_b);
if (cg_test_a)
cg_destroy(cg_test_a);
free(cg_test_d);
free(cg_test_c);
free(cg_test_b);
free(cg_test_a);
if (cgroup_fd >= 0)
close(cgroup_fd);
return ret;
}
/*
* A (domain threaded) - B (threaded) - C (domain)
*
* test that C can't be used until it is turned into a
* threaded cgroup. "cgroup.type" file will report "domain (invalid)" in
* these cases. Operations which fail due to invalid topology use
* EOPNOTSUPP as the errno.
*/
static int test_cgcore_invalid_domain(const char *root)
{
int ret = KSFT_FAIL;
char *grandparent = NULL, *parent = NULL, *child = NULL;
grandparent = cg_name(root, "cg_test_grandparent");
parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
if (!parent || !child || !grandparent)
goto cleanup;
if (cg_create(grandparent))
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(parent, "cgroup.type", "threaded"))
goto cleanup;
if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
goto cleanup;
if (!cg_enter_current(child))
goto cleanup;
if (errno != EOPNOTSUPP)
goto cleanup;
if (!clone_into_cgroup_run_wait(child))
goto cleanup;
if (errno == ENOSYS)
goto cleanup_pass;
if (errno != EOPNOTSUPP)
goto cleanup;
cleanup_pass:
ret = KSFT_PASS;
cleanup:
cg_enter_current(root);
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
if (grandparent)
cg_destroy(grandparent);
free(child);
free(parent);
free(grandparent);
return ret;
}
/*
* Test that when a child becomes threaded
* the parent type becomes domain threaded.
*/
static int test_cgcore_parent_becomes_threaded(const char *root)
{
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(child, "cgroup.type", "threaded"))
goto cleanup;
if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
/*
* Test that there's no internal process constrain on threaded cgroups.
* You can add threads/processes on a parent with a controller enabled.
*/
static int test_cgcore_no_internal_process_constraint_on_threads(const char *root)
{
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
cg_write(root, "cgroup.subtree_control", "+cpu")) {
ret = KSFT_SKIP;
goto cleanup;
}
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(parent, "cgroup.type", "threaded"))
goto cleanup;
if (cg_write(child, "cgroup.type", "threaded"))
goto cleanup;
if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
goto cleanup;
if (cg_enter_current(parent))
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_enter_current(root);
cg_enter_current(root);
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
/*
* Test that you can't enable a controller on a child if it's not enabled
* on the parent.
*/
static int test_cgcore_top_down_constraint_enable(const char *root)
{
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (!cg_write(child, "cgroup.subtree_control", "+memory"))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
/*
* Test that you can't disable a controller on a parent
* if it's enabled in a child.
*/
static int test_cgcore_top_down_constraint_disable(const char *root)
{
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
goto cleanup;
if (cg_write(child, "cgroup.subtree_control", "+memory"))
goto cleanup;
if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
/*
* Test internal process constraint.
* You can't add a pid to a domain parent if a controller is enabled.
*/
static int test_cgcore_internal_process_constraint(const char *root)
{
int ret = KSFT_FAIL;
char *parent = NULL, *child = NULL;
parent = cg_name(root, "cg_test_parent");
child = cg_name(root, "cg_test_parent/cg_test_child");
if (!parent || !child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_write(parent, "cgroup.subtree_control", "+memory"))
goto cleanup;
if (!cg_enter_current(parent))
goto cleanup;
if (!clone_into_cgroup_run_wait(parent))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
if (parent)
cg_destroy(parent);
free(child);
free(parent);
return ret;
}
static void *dummy_thread_fn(void *arg)
{
return (void *)(size_t)pause();
}
/*
* Test threadgroup migration.
* All threads of a process are migrated together.
*/
static int test_cgcore_proc_migration(const char *root)
{
int ret = KSFT_FAIL;
int t, c_threads = 0, n_threads = 13;
char *src = NULL, *dst = NULL;
pthread_t threads[n_threads];
src = cg_name(root, "cg_src");
dst = cg_name(root, "cg_dst");
if (!src || !dst)
goto cleanup;
if (cg_create(src))
goto cleanup;
if (cg_create(dst))
goto cleanup;
if (cg_enter_current(src))
goto cleanup;
for (c_threads = 0; c_threads < n_threads; ++c_threads) {
if (pthread_create(&threads[c_threads], NULL, dummy_thread_fn, NULL))
goto cleanup;
}
cg_enter_current(dst);
if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1)
goto cleanup;
ret = KSFT_PASS;
cleanup:
for (t = 0; t < c_threads; ++t) {
pthread_cancel(threads[t]);
}
for (t = 0; t < c_threads; ++t) {
pthread_join(threads[t], NULL);
}
cg_enter_current(root);
if (dst)
cg_destroy(dst);
if (src)
cg_destroy(src);
free(dst);
free(src);
return ret;
}
static void *migrating_thread_fn(void *arg)
{
int g, i, n_iterations = 1000;
char **grps = arg;
char lines[3][PATH_MAX];
for (g = 1; g < 3; ++g)
snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0]));
for (i = 0; i < n_iterations; ++i) {
cg_enter_current_thread(grps[(i % 2) + 1]);
if (proc_read_strstr(0, 1, "cgroup", lines[(i % 2) + 1]))
return (void *)-1;
}
return NULL;
}
/*
* Test single thread migration.
* Threaded cgroups allow successful migration of a thread.
*/
static int test_cgcore_thread_migration(const char *root)
{
int ret = KSFT_FAIL;
char *dom = NULL;
char line[PATH_MAX];
char *grps[3] = { (char *)root, NULL, NULL };
pthread_t thr;
void *retval;
dom = cg_name(root, "cg_dom");
grps[1] = cg_name(root, "cg_dom/cg_src");
grps[2] = cg_name(root, "cg_dom/cg_dst");
if (!grps[1] || !grps[2] || !dom)
goto cleanup;
if (cg_create(dom))
goto cleanup;
if (cg_create(grps[1]))
goto cleanup;
if (cg_create(grps[2]))
goto cleanup;
if (cg_write(grps[1], "cgroup.type", "threaded"))
goto cleanup;
if (cg_write(grps[2], "cgroup.type", "threaded"))
goto cleanup;
if (cg_enter_current(grps[1]))
goto cleanup;
if (pthread_create(&thr, NULL, migrating_thread_fn, grps))
goto cleanup;
if (pthread_join(thr, &retval))
goto cleanup;
if (retval)
goto cleanup;
snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0]));
if (proc_read_strstr(0, 1, "cgroup", line))
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_enter_current(root);
if (grps[2])
cg_destroy(grps[2]);
if (grps[1])
cg_destroy(grps[1]);
if (dom)
cg_destroy(dom);
free(grps[2]);
free(grps[1]);
free(dom);
return ret;
}
/*
* cgroup migration permission check should be performed based on the
* credentials at the time of open instead of write.
*/
static int test_cgcore_lesser_euid_open(const char *root)
{
const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
int ret = KSFT_FAIL;
char *cg_test_a = NULL, *cg_test_b = NULL;
char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
int cg_test_b_procs_fd = -1;
uid_t saved_uid;
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_b");
if (!cg_test_a || !cg_test_b)
goto cleanup;
cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
if (!cg_test_a_procs || !cg_test_b_procs)
goto cleanup;
if (cg_create(cg_test_a) || cg_create(cg_test_b))
goto cleanup;
if (cg_enter_current(cg_test_a))
goto cleanup;
if (chown(cg_test_a_procs, test_euid, -1) ||
chown(cg_test_b_procs, test_euid, -1))
goto cleanup;
saved_uid = geteuid();
if (seteuid(test_euid))
goto cleanup;
cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
if (seteuid(saved_uid))
goto cleanup;
if (cg_test_b_procs_fd < 0)
goto cleanup;
if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_enter_current(root);
if (cg_test_b_procs_fd >= 0)
close(cg_test_b_procs_fd);
if (cg_test_b)
cg_destroy(cg_test_b);
if (cg_test_a)
cg_destroy(cg_test_a);
free(cg_test_b_procs);
free(cg_test_a_procs);
free(cg_test_b);
free(cg_test_a);
return ret;
}
struct lesser_ns_open_thread_arg {
const char *path;
int fd;
int err;
};
static int lesser_ns_open_thread_fn(void *arg)
{
struct lesser_ns_open_thread_arg *targ = arg;
targ->fd = open(targ->path, O_RDWR);
targ->err = errno;
return 0;
}
/*
* cgroup migration permission check should be performed based on the cgroup
* namespace at the time of open instead of write.
*/
static int test_cgcore_lesser_ns_open(const char *root)
{
static char stack[65536];
const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
int ret = KSFT_FAIL;
char *cg_test_a = NULL, *cg_test_b = NULL;
char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
int cg_test_b_procs_fd = -1;
struct lesser_ns_open_thread_arg targ = { .fd = -1 };
pid_t pid;
int status;
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_b");
if (!cg_test_a || !cg_test_b)
goto cleanup;
cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
if (!cg_test_a_procs || !cg_test_b_procs)
goto cleanup;
if (cg_create(cg_test_a) || cg_create(cg_test_b))
goto cleanup;
if (cg_enter_current(cg_test_b))
goto cleanup;
if (chown(cg_test_a_procs, test_euid, -1) ||
chown(cg_test_b_procs, test_euid, -1))
goto cleanup;
targ.path = cg_test_b_procs;
pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
&targ);
if (pid < 0)
goto cleanup;
if (waitpid(pid, &status, 0) < 0)
goto cleanup;
if (!WIFEXITED(status))
goto cleanup;
cg_test_b_procs_fd = targ.fd;
if (cg_test_b_procs_fd < 0)
goto cleanup;
if (cg_enter_current(cg_test_a))
goto cleanup;
if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
goto cleanup;
ret = KSFT_PASS;
cleanup:
cg_enter_current(root);
if (cg_test_b_procs_fd >= 0)
close(cg_test_b_procs_fd);
if (cg_test_b)
cg_destroy(cg_test_b);
if (cg_test_a)
cg_destroy(cg_test_a);
free(cg_test_b_procs);
free(cg_test_a_procs);
free(cg_test_b);
free(cg_test_a);
return ret;
}
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
const char *name;
} tests[] = {
T(test_cgcore_internal_process_constraint),
T(test_cgcore_top_down_constraint_enable),
T(test_cgcore_top_down_constraint_disable),
T(test_cgcore_no_internal_process_constraint_on_threads),
T(test_cgcore_parent_becomes_threaded),
T(test_cgcore_invalid_domain),
T(test_cgcore_populated),
T(test_cgcore_proc_migration),
T(test_cgcore_thread_migration),
T(test_cgcore_destroy),
T(test_cgcore_lesser_euid_open),
T(test_cgcore_lesser_ns_open),
};
#undef T
int main(int argc, char *argv[])
{
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
if (cg_find_unified_root(root, sizeof(root)))
ksft_exit_skip("cgroup v2 isn't mounted\n");
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
ksft_test_result_pass("%s\n", tests[i].name);
break;
case KSFT_SKIP:
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
return ret;
}