feat: implement support for cgroupsv1

Use the boot kernel argument `talos.unified_cgroup_hierarchy=0` to force Talos to
use cgroups v1. Talos still defaults to cgroups v2.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
Andrey Smirnov 2022-11-10 22:27:38 +04:00
parent 3866d0e334
commit 1cfb6188bc
8 changed files with 284 additions and 6 deletions

@@ -375,12 +375,13 @@ local integration_canal_reset = Step("e2e-canal-reset", target="e2e-qemu", privi
"CUSTOM_CNI_URL": "https://docs.projectcalico.org/manifests/canal.yaml",
"REGISTRY": local_registry,
});
-local integration_bios = Step("e2e-bios", target="e2e-qemu", privileged=true, depends_on=[integration_canal_reset], environment={
+local integration_bios_cgroupsv1 = Step("e2e-bios-cgroupsv1", target="e2e-qemu", privileged=true, depends_on=[integration_canal_reset], environment={
"SHORT_INTEGRATION_TEST": "yes",
"WITH_UEFI": "false",
"IMAGE_REGISTRY": local_registry,
"WITH_CONFIG_PATCH": '[{"op": "add", "path": "/machine/install/extraKernelArgs/-", "value": "talos.unified_cgroup_hierarchy=0"}]', # use cgroupsv1
});
-local integration_disk_image = Step("e2e-disk-image", target="e2e-qemu", privileged=true, depends_on=[integration_bios], environment={
+local integration_disk_image = Step("e2e-disk-image", target="e2e-qemu", privileged=true, depends_on=[integration_bios_cgroupsv1], environment={
"SHORT_INTEGRATION_TEST": "yes",
"USE_DISK_IMAGE": "true",
"IMAGE_REGISTRY": local_registry,
@@ -465,7 +466,7 @@ local integration_pipelines = [
Pipeline('integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1]) + integration_trigger(['integration-provision', 'integration-provision-1']),
Pipeline('integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2]) + integration_trigger(['integration-provision', 'integration-provision-2']),
Pipeline('integration-misc', default_pipeline_steps + [integration_extensions
-, integration_cilium, integration_canal_reset, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
+, integration_cilium, integration_canal_reset, integration_bios_cgroupsv1, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
Pipeline('integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip]) + integration_trigger(['integration-qemu-encrypted-vip']),
Pipeline('integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race]) + integration_trigger(['integration-qemu-race']),
Pipeline('integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi]) + integration_trigger(['integration-qemu-csi']),
@@ -477,7 +478,7 @@ local integration_pipelines = [
Pipeline('cron-integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-misc', default_pipeline_steps + [integration_extensions
-, integration_cilium, integration_canal_reset, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
+, integration_cilium, integration_canal_reset, integration_bios_cgroupsv1, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
Pipeline('cron-integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race], [default_cron_pipeline]) + cron_trigger(['nightly']),
Pipeline('cron-integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi], [default_cron_pipeline]) + cron_trigger(['nightly']),

@@ -150,6 +150,65 @@ New resource can be used to get member ID of the Talos node:
```bash
talosctl get etcdmember
```
"""
[notes.cgroupsv1]
title = "cgroups v1"
description = """\
Talos defaults to using cgroups v2 when not running in a container (when running in a container,
Talos follows the host cgroups mode).
Talos can now be forced to use cgroups v1 by setting the boot kernel argument `talos.unified_cgroup_hierarchy=0`:
```yaml
machine:
  install:
    extraKernelArgs:
      - "talos.unified_cgroup_hierarchy=0"
```
The current cgroups mode can be checked with `talosctl ls /sys/fs/cgroup`:
cgroups v1:
```
blkio
cpu
cpuacct
cpuset
devices
freezer
hugetlb
memory
net_cls
net_prio
perf_event
pids
```
cgroups v2:
```
cgroup.controllers
cgroup.max.depth
cgroup.max.descendants
cgroup.procs
cgroup.stat
cgroup.subtree_control
cgroup.threads
cpu.stat
cpuset.cpus.effective
cpuset.mems.effective
init
io.stat
kubepods
memory.numa_stat
memory.stat
podruntime
system
```
> Note: `cgroupsv1` is deprecated and should be used only for compatibility with workloads that don't support `cgroupsv2` yet.
"""
[make_deps]

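Aside: the two listings above can be reproduced without `talosctl`, since the distinction is visible in a plain directory listing of `/sys/fs/cgroup`. A minimal Go sketch, assuming it runs on the node itself (for example from a privileged container with the host `/sys` visible):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

func main() {
	entries, err := os.ReadDir("/sys/fs/cgroup")
	if err != nil {
		panic(err)
	}

	v2 := false

	for _, e := range entries {
		// cgroup.controllers, cgroup.procs, etc. exist only at the root of a
		// cgroups v2 hierarchy; v1 shows per-controller directories instead.
		if strings.HasPrefix(e.Name(), "cgroup.") {
			v2 = true

			break
		}
	}

	if v2 {
		fmt.Println("cgroups v2")
	} else {
		fmt.Println("cgroups v1")
	}
}
```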
@@ -151,7 +151,7 @@ func CreateSystemCgroups(seq runtime.Sequence, data interface{}) (runtime.TaskEx
if r.State().Platform().Mode() != runtime.ModeContainer {
// assert that cgroupsv2 is being used when not running in container mode,
// as Talos sets up cgroupsv2 on its own
-if cgroups.Mode() != cgroups.Unified {
+if cgroups.Mode() != cgroups.Unified && !mount.ForceGGroupsV1() {
return fmt.Errorf("cgroupsv2 should be used")
}
}

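For context: `cgroups.Mode()` here is containerd's detection helper (judging by the `cgroups.Unified` identifier), which classifies the hierarchy by the filesystem magic of `/sys/fs/cgroup`. A simplified sketch of that kind of detection, not the library's actual code:

```go
package main

import (
	"fmt"

	"golang.org/x/sys/unix"
)

// mode approximates cgroup-hierarchy detection: statfs /sys/fs/cgroup
// and inspect the filesystem magic number.
func mode() string {
	var st unix.Statfs_t

	if err := unix.Statfs("/sys/fs/cgroup", &st); err != nil {
		return "unavailable"
	}

	switch st.Type {
	case unix.CGROUP2_SUPER_MAGIC:
		return "unified (cgroups v2)"
	case unix.TMPFS_MAGIC:
		return "legacy or hybrid (cgroups v1 controllers under tmpfs)"
	default:
		return "unknown"
	}
}

func main() {
	fmt.Println(mode())
}
```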
@@ -0,0 +1,161 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//go:build integration_api

package api

import (
	"context"
	"io"
	"path/filepath"
	"strings"
	"time"

	"github.com/siderolabs/go-procfs/procfs"
	"google.golang.org/grpc/codes"

	"github.com/siderolabs/talos/internal/integration/base"
	machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
	"github.com/siderolabs/talos/pkg/machinery/client"
	"github.com/siderolabs/talos/pkg/machinery/constants"
)

// CGroupsSuite ...
type CGroupsSuite struct {
	base.APISuite

	ctx       context.Context //nolint:containedctx
	ctxCancel context.CancelFunc
}

// SuiteName ...
func (suite *CGroupsSuite) SuiteName() string {
	return "api.CGroupsSuite"
}

// SetupTest ...
func (suite *CGroupsSuite) SetupTest() {
	suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 5*time.Minute)
}

// TearDownTest ...
func (suite *CGroupsSuite) TearDownTest() {
	if suite.ctxCancel != nil {
		suite.ctxCancel()
	}
}

// TestCGroupsVersion tests that the cgroup mounts match the expected version.
func (suite *CGroupsSuite) TestCGroupsVersion() {
	node := suite.RandomDiscoveredNodeInternalIP()
	ctx := client.WithNode(suite.ctx, node)

	cmdline, err := suite.readCmdline(ctx)
	suite.Require().NoError(err)

	// cgroups v1 is in effect only when the kernel argument is explicitly set to "0"
	unified := procfs.NewCmdline(cmdline).Get(constants.KernelParamCGroups).First()

	cgroupsV1 := false

	if unified != nil && *unified == "0" {
		cgroupsV1 = true
	}

	stream, err := suite.Client.MachineClient.List(ctx, &machineapi.ListRequest{Root: constants.CgroupMountPath})
	suite.Require().NoError(err)

	names := map[string]struct{}{}

	for {
		var info *machineapi.FileInfo

		info, err = stream.Recv()
		if err != nil {
			if err == io.EOF || client.StatusCode(err) == codes.Canceled {
				break
			}

			suite.Require().NoError(err)
		}

		names[filepath.Base(info.Name)] = struct{}{}
	}

	if cgroupsV1 {
		suite.T().Log("detected cgroups v1")

		for _, subpath := range []string{
			"cpu",
			"cpuacct",
			"cpuset",
			"devices",
			"freezer",
			"memory",
			"net_cls",
			"net_prio",
			"perf_event",
			"pids",
		} {
			suite.Assert().Contains(names, subpath)
		}
	} else {
		suite.T().Log("detected cgroups v2")

		for _, subpath := range []string{
			"cgroup.controllers",
			"cgroup.max.depth",
			"cgroup.max.descendants",
			"cgroup.procs",
			"cgroup.stat",
			"cgroup.subtree_control",
			"cgroup.threads",
			"cpu.stat",
			"cpuset.cpus.effective",
			"cpuset.mems.effective",
			"init",
			"io.stat",
			"kubepods",
			"memory.numa_stat",
			"memory.stat",
			"podruntime",
			"system",
		} {
			suite.Assert().Contains(names, subpath)
		}
	}
}

//nolint:gocyclo
func (suite *CGroupsSuite) readCmdline(ctx context.Context) (string, error) {
	reader, errCh, err := suite.Client.Read(ctx, "/proc/cmdline")
	if err != nil {
		return "", err
	}

	defer reader.Close() //nolint:errcheck

	body, err := io.ReadAll(reader)
	if err != nil {
		return "", err
	}

	cmdline := strings.TrimSpace(string(body))

	// drain the stream and surface any errors reported by the server
	_, err = io.Copy(io.Discard, reader)
	if err != nil {
		return "", err
	}

	for err = range errCh {
		if err != nil {
			return "", err
		}
	}

	return cmdline, reader.Close()
}

func init() {
	allSuites = append(allSuites, new(CGroupsSuite))
}

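The suite registers itself through `allSuites` and is driven by the repo's shared integration-test runner. Outside that harness, a testify suite like this one would be started with a standard entry point; a hypothetical standalone runner (not part of this commit) would look like:

```go
//go:build integration_api

package api

import (
	"testing"

	"github.com/stretchr/testify/suite"
)

// TestCGroupsStandalone is a hypothetical direct entry point; in the Talos
// tree the suites collected in allSuites are executed by the shared runner.
func TestCGroupsStandalone(t *testing.T) {
	suite.Run(t, new(CGroupsSuite))
}
```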
@@ -5,15 +5,61 @@
package mount

import (
	"path/filepath"

	"github.com/siderolabs/go-procfs/procfs"
	"golang.org/x/sys/unix"

	"github.com/siderolabs/talos/pkg/machinery/constants"
)

// ForceGGroupsV1 returns whether cgroups v1 should be forced (only for !container mode).
func ForceGGroupsV1() bool {
	value := procfs.ProcCmdline().Get(constants.KernelParamCGroups).First()

	return value != nil && *value == "0"
}

// CGroupMountPoints returns the cgroup mount points.
func CGroupMountPoints() (mountpoints *Points, err error) {
	if ForceGGroupsV1() {
		return cgroupMountPointsV1()
	}

	return cgroupMountPointsV2()
}

func cgroupMountPointsV2() (mountpoints *Points, err error) {
	cgroups := NewMountPoints()
	cgroups.Set("cgroup2", NewMountPoint("cgroup", constants.CgroupMountPath, "cgroup2", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_RELATIME, "nsdelegate"))

	return cgroups, nil
}

func cgroupMountPointsV1() (mountpoints *Points, err error) {
	cgroups := NewMountPoints()

	// cgroups v1 is laid out as per-controller mounts under a tmpfs
	cgroups.Set("dev", NewMountPoint("tmpfs", constants.CgroupMountPath, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_RELATIME, "mode=755"))

	controllers := []string{
		"blkio",
		"cpu",
		"cpuacct",
		"cpuset",
		"devices",
		"freezer",
		"hugetlb",
		"memory",
		"net_cls",
		"net_prio",
		"perf_event",
		"pids",
	}

	for _, controller := range controllers {
		p := filepath.Join(constants.CgroupMountPath, controller)

		cgroups.Set(controller, NewMountPoint(controller, p, "cgroup", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_RELATIME, controller))
	}

	return cgroups, nil
}

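Each mount point registered above ultimately becomes a mount(2) call: a tmpfs at `/sys/fs/cgroup`, then one `cgroup`-type mount per controller with the controller name as mount data. A rough standalone sketch of those semantics for a single controller, using golang.org/x/sys/unix directly rather than the repo's mount package:

```go
package main

import (
	"os"
	"path/filepath"

	"golang.org/x/sys/unix"
)

func main() {
	const root = "/sys/fs/cgroup"

	flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RELATIME)

	// tmpfs base, matching cgroupMountPointsV1 above
	if err := unix.Mount("tmpfs", root, "tmpfs", flags, "mode=755"); err != nil {
		panic(err)
	}

	// one controller mount; the mount data selects the controller
	p := filepath.Join(root, "memory")

	if err := os.MkdirAll(p, 0o755); err != nil {
		panic(err)
	}

	if err := unix.Mount("memory", p, "cgroup", flags, "memory"); err != nil {
		panic(err)
	}
}
```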
@@ -66,7 +66,7 @@ func WithInstallImage(imageRef string) GenOption {
// WithInstallExtraKernelArgs specifies extra kernel arguments to pass to the installer.
func WithInstallExtraKernelArgs(args []string) GenOption {
return func(o *GenOptions) error {
-o.InstallExtraKernelArgs = args
+o.InstallExtraKernelArgs = append(o.InstallExtraKernelArgs, args...)
return nil
}

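The switch from plain assignment to `append` makes the option composable: passing `WithInstallExtraKernelArgs` more than once now accumulates arguments instead of the last call winning. A minimal sketch of the functional-options pattern with trimmed-down types (the real `GenOptions` struct carries many more fields):

```go
package main

import "fmt"

// GenOptions is a trimmed-down stand-in for the real generate-options struct.
type GenOptions struct {
	InstallExtraKernelArgs []string
}

// GenOption mirrors the functional-option signature used in the diff.
type GenOption func(o *GenOptions) error

// WithInstallExtraKernelArgs appends, so repeated options accumulate.
func WithInstallExtraKernelArgs(args []string) GenOption {
	return func(o *GenOptions) error {
		o.InstallExtraKernelArgs = append(o.InstallExtraKernelArgs, args...)

		return nil
	}
}

func main() {
	var opts GenOptions

	for _, opt := range []GenOption{
		WithInstallExtraKernelArgs([]string{"talos.unified_cgroup_hierarchy=0"}),
		WithInstallExtraKernelArgs([]string{"console=ttyS0"}),
	} {
		if err := opt(&opts); err != nil {
			panic(err)
		}
	}

	fmt.Println(opts.InstallExtraKernelArgs)
	// Output: [talos.unified_cgroup_hierarchy=0 console=ttyS0]
}
```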
@@ -42,6 +42,10 @@ const (
// disk to wipe on the next boot and reboot.
KernelParamWipe = "talos.experimental.wipe"
// KernelParamCGroups is the kernel parameter name for specifying the
// cgroups version to use (default is cgroupsv2, setting this kernel arg to '0' forces cgroupsv1).
KernelParamCGroups = "talos.unified_cgroup_hierarchy"
// BoardNone indicates that the install is not for a specific board.
BoardNone = "none"

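The new constant is consumed through go-procfs both at runtime (`ForceGGroupsV1`) and in the integration test above. A tiny standalone example of the same lookup API against a literal cmdline string:

```go
package main

import (
	"fmt"

	"github.com/siderolabs/go-procfs/procfs"
)

func main() {
	cmdline := "console=ttyS0 talos.unified_cgroup_hierarchy=0"

	// First() returns *string: nil when the parameter is absent
	value := procfs.NewCmdline(cmdline).Get("talos.unified_cgroup_hierarchy").First()

	if value != nil && *value == "0" {
		fmt.Println("cgroups v1 forced")
	} else {
		fmt.Println("cgroups v2 (default)")
	}
}
```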
@@ -190,3 +190,10 @@ Resets the disk before starting up the system.
Valid options are:
* `system` resets system disk.
#### `talos.unified_cgroup_hierarchy`
Talos defaults to the unified cgroup hierarchy (`cgroupsv2`), but `cgroupsv1`
can be forced with `talos.unified_cgroup_hierarchy=0`.
> Note: `cgroupsv1` is deprecated and should be used only for compatibility with workloads that don't support `cgroupsv2` yet.