feat: implement support for cgroupsv1
Use boot kernel arg `talos.unified_cgroup_hierarchy=0` to force Talos to use cgroups v1. Talos still defaults to cgroupsv2. Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
parent
3866d0e334
commit
1cfb6188bc
@ -375,12 +375,13 @@ local integration_canal_reset = Step("e2e-canal-reset", target="e2e-qemu", privi
|
||||
"CUSTOM_CNI_URL": "https://docs.projectcalico.org/manifests/canal.yaml",
|
||||
"REGISTRY": local_registry,
|
||||
});
|
||||
local integration_bios = Step("e2e-bios", target="e2e-qemu", privileged=true, depends_on=[integration_canal_reset], environment={
|
||||
local integration_bios_cgroupsv1 = Step("e2e-bios-cgroupsv1", target="e2e-qemu", privileged=true, depends_on=[integration_canal_reset], environment={
|
||||
"SHORT_INTEGRATION_TEST": "yes",
|
||||
"WITH_UEFI": "false",
|
||||
"IMAGE_REGISTRY": local_registry,
|
||||
"WITH_CONFIG_PATCH": '[{"op": "add", "path": "/machine/install/extraKernelArgs/-", "value": "talos.unified_cgroup_hierarchy=0"}]', # use cgroupsv1
|
||||
});
|
||||
local integration_disk_image = Step("e2e-disk-image", target="e2e-qemu", privileged=true, depends_on=[integration_bios], environment={
|
||||
local integration_disk_image = Step("e2e-disk-image", target="e2e-qemu", privileged=true, depends_on=[integration_bios_cgroupsv1], environment={
|
||||
"SHORT_INTEGRATION_TEST": "yes",
|
||||
"USE_DISK_IMAGE": "true",
|
||||
"IMAGE_REGISTRY": local_registry,
|
||||
@ -465,7 +466,7 @@ local integration_pipelines = [
|
||||
Pipeline('integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1]) + integration_trigger(['integration-provision', 'integration-provision-1']),
|
||||
Pipeline('integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2]) + integration_trigger(['integration-provision', 'integration-provision-2']),
|
||||
Pipeline('integration-misc', default_pipeline_steps + [integration_extensions
|
||||
, integration_cilium, integration_canal_reset, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
|
||||
, integration_cilium, integration_canal_reset, integration_bios_cgroupsv1, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname]) + integration_trigger(['integration-misc']),
|
||||
Pipeline('integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip]) + integration_trigger(['integration-qemu-encrypted-vip']),
|
||||
Pipeline('integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race]) + integration_trigger(['integration-qemu-race']),
|
||||
Pipeline('integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi]) + integration_trigger(['integration-qemu-csi']),
|
||||
@ -477,7 +478,7 @@ local integration_pipelines = [
|
||||
Pipeline('cron-integration-provision-1', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_1], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-provision-2', default_pipeline_steps + [integration_provision_tests_prepare, integration_provision_tests_track_2], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-misc', default_pipeline_steps + [integration_extensions
|
||||
, integration_cilium, integration_canal_reset, integration_bios, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
, integration_cilium, integration_canal_reset, integration_bios_cgroupsv1, integration_disk_image, integration_control_plane_port, integration_no_cluster_discovery, integration_kubespan, integration_default_hostname], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-qemu-encrypted-vip', default_pipeline_steps + [integration_qemu_encrypted_vip], [default_cron_pipeline]) + cron_trigger(['thrice-daily', 'nightly']),
|
||||
Pipeline('cron-integration-qemu-race', default_pipeline_steps + [build_race, integration_qemu_race], [default_cron_pipeline]) + cron_trigger(['nightly']),
|
||||
Pipeline('cron-integration-qemu-csi', default_pipeline_steps + [integration_qemu_csi], [default_cron_pipeline]) + cron_trigger(['nightly']),
|
||||
|
@ -150,6 +150,65 @@ New resource can be used to get member ID of the Talos node:
|
||||
```bash
|
||||
talosctl get etcdmember
|
||||
```
|
||||
"""
|
||||
|
||||
[notes.cgroupsv1]
|
||||
title = "cgroups v1"
|
||||
description = """\
|
||||
Talos defaults to using cgroups v2 when it is not running in a container (when running in a container,
|
||||
Talos follows the cgroups mode of the host).
|
||||
Talos can now be forced to use cgroups v1 by setting boot kernel argument `talos.unified_cgroup_hierarchy=0`:
|
||||
|
||||
```yaml
|
||||
machine:
|
||||
install:
|
||||
extraKernelArgs:
|
||||
- "talos.unified_cgroup_hierarchy=0"
|
||||
```
|
||||
|
||||
Current cgroups mode can be checked with `talosctl ls /sys/fs/cgroup`:
|
||||
|
||||
cgroups v1:
|
||||
|
||||
```
|
||||
blkio
|
||||
cpu
|
||||
cpuacct
|
||||
cpuset
|
||||
devices
|
||||
freezer
|
||||
hugetlb
|
||||
memory
|
||||
net_cls
|
||||
net_prio
|
||||
perf_event
|
||||
pids
|
||||
```
|
||||
|
||||
cgroups v2:
|
||||
|
||||
```
|
||||
cgroup.controllers
|
||||
cgroup.max.depth
|
||||
cgroup.max.descendants
|
||||
cgroup.procs
|
||||
cgroup.stat
|
||||
cgroup.subtree_control
|
||||
cgroup.threads
|
||||
cpu.stat
|
||||
cpuset.cpus.effective
|
||||
cpuset.mems.effective
|
||||
init
|
||||
io.stat
|
||||
kubepods
|
||||
memory.numa_stat
|
||||
memory.stat
|
||||
podruntime
|
||||
system
|
||||
```
|
||||
|
||||
> Note: `cgroupsv1` is deprecated and it should be used only for compatibility with workloads which don't support `cgroupsv2` yet.
|
||||
|
||||
"""
|
||||
|
||||
[make_deps]
|
||||
|
@ -151,7 +151,7 @@ func CreateSystemCgroups(seq runtime.Sequence, data interface{}) (runtime.TaskEx
|
||||
if r.State().Platform().Mode() != runtime.ModeContainer {
|
||||
// assert that cgroupsv2 is being used when not running in container mode (unless cgroupsv1 is forced via kernel argument),
|
||||
// as Talos sets up cgroupsv2 on its own
|
||||
if cgroups.Mode() != cgroups.Unified {
|
||||
if cgroups.Mode() != cgroups.Unified && !mount.ForceGGroupsV1() {
|
||||
return fmt.Errorf("cgroupsv2 should be used")
|
||||
}
|
||||
}
|
||||
|
161
internal/integration/api/cgroups.go
Normal file
161
internal/integration/api/cgroups.go
Normal file
@ -0,0 +1,161 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
//go:build integration_api
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/siderolabs/go-procfs/procfs"
|
||||
"google.golang.org/grpc/codes"
|
||||
|
||||
"github.com/siderolabs/talos/internal/integration/base"
|
||||
machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
|
||||
"github.com/siderolabs/talos/pkg/machinery/client"
|
||||
"github.com/siderolabs/talos/pkg/machinery/constants"
|
||||
)
|
||||
|
||||
// CGroupsSuite ...
|
||||
type CGroupsSuite struct {
|
||||
base.APISuite
|
||||
|
||||
ctx context.Context //nolint:containedctx
|
||||
ctxCancel context.CancelFunc
|
||||
}
|
||||
|
||||
// SuiteName ...
|
||||
func (suite *CGroupsSuite) SuiteName() string {
|
||||
return "api.CGroupsSuite"
|
||||
}
|
||||
|
||||
// SetupTest ...
|
||||
func (suite *CGroupsSuite) SetupTest() {
|
||||
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
}
|
||||
|
||||
// TearDownTest ...
|
||||
func (suite *CGroupsSuite) TearDownTest() {
|
||||
if suite.ctxCancel != nil {
|
||||
suite.ctxCancel()
|
||||
}
|
||||
}
|
||||
|
||||
// TestCGroupsVersion tests that cgroups mount match expected version.
|
||||
func (suite *CGroupsSuite) TestCGroupsVersion() {
|
||||
node := suite.RandomDiscoveredNodeInternalIP()
|
||||
ctx := client.WithNode(suite.ctx, node)
|
||||
|
||||
cmdline, err := suite.readCmdline(ctx)
|
||||
suite.Require().NoError(err)
|
||||
|
||||
unified := procfs.NewCmdline(cmdline).Get(constants.KernelParamCGroups).First()
|
||||
cgroupsV1 := false
|
||||
|
||||
if unified != nil && *unified == "0" {
|
||||
cgroupsV1 = true
|
||||
}
|
||||
|
||||
stream, err := suite.Client.MachineClient.List(ctx, &machineapi.ListRequest{Root: constants.CgroupMountPath})
|
||||
suite.Require().NoError(err)
|
||||
|
||||
names := map[string]struct{}{}
|
||||
|
||||
for {
|
||||
var info *machineapi.FileInfo
|
||||
|
||||
info, err = stream.Recv()
|
||||
if err != nil {
|
||||
if err == io.EOF || client.StatusCode(err) == codes.Canceled {
|
||||
break
|
||||
}
|
||||
|
||||
suite.Require().NoError(err)
|
||||
}
|
||||
|
||||
names[filepath.Base(info.Name)] = struct{}{}
|
||||
}
|
||||
|
||||
if cgroupsV1 {
|
||||
suite.T().Log("detected cgroups v1")
|
||||
|
||||
for _, subpath := range []string{
|
||||
"cpu",
|
||||
"cpuacct",
|
||||
"cpuset",
|
||||
"devices",
|
||||
"freezer",
|
||||
"memory",
|
||||
"net_cls",
|
||||
"net_prio",
|
||||
"perf_event",
|
||||
"pids",
|
||||
} {
|
||||
suite.Assert().Contains(names, subpath)
|
||||
}
|
||||
} else {
|
||||
suite.T().Log("detected cgroups v2")
|
||||
|
||||
for _, subpath := range []string{
|
||||
"cgroup.controllers",
|
||||
"cgroup.max.depth",
|
||||
"cgroup.max.descendants",
|
||||
"cgroup.procs",
|
||||
"cgroup.stat",
|
||||
"cgroup.subtree_control",
|
||||
"cgroup.threads",
|
||||
"cpu.stat",
|
||||
"cpuset.cpus.effective",
|
||||
"cpuset.mems.effective",
|
||||
"init",
|
||||
"io.stat",
|
||||
"kubepods",
|
||||
"memory.numa_stat",
|
||||
"memory.stat",
|
||||
"podruntime",
|
||||
"system",
|
||||
} {
|
||||
suite.Assert().Contains(names, subpath)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//nolint:gocyclo
|
||||
func (suite *CGroupsSuite) readCmdline(ctx context.Context) (string, error) {
|
||||
reader, errCh, err := suite.Client.Read(ctx, "/proc/cmdline")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
defer reader.Close() //nolint:errcheck
|
||||
|
||||
body, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
bootID := strings.TrimSpace(string(body))
|
||||
|
||||
_, err = io.Copy(io.Discard, reader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
for err = range errCh {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return bootID, reader.Close()
|
||||
}
|
||||
|
||||
func init() {
|
||||
allSuites = append(allSuites, new(CGroupsSuite))
|
||||
}
|
@ -5,15 +5,61 @@
|
||||
package mount
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/siderolabs/go-procfs/procfs"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/siderolabs/talos/pkg/machinery/constants"
|
||||
)
|
||||
|
||||
// ForceGGroupsV1 returns the cgroup version to be used (only for !container mode).
|
||||
func ForceGGroupsV1() bool {
|
||||
value := procfs.ProcCmdline().Get(constants.KernelParamCGroups).First()
|
||||
|
||||
return value != nil && *value == "0"
|
||||
}
|
||||
|
||||
// CGroupMountPoints returns the cgroup mount points.
|
||||
func CGroupMountPoints() (mountpoints *Points, err error) {
|
||||
if ForceGGroupsV1() {
|
||||
return cgroupMountPointsV1()
|
||||
}
|
||||
|
||||
return cgroupMountPointsV2()
|
||||
}
|
||||
|
||||
func cgroupMountPointsV2() (mountpoints *Points, err error) {
|
||||
cgroups := NewMountPoints()
|
||||
|
||||
cgroups.Set("cgroup2", NewMountPoint("cgroup", constants.CgroupMountPath, "cgroup2", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_RELATIME, "nsdelegate"))
|
||||
|
||||
return cgroups, nil
|
||||
}
|
||||
|
||||
func cgroupMountPointsV1() (mountpoints *Points, err error) {
|
||||
cgroups := NewMountPoints()
|
||||
cgroups.Set("dev", NewMountPoint("tmpfs", constants.CgroupMountPath, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_RELATIME, "mode=755"))
|
||||
|
||||
controllers := []string{
|
||||
"blkio",
|
||||
"cpu",
|
||||
"cpuacct",
|
||||
"cpuset",
|
||||
"devices",
|
||||
"freezer",
|
||||
"hugetlb",
|
||||
"memory",
|
||||
"net_cls",
|
||||
"net_prio",
|
||||
"perf_event",
|
||||
"pids",
|
||||
}
|
||||
|
||||
for _, controller := range controllers {
|
||||
p := filepath.Join(constants.CgroupMountPath, controller)
|
||||
cgroups.Set(controller, NewMountPoint(controller, p, "cgroup", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_RELATIME, controller))
|
||||
}
|
||||
|
||||
return cgroups, nil
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ func WithInstallImage(imageRef string) GenOption {
|
||||
// WithInstallExtraKernelArgs specifies extra kernel arguments to pass to the installer.
|
||||
func WithInstallExtraKernelArgs(args []string) GenOption {
|
||||
return func(o *GenOptions) error {
|
||||
o.InstallExtraKernelArgs = args
|
||||
o.InstallExtraKernelArgs = append(o.InstallExtraKernelArgs, args...)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -42,6 +42,10 @@ const (
|
||||
// disk to wipe on the next boot and reboot.
|
||||
KernelParamWipe = "talos.experimental.wipe"
|
||||
|
||||
// KernelParamCGroups is the kernel parameter name for specifying the
|
||||
// cgroups version to use (default is cgroupsv2, setting this kernel arg to '0' forces cgroupsv1).
|
||||
KernelParamCGroups = "talos.unified_cgroup_hierarchy"
|
||||
|
||||
// BoardNone indicates that the install is not for a specific board.
|
||||
BoardNone = "none"
|
||||
|
||||
|
@ -190,3 +190,10 @@ Resets the disk before starting up the system.
|
||||
Valid options are:
|
||||
|
||||
* `system` resets system disk.
|
||||
|
||||
#### `talos.unified_cgroup_hierarchy`
|
||||
|
||||
Talos defaults to always using the unified cgroup hierarchy (`cgroupsv2`), but `cgroupsv1`
|
||||
can be forced with `talos.unified_cgroup_hierarchy=0`.
|
||||
|
||||
> Note: `cgroupsv1` is deprecated and it should be used only for compatibility with workloads which don't support `cgroupsv2` yet.
|
||||
|
Loading…
Reference in New Issue
Block a user