feat: use wrapper for starting processes

Use a wrapper for starting processes. The wrapper sets up the proper
cgroup and OOM score, drops capabilities for the process, and then calls
`execve`.

The containerd tests are also fixed to support cgroups when
running in buildkit. They used to pass only because we did not
return an error if cgroup setup failed.

Signed-off-by: Noel Georgi <git@frezbo.dev>
This commit is contained in:
Noel Georgi 2023-02-03 16:51:59 +05:30
parent 56d9453261
commit 5cb2915d8e
No known key found for this signature in database
GPG Key ID: 21A9F444075C9E36
6 changed files with 132 additions and 55 deletions

View File

@ -468,6 +468,8 @@ COPY --from=machined-build-amd64 /machined /rootfs/sbin/init
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
RUN ln /rootfs/sbin/init /rootfs/sbin/poweroff
RUN chmod +x /rootfs/sbin/poweroff
RUN ln /rootfs/sbin/init /rootfs/sbin/wrapperd
RUN chmod +x /rootfs/sbin/wrapperd
# NB: We run the cleanup step before creating extra directories, files, and
# symlinks to avoid accidentally cleaning them up.
COPY ./hack/cleanup.sh /toolchain/bin/cleanup.sh
@ -515,6 +517,8 @@ COPY --from=machined-build-arm64 /machined /rootfs/sbin/init
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
RUN ln /rootfs/sbin/init /rootfs/sbin/poweroff
RUN chmod +x /rootfs/sbin/poweroff
RUN ln /rootfs/sbin/init /rootfs/sbin/wrapperd
RUN chmod +x /rootfs/sbin/wrapperd
# NB: We run the cleanup step before creating extra directories, files, and
# symlinks to avoid accidentally cleaning them up.
COPY ./hack/cleanup.sh /toolchain/bin/cleanup.sh

View File

@ -32,6 +32,7 @@ import (
"github.com/siderolabs/talos/internal/app/maintenance"
"github.com/siderolabs/talos/internal/app/poweroff"
"github.com/siderolabs/talos/internal/app/trustd"
"github.com/siderolabs/talos/internal/app/wrapperd"
"github.com/siderolabs/talos/internal/pkg/mount"
"github.com/siderolabs/talos/pkg/httpdefaults"
"github.com/siderolabs/talos/pkg/machinery/api/common"
@ -307,6 +308,10 @@ func main() {
case "/sbin/poweroff":
poweroff.Main()
return
case "/sbin/wrapperd":
wrapperd.Main()
return
default:
}

View File

@ -74,16 +74,14 @@ func (suite *ContainerdSuite) SetupSuite() {
suite.Require().NoError(os.Mkdir(rootDir, 0o777))
if cgroups.Mode() == cgroups.Unified {
var (
groupPath string
manager *cgroupsv2.Manager
)
var manager *cgroupsv2.Manager
groupPath, err = cgroupsv2.NestedGroupPath(suite.tmpDir)
manager, err = cgroupsv2.NewManager(constants.CgroupMountPath, "/"+suite.T().Name(), &cgroupsv2.Resources{})
suite.Require().NoError(err)
manager, err = cgroupsv2.NewManager(constants.CgroupMountPath, groupPath, &cgroupsv2.Resources{})
suite.Require().NoError(err)
// when using buildkit runner, parent `cgroup.type` is set to `domain threaded`, so child cgroups have to explicitly specify
// `cgroup.type` to "threaded" https://www.kernel.org/doc/html/v5.0/admin-guide/cgroup-v2.html#threads
suite.Require().NoError(os.WriteFile(filepath.Join(constants.CgroupMountPath, suite.T().Name(), "cgroup.type"), []byte("threaded"), 0o644))
defer manager.Delete() //nolint:errcheck
} else {
@ -113,7 +111,7 @@ func (suite *ContainerdSuite) SetupSuite() {
args,
runner.WithLoggingManager(suite.loggingManager),
runner.WithEnv([]string{"PATH=/bin:" + constants.PATH}),
runner.WithCgroupPath(suite.tmpDir),
runner.WithCgroupPath("/"+suite.T().Name()),
)
suite.Require().NoError(suite.containerdRunner.Open())
suite.containerdWg.Add(1)

View File

@ -5,17 +5,14 @@
package process
import (
"errors"
"fmt"
"io"
"os"
"os/exec"
"strings"
"syscall"
"time"
"github.com/containerd/cgroups"
cgroupsv2 "github.com/containerd/cgroups/v2"
"github.com/containerd/containerd/sys"
"github.com/siderolabs/go-cmd/pkg/cmd/proc/reaper"
"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
@ -80,7 +77,15 @@ func (p *processRunner) Close() error {
}
func (p *processRunner) build() (cmd *exec.Cmd, logCloser io.Closer, err error) {
cmd = exec.Command(p.args.ProcessArgs[0], p.args.ProcessArgs[1:]...)
args := []string{
fmt.Sprintf("-name=%s", p.args.ID),
fmt.Sprintf("-dropped-caps=%s", strings.Join(p.opts.DroppedCapabilities, ",")),
fmt.Sprintf("-cgroup-path=%s", p.opts.CgroupPath),
fmt.Sprintf("-oom-score=%d", p.opts.OOMScoreAdj),
}
args = append(args, p.args.ProcessArgs...)
cmd = exec.Command("/sbin/wrapperd", args...)
// Set the environment for the service.
cmd.Env = append([]string{fmt.Sprintf("PATH=%s", constants.PATH)}, p.opts.Env...)
@ -122,52 +127,10 @@ func (p *processRunner) run(eventSink events.Recorder) error {
defer reaper.Stop(notifyCh)
}
var (
cgv1 cgroups.Cgroup
cgv2 *cgroupsv2.Manager
)
// load the cgroup before starting the process, as once process is started,
// it's not easy to fail (as the process has to be cleaned up)
if p.opts.CgroupPath != "" {
if cgroups.Mode() == cgroups.Unified {
cgv2, err = cgroupsv2.LoadManager(constants.CgroupMountPath, p.opts.CgroupPath)
if err != nil {
return fmt.Errorf("failed to load cgroup %s: %w", p.opts.CgroupPath, err)
}
} else {
cgv1, err = cgroups.Load(cgroups.V1, cgroups.StaticPath(p.opts.CgroupPath))
if err != nil {
return fmt.Errorf("failed to load cgroup %s: %w", p.opts.CgroupPath, err)
}
}
}
if err = cmd.Start(); err != nil {
return fmt.Errorf("error starting process: %w", err)
}
if p.opts.OOMScoreAdj != 0 {
if err = sys.AdjustOOMScore(cmd.Process.Pid, p.opts.OOMScoreAdj); err != nil {
eventSink(events.StateRunning, "Failed to change OOMScoreAdj to process %s", p)
}
}
if p.opts.CgroupPath != "" {
// put the process into the cgroup and record failure (if any)
if cgroups.Mode() == cgroups.Unified {
if err = cgv2.AddProc(uint64(cmd.Process.Pid)); err != nil && !errors.Is(err, syscall.ESRCH) { // ignore "no such process" error
eventSink(events.StateRunning, "Failed to move process %s to cgroup: %s", p, err)
}
} else {
if err = cgv1.Add(cgroups.Process{
Pid: cmd.Process.Pid,
}); err != nil && !errors.Is(err, syscall.ESRCH) { // ignore "no such process" error
eventSink(events.StateRunning, "Failed to move process %s to cgroup: %s", p, err)
}
}
}
eventSink(events.StateRunning, "Process %s started with PID %d", p, cmd.Process.Pid)
waitCh := make(chan error)

View File

@ -63,6 +63,8 @@ type Options struct {
CgroupPath string
// OverrideSeccompProfile default Linux seccomp profile.
OverrideSeccompProfile func(*specs.LinuxSeccomp)
// DroppedCapabilities is the list of capabilities to drop.
DroppedCapabilities []string
}
// Option is the functional option func.
@ -164,3 +166,10 @@ func WithCustomSeccompProfile(override func(*specs.LinuxSeccomp)) Option {
args.OverrideSeccompProfile = override
}
}
// WithBoundedCapabilities returns an Option which records caps in
// Options.DroppedCapabilities, i.e. the list of capabilities to drop.
func WithBoundedCapabilities(caps []string) Option {
	return func(opts *Options) {
		opts.DroppedCapabilities = caps
	}
}

View File

@ -0,0 +1,98 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Package wrapperd provides a wrapper for running services.
package wrapperd
import (
"flag"
"log"
"os"
"strings"
"github.com/containerd/cgroups"
cgroupsv2 "github.com/containerd/cgroups/v2"
"github.com/containerd/containerd/sys"
"github.com/siderolabs/gen/slices"
"golang.org/x/sys/unix"
"kernel.org/pub/linux/libs/security/libcap/cap"
"github.com/siderolabs/talos/pkg/machinery/constants"
)
// Flag values for wrapperd; Main binds them to the command-line flags.
var (
	// name, droppedCaps and cgroupPath back the -name, -dropped-caps and
	// -cgroup-path flags respectively.
	name, droppedCaps, cgroupPath string

	// oomScore backs the -oom-score flag.
	oomScore int
)
// Main is the entrypoint into /sbin/wrapperd.
//
// It parses the wrapper flags (-name, -dropped-caps, -cgroup-path,
// -oom-score), applies the requested settings to the current process, and
// then replaces the process image with the wrapped command, which is given
// as the remaining non-flag arguments. Every failure is fatal: the error is
// logged and the process exits via log.Fatal/log.Fatalf.
//
//nolint:gocyclo
func Main() {
	flag.StringVar(&name, "name", "", "process name")
	flag.StringVar(&droppedCaps, "dropped-caps", "", "comma-separated list of capabilities to drop")
	flag.StringVar(&cgroupPath, "cgroup-path", "", "cgroup path to use")
	flag.IntVar(&oomScore, "oom-score", 0, "oom score to set")
	flag.Parse()

	// Validate the wrapped command up front: indexing flag.Args()[0] without
	// this check panics when nothing follows the flags.
	command := flag.Args()
	if len(command) == 0 {
		log.Fatalf("no command to execute for process %s", name)
	}

	currentPid := os.Getpid()

	if oomScore != 0 {
		if err := sys.AdjustOOMScore(currentPid, oomScore); err != nil {
			log.Fatalf("Failed to change OOMScoreAdj of process %s to %d: %v", name, oomScore, err)
		}
	}

	// load the cgroup and put the process into the cgroup
	if cgroupPath != "" {
		if cgroups.Mode() == cgroups.Unified {
			cgv2, err := cgroupsv2.LoadManager(constants.CgroupMountPath, cgroupPath)
			if err != nil {
				log.Fatalf("failed to load cgroup %s: %v", cgroupPath, err)
			}

			if err := cgv2.AddProc(uint64(currentPid)); err != nil {
				log.Fatalf("Failed to move process %s to cgroup: %v", name, err)
			}
		} else {
			cgv1, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroupPath))
			if err != nil {
				log.Fatalf("failed to load cgroup %s: %v", cgroupPath, err)
			}

			if err := cgv1.Add(cgroups.Process{
				Pid: currentPid,
			}); err != nil {
				log.Fatalf("Failed to move process %s to cgroup: %v", name, err)
			}
		}
	}

	if droppedCaps != "" {
		caps := strings.Split(droppedCaps, ",")

		// Resolve every capability name; an unknown name aborts the wrapper.
		dropCaps := slices.Map(caps, func(c string) cap.Value {
			capability, err := cap.FromName(c)
			if err != nil {
				log.Fatalf("failed to parse capability: %v", err)
			}

			return capability
		})

		// drop capabilities: raise the requested values in the IAB Bound
		// vector of the current process and apply the result.
		iab := cap.IABGetProc()
		if err := iab.SetVector(cap.Bound, true, dropCaps...); err != nil {
			log.Fatalf("failed to set capabilities: %v", err)
		}

		if err := iab.SetProc(); err != nil {
			log.Fatalf("failed to apply capabilities: %v", err)
		}
	}

	// On success Exec does not return; the wrapped command takes over this
	// process, keeping the PID and the settings applied above.
	if err := unix.Exec(command[0], command, os.Environ()); err != nil {
		log.Fatalf("failed to exec: %v", err)
	}
}