feat: use wrapper for starting processes
Use a wrapper for starting processes which can set up proper cgroups and the OOM score, and also drop capabilities for the process, before it calls `execve`. The containerd test is also fixed to support cgroups when running tests in buildkit. It used to pass previously because we did not return an error if cgroup setup failed. Signed-off-by: Noel Georgi <git@frezbo.dev>
This commit is contained in:
parent
56d9453261
commit
5cb2915d8e
@ -468,6 +468,8 @@ COPY --from=machined-build-amd64 /machined /rootfs/sbin/init
|
||||
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
|
||||
RUN ln /rootfs/sbin/init /rootfs/sbin/poweroff
|
||||
RUN chmod +x /rootfs/sbin/poweroff
|
||||
RUN ln /rootfs/sbin/init /rootfs/sbin/wrapperd
|
||||
RUN chmod +x /rootfs/sbin/wrapperd
|
||||
# NB: We run the cleanup step before creating extra directories, files, and
|
||||
# symlinks to avoid accidentally cleaning them up.
|
||||
COPY ./hack/cleanup.sh /toolchain/bin/cleanup.sh
|
||||
@ -515,6 +517,8 @@ COPY --from=machined-build-arm64 /machined /rootfs/sbin/init
|
||||
# the orderly_poweroff call by the kernel will call '/sbin/poweroff'
|
||||
RUN ln /rootfs/sbin/init /rootfs/sbin/poweroff
|
||||
RUN chmod +x /rootfs/sbin/poweroff
|
||||
RUN ln /rootfs/sbin/init /rootfs/sbin/wrapperd
|
||||
RUN chmod +x /rootfs/sbin/wrapperd
|
||||
# NB: We run the cleanup step before creating extra directories, files, and
|
||||
# symlinks to avoid accidentally cleaning them up.
|
||||
COPY ./hack/cleanup.sh /toolchain/bin/cleanup.sh
|
||||
|
@ -32,6 +32,7 @@ import (
|
||||
"github.com/siderolabs/talos/internal/app/maintenance"
|
||||
"github.com/siderolabs/talos/internal/app/poweroff"
|
||||
"github.com/siderolabs/talos/internal/app/trustd"
|
||||
"github.com/siderolabs/talos/internal/app/wrapperd"
|
||||
"github.com/siderolabs/talos/internal/pkg/mount"
|
||||
"github.com/siderolabs/talos/pkg/httpdefaults"
|
||||
"github.com/siderolabs/talos/pkg/machinery/api/common"
|
||||
@ -307,6 +308,10 @@ func main() {
|
||||
case "/sbin/poweroff":
|
||||
poweroff.Main()
|
||||
|
||||
return
|
||||
case "/sbin/wrapperd":
|
||||
wrapperd.Main()
|
||||
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
@ -74,16 +74,14 @@ func (suite *ContainerdSuite) SetupSuite() {
|
||||
suite.Require().NoError(os.Mkdir(rootDir, 0o777))
|
||||
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
var (
|
||||
groupPath string
|
||||
manager *cgroupsv2.Manager
|
||||
)
|
||||
var manager *cgroupsv2.Manager
|
||||
|
||||
groupPath, err = cgroupsv2.NestedGroupPath(suite.tmpDir)
|
||||
manager, err = cgroupsv2.NewManager(constants.CgroupMountPath, "/"+suite.T().Name(), &cgroupsv2.Resources{})
|
||||
suite.Require().NoError(err)
|
||||
|
||||
manager, err = cgroupsv2.NewManager(constants.CgroupMountPath, groupPath, &cgroupsv2.Resources{})
|
||||
suite.Require().NoError(err)
|
||||
// when using buildkit runner, parent `cgroup.type` is set to `domain threaded`, so child cgroups have to explicitly specify
|
||||
// `cgroup.type` to "threaded" https://www.kernel.org/doc/html/v5.0/admin-guide/cgroup-v2.html#threads
|
||||
suite.Require().NoError(os.WriteFile(filepath.Join(constants.CgroupMountPath, suite.T().Name(), "cgroup.type"), []byte("threaded"), 0o644))
|
||||
|
||||
defer manager.Delete() //nolint:errcheck
|
||||
} else {
|
||||
@ -113,7 +111,7 @@ func (suite *ContainerdSuite) SetupSuite() {
|
||||
args,
|
||||
runner.WithLoggingManager(suite.loggingManager),
|
||||
runner.WithEnv([]string{"PATH=/bin:" + constants.PATH}),
|
||||
runner.WithCgroupPath(suite.tmpDir),
|
||||
runner.WithCgroupPath("/"+suite.T().Name()),
|
||||
)
|
||||
suite.Require().NoError(suite.containerdRunner.Open())
|
||||
suite.containerdWg.Add(1)
|
||||
|
@ -5,17 +5,14 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups"
|
||||
cgroupsv2 "github.com/containerd/cgroups/v2"
|
||||
"github.com/containerd/containerd/sys"
|
||||
"github.com/siderolabs/go-cmd/pkg/cmd/proc/reaper"
|
||||
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/system/events"
|
||||
@ -80,7 +77,15 @@ func (p *processRunner) Close() error {
|
||||
}
|
||||
|
||||
func (p *processRunner) build() (cmd *exec.Cmd, logCloser io.Closer, err error) {
|
||||
cmd = exec.Command(p.args.ProcessArgs[0], p.args.ProcessArgs[1:]...)
|
||||
args := []string{
|
||||
fmt.Sprintf("-name=%s", p.args.ID),
|
||||
fmt.Sprintf("-dropped-caps=%s", strings.Join(p.opts.DroppedCapabilities, ",")),
|
||||
fmt.Sprintf("-cgroup-path=%s", p.opts.CgroupPath),
|
||||
fmt.Sprintf("-oom-score=%d", p.opts.OOMScoreAdj),
|
||||
}
|
||||
args = append(args, p.args.ProcessArgs...)
|
||||
|
||||
cmd = exec.Command("/sbin/wrapperd", args...)
|
||||
|
||||
// Set the environment for the service.
|
||||
cmd.Env = append([]string{fmt.Sprintf("PATH=%s", constants.PATH)}, p.opts.Env...)
|
||||
@ -122,52 +127,10 @@ func (p *processRunner) run(eventSink events.Recorder) error {
|
||||
defer reaper.Stop(notifyCh)
|
||||
}
|
||||
|
||||
var (
|
||||
cgv1 cgroups.Cgroup
|
||||
cgv2 *cgroupsv2.Manager
|
||||
)
|
||||
|
||||
// load the cgroup before starting the process, as once process is started,
|
||||
// it's not easy to fail (as the process has to be cleaned up)
|
||||
if p.opts.CgroupPath != "" {
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
cgv2, err = cgroupsv2.LoadManager(constants.CgroupMountPath, p.opts.CgroupPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load cgroup %s: %w", p.opts.CgroupPath, err)
|
||||
}
|
||||
} else {
|
||||
cgv1, err = cgroups.Load(cgroups.V1, cgroups.StaticPath(p.opts.CgroupPath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load cgroup %s: %w", p.opts.CgroupPath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err = cmd.Start(); err != nil {
|
||||
return fmt.Errorf("error starting process: %w", err)
|
||||
}
|
||||
|
||||
if p.opts.OOMScoreAdj != 0 {
|
||||
if err = sys.AdjustOOMScore(cmd.Process.Pid, p.opts.OOMScoreAdj); err != nil {
|
||||
eventSink(events.StateRunning, "Failed to change OOMScoreAdj to process %s", p)
|
||||
}
|
||||
}
|
||||
|
||||
if p.opts.CgroupPath != "" {
|
||||
// put the process into the cgroup and record failure (if any)
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
if err = cgv2.AddProc(uint64(cmd.Process.Pid)); err != nil && !errors.Is(err, syscall.ESRCH) { // ignore "no such process" error
|
||||
eventSink(events.StateRunning, "Failed to move process %s to cgroup: %s", p, err)
|
||||
}
|
||||
} else {
|
||||
if err = cgv1.Add(cgroups.Process{
|
||||
Pid: cmd.Process.Pid,
|
||||
}); err != nil && !errors.Is(err, syscall.ESRCH) { // ignore "no such process" error
|
||||
eventSink(events.StateRunning, "Failed to move process %s to cgroup: %s", p, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
eventSink(events.StateRunning, "Process %s started with PID %d", p, cmd.Process.Pid)
|
||||
|
||||
waitCh := make(chan error)
|
||||
|
@ -63,6 +63,8 @@ type Options struct {
|
||||
CgroupPath string
|
||||
// OverrideSeccompProfile overrides the default Linux seccomp profile.
|
||||
OverrideSeccompProfile func(*specs.LinuxSeccomp)
|
||||
// DroppedCapabilities is the list of capabilities to drop.
|
||||
DroppedCapabilities []string
|
||||
}
|
||||
|
||||
// Option is the functional option func.
|
||||
@ -164,3 +166,10 @@ func WithCustomSeccompProfile(override func(*specs.LinuxSeccomp)) Option {
|
||||
args.OverrideSeccompProfile = override
|
||||
}
|
||||
}
|
||||
|
||||
// WithBoundedCapabilities sets the list of capabilities to drop.
|
||||
func WithBoundedCapabilities(caps []string) Option {
|
||||
return func(args *Options) {
|
||||
args.DroppedCapabilities = caps
|
||||
}
|
||||
}
|
||||
|
98
internal/app/wrapperd/main.go
Normal file
98
internal/app/wrapperd/main.go
Normal file
@ -0,0 +1,98 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
// Package wrapperd provides a wrapper for running services.
|
||||
package wrapperd
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/containerd/cgroups"
|
||||
cgroupsv2 "github.com/containerd/cgroups/v2"
|
||||
"github.com/containerd/containerd/sys"
|
||||
"github.com/siderolabs/gen/slices"
|
||||
"golang.org/x/sys/unix"
|
||||
"kernel.org/pub/linux/libs/security/libcap/cap"
|
||||
|
||||
"github.com/siderolabs/talos/pkg/machinery/constants"
|
||||
)
|
||||
|
||||
var (
|
||||
name string
|
||||
droppedCaps string
|
||||
cgroupPath string
|
||||
oomScore int
|
||||
)
|
||||
|
||||
// Main is the entrypoint into /sbin/wrapperd.
|
||||
// nolint: gocyclo
|
||||
func Main() {
|
||||
flag.StringVar(&name, "name", "", "process name")
|
||||
flag.StringVar(&droppedCaps, "dropped-caps", "", "comma-separated list of capabilities to drop")
|
||||
flag.StringVar(&cgroupPath, "cgroup-path", "", "cgroup path to use")
|
||||
flag.IntVar(&oomScore, "oom-score", 0, "oom score to set")
|
||||
flag.Parse()
|
||||
|
||||
currentPid := os.Getpid()
|
||||
|
||||
if oomScore != 0 {
|
||||
if err := sys.AdjustOOMScore(currentPid, oomScore); err != nil {
|
||||
log.Fatalf("Failed to change OOMScoreAdj of process %s to %d", name, oomScore)
|
||||
}
|
||||
}
|
||||
|
||||
// load the cgroup and put the process into the cgroup
|
||||
if cgroupPath != "" {
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
cgv2, err := cgroupsv2.LoadManager(constants.CgroupMountPath, cgroupPath)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to load cgroup %s: %v", cgroupPath, err)
|
||||
}
|
||||
|
||||
if err := cgv2.AddProc(uint64(currentPid)); err != nil {
|
||||
log.Fatalf("Failed to move process %s to cgroup: %v", name, err)
|
||||
}
|
||||
} else {
|
||||
cgv1, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroupPath))
|
||||
if err != nil {
|
||||
log.Fatalf("failed to load cgroup %s: %v", cgroupPath, err)
|
||||
}
|
||||
|
||||
if err := cgv1.Add(cgroups.Process{
|
||||
Pid: currentPid,
|
||||
}); err != nil {
|
||||
log.Fatalf("Failed to move process %s to cgroup: %v", name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if droppedCaps != "" {
|
||||
caps := strings.Split(droppedCaps, ",")
|
||||
dropCaps := slices.Map(caps, func(c string) cap.Value {
|
||||
capability, err := cap.FromName(c)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse capability: %v", err)
|
||||
}
|
||||
|
||||
return capability
|
||||
})
|
||||
|
||||
// drop capabilities
|
||||
iab := cap.IABGetProc()
|
||||
if err := iab.SetVector(cap.Bound, true, dropCaps...); err != nil {
|
||||
log.Fatalf("failed to set capabilities: %v", err)
|
||||
}
|
||||
|
||||
if err := iab.SetProc(); err != nil {
|
||||
log.Fatalf("failed to apply capabilities: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := unix.Exec(flag.Args()[0], flag.Args()[0:], os.Environ()); err != nil {
|
||||
log.Fatalf("failed to exec: %v", err)
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user