feat: drop some capabilities to be never available

This PR makes sure that some capabilities (SYS_BOOT and SYS_MODULE) can
never be gained by any process running on Talos except for `machined`
itself.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2021-09-06 21:24:58 +03:00
parent 51e9836b01
commit 69897dbba4
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
8 changed files with 99 additions and 3 deletions

1
go.mod
View File

@ -109,4 +109,5 @@ require (
k8s.io/cri-api v0.22.1
k8s.io/kubectl v0.22.1
k8s.io/kubelet v0.22.1
kernel.org/pub/linux/libs/security/libcap/cap v1.2.56
)

4
go.sum
View File

@ -1893,6 +1893,10 @@ k8s.io/metrics v0.22.1/go.mod h1:i/ZNap89UkV1gLa26dn7fhKAdheJaKy+moOqJbiif7E=
k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
k8s.io/utils v0.0.0-20210707171843-4b05e18ac7d9 h1:imL9YgXQ9p7xmPzHFm/vVd/cF78jad+n4wK1ABwYtMM=
k8s.io/utils v0.0.0-20210707171843-4b05e18ac7d9/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.56 h1:nwsOaUtqOMtPPPOwnG90fV8EhVFxj81g7acKMfM/2ag=
kernel.org/pub/linux/libs/security/libcap/cap v1.2.56/go.mod h1:zOKBqiVovj7YgbAvwDRPp1xVaV2PiDoklW2YG8xmzDY=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.56 h1:yAcMQY+fGwpA0puwASaxk+gwlbjrl8TIroSpw8zL4Zo=
kernel.org/pub/linux/libs/security/libcap/psx v1.2.56/go.mod h1:+l6Ee2F59XiJ2I6WR5ObpC1utCQJZ/VLsEbQCD8RG24=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

View File

@ -24,6 +24,7 @@ import (
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
containerdrunner "github.com/talos-systems/talos/internal/app/machined/pkg/system/runner/containerd"
"github.com/talos-systems/talos/internal/pkg/capability"
"github.com/talos-systems/talos/internal/pkg/containers/image"
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/config"
@ -113,7 +114,14 @@ func RunInstallerContainer(disk, platform, ref string, configBytes []byte, reg c
oci.WithHostHostsFile,
oci.WithHostResolvconf,
oci.WithParentCgroupDevices,
oci.WithPrivileged,
oci.WithCapabilities(capability.AllGrantableCapabilities()),
oci.WithMaskedPaths(nil),
oci.WithReadonlyPaths(nil),
oci.WithWriteableSysfs,
oci.WithWriteableCgroupfs,
oci.WithSelinuxLabel(""),
oci.WithApparmorProfile(""),
oci.WithSeccompUnconfined,
oci.WithAllDevicesAllowed,
}

View File

@ -107,6 +107,7 @@ func (*Sequencer) Initialize(r runtime.Runtime) []runtime.Phase {
MountCgroups,
MountPseudoFilesystems,
SetRLimit,
DropCapabilities,
).Append(
"integrity",
WriteIMAPolicy,

View File

@ -37,6 +37,7 @@ import (
clientv3 "go.etcd.io/etcd/client/v3"
"golang.org/x/sys/unix"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"kernel.org/pub/linux/libs/security/libcap/cap"
installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
"github.com/talos-systems/talos/internal/app/machined/internal/install"
@ -237,6 +238,51 @@ func SetRLimit(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFun
}, "setRLimit"
}
// DropCapabilities drops some capabilities so that they can't be restored by child processes.
//
// The returned task runs three steps in order and stops at the first failure:
//  1. sets the "no ambient raise" secure bit together with its lock bit;
//  2. issues prctl(PR_SET_NO_NEW_PRIVS) via AllThreadsSyscall6;
//  3. marks SYS_BOOT and SYS_MODULE in the IAB Bound vector and applies it to the process.
//
// The seq and data arguments are not used by this task.
func DropCapabilities(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
	return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
		// Disallow raising ambient capabilities (ever): set the bit and lock it.
		ambientLock := cap.GetSecbits() | cap.SecbitNoCapAmbientRaise | cap.SecbitNoCapAmbientRaiseLocked

		if err := ambientLock.Set(); err != nil {
			return fmt.Errorf("error setting secbits: %w", err)
		}

		// Set PR_SET_NO_NEW_PRIVS to limit setuid and similar privilege raising techniques.
		// See https://www.kernel.org/doc/html/v5.10/userspace-api/no_new_privs.html.
		_, _, errno := syscall.AllThreadsSyscall6(syscall.SYS_PRCTL, unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0, 0)

		switch {
		case errno == 0:
			// prctl succeeded, nothing more to do for this step.
		case errors.Is(errno, syscall.EOPNOTSUPP):
			// Not fatal: only log that the step was skipped and carry on.
			logger.Printf("no_new_privs skipped, as Talos is built with CGo")
		default:
			return fmt.Errorf("error setting no new privs: %w", errno)
		}

		// Drop capabilities from the bounding set effectively disabling them for all forked processes,
		// but keep them for PID 1.
		boundingState := cap.IABGetProc()

		for _, dropped := range []cap.Value{cap.SYS_BOOT, cap.SYS_MODULE} {
			if err := boundingState.SetVector(cap.Bound, true, dropped); err != nil {
				return fmt.Errorf("error removing %s from the bounding set: %w", dropped, err)
			}
		}

		// Nothing changes for the process until the modified IAB tuple is applied.
		if err := boundingState.SetProc(); err != nil {
			return fmt.Errorf("error applying caps: %w", err)
		}

		return nil
	}, "dropCapabilities"
}
// See https://www.kernel.org/doc/Documentation/ABI/testing/ima_policy
var rules = []string{
"dont_measure fsmagic=0x9fa0", // PROC_SUPER_MAGIC

View File

@ -277,7 +277,6 @@ func (c *containerdRunner) newOCISpecOpts(image oci.Image) []oci.SpecOpts {
oci.WithHostHostsFile,
oci.WithHostResolvconf,
oci.WithNoNewPrivileges,
seccomp.WithDefaultProfile(),
)
if c.opts.CgroupPath != "" {
@ -290,6 +289,10 @@ func (c *containerdRunner) newOCISpecOpts(image oci.Image) []oci.SpecOpts {
c.opts.OCISpecOpts...,
)
specOpts = append(specOpts,
seccomp.WithDefaultProfile(), // add seccomp profile last, as it depends on process capabilities
)
return specOpts
}

View File

@ -31,6 +31,7 @@ import (
"github.com/talos-systems/talos/internal/app/machined/pkg/system/runner"
"github.com/talos-systems/talos/internal/app/machined/pkg/system/runner/containerd"
"github.com/talos-systems/talos/internal/app/machined/pkg/system/runner/restart"
"github.com/talos-systems/talos/internal/pkg/capability"
"github.com/talos-systems/talos/internal/pkg/containers/image"
"github.com/talos-systems/talos/pkg/argsbuilder"
"github.com/talos-systems/talos/pkg/conditions"
@ -200,8 +201,14 @@ func (k *Kubelet) Runner(r runtime.Runtime) (runner.Runner, error) {
oci.WithHostNamespace(specs.NetworkNamespace),
oci.WithHostNamespace(specs.PIDNamespace),
oci.WithParentCgroupDevices,
oci.WithPrivileged,
oci.WithMaskedPaths(nil),
oci.WithReadonlyPaths(nil),
oci.WithWriteableSysfs,
oci.WithWriteableCgroupfs,
oci.WithSelinuxLabel(""),
oci.WithApparmorProfile(""),
oci.WithAllDevicesAllowed,
oci.WithCapabilities(capability.AllGrantableCapabilities()), // TODO: kubelet doesn't need all of these, we should consider limiting capabilities
),
),
restart.WithType(restart.Forever),

View File

@ -0,0 +1,26 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Package capability provides utility functions to work with capabilities.
package capability
import (
"strings"
"kernel.org/pub/linux/libs/security/libcap/cap"
)
// AllGrantableCapabilities returns list of capabilities that can be granted to the container based on
// process bounding capabilities.
func AllGrantableCapabilities() []string {
capabilities := []string{}
for v := cap.Value(0); v < cap.MaxBits(); v++ {
if set, _ := cap.GetBound(v); set { //nolint:errcheck
capabilities = append(capabilities, strings.ToUpper(v.String()))
}
}
return capabilities
}