feat: allow upgrades in maintenance mode (only over SideroLink)

This implements a simple way to upgrade Talos node running in
maintenance mode (only if Talos is installed, i.e. if `STATE` and
`EPHEMERAL` partitions are wiped).

Upgrade is only available over SideroLink for security reasons.

Upgrade in maintenance mode doesn't support any options, and it works
without machine configuration, so proxy environment variables are not
available, registry mirrors can't be used, and extensions are not
installed.

Fixes #6224

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
Andrey Smirnov 2022-09-20 22:27:03 +04:00
parent 48dee48057
commit 139c62d762
No known key found for this signature in database
GPG Key ID: 7B26396447AB6DFD
17 changed files with 426 additions and 221 deletions

View File

@ -28,6 +28,7 @@ var upgradeCmdFlags struct {
force bool
wait bool
debug bool
insecure bool
}
// upgradeCmd represents the processes command.
@ -41,6 +42,10 @@ var upgradeCmd = &cobra.Command{
upgradeCmdFlags.wait = true
}
if upgradeCmdFlags.wait && upgradeCmdFlags.insecure {
return fmt.Errorf("cannot use --wait and --insecure together")
}
if !upgradeCmdFlags.wait {
return runUpgradeNoWait()
}
@ -58,7 +63,7 @@ var upgradeCmd = &cobra.Command{
}
func runUpgradeNoWait() error {
return WithClient(func(ctx context.Context, c *client.Client) error {
upgradeFn := func(ctx context.Context, c *client.Client) error {
if err := helpers.ClientVersionCheck(ctx, c); err != nil {
return err
}
@ -98,7 +103,13 @@ func runUpgradeNoWait() error {
}
return w.Flush()
})
}
if upgradeCmdFlags.insecure {
return WithClientMaintenance(nil, upgradeFn)
}
return WithClient(upgradeFn)
}
func upgradeGetActorID(ctx context.Context, c *client.Client) (string, error) {
@ -127,5 +138,6 @@ func init() {
upgradeCmd.Flags().BoolVarP(&upgradeCmdFlags.force, "force", "f", false, "force the upgrade (skip checks on etcd health and members, might lead to data loss)")
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.wait, "wait", false, "wait for the operation to complete, tracking its progress. always set to true when --debug is set")
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.debug, "debug", false, "debug operation from kernel logs. --no-wait is set to false when this flag is set")
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.insecure, "insecure", false, "upgrade using the insecure (encrypted with no auth) maintenance service")
addCommand(upgradeCmd)
}

View File

@ -22,11 +22,6 @@ import (
"syscall"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
criconstants "github.com/containerd/containerd/pkg/cri/constants"
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
"github.com/cosi-project/runtime/pkg/safe"
@ -55,7 +50,6 @@ import (
"google.golang.org/protobuf/types/known/emptypb"
installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
"github.com/talos-systems/talos/internal/app/machined/internal/install"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/disk"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
@ -68,8 +62,8 @@ import (
"github.com/talos-systems/talos/internal/pkg/containers"
taloscontainerd "github.com/talos-systems/talos/internal/pkg/containers/containerd"
"github.com/talos-systems/talos/internal/pkg/containers/cri"
"github.com/talos-systems/talos/internal/pkg/containers/image"
"github.com/talos-systems/talos/internal/pkg/etcd"
"github.com/talos-systems/talos/internal/pkg/install"
"github.com/talos-systems/talos/internal/pkg/miniprocfs"
"github.com/talos-systems/talos/internal/pkg/mount"
"github.com/talos-systems/talos/pkg/archiver"
@ -84,7 +78,6 @@ import (
"github.com/talos-systems/talos/pkg/machinery/api/storage"
timeapi "github.com/talos-systems/talos/pkg/machinery/api/time"
clientconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
"github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/generate"
machinetype "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
@ -125,7 +118,7 @@ func (s *Server) checkSupported(feature runtime.ModeCapability) error {
mode := s.Controller.Runtime().State().Platform().Mode()
if !mode.Supports(feature) {
return fmt.Errorf("method is not supported in %s mode", mode.String())
return status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
}
return nil
@ -500,7 +493,7 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
log.Printf("validating %q", in.GetImage())
if err = pullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil {
if err = install.PullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil {
return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err)
}
@ -1403,94 +1396,6 @@ func sendEmptyEvent(req *machine.EventsRequest, l machine.MachineService_EventsS
return l.Send(emptyEvent)
}
//nolint:gocyclo
func pullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error {
// Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry
containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace)
const containerID = "validate"
client, err := containerd.New(constants.SystemContainerdAddress)
if err != nil {
return err
}
defer client.Close() //nolint:errcheck
img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled())
if err != nil {
return err
}
// See if there's previous container/snapshot to clean up
var oldcontainer containerd.Container
if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil {
if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil {
return fmt.Errorf("error deleting old container instance: %w", err)
}
}
if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("error cleaning up stale snapshot: %w", err)
}
// Launch the container with a known help command for a simple check to make sure the image is valid
args := []string{
"/bin/installer",
"--help",
}
specOpts := []oci.SpecOpts{
oci.WithImageConfig(img),
oci.WithProcessArgs(args...),
}
containerOpts := []containerd.NewContainerOpts{
containerd.WithImage(img),
containerd.WithNewSnapshot(containerID, img),
containerd.WithNewSpec(specOpts...),
}
container, err := client.NewContainer(containerdctx, containerID, containerOpts...)
if err != nil {
return err
}
//nolint:errcheck
defer container.Delete(containerdctx, containerd.WithSnapshotCleanup)
task, err := container.NewTask(containerdctx, cio.NullIO)
if err != nil {
return err
}
//nolint:errcheck
defer task.Delete(containerdctx)
exitStatusC, err := task.Wait(containerdctx)
if err != nil {
return err
}
if err = task.Start(containerdctx); err != nil {
return err
}
status := <-exitStatusC
code, _, err := status.Result()
if err != nil {
return err
}
if code != 0 {
return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer")
}
return nil
}
// Containers implements the machine.MachineServer interface.
func (s *Server) Containers(ctx context.Context, in *machine.ContainersRequest) (reply *machine.ContainersResponse, err error) {
inspector, err := getContainerInspector(ctx, in.Namespace, in.Driver)

View File

@ -28,6 +28,7 @@ import (
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
"github.com/talos-systems/talos/internal/app/machined/pkg/system"
"github.com/talos-systems/talos/internal/app/machined/pkg/system/services"
"github.com/talos-systems/talos/internal/app/maintenance"
"github.com/talos-systems/talos/internal/app/poweroff"
"github.com/talos-systems/talos/internal/app/trustd"
"github.com/talos-systems/talos/internal/pkg/mount"
@ -197,11 +198,10 @@ func run() error {
drainer := runtime.NewDrainer()
defer func() {
c, cancel := context.WithTimeout(context.Background(), time.Second*10)
drainCtx, drainCtxCancel := context.WithTimeout(context.Background(), time.Second*10)
defer drainCtxCancel()
defer cancel()
if e := drainer.Drain(c); e != nil {
if e := drainer.Drain(drainCtx); e != nil {
log.Printf("WARNING: failed to drain controllers: %s", e)
}
}()
@ -227,25 +227,37 @@ func run() error {
log.Printf("controller runtime finished")
}()
// Inject controller into maintenance service.
maintenance.InjectController(c)
initializeCanceled := false
// Initialize the machine.
if err = c.Run(ctx, runtime.SequenceInitialize, nil); err != nil {
return err
if errors.Is(err, context.Canceled) {
initializeCanceled = true
} else {
return err
}
}
// Perform an installation if required.
if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil {
return err
}
// If Initialize sequence was canceled, don't run any other sequence.
if !initializeCanceled {
// Perform an installation if required.
if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil {
return err
}
// Start the machine API.
system.Services(c.Runtime()).LoadAndStart(
&services.Machined{Controller: c},
&services.APID{},
)
// Start the machine API.
system.Services(c.Runtime()).LoadAndStart(
&services.Machined{Controller: c},
&services.APID{},
)
// Boot the machine.
if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) {
return err
// Boot the machine.
if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) {
return err
}
}
// Watch and handle runtime events.

View File

@ -353,7 +353,7 @@ func (ctrl *MachineStatusController) watchEvents() {
// install sequence is run always, even if the machine is already installed, so we'll catch it by phase name
case v1alpha1runtime.SequenceShutdown.String():
newStage = runtime.MachineStageShuttingDown
case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String():
case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String(), v1alpha1runtime.SequenceMaintenanceUpgrade.String():
newStage = runtime.MachineStageUpgrading
case v1alpha1runtime.SequenceReset.String():
newStage = runtime.MachineStageResetting
@ -375,7 +375,9 @@ func (ctrl *MachineStatusController) watchEvents() {
case currentSequence == v1alpha1runtime.SequenceInstall.String() && event.Phase == "install":
newStage = runtime.MachineStageInstalling
case (currentSequence == v1alpha1runtime.SequenceInstall.String() ||
currentSequence == v1alpha1runtime.SequenceUpgrade.String()) && event.Phase == "kexec":
currentSequence == v1alpha1runtime.SequenceUpgrade.String() ||
currentSequence == v1alpha1runtime.SequenceStageUpgrade.String() ||
currentSequence == v1alpha1runtime.SequenceMaintenanceUpgrade.String()) && event.Phase == "kexec":
newStage = runtime.MachineStageRebooting
}
}

View File

@ -28,6 +28,8 @@ const (
SequenceUpgrade
// SequenceStageUpgrade is the stage upgrade sequence.
SequenceStageUpgrade
// SequenceMaintenanceUpgrade is the upgrade sequence in maintenance mode.
SequenceMaintenanceUpgrade
// SequenceReset is the reset sequence.
SequenceReset
// SequenceReboot is the reboot sequence.
@ -35,18 +37,22 @@ const (
)
const (
boot = "boot"
initialize = "initialize"
install = "install"
shutdown = "shutdown"
upgrade = "upgrade"
stageUpgrade = "stageUpgrade"
reset = "reset"
reboot = "reboot"
noop = "noop"
boot = "boot"
initialize = "initialize"
install = "install"
shutdown = "shutdown"
upgrade = "upgrade"
stageUpgrade = "stageUpgrade"
maintenanceUpgrade = "maintenanceUpgrade"
reset = "reset"
reboot = "reboot"
noop = "noop"
)
var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
SequenceInitialize: {
SequenceMaintenanceUpgrade: {},
},
SequenceBoot: {
SequenceReboot: {},
SequenceReset: {},
@ -62,7 +68,7 @@ var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
// String returns the string representation of a `Sequence`.
func (s Sequence) String() string {
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, reset, reboot}[s]
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, maintenanceUpgrade, reset, reboot}[s]
}
// CanTakeOver defines sequences priority.
@ -141,6 +147,7 @@ type Sequencer interface {
Shutdown(Runtime, *machine.ShutdownRequest) []Phase
StageUpgrade(Runtime, *machine.UpgradeRequest) []Phase
Upgrade(Runtime, *machine.UpgradeRequest) []Phase
MaintenanceUpgrade(Runtime, *machine.UpgradeRequest) []Phase
}
// EventSequenceStart represents the sequence start event.

View File

@ -336,12 +336,8 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
case runtime.SequenceInstall:
phases = c.s.Install(c.r)
case runtime.SequenceShutdown:
var (
in *machine.ShutdownRequest
ok bool
)
if in, ok = data.(*machine.ShutdownRequest); !ok {
in, ok := data.(*machine.ShutdownRequest)
if !ok {
return nil, runtime.ErrInvalidSequenceData
}
@ -349,34 +345,29 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
case runtime.SequenceReboot:
phases = c.s.Reboot(c.r)
case runtime.SequenceUpgrade:
var (
in *machine.UpgradeRequest
ok bool
)
if in, ok = data.(*machine.UpgradeRequest); !ok {
in, ok := data.(*machine.UpgradeRequest)
if !ok {
return nil, runtime.ErrInvalidSequenceData
}
phases = c.s.Upgrade(c.r, in)
case runtime.SequenceStageUpgrade:
var (
in *machine.UpgradeRequest
ok bool
)
if in, ok = data.(*machine.UpgradeRequest); !ok {
in, ok := data.(*machine.UpgradeRequest)
if !ok {
return nil, runtime.ErrInvalidSequenceData
}
phases = c.s.StageUpgrade(c.r, in)
case runtime.SequenceReset:
var (
in runtime.ResetOptions
ok bool
)
case runtime.SequenceMaintenanceUpgrade:
in, ok := data.(*machine.UpgradeRequest)
if !ok {
return nil, runtime.ErrInvalidSequenceData
}
if in, ok = data.(runtime.ResetOptions); !ok {
phases = c.s.MaintenanceUpgrade(c.r, in)
case runtime.SequenceReset:
in, ok := data.(runtime.ResetOptions)
if !ok {
return nil, runtime.ErrInvalidSequenceData
}

View File

@ -59,6 +59,10 @@ func (m *mockSequencer) StageUpgrade(r runtime.Runtime, req *machine.UpgradeRequ
return m.phases[runtime.SequenceStageUpgrade]
}
func (m *mockSequencer) MaintenanceUpgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase {
return m.phases[runtime.SequenceMaintenanceUpgrade]
}
func (m *mockSequencer) Upgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase {
return m.phases[runtime.SequenceUpgrade]
}

View File

@ -375,6 +375,44 @@ func (*Sequencer) StageUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest)
return phases
}
// MaintenanceUpgrade is the upgrade sequence in maintenance mode.
func (*Sequencer) MaintenanceUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase {
phases := PhaseList{}
switch r.State().Platform().Mode() { //nolint:exhaustive
case runtime.ModeContainer:
return nil
default:
phases = phases.Append(
"containerd",
StartContainerd,
).Append(
"verifyDisk",
VerifyDiskAvailability,
).Append(
"upgrade",
Upgrade,
).Append(
"mountBoot",
MountBootPartition,
).Append(
"kexec",
KexecPrepare,
).Append(
"unmountBoot",
UnmountBootPartition,
).Append(
"stopEverything",
StopAllServices,
).Append(
"reboot",
Reboot,
)
}
return phases
}
// Upgrade is the upgrade sequence.
func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase {
phases := PhaseList{}

View File

@ -43,7 +43,6 @@ import (
"kernel.org/pub/linux/libs/security/libcap/cap"
installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
"github.com/talos-systems/talos/internal/app/machined/internal/install"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader/adv"
@ -56,6 +55,7 @@ import (
"github.com/talos-systems/talos/internal/app/maintenance"
"github.com/talos-systems/talos/internal/pkg/cri"
"github.com/talos-systems/talos/internal/pkg/etcd"
"github.com/talos-systems/talos/internal/pkg/install"
"github.com/talos-systems/talos/internal/pkg/mount"
"github.com/talos-systems/talos/internal/pkg/partition"
"github.com/talos-systems/talos/pkg/conditions"
@ -625,7 +625,7 @@ func receiveConfigViaMaintenanceService(ctx context.Context, logger *log.Logger,
Task: "runningMaintenance",
})
cfgBytes, err := maintenance.Run(ctx, logger, r)
cfgBytes, err := maintenance.Run(ctx, logger)
if err != nil {
return nil, fmt.Errorf("maintenance service failed: %w", err)
}

View File

@ -94,12 +94,21 @@ func (c *Containerd) Runner(r runtime.Runtime) (runner.Runner, error) {
}
env := []string{}
for key, val := range r.Config().Machine().Env() {
env = append(env, fmt.Sprintf("%s=%s", key, val))
if r.Config() != nil {
for key, val := range r.Config().Machine().Env() {
env = append(env, fmt.Sprintf("%s=%s", key, val))
}
}
debug := false
if r.Config() != nil {
debug = r.Config().Debug()
}
return restart.New(process.NewRunner(
r.Config().Debug(),
debug,
args,
runner.WithLoggingManager(r.Logging()),
runner.WithEnv(env),

View File

@ -32,19 +32,30 @@ import (
"github.com/talos-systems/talos/pkg/machinery/resources/network"
)
var ctrl runtime.Controller
// InjectController is used to pass the controller into the maintenance service.
func InjectController(c runtime.Controller) {
ctrl = c
}
// Run executes the configuration receiver, returning any configuration it receives.
//
//nolint:gocyclo
func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, error) {
func Run(ctx context.Context, logger *log.Logger) ([]byte, error) {
if ctrl == nil {
return nil, fmt.Errorf("controller is not injected")
}
logger.Println("waiting for network address to be ready")
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
if err := network.NewReadyCondition(ctrl.Runtime().State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
return nil, fmt.Errorf("error waiting for the network to be ready: %w", err)
}
var sideroLinkAddress netip.Addr
currentAddresses, err := r.State().V1Alpha2().Resources().WatchFor(ctx,
currentAddresses, err := ctrl.Runtime().State().V1Alpha2().Resources().WatchFor(ctx,
resource.NewMetadata(network.NamespaceName, network.NodeAddressType, network.NodeAddressCurrentID, resource.VersionUndefined),
sideroLinkAddressFinder(&sideroLinkAddress, logger),
)
@ -55,7 +66,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs()
// hostname might not be available yet, so use it only if it is available
hostnameStatus, err := r.State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
hostnameStatus, err := ctrl.Runtime().State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
if err != nil && !state.IsNotFoundError(err) {
return nil, fmt.Errorf("error getting node hostname: %w", err)
}
@ -83,7 +94,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
cfgCh := make(chan []byte)
s := server.New(r, logger, cfgCh)
s := server.New(ctrl, logger, cfgCh)
injector := &authz.Injector{
Mode: authz.ReadOnly,
@ -152,7 +163,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
return cfg, err
case <-ctx.Done():
return nil, fmt.Errorf("context is done")
return nil, ctx.Err()
}
}

View File

@ -0,0 +1,50 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package server
import (
"context"
"net"
"net/netip"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/status"
"github.com/talos-systems/talos/pkg/machinery/resources/network"
)
func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool {
remotePeer, ok := peer.FromContext(ctx)
if !ok {
return false
}
if remotePeer.Addr.Network() != "tcp" {
return false
}
ip, _, err := net.SplitHostPort(remotePeer.Addr.String())
if err != nil {
return false
}
addr, err := netip.ParseAddr(ip)
if err != nil {
return false
}
return condition(addr)
}
func assertPeerSideroLink(ctx context.Context) error {
if !verifyPeer(ctx, func(addr netip.Addr) bool {
return network.IsULA(addr, network.ULASideroLink)
}) {
return status.Error(codes.Unimplemented, "API is not implemented in maintenance mode")
}
return nil
}

View File

@ -8,16 +8,14 @@ import (
"context"
"fmt"
"log"
"net"
"net/netip"
"strings"
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
"github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/protobuf/server"
"github.com/google/uuid"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/emptypb"
@ -30,7 +28,6 @@ import (
"github.com/talos-systems/talos/pkg/machinery/api/storage"
"github.com/talos-systems/talos/pkg/machinery/config/configloader"
v1alpha1machine "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
"github.com/talos-systems/talos/pkg/machinery/resources/network"
"github.com/talos-systems/talos/pkg/version"
)
@ -38,18 +35,18 @@ import (
type Server struct {
machine.UnimplementedMachineServiceServer
runtime runtime.Runtime
logger *log.Logger
cfgCh chan []byte
server *grpc.Server
controller runtime.Controller
logger *log.Logger
cfgCh chan<- []byte
server *grpc.Server
}
// New initializes and returns a `Server`.
func New(r runtime.Runtime, logger *log.Logger, cfgCh chan []byte) *Server {
func New(c runtime.Controller, logger *log.Logger, cfgCh chan<- []byte) *Server {
return &Server{
runtime: r,
logger: logger,
cfgCh: cfgCh,
controller: c,
logger: logger,
cfgCh: cfgCh,
}
}
@ -58,7 +55,7 @@ func (s *Server) Register(obj *grpc.Server) {
s.server = obj
// wrap resources with access filter
resourceState := s.runtime.State().V1Alpha2().Resources()
resourceState := s.controller.Runtime().State().V1Alpha2().Resources()
resourceState = state.WrapCore(state.Filter(resourceState, resources.AccessPolicy(resourceState)))
storage.RegisterStorageServiceServer(obj, &storaged.Server{})
@ -86,7 +83,7 @@ func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfig
return nil, fmt.Errorf("failed to parse config: %w", err)
}
warnings, err := cfgProvider.Validate(s.runtime.State().Platform().Mode())
warnings, err := cfgProvider.Validate(s.controller.Runtime().State().Platform().Mode())
if err != nil {
return nil, status.Errorf(codes.InvalidArgument, "configuration validation failed: %s", err)
}
@ -131,43 +128,18 @@ func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.Ge
return nil, status.Error(codes.Unimplemented, "client configuration (talosconfig) can't be generated in the maintenance mode")
}
func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool {
remotePeer, ok := peer.FromContext(ctx)
if !ok {
return false
}
if remotePeer.Addr.Network() != "tcp" {
return false
}
ip, _, err := net.SplitHostPort(remotePeer.Addr.String())
if err != nil {
return false
}
addr, err := netip.ParseAddr(ip)
if err != nil {
return false
}
return condition(addr)
}
// Version implements the machine.MachineServer interface.
func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.VersionResponse, error) {
if !verifyPeer(ctx, func(addr netip.Addr) bool {
return network.IsULA(addr, network.ULASideroLink)
}) {
return nil, status.Error(codes.Unimplemented, "Version API is not implemented in maintenance mode")
if err := assertPeerSideroLink(ctx); err != nil {
return nil, err
}
var platform *machine.PlatformInfo
if s.runtime.State().Platform() != nil {
if s.controller.Runtime().State().Platform() != nil {
platform = &machine.PlatformInfo{
Name: s.runtime.State().Platform().Name(),
Mode: s.runtime.State().Platform().Mode().String(),
Name: s.controller.Runtime().State().Platform().Name(),
Mode: s.controller.Runtime().State().Platform().Mode().String(),
}
}
@ -180,3 +152,52 @@ func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.Versi
},
}, nil
}
// Upgrade initiates an upgrade.
//
//nolint:gocyclo,cyclop
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
if err = assertPeerSideroLink(ctx); err != nil {
return nil, err
}
if s.controller.Runtime().State().Machine().Disk() == nil {
return nil, status.Errorf(codes.FailedPrecondition, "Talos is not installed")
}
actorID := uuid.New().String()
mode := s.controller.Runtime().State().Platform().Mode()
if !mode.Supports(runtime.Upgrade) {
return nil, status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
}
// none of the options are supported in maintenance mode
if in.GetPreserve() || in.GetStage() || in.GetForce() {
return nil, status.Errorf(codes.Unimplemented, "upgrade --preserve, --stage, and --force are not supported in maintenance mode")
}
log.Printf("upgrade request received: %q", in.GetImage())
runCtx := context.WithValue(context.Background(), runtime.ActorIDCtxKey{}, actorID)
go func() {
if err := s.controller.Run(runCtx, runtime.SequenceMaintenanceUpgrade, in); err != nil {
if !runtime.IsRebootError(err) {
log.Println("upgrade failed:", err)
}
}
}()
reply = &machine.UpgradeResponse{
Messages: []*machine.Upgrade{
{
Ack: "Upgrade request received",
ActorId: actorID,
},
},
}
return reply, nil
}

View File

@ -8,6 +8,7 @@ import (
"bytes"
"context"
"fmt"
"io"
"log"
"os"
"strconv"
@ -29,6 +30,7 @@ import (
"github.com/talos-systems/talos/internal/pkg/extensions"
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
"github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1"
"github.com/talos-systems/talos/pkg/machinery/constants"
)
@ -46,9 +48,16 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
}
}
configBytes, err := cfg.Bytes()
if err != nil {
return err
var (
registriesConfig config.Registries
extensionsConfig []config.Extension
)
if cfg != nil {
registriesConfig = cfg.Machine().Registries()
extensionsConfig = cfg.Machine().Install().Extensions()
} else {
registriesConfig = &v1alpha1.RegistriesConfig{}
}
ctx, cancel := context.WithCancel(context.Background())
@ -77,7 +86,7 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
if img == nil || err != nil && errdefs.IsNotFound(err) {
log.Printf("pulling %q", ref)
img, err = image.Pull(ctx, cfg.Machine().Registries(), client, ref)
img, err = image.Pull(ctx, registriesConfig, client, ref)
}
if err != nil {
@ -89,8 +98,10 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
return err
}
if err = puller.PullAndMount(ctx, cfg.Machine().Registries(), cfg.Machine().Install().Extensions()); err != nil {
return err
if extensionsConfig != nil {
if err = puller.PullAndMount(ctx, registriesConfig, extensionsConfig); err != nil {
return err
}
}
defer func() {
@ -205,19 +216,35 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
w := &kmsg.Writer{KmsgWriter: f}
configR := &containerdrunner.StdinCloser{
Stdin: bytes.NewReader(configBytes),
Closer: make(chan struct{}),
var r interface {
io.Reader
WaitAndClose(context.Context, containerd.Task)
}
creator := cio.NewCreator(cio.WithStreams(configR, w, w))
if cfg != nil {
var configBytes []byte
configBytes, err = cfg.Bytes()
if err != nil {
return err
}
r = &containerdrunner.StdinCloser{
Stdin: bytes.NewReader(configBytes),
Closer: make(chan struct{}),
}
}
creator := cio.NewCreator(cio.WithStreams(r, w, w))
t, err := container.NewTask(ctx, creator)
if err != nil {
return err
}
go configR.WaitAndClose(ctx, t)
if r != nil {
go r.WaitAndClose(ctx, t)
}
defer t.Delete(ctx) //nolint:errcheck
@ -242,10 +269,15 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
// OptionsFromUpgradeRequest builds installer options from upgrade request.
func OptionsFromUpgradeRequest(r runtime.Runtime, in *machineapi.UpgradeRequest) []Option {
return []Option{
opts := []Option{
WithPull(false),
WithUpgrade(true),
WithForce(!in.GetPreserve()),
WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()),
}
if r.Config() != nil {
opts = append(opts, WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()))
}
return opts
}

View File

@ -0,0 +1,110 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package install
import (
"context"
"fmt"
"github.com/containerd/containerd"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
"github.com/talos-systems/talos/internal/pkg/containers/image"
"github.com/talos-systems/talos/pkg/machinery/config"
"github.com/talos-systems/talos/pkg/machinery/constants"
)
// PullAndValidateInstallerImage pulls down the installer and validates that it can run.
//
//nolint:gocyclo
func PullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error {
// Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry
containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace)
const containerID = "validate"
client, err := containerd.New(constants.SystemContainerdAddress)
if err != nil {
return err
}
defer client.Close() //nolint:errcheck
img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled())
if err != nil {
return err
}
// See if there's previous container/snapshot to clean up
var oldcontainer containerd.Container
if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil {
if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil {
return fmt.Errorf("error deleting old container instance: %w", err)
}
}
if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) {
return fmt.Errorf("error cleaning up stale snapshot: %w", err)
}
// Launch the container with a known help command for a simple check to make sure the image is valid
args := []string{
"/bin/installer",
"--help",
}
specOpts := []oci.SpecOpts{
oci.WithImageConfig(img),
oci.WithProcessArgs(args...),
}
containerOpts := []containerd.NewContainerOpts{
containerd.WithImage(img),
containerd.WithNewSnapshot(containerID, img),
containerd.WithNewSpec(specOpts...),
}
container, err := client.NewContainer(containerdctx, containerID, containerOpts...)
if err != nil {
return err
}
//nolint:errcheck
defer container.Delete(containerdctx, containerd.WithSnapshotCleanup)
task, err := container.NewTask(containerdctx, cio.NullIO)
if err != nil {
return err
}
//nolint:errcheck
defer task.Delete(containerdctx)
exitStatusC, err := task.Wait(containerdctx)
if err != nil {
return err
}
if err = task.Start(containerdctx); err != nil {
return err
}
status := <-exitStatusC
code, _, err := status.Result()
if err != nil {
return err
}
if code != 0 {
return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer")
}
return nil
}

View File

@ -2201,6 +2201,7 @@ talosctl upgrade [flags]
-f, --force force the upgrade (skip checks on etcd health and members, might lead to data loss)
-h, --help help for upgrade
-i, --image string the container image to use for performing the install
--insecure upgrade using the insecure (encrypted with no auth) maintenance service
-p, --preserve preserve data
-s, --stage stage the upgrade to perform it after a reboot
--wait wait for the operation to complete, tracking its progress. always set to true when --debug is set