feat: allow upgrades in maintenance mode (only over SideroLink)
This implements a simple way to upgrade Talos node running in maintenance mode (only if Talos is installed, i.e. if `STATE` and `EPHEMERAL` partitions are wiped). Upgrade is only available over SideroLink for security reasons. Upgrade in maintenance mode doesn't support any options, and it works without machine configuration, so proxy environment variables are not available, registry mirrors can't be used, and extensions are not installed. Fixes #6224 Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
parent
48dee48057
commit
139c62d762
@ -28,6 +28,7 @@ var upgradeCmdFlags struct {
|
||||
force bool
|
||||
wait bool
|
||||
debug bool
|
||||
insecure bool
|
||||
}
|
||||
|
||||
// upgradeCmd represents the processes command.
|
||||
@ -41,6 +42,10 @@ var upgradeCmd = &cobra.Command{
|
||||
upgradeCmdFlags.wait = true
|
||||
}
|
||||
|
||||
if upgradeCmdFlags.wait && upgradeCmdFlags.insecure {
|
||||
return fmt.Errorf("cannot use --wait and --insecure together")
|
||||
}
|
||||
|
||||
if !upgradeCmdFlags.wait {
|
||||
return runUpgradeNoWait()
|
||||
}
|
||||
@ -58,7 +63,7 @@ var upgradeCmd = &cobra.Command{
|
||||
}
|
||||
|
||||
func runUpgradeNoWait() error {
|
||||
return WithClient(func(ctx context.Context, c *client.Client) error {
|
||||
upgradeFn := func(ctx context.Context, c *client.Client) error {
|
||||
if err := helpers.ClientVersionCheck(ctx, c); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -98,7 +103,13 @@ func runUpgradeNoWait() error {
|
||||
}
|
||||
|
||||
return w.Flush()
|
||||
})
|
||||
}
|
||||
|
||||
if upgradeCmdFlags.insecure {
|
||||
return WithClientMaintenance(nil, upgradeFn)
|
||||
}
|
||||
|
||||
return WithClient(upgradeFn)
|
||||
}
|
||||
|
||||
func upgradeGetActorID(ctx context.Context, c *client.Client) (string, error) {
|
||||
@ -127,5 +138,6 @@ func init() {
|
||||
upgradeCmd.Flags().BoolVarP(&upgradeCmdFlags.force, "force", "f", false, "force the upgrade (skip checks on etcd health and members, might lead to data loss)")
|
||||
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.wait, "wait", false, "wait for the operation to complete, tracking its progress. always set to true when --debug is set")
|
||||
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.debug, "debug", false, "debug operation from kernel logs. --no-wait is set to false when this flag is set")
|
||||
upgradeCmd.Flags().BoolVar(&upgradeCmdFlags.insecure, "insecure", false, "upgrade using the insecure (encrypted with no auth) maintenance service")
|
||||
addCommand(upgradeCmd)
|
||||
}
|
||||
|
@ -22,11 +22,6 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/cio"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/namespaces"
|
||||
"github.com/containerd/containerd/oci"
|
||||
criconstants "github.com/containerd/containerd/pkg/cri/constants"
|
||||
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
|
||||
"github.com/cosi-project/runtime/pkg/safe"
|
||||
@ -55,7 +50,6 @@ import (
|
||||
"google.golang.org/protobuf/types/known/emptypb"
|
||||
|
||||
installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
|
||||
"github.com/talos-systems/talos/internal/app/machined/internal/install"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/disk"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
|
||||
@ -68,8 +62,8 @@ import (
|
||||
"github.com/talos-systems/talos/internal/pkg/containers"
|
||||
taloscontainerd "github.com/talos-systems/talos/internal/pkg/containers/containerd"
|
||||
"github.com/talos-systems/talos/internal/pkg/containers/cri"
|
||||
"github.com/talos-systems/talos/internal/pkg/containers/image"
|
||||
"github.com/talos-systems/talos/internal/pkg/etcd"
|
||||
"github.com/talos-systems/talos/internal/pkg/install"
|
||||
"github.com/talos-systems/talos/internal/pkg/miniprocfs"
|
||||
"github.com/talos-systems/talos/internal/pkg/mount"
|
||||
"github.com/talos-systems/talos/pkg/archiver"
|
||||
@ -84,7 +78,6 @@ import (
|
||||
"github.com/talos-systems/talos/pkg/machinery/api/storage"
|
||||
timeapi "github.com/talos-systems/talos/pkg/machinery/api/time"
|
||||
clientconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/generate"
|
||||
machinetype "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
|
||||
@ -125,7 +118,7 @@ func (s *Server) checkSupported(feature runtime.ModeCapability) error {
|
||||
mode := s.Controller.Runtime().State().Platform().Mode()
|
||||
|
||||
if !mode.Supports(feature) {
|
||||
return fmt.Errorf("method is not supported in %s mode", mode.String())
|
||||
return status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -500,7 +493,7 @@ func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply
|
||||
|
||||
log.Printf("validating %q", in.GetImage())
|
||||
|
||||
if err = pullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil {
|
||||
if err = install.PullAndValidateInstallerImage(ctx, s.Controller.Runtime().Config().Machine().Registries(), in.GetImage()); err != nil {
|
||||
return nil, fmt.Errorf("error validating installer image %q: %w", in.GetImage(), err)
|
||||
}
|
||||
|
||||
@ -1403,94 +1396,6 @@ func sendEmptyEvent(req *machine.EventsRequest, l machine.MachineService_EventsS
|
||||
return l.Send(emptyEvent)
|
||||
}
|
||||
|
||||
//nolint:gocyclo
|
||||
func pullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error {
|
||||
// Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry
|
||||
containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace)
|
||||
|
||||
const containerID = "validate"
|
||||
|
||||
client, err := containerd.New(constants.SystemContainerdAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer client.Close() //nolint:errcheck
|
||||
|
||||
img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// See if there's previous container/snapshot to clean up
|
||||
var oldcontainer containerd.Container
|
||||
|
||||
if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil {
|
||||
if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil {
|
||||
return fmt.Errorf("error deleting old container instance: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) {
|
||||
return fmt.Errorf("error cleaning up stale snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Launch the container with a known help command for a simple check to make sure the image is valid
|
||||
args := []string{
|
||||
"/bin/installer",
|
||||
"--help",
|
||||
}
|
||||
|
||||
specOpts := []oci.SpecOpts{
|
||||
oci.WithImageConfig(img),
|
||||
oci.WithProcessArgs(args...),
|
||||
}
|
||||
|
||||
containerOpts := []containerd.NewContainerOpts{
|
||||
containerd.WithImage(img),
|
||||
containerd.WithNewSnapshot(containerID, img),
|
||||
containerd.WithNewSpec(specOpts...),
|
||||
}
|
||||
|
||||
container, err := client.NewContainer(containerdctx, containerID, containerOpts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer container.Delete(containerdctx, containerd.WithSnapshotCleanup)
|
||||
|
||||
task, err := container.NewTask(containerdctx, cio.NullIO)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer task.Delete(containerdctx)
|
||||
|
||||
exitStatusC, err := task.Wait(containerdctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = task.Start(containerdctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
status := <-exitStatusC
|
||||
|
||||
code, _, err := status.Result()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if code != 0 {
|
||||
return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Containers implements the machine.MachineServer interface.
|
||||
func (s *Server) Containers(ctx context.Context, in *machine.ContainersRequest) (reply *machine.ContainersResponse, err error) {
|
||||
inspector, err := getContainerInspector(ctx, in.Namespace, in.Driver)
|
||||
|
@ -28,6 +28,7 @@ import (
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/system"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/system/services"
|
||||
"github.com/talos-systems/talos/internal/app/maintenance"
|
||||
"github.com/talos-systems/talos/internal/app/poweroff"
|
||||
"github.com/talos-systems/talos/internal/app/trustd"
|
||||
"github.com/talos-systems/talos/internal/pkg/mount"
|
||||
@ -197,11 +198,10 @@ func run() error {
|
||||
|
||||
drainer := runtime.NewDrainer()
|
||||
defer func() {
|
||||
c, cancel := context.WithTimeout(context.Background(), time.Second*10)
|
||||
drainCtx, drainCtxCancel := context.WithTimeout(context.Background(), time.Second*10)
|
||||
defer drainCtxCancel()
|
||||
|
||||
defer cancel()
|
||||
|
||||
if e := drainer.Drain(c); e != nil {
|
||||
if e := drainer.Drain(drainCtx); e != nil {
|
||||
log.Printf("WARNING: failed to drain controllers: %s", e)
|
||||
}
|
||||
}()
|
||||
@ -227,25 +227,37 @@ func run() error {
|
||||
log.Printf("controller runtime finished")
|
||||
}()
|
||||
|
||||
// Inject controller into maintenance service.
|
||||
maintenance.InjectController(c)
|
||||
|
||||
initializeCanceled := false
|
||||
|
||||
// Initialize the machine.
|
||||
if err = c.Run(ctx, runtime.SequenceInitialize, nil); err != nil {
|
||||
return err
|
||||
if errors.Is(err, context.Canceled) {
|
||||
initializeCanceled = true
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Perform an installation if required.
|
||||
if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
// If Initialize sequence was canceled, don't run any other sequence.
|
||||
if !initializeCanceled {
|
||||
// Perform an installation if required.
|
||||
if err = c.Run(ctx, runtime.SequenceInstall, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Start the machine API.
|
||||
system.Services(c.Runtime()).LoadAndStart(
|
||||
&services.Machined{Controller: c},
|
||||
&services.APID{},
|
||||
)
|
||||
// Start the machine API.
|
||||
system.Services(c.Runtime()).LoadAndStart(
|
||||
&services.Machined{Controller: c},
|
||||
&services.APID{},
|
||||
)
|
||||
|
||||
// Boot the machine.
|
||||
if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) {
|
||||
return err
|
||||
// Boot the machine.
|
||||
if err = c.Run(ctx, runtime.SequenceBoot, nil); err != nil && !errors.Is(err, context.Canceled) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Watch and handle runtime events.
|
||||
|
@ -353,7 +353,7 @@ func (ctrl *MachineStatusController) watchEvents() {
|
||||
// install sequence is run always, even if the machine is already installed, so we'll catch it by phase name
|
||||
case v1alpha1runtime.SequenceShutdown.String():
|
||||
newStage = runtime.MachineStageShuttingDown
|
||||
case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String():
|
||||
case v1alpha1runtime.SequenceUpgrade.String(), v1alpha1runtime.SequenceStageUpgrade.String(), v1alpha1runtime.SequenceMaintenanceUpgrade.String():
|
||||
newStage = runtime.MachineStageUpgrading
|
||||
case v1alpha1runtime.SequenceReset.String():
|
||||
newStage = runtime.MachineStageResetting
|
||||
@ -375,7 +375,9 @@ func (ctrl *MachineStatusController) watchEvents() {
|
||||
case currentSequence == v1alpha1runtime.SequenceInstall.String() && event.Phase == "install":
|
||||
newStage = runtime.MachineStageInstalling
|
||||
case (currentSequence == v1alpha1runtime.SequenceInstall.String() ||
|
||||
currentSequence == v1alpha1runtime.SequenceUpgrade.String()) && event.Phase == "kexec":
|
||||
currentSequence == v1alpha1runtime.SequenceUpgrade.String() ||
|
||||
currentSequence == v1alpha1runtime.SequenceStageUpgrade.String() ||
|
||||
currentSequence == v1alpha1runtime.SequenceMaintenanceUpgrade.String()) && event.Phase == "kexec":
|
||||
newStage = runtime.MachineStageRebooting
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,8 @@ const (
|
||||
SequenceUpgrade
|
||||
// SequenceStageUpgrade is the stage upgrade sequence.
|
||||
SequenceStageUpgrade
|
||||
// SequenceMaintenanceUpgrade is the upgrade sequence in maintenance mode.
|
||||
SequenceMaintenanceUpgrade
|
||||
// SequenceReset is the reset sequence.
|
||||
SequenceReset
|
||||
// SequenceReboot is the reboot sequence.
|
||||
@ -35,18 +37,22 @@ const (
|
||||
)
|
||||
|
||||
const (
|
||||
boot = "boot"
|
||||
initialize = "initialize"
|
||||
install = "install"
|
||||
shutdown = "shutdown"
|
||||
upgrade = "upgrade"
|
||||
stageUpgrade = "stageUpgrade"
|
||||
reset = "reset"
|
||||
reboot = "reboot"
|
||||
noop = "noop"
|
||||
boot = "boot"
|
||||
initialize = "initialize"
|
||||
install = "install"
|
||||
shutdown = "shutdown"
|
||||
upgrade = "upgrade"
|
||||
stageUpgrade = "stageUpgrade"
|
||||
maintenanceUpgrade = "maintenanceUpgrade"
|
||||
reset = "reset"
|
||||
reboot = "reboot"
|
||||
noop = "noop"
|
||||
)
|
||||
|
||||
var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
|
||||
SequenceInitialize: {
|
||||
SequenceMaintenanceUpgrade: {},
|
||||
},
|
||||
SequenceBoot: {
|
||||
SequenceReboot: {},
|
||||
SequenceReset: {},
|
||||
@ -62,7 +68,7 @@ var sequenceTakeOver = map[Sequence]map[Sequence]struct{}{
|
||||
|
||||
// String returns the string representation of a `Sequence`.
|
||||
func (s Sequence) String() string {
|
||||
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, reset, reboot}[s]
|
||||
return [...]string{noop, boot, initialize, install, shutdown, upgrade, stageUpgrade, maintenanceUpgrade, reset, reboot}[s]
|
||||
}
|
||||
|
||||
// CanTakeOver defines sequences priority.
|
||||
@ -141,6 +147,7 @@ type Sequencer interface {
|
||||
Shutdown(Runtime, *machine.ShutdownRequest) []Phase
|
||||
StageUpgrade(Runtime, *machine.UpgradeRequest) []Phase
|
||||
Upgrade(Runtime, *machine.UpgradeRequest) []Phase
|
||||
MaintenanceUpgrade(Runtime, *machine.UpgradeRequest) []Phase
|
||||
}
|
||||
|
||||
// EventSequenceStart represents the sequence start event.
|
||||
|
@ -336,12 +336,8 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
|
||||
case runtime.SequenceInstall:
|
||||
phases = c.s.Install(c.r)
|
||||
case runtime.SequenceShutdown:
|
||||
var (
|
||||
in *machine.ShutdownRequest
|
||||
ok bool
|
||||
)
|
||||
|
||||
if in, ok = data.(*machine.ShutdownRequest); !ok {
|
||||
in, ok := data.(*machine.ShutdownRequest)
|
||||
if !ok {
|
||||
return nil, runtime.ErrInvalidSequenceData
|
||||
}
|
||||
|
||||
@ -349,34 +345,29 @@ func (c *Controller) phases(seq runtime.Sequence, data interface{}) ([]runtime.P
|
||||
case runtime.SequenceReboot:
|
||||
phases = c.s.Reboot(c.r)
|
||||
case runtime.SequenceUpgrade:
|
||||
var (
|
||||
in *machine.UpgradeRequest
|
||||
ok bool
|
||||
)
|
||||
|
||||
if in, ok = data.(*machine.UpgradeRequest); !ok {
|
||||
in, ok := data.(*machine.UpgradeRequest)
|
||||
if !ok {
|
||||
return nil, runtime.ErrInvalidSequenceData
|
||||
}
|
||||
|
||||
phases = c.s.Upgrade(c.r, in)
|
||||
case runtime.SequenceStageUpgrade:
|
||||
var (
|
||||
in *machine.UpgradeRequest
|
||||
ok bool
|
||||
)
|
||||
|
||||
if in, ok = data.(*machine.UpgradeRequest); !ok {
|
||||
in, ok := data.(*machine.UpgradeRequest)
|
||||
if !ok {
|
||||
return nil, runtime.ErrInvalidSequenceData
|
||||
}
|
||||
|
||||
phases = c.s.StageUpgrade(c.r, in)
|
||||
case runtime.SequenceReset:
|
||||
var (
|
||||
in runtime.ResetOptions
|
||||
ok bool
|
||||
)
|
||||
case runtime.SequenceMaintenanceUpgrade:
|
||||
in, ok := data.(*machine.UpgradeRequest)
|
||||
if !ok {
|
||||
return nil, runtime.ErrInvalidSequenceData
|
||||
}
|
||||
|
||||
if in, ok = data.(runtime.ResetOptions); !ok {
|
||||
phases = c.s.MaintenanceUpgrade(c.r, in)
|
||||
case runtime.SequenceReset:
|
||||
in, ok := data.(runtime.ResetOptions)
|
||||
if !ok {
|
||||
return nil, runtime.ErrInvalidSequenceData
|
||||
}
|
||||
|
||||
|
@ -59,6 +59,10 @@ func (m *mockSequencer) StageUpgrade(r runtime.Runtime, req *machine.UpgradeRequ
|
||||
return m.phases[runtime.SequenceStageUpgrade]
|
||||
}
|
||||
|
||||
func (m *mockSequencer) MaintenanceUpgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase {
|
||||
return m.phases[runtime.SequenceMaintenanceUpgrade]
|
||||
}
|
||||
|
||||
func (m *mockSequencer) Upgrade(r runtime.Runtime, req *machine.UpgradeRequest) []runtime.Phase {
|
||||
return m.phases[runtime.SequenceUpgrade]
|
||||
}
|
||||
|
@ -375,6 +375,44 @@ func (*Sequencer) StageUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest)
|
||||
return phases
|
||||
}
|
||||
|
||||
// MaintenanceUpgrade is the upgrade sequence in maintenance mode.
|
||||
func (*Sequencer) MaintenanceUpgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase {
|
||||
phases := PhaseList{}
|
||||
|
||||
switch r.State().Platform().Mode() { //nolint:exhaustive
|
||||
case runtime.ModeContainer:
|
||||
return nil
|
||||
default:
|
||||
phases = phases.Append(
|
||||
"containerd",
|
||||
StartContainerd,
|
||||
).Append(
|
||||
"verifyDisk",
|
||||
VerifyDiskAvailability,
|
||||
).Append(
|
||||
"upgrade",
|
||||
Upgrade,
|
||||
).Append(
|
||||
"mountBoot",
|
||||
MountBootPartition,
|
||||
).Append(
|
||||
"kexec",
|
||||
KexecPrepare,
|
||||
).Append(
|
||||
"unmountBoot",
|
||||
UnmountBootPartition,
|
||||
).Append(
|
||||
"stopEverything",
|
||||
StopAllServices,
|
||||
).Append(
|
||||
"reboot",
|
||||
Reboot,
|
||||
)
|
||||
}
|
||||
|
||||
return phases
|
||||
}
|
||||
|
||||
// Upgrade is the upgrade sequence.
|
||||
func (*Sequencer) Upgrade(r runtime.Runtime, in *machineapi.UpgradeRequest) []runtime.Phase {
|
||||
phases := PhaseList{}
|
||||
|
@ -43,7 +43,6 @@ import (
|
||||
"kernel.org/pub/linux/libs/security/libcap/cap"
|
||||
|
||||
installer "github.com/talos-systems/talos/cmd/installer/pkg/install"
|
||||
"github.com/talos-systems/talos/internal/app/machined/internal/install"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader"
|
||||
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader/adv"
|
||||
@ -56,6 +55,7 @@ import (
|
||||
"github.com/talos-systems/talos/internal/app/maintenance"
|
||||
"github.com/talos-systems/talos/internal/pkg/cri"
|
||||
"github.com/talos-systems/talos/internal/pkg/etcd"
|
||||
"github.com/talos-systems/talos/internal/pkg/install"
|
||||
"github.com/talos-systems/talos/internal/pkg/mount"
|
||||
"github.com/talos-systems/talos/internal/pkg/partition"
|
||||
"github.com/talos-systems/talos/pkg/conditions"
|
||||
@ -625,7 +625,7 @@ func receiveConfigViaMaintenanceService(ctx context.Context, logger *log.Logger,
|
||||
Task: "runningMaintenance",
|
||||
})
|
||||
|
||||
cfgBytes, err := maintenance.Run(ctx, logger, r)
|
||||
cfgBytes, err := maintenance.Run(ctx, logger)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("maintenance service failed: %w", err)
|
||||
}
|
||||
|
@ -94,12 +94,21 @@ func (c *Containerd) Runner(r runtime.Runtime) (runner.Runner, error) {
|
||||
}
|
||||
|
||||
env := []string{}
|
||||
for key, val := range r.Config().Machine().Env() {
|
||||
env = append(env, fmt.Sprintf("%s=%s", key, val))
|
||||
|
||||
if r.Config() != nil {
|
||||
for key, val := range r.Config().Machine().Env() {
|
||||
env = append(env, fmt.Sprintf("%s=%s", key, val))
|
||||
}
|
||||
}
|
||||
|
||||
debug := false
|
||||
|
||||
if r.Config() != nil {
|
||||
debug = r.Config().Debug()
|
||||
}
|
||||
|
||||
return restart.New(process.NewRunner(
|
||||
r.Config().Debug(),
|
||||
debug,
|
||||
args,
|
||||
runner.WithLoggingManager(r.Logging()),
|
||||
runner.WithEnv(env),
|
||||
|
@ -32,19 +32,30 @@ import (
|
||||
"github.com/talos-systems/talos/pkg/machinery/resources/network"
|
||||
)
|
||||
|
||||
var ctrl runtime.Controller
|
||||
|
||||
// InjectController is used to pass the controller into the maintenance service.
|
||||
func InjectController(c runtime.Controller) {
|
||||
ctrl = c
|
||||
}
|
||||
|
||||
// Run executes the configuration receiver, returning any configuration it receives.
|
||||
//
|
||||
//nolint:gocyclo
|
||||
func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, error) {
|
||||
func Run(ctx context.Context, logger *log.Logger) ([]byte, error) {
|
||||
if ctrl == nil {
|
||||
return nil, fmt.Errorf("controller is not injected")
|
||||
}
|
||||
|
||||
logger.Println("waiting for network address to be ready")
|
||||
|
||||
if err := network.NewReadyCondition(r.State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
|
||||
if err := network.NewReadyCondition(ctrl.Runtime().State().V1Alpha2().Resources(), network.AddressReady).Wait(ctx); err != nil {
|
||||
return nil, fmt.Errorf("error waiting for the network to be ready: %w", err)
|
||||
}
|
||||
|
||||
var sideroLinkAddress netip.Addr
|
||||
|
||||
currentAddresses, err := r.State().V1Alpha2().Resources().WatchFor(ctx,
|
||||
currentAddresses, err := ctrl.Runtime().State().V1Alpha2().Resources().WatchFor(ctx,
|
||||
resource.NewMetadata(network.NamespaceName, network.NodeAddressType, network.NodeAddressCurrentID, resource.VersionUndefined),
|
||||
sideroLinkAddressFinder(&sideroLinkAddress, logger),
|
||||
)
|
||||
@ -55,7 +66,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
|
||||
ips := currentAddresses.(*network.NodeAddress).TypedSpec().IPs()
|
||||
|
||||
// hostname might not be available yet, so use it only if it is available
|
||||
hostnameStatus, err := r.State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
|
||||
hostnameStatus, err := ctrl.Runtime().State().V1Alpha2().Resources().Get(ctx, resource.NewMetadata(network.NamespaceName, network.HostnameStatusType, network.HostnameID, resource.VersionUndefined))
|
||||
if err != nil && !state.IsNotFoundError(err) {
|
||||
return nil, fmt.Errorf("error getting node hostname: %w", err)
|
||||
}
|
||||
@ -83,7 +94,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
|
||||
|
||||
cfgCh := make(chan []byte)
|
||||
|
||||
s := server.New(r, logger, cfgCh)
|
||||
s := server.New(ctrl, logger, cfgCh)
|
||||
|
||||
injector := &authz.Injector{
|
||||
Mode: authz.ReadOnly,
|
||||
@ -152,7 +163,7 @@ func Run(ctx context.Context, logger *log.Logger, r runtime.Runtime) ([]byte, er
|
||||
|
||||
return cfg, err
|
||||
case <-ctx.Done():
|
||||
return nil, fmt.Errorf("context is done")
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
|
50
internal/app/maintenance/server/peer.go
Normal file
50
internal/app/maintenance/server/peer.go
Normal file
@ -0,0 +1,50 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"net/netip"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/peer"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/talos-systems/talos/pkg/machinery/resources/network"
|
||||
)
|
||||
|
||||
func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool {
|
||||
remotePeer, ok := peer.FromContext(ctx)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
if remotePeer.Addr.Network() != "tcp" {
|
||||
return false
|
||||
}
|
||||
|
||||
ip, _, err := net.SplitHostPort(remotePeer.Addr.String())
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
addr, err := netip.ParseAddr(ip)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return condition(addr)
|
||||
}
|
||||
|
||||
func assertPeerSideroLink(ctx context.Context) error {
|
||||
if !verifyPeer(ctx, func(addr netip.Addr) bool {
|
||||
return network.IsULA(addr, network.ULASideroLink)
|
||||
}) {
|
||||
return status.Error(codes.Unimplemented, "API is not implemented in maintenance mode")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
@ -8,16 +8,14 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"net/netip"
|
||||
"strings"
|
||||
|
||||
cosiv1alpha1 "github.com/cosi-project/runtime/api/v1alpha1"
|
||||
"github.com/cosi-project/runtime/pkg/state"
|
||||
"github.com/cosi-project/runtime/pkg/state/protobuf/server"
|
||||
"github.com/google/uuid"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/peer"
|
||||
"google.golang.org/grpc/status"
|
||||
"google.golang.org/protobuf/types/known/emptypb"
|
||||
|
||||
@ -30,7 +28,6 @@ import (
|
||||
"github.com/talos-systems/talos/pkg/machinery/api/storage"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config/configloader"
|
||||
v1alpha1machine "github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1/machine"
|
||||
"github.com/talos-systems/talos/pkg/machinery/resources/network"
|
||||
"github.com/talos-systems/talos/pkg/version"
|
||||
)
|
||||
|
||||
@ -38,18 +35,18 @@ import (
|
||||
type Server struct {
|
||||
machine.UnimplementedMachineServiceServer
|
||||
|
||||
runtime runtime.Runtime
|
||||
logger *log.Logger
|
||||
cfgCh chan []byte
|
||||
server *grpc.Server
|
||||
controller runtime.Controller
|
||||
logger *log.Logger
|
||||
cfgCh chan<- []byte
|
||||
server *grpc.Server
|
||||
}
|
||||
|
||||
// New initializes and returns a `Server`.
|
||||
func New(r runtime.Runtime, logger *log.Logger, cfgCh chan []byte) *Server {
|
||||
func New(c runtime.Controller, logger *log.Logger, cfgCh chan<- []byte) *Server {
|
||||
return &Server{
|
||||
runtime: r,
|
||||
logger: logger,
|
||||
cfgCh: cfgCh,
|
||||
controller: c,
|
||||
logger: logger,
|
||||
cfgCh: cfgCh,
|
||||
}
|
||||
}
|
||||
|
||||
@ -58,7 +55,7 @@ func (s *Server) Register(obj *grpc.Server) {
|
||||
s.server = obj
|
||||
|
||||
// wrap resources with access filter
|
||||
resourceState := s.runtime.State().V1Alpha2().Resources()
|
||||
resourceState := s.controller.Runtime().State().V1Alpha2().Resources()
|
||||
resourceState = state.WrapCore(state.Filter(resourceState, resources.AccessPolicy(resourceState)))
|
||||
|
||||
storage.RegisterStorageServiceServer(obj, &storaged.Server{})
|
||||
@ -86,7 +83,7 @@ func (s *Server) ApplyConfiguration(ctx context.Context, in *machine.ApplyConfig
|
||||
return nil, fmt.Errorf("failed to parse config: %w", err)
|
||||
}
|
||||
|
||||
warnings, err := cfgProvider.Validate(s.runtime.State().Platform().Mode())
|
||||
warnings, err := cfgProvider.Validate(s.controller.Runtime().State().Platform().Mode())
|
||||
if err != nil {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "configuration validation failed: %s", err)
|
||||
}
|
||||
@ -131,43 +128,18 @@ func (s *Server) GenerateClientConfiguration(ctx context.Context, in *machine.Ge
|
||||
return nil, status.Error(codes.Unimplemented, "client configuration (talosconfig) can't be generated in the maintenance mode")
|
||||
}
|
||||
|
||||
func verifyPeer(ctx context.Context, condition func(netip.Addr) bool) bool {
|
||||
remotePeer, ok := peer.FromContext(ctx)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
if remotePeer.Addr.Network() != "tcp" {
|
||||
return false
|
||||
}
|
||||
|
||||
ip, _, err := net.SplitHostPort(remotePeer.Addr.String())
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
addr, err := netip.ParseAddr(ip)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return condition(addr)
|
||||
}
|
||||
|
||||
// Version implements the machine.MachineServer interface.
|
||||
func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.VersionResponse, error) {
|
||||
if !verifyPeer(ctx, func(addr netip.Addr) bool {
|
||||
return network.IsULA(addr, network.ULASideroLink)
|
||||
}) {
|
||||
return nil, status.Error(codes.Unimplemented, "Version API is not implemented in maintenance mode")
|
||||
if err := assertPeerSideroLink(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var platform *machine.PlatformInfo
|
||||
|
||||
if s.runtime.State().Platform() != nil {
|
||||
if s.controller.Runtime().State().Platform() != nil {
|
||||
platform = &machine.PlatformInfo{
|
||||
Name: s.runtime.State().Platform().Name(),
|
||||
Mode: s.runtime.State().Platform().Mode().String(),
|
||||
Name: s.controller.Runtime().State().Platform().Name(),
|
||||
Mode: s.controller.Runtime().State().Platform().Mode().String(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -180,3 +152,52 @@ func (s *Server) Version(ctx context.Context, in *emptypb.Empty) (*machine.Versi
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Upgrade initiates an upgrade.
|
||||
//
|
||||
//nolint:gocyclo,cyclop
|
||||
func (s *Server) Upgrade(ctx context.Context, in *machine.UpgradeRequest) (reply *machine.UpgradeResponse, err error) {
|
||||
if err = assertPeerSideroLink(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if s.controller.Runtime().State().Machine().Disk() == nil {
|
||||
return nil, status.Errorf(codes.FailedPrecondition, "Talos is not installed")
|
||||
}
|
||||
|
||||
actorID := uuid.New().String()
|
||||
|
||||
mode := s.controller.Runtime().State().Platform().Mode()
|
||||
|
||||
if !mode.Supports(runtime.Upgrade) {
|
||||
return nil, status.Errorf(codes.FailedPrecondition, "method is not supported in %s mode", mode.String())
|
||||
}
|
||||
|
||||
// none of the options are supported in maintenance mode
|
||||
if in.GetPreserve() || in.GetStage() || in.GetForce() {
|
||||
return nil, status.Errorf(codes.Unimplemented, "upgrade --preserve, --stage, and --force are not supported in maintenance mode")
|
||||
}
|
||||
|
||||
log.Printf("upgrade request received: %q", in.GetImage())
|
||||
|
||||
runCtx := context.WithValue(context.Background(), runtime.ActorIDCtxKey{}, actorID)
|
||||
|
||||
go func() {
|
||||
if err := s.controller.Run(runCtx, runtime.SequenceMaintenanceUpgrade, in); err != nil {
|
||||
if !runtime.IsRebootError(err) {
|
||||
log.Println("upgrade failed:", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
reply = &machine.UpgradeResponse{
|
||||
Messages: []*machine.Upgrade{
|
||||
{
|
||||
Ack: "Upgrade request received",
|
||||
ActorId: actorID,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return reply, nil
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
@ -29,6 +30,7 @@ import (
|
||||
"github.com/talos-systems/talos/internal/pkg/extensions"
|
||||
machineapi "github.com/talos-systems/talos/pkg/machinery/api/machine"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config/types/v1alpha1"
|
||||
"github.com/talos-systems/talos/pkg/machinery/constants"
|
||||
)
|
||||
|
||||
@ -46,9 +48,16 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
|
||||
}
|
||||
}
|
||||
|
||||
configBytes, err := cfg.Bytes()
|
||||
if err != nil {
|
||||
return err
|
||||
var (
|
||||
registriesConfig config.Registries
|
||||
extensionsConfig []config.Extension
|
||||
)
|
||||
|
||||
if cfg != nil {
|
||||
registriesConfig = cfg.Machine().Registries()
|
||||
extensionsConfig = cfg.Machine().Install().Extensions()
|
||||
} else {
|
||||
registriesConfig = &v1alpha1.RegistriesConfig{}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@ -77,7 +86,7 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
|
||||
if img == nil || err != nil && errdefs.IsNotFound(err) {
|
||||
log.Printf("pulling %q", ref)
|
||||
|
||||
img, err = image.Pull(ctx, cfg.Machine().Registries(), client, ref)
|
||||
img, err = image.Pull(ctx, registriesConfig, client, ref)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@ -89,8 +98,10 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
|
||||
return err
|
||||
}
|
||||
|
||||
if err = puller.PullAndMount(ctx, cfg.Machine().Registries(), cfg.Machine().Install().Extensions()); err != nil {
|
||||
return err
|
||||
if extensionsConfig != nil {
|
||||
if err = puller.PullAndMount(ctx, registriesConfig, extensionsConfig); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
defer func() {
|
||||
@ -205,19 +216,35 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
|
||||
|
||||
w := &kmsg.Writer{KmsgWriter: f}
|
||||
|
||||
configR := &containerdrunner.StdinCloser{
|
||||
Stdin: bytes.NewReader(configBytes),
|
||||
Closer: make(chan struct{}),
|
||||
var r interface {
|
||||
io.Reader
|
||||
WaitAndClose(context.Context, containerd.Task)
|
||||
}
|
||||
|
||||
creator := cio.NewCreator(cio.WithStreams(configR, w, w))
|
||||
if cfg != nil {
|
||||
var configBytes []byte
|
||||
|
||||
configBytes, err = cfg.Bytes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r = &containerdrunner.StdinCloser{
|
||||
Stdin: bytes.NewReader(configBytes),
|
||||
Closer: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
creator := cio.NewCreator(cio.WithStreams(r, w, w))
|
||||
|
||||
t, err := container.NewTask(ctx, creator)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go configR.WaitAndClose(ctx, t)
|
||||
if r != nil {
|
||||
go r.WaitAndClose(ctx, t)
|
||||
}
|
||||
|
||||
defer t.Delete(ctx) //nolint:errcheck
|
||||
|
||||
@ -242,10 +269,15 @@ func RunInstallerContainer(disk, platform, ref string, cfg config.Provider, opts
|
||||
|
||||
// OptionsFromUpgradeRequest builds installer options from upgrade request.
|
||||
func OptionsFromUpgradeRequest(r runtime.Runtime, in *machineapi.UpgradeRequest) []Option {
|
||||
return []Option{
|
||||
opts := []Option{
|
||||
WithPull(false),
|
||||
WithUpgrade(true),
|
||||
WithForce(!in.GetPreserve()),
|
||||
WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()),
|
||||
}
|
||||
|
||||
if r.Config() != nil {
|
||||
opts = append(opts, WithExtraKernelArgs(r.Config().Machine().Install().ExtraKernelArgs()))
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
110
internal/pkg/install/pull.go
Normal file
110
internal/pkg/install/pull.go
Normal file
@ -0,0 +1,110 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package install
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/cio"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/namespaces"
|
||||
"github.com/containerd/containerd/oci"
|
||||
|
||||
"github.com/talos-systems/talos/internal/pkg/containers/image"
|
||||
"github.com/talos-systems/talos/pkg/machinery/config"
|
||||
"github.com/talos-systems/talos/pkg/machinery/constants"
|
||||
)
|
||||
|
||||
// PullAndValidateInstallerImage pulls down the installer and validates that it can run.
|
||||
//
|
||||
//nolint:gocyclo
|
||||
func PullAndValidateInstallerImage(ctx context.Context, reg config.Registries, ref string) error {
|
||||
// Pull down specified installer image early so we can bail if it doesn't exist in the upstream registry
|
||||
containerdctx := namespaces.WithNamespace(ctx, constants.SystemContainerdNamespace)
|
||||
|
||||
const containerID = "validate"
|
||||
|
||||
client, err := containerd.New(constants.SystemContainerdAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer client.Close() //nolint:errcheck
|
||||
|
||||
img, err := image.Pull(containerdctx, reg, client, ref, image.WithSkipIfAlreadyPulled())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// See if there's previous container/snapshot to clean up
|
||||
var oldcontainer containerd.Container
|
||||
|
||||
if oldcontainer, err = client.LoadContainer(containerdctx, containerID); err == nil {
|
||||
if err = oldcontainer.Delete(containerdctx, containerd.WithSnapshotCleanup); err != nil {
|
||||
return fmt.Errorf("error deleting old container instance: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err = client.SnapshotService("").Remove(containerdctx, containerID); err != nil && !errdefs.IsNotFound(err) {
|
||||
return fmt.Errorf("error cleaning up stale snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Launch the container with a known help command for a simple check to make sure the image is valid
|
||||
args := []string{
|
||||
"/bin/installer",
|
||||
"--help",
|
||||
}
|
||||
|
||||
specOpts := []oci.SpecOpts{
|
||||
oci.WithImageConfig(img),
|
||||
oci.WithProcessArgs(args...),
|
||||
}
|
||||
|
||||
containerOpts := []containerd.NewContainerOpts{
|
||||
containerd.WithImage(img),
|
||||
containerd.WithNewSnapshot(containerID, img),
|
||||
containerd.WithNewSpec(specOpts...),
|
||||
}
|
||||
|
||||
container, err := client.NewContainer(containerdctx, containerID, containerOpts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer container.Delete(containerdctx, containerd.WithSnapshotCleanup)
|
||||
|
||||
task, err := container.NewTask(containerdctx, cio.NullIO)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//nolint:errcheck
|
||||
defer task.Delete(containerdctx)
|
||||
|
||||
exitStatusC, err := task.Wait(containerdctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = task.Start(containerdctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
status := <-exitStatusC
|
||||
|
||||
code, _, err := status.Result()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if code != 0 {
|
||||
return fmt.Errorf("installer help returned non-zero exit. assuming invalid installer")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
@ -2201,6 +2201,7 @@ talosctl upgrade [flags]
|
||||
-f, --force force the upgrade (skip checks on etcd health and members, might lead to data loss)
|
||||
-h, --help help for upgrade
|
||||
-i, --image string the container image to use for performing the install
|
||||
--insecure upgrade using the insecure (encrypted with no auth) maintenance service
|
||||
-p, --preserve preserve data
|
||||
-s, --stage stage the upgrade to perform it after a reboot
|
||||
--wait wait for the operation to complete, tracking its progress. always set to true when --debug is set
|
||||
|
Loading…
x
Reference in New Issue
Block a user