fix: store and execute desired action on emergency action
Fixes #7854 Talos runs an emergency handler if the sequence experiences an unrecoverable failure. The emergency handler was unconditionally executing the "reboot" action if no other action was received (which only gets received if the sequence completes successfully), so a Shutdown request might result in Reboot behavior on an error during the shutdown phase. This is not a pretty fix, but it's hard to deliver the intent from one part of the code to another right now, so instead use a global variable which stores the default emergency intention and gets overridden early in the Shutdown sequence. Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
This commit is contained in:
parent
515ae2a184
commit
474fa0480d
@ -27,6 +27,7 @@ import (
|
||||
"github.com/siderolabs/talos/internal/app/apid"
|
||||
"github.com/siderolabs/talos/internal/app/dashboard"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/emergency"
|
||||
v1alpha1runtime "github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/system"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/system/services"
|
||||
@ -95,7 +96,7 @@ func syncNonVolatileStorageBuffers() {
|
||||
|
||||
//nolint:gocyclo
|
||||
func handle(ctx context.Context, err error) {
|
||||
rebootCmd := unix.LINUX_REBOOT_CMD_RESTART
|
||||
rebootCmd := int(emergency.RebootCmd.Load())
|
||||
|
||||
var rebootErr runtime.RebootError
|
||||
|
||||
|
@ -534,6 +534,13 @@ func (ctrl *PlatformConfigController) runWithRestarts(ctx context.Context, logge
|
||||
return
|
||||
}
|
||||
|
||||
// skip restarting if context is already done
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
interval := backoff.NextBackOff()
|
||||
|
||||
logger.Error("restarting platform network config", zap.Duration("interval", interval), zap.Error(err))
|
||||
|
19
internal/app/machined/pkg/runtime/emergency/emergency.go
Normal file
19
internal/app/machined/pkg/runtime/emergency/emergency.go
Normal file
@ -0,0 +1,19 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
// Package emergency provides values used for emergency (panic/unrecoverable error) handling in machined.
|
||||
package emergency
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// RebootCmd is a command to reboot the system after an unrecoverable error.
//
// It holds a unix.LINUX_REBOOT_CMD_* value. This package's init sets it to
// LINUX_REBOOT_CMD_RESTART; other code may Store a different command to change
// what happens on an emergency.
|
||||
var RebootCmd atomic.Int64
|
||||
|
||||
// init makes restart the default emergency command. An init function is used
// because atomic.Int64 cannot be given an initial value in its declaration.
func init() {
|
||||
RebootCmd.Store(unix.LINUX_REBOOT_CMD_RESTART)
|
||||
}
|
@ -375,7 +375,10 @@ func (*Sequencer) Reset(r runtime.Runtime, in runtime.ResetOptions) []runtime.Ph
|
||||
|
||||
// Shutdown is the shutdown sequence.
|
||||
func (*Sequencer) Shutdown(r runtime.Runtime, in *machineapi.ShutdownRequest) []runtime.Phase {
|
||||
phases := PhaseList{}.AppendWhen(
|
||||
phases := PhaseList{}.Append(
|
||||
"storeShudown",
|
||||
StoreShutdownEmergency,
|
||||
).AppendWhen(
|
||||
!in.GetForce() && !r.Config().Machine().Kubelet().SkipNodeRegistration(),
|
||||
"drain",
|
||||
CordonAndDrainNode,
|
||||
|
@ -47,6 +47,7 @@ import (
|
||||
installer "github.com/siderolabs/talos/cmd/installer/pkg/install"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/disk"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/emergency"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/bootloader/grub"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform"
|
||||
"github.com/siderolabs/talos/internal/app/machined/pkg/system"
|
||||
@ -2255,6 +2256,17 @@ func FlushMeta(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
|
||||
}, "flushMeta"
|
||||
}
|
||||
|
||||
// StoreShutdownEmergency stores shutdown emergency state.
//
// The returned task sets emergency.RebootCmd to LINUX_REBOOT_CMD_POWER_OFF and
// always returns nil; it is returned together with the name "storeShutdownEmergency".
|
||||
func StoreShutdownEmergency(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
|
||||
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
|
||||
// for the shutdown sequence, store power_off as the intent; it will be picked up
|
||||
// by the emergency handler in machined/main.go if the Shutdown sequence fails
|
||||
emergency.RebootCmd.Store(unix.LINUX_REBOOT_CMD_POWER_OFF)
|
||||
|
||||
return nil
|
||||
}, "storeShutdownEmergency"
|
||||
}
|
||||
|
||||
func pauseOnFailure(callback func(runtime.Sequence, any) (runtime.TaskExecutionFunc, string),
|
||||
timeout time.Duration,
|
||||
) func(seq runtime.Sequence, data any) (runtime.TaskExecutionFunc, string) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user