2020-04-23 21:12:44 -07:00
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package v1alpha1
import (
"context"
2022-07-19 18:14:47 +03:00
"errors"
2020-04-23 21:12:44 -07:00
"fmt"
"log"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"golang.org/x/sync/errgroup"
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime"
2020-06-05 00:20:40 +03:00
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/logging"
2020-04-23 21:12:44 -07:00
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha1/acpi"
2021-01-12 15:52:24 +03:00
"github.com/talos-systems/talos/internal/app/machined/pkg/runtime/v1alpha2"
2020-08-17 19:12:47 +03:00
"github.com/talos-systems/talos/pkg/machinery/api/common"
"github.com/talos-systems/talos/pkg/machinery/api/machine"
2020-04-23 21:12:44 -07:00
)
// Controller represents the controller responsible for managing the execution
// of sequences.
type Controller struct {
2022-07-19 18:14:47 +03:00
s runtime . Sequencer
2021-01-12 15:52:24 +03:00
r * Runtime
v2 * v1alpha2 . Controller
2020-04-23 21:12:44 -07:00
2022-07-19 18:14:47 +03:00
priorityLock * PriorityLock [ runtime . Sequence ]
2020-04-23 21:12:44 -07:00
}
// NewController intializes and returns a controller.
2021-10-13 16:38:44 +00:00
func NewController ( ) ( * Controller , error ) {
2020-04-23 21:12:44 -07:00
// Wait for USB storage in the case that the install disk is supplied over
// USB. If we don't wait, there is the chance that we will fail to detect the
// install disk.
err := waitForUSBDelay ( )
if err != nil {
return nil , err
}
s , err := NewState ( )
if err != nil {
return nil , err
}
2020-05-15 18:19:06 +03:00
// TODO: this should be streaming capacity and probably some constant
2020-07-06 23:50:45 +03:00
e := NewEvents ( 1000 , 10 )
2020-05-07 22:50:41 -07:00
2021-10-12 08:50:14 +00:00
l := logging . NewCircularBufferLoggingManager ( log . New ( os . Stdout , "machined fallback logger: " , log . Flags ( ) ) )
2020-06-05 00:20:40 +03:00
2020-04-23 21:12:44 -07:00
ctlr := & Controller {
2022-07-19 18:14:47 +03:00
r : NewRuntime ( nil , s , e , l ) ,
s : NewSequencer ( ) ,
priorityLock : NewPriorityLock [ runtime . Sequence ] ( ) ,
2020-04-23 21:12:44 -07:00
}
2021-10-13 16:38:44 +00:00
ctlr . v2 , err = v1alpha2 . NewController ( ctlr . r )
2021-01-12 15:52:24 +03:00
if err != nil {
return nil , err
}
2020-04-23 21:12:44 -07:00
return ctlr , nil
}
// Run executes all phases known to the controller in serial. `Controller`
// aborts immediately if any phase fails.
2022-08-02 22:43:31 +04:00
//
2021-03-03 18:11:59 +03:00
//nolint:gocyclo
2022-07-19 18:14:47 +03:00
func ( c * Controller ) Run ( ctx context . Context , seq runtime . Sequence , data interface { } , setters ... runtime . LockOption ) error {
2020-04-23 21:12:44 -07:00
// We must ensure that the runtime is configured since all sequences depend
// on the runtime.
if c . r == nil {
return runtime . ErrUndefinedRuntime
}
2022-07-19 18:14:47 +03:00
ctx , err := c . priorityLock . Lock ( ctx , time . Minute , seq , setters ... )
if err != nil {
if errors . Is ( err , runtime . ErrLocked ) {
2022-08-10 17:58:55 +02:00
c . Runtime ( ) . Events ( ) . Publish ( context . Background ( ) , & machine . SequenceEvent {
2020-05-07 22:50:41 -07:00
Sequence : seq . String ( ) ,
Action : machine . SequenceEvent_NOOP ,
Error : & common . Error {
2020-07-21 00:17:50 +03:00
Code : common . Code_LOCKED ,
2020-05-07 22:50:41 -07:00
Message : fmt . Sprintf ( "sequence not started: %s" , runtime . ErrLocked . Error ( ) ) ,
} ,
} )
2020-05-01 15:28:55 -07:00
}
2020-04-23 21:12:44 -07:00
2022-07-19 18:14:47 +03:00
return err
2020-05-01 15:28:55 -07:00
}
2020-04-23 21:12:44 -07:00
2022-07-19 18:14:47 +03:00
defer c . priorityLock . Unlock ( )
2020-04-23 21:12:44 -07:00
phases , err := c . phases ( seq , data )
if err != nil {
return err
}
2021-03-02 21:53:31 +03:00
err = c . run ( ctx , seq , phases , data )
2020-05-07 22:50:41 -07:00
if err != nil {
2022-07-19 18:14:47 +03:00
code := common . Code_FATAL
if errors . Is ( err , context . Canceled ) {
code = common . Code_CANCELED
}
2022-08-10 17:58:55 +02:00
c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . SequenceEvent {
2020-05-07 22:50:41 -07:00
Sequence : seq . String ( ) ,
Action : machine . SequenceEvent_NOOP ,
Error : & common . Error {
2022-07-19 18:14:47 +03:00
Code : code ,
2020-05-07 22:50:41 -07:00
Message : fmt . Sprintf ( "sequence failed: %s" , err . Error ( ) ) ,
} ,
} )
return err
}
return nil
2020-04-23 21:12:44 -07:00
}
2021-01-12 15:52:24 +03:00
// V1Alpha2 returns the v1alpha2 controller owned by this controller; it is
// part of the controller interface.
func (c *Controller) V1Alpha2() runtime.V1Alpha2Controller {
	return c.v2
}
2020-04-23 21:12:44 -07:00
// Runtime returns the runtime this controller operates on; it is part of the
// controller interface.
func (c *Controller) Runtime() runtime.Runtime {
	return c.r
}
// Sequencer returns the sequencer used to resolve sequences into phases; it
// is part of the controller interface.
func (c *Controller) Sequencer() runtime.Sequencer {
	return c.s
}
// ListenForEvents starts the event listener. The listener will trigger a
// shutdown in response to a SIGTERM signal and ACPI button/power event.
2021-03-02 21:53:31 +03:00
func ( c * Controller ) ListenForEvents ( ctx context . Context ) error {
2020-04-23 21:12:44 -07:00
sigs := make ( chan os . Signal , 1 )
signal . Notify ( sigs , syscall . SIGTERM )
errCh := make ( chan error , 2 )
go func ( ) {
<- sigs
signal . Stop ( sigs )
log . Printf ( "shutdown via SIGTERM received" )
2022-02-03 21:47:23 +03:00
if err := c . Run ( ctx , runtime . SequenceShutdown , & machine . ShutdownRequest { Force : true } , runtime . WithTakeover ( ) ) ; err != nil {
2020-04-23 21:12:44 -07:00
log . Printf ( "shutdown failed: %v" , err )
}
errCh <- nil
} ( )
if c . r . State ( ) . Platform ( ) . Mode ( ) == runtime . ModeContainer {
return nil
}
go func ( ) {
if err := acpi . StartACPIListener ( ) ; err != nil {
errCh <- err
return
}
log . Printf ( "shutdown via ACPI received" )
2022-02-03 21:47:23 +03:00
if err := c . Run ( ctx , runtime . SequenceShutdown , & machine . ShutdownRequest { Force : true } , runtime . WithTakeover ( ) ) ; err != nil {
2021-03-03 18:11:59 +03:00
log . Printf ( "failed to run shutdown sequence: %s" , err )
2020-04-23 21:12:44 -07:00
}
errCh <- nil
} ( )
err := <- errCh
return err
}
2021-03-02 21:53:31 +03:00
func ( c * Controller ) run ( ctx context . Context , seq runtime . Sequence , phases [ ] runtime . Phase , data interface { } ) error {
2022-08-10 17:58:55 +02:00
c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . SequenceEvent {
2020-05-07 22:50:41 -07:00
Sequence : seq . String ( ) ,
Action : machine . SequenceEvent_START ,
} )
2022-08-10 17:58:55 +02:00
defer c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . SequenceEvent {
2020-05-07 22:50:41 -07:00
Sequence : seq . String ( ) ,
Action : machine . SequenceEvent_STOP ,
} )
2020-04-23 21:12:44 -07:00
start := time . Now ( )
var (
number int
phase runtime . Phase
err error
)
2020-07-21 00:36:28 +03:00
log . Printf ( "%s sequence: %d phase(s)" , seq . String ( ) , len ( phases ) )
defer func ( ) {
if err != nil {
2021-01-27 22:52:26 +03:00
if ! runtime . IsRebootError ( err ) {
log . Printf ( "%s sequence: failed" , seq . String ( ) )
}
2021-04-01 23:43:45 +03:00
} else {
log . Printf ( "%s sequence: done: %s" , seq . String ( ) , time . Since ( start ) )
2020-07-21 00:36:28 +03:00
}
} ( )
2020-04-23 21:12:44 -07:00
for number , phase = range phases {
// Make the phase number human friendly.
number ++
start := time . Now ( )
progress := fmt . Sprintf ( "%d/%d" , number , len ( phases ) )
2020-07-09 16:17:47 +03:00
log . Printf ( "phase %s (%s): %d tasks(s)" , phase . Name , progress , len ( phase . Tasks ) )
2020-04-23 21:12:44 -07:00
2021-03-02 21:53:31 +03:00
if err = c . runPhase ( ctx , phase , seq , data ) ; err != nil {
2021-01-27 22:52:26 +03:00
if ! runtime . IsRebootError ( err ) {
log . Printf ( "phase %s (%s): failed" , phase . Name , progress )
}
2020-07-21 00:36:28 +03:00
2020-04-23 21:12:44 -07:00
return fmt . Errorf ( "error running phase %d in %s sequence: %w" , number , seq . String ( ) , err )
}
2021-04-01 23:43:45 +03:00
log . Printf ( "phase %s (%s): done, %s" , phase . Name , progress , time . Since ( start ) )
2021-03-02 21:53:31 +03:00
select {
case <- ctx . Done ( ) :
return ctx . Err ( )
default :
}
2020-04-23 21:12:44 -07:00
}
return nil
}
2021-03-02 21:53:31 +03:00
func ( c * Controller ) runPhase ( ctx context . Context , phase runtime . Phase , seq runtime . Sequence , data interface { } ) error {
2022-08-10 17:58:55 +02:00
c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . PhaseEvent {
2020-07-09 16:17:47 +03:00
Phase : phase . Name ,
2020-05-07 22:50:41 -07:00
Action : machine . PhaseEvent_START ,
} )
2022-08-10 17:58:55 +02:00
defer c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . PhaseEvent {
2020-07-09 16:17:47 +03:00
Phase : phase . Name ,
2020-05-07 22:50:41 -07:00
Action : machine . PhaseEvent_START ,
} )
2022-07-19 18:14:47 +03:00
eg , ctx := errgroup . WithContext ( ctx )
2020-04-23 21:12:44 -07:00
2020-07-09 16:17:47 +03:00
for number , task := range phase . Tasks {
2020-04-23 21:12:44 -07:00
// Make the task number human friendly.
number := number
number ++
task := task
eg . Go ( func ( ) error {
2020-07-09 16:17:47 +03:00
progress := fmt . Sprintf ( "%d/%d" , number , len ( phase . Tasks ) )
2020-04-23 21:12:44 -07:00
2021-03-02 21:53:31 +03:00
if err := c . runTask ( ctx , progress , task , seq , data ) ; err != nil {
2020-04-23 21:12:44 -07:00
return fmt . Errorf ( "task %s: failed, %w" , progress , err )
}
return nil
} )
}
return eg . Wait ( )
}
2021-03-02 21:53:31 +03:00
func ( c * Controller ) runTask ( ctx context . Context , progress string , f runtime . TaskSetupFunc , seq runtime . Sequence , data interface { } ) error {
2020-07-09 16:17:47 +03:00
task , taskName := f ( seq , data )
if task == nil {
return nil
}
start := time . Now ( )
2022-08-10 17:58:55 +02:00
c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . TaskEvent {
2020-07-09 16:17:47 +03:00
Task : taskName ,
2020-05-07 22:50:41 -07:00
Action : machine . TaskEvent_START ,
} )
2020-07-21 00:36:28 +03:00
var err error
2020-07-09 16:17:47 +03:00
log . Printf ( "task %s (%s): starting" , taskName , progress )
defer func ( ) {
2020-07-21 00:36:28 +03:00
if err != nil {
2021-01-27 22:52:26 +03:00
if ! runtime . IsRebootError ( err ) {
2021-04-01 23:43:45 +03:00
log . Printf ( "task %s (%s): failed: %s" , taskName , progress , err )
2021-01-27 22:52:26 +03:00
}
2021-04-01 23:43:45 +03:00
} else {
log . Printf ( "task %s (%s): done, %s" , taskName , progress , time . Since ( start ) )
2020-07-21 00:36:28 +03:00
}
2020-07-09 16:17:47 +03:00
} ( )
2022-08-10 17:58:55 +02:00
defer c . Runtime ( ) . Events ( ) . Publish ( ctx , & machine . TaskEvent {
2020-07-09 16:17:47 +03:00
Task : taskName ,
2020-05-07 22:50:41 -07:00
Action : machine . TaskEvent_STOP ,
} )
2020-11-16 20:59:45 +03:00
logger := log . New ( log . Writer ( ) , fmt . Sprintf ( "[talos] task %s (%s): " , taskName , progress ) , log . Flags ( ) )
2020-04-23 21:12:44 -07:00
2021-03-02 21:53:31 +03:00
err = task ( ctx , logger , c . r )
2020-07-21 00:36:28 +03:00
return err
2020-04-23 21:12:44 -07:00
}
2021-03-05 11:52:08 +03:00
//nolint:gocyclo
2020-04-23 21:12:44 -07:00
func ( c * Controller ) phases ( seq runtime . Sequence , data interface { } ) ( [ ] runtime . Phase , error ) {
var phases [ ] runtime . Phase
switch seq {
case runtime . SequenceBoot :
phases = c . s . Boot ( c . r )
case runtime . SequenceInitialize :
phases = c . s . Initialize ( c . r )
case runtime . SequenceInstall :
phases = c . s . Install ( c . r )
case runtime . SequenceShutdown :
2022-09-20 22:27:03 +04:00
in , ok := data . ( * machine . ShutdownRequest )
if ! ok {
2022-01-28 22:54:32 +01:00
return nil , runtime . ErrInvalidSequenceData
}
phases = c . s . Shutdown ( c . r , in )
2020-04-23 21:12:44 -07:00
case runtime . SequenceReboot :
phases = c . s . Reboot ( c . r )
case runtime . SequenceUpgrade :
2022-09-20 22:27:03 +04:00
in , ok := data . ( * machine . UpgradeRequest )
if ! ok {
2020-04-23 21:12:44 -07:00
return nil , runtime . ErrInvalidSequenceData
}
phases = c . s . Upgrade ( c . r , in )
2020-12-21 23:36:23 +03:00
case runtime . SequenceStageUpgrade :
2022-09-20 22:27:03 +04:00
in , ok := data . ( * machine . UpgradeRequest )
if ! ok {
2020-12-21 23:36:23 +03:00
return nil , runtime . ErrInvalidSequenceData
}
phases = c . s . StageUpgrade ( c . r , in )
2022-09-20 22:27:03 +04:00
case runtime . SequenceMaintenanceUpgrade :
in , ok := data . ( * machine . UpgradeRequest )
if ! ok {
return nil , runtime . ErrInvalidSequenceData
}
2020-04-23 21:12:44 -07:00
2022-09-20 22:27:03 +04:00
phases = c . s . MaintenanceUpgrade ( c . r , in )
case runtime . SequenceReset :
in , ok := data . ( runtime . ResetOptions )
if ! ok {
2020-04-23 21:12:44 -07:00
return nil , runtime . ErrInvalidSequenceData
}
phases = c . s . Reset ( c . r , in )
2020-07-16 16:25:04 +03:00
case runtime . SequenceNoop :
2020-05-01 15:28:55 -07:00
default :
return nil , fmt . Errorf ( "sequence not implemented: %q" , seq )
2020-04-23 21:12:44 -07:00
}
return phases , nil
}
// waitForUSBDelay sleeps for the number of seconds stored in the usb_storage
// module's delay_use parameter.
//
// When the parameter file does not exist the function returns nil without
// waiting. Any other stat error, a read error, or a value that does not parse
// as an integer is returned to the caller.
func waitForUSBDelay() (err error) {
	const delayFile = "/sys/module/usb_storage/parameters/delay_use"

	if _, err = os.Stat(delayFile); err != nil {
		if os.IsNotExist(err) {
			// No parameter file: nothing to wait for.
			return nil
		}

		return err
	}

	var raw []byte

	if raw, err = os.ReadFile(delayFile); err != nil {
		return err
	}

	var seconds int

	// Strip the single trailing newline before parsing the value.
	if seconds, err = strconv.Atoi(strings.TrimSuffix(string(raw), "\n")); err != nil {
		return err
	}

	log.Printf("waiting %d second(s) for USB storage", seconds)

	time.Sleep(time.Duration(seconds) * time.Second)

	return nil
}