test: add a test for watchdog timers

Try to activate/deactivate watchdogs, change timeout, run only on QEMU.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
Signed-off-by: Dmitry Sharshakov <dmitry.sharshakov@siderolabs.com>
This commit is contained in:
Dmitry Sharshakov 2024-04-22 18:47:24 +03:00 committed by Andrey Smirnov
parent da7f276409
commit da8305ffb4
No known key found for this signature in database
GPG Key ID: FE042E3D4085A811
2 changed files with 193 additions and 0 deletions

View File

@ -0,0 +1,134 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//go:build integration_api
package api
import (
"bytes"
"context"
"io"
"path/filepath"
"time"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/config/types/runtime"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)
// WatchdogSuite is an API integration test suite which verifies watchdog timer
// configuration by comparing it against the values reported under /sys/class/watchdog.
type WatchdogSuite struct {
base.APISuite
// ctx is the per-test context; it is created in SetupTest with a one minute timeout.
ctx context.Context //nolint:containedctx
// ctxCancel releases ctx; it is invoked from TearDownTest when set.
ctxCancel context.CancelFunc
}
// SuiteName returns the identifier under which this suite is reported.
func (suite *WatchdogSuite) SuiteName() string {
	const name = "api.WatchdogSuite"

	return name
}
// SetupTest prepares a context limited to one minute for the test and skips
// the test unless a cluster is available and its provisioner is "qemu".
func (suite *WatchdogSuite) SetupTest() {
	suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), time.Minute)

	// the provisioner is only queried when cluster information is present
	runsOnQEMU := suite.Cluster != nil && suite.Cluster.Provisioner() == "qemu"
	if !runsOnQEMU {
		suite.T().Skip("skipping watchdog test since provisioner is not qemu")
	}
}
// TearDownTest cancels the per-test context, if one has been created.
func (suite *WatchdogSuite) TearDownTest() {
	if suite.ctxCancel == nil {
		return
	}

	suite.ctxCancel()
}
// readWatchdogSysfs reads /sys/class/watchdog/<watchdog>/<property> through the
// API client using nodeCtx and returns its contents with surrounding whitespace trimmed.
//
// Any failure to open, read or close the file fails the test immediately.
func (suite *WatchdogSuite) readWatchdogSysfs(nodeCtx context.Context, watchdog, property string) string { //nolint:unparam
	r, err := suite.Client.Read(nodeCtx, filepath.Join("/sys/class/watchdog", watchdog, property))
	suite.Require().NoError(err)

	// Close the reader before asserting on the read result: a failed Require
	// assertion stops the test right away, which would otherwise skip Close
	// and leave the reader open.
	value, readErr := io.ReadAll(r)
	closeErr := r.Close()

	suite.Require().NoError(readErr)
	suite.Require().NoError(closeErr)

	return string(bytes.TrimSpace(value))
}
// TestWatchdogSysfs drives the watchdog timer through its configuration lifecycle
// (disabled -> enabled with 120s timeout -> timeout changed to 60s -> disabled) and
// after each step validates the "state" and "timeout" attributes under /sys/class/watchdog.
func (suite *WatchdogSuite) TestWatchdogSysfs() {
	// the watchdog device used for the whole test
	const watchdog = "watchdog0"

	// a single random node is used throughout the test
	node := suite.RandomDiscoveredNodeInternalIP()
	suite.T().Logf("testing watchdog on node %s", node)

	// Talos API context bound to the chosen node
	nodeCtx := client.WithNode(suite.ctx, node)

	// pointer to the watchdog status resource and the event filters used while waiting on it
	statusPtr := runtimeres.NewWatchdogTimerStatus(runtimeres.WatchdogTimerConfigID).Metadata()
	onDestroyed := state.WithEventTypes(state.Destroyed)
	onPresent := state.WithEventTypes(state.Created, state.Updated)

	// expectSysfs asserts that the sysfs property of the watchdog has the wanted value
	expectSysfs := func(property, want string) {
		suite.Require().Equal(want, suite.readWatchdogSysfs(nodeCtx, watchdog, property))
	}

	cfgDocument := runtime.NewWatchdogTimerV1Alpha1()
	cfgDocument.WatchdogDevice = "/dev/" + watchdog
	cfgDocument.WatchdogTimeout = 120 * time.Second

	// step 1: make sure the watchdog is not configured
	suite.RemoveMachineConfigDocuments(nodeCtx, cfgDocument.MetaKind)

	_, err := suite.Client.COSI.WatchFor(nodeCtx, statusPtr, onDestroyed)
	suite.Require().NoError(err)

	expectSysfs("state", "inactive")

	// step 2: enable the watchdog with the 120s timeout
	suite.PatchMachineConfig(nodeCtx, cfgDocument)

	_, err = suite.Client.COSI.WatchFor(nodeCtx, statusPtr, onPresent)
	suite.Require().NoError(err)

	expectSysfs("state", "active")
	expectSysfs("timeout", "120")

	// step 3: lower the timeout to 60s and wait until the status reports the new value
	cfgDocument.WatchdogTimeout = 60 * time.Second
	suite.PatchMachineConfig(nodeCtx, cfgDocument)

	timeoutApplied := state.WithCondition(func(r resource.Resource) (bool, error) {
		status := r.(*runtimeres.WatchdogTimerStatus)

		return status.TypedSpec().Timeout == cfgDocument.WatchdogTimeout, nil
	})

	_, err = suite.Client.COSI.WatchFor(nodeCtx, statusPtr, onPresent, timeoutApplied)
	suite.Require().NoError(err)

	expectSysfs("state", "active")
	expectSysfs("timeout", "60")

	// step 4: remove the configuration again and expect the watchdog to be inactive
	suite.RemoveMachineConfigDocuments(nodeCtx, cfgDocument.MetaKind)

	_, err = suite.Client.COSI.WatchFor(nodeCtx, statusPtr, onDestroyed)
	suite.Require().NoError(err)

	expectSysfs("state", "inactive")
}
// init appends WatchdogSuite to the package-level list of suites.
func init() {
	allSuites = append(allSuites, &WatchdogSuite{})
}

View File

@ -15,15 +15,18 @@ import (
"io"
"math/rand/v2"
"path/filepath"
"slices"
"strings"
"time"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-retry/retry"
"github.com/stretchr/testify/suite"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/codes"
"gopkg.in/yaml.v3"
"github.com/siderolabs/talos/cmd/talosctl/pkg/talos/helpers"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
@ -33,6 +36,9 @@ import (
"github.com/siderolabs/talos/pkg/machinery/client"
clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
"github.com/siderolabs/talos/pkg/machinery/config"
configconfig "github.com/siderolabs/talos/pkg/machinery/config/config"
"github.com/siderolabs/talos/pkg/machinery/config/configpatcher"
"github.com/siderolabs/talos/pkg/machinery/config/container"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
"github.com/siderolabs/talos/pkg/machinery/constants"
@ -575,6 +581,59 @@ func (apiSuite *APISuite) AssertExpectedModules(ctx context.Context, node string
}
}
// UpdateMachineConfig fetches machine configuration, patches it and applies the changes.
//
// The configuration is read using nodeCtx, transformed by the patch callback,
// serialized and submitted with the AUTO apply mode. Every step is asserted with
// Require, so any error (including one returned by patch) fails the test immediately.
func (apiSuite *APISuite) UpdateMachineConfig(nodeCtx context.Context, patch func(config.Provider) (config.Provider, error)) {
	cfg, err := apiSuite.ReadConfigFromNode(nodeCtx)
	apiSuite.Require().NoError(err)

	patchedCfg, err := patch(cfg)
	apiSuite.Require().NoError(err)

	// named cfgBytes so that the local does not shadow the standard "bytes" package name
	cfgBytes, err := patchedCfg.Bytes()
	apiSuite.Require().NoError(err)

	resp, err := apiSuite.Client.ApplyConfiguration(nodeCtx, &machineapi.ApplyConfigurationRequest{
		Data: cfgBytes,
		Mode: machineapi.ApplyConfigurationRequest_AUTO,
	})
	apiSuite.Require().NoError(err)

	// guard the index below: an empty response would otherwise cause an
	// index-out-of-range panic instead of a readable test failure
	apiSuite.Require().NotEmpty(resp.Messages, "apply configuration response contains no messages")

	apiSuite.T().Logf("patched machine config: %s", resp.Messages[0].ModeDetails)
}
// PatchMachineConfig patches machine configuration on the node.
//
// Each element of patches is marshaled to YAML and loaded as a config patch; the
// resulting patches are then applied, in order, to the current machine configuration
// via UpdateMachineConfig.
func (apiSuite *APISuite) PatchMachineConfig(nodeCtx context.Context, patches ...any) {
	loaded := make([]configpatcher.Patch, 0, len(patches))

	for i := range patches {
		raw, err := yaml.Marshal(patches[i])
		apiSuite.Require().NoError(err)

		loadedPatch, err := configpatcher.LoadPatch(raw)
		apiSuite.Require().NoError(err)

		loaded = append(loaded, loadedPatch)
	}

	// applyLoaded runs all loaded patches against the fetched configuration
	applyLoaded := func(cfg config.Provider) (config.Provider, error) {
		patched, err := configpatcher.Apply(configpatcher.WithConfig(cfg), loaded)
		if err != nil {
			return nil, err
		}

		return patched.Config()
	}

	apiSuite.UpdateMachineConfig(nodeCtx, applyLoaded)
}
// RemoveMachineConfigDocuments removes machine configuration documents of specified type from the node.
//
// The current configuration is rebuilt from only those documents whose kind is not
// listed in docTypes, and the result is applied via UpdateMachineConfig.
func (apiSuite *APISuite) RemoveMachineConfigDocuments(nodeCtx context.Context, docTypes ...string) {
	// keep reports whether the document's kind is absent from docTypes
	keep := func(doc configconfig.Document) bool {
		return !slices.Contains(docTypes, doc.Kind())
	}

	withoutDocTypes := func(cfg config.Provider) (config.Provider, error) {
		return container.New(xslices.Filter(cfg.Documents(), keep)...)
	}

	apiSuite.UpdateMachineConfig(nodeCtx, withoutDocTypes)
}
// PatchV1Alpha1Config patches v1alpha1 config in the config provider.
func (apiSuite *APISuite) PatchV1Alpha1Config(provider config.Provider, patch func(*v1alpha1.Config)) []byte {
ctr, err := provider.PatchV1Alpha1(func(c *v1alpha1.Config) error {