feat: support n-5 latest Kubernetes versions

For Talos 1.6 this means Kubernetes 1.24-1.29.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
Andrey Smirnov 2023-09-21 14:42:16 +04:00
parent e71508ec10
commit e7575ecaae
14 changed files with 1026 additions and 831 deletions
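
As a rough illustration of the n-5 window, here is a minimal, self-contained sketch; it assumes constants.DefaultKubernetesVersion is "1.29.0" and constants.SupportedKubernetesVersions is 6 (the values implied by the commit message and the provision test below), so treat the numbers as assumptions rather than the source of truth.

package main

import (
	"fmt"

	"github.com/blang/semver/v4"
)

func main() {
	// assumed values for Talos 1.6; the real ones live in pkg/machinery/constants
	defaultKubernetesVersion := "1.29.0"
	supportedKubernetesVersions := uint64(6) // "n-5" means 6 supported minor releases

	maxVersion := semver.MustParse(defaultKubernetesVersion)
	minVersion := semver.Version{
		Major: maxVersion.Major,
		Minor: maxVersion.Minor - supportedKubernetesVersions + 1,
	}

	// prints: supported Kubernetes: 1.24 .. 1.29
	fmt.Printf("supported Kubernetes: %d.%d .. %d.%d\n",
		minVersion.Major, minVersion.Minor, maxVersion.Major, maxVersion.Minor)
}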


@ -131,6 +131,7 @@ linters-settings:
replace-local: true
replace-allow-list:
- gopkg.in/yaml.v3
- github.com/vmware-tanzu/sonobuoy
retract-allow-no-explanation: false
exclude-forbidden: true

go.mod

@ -6,6 +6,9 @@ replace (
// Use nested module.
github.com/siderolabs/talos/pkg/machinery => ./pkg/machinery
// see https://github.com/vmware-tanzu/sonobuoy/pull/1933
github.com/vmware-tanzu/sonobuoy => github.com/smira/sonobuoy v0.0.0-20230925141431-e9307f0a884d
// forked go-yaml that introduces RawYAML interface, which can be used to populate YAML fields using bytes
// which are then encoded as valid YAML blocks with proper indentation
gopkg.in/yaml.v3 => github.com/unix4ever/yaml v0.0.0-20220527175918-f17b0f05cf2c
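
The fork's RawYAML API itself is not part of this diff, so the sketch below is purely illustrative: the interface name and method are hypothetical stand-ins for whatever the unix4ever/yaml fork actually exposes, meant only to show the idea of splicing pre-rendered YAML bytes into the output as a properly indented block.

package yamlsketch

// rawYAML is a hypothetical marker interface: a type implementing it hands the
// encoder pre-rendered YAML bytes to emit as a nested block, re-indented to fit
// the enclosing document instead of being re-encoded or quoted as a string.
type rawYAML interface {
	MarshalRawYAML() ([]byte, error)
}

// inlineConfig holds an already-encoded YAML document (for example a kubelet
// configuration) that should appear verbatim under some field.
type inlineConfig []byte

func (c inlineConfig) MarshalRawYAML() ([]byte, error) {
	return c, nil
}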

go.sum

@ -682,6 +682,8 @@ github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.1 h1:Ou41VVR3nMWWmTiEUnj0OlsgOSCUFgsPAOl6jRIcVtQ=
github.com/sirupsen/logrus v1.9.1/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smira/sonobuoy v0.0.0-20230925141431-e9307f0a884d h1:9THSD1LUdi+s1k8autJw9vme/6aT1J0sIN/Hh+7ZW48=
github.com/smira/sonobuoy v0.0.0-20230925141431-e9307f0a884d/go.mod h1:EZay4jbB9SKlkG/ywbmtoe8rWzd0w1eCxuk+tHbBQbM=
github.com/spf13/afero v1.9.3 h1:41FoI0fD7OR7mGcKE/aOiLkGreyf8ifIOQmJANWogMk=
github.com/spf13/afero v1.9.3/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcDf8Y=
github.com/spf13/cast v1.5.0 h1:rj3WzYc11XZaIZMPKmwP96zkFEnnAmV8s6XbB2aY32w=
@ -734,8 +736,6 @@ github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
github.com/vmware-tanzu/sonobuoy v0.56.17 h1:X30OozdtWOkvMCOZLb1b/QSeKEBx634py8ArTw8RcFU=
github.com/vmware-tanzu/sonobuoy v0.56.17/go.mod h1:EZay4jbB9SKlkG/ywbmtoe8rWzd0w1eCxuk+tHbBQbM=
github.com/vmware/vmw-guestinfo v0.0.0-20220317130741-510905f0efa3 h1:v6jG/tdl4O07LNVp74Nt7/OyL+1JsIW1M2f/nSvQheY=
github.com/vmware/vmw-guestinfo v0.0.0-20220317130741-510905f0efa3/go.mod h1:CSBTxrhePCm0cmXNKDGeu+6bOQzpaEklfCqEpn89JWk=
github.com/vultr/metadata v1.1.0 h1:RUjCnH5Mdlz7uuyfb1jOZNkU72zl/HwK76jLzVFdiOo=


@ -357,9 +357,7 @@ func (ctrl *KubeletServiceController) writeConfig(cfgSpec *k8s.KubeletSpecSpec)
nil,
nil,
json.SerializerOptions{
Yaml: true,
Pretty: true,
Strict: true,
Yaml: true,
},
)


@ -16,6 +16,7 @@ import (
"github.com/cosi-project/runtime/pkg/state"
"github.com/hashicorp/go-multierror"
"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-kubernetes/kubernetes/compatibility"
"github.com/siderolabs/go-pointer"
"go.uber.org/zap"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -95,6 +96,8 @@ func (ctrl *KubeletSpecController) Run(ctx context.Context, r controller.Runtime
cfgSpec := cfg.TypedSpec()
kubeletVersion := compatibility.VersionFromImageRef(cfgSpec.Image)
nodename, err := safe.ReaderGetByID[*k8s.Nodename](ctx, r, k8s.NodenameID)
if err != nil {
if state.IsNotFoundError(err) {
@ -123,6 +126,10 @@ func (ctrl *KubeletSpecController) Run(ctx context.Context, r controller.Runtime
args["cloud-provider"] = "external"
}
if !kubeletVersion.SupportsKubeletConfigContainerRuntimeEndpoint() {
args["container-runtime-endpoint"] = constants.CRIContainerdAddress
}
extraArgs := argsbuilder.Args(cfgSpec.ExtraArgs)
// if the user supplied a hostname override, we do not manage it anymore
@ -158,7 +165,7 @@ func (ctrl *KubeletSpecController) Run(ctx context.Context, r controller.Runtime
return fmt.Errorf("error merging arguments: %w", err)
}
kubeletConfig, err := NewKubeletConfiguration(cfgSpec)
kubeletConfig, err := NewKubeletConfiguration(cfgSpec, kubeletVersion)
if err != nil {
return fmt.Errorf("error creating kubelet configuration: %w", err)
}
@ -226,7 +233,7 @@ func prepareExtraConfig(extraConfig map[string]interface{}) (*kubeletconfig.Kube
// NewKubeletConfiguration builds kubelet configuration with defaults and overrides from extraConfig.
//
//nolint:gocyclo,cyclop
func NewKubeletConfiguration(cfgSpec *k8s.KubeletConfigSpec) (*kubeletconfig.KubeletConfiguration, error) {
func NewKubeletConfiguration(cfgSpec *k8s.KubeletConfigSpec, kubeletVersion compatibility.Version) (*kubeletconfig.KubeletConfiguration, error) {
config, err := prepareExtraConfig(cfgSpec.ExtraConfig)
if err != nil {
return nil, err
@ -265,11 +272,22 @@ func NewKubeletConfiguration(cfgSpec *k8s.KubeletConfigSpec) (*kubeletconfig.Kub
config.KubeletCgroups = constants.CgroupKubelet
config.RotateCertificates = true
config.ProtectKernelDefaults = true
config.ContainerRuntimeEndpoint = "unix://" + constants.CRIContainerdAddress
if kubeletVersion.SupportsKubeletConfigContainerRuntimeEndpoint() {
config.ContainerRuntimeEndpoint = "unix://" + constants.CRIContainerdAddress
}
// SeccompDefault feature gate is enabled by default in Kubernetes 1.25+, GA in 1.27
if cfgSpec.DefaultRuntimeSeccompEnabled {
config.SeccompDefault = pointer.To(true)
if !kubeletVersion.FeatureFlagSeccompDefaultEnabledByDefault() {
if config.FeatureGates == nil {
config.FeatureGates = map[string]bool{}
}
config.FeatureGates["SeccompDefault"] = true
}
}
if cfgSpec.EnableFSQuotaMonitoring {

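For reference, the version gates used above can be exercised directly; this is a minimal sketch calling the same go-kubernetes compatibility helpers the controller relies on (the image references are just examples):

package main

import (
	"fmt"

	"github.com/siderolabs/go-kubernetes/kubernetes/compatibility"
)

func main() {
	for _, ref := range []string{
		"ghcr.io/siderolabs/kubelet:v1.24.0",
		"ghcr.io/siderolabs/kubelet:v1.29.0",
	} {
		v := compatibility.VersionFromImageRef(ref)

		// older kubelets get --container-runtime-endpoint as a CLI flag plus an
		// explicit SeccompDefault feature gate; newer ones take the endpoint via
		// the kubelet configuration and already default seccomp on
		fmt.Printf("%s: configContainerRuntimeEndpoint=%v seccompDefaultByDefault=%v\n",
			ref,
			v.SupportsKubeletConfigContainerRuntimeEndpoint(),
			v.FeatureFlagSeccompDefaultEnabledByDefault())
	}
}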

@ -2,25 +2,19 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
//nolint:dupl
//nolint:goconst
package k8s_test
import (
"context"
"log"
"net/netip"
"sync"
"testing"
"time"
"github.com/cosi-project/runtime/pkg/controller/runtime"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"github.com/cosi-project/runtime/pkg/state/impl/inmem"
"github.com/cosi-project/runtime/pkg/state/impl/namespaced"
"github.com/cosi-project/runtime/pkg/resource/rtestutils"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/siderolabs/go-kubernetes/kubernetes/compatibility"
"github.com/siderolabs/go-pointer"
"github.com/siderolabs/go-retry/retry"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
@ -29,52 +23,19 @@ import (
v1 "k8s.io/component-base/logs/api/v1"
kubeletconfig "k8s.io/kubelet/config/v1beta1"
"github.com/siderolabs/talos/internal/app/machined/pkg/controllers/ctest"
k8sctrl "github.com/siderolabs/talos/internal/app/machined/pkg/controllers/k8s"
"github.com/siderolabs/talos/pkg/logging"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/k8s"
)
type KubeletSpecSuite struct {
suite.Suite
state state.State
runtime *runtime.Runtime
wg sync.WaitGroup
ctx context.Context //nolint:containedctx
ctxCancel context.CancelFunc
}
func (suite *KubeletSpecSuite) SetupTest() {
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 3*time.Minute)
suite.state = state.WrapCore(namespaced.NewState(inmem.Build))
var err error
suite.runtime, err = runtime.NewRuntime(suite.state, logging.Wrap(log.Writer()))
suite.Require().NoError(err)
suite.Require().NoError(suite.runtime.RegisterController(&k8sctrl.KubeletSpecController{}))
suite.startRuntime()
}
func (suite *KubeletSpecSuite) startRuntime() {
suite.wg.Add(1)
go func() {
defer suite.wg.Done()
suite.Assert().NoError(suite.runtime.Run(suite.ctx))
}()
ctest.DefaultSuite
}
func (suite *KubeletSpecSuite) TestReconcileDefault() {
cfg := k8s.NewKubeletConfig(k8s.NamespaceName, k8s.KubeletID)
cfg.TypedSpec().Image = "kubelet:v1.0.0"
cfg.TypedSpec().Image = "kubelet:v1.29.0"
cfg.TypedSpec().ClusterDNS = []string{"10.96.0.10"}
cfg.TypedSpec().ClusterDomain = "cluster.local"
cfg.TypedSpec().ExtraArgs = map[string]string{"foo": "bar"}
@ -87,116 +48,115 @@ func (suite *KubeletSpecSuite) TestReconcileDefault() {
}
cfg.TypedSpec().CloudProviderExternal = true
suite.Require().NoError(suite.state.Create(suite.ctx, cfg))
suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg))
nodeIP := k8s.NewNodeIP(k8s.NamespaceName, k8s.KubeletID)
nodeIP.TypedSpec().Addresses = []netip.Addr{netip.MustParseAddr("172.20.0.2")}
suite.Require().NoError(suite.state.Create(suite.ctx, nodeIP))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodeIP))
nodename := k8s.NewNodename(k8s.NamespaceName, k8s.NodenameID)
nodename.TypedSpec().Nodename = "example.com"
suite.Require().NoError(suite.state.Create(suite.ctx, nodename))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodename))
suite.Assert().NoError(
retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
kubeletSpec, err := suite.state.Get(
suite.ctx,
resource.NewMetadata(
k8s.NamespaceName,
k8s.KubeletSpecType,
k8s.KubeletID,
resource.VersionUndefined,
),
)
if err != nil {
if state.IsNotFoundError(err) {
return retry.ExpectedError(err)
}
rtestutils.AssertResources(suite.Ctx(), suite.T(), suite.State(), []resource.ID{k8s.KubeletID}, func(kubeletSpec *k8s.KubeletSpec, asrt *assert.Assertions) {
spec := kubeletSpec.TypedSpec()
return err
}
asrt.Equal(cfg.TypedSpec().Image, spec.Image)
asrt.Equal(
[]string{
"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubeconfig",
"--cert-dir=/var/lib/kubelet/pki",
"--cloud-provider=external",
"--config=/etc/kubernetes/kubelet.yaml",
"--foo=bar",
"--hostname-override=example.com",
"--kubeconfig=/etc/kubernetes/kubeconfig-kubelet",
"--node-ip=172.20.0.2",
}, spec.Args,
)
asrt.Equal(cfg.TypedSpec().ExtraMounts, spec.ExtraMounts)
spec := kubeletSpec.(*k8s.KubeletSpec).TypedSpec()
suite.Assert().Equal(cfg.TypedSpec().Image, spec.Image)
suite.Assert().Equal(
[]string{
"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubeconfig",
"--cert-dir=/var/lib/kubelet/pki",
"--cloud-provider=external",
"--config=/etc/kubernetes/kubelet.yaml",
"--foo=bar",
"--hostname-override=example.com",
"--kubeconfig=/etc/kubernetes/kubeconfig-kubelet",
"--node-ip=172.20.0.2",
}, spec.Args,
)
suite.Assert().Equal(cfg.TypedSpec().ExtraMounts, spec.ExtraMounts)
suite.Assert().Equal([]interface{}{"10.96.0.10"}, spec.Config["clusterDNS"])
suite.Assert().Equal("cluster.local", spec.Config["clusterDomain"])
return nil
},
),
)
asrt.Equal([]interface{}{"10.96.0.10"}, spec.Config["clusterDNS"])
asrt.Equal("cluster.local", spec.Config["clusterDomain"])
})
}
func (suite *KubeletSpecSuite) TestReconcileWithExplicitNodeIP() {
cfg := k8s.NewKubeletConfig(k8s.NamespaceName, k8s.KubeletID)
cfg.TypedSpec().Image = "kubelet:v1.0.0"
cfg.TypedSpec().Image = "kubelet:v1.29.0"
cfg.TypedSpec().ClusterDNS = []string{"10.96.0.10"}
cfg.TypedSpec().ClusterDomain = "cluster.local"
cfg.TypedSpec().ExtraArgs = map[string]string{"node-ip": "10.0.0.1"}
suite.Require().NoError(suite.state.Create(suite.ctx, cfg))
suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg))
nodename := k8s.NewNodename(k8s.NamespaceName, k8s.NodenameID)
nodename.TypedSpec().Nodename = "example.com"
suite.Require().NoError(suite.state.Create(suite.ctx, nodename))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodename))
suite.Assert().NoError(
retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
kubeletSpec, err := suite.state.Get(
suite.ctx,
resource.NewMetadata(
k8s.NamespaceName,
k8s.KubeletSpecType,
k8s.KubeletID,
resource.VersionUndefined,
),
)
if err != nil {
if state.IsNotFoundError(err) {
return retry.ExpectedError(err)
}
rtestutils.AssertResources(suite.Ctx(), suite.T(), suite.State(), []resource.ID{k8s.KubeletID}, func(kubeletSpec *k8s.KubeletSpec, asrt *assert.Assertions) {
spec := kubeletSpec.TypedSpec()
return err
}
asrt.Equal(cfg.TypedSpec().Image, spec.Image)
asrt.Equal(
[]string{
"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubeconfig",
"--cert-dir=/var/lib/kubelet/pki",
"--config=/etc/kubernetes/kubelet.yaml",
"--hostname-override=example.com",
"--kubeconfig=/etc/kubernetes/kubeconfig-kubelet",
"--node-ip=10.0.0.1",
}, spec.Args,
)
})
}
spec := kubeletSpec.(*k8s.KubeletSpec).TypedSpec()
func (suite *KubeletSpecSuite) TestReconcileWithContainerRuntimeEnpointFlag() {
cfg := k8s.NewKubeletConfig(k8s.NamespaceName, k8s.KubeletID)
cfg.TypedSpec().Image = "kubelet:v1.25.0"
cfg.TypedSpec().ClusterDNS = []string{"10.96.0.10"}
cfg.TypedSpec().ClusterDomain = "cluster.local"
cfg.TypedSpec().ExtraArgs = map[string]string{"node-ip": "10.0.0.1"}
suite.Assert().Equal(cfg.TypedSpec().Image, spec.Image)
suite.Assert().Equal(
[]string{
"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubeconfig",
"--cert-dir=/var/lib/kubelet/pki",
"--config=/etc/kubernetes/kubelet.yaml",
"--hostname-override=example.com",
"--kubeconfig=/etc/kubernetes/kubeconfig-kubelet",
"--node-ip=10.0.0.1",
}, spec.Args,
)
suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg))
return nil
},
),
)
nodename := k8s.NewNodename(k8s.NamespaceName, k8s.NodenameID)
nodename.TypedSpec().Nodename = "example.com"
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodename))
rtestutils.AssertResources(suite.Ctx(), suite.T(), suite.State(), []resource.ID{k8s.KubeletID}, func(kubeletSpec *k8s.KubeletSpec, asrt *assert.Assertions) {
spec := kubeletSpec.TypedSpec()
asrt.Equal(cfg.TypedSpec().Image, spec.Image)
asrt.Equal(
[]string{
"--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubeconfig",
"--cert-dir=/var/lib/kubelet/pki",
"--config=/etc/kubernetes/kubelet.yaml",
"--container-runtime-endpoint=/run/containerd/containerd.sock",
"--hostname-override=example.com",
"--kubeconfig=/etc/kubernetes/kubeconfig-kubelet",
"--node-ip=10.0.0.1",
}, spec.Args,
)
var kubeletConfiguration kubeletconfig.KubeletConfiguration
if err := k8sruntime.DefaultUnstructuredConverter.FromUnstructured(
spec.Config,
&kubeletConfiguration,
); err != nil {
asrt.NoError(err)
return
}
asrt.Empty(kubeletConfiguration.ContainerRuntimeEndpoint)
})
}
func (suite *KubeletSpecSuite) TestReconcileWithExtraConfig() {
@ -208,57 +168,36 @@ func (suite *KubeletSpecSuite) TestReconcileWithExtraConfig() {
"serverTLSBootstrap": true,
}
suite.Require().NoError(suite.state.Create(suite.ctx, cfg))
suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg))
nodename := k8s.NewNodename(k8s.NamespaceName, k8s.NodenameID)
nodename.TypedSpec().Nodename = "foo.com"
suite.Require().NoError(suite.state.Create(suite.ctx, nodename))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodename))
nodeIP := k8s.NewNodeIP(k8s.NamespaceName, k8s.KubeletID)
nodeIP.TypedSpec().Addresses = []netip.Addr{netip.MustParseAddr("172.20.0.3")}
suite.Require().NoError(suite.state.Create(suite.ctx, nodeIP))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodeIP))
suite.Assert().NoError(
retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
kubeletSpec, err := suite.state.Get(
suite.ctx,
resource.NewMetadata(
k8s.NamespaceName,
k8s.KubeletSpecType,
k8s.KubeletID,
resource.VersionUndefined,
),
)
if err != nil {
if state.IsNotFoundError(err) {
return retry.ExpectedError(err)
}
rtestutils.AssertResources(suite.Ctx(), suite.T(), suite.State(), []resource.ID{k8s.KubeletID}, func(kubeletSpec *k8s.KubeletSpec, asrt *assert.Assertions) {
spec := kubeletSpec.TypedSpec()
return err
}
var kubeletConfiguration kubeletconfig.KubeletConfiguration
spec := kubeletSpec.(*k8s.KubeletSpec).TypedSpec()
if err := k8sruntime.DefaultUnstructuredConverter.FromUnstructured(
spec.Config,
&kubeletConfiguration,
); err != nil {
asrt.NoError(err)
var kubeletConfiguration kubeletconfig.KubeletConfiguration
return
}
if err := k8sruntime.DefaultUnstructuredConverter.FromUnstructured(
spec.Config,
&kubeletConfiguration,
); err != nil {
return err
}
suite.Assert().Equal("/", kubeletConfiguration.CgroupRoot)
suite.Assert().Equal(cfg.TypedSpec().ClusterDomain, kubeletConfiguration.ClusterDomain)
suite.Assert().True(kubeletConfiguration.ServerTLSBootstrap)
return nil
},
),
)
asrt.Equal("/", kubeletConfiguration.CgroupRoot)
asrt.Equal(cfg.TypedSpec().ClusterDomain, kubeletConfiguration.ClusterDomain)
asrt.True(kubeletConfiguration.ServerTLSBootstrap)
})
}
func (suite *KubeletSpecSuite) TestReconcileWithSkipNodeRegistration() {
@ -268,76 +207,54 @@ func (suite *KubeletSpecSuite) TestReconcileWithSkipNodeRegistration() {
cfg.TypedSpec().ClusterDomain = "some.local"
cfg.TypedSpec().SkipNodeRegistration = true
suite.Require().NoError(suite.state.Create(suite.ctx, cfg))
suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg))
nodename := k8s.NewNodename(k8s.NamespaceName, k8s.NodenameID)
nodename.TypedSpec().Nodename = "foo.com"
suite.Require().NoError(suite.state.Create(suite.ctx, nodename))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodename))
nodeIP := k8s.NewNodeIP(k8s.NamespaceName, k8s.KubeletID)
nodeIP.TypedSpec().Addresses = []netip.Addr{netip.MustParseAddr("172.20.0.3")}
suite.Require().NoError(suite.state.Create(suite.ctx, nodeIP))
suite.Require().NoError(suite.State().Create(suite.Ctx(), nodeIP))
suite.Assert().NoError(
retry.Constant(10*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(
func() error {
kubeletSpec, err := suite.state.Get(
suite.ctx,
resource.NewMetadata(
k8s.NamespaceName,
k8s.KubeletSpecType,
k8s.KubeletID,
resource.VersionUndefined,
),
)
if err != nil {
if state.IsNotFoundError(err) {
return retry.ExpectedError(err)
}
rtestutils.AssertResources(suite.Ctx(), suite.T(), suite.State(), []resource.ID{k8s.KubeletID}, func(kubeletSpec *k8s.KubeletSpec, asrt *assert.Assertions) {
spec := kubeletSpec.TypedSpec()
return err
}
var kubeletConfiguration kubeletconfig.KubeletConfiguration
spec := kubeletSpec.(*k8s.KubeletSpec).TypedSpec()
if err := k8sruntime.DefaultUnstructuredConverter.FromUnstructured(
spec.Config,
&kubeletConfiguration,
); err != nil {
asrt.NoError(err)
var kubeletConfiguration kubeletconfig.KubeletConfiguration
return
}
if err := k8sruntime.DefaultUnstructuredConverter.FromUnstructured(
spec.Config,
&kubeletConfiguration,
); err != nil {
return err
}
suite.Assert().Equal("/", kubeletConfiguration.CgroupRoot)
suite.Assert().Equal(cfg.TypedSpec().ClusterDomain, kubeletConfiguration.ClusterDomain)
suite.Assert().Equal([]string{
"--cert-dir=/var/lib/kubelet/pki",
"--config=/etc/kubernetes/kubelet.yaml",
"--hostname-override=foo.com",
"--node-ip=172.20.0.3",
}, spec.Args)
return nil
},
),
)
}
func (suite *KubeletSpecSuite) TearDownTest() {
suite.T().Log("tear down")
suite.ctxCancel()
suite.wg.Wait()
asrt.Equal("/", kubeletConfiguration.CgroupRoot)
asrt.Equal(cfg.TypedSpec().ClusterDomain, kubeletConfiguration.ClusterDomain)
asrt.Equal([]string{
"--cert-dir=/var/lib/kubelet/pki",
"--config=/etc/kubernetes/kubelet.yaml",
"--hostname-override=foo.com",
"--node-ip=172.20.0.3",
}, spec.Args)
})
}
func TestKubeletSpecSuite(t *testing.T) {
t.Parallel()
suite.Run(t, new(KubeletSpecSuite))
suite.Run(t, &KubeletSpecSuite{
DefaultSuite: ctest.DefaultSuite{
Timeout: 3 * time.Second,
AfterSetup: func(suite *ctest.DefaultSuite) {
suite.Require().NoError(suite.Runtime().RegisterController(&k8sctrl.KubeletSpecController{}))
},
},
})
}
func TestNewKubeletConfigurationFail(t *testing.T) {
@ -392,7 +309,7 @@ func TestNewKubeletConfigurationFail(t *testing.T) {
tt.name, func(t *testing.T) {
t.Parallel()
_, err := k8sctrl.NewKubeletConfiguration(tt.cfgSpec)
_, err := k8sctrl.NewKubeletConfiguration(tt.cfgSpec, compatibility.VersionFromImageRef(""))
require.Error(t, err)
assert.EqualError(t, err, tt.expectedErr)
@ -455,6 +372,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
for _, tt := range []struct {
name string
cfgSpec *k8s.KubeletConfigSpec
kubeletVersion compatibility.Version
expectedOverrides func(*kubeletconfig.KubeletConfiguration)
}{
{
@ -467,6 +385,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
"enableDebuggingHandlers": true,
},
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.29.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.OOMScoreAdj = pointer.To[int32](-300)
kc.EnableDebuggingHandlers = pointer.To(true)
@ -482,6 +401,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
"shutdownGracePeriodCriticalPods": "0s",
},
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.29.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.ShutdownGracePeriod = metav1.Duration{}
kc.ShutdownGracePeriodCriticalPods = metav1.Duration{}
@ -494,10 +414,27 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
ClusterDomain: "cluster.local",
DefaultRuntimeSeccompEnabled: true,
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.29.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.SeccompDefault = pointer.To(true)
},
},
{
name: "enable seccomp default + feature flag",
cfgSpec: &k8s.KubeletConfigSpec{
ClusterDNS: []string{"10.0.0.5"},
ClusterDomain: "cluster.local",
DefaultRuntimeSeccompEnabled: true,
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.24.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.ContainerRuntimeEndpoint = ""
kc.SeccompDefault = pointer.To(true)
kc.FeatureGates = map[string]bool{
"SeccompDefault": true,
}
},
},
{
name: "enable skipNodeRegistration",
cfgSpec: &k8s.KubeletConfigSpec{
@ -505,6 +442,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
ClusterDomain: "cluster.local",
SkipNodeRegistration: true,
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.29.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.Authentication.Webhook.Enabled = pointer.To(false)
kc.Authorization.Mode = kubeletconfig.KubeletAuthorizationModeAlwaysAllow
@ -517,6 +455,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
ClusterDomain: "cluster.local",
DisableManifestsDirectory: true,
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.29.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.StaticPodPath = ""
},
@ -528,6 +467,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
ClusterDomain: "cluster.local",
EnableFSQuotaMonitoring: true,
},
kubeletVersion: compatibility.VersionFromImageRef("ghcr.io/siderolabs/kubelet:v1.29.0"),
expectedOverrides: func(kc *kubeletconfig.KubeletConfiguration) {
kc.FeatureGates = map[string]bool{
"LocalStorageCapacityIsolationFSQuotaMonitoring": true,
@ -543,7 +483,7 @@ func TestNewKubeletConfigurationMerge(t *testing.T) {
expected := defaultKubeletConfig
tt.expectedOverrides(&expected)
config, err := k8sctrl.NewKubeletConfiguration(tt.cfgSpec)
config, err := k8sctrl.NewKubeletConfiguration(tt.cfgSpec, tt.kubeletVersion)
require.NoError(t, err)


@ -165,7 +165,7 @@ func init() {
flag.Int64Var(&provision_test.DefaultSettings.CPUs, "talos.provision.cpu", provision_test.DefaultSettings.CPUs, "CPU count for each VM (provision tests only)")
flag.Int64Var(&provision_test.DefaultSettings.MemMB, "talos.provision.mem", provision_test.DefaultSettings.MemMB, "memory (in MiB) for each VM (provision tests only)")
flag.Uint64Var(&provision_test.DefaultSettings.DiskGB, "talos.provision.disk", provision_test.DefaultSettings.DiskGB, "disk size (in GiB) for each VM (provision tests only)")
flag.IntVar(&provision_test.DefaultSettings.MasterNodes, "talos.provision.masters", provision_test.DefaultSettings.MasterNodes, "master node count (provision tests only)")
flag.IntVar(&provision_test.DefaultSettings.ControlplaneNodes, "talos.provision.controlplanes", provision_test.DefaultSettings.ControlplaneNodes, "controlplane node count (provision tests only)")
flag.IntVar(&provision_test.DefaultSettings.WorkerNodes, "talos.provision.workers", provision_test.DefaultSettings.WorkerNodes, "worker node count (provision tests only)")
flag.StringVar(&provision_test.DefaultSettings.TargetInstallImageRegistry, "talos.provision.target-installer-registry",
provision_test.DefaultSettings.TargetInstallImageRegistry, "image registry for target installer image (provision tests only)")


@ -5,3 +5,153 @@
//go:build integration_provision
package provision
import (
"fmt"
"slices"
"github.com/blang/semver/v4"
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/siderolabs/gen/maps"
"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/talos/cmd/talosctl/pkg/mgmt/helpers"
"github.com/siderolabs/talos/pkg/images"
"github.com/siderolabs/talos/pkg/machinery/constants"
)
// K8sCompatibilitySuite ...
type K8sCompatibilitySuite struct {
BaseSuite
track int
versionsSequence []string
}
// SuiteName ...
func (suite *K8sCompatibilitySuite) SuiteName() string {
return fmt.Sprintf("provision.UpgradeSuite.KubernetesCompatibility-TR%d", suite.track)
}
// SetupSuite ...
func (suite *K8sCompatibilitySuite) SetupSuite() {
// figure out the Kubernetes versions to go through; the calculation is based on:
// * DefaultKubernetesVersion, e.g. 1.29.0
// * SupportedKubernetesVersions, e.g. 6
// * available `kubelet` images (tags)
//
// E.g. with example values above, upgrade will go through:
// 1.24 -> 1.25 -> 1.26 -> 1.27 -> 1.28 -> 1.29 (6 versions)
// For each past Kubernetes release, the latest patch release will be used;
// for the latest version (DefaultKubernetesVersion), the exact version will be used
kubeletRepository, err := name.NewRepository(constants.KubeletImage)
suite.Require().NoError(err)
maxVersion, err := semver.Parse(constants.DefaultKubernetesVersion)
suite.Require().NoError(err)
minVersion := semver.Version{
Major: maxVersion.Major,
Minor: maxVersion.Minor - constants.SupportedKubernetesVersions + 1,
Patch: 0,
}
type versionInfo struct {
Major uint64
Minor uint64
}
versionsToUse := map[versionInfo]semver.Version{
{
Major: maxVersion.Major,
Minor: maxVersion.Minor,
}: maxVersion,
}
tags, err := remote.List(kubeletRepository)
suite.Require().NoError(err)
for _, tag := range tags {
version, err := semver.ParseTolerant(tag)
if err != nil {
continue
}
if version.Pre != nil {
continue
}
if version.LT(minVersion) {
continue
}
if version.GT(maxVersion) {
continue
}
versionKey := versionInfo{
Major: version.Major,
Minor: version.Minor,
}
if curVersion := versionsToUse[versionKey]; version.GT(curVersion) {
versionsToUse[versionKey] = version
}
}
k8sVersions := maps.Values(versionsToUse)
slices.SortFunc(k8sVersions, func(a, b semver.Version) int {
return a.Compare(b)
})
suite.versionsSequence = xslices.Map(k8sVersions, semver.Version.String)
suite.T().Logf("using following upgrade sequence: %v", suite.versionsSequence)
suite.Assert().Len(suite.versionsSequence, constants.SupportedKubernetesVersions)
suite.BaseSuite.SetupSuite()
}
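A worked example of the selection logic above (the tag list is illustrative, not what the registry actually returns; DefaultKubernetesVersion is assumed to be 1.29.0 with 6 supported versions): only final releases inside the supported window are kept, collapsed to the newest patch per minor, with the top minor pinned to the default version.

package main

import (
	"fmt"
	"slices"

	"github.com/blang/semver/v4"
	"github.com/siderolabs/gen/maps"
	"github.com/siderolabs/gen/xslices"
)

func main() {
	// illustrative kubelet tags; the suite lists the real ones from the registry
	tags := []string{"v1.23.17", "v1.24.16", "v1.24.17", "v1.28.4", "v1.29.0", "v1.30.0-alpha.1"}

	maxVersion := semver.MustParse("1.29.0") // assumed DefaultKubernetesVersion
	minVersion := semver.Version{Major: maxVersion.Major, Minor: maxVersion.Minor - 6 + 1}

	type minor struct{ Major, Minor uint64 }

	latest := map[minor]semver.Version{
		{maxVersion.Major, maxVersion.Minor}: maxVersion, // pin the newest minor to the default version
	}

	for _, tag := range tags {
		v, err := semver.ParseTolerant(tag)
		if err != nil || v.Pre != nil || v.LT(minVersion) || v.GT(maxVersion) {
			continue // skip unparseable, pre-release and out-of-window tags
		}

		key := minor{v.Major, v.Minor}
		if v.GT(latest[key]) {
			latest[key] = v
		}
	}

	versions := maps.Values(latest)
	slices.SortFunc(versions, func(a, b semver.Version) int { return a.Compare(b) })

	// prints [1.24.17 1.28.4 1.29.0] for the illustrative tag list above
	fmt.Println(xslices.Map(versions, semver.Version.String))
}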
// TestAllVersions tries to run cluster on all Kubernetes versions.
func (suite *K8sCompatibilitySuite) TestAllVersions() {
// start a cluster using latest Talos, and on earliest supported Kubernetes version
suite.setupCluster(clusterOptions{
ClusterName: "k8s-compat",
ControlplaneNodes: DefaultSettings.ControlplaneNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
SourceKernelPath: helpers.ArtifactPath(constants.KernelAssetWithArch),
SourceInitramfsPath: helpers.ArtifactPath(constants.InitramfsAssetWithArch),
SourceInstallerImage: fmt.Sprintf(
"%s/%s:%s",
DefaultSettings.TargetInstallImageRegistry,
images.DefaultInstallerImageName,
DefaultSettings.CurrentVersion,
),
SourceVersion: DefaultSettings.CurrentVersion,
SourceK8sVersion: suite.versionsSequence[0],
})
suite.runE2E(suite.versionsSequence[0])
// for each next supported Kubernetes version, upgrade k8s and run e2e tests
for i := 1; i < len(suite.versionsSequence); i++ {
suite.upgradeKubernetes(suite.versionsSequence[i-1], suite.versionsSequence[i], false)
suite.waitForClusterHealth()
suite.runE2E(suite.versionsSequence[i])
}
}
func init() {
allSuites = append(
allSuites,
&K8sCompatibilitySuite{track: 2},
)
}


@ -8,13 +8,45 @@
package provision
import (
"context"
"encoding/json"
"fmt"
"net/netip"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-blockdevice/blockdevice/encryption"
"github.com/siderolabs/go-kubernetes/kubernetes/upgrade"
"github.com/siderolabs/go-retry/retry"
sideronet "github.com/siderolabs/net"
"github.com/stretchr/testify/suite"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/cluster/check"
"github.com/siderolabs/talos/pkg/cluster/kubernetes"
"github.com/siderolabs/talos/pkg/cluster/sonobuoy"
machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
talosclient "github.com/siderolabs/talos/pkg/machinery/client"
clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
"github.com/siderolabs/talos/pkg/machinery/config"
"github.com/siderolabs/talos/pkg/machinery/config/bundle"
"github.com/siderolabs/talos/pkg/machinery/config/encoder"
"github.com/siderolabs/talos/pkg/machinery/config/generate"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/provision"
"github.com/siderolabs/talos/pkg/provision/access"
"github.com/siderolabs/talos/pkg/provision/providers/qemu"
"github.com/siderolabs/talos/pkg/version"
)
@ -40,8 +72,8 @@ type Settings struct {
MemMB int64
DiskGB uint64
// Node count for the tests
MasterNodes int
WorkerNodes int
ControlplaneNodes int
WorkerNodes int
// Target installer image registry
TargetInstallImageRegistry string
// Current version of the cluster (built in the CI pass)
@ -58,10 +90,10 @@ type Settings struct {
var DefaultSettings = Settings{
CIDR: "172.21.0.0/24",
MTU: 1500,
CPUs: 2,
MemMB: 2 * 1024,
DiskGB: 8,
MasterNodes: 3,
CPUs: 4,
MemMB: 3 * 1024,
DiskGB: 12,
ControlplaneNodes: 3,
WorkerNodes: 1,
TargetInstallImageRegistry: "ghcr.io",
CNIBundleURL: fmt.Sprintf("https://github.com/siderolabs/talos/releases/download/%s/talosctl-cni-bundle-%s.tar.gz", trimVersion(version.Tag), constants.ArchVariable),
@ -71,3 +103,533 @@ func trimVersion(version string) string {
// remove anything extra after semantic version core, `v0.3.2-1-abcd` -> `v0.3.2`
return regexp.MustCompile(`(-\d+-g[0-9a-f]+)$`).ReplaceAllString(version, "")
}
var defaultNameservers = []netip.Addr{netip.MustParseAddr("8.8.8.8"), netip.MustParseAddr("1.1.1.1")}
// BaseSuite provides base features for provision tests.
type BaseSuite struct {
suite.Suite
base.TalosSuite
provisioner provision.Provisioner
configBundle *bundle.Bundle
clusterAccess *access.Adapter
controlPlaneEndpoint string
//nolint:containedctx
ctx context.Context
ctxCancel context.CancelFunc
stateDir string
cniDir string
}
// SetupSuite ...
func (suite *BaseSuite) SetupSuite() {
// timeout for the whole test
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 50*time.Minute)
var err error
suite.provisioner, err = qemu.NewProvisioner(suite.ctx)
suite.Require().NoError(err)
}
// TearDownSuite ...
func (suite *BaseSuite) TearDownSuite() {
if suite.T().Failed() && DefaultSettings.CrashdumpEnabled && suite.Cluster != nil {
// for failed tests, produce crash dump for easier debugging,
// as cluster is going to be torn down below
suite.provisioner.CrashDump(suite.ctx, suite.Cluster, os.Stderr)
if suite.clusterAccess != nil {
suite.clusterAccess.CrashDump(suite.ctx, os.Stderr)
}
}
if suite.clusterAccess != nil {
suite.Assert().NoError(suite.clusterAccess.Close())
}
if suite.Cluster != nil {
suite.Assert().NoError(suite.provisioner.Destroy(suite.ctx, suite.Cluster))
}
suite.ctxCancel()
if suite.stateDir != "" {
suite.Assert().NoError(os.RemoveAll(suite.stateDir))
}
if suite.provisioner != nil {
suite.Assert().NoError(suite.provisioner.Close())
}
}
// waitForClusterHealth asserts cluster health after any change.
func (suite *BaseSuite) waitForClusterHealth() {
runs := 1
singleNodeCluster := len(suite.Cluster.Info().Nodes) == 1
if singleNodeCluster {
// run health check several times for single node clusters,
// as self-hosted control plane is not stable after reboot
runs = 3
}
for run := 0; run < runs; run++ {
if run > 0 {
time.Sleep(15 * time.Second)
}
checkCtx, checkCtxCancel := context.WithTimeout(suite.ctx, 15*time.Minute)
defer checkCtxCancel()
suite.Require().NoError(
check.Wait(
checkCtx,
suite.clusterAccess,
check.DefaultClusterChecks(),
check.StderrReporter(),
),
)
}
}
func (suite *BaseSuite) untaint(name string) {
client, err := suite.clusterAccess.K8sClient(suite.ctx)
suite.Require().NoError(err)
n, err := client.CoreV1().Nodes().Get(suite.ctx, name, metav1.GetOptions{})
suite.Require().NoError(err)
oldData, err := json.Marshal(n)
suite.Require().NoError(err)
k := 0
for _, taint := range n.Spec.Taints {
if taint.Key != constants.LabelNodeRoleControlPlane {
n.Spec.Taints[k] = taint
k++
}
}
n.Spec.Taints = n.Spec.Taints[:k]
newData, err := json.Marshal(n)
suite.Require().NoError(err)
patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
suite.Require().NoError(err)
_, err = client.CoreV1().Nodes().Patch(
suite.ctx,
n.Name,
types.StrategicMergePatchType,
patchBytes,
metav1.PatchOptions{},
)
suite.Require().NoError(err)
}
func (suite *BaseSuite) assertSameVersionCluster(client *talosclient.Client, expectedVersion string) {
nodes := xslices.Map(suite.Cluster.Info().Nodes, func(node provision.NodeInfo) string { return node.IPs[0].String() })
ctx := talosclient.WithNodes(suite.ctx, nodes...)
var v *machineapi.VersionResponse
err := retry.Constant(
time.Minute,
).Retry(
func() error {
var e error
v, e = client.Version(ctx)
return retry.ExpectedError(e)
},
)
suite.Require().NoError(err)
suite.Require().Len(v.Messages, len(nodes))
for _, version := range v.Messages {
suite.Assert().Equal(expectedVersion, version.Version.Tag)
}
}
func (suite *BaseSuite) readVersion(nodeCtx context.Context, client *talosclient.Client) (
version string,
err error,
) {
var v *machineapi.VersionResponse
v, err = client.Version(nodeCtx)
if err != nil {
return
}
version = v.Messages[0].Version.Tag
return
}
type upgradeOptions struct {
TargetInstallerImage string
UpgradePreserve bool
UpgradeStage bool
TargetVersion string
}
//nolint:gocyclo
func (suite *BaseSuite) upgradeNode(client *talosclient.Client, node provision.NodeInfo, options upgradeOptions) {
suite.T().Logf("upgrading node %s", node.IPs[0])
ctx, cancel := context.WithCancel(suite.ctx)
defer cancel()
nodeCtx := talosclient.WithNodes(ctx, node.IPs[0].String())
var (
resp *machineapi.UpgradeResponse
err error
)
err = retry.Constant(time.Minute, retry.WithUnits(10*time.Second)).Retry(
func() error {
resp, err = client.Upgrade(
nodeCtx,
options.TargetInstallerImage,
options.UpgradePreserve,
options.UpgradeStage,
false,
)
if err != nil {
if strings.Contains(err.Error(), "leader changed") {
return retry.ExpectedError(err)
}
if strings.Contains(err.Error(), "failed to acquire upgrade lock") {
return retry.ExpectedError(err)
}
return err
}
return nil
},
)
suite.Require().NoError(err)
suite.Require().Equal("Upgrade request received", resp.Messages[0].Ack)
actorID := resp.Messages[0].ActorId
eventCh := make(chan talosclient.EventResult)
// watch for events
suite.Require().NoError(client.EventsWatchV2(nodeCtx, eventCh, talosclient.WithActorID(actorID), talosclient.WithTailEvents(-1)))
waitTimer := time.NewTimer(5 * time.Minute)
defer waitTimer.Stop()
waitLoop:
for {
select {
case ev := <-eventCh:
suite.Require().NoError(ev.Error)
switch msg := ev.Event.Payload.(type) {
case *machineapi.SequenceEvent:
if msg.Error != nil {
suite.FailNow("upgrade failed", "%s: %s", msg.Error.Message, msg.Error.Code)
}
case *machineapi.PhaseEvent:
if msg.Action == machineapi.PhaseEvent_START && msg.Phase == "kexec" {
// about to be rebooted
break waitLoop
}
if msg.Action == machineapi.PhaseEvent_STOP {
suite.T().Logf("upgrade phase %q finished", msg.Phase)
}
}
case <-waitTimer.C:
suite.FailNow("timeout waiting for upgrade to finish")
case <-ctx.Done():
suite.FailNow("context canceled")
}
}
// wait for the apid to be shut down
time.Sleep(10 * time.Second)
// wait for the version to be equal to target version
suite.Require().NoError(
retry.Constant(10 * time.Minute).Retry(
func() error {
var version string
version, err = suite.readVersion(nodeCtx, client)
if err != nil {
// API might be unresponsive during upgrade
return retry.ExpectedError(err)
}
if version != options.TargetVersion {
// upgrade not finished yet
return retry.ExpectedError(
fmt.Errorf(
"node %q version doesn't match expected: expected %q, got %q",
node.IPs[0].String(),
options.TargetVersion,
version,
),
)
}
return nil
},
),
)
suite.waitForClusterHealth()
}
func (suite *BaseSuite) upgradeKubernetes(fromVersion, toVersion string, skipKubeletUpgrade bool) {
if fromVersion == toVersion {
suite.T().Logf("skipping Kubernetes upgrade, as versions are equal %q -> %q", fromVersion, toVersion)
return
}
suite.T().Logf("upgrading Kubernetes: %q -> %q", fromVersion, toVersion)
path, err := upgrade.NewPath(fromVersion, toVersion)
suite.Require().NoError(err)
options := kubernetes.UpgradeOptions{
Path: path,
ControlPlaneEndpoint: suite.controlPlaneEndpoint,
UpgradeKubelet: !skipKubeletUpgrade,
PrePullImages: true,
EncoderOpt: encoder.WithComments(encoder.CommentsAll),
}
suite.Require().NoError(kubernetes.Upgrade(suite.ctx, suite.clusterAccess, options))
}
type clusterOptions struct {
ClusterName string
ControlplaneNodes int
WorkerNodes int
SourceKernelPath string
SourceInitramfsPath string
SourceInstallerImage string
SourceVersion string
SourceK8sVersion string
WithEncryption bool
}
// setupCluster provisions source clusters and waits for health.
func (suite *BaseSuite) setupCluster(options clusterOptions) {
defaultStateDir, err := clientconfig.GetTalosDirectory()
suite.Require().NoError(err)
suite.stateDir = filepath.Join(defaultStateDir, "clusters")
suite.cniDir = filepath.Join(defaultStateDir, "cni")
cidr, err := netip.ParsePrefix(DefaultSettings.CIDR)
suite.Require().NoError(err)
var gatewayIP netip.Addr
gatewayIP, err = sideronet.NthIPInNetwork(cidr, 1)
suite.Require().NoError(err)
ips := make([]netip.Addr, options.ControlplaneNodes+options.WorkerNodes)
for i := range ips {
ips[i], err = sideronet.NthIPInNetwork(cidr, i+2)
suite.Require().NoError(err)
}
suite.T().Logf("initializing provisioner with cluster name %q, state directory %q", options.ClusterName, suite.stateDir)
request := provision.ClusterRequest{
Name: options.ClusterName,
Network: provision.NetworkRequest{
Name: options.ClusterName,
CIDRs: []netip.Prefix{cidr},
GatewayAddrs: []netip.Addr{gatewayIP},
MTU: DefaultSettings.MTU,
Nameservers: defaultNameservers,
CNI: provision.CNIConfig{
BinPath: []string{filepath.Join(suite.cniDir, "bin")},
ConfDir: filepath.Join(suite.cniDir, "conf.d"),
CacheDir: filepath.Join(suite.cniDir, "cache"),
BundleURL: DefaultSettings.CNIBundleURL,
},
},
KernelPath: options.SourceKernelPath,
InitramfsPath: options.SourceInitramfsPath,
SelfExecutable: suite.TalosctlPath,
StateDirectory: suite.stateDir,
}
defaultInternalLB, _ := suite.provisioner.GetLoadBalancers(request.Network)
suite.controlPlaneEndpoint = fmt.Sprintf("https://%s", nethelpers.JoinHostPort(defaultInternalLB, constants.DefaultControlPlanePort))
genOptions := suite.provisioner.GenOptions(request.Network)
for _, registryMirror := range DefaultSettings.RegistryMirrors {
parts := strings.SplitN(registryMirror, "=", 2)
suite.Require().Len(parts, 2)
genOptions = append(genOptions, generate.WithRegistryMirror(parts[0], parts[1]))
}
controlplaneEndpoints := make([]string, options.ControlplaneNodes)
for i := range controlplaneEndpoints {
controlplaneEndpoints[i] = ips[i].String()
}
if DefaultSettings.CustomCNIURL != "" {
genOptions = append(
genOptions, generate.WithClusterCNIConfig(
&v1alpha1.CNIConfig{
CNIName: constants.CustomCNI,
CNIUrls: []string{DefaultSettings.CustomCNIURL},
},
),
)
}
if options.WithEncryption {
genOptions = append(
genOptions, generate.WithSystemDiskEncryption(
&v1alpha1.SystemDiskEncryptionConfig{
StatePartition: &v1alpha1.EncryptionConfig{
EncryptionProvider: encryption.LUKS2,
EncryptionKeys: []*v1alpha1.EncryptionKey{
{
KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
KeySlot: 0,
},
},
},
EphemeralPartition: &v1alpha1.EncryptionConfig{
EncryptionProvider: encryption.LUKS2,
EncryptionKeys: []*v1alpha1.EncryptionKey{
{
KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
KeySlot: 0,
},
},
},
},
),
)
}
versionContract, err := config.ParseContractFromVersion(options.SourceVersion)
suite.Require().NoError(err)
suite.configBundle, err = bundle.NewBundle(
bundle.WithInputOptions(
&bundle.InputOptions{
ClusterName: options.ClusterName,
Endpoint: suite.controlPlaneEndpoint,
KubeVersion: options.SourceK8sVersion,
GenOptions: append(
genOptions,
generate.WithEndpointList(controlplaneEndpoints),
generate.WithInstallImage(options.SourceInstallerImage),
generate.WithDNSDomain("cluster.local"),
generate.WithVersionContract(versionContract),
),
},
),
)
suite.Require().NoError(err)
for i := 0; i < options.ControlplaneNodes; i++ {
request.Nodes = append(
request.Nodes,
provision.NodeRequest{
Name: fmt.Sprintf("control-plane-%d", i+1),
Type: machine.TypeControlPlane,
IPs: []netip.Addr{ips[i]},
Memory: DefaultSettings.MemMB * 1024 * 1024,
NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
Disks: []*provision.Disk{
{
Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
},
},
Config: suite.configBundle.ControlPlane(),
},
)
}
for i := 1; i <= options.WorkerNodes; i++ {
request.Nodes = append(
request.Nodes,
provision.NodeRequest{
Name: fmt.Sprintf("worker-%d", i),
Type: machine.TypeWorker,
IPs: []netip.Addr{ips[options.ControlplaneNodes+i-1]},
Memory: DefaultSettings.MemMB * 1024 * 1024,
NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
Disks: []*provision.Disk{
{
Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
},
},
Config: suite.configBundle.Worker(),
},
)
}
suite.Cluster, err = suite.provisioner.Create(
suite.ctx, request,
provision.WithBootlader(true),
provision.WithUEFI(true),
provision.WithTalosConfig(suite.configBundle.TalosConfig()),
)
suite.Require().NoError(err)
c, err := clientconfig.Open("")
suite.Require().NoError(err)
c.Merge(suite.configBundle.TalosConfig())
suite.Require().NoError(c.Save(""))
suite.clusterAccess = access.NewAdapter(suite.Cluster, provision.WithTalosConfig(suite.configBundle.TalosConfig()))
suite.Require().NoError(suite.clusterAccess.Bootstrap(suite.ctx, os.Stdout))
suite.waitForClusterHealth()
}
// runE2E runs e2e test on the cluster.
func (suite *BaseSuite) runE2E(k8sVersion string) {
options := sonobuoy.DefaultOptions()
options.KubernetesVersion = k8sVersion
suite.Assert().NoError(sonobuoy.Run(suite.ctx, suite.clusterAccess, options))
}


@ -7,46 +7,13 @@
package provision
import (
"context"
"encoding/json"
"fmt"
"net/netip"
"os"
"path/filepath"
"strings"
"time"
"github.com/siderolabs/gen/xslices"
"github.com/siderolabs/go-blockdevice/blockdevice/encryption"
"github.com/siderolabs/go-kubernetes/kubernetes/upgrade"
"github.com/siderolabs/go-retry/retry"
sideronet "github.com/siderolabs/net"
"github.com/stretchr/testify/suite"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"github.com/siderolabs/talos/cmd/talosctl/pkg/mgmt/helpers"
"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/cluster/check"
"github.com/siderolabs/talos/pkg/cluster/kubernetes"
"github.com/siderolabs/talos/pkg/cluster/sonobuoy"
"github.com/siderolabs/talos/pkg/images"
machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
talosclient "github.com/siderolabs/talos/pkg/machinery/client"
clientconfig "github.com/siderolabs/talos/pkg/machinery/client/config"
"github.com/siderolabs/talos/pkg/machinery/config"
"github.com/siderolabs/talos/pkg/machinery/config/bundle"
"github.com/siderolabs/talos/pkg/machinery/config/encoder"
"github.com/siderolabs/talos/pkg/machinery/config/generate"
"github.com/siderolabs/talos/pkg/machinery/config/machine"
"github.com/siderolabs/talos/pkg/machinery/config/types/v1alpha1"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
"github.com/siderolabs/talos/pkg/provision"
"github.com/siderolabs/talos/pkg/provision/access"
"github.com/siderolabs/talos/pkg/provision/providers/qemu"
)
//nolint:maligned
@ -65,8 +32,8 @@ type upgradeSpec struct {
SkipKubeletUpgrade bool
MasterNodes int
WorkerNodes int
ControlplaneNodes int
WorkerNodes int
UpgradePreserve bool
UpgradeStage bool
@ -87,8 +54,6 @@ const (
currentK8sVersion = constants.DefaultKubernetesVersion
)
var defaultNameservers = []netip.Addr{netip.MustParseAddr("8.8.8.8"), netip.MustParseAddr("1.1.1.1")}
// upgradePreviousToStable upgrades from the previous Talos release to the stable release.
func upgradePreviousToStable() upgradeSpec {
return upgradeSpec{
@ -109,8 +74,8 @@ func upgradePreviousToStable() upgradeSpec {
TargetVersion: stableRelease,
TargetK8sVersion: stableK8sVersion,
MasterNodes: DefaultSettings.MasterNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
ControlplaneNodes: DefaultSettings.ControlplaneNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
}
}
@ -134,8 +99,8 @@ func upgradeStableToCurrent() upgradeSpec {
TargetVersion: DefaultSettings.CurrentVersion,
TargetK8sVersion: currentK8sVersion,
MasterNodes: DefaultSettings.MasterNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
ControlplaneNodes: DefaultSettings.ControlplaneNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
}
}
@ -161,8 +126,8 @@ func upgradeCurrentToCurrent() upgradeSpec {
TargetVersion: DefaultSettings.CurrentVersion,
TargetK8sVersion: currentK8sVersion,
MasterNodes: DefaultSettings.MasterNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
ControlplaneNodes: DefaultSettings.ControlplaneNodes,
WorkerNodes: DefaultSettings.WorkerNodes,
WithEncryption: true,
}
@ -188,9 +153,9 @@ func upgradeStableToCurrentPreserve() upgradeSpec {
TargetVersion: DefaultSettings.CurrentVersion,
TargetK8sVersion: currentK8sVersion,
MasterNodes: 1,
WorkerNodes: 0,
UpgradePreserve: true,
ControlplaneNodes: 1,
WorkerNodes: 0,
UpgradePreserve: true,
}
}
@ -214,36 +179,21 @@ func upgradeStableToCurrentPreserveStage() upgradeSpec {
TargetVersion: DefaultSettings.CurrentVersion,
TargetK8sVersion: currentK8sVersion,
MasterNodes: 1,
WorkerNodes: 0,
UpgradePreserve: true,
UpgradeStage: true,
ControlplaneNodes: 1,
WorkerNodes: 0,
UpgradePreserve: true,
UpgradeStage: true,
}
}
// UpgradeSuite ...
type UpgradeSuite struct {
suite.Suite
base.TalosSuite
BaseSuite
specGen func() upgradeSpec
spec upgradeSpec
track int
provisioner provision.Provisioner
configBundle *bundle.Bundle
clusterAccess *access.Adapter
controlPlaneEndpoint string
//nolint:containedctx
ctx context.Context
ctxCancel context.CancelFunc
stateDir string
cniDir string
}
// SetupSuite ...
@ -253,500 +203,35 @@ func (suite *UpgradeSuite) SetupSuite() {
suite.T().Logf("upgrade spec = %v", suite.spec)
// timeout for the whole test
suite.ctx, suite.ctxCancel = context.WithTimeout(context.Background(), 30*time.Minute)
var err error
suite.provisioner, err = qemu.NewProvisioner(suite.ctx)
suite.Require().NoError(err)
}
// TearDownSuite ...
func (suite *UpgradeSuite) TearDownSuite() {
if suite.T().Failed() && DefaultSettings.CrashdumpEnabled && suite.Cluster != nil {
// for failed tests, produce crash dump for easier debugging,
// as cluster is going to be torn down below
suite.provisioner.CrashDump(suite.ctx, suite.Cluster, os.Stderr)
if suite.clusterAccess != nil {
suite.clusterAccess.CrashDump(suite.ctx, os.Stderr)
}
}
if suite.clusterAccess != nil {
suite.Assert().NoError(suite.clusterAccess.Close())
}
if suite.Cluster != nil {
suite.Assert().NoError(suite.provisioner.Destroy(suite.ctx, suite.Cluster))
}
suite.ctxCancel()
if suite.stateDir != "" {
suite.Assert().NoError(os.RemoveAll(suite.stateDir))
}
if suite.provisioner != nil {
suite.Assert().NoError(suite.provisioner.Close())
}
}
// setupCluster provisions source clusters and waits for health.
func (suite *UpgradeSuite) setupCluster() {
defaultStateDir, err := clientconfig.GetTalosDirectory()
suite.Require().NoError(err)
suite.stateDir = filepath.Join(defaultStateDir, "clusters")
suite.cniDir = filepath.Join(defaultStateDir, "cni")
clusterName := suite.spec.ShortName
cidr, err := netip.ParsePrefix(DefaultSettings.CIDR)
suite.Require().NoError(err)
var gatewayIP netip.Addr
gatewayIP, err = sideronet.NthIPInNetwork(cidr, 1)
suite.Require().NoError(err)
ips := make([]netip.Addr, suite.spec.MasterNodes+suite.spec.WorkerNodes)
for i := range ips {
ips[i], err = sideronet.NthIPInNetwork(cidr, i+2)
suite.Require().NoError(err)
}
suite.T().Logf("initializing provisioner with cluster name %q, state directory %q", clusterName, suite.stateDir)
request := provision.ClusterRequest{
Name: clusterName,
Network: provision.NetworkRequest{
Name: clusterName,
CIDRs: []netip.Prefix{cidr},
GatewayAddrs: []netip.Addr{gatewayIP},
MTU: DefaultSettings.MTU,
Nameservers: defaultNameservers,
CNI: provision.CNIConfig{
BinPath: []string{filepath.Join(suite.cniDir, "bin")},
ConfDir: filepath.Join(suite.cniDir, "conf.d"),
CacheDir: filepath.Join(suite.cniDir, "cache"),
BundleURL: DefaultSettings.CNIBundleURL,
},
},
KernelPath: suite.spec.SourceKernelPath,
InitramfsPath: suite.spec.SourceInitramfsPath,
SelfExecutable: suite.TalosctlPath,
StateDirectory: suite.stateDir,
}
defaultInternalLB, _ := suite.provisioner.GetLoadBalancers(request.Network)
suite.controlPlaneEndpoint = fmt.Sprintf("https://%s", nethelpers.JoinHostPort(defaultInternalLB, constants.DefaultControlPlanePort))
genOptions := suite.provisioner.GenOptions(request.Network)
for _, registryMirror := range DefaultSettings.RegistryMirrors {
parts := strings.SplitN(registryMirror, "=", 2)
suite.Require().Len(parts, 2)
genOptions = append(genOptions, generate.WithRegistryMirror(parts[0], parts[1]))
}
masterEndpoints := make([]string, suite.spec.MasterNodes)
for i := range masterEndpoints {
masterEndpoints[i] = ips[i].String()
}
if DefaultSettings.CustomCNIURL != "" {
genOptions = append(
genOptions, generate.WithClusterCNIConfig(
&v1alpha1.CNIConfig{
CNIName: constants.CustomCNI,
CNIUrls: []string{DefaultSettings.CustomCNIURL},
},
),
)
}
if suite.spec.WithEncryption {
genOptions = append(
genOptions, generate.WithSystemDiskEncryption(
&v1alpha1.SystemDiskEncryptionConfig{
StatePartition: &v1alpha1.EncryptionConfig{
EncryptionProvider: encryption.LUKS2,
EncryptionKeys: []*v1alpha1.EncryptionKey{
{
KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
KeySlot: 0,
},
},
},
EphemeralPartition: &v1alpha1.EncryptionConfig{
EncryptionProvider: encryption.LUKS2,
EncryptionKeys: []*v1alpha1.EncryptionKey{
{
KeyNodeID: &v1alpha1.EncryptionKeyNodeID{},
KeySlot: 0,
},
},
},
},
),
)
}
versionContract, err := config.ParseContractFromVersion(suite.spec.SourceVersion)
suite.Require().NoError(err)
suite.configBundle, err = bundle.NewBundle(
bundle.WithInputOptions(
&bundle.InputOptions{
ClusterName: clusterName,
Endpoint: suite.controlPlaneEndpoint,
KubeVersion: suite.spec.SourceK8sVersion,
GenOptions: append(
genOptions,
generate.WithEndpointList(masterEndpoints),
generate.WithInstallImage(suite.spec.SourceInstallerImage),
generate.WithDNSDomain("cluster.local"),
generate.WithVersionContract(versionContract),
),
},
),
)
suite.Require().NoError(err)
for i := 0; i < suite.spec.MasterNodes; i++ {
request.Nodes = append(
request.Nodes,
provision.NodeRequest{
Name: fmt.Sprintf("master-%d", i+1),
Type: machine.TypeControlPlane,
IPs: []netip.Addr{ips[i]},
Memory: DefaultSettings.MemMB * 1024 * 1024,
NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
Disks: []*provision.Disk{
{
Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
},
},
Config: suite.configBundle.ControlPlane(),
},
)
}
for i := 1; i <= suite.spec.WorkerNodes; i++ {
request.Nodes = append(
request.Nodes,
provision.NodeRequest{
Name: fmt.Sprintf("worker-%d", i),
Type: machine.TypeWorker,
IPs: []netip.Addr{ips[suite.spec.MasterNodes+i-1]},
Memory: DefaultSettings.MemMB * 1024 * 1024,
NanoCPUs: DefaultSettings.CPUs * 1000 * 1000 * 1000,
Disks: []*provision.Disk{
{
Size: DefaultSettings.DiskGB * 1024 * 1024 * 1024,
},
},
Config: suite.configBundle.Worker(),
},
)
}
suite.Cluster, err = suite.provisioner.Create(
suite.ctx, request,
provision.WithBootlader(true),
provision.WithUEFI(true),
provision.WithTalosConfig(suite.configBundle.TalosConfig()),
)
suite.Require().NoError(err)
c, err := clientconfig.Open("")
suite.Require().NoError(err)
c.Merge(suite.configBundle.TalosConfig())
suite.Require().NoError(c.Save(""))
suite.clusterAccess = access.NewAdapter(suite.Cluster, provision.WithTalosConfig(suite.configBundle.TalosConfig()))
suite.Require().NoError(suite.clusterAccess.Bootstrap(suite.ctx, os.Stdout))
suite.waitForClusterHealth()
}
// waitForClusterHealth asserts cluster health after any change.
func (suite *UpgradeSuite) waitForClusterHealth() {
runs := 1
singleNodeCluster := len(suite.Cluster.Info().Nodes) == 1
if singleNodeCluster {
// run health check several times for single node clusters,
// as self-hosted control plane is not stable after reboot
runs = 3
}
for run := 0; run < runs; run++ {
if run > 0 {
time.Sleep(15 * time.Second)
}
checkCtx, checkCtxCancel := context.WithTimeout(suite.ctx, 15*time.Minute)
defer checkCtxCancel()
suite.Require().NoError(
check.Wait(
checkCtx,
suite.clusterAccess,
check.DefaultClusterChecks(),
check.StderrReporter(),
),
)
}
suite.BaseSuite.SetupSuite()
}
// runE2E runs e2e test on the cluster.
func (suite *UpgradeSuite) runE2E(k8sVersion string) {
if suite.spec.WorkerNodes == 0 {
// no worker nodes, should make masters schedulable
suite.untaint("master-1")
suite.untaint("control-plane-1")
}
options := sonobuoy.DefaultOptions()
options.KubernetesVersion = k8sVersion
suite.Assert().NoError(sonobuoy.Run(suite.ctx, suite.clusterAccess, options))
}
func (suite *UpgradeSuite) assertSameVersionCluster(client *talosclient.Client, expectedVersion string) {
nodes := xslices.Map(suite.Cluster.Info().Nodes, func(node provision.NodeInfo) string { return node.IPs[0].String() })
ctx := talosclient.WithNodes(suite.ctx, nodes...)
var v *machineapi.VersionResponse
err := retry.Constant(
time.Minute,
).Retry(
func() error {
var e error
v, e = client.Version(ctx)
return retry.ExpectedError(e)
},
)
suite.Require().NoError(err)
suite.Require().Len(v.Messages, len(nodes))
for _, version := range v.Messages {
suite.Assert().Equal(expectedVersion, version.Version.Tag)
}
}
func (suite *UpgradeSuite) readVersion(nodeCtx context.Context, client *talosclient.Client) (
version string,
err error,
) {
var v *machineapi.VersionResponse
v, err = client.Version(nodeCtx)
if err != nil {
return
}
version = v.Messages[0].Version.Tag
return
}
//nolint:gocyclo
func (suite *UpgradeSuite) upgradeNode(client *talosclient.Client, node provision.NodeInfo) {
suite.T().Logf("upgrading node %s", node.IPs[0])
ctx, cancel := context.WithCancel(suite.ctx)
defer cancel()
nodeCtx := talosclient.WithNodes(ctx, node.IPs[0].String())
var (
resp *machineapi.UpgradeResponse
err error
)
err = retry.Constant(time.Minute, retry.WithUnits(10*time.Second)).Retry(
func() error {
resp, err = client.Upgrade(
nodeCtx,
suite.spec.TargetInstallerImage,
suite.spec.UpgradePreserve,
suite.spec.UpgradeStage,
false,
)
if err != nil {
if strings.Contains(err.Error(), "leader changed") {
return retry.ExpectedError(err)
}
if strings.Contains(err.Error(), "failed to acquire upgrade lock") {
return retry.ExpectedError(err)
}
return err
}
return nil
},
)
suite.Require().NoError(err)
suite.Require().Equal("Upgrade request received", resp.Messages[0].Ack)
actorID := resp.Messages[0].ActorId
eventCh := make(chan talosclient.EventResult)
// watch for events
suite.Require().NoError(client.EventsWatchV2(nodeCtx, eventCh, talosclient.WithActorID(actorID), talosclient.WithTailEvents(-1)))
waitTimer := time.NewTimer(5 * time.Minute)
defer waitTimer.Stop()
waitLoop:
for {
select {
case ev := <-eventCh:
suite.Require().NoError(ev.Error)
switch msg := ev.Event.Payload.(type) {
case *machineapi.SequenceEvent:
if msg.Error != nil {
suite.FailNow("upgrade failed", "%s: %s", msg.Error.Message, msg.Error.Code)
}
case *machineapi.PhaseEvent:
if msg.Action == machineapi.PhaseEvent_START && msg.Phase == "kexec" {
// about to be rebooted
break waitLoop
}
if msg.Action == machineapi.PhaseEvent_STOP {
suite.T().Logf("upgrade phase %q finished", msg.Phase)
}
}
case <-waitTimer.C:
suite.FailNow("timeout waiting for upgrade to finish")
case <-ctx.Done():
suite.FailNow("context canceled")
}
}
// wait for the apid to be shut down
time.Sleep(10 * time.Second)
// wait for the version to be equal to target version
suite.Require().NoError(
retry.Constant(10 * time.Minute).Retry(
func() error {
var version string
version, err = suite.readVersion(nodeCtx, client)
if err != nil {
// API might be unresponsive during upgrade
return retry.ExpectedError(err)
}
if version != suite.spec.TargetVersion {
// upgrade not finished yet
return retry.ExpectedError(
fmt.Errorf(
"node %q version doesn't match expected: expected %q, got %q",
node.IPs[0].String(),
suite.spec.TargetVersion,
version,
),
)
}
return nil
},
),
)
suite.waitForClusterHealth()
}
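The wait loop above is scoped to the actor ID returned by the upgrade request, so events from unrelated sequences never reach it, and the start of the "kexec" phase is used as the signal that the node is about to reboot. A condensed sketch of that pattern (waitForKexec is a hypothetical helper and the import paths are assumed; the client calls are the ones used above):

package sketch

import (
	"context"

	machineapi "github.com/siderolabs/talos/pkg/machinery/api/machine"
	talosclient "github.com/siderolabs/talos/pkg/machinery/client"
)

// waitForKexec consumes events for a single actor (one upgrade sequence) and
// returns once the "kexec" phase starts, i.e. right before the node reboots.
func waitForKexec(ctx context.Context, client *talosclient.Client, actorID string) error {
	eventCh := make(chan talosclient.EventResult)

	if err := client.EventsWatchV2(ctx, eventCh, talosclient.WithActorID(actorID), talosclient.WithTailEvents(-1)); err != nil {
		return err
	}

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case ev := <-eventCh:
			if ev.Error != nil {
				return ev.Error
			}

			if msg, ok := ev.Event.Payload.(*machineapi.PhaseEvent); ok && msg.Action == machineapi.PhaseEvent_START && msg.Phase == "kexec" {
				return nil
			}
		}
	}
}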
func (suite *UpgradeSuite) upgradeKubernetes(fromVersion, toVersion string, skipKubeletUpgrade bool) {
if fromVersion == toVersion {
suite.T().Logf("skipping Kubernetes upgrade, as versions are equal %q -> %q", fromVersion, toVersion)
return
}
suite.T().Logf("upgrading Kubernetes: %q -> %q", fromVersion, toVersion)
path, err := upgrade.NewPath(fromVersion, toVersion)
suite.Require().NoError(err)
options := kubernetes.UpgradeOptions{
Path: path,
ControlPlaneEndpoint: suite.controlPlaneEndpoint,
UpgradeKubelet: !skipKubeletUpgrade,
PrePullImages: true,
EncoderOpt: encoder.WithComments(encoder.CommentsAll),
}
suite.Require().NoError(kubernetes.Upgrade(suite.ctx, suite.clusterAccess, options))
}
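upgradeKubernetes itself is a thin wrapper: it builds an upgrade path from the source and target versions and hands it to the Kubernetes upgrade helper together with the control plane endpoint. A rough sketch of the same call chain (the helper name and import paths are assumptions; the options mirror the ones set above):

package sketch

import (
	"context"

	"github.com/siderolabs/go-kubernetes/kubernetes/upgrade"

	"github.com/siderolabs/talos/pkg/cluster/kubernetes"
)

// upgradeControlPlane computes the upgrade path between two Kubernetes versions
// and drives the control plane (and kubelet) upgrade over it.
func upgradeControlPlane(ctx context.Context, provider kubernetes.UpgradeProvider, endpoint, from, to string) error {
	path, err := upgrade.NewPath(from, to)
	if err != nil {
		return err
	}

	return kubernetes.Upgrade(ctx, provider, kubernetes.UpgradeOptions{
		Path:                 path,
		ControlPlaneEndpoint: endpoint,
		UpgradeKubelet:       true,
		PrePullImages:        true,
	})
}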
func (suite *UpgradeSuite) untaint(name string) {
client, err := suite.clusterAccess.K8sClient(suite.ctx)
suite.Require().NoError(err)
n, err := client.CoreV1().Nodes().Get(suite.ctx, name, metav1.GetOptions{})
suite.Require().NoError(err)
oldData, err := json.Marshal(n)
suite.Require().NoError(err)
k := 0
for _, taint := range n.Spec.Taints {
if taint.Key != constants.LabelNodeRoleControlPlane {
n.Spec.Taints[k] = taint
k++
}
}
n.Spec.Taints = n.Spec.Taints[:k]
newData, err := json.Marshal(n)
suite.Require().NoError(err)
patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
suite.Require().NoError(err)
_, err = client.CoreV1().Nodes().Patch(
suite.ctx,
n.Name,
types.StrategicMergePatchType,
patchBytes,
metav1.PatchOptions{},
)
suite.Require().NoError(err)
suite.BaseSuite.runE2E(k8sVersion)
}
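untaint removes the control-plane taint by diffing the node object before and after the change and submitting a strategic merge patch, rather than issuing a full Update that could clobber concurrent changes to other fields. A generalized sketch of that approach (removeTaint is a hypothetical helper; the client-go calls are the ones used above):

package sketch

import (
	"context"
	"encoding/json"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/client-go/kubernetes"
)

// removeTaint drops all taints with the given key from a node by diffing the
// old and new objects and patching only the difference.
func removeTaint(ctx context.Context, client kubernetes.Interface, nodeName, taintKey string) error {
	node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}

	oldData, err := json.Marshal(node)
	if err != nil {
		return err
	}

	// filter out matching taints in place
	k := 0

	for _, taint := range node.Spec.Taints {
		if taint.Key != taintKey {
			node.Spec.Taints[k] = taint
			k++
		}
	}

	node.Spec.Taints = node.Spec.Taints[:k]

	newData, err := json.Marshal(node)
	if err != nil {
		return err
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, corev1.Node{})
	if err != nil {
		return err
	}

	_, err = client.CoreV1().Nodes().Patch(ctx, node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})

	return err
}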
// TestRolling performs a rolling upgrade starting with the control plane nodes.
func (suite *UpgradeSuite) TestRolling() {
suite.setupCluster()
suite.setupCluster(clusterOptions{
ClusterName: suite.spec.ShortName,
ControlplaneNodes: suite.spec.ControlplaneNodes,
WorkerNodes: suite.spec.WorkerNodes,
SourceKernelPath: suite.spec.SourceKernelPath,
SourceInitramfsPath: suite.spec.SourceInitramfsPath,
SourceInstallerImage: suite.spec.SourceInstallerImage,
SourceVersion: suite.spec.SourceVersion,
SourceK8sVersion: suite.spec.SourceK8sVersion,
WithEncryption: suite.spec.WithEncryption,
})
client, err := suite.clusterAccess.Client()
suite.Require().NoError(err)
@ -754,17 +239,24 @@ func (suite *UpgradeSuite) TestRolling() {
// verify initial cluster version
suite.assertSameVersionCluster(client, suite.spec.SourceVersion)
options := upgradeOptions{
TargetInstallerImage: suite.spec.TargetInstallerImage,
UpgradePreserve: suite.spec.UpgradePreserve,
UpgradeStage: suite.spec.UpgradeStage,
TargetVersion: suite.spec.TargetVersion,
}
// upgrade master nodes
for _, node := range suite.Cluster.Info().Nodes {
if node.Type == machine.TypeInit || node.Type == machine.TypeControlPlane {
suite.upgradeNode(client, node)
suite.upgradeNode(client, node, options)
}
}
// upgrade worker nodes
for _, node := range suite.Cluster.Info().Nodes {
if node.Type == machine.TypeWorker {
suite.upgradeNode(client, node)
suite.upgradeNode(client, node, options)
}
}

View File

@ -13,18 +13,19 @@ import (
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/gen/channel"
"github.com/siderolabs/go-kubernetes/kubernetes/manifests"
"github.com/siderolabs/go-kubernetes/kubernetes/upgrade"
"github.com/siderolabs/go-retry/retry"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/client-go/informers"
"k8s.io/client-go/tools/cache"
"github.com/siderolabs/talos/pkg/cluster"
"github.com/siderolabs/talos/pkg/kubernetes"
"github.com/siderolabs/talos/pkg/machinery/api/common"
"github.com/siderolabs/talos/pkg/machinery/client"
machinetype "github.com/siderolabs/talos/pkg/machinery/config/machine"
@ -52,6 +53,8 @@ func Upgrade(ctx context.Context, cluster UpgradeProvider, options UpgradeOption
return fmt.Errorf("error building kubernetes client: %w", err)
}
defer k8sClient.Close() //nolint:errcheck
options.controlPlaneNodes, err = k8sClient.NodeIPs(ctx, machinetype.TypeControlPlane)
if err != nil {
return fmt.Errorf("error fetching controlplane nodes: %w", err)
@ -290,9 +293,7 @@ func upgradeStaticPodOnNode(ctx context.Context, cluster UpgradeProvider, option
}
}
if err = retry.Constant(3*time.Minute, retry.WithUnits(10*time.Second)).Retry(func() error {
return checkPodStatus(ctx, cluster, service, node, expectedConfigVersion)
}); err != nil {
if err = checkPodStatus(ctx, cluster, options, service, node, expectedConfigVersion); err != nil {
return err
}
@ -431,60 +432,86 @@ func syncManifests(ctx context.Context, objects []*unstructured.Unstructured, cl
}
//nolint:gocyclo
func checkPodStatus(ctx context.Context, cluster UpgradeProvider, service, node, configVersion string) error {
func checkPodStatus(ctx context.Context, cluster UpgradeProvider, options UpgradeOptions, service, node, configVersion string) error {
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
k8sClient, err := cluster.K8sHelper(ctx)
if err != nil {
return fmt.Errorf("error building kubernetes client: %w", err)
}
pods, err := k8sClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
LabelSelector: fmt.Sprintf("k8s-app = %s", service),
})
if err != nil {
if kubernetes.IsRetryableError(err) {
return retry.ExpectedError(err)
}
defer k8sClient.Close() //nolint:errcheck
return err
informerFactory := informers.NewSharedInformerFactoryWithOptions(
k8sClient, 10*time.Second,
informers.WithNamespace(namespace),
informers.WithTweakListOptions(func(options *metav1.ListOptions) {
options.LabelSelector = fmt.Sprintf("k8s-app = %s", service)
}),
)
notifyCh := make(chan *v1.Pod)
informer := informerFactory.Core().V1().Pods().Informer()
if err := informer.SetWatchErrorHandler(func(r *cache.Reflector, err error) {
options.Log("kubernetes endpoint watch error: %s", err)
}); err != nil {
return fmt.Errorf("error setting watch error handler: %w", err)
}
podFound := false
if _, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) { channel.SendWithContext(ctx, notifyCh, obj.(*v1.Pod)) },
DeleteFunc: func(_ interface{}) {},
UpdateFunc: func(_, obj interface{}) { channel.SendWithContext(ctx, notifyCh, obj.(*v1.Pod)) },
}); err != nil {
return fmt.Errorf("error adding watch event handler: %w", err)
}
for _, pod := range pods.Items {
if pod.Status.HostIP != node {
continue
}
informerFactory.Start(ctx.Done())
podFound = true
defer func() {
cancel()
informerFactory.Shutdown()
}()
if pod.Annotations[constants.AnnotationStaticPodConfigVersion] != configVersion {
return retry.ExpectedError(fmt.Errorf("config version mismatch: got %q, expected %q", pod.Annotations[constants.AnnotationStaticPodConfigVersion], configVersion))
}
ready := false
for _, condition := range pod.Status.Conditions {
if condition.Type != v1.PodReady {
for {
select {
case <-ctx.Done():
return ctx.Err()
case pod := <-notifyCh:
if pod.Status.HostIP != node {
continue
}
if condition.Status == v1.ConditionTrue {
ready = true
if pod.Annotations[constants.AnnotationStaticPodConfigVersion] != configVersion {
options.Log(" > %q: %s: waiting, config version mismatch: got %q, expected %q", node, service, pod.Annotations[constants.AnnotationStaticPodConfigVersion], configVersion)
break
continue
}
ready := false
for _, condition := range pod.Status.Conditions {
if condition.Type != v1.PodReady {
continue
}
if condition.Status == v1.ConditionTrue {
ready = true
break
}
}
if !ready {
options.Log(" > %q: %s: pod is not ready, waiting", node, service)
continue
}
return nil
}
if !ready {
return retry.ExpectedError(fmt.Errorf("pod is not ready"))
}
break
}
if !podFound {
return retry.ExpectedError(fmt.Errorf("pod not found in the API server state"))
}
return nil
}
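The refactored checkPodStatus replaces list-and-retry polling with a shared informer: pod adds and updates matching the label selector are pushed to a channel, so the config-version and readiness checks react to changes instead of re-listing the API server. A minimal sketch of that watch setup (watchPodsByLabel is a hypothetical helper; the informer and channel calls are the ones introduced above):

package sketch

import (
	"context"
	"fmt"
	"time"

	"github.com/siderolabs/gen/channel"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
)

// watchPodsByLabel streams add/update events for pods matching the label
// selector in the given namespace until the context is canceled.
// The returned shutdown function stops the informer factory.
func watchPodsByLabel(ctx context.Context, client kubernetes.Interface, namespace, selector string) (<-chan *v1.Pod, func(), error) {
	factory := informers.NewSharedInformerFactoryWithOptions(
		client, 10*time.Second,
		informers.WithNamespace(namespace),
		informers.WithTweakListOptions(func(options *metav1.ListOptions) {
			options.LabelSelector = selector
		}),
	)

	notifyCh := make(chan *v1.Pod)

	informer := factory.Core().V1().Pods().Informer()

	if _, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    func(obj interface{}) { channel.SendWithContext(ctx, notifyCh, obj.(*v1.Pod)) },
		UpdateFunc: func(_, obj interface{}) { channel.SendWithContext(ctx, notifyCh, obj.(*v1.Pod)) },
	}); err != nil {
		return nil, nil, fmt.Errorf("error adding watch event handler: %w", err)
	}

	factory.Start(ctx.Done())

	return notifyCh, factory.Shutdown, nil
}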

View File

@ -160,6 +160,10 @@ func TestKubernetesCompatibility16(t *testing.T) {
kubernetesVersion: "1.27.1",
target: "1.6.0",
},
{
kubernetesVersion: "1.24.1",
target: "1.6.0",
},
{
kubernetesVersion: "1.28.3",
target: "1.6.0-beta.0",
@ -174,9 +178,9 @@ func TestKubernetesCompatibility16(t *testing.T) {
expectedError: "version of Kubernetes 1.30.0-alpha.0 is too new to be used with Talos 1.6.0",
},
{
kubernetesVersion: "1.26.1",
kubernetesVersion: "1.23.1",
target: "1.6.0",
expectedError: "version of Kubernetes 1.26.1 is too old to be used with Talos 1.6.0",
expectedError: "version of Kubernetes 1.23.1 is too old to be used with Talos 1.6.0",
},
} {
runKubernetesVersionTest(t, tt)

View File

@ -22,7 +22,7 @@ var MaximumHostDowngradeVersion = semver.MustParse("1.8.0")
var DeniedHostUpgradeVersions = []semver.Version{}
// MinimumKubernetesVersion is the minimum version of Kubernetes supported with Talos 1.6.
var MinimumKubernetesVersion = semver.MustParse("1.27.0")
var MinimumKubernetesVersion = semver.MustParse("1.24.0")
// MaximumKubernetesVersion is the maximum version of Kubernetes supported with Talos 1.6.
var MaximumKubernetesVersion = semver.MustParse("1.29.99")
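These two constants define the supported Kubernetes window for Talos 1.6: anything below the minimum or above the maximum is rejected, which is what the compatibility tests above assert. A rough sketch of that bounds check (checkKubernetesVersion is a hypothetical helper, assuming the blang/semver module behind semver.MustParse):

package sketch

import (
	"fmt"

	"github.com/blang/semver/v4"
)

var (
	minimumKubernetesVersion = semver.MustParse("1.24.0")
	maximumKubernetesVersion = semver.MustParse("1.29.99")
)

// checkKubernetesVersion rejects versions outside the supported window.
func checkKubernetesVersion(raw string) error {
	v, err := semver.ParseTolerant(raw)
	if err != nil {
		return err
	}

	if v.LT(minimumKubernetesVersion) {
		return fmt.Errorf("version of Kubernetes %s is too old to be used with Talos 1.6", raw)
	}

	if v.GT(maximumKubernetesVersion) {
		return fmt.Errorf("version of Kubernetes %s is too new to be used with Talos 1.6", raw)
	}

	return nil
}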

View File

@ -9,7 +9,7 @@ description: "Table of supported Talos Linux versions and respective platforms."
| Release Date | 2023-12-15 (TBD) | 2023-08-17 (1.5.0) |
| End of Community Support | 1.7.0 release (2024-03-15, TBD) | 1.6.0 release (2023-12-15) |
| Enterprise Support | [offered by Sidero Labs Inc.](https://www.siderolabs.com/support/) | [offered by Sidero Labs Inc.](https://www.siderolabs.com/support/) |
| Kubernetes | 1.29, 1.28, 1.27 | 1.28, 1.27, 1.26 |
| Kubernetes | 1.29, 1.28, 1.27, 1.26, 1.25, 1.24 | 1.28, 1.27, 1.26 |
| Architecture | amd64, arm64 | amd64, arm64 |
| **Platforms** | | |
| - cloud | AWS, GCP, Azure, Digital Ocean, Exoscale, Hetzner, OpenStack, Oracle Cloud, Scaleway, Vultr, Upcloud | AWS, GCP, Azure, Digital Ocean, Exoscale, Hetzner, OpenStack, Oracle Cloud, Scaleway, Vultr, Upcloud |