fix: properly clean up legacy static pod manifests directory
When upgrading from an older version of Talos that uses the static pod manifests directory to a new version that provides static pods via an internal web server, we need to make sure that legacy static pods are cleaned up; otherwise the kubelet receives two versions of the static pods, which makes it fail to run them. The previous cleanup location wasn't working properly, as `/etc/kubernetes/manifests` exists in the rootfs (and is empty there), while the actual contents are in `/var` and appear only once the respective overlay mount is done. The controller tried to clean up on start, saw nothing (looking into the rootfs), and then moved on to its other functions. The result was that when the overlay was mounted, the static pods were still there, while the controller would make its next attempt only after a failure, and that next failure happens when the kubelet is already running and has already picked up those stale definitions. Fix all of that by moving the cleanup into the sequencer after the overlayfs mount. Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
This commit is contained in:
parent
6ee47bcc61
commit
7e50e24c01
@ -7,9 +7,6 @@ package k8s
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cosi-project/runtime/pkg/controller"
|
||||
@ -20,7 +17,6 @@ import (
|
||||
|
||||
k8sadapter "github.com/talos-systems/talos/internal/app/machined/pkg/adapters/k8s"
|
||||
"github.com/talos-systems/talos/pkg/kubernetes/kubelet"
|
||||
"github.com/talos-systems/talos/pkg/machinery/constants"
|
||||
"github.com/talos-systems/talos/pkg/machinery/resources/k8s"
|
||||
"github.com/talos-systems/talos/pkg/machinery/resources/secrets"
|
||||
"github.com/talos-systems/talos/pkg/machinery/resources/v1alpha1"
|
||||
@ -78,10 +74,6 @@ func (ctrl *KubeletStaticPodController) Outputs() []controller.Output {
|
||||
//
|
||||
//nolint:gocyclo,cyclop
|
||||
func (ctrl *KubeletStaticPodController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
|
||||
if err := ctrl.cleanupLegacyStaticPodFiles(logger); err != nil {
|
||||
logger.Warn("error cleaning up legacy static pod files: %w", zap.Error(err))
|
||||
}
|
||||
|
||||
var kubeletClient *kubelet.Client
|
||||
|
||||
refreshTicker := time.NewTicker(15 * time.Second) // refresh kubelet pods status every 15 seconds
|
||||
@ -172,37 +164,6 @@ func (ctrl *KubeletStaticPodController) Run(ctx context.Context, r controller.Ru
|
||||
}
|
||||
}
|
||||
|
||||
func (ctrl *KubeletStaticPodController) cleanupLegacyStaticPodFiles(logger *zap.Logger) error {
|
||||
manifestDir, err := os.Open(constants.ManifestsDirectory)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening manifests directory: %w", err)
|
||||
}
|
||||
|
||||
defer manifestDir.Close() //nolint:errcheck
|
||||
|
||||
manifests, err := manifestDir.Readdirnames(0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error listing manifests: %w", err)
|
||||
}
|
||||
|
||||
for _, manifest := range manifests {
|
||||
// skip manifests not owned by Talos
|
||||
if !strings.HasPrefix(manifest, constants.TalosManifestPrefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
podPath := filepath.Join(constants.ManifestsDirectory, manifest)
|
||||
|
||||
logger.Sugar().Infof("cleaning up legacy static pod file %q", podPath)
|
||||
|
||||
if err = os.Remove(podPath); err != nil {
|
||||
return fmt.Errorf("error cleaning up legacy static pod file: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ctrl *KubeletStaticPodController) teardownStatuses(ctx context.Context, r controller.Runtime) error {
|
||||
statuses, err := r.List(ctx, resource.NewMetadata(k8s.NamespaceName, k8s.StaticPodStatusType, "", resource.VersionUndefined))
|
||||
if err != nil {
|
||||
|
@ -210,6 +210,9 @@ func (*Sequencer) Boot(r runtime.Runtime) []runtime.Phase {
|
||||
r.State().Platform().Mode() != runtime.ModeContainer,
|
||||
"overlay",
|
||||
MountOverlayFilesystems,
|
||||
).Append(
|
||||
"legacyCleanup",
|
||||
CleanupLegacyStaticPodFiles,
|
||||
).Append(
|
||||
"udevSetup",
|
||||
WriteUdevRules,
|
||||
|
@ -2043,6 +2043,42 @@ func ForceCleanup(seq runtime.Sequence, data interface{}) (runtime.TaskExecution
|
||||
}, "forceCleanup"
|
||||
}
|
||||
|
||||
// CleanupLegacyStaticPodFiles removes legacy static pod files in the manifests directory.
|
||||
//
|
||||
// This part of transition to Talos 1.3.0, as Talos 1.3.0 serves static pods from internal web server.
|
||||
func CleanupLegacyStaticPodFiles(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
|
||||
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
|
||||
manifestDir, err := os.Open(constants.ManifestsDirectory)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening manifests directory: %w", err)
|
||||
}
|
||||
|
||||
defer manifestDir.Close() //nolint:errcheck
|
||||
|
||||
manifests, err := manifestDir.Readdirnames(0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error listing manifests: %w", err)
|
||||
}
|
||||
|
||||
for _, manifest := range manifests {
|
||||
// skip manifests not owned by Talos
|
||||
if !strings.HasPrefix(manifest, constants.TalosManifestPrefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
podPath := filepath.Join(constants.ManifestsDirectory, manifest)
|
||||
|
||||
logger.Printf("cleaning up legacy static pod file %q", podPath)
|
||||
|
||||
if err = os.Remove(podPath); err != nil {
|
||||
return fmt.Errorf("error cleaning up legacy static pod file: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}, "cleanupLegacyStaticPodFiles"
|
||||
}
|
||||
|
||||
func pauseOnFailure(callback func(runtime.Sequence, interface{}) (runtime.TaskExecutionFunc, string),
|
||||
timeout time.Duration,
|
||||
) func(seq runtime.Sequence, data interface{}) (runtime.TaskExecutionFunc, string) {
|
||||
|
Loading…
Reference in New Issue
Block a user