talos/internal/pkg/etcd/local.go
Andrey Smirnov 40e69af224
fix: improve etcd leave on reset process
When removing a member from `etcd`, the server does a pre-check to make
sure the member is connected to a quorum of other members, and the
remove request might fail. Add a retry to wait for the etcd to be fully
connected before giving up, as some parts of the reset flow already ran.

Also fix an issue which appears in the integration test, when `reset` is
called early in the boot sequence when local etcd hasn't started fully yet.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
2023-03-01 14:51:49 +04:00

35 lines
954 B
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package etcd
import (
"context"
"fmt"
"time"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/talos/pkg/machinery/resources/etcd"
)
// GetLocalMemberID resolves the etcd member ID of the local node from the resource state.
//
// It calls safe.StateWatchFor, bounded by a 3-minute timeout derived from ctx and
// restricted to Created events, to obtain the local etcd.Member resource, then
// parses the MemberID field of its spec with etcd.ParseMemberID.
//
// An error from the watch is returned wrapped; the result of etcd.ParseMemberID
// (value and error) is returned as-is.
func GetLocalMemberID(ctx context.Context, s state.State) (uint64, error) {
	const waitTimeout = 3 * time.Minute

	waitCtx, stop := context.WithTimeout(ctx, waitTimeout)
	defer stop()

	// Metadata of the member resource identified by etcd.LocalMemberID in etcd.NamespaceName.
	localMemberPtr := etcd.NewMember(etcd.NamespaceName, etcd.LocalMemberID).Metadata()

	local, watchErr := safe.StateWatchFor[*etcd.Member](waitCtx, s, localMemberPtr, state.WithEventTypes(state.Created))
	if watchErr != nil {
		return 0, fmt.Errorf("failed to get local etcd member ID: %w", watchErr)
	}

	return etcd.ParseMemberID(local.TypedSpec().MemberID)
}