fix: ignore completed pods in cluster health check
This fixes an error where integration tests become stuck with a message like:

```
waiting for coredns to report ready: some pods are not ready: [coredns-868c687b7-g2z64]
```

After some random sequence of node restarts, one of the pods might become "stuck" in the `Completed` state (as shown by `kubectl get pods`), blocking the check, since such a pod will never become ready.

Signed-off-by: Andrey Smirnov <andrey.smirnov@talos-systems.com>
commit 5a91f6076d
parent 488ce753fd
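For context, the "Completed" state the message refers to can be detected through the Kubernetes API as a pod whose containers have all terminated even though the pod phase still reads `Running`. The sketch below is a minimal standalone illustration of that condition; the `health` package and `isCompleted` helper name are hypothetical and not part of this change, whose actual logic lives in the diff that follows.

```go
package health

import (
	corev1 "k8s.io/api/core/v1"
)

// isCompleted reports whether a pod is in the state that `kubectl get pods`
// displays as 'Completed': the phase is still Running, but every container
// has already terminated. Such a pod will never become Ready, so a readiness
// check has to skip it.
//
// Illustrative helper only; the check added by this commit is inlined in
// K8sPodReadyAssertion below.
func isCompleted(pod *corev1.Pod) bool {
	if pod.Status.Phase != corev1.PodRunning {
		return false
	}

	for _, containerStatus := range pod.Status.ContainerStatuses {
		if containerStatus.State.Terminated == nil {
			return false
		}
	}

	return true
}
```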
@@ -275,10 +275,33 @@ func K8sPodReadyAssertion(ctx context.Context, cluster cluster.K8sProvider, name
	var notReadyPods, readyPods []string

	for _, pod := range pods.Items {
		// skip deleted pods
		if pod.DeletionTimestamp != nil {
			continue
		}

		// skip failed pods
		if pod.Status.Phase == v1.PodFailed {
			continue
		}

		// skip pods which `kubectl get pods` marks as 'Completed':
		// * these pods have a phase 'Running', but all containers are terminated
		// * such pods appear after a graceful kubelet shutdown
		allContainersTerminated := true

		for _, containerStatus := range pod.Status.ContainerStatuses {
			if containerStatus.State.Terminated == nil {
				allContainersTerminated = false

				break
			}
		}

		if allContainersTerminated {
			continue
		}

		ready := false

		for _, cond := range pod.Status.Conditions {
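The diff view is cut off at the loop over `pod.Status.Conditions`. As general background on that API (and explicitly not the remainder of this change), readiness is conventionally derived from the `PodReady` condition, roughly as in this hedged sketch; the `podIsReady` name is hypothetical.

```go
package health

import (
	corev1 "k8s.io/api/core/v1"
)

// podIsReady is an illustrative helper (not taken from this commit) showing
// the conventional way to read readiness from pod status conditions: a pod
// counts as Ready when the PodReady condition reports True.
func podIsReady(pod *corev1.Pod) bool {
	for _, cond := range pod.Status.Conditions {
		if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
			return true
		}
	}

	return false
}
```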