recovery: force clear safe deleting pods
This commit is contained in:
parent
9031e09f4e
commit
707458cfc5
@ -286,14 +286,15 @@ func TestRecycleStuckControllerPodsCordonsEncryptedVolumeNodeWhenRepairFails(t *
|
||||
// TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes verifies that only stale controller pods on Ready nodes are recycled.
|
||||
// Signature: TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T).
|
||||
// Why: post-outage controller pods can remain Unknown or Failed after their
|
||||
// node recovers; normal deletion clears stale status without force-deleting or
|
||||
// touching storage.
|
||||
// node recovers; deletion clears stale status while force deletion stays away
|
||||
// from PVC-backed storage.
|
||||
func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
|
||||
old := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339)
|
||||
recent := time.Now().Add(-30 * time.Second).UTC().Format(time.RFC3339)
|
||||
pods := `{"items":[` +
|
||||
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-old","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` +
|
||||
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"secret"}]},"status":{"phase":"Failed"}},` +
|
||||
`{"metadata":{"namespace":"logging","name":"oauth2-proxy-terminating","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"oauth2-proxy-logs"}]},"spec":{"nodeName":"titan-18","volumes":[{"name":"secret"}]},"status":{"phase":"Running"}},` +
|
||||
`{"metadata":{"namespace":"longhorn-system","name":"pvc-backed-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"pvc-backed"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"data"}}]},"status":{"phase":"Failed"}},` +
|
||||
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-fresh","creationTimestamp":"` + recent + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` +
|
||||
`{"metadata":{"namespace":"maintenance","name":"stale-on-bad-node","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"maintenance"}]},"spec":{"nodeName":"titan-22"},"status":{"phase":"Unknown"}},` +
|
||||
@ -325,6 +326,15 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
|
||||
return true
|
||||
},
|
||||
},
|
||||
{
|
||||
match: func(name string, args []string) bool {
|
||||
if !matchContains("kubectl", "-n", "logging", "delete", "pod", "oauth2-proxy-terminating", "--wait=false", "--grace-period=0", "--force")(name, args) {
|
||||
return false
|
||||
}
|
||||
deleted = append(deleted, "oauth2-proxy-terminating")
|
||||
return true
|
||||
},
|
||||
},
|
||||
{
|
||||
match: func(name string, args []string) bool {
|
||||
if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "pvc-backed-failed", "--wait=false")(name, args) {
|
||||
@ -342,7 +352,7 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
|
||||
if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
|
||||
t.Fatalf("recycleStuckControllerPods failed: %v", err)
|
||||
}
|
||||
if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed,pvc-backed-failed" {
|
||||
if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed,oauth2-proxy-terminating,pvc-backed-failed" {
|
||||
t.Fatalf("expected only stale controller pods on Ready node to be recycled, got %#v", deleted)
|
||||
}
|
||||
}
|
||||
|
||||
@ -224,6 +224,9 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
|
||||
if reason == "" {
|
||||
reason = stalePhaseReasons[ns+"/"+name]
|
||||
}
|
||||
if reason == "" && staleControllerPodForceDeleteSafe(pod, grace) {
|
||||
reason = "StaleDeletingControllerPod"
|
||||
}
|
||||
if reason == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user