From c415516376f3ce5a19daa06a055885a70ae784d0 Mon Sep 17 00:00:00 2001 From: codex Date: Thu, 18 Jun 2026 23:05:02 -0300 Subject: [PATCH] recovery: force clear safe stale pods --- .../cluster/orchestrator_storage_types.go | 11 +++++ .../orchestrator_unit_additional_test.go | 19 ++++++-- .../orchestrator_workload_convergence.go | 44 ++++++++++++++++++- 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/internal/cluster/orchestrator_storage_types.go b/internal/cluster/orchestrator_storage_types.go index 4fca02b..a08b78d 100644 --- a/internal/cluster/orchestrator_storage_types.go +++ b/internal/cluster/orchestrator_storage_types.go @@ -246,6 +246,7 @@ type podResource struct { Name string `json:"name"` Annotations map[string]string `json:"annotations"` CreationTimestamp time.Time `json:"creationTimestamp"` + DeletionTimestamp *time.Time `json:"deletionTimestamp"` OwnerReferences []ownerReference `json:"ownerReferences"` } `json:"metadata"` Spec struct { @@ -285,6 +286,16 @@ type podContainerRunningState struct { type podSpec struct { NodeSelector map[string]string `json:"nodeSelector"` Affinity *podAffinity `json:"affinity"` + Volumes []podVolume `json:"volumes"` +} + +type podVolume struct { + Name string `json:"name"` + PersistentVolumeClaim *podPersistentVolumeClaim `json:"persistentVolumeClaim"` +} + +type podPersistentVolumeClaim struct { + ClaimName string `json:"claimName"` } type podAffinity struct { diff --git a/internal/cluster/orchestrator_unit_additional_test.go b/internal/cluster/orchestrator_unit_additional_test.go index 1a20865..62102d0 100644 --- a/internal/cluster/orchestrator_unit_additional_test.go +++ b/internal/cluster/orchestrator_unit_additional_test.go @@ -293,7 +293,8 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) { recent := time.Now().Add(-30 * time.Second).UTC().Format(time.RFC3339) pods := `{"items":[` + 
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-old","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` + - `{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-failed","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Failed"}},` + + `{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"secret"}]},"status":{"phase":"Failed"}},` + + `{"metadata":{"namespace":"longhorn-system","name":"pvc-backed-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"pvc-backed"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"data"}}]},"status":{"phase":"Failed"}},` + `{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-fresh","creationTimestamp":"` + recent + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` + `{"metadata":{"namespace":"maintenance","name":"stale-on-bad-node","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"maintenance"}]},"spec":{"nodeName":"titan-22"},"status":{"phase":"Unknown"}},` + `{"metadata":{"namespace":"default","name":"bare-pod","creationTimestamp":"` + old + `"},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}}]}` @@ -317,19 +318,31 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) { }, { match: func(name string, args []string) bool { - if !matchContains("kubectl", "-n", 
"longhorn-system", "delete", "pod", "longhorn-vault-sync-failed", "--wait=false")(name, args) { + if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "longhorn-vault-sync-failed", "--wait=false", "--grace-period=0", "--force")(name, args) { return false } deleted = append(deleted, "longhorn-vault-sync-failed") return true }, }, + { + match: func(name string, args []string) bool { + if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "pvc-backed-failed", "--wait=false")(name, args) { + return false + } + if strings.Contains(strings.Join(args, " "), "--force") { + t.Fatalf("pvc-backed stale pod must not be force deleted") + } + deleted = append(deleted, "pvc-backed-failed") + return true + }, + }, }) if err := orch.recycleStuckControllerPods(context.Background()); err != nil { t.Fatalf("recycleStuckControllerPods failed: %v", err) } - if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed" { + if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed,pvc-backed-failed" { t.Fatalf("expected only stale controller pods on Ready node to be recycled, got %#v", deleted) } } diff --git a/internal/cluster/orchestrator_workload_convergence.go b/internal/cluster/orchestrator_workload_convergence.go index bef7faf..11738c5 100644 --- a/internal/cluster/orchestrator_workload_convergence.go +++ b/internal/cluster/orchestrator_workload_convergence.go @@ -227,8 +227,17 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error { if reason == "" { continue } - o.log.Printf("warning: recycling stuck pod %s/%s reason=%s age=%s", ns, name, reason, age.Round(time.Second)) - if _, err := o.kubectl(ctx, 30*time.Second, "-n", ns, "delete", "pod", name, "--wait=false"); err != nil && !isNotFoundErr(err) { + deleteArgs := []string{"-n", ns, "delete", "pod", name, "--wait=false"} + forceDelete := staleControllerPodForceDeleteSafe(pod, grace) + if forceDelete { + deleteArgs = 
append(deleteArgs, "--grace-period=0", "--force") + } + if forceDelete { + o.log.Printf("warning: force recycling stuck pod %s/%s reason=%s age=%s", ns, name, reason, age.Round(time.Second)) + } else { + o.log.Printf("warning: recycling stuck pod %s/%s reason=%s age=%s", ns, name, reason, age.Round(time.Second)) + } + if _, err := o.kubectl(ctx, 30*time.Second, deleteArgs...); err != nil && !isNotFoundErr(err) { o.log.Printf("warning: recycle pod failed for %s/%s: %v", ns, name, err) continue } @@ -279,6 +288,37 @@ func (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podLi return reasons, nil } +// staleControllerPodForceDeleteSafe reports whether a stuck pod may be force deleted. +// Signature: staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool. +// Why: a stale pod whose deletionTimestamp is at least grace old may need force +// removal after a node outage. Pods with no deletionTimestamp or with a PVC +// volume return false so Ananke never risks duplicating a storage writer. +func staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool { + if pod.Metadata.DeletionTimestamp == nil { + return false + } + if time.Since(*pod.Metadata.DeletionTimestamp) < grace { + return false + } + if podUsesPersistentVolumeClaim(pod) { + return false + } + return true +} + +// podUsesPersistentVolumeClaim reports whether any pod volume has a non-blank PVC claimName. +// Signature: podUsesPersistentVolumeClaim(pod podResource) bool. +// Why: force-delete recovery is deliberately disallowed for pods with PVCs; the +// scheduler and storage controller need to settle those normally. NOTE(review): other volume sources are not inspected; confirm that is intended. +func podUsesPersistentVolumeClaim(pod podResource) bool { + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil && strings.TrimSpace(volume.PersistentVolumeClaim.ClaimName) != "" { + return true + } + } + return false +} + // repairEncryptedVolumeMountPrereqs runs one orchestration or CLI step.
// Signature: (o *Orchestrator) repairEncryptedVolumeMountPrereqs(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error). // Why: encrypted Longhorn volume mounts depend on host cryptsetup. After node