From 566765696b1e542381cb5a52a1af2e22d3c6b640 Mon Sep 17 00:00:00 2001 From: codex Date: Thu, 18 Jun 2026 22:46:02 -0300 Subject: [PATCH] recovery: recycle stale unknown controller pods --- .../orchestrator_unit_additional_test.go | 43 ++++++++++++++++++ .../orchestrator_workload_convergence.go | 45 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/internal/cluster/orchestrator_unit_additional_test.go b/internal/cluster/orchestrator_unit_additional_test.go index 2d17df7..697291a 100644 --- a/internal/cluster/orchestrator_unit_additional_test.go +++ b/internal/cluster/orchestrator_unit_additional_test.go @@ -158,6 +158,7 @@ func TestRecycleStuckControllerPodsHandlesLonghornAttachBlockedPods(t *testing.T {match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods}, {match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-0b\tFalse\n"}, {match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events}, + {match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-0b"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`}, { match: func(name string, args []string) bool { if !matchContains("kubectl", "-n", "monitoring", "delete", "pod", "victoria-metrics-single-server-0", "--wait=false")(name, args) { @@ -195,6 +196,7 @@ func TestRecycleStuckControllerPodsRepairsEncryptedVolumeCryptsetup(t *testing.T {match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods}, {match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-19\tTrue\n"}, {match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events}, + {match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-19"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`}, { match: func(name string, args []string) bool { if name != "ssh" || 
!strings.Contains(strings.Join(args, " "), "apt-get install -y --no-install-recommends cryptsetup-bin") { @@ -245,6 +247,7 @@ func TestRecycleStuckControllerPodsCordonsEncryptedVolumeNodeWhenRepairFails(t * {match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods}, {match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-19\tTrue\n"}, {match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: events}, + {match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-19"},"status":{"conditions":[{"type":"Ready","status":"True"}]}}]}`}, { match: matchContains("ssh", "apt-get install -y --no-install-recommends cryptsetup-bin"), err: errors.New("sudo: a password is required"), @@ -280,6 +283,46 @@ func TestRecycleStuckControllerPodsCordonsEncryptedVolumeNodeWhenRepairFails(t * } } +// TestRecycleStuckControllerPodsHandlesUnknownPodsOnReadyNodes runs one orchestration or CLI step. +// Signature: TestRecycleStuckControllerPodsHandlesUnknownPodsOnReadyNodes(t *testing.T). +// Why: post-outage controller pods can remain Unknown after their node recovers; +// normal deletion clears stale status without force-deleting or touching storage. 
+func TestRecycleStuckControllerPodsHandlesUnknownPodsOnReadyNodes(t *testing.T) {
+	old := time.Now().Add(-10 * time.Minute).UTC().Format(time.RFC3339) // 10m ago; presumably beyond the 180s grace configured below
+	recent := time.Now().Add(-30 * time.Second).UTC().Format(time.RFC3339) // 30s ago; presumably still inside that grace
+	pods := `{"items":[` +
+		`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-old","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` + // expected delete: aged, ReplicaSet-owned, on titan-12 (Ready=True)
+		`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-fresh","creationTimestamp":"` + recent + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` + // meant to be kept: created only 30s ago
+		`{"metadata":{"namespace":"maintenance","name":"stale-on-bad-node","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"maintenance"}]},"spec":{"nodeName":"titan-22"},"status":{"phase":"Unknown"}},` + // meant to be kept: titan-22 reports Ready=False
+		`{"metadata":{"namespace":"default","name":"bare-pod","creationTimestamp":"` + old + `"},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}}]}` // meant to be kept: no ownerReferences
+
+	deleted := []string{} // NOTE(review): only the expected delete is recorded; confirm unmatched deletes fail the stub harness
+	orch := buildOrchestratorWithStubs(t, config.Config{
+		Startup: config.Startup{StuckPodGraceSeconds: 180},
+	}, []commandStub{
+		{match: matchContains("kubectl", "get", "pods", "-A", "-o", "json"), out: pods},
+		{match: matchContains("kubectl", "-n", "longhorn-system", "get", "nodes.longhorn.io"), out: "titan-12\tTrue\ntitan-22\tTrue\n"},
+		{match: matchContains("kubectl", "get", "events", "-A", "-o", "json"), out: `{"items":[]}`},
+		{match: matchContains("kubectl", "get", "nodes", "-o", "json"), out: `{"items":[{"metadata":{"name":"titan-12"},"status":{"conditions":[{"type":"Ready","status":"True"}]}},{"metadata":{"name":"titan-22"},"status":{"conditions":[{"type":"Ready","status":"False"}]}}]}`}, // titan-12 Ready=True, titan-22 Ready=False
+		{
+			match: func(name string, args []string) bool {
+				if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "longhorn-vault-sync-old", "--wait=false")(name, args) {
+					return false
+				}
+				deleted = append(deleted, "longhorn-vault-sync-old") // record the delete as a side effect of matching
+				return true
+			},
+		},
+	})
+
+	if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
+		t.Fatalf("recycleStuckControllerPods failed: %v", err)
+	}
+	if len(deleted) != 1 || deleted[0] != "longhorn-vault-sync-old" {
+		t.Fatalf("expected only old Unknown controller pod on Ready node to be recycled, got %#v", deleted)
+	}
+}
+
 // TestNewConstructsOrchestrator runs one orchestration or CLI step.
 // Signature: TestNewConstructsOrchestrator(t *testing.T).
 // Why: covers constructor path in orchestrator core module.
diff --git a/internal/cluster/orchestrator_workload_convergence.go b/internal/cluster/orchestrator_workload_convergence.go
index 2d14ac8..6b6b9a4 100644
--- a/internal/cluster/orchestrator_workload_convergence.go
+++ b/internal/cluster/orchestrator_workload_convergence.go
@@ -182,6 +182,12 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
 	} else {
 		encryptedMountReasons = reasons
 	}
+	unknownPhaseReasons := map[string]string{} // stays empty when the scan below fails
+	if reasons, scanErr := o.unknownControllerPodReasons(ctx, list, grace); scanErr != nil {
+		o.log.Printf("warning: unknown controller pod scan failed: %v", scanErr) // scan failure is logged, not returned
+	} else {
+		unknownPhaseReasons = reasons
+	}
 	recycled := []string{}
 	for _, pod := range list.Items {
 		ns := strings.TrimSpace(pod.Metadata.Namespace)
@@ -215,6 +221,9 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
 		if reason == "" {
 			reason = encryptedMountReasons[ns+"/"+name]
 		}
+		if reason == "" { // consulted only when no earlier source produced a reason
+			reason = unknownPhaseReasons[ns+"/"+name]
+		}
 		if reason == "" {
 			continue
 		}
@@ -233,6 +242,42 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
 	return nil
 }
 
+// unknownControllerPodReasons runs one orchestration or CLI step.
+// Signature: (o *Orchestrator) unknownControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
+// Why: after a node or kubelet comes back, controller-owned pods may keep
+// phase Unknown although the node is no longer unavailable and a replacement
+// may already be healthy. A plain pod delete clears that stale status without
+// touching storage objects or force-deleting on a partitioned node.
+// Returns "namespace/name" -> "UnknownControllerPodOnReadyNode:<node>".
+func (o *Orchestrator) unknownControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error) {
+	downNodes, lookupErr := o.unavailableNodeSet(ctx)
+	if lookupErr != nil {
+		return nil, lookupErr
+	}
+	found := map[string]string{}
+	for _, candidate := range pods.Items {
+		namespace := strings.TrimSpace(candidate.Metadata.Namespace)
+		podName := strings.TrimSpace(candidate.Metadata.Name)
+		nodeName := strings.TrimSpace(candidate.Spec.NodeName)
+		_, onDownNode := downNodes[nodeName]
+		created := candidate.Metadata.CreationTimestamp
+		switch {
+		case namespace == "" || podName == "" || nodeName == "":
+			continue // incomplete identity or not yet scheduled
+		case !strings.EqualFold(strings.TrimSpace(candidate.Status.Phase), "Unknown"):
+			continue // only phase Unknown is handled here
+		case onDownNode:
+			continue // node is in the unavailable set
+		case !podControllerOwned(candidate):
+			continue // rejected by podControllerOwned
+		case !created.IsZero() && time.Since(created) < grace:
+			continue // younger than the grace window
+		}
+		found[namespace+"/"+podName] = "UnknownControllerPodOnReadyNode:" + nodeName
+	}
+	return found, nil
+}
+
 // repairEncryptedVolumeMountPrereqs runs one orchestration or CLI step.
 // Signature: (o *Orchestrator) repairEncryptedVolumeMountPrereqs(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
 // Why: encrypted Longhorn volume mounts depend on host cryptsetup. After node