recovery: force clear safe stale pods

This commit is contained in:
codex 2026-06-18 23:05:02 -03:00
parent 83723d0358
commit c415516376
3 changed files with 69 additions and 5 deletions

View File

@ -246,6 +246,7 @@ type podResource struct {
Name string `json:"name"`
Annotations map[string]string `json:"annotations"`
CreationTimestamp time.Time `json:"creationTimestamp"`
DeletionTimestamp *time.Time `json:"deletionTimestamp"`
OwnerReferences []ownerReference `json:"ownerReferences"`
} `json:"metadata"`
Spec struct {
@ -285,6 +286,16 @@ type podContainerRunningState struct {
// podSpec is the subset of a pod's "spec" object that is decoded from JSON:
// node selector, affinity rules, and the list of declared volumes.
type podSpec struct {
// NodeSelector is decoded from the "nodeSelector" key.
NodeSelector map[string]string `json:"nodeSelector"`
// Affinity is decoded from the "affinity" key; it is a pointer, so it stays
// nil when the key is absent or null.
Affinity *podAffinity `json:"affinity"`
// Volumes is decoded from the "volumes" key, one podVolume per entry.
Volumes []podVolume `json:"volumes"`
}
// Volume-related shapes decoded from a pod's "volumes" list.
type (
	// podVolume is a single entry of a pod's "volumes" list. Only the volume
	// name and the optional PVC source are decoded; any other keys in the
	// JSON object are ignored by the decoder.
	podVolume struct {
		Name                  string                    `json:"name"`
		PersistentVolumeClaim *podPersistentVolumeClaim `json:"persistentVolumeClaim"`
	}

	// podPersistentVolumeClaim is the "persistentVolumeClaim" source of a
	// volume. It carries the name of the referenced claim.
	podPersistentVolumeClaim struct {
		ClaimName string `json:"claimName"`
	}
)
type podAffinity struct {

View File

@ -293,7 +293,8 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
recent := time.Now().Add(-30 * time.Second).UTC().Format(time.RFC3339)
pods := `{"items":[` +
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-old","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` +
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-failed","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Failed"}},` +
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"secret"}]},"status":{"phase":"Failed"}},` +
`{"metadata":{"namespace":"longhorn-system","name":"pvc-backed-failed","creationTimestamp":"` + old + `","deletionTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"pvc-backed"}]},"spec":{"nodeName":"titan-12","volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"data"}}]},"status":{"phase":"Failed"}},` +
`{"metadata":{"namespace":"longhorn-system","name":"longhorn-vault-sync-fresh","creationTimestamp":"` + recent + `","ownerReferences":[{"kind":"ReplicaSet","name":"longhorn-vault-sync"}]},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}},` +
`{"metadata":{"namespace":"maintenance","name":"stale-on-bad-node","creationTimestamp":"` + old + `","ownerReferences":[{"kind":"ReplicaSet","name":"maintenance"}]},"spec":{"nodeName":"titan-22"},"status":{"phase":"Unknown"}},` +
`{"metadata":{"namespace":"default","name":"bare-pod","creationTimestamp":"` + old + `"},"spec":{"nodeName":"titan-12"},"status":{"phase":"Unknown"}}]}`
@ -317,19 +318,31 @@ func TestRecycleStuckControllerPodsHandlesStalePodsOnReadyNodes(t *testing.T) {
},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "longhorn-vault-sync-failed", "--wait=false")(name, args) {
if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "longhorn-vault-sync-failed", "--wait=false", "--grace-period=0", "--force")(name, args) {
return false
}
deleted = append(deleted, "longhorn-vault-sync-failed")
return true
},
},
{
match: func(name string, args []string) bool {
if !matchContains("kubectl", "-n", "longhorn-system", "delete", "pod", "pvc-backed-failed", "--wait=false")(name, args) {
return false
}
if strings.Contains(strings.Join(args, " "), "--force") {
t.Fatalf("pvc-backed stale pod must not be force deleted")
}
deleted = append(deleted, "pvc-backed-failed")
return true
},
},
})
if err := orch.recycleStuckControllerPods(context.Background()); err != nil {
t.Fatalf("recycleStuckControllerPods failed: %v", err)
}
if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed" {
if strings.Join(deleted, ",") != "longhorn-vault-sync-old,longhorn-vault-sync-failed,pvc-backed-failed" {
t.Fatalf("expected only stale controller pods on Ready node to be recycled, got %#v", deleted)
}
}

View File

@ -227,8 +227,17 @@ func (o *Orchestrator) recycleStuckControllerPods(ctx context.Context) error {
if reason == "" {
continue
}
deleteArgs := []string{"-n", ns, "delete", "pod", name, "--wait=false"}
forceDelete := staleControllerPodForceDeleteSafe(pod, grace)
if forceDelete {
deleteArgs = append(deleteArgs, "--grace-period=0", "--force")
}
if forceDelete {
o.log.Printf("warning: force recycling stuck pod %s/%s reason=%s age=%s", ns, name, reason, age.Round(time.Second))
} else {
o.log.Printf("warning: recycling stuck pod %s/%s reason=%s age=%s", ns, name, reason, age.Round(time.Second))
if _, err := o.kubectl(ctx, 30*time.Second, "-n", ns, "delete", "pod", name, "--wait=false"); err != nil && !isNotFoundErr(err) {
}
if _, err := o.kubectl(ctx, 30*time.Second, deleteArgs...); err != nil && !isNotFoundErr(err) {
o.log.Printf("warning: recycle pod failed for %s/%s: %v", ns, name, err)
continue
}
@ -279,6 +288,37 @@ func (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podLi
return reasons, nil
}
// staleControllerPodForceDeleteSafe reports whether a stuck pod may be removed
// with a forced delete instead of a normal one.
// Signature: staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool.
// Why: a stale pod already marked for deletion may need force removal after a
// node outage. Keep that fallback away from PVC-bearing pods so Ananke never
// risks duplicating a storage writer.
// Returns true only when all of the following hold: the pod carries a deletion
// timestamp, at least grace has elapsed since that timestamp, and none of the
// pod's volumes references a persistent volume claim.
func staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool {
	markedAt := pod.Metadata.DeletionTimestamp
	switch {
	case markedAt == nil:
		// Deletion was never requested, so there is nothing to force through.
		return false
	case time.Since(*markedAt) < grace:
		// Still inside the grace window measured from the deletion timestamp.
		return false
	default:
		// The PVC check runs last, exactly as the guard order requires.
		return !podUsesPersistentVolumeClaim(pod)
	}
}
// podUsesPersistentVolumeClaim reports whether any volume declared on the pod
// names a persistent volume claim.
// Signature: podUsesPersistentVolumeClaim(pod podResource) bool.
// Why: force-delete recovery is deliberately disallowed for pods with PVCs; the
// scheduler and storage controller need to settle those normally.
// A volume counts only when its persistentVolumeClaim source is present and
// its claim name is non-empty after trimming whitespace.
// NOTE(review): only the persistentVolumeClaim source is inspected; volumes
// that provision claims another way (e.g. generic ephemeral volumes) are not
// detected here — confirm whether they should be treated the same.
func podUsesPersistentVolumeClaim(pod podResource) bool {
	for _, vol := range pod.Spec.Volumes {
		claim := vol.PersistentVolumeClaim
		if claim == nil {
			continue
		}
		if strings.TrimSpace(claim.ClaimName) == "" {
			continue
		}
		return true
	}
	return false
}
// repairEncryptedVolumeMountPrereqs runs one orchestration or CLI step.
// Signature: (o *Orchestrator) repairEncryptedVolumeMountPrereqs(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
// Why: encrypted Longhorn volume mounts depend on host cryptsetup. After node