package cluster import ( "context" "strings" "time" ) // staleControllerPodReasons runs one orchestration or CLI step. // Signature: (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error). // Why: after node or kubelet recovery, controller-owned pods can stay in // terminal or unknown status even though the node is Ready and a replacement may // already be healthy. A normal pod delete lets Kubernetes clean the stale status // without touching storage objects or forcing deletion on a partitioned node. func (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error) { unavailable, err := o.unavailableNodeSet(ctx) if err != nil { return nil, err } reasons := map[string]string{} for _, pod := range pods.Items { ns := strings.TrimSpace(pod.Metadata.Namespace) name := strings.TrimSpace(pod.Metadata.Name) node := strings.TrimSpace(pod.Spec.NodeName) if ns == "" || name == "" || node == "" { continue } phase := strings.TrimSpace(pod.Status.Phase) if !strings.EqualFold(phase, "Unknown") && !strings.EqualFold(phase, "Failed") { continue } if _, badNode := unavailable[node]; badNode { continue } if !podControllerOwned(pod) { continue } if !pod.Metadata.CreationTimestamp.IsZero() && time.Since(pod.Metadata.CreationTimestamp) < grace { continue } reasons[ns+"/"+name] = "StaleControllerPodOnReadyNode:" + node + ":" + phase } return reasons, nil } // staleControllerPodForceDeleteSafe runs one orchestration or CLI step. // Signature: staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool. // Why: a stale pod already marked for deletion may need force removal after a // node outage. Keep that fallback away from PVC-bearing pods so Ananke never // risks duplicating a storage writer. func staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool { if pod.Metadata.DeletionTimestamp == nil { return false } if time.Since(*pod.Metadata.DeletionTimestamp) < grace { return false } if podUsesPersistentVolumeClaim(pod) { return false } return true } // podUsesPersistentVolumeClaim runs one orchestration or CLI step. // Signature: podUsesPersistentVolumeClaim(pod podResource) bool. // Why: force-delete recovery is deliberately disallowed for pods with PVCs; the // scheduler and storage controller need to settle those normally. func podUsesPersistentVolumeClaim(pod podResource) bool { for _, volume := range pod.Spec.Volumes { if volume.PersistentVolumeClaim != nil && strings.TrimSpace(volume.PersistentVolumeClaim.ClaimName) != "" { return true } } return false } // podControllerOwned runs one orchestration or CLI step. // Signature: podControllerOwned(p podResource) bool. // Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve. func podControllerOwned(p podResource) bool { for _, owner := range p.Metadata.OwnerReferences { switch strings.TrimSpace(owner.Kind) { case "ReplicaSet", "StatefulSet", "DaemonSet", "Job": return true } } return false } // stuckContainerReason runs one orchestration or CLI step. // Signature: stuckContainerReason(p podResource, reasons map[string]struct{}) string. // Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve. func stuckContainerReason(p podResource, reasons map[string]struct{}) string { check := func(statuses []podContainerStatus) string { for _, st := range statuses { if st.State.Waiting == nil { continue } reason := strings.TrimSpace(st.State.Waiting.Reason) if reason == "" { continue } if _, ok := reasons[reason]; ok { return reason } } return "" } if reason := check(p.Status.InitContainerStatuses); reason != "" { return reason } return check(p.Status.ContainerStatuses) } // stuckVaultInitReason runs one orchestration or CLI step. // Signature: stuckVaultInitReason(p podResource, grace time.Duration) string. // Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve. func stuckVaultInitReason(p podResource, grace time.Duration) string { if !strings.EqualFold(strings.TrimSpace(p.Status.Phase), "Pending") { return "" } if !strings.EqualFold(strings.TrimSpace(p.Metadata.Annotations["vault.hashicorp.com/agent-inject"]), "true") { return "" } for _, st := range p.Status.InitContainerStatuses { if strings.TrimSpace(st.Name) != "vault-agent-init" || st.State.Running == nil { continue } startedAt := st.State.Running.StartedAt if startedAt.IsZero() { continue } if time.Since(startedAt) < grace { return "" } return "VaultInitStuck" } return "" }