ananke/internal/cluster/orchestrator_pod_recovery.go
2026-06-19 15:43:44 -03:00

140 lines
4.7 KiB
Go

package cluster
import (
"context"
"strings"
"time"
)
// staleControllerPodReasons reports controller-owned pods that sit in the
// Unknown or Failed phase on a node that unavailableNodeSet does not list.
// Signature: (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
// Why: after node or kubelet recovery, controller-owned pods can stay in
// terminal or unknown status even though the node is Ready and a replacement may
// already be healthy. A normal pod delete lets Kubernetes clean the stale status
// without touching storage objects or forcing deletion on a partitioned node.
//
// The result is keyed by "namespace/name" and each value has the form
// "StaleControllerPodOnReadyNode:<node>:<phase>". Pods with a blank namespace,
// name, or node name are ignored, as are pods whose non-zero creation timestamp
// is younger than grace. If the unavailable-node lookup fails, its error is
// returned as-is together with a nil map.
func (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error) {
	downNodes, err := o.unavailableNodeSet(ctx)
	if err != nil {
		return nil, err
	}

	stale := make(map[string]string)
	for _, candidate := range pods.Items {
		namespace := strings.TrimSpace(candidate.Metadata.Namespace)
		podName := strings.TrimSpace(candidate.Metadata.Name)
		nodeName := strings.TrimSpace(candidate.Spec.NodeName)
		phase := strings.TrimSpace(candidate.Status.Phase)
		created := candidate.Metadata.CreationTimestamp
		_, nodeDown := downNodes[nodeName]

		// Cases are evaluated top to bottom; the first one that holds skips the pod.
		switch {
		case namespace == "" || podName == "" || nodeName == "":
			// Not enough identity to build a key or to reason about the node.
			continue
		case !strings.EqualFold(phase, "Unknown") && !strings.EqualFold(phase, "Failed"):
			// Only the Unknown and Failed phases are considered stale.
			continue
		case nodeDown:
			// The node itself is unavailable; leave its pods alone.
			continue
		case !podControllerOwned(candidate):
			continue
		case !created.IsZero() && time.Since(created) < grace:
			// Too young; a zero timestamp never triggers this exemption.
			continue
		}

		stale[namespace+"/"+podName] = "StaleControllerPodOnReadyNode:" + nodeName + ":" + phase
	}
	return stale, nil
}
// staleControllerPodForceDeleteSafe reports whether a pod already marked for
// deletion may be force-removed.
// Signature: staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool.
// Why: a stale pod already marked for deletion may need force removal after a
// node outage. Keep that fallback away from PVC-bearing pods so Ananke never
// risks duplicating a storage writer.
//
// It returns true only when all of the following hold: the pod has a deletion
// timestamp, at least grace has elapsed since that timestamp, and the pod
// does not use a persistent volume claim.
func staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool {
	deletedAt := pod.Metadata.DeletionTimestamp
	if deletedAt == nil {
		// Never marked for deletion, so force removal is not on the table.
		return false
	}
	waitedLongEnough := time.Since(*deletedAt) >= grace
	return waitedLongEnough && !podUsesPersistentVolumeClaim(pod)
}
// podUsesPersistentVolumeClaim reports whether any volume in the pod spec
// references a persistent volume claim with a non-blank claim name.
// Signature: podUsesPersistentVolumeClaim(pod podResource) bool.
// Why: force-delete recovery is deliberately disallowed for pods with PVCs; the
// scheduler and storage controller need to settle those normally.
func podUsesPersistentVolumeClaim(pod podResource) bool {
	volumes := pod.Spec.Volumes
	for i := range volumes {
		claim := volumes[i].PersistentVolumeClaim
		if claim == nil {
			continue
		}
		// A claim reference whose name is empty or whitespace does not count.
		if strings.TrimSpace(claim.ClaimName) != "" {
			return true
		}
	}
	return false
}
// podControllerOwned reports whether the pod lists an owner reference whose
// kind is ReplicaSet, StatefulSet, DaemonSet, or Job.
// Signature: podControllerOwned(p podResource) bool.
// Why: callers use this to restrict recovery actions to pods that have one of
// these workload kinds as an owner. The kind is whitespace-trimmed and then
// compared case-sensitively.
func podControllerOwned(p podResource) bool {
	for _, ref := range p.Metadata.OwnerReferences {
		kind := strings.TrimSpace(ref.Kind)
		if kind == "ReplicaSet" || kind == "StatefulSet" || kind == "DaemonSet" || kind == "Job" {
			return true
		}
	}
	return false
}
// stuckContainerReason returns the first waiting reason found on the pod's
// containers that is a key of reasons, or "" when none matches.
// Signature: stuckContainerReason(p podResource, reasons map[string]struct{}) string.
// Why: init container statuses are scanned before regular container statuses,
// so a match on an init container is reported first. Reasons are
// whitespace-trimmed before lookup, and containers that are not waiting or
// whose trimmed reason is empty are skipped.
func stuckContainerReason(p podResource, reasons map[string]struct{}) string {
	// Order matters: init containers first, then regular containers.
	groups := [][]podContainerStatus{
		p.Status.InitContainerStatuses,
		p.Status.ContainerStatuses,
	}
	for _, group := range groups {
		for _, status := range group {
			waiting := status.State.Waiting
			if waiting == nil {
				continue
			}
			why := strings.TrimSpace(waiting.Reason)
			if why == "" {
				continue
			}
			if _, tracked := reasons[why]; tracked {
				return why
			}
		}
	}
	return ""
}
// stuckVaultInitReason returns "VaultInitStuck" when a Pending pod annotated
// with vault.hashicorp.com/agent-inject=true has a running vault-agent-init
// init container that started at least grace ago; otherwise it returns "".
// Signature: stuckVaultInitReason(p podResource, grace time.Duration) string.
// Why: the phase and annotation value are compared case-insensitively after
// trimming. Only the first vault-agent-init status that is running with a
// non-zero start time is evaluated; its age alone decides the result.
func stuckVaultInitReason(p podResource, grace time.Duration) string {
	pending := strings.EqualFold(strings.TrimSpace(p.Status.Phase), "Pending")
	injected := strings.EqualFold(strings.TrimSpace(p.Metadata.Annotations["vault.hashicorp.com/agent-inject"]), "true")
	if !pending || !injected {
		return ""
	}

	for _, status := range p.Status.InitContainerStatuses {
		running := status.State.Running
		if running == nil || strings.TrimSpace(status.Name) != "vault-agent-init" {
			continue
		}
		if running.StartedAt.IsZero() {
			// No usable start time; look at the next status.
			continue
		}
		if time.Since(running.StartedAt) >= grace {
			return "VaultInitStuck"
		}
		// Still within the grace window; later statuses are not consulted.
		return ""
	}
	return ""
}