// ananke/internal/cluster/orchestrator_pod_recovery.go

package cluster

import (
	"context"
	"strings"
	"time"
)

// staleControllerPodReasons collects controller-owned pods that sit in the
// Unknown or Failed phase on a node that is not reported as unavailable.
// Signature: (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error).
// Why: once a node or kubelet has recovered, controller-owned pods can linger
// in a terminal or unknown status even though a replacement may already be
// healthy. This function only identifies those candidates; it deletes nothing.
//
// The result is keyed by "namespace/name" and each value has the form
// "StaleControllerPodOnReadyNode:<node>:<phase>". An error from
// unavailableNodeSet is returned unchanged with a nil map.
func (o *Orchestrator) staleControllerPodReasons(ctx context.Context, pods podList, grace time.Duration) (map[string]string, error) {
	downNodes, err := o.unavailableNodeSet(ctx)
	if err != nil {
		return nil, err
	}
	stale := make(map[string]string)
	for _, candidate := range pods.Items {
		namespace := strings.TrimSpace(candidate.Metadata.Namespace)
		podName := strings.TrimSpace(candidate.Metadata.Name)
		nodeName := strings.TrimSpace(candidate.Spec.NodeName)
		// Pods lacking an identity or a node assignment cannot be classified.
		if namespace == "" || podName == "" || nodeName == "" {
			continue
		}
		// Only the Unknown and Failed phases (case-insensitive) are considered.
		phase := strings.TrimSpace(candidate.Status.Phase)
		if !(strings.EqualFold(phase, "Unknown") || strings.EqualFold(phase, "Failed")) {
			continue
		}
		// Pods on nodes that are still unavailable are left alone.
		if _, nodeDown := downNodes[nodeName]; nodeDown {
			continue
		}
		if !podControllerOwned(candidate) {
			continue
		}
		// A pod with a known creation time younger than grace is skipped; a
		// zero creation time bypasses the age check entirely.
		if created := candidate.Metadata.CreationTimestamp; !created.IsZero() && time.Since(created) < grace {
			continue
		}
		stale[namespace+"/"+podName] = "StaleControllerPodOnReadyNode:" + nodeName + ":" + phase
	}
	return stale, nil
}

// staleControllerPodForceDeleteSafe reports whether a pod already marked for
// deletion may be force-removed.
// Signature: staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool.
// Why: a stale pod that is stuck terminating after a node outage may need a
// forced removal, but that fallback must stay away from PVC-bearing pods so
// Ananke never risks duplicating a storage writer.
//
// It returns true only when the pod has a deletion timestamp, at least grace
// has elapsed since that timestamp, and the pod mounts no PersistentVolumeClaim.
func staleControllerPodForceDeleteSafe(pod podResource, grace time.Duration) bool {
	markedAt := pod.Metadata.DeletionTimestamp
	switch {
	case markedAt == nil:
		// Never asked to terminate, so there is nothing to force.
		return false
	case time.Since(*markedAt) < grace:
		// Still inside the window allowed for a normal deletion to finish.
		return false
	default:
		return !podUsesPersistentVolumeClaim(pod)
	}
}

// podUsesPersistentVolumeClaim reports whether any volume in the pod spec
// references a PersistentVolumeClaim with a non-blank claim name.
// Signature: podUsesPersistentVolumeClaim(pod podResource) bool.
// Why: force-delete recovery is deliberately disallowed for pods with PVCs; the
// scheduler and storage controller need to settle those normally.
func podUsesPersistentVolumeClaim(pod podResource) bool {
	for _, vol := range pod.Spec.Volumes {
		claim := vol.PersistentVolumeClaim
		if claim == nil {
			continue
		}
		// A claim source whose name is only whitespace is treated as absent.
		if strings.TrimSpace(claim.ClaimName) == "" {
			continue
		}
		return true
	}
	return false
}

// podControllerOwned reports whether the pod has at least one owner reference
// whose kind is ReplicaSet, StatefulSet, DaemonSet, or Job.
// Signature: podControllerOwned(p podResource) bool.
// Why: recovery logic in this file only acts on pods that pass this check.
// The kind comparison is case-sensitive after trimming surrounding whitespace.
func podControllerOwned(p podResource) bool {
	for _, ref := range p.Metadata.OwnerReferences {
		kind := strings.TrimSpace(ref.Kind)
		if kind == "ReplicaSet" || kind == "StatefulSet" || kind == "DaemonSet" || kind == "Job" {
			return true
		}
	}
	return false
}

// stuckContainerReason returns the first waiting reason found on the pod's
// containers that is present in the reasons set, or "" when none matches.
// Signature: stuckContainerReason(p podResource, reasons map[string]struct{}) string.
// Why: lets callers pick out pods whose containers are waiting for one of a
// caller-chosen set of reasons. Init container statuses are scanned before
// regular container statuses, so an init container match wins.
func stuckContainerReason(p podResource, reasons map[string]struct{}) string {
	groups := [][]podContainerStatus{
		p.Status.InitContainerStatuses,
		p.Status.ContainerStatuses,
	}
	for _, statuses := range groups {
		for _, st := range statuses {
			waiting := st.State.Waiting
			if waiting == nil {
				continue
			}
			// Blank reasons never match, even if the set contains "".
			reason := strings.TrimSpace(waiting.Reason)
			if reason == "" {
				continue
			}
			if _, wanted := reasons[reason]; wanted {
				return reason
			}
		}
	}
	return ""
}

// stuckVaultInitReason returns "VaultInitStuck" when a Pending pod annotated
// with vault.hashicorp.com/agent-inject=true has a vault-agent-init init
// container that has been running for at least grace; otherwise it returns "".
// Signature: stuckVaultInitReason(p podResource, grace time.Duration) string.
// Why: flags pods held in Pending by a vault-agent-init container that keeps
// running. Only the first vault-agent-init status with a running state and a
// non-zero start time is evaluated; its age alone decides the result.
func stuckVaultInitReason(p podResource, grace time.Duration) string {
	pending := strings.EqualFold(strings.TrimSpace(p.Status.Phase), "Pending")
	if !pending {
		return ""
	}
	injectFlag := strings.TrimSpace(p.Metadata.Annotations["vault.hashicorp.com/agent-inject"])
	if !strings.EqualFold(injectFlag, "true") {
		return ""
	}
	for _, st := range p.Status.InitContainerStatuses {
		if strings.TrimSpace(st.Name) != "vault-agent-init" {
			continue
		}
		running := st.State.Running
		if running == nil || running.StartedAt.IsZero() {
			continue
		}
		// The first usable status settles the outcome either way.
		if time.Since(running.StartedAt) >= grace {
			return "VaultInitStuck"
		}
		return ""
	}
	return ""
}