// File: ananke/testing/orchestrator/hooks_lowfile_coverage_test.go
package orchestrator
import (
"context"
"errors"
"io"
"log"
"os"
"path/filepath"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/cluster"
"scm.bstein.dev/bstein/ananke/internal/execx"
"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestHookLowFileCoverageBoost runs one orchestration or CLI step.
// Signature: TestHookLowFileCoverageBoost(t *testing.T).
// Why: raises the low-coverage orchestrator files through deterministic top-level
// tests that only use the exported hook surface.
func TestHookLowFileCoverageBoost(t *testing.T) {
// Subtest 1: pure helper gates (desired/ready math, controller ownership,
// stuck-pod reasons) plus the workload-convergence wait and stuck-pod recycle
// paths, all driven by canned kubectl output.
t.Run("workload-convergence-helpers-and-gates", func(t *testing.T) {
// Table for TestHookDesiredReady. The cases suggest kind matching is
// case-insensitive ("Deployment" vs "deployment"), deployments without an
// explicit replica count default to 1 desired, daemonsets use the
// scheduled/ready pair, and unsupported kinds ("job") report wantB=false.
desiredCases := []struct {
kind string
has bool // whether spec.replicas is explicitly set
rep int32 // explicit replica count (meaningful when has is true)
ready int32 // readyReplicas for deployments/statefulsets
sched int32 // daemonset desiredNumberScheduled
num int32 // daemonset numberReady
wantD int32 // expected desired count
wantR int32 // expected ready count
wantB bool // expected "kind is tracked" flag
}{
{kind: "Deployment", has: false, ready: 1, wantD: 1, wantR: 1, wantB: true},
{kind: "deployment", has: true, rep: 3, ready: 2, wantD: 3, wantR: 2, wantB: true},
{kind: "daemonset", sched: 4, num: 3, wantD: 4, wantR: 3, wantB: true},
{kind: "job", wantD: 0, wantR: 0, wantB: false},
}
for _, tc := range desiredCases {
gotD, gotR, gotB := cluster.TestHookDesiredReady(tc.kind, tc.has, tc.rep, tc.ready, tc.sched, tc.num)
if gotD != tc.wantD || gotR != tc.wantR || gotB != tc.wantB {
t.Fatalf("desiredReady(%q)=%d,%d,%v want %d,%d,%v", tc.kind, gotD, gotR, gotB, tc.wantD, tc.wantR, tc.wantB)
}
}
// Ownership gate: ReplicaSet/StatefulSet/DaemonSet owner refs count as
// controller-owned; Job owners do not.
if !cluster.TestHookPodControllerOwned([]string{"ReplicaSet"}) {
t.Fatalf("expected ReplicaSet owner to be controller-owned")
}
if !cluster.TestHookPodControllerOwned([]string{"StatefulSet"}) {
t.Fatalf("expected StatefulSet owner to be controller-owned")
}
if !cluster.TestHookPodControllerOwned([]string{"DaemonSet"}) {
t.Fatalf("expected DaemonSet owner to be controller-owned")
}
if cluster.TestHookPodControllerOwned([]string{"Job"}) {
t.Fatalf("expected Job owner to be non controller-owned")
}
// Stuck-reason detection: init-container and container waiting reasons are
// both consulted, and only reasons present in the allow-list (third arg)
// are reported; anything else collapses to "".
if got := cluster.TestHookStuckContainerReason([]string{"ImagePullBackOff"}, nil, []string{"ImagePullBackOff"}); got != "ImagePullBackOff" {
t.Fatalf("expected init-container stuck reason, got %q", got)
}
if got := cluster.TestHookStuckContainerReason(nil, []string{"CrashLoopBackOff"}, []string{"CrashLoopBackOff"}); got != "CrashLoopBackOff" {
t.Fatalf("expected container stuck reason, got %q", got)
}
if got := cluster.TestHookStuckContainerReason([]string{"ImagePullBackOff"}, []string{"CrashLoopBackOff"}, []string{"Missing"}); got != "" {
t.Fatalf("expected filtered stuck reason to be empty, got %q", got)
}
// stuckVaultInitReason should only fire for a Pending, inject-annotated pod
// whose start time is older than the grace window.
vaultCases := []struct {
name string
phase string
inject bool // vault agent-inject annotation present
startedAgo time.Duration // how long ago the pod/init started
grace time.Duration // grace period before reporting stuck
want string // expected reason; "" means not stuck
}{
{name: "phase-running", phase: "Running", inject: true, startedAgo: 10 * time.Minute, grace: time.Minute, want: ""},
{name: "inject-false", phase: "Pending", inject: false, startedAgo: 10 * time.Minute, grace: time.Minute, want: ""},
{name: "within-grace", phase: "Pending", inject: true, startedAgo: 30 * time.Second, grace: time.Minute, want: ""},
{name: "stuck", phase: "Pending", inject: true, startedAgo: 10 * time.Minute, grace: time.Minute, want: "VaultInitStuck"},
}
for _, tc := range vaultCases {
got := cluster.TestHookStuckVaultInitReason(tc.phase, tc.inject, tc.startedAgo, tc.grace)
if got != tc.want {
t.Fatalf("%s: stuckVaultInitReason=%q want %q", tc.name, got, tc.want)
}
}
// Shared config for the convergence scenarios: 1s wait/poll windows keep the
// test fast, and each ignore list is exercised by a matching fixture entry
// below (namespace, node, workload path, flux kustomization).
cfg := lifecycleConfig(t)
cfg.Startup.WorkloadConvergenceWaitSeconds = 1
cfg.Startup.WorkloadConvergencePollSeconds = 1
cfg.Startup.StuckPodGraceSeconds = 1
cfg.Startup.IgnoreWorkloadNamespaces = []string{"ignored-ns"}
cfg.Startup.IgnoreUnavailableNodes = []string{"titan-22"}
cfg.Startup.IgnoreWorkloads = []string{"monitoring/deployment/ignore-me"}
cfg.Startup.IgnoreFluxKustomizations = []string{"ignored/flux-system"}
// Fake runner where the only not-ready controllers fall under an ignore
// rule (ignored-ns namespace, flux-system, ignore-me workload), so the
// convergence gate should report ready.
readyRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
return `{"items":[
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1,"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"titan-23"}}}},"status":{"readyReplicas":1}},
{"kind":"DaemonSet","metadata":{"namespace":"monitoring","name":"node-exporter"},"spec":{"template":{"spec":{"nodeName":"titan-23"}}},"status":{"desiredNumberScheduled":2,"numberReady":1}},
{"kind":"Deployment","metadata":{"namespace":"ignored-ns","name":"skip"},"spec":{"replicas":1},"status":{"readyReplicas":0}},
{"kind":"Deployment","metadata":{"namespace":"flux-system","name":"ignored"},"spec":{"replicas":1},"status":{"readyReplicas":0}},
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"ignore-me"},"spec":{"replicas":1},"status":{"readyReplicas":0}}
]}`, nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchReady, _ := newHookOrchestrator(t, cfg, readyRun, readyRun)
ready, detail, err := orchReady.TestHookWorkloadConvergenceReady(context.Background())
if err != nil || !ready || !strings.Contains(detail, "controllers ready=") {
t.Fatalf("expected workload convergence ready path, ready=%v detail=%q err=%v", ready, detail, err)
}
// A deployment that never becomes ready (and no stuck pods to recycle) must
// make the 1s convergence wait time out with the expected error text.
pendingRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":0}}]}`, nil
case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
return `{"items":[]}`, nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchPending, _ := newHookOrchestrator(t, cfg, pendingRun, pendingRun)
if err := orchPending.TestHookWaitForWorkloadConvergence(context.Background()); err == nil || !strings.Contains(err.Error(), "workload convergence not satisfied") {
t.Fatalf("expected workload convergence timeout, got %v", err)
}
// Pod fixture covering every skip branch of the recycler: empty namespace,
// ignored namespace (kube-system — NOTE(review): presumably built-in, since
// cfg only ignores "ignored-ns"), ignored workload, ignored node, unowned
// pod, pod newer than the grace period, then two genuinely stuck pods.
// grafana-0's delete fails to prove the recycle is best-effort.
podRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
return `{"items":[
{"metadata":{"namespace":"","name":"missing-ns"}},
{"metadata":{"namespace":"kube-system","name":"ignored","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"ignored"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"ignore-me","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"ignore-me"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"node-ignored","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"node-ignored"}]},"spec":{"nodeName":"titan-22","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"unowned","creationTimestamp":"2020-01-01T00:00:00Z"},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"recent","creationTimestamp":"` + time.Now().UTC().Format(time.RFC3339) + `","ownerReferences":[{"kind":"ReplicaSet","name":"recent"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"monitoring","name":"grafana-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"ReplicaSet","name":"grafana"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"c"}]},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}},
{"metadata":{"namespace":"vault","name":"vault-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"StatefulSet","name":"vault"}],"annotations":{"vault.hashicorp.com/agent-inject":"true"}},"spec":{"nodeName":"titan-23","containers":[{"name":"vault"}]},"status":{"phase":"Pending","initContainerStatuses":[{"name":"vault-agent-init","state":{"running":{"startedAt":"2020-01-01T00:00:00Z"}}}]}}
]}`, nil
case name == "kubectl" && strings.Contains(command, "delete pod grafana-0"):
return "", errors.New("boom")
case name == "kubectl" && strings.Contains(command, "delete pod vault-0"):
return "", nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
// The recycle hook must succeed even though one pod deletion errored.
orchPods, _ := newHookOrchestrator(t, cfg, podRun, podRun)
if err := orchPods.TestHookRecycleStuckControllerPods(context.Background()); err != nil {
t.Fatalf("expected recycleStuckControllerPods best-effort success, got %v", err)
}
})
// Subtest 2: worker enumeration/discovery, flux suspend patching, workload
// scaling, snapshot persistence, and the scale-down failure path.
t.Run("scaling-helpers-and-recovery", func(t *testing.T) {
cfg := lifecycleConfig(t)
cfg.ExcludedNamespaces = []string{"flux-system", "vault"}
// Explicitly configured workers win over discovery and must come back as a
// defensive copy.
explicit := []string{"worker-a", "worker-b"}
cfg.Workers = append([]string{}, explicit...)
orchWorkers, _ := newHookOrchestrator(t, cfg, nil, nil)
gotWorkers, err := orchWorkers.TestHookEffectiveWorkers(context.Background())
if err != nil || len(gotWorkers) != len(explicit) || gotWorkers[0] != explicit[0] || gotWorkers[1] != explicit[1] {
t.Fatalf("expected explicit workers copy, got %v err=%v", gotWorkers, err)
}
// Mutating the returned slice must not leak back into cfg.Workers.
gotWorkers[0] = "mutated"
if cfg.Workers[0] != explicit[0] {
t.Fatalf("expected effectiveWorkers to return a copy")
}
// With no explicit workers and node discovery erroring, effectiveWorkers
// should fall back to SSHNodeHosts. The expected list is sorted and drops
// titan-db — presumably non-worker hosts are filtered; confirm against the
// effectiveWorkers implementation.
cfg.Workers = nil
cfg.SSHManagedNodes = nil
cfg.SSHNodeHosts = map[string]string{
"worker-c": "worker-c",
"worker-b": "worker-b",
"worker-a": "worker-a",
"titan-db": "titan-db",
}
discoverErrRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
if name == "kubectl" && strings.Contains(command, "get nodes -o custom-columns=") {
return "", errors.New("nodes unavailable")
}
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
orchFallback, _ := newHookOrchestrator(t, cfg, discoverErrRun, discoverErrRun)
fallbackWorkers, err := orchFallback.TestHookEffectiveWorkers(context.Background())
if err != nil || strings.Join(fallbackWorkers, ",") != "worker-a,worker-b,worker-c" {
t.Fatalf("expected fallback workers, got %v err=%v", fallbackWorkers, err)
}
// Discovery output: control-plane nodes and malformed lines are skipped,
// leaving only the plain workers.
discoverRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get nodes -o custom-columns="):
return "titan-db <none> <none>\nworker-b <none> <none>\nworker-c control-plane <none>\nbadline\n", nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchDiscover, _ := newHookOrchestrator(t, lifecycleConfig(t), discoverRun, discoverRun)
discovered, err := orchDiscover.TestHookDiscoverWorkers(context.Background())
if err != nil || strings.Join(discovered, ",") != "titan-db,worker-b" {
t.Fatalf("expected discovered workers, got %v err=%v", discovered, err)
}
// Flux suspend patching: one kustomization patch and one helmrelease patch
// fail; patchFluxSuspendAll must still return nil (best-effort).
cfgPatch := lifecycleConfig(t)
patchRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "-n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath="):
return "services\nignored\n", nil
case name == "kubectl" && strings.Contains(command, "get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath="):
return "monitoring/grafana\nmonitoring/failing\n", nil
case name == "kubectl" && strings.Contains(command, "-n flux-system patch kustomization services"):
return "", errors.New("patch failed")
case name == "kubectl" && strings.Contains(command, "-n flux-system patch kustomization ignored"):
return "", nil
case name == "kubectl" && strings.Contains(command, "-n monitoring patch helmrelease grafana"):
return "", nil
case name == "kubectl" && strings.Contains(command, "-n monitoring patch helmrelease failing"):
return "", errors.New("patch failed")
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchPatch, _ := newHookOrchestrator(t, cfgPatch, patchRun, patchRun)
if err := orchPatch.TestHookPatchFluxSuspendAll(context.Background(), true); err != nil {
t.Fatalf("expected patchFluxSuspendAll best-effort success, got %v", err)
}
// Scalable-workload listing: the "bogus" replica line and the zero-replica
// line are skipped, leaving four entries. Scaling grafana to 0 fails while
// scaling it to 1 succeeds, to exercise both aggregation branches below.
listRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
return "monitoring\tgrafana\t2\nflux-system\tsource-controller\t1\nmonitoring\tbad\tbogus\nmonitoring\tempty\t0\n", nil
case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
return "monitoring\tvictoria-metrics-single-server\t3\nvault\tvault\t1\n", nil
case name == "kubectl" && strings.Contains(command, "scale deployment grafana --replicas=0"):
return "", errors.New("scale failed")
case name == "kubectl" && strings.Contains(command, "scale deployment grafana --replicas=1"):
return "", nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchList, _ := newHookOrchestrator(t, cfgPatch, listRun, listRun)
entries, err := orchList.TestHookListScalableWorkloads(context.Background())
if err != nil || len(entries) != 4 {
t.Fatalf("expected four scalable workloads, got %v err=%v", entries, err)
}
// Single entry scaled to 1 (the succeeding fake) passes.
if err := orchList.TestHookScaleWorkloads(context.Background(), entries[:1], 0, 0); err != nil {
t.Fatalf("expected single-entry scaleWorkloads success, got %v", err)
}
// Remaining entries include the failing scale; errors must be aggregated.
if err := orchList.TestHookScaleWorkloads(context.Background(), entries[1:], 0, 0); err == nil || !strings.Contains(err.Error(), "scaling had") {
t.Fatalf("expected scaleWorkloads error aggregation, got %v", err)
}
// Empty entry list is a no-op success.
if err := orchList.TestHookScaleWorkloads(context.Background(), nil, 0, 1); err != nil {
t.Fatalf("expected empty scaleWorkloads success, got %v", err)
}
// Snapshot persistence: writing empty entries succeeds, and a missing
// snapshot reads back as (nil, nil) rather than an error.
orchSnapshotWrite, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
if err := orchSnapshotWrite.TestHookWriteScaledWorkloadSnapshot(nil); err != nil {
t.Fatalf("expected snapshot write with empty entries, got %v", err)
}
orchSnapshotRead, _ := newHookOrchestrator(t, lifecycleConfig(t), nil, nil)
if snapshot, err := orchSnapshotRead.TestHookReadScaledWorkloadSnapshot(); err != nil || snapshot != nil {
t.Fatalf("expected missing snapshot to read as nil,nil, got snapshot=%v err=%v", snapshot, err)
}
// Dry-run orchestrator built directly (not via the hook helper): snapshot
// write/read are both no-ops that succeed.
manualCfg := lifecycleConfig(t)
manualCfg.State.Dir = filepath.Join(t.TempDir(), "state")
manualOrch := cluster.New(manualCfg, &execx.Runner{DryRun: true}, state.New(manualCfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
if err := manualOrch.TestHookWriteScaledWorkloadSnapshot(nil); err != nil {
t.Fatalf("expected dry-run snapshot write success, got %v", err)
}
if snapshot, err := manualOrch.TestHookReadScaledWorkloadSnapshot(); err != nil || snapshot != nil {
t.Fatalf("expected dry-run snapshot read to return nil,nil, got snapshot=%v err=%v", snapshot, err)
}
// Restore path: seed a valid snapshot on disk, restore (scale grafana back
// to 1 via the succeeding fake), and verify the snapshot file is removed.
restorePath := filepath.Join(t.TempDir(), "state", "scaled-workloads.json")
if err := os.MkdirAll(filepath.Dir(restorePath), 0o755); err != nil {
t.Fatalf("mkdir restore path: %v", err)
}
valid := `{"generated_at":"2026-01-01T00:00:00Z","entries":[{"namespace":"monitoring","kind":"deployment","name":"grafana","replicas":1}]}`
if err := os.WriteFile(restorePath, []byte(valid), 0o600); err != nil {
t.Fatalf("write restore snapshot: %v", err)
}
restoreCfg := lifecycleConfig(t)
restoreCfg.State.Dir = filepath.Dir(restorePath)
restoreOrch, _ := newHookOrchestrator(t, restoreCfg, listRun, listRun)
if err := restoreOrch.TestHookRestoreScaledApps(context.Background()); err != nil {
t.Fatalf("expected restoreScaledApps success, got %v", err)
}
if _, err := os.Stat(restorePath); !os.IsNotExist(err) {
t.Fatalf("expected restore snapshot to be removed, stat err=%v", err)
}
// scaleDownApps failure path: workloads never converge and the scale-to-0
// command errors, so the hook must return an error.
pendingCfg := lifecycleConfig(t)
pendingCfg.Startup.WorkloadConvergenceWaitSeconds = 1
pendingCfg.Startup.WorkloadConvergencePollSeconds = 1
pendingRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1},"status":{"readyReplicas":0}}]}`, nil
case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
return `{"items":[]}`, nil
case name == "kubectl" && strings.Contains(command, "scale deployment grafana --replicas=0"):
return "", errors.New("scale failed")
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchPendingScale, _ := newHookOrchestrator(t, pendingCfg, pendingRun, pendingRun)
if err := orchPendingScale.TestHookScaleDownApps(context.Background()); err == nil {
t.Fatalf("expected scaleDownApps to fail when workloads stay pending")
}
})
// Subtest 3: flux health gating (adaptive wait, readiness, immutable-job
// healing), critical startup workloads, and vault unseal helpers.
t.Run("critical-vault-and-flux-health-helpers", func(t *testing.T) {
fluxCfg := lifecycleConfig(t)
fluxCfg.Startup.FluxHealthWaitSeconds = 1
fluxCfg.Startup.FluxHealthPollSeconds = 1
fluxCfg.Startup.IgnoreFluxKustomizations = []string{"flux-system/ignored"}
// Kustomization fixture mixing: not-ready, missing Ready condition, ignored,
// suspended, and ready entries (with varied spec.timeout values feeding the
// adaptive wait). The jobs fixture has two flux-labeled failed jobs (job-b's
// delete errors), a CronJob-owned job, and a succeeded one.
fluxRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json"):
return `{"items":[
{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false,"timeout":"40s"},"status":{"conditions":[{"type":"Ready","status":"False","message":"syncing"}]}},
{"metadata":{"namespace":"flux-system","name":"no-condition"},"spec":{"suspend":false,"timeout":"10s"},"status":{"conditions":[{"type":"Reconciling","status":"True","message":"still"}]}},
{"metadata":{"namespace":"flux-system","name":"ignored"},"spec":{"suspend":false,"timeout":"5s"},"status":{"conditions":[{"type":"Ready","status":"False","message":"ignored"}]}},
{"metadata":{"namespace":"flux-system","name":"suspended"},"spec":{"suspend":true,"timeout":"2m"},"status":{"conditions":[{"type":"Ready","status":"False","message":"skip"}]}},
{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"suspend":false,"timeout":"5m"},"status":{"conditions":[{"type":"Ready","status":"True","message":"ok"}]}}
]}`, nil
case name == "kubectl" && strings.Contains(command, "get jobs -A -o json"):
return `{"items":[
{"metadata":{"namespace":"flux-system","name":"job-a","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}},
{"metadata":{"namespace":"flux-system","name":"job-b","labels":{"kustomize.toolkit.fluxcd.io/name":"services"}},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}},
{"metadata":{"namespace":"flux-system","name":"cronjob-owned","ownerReferences":[{"kind":"CronJob","name":"cron"}]},"status":{"failed":1,"conditions":[{"type":"Failed","status":"True"}]}},
{"metadata":{"namespace":"flux-system","name":"succeeded"},"status":{"succeeded":1,"failed":1,"conditions":[{"type":"Failed","status":"True"}]}}
]}`, nil
case name == "kubectl" && strings.Contains(command, "delete job job-a"):
return "", nil
case name == "kubectl" && strings.Contains(command, "delete job job-b"):
return "", errors.New("boom")
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
// Adaptive wait should extend past the 30s base using the largest explicit
// kustomization timeout found in the fixture.
orchFlux, _ := newHookOrchestrator(t, fluxCfg, fluxRun, fluxRun)
wait, reason, err := orchFlux.TestHookAdaptiveFluxHealthWait(context.Background(), 30*time.Second)
if err != nil || wait <= 30*time.Second || !strings.Contains(reason, "max flux timeout") {
t.Fatalf("expected adaptive flux wait extension, wait=%s reason=%q err=%v", wait, reason, err)
}
// With no explicit timeouts anywhere, the adaptive wait falls back to a
// default of at least 15 minutes.
noTimeoutCfg := lifecycleConfig(t)
noTimeoutRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json"):
return `{"items":[{"metadata":{"namespace":"flux-system","name":"ready"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Ready","status":"True","message":"ok"}]}}]}`, nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchNoTimeout, _ := newHookOrchestrator(t, noTimeoutCfg, noTimeoutRun, noTimeoutRun)
if wait, reason, err := orchNoTimeout.TestHookAdaptiveFluxHealthWait(context.Background(), 0); err != nil || wait < 15*time.Minute || !strings.Contains(reason, "no explicit kustomization timeouts found") {
t.Fatalf("expected adaptive wait fallback, wait=%s reason=%q err=%v", wait, reason, err)
}
// Flux health readiness: the mixed fixture is not ready; the all-ready
// fixture is.
if ready, detail, err := orchFlux.TestHookFluxHealthReady(context.Background()); err != nil || ready || !strings.Contains(detail, "not ready") {
t.Fatalf("expected flux health not-ready result, ready=%v detail=%q err=%v", ready, detail, err)
}
if ready, detail, err := orchNoTimeout.TestHookFluxHealthReady(context.Background()); err != nil || !ready || !strings.Contains(detail, "all kustomizations ready=") {
t.Fatalf("expected flux health ready result, ready=%v detail=%q err=%v", ready, detail, err)
}
// Pure detectors: immutable-job error text, flux-managed job labeling,
// CronJob-ownership exclusion, and failed/succeeded job classification.
if !cluster.TestHookLooksLikeImmutableJobError("Job update failed: field is immutable") {
t.Fatalf("expected immutable-job detector true")
}
if cluster.TestHookLooksLikeImmutableJobError("") {
t.Fatalf("expected empty immutable-job detail to be false")
}
if !cluster.TestHookJobLooksFluxManaged("flux-system", "job-a", map[string]string{"kustomize.toolkit.fluxcd.io/name": "services"}, nil) {
t.Fatalf("expected label-based flux-managed job")
}
if cluster.TestHookJobLooksFluxManaged("flux-system", "cronjob-owned", nil, []string{"CronJob"}) {
t.Fatalf("expected CronJob-owned job to be non flux-managed")
}
if !cluster.TestHookJobFailed(1, 0, []string{"Failed"}, []string{"True"}) {
t.Fatalf("expected failed job detector to be true")
}
if cluster.TestHookJobFailed(0, 1, []string{"Complete"}, []string{"True"}) {
t.Fatalf("expected succeeded job to be false")
}
// Healing deletes failed flux jobs best-effort: job-b's delete error must
// not prevent healed=true.
if healed, err := orchFlux.TestHookHealImmutableFluxJobs(context.Background()); err != nil || !healed {
t.Fatalf("expected immutable job heal success, healed=%v err=%v", healed, err)
}
// Critical startup workloads: readiness probe and scale both error, and the
// vault pod listing has mixed phases/owners plus a malformed line; vault-1's
// delete fails to hit the cleanup error branch.
critCfg := lifecycleConfig(t)
critCfg.Startup.VaultUnsealKeyFile = filepath.Join(t.TempDir(), "vault", "unseal.key")
critCfg.Startup.VaultUnsealBreakglassCommand = "echo breakglass-key"
critCfg.Startup.VaultUnsealBreakglassTimeout = 1
critCfg.Startup.WorkloadConvergenceWaitSeconds = 1
critCfg.Startup.WorkloadConvergencePollSeconds = 1
critCfg.Startup.StuckPodGraceSeconds = 1
critCfg.Startup.IgnoreWorkloadNamespaces = []string{"kube-system"}
critCfg.Startup.IgnoreUnavailableNodes = []string{"titan-22"}
critCfg.Startup.IgnoreWorkloads = []string{"monitoring/deployment/ignore-me"}
critRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "-n flux-system get deployment source-controller -o jsonpath={.status.readyReplicas}"):
return "", errors.New("boom")
case name == "kubectl" && strings.Contains(command, "-n flux-system scale deployment source-controller --replicas=1"):
return "", errors.New("boom")
case name == "kubectl" && strings.Contains(command, "-n vault get pods -o custom-columns="):
return "vault-0 Pending StatefulSet vault\nvault-1 Unknown StatefulSet vault\nvault-2 Running StatefulSet vault\nvault-other Failed Deployment vault\nbadline\n", nil
case name == "kubectl" && strings.Contains(command, "delete pod vault-0"):
return "", nil
case name == "kubectl" && strings.Contains(command, "delete pod vault-1"):
return "", errors.New("boom")
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchCrit, _ := newHookOrchestrator(t, critCfg, critRun, critRun)
if missing, err := orchCrit.TestHookMissingCriticalStartupWorkloads(context.Background()); err == nil || len(missing) != 0 {
t.Fatalf("expected missingCriticalStartupWorkloads generic error, missing=%v err=%v", missing, err)
}
if err := orchCrit.TestHookEnsureCriticalStartupWorkloads(context.Background()); err == nil || !strings.Contains(err.Error(), "scale") {
t.Fatalf("expected ensureCriticalStartupWorkloads scale error, got %v", err)
}
if err := orchCrit.TestHookCleanupStaleCriticalWorkloadPods(context.Background(), "vault", "statefulset", "vault"); err == nil {
t.Fatalf("expected stale critical workload cleanup error branch")
}
// Vault unseal happy path: a stateful fake reports sealed until the unseal
// command runs (dmF1bHQta2V5 is base64 for the secret-sourced key).
vaultCfg := lifecycleConfig(t)
vaultCfg.Startup.VaultUnsealKeyFile = filepath.Join(t.TempDir(), "vault", "unseal.key")
vaultCfg.Startup.VaultUnsealBreakglassCommand = "echo breakglass-key"
vaultCfg.Startup.VaultUnsealBreakglassTimeout = 1
ensureUnsealed := false
ensureRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
return "Running", nil
case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
if ensureUnsealed {
return `{"sealed":false}`, nil
}
return `{"sealed":true}`, nil
case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
return "dmF1bHQta2V5", nil
case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
ensureUnsealed = true
return "", nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchVaultEnsure, _ := newHookOrchestrator(t, vaultCfg, ensureRun, ensureRun)
if err := orchVaultEnsure.TestHookEnsureVaultUnsealed(context.Background()); err != nil {
t.Fatalf("expected vault auto-unseal success path, got %v", err)
}
if sealed, err := orchVaultEnsure.TestHookVaultSealed(context.Background()); err != nil || sealed {
t.Fatalf("expected vault sealed helper false after unseal, sealed=%v err=%v", sealed, err)
}
// Wait paths: readiness flips to true only after the unseal runs, so the
// vault wait exercises the unseal-then-ready sequence; the generic workload
// wait then succeeds immediately against the ready deployment listing.
waitReady := false
waitUnsealed := false
waitRun := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
command := name + " " + strings.Join(args, " ")
switch {
case name == "kubectl" && strings.Contains(command, "-n vault get pod vault-0 -o jsonpath={.status.phase}"):
return "Running", nil
case name == "kubectl" && strings.Contains(command, "vault status -format=json"):
if waitUnsealed {
return `{"sealed":false}`, nil
}
return `{"sealed":true}`, nil
case name == "kubectl" && strings.Contains(command, "get secret vault-init"):
return "dmF1bHQta2V5", nil
case name == "kubectl" && strings.Contains(command, "vault operator unseal"):
waitUnsealed = true
waitReady = true
return "", nil
case name == "kubectl" && strings.Contains(command, "get statefulset vault -o jsonpath={.status.readyReplicas}"):
if waitReady {
return "1", nil
}
return "0", nil
case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
return "monitoring\tgrafana\t1\n", nil
case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
return "monitoring\tvictoria-metrics-single-server\t1\n", nil
default:
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
}
}
orchVaultWait, _ := newHookOrchestrator(t, vaultCfg, waitRun, waitRun)
if err := orchVaultWait.TestHookWaitVaultReady(context.Background(), "vault", "statefulset", "vault"); err != nil {
t.Fatalf("expected vault wait success path, got %v", err)
}
if err := orchVaultWait.TestHookWaitWorkloadReady(context.Background(), "monitoring", "deployment", "grafana"); err != nil {
t.Fatalf("expected generic workload wait success, got %v", err)
}
// Key-file round trip through the configured VaultUnsealKeyFile path.
if err := orchVaultWait.TestHookWriteVaultUnsealKeyFile("cached-key"); err != nil {
t.Fatalf("expected vault key file write success, got %v", err)
}
if got, err := orchVaultWait.TestHookReadVaultUnsealKeyFile(); err != nil || got != "cached-key" {
t.Fatalf("expected vault key file read success, got %q err=%v", got, err)
}
// Error branch: the key file's parent "directory" is a regular file, so the
// write must fail.
blockedDir := t.TempDir()
blockedFile := filepath.Join(blockedDir, "blocked")
if err := os.WriteFile(blockedFile, []byte("x"), 0o600); err != nil {
t.Fatalf("write blocked file: %v", err)
}
blockedCfg := lifecycleConfig(t)
blockedCfg.Startup.VaultUnsealKeyFile = filepath.Join(blockedFile, "vault.key")
blockedOrch, _ := newHookOrchestrator(t, blockedCfg, nil, nil)
if err := blockedOrch.TestHookWriteVaultUnsealKeyFile("x"); err == nil {
t.Fatalf("expected vault key dir error")
}
})
}