package orchestrator

import (
	"context"
	"errors"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
)

// TestHookGapMatrixPart3ConvergenceAndStability drives the startup convergence
// wait and the startup stability check through their test hooks.
// Signature: TestHookGapMatrixPart3ConvergenceAndStability(t *testing.T).
// Why: raises coverage for the convergence gates (ingress checklist, service
// checklist, flux health) and the stability check that together decide whether
// startup is considered truly complete.
func TestHookGapMatrixPart3ConvergenceAndStability(t *testing.T) {
	// kubectlStub builds a command runner that answers any kubectl invocation
	// whose joined command line contains needle with (reply, replyErr). Every
	// other command is forwarded to a fresh lifecycleDispatcher, created per
	// call exactly as the inline runners did.
	kubectlStub := func(needle, reply string, replyErr error) func(context.Context, time.Duration, string, ...string) (string, error) {
		return func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			if name == "kubectl" && strings.Contains(name+" "+strings.Join(args, " "), needle) {
				return reply, replyErr
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
	}

	t.Run("wait-for-startup-convergence-gate-matrix", func(t *testing.T) {
		// Gate 1: only the ingress checklist is required. The stubbed ingress
		// list advertises host 127.0.0.1:1 (presumably unreachable), and the
		// wait is capped at one second, so convergence is expected to fail.
		ingressCfg := lifecycleConfig(t)
		ingressCfg.Startup.RequireServiceChecklist = false
		ingressCfg.Startup.RequireCriticalServiceEndpoints = false
		ingressCfg.Startup.RequireFluxHealth = false
		ingressCfg.Startup.RequireWorkloadConvergence = false
		ingressCfg.Startup.ServiceChecklistStabilitySec = 0
		ingressCfg.Startup.RequireIngressChecklist = true
		ingressCfg.Startup.IngressChecklistWaitSeconds = 1
		ingressCfg.Startup.IngressChecklistPollSeconds = 1
		ingressRunner := kubectlStub(
			"get ingress -A -o json",
			`{"items":[{"metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"rules":[{"host":"127.0.0.1:1"}]}}]}`,
			nil,
		)
		ingressOrch, _ := newHookOrchestrator(t, ingressCfg, ingressRunner, ingressRunner)
		ingressErr := ingressOrch.TestHookWaitForStartupConvergence(context.Background())
		if ingressErr == nil || !strings.Contains(ingressErr.Error(), "ingress checklist") {
			t.Fatalf("expected ingress convergence failure, got %v", ingressErr)
		}

		// Gate 2: only the service checklist is required. Its single check
		// targets http://127.0.0.1:1/health and accepts only status 200.
		serviceCfg := lifecycleConfig(t)
		serviceCfg.Startup.RequireIngressChecklist = false
		serviceCfg.Startup.RequireCriticalServiceEndpoints = false
		serviceCfg.Startup.RequireFluxHealth = false
		serviceCfg.Startup.RequireWorkloadConvergence = false
		serviceCfg.Startup.RequireServiceChecklist = true
		serviceCfg.Startup.ServiceChecklistWaitSeconds = 1
		serviceCfg.Startup.ServiceChecklistPollSeconds = 1
		serviceCfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
			{
				Name:             "api",
				URL:              "http://127.0.0.1:1/health",
				AcceptedStatuses: []int{200},
				TimeoutSeconds:   1,
			},
		}
		serviceOrch, _ := newHookOrchestrator(t, serviceCfg, nil, nil)
		serviceErr := serviceOrch.TestHookWaitForStartupConvergence(context.Background())
		if serviceErr == nil || !strings.Contains(serviceErr.Error(), "service checklist") {
			t.Fatalf("expected service convergence failure, got %v", serviceErr)
		}

		// Gate 3: only flux health is required. The stub reports one
		// kustomization whose Ready condition is "False".
		fluxCfg := lifecycleConfig(t)
		fluxCfg.Startup.RequireIngressChecklist = false
		fluxCfg.Startup.RequireServiceChecklist = false
		fluxCfg.Startup.RequireCriticalServiceEndpoints = false
		fluxCfg.Startup.RequireWorkloadConvergence = false
		fluxCfg.Startup.RequireFluxHealth = true
		fluxCfg.Startup.FluxHealthWaitSeconds = 1
		fluxCfg.Startup.FluxHealthPollSeconds = 1
		fluxRunner := kubectlStub(
			"get kustomizations.kustomize.toolkit.fluxcd.io -A -o json",
			`{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Ready","status":"False","message":"syncing"}]}}]}`,
			nil,
		)
		fluxOrch, _ := newHookOrchestrator(t, fluxCfg, fluxRunner, fluxRunner)
		fluxErr := fluxOrch.TestHookWaitForStartupConvergence(context.Background())
		if fluxErr == nil || !strings.Contains(fluxErr.Error(), "flux convergence") {
			t.Fatalf("expected flux convergence failure, got %v", fluxErr)
		}
	})

	t.Run("startup-stability-success-and-pod-check-error", func(t *testing.T) {
		const podListQuery = "get pods -A -o json"

		// One configuration serves both scenarios: the flux, workload,
		// service-checklist and ingress-checklist requirements are all off.
		stabilityCfg := lifecycleConfig(t)
		stabilityCfg.Startup.RequireIngressChecklist = false
		stabilityCfg.Startup.RequireServiceChecklist = false
		stabilityCfg.Startup.RequireWorkloadConvergence = false
		stabilityCfg.Startup.RequireFluxHealth = false

		// Success: the pod listing succeeds and contains no items.
		emptyPods := kubectlStub(podListQuery, `{"items":[]}`, nil)
		healthyOrch, _ := newHookOrchestrator(t, stabilityCfg, emptyPods, emptyPods)
		if healthyErr := healthyOrch.TestHookStartupStabilityHealthy(context.Background()); healthyErr != nil {
			t.Fatalf("expected startup stability success, got %v", healthyErr)
		}

		// Failure: the pod listing itself errors, which must surface as a
		// "pod failure check error".
		brokenPods := kubectlStub(podListQuery, "", errors.New("pod list failed"))
		failingOrch, _ := newHookOrchestrator(t, stabilityCfg, brokenPods, brokenPods)
		failingErr := failingOrch.TestHookStartupStabilityHealthy(context.Background())
		if failingErr == nil || !strings.Contains(failingErr.Error(), "pod failure check error") {
			t.Fatalf("expected pod-check error branch, got %v", failingErr)
		}
	})
}

// TestHookGapMatrixPart3LifecycleRestoreShutdown runs one orchestration or CLI step.
// Signature: TestHookGapMatrixPart3LifecycleRestoreShutdown(t *testing.T).
// Why: fills lifecycle restore/shutdown success paths that are easy to miss in // failure-focused drill tests. func TestHookGapMatrixPart3LifecycleRestoreShutdown(t *testing.T) { t.Run("etcd-restore-dry-run-and-success", func(t *testing.T) { cfgDry := lifecycleConfig(t) dry := newDryRunHookOrchestrator(t, cfgDry, nil) if err := dry.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"}); err != nil { t.Fatalf("expected dry-run etcd restore success, got %v", err) } cfg := lifecycleConfig(t) run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) { command := name + " " + strings.Join(args, " ") switch { case name == "ssh" && strings.Contains(command, "systemctl cat k3s"): return "ExecStart=/usr/local/bin/k3s server", nil case name == "ssh" && strings.Contains(command, "etcd-snapshot ls"): return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil case name == "ssh" && strings.Contains(command, "stat -c %s"): return "2097152", nil case name == "ssh" && strings.Contains(command, "sha256sum"): return "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", nil case name == "ssh" && strings.Contains(command, "server --cluster-reset"): return "reset done", nil case name == "ssh" && strings.Contains(command, "systemctl stop k3s"): return "stopped", nil case name == "ssh" && strings.Contains(command, "systemctl start k3s"): return "started", nil default: return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...) 
} } orch, _ := newHookOrchestrator(t, cfg, run, run) if err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"}); err != nil { t.Fatalf("expected etcd restore success path, got %v", err) } }) t.Run("shutdown-full-path-cluster-only", func(t *testing.T) { cfg := lifecycleConfig(t) run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) { command := name + " " + strings.Join(args, " ") switch { case name == "ssh" && strings.Contains(command, "k3s etcd-snapshot save"): return "saved", nil case name == "ssh" && strings.Contains(command, "k3s etcd-snapshot ls"): return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="): return "monitoring\tgrafana\t1\n", nil case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="): return "", nil default: return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...) } } orch, _ := newHookOrchestrator(t, cfg, run, run) err := orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "full", Mode: "cluster-only"}) if err != nil { t.Fatalf("expected full shutdown success, got %v", err) } }) }