package orchestrator

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
)

// TestHookIngressServiceMatrix drives the orchestrator test hooks for required
// node labels, ingress discovery/checklist/auto-heal, and the service checklist
// and stability waits through both success and failure branches.
// Signature: TestHookIngressServiceMatrix(t *testing.T).
// Why: expands checklist/ingress branch coverage so startup readiness gating is
// validated against more realistic and failure-prone edge cases.
func TestHookIngressServiceMatrix(t *testing.T) {
	t.Run("required-node-labels-success-and-error", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		// The empty-valued label and the empty node name are presumably there to
		// exercise skip paths in the label enforcement — confirm against the hook.
		cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
			"titan-23": {
				"topology.kubernetes.io/zone": "lab-a",
				"skip-empty":                  "",
			},
			"": {"ignored": "value"},
		}

		// Success runner: the kubectl label command for titan-23 succeeds; every
		// other command falls through to the shared lifecycle dispatcher.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "label node titan-23 --overwrite") {
				return "", nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)
		if err := orch.TestHookEnsureRequiredNodeLabels(context.Background()); err != nil {
			t.Fatalf("expected required-node-labels success, got %v", err)
		}

		// Failure runner: identical, except the label command is rejected.
		runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "label node titan-23 --overwrite") {
				return "", errors.New("label denied")
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
		if err := orchErr.TestHookEnsureRequiredNodeLabels(context.Background()); err == nil {
			t.Fatalf("expected ensureRequiredNodeLabels failure branch")
		}
	})

	t.Run("required-node-labels-skip-ignored-unavailable-nodes", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
			"titan-09": {
				"ananke.bstein.dev/harbor-bootstrap": "true",
			},
		}
		cfg.Startup.IgnoreUnavailableNodes = []string{"titan-09"}

		// titan-09 is listed as an ignored unavailable node, so any attempt to
		// label it fails the test.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "label node titan-09 --overwrite") {
				t.Fatalf("expected ignored unavailable node labels to be skipped, got %q", command)
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)
		if err := orch.TestHookEnsureRequiredNodeLabels(context.Background()); err != nil {
			t.Fatalf("expected ignored unavailable node label enforcement to be skipped, got %v", err)
		}
	})

	t.Run("required-node-labels-skip-absent-non-core-nodes", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
			"titan-09": {
				"ananke.bstein.dev/harbor-bootstrap": "true",
			},
		}
		// titan-09 is deliberately not in the reach-required node list.
		cfg.Startup.NodeInventoryReachRequiredNodes = []string{"titan-db"}

		// The label command reports the node as NotFound; the hook is expected to
		// tolerate that and return nil.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "label node titan-09 --overwrite") {
				return "", errors.New("Error from server (NotFound): nodes \"titan-09\" not found")
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)
		if err := orch.TestHookEnsureRequiredNodeLabels(context.Background()); err != nil {
			t.Fatalf("expected absent non-core node label enforcement to be skipped, got %v", err)
		}
	})

	t.Run("ingress-discovery-checklist-and-heal", func(t *testing.T) {
		// Local TLS endpoint that answers 200 "ok" for every request; its
		// host:port is used as the discovered ingress host.
		tlsServer := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusOK)
			_, _ = w.Write([]byte(`ok`))
		}))
		defer tlsServer.Close()
		ingressHost := strings.TrimPrefix(tlsServer.URL, "https://")

		cfg := lifecycleConfig(t)
		// The httptest TLS server uses a test certificate, so verification is
		// presumably skipped via this flag — confirm against the checklist code.
		cfg.Startup.IngressChecklistInsecureSkip = true
		cfg.Startup.IngressChecklistIgnoreHosts = []string{"ignored.bstein.dev", "stream.bstein.dev"}
		cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
			{Name: "metrics", URL: "https://" + ingressHost + "/"},
		}

		// Canned kubectl responses: an ingress list containing the live host, an
		// ignored host, a wildcard host and a second namespace; a workload list
		// with grafana scaled to zero; and a successful scale-up of grafana.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
				return fmt.Sprintf(`{"items":[ {"metadata":{"namespace":"monitoring"},"spec":{"rules":[{"host":"%s"},{"host":"ignored.bstein.dev"},{"host":"*.wildcard.bstein.dev"}]}}, {"metadata":{"namespace":"media"},"spec":{"rules":[{"host":"stream.bstein.dev"}]}} ]}`, ingressHost), nil
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
				return `{"items":[ {"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":0},"status":{"readyReplicas":0}}, {"kind":"StatefulSet","metadata":{"namespace":"media","name":"jellyfin"},"spec":{"replicas":1},"status":{"readyReplicas":1}} ]}`, nil
			case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
				return "", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)

		hosts, err := orch.TestHookDiscoverIngressHosts(context.Background())
		if err != nil {
			t.Fatalf("discover ingress hosts: %v", err)
		}
		if len(hosts) == 0 || hosts[0] == "ignored.bstein.dev" {
			t.Fatalf("expected filtered ingress hosts, got %v", hosts)
		}

		ns, err := orch.TestHookDiscoverIngressNamespacesForHost(context.Background(), ingressHost)
		if err != nil || len(ns) == 0 || ns[0] != "monitoring" {
			t.Fatalf("discover ingress namespaces: ns=%v err=%v", ns, err)
		}

		if ok, detail := orch.TestHookIngressChecklistReady(context.Background()); !ok || !strings.Contains(detail, "hosts=") {
			t.Fatalf("expected ingress checklist ready, ok=%v detail=%q", ok, detail)
		}

		// Two back-to-back heal attempts: the second must leave the timestamp
		// untouched. Equal is used instead of != because the time package
		// recommends it for comparing instants.
		var last time.Time
		orch.TestHookMaybeAutoHealIngressHostBackends(context.Background(), &last, ingressHost+": status=503")
		prev := last
		orch.TestHookMaybeAutoHealIngressHostBackends(context.Background(), &last, ingressHost+": status=503")
		if !last.Equal(prev) {
			t.Fatalf("expected ingress heal cooldown to suppress second attempt")
		}

		// A failure detail that names a service check should resolve to the host
		// of that check's URL.
		expectedHost := cluster.TestHookHostFromURL("https://" + ingressHost + "/")
		if got := orch.TestHookChecklistFailureHost("metrics: failed"); got != expectedHost {
			t.Fatalf("expected checklistFailureHost to map service check name to host %q, got %q", expectedHost, got)
		}
		if got := cluster.TestHookHostFromURL("not a url"); got != "" {
			t.Fatalf("expected invalid URL host parse to be empty, got %q", got)
		}
	})

	t.Run("ingress-discovery-error-branches", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		// The ingress listing returns truncated JSON so both discovery hooks hit
		// their decode-error paths.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
				return "{broken", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)
		if _, err := orch.TestHookDiscoverIngressHosts(context.Background()); err == nil {
			t.Fatalf("expected ingress decode error branch")
		}
		if _, err := orch.TestHookDiscoverIngressNamespacesForHost(context.Background(), "metrics.bstein.dev"); err == nil {
			t.Fatalf("expected ingress namespace decode error branch")
		}
	})

	t.Run("service-checklist-and-stability-branches", func(t *testing.T) {
		// okServer always answers 200 with a healthy body; failServer always
		// answers 503 with a "down" body.
		okServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusOK)
			_, _ = w.Write([]byte(`{"database":"ok"}`))
		}))
		defer okServer.Close()
		failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusServiceUnavailable)
			_, _ = w.Write([]byte(`{"status":"down"}`))
		}))
		defer failServer.Close()

		cfg := lifecycleConfig(t)
		check := config.ServiceChecklistCheck{
			Name:             "grafana",
			URL:              okServer.URL,
			AcceptedStatuses: []int{200},
			BodyContains:     `"database":"ok"`,
			BodyNotContains:  `"status":"down"`,
			TimeoutSeconds:   2,
		}
		cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{check}
		orch, _ := newHookOrchestrator(t, cfg, nil, nil)

		if ok, detail := orch.TestHookServiceCheckReady(context.Background(), check); !ok || !strings.Contains(detail, "status=200") {
			t.Fatalf("expected service check success, ok=%v detail=%q", ok, detail)
		}

		// Required marker that the body does not contain.
		check.BodyContains = `"missing-marker"`
		if ok, _ := orch.TestHookServiceCheckReady(context.Background(), check); ok {
			t.Fatalf("expected body-contains mismatch failure")
		}

		// Forbidden marker that the body does contain.
		check.BodyContains = `"database":"ok"`
		check.BodyNotContains = `"database":"ok"`
		if ok, _ := orch.TestHookServiceCheckReady(context.Background(), check); ok {
			t.Fatalf("expected body-not-contains failure")
		}

		// Malformed URL: the probe should fail before any request is sent.
		if _, _, err := orch.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{URL: "://bad-url"}); err == nil {
			t.Fatalf("expected http checklist request-build error")
		}
		// Well-formed URL aimed at loopback port 1, where nothing is expected to
		// be listening.
		if _, _, err := orch.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{URL: "http://127.0.0.1:1"}); err == nil {
			t.Fatalf("expected http checklist request-failure branch")
		}

		// Waiting on a failing check with an already-canceled context must
		// surface context.Canceled.
		cfgWait := lifecycleConfig(t)
		cfgWait.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
			{Name: "down", URL: failServer.URL, AcceptedStatuses: []int{200}, TimeoutSeconds: 1},
		}
		cfgWait.Startup.ServiceChecklistWaitSeconds = 1
		cfgWait.Startup.ServiceChecklistPollSeconds = 1
		orchWait, _ := newHookOrchestrator(t, cfgWait, nil, nil)
		cancelCtx, cancel := context.WithCancel(context.Background())
		cancel()
		if err := orchWait.TestHookWaitForServiceChecklist(cancelCtx); !errors.Is(err, context.Canceled) {
			t.Fatalf("expected canceled service-checklist wait, got %v", err)
		}

		// A zero-second stability window is expected to return without error.
		cfgStable := lifecycleConfig(t)
		cfgStable.Startup.ServiceChecklistStabilitySec = 0
		orchStable, _ := newHookOrchestrator(t, cfgStable, nil, nil)
		if err := orchStable.TestHookWaitForStabilityWindow(context.Background()); err != nil {
			t.Fatalf("expected zero-window stability fast-path, got %v", err)
		}

		// The pod listing reports vault-0 waiting in CrashLoopBackOff, which
		// should fail a one-second stability window.
		runFailPods := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
				return `{"items":[{"metadata":{"namespace":"vault","name":"vault-0"},"spec":{"nodeName":"titan-23"},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}}]}`, nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		cfgFailStable := lifecycleConfig(t)
		cfgFailStable.Startup.ServiceChecklistStabilitySec = 1
		orchFailStable, _ := newHookOrchestrator(t, cfgFailStable, runFailPods, runFailPods)
		err := orchFailStable.TestHookWaitForStabilityWindow(context.Background())
		if err == nil || !strings.Contains(err.Error(), "startup stability window failed") {
			t.Fatalf("expected stability failure from failing pods, got %v", err)
		}

		// Helper checks in the cluster package: case-insensitive body matching
		// and the hostname heuristic rejecting a path-like string.
		if !cluster.TestHookChecklistContains(`{"A":"B"}`, `"a":"b"`) {
			t.Fatalf("expected case-insensitive checklistContains success")
		}
		if cluster.TestHookIsLikelyHostname("not/a/host") {
			t.Fatalf("expected invalid hostname heuristic to be false")
		}
	})
}