ananke/testing/orchestrator/hooks_ingress_service_matrix_test.go

230 lines
10 KiB
Go

package orchestrator
import (
"context"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/cluster"
"scm.bstein.dev/bstein/ananke/internal/config"
)
// TestHookIngressServiceMatrix exercises the required-node-label, ingress
// discovery/checklist/auto-heal, and service-checklist/stability test hooks
// against stubbed command runners and local httptest servers.
// Signature: TestHookIngressServiceMatrix(t *testing.T).
// Why: expands checklist/ingress branch coverage so startup readiness gating is
// validated against more realistic and failure-prone edge cases.
func TestHookIngressServiceMatrix(t *testing.T) {
	t.Run("required-node-labels-success-and-error", func(t *testing.T) {
		ctx := context.Background()
		cfg := lifecycleConfig(t)
		// Fixture mixes one real label with an empty label value and an empty
		// node name. Presumably the implementation skips the latter two (per the
		// fixture key names); this test only asserts the overall result.
		cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
			"titan-23": {
				"topology.kubernetes.io/zone": "lab-a",
				"skip-empty":                  "",
			},
			"": {"ignored": "value"},
		}
		// labelRunner builds a command stub that answers the titan-23 label
		// command with labelErr and forwards everything else to the shared
		// lifecycle dispatcher.
		labelRunner := func(labelErr error) func(context.Context, time.Duration, string, ...string) (string, error) {
			return func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
				command := name + " " + strings.Join(args, " ")
				if name == "kubectl" && strings.Contains(command, "label node titan-23 --overwrite") {
					return "", labelErr
				}
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}

		run := labelRunner(nil)
		orch, _ := newHookOrchestrator(t, cfg, run, run)
		if err := orch.TestHookEnsureRequiredNodeLabels(ctx); err != nil {
			t.Fatalf("expected required-node-labels success, got %v", err)
		}

		runErr := labelRunner(errors.New("label denied"))
		orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
		if err := orchErr.TestHookEnsureRequiredNodeLabels(ctx); err == nil {
			t.Fatal("expected ensureRequiredNodeLabels failure branch")
		}
	})

	t.Run("ingress-discovery-checklist-and-heal", func(t *testing.T) {
		ctx := context.Background()
		// Local TLS endpoint standing in for a real ingress host; it always
		// answers 200 "ok".
		tlsServer := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusOK)
			_, _ = w.Write([]byte(`ok`))
		}))
		defer tlsServer.Close()
		ingressHost := strings.TrimPrefix(tlsServer.URL, "https://")

		cfg := lifecycleConfig(t)
		// The httptest certificate is self-signed, so the checklist is
		// configured to skip TLS verification.
		cfg.Startup.IngressChecklistInsecureSkip = true
		cfg.Startup.IngressChecklistIgnoreHosts = []string{"ignored.bstein.dev", "stream.bstein.dev"}
		cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
			{Name: "metrics", URL: "https://" + ingressHost + "/"},
		}
		// Stub kubectl: canned ingress and workload listings plus a successful
		// grafana scale-up; anything else goes to the shared dispatcher.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
				return fmt.Sprintf(`{"items":[
{"metadata":{"namespace":"monitoring"},"spec":{"rules":[{"host":"%s"},{"host":"ignored.bstein.dev"},{"host":"*.wildcard.bstein.dev"}]}},
{"metadata":{"namespace":"media"},"spec":{"rules":[{"host":"stream.bstein.dev"}]}}
]}`, ingressHost), nil
			case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
				return `{"items":[
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":0},"status":{"readyReplicas":0}},
{"kind":"StatefulSet","metadata":{"namespace":"media","name":"jellyfin"},"spec":{"replicas":1},"status":{"readyReplicas":1}}
]}`, nil
			case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
				return "", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)

		hosts, err := orch.TestHookDiscoverIngressHosts(ctx)
		if err != nil {
			t.Fatalf("discover ingress hosts: %v", err)
		}
		// NOTE(review): only the first returned host is inspected, so an ignored
		// host at any later index would go unnoticed — consider checking every
		// entry once the expected filtering/ordering is confirmed.
		if len(hosts) == 0 || hosts[0] == "ignored.bstein.dev" {
			t.Fatalf("expected filtered ingress hosts, got %v", hosts)
		}

		ns, err := orch.TestHookDiscoverIngressNamespacesForHost(ctx, ingressHost)
		if err != nil || len(ns) == 0 || ns[0] != "monitoring" {
			t.Fatalf("discover ingress namespaces: ns=%v err=%v", ns, err)
		}

		if ok, detail := orch.TestHookIngressChecklistReady(ctx); !ok || !strings.Contains(detail, "hosts=") {
			t.Fatalf("expected ingress checklist ready, ok=%v detail=%q", ok, detail)
		}

		// Two back-to-back heal requests: the second must leave the recorded
		// attempt time untouched.
		// NOTE(review): if the first call never updates last, this check passes
		// vacuously — consider also asserting !prev.IsZero() once confirmed.
		last := time.Time{}
		failure := ingressHost + ": status=503"
		orch.TestHookMaybeAutoHealIngressHostBackends(ctx, &last, failure)
		prev := last
		orch.TestHookMaybeAutoHealIngressHostBackends(ctx, &last, failure)
		// Compare instants with Equal rather than ==/!=, which would also
		// compare the Location and monotonic-clock fields.
		if !last.Equal(prev) {
			t.Fatal("expected ingress heal cooldown to suppress second attempt")
		}

		expectedHost := cluster.TestHookHostFromURL("https://" + ingressHost + "/")
		if got := orch.TestHookChecklistFailureHost("metrics: failed"); got != expectedHost {
			t.Fatalf("expected checklistFailureHost to map service check name to host %q, got %q", expectedHost, got)
		}
		if got := cluster.TestHookHostFromURL("not a url"); got != "" {
			t.Fatalf("expected invalid URL host parse to be empty, got %q", got)
		}
	})

	t.Run("ingress-discovery-error-branches", func(t *testing.T) {
		ctx := context.Background()
		cfg := lifecycleConfig(t)
		// Return malformed JSON for the ingress listing so both discovery hooks
		// hit their decode-error paths.
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "get ingress -A -o json") {
				return "{broken", nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orch, _ := newHookOrchestrator(t, cfg, run, run)
		if _, err := orch.TestHookDiscoverIngressHosts(ctx); err == nil {
			t.Fatal("expected ingress decode error branch")
		}
		if _, err := orch.TestHookDiscoverIngressNamespacesForHost(ctx, "metrics.bstein.dev"); err == nil {
			t.Fatal("expected ingress namespace decode error branch")
		}
	})

	t.Run("service-checklist-and-stability-branches", func(t *testing.T) {
		ctx := context.Background()
		// okServer always answers 200 with a healthy body; failServer always
		// answers 503 with a "down" body.
		okServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusOK)
			_, _ = w.Write([]byte(`{"database":"ok"}`))
		}))
		defer okServer.Close()
		failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.WriteHeader(http.StatusServiceUnavailable)
			_, _ = w.Write([]byte(`{"status":"down"}`))
		}))
		defer failServer.Close()

		cfg := lifecycleConfig(t)
		check := config.ServiceChecklistCheck{
			Name:             "grafana",
			URL:              okServer.URL,
			AcceptedStatuses: []int{200},
			BodyContains:     `"database":"ok"`,
			BodyNotContains:  `"status":"down"`,
			TimeoutSeconds:   2,
		}
		cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{check}
		orch, _ := newHookOrchestrator(t, cfg, nil, nil)

		// Happy path: status and both body predicates match.
		if ok, detail := orch.TestHookServiceCheckReady(ctx, check); !ok || !strings.Contains(detail, "status=200") {
			t.Fatalf("expected service check success, ok=%v detail=%q", ok, detail)
		}
		// Required marker absent from the body.
		check.BodyContains = `"missing-marker"`
		if ok, _ := orch.TestHookServiceCheckReady(ctx, check); ok {
			t.Fatal("expected body-contains mismatch failure")
		}
		// Forbidden marker present in the body.
		check.BodyContains = `"database":"ok"`
		check.BodyNotContains = `"database":"ok"`
		if ok, _ := orch.TestHookServiceCheckReady(ctx, check); ok {
			t.Fatal("expected body-not-contains failure")
		}

		// Probe-level failures: an unparsable URL and an address that is
		// expected to refuse connections.
		if _, _, err := orch.TestHookHTTPChecklistProbe(ctx, config.ServiceChecklistCheck{URL: "://bad-url"}); err == nil {
			t.Fatal("expected http checklist request-build error")
		}
		if _, _, err := orch.TestHookHTTPChecklistProbe(ctx, config.ServiceChecklistCheck{URL: "http://127.0.0.1:1"}); err == nil {
			t.Fatal("expected http checklist request-failure branch")
		}

		// Waiting on a failing checklist with an already-cancelled context must
		// surface context.Canceled.
		cfgWait := lifecycleConfig(t)
		cfgWait.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
			{Name: "down", URL: failServer.URL, AcceptedStatuses: []int{200}, TimeoutSeconds: 1},
		}
		cfgWait.Startup.ServiceChecklistWaitSeconds = 1
		cfgWait.Startup.ServiceChecklistPollSeconds = 1
		orchWait, _ := newHookOrchestrator(t, cfgWait, nil, nil)
		cancelCtx, cancel := context.WithCancel(ctx)
		cancel()
		if err := orchWait.TestHookWaitForServiceChecklist(cancelCtx); !errors.Is(err, context.Canceled) {
			t.Fatalf("expected canceled service-checklist wait, got %v", err)
		}

		// A zero-second stability window is expected to succeed.
		cfgStable := lifecycleConfig(t)
		cfgStable.Startup.ServiceChecklistStabilitySec = 0
		orchStable, _ := newHookOrchestrator(t, cfgStable, nil, nil)
		if err := orchStable.TestHookWaitForStabilityWindow(ctx); err != nil {
			t.Fatalf("expected zero-window stability fast-path, got %v", err)
		}

		// A pod reported in CrashLoopBackOff must fail a non-zero window.
		runFailPods := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
				return `{"items":[{"metadata":{"namespace":"vault","name":"vault-0"},"spec":{"nodeName":"titan-23"},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}}]}`, nil
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		cfgFailStable := lifecycleConfig(t)
		cfgFailStable.Startup.ServiceChecklistStabilitySec = 1
		orchFailStable, _ := newHookOrchestrator(t, cfgFailStable, runFailPods, runFailPods)
		err := orchFailStable.TestHookWaitForStabilityWindow(ctx)
		if err == nil || !strings.Contains(err.Error(), "startup stability window failed") {
			t.Fatalf("expected stability failure from failing pods, got %v", err)
		}

		// Package-level helper checks.
		if !cluster.TestHookChecklistContains(`{"A":"B"}`, `"a":"b"`) {
			t.Fatal("expected case-insensitive checklistContains success")
		}
		if cluster.TestHookIsLikelyHostname("not/a/host") {
			t.Fatal("expected invalid hostname heuristic to be false")
		}
	})
}