272 lines
12 KiB
Go
272 lines
12 KiB
Go
package orchestrator
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
|
)
|
|
|
|
// TestHookIngressServiceMatrix runs one orchestration or CLI step.
|
|
// Signature: TestHookIngressServiceMatrix(t *testing.T).
|
|
// Why: expands checklist/ingress branch coverage so startup readiness gating is
|
|
// validated against more realistic and failure-prone edge cases.
|
|
func TestHookIngressServiceMatrix(t *testing.T) {
|
|
t.Run("required-node-labels-success-and-error", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
|
|
"titan-23": {
|
|
"topology.kubernetes.io/zone": "lab-a",
|
|
"skip-empty": "",
|
|
},
|
|
"": {"ignored": "value"},
|
|
}
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "label node titan-23 --overwrite") {
|
|
return "", nil
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
if err := orch.TestHookEnsureRequiredNodeLabels(context.Background()); err != nil {
|
|
t.Fatalf("expected required-node-labels success, got %v", err)
|
|
}
|
|
|
|
runErr := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "label node titan-23 --overwrite") {
|
|
return "", errors.New("label denied")
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orchErr, _ := newHookOrchestrator(t, cfg, runErr, runErr)
|
|
if err := orchErr.TestHookEnsureRequiredNodeLabels(context.Background()); err == nil {
|
|
t.Fatalf("expected ensureRequiredNodeLabels failure branch")
|
|
}
|
|
})
|
|
|
|
t.Run("required-node-labels-skip-ignored-unavailable-nodes", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
|
|
"titan-09": {
|
|
"ananke.bstein.dev/harbor-bootstrap": "true",
|
|
},
|
|
}
|
|
cfg.Startup.IgnoreUnavailableNodes = []string{"titan-09"}
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "label node titan-09 --overwrite") {
|
|
t.Fatalf("expected ignored unavailable node labels to be skipped, got %q", command)
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
if err := orch.TestHookEnsureRequiredNodeLabels(context.Background()); err != nil {
|
|
t.Fatalf("expected ignored unavailable node label enforcement to be skipped, got %v", err)
|
|
}
|
|
})
|
|
|
|
t.Run("required-node-labels-skip-absent-non-core-nodes", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.RequiredNodeLabels = map[string]map[string]string{
|
|
"titan-09": {
|
|
"ananke.bstein.dev/harbor-bootstrap": "true",
|
|
},
|
|
}
|
|
cfg.Startup.NodeInventoryReachRequiredNodes = []string{"titan-db"}
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "label node titan-09 --overwrite") {
|
|
return "", errors.New("Error from server (NotFound): nodes \"titan-09\" not found")
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
if err := orch.TestHookEnsureRequiredNodeLabels(context.Background()); err != nil {
|
|
t.Fatalf("expected absent non-core node label enforcement to be skipped, got %v", err)
|
|
}
|
|
})
|
|
|
|
t.Run("ingress-discovery-checklist-and-heal", func(t *testing.T) {
|
|
tlsServer := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`ok`))
|
|
}))
|
|
defer tlsServer.Close()
|
|
ingressHost := strings.TrimPrefix(tlsServer.URL, "https://")
|
|
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.IngressChecklistInsecureSkip = true
|
|
cfg.Startup.IngressChecklistIgnoreHosts = []string{"ignored.bstein.dev", "stream.bstein.dev"}
|
|
cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
|
|
{Name: "metrics", URL: "https://" + ingressHost + "/"},
|
|
}
|
|
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
|
|
return fmt.Sprintf(`{"items":[
|
|
{"metadata":{"namespace":"monitoring"},"spec":{"rules":[{"host":"%s"},{"host":"ignored.bstein.dev"},{"host":"*.wildcard.bstein.dev"}]}},
|
|
{"metadata":{"namespace":"media"},"spec":{"rules":[{"host":"stream.bstein.dev"}]}}
|
|
]}`, ingressHost), nil
|
|
case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
|
|
return `{"items":[
|
|
{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":0},"status":{"readyReplicas":0}},
|
|
{"kind":"StatefulSet","metadata":{"namespace":"media","name":"jellyfin"},"spec":{"replicas":1},"status":{"readyReplicas":1}}
|
|
]}`, nil
|
|
case name == "kubectl" && strings.Contains(command, " scale deployment grafana --replicas=1"):
|
|
return "", nil
|
|
default:
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
hosts, err := orch.TestHookDiscoverIngressHosts(context.Background())
|
|
if err != nil {
|
|
t.Fatalf("discover ingress hosts: %v", err)
|
|
}
|
|
if len(hosts) == 0 || hosts[0] == "ignored.bstein.dev" {
|
|
t.Fatalf("expected filtered ingress hosts, got %v", hosts)
|
|
}
|
|
ns, err := orch.TestHookDiscoverIngressNamespacesForHost(context.Background(), ingressHost)
|
|
if err != nil || len(ns) == 0 || ns[0] != "monitoring" {
|
|
t.Fatalf("discover ingress namespaces: ns=%v err=%v", ns, err)
|
|
}
|
|
if ok, detail := orch.TestHookIngressChecklistReady(context.Background()); !ok || !strings.Contains(detail, "hosts=") {
|
|
t.Fatalf("expected ingress checklist ready, ok=%v detail=%q", ok, detail)
|
|
}
|
|
|
|
last := time.Time{}
|
|
orch.TestHookMaybeAutoHealIngressHostBackends(context.Background(), &last, ingressHost+": status=503")
|
|
prev := last
|
|
orch.TestHookMaybeAutoHealIngressHostBackends(context.Background(), &last, ingressHost+": status=503")
|
|
if last != prev {
|
|
t.Fatalf("expected ingress heal cooldown to suppress second attempt")
|
|
}
|
|
|
|
expectedHost := cluster.TestHookHostFromURL("https://" + ingressHost + "/")
|
|
if got := orch.TestHookChecklistFailureHost("metrics: failed"); got != expectedHost {
|
|
t.Fatalf("expected checklistFailureHost to map service check name to host %q, got %q", expectedHost, got)
|
|
}
|
|
if got := cluster.TestHookHostFromURL("not a url"); got != "" {
|
|
t.Fatalf("expected invalid URL host parse to be empty, got %q", got)
|
|
}
|
|
})
|
|
|
|
t.Run("ingress-discovery-error-branches", func(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
|
|
return "{broken", nil
|
|
default:
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch, _ := newHookOrchestrator(t, cfg, run, run)
|
|
if _, err := orch.TestHookDiscoverIngressHosts(context.Background()); err == nil {
|
|
t.Fatalf("expected ingress decode error branch")
|
|
}
|
|
if _, err := orch.TestHookDiscoverIngressNamespacesForHost(context.Background(), "metrics.bstein.dev"); err == nil {
|
|
t.Fatalf("expected ingress namespace decode error branch")
|
|
}
|
|
})
|
|
|
|
t.Run("service-checklist-and-stability-branches", func(t *testing.T) {
|
|
okServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusOK)
|
|
_, _ = w.Write([]byte(`{"database":"ok"}`))
|
|
}))
|
|
defer okServer.Close()
|
|
failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
|
|
w.WriteHeader(http.StatusServiceUnavailable)
|
|
_, _ = w.Write([]byte(`{"status":"down"}`))
|
|
}))
|
|
defer failServer.Close()
|
|
|
|
cfg := lifecycleConfig(t)
|
|
check := config.ServiceChecklistCheck{
|
|
Name: "grafana",
|
|
URL: okServer.URL,
|
|
AcceptedStatuses: []int{200},
|
|
BodyContains: `"database":"ok"`,
|
|
BodyNotContains: `"status":"down"`,
|
|
TimeoutSeconds: 2,
|
|
}
|
|
cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{check}
|
|
orch, _ := newHookOrchestrator(t, cfg, nil, nil)
|
|
|
|
if ok, detail := orch.TestHookServiceCheckReady(context.Background(), check); !ok || !strings.Contains(detail, "status=200") {
|
|
t.Fatalf("expected service check success, ok=%v detail=%q", ok, detail)
|
|
}
|
|
check.BodyContains = `"missing-marker"`
|
|
if ok, _ := orch.TestHookServiceCheckReady(context.Background(), check); ok {
|
|
t.Fatalf("expected body-contains mismatch failure")
|
|
}
|
|
check.BodyContains = `"database":"ok"`
|
|
check.BodyNotContains = `"database":"ok"`
|
|
if ok, _ := orch.TestHookServiceCheckReady(context.Background(), check); ok {
|
|
t.Fatalf("expected body-not-contains failure")
|
|
}
|
|
|
|
if _, _, err := orch.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{URL: "://bad-url"}); err == nil {
|
|
t.Fatalf("expected http checklist request-build error")
|
|
}
|
|
if _, _, err := orch.TestHookHTTPChecklistProbe(context.Background(), config.ServiceChecklistCheck{URL: "http://127.0.0.1:1"}); err == nil {
|
|
t.Fatalf("expected http checklist request-failure branch")
|
|
}
|
|
|
|
cfgWait := lifecycleConfig(t)
|
|
cfgWait.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
|
|
{Name: "down", URL: failServer.URL, AcceptedStatuses: []int{200}, TimeoutSeconds: 1},
|
|
}
|
|
cfgWait.Startup.ServiceChecklistWaitSeconds = 1
|
|
cfgWait.Startup.ServiceChecklistPollSeconds = 1
|
|
orchWait, _ := newHookOrchestrator(t, cfgWait, nil, nil)
|
|
cancelCtx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
if err := orchWait.TestHookWaitForServiceChecklist(cancelCtx); !errors.Is(err, context.Canceled) {
|
|
t.Fatalf("expected canceled service-checklist wait, got %v", err)
|
|
}
|
|
|
|
cfgStable := lifecycleConfig(t)
|
|
cfgStable.Startup.ServiceChecklistStabilitySec = 0
|
|
orchStable, _ := newHookOrchestrator(t, cfgStable, nil, nil)
|
|
if err := orchStable.TestHookWaitForStabilityWindow(context.Background()); err != nil {
|
|
t.Fatalf("expected zero-window stability fast-path, got %v", err)
|
|
}
|
|
|
|
runFailPods := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
if name == "kubectl" && strings.Contains(command, "get pods -A -o json") {
|
|
return `{"items":[{"metadata":{"namespace":"vault","name":"vault-0"},"spec":{"nodeName":"titan-23"},"status":{"containerStatuses":[{"state":{"waiting":{"reason":"CrashLoopBackOff"}}}]}}]}`, nil
|
|
}
|
|
return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
|
|
}
|
|
cfgFailStable := lifecycleConfig(t)
|
|
cfgFailStable.Startup.ServiceChecklistStabilitySec = 1
|
|
orchFailStable, _ := newHookOrchestrator(t, cfgFailStable, runFailPods, runFailPods)
|
|
err := orchFailStable.TestHookWaitForStabilityWindow(context.Background())
|
|
if err == nil || !strings.Contains(err.Error(), "startup stability window failed") {
|
|
t.Fatalf("expected stability failure from failing pods, got %v", err)
|
|
}
|
|
|
|
if !cluster.TestHookChecklistContains(`{"A":"B"}`, `"a":"b"`) {
|
|
t.Fatalf("expected case-insensitive checklistContains success")
|
|
}
|
|
if cluster.TestHookIsLikelyHostname("not/a/host") {
|
|
t.Fatalf("expected invalid hostname heuristic to be false")
|
|
}
|
|
})
|
|
}
|