ananke/internal/cluster/orchestrator_test.go

462 lines
17 KiB
Go

package cluster
import (
"context"
"log"
"net/http"
"net/http/httptest"
"os"
"reflect"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/config"
"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestParseVaultSealed verifies that parseVaultSealed reports the "sealed" flag from a status payload.
// Signature: TestParseVaultSealed(t *testing.T).
// Why: both the sealed=true and sealed=false payloads must parse without error and yield the matching boolean.
func TestParseVaultSealed(t *testing.T) {
	cases := []struct {
		label   string
		payload string
		want    bool
	}{
		{label: "sealed=true", payload: `{"initialized":true,"sealed":true}`, want: true},
		{label: "sealed=false", payload: `{"initialized":true,"sealed":false}`, want: false},
	}
	for _, tc := range cases {
		got, err := parseVaultSealed(tc.payload)
		if err != nil {
			t.Fatalf("parse %s: %v", tc.label, err)
		}
		if got != tc.want {
			t.Fatalf("expected %s", tc.label)
		}
	}
}
// TestParseVaultSealedRejectsEmpty verifies that parseVaultSealed returns an error for a whitespace-only payload.
// Signature: TestParseVaultSealedRejectsEmpty(t *testing.T).
// Why: a blank status payload carries no sealed flag, so the test requires an error instead of a value.
func TestParseVaultSealedRejectsEmpty(t *testing.T) {
	_, err := parseVaultSealed(" ")
	if err == nil {
		t.Fatal("expected parse error for empty status payload")
	}
}
// TestParseVaultSealedWithKubectlPreamble verifies that parseVaultSealed reads the JSON status when a non-JSON line precedes it.
// Signature: TestParseVaultSealedWithKubectlPreamble(t *testing.T).
// Why: the fixture places a "Defaulted container" notice ahead of the JSON; sealed=true must still be reported.
func TestParseVaultSealedWithKubectlPreamble(t *testing.T) {
	const payload = "Defaulted container \"vault\" out of: vault, setup-config (init)\n" +
		"{\"sealed\":true,\"initialized\":true}\n"
	got, err := parseVaultSealed(payload)
	switch {
	case err != nil:
		t.Fatalf("parse with preamble: %v", err)
	case !got:
		t.Fatal("expected sealed=true from payload with preamble")
	}
}
// TestFallbackWorkersFromInventoryUsesManagedNodes runs one orchestration or CLI step.
// Signature: TestFallbackWorkersFromInventoryUsesManagedNodes(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestFallbackWorkersFromInventoryUsesManagedNodes(t *testing.T) {
orch := &Orchestrator{
cfg: config.Config{
ControlPlanes: []string{"titan-0a", "titan-0b", "titan-0c"},
SSHManagedNodes: []string{
"titan-db",
"titan-0a",
"titan-15",
"titan-17",
},
},
log: log.New(os.Stdout, "", 0),
}
got := orch.fallbackWorkersFromInventory()
want := []string{"titan-15", "titan-17", "titan-db"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("fallback workers mismatch: got=%v want=%v", got, want)
}
}
// TestFallbackWorkersFromInventoryFallsBackToHosts runs one orchestration or CLI step.
// Signature: TestFallbackWorkersFromInventoryFallsBackToHosts(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestFallbackWorkersFromInventoryFallsBackToHosts(t *testing.T) {
orch := &Orchestrator{
cfg: config.Config{
ControlPlanes: []string{"titan-0a", "titan-0b", "titan-0c"},
SSHNodeHosts: map[string]string{
"titan-0a": "192.168.22.11",
"titan-22": "192.168.22.22",
"titan-24": "192.168.22.26",
},
},
log: log.New(os.Stdout, "", 0),
}
got := orch.fallbackWorkersFromInventory()
want := []string{"titan-22", "titan-24"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("fallback workers mismatch: got=%v want=%v", got, want)
}
}
// TestIntentFreshTreatsZeroTimestampAsFresh runs one orchestration or CLI step.
// Signature: TestIntentFreshTreatsZeroTimestampAsFresh(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestIntentFreshTreatsZeroTimestampAsFresh(t *testing.T) {
if !intentFresh(state.Intent{}, 30*time.Second) {
t.Fatalf("zero updated_at intent should be treated as fresh")
}
}
// TestIntentFreshRespectsAge verifies that intentFresh compares UpdatedAt against the supplied maximum age.
// Signature: TestIntentFreshRespectsAge(t *testing.T).
// Why: with a 30s window, an intent updated 2m ago must be rejected and one updated 20s ago must be accepted.
func TestIntentFreshRespectsAge(t *testing.T) {
	const window = 30 * time.Second

	old := state.Intent{UpdatedAt: time.Now().Add(-2 * time.Minute)}
	recent := state.Intent{UpdatedAt: time.Now().Add(-20 * time.Second)}

	if oldIsFresh := intentFresh(old, window); oldIsFresh {
		t.Fatal("expected stale intent to be considered not fresh")
	}
	if recentIsFresh := intentFresh(recent, window); !recentIsFresh {
		t.Fatal("expected recent intent to be considered fresh")
	}
}
// TestCoordinationPeersDedupesAndIncludesForwardHost runs one orchestration or CLI step.
// Signature: TestCoordinationPeersDedupesAndIncludesForwardHost(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestCoordinationPeersDedupesAndIncludesForwardHost(t *testing.T) {
orch := &Orchestrator{
cfg: config.Config{
Coordination: config.Coordination{
PeerHosts: []string{"titan-24", "titan-db", "titan-24", " "},
ForwardShutdownHost: "titan-db",
},
},
}
got := orch.coordinationPeers()
want := []string{"titan-24", "titan-db"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("coordination peers mismatch: got=%v want=%v", got, want)
}
}
// TestWorkloadTargetsIgnoredNodesByNodeSelector verifies that workloadTargetsIgnoredNodes reports true for a pod spec pinned to an ignored node.
// Signature: TestWorkloadTargetsIgnoredNodesByNodeSelector(t *testing.T).
// Why: the spec's kubernetes.io/hostname nodeSelector names titan-22, which is also in the ignored set.
func TestWorkloadTargetsIgnoredNodesByNodeSelector(t *testing.T) {
	const node = "titan-22"

	pinned := podSpec{NodeSelector: map[string]string{"kubernetes.io/hostname": node}}
	skip := map[string]struct{}{node: {}}

	if targeted := workloadTargetsIgnoredNodes(pinned, skip); !targeted {
		t.Fatal("expected workload to target ignored node via nodeSelector")
	}
}
// TestParseWorkloadIgnoreRules verifies parseWorkloadIgnoreRules together with workloadIgnored.
// Signature: TestParseWorkloadIgnoreRules(t *testing.T).
// Why: the two-segment rule maintenance/metis must match when queried as a deployment, and the
// three-segment rule crypto/statefulset/monerod must match a statefulset but not a deployment.
func TestParseWorkloadIgnoreRules(t *testing.T) {
	parsed := parseWorkloadIgnoreRules([]string{"maintenance/metis", "crypto/statefulset/monerod"})
	if n := len(parsed); n != 2 {
		t.Fatalf("expected 2 ignore rules, got %d", n)
	}

	// Cases are evaluated top to bottom, so the first unmet expectation is the one reported.
	switch {
	case !workloadIgnored(parsed, "maintenance", "deployment", "metis"):
		t.Fatal("expected namespace/name rule to match")
	case !workloadIgnored(parsed, "crypto", "statefulset", "monerod"):
		t.Fatal("expected namespace/kind/name rule to match")
	case workloadIgnored(parsed, "crypto", "deployment", "monerod"):
		t.Fatal("did not expect mismatched kind to match")
	}
}
// TestNamespaceCandidatesFromIgnoreKustomizations verifies the keys produced by namespaceCandidatesFromIgnoreKustomizations.
// Signature: TestNamespaceCandidatesFromIgnoreKustomizations(t *testing.T).
// Why: the entries flux-system/jellyfin and flux-system/outline must yield jellyfin and outline as candidates.
func TestNamespaceCandidatesFromIgnoreKustomizations(t *testing.T) {
	candidates := namespaceCandidatesFromIgnoreKustomizations([]string{
		"flux-system/jellyfin",
		"flux-system/outline",
	})

	_, hasJellyfin := candidates["jellyfin"]
	if !hasJellyfin {
		t.Fatal("expected jellyfin namespace candidate")
	}
	_, hasOutline := candidates["outline"]
	if !hasOutline {
		t.Fatal("expected outline namespace candidate")
	}
}
// TestProbeStatusAcceptedRejects404 verifies that probeStatusAccepted rejects an HTTP 404 response.
// Signature: TestProbeStatusAcceptedRejects404(t *testing.T).
// Why: a 404 from the probed login URL must not be counted as an accepted probe status.
func TestProbeStatusAcceptedRejects404(t *testing.T) {
	accepted := probeStatusAccepted("https://metrics.bstein.dev/login", 404)
	if accepted {
		t.Fatal("expected 404 probe status to be rejected")
	}
}
// TestParseFluxKustomizationTimeout verifies parseFluxKustomizationTimeout for valid, empty, and malformed inputs.
// Signature: TestParseFluxKustomizationTimeout(t *testing.T).
// Why: "30m" and "5m30s" must parse to the matching durations, while "" and "not-a-duration" must yield zero.
func TestParseFluxKustomizationTimeout(t *testing.T) {
	simple := parseFluxKustomizationTimeout("30m")
	if simple != 30*time.Minute {
		t.Fatalf("expected 30m duration, got %s", simple)
	}

	compound := parseFluxKustomizationTimeout("5m30s")
	if compound != 5*time.Minute+30*time.Second {
		t.Fatalf("expected 5m30s duration, got %s", compound)
	}

	empty := parseFluxKustomizationTimeout("")
	if empty != 0 {
		t.Fatalf("expected zero duration for empty timeout, got %s", empty)
	}

	invalid := parseFluxKustomizationTimeout("not-a-duration")
	if invalid != 0 {
		t.Fatalf("expected zero duration for invalid timeout, got %s", invalid)
	}
}
// TestServiceCheckReadyRequiresBodyContains runs one orchestration or CLI step.
// Signature: TestServiceCheckReadyRequiresBodyContains(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestServiceCheckReadyRequiresBodyContains(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte(`{"database":"ok"}`))
}))
defer srv.Close()
orch := &Orchestrator{
log: log.New(os.Stdout, "", 0),
}
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
Name: "grafana-api",
URL: srv.URL,
AcceptedStatuses: []int{200},
BodyContains: `"database":"ok"`,
TimeoutSeconds: 5,
})
if !ok {
t.Fatalf("expected service check to pass, detail=%s", detail)
}
}
// TestServiceCheckReadyBodyContainsIgnoresWhitespace runs one orchestration or CLI step.
// Signature: TestServiceCheckReadyBodyContainsIgnoresWhitespace(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestServiceCheckReadyBodyContainsIgnoresWhitespace(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("{\n \"database\": \"ok\"\n}\n"))
}))
defer srv.Close()
orch := &Orchestrator{
log: log.New(os.Stdout, "", 0),
}
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
Name: "grafana-api",
URL: srv.URL,
AcceptedStatuses: []int{200},
BodyContains: `"database":"ok"`,
TimeoutSeconds: 5,
})
if !ok {
t.Fatalf("expected whitespace-tolerant service check to pass, detail=%s", detail)
}
}
// TestServiceCheckReadyRequiresLocationContains runs one orchestration or CLI step.
// Signature: TestServiceCheckReadyRequiresLocationContains(t *testing.T).
// Why: startup checks must validate redirect targets for OIDC-gated services.
func TestServiceCheckReadyRequiresLocationContains(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Location", "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth?client_id=logs")
w.WriteHeader(http.StatusFound)
}))
defer srv.Close()
orch := &Orchestrator{
log: log.New(os.Stdout, "", 0),
}
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
Name: "logging-oidc-redirect",
URL: srv.URL,
AcceptedStatuses: []int{302},
LocationContains: "client_id=logs",
TimeoutSeconds: 5,
})
if !ok {
t.Fatalf("expected location-aware service check to pass, detail=%s", detail)
}
}
// TestServiceCheckReadyRejectsMissingLocationMarker runs one orchestration or CLI step.
// Signature: TestServiceCheckReadyRejectsMissingLocationMarker(t *testing.T).
// Why: prevents false positives when redirects point somewhere unexpected.
func TestServiceCheckReadyRejectsMissingLocationMarker(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Location", "https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth?client_id=wrong")
w.WriteHeader(http.StatusFound)
}))
defer srv.Close()
orch := &Orchestrator{
log: log.New(os.Stdout, "", 0),
}
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
Name: "logging-oidc-redirect",
URL: srv.URL,
AcceptedStatuses: []int{302},
LocationContains: "client_id=logs",
TimeoutSeconds: 5,
})
if ok {
t.Fatalf("expected location-aware service check to fail")
}
if !strings.Contains(detail, "location header missing expected marker") {
t.Fatalf("expected missing location marker detail, got %q", detail)
}
}
// TestServiceCheckReadyRequiresFinalURLContains runs one orchestration or CLI step.
// Signature: TestServiceCheckReadyRequiresFinalURLContains(t *testing.T).
// Why: authenticated user-journey checks depend on final URL assertions after
// redirects complete, not only on initial response status.
func TestServiceCheckReadyRequiresFinalURLContains(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/" {
http.Redirect(w, r, "/app/home", http.StatusFound)
return
}
if r.URL.Path == "/app/home" {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("OpenSearch Dashboards"))
return
}
w.WriteHeader(http.StatusNotFound)
}))
defer srv.Close()
orch := &Orchestrator{
log: log.New(os.Stdout, "", 0),
}
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
Name: "logging-ui-user-session",
URL: srv.URL,
AcceptedStatuses: []int{200},
FollowRedirects: true,
FinalURLContains: "/app/home",
BodyContains: "OpenSearch Dashboards",
TimeoutSeconds: 5,
})
if !ok {
t.Fatalf("expected final-url-aware service check to pass, detail=%s", detail)
}
}
// TestServiceCheckReadyRejectsForbiddenFinalURLMarker runs one orchestration or CLI step.
// Signature: TestServiceCheckReadyRejectsForbiddenFinalURLMarker(t *testing.T).
// Why: user-session checks should fail when final URL indicates auth/login loop
// instead of the expected post-login app route.
func TestServiceCheckReadyRejectsForbiddenFinalURLMarker(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/" {
http.Redirect(w, r, "/oauth2/sign_in", http.StatusFound)
return
}
if r.URL.Path == "/oauth2/sign_in" {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("sign in"))
return
}
w.WriteHeader(http.StatusNotFound)
}))
defer srv.Close()
orch := &Orchestrator{
log: log.New(os.Stdout, "", 0),
}
ok, detail := orch.serviceCheckReady(context.Background(), config.ServiceChecklistCheck{
Name: "logging-ui-user-session",
URL: srv.URL,
AcceptedStatuses: []int{200},
FollowRedirects: true,
FinalURLNotContains: "/oauth2/sign_in",
TimeoutSeconds: 5,
})
if ok {
t.Fatalf("expected forbidden final-url marker check to fail")
}
if !strings.Contains(detail, "final url contained forbidden marker") {
t.Fatalf("expected final-url forbidden marker detail, got %q", detail)
}
}
// TestChecklistFailureHostFromIngressDetail verifies that checklistFailureHost returns the hostname that prefixes a failure detail.
// Signature: TestChecklistFailureHostFromIngressDetail(t *testing.T).
// Why: no service checklist is configured here, so cloud.bstein.dev must come from the detail text itself.
func TestChecklistFailureHostFromIngressDetail(t *testing.T) {
	const detail = "cloud.bstein.dev: unexpected status code=500"

	orch := &Orchestrator{}
	if host := orch.checklistFailureHost(detail); host != "cloud.bstein.dev" {
		t.Fatalf("expected host cloud.bstein.dev, got %q", host)
	}
}
// TestChecklistFailureHostFromServiceCheckName runs one orchestration or CLI step.
// Signature: TestChecklistFailureHostFromServiceCheckName(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestChecklistFailureHostFromServiceCheckName(t *testing.T) {
orch := &Orchestrator{
cfg: config.Config{
Startup: config.Startup{
ServiceChecklist: []config.ServiceChecklistCheck{
{
Name: "harbor-registry",
URL: "https://registry.bstein.dev/v2/",
},
},
},
},
}
got := orch.checklistFailureHost("harbor-registry: unexpected status code=404")
if got != "registry.bstein.dev" {
t.Fatalf("expected host registry.bstein.dev, got %q", got)
}
}
// TestChecklistFailureHostUnknown runs one orchestration or CLI step.
// Signature: TestChecklistFailureHostUnknown(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestChecklistFailureHostUnknown(t *testing.T) {
orch := &Orchestrator{
cfg: config.Config{
Startup: config.Startup{
ServiceChecklist: []config.ServiceChecklistCheck{
{
Name: "grafana-api",
URL: "https://metrics.bstein.dev/api/health",
},
},
},
},
}
if got := orch.checklistFailureHost("grafana-api: tcp timeout"); got != "metrics.bstein.dev" {
t.Fatalf("expected metrics host from configured URL, got %q", got)
}
if got := orch.checklistFailureHost("some-unmapped-check: fail"); got != "" {
t.Fatalf("expected empty host for unknown check, got %q", got)
}
}