package orchestrator

import (
	"context"
	"io"
	"log"
	"net"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestHookFluxHealthAndStorageBranches exercises the flux-health and storage
// readiness helpers against canned kubectl output: the first kustomization
// poll reports Ready=False, subsequent polls report Ready=True, and the
// Longhorn node and critical-PVC probes report a healthy store throughout.
func TestHookFluxHealthAndStorageBranches(t *testing.T) {
	cfg := lifecycleConfig(t)
	// Short wait/poll windows keep the polling helpers fast in tests.
	cfg.Startup.FluxHealthWaitSeconds = 2
	cfg.Startup.FluxHealthPollSeconds = 1
	cfg.Startup.IgnoreFluxKustomizations = []string{"flux-system/skip-me"}
	cfg.Startup.StorageReadyWaitSeconds = 2
	cfg.Startup.StorageReadyPollSeconds = 1
	cfg.Startup.StorageMinReadyNodes = 1
	cfg.Startup.StorageCriticalPVCs = []string{"monitoring/grafana-data"}

	recorder := &commandRecorder{}
	base := lifecycleDispatcher(recorder)
	fluxCalls := 0
	// run intercepts the commands under test and delegates everything else to
	// the shared lifecycle dispatcher.
	run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
		command := name + " " + strings.Join(args, " ")
		switch {
		case name == "kubectl" && strings.Contains(command, "get kustomizations.kustomize.toolkit.fluxcd.io -A -o json"):
			recorder.record(name, args)
			fluxCalls++
			// First poll: not ready (drives the not-ready assertion below);
			// later polls: ready (lets waitForFluxHealth converge).
			if fluxCalls <= 1 {
				return `{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Ready","status":"False","reason":"Unknown","message":"waiting"}]}}]}`, nil
			}
			return `{"items":[{"metadata":{"namespace":"flux-system","name":"services"},"spec":{"suspend":false},"status":{"conditions":[{"type":"Ready","status":"True","reason":"ReconciliationSucceeded","message":"ok"}]}}]}`, nil
		case name == "kubectl" && strings.Contains(command, "get jobs -A -o json"):
			recorder.record(name, args)
			// No jobs at all, so the immutable-job healer has nothing to do.
			return `{"items":[]}`, nil
		case name == "kubectl" && strings.Contains(command, "nodes.longhorn.io"):
			recorder.record(name, args)
			// One Longhorn node, ready+schedulable: satisfies StorageMinReadyNodes=1.
			return "lh-a:True:True\n", nil
		case name == "kubectl" && strings.Contains(command, "-n monitoring get pvc grafana-data"):
			recorder.record(name, args)
			// The single critical PVC reports Bound.
			return "Bound", nil
		default:
			return base(ctx, timeout, name, args...)
		}
	}
	orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
	orch.SetCommandOverrides(run, run)

	// First direct readiness check consumes the Ready=False fixture.
	ok, detail, err := orch.TestHookFluxHealthReady(context.Background())
	if err != nil {
		t.Fatalf("fluxHealthReady error: %v", err)
	}
	if ok {
		t.Fatalf("expected first fluxHealthReady call to be not-ready: %s", detail)
	}
	healed, err := orch.TestHookHealImmutableFluxJobs(context.Background())
	if err != nil {
		t.Fatalf("healImmutableFluxJobs error: %v", err)
	}
	if healed {
		t.Fatalf("expected no immutable job heal action in this fixture")
	}
	// The wait loop should now see the Ready=True fixture and succeed within
	// the 2-second window configured above.
	if err := orch.TestHookWaitForFluxHealth(context.Background()); err != nil {
		t.Fatalf("waitForFluxHealth: %v", err)
	}

	ready, reason, readyErr := orch.TestHookStorageReady(context.Background())
	if readyErr != nil {
		t.Fatalf("storageReady error: %v", readyErr)
	}
	if !ready {
		t.Fatalf("expected storage ready, reason=%s", reason)
	}
	if err := orch.TestHookWaitForStorageReady(context.Background()); err != nil {
		t.Fatalf("waitForStorageReady: %v", err)
	}
}
|
|
|
|
// TestHookTimeSyncAndDatastoreBranches runs one orchestration or CLI step.
|
|
// Signature: TestHookTimeSyncAndDatastoreBranches(t *testing.T).
|
|
// Why: covers time-sync gate and datastore preflight helpers, including parser and TCP helper paths.
|
|
func TestHookTimeSyncAndDatastoreBranches(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.ControlPlanes = []string{"titan-db", "titan-23"}
|
|
cfg.Workers = []string{"titan-24"}
|
|
cfg.SSHManagedNodes = []string{"titan-db", "titan-23", "titan-24"}
|
|
cfg.SSHNodeHosts["titan-23"] = "titan-23"
|
|
cfg.SSHNodeHosts["titan-24"] = "titan-24"
|
|
cfg.Startup.TimeSyncMode = "quorum"
|
|
cfg.Startup.TimeSyncQuorum = 1
|
|
cfg.Startup.TimeSyncWaitSeconds = 2
|
|
cfg.Startup.TimeSyncPollSeconds = 1
|
|
|
|
recorder := &commandRecorder{}
|
|
base := lifecycleDispatcher(recorder)
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "sh" && strings.Contains(command, "timedatectl show -p NTPSynchronized --value"):
|
|
recorder.record(name, args)
|
|
return "yes", nil
|
|
case name == "ssh" && strings.Contains(command, "timedatectl show -p NTPSynchronized --value"):
|
|
recorder.record(name, args)
|
|
if strings.Contains(command, "titan-db") {
|
|
return "yes", nil
|
|
}
|
|
return "no", nil
|
|
case name == "ssh" && strings.Contains(command, "systemctl cat k3s"):
|
|
recorder.record(name, args)
|
|
return "ExecStart=/usr/local/bin/k3s server --datastore-endpoint=postgres://127.0.0.1:6543/k3s", nil
|
|
default:
|
|
return base(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
|
|
orch.SetCommandOverrides(run, run)
|
|
|
|
if err := orch.TestHookWaitForTimeSync(context.Background(), []string{"titan-db", "titan-23"}); err != nil {
|
|
t.Fatalf("waitForTimeSync: %v", err)
|
|
}
|
|
if got := cluster.TestHookParseDatastoreEndpoint(`ExecStart=/usr/local/bin/k3s server --datastore-endpoint="postgres://db:5432/k3s"`); !strings.Contains(got, "postgres://db:5432/k3s") {
|
|
t.Fatalf("unexpected datastore endpoint parse: %q", got)
|
|
}
|
|
if got := orch.TestHookNodeNameForHost("titan-23"); got != "titan-23" {
|
|
t.Fatalf("unexpected nodeNameForHost direct match: %q", got)
|
|
}
|
|
if err := orch.TestHookValidateNodeInventory(); err != nil {
|
|
t.Fatalf("validateNodeInventory: %v", err)
|
|
}
|
|
|
|
ln, err := net.Listen("tcp", "127.0.0.1:0")
|
|
if err != nil {
|
|
t.Fatalf("listen for tcpReachable test: %v", err)
|
|
}
|
|
addr := ln.Addr().String()
|
|
if !orch.TestHookTCPReachable(addr, time.Second) {
|
|
t.Fatalf("expected tcpReachable=true for listener %s", addr)
|
|
}
|
|
_ = ln.Close()
|
|
if orch.TestHookTCPReachable(addr, 100*time.Millisecond) {
|
|
t.Fatalf("expected tcpReachable=false after listener close")
|
|
}
|
|
}
|
|
|
|
// TestHookChecklistAndStabilityBranches runs one orchestration or CLI step.
|
|
// Signature: TestHookChecklistAndStabilityBranches(t *testing.T).
|
|
// Why: covers checklist helper methods and startup stability window internals.
|
|
func TestHookChecklistAndStabilityBranches(t *testing.T) {
|
|
cfg := lifecycleConfig(t)
|
|
cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
|
|
{
|
|
Name: "grafana",
|
|
URL: "https://metrics.bstein.dev/api/health",
|
|
AcceptedStatuses: []int{200},
|
|
BodyContains: `"database":"ok"`,
|
|
TimeoutSeconds: 5,
|
|
},
|
|
}
|
|
cfg.Startup.ServiceChecklistWaitSeconds = 1
|
|
cfg.Startup.ServiceChecklistPollSeconds = 1
|
|
cfg.Startup.ServiceChecklistStabilitySec = 1
|
|
cfg.Startup.RequireWorkloadConvergence = false
|
|
|
|
recorder := &commandRecorder{}
|
|
base := lifecycleDispatcher(recorder)
|
|
serviceCalls := 0
|
|
run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
|
|
command := name + " " + strings.Join(args, " ")
|
|
switch {
|
|
case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
|
|
recorder.record(name, args)
|
|
return `{"items":[]}`, nil
|
|
case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
|
|
recorder.record(name, args)
|
|
return `{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1,"template":{"spec":{}}},"status":{"readyReplicas":1}}]}`, nil
|
|
case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
|
|
recorder.record(name, args)
|
|
return "monitoring\tgrafana\t1\n", nil
|
|
case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
|
|
recorder.record(name, args)
|
|
return "", nil
|
|
case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
|
|
recorder.record(name, args)
|
|
return `{"items":[]}`, nil
|
|
case name == "kubectl" && strings.Contains(command, "nodes.longhorn.io"):
|
|
recorder.record(name, args)
|
|
return "lh-a:True:True\n", nil
|
|
case name == "curl":
|
|
recorder.record(name, args)
|
|
serviceCalls++
|
|
if serviceCalls == 1 {
|
|
return "503", nil
|
|
}
|
|
return "200", nil
|
|
default:
|
|
return base(ctx, timeout, name, args...)
|
|
}
|
|
}
|
|
orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
|
|
orch.SetCommandOverrides(run, run)
|
|
|
|
ok, detail := orch.TestHookPostStartProbesReady(context.Background())
|
|
if !ok || !strings.Contains(detail, "no probes configured") {
|
|
t.Fatalf("expected no-probes ready branch, got ok=%v detail=%q", ok, detail)
|
|
}
|
|
code, err := orch.TestHookHTTPProbe(context.Background(), "https://example.invalid")
|
|
if err != nil {
|
|
t.Fatalf("unexpected HTTP probe error with recorder override: %v", err)
|
|
}
|
|
if code != 503 {
|
|
t.Fatalf("expected first synthetic HTTP probe code=503, got %d", code)
|
|
}
|
|
|
|
// Direct checklist readiness path should always return a non-empty status detail.
|
|
_, checkDetail := orch.TestHookServiceChecklistReady(context.Background())
|
|
if checkDetail == "" {
|
|
t.Fatalf("expected service checklist detail to be populated")
|
|
}
|
|
|
|
// Force stability helper path through synthetic kubectl outputs.
|
|
if err := orch.TestHookWaitForStabilityWindow(context.Background()); err != nil && !strings.Contains(err.Error(), "stability") {
|
|
t.Fatalf("unexpected stability window error: %v", err)
|
|
}
|
|
}
|