// ananke/testing/orchestrator/hooks_wrappers_coverage_test.go

package orchestrator
import (
"context"
"io"
"log"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/cluster"
"scm.bstein.dev/bstein/ananke/internal/config"
"scm.bstein.dev/bstein/ananke/internal/execx"
"scm.bstein.dev/bstein/ananke/internal/state"
)
// wrapperCoverageDispatcher builds the command-override function used by the
// hook-wrapper coverage tests.
// Signature: wrapperCoverageDispatcher(recorder *commandRecorder) func(context.Context, time.Duration, string, ...string) (string, error).
// Why: centralizes deterministic command output so hook-wrapper tests can execute all exported test hooks without live cluster access.
func wrapperCoverageDispatcher(recorder *commandRecorder) func(context.Context, time.Duration, string, ...string) (string, error) {
	base := lifecycleDispatcher(recorder)
	return func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
		command := name + " " + strings.Join(args, " ")
		// reply records the matched command on the recorder and returns the
		// canned output, removing the record/return duplication every switch
		// arm previously carried.
		reply := func(out string) (string, error) {
			recorder.record(name, args)
			return out, nil
		}
		switch {
		case name == "curl":
			return reply("200")
		case name == "ssh" && strings.Contains(command, "k3s etcd-snapshot ls"):
			return reply("Name Size Created Location\npre-shutdown 4.2M now \"file:///var/lib/rancher/k3s/server/db/snapshots/pre-shutdown\"\n")
		case name == "ssh" && strings.Contains(command, "k3s etcd-snapshot save"):
			return reply("snapshot saved")
		case name == "kubectl" && strings.Contains(command, "get ingress -A -o json"):
			return reply(`{"items":[{"metadata":{"namespace":"monitoring"},"spec":{"rules":[{"host":"metrics.bstein.dev"}]}}]}`)
		case name == "kubectl" && strings.Contains(command, "get endpoints victoria-metrics-single-server"):
			return reply("10.42.0.10\n")
		case name == "kubectl" && strings.Contains(command, "get deploy,statefulset,daemonset -A -o json"):
			return reply(`{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":1,"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"titan-23"}}}},"status":{"readyReplicas":1}},{"kind":"StatefulSet","metadata":{"namespace":"monitoring","name":"victoria-metrics-single-server"},"spec":{"replicas":1,"template":{"spec":{}}},"status":{"readyReplicas":1}},{"kind":"DaemonSet","metadata":{"namespace":"monitoring","name":"node-exporter"},"spec":{"template":{"spec":{}}},"status":{"desiredNumberScheduled":1,"numberReady":1}}]}`)
		case name == "kubectl" && strings.Contains(command, "get deploy,statefulset -A -o json"):
			return reply(`{"items":[{"kind":"Deployment","metadata":{"namespace":"monitoring","name":"grafana"},"spec":{"replicas":0,"template":{"spec":{}}},"status":{"readyReplicas":0}},{"kind":"StatefulSet","metadata":{"namespace":"monitoring","name":"victoria-metrics-single-server"},"spec":{"replicas":1,"template":{"spec":{}}},"status":{"readyReplicas":1}}]}`)
		case name == "kubectl" && strings.Contains(command, "get pods -A -o json"):
			return reply(`{"items":[{"metadata":{"namespace":"vault","name":"vault-0","creationTimestamp":"2020-01-01T00:00:00Z","ownerReferences":[{"kind":"StatefulSet","name":"vault"}]},"spec":{"nodeName":"titan-23","containers":[{"name":"vault"}]},"status":{"phase":"Pending","initContainerStatuses":[{"name":"vault-agent-init","state":{"running":{"startedAt":"2020-01-01T00:00:00Z"}}}]}}]}`)
		case name == "kubectl" && strings.Contains(command, "get deployment -A -o jsonpath="):
			return reply("monitoring\tgrafana\t1\nflux-system\tsource-controller\t1\ngitea\tgitea\t1\n")
		case name == "kubectl" && strings.Contains(command, "get statefulset -A -o jsonpath="):
			return reply("monitoring\tvictoria-metrics-single-server\t1\nvault\tvault\t1\npostgres\tpostgres\t1\n")
		case name == "kubectl" && strings.Contains(command, "get nodes -o custom-columns="):
			return reply("titan-23 <none> <none>\n")
		case name == "kubectl" && strings.Contains(command, "-n flux-system get kustomizations.kustomize.toolkit.fluxcd.io -o jsonpath="):
			return reply("services\n")
		case name == "kubectl" && strings.Contains(command, "get helmreleases.helm.toolkit.fluxcd.io -A -o jsonpath="):
			return reply("monitoring/grafana\n")
		// NOTE(review): a former dedicated arm for
		// "get statefulset vault -o jsonpath={.status.readyReplicas}" was
		// removed — the generic readyReplicas arm below matches the same
		// commands and records/replies identically, so behavior is unchanged.
		case name == "kubectl" && strings.Contains(command, "jsonpath={.status.readyReplicas}"):
			return reply("1")
		case name == "kubectl" && strings.Contains(command, "rollout status"):
			return reply("rolled out")
		case name == "kubectl" && strings.Contains(command, "scale "):
			return reply("")
		case name == "kubectl" && strings.Contains(command, "delete pod"):
			return reply("")
		case name == "kubectl" && strings.Contains(command, "patch "):
			return reply("")
		default:
			// Anything not modeled here falls through to the shared lifecycle
			// dispatcher so existing fixtures keep working.
			return base(ctx, timeout, name, args...)
		}
	}
}
// TestHookLifecycleWrappersCoverage runs one orchestration or CLI step.
// Signature: TestHookLifecycleWrappersCoverage(t *testing.T).
// Why: executes lifecycle hook wrappers so test-hook files and their orchestration backends stay covered from the top-level testing module.
func TestHookLifecycleWrappersCoverage(t *testing.T) {
	cfg := lifecycleConfig(t)
	// All wait/poll windows are pinned to one second so every polling code
	// path runs at least once without slowing the suite down.
	cfg.Startup.PostStartProbes = []string{"https://metrics.bstein.dev/healthz"}
	cfg.Startup.PostStartProbeWaitSeconds = 1
	cfg.Startup.PostStartProbePollSeconds = 1
	cfg.Startup.RequireIngressChecklist = false
	cfg.Startup.RequireServiceChecklist = false
	cfg.Startup.RequireCriticalServiceEndpoints = false
	cfg.Startup.RequireFluxHealth = false
	cfg.Startup.RequireWorkloadConvergence = false
	cfg.Startup.ServiceChecklistStabilitySec = 1
	cfg.Startup.IngressChecklistWaitSeconds = 1
	cfg.Startup.IngressChecklistPollSeconds = 1
	cfg.Startup.ServiceChecklistWaitSeconds = 1
	cfg.Startup.ServiceChecklistPollSeconds = 1
	cfg.Startup.CriticalServiceEndpoints = []string{"monitoring/victoria-metrics-single-server"}
	cfg.Startup.CriticalServiceEndpointWaitSec = 1
	cfg.Startup.CriticalServiceEndpointPollSec = 1
	// Local HTTP stub stands in for the service-checklist target.
	svc := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(`{"database":"ok"}`))
	}))
	defer svc.Close()
	cfg.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
		{
			Name:             "grafana",
			URL:              svc.URL,
			AcceptedStatuses: []int{200},
			BodyContains:     `"database":"ok"`,
			TimeoutSeconds:   2,
		},
	}
	recorder := &commandRecorder{}
	dispatch := wrapperCoverageDispatcher(recorder)
	orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
	orch.SetCommandOverrides(dispatch, dispatch)
	ctx := context.Background()
	if err := orch.TestHookWaitForPostStartProbes(ctx); err != nil {
		t.Fatalf("wait for post-start probes: %v", err)
	}
	// Coverage-only invocation: the result is intentionally discarded.
	// (Was previously wrapped in an `if ...; true {}` no-op condition.)
	_, _ = orch.TestHookPostStartProbesReady(ctx)
	if _, err := orch.TestHookHTTPProbe(ctx, "https://metrics.bstein.dev/healthz"); err != nil {
		t.Fatalf("http probe: %v", err)
	}
	if err := orch.TestHookResumeFluxAndReconcile(ctx); err != nil {
		t.Fatalf("resume flux and reconcile: %v", err)
	}
	if _, err := orch.TestHookSSHWithTimeout(ctx, "titan-db", "echo ok", time.Second); err != nil {
		t.Fatalf("ssh with timeout: %v", err)
	}
	if _, err := orch.TestHookRunSensitive(ctx, time.Second, "kubectl", "version", "--request-timeout=5s"); err != nil {
		t.Fatalf("runSensitive wrapper: %v", err)
	}
	if err := orch.TestHookWaitForStartupConvergence(ctx); err != nil {
		t.Fatalf("wait for startup convergence: %v", err)
	}
	if err := orch.TestHookWaitForServiceChecklist(ctx); err != nil {
		t.Fatalf("wait for service checklist: %v", err)
	}
	if ok, detail := orch.TestHookServiceChecklistReady(ctx); !ok || detail == "" {
		t.Fatalf("service checklist ready result unexpected: ok=%v detail=%q", ok, detail)
	}
	if ok, detail := orch.TestHookServiceCheckReady(ctx, cfg.Startup.ServiceChecklist[0]); !ok || detail == "" {
		t.Fatalf("service check ready result unexpected: ok=%v detail=%q", ok, detail)
	}
	if status, body, err := orch.TestHookHTTPChecklistProbe(ctx, cfg.Startup.ServiceChecklist[0]); err != nil || status != 200 || body == "" {
		t.Fatalf("http checklist probe unexpected result status=%d body=%q err=%v", status, body, err)
	}
	if err := orch.TestHookWaitForStabilityWindow(ctx); err != nil {
		t.Fatalf("wait for stability window: %v", err)
	}
	if err := orch.TestHookStartupStabilityHealthy(ctx); err != nil {
		t.Fatalf("startup stability healthy: %v", err)
	}
	// Exercise the cancellation path of the ingress checklist wait.
	cancelCtx, cancel := context.WithCancel(ctx)
	cancel()
	_ = orch.TestHookWaitForIngressChecklist(cancelCtx)
	// Coverage-only invocation: the result is intentionally discarded.
	_, _ = orch.TestHookIngressChecklistReady(ctx)
	if hosts, err := orch.TestHookDiscoverIngressHosts(ctx); err != nil || len(hosts) == 0 {
		t.Fatalf("discover ingress hosts failed: hosts=%v err=%v", hosts, err)
	}
	if namespaces, err := orch.TestHookDiscoverIngressNamespacesForHost(ctx, "metrics.bstein.dev"); err != nil || len(namespaces) == 0 {
		t.Fatalf("discover ingress namespaces failed: ns=%v err=%v", namespaces, err)
	}
	last := time.Time{}
	orch.TestHookMaybeAutoHealIngressHostBackends(ctx, &last, "metrics.bstein.dev: status=503")
	if got := orch.TestHookChecklistFailureHost("metrics.bstein.dev: status=503"); got != "metrics.bstein.dev" {
		t.Fatalf("unexpected checklistFailureHost parse: %q", got)
	}
	if got := cluster.TestHookHostFromURL("https://metrics.bstein.dev/api/health"); got != "metrics.bstein.dev" {
		t.Fatalf("unexpected host parse: %q", got)
	}
}
// TestHookOpsAndWorkloadWrappersCoverage runs one orchestration or CLI step.
// Signature: TestHookOpsAndWorkloadWrappersCoverage(t *testing.T).
// Why: executes ops/workload wrappers so all exposed hook files remain covered and regression-safe.
func TestHookOpsAndWorkloadWrappersCoverage(t *testing.T) {
	conf := lifecycleConfig(t)
	// One-second windows keep every wait/poll path fast but exercised.
	conf.Startup.WorkloadConvergenceWaitSeconds = 1
	conf.Startup.WorkloadConvergencePollSeconds = 1
	conf.Startup.CriticalServiceEndpointWaitSec = 1
	conf.Startup.CriticalServiceEndpointPollSec = 1
	conf.Startup.AutoRecycleStuckPods = true
	conf.Startup.StuckPodGraceSeconds = 1
	// Local HTTP stub backs the single service-checklist entry.
	backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(`{"status":"ok"}`))
	}))
	defer backend.Close()
	conf.Startup.ServiceChecklist = []config.ServiceChecklistCheck{
		{Name: "grafana", URL: backend.URL, AcceptedStatuses: []int{200}, BodyContains: `"status":"ok"`, TimeoutSeconds: 2},
	}
	conf.Startup.ServiceChecklistWaitSeconds = 1
	conf.Startup.ServiceChecklistPollSeconds = 1
	rec := &commandRecorder{}
	run := wrapperCoverageDispatcher(rec)
	o := cluster.New(conf, &execx.Runner{DryRun: false}, state.New(conf.State.RunHistoryPath), log.New(io.Discard, "", 0))
	o.SetCommandOverrides(run, run)
	ctx := context.Background()
	if err := o.TestHookDrainWorkers(ctx, []string{"titan-23"}); err != nil {
		t.Fatalf("drain workers: %v", err)
	}
	if err := o.TestHookUncordonWorkers(ctx, []string{"titan-23"}); err != nil {
		t.Fatalf("uncordon workers: %v", err)
	}
	if err := o.TestHookTakeEtcdSnapshot(ctx, "titan-db"); err != nil {
		t.Fatalf("take etcd snapshot: %v", err)
	}
	if snapshot, err := o.TestHookLatestEtcdSnapshotPath(ctx, "titan-db"); err != nil || snapshot == "" {
		t.Fatalf("latest snapshot path failed: snapshot=%q err=%v", snapshot, err)
	}
	if workers, err := o.TestHookEffectiveWorkers(ctx); err != nil || len(workers) == 0 {
		t.Fatalf("effective workers failed: workers=%v err=%v", workers, err)
	}
	if workers, err := o.TestHookDiscoverWorkers(ctx); err != nil || len(workers) == 0 {
		t.Fatalf("discover workers failed: workers=%v err=%v", workers, err)
	}
	if err := o.TestHookPatchFluxSuspendAll(ctx, true); err != nil {
		t.Fatalf("patch flux suspend: %v", err)
	}
	// Scale/snapshot round trip: list the scalable workloads once, then feed
	// the same entries through scale, snapshot write, and snapshot read.
	workloads, err := o.TestHookListScalableWorkloads(ctx)
	if err != nil {
		t.Fatalf("list scalable workloads: %v", err)
	}
	if err := o.TestHookScaleWorkloads(ctx, workloads, 0, 1); err != nil {
		t.Fatalf("scale workloads forced: %v", err)
	}
	if err := o.TestHookWriteScaledWorkloadSnapshot(workloads); err != nil {
		t.Fatalf("write scaled workload snapshot: %v", err)
	}
	if _, err := o.TestHookReadScaledWorkloadSnapshot(); err != nil {
		t.Fatalf("read scaled workload snapshot: %v", err)
	}
	if err := o.TestHookScaleDownApps(ctx); err != nil {
		t.Fatalf("scale down apps: %v", err)
	}
	if err := o.TestHookRestoreScaledApps(ctx); err != nil {
		t.Fatalf("restore scaled apps: %v", err)
	}
	if err := o.TestHookWaitForCriticalServiceEndpoints(ctx); err != nil {
		t.Fatalf("wait for critical service endpoints: %v", err)
	}
	if ok, _, _, _, err := o.TestHookCriticalServiceEndpointsReady(ctx); err != nil || !ok {
		t.Fatalf("critical endpoints ready failed: ok=%v err=%v", ok, err)
	}
	if err := o.TestHookWaitForWorkloadConvergence(ctx); err != nil {
		t.Fatalf("wait for workload convergence: %v", err)
	}
	if ok, detail, err := o.TestHookWorkloadConvergenceReady(ctx); err != nil || !ok || detail == "" {
		t.Fatalf("workload convergence ready failed: ok=%v detail=%q err=%v", ok, detail, err)
	}
	if err := o.TestHookRecycleStuckControllerPods(ctx); err != nil {
		t.Fatalf("recycle stuck controller pods: %v", err)
	}
	// Auto-heal helpers only need a zero last-run timestamp to fire.
	recycleAt := time.Time{}
	o.TestHookMaybeAutoRecycleStuckPods(ctx, &recycleAt)
	healAt := time.Time{}
	o.TestHookMaybeAutoHealCriticalWorkloadReplicas(ctx, &healAt)
	if _, err := o.TestHookHealCriticalWorkloadReplicas(ctx); err != nil {
		t.Fatalf("heal critical workload replicas: %v", err)
	}
	if _, err := o.TestHookStartupFailurePods(ctx); err != nil {
		t.Fatalf("startup failure pods: %v", err)
	}
	if err := o.TestHookEnsureCriticalStartupWorkloads(ctx); err != nil {
		t.Fatalf("ensure critical startup workloads: %v", err)
	}
	if _, err := o.TestHookMissingCriticalStartupWorkloads(ctx); err != nil {
		t.Fatalf("missing critical startup workloads: %v", err)
	}
	if err := o.TestHookWaitForServiceChecklistAlias(ctx); err != nil {
		t.Fatalf("wait for service checklist alias: %v", err)
	}
}