ananke/cmd/ananke/command_handlers_injection_test.go

488 lines
19 KiB
Go

package main
import (
"context"
"errors"
"io"
"log"
"os"
"path/filepath"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/cluster"
"scm.bstein.dev/bstein/ananke/internal/config"
"scm.bstein.dev/bstein/ananke/internal/execx"
"scm.bstein.dev/bstein/ananke/internal/service"
"scm.bstein.dev/bstein/ananke/internal/state"
)
// TestRunStartupDispatchWithInjectedOrchestrator checks that runStartup hands the parsed CLI flags to the startup hook.
// Signature: TestRunStartupDispatchWithInjectedOrchestrator(t *testing.T).
// Why: covers startup command wiring and option propagation without invoking a real cluster flow.
func TestRunStartupDispatchWithInjectedOrchestrator(t *testing.T) {
	// Stub now, restore the previous hook values when the test returns.
	defer stubCommandHandlerHooks()()

	// Hand runStartup a coordinator config with UPS disabled.
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		base := minimalHandlerConfig(t)
		base.Coordination.Role = "coordinator"
		base.UPS.Enabled = false
		return base, newTestOrchestrator(base, dryRun), nil
	}

	// Record whatever options the handler passes to the startup hook.
	var seen cluster.StartupOptions
	startupOrchestratorCommand = func(_ context.Context, _ *cluster.Orchestrator, opts cluster.StartupOptions) error {
		seen = opts
		return nil
	}

	args := []string{
		"--config", "/ignored.yaml",
		"--execute",
		"--force-flux-branch", "main",
		"--skip-local-bootstrap",
		"--reason", "drill-startup",
	}
	if err := runStartup(log.New(io.Discard, "", 0), args); err != nil {
		t.Fatalf("runStartup failed: %v", err)
	}

	wired := seen.ForceFluxBranch == "main" && seen.SkipLocalBootstrap && seen.Reason == "drill-startup"
	if !wired {
		t.Fatalf("unexpected startup options: %+v", seen)
	}
}
// TestRunStartupPeerRoleGuards walks runStartup through three peer-role scenarios in sequence.
// Signature: TestRunStartupPeerRoleGuards(t *testing.T).
// Why: covers peer-role guard, handoff short-circuit, and coordinator guard disallow paths.
func TestRunStartupPeerRoleGuards(t *testing.T) {
	undo := stubCommandHandlerHooks()
	defer undo()

	quiet := log.New(io.Discard, "", 0)

	// Every scenario below runs against a peer-role config with UPS disabled.
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		peerCfg := minimalHandlerConfig(t)
		peerCfg.Coordination.Role = "peer"
		peerCfg.UPS.Enabled = false
		orch := newTestOrchestrator(peerCfg, dryRun)
		return peerCfg, orch, nil
	}

	// Scenario 1: no --auto-peer-failover flag; an error is expected.
	err := runStartup(quiet, []string{"--config", "/ignored.yaml", "--execute"})
	if err == nil {
		t.Fatalf("expected peer-role block without override")
	}

	// Scenario 2: the handoff hook reports success; the startup hook must stay untouched.
	startupRan := false
	tryPeerBootstrapHandoffCommand = func(_ context.Context, _ config.Config, _ *log.Logger) (bool, error) {
		return true, nil
	}
	startupOrchestratorCommand = func(_ context.Context, _ *cluster.Orchestrator, _ cluster.StartupOptions) error {
		startupRan = true
		return nil
	}
	err = runStartup(quiet, []string{"--config", "/ignored.yaml", "--execute", "--auto-peer-failover"})
	if err != nil {
		t.Fatalf("expected handoff short-circuit success, got %v", err)
	}
	if startupRan {
		t.Fatalf("startup should not run after successful handoff")
	}

	// Scenario 3: handoff reports false and the coordinator guard refuses fallback; an error is expected.
	tryPeerBootstrapHandoffCommand = func(_ context.Context, _ config.Config, _ *log.Logger) (bool, error) {
		return false, nil
	}
	coordinatorAllowsPeerFallbackCommand = func(_ context.Context, _ config.Config, _ *log.Logger) (bool, string, error) {
		return false, "still busy", nil
	}
	err = runStartup(quiet, []string{"--config", "/ignored.yaml", "--execute", "--auto-peer-failover"})
	if err == nil {
		t.Fatalf("expected coordinator guard block")
	}
}
// TestRunStartupPowerSafetyHooks checks that an error from the startup power-safety hook surfaces from runStartup.
// Signature: TestRunStartupPowerSafetyHooks(t *testing.T).
// Why: covers startup UPS target build and startup power safety check error propagation.
func TestRunStartupPowerSafetyHooks(t *testing.T) {
	defer stubCommandHandlerHooks()()

	// Coordinator config with UPS enabled and battery startup disallowed.
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		upsCfg := minimalHandlerConfig(t)
		upsCfg.Coordination.Role = "coordinator"
		upsCfg.UPS.Enabled = true
		upsCfg.Coordination.AllowStartupOnBattery = false
		orch := newTestOrchestrator(upsCfg, dryRun)
		return upsCfg, orch, nil
	}

	// Target building succeeds with one fixed target.
	buildUPSTargetsCommand = func(config.Config) ([]service.Target, error) {
		targets := []service.Target{{Name: "Pyrphoros", Target: "pyrphoros@localhost"}}
		return targets, nil
	}

	// The power-safety hook always fails with a recognizable message.
	ensureStartupPowerSafeCommand = func(context.Context, []service.Target, float64) error {
		return errors.New("ups blocked")
	}

	err := runStartup(log.New(io.Discard, "", 0), []string{"--config", "/ignored.yaml", "--execute"})
	blocked := err != nil && strings.Contains(err.Error(), "ups blocked")
	if !blocked {
		t.Fatalf("expected startup UPS block, got %v", err)
	}
}
// TestRunShutdownAndRestoreDispatchHooks checks that runShutdown and runEtcdRestore pass parsed flags to their hooks.
// Signature: TestRunShutdownAndRestoreDispatchHooks(t *testing.T).
// Why: covers option wiring for shutdown and etcd restore handlers.
func TestRunShutdownAndRestoreDispatchHooks(t *testing.T) {
	undo := stubCommandHandlerHooks()
	defer undo()

	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		base := minimalHandlerConfig(t)
		orch := newTestOrchestrator(base, dryRun)
		return base, orch, nil
	}

	// Both hooks only record the options they were handed.
	var shutdownSeen cluster.ShutdownOptions
	shutdownOrchestratorCommand = func(_ context.Context, _ *cluster.Orchestrator, opts cluster.ShutdownOptions) error {
		shutdownSeen = opts
		return nil
	}
	var restoreSeen cluster.EtcdRestoreOptions
	etcdRestoreOrchestratorCommand = func(_ context.Context, _ *cluster.Orchestrator, opts cluster.EtcdRestoreOptions) error {
		restoreSeen = opts
		return nil
	}

	quiet := log.New(io.Discard, "", 0)

	shutdownArgs := []string{
		"--config", "/ignored.yaml",
		"--skip-etcd-snapshot",
		"--skip-drain",
		"--mode", "cluster-only",
		"--reason", "drill",
	}
	if err := runShutdown(quiet, shutdownArgs); err != nil {
		t.Fatalf("runShutdown failed: %v", err)
	}
	switch {
	case !shutdownSeen.SkipEtcdSnapshot,
		!shutdownSeen.SkipDrain,
		shutdownSeen.Mode != "cluster-only",
		shutdownSeen.Reason != "drill":
		t.Fatalf("unexpected shutdown options: %+v", shutdownSeen)
	}

	restoreArgs := []string{
		"--config", "/ignored.yaml",
		"--control-plane", "titan-0a",
		"--snapshot", "/tmp/snap",
	}
	if err := runEtcdRestore(quiet, restoreArgs); err != nil {
		t.Fatalf("runEtcdRestore failed: %v", err)
	}
	restoreOK := restoreSeen.ControlPlane == "titan-0a" && restoreSeen.SnapshotPath == "/tmp/snap"
	if !restoreOK {
		t.Fatalf("unexpected etcd restore options: %+v", restoreSeen)
	}
}
// TestRunDaemonAndIntentHooks drives runDaemon and runIntent through injected daemon and intent hooks.
// Signature: TestRunDaemonAndIntentHooks(t *testing.T).
// Why: covers daemon context-cancel behavior and injected intent read/write hooks.
func TestRunDaemonAndIntentHooks(t *testing.T) {
	defer stubCommandHandlerHooks()()

	configFile := writeTestConfig(t)
	quiet := log.New(io.Discard, "", 0)

	// Load the on-disk test config and force UPS on before building the orchestrator.
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		loaded, loadErr := config.Load(configFile)
		if loadErr != nil {
			return config.Config{}, nil, loadErr
		}
		loaded.UPS.Enabled = true
		return loaded, newTestOrchestrator(loaded, dryRun), nil
	}
	buildUPSTargetsCommand = func(config.Config) ([]service.Target, error) {
		targets := []service.Target{{Name: "Pyrphoros", Target: "pyrphoros@localhost"}}
		return targets, nil
	}

	// The daemon hook reports context.Canceled; runDaemon is expected to return nil for it.
	daemonRunCommand = func(context.Context, *service.Daemon) error {
		return context.Canceled
	}
	if err := runDaemon(quiet, []string{"--config", configFile}); err != nil {
		t.Fatalf("runDaemon should ignore context cancellation, got %v", err)
	}

	// Read path: the hook returns a fixed intent.
	readIntentCommand = func(string) (state.Intent, error) {
		intent := state.Intent{
			State:     state.IntentNormal,
			Reason:    "ok",
			Source:    "unit",
			UpdatedAt: time.Now().UTC(),
		}
		return intent, nil
	}
	if err := runIntent(quiet, []string{"--config", configFile}); err != nil {
		t.Fatalf("runIntent read failed: %v", err)
	}

	// Write path: the hook validates the payload and records that it ran.
	wrote := false
	writeIntentCommand = func(_ string, stateValue, reason, source string) error {
		wrote = true
		payloadOK := stateValue == state.IntentNormal && reason == "unit" && source == "tester"
		if !payloadOK {
			t.Fatalf("unexpected intent write payload state=%s reason=%s source=%s", stateValue, reason, source)
		}
		return nil
	}
	writeArgs := []string{
		"--config", configFile,
		"--set", state.IntentNormal,
		"--reason", "unit",
		"--source", "tester",
		"--execute",
	}
	if err := runIntent(quiet, writeArgs); err != nil {
		t.Fatalf("runIntent write failed: %v", err)
	}
	if !wrote {
		t.Fatalf("expected write intent hook call")
	}
}
// TestRunStatusReportsSnapshotAndJSON checks runStatus logger lines and --json stdout output against a written progress fixture.
// Signature: TestRunStatusReportsSnapshotAndJSON(t *testing.T).
// Why: covers runStatus checklist/phase reporting and JSON payload emission from live startup progress.
func TestRunStatusReportsSnapshotAndJSON(t *testing.T) {
	undo := stubCommandHandlerHooks()
	defer undo()

	cfg := minimalHandlerConfig(t)
	cfgPath := writeTestConfig(t)
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		orch := newTestOrchestrator(cfg, dryRun)
		return cfg, orch, nil
	}

	// Fixture: a running startup with one failed check and one recorded auto-heal.
	checks := map[string]startupCheckRecord{
		"service-checklist":          {Status: "running", Detail: "waiting"},
		"critical-service-endpoints": {Status: "failed", Detail: "monitoring/victoria-metrics-single-server endpoints=0"},
		"workload-convergence":       {Status: "passed", Detail: "ok"},
		"phase":                      {Status: "running", Detail: "waiting for readiness"},
	}
	snapshot := startupStatusSnapshot{
		StartedAt: time.Now().UTC().Add(-30 * time.Second),
		Status:    "running",
		Phase:     "convergence-checks",
		Reason:    "drill",
		Checks:    checks,
		AutoHeals: []string{"restored critical endpoint backends: monitoring/statefulset/victoria-metrics-single-server"},
	}
	fixturePath := filepath.Join(cfg.State.Dir, "startup-progress.json")
	writeStartupStatusFixture(t, fixturePath, snapshot)

	// Logger output and stdout are captured separately.
	var logBuf strings.Builder
	logger := log.New(&logBuf, "", 0)
	jsonOut := captureStdout(t, func() {
		err := runStatus(logger, []string{"--config", cfgPath, "--json"})
		if err != nil {
			t.Fatalf("runStatus failed: %v", err)
		}
	})

	logged := logBuf.String()
	if !strings.Contains(logged, "startup_phase=convergence-checks") {
		t.Fatalf("expected startup phase in logger output, got:\n%s", logged)
	}
	if !strings.Contains(logged, "startup_failed_check=critical-service-endpoints") {
		t.Fatalf("expected failed checklist output, got:\n%s", logged)
	}
	if !strings.Contains(jsonOut, "\"phase\": \"convergence-checks\"") {
		t.Fatalf("expected json status payload on stdout, got:\n%s", jsonOut)
	}
}
// TestRunStatusLogsSnapshotReadError checks that runStatus returns nil and logs a read error when the progress file is malformed.
// Signature: TestRunStatusLogsSnapshotReadError(t *testing.T).
// Why: covers malformed snapshot parsing path without failing status command.
func TestRunStatusLogsSnapshotReadError(t *testing.T) {
	defer stubCommandHandlerHooks()()

	cfg := minimalHandlerConfig(t)
	cfgPath := writeTestConfig(t)
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		orch := newTestOrchestrator(cfg, dryRun)
		return cfg, orch, nil
	}

	// Plant a progress file whose content is not valid JSON.
	brokenFile := filepath.Join(cfg.State.Dir, "startup-progress.json")
	mkdirErr := os.MkdirAll(filepath.Dir(brokenFile), 0o755)
	if mkdirErr != nil {
		t.Fatalf("mkdir state dir: %v", mkdirErr)
	}
	writeErr := os.WriteFile(brokenFile, []byte("{bad-json"), 0o644)
	if writeErr != nil {
		t.Fatalf("write malformed progress file: %v", writeErr)
	}

	var logBuf strings.Builder
	runErr := runStatus(log.New(&logBuf, "", 0), []string{"--config", cfgPath})
	if runErr != nil {
		t.Fatalf("runStatus should continue on snapshot parse error: %v", runErr)
	}

	logged := logBuf.String()
	if !strings.Contains(logged, "startup_status_read_error=") {
		t.Fatalf("expected snapshot read error log, got:\n%s", logged)
	}
}
// TestRunStartupAdditionalBranches exercises two runStartup failure paths as isolated subtests.
// Signature: TestRunStartupAdditionalBranches(t *testing.T).
// Why: expands startup handler coverage for guard and UPS target build error branches.
func TestRunStartupAdditionalBranches(t *testing.T) {
	logger := log.New(io.Discard, "", 0)

	// Each scenario stubs and restores the hooks on its own.
	// stubCommandHandlerHooks only snapshots the current hook values, so calling it
	// again mid-test would not clear overrides installed by an earlier scenario;
	// running the scenarios as subtests keeps the first one's handoff/guard stubs
	// from staying live during the second.
	t.Run("PeerGuardEvaluationError", func(t *testing.T) {
		restore := stubCommandHandlerHooks()
		defer restore()

		buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
			cfg := minimalHandlerConfig(t)
			cfg.Coordination.Role = "peer"
			return cfg, newTestOrchestrator(cfg, dryRun), nil
		}
		// Both the handoff attempt and the coordinator guard report errors.
		tryPeerBootstrapHandoffCommand = func(_ context.Context, _ config.Config, _ *log.Logger) (bool, error) {
			return false, errors.New("handoff failed")
		}
		coordinatorAllowsPeerFallbackCommand = func(_ context.Context, _ config.Config, _ *log.Logger) (bool, string, error) {
			return false, "", errors.New("guard unavailable")
		}

		if err := runStartup(logger, []string{"--config", "/ignored.yaml", "--execute", "--auto-peer-failover"}); err == nil {
			t.Fatalf("expected startup guard evaluation failure")
		}
	})

	t.Run("UPSTargetBuildError", func(t *testing.T) {
		restore := stubCommandHandlerHooks()
		defer restore()

		buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
			cfg := minimalHandlerConfig(t)
			cfg.Coordination.Role = "coordinator"
			cfg.UPS.Enabled = true
			return cfg, newTestOrchestrator(cfg, dryRun), nil
		}
		// UPS target construction fails outright.
		buildUPSTargetsCommand = func(_ config.Config) ([]service.Target, error) {
			return nil, errors.New("bad ups config")
		}

		if err := runStartup(logger, []string{"--config", "/ignored.yaml", "--execute"}); err == nil {
			t.Fatalf("expected UPS target build error")
		}
	})
}
// TestRunStartupPeerFallbackAllowedRunsStartup checks that the startup hook runs when handoff reports false and the guard allows fallback.
// Signature: TestRunStartupPeerFallbackAllowedRunsStartup(t *testing.T).
// Why: covers peer auto-failover branch where coordinator allows local startup fallback.
func TestRunStartupPeerFallbackAllowedRunsStartup(t *testing.T) {
	defer stubCommandHandlerHooks()()

	// Peer-role config with UPS disabled.
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		peerCfg := minimalHandlerConfig(t)
		peerCfg.Coordination.Role = "peer"
		peerCfg.UPS.Enabled = false
		orch := newTestOrchestrator(peerCfg, dryRun)
		return peerCfg, orch, nil
	}

	// Handoff reports false without error; the coordinator guard then allows fallback.
	tryPeerBootstrapHandoffCommand = func(context.Context, config.Config, *log.Logger) (bool, error) {
		return false, nil
	}
	coordinatorAllowsPeerFallbackCommand = func(context.Context, config.Config, *log.Logger) (bool, string, error) {
		return true, "allowed", nil
	}

	ran := false
	startupOrchestratorCommand = func(context.Context, *cluster.Orchestrator, cluster.StartupOptions) error {
		ran = true
		return nil
	}

	args := []string{"--config", "/ignored.yaml", "--execute", "--auto-peer-failover"}
	err := runStartup(log.New(io.Discard, "", 0), args)
	if err != nil {
		t.Fatalf("expected startup fallback success, got %v", err)
	}
	if !ran {
		t.Fatalf("expected startup fallback execution")
	}
}
// TestRunStartupAllowOnBatterySkipsSafetyCheck checks that runStartup succeeds with --allow-on-battery even though UPS target building is rigged to fail.
// Signature: TestRunStartupAllowOnBatterySkipsSafetyCheck(t *testing.T).
// Why: covers startup battery override branch that intentionally bypasses power safety gating.
func TestRunStartupAllowOnBatterySkipsSafetyCheck(t *testing.T) {
	undo := stubCommandHandlerHooks()
	defer undo()

	// The config itself disallows battery startup; only the CLI flag below asks for it.
	buildOrchestratorCommand = func(_ *log.Logger, _ string, dryRun bool) (config.Config, *cluster.Orchestrator, error) {
		upsCfg := minimalHandlerConfig(t)
		upsCfg.Coordination.Role = "coordinator"
		upsCfg.UPS.Enabled = true
		upsCfg.Coordination.AllowStartupOnBattery = false
		orch := newTestOrchestrator(upsCfg, dryRun)
		return upsCfg, orch, nil
	}

	// Tripwire: this hook always returns an error, so the success asserted below
	// presumably means runStartup never reached it.
	buildUPSTargetsCommand = func(config.Config) ([]service.Target, error) {
		return nil, errors.New("should not be called")
	}
	startupOrchestratorCommand = func(context.Context, *cluster.Orchestrator, cluster.StartupOptions) error {
		return nil
	}

	args := []string{"--config", "/ignored.yaml", "--execute", "--allow-on-battery"}
	err := runStartup(log.New(io.Discard, "", 0), args)
	if err != nil {
		t.Fatalf("expected allow-on-battery startup success, got %v", err)
	}
}
// stubCommandHandlerHooks snapshots the current command hook values and returns a function that puts them back.
// Signature: stubCommandHandlerHooks() func().
// Why: keeps handler hook overrides isolated to each test to avoid cross-test bleed.
func stubCommandHandlerHooks() func() {
	// Capture the hook values as they are right now; nothing is replaced here.
	savedBuild, savedHandoff, savedGuard := buildOrchestratorCommand, tryPeerBootstrapHandoffCommand, coordinatorAllowsPeerFallbackCommand
	savedUPSTargets, savedPowerSafe, savedStartup := buildUPSTargetsCommand, ensureStartupPowerSafeCommand, startupOrchestratorCommand
	savedShutdown, savedEtcdRestore, savedDaemon := shutdownOrchestratorCommand, etcdRestoreOrchestratorCommand, daemonRunCommand
	savedReadIntent, savedWriteIntent := readIntentCommand, writeIntentCommand

	// The returned closure writes every captured value back to its package-level hook.
	return func() {
		buildOrchestratorCommand, tryPeerBootstrapHandoffCommand, coordinatorAllowsPeerFallbackCommand = savedBuild, savedHandoff, savedGuard
		buildUPSTargetsCommand, ensureStartupPowerSafeCommand, startupOrchestratorCommand = savedUPSTargets, savedPowerSafe, savedStartup
		shutdownOrchestratorCommand, etcdRestoreOrchestratorCommand, daemonRunCommand = savedShutdown, savedEtcdRestore, savedDaemon
		readIntentCommand, writeIntentCommand = savedReadIntent, savedWriteIntent
	}
}
// minimalHandlerConfig builds a small config whose state paths all live under a per-test temp directory.
// Signature: minimalHandlerConfig(t *testing.T) config.Config.
// Why: command handler tests need a stable config baseline without reaching external systems.
func minimalHandlerConfig(t *testing.T) config.Config {
	t.Helper()

	root := t.TempDir()
	// under resolves a file or directory name inside the temp state directory.
	under := func(name string) string { return filepath.Join(root, name) }

	var cfg config.Config
	cfg.ControlPlanes = []string{"titan-0a"}
	cfg.Workers = []string{"titan-22"}
	cfg.SSHUser = "atlas"
	cfg.SSHPort = 2277
	cfg.Startup = config.Startup{MinimumBatteryPercent: 20}
	cfg.Shutdown = config.Shutdown{
		DefaultBudgetSeconds: 1380,
		HistoryMinSamples:    3,
		EmergencyBudgetSec:   420,
		EmergencyMinSamples:  3,
	}
	cfg.State = config.State{
		Dir:            root,
		ReportsDir:     under("reports"),
		RunHistoryPath: under("runs.json"),
		LockPath:       under("ananke.lock"),
		IntentPath:     under("intent.json"),
	}
	return cfg
}
// newTestOrchestrator builds a cluster.Orchestrator from cfg whose runner and orchestrator loggers both discard output.
// Signature: newTestOrchestrator(cfg config.Config, dryRun bool) *cluster.Orchestrator.
// Why: command handler tests need a concrete orchestrator pointer for method signatures while hooks intercept execution.
func newTestOrchestrator(cfg config.Config, dryRun bool) *cluster.Orchestrator {
	// The runner receives the caller's dry-run flag.
	runner := &execx.Runner{
		DryRun: dryRun,
		Logger: log.New(io.Discard, "", 0),
	}
	// Run history is keyed to the path configured in cfg.
	history := state.New(cfg.State.RunHistoryPath)
	orchLogger := log.New(io.Discard, "", 0)
	return cluster.New(cfg, runner, history, orchLogger)
}
// captureStdout redirects os.Stdout to a pipe while fn runs and returns everything written to it.
// Signature: captureStdout(t *testing.T, fn func()) string.
// Why: status command optionally emits JSON to stdout, so tests need deterministic capture.
//
// os.Stdout is restored and the pipe's write end is closed even when fn does not
// return normally (for example when it calls t.Fatalf or panics), so a failing
// callback cannot leave stdout redirected for later tests or strand the reader
// goroutine. The test fails immediately if the pipe cannot be created.
func captureStdout(t *testing.T, fn func()) string {
	t.Helper()
	r, w, err := os.Pipe()
	if err != nil {
		t.Fatalf("pipe: %v", err)
	}

	// Drain the pipe concurrently so fn can write more than the pipe buffer holds.
	// The channel is buffered so the goroutine can finish even if nobody receives.
	done := make(chan string, 1)
	go func() {
		defer r.Close()
		// Best-effort read: on error, whatever was captured so far is still returned.
		b, _ := io.ReadAll(r)
		done <- string(b)
	}()

	orig := os.Stdout
	os.Stdout = w
	func() {
		// Deferred so it also runs on runtime.Goexit (t.Fatalf) and on panic.
		defer func() {
			os.Stdout = orig
			// Closing the write end delivers EOF to the reader goroutine.
			_ = w.Close()
		}()
		fn()
	}()
	return <-done
}