ananke/cmd/ananke/bootstrap_handoff_additional_test.go

361 lines
14 KiB
Go

package main
import (
"context"
"io"
"log"
"os"
"path/filepath"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/config"
)
// writeFakeSSH writes script to an executable file named "ssh" inside a fresh
// temporary directory, places that directory first on PATH for the duration of
// the calling test, and returns the path of the written file.
// Signature: writeFakeSSH(t *testing.T, script string) string.
// Why: peer handoff tests need deterministic SSH behavior without external hosts.
//
// The test fails immediately if the file cannot be written. PATH is changed via
// t.Setenv, so the testing package restores the previous value when the test
// finishes.
func writeFakeSSH(t *testing.T, script string) string {
	t.Helper()

	binDir := t.TempDir()
	sshPath := filepath.Join(binDir, "ssh")
	if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}

	// Build the search path with the platform's list separator, and skip the
	// separator entirely when PATH is empty so no empty element (which means
	// "current directory" on Unix) sneaks into the search path.
	searchPath := binDir
	if inherited := os.Getenv("PATH"); inherited != "" {
		searchPath += string(os.PathListSeparator) + inherited
	}
	t.Setenv("PATH", searchPath)
	return sshPath
}
// TestTryPeerBootstrapHandoffSuccess expects tryPeerBootstrapHandoff to return
// ok=true and a nil error when the fake ssh prints "ok" and exits successfully.
// Signature: TestTryPeerBootstrapHandoffSuccess(t *testing.T).
// Why: covers successful peer bootstrap handoff flow.
func TestTryPeerBootstrapHandoffSuccess(t *testing.T) {
	// Every ssh invocation made by the code under test succeeds.
	const succeedingSSH = "#!/usr/bin/env bash\nset -euo pipefail\necho ok\n"
	writeFakeSSH(t, succeedingSSH)

	coordination := config.Coordination{ForwardShutdownHost: "titan-db"}
	cfg := config.Config{SSHUser: "atlas", Coordination: coordination}
	logger := log.New(io.Discard, "", 0)

	handedOff, err := tryPeerBootstrapHandoff(context.Background(), cfg, logger)
	if succeeded := err == nil && handedOff; !succeeded {
		t.Fatalf("expected successful handoff, got ok=%v err=%v", handedOff, err)
	}
}
// TestTryPeerBootstrapHandoffRejectsMissingCoordinator expects
// tryPeerBootstrapHandoff to return an error for a zero-value config.
// Signature: TestTryPeerBootstrapHandoffRejectsMissingCoordinator(t *testing.T).
// Why: covers peer handoff validation when no coordinator host is configured.
func TestTryPeerBootstrapHandoffRejectsMissingCoordinator(t *testing.T) {
	var emptyCfg config.Config // zero value: ForwardShutdownHost is unset
	logger := log.New(io.Discard, "", 0)

	_, err := tryPeerBootstrapHandoff(context.Background(), emptyCfg, logger)
	if err == nil {
		t.Fatalf("expected missing coordinator error")
	}
}
// TestTryPeerBootstrapHandoffTimeout expects tryPeerBootstrapHandoff to return
// ok=false and a non-nil error when the fake ssh always exits 1 and the context
// carries a 50ms deadline.
// Signature: TestTryPeerBootstrapHandoffTimeout(t *testing.T).
// Why: covers timeout branch when coordinator handoff cannot be completed.
//
// NOTE(review): the fake ssh fails immediately, so this test alone does not
// show whether the error comes from the deadline or from the ssh failure;
// confirm against tryPeerBootstrapHandoff.
func TestTryPeerBootstrapHandoffTimeout(t *testing.T) {
	const failingSSH = "#!/usr/bin/env bash\nset -euo pipefail\necho fail >&2\nexit 1\n"
	writeFakeSSH(t, failingSSH)

	coordination := config.Coordination{ForwardShutdownHost: "titan-db"}
	cfg := config.Config{SSHUser: "atlas", Coordination: coordination}
	logger := log.New(io.Discard, "", 0)

	deadlineCtx, release := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer release()

	handedOff, err := tryPeerBootstrapHandoff(deadlineCtx, cfg, logger)
	if failed := err != nil && !handedOff; !failed {
		t.Fatalf("expected timeout failure, got ok=%v err=%v", handedOff, err)
	}
}
// TestTryPeerBootstrapHandoffUsesMappedHostAndUser records the arguments the
// fake ssh receives (one per line, in the file named by ANANKE_SSH_ARGS_FILE)
// and checks that they mention the per-node user and host overrides, the
// configured port, and the jump host.
// Signature: TestTryPeerBootstrapHandoffUsesMappedHostAndUser(t *testing.T).
// Why: covers target/user/jump argument composition for coordinator handoff SSH calls.
func TestTryPeerBootstrapHandoffUsesMappedHostAndUser(t *testing.T) {
	capturePath := filepath.Join(t.TempDir(), "ssh-args.txt")
	t.Setenv("ANANKE_SSH_ARGS_FILE", capturePath)
	// The fake ssh overwrites the capture file on every invocation, so the file
	// holds the arguments of the most recent call only.
	const recordingSSH = "#!/usr/bin/env bash\nset -euo pipefail\nprintf '%s\\n' \"$@\" > \"$ANANKE_SSH_ARGS_FILE\"\necho ok\n"
	writeFakeSSH(t, recordingSSH)

	const coordinator = "titan-db"
	cfg := config.Config{
		SSHUser:      "atlas",
		SSHPort:      2222,
		SSHNodeUsers: map[string]string{coordinator: "override-user"},
		SSHNodeHosts: map[string]string{coordinator: "10.0.0.55"},
		SSHJumpHost:  "titan-jh",
		Coordination: config.Coordination{ForwardShutdownHost: coordinator},
	}

	handedOff, err := tryPeerBootstrapHandoff(context.Background(), cfg, log.New(io.Discard, "", 0))
	if succeeded := err == nil && handedOff; !succeeded {
		t.Fatalf("expected successful handoff, got ok=%v err=%v", handedOff, err)
	}

	captured, err := os.ReadFile(capturePath)
	if err != nil {
		t.Fatalf("read captured ssh args: %v", err)
	}
	args := string(captured)
	mentions := func(fragment string) bool { return strings.Contains(args, fragment) }

	if !mentions("override-user@10.0.0.55") {
		t.Fatalf("expected mapped target in args, got:\n%s", args)
	}
	// NOTE(review): "2222" is also a substring of "titan-jh:2222", so this check
	// does not by itself prove the port was passed alongside -p.
	if !(mentions("-p") && mentions("2222")) {
		t.Fatalf("expected port args in ssh invocation, got:\n%s", args)
	}
	if !(mentions("-J") && mentions("titan-jh:2222")) {
		t.Fatalf("expected jump host args in ssh invocation, got:\n%s", args)
	}
}
// TestCoordinatorAllowsFallbackStates runs one orchestration or CLI step.
// Signature: TestCoordinatorAllowsFallbackStates(t *testing.T).
// Why: covers coordinator guard logic for bootstrap-active and normal intent states.
func TestCoordinatorAllowsFallbackStates(t *testing.T) {
// bootstrap active -> disallow fallback
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_ACTIVE__\necho intent=normal\n")
cfg := config.Config{
SSHUser: "atlas",
Coordination: config.Coordination{
ForwardShutdownHost: "titan-db",
StartupGuardMaxAgeSec: 60,
},
}
allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
if err != nil {
t.Fatalf("guard check failed: %v", err)
}
if allowed || !strings.Contains(reason, "bootstrap service is active") {
t.Fatalf("expected disallow for bootstrap active, got allowed=%v reason=%q", allowed, reason)
}
// coordinator normal -> allow fallback
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_IDLE__\necho intent=normal reason=\"ok\" source=test updated_at=2026-04-07T00:00:00Z\n")
allowed, reason, err = coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
if err != nil {
t.Fatalf("guard check failed: %v", err)
}
if !allowed || !strings.Contains(reason, "intent is normal") {
t.Fatalf("expected allow for normal intent, got allowed=%v reason=%q", allowed, reason)
}
}
// TestCoordinatorAllowsFallbackOnUnreachableCoordinator expects the guard to
// allow fallback, with a "coordinator unreachable" reason and no error, when
// the fake ssh exits with status 255.
// Signature: TestCoordinatorAllowsFallbackOnUnreachableCoordinator(t *testing.T).
// Why: covers fallback-allowed path when coordinator cannot be reached.
func TestCoordinatorAllowsFallbackOnUnreachableCoordinator(t *testing.T) {
	const unreachableSSH = "#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n"
	writeFakeSSH(t, unreachableSSH)

	coordination := config.Coordination{ForwardShutdownHost: "titan-db"}
	cfg := config.Config{SSHUser: "atlas", Coordination: coordination}
	logger := log.New(io.Discard, "", 0)

	allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, logger)
	if err != nil {
		t.Fatalf("expected no hard error, got %v", err)
	}
	if matched := allowed && strings.Contains(reason, "coordinator unreachable"); !matched {
		t.Fatalf("expected coordinator-unreachable allow, got allowed=%v reason=%q", allowed, reason)
	}
}
// TestCoordinatorAllowsFallbackWhenNoCoordinatorConfigured expects the guard to
// allow fallback, with a "no coordinator configured" reason and no error, for a
// zero-value config.
// Signature: TestCoordinatorAllowsFallbackWhenNoCoordinatorConfigured(t *testing.T).
// Why: covers no-coordinator short-circuit path for peer fallback checks.
func TestCoordinatorAllowsFallbackWhenNoCoordinatorConfigured(t *testing.T) {
	var emptyCfg config.Config // zero value: no coordinator host set
	logger := log.New(io.Discard, "", 0)

	allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), emptyCfg, logger)
	if err != nil {
		t.Fatalf("expected no error, got %v", err)
	}
	if matched := allowed && strings.Contains(reason, "no coordinator configured"); !matched {
		t.Fatalf("unexpected guard result allowed=%v reason=%q", allowed, reason)
	}
}
// TestCoordinatorAllowsFallbackIntentStateMatrix feeds the guard one intent
// line per subtest (behind an __ANANKE_BOOTSTRAP_IDLE__ marker) and checks both
// the allow/deny decision and a case-insensitive substring of the reason.
// Signature: TestCoordinatorAllowsFallbackIntentStateMatrix(t *testing.T).
// Why: covers peer fallback guard behavior for each coordinator intent branch.
//
// Timestamps are rendered relative to the current UTC time: "fresh" is now,
// "old complete" is 5 minutes ago, and "stale" is 20 minutes ago. The config
// uses StartupGuardMaxAgeSec=60 and ShutdownCooldownSeconds=45.
func TestCoordinatorAllowsFallbackIntentStateMatrix(t *testing.T) {
	now := time.Now().UTC()
	// stamp renders the instant `age` before now as an RFC 3339 timestamp.
	stamp := func(age time.Duration) string {
		return now.Add(-age).Format(time.RFC3339)
	}
	freshAt := stamp(0)
	oldCompleteAt := stamp(5 * time.Minute)
	staleAt := stamp(20 * time.Minute)

	type guardCase struct {
		name          string
		intentLine    string
		wantAllowed   bool
		wantReasonSub string
	}
	matrix := []guardCase{
		{"shutting_down_fresh_blocks", intentOutputLine("shutting_down", "fresh", "test", freshAt), false, "intent=shutting_down"},
		{"shutting_down_stale_allows", intentOutputLine("shutting_down", "stale", "test", staleAt), true, "shutdown intent stale"},
		{"startup_in_progress_fresh_blocks", intentOutputLine("startup_in_progress", "fresh", "test", freshAt), false, "intent=startup_in_progress"},
		{"startup_in_progress_stale_allows", intentOutputLine("startup_in_progress", "stale", "test", staleAt), true, "startup intent stale"},
		// No updated_at field at all, so the guard cannot compute an age.
		{"shutdown_complete_without_age_blocks", `intent=shutdown_complete reason="unknown" source=test`, false, "unknown age"},
		{"shutdown_complete_recent_blocks", intentOutputLine("shutdown_complete", "recent", "test", freshAt), false, "recently completed shutdown"},
		{"shutdown_complete_old_allows", intentOutputLine("shutdown_complete", "old", "test", oldCompleteAt), true, "old enough"},
		{"unknown_state_blocks", intentOutputLine("mystery_state", "unknown", "test", freshAt), false, "unknown"},
	}

	// The configuration is identical for every case, so build it once.
	cfg := config.Config{
		SSHUser: "atlas",
		Coordination: config.Coordination{
			ForwardShutdownHost:   "titan-db",
			StartupGuardMaxAgeSec: 60,
		},
		Startup: config.Startup{
			ShutdownCooldownSeconds: 45,
		},
	}

	for i := range matrix {
		tc := matrix[i] // per-iteration copy captured by the subtest closure
		t.Run(tc.name, func(t *testing.T) {
			// Single quotes keep the double quotes inside the intent line literal.
			script := "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_IDLE__\necho '" + tc.intentLine + "'\n"
			writeFakeSSH(t, script)

			allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
			if err != nil {
				t.Fatalf("guard check failed: %v", err)
			}
			if allowed != tc.wantAllowed {
				t.Fatalf("unexpected allowed=%v want=%v reason=%q", allowed, tc.wantAllowed, reason)
			}
			gotLower, wantLower := strings.ToLower(reason), strings.ToLower(tc.wantReasonSub)
			if !strings.Contains(gotLower, wantLower) {
				t.Fatalf("unexpected reason=%q want substring %q", reason, tc.wantReasonSub)
			}
		})
	}
}
// TestCoordinatorGuardRejectsUnparseableIntent expects the guard to return an
// error when the fake ssh prints the idle marker followed by a line that is not
// an intent record ("gibberish-output").
// Signature: TestCoordinatorGuardRejectsUnparseableIntent(t *testing.T).
// Why: covers parse-error path when coordinator output is malformed.
func TestCoordinatorGuardRejectsUnparseableIntent(t *testing.T) {
	const malformedSSH = "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_IDLE__\necho gibberish-output\n"
	writeFakeSSH(t, malformedSSH)

	coordination := config.Coordination{ForwardShutdownHost: "titan-db"}
	cfg := config.Config{SSHUser: "atlas", Coordination: coordination}
	logger := log.New(io.Discard, "", 0)

	_, _, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, logger)
	if err == nil {
		t.Fatalf("expected parse error for malformed coordinator output")
	}
}
// TestResolveSSHPathsPreferExplicitValues expects resolveSSHConfigFile and
// resolveSSHIdentityFile to return the SSHConfigFile and SSHIdentityFile values
// set on the config unchanged.
// Signature: TestResolveSSHPathsPreferExplicitValues(t *testing.T).
// Why: covers explicit ssh config/identity path branches in startup handoff helpers.
func TestResolveSSHPathsPreferExplicitValues(t *testing.T) {
	const (
		explicitConfig   = "/tmp/ssh-config"
		explicitIdentity = "/tmp/ssh-key"
	)
	cfg := config.Config{
		SSHConfigFile:   explicitConfig,
		SSHIdentityFile: explicitIdentity,
	}

	gotConfig := resolveSSHConfigFile(cfg)
	if gotConfig != explicitConfig {
		t.Fatalf("unexpected config path: %q", gotConfig)
	}
	gotIdentity := resolveSSHIdentityFile(cfg)
	if gotIdentity != explicitIdentity {
		t.Fatalf("unexpected identity path: %q", gotIdentity)
	}
}
// TestResolveSSHPathCandidatesAndCooldownDefaults temporarily replaces the
// package-level sshConfigCandidates and sshIdentityCandidates lists and checks
// that, for a zero-value config, the resolvers return an existing candidate
// path and return "" when the only candidate is missing. It then checks that
// startupShutdownCooldown yields 45s for a zero-value config and 90s when
// ShutdownCooldownSeconds is 90.
// Signature: TestResolveSSHPathCandidatesAndCooldownDefaults(t *testing.T).
// Why: covers candidate-list fallback branches and startup cooldown default logic.
func TestResolveSSHPathCandidatesAndCooldownDefaults(t *testing.T) {
	dir := t.TempDir()
	cfgPath := filepath.Join(dir, "config")
	idPath := filepath.Join(dir, "id_ed25519")

	// writeCandidate creates one candidate file or aborts the test.
	writeCandidate := func(path, body, label string) {
		t.Helper()
		if err := os.WriteFile(path, []byte(body), 0o600); err != nil {
			t.Fatalf("write %s candidate: %v", label, err)
		}
	}
	writeCandidate(cfgPath, "host *", "cfg")
	writeCandidate(idPath, "key", "id")

	// Snapshot the package-level candidate lists and put them back once the
	// test is over so other tests see the original values.
	savedCfg := append([]string{}, sshConfigCandidates...)
	savedID := append([]string{}, sshIdentityCandidates...)
	t.Cleanup(func() {
		sshConfigCandidates = savedCfg
		sshIdentityCandidates = savedID
	})

	var emptyCfg config.Config

	// Candidates that exist on disk are returned.
	sshConfigCandidates = []string{cfgPath}
	sshIdentityCandidates = []string{idPath}
	if got := resolveSSHConfigFile(emptyCfg); got != cfgPath {
		t.Fatalf("expected cfg candidate %q, got %q", cfgPath, got)
	}
	if got := resolveSSHIdentityFile(emptyCfg); got != idPath {
		t.Fatalf("expected identity candidate %q, got %q", idPath, got)
	}

	// Candidates that do not exist resolve to the empty string.
	sshConfigCandidates = []string{filepath.Join(dir, "missing-config")}
	sshIdentityCandidates = []string{filepath.Join(dir, "missing-key")}
	if got := resolveSSHConfigFile(emptyCfg); got != "" {
		t.Fatalf("expected empty config fallback, got %q", got)
	}
	if got := resolveSSHIdentityFile(emptyCfg); got != "" {
		t.Fatalf("expected empty identity fallback, got %q", got)
	}

	// Cooldown: default for an unset value, then an explicit override.
	const (
		defaultCooldown    = 45 * time.Second
		configuredCooldown = 90 * time.Second
	)
	if got := startupShutdownCooldown(emptyCfg); got != defaultCooldown {
		t.Fatalf("expected default cooldown 45s, got %s", got)
	}
	overridden := config.Config{Startup: config.Startup{ShutdownCooldownSeconds: 90}}
	if got := startupShutdownCooldown(overridden); got != configuredCooldown {
		t.Fatalf("expected configured cooldown 90s, got %s", got)
	}
}
// intentOutputLine formats a coordinator intent record as a single line of the
// form `intent=<state> reason="<reason>" source=<source> updated_at=<ts>`.
// Signature: intentOutputLine(stateValue, reason, source, updatedAt string) string.
// Why: keeps guard-branch fixtures concise and readable.
//
// Only reason is wrapped in double quotes; no value is escaped.
func intentOutputLine(stateValue, reason, source, updatedAt string) string {
	line := "intent=" + stateValue
	line += " reason=\"" + reason + "\""
	line += " source=" + source
	line += " updated_at=" + updatedAt
	return line
}