361 lines
14 KiB
Go
361 lines
14 KiB
Go
|
|
package main
|
||
|
|
|
||
|
|
import (
|
||
|
|
"context"
|
||
|
|
"io"
|
||
|
|
"log"
|
||
|
|
"os"
|
||
|
|
"path/filepath"
|
||
|
|
"strings"
|
||
|
|
"testing"
|
||
|
|
"time"
|
||
|
|
|
||
|
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
||
|
|
)
|
||
|
|
|
||
|
|
// writeFakeSSH installs a stub `ssh` executable and puts its directory first on PATH.
// Signature: writeFakeSSH(t *testing.T, script string) string.
// Why: peer handoff tests need deterministic SSH behavior without external hosts.
//
// script is written verbatim to <tempdir>/ssh with mode 0o755. PATH is changed
// through t.Setenv, so the previous value is restored when the test ends.
// The return value is the full path of the stub that was written.
func writeFakeSSH(t *testing.T, script string) string {
	t.Helper()

	binDir := t.TempDir()
	sshPath := filepath.Join(binDir, "ssh")
	if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}

	// Join with the platform list separator, and only when there is something
	// to join: a trailing separator leaves an empty PATH element, which on Unix
	// is resolved as the current directory.
	searchPath := binDir
	if existing := os.Getenv("PATH"); existing != "" {
		searchPath += string(os.PathListSeparator) + existing
	}
	t.Setenv("PATH", searchPath)
	return sshPath
}
|
||
|
|
|
||
|
|
// TestTryPeerBootstrapHandoffSuccess runs one orchestration or CLI step.
|
||
|
|
// Signature: TestTryPeerBootstrapHandoffSuccess(t *testing.T).
|
||
|
|
// Why: covers successful peer bootstrap handoff flow.
|
||
|
|
func TestTryPeerBootstrapHandoffSuccess(t *testing.T) {
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho ok\n")
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
ok, err := tryPeerBootstrapHandoff(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil || !ok {
|
||
|
|
t.Fatalf("expected successful handoff, got ok=%v err=%v", ok, err)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestTryPeerBootstrapHandoffRejectsMissingCoordinator runs one orchestration or CLI step.
|
||
|
|
// Signature: TestTryPeerBootstrapHandoffRejectsMissingCoordinator(t *testing.T).
|
||
|
|
// Why: covers peer handoff validation when no coordinator host is configured.
|
||
|
|
func TestTryPeerBootstrapHandoffRejectsMissingCoordinator(t *testing.T) {
|
||
|
|
cfg := config.Config{}
|
||
|
|
if _, err := tryPeerBootstrapHandoff(context.Background(), cfg, log.New(io.Discard, "", 0)); err == nil {
|
||
|
|
t.Fatalf("expected missing coordinator error")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestTryPeerBootstrapHandoffTimeout runs one orchestration or CLI step.
|
||
|
|
// Signature: TestTryPeerBootstrapHandoffTimeout(t *testing.T).
|
||
|
|
// Why: covers timeout branch when coordinator handoff cannot be completed.
|
||
|
|
func TestTryPeerBootstrapHandoffTimeout(t *testing.T) {
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho fail >&2\nexit 1\n")
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||
|
|
defer cancel()
|
||
|
|
ok, err := tryPeerBootstrapHandoff(ctx, cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err == nil || ok {
|
||
|
|
t.Fatalf("expected timeout failure, got ok=%v err=%v", ok, err)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestTryPeerBootstrapHandoffUsesMappedHostAndUser runs one orchestration or CLI step.
|
||
|
|
// Signature: TestTryPeerBootstrapHandoffUsesMappedHostAndUser(t *testing.T).
|
||
|
|
// Why: covers target/user/jump argument composition for coordinator handoff SSH calls.
|
||
|
|
func TestTryPeerBootstrapHandoffUsesMappedHostAndUser(t *testing.T) {
|
||
|
|
argsFile := filepath.Join(t.TempDir(), "ssh-args.txt")
|
||
|
|
t.Setenv("ANANKE_SSH_ARGS_FILE", argsFile)
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\nprintf '%s\\n' \"$@\" > \"$ANANKE_SSH_ARGS_FILE\"\necho ok\n")
|
||
|
|
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
SSHPort: 2222,
|
||
|
|
SSHNodeUsers: map[string]string{
|
||
|
|
"titan-db": "override-user",
|
||
|
|
},
|
||
|
|
SSHNodeHosts: map[string]string{
|
||
|
|
"titan-db": "10.0.0.55",
|
||
|
|
},
|
||
|
|
SSHJumpHost: "titan-jh",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
ok, err := tryPeerBootstrapHandoff(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil || !ok {
|
||
|
|
t.Fatalf("expected successful handoff, got ok=%v err=%v", ok, err)
|
||
|
|
}
|
||
|
|
argsRaw, err := os.ReadFile(argsFile)
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("read captured ssh args: %v", err)
|
||
|
|
}
|
||
|
|
args := string(argsRaw)
|
||
|
|
if !strings.Contains(args, "override-user@10.0.0.55") {
|
||
|
|
t.Fatalf("expected mapped target in args, got:\n%s", args)
|
||
|
|
}
|
||
|
|
if !strings.Contains(args, "-p") || !strings.Contains(args, "2222") {
|
||
|
|
t.Fatalf("expected port args in ssh invocation, got:\n%s", args)
|
||
|
|
}
|
||
|
|
if !strings.Contains(args, "-J") || !strings.Contains(args, "titan-jh:2222") {
|
||
|
|
t.Fatalf("expected jump host args in ssh invocation, got:\n%s", args)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestCoordinatorAllowsFallbackStates runs one orchestration or CLI step.
|
||
|
|
// Signature: TestCoordinatorAllowsFallbackStates(t *testing.T).
|
||
|
|
// Why: covers coordinator guard logic for bootstrap-active and normal intent states.
|
||
|
|
func TestCoordinatorAllowsFallbackStates(t *testing.T) {
|
||
|
|
// bootstrap active -> disallow fallback
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_ACTIVE__\necho intent=normal\n")
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
StartupGuardMaxAgeSec: 60,
|
||
|
|
},
|
||
|
|
}
|
||
|
|
allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("guard check failed: %v", err)
|
||
|
|
}
|
||
|
|
if allowed || !strings.Contains(reason, "bootstrap service is active") {
|
||
|
|
t.Fatalf("expected disallow for bootstrap active, got allowed=%v reason=%q", allowed, reason)
|
||
|
|
}
|
||
|
|
|
||
|
|
// coordinator normal -> allow fallback
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_IDLE__\necho intent=normal reason=\"ok\" source=test updated_at=2026-04-07T00:00:00Z\n")
|
||
|
|
allowed, reason, err = coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("guard check failed: %v", err)
|
||
|
|
}
|
||
|
|
if !allowed || !strings.Contains(reason, "intent is normal") {
|
||
|
|
t.Fatalf("expected allow for normal intent, got allowed=%v reason=%q", allowed, reason)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestCoordinatorAllowsFallbackOnUnreachableCoordinator runs one orchestration or CLI step.
|
||
|
|
// Signature: TestCoordinatorAllowsFallbackOnUnreachableCoordinator(t *testing.T).
|
||
|
|
// Why: covers fallback-allowed path when coordinator cannot be reached.
|
||
|
|
func TestCoordinatorAllowsFallbackOnUnreachableCoordinator(t *testing.T) {
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n")
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("expected no hard error, got %v", err)
|
||
|
|
}
|
||
|
|
if !allowed || !strings.Contains(reason, "coordinator unreachable") {
|
||
|
|
t.Fatalf("expected coordinator-unreachable allow, got allowed=%v reason=%q", allowed, reason)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestCoordinatorAllowsFallbackWhenNoCoordinatorConfigured runs one orchestration or CLI step.
|
||
|
|
// Signature: TestCoordinatorAllowsFallbackWhenNoCoordinatorConfigured(t *testing.T).
|
||
|
|
// Why: covers no-coordinator short-circuit path for peer fallback checks.
|
||
|
|
func TestCoordinatorAllowsFallbackWhenNoCoordinatorConfigured(t *testing.T) {
|
||
|
|
cfg := config.Config{}
|
||
|
|
allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("expected no error, got %v", err)
|
||
|
|
}
|
||
|
|
if !allowed || !strings.Contains(reason, "no coordinator configured") {
|
||
|
|
t.Fatalf("unexpected guard result allowed=%v reason=%q", allowed, reason)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestCoordinatorAllowsFallbackIntentStateMatrix runs one orchestration or CLI step.
|
||
|
|
// Signature: TestCoordinatorAllowsFallbackIntentStateMatrix(t *testing.T).
|
||
|
|
// Why: covers peer fallback guard behavior for each coordinator intent branch.
|
||
|
|
func TestCoordinatorAllowsFallbackIntentStateMatrix(t *testing.T) {
|
||
|
|
now := time.Now().UTC()
|
||
|
|
stale := now.Add(-20 * time.Minute).Format(time.RFC3339)
|
||
|
|
fresh := now.Format(time.RFC3339)
|
||
|
|
oldComplete := now.Add(-5 * time.Minute).Format(time.RFC3339)
|
||
|
|
|
||
|
|
cases := []struct {
|
||
|
|
name string
|
||
|
|
intentLine string
|
||
|
|
wantAllowed bool
|
||
|
|
wantReasonSub string
|
||
|
|
}{
|
||
|
|
{
|
||
|
|
name: "shutting_down_fresh_blocks",
|
||
|
|
intentLine: intentOutputLine("shutting_down", "fresh", "test", fresh),
|
||
|
|
wantAllowed: false,
|
||
|
|
wantReasonSub: "intent=shutting_down",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "shutting_down_stale_allows",
|
||
|
|
intentLine: intentOutputLine("shutting_down", "stale", "test", stale),
|
||
|
|
wantAllowed: true,
|
||
|
|
wantReasonSub: "shutdown intent stale",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "startup_in_progress_fresh_blocks",
|
||
|
|
intentLine: intentOutputLine("startup_in_progress", "fresh", "test", fresh),
|
||
|
|
wantAllowed: false,
|
||
|
|
wantReasonSub: "intent=startup_in_progress",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "startup_in_progress_stale_allows",
|
||
|
|
intentLine: intentOutputLine("startup_in_progress", "stale", "test", stale),
|
||
|
|
wantAllowed: true,
|
||
|
|
wantReasonSub: "startup intent stale",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "shutdown_complete_without_age_blocks",
|
||
|
|
intentLine: `intent=shutdown_complete reason="unknown" source=test`,
|
||
|
|
wantAllowed: false,
|
||
|
|
wantReasonSub: "unknown age",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "shutdown_complete_recent_blocks",
|
||
|
|
intentLine: intentOutputLine("shutdown_complete", "recent", "test", fresh),
|
||
|
|
wantAllowed: false,
|
||
|
|
wantReasonSub: "recently completed shutdown",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "shutdown_complete_old_allows",
|
||
|
|
intentLine: intentOutputLine("shutdown_complete", "old", "test", oldComplete),
|
||
|
|
wantAllowed: true,
|
||
|
|
wantReasonSub: "old enough",
|
||
|
|
},
|
||
|
|
{
|
||
|
|
name: "unknown_state_blocks",
|
||
|
|
intentLine: intentOutputLine("mystery_state", "unknown", "test", fresh),
|
||
|
|
wantAllowed: false,
|
||
|
|
wantReasonSub: "unknown",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, tc := range cases {
|
||
|
|
tc := tc
|
||
|
|
t.Run(tc.name, func(t *testing.T) {
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_IDLE__\necho '"+tc.intentLine+"'\n")
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
StartupGuardMaxAgeSec: 60,
|
||
|
|
},
|
||
|
|
Startup: config.Startup{
|
||
|
|
ShutdownCooldownSeconds: 45,
|
||
|
|
},
|
||
|
|
}
|
||
|
|
allowed, reason, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0))
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("guard check failed: %v", err)
|
||
|
|
}
|
||
|
|
if allowed != tc.wantAllowed {
|
||
|
|
t.Fatalf("unexpected allowed=%v want=%v reason=%q", allowed, tc.wantAllowed, reason)
|
||
|
|
}
|
||
|
|
if !strings.Contains(strings.ToLower(reason), strings.ToLower(tc.wantReasonSub)) {
|
||
|
|
t.Fatalf("unexpected reason=%q want substring %q", reason, tc.wantReasonSub)
|
||
|
|
}
|
||
|
|
})
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestCoordinatorGuardRejectsUnparseableIntent runs one orchestration or CLI step.
|
||
|
|
// Signature: TestCoordinatorGuardRejectsUnparseableIntent(t *testing.T).
|
||
|
|
// Why: covers parse-error path when coordinator output is malformed.
|
||
|
|
func TestCoordinatorGuardRejectsUnparseableIntent(t *testing.T) {
|
||
|
|
writeFakeSSH(t, "#!/usr/bin/env bash\nset -euo pipefail\necho __ANANKE_BOOTSTRAP_IDLE__\necho gibberish-output\n")
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHUser: "atlas",
|
||
|
|
Coordination: config.Coordination{
|
||
|
|
ForwardShutdownHost: "titan-db",
|
||
|
|
},
|
||
|
|
}
|
||
|
|
if _, _, err := coordinatorAllowsPeerFallbackStartup(context.Background(), cfg, log.New(io.Discard, "", 0)); err == nil {
|
||
|
|
t.Fatalf("expected parse error for malformed coordinator output")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestResolveSSHPathsPreferExplicitValues runs one orchestration or CLI step.
|
||
|
|
// Signature: TestResolveSSHPathsPreferExplicitValues(t *testing.T).
|
||
|
|
// Why: covers explicit ssh config/identity path branches in startup handoff helpers.
|
||
|
|
func TestResolveSSHPathsPreferExplicitValues(t *testing.T) {
|
||
|
|
cfg := config.Config{
|
||
|
|
SSHConfigFile: "/tmp/ssh-config",
|
||
|
|
SSHIdentityFile: "/tmp/ssh-key",
|
||
|
|
}
|
||
|
|
if got := resolveSSHConfigFile(cfg); got != "/tmp/ssh-config" {
|
||
|
|
t.Fatalf("unexpected config path: %q", got)
|
||
|
|
}
|
||
|
|
if got := resolveSSHIdentityFile(cfg); got != "/tmp/ssh-key" {
|
||
|
|
t.Fatalf("unexpected identity path: %q", got)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// TestResolveSSHPathCandidatesAndCooldownDefaults runs one orchestration or CLI step.
|
||
|
|
// Signature: TestResolveSSHPathCandidatesAndCooldownDefaults(t *testing.T).
|
||
|
|
// Why: covers candidate-list fallback branches and startup cooldown default logic.
|
||
|
|
func TestResolveSSHPathCandidatesAndCooldownDefaults(t *testing.T) {
|
||
|
|
tmp := t.TempDir()
|
||
|
|
cfgPath := filepath.Join(tmp, "config")
|
||
|
|
idPath := filepath.Join(tmp, "id_ed25519")
|
||
|
|
if err := os.WriteFile(cfgPath, []byte("host *"), 0o600); err != nil {
|
||
|
|
t.Fatalf("write cfg candidate: %v", err)
|
||
|
|
}
|
||
|
|
if err := os.WriteFile(idPath, []byte("key"), 0o600); err != nil {
|
||
|
|
t.Fatalf("write id candidate: %v", err)
|
||
|
|
}
|
||
|
|
prevCfg := append([]string{}, sshConfigCandidates...)
|
||
|
|
prevID := append([]string{}, sshIdentityCandidates...)
|
||
|
|
defer func() {
|
||
|
|
sshConfigCandidates = prevCfg
|
||
|
|
sshIdentityCandidates = prevID
|
||
|
|
}()
|
||
|
|
|
||
|
|
sshConfigCandidates = []string{cfgPath}
|
||
|
|
sshIdentityCandidates = []string{idPath}
|
||
|
|
if got := resolveSSHConfigFile(config.Config{}); got != cfgPath {
|
||
|
|
t.Fatalf("expected cfg candidate %q, got %q", cfgPath, got)
|
||
|
|
}
|
||
|
|
if got := resolveSSHIdentityFile(config.Config{}); got != idPath {
|
||
|
|
t.Fatalf("expected identity candidate %q, got %q", idPath, got)
|
||
|
|
}
|
||
|
|
|
||
|
|
sshConfigCandidates = []string{filepath.Join(tmp, "missing-config")}
|
||
|
|
sshIdentityCandidates = []string{filepath.Join(tmp, "missing-key")}
|
||
|
|
if got := resolveSSHConfigFile(config.Config{}); got != "" {
|
||
|
|
t.Fatalf("expected empty config fallback, got %q", got)
|
||
|
|
}
|
||
|
|
if got := resolveSSHIdentityFile(config.Config{}); got != "" {
|
||
|
|
t.Fatalf("expected empty identity fallback, got %q", got)
|
||
|
|
}
|
||
|
|
|
||
|
|
if got := startupShutdownCooldown(config.Config{}); got != 45*time.Second {
|
||
|
|
t.Fatalf("expected default cooldown 45s, got %s", got)
|
||
|
|
}
|
||
|
|
if got := startupShutdownCooldown(config.Config{Startup: config.Startup{ShutdownCooldownSeconds: 90}}); got != 90*time.Second {
|
||
|
|
t.Fatalf("expected configured cooldown 90s, got %s", got)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// intentOutputLine formats one coordinator intent record as space-separated key=value fields.
// Signature: intentOutputLine(stateValue, reason, source, updatedAt string) string.
// Why: keeps guard-branch fixtures concise and readable.
//
// The result has the form
//
//	intent=<stateValue> reason="<reason>" source=<source> updated_at=<updatedAt>
//
// Only reason is wrapped in double quotes; no value is escaped.
func intentOutputLine(stateValue, reason, source, updatedAt string) string {
	line := "intent=" + stateValue
	line += ` reason="` + reason + `"`
	line += " source=" + source
	line += " updated_at=" + updatedAt
	return line
}
|