422 lines
14 KiB
Go
422 lines
14 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
|
"scm.bstein.dev/bstein/ananke/internal/execx"
|
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
|
"scm.bstein.dev/bstein/ananke/internal/state"
|
|
"scm.bstein.dev/bstein/ananke/internal/ups"
|
|
)
|
|
|
|
// TestNewDaemonInitializesExporter runs one orchestration or CLI step.
|
|
// Signature: TestNewDaemonInitializesExporter(t *testing.T).
|
|
// Why: covers constructor branch so daemon initialization contracts stay explicit.
|
|
func TestNewDaemonInitializesExporter(t *testing.T) {
|
|
d := NewDaemon(config.Config{}, nil, nil, log.New(io.Discard, "", 0))
|
|
if d == nil || d.exporter == nil {
|
|
t.Fatalf("expected NewDaemon to initialize exporter")
|
|
}
|
|
}
|
|
|
|
// TestTriggerShutdownForwardSuccessSetsForwardedIntent runs one orchestration or CLI step.
|
|
// Signature: TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T).
|
|
// Why: covers forwarded shutdown happy-path branch and completion intent semantics.
|
|
func TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho forwarded\n"), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
}
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
intentPath := filepath.Join(tmp, "intent.json")
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
SSHUser: "atlas",
|
|
SSHPort: 2277,
|
|
State: config.State{
|
|
IntentPath: intentPath,
|
|
},
|
|
Coordination: config.Coordination{
|
|
ForwardShutdownHost: "titan-db",
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
CommandTimeoutSeconds: 3,
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
exporter: metrics.New(),
|
|
}
|
|
if err := d.triggerShutdown(context.Background(), "test-forward-success"); err != nil {
|
|
t.Fatalf("triggerShutdown forward success failed: %v", err)
|
|
}
|
|
in, err := state.ReadIntent(intentPath)
|
|
if err != nil {
|
|
t.Fatalf("read forward completion intent: %v", err)
|
|
}
|
|
if in.State != state.IntentShutdownComplete || in.Source != "daemon-forwarded" {
|
|
t.Fatalf("unexpected forward completion intent: %+v", in)
|
|
}
|
|
}
|
|
|
|
// TestTriggerShutdownForwardFailureWithoutFallback runs one orchestration or CLI step.
|
|
// Signature: TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T).
|
|
// Why: covers explicit failure branch when forwarding is required and local fallback is disabled.
|
|
func TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
}
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
SSHUser: "atlas",
|
|
SSHPort: 2277,
|
|
State: config.State{
|
|
IntentPath: filepath.Join(tmp, "intent.json"),
|
|
},
|
|
Coordination: config.Coordination{
|
|
ForwardShutdownHost: "titan-db",
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
FallbackLocalShutdown: false,
|
|
CommandTimeoutSeconds: 3,
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
exporter: metrics.New(),
|
|
}
|
|
err := d.triggerShutdown(context.Background(), "test-forward-fail")
|
|
if err == nil || !strings.Contains(err.Error(), "forward shutdown failed") {
|
|
t.Fatalf("expected forward failure without fallback, got %v", err)
|
|
}
|
|
}
|
|
|
|
// TestTriggerShutdownForwardFailureFallsBackToLocal runs one orchestration or CLI step.
|
|
// Signature: TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T).
|
|
// Why: covers fallback branch where local shutdown is used after forwarding fails.
|
|
func TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
}
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
orch := newDaemonTestOrchestrator(t, tmp)
|
|
intentPath := filepath.Join(tmp, "intent.json")
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
SSHUser: "atlas",
|
|
SSHPort: 2277,
|
|
State: config.State{
|
|
IntentPath: intentPath,
|
|
},
|
|
Shutdown: config.Shutdown{
|
|
EmergencySkipDrain: true,
|
|
EmergencySkipEtcd: true,
|
|
},
|
|
Coordination: config.Coordination{
|
|
ForwardShutdownHost: "titan-db",
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
FallbackLocalShutdown: true,
|
|
CommandTimeoutSeconds: 3,
|
|
},
|
|
},
|
|
orch: orch,
|
|
log: log.New(io.Discard, "", 0),
|
|
exporter: metrics.New(),
|
|
}
|
|
if err := d.triggerShutdown(context.Background(), "test-forward-fallback"); err != nil {
|
|
t.Fatalf("triggerShutdown fallback local failed: %v", err)
|
|
}
|
|
in, err := state.ReadIntent(intentPath)
|
|
if err != nil {
|
|
t.Fatalf("read local completion intent: %v", err)
|
|
}
|
|
if in.State != state.IntentShutdownComplete || in.Source != "daemon-local" {
|
|
t.Fatalf("unexpected local completion intent: %+v", in)
|
|
}
|
|
}
|
|
|
|
// TestForwardShutdownBuildsJumpArgs runs one orchestration or CLI step.
|
|
// Signature: TestForwardShutdownBuildsJumpArgs(t *testing.T).
|
|
// Why: covers jump-host argument construction branches in forward shutdown transport.
|
|
func TestForwardShutdownBuildsJumpArgs(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
argsOut := filepath.Join(tmp, "args.txt")
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
script := "#!/usr/bin/env bash\nset -euo pipefail\nprintf '%s\n' \"$*\" > " + argsOut + "\n"
|
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
}
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
SSHUser: "atlas",
|
|
SSHPort: 2277,
|
|
SSHConfigFile: "/tmp/custom-config",
|
|
SSHIdentityFile: "/tmp/custom-key",
|
|
SSHJumpHost: "titan-jh",
|
|
SSHJumpUser: "jump",
|
|
SSHNodeHosts: map[string]string{
|
|
"titan-db": "10.0.0.5",
|
|
},
|
|
SSHNodeUsers: map[string]string{
|
|
"titan-db": "dbadmin",
|
|
},
|
|
Coordination: config.Coordination{
|
|
ForwardShutdownHost: "titan-db",
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
CommandTimeoutSeconds: 3,
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
}
|
|
if err := d.forwardShutdown(context.Background(), "args-check"); err != nil {
|
|
t.Fatalf("forwardShutdown with jump args failed: %v", err)
|
|
}
|
|
|
|
raw, err := os.ReadFile(argsOut)
|
|
if err != nil {
|
|
t.Fatalf("read ssh args output: %v", err)
|
|
}
|
|
out := string(raw)
|
|
for _, want := range []string{"-F /tmp/custom-config", "-i /tmp/custom-key", "-J jump@titan-jh:2277", "-p 2277", "dbadmin@10.0.0.5"} {
|
|
if !strings.Contains(out, want) {
|
|
t.Fatalf("expected ssh args to include %q, got %q", want, out)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestStartMetricsServerInvalidBindLogsErrorPath runs one orchestration or CLI step.
|
|
// Signature: TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T).
|
|
// Why: exercises goroutine listen failure branch so metrics startup diagnostics remain covered.
|
|
func TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T) {
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
Metrics: config.Metrics{
|
|
Enabled: true,
|
|
BindAddr: "127.0.0.1:not-a-port",
|
|
Path: "/metrics",
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
exporter: metrics.New(),
|
|
}
|
|
if err := d.startMetricsServer(); err != nil {
|
|
t.Fatalf("startMetricsServer should return nil after goroutine spawn, got %v", err)
|
|
}
|
|
time.Sleep(25 * time.Millisecond)
|
|
}
|
|
|
|
// TestResolveSSHPathCandidatesFromOverrides runs one orchestration or CLI step.
|
|
// Signature: TestResolveSSHPathCandidatesFromOverrides(t *testing.T).
|
|
// Why: covers candidate-path discovery branches without requiring writes under /home.
|
|
func TestResolveSSHPathCandidatesFromOverrides(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
cfgPath := filepath.Join(tmp, "config")
|
|
keyPath := filepath.Join(tmp, "id_ed25519")
|
|
if err := os.WriteFile(cfgPath, []byte("Host *\n"), 0o600); err != nil {
|
|
t.Fatalf("write fake config candidate: %v", err)
|
|
}
|
|
if err := os.WriteFile(keyPath, []byte("fake-key"), 0o600); err != nil {
|
|
t.Fatalf("write fake key candidate: %v", err)
|
|
}
|
|
|
|
origConfigs := sshConfigCandidates
|
|
origKeys := sshIdentityCandidates
|
|
t.Cleanup(func() {
|
|
sshConfigCandidates = origConfigs
|
|
sshIdentityCandidates = origKeys
|
|
})
|
|
sshConfigCandidates = []string{cfgPath}
|
|
sshIdentityCandidates = []string{keyPath}
|
|
|
|
d := &Daemon{cfg: config.Config{}}
|
|
if got := d.resolveSSHConfigFile(); got != cfgPath {
|
|
t.Fatalf("expected config candidate path %q, got %q", cfgPath, got)
|
|
}
|
|
if got := d.resolveSSHIdentityFile(); got != keyPath {
|
|
t.Fatalf("expected key candidate path %q, got %q", keyPath, got)
|
|
}
|
|
}
|
|
|
|
// TestForwardShutdownKnownHostsRepairRetry runs one orchestration or CLI step.
|
|
// Signature: TestForwardShutdownKnownHostsRepairRetry(t *testing.T).
|
|
// Why: covers known-hosts-repair retry branch in forwarded shutdown transport.
|
|
func TestForwardShutdownKnownHostsRepairRetry(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
attemptMarker := filepath.Join(tmp, "attempt")
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
script := `#!/usr/bin/env bash
|
|
set -euo pipefail
|
|
marker="` + attemptMarker + `"
|
|
if [[ ! -f "$marker" ]]; then
|
|
echo "REMOTE HOST IDENTIFICATION HAS CHANGED!" >&2
|
|
touch "$marker"
|
|
exit 255
|
|
fi
|
|
echo "forwarded"
|
|
`
|
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
}
|
|
sshKeygenPath := filepath.Join(tmp, "ssh-keygen")
|
|
if err := os.WriteFile(sshKeygenPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 0\n"), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh-keygen: %v", err)
|
|
}
|
|
sshKeyscanPath := filepath.Join(tmp, "ssh-keyscan")
|
|
if err := os.WriteFile(sshKeyscanPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho fake-key\n"), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh-keyscan: %v", err)
|
|
}
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
knownHosts := filepath.Join(tmp, "known_hosts")
|
|
if err := os.WriteFile(knownHosts, []byte{}, 0o600); err != nil {
|
|
t.Fatalf("write known_hosts file: %v", err)
|
|
}
|
|
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
SSHConfigFile: knownHosts, // used only to derive known-hosts search path
|
|
SSHUser: "atlas",
|
|
SSHPort: 2277,
|
|
Coordination: config.Coordination{
|
|
ForwardShutdownHost: "titan-db",
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
CommandTimeoutSeconds: 3,
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
}
|
|
if err := d.forwardShutdown(context.Background(), "repair-retry"); err != nil {
|
|
t.Fatalf("forwardShutdown known-hosts repair retry failed: %v", err)
|
|
}
|
|
}
|
|
|
|
// TestTriggerShutdownReturnsLocalShutdownError runs one orchestration or CLI step.
|
|
// Signature: TestTriggerShutdownReturnsLocalShutdownError(t *testing.T).
|
|
// Why: covers local shutdown error propagation branch from triggerShutdown.
|
|
func TestTriggerShutdownReturnsLocalShutdownError(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
intentPath := filepath.Join(tmp, "intent-dir")
|
|
if err := os.MkdirAll(intentPath, 0o755); err != nil {
|
|
t.Fatalf("mkdir intent dir: %v", err)
|
|
}
|
|
orchCfg := config.Config{
|
|
ControlPlanes: []string{"titan-db"},
|
|
Workers: []string{"titan-23"},
|
|
State: config.State{
|
|
Dir: filepath.Join(tmp, "state"),
|
|
ReportsDir: filepath.Join(tmp, "reports"),
|
|
RunHistoryPath: filepath.Join(tmp, "runs.json"),
|
|
LockPath: filepath.Join(tmp, "ananke.lock"),
|
|
IntentPath: intentPath, // directory path forces MustWriteIntent failure in Shutdown
|
|
},
|
|
}
|
|
orch := cluster.New(
|
|
orchCfg,
|
|
&execx.Runner{DryRun: false, Logger: log.New(io.Discard, "", 0)},
|
|
state.New(filepath.Join(tmp, "runs.json")),
|
|
log.New(io.Discard, "", 0),
|
|
)
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
State: config.State{
|
|
IntentPath: intentPath,
|
|
},
|
|
Shutdown: config.Shutdown{
|
|
EmergencySkipDrain: true,
|
|
EmergencySkipEtcd: true,
|
|
},
|
|
},
|
|
orch: orch,
|
|
log: log.New(io.Discard, "", 0),
|
|
exporter: metrics.New(),
|
|
}
|
|
err := d.triggerShutdown(context.Background(), "local-shutdown-error")
|
|
if err == nil {
|
|
t.Fatalf("expected triggerShutdown to propagate local shutdown error")
|
|
}
|
|
}
|
|
|
|
// TestDaemonRunContextCancelNonTriggerPath runs one orchestration or CLI step.
|
|
// Signature: TestDaemonRunContextCancelNonTriggerPath(t *testing.T).
|
|
// Why: covers steady-state non-trigger loop branches in Run until context cancellation.
|
|
func TestDaemonRunContextCancelNonTriggerPath(t *testing.T) {
|
|
stateDir := t.TempDir()
|
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
UPS: config.UPS{
|
|
Enabled: true,
|
|
PollSeconds: 0, // exercise default poll fallback
|
|
DebounceCount: 0, // exercise default debounce fallback
|
|
RuntimeSafetyFactor: 0.5,
|
|
},
|
|
State: config.State{
|
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
|
},
|
|
},
|
|
orch: orch,
|
|
targets: []Target{
|
|
{
|
|
Name: "Pyrphoros",
|
|
Target: "pyrphoros@localhost",
|
|
Provider: &daemonFakeProvider{
|
|
samples: []ups.Sample{
|
|
{OnBattery: false, LowBattery: false, RuntimeSeconds: 7200, RawStatus: "OL"},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
exporter: metrics.New(),
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), 1100*time.Millisecond)
|
|
defer cancel()
|
|
if err := d.Run(ctx); err == nil {
|
|
t.Fatalf("expected context deadline/cancel in non-trigger loop")
|
|
}
|
|
}
|
|
|
|
// TestForwardShutdownErrorWithoutOutput runs one orchestration or CLI step.
|
|
// Signature: TestForwardShutdownErrorWithoutOutput(t *testing.T).
|
|
// Why: covers forwardShutdown branch where ssh fails without any stderr/stdout text.
|
|
func TestForwardShutdownErrorWithoutOutput(t *testing.T) {
|
|
tmp := t.TempDir()
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n"), 0o755); err != nil {
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
}
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
d := &Daemon{
|
|
cfg: config.Config{
|
|
SSHUser: "atlas",
|
|
Coordination: config.Coordination{
|
|
ForwardShutdownHost: "titan-db",
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
CommandTimeoutSeconds: 3,
|
|
},
|
|
},
|
|
log: log.New(io.Discard, "", 0),
|
|
}
|
|
err := d.forwardShutdown(context.Background(), "no-output-fail")
|
|
if err == nil || !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
|
|
t.Fatalf("expected no-output forward ssh failure, got %v", err)
|
|
}
|
|
}
|