ananke/internal/service/daemon_quality_branches_test.go

422 lines
14 KiB
Go

package service
import (
"context"
"io"
"log"
"os"
"path/filepath"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/cluster"
"scm.bstein.dev/bstein/ananke/internal/config"
"scm.bstein.dev/bstein/ananke/internal/execx"
"scm.bstein.dev/bstein/ananke/internal/metrics"
"scm.bstein.dev/bstein/ananke/internal/state"
"scm.bstein.dev/bstein/ananke/internal/ups"
)
// TestNewDaemonInitializesExporter builds a daemon from an empty config with
// nil collaborators and checks that both the daemon and its exporter are set.
// Signature: TestNewDaemonInitializesExporter(t *testing.T).
// Why: covers constructor branch so daemon initialization contracts stay explicit.
func TestNewDaemonInitializesExporter(t *testing.T) {
	quiet := log.New(io.Discard, "", 0)
	built := NewDaemon(config.Config{}, nil, nil, quiet)

	// Happy path: a non-nil daemon that already carries an exporter.
	if built != nil && built.exporter != nil {
		return
	}
	t.Fatalf("expected NewDaemon to initialize exporter")
}
// TestTriggerShutdownForwardSuccessSetsForwardedIntent runs one orchestration or CLI step.
// Signature: TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T).
// Why: covers forwarded shutdown happy-path branch and completion intent semantics.
func TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T) {
tmp := t.TempDir()
sshPath := filepath.Join(tmp, "ssh")
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho forwarded\n"), 0o755); err != nil {
t.Fatalf("write fake ssh: %v", err)
}
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
intentPath := filepath.Join(tmp, "intent.json")
d := &Daemon{
cfg: config.Config{
SSHUser: "atlas",
SSHPort: 2277,
State: config.State{
IntentPath: intentPath,
},
Coordination: config.Coordination{
ForwardShutdownHost: "titan-db",
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
CommandTimeoutSeconds: 3,
},
},
log: log.New(io.Discard, "", 0),
exporter: metrics.New(),
}
if err := d.triggerShutdown(context.Background(), "test-forward-success"); err != nil {
t.Fatalf("triggerShutdown forward success failed: %v", err)
}
in, err := state.ReadIntent(intentPath)
if err != nil {
t.Fatalf("read forward completion intent: %v", err)
}
if in.State != state.IntentShutdownComplete || in.Source != "daemon-forwarded" {
t.Fatalf("unexpected forward completion intent: %+v", in)
}
}
// TestTriggerShutdownForwardFailureWithoutFallback runs one orchestration or CLI step.
// Signature: TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T).
// Why: covers explicit failure branch when forwarding is required and local fallback is disabled.
func TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T) {
tmp := t.TempDir()
sshPath := filepath.Join(tmp, "ssh")
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
t.Fatalf("write fake ssh: %v", err)
}
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
d := &Daemon{
cfg: config.Config{
SSHUser: "atlas",
SSHPort: 2277,
State: config.State{
IntentPath: filepath.Join(tmp, "intent.json"),
},
Coordination: config.Coordination{
ForwardShutdownHost: "titan-db",
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
FallbackLocalShutdown: false,
CommandTimeoutSeconds: 3,
},
},
log: log.New(io.Discard, "", 0),
exporter: metrics.New(),
}
err := d.triggerShutdown(context.Background(), "test-forward-fail")
if err == nil || !strings.Contains(err.Error(), "forward shutdown failed") {
t.Fatalf("expected forward failure without fallback, got %v", err)
}
}
// TestTriggerShutdownForwardFailureFallsBackToLocal puts a failing stub ssh
// first on PATH, enables FallbackLocalShutdown with a test orchestrator, and
// expects the intent file to record IntentShutdownComplete from "daemon-local".
// Signature: TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T).
// Why: covers fallback branch where local shutdown is used after forwarding fails.
func TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T) {
	workDir := t.TempDir()

	// Stub ssh that always fails so the forward attempt cannot succeed.
	stub := []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n")
	if err := os.WriteFile(filepath.Join(workDir, "ssh"), stub, 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	t.Setenv("PATH", workDir+":"+os.Getenv("PATH"))

	localOrch := newDaemonTestOrchestrator(t, workDir)
	intentFile := filepath.Join(workDir, "intent.json")
	coordination := config.Coordination{
		ForwardShutdownHost:   "titan-db",
		ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
		FallbackLocalShutdown: true,
		CommandTimeoutSeconds: 3,
	}
	cfg := config.Config{
		SSHUser:      "atlas",
		SSHPort:      2277,
		State:        config.State{IntentPath: intentFile},
		Shutdown:     config.Shutdown{EmergencySkipDrain: true, EmergencySkipEtcd: true},
		Coordination: coordination,
	}
	d := &Daemon{
		cfg:      cfg,
		orch:     localOrch,
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}

	if err := d.triggerShutdown(context.Background(), "test-forward-fallback"); err != nil {
		t.Fatalf("triggerShutdown fallback local failed: %v", err)
	}

	got, err := state.ReadIntent(intentFile)
	if err != nil {
		t.Fatalf("read local completion intent: %v", err)
	}
	completedLocally := got.State == state.IntentShutdownComplete && got.Source == "daemon-local"
	if !completedLocally {
		t.Fatalf("unexpected local completion intent: %+v", got)
	}
}
// TestForwardShutdownBuildsJumpArgs runs one orchestration or CLI step.
// Signature: TestForwardShutdownBuildsJumpArgs(t *testing.T).
// Why: covers jump-host argument construction branches in forward shutdown transport.
func TestForwardShutdownBuildsJumpArgs(t *testing.T) {
tmp := t.TempDir()
argsOut := filepath.Join(tmp, "args.txt")
sshPath := filepath.Join(tmp, "ssh")
script := "#!/usr/bin/env bash\nset -euo pipefail\nprintf '%s\n' \"$*\" > " + argsOut + "\n"
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
t.Fatalf("write fake ssh: %v", err)
}
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
d := &Daemon{
cfg: config.Config{
SSHUser: "atlas",
SSHPort: 2277,
SSHConfigFile: "/tmp/custom-config",
SSHIdentityFile: "/tmp/custom-key",
SSHJumpHost: "titan-jh",
SSHJumpUser: "jump",
SSHNodeHosts: map[string]string{
"titan-db": "10.0.0.5",
},
SSHNodeUsers: map[string]string{
"titan-db": "dbadmin",
},
Coordination: config.Coordination{
ForwardShutdownHost: "titan-db",
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
CommandTimeoutSeconds: 3,
},
},
log: log.New(io.Discard, "", 0),
}
if err := d.forwardShutdown(context.Background(), "args-check"); err != nil {
t.Fatalf("forwardShutdown with jump args failed: %v", err)
}
raw, err := os.ReadFile(argsOut)
if err != nil {
t.Fatalf("read ssh args output: %v", err)
}
out := string(raw)
for _, want := range []string{"-F /tmp/custom-config", "-i /tmp/custom-key", "-J jump@titan-jh:2277", "-p 2277", "dbadmin@10.0.0.5"} {
if !strings.Contains(out, want) {
t.Fatalf("expected ssh args to include %q, got %q", want, out)
}
}
}
// TestStartMetricsServerInvalidBindLogsErrorPath runs one orchestration or CLI step.
// Signature: TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T).
// Why: exercises goroutine listen failure branch so metrics startup diagnostics remain covered.
func TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T) {
d := &Daemon{
cfg: config.Config{
Metrics: config.Metrics{
Enabled: true,
BindAddr: "127.0.0.1:not-a-port",
Path: "/metrics",
},
},
log: log.New(io.Discard, "", 0),
exporter: metrics.New(),
}
if err := d.startMetricsServer(); err != nil {
t.Fatalf("startMetricsServer should return nil after goroutine spawn, got %v", err)
}
time.Sleep(25 * time.Millisecond)
}
// TestResolveSSHPathCandidatesFromOverrides points the package-level
// sshConfigCandidates and sshIdentityCandidates lists at files in a temp dir
// and expects the resolvers on an empty-config daemon to return those paths.
// The original lists are restored through t.Cleanup.
// Signature: TestResolveSSHPathCandidatesFromOverrides(t *testing.T).
// Why: covers candidate-path discovery branches without requiring writes under /home.
func TestResolveSSHPathCandidatesFromOverrides(t *testing.T) {
	dir := t.TempDir()
	wantConfig := filepath.Join(dir, "config")
	wantKey := filepath.Join(dir, "id_ed25519")

	if err := os.WriteFile(wantConfig, []byte("Host *\n"), 0o600); err != nil {
		t.Fatalf("write fake config candidate: %v", err)
	}
	if err := os.WriteFile(wantKey, []byte("fake-key"), 0o600); err != nil {
		t.Fatalf("write fake key candidate: %v", err)
	}

	// Swap the candidate lists for the duration of this test only.
	savedConfigs, savedKeys := sshConfigCandidates, sshIdentityCandidates
	t.Cleanup(func() {
		sshConfigCandidates, sshIdentityCandidates = savedConfigs, savedKeys
	})
	sshConfigCandidates = []string{wantConfig}
	sshIdentityCandidates = []string{wantKey}

	d := &Daemon{cfg: config.Config{}}

	gotConfig := d.resolveSSHConfigFile()
	if gotConfig != wantConfig {
		t.Fatalf("expected config candidate path %q, got %q", wantConfig, gotConfig)
	}
	gotKey := d.resolveSSHIdentityFile()
	if gotKey != wantKey {
		t.Fatalf("expected key candidate path %q, got %q", wantKey, gotKey)
	}
}
// TestForwardShutdownKnownHostsRepairRetry runs one orchestration or CLI step.
// Signature: TestForwardShutdownKnownHostsRepairRetry(t *testing.T).
// Why: covers known-hosts-repair retry branch in forwarded shutdown transport.
func TestForwardShutdownKnownHostsRepairRetry(t *testing.T) {
tmp := t.TempDir()
attemptMarker := filepath.Join(tmp, "attempt")
sshPath := filepath.Join(tmp, "ssh")
script := `#!/usr/bin/env bash
set -euo pipefail
marker="` + attemptMarker + `"
if [[ ! -f "$marker" ]]; then
echo "REMOTE HOST IDENTIFICATION HAS CHANGED!" >&2
touch "$marker"
exit 255
fi
echo "forwarded"
`
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
t.Fatalf("write fake ssh: %v", err)
}
sshKeygenPath := filepath.Join(tmp, "ssh-keygen")
if err := os.WriteFile(sshKeygenPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 0\n"), 0o755); err != nil {
t.Fatalf("write fake ssh-keygen: %v", err)
}
sshKeyscanPath := filepath.Join(tmp, "ssh-keyscan")
if err := os.WriteFile(sshKeyscanPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho fake-key\n"), 0o755); err != nil {
t.Fatalf("write fake ssh-keyscan: %v", err)
}
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
knownHosts := filepath.Join(tmp, "known_hosts")
if err := os.WriteFile(knownHosts, []byte{}, 0o600); err != nil {
t.Fatalf("write known_hosts file: %v", err)
}
d := &Daemon{
cfg: config.Config{
SSHConfigFile: knownHosts, // used only to derive known-hosts search path
SSHUser: "atlas",
SSHPort: 2277,
Coordination: config.Coordination{
ForwardShutdownHost: "titan-db",
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
CommandTimeoutSeconds: 3,
},
},
log: log.New(io.Discard, "", 0),
}
if err := d.forwardShutdown(context.Background(), "repair-retry"); err != nil {
t.Fatalf("forwardShutdown known-hosts repair retry failed: %v", err)
}
}
// TestTriggerShutdownReturnsLocalShutdownError makes the intent path a
// directory, wires a real cluster orchestrator to it, and expects
// triggerShutdown (with no forward host configured) to return an error.
// Signature: TestTriggerShutdownReturnsLocalShutdownError(t *testing.T).
// Why: covers local shutdown error propagation branch from triggerShutdown.
func TestTriggerShutdownReturnsLocalShutdownError(t *testing.T) {
	workDir := t.TempDir()
	newQuietLogger := func() *log.Logger { return log.New(io.Discard, "", 0) }

	intentDir := filepath.Join(workDir, "intent-dir")
	if err := os.MkdirAll(intentDir, 0o755); err != nil {
		t.Fatalf("mkdir intent dir: %v", err)
	}

	orchState := config.State{
		Dir:            filepath.Join(workDir, "state"),
		ReportsDir:     filepath.Join(workDir, "reports"),
		RunHistoryPath: filepath.Join(workDir, "runs.json"),
		LockPath:       filepath.Join(workDir, "ananke.lock"),
		// Directory path forces MustWriteIntent failure in Shutdown.
		IntentPath: intentDir,
	}
	orchCfg := config.Config{
		ControlPlanes: []string{"titan-db"},
		Workers:       []string{"titan-23"},
		State:         orchState,
	}
	runner := &execx.Runner{DryRun: false, Logger: newQuietLogger()}
	history := state.New(filepath.Join(workDir, "runs.json"))
	orch := cluster.New(orchCfg, runner, history, newQuietLogger())

	daemonCfg := config.Config{
		State:    config.State{IntentPath: intentDir},
		Shutdown: config.Shutdown{EmergencySkipDrain: true, EmergencySkipEtcd: true},
	}
	d := &Daemon{
		cfg:      daemonCfg,
		orch:     orch,
		log:      newQuietLogger(),
		exporter: metrics.New(),
	}

	if err := d.triggerShutdown(context.Background(), "local-shutdown-error"); err != nil {
		return
	}
	t.Fatalf("expected triggerShutdown to propagate local shutdown error")
}
// TestDaemonRunContextCancelNonTriggerPath runs the daemon against a single
// fake UPS target reporting an on-line sample with 7200 seconds of runtime,
// under a 1100ms context deadline, and expects Run to return a non-nil error.
// Signature: TestDaemonRunContextCancelNonTriggerPath(t *testing.T).
// Why: covers steady-state non-trigger loop branches in Run until context cancellation.
func TestDaemonRunContextCancelNonTriggerPath(t *testing.T) {
	workDir := t.TempDir()
	orch := newDaemonTestOrchestrator(t, workDir)

	steady := ups.Sample{OnBattery: false, LowBattery: false, RuntimeSeconds: 7200, RawStatus: "OL"}
	provider := &daemonFakeProvider{samples: []ups.Sample{steady}}
	target := Target{
		Name:     "Pyrphoros",
		Target:   "pyrphoros@localhost",
		Provider: provider,
	}

	upsCfg := config.UPS{
		Enabled:             true,
		PollSeconds:         0, // exercise default poll fallback
		DebounceCount:       0, // exercise default debounce fallback
		RuntimeSafetyFactor: 0.5,
	}
	cfg := config.Config{
		UPS:   upsCfg,
		State: config.State{IntentPath: filepath.Join(workDir, "intent.json")},
	}
	d := &Daemon{
		cfg:      cfg,
		orch:     orch,
		targets:  []Target{target},
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}

	const runBudget = 1100 * time.Millisecond
	ctx, cancel := context.WithTimeout(context.Background(), runBudget)
	defer cancel()

	err := d.Run(ctx)
	if err == nil {
		t.Fatalf("expected context deadline/cancel in non-trigger loop")
	}
}
// TestForwardShutdownErrorWithoutOutput runs one orchestration or CLI step.
// Signature: TestForwardShutdownErrorWithoutOutput(t *testing.T).
// Why: covers forwardShutdown branch where ssh fails without any stderr/stdout text.
func TestForwardShutdownErrorWithoutOutput(t *testing.T) {
tmp := t.TempDir()
sshPath := filepath.Join(tmp, "ssh")
if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n"), 0o755); err != nil {
t.Fatalf("write fake ssh: %v", err)
}
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
d := &Daemon{
cfg: config.Config{
SSHUser: "atlas",
Coordination: config.Coordination{
ForwardShutdownHost: "titan-db",
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
CommandTimeoutSeconds: 3,
},
},
log: log.New(io.Discard, "", 0),
}
err := d.forwardShutdown(context.Background(), "no-output-fail")
if err == nil || !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
t.Fatalf("expected no-output forward ssh failure, got %v", err)
}
}