package service
import (
"context"
"io"
"log"
"os"
"path/filepath"
"strings"
"testing"
"time"
"scm.bstein.dev/bstein/ananke/internal/cluster"
"scm.bstein.dev/bstein/ananke/internal/config"
"scm.bstein.dev/bstein/ananke/internal/execx"
"scm.bstein.dev/bstein/ananke/internal/metrics"
"scm.bstein.dev/bstein/ananke/internal/state"
"scm.bstein.dev/bstein/ananke/internal/ups"
)
// daemonFakeProvider is a scripted UPS telemetry source for daemon tests.
// Each call to Read consumes one entry at the shared cursor idx: a non-nil
// entry in errs takes precedence over the sample at the same index.
type daemonFakeProvider struct {
	samples []ups.Sample // telemetry samples returned in call order
	errs    []error      // per-call errors; a non-nil entry is returned instead of a sample
	idx     int          // call cursor shared by samples and errs
}
// Read returns the next scripted error or sample without any real UPS I/O.
//
// At each call index a non-nil entry in errs wins over the sample at the
// same index. Once the scripted samples run out, the last sample is repeated
// indefinitely; with no samples at all, Read reports
// context.DeadlineExceeded so callers observe a telemetry timeout.
func (p *daemonFakeProvider) Read(_ context.Context) (ups.Sample, error) {
	i := p.idx
	switch {
	case i < len(p.errs) && p.errs[i] != nil:
		p.idx++
		return ups.Sample{}, p.errs[i]
	case i < len(p.samples):
		p.idx++
		return p.samples[i], nil
	case len(p.samples) > 0:
		// Exhausted: keep replaying the final sample.
		return p.samples[len(p.samples)-1], nil
	default:
		return ups.Sample{}, context.DeadlineExceeded
	}
}
// newDaemonTestOrchestrator builds a dry-run cluster.Orchestrator rooted at
// stateDir so daemon tests can exercise shutdown flows without real nodes.
//
// The fixture mirrors production config shape — one control plane, one
// worker, SSH endpoints for both, and all state/report paths under stateDir —
// while EmergencySkipDrain/EmergencySkipEtcd keep runs fast and hermetic.
func newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator {
	t.Helper()

	runsPath := filepath.Join(stateDir, "runs.json")
	discard := log.New(io.Discard, "", 0)

	cfg := config.Config{
		ControlPlanes:   []string{"titan-0a"},
		Workers:         []string{"titan-22"},
		SSHUser:         "atlas",
		SSHPort:         2277,
		SSHManagedNodes: []string{"titan-0a", "titan-22"},
		SSHNodeHosts: map[string]string{
			"titan-0a": "192.168.22.11",
			"titan-22": "192.168.22.22",
		},
		State: config.State{
			Dir:            stateDir,
			ReportsDir:     filepath.Join(stateDir, "reports"),
			RunHistoryPath: runsPath,
			LockPath:       filepath.Join(stateDir, "ananke.lock"),
			IntentPath:     filepath.Join(stateDir, "intent.json"),
		},
		Shutdown: config.Shutdown{
			EmergencySkipDrain: true,
			EmergencySkipEtcd:  true,
		},
	}

	// DryRun prevents the runner from executing real commands; all logs are discarded.
	runner := &execx.Runner{DryRun: true, Logger: discard}
	return cluster.New(cfg, runner, state.New(runsPath), discard)
}
// TestDaemonRunTriggersShutdownOnLowBattery runs one orchestration or CLI step.
// Signature: TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T).
// Why: covers main daemon loop path that triggers shutdown after debounce threshold.
func TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T) {
stateDir := t.TempDir()
orch := newDaemonTestOrchestrator(t, stateDir)
d := &Daemon{
cfg: config.Config{
UPS: config.UPS{
Enabled: true,
PollSeconds: 1,
DebounceCount: 1,
RuntimeSafetyFactor: 1.0,
},
State: config.State{
IntentPath: filepath.Join(stateDir, "intent.json"),
},
Shutdown: config.Shutdown{
EmergencySkipDrain: true,
EmergencySkipEtcd: true,
},
},
orch: orch,
targets: []Target{
{
Name: "Pyrphoros",
Target: "pyrphoros@localhost",
Provider: &daemonFakeProvider{
samples: []ups.Sample{{OnBattery: true, LowBattery: true, RuntimeSeconds: 30, RawStatus: "OB LB"}},
},
},
},
log: log.New(io.Discard, "", 0),
exporter: metrics.New(),
}
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
defer cancel()
if err := d.Run(ctx); err != nil {
t.Fatalf("expected daemon to trigger and complete shutdown, got %v", err)
}
}
// TestDaemonRunTriggersShutdownOnTelemetryTimeout runs one orchestration or CLI step.
// Signature: TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T).
// Why: covers telemetry-timeout trigger path while UPS remains on-battery.
func TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T) {
stateDir := t.TempDir()
orch := newDaemonTestOrchestrator(t, stateDir)
d := &Daemon{
cfg: config.Config{
UPS: config.UPS{
Enabled: true,
PollSeconds: 1,
DebounceCount: 3,
RuntimeSafetyFactor: 1.0,
TelemetryTimeoutSeconds: 1,
},
State: config.State{
IntentPath: filepath.Join(stateDir, "intent.json"),
},
Shutdown: config.Shutdown{
EmergencySkipDrain: true,
EmergencySkipEtcd: true,
},
},
orch: orch,
targets: []Target{
{
Name: "Statera",
Target: "statera@localhost",
Provider: &daemonFakeProvider{
samples: []ups.Sample{{OnBattery: true, LowBattery: false, RuntimeSeconds: 9999, RawStatus: "OB"}},
errs: []error{nil, context.DeadlineExceeded, context.DeadlineExceeded, context.DeadlineExceeded},
},
},
},
log: log.New(io.Discard, "", 0),
exporter: metrics.New(),
}
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
defer cancel()
if err := d.Run(ctx); err != nil {
t.Fatalf("expected telemetry-timeout shutdown path to complete, got %v", err)
}
}
// TestForwardShutdownSucceedsWithSSHShim covers the forward-shutdown SSH
// path: a fake ssh executable is installed at the front of PATH so the
// forwarded command succeeds without any network access.
func TestForwardShutdownSucceedsWithSSHShim(t *testing.T) {
	binDir := t.TempDir()
	shim := filepath.Join(binDir, "ssh")
	script := `#!/usr/bin/env bash
set -euo pipefail
echo forwarded
`
	if err := os.WriteFile(shim, []byte(script), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	// Prepend the shim dir so the daemon resolves our fake ssh first.
	t.Setenv("PATH", binDir+":"+os.Getenv("PATH"))

	d := &Daemon{
		cfg: config.Config{
			SSHUser: "atlas",
			SSHPort: 2277,
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				CommandTimeoutSeconds: 5,
			},
		},
		log: log.New(io.Discard, "", 0),
	}

	if err := d.forwardShutdown(context.Background(), "test-forward"); err != nil {
		t.Fatalf("forwardShutdown failed: %v", err)
	}
}
// TestForwardShutdownFailsWhenSSHFailsAndNoRecovery covers error
// propagation: when the fake ssh exits 255 (the classic ssh transport
// failure code), forwardShutdown must surface a wrapped error rather than
// swallowing it.
func TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T) {
	binDir := t.TempDir()
	shim := filepath.Join(binDir, "ssh")
	script := `#!/usr/bin/env bash
set -euo pipefail
echo "permission denied" >&2
exit 255
`
	if err := os.WriteFile(shim, []byte(script), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	t.Setenv("PATH", binDir+":"+os.Getenv("PATH"))

	d := &Daemon{
		cfg: config.Config{
			SSHUser: "atlas",
			SSHPort: 2277,
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				CommandTimeoutSeconds: 5,
			},
		},
		log: log.New(io.Discard, "", 0),
	}

	err := d.forwardShutdown(context.Background(), "test-fail")
	if err == nil {
		t.Fatalf("expected forwardShutdown error")
	}
	if lowered := strings.ToLower(err.Error()); !strings.Contains(lowered, "forward shutdown via ssh failed") {
		t.Fatalf("unexpected error: %v", err)
	}
}
// TestStartMetricsServerSuccess verifies that startMetricsServer returns nil
// when metrics are enabled and a loopback bind address is available.
func TestStartMetricsServerSuccess(t *testing.T) {
	d := &Daemon{
		cfg: config.Config{
			Metrics: config.Metrics{
				Enabled:  true,
				BindAddr: "127.0.0.1:0", // port 0: let the OS pick a free port
				Path:     "/metrics",
			},
		},
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}

	if err := d.startMetricsServer(); err != nil {
		t.Fatalf("startMetricsServer failed: %v", err)
	}
}