2026-04-09 01:38:06 -03:00
|
|
|
package service
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
"io"
|
|
|
|
|
"log"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"strings"
|
|
|
|
|
"testing"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/cluster"
|
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/config"
|
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/execx"
|
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/metrics"
|
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/state"
|
|
|
|
|
"scm.bstein.dev/bstein/ananke/internal/ups"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type daemonFakeProvider struct {
|
|
|
|
|
samples []ups.Sample
|
|
|
|
|
errs []error
|
|
|
|
|
idx int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Read runs one orchestration or CLI step.
|
|
|
|
|
// Signature: (p *daemonFakeProvider) Read(ctx context.Context) (ups.Sample, error).
|
|
|
|
|
// Why: daemon tests need deterministic telemetry/error sequencing without real UPS I/O.
|
|
|
|
|
func (p *daemonFakeProvider) Read(_ context.Context) (ups.Sample, error) {
|
|
|
|
|
if p.idx < len(p.errs) && p.errs[p.idx] != nil {
|
|
|
|
|
err := p.errs[p.idx]
|
|
|
|
|
p.idx++
|
|
|
|
|
return ups.Sample{}, err
|
|
|
|
|
}
|
|
|
|
|
if p.idx < len(p.samples) {
|
|
|
|
|
s := p.samples[p.idx]
|
|
|
|
|
p.idx++
|
|
|
|
|
return s, nil
|
|
|
|
|
}
|
|
|
|
|
if len(p.samples) > 0 {
|
|
|
|
|
return p.samples[len(p.samples)-1], nil
|
|
|
|
|
}
|
|
|
|
|
return ups.Sample{}, context.DeadlineExceeded
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// newDaemonTestOrchestrator runs one orchestration or CLI step.
|
|
|
|
|
// Signature: newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator.
|
|
|
|
|
// Why: daemon tests share a minimal dry-run orchestrator fixture to avoid duplication.
|
|
|
|
|
func newDaemonTestOrchestrator(t *testing.T, stateDir string) *cluster.Orchestrator {
|
|
|
|
|
t.Helper()
|
|
|
|
|
cfg := config.Config{
|
|
|
|
|
ControlPlanes: []string{"titan-0a"},
|
|
|
|
|
Workers: []string{"titan-22"},
|
|
|
|
|
SSHUser: "atlas",
|
|
|
|
|
SSHPort: 2277,
|
|
|
|
|
SSHManagedNodes: []string{"titan-0a", "titan-22"},
|
|
|
|
|
SSHNodeHosts: map[string]string{
|
|
|
|
|
"titan-0a": "192.168.22.11",
|
|
|
|
|
"titan-22": "192.168.22.22",
|
|
|
|
|
},
|
|
|
|
|
State: config.State{
|
|
|
|
|
Dir: stateDir,
|
|
|
|
|
ReportsDir: filepath.Join(stateDir, "reports"),
|
|
|
|
|
RunHistoryPath: filepath.Join(stateDir, "runs.json"),
|
|
|
|
|
LockPath: filepath.Join(stateDir, "ananke.lock"),
|
|
|
|
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
|
|
|
|
},
|
|
|
|
|
Shutdown: config.Shutdown{
|
|
|
|
|
EmergencySkipDrain: true,
|
|
|
|
|
EmergencySkipEtcd: true,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
return cluster.New(
|
|
|
|
|
cfg,
|
|
|
|
|
&execx.Runner{DryRun: true, Logger: log.New(io.Discard, "", 0)},
|
|
|
|
|
state.New(filepath.Join(stateDir, "runs.json")),
|
|
|
|
|
log.New(io.Discard, "", 0),
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestDaemonRunTriggersShutdownOnLowBattery runs one orchestration or CLI step.
|
|
|
|
|
// Signature: TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T).
|
|
|
|
|
// Why: covers main daemon loop path that triggers shutdown after debounce threshold.
|
|
|
|
|
func TestDaemonRunTriggersShutdownOnLowBattery(t *testing.T) {
|
|
|
|
|
stateDir := t.TempDir()
|
|
|
|
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
|
|
|
|
d := &Daemon{
|
|
|
|
|
cfg: config.Config{
|
|
|
|
|
UPS: config.UPS{
|
|
|
|
|
Enabled: true,
|
|
|
|
|
PollSeconds: 1,
|
|
|
|
|
DebounceCount: 1,
|
|
|
|
|
RuntimeSafetyFactor: 1.0,
|
|
|
|
|
},
|
|
|
|
|
State: config.State{
|
|
|
|
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
|
|
|
|
},
|
|
|
|
|
Shutdown: config.Shutdown{
|
|
|
|
|
EmergencySkipDrain: true,
|
|
|
|
|
EmergencySkipEtcd: true,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
orch: orch,
|
|
|
|
|
targets: []Target{
|
|
|
|
|
{
|
|
|
|
|
Name: "Pyrphoros",
|
|
|
|
|
Target: "pyrphoros@localhost",
|
|
|
|
|
Provider: &daemonFakeProvider{
|
|
|
|
|
samples: []ups.Sample{{OnBattery: true, LowBattery: true, RuntimeSeconds: 30, RawStatus: "OB LB"}},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
log: log.New(io.Discard, "", 0),
|
|
|
|
|
exporter: metrics.New(),
|
|
|
|
|
}
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
|
|
|
|
defer cancel()
|
|
|
|
|
if err := d.Run(ctx); err != nil {
|
|
|
|
|
t.Fatalf("expected daemon to trigger and complete shutdown, got %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestDaemonRunTriggersShutdownOnTelemetryTimeout runs one orchestration or CLI step.
|
|
|
|
|
// Signature: TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T).
|
|
|
|
|
// Why: covers telemetry-timeout trigger path while UPS remains on-battery.
|
|
|
|
|
func TestDaemonRunTriggersShutdownOnTelemetryTimeout(t *testing.T) {
|
|
|
|
|
stateDir := t.TempDir()
|
|
|
|
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
|
|
|
|
d := &Daemon{
|
|
|
|
|
cfg: config.Config{
|
|
|
|
|
UPS: config.UPS{
|
|
|
|
|
Enabled: true,
|
|
|
|
|
PollSeconds: 1,
|
|
|
|
|
DebounceCount: 3,
|
|
|
|
|
RuntimeSafetyFactor: 1.0,
|
|
|
|
|
TelemetryTimeoutSeconds: 1,
|
|
|
|
|
},
|
|
|
|
|
State: config.State{
|
|
|
|
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
|
|
|
|
},
|
|
|
|
|
Shutdown: config.Shutdown{
|
|
|
|
|
EmergencySkipDrain: true,
|
|
|
|
|
EmergencySkipEtcd: true,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
orch: orch,
|
|
|
|
|
targets: []Target{
|
|
|
|
|
{
|
|
|
|
|
Name: "Statera",
|
|
|
|
|
Target: "statera@localhost",
|
|
|
|
|
Provider: &daemonFakeProvider{
|
|
|
|
|
samples: []ups.Sample{{OnBattery: true, LowBattery: false, RuntimeSeconds: 9999, RawStatus: "OB"}},
|
|
|
|
|
errs: []error{nil, context.DeadlineExceeded, context.DeadlineExceeded, context.DeadlineExceeded},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
log: log.New(io.Discard, "", 0),
|
|
|
|
|
exporter: metrics.New(),
|
|
|
|
|
}
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
|
|
|
|
|
defer cancel()
|
|
|
|
|
if err := d.Run(ctx); err != nil {
|
|
|
|
|
t.Fatalf("expected telemetry-timeout shutdown path to complete, got %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-05 10:53:00 -03:00
|
|
|
// TestDaemonRunTriggersShutdownAfterOnBatteryGrace runs one orchestration or CLI step.
|
|
|
|
|
// Signature: TestDaemonRunTriggersShutdownAfterOnBatteryGrace(t *testing.T).
|
|
|
|
|
// Why: covers the sustained-on-battery trigger so short runtime estimates are not
|
|
|
|
|
// the only path to a graceful shutdown during abrupt power loss.
|
|
|
|
|
func TestDaemonRunTriggersShutdownAfterOnBatteryGrace(t *testing.T) {
|
|
|
|
|
stateDir := t.TempDir()
|
|
|
|
|
orch := newDaemonTestOrchestrator(t, stateDir)
|
|
|
|
|
d := &Daemon{
|
|
|
|
|
cfg: config.Config{
|
|
|
|
|
UPS: config.UPS{
|
|
|
|
|
Enabled: true,
|
|
|
|
|
PollSeconds: 1,
|
|
|
|
|
DebounceCount: 1,
|
|
|
|
|
RuntimeSafetyFactor: 1.0,
|
|
|
|
|
OnBatteryGraceSeconds: 1,
|
|
|
|
|
},
|
|
|
|
|
State: config.State{
|
|
|
|
|
IntentPath: filepath.Join(stateDir, "intent.json"),
|
|
|
|
|
},
|
|
|
|
|
Shutdown: config.Shutdown{
|
|
|
|
|
EmergencySkipDrain: true,
|
|
|
|
|
EmergencySkipEtcd: true,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
orch: orch,
|
|
|
|
|
targets: []Target{
|
|
|
|
|
{
|
|
|
|
|
Name: "Pyrphoros",
|
|
|
|
|
Target: "pyrphoros@localhost",
|
|
|
|
|
Provider: &daemonFakeProvider{
|
|
|
|
|
samples: []ups.Sample{{OnBattery: true, LowBattery: false, RuntimeSeconds: 9999, RawStatus: "OB"}},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
log: log.New(io.Discard, "", 0),
|
|
|
|
|
exporter: metrics.New(),
|
|
|
|
|
}
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
|
|
|
defer cancel()
|
|
|
|
|
if err := d.Run(ctx); err != nil {
|
|
|
|
|
t.Fatalf("expected sustained-on-battery shutdown path to complete, got %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-09 01:38:06 -03:00
|
|
|
// TestForwardShutdownSucceedsWithSSHShim runs one orchestration or CLI step.
|
|
|
|
|
// Signature: TestForwardShutdownSucceedsWithSSHShim(t *testing.T).
|
|
|
|
|
// Why: covers forward-shutdown SSH execution path.
|
|
|
|
|
func TestForwardShutdownSucceedsWithSSHShim(t *testing.T) {
|
|
|
|
|
tmp := t.TempDir()
|
|
|
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
|
|
|
script := `#!/usr/bin/env bash
|
|
|
|
|
set -euo pipefail
|
|
|
|
|
echo forwarded
|
|
|
|
|
`
|
|
|
|
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
|
|
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
|
|
|
}
|
|
|
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
|
|
|
|
|
|
d := &Daemon{
|
|
|
|
|
cfg: config.Config{
|
|
|
|
|
SSHUser: "atlas",
|
|
|
|
|
SSHPort: 2277,
|
|
|
|
|
Coordination: config.Coordination{
|
|
|
|
|
ForwardShutdownHost: "titan-db",
|
|
|
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
|
|
|
CommandTimeoutSeconds: 5,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
log: log.New(io.Discard, "", 0),
|
|
|
|
|
}
|
|
|
|
|
if err := d.forwardShutdown(context.Background(), "test-forward"); err != nil {
|
|
|
|
|
t.Fatalf("forwardShutdown failed: %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestForwardShutdownFailsWhenSSHFailsAndNoRecovery runs one orchestration or CLI step.
|
|
|
|
|
// Signature: TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T).
|
|
|
|
|
// Why: covers forwarded shutdown error propagation branch.
|
|
|
|
|
func TestForwardShutdownFailsWhenSSHFailsAndNoRecovery(t *testing.T) {
|
|
|
|
|
tmp := t.TempDir()
|
|
|
|
|
sshPath := filepath.Join(tmp, "ssh")
|
|
|
|
|
script := `#!/usr/bin/env bash
|
|
|
|
|
set -euo pipefail
|
|
|
|
|
echo "permission denied" >&2
|
|
|
|
|
exit 255
|
|
|
|
|
`
|
|
|
|
|
if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
|
|
|
|
|
t.Fatalf("write fake ssh: %v", err)
|
|
|
|
|
}
|
|
|
|
|
t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))
|
|
|
|
|
|
|
|
|
|
d := &Daemon{
|
|
|
|
|
cfg: config.Config{
|
|
|
|
|
SSHUser: "atlas",
|
|
|
|
|
SSHPort: 2277,
|
|
|
|
|
Coordination: config.Coordination{
|
|
|
|
|
ForwardShutdownHost: "titan-db",
|
|
|
|
|
ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
|
|
|
|
|
CommandTimeoutSeconds: 5,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
log: log.New(io.Discard, "", 0),
|
|
|
|
|
}
|
|
|
|
|
err := d.forwardShutdown(context.Background(), "test-fail")
|
|
|
|
|
if err == nil {
|
|
|
|
|
t.Fatalf("expected forwardShutdown error")
|
|
|
|
|
}
|
|
|
|
|
if !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
|
|
|
|
|
t.Fatalf("unexpected error: %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestStartMetricsServerSuccess runs one orchestration or CLI step.
|
|
|
|
|
// Signature: TestStartMetricsServerSuccess(t *testing.T).
|
|
|
|
|
// Why: covers successful metrics server startup branch.
|
|
|
|
|
func TestStartMetricsServerSuccess(t *testing.T) {
|
|
|
|
|
d := &Daemon{
|
|
|
|
|
cfg: config.Config{
|
|
|
|
|
Metrics: config.Metrics{
|
|
|
|
|
Enabled: true,
|
|
|
|
|
BindAddr: "127.0.0.1:0",
|
|
|
|
|
Path: "/metrics",
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
log: log.New(io.Discard, "", 0),
|
|
|
|
|
exporter: metrics.New(),
|
|
|
|
|
}
|
|
|
|
|
if err := d.startMetricsServer(); err != nil {
|
|
|
|
|
t.Fatalf("startMetricsServer failed: %v", err)
|
|
|
|
|
}
|
|
|
|
|
}
|