ananke/internal/service/daemon_quality_branches_test.go

package service

import (
	"context"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/metrics"
	"scm.bstein.dev/bstein/ananke/internal/state"
	"scm.bstein.dev/bstein/ananke/internal/ups"
)

// TestNewDaemonInitializesExporter checks that NewDaemon returns a non-nil daemon whose
// exporter field is already populated.
// Signature: TestNewDaemonInitializesExporter(t *testing.T).
// Why: keeps the constructor contract explicit, using a zero Config and nil collaborators.
func TestNewDaemonInitializesExporter(t *testing.T) {
	quiet := log.New(io.Discard, "", 0)
	daemon := NewDaemon(config.Config{}, nil, nil, quiet)
	// Checked separately so the field access below never runs on a nil daemon.
	if daemon == nil {
		t.Fatalf("expected NewDaemon to initialize exporter")
	}
	if daemon.exporter == nil {
		t.Fatalf("expected NewDaemon to initialize exporter")
	}
}

// TestTriggerShutdownForwardSuccessSetsForwardedIntent runs one orchestration or CLI step.
// Signature: TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T).
// Why: covers forwarded shutdown happy-path branch and completion intent semantics.
func TestTriggerShutdownForwardSuccessSetsForwardedIntent(t *testing.T) {
	tmp := t.TempDir()
	sshPath := filepath.Join(tmp, "ssh")
	if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho forwarded\n"), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))

	intentPath := filepath.Join(tmp, "intent.json")
	d := &Daemon{
		cfg: config.Config{
			SSHUser: "atlas",
			SSHPort: 2277,
			State: config.State{
				IntentPath: intentPath,
			},
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				CommandTimeoutSeconds: 3,
			},
		},
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}
	if err := d.triggerShutdown(context.Background(), "test-forward-success"); err != nil {
		t.Fatalf("triggerShutdown forward success failed: %v", err)
	}
	in, err := state.ReadIntent(intentPath)
	if err != nil {
		t.Fatalf("read forward completion intent: %v", err)
	}
	if in.State != state.IntentShutdownComplete || in.Source != "daemon-forwarded" {
		t.Fatalf("unexpected forward completion intent: %+v", in)
	}
}

// TestTriggerShutdownForwardFailureWithoutFallback runs one orchestration or CLI step.
// Signature: TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T).
// Why: covers explicit failure branch when forwarding is required and local fallback is disabled.
func TestTriggerShutdownForwardFailureWithoutFallback(t *testing.T) {
	tmp := t.TempDir()
	sshPath := filepath.Join(tmp, "ssh")
	if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))

	d := &Daemon{
		cfg: config.Config{
			SSHUser: "atlas",
			SSHPort: 2277,
			State: config.State{
				IntentPath: filepath.Join(tmp, "intent.json"),
			},
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				FallbackLocalShutdown: false,
				CommandTimeoutSeconds: 3,
			},
		},
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}
	err := d.triggerShutdown(context.Background(), "test-forward-fail")
	if err == nil || !strings.Contains(err.Error(), "forward shutdown failed") {
		t.Fatalf("expected forward failure without fallback, got %v", err)
	}
}

// TestTriggerShutdownForwardFailureFallsBackToLocal installs a stub ssh that exits 255,
// enables FallbackLocalShutdown, supplies a test orchestrator, and calls triggerShutdown.
// Signature: TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T).
// Why: pins the fallback branch; after the forward attempt fails the recorded intent must
// be IntentShutdownComplete with source "daemon-local".
func TestTriggerShutdownForwardFailureFallsBackToLocal(t *testing.T) {
	dir := t.TempDir()
	const failingSSH = "#!/usr/bin/env bash\nset -euo pipefail\necho denied >&2\nexit 255\n"
	if err := os.WriteFile(filepath.Join(dir, "ssh"), []byte(failingSSH), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	// Prepend the temp dir so the stub shadows any real ssh binary.
	t.Setenv("PATH", dir+":"+os.Getenv("PATH"))

	orchestrator := newDaemonTestOrchestrator(t, dir)
	cfg := config.Config{
		SSHUser: "atlas",
		SSHPort: 2277,
		State: config.State{
			IntentPath: filepath.Join(dir, "intent.json"),
		},
		Shutdown: config.Shutdown{
			EmergencySkipDrain: true,
			EmergencySkipEtcd:  true,
		},
		Coordination: config.Coordination{
			ForwardShutdownHost:   "titan-db",
			ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
			FallbackLocalShutdown: true,
			CommandTimeoutSeconds: 3,
		},
	}
	daemon := &Daemon{
		cfg:      cfg,
		orch:     orchestrator,
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}
	if err := daemon.triggerShutdown(context.Background(), "test-forward-fallback"); err != nil {
		t.Fatalf("triggerShutdown fallback local failed: %v", err)
	}

	got, err := state.ReadIntent(cfg.State.IntentPath)
	if err != nil {
		t.Fatalf("read local completion intent: %v", err)
	}
	local := got.State == state.IntentShutdownComplete && got.Source == "daemon-local"
	if !local {
		t.Fatalf("unexpected local completion intent: %+v", got)
	}
}

// TestForwardShutdownBuildsJumpArgs installs a stub ssh that writes its arguments to a
// file, calls forwardShutdown with config-file, identity-file, jump-host, and per-node
// host/user overrides set, and checks the recorded argument string for each expected flag.
// Signature: TestForwardShutdownBuildsJumpArgs(t *testing.T).
// Why: covers jump-host argument construction branches in forward shutdown transport.
func TestForwardShutdownBuildsJumpArgs(t *testing.T) {
	tmp := t.TempDir()
	argsOut := filepath.Join(tmp, "args.txt")
	sshPath := filepath.Join(tmp, "ssh")
	// The output path is double-quoted inside the script so a temp directory containing
	// spaces (for example via TMPDIR) does not split the redirect target.
	script := `#!/usr/bin/env bash
set -euo pipefail
printf '%s\n' "$*" > "` + argsOut + `"
`
	if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	// Prepend the temp dir so the stub shadows any real ssh binary.
	t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))

	d := &Daemon{
		cfg: config.Config{
			SSHUser:         "atlas",
			SSHPort:         2277,
			SSHConfigFile:   "/tmp/custom-config",
			SSHIdentityFile: "/tmp/custom-key",
			SSHJumpHost:     "titan-jh",
			SSHJumpUser:     "jump",
			SSHNodeHosts: map[string]string{
				"titan-db": "10.0.0.5",
			},
			SSHNodeUsers: map[string]string{
				"titan-db": "dbadmin",
			},
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				CommandTimeoutSeconds: 3,
			},
		},
		log: log.New(io.Discard, "", 0),
	}
	if err := d.forwardShutdown(context.Background(), "args-check"); err != nil {
		t.Fatalf("forwardShutdown with jump args failed: %v", err)
	}

	raw, err := os.ReadFile(argsOut)
	if err != nil {
		t.Fatalf("read ssh args output: %v", err)
	}
	out := string(raw)
	// The stub joins all arguments with spaces ("$*"), so each flag and its value appear
	// as one space-separated substring.
	wantArgs := []string{
		"-F /tmp/custom-config",
		"-i /tmp/custom-key",
		"-J jump@titan-jh:2277",
		"-p 2277",
		"dbadmin@10.0.0.5",
	}
	for _, want := range wantArgs {
		if !strings.Contains(out, want) {
			t.Fatalf("expected ssh args to include %q, got %q", want, out)
		}
	}
}

// TestStartMetricsServerInvalidBindLogsErrorPath runs one orchestration or CLI step.
// Signature: TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T).
// Why: exercises goroutine listen failure branch so metrics startup diagnostics remain covered.
func TestStartMetricsServerInvalidBindLogsErrorPath(t *testing.T) {
	d := &Daemon{
		cfg: config.Config{
			Metrics: config.Metrics{
				Enabled:  true,
				BindAddr: "127.0.0.1:not-a-port",
				Path:     "/metrics",
			},
		},
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}
	if err := d.startMetricsServer(); err != nil {
		t.Fatalf("startMetricsServer should return nil after goroutine spawn, got %v", err)
	}
	time.Sleep(25 * time.Millisecond)
}

// TestResolveSSHPathCandidatesFromOverrides points the package-level sshConfigCandidates
// and sshIdentityCandidates lists at files inside a temp dir and checks that
// resolveSSHConfigFile and resolveSSHIdentityFile return those paths for an empty Config.
// Signature: TestResolveSSHPathCandidatesFromOverrides(t *testing.T).
// Why: covers candidate-path discovery branches without requiring writes under /home.
func TestResolveSSHPathCandidatesFromOverrides(t *testing.T) {
	dir := t.TempDir()
	wantConfig := filepath.Join(dir, "config")
	wantKey := filepath.Join(dir, "id_ed25519")

	err := os.WriteFile(wantConfig, []byte("Host *\n"), 0o600)
	if err != nil {
		t.Fatalf("write fake config candidate: %v", err)
	}
	err = os.WriteFile(wantKey, []byte("fake-key"), 0o600)
	if err != nil {
		t.Fatalf("write fake key candidate: %v", err)
	}

	// Swap the package-level candidate lists and restore them when the test ends.
	savedConfigs, savedKeys := sshConfigCandidates, sshIdentityCandidates
	t.Cleanup(func() {
		sshConfigCandidates, sshIdentityCandidates = savedConfigs, savedKeys
	})
	sshConfigCandidates = []string{wantConfig}
	sshIdentityCandidates = []string{wantKey}

	daemon := &Daemon{cfg: config.Config{}}
	gotConfig := daemon.resolveSSHConfigFile()
	if gotConfig != wantConfig {
		t.Fatalf("expected config candidate path %q, got %q", wantConfig, gotConfig)
	}
	gotKey := daemon.resolveSSHIdentityFile()
	if gotKey != wantKey {
		t.Fatalf("expected key candidate path %q, got %q", wantKey, gotKey)
	}
}

// TestForwardShutdownKnownHostsRepairRetry runs one orchestration or CLI step.
// Signature: TestForwardShutdownKnownHostsRepairRetry(t *testing.T).
// Why: covers known-hosts-repair retry branch in forwarded shutdown transport.
func TestForwardShutdownKnownHostsRepairRetry(t *testing.T) {
	tmp := t.TempDir()
	attemptMarker := filepath.Join(tmp, "attempt")
	sshPath := filepath.Join(tmp, "ssh")
	script := `#!/usr/bin/env bash
set -euo pipefail
marker="` + attemptMarker + `"
if [[ ! -f "$marker" ]]; then
  echo "REMOTE HOST IDENTIFICATION HAS CHANGED!" >&2
  touch "$marker"
  exit 255
fi
echo "forwarded"
`
	if err := os.WriteFile(sshPath, []byte(script), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	sshKeygenPath := filepath.Join(tmp, "ssh-keygen")
	if err := os.WriteFile(sshKeygenPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 0\n"), 0o755); err != nil {
		t.Fatalf("write fake ssh-keygen: %v", err)
	}
	sshKeyscanPath := filepath.Join(tmp, "ssh-keyscan")
	if err := os.WriteFile(sshKeyscanPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\necho fake-key\n"), 0o755); err != nil {
		t.Fatalf("write fake ssh-keyscan: %v", err)
	}
	t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))

	knownHosts := filepath.Join(tmp, "known_hosts")
	if err := os.WriteFile(knownHosts, []byte{}, 0o600); err != nil {
		t.Fatalf("write known_hosts file: %v", err)
	}

	d := &Daemon{
		cfg: config.Config{
			SSHConfigFile: knownHosts, // used only to derive known-hosts search path
			SSHUser:       "atlas",
			SSHPort:       2277,
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				CommandTimeoutSeconds: 3,
			},
		},
		log: log.New(io.Discard, "", 0),
	}
	if err := d.forwardShutdown(context.Background(), "repair-retry"); err != nil {
		t.Fatalf("forwardShutdown known-hosts repair retry failed: %v", err)
	}
}

// TestTriggerShutdownReturnsLocalShutdownError runs one orchestration or CLI step.
// Signature: TestTriggerShutdownReturnsLocalShutdownError(t *testing.T).
// Why: covers local shutdown error propagation branch from triggerShutdown.
func TestTriggerShutdownReturnsLocalShutdownError(t *testing.T) {
	tmp := t.TempDir()
	intentPath := filepath.Join(tmp, "intent-dir")
	if err := os.MkdirAll(intentPath, 0o755); err != nil {
		t.Fatalf("mkdir intent dir: %v", err)
	}
	orchCfg := config.Config{
		ControlPlanes: []string{"titan-db"},
		Workers:       []string{"titan-23"},
		State: config.State{
			Dir:            filepath.Join(tmp, "state"),
			ReportsDir:     filepath.Join(tmp, "reports"),
			RunHistoryPath: filepath.Join(tmp, "runs.json"),
			LockPath:       filepath.Join(tmp, "ananke.lock"),
			IntentPath:     intentPath, // directory path forces MustWriteIntent failure in Shutdown
		},
	}
	orch := cluster.New(
		orchCfg,
		&execx.Runner{DryRun: false, Logger: log.New(io.Discard, "", 0)},
		state.New(filepath.Join(tmp, "runs.json")),
		log.New(io.Discard, "", 0),
	)
	d := &Daemon{
		cfg: config.Config{
			State: config.State{
				IntentPath: intentPath,
			},
			Shutdown: config.Shutdown{
				EmergencySkipDrain: true,
				EmergencySkipEtcd:  true,
			},
		},
		orch:     orch,
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}
	err := d.triggerShutdown(context.Background(), "local-shutdown-error")
	if err == nil {
		t.Fatalf("expected triggerShutdown to propagate local shutdown error")
	}
}

// TestDaemonRunContextCancelNonTriggerPath runs the daemon loop against a fake UPS
// provider that reports an on-line sample ("OL", not on battery, 7200s runtime) under a
// 1100ms context timeout, and requires Run to return a non-nil error.
// Signature: TestDaemonRunContextCancelNonTriggerPath(t *testing.T).
// Why: covers steady-state non-trigger loop branches in Run until the context ends,
// with PollSeconds and DebounceCount left at zero to exercise their default fallbacks.
func TestDaemonRunContextCancelNonTriggerPath(t *testing.T) {
	dir := t.TempDir()
	orchestrator := newDaemonTestOrchestrator(t, dir)

	onLine := ups.Sample{
		OnBattery:      false,
		LowBattery:     false,
		RuntimeSeconds: 7200,
		RawStatus:      "OL",
	}
	targets := []Target{{
		Name:     "Pyrphoros",
		Target:   "pyrphoros@localhost",
		Provider: &daemonFakeProvider{samples: []ups.Sample{onLine}},
	}}
	cfg := config.Config{
		UPS: config.UPS{
			Enabled:             true,
			PollSeconds:         0, // exercise default poll fallback
			DebounceCount:       0, // exercise default debounce fallback
			RuntimeSafetyFactor: 0.5,
		},
		State: config.State{
			IntentPath: filepath.Join(dir, "intent.json"),
		},
	}
	daemon := &Daemon{
		cfg:      cfg,
		orch:     orchestrator,
		targets:  targets,
		log:      log.New(io.Discard, "", 0),
		exporter: metrics.New(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 1100*time.Millisecond)
	defer cancel()
	err := daemon.Run(ctx)
	if err == nil {
		t.Fatalf("expected context deadline/cancel in non-trigger loop")
	}
}

// TestForwardShutdownErrorWithoutOutput runs one orchestration or CLI step.
// Signature: TestForwardShutdownErrorWithoutOutput(t *testing.T).
// Why: covers forwardShutdown branch where ssh fails without any stderr/stdout text.
func TestForwardShutdownErrorWithoutOutput(t *testing.T) {
	tmp := t.TempDir()
	sshPath := filepath.Join(tmp, "ssh")
	if err := os.WriteFile(sshPath, []byte("#!/usr/bin/env bash\nset -euo pipefail\nexit 255\n"), 0o755); err != nil {
		t.Fatalf("write fake ssh: %v", err)
	}
	t.Setenv("PATH", tmp+":"+os.Getenv("PATH"))

	d := &Daemon{
		cfg: config.Config{
			SSHUser: "atlas",
			Coordination: config.Coordination{
				ForwardShutdownHost:   "titan-db",
				ForwardShutdownConfig: "/etc/ananke/ananke.yaml",
				CommandTimeoutSeconds: 3,
			},
		},
		log: log.New(io.Discard, "", 0),
	}
	err := d.forwardShutdown(context.Background(), "no-output-fail")
	if err == nil || !strings.Contains(strings.ToLower(err.Error()), "forward shutdown via ssh failed") {
		t.Fatalf("expected no-output forward ssh failure, got %v", err)
	}
}