// ananke/internal/config/config_test.go

package config

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// TestLoadAcceptsUPSTargets checks that Load keeps the UPS target list from a config file.
// Signature: TestLoadAcceptsUPSTargets(t *testing.T).
// Why: the fixture enables UPS with provider "nut" and a single target, so the
// loaded config must expose exactly that one target string.
func TestLoadAcceptsUPSTargets(t *testing.T) {
	const fixture = `
control_planes: [titan-0a, titan-0b, titan-0c]
expected_flux_branch: main
iac_repo_path: /opt/titan-iac
ups:
  enabled: true
  provider: nut
  targets:
    - name: pyrphoros
      target: pyrphoros@localhost
shutdown:
  default_budget_seconds: 300
state:
  run_history_path: /tmp/runs.json
  lock_path: /tmp/ananke.lock
`
	path := filepath.Join(t.TempDir(), "ananke.yaml")
	// TrimSpace strips the leading and trailing newline of the raw literal.
	payload := []byte(strings.TrimSpace(fixture))
	if err := os.WriteFile(path, payload, 0o644); err != nil {
		t.Fatalf("write config: %v", err)
	}

	loaded, err := Load(path)
	if err != nil {
		t.Fatalf("load config: %v", err)
	}

	// A wrong count and a wrong target string both report the full list.
	targets := loaded.UPS.Targets
	if len(targets) != 1 {
		t.Fatalf("unexpected UPS targets: %#v", targets)
	}
	if targets[0].Target != "pyrphoros@localhost" {
		t.Fatalf("unexpected UPS targets: %#v", targets)
	}
}

// TestValidateForwardShutdownRequiresConfigPath expects Validate to fail when a
// forward-shutdown host is set but the forward-shutdown config path is empty.
// Signature: TestValidateForwardShutdownRequiresConfigPath(t *testing.T).
// Why: a forwarding host without a config path must not pass validation.
func TestValidateForwardShutdownRequiresConfigPath(t *testing.T) {
	c := defaults()
	c.Coordination.ForwardShutdownConfig = ""
	c.Coordination.ForwardShutdownHost = "titan-db"

	if c.Validate() == nil {
		t.Fatal("expected validation error for missing forward_shutdown_config")
	}
}

// TestValidateRejectsUnknownRole expects Validate to fail when the coordination
// role is set to the string "unknown".
// Signature: TestValidateRejectsUnknownRole(t *testing.T).
// Why: an unrecognized coordination role must be reported instead of accepted.
func TestValidateRejectsUnknownRole(t *testing.T) {
	c := defaults()
	c.Coordination.Role = "unknown"

	if c.Validate() == nil {
		t.Fatal("expected validation error for unknown coordination role")
	}
}

// TestValidateRejectsEmptyPeerHostEntry expects Validate to fail when the peer
// host list contains a whitespace-only entry next to a regular host name.
// Signature: TestValidateRejectsEmptyPeerHostEntry(t *testing.T).
// Why: a blank peer_hosts entry must be rejected rather than carried along.
func TestValidateRejectsEmptyPeerHostEntry(t *testing.T) {
	c := defaults()
	// The second entry is a single space, i.e. blank once trimmed.
	peers := []string{"titan-24", " "}
	c.Coordination.PeerHosts = peers

	if c.Validate() == nil {
		t.Fatal("expected validation error for empty peer_hosts entry")
	}
}

// TestValidateRejectsUnknownEtcdRestoreControlPlane expects Validate to fail when
// the etcd restore control plane is set to "titan-missing".
// Signature: TestValidateRejectsUnknownEtcdRestoreControlPlane(t *testing.T).
// Why: the test treats "titan-missing" as a control plane the default config does
// not know about, so validation must not accept it.
func TestValidateRejectsUnknownEtcdRestoreControlPlane(t *testing.T) {
	c := defaults()
	c.Startup.EtcdRestoreControlPlane = "titan-missing"

	if c.Validate() == nil {
		t.Fatal("expected validation error for unknown etcd restore control plane")
	}
}

// TestLoadSetsCoordinationGuardDefaults checks that Load fills in coordination and
// startup settings that the config file leaves out.
// Signature: TestLoadSetsCoordinationGuardDefaults(t *testing.T).
// Why: the fixture only sets the coordination role and disables UPS, so every
// field asserted below has to come from defaults applied during Load.
func TestLoadSetsCoordinationGuardDefaults(t *testing.T) {
	const fixture = `
control_planes: [titan-0a, titan-0b, titan-0c]
expected_flux_branch: main
iac_repo_path: /opt/titan-iac
coordination:
  role: coordinator
ups:
  enabled: false
state:
  run_history_path: /tmp/runs.json
  lock_path: /tmp/ananke.lock
`
	path := filepath.Join(t.TempDir(), "ananke.yaml")
	// TrimSpace strips the leading and trailing newline of the raw literal.
	payload := []byte(strings.TrimSpace(fixture))
	if err := os.WriteFile(path, payload, 0o644); err != nil {
		t.Fatalf("write config: %v", err)
	}

	loaded, err := Load(path)
	if err != nil {
		t.Fatalf("load config: %v", err)
	}

	// Cases are evaluated top to bottom; the first unset default fails the test.
	switch {
	case loaded.Coordination.StartupGuardMaxAgeSec <= 0:
		t.Fatalf("expected startup guard max age default > 0, got %d", loaded.Coordination.StartupGuardMaxAgeSec)
	case loaded.Startup.EtcdRestoreControlPlane == "":
		t.Fatal("expected startup etcd restore control plane default to be set")
	case loaded.Startup.TimeSyncMode == "":
		t.Fatal("expected startup time sync mode default to be set")
	case loaded.Startup.VaultUnsealKeyFile == "":
		t.Fatal("expected startup vault unseal key file default to be set")
	case loaded.Startup.ShutdownCooldownSeconds <= 0:
		t.Fatalf("expected startup shutdown cooldown default > 0, got %d", loaded.Startup.ShutdownCooldownSeconds)
	case loaded.Startup.VaultUnsealBreakglassTimeout <= 0:
		t.Fatalf("expected startup break-glass timeout default > 0, got %d", loaded.Startup.VaultUnsealBreakglassTimeout)
	}
}

// TestValidateRejectsInvalidStartupShutdownCooldown expects Validate to fail when
// the startup shutdown cooldown is zero seconds.
// Signature: TestValidateRejectsInvalidStartupShutdownCooldown(t *testing.T).
// Why: a zero shutdown_cooldown_seconds must be reported as invalid.
func TestValidateRejectsInvalidStartupShutdownCooldown(t *testing.T) {
	c := defaults()
	c.Startup.ShutdownCooldownSeconds = 0

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid startup shutdown_cooldown_seconds")
	}
}

// TestValidateRejectsInvalidTimeSyncMode expects Validate to fail when the startup
// time sync mode is set to the string "invalid".
// Signature: TestValidateRejectsInvalidTimeSyncMode(t *testing.T).
// Why: an unsupported time_sync_mode must be rejected up front.
func TestValidateRejectsInvalidTimeSyncMode(t *testing.T) {
	c := defaults()
	c.Startup.TimeSyncMode = "invalid"

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid time_sync_mode")
	}
}

// TestValidateRejectsBadStoragePVCFormat expects Validate to fail for a
// storage_critical_pvcs entry that contains no "/" separator.
// Signature: TestValidateRejectsBadStoragePVCFormat(t *testing.T).
// Why: malformed PVC references must be caught by validation.
// NOTE(review): presumably entries must look like namespace/name — confirm against Validate.
func TestValidateRejectsBadStoragePVCFormat(t *testing.T) {
	c := defaults()
	malformed := []string{"vault-data-vault-0"}
	c.Startup.StorageCriticalPVCs = malformed

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid storage_critical_pvcs entry")
	}
}

// TestValidateRejectsMissingPostStartProbesWhenRequired expects Validate to fail
// when post-start probes are required but the probe list is nil.
// Signature: TestValidateRejectsMissingPostStartProbesWhenRequired(t *testing.T).
// Why: requiring probes while configuring none must be flagged as an error.
func TestValidateRejectsMissingPostStartProbesWhenRequired(t *testing.T) {
	c := defaults()
	c.Startup.PostStartProbes = nil
	c.Startup.RequirePostStartProbes = true

	if c.Validate() == nil {
		t.Fatal("expected validation error when post start probes are required but empty")
	}
}

// TestValidateRejectsMissingServiceChecklistWhenRequired expects Validate to fail
// when the service checklist is required but the checklist is nil.
// Signature: TestValidateRejectsMissingServiceChecklistWhenRequired(t *testing.T).
// Why: requiring a checklist while configuring none must be flagged as an error.
func TestValidateRejectsMissingServiceChecklistWhenRequired(t *testing.T) {
	c := defaults()
	c.Startup.ServiceChecklist = nil
	c.Startup.RequireServiceChecklist = true

	if c.Validate() == nil {
		t.Fatal("expected validation error when service checklist is required but empty")
	}
}

// TestValidateRejectsBadServiceChecklistURL runs one orchestration or CLI step.
// Signature: TestValidateRejectsBadServiceChecklistURL(t *testing.T).
// Why: keeps behavior explicit so startup/shutdown workflows remain maintainable as services evolve.
func TestValidateRejectsBadServiceChecklistURL(t *testing.T) {
	cfg := defaults()
	cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
		{
			Name:             "grafana",
			URL:              "not-a-url",
			AcceptedStatuses: []int{200},
			TimeoutSeconds:   12,
		},
	}
	if err := cfg.Validate(); err == nil {
		t.Fatalf("expected validation error for invalid service checklist url")
	}
}

// TestValidateRejectsUnknownServiceChecklistAuthMode expects Validate to fail when
// the service checklist auth mode is set to "bad-mode".
// Signature: TestValidateRejectsUnknownServiceChecklistAuthMode(t *testing.T).
// Why: authenticated checklist gates should fail fast on an invalid auth mode so
// a startup run cannot silently pass on a false positive.
func TestValidateRejectsUnknownServiceChecklistAuthMode(t *testing.T) {
	c := defaults()
	c.Startup.ServiceChecklistAuth.Mode = "bad-mode"

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid service checklist auth mode")
	}
}

// TestValidateRejectsFinalURLMarkersWithoutRedirectFollow runs one orchestration or CLI step.
// Signature: TestValidateRejectsFinalURLMarkersWithoutRedirectFollow(t *testing.T).
// Why: final-url assertions only make sense when redirect following is enabled.
func TestValidateRejectsFinalURLMarkersWithoutRedirectFollow(t *testing.T) {
	cfg := defaults()
	cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
		{
			Name:             "bad-final-url",
			URL:              "https://logs.bstein.dev/",
			AcceptedStatuses: []int{200},
			FinalURLContains: "/app/home",
			TimeoutSeconds:   12,
		},
	}
	if err := cfg.Validate(); err == nil {
		t.Fatalf("expected validation error for final_url_* markers without redirect follow")
	}
}

// TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled runs one orchestration or CLI step.
// Signature: TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled(t *testing.T).
// Why: robot-auth checks must be blocked when checklist auth mode is disabled.
func TestValidateRejectsRobotAuthCheckWhenAuthModeDisabled(t *testing.T) {
	cfg := defaults()
	cfg.Startup.ServiceChecklistAuth.Mode = "none"
	cfg.Startup.ServiceChecklist = []ServiceChecklistCheck{
		{
			Name:             "logs-ui",
			URL:              "https://logs.bstein.dev/",
			AcceptedStatuses: []int{200},
			RequireRobotAuth: true,
			FollowRedirects:  true,
			TimeoutSeconds:   12,
		},
	}
	if err := cfg.Validate(); err == nil {
		t.Fatalf("expected validation error for robot-auth checklist check when auth mode is none")
	}
}

// TestValidateRejectsBadIgnoreFluxKustomizationFormat expects Validate to fail for
// an ignore_flux_kustomizations entry that contains no "/" separator.
// Signature: TestValidateRejectsBadIgnoreFluxKustomizationFormat(t *testing.T).
// Why: malformed ignore entries must be caught by validation.
// NOTE(review): presumably entries must look like namespace/name — confirm against Validate.
func TestValidateRejectsBadIgnoreFluxKustomizationFormat(t *testing.T) {
	c := defaults()
	malformed := []string{"jellyfin"}
	c.Startup.IgnoreFluxKustomizations = malformed

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid ignore_flux_kustomizations entry")
	}
}

// TestValidateRejectsBadIgnoreWorkloadFormat expects Validate to fail for an
// ignore_workloads entry made of four "/"-separated segments.
// Signature: TestValidateRejectsBadIgnoreWorkloadFormat(t *testing.T).
// Why: malformed ignore entries must be caught by validation.
// NOTE(review): presumably four segments exceed the accepted format — confirm against Validate.
func TestValidateRejectsBadIgnoreWorkloadFormat(t *testing.T) {
	c := defaults()
	malformed := []string{"maintenance/metis/extra/value"}
	c.Startup.IgnoreWorkloads = malformed

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid ignore_workloads entry")
	}
}

// TestValidateRejectsInvalidRequiredNodeLabel expects Validate to fail when a node's
// required labels include an empty label key.
// Signature: TestValidateRejectsInvalidRequiredNodeLabel(t *testing.T).
// Why: a required_node_labels entry with an empty key must be rejected.
func TestValidateRejectsInvalidRequiredNodeLabel(t *testing.T) {
	// The label key is the empty string; the value is a normal "true".
	emptyKey := map[string]string{"": "true"}

	c := defaults()
	c.Startup.RequiredNodeLabels = map[string]map[string]string{"titan-09": emptyKey}

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid required_node_labels entry")
	}
}

// TestValidateRejectsInvalidNodeInventoryReachWindow expects Validate to fail when
// the node inventory reachability wait is zero seconds.
// Signature: TestValidateRejectsInvalidNodeInventoryReachWindow(t *testing.T).
// Why: a zero node_inventory_reachability_wait_seconds must be reported as invalid.
func TestValidateRejectsInvalidNodeInventoryReachWindow(t *testing.T) {
	c := defaults()
	c.Startup.NodeInventoryReachWaitSeconds = 0

	if c.Validate() == nil {
		t.Fatal("expected validation error for invalid node_inventory_reachability_wait_seconds")
	}
}

// TestValidateRejectsMissingReportsDir expects Validate to fail when the state
// reports directory is the empty string.
// Signature: TestValidateRejectsMissingReportsDir(t *testing.T).
// Why: a config without state.reports_dir must not pass validation.
func TestValidateRejectsMissingReportsDir(t *testing.T) {
	c := defaults()
	c.State.ReportsDir = ""

	if c.Validate() == nil {
		t.Fatal("expected validation error for missing state.reports_dir")
	}
}

// TestApplyDefaultsMergesServiceChecklistDefaults runs one orchestration or CLI step.
// Signature: TestApplyDefaultsMergesServiceChecklistDefaults(t *testing.T).
// Why: host configs may define a partial checklist; startup still needs the
// baseline service validations learned from drills.
func TestApplyDefaultsMergesServiceChecklistDefaults(t *testing.T) {
	cfg := Config{
		Startup: Startup{
			ServiceChecklist: []ServiceChecklistCheck{
				{
					Name:           "custom-smoke",
					URL:            "https://example.invalid/healthz",
					TimeoutSeconds: 7,
				},
			},
		},
	}
	cfg.applyDefaults()

	names := map[string]struct{}{}
	for _, check := range cfg.Startup.ServiceChecklist {
		names[check.Name] = struct{}{}
	}
	if _, ok := names["custom-smoke"]; !ok {
		t.Fatalf("expected custom checklist entry to be preserved")
	}
	if _, ok := names["logging-ui-user-session"]; !ok {
		t.Fatalf("expected default logging user-session check to be merged in")
	}
	if _, ok := names["vaultwarden-ui"]; !ok {
		t.Fatalf("expected default vaultwarden check to be merged in")
	}
}

// TestApplyDefaultsMergesCriticalServiceEndpointDefaults checks that applyDefaults
// keeps a caller-supplied critical endpoint and adds the default endpoints.
// Signature: TestApplyDefaultsMergesCriticalServiceEndpointDefaults(t *testing.T).
// Why: startup endpoint gating must keep the baseline backend checks even when a
// host config lists only a subset.
func TestApplyDefaultsMergesCriticalServiceEndpointDefaults(t *testing.T) {
	var cfg Config
	cfg.Startup.CriticalServiceEndpoints = []string{"customns/customsvc"}
	cfg.applyDefaults()

	// Index the merged endpoint list for the lookups below.
	merged := cfg.Startup.CriticalServiceEndpoints
	present := make(map[string]bool, len(merged))
	for i := range merged {
		present[merged[i]] = true
	}

	// Checked in order; the first missing endpoint fails the test with its message.
	expectations := []struct {
		endpoint string
		failure  string
	}{
		{"customns/customsvc", "expected custom critical endpoint to be preserved"},
		{"logging/opensearch-dashboards", "expected logging/opensearch-dashboards critical endpoint default"},
		{"monitoring/victoria-metrics-single-server", "expected monitoring/victoria-metrics-single-server critical endpoint default"},
	}
	for _, want := range expectations {
		if !present[want.endpoint] {
			t.Fatal(want.failure)
		}
	}
}