package orchestrator

import (
	"context"
	"errors"
	"io"
	"log"
	"net"
	"os"
	"strconv"
	"strings"
	"testing"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/cluster"
	"scm.bstein.dev/bstein/ananke/internal/config"
	"scm.bstein.dev/bstein/ananke/internal/execx"
	"scm.bstein.dev/bstein/ananke/internal/state"
)

// newLifecycleSaturationOrchestrator builds an orchestrator wired with a
// deterministic command override for lifecycle branch tests.
// Signature: newLifecycleSaturationOrchestrator(t *testing.T, cfg config.Config, run commandOverride) *cluster.Orchestrator.
// Why: lifecycle branch saturation needs deterministic command behavior while
// preserving real intent/lock/file semantics.
func newLifecycleSaturationOrchestrator(
	t *testing.T,
	cfg config.Config,
	run func(context.Context, time.Duration, string, ...string) (string, error),
) *cluster.Orchestrator {
	t.Helper()
	// Default to the canned dispatcher so callers only supply a custom
	// override when a subtest needs to force a specific failure.
	if run == nil {
		run = lifecycleDispatcher(&commandRecorder{})
	}
	orch := cluster.New(cfg, &execx.Runner{DryRun: false}, state.New(cfg.State.RunHistoryPath), log.New(io.Discard, "", 0))
	orch.SetCommandOverrides(run, run)
	return orch
}

// TestLifecycleStartupBranchSaturation drives startup through its main
// error/safety branches so lifecycle coverage reflects realistic drill
// failure modes.
func TestLifecycleStartupBranchSaturation(t *testing.T) {
	t.Run("read-intent-error-branch", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		if err := state.WriteIntent(cfg.State.IntentPath, state.Intent{
			State:     state.IntentNormal,
			Reason:    "seed",
			Source:    "test",
			UpdatedAt: time.Now().UTC(),
		}); err != nil {
			t.Fatalf("seed intent: %v", err)
		}
		// Replace intent file with directory so ReadIntent fails.
		if err := osRemove(cfg.State.IntentPath); err != nil {
			t.Fatalf("remove intent file: %v", err)
		}
		if err := osMkdir(cfg.State.IntentPath); err != nil {
			t.Fatalf("make intent dir: %v", err)
		}
		orch := newLifecycleSaturationOrchestrator(t, cfg, nil)
		if err := orch.Startup(context.Background(), cluster.StartupOptions{Reason: "intent-read-error"}); err == nil {
			t.Fatalf("expected startup to fail when intent path is unreadable")
		}
	})

	t.Run("fresh-shutdown-intent-blocks", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		if err := state.WriteIntent(cfg.State.IntentPath, state.Intent{
			State:     state.IntentShuttingDown,
			Reason:    "active-shutdown",
			Source:    "test",
			UpdatedAt: time.Now().UTC(),
		}); err != nil {
			t.Fatalf("write shutdown intent: %v", err)
		}
		orch := newLifecycleSaturationOrchestrator(t, cfg, nil)
		err := orch.Startup(context.Background(), cluster.StartupOptions{Reason: "blocked-by-shutdown"})
		if err == nil || !strings.Contains(err.Error(), "startup blocked: shutdown intent is active") {
			t.Fatalf("expected active shutdown intent block, got %v", err)
		}
	})

	t.Run("cooldown-cancel-branch", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.Startup.ShutdownCooldownSeconds = 20
		if err := state.WriteIntent(cfg.State.IntentPath, state.Intent{
			State:     state.IntentShutdownComplete,
			Reason:    "just-finished",
			Source:    "test",
			UpdatedAt: time.Now().UTC(),
		}); err != nil {
			t.Fatalf("write cooldown intent: %v", err)
		}
		orch := newLifecycleSaturationOrchestrator(t, cfg, nil)
		ctx, cancel := context.WithCancel(context.Background())
		// Cancel shortly after startup begins so the 20s cooldown wait is
		// interrupted rather than completed.
		go func() {
			time.Sleep(20 * time.Millisecond)
			cancel()
		}()
		err := orch.Startup(ctx, cluster.StartupOptions{Reason: "cooldown-cancel"})
		if err == nil || !strings.Contains(err.Error(), "startup canceled while waiting for shutdown cooldown") {
			t.Fatalf("expected cooldown cancel branch, got %v", err)
		}
	})

	t.Run("api-failure-without-restore", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.Startup.AutoEtcdRestoreOnAPIFailure = false
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			if name == "kubectl" && strings.Contains(command, "version --request-timeout=5s") {
				return "", errors.New("apiserver down")
			}
			return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
		}
		orch := newLifecycleSaturationOrchestrator(t, cfg, run)
		err := orch.Startup(context.Background(), cluster.StartupOptions{Reason: "api-fail-no-restore"})
		if err == nil || !strings.Contains(err.Error(), "kubernetes API did not become reachable") {
			t.Fatalf("expected api wait failure, got %v", err)
		}
	})

	t.Run("api-failure-restore-not-applicable-retries", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.Startup.AutoEtcdRestoreOnAPIFailure = true
		cfg.Startup.EtcdRestoreControlPlane = "titan-db"
		// A live local listener stands in for the external datastore so the
		// restore-applicability probe sees a reachable endpoint.
		l, err := net.Listen("tcp", "127.0.0.1:0")
		if err != nil {
			t.Fatalf("open local datastore listener: %v", err)
		}
		defer l.Close()
		port := l.Addr().(*net.TCPAddr).Port
		attempt := 0
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "version --request-timeout=5s"):
				// Fail the first probe only, so startup exercises the retry path.
				attempt++
				if attempt <= 1 {
					return "", errors.New("apiserver down")
				}
				return "v1.31.0", nil
			case name == "ssh" && strings.Contains(command, "systemctl cat k3s"):
				// External datastore => etcd restore is not applicable.
				return "ExecStart=/usr/local/bin/k3s server --datastore-endpoint=postgres://127.0.0.1:" + strconv.Itoa(port) + "/k3s", nil
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orch := newLifecycleSaturationOrchestrator(t, cfg, run)
		if err := orch.Startup(context.Background(), cluster.StartupOptions{Reason: "api-restore-not-applicable"}); err != nil {
			t.Fatalf("expected startup success after retry, got %v", err)
		}
	})

	t.Run("bootstrap-required-and-cache-missing-fails", func(t *testing.T) {
		cfg := lifecycleConfig(t)
		cfg.IACRepoPath = t.TempDir()
		cfg.LocalBootstrapPaths = []string{"services/bootstrap"}
		run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) {
			command := name + " " + strings.Join(args, " ")
			switch {
			case name == "kubectl" && strings.Contains(command, "jsonpath={.status.conditions[?(@.type==\"Ready\")].status}"):
				return "", errors.New("flux source unavailable")
			case name == "kubectl" && strings.Contains(command, " apply -k "):
				return "", errors.New("apply failed")
			case name == "sh" && strings.Contains(command, "kubectl kustomize"):
				return "", errors.New("fallback failed")
			default:
				return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...)
			}
		}
		orch := newLifecycleSaturationOrchestrator(t, cfg, run)
		err := orch.Startup(context.Background(), cluster.StartupOptions{Reason: "bootstrap-required"})
		if err == nil || !strings.Contains(err.Error(), "local bootstrap apply failed") {
			t.Fatalf("expected bootstrap failure, got %v", err)
		}
	})
}

// TestLifecycleEtcdRestoreAndShutdownBranchSaturation covers restore/shutdown
// branch paths that are difficult to hit from a single happy-path drill.
func TestLifecycleEtcdRestoreAndShutdownBranchSaturation(t *testing.T) { t.Run("etcd-restore-input-validation", func(t *testing.T) { cfg := lifecycleConfig(t) cfg.ControlPlanes = nil orch := newLifecycleSaturationOrchestrator(t, cfg, nil) if err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{}); err == nil { t.Fatalf("expected restore error with no control planes") } }) t.Run("etcd-restore-unmanaged-node", func(t *testing.T) { cfg := lifecycleConfig(t) cfg.SSHManagedNodes = []string{"titan-23"} orch := newLifecycleSaturationOrchestrator(t, cfg, nil) if err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"}); err == nil { t.Fatalf("expected unmanaged control plane restore error") } }) t.Run("etcd-restore-command-failure", func(t *testing.T) { cfg := lifecycleConfig(t) run := func(ctx context.Context, timeout time.Duration, name string, args ...string) (string, error) { command := name + " " + strings.Join(args, " ") switch { case name == "ssh" && strings.Contains(command, "systemctl cat k3s"): return "ExecStart=/usr/local/bin/k3s server", nil case name == "ssh" && strings.Contains(command, "etcd-snapshot ls"): return "/var/lib/rancher/k3s/server/db/snapshots/pre-shutdown", nil case name == "ssh" && strings.Contains(command, "stat -c %s"): return "2097152", nil case name == "ssh" && strings.Contains(command, "sha256sum"): return "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", nil case name == "ssh" && strings.Contains(command, "server --cluster-reset"): return "", errors.New("cluster reset failed") default: return lifecycleDispatcher(&commandRecorder{})(ctx, timeout, name, args...) 
} } orch := newLifecycleSaturationOrchestrator(t, cfg, run) err := orch.EtcdRestore(context.Background(), cluster.EtcdRestoreOptions{ControlPlane: "titan-db"}) if err == nil || !strings.Contains(err.Error(), "etcd restore command failed") { t.Fatalf("expected restore command failure branch, got %v", err) } }) t.Run("shutdown-invalid-mode", func(t *testing.T) { cfg := lifecycleConfig(t) orch := newLifecycleSaturationOrchestrator(t, cfg, nil) if err := orch.Shutdown(context.Background(), cluster.ShutdownOptions{Reason: "bad-mode", Mode: "invalid"}); err == nil { t.Fatalf("expected shutdown mode validation error") } }) } // osRemove runs one orchestration or CLI step. // Signature: osRemove(path string) error. // Why: keeps error handling explicit in lifecycle branch tests without repeated ignore logic. func osRemove(path string) error { return os.Remove(path) } // osMkdir runs one orchestration or CLI step. // Signature: osMkdir(path string) error. // Why: keeps branch setup concise in lifecycle branch tests. func osMkdir(path string) error { return os.Mkdir(path, 0o755) }