package cluster

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/state"
)

// EstimatedShutdownSeconds returns the shutdown duration estimate, in seconds,
// that the state store computes from the configured shutdown settings.
// Signature: (o *Orchestrator) EstimatedShutdownSeconds() int.
// Why: gives callers one place that pairs the configured default budget with
// the configured minimum history sample count. How the store combines the two
// (presumably a p95 over history with the budget as fallback) lives in the
// state package — confirm there.
func (o *Orchestrator) EstimatedShutdownSeconds() int {
	budget := o.cfg.Shutdown.DefaultBudgetSeconds
	minSamples := o.cfg.Shutdown.HistoryMinSamples
	return o.store.ShutdownP95WithMinSamples(budget, minSamples)
}

// EstimatedEmergencyShutdownSeconds returns the shutdown duration estimate, in
// seconds, that the state store computes for runs whose reason starts with one
// of the emergency prefixes ("ups-", "emergency-", "drill-emergency").
// Signature: (o *Orchestrator) EstimatedEmergencyShutdownSeconds() int.
// Why: emergency shutdowns use their own configured budget and minimum sample
// count, so they are estimated separately from routine shutdowns.
func (o *Orchestrator) EstimatedEmergencyShutdownSeconds() int {
	emergencyPrefixes := []string{"ups-", "emergency-", "drill-emergency"}
	budget := o.cfg.Shutdown.EmergencyBudgetSec
	minSamples := o.cfg.Shutdown.EmergencyMinSamples
	return o.store.ShutdownP95ByReasonPrefix(budget, minSamples, emergencyPrefixes)
}

// finalizeRecord stamps the end time, duration, and outcome onto record, then
// appends it to the state store and writes it out as a report artifact.
// Signature: (o *Orchestrator) finalizeRecord(record *state.RunRecord, err *error).
// Why: err is a pointer so the error value is read when this function runs
// rather than when the call is set up (presumably from a defer — confirm at
// call sites). Persistence failures are only logged as warnings; they never
// change the outcome of the run itself.
func (o *Orchestrator) finalizeRecord(record *state.RunRecord, err *error) {
	endedAt := time.Now().UTC()
	record.EndedAt = endedAt
	record.DurationSeconds = int(endedAt.Sub(record.StartedAt).Seconds())

	runErr := *err
	record.Success = runErr == nil
	if !record.Success {
		record.Error = runErr.Error()
	}

	// Both persistence steps are best-effort and independent of each other.
	appendErr := o.store.Append(*record)
	if appendErr != nil {
		o.log.Printf("warning: append run record failed: %v", appendErr)
	}
	artifactErr := o.writeRunRecordArtifact(*record)
	if artifactErr != nil {
		o.log.Printf("warning: write run artifact failed: %v", artifactErr)
	}
}

// startupReportPath returns the location of the final startup report,
// "last-startup-report.json" inside the configured state directory.
// Signature: (o *Orchestrator) startupReportPath() string.
// Why: a single definition keeps every reader and writer of the final report
// pointed at the same file.
func (o *Orchestrator) startupReportPath() string {
	const fileName = "last-startup-report.json"
	return filepath.Join(o.cfg.State.Dir, fileName)
}

// startupProgressPath returns the location of the in-flight startup snapshot,
// "startup-progress.json" inside the configured state directory.
// Signature: (o *Orchestrator) startupProgressPath() string.
// Why: status polling during long startup drills needs a live, on-disk snapshot
// instead of waiting for final completion artifacts.
func (o *Orchestrator) startupProgressPath() string {
	const fileName = "startup-progress.json"
	return filepath.Join(o.cfg.State.Dir, fileName)
}

// lastShutdownReportPath returns the location of the most recent shutdown
// report, "last-shutdown-report.json" inside the configured state directory.
// Signature: (o *Orchestrator) lastShutdownReportPath() string.
// Why: the latest shutdown record is kept at a fixed, well-known path in
// addition to the per-run archive copy.
func (o *Orchestrator) lastShutdownReportPath() string {
	const fileName = "last-shutdown-report.json"
	return filepath.Join(o.cfg.State.Dir, fileName)
}

// reportArchiveDir returns the directory that holds archived run and startup
// reports: the configured reports directory when it is non-blank after
// trimming whitespace, otherwise "reports" under the state directory.
// Signature: (o *Orchestrator) reportArchiveDir() string.
// Why: operators may redirect archives elsewhere, but an unset or
// whitespace-only setting must still resolve to a usable default.
func (o *Orchestrator) reportArchiveDir() string {
	if configured := strings.TrimSpace(o.cfg.State.ReportsDir); configured != "" {
		return configured
	}
	return filepath.Join(o.cfg.State.Dir, "reports")
}

// writeRunRecordArtifact serializes record as indented JSON and writes it to
// "<sanitized record ID>.json" in the report archive directory. When the
// record's Action is "shutdown", the same bytes are also written to the
// last-shutdown report path.
// Signature: (o *Orchestrator) writeRunRecordArtifact(record state.RunRecord) error.
// Why: Every drill should leave durable timing and outcome evidence so we can
// compare runs and tighten automation without relying on terminal history.
//
// Returns a wrapped error from the first directory creation, encoding, or file
// write that fails; later steps are not attempted after a failure.
func (o *Orchestrator) writeRunRecordArtifact(record state.RunRecord) error {
	dir := o.reportArchiveDir()
	if err := os.MkdirAll(dir, 0o750); err != nil {
		return fmt.Errorf("ensure report archive dir %s: %w", dir, err)
	}
	blob, err := json.MarshalIndent(record, "", "  ")
	if err != nil {
		return fmt.Errorf("encode run record %s: %w", record.ID, err)
	}
	reportPath := filepath.Join(dir, sanitizeReportFileName(record.ID)+".json")
	if err := os.WriteFile(reportPath, blob, 0o640); err != nil {
		return fmt.Errorf("write run artifact %s: %w", reportPath, err)
	}
	if record.Action != "shutdown" {
		return nil
	}

	// The last-shutdown report lives in the state directory, which is not
	// necessarily the archive directory created above (the archive location
	// can be configured separately). Create its parent explicitly so the write
	// does not fail on a missing directory.
	lastPath := o.lastShutdownReportPath()
	lastDir := filepath.Dir(lastPath)
	if err := os.MkdirAll(lastDir, 0o750); err != nil {
		return fmt.Errorf("ensure last shutdown report dir %s: %w", lastDir, err)
	}
	if err := os.WriteFile(lastPath, blob, 0o640); err != nil {
		return fmt.Errorf("write last shutdown report: %w", err)
	}
	return nil
}

// sanitizeReportFileName converts value into a file-name-safe token. Leading
// and trailing whitespace is removed, every rune other than ASCII letters,
// ASCII digits, '-' and '_' becomes '_', and underscores are then stripped
// from both ends. A blank input, or one that reduces to nothing, yields "run".
// Signature: sanitizeReportFileName(value string) string.
// Why: IDs can include punctuation from external inputs; sanitize to keep report
// filenames portable and predictable across hosts.
func sanitizeReportFileName(value string) string {
	const fallback = "run"

	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return fallback
	}

	// keepOrUnderscore passes allowed runes through and maps the rest to '_'.
	keepOrUnderscore := func(r rune) rune {
		isLower := 'a' <= r && r <= 'z'
		isUpper := 'A' <= r && r <= 'Z'
		isDigit := '0' <= r && r <= '9'
		if isLower || isUpper || isDigit || r == '-' || r == '_' {
			return r
		}
		return '_'
	}

	if name := strings.Trim(strings.Map(keepOrUnderscore, trimmed), "_"); name != "" {
		return name
	}
	return fallback
}

// cloneStartupReport returns a copy of report whose Checks map and AutoHeals
// slice are freshly allocated, or nil when report is nil. The copy always has
// a non-nil Checks map and a non-nil AutoHeals slice, even when the source
// fields were nil. Check records and the remaining fields are copied by value.
// Signature: cloneStartupReport(report *startupReport) *startupReport.
// Why: a status poll can occur while startup is active; clone protects readers
// from observing partially-mutated map/slice state.
func cloneStartupReport(report *startupReport) *startupReport {
	if report == nil {
		return nil
	}

	dup := new(startupReport)
	*dup = *report

	checks := make(map[string]startupCheckRecord, len(report.Checks))
	for key, rec := range report.Checks {
		checks[key] = rec
	}
	dup.Checks = checks

	heals := make([]string, len(report.AutoHeals))
	copy(heals, report.AutoHeals)
	dup.AutoHeals = heals

	return dup
}

// writeStartupReportFile encodes report as indented JSON and installs it at
// path, creating the parent directory if needed. A nil report is a no-op that
// returns nil.
// Signature: (o *Orchestrator) writeStartupReportFile(path string, report *startupReport) error.
// Why: startup status needs one shared serializer so progress and final reports
// stay consistent for both humans and automation.
//
// The bytes are written to a uniquely named temporary file in the same
// directory and then renamed over path. Writing in place would truncate the
// file first, so a status poll reading it mid-write could see empty or partial
// JSON, and two overlapping writers could interleave their output.
//
// Returns a wrapped error if the directory cannot be created, the report
// cannot be encoded, or the file cannot be written and moved into place.
func (o *Orchestrator) writeStartupReportFile(path string, report *startupReport) error {
	if report == nil {
		return nil
	}
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o750); err != nil {
		return fmt.Errorf("ensure startup report dir %s: %w", dir, err)
	}
	b, err := json.MarshalIndent(report, "", "  ")
	if err != nil {
		return fmt.Errorf("encode startup report: %w", err)
	}

	// The temp file must share a directory with path so the rename never
	// crosses a filesystem boundary.
	tmp, err := os.CreateTemp(dir, filepath.Base(path)+".*.tmp")
	if err != nil {
		return fmt.Errorf("write startup report %s: %w", path, err)
	}
	tmpPath := tmp.Name()

	_, writeErr := tmp.Write(b)
	if writeErr == nil {
		// CreateTemp uses mode 0600; set the 0640 mode reports are written with.
		writeErr = tmp.Chmod(0o640)
	}
	// Always close the handle, but keep the earliest failure as the cause.
	if closeErr := tmp.Close(); writeErr == nil {
		writeErr = closeErr
	}
	if writeErr == nil {
		writeErr = os.Rename(tmpPath, path)
	}
	if writeErr != nil {
		// Best-effort cleanup; the original failure is what gets reported.
		_ = os.Remove(tmpPath)
		return fmt.Errorf("write startup report %s: %w", path, writeErr)
	}
	return nil
}

// persistStartupProgress writes report to the startup progress file and logs a
// warning if that write fails. A nil report is ignored.
// Signature: (o *Orchestrator) persistStartupProgress(report *startupReport).
// Why: status polling must reflect in-flight checklist changes instead of only
// a terminal success/failure snapshot.
func (o *Orchestrator) persistStartupProgress(report *startupReport) {
	if report == nil {
		return
	}
	err := o.writeStartupReportFile(o.startupProgressPath(), report)
	if err == nil {
		return
	}
	o.log.Printf("warning: persist startup progress failed: %v", err)
}

// setStartupPhase updates the active startup report and persists a snapshot of
// it. A non-blank phase replaces the report's Phase; a non-blank detail is
// stored as a "running" check under the "phase" key. LastUpdated is refreshed
// in either case. Both arguments are whitespace-trimmed first. Nothing happens
// when no startup report is active.
// Signature: (o *Orchestrator) setStartupPhase(phase string, detail string).
// Why: operators need to see where startup is paused without reading controller
// logs line-by-line during a drill.
func (o *Orchestrator) setStartupPhase(phase string, detail string) {
	phase, detail = strings.TrimSpace(phase), strings.TrimSpace(detail)

	// Mutate and clone under the lock; the file write happens after release.
	snapshot := func() *startupReport {
		o.startupReportMu.Lock()
		defer o.startupReportMu.Unlock()

		active := o.activeStartupReport
		if active == nil {
			return nil
		}
		if phase != "" {
			active.Phase = phase
		}
		stamp := time.Now().UTC()
		active.LastUpdated = stamp
		if detail != "" {
			active.Checks["phase"] = startupCheckRecord{
				Status:    "running",
				Detail:    detail,
				UpdatedAt: stamp,
			}
		}
		return cloneStartupReport(active)
	}()
	if snapshot == nil {
		return
	}
	o.persistStartupProgress(snapshot)
}

// beginStartupReport replaces the active startup report with a fresh one in
// the "running" status and "startup-initializing" phase. It records the
// trimmed reason, the local hostname, and the current UTC time, then persists
// a snapshot to the progress file.
// Signature: (o *Orchestrator) beginStartupReport(reason string).
// Why: each startup run starts from an empty checklist, and the progress file
// reflects the new run from its first moment.
func (o *Orchestrator) beginStartupReport(reason string) {
	// The hostname is informational only, so a lookup error is ignored.
	host, _ := os.Hostname()

	// Install and clone under the lock; the file write happens after release.
	snapshot := func() *startupReport {
		o.startupReportMu.Lock()
		defer o.startupReportMu.Unlock()

		startedAt := time.Now().UTC()
		fresh := &startupReport{
			StartedAt:   startedAt,
			Reason:      strings.TrimSpace(reason),
			Status:      "running",
			Phase:       "startup-initializing",
			Checks:      map[string]startupCheckRecord{},
			AutoHeals:   []string{},
			SourceHost:  strings.TrimSpace(host),
			LastUpdated: startedAt,
		}
		o.activeStartupReport = fresh
		return cloneStartupReport(fresh)
	}()
	o.persistStartupProgress(snapshot)
}

// noteStartupCheckState records a check result under name in the active
// startup report, refreshes LastUpdated, and persists a snapshot. All three
// arguments are whitespace-trimmed. A blank name is ignored, and any status
// other than "passed" or "failed" is stored as "running". Nothing happens when
// no startup report is active.
// Signature: (o *Orchestrator) noteStartupCheckState(name, status, detail string).
// Why: startup checks can be running, passed, or failed; explicit state improves
// checklist introspection from the CLI while startup is still executing.
func (o *Orchestrator) noteStartupCheckState(name, status, detail string) {
	name = strings.TrimSpace(name)
	if name == "" {
		return
	}
	status = strings.TrimSpace(status)
	detail = strings.TrimSpace(detail)
	// Only the two terminal states are kept as given; anything else counts as
	// still running.
	if status != "passed" && status != "failed" {
		status = "running"
	}

	// Mutate and clone under the lock; the file write happens after release.
	snapshot := func() *startupReport {
		o.startupReportMu.Lock()
		defer o.startupReportMu.Unlock()

		active := o.activeStartupReport
		if active == nil {
			return nil
		}
		stamp := time.Now().UTC()
		active.LastUpdated = stamp
		active.Checks[name] = startupCheckRecord{
			Status:    status,
			Detail:    detail,
			UpdatedAt: stamp,
		}
		return cloneStartupReport(active)
	}()
	if snapshot == nil {
		return
	}
	o.persistStartupProgress(snapshot)
}

// noteStartupCheck records a finished check: success maps to the "passed"
// status and anything else to "failed". The actual recording is delegated to
// noteStartupCheckState.
// Signature: (o *Orchestrator) noteStartupCheck(name string, success bool, detail string).
// Why: most call sites only know pass/fail, so they should not have to spell
// out status strings.
func (o *Orchestrator) noteStartupCheck(name string, success bool, detail string) {
	if success {
		o.noteStartupCheckState(name, "passed", detail)
		return
	}
	o.noteStartupCheckState(name, "failed", detail)
}

// noteStartupAutoHeal appends the trimmed detail to the active startup
// report's AutoHeals list, refreshes LastUpdated, and persists a snapshot.
// Nothing happens when no startup report is active or detail is blank.
// Signature: (o *Orchestrator) noteStartupAutoHeal(detail string).
// Why: corrective actions taken during startup should show up in the same
// report operators already poll.
func (o *Orchestrator) noteStartupAutoHeal(detail string) {
	detail = strings.TrimSpace(detail)

	// Mutate and clone under the lock; the file write happens after release.
	snapshot := func() *startupReport {
		o.startupReportMu.Lock()
		defer o.startupReportMu.Unlock()

		active := o.activeStartupReport
		if active == nil || detail == "" {
			return nil
		}
		active.AutoHeals = append(active.AutoHeals, detail)
		active.LastUpdated = time.Now().UTC()
		return cloneStartupReport(active)
	}()
	if snapshot == nil {
		return
	}
	o.persistStartupProgress(snapshot)
}

// finalizeStartupReportSnapshot returns a clone of report with completion
// metadata filled in; the input report is left unmodified. Completed and
// LastUpdated are set to the current UTC time. When runErr is nil the clone is
// marked as status "success" and phase "complete"; otherwise both are "failed"
// and Error holds the trimmed error text. A nil report yields nil.
// Signature: (o *Orchestrator) finalizeStartupReportSnapshot(report *startupReport, runErr error) *startupReport.
// Why: completion metadata should be consistent across both final and progress
// report files so operators can trust whichever file they read.
func (o *Orchestrator) finalizeStartupReportSnapshot(report *startupReport, runErr error) *startupReport {
	final := cloneStartupReport(report)
	if final == nil {
		return nil
	}

	completedAt := time.Now().UTC()
	final.Completed = completedAt
	final.LastUpdated = completedAt
	final.Success = runErr == nil

	if runErr == nil {
		final.Status, final.Phase = "success", "complete"
		return final
	}
	final.Status, final.Phase = "failed", "failed"
	final.Error = strings.TrimSpace(runErr.Error())
	return final
}

// finalizeStartupReport detaches the active startup report, stamps it with the
// run outcome, and writes it to three places: the final startup report path,
// the progress file, and a timestamped copy in the report archive directory.
// Nothing happens when no startup report is active.
// Signature: (o *Orchestrator) finalizeStartupReport(runErr error).
// Why: the three writes are independent and best-effort. A failure writing the
// final report must not leave the progress file stuck showing "running", nor
// drop the archive copy, so each write is attempted and failures are only
// logged as warnings.
func (o *Orchestrator) finalizeStartupReport(runErr error) {
	// Detach under the lock so later note*/setStartupPhase calls become no-ops.
	o.startupReportMu.Lock()
	report := o.activeStartupReport
	o.activeStartupReport = nil
	o.startupReportMu.Unlock()
	if report == nil {
		return
	}
	report = o.finalizeStartupReportSnapshot(report, runErr)

	path := o.startupReportPath()
	primaryErr := o.writeStartupReportFile(path, report)
	if primaryErr != nil {
		o.log.Printf("warning: %v", primaryErr)
	}

	// Update the progress file even if the final report could not be written,
	// so pollers see a terminal status.
	o.persistStartupProgress(report)

	archivePath := filepath.Join(
		o.reportArchiveDir(),
		fmt.Sprintf("startup-report-%d.json", report.StartedAt.UnixNano()),
	)
	if writeErr := o.writeStartupReportFile(archivePath, report); writeErr != nil {
		o.log.Printf("warning: %v", writeErr)
	}

	// Only claim the final report was written when that write succeeded.
	if primaryErr == nil {
		o.log.Printf("startup report written: %s", path)
	}
}