package main import ( "context" "flag" "fmt" "log" "os" "os/signal" "syscall" "time" "scm.bstein.dev/bstein/hecate/internal/cluster" "scm.bstein.dev/bstein/hecate/internal/config" "scm.bstein.dev/bstein/hecate/internal/execx" "scm.bstein.dev/bstein/hecate/internal/service" "scm.bstein.dev/bstein/hecate/internal/state" "scm.bstein.dev/bstein/hecate/internal/ups" ) func main() { logger := log.New(os.Stdout, "[hecate] ", log.LstdFlags) if len(os.Args) < 2 { usage() os.Exit(2) } sub := os.Args[1] switch sub { case "startup": if err := runStartup(logger, os.Args[2:]); err != nil { logger.Printf("startup failed: %v", err) os.Exit(1) } case "shutdown": if err := runShutdown(logger, os.Args[2:]); err != nil { logger.Printf("shutdown failed: %v", err) os.Exit(1) } case "daemon": if err := runDaemon(logger, os.Args[2:]); err != nil { logger.Printf("daemon failed: %v", err) os.Exit(1) } case "status": if err := runStatus(logger, os.Args[2:]); err != nil { logger.Printf("status failed: %v", err) os.Exit(1) } case "help", "-h", "--help": usage() default: logger.Printf("unknown command: %s", sub) usage() os.Exit(2) } } func runStartup(logger *log.Logger, args []string) error { fs := flag.NewFlagSet("startup", flag.ExitOnError) configPath := fs.String("config", "/etc/hecate/hecate.yaml", "Path to config file") execute := fs.Bool("execute", false, "Actually execute changes (default dry-run)") forceBranch := fs.String("force-flux-branch", "", "Patch Flux source branch before resume") skipLocalBootstrap := fs.Bool("skip-local-bootstrap", false, "Skip local fallback bootstrap applies") _ = fs.Parse(args) _, orch, err := buildOrchestrator(logger, *configPath, !*execute) if err != nil { return err } ctx, cancel := context.WithCancel(context.Background()) defer cancel() return orch.Startup(ctx, cluster.StartupOptions{ ForceFluxBranch: *forceBranch, SkipLocalBootstrap: *skipLocalBootstrap, Reason: "manual-startup", }) } func runShutdown(logger *log.Logger, args []string) error { fs := flag.NewFlagSet("shutdown", flag.ExitOnError) configPath := fs.String("config", "/etc/hecate/hecate.yaml", "Path to config file") execute := fs.Bool("execute", false, "Actually execute changes (default dry-run)") skipEtcd := fs.Bool("skip-etcd-snapshot", false, "Skip etcd snapshot") skipDrain := fs.Bool("skip-drain", false, "Skip worker drain") _ = fs.Parse(args) _, orch, err := buildOrchestrator(logger, *configPath, !*execute) if err != nil { return err } ctx, cancel := context.WithCancel(context.Background()) defer cancel() return orch.Shutdown(ctx, cluster.ShutdownOptions{ SkipEtcdSnapshot: *skipEtcd, SkipDrain: *skipDrain, Reason: "manual-shutdown", }) } func runDaemon(logger *log.Logger, args []string) error { fs := flag.NewFlagSet("daemon", flag.ExitOnError) configPath := fs.String("config", "/etc/hecate/hecate.yaml", "Path to config file") dryRunActions := fs.Bool("dry-run-actions", false, "Log planned actions without executing") _ = fs.Parse(args) cfg, orch, err := buildOrchestrator(logger, *configPath, *dryRunActions) if err != nil { return err } if !cfg.UPS.Enabled { return fmt.Errorf("UPS monitoring is disabled in config") } var provider ups.Provider switch cfg.UPS.Provider { case "nut": provider = ups.NewNUTProvider(cfg.UPS.Target) default: return fmt.Errorf("unsupported UPS provider: %s", cfg.UPS.Provider) } d := service.NewDaemon(cfg, orch, provider, logger) ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) defer stop() return d.Run(ctx) } func runStatus(logger *log.Logger, args []string) error { fs := flag.NewFlagSet("status", flag.ExitOnError) configPath := fs.String("config", "/etc/hecate/hecate.yaml", "Path to config file") _ = fs.Parse(args) cfg, orch, err := buildOrchestrator(logger, *configPath, true) if err != nil { return err } recs, err := state.New(cfg.State.RunHistoryPath).Load() if err != nil { return err } last := "none" if len(recs) > 0 { r := recs[len(recs)-1] last = fmt.Sprintf("%s success=%t duration=%ds at=%s", r.Action, r.Success, r.DurationSeconds, r.EndedAt.Format(time.RFC3339)) } logger.Printf("expected_flux_branch=%s", cfg.ExpectedFluxBranch) logger.Printf("control_planes=%v", cfg.ControlPlanes) logger.Printf("estimated_shutdown_budget_seconds=%d", orch.EstimatedShutdownSeconds()) logger.Printf("last_run=%s", last) return nil } func buildOrchestrator(logger *log.Logger, cfgPath string, dryRun bool) (config.Config, *cluster.Orchestrator, error) { cfg, err := config.Load(cfgPath) if err != nil { return config.Config{}, nil, err } if err := state.EnsureDir(cfg.State.Dir); err != nil { return config.Config{}, nil, err } runner := &execx.Runner{ DryRun: dryRun, Kubeconfig: cfg.Kubeconfig, Logger: logger, } store := state.New(cfg.State.RunHistoryPath) orch := cluster.New(cfg, runner, store, logger) return cfg, orch, nil } func usage() { fmt.Print(`hecate: staged startup/shutdown + UPS-triggered protection Usage: hecate [flags] Commands: startup Perform staged cluster startup shutdown Perform graceful cluster shutdown daemon Monitor UPS and auto-trigger shutdown status Print current hecate status and estimates Examples: hecate startup --config /etc/hecate/hecate.yaml --execute --force-flux-branch main hecate shutdown --config /etc/hecate/hecate.yaml --execute hecate daemon --config /etc/hecate/hecate.yaml hecate status --config /etc/hecate/hecate.yaml `) }