package cluster

import (
	"context"
	"fmt"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"scm.bstein.dev/bstein/ananke/internal/state"
)

// intentAge reports how long ago the intent record was last updated. A zero
// UpdatedAt yields zero, which callers treat as "age unknown".
func intentAge(in state.Intent) time.Duration {
	if in.UpdatedAt.IsZero() {
		return 0
	}
	return time.Since(in.UpdatedAt)
}

// intentFresh reports whether the intent was updated within maxAge. Intents
// without a timestamp are treated as fresh so they are never discarded as
// stale.
func intentFresh(in state.Intent, maxAge time.Duration) bool {
	if in.UpdatedAt.IsZero() {
		return true
	}
	return intentAge(in) <= maxAge
}
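
// Illustrative only (values are hypothetical, not from this codebase): with
// a 15-minute window, an intent updated 10 minutes ago is fresh and one
// updated 20 minutes ago is stale:
//
//	intentFresh(state.Intent{UpdatedAt: time.Now().Add(-10 * time.Minute)}, 15*time.Minute) // true
//	intentFresh(state.Intent{UpdatedAt: time.Now().Add(-20 * time.Minute)}, 15*time.Minute) // false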

// startupGuardAge returns how long a peer's startup or shutdown intent is
// honored before it is considered stale, defaulting to 900 seconds (15
// minutes) when StartupGuardMaxAgeSec is unset or non-positive.
func (o *Orchestrator) startupGuardAge() time.Duration {
	seconds := o.cfg.Coordination.StartupGuardMaxAgeSec
	if seconds <= 0 {
		seconds = 900
	}
	return time.Duration(seconds) * time.Second
}

// startupShutdownCooldown returns how long startup must wait after a peer
// reports a completed shutdown, defaulting to 45 seconds when
// ShutdownCooldownSeconds is unset or non-positive.
func (o *Orchestrator) startupShutdownCooldown() time.Duration {
	seconds := o.cfg.Startup.ShutdownCooldownSeconds
	if seconds <= 0 {
		seconds = 45
	}
	return time.Duration(seconds) * time.Second
}

// coordinationPeers returns the deduplicated, order-preserving list of peer
// hosts to consult during startup coordination: every configured peer host
// plus the forward-shutdown host, with blank entries skipped.
func (o *Orchestrator) coordinationPeers() []string {
	seen := map[string]struct{}{}
	out := make([]string, 0, len(o.cfg.Coordination.PeerHosts)+1)
	add := func(node string) {
		node = strings.TrimSpace(node)
		if node == "" {
			return
		}
		if _, ok := seen[node]; ok {
			return
		}
		seen[node] = struct{}{}
		out = append(out, node)
	}
	for _, node := range o.cfg.Coordination.PeerHosts {
		add(node)
	}
	if strings.TrimSpace(o.cfg.Coordination.ForwardShutdownHost) != "" {
		add(o.cfg.Coordination.ForwardShutdownHost)
	}
	return out
}
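
// For illustration (hypothetical config values): with PeerHosts
// ["nodeA", " nodeB ", "nodeA"] and ForwardShutdownHost "nodeC", the result
// is ["nodeA", "nodeB", "nodeC"]: entries are trimmed, duplicates dropped,
// and first-seen order preserved.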

// guardPeerStartupIntents blocks local startup while any coordination peer
// holds a fresh shutdown or startup intent. Stale intents are logged and,
// where safe, auto-cleared; peers that cannot be queried are skipped with a
// warning rather than blocking startup.
func (o *Orchestrator) guardPeerStartupIntents(ctx context.Context) error {
	peers := o.coordinationPeers()
	if len(peers) == 0 {
		return nil
	}
	guardAge := o.startupGuardAge()
	localRole := strings.ToLower(strings.TrimSpace(o.cfg.Coordination.Role))
	for _, peer := range peers {
		peerStatus, err := o.readRemotePeerStatus(ctx, peer)
		if err != nil {
			o.log.Printf("warning: peer startup guard skipped intent check for %s: %v", peer, err)
			continue
		}
		intent := peerStatus.Intent
		switch intent.State {
		case "", state.IntentNormal:
			continue
		case state.IntentShuttingDown:
			if intentFresh(intent, guardAge) {
				return fmt.Errorf("startup blocked: peer %s has active shutdown intent (reason=%q age=%s)", peer, intent.Reason, intentAge(intent).Round(time.Second))
			}
			o.log.Printf("warning: peer %s shutdown intent appears stale; allowing startup", peer)
		case state.IntentStartupInProgress:
			if !peerStatus.BootstrapActive {
				o.log.Printf("warning: peer %s reports startup_in_progress but bootstrap service is inactive (reason=%q age=%s); auto-clearing stale peer intent",
					peer, intent.Reason, intentAge(intent).Round(time.Second))
				o.bestEffort(fmt.Sprintf("clear stale peer startup intent on %s", peer), func() error {
					return o.clearRemotePeerIntent(ctx, peer, "auto-clear stale peer startup intent")
				})
				continue
			}
			if localRole == "coordinator" && strings.EqualFold(strings.TrimSpace(intent.Reason), "manual-startup") {
				o.log.Printf("warning: peer %s has manual startup in progress (age=%s); allowing coordinator startup to continue",
					peer, intentAge(intent).Round(time.Second))
				continue
			}
			if intentFresh(intent, guardAge) {
				return fmt.Errorf("startup blocked: peer %s reports startup_in_progress (reason=%q age=%s)", peer, intent.Reason, intentAge(intent).Round(time.Second))
			}
			o.log.Printf("warning: peer %s startup intent appears stale; auto-clearing and allowing startup", peer)
			o.bestEffort(fmt.Sprintf("clear stale peer startup intent on %s", peer), func() error {
				return o.clearRemotePeerIntent(ctx, peer, "auto-clear stale peer startup intent")
			})
		case state.IntentShutdownComplete:
			if intentFresh(intent, o.startupShutdownCooldown()) {
				return fmt.Errorf("startup blocked: peer %s completed shutdown too recently (age=%s)", peer, intentAge(intent).Round(time.Second))
			}
		default:
			o.log.Printf("warning: peer %s intent state %q is unknown; ignoring", peer, intent.State)
		}
	}
	return nil
}
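
// Decision summary (editorial aid derived from the switch above):
//
//	"" / normal          -> proceed
//	shutting_down        -> block while fresh; warn and proceed once stale
//	startup_in_progress  -> proceed if bootstrap is inactive (auto-clear) or a
//	                        coordinator sees a manual-startup peer; block while
//	                        fresh; otherwise auto-clear and proceed
//	shutdown_complete    -> block until the shutdown cooldown has elapsed
//	anything else        -> warn and ignore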

// readRemotePeerStatus queries a peer over SSH for its bootstrap service
// state and current intent. It refuses nodes outside ssh_managed_nodes and
// uses sentinel markers so the is-active probe and the intent output can be
// read from a single round trip.
func (o *Orchestrator) readRemotePeerStatus(ctx context.Context, node string) (remotePeerStatus, error) {
	if !o.sshManaged(node) {
		return remotePeerStatus{}, fmt.Errorf("%s is not in ssh_managed_nodes", node)
	}
	out, err := o.ssh(ctx, node, "if sudo -n /usr/bin/systemctl is-active --quiet ananke-bootstrap.service; then echo __ANANKE_BOOTSTRAP_ACTIVE__; else echo __ANANKE_BOOTSTRAP_IDLE__; fi; sudo -n /usr/local/bin/ananke intent --config /etc/ananke/ananke.yaml")
	if err != nil {
		return remotePeerStatus{}, err
	}
	status := remotePeerStatus{
		BootstrapActive: strings.Contains(out, "__ANANKE_BOOTSTRAP_ACTIVE__"),
	}
	in, err := state.ParseIntentOutput(out)
	if err != nil {
		return remotePeerStatus{}, fmt.Errorf("parse remote intent output: %w", err)
	}
	status.Intent = in
	return status, nil
}

// clearRemotePeerIntent resets a peer's intent to normal over SSH, recording
// the given reason with source=startup. The reason is shell-quoted before
// interpolation into the remote command.
func (o *Orchestrator) clearRemotePeerIntent(ctx context.Context, node string, reason string) error {
	cmd := fmt.Sprintf(
		"sudo -n /usr/local/bin/ananke intent --config /etc/ananke/ananke.yaml --set normal --reason %s --source startup --execute",
		shellQuote(reason),
	)
	_, err := o.ssh(ctx, node, cmd)
	return err
}

// shellQuote wraps v in single quotes for safe use in a POSIX shell command,
// escaping embedded single quotes with the standard '"'"' idiom.
func shellQuote(v string) string {
	return "'" + strings.ReplaceAll(v, "'", `'"'"'`) + "'"
}
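
// For example, shellQuote("can't") returns `'can'"'"'t'`: the shell
// concatenates 'can', a double-quoted single quote, and 't' back into the
// original string.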

// verifyEtcdSnapshot checks that an etcd snapshot on the node is non-empty,
// at least 1 MiB, listed by `k3s etcd-snapshot ls`, and has a well-formed
// sha256, then logs the verified size and hash prefix. Dry-run mode skips
// all checks.
func (o *Orchestrator) verifyEtcdSnapshot(ctx context.Context, node string, snapshotPath string) error {
	if o.runner.DryRun {
		return nil
	}
	path := strings.TrimSpace(snapshotPath)
	if path == "" {
		return fmt.Errorf("etcd snapshot verification failed: snapshot path is empty")
	}
	quoted := shellQuote(path)
	sizeOut, err := o.ssh(ctx, node, fmt.Sprintf("sudo -n sh -lc 'test -s %s && stat -c %%s %s'", quoted, quoted))
	if err != nil {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: %w", path, node, err)
	}
	size, convErr := strconv.ParseInt(strings.TrimSpace(sizeOut), 10, 64)
	if convErr != nil {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: parse size %q: %w", path, node, strings.TrimSpace(sizeOut), convErr)
	}
	const minSnapshotBytes = int64(1 << 20) // 1 MiB sanity floor.
	if size < minSnapshotBytes {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: snapshot too small (%d bytes)", path, node, size)
	}
	lsOut, err := o.runSudoK3S(ctx, node, "etcd-snapshot", "ls")
	if err != nil {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: list snapshots: %w", path, node, err)
	}
	if !strings.Contains(lsOut, path) && !strings.Contains(lsOut, filepath.Base(path)) {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: snapshot is not present in k3s etcd-snapshot ls output", path, node)
	}
	sumOut, err := o.ssh(ctx, node, fmt.Sprintf("sudo -n sh -lc 'sha256sum %s | awk \"{print \\$1}\"'", quoted))
	if err != nil {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: sha256: %w", path, node, err)
	}
	hash := strings.TrimSpace(sumOut)
	if len(hash) != 64 {
		return fmt.Errorf("etcd snapshot verification failed for %s on %s: invalid sha256 %q", path, node, hash)
	}
	o.log.Printf("etcd snapshot verified path=%s size_bytes=%d sha256=%s", path, size, hash[:12])
	return nil
}
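
// Illustrative failure modes (not exhaustive): an empty file fails the
// `test -s` gate, a 4 KiB file fails the 1 MiB floor, and a file absent from
// `k3s etcd-snapshot ls` fails the listing check even when it exists on disk.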

// runSudoK3S runs a k3s subcommand on the node via sudo -n, trying each
// known binary location (/usr/local/bin/k3s, /usr/bin/k3s, then bare k3s on
// PATH) and returning the first successful output.
func (o *Orchestrator) runSudoK3S(ctx context.Context, node string, args ...string) (string, error) {
	k3sPaths := []string{
		"/usr/local/bin/k3s",
		"/usr/bin/k3s",
		"k3s",
	}
	var lastErr error
	for _, path := range k3sPaths {
		parts := []string{"sudo", "-n", path}
		parts = append(parts, args...)
		command := strings.Join(parts, " ")
		out, err := o.ssh(ctx, node, command)
		if err == nil {
			return out, nil
		}
		lastErr = err
	}
	if lastErr == nil {
		lastErr = fmt.Errorf("no k3s executable candidates configured")
	}
	return "", lastErr
}

// controlPlaneUsesExternalDatastore reports whether the node's k3s service
// is configured with --datastore-endpoint (an external datastore) rather
// than the embedded etcd. The sudo call uses -n, matching every other remote
// sudo in this file, so it fails fast instead of hanging on a password prompt.
func (o *Orchestrator) controlPlaneUsesExternalDatastore(ctx context.Context, node string) (bool, error) {
	out, err := o.ssh(ctx, node, "sudo -n systemctl cat k3s")
	if err != nil {
		return false, fmt.Errorf("inspect k3s service on %s for datastore mode: %w", node, err)
	}
	return strings.Contains(out, "--datastore-endpoint="), nil
}
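
// For example, a unit file whose ExecStart includes a flag such as
// "--datastore-endpoint=postgres://..." (endpoint illustrative) is treated
// as external; absence of the flag implies the embedded etcd datastore.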

// waitForAPI polls `kubectl version` until the Kubernetes API answers, the
// attempt budget is exhausted, or the context is cancelled. Dry-run mode
// returns immediately.
func (o *Orchestrator) waitForAPI(ctx context.Context, attempts int, sleep time.Duration) error {
	if o.runner.DryRun {
		return nil
	}
	for i := 0; i < attempts; i++ {
		_, err := o.kubectl(ctx, 5*time.Second, "version", "--request-timeout=5s")
		if err == nil {
			return nil
		}
		// Honor context cancellation during the backoff instead of a bare
		// time.Sleep, so shutdown is not delayed by up to a full interval.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(sleep):
		}
	}
	return fmt.Errorf("kubernetes API did not become reachable within timeout")
}
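
// A typical call (attempt budget and interval are illustrative, not
// prescribed by this file):
//
//	if err := o.waitForAPI(ctx, 60, 5*time.Second); err != nil {
//		return fmt.Errorf("control plane not ready: %w", err)
//	}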