hecate(ssh): add config/key fallback and scoped node orchestration

This commit is contained in:
Brad Stein 2026-04-04 12:56:58 -03:00
parent 3af6fe9f6f
commit 7ce729d810
7 changed files with 84 additions and 7 deletions

View File

@ -68,7 +68,7 @@ sudo systemctl start hecate-bootstrap.service
Optional SSH jump/bastion: Optional SSH jump/bastion:
- Set `ssh_jump_host` (and optional `ssh_jump_user`) to route node SSH through a jump host like `titan-jh`; Hecate now falls back to direct SSH automatically if jump routing is unavailable. - Set `ssh_jump_host` (and optional `ssh_jump_user`) to route node SSH through a jump host like `titan-jh`; Hecate now falls back to direct SSH automatically if jump routing is unavailable.
- Set `ssh_port`, `ssh_identity_file`, and `ssh_node_hosts` so root-run systemd actions can actually reach node SSH daemons during cold-start recovery. - Set `ssh_port`, `ssh_config_file`, `ssh_identity_file`, and `ssh_node_hosts` so root-run systemd actions can actually reach node SSH daemons during cold-start recovery.
- Use `ssh_node_users` for per-node username overrides (for example `titan-24: tethys`). - Use `ssh_node_users` for per-node username overrides (for example `titan-24: tethys`).
- Use `ssh_managed_nodes` to limit host-level SSH start/stop actions to nodes Hecate can actually authenticate to. - Use `ssh_managed_nodes` to limit host-level SSH start/stop actions to nodes Hecate can actually authenticate to.

View File

@ -2,6 +2,7 @@
kubeconfig: /etc/hecate/kubeconfig kubeconfig: /etc/hecate/kubeconfig
ssh_user: atlas ssh_user: atlas
ssh_port: 2277 ssh_port: 2277
ssh_config_file: ""
ssh_identity_file: /home/atlas/.ssh/id_ed25519 ssh_identity_file: /home/atlas/.ssh/id_ed25519
ssh_node_hosts: {} ssh_node_hosts: {}
ssh_node_users: {} ssh_node_users: {}

View File

@ -2,9 +2,10 @@
kubeconfig: /etc/hecate/kubeconfig kubeconfig: /etc/hecate/kubeconfig
ssh_user: atlas ssh_user: atlas
ssh_port: 2277 ssh_port: 2277
ssh_config_file: /home/tethys/.ssh/config
ssh_identity_file: /home/tethys/.ssh/id_ed25519 ssh_identity_file: /home/tethys/.ssh/id_ed25519
ssh_node_hosts: ssh_node_hosts:
titan-db: 192.168.22.7 titan-db: 192.168.22.10
titan-0a: 192.168.22.11 titan-0a: 192.168.22.11
titan-0b: 192.168.22.12 titan-0b: 192.168.22.12
titan-0c: 192.168.22.13 titan-0c: 192.168.22.13

View File

@ -2,9 +2,10 @@
kubeconfig: /etc/hecate/kubeconfig kubeconfig: /etc/hecate/kubeconfig
ssh_user: atlas ssh_user: atlas
ssh_port: 2277 ssh_port: 2277
ssh_config_file: /home/atlas/.ssh/config
ssh_identity_file: /home/atlas/.ssh/id_ed25519 ssh_identity_file: /home/atlas/.ssh/id_ed25519
ssh_node_hosts: ssh_node_hosts:
titan-db: 192.168.22.7 titan-db: 192.168.22.10
titan-0a: 192.168.22.11 titan-0a: 192.168.22.11
titan-0b: 192.168.22.12 titan-0b: 192.168.22.12
titan-0c: 192.168.22.13 titan-0c: 192.168.22.13

View File

@ -572,13 +572,18 @@ func (o *Orchestrator) ssh(ctx context.Context, node string, command string) (st
if sshUser != "" { if sshUser != "" {
target = sshUser + "@" + host target = sshUser + "@" + host
} }
sshConfigFile := o.resolveSSHConfigFile()
sshIdentity := o.resolveSSHIdentityFile()
baseArgs := []string{ baseArgs := []string{
"-o", "BatchMode=yes", "-o", "BatchMode=yes",
"-o", "ConnectTimeout=8", "-o", "ConnectTimeout=8",
"-o", "StrictHostKeyChecking=accept-new", "-o", "StrictHostKeyChecking=accept-new",
} }
if o.cfg.SSHIdentityFile != "" { if sshConfigFile != "" {
baseArgs = append(baseArgs, "-i", o.cfg.SSHIdentityFile) baseArgs = append(baseArgs, "-F", sshConfigFile)
}
if sshIdentity != "" {
baseArgs = append(baseArgs, "-i", sshIdentity)
} }
if o.cfg.SSHPort > 0 { if o.cfg.SSHPort > 0 {
baseArgs = append(baseArgs, "-p", strconv.Itoa(o.cfg.SSHPort)) baseArgs = append(baseArgs, "-p", strconv.Itoa(o.cfg.SSHPort))
@ -676,6 +681,38 @@ func (o *Orchestrator) sshManaged(node string) bool {
return false return false
} }
// resolveSSHConfigFile returns the ssh client config path to use, or "" when
// none applies.
//
// A non-blank ssh_config_file setting wins: it is returned with surrounding
// whitespace removed and is not checked for existence. When the setting is
// blank, the fixed per-user locations below are probed in order and the first
// one that os.Stat can read and that is not a directory is returned.
func (o *Orchestrator) resolveSSHConfigFile() string {
	configured := strings.TrimSpace(o.cfg.SSHConfigFile)
	if configured != "" {
		return configured
	}
	for _, fallback := range [...]string{
		"/home/atlas/.ssh/config",
		"/home/tethys/.ssh/config",
	} {
		info, err := os.Stat(fallback)
		if err != nil || info.IsDir() {
			// Missing, unreadable, or a directory: try the next location.
			continue
		}
		return fallback
	}
	return ""
}
// resolveSSHIdentityFile returns the private key path to use, or "" when none
// applies.
//
// A non-blank ssh_identity_file setting wins: it is returned with surrounding
// whitespace removed and is not checked for existence. When the setting is
// blank, the fixed per-user key locations below are probed in order and the
// first one that os.Stat can read and that is not a directory is returned.
func (o *Orchestrator) resolveSSHIdentityFile() string {
	configured := strings.TrimSpace(o.cfg.SSHIdentityFile)
	if configured != "" {
		return configured
	}
	for _, fallback := range [...]string{
		"/home/atlas/.ssh/id_ed25519",
		"/home/tethys/.ssh/id_ed25519",
	} {
		info, err := os.Stat(fallback)
		if err != nil || info.IsDir() {
			// Missing, unreadable, or a directory: try the next location.
			continue
		}
		return fallback
	}
	return ""
}
func (o *Orchestrator) bestEffort(name string, fn func() error) { func (o *Orchestrator) bestEffort(name string, fn func() error) {
if err := fn(); err != nil { if err := fn(); err != nil {
o.log.Printf("warning: %s: %v", name, err) o.log.Printf("warning: %s: %v", name, err)

View File

@ -11,6 +11,7 @@ type Config struct {
Kubeconfig string `yaml:"kubeconfig"` Kubeconfig string `yaml:"kubeconfig"`
SSHUser string `yaml:"ssh_user"` SSHUser string `yaml:"ssh_user"`
SSHPort int `yaml:"ssh_port"` SSHPort int `yaml:"ssh_port"`
SSHConfigFile string `yaml:"ssh_config_file"`
SSHIdentityFile string `yaml:"ssh_identity_file"` SSHIdentityFile string `yaml:"ssh_identity_file"`
SSHNodeHosts map[string]string `yaml:"ssh_node_hosts"` SSHNodeHosts map[string]string `yaml:"ssh_node_hosts"`
SSHNodeUsers map[string]string `yaml:"ssh_node_users"` SSHNodeUsers map[string]string `yaml:"ssh_node_users"`

View File

@ -6,6 +6,7 @@ import (
"log" "log"
"math" "math"
"net/http" "net/http"
"os"
"os/exec" "os/exec"
"strconv" "strconv"
"strings" "strings"
@ -219,8 +220,11 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
"-o", "ConnectTimeout=8", "-o", "ConnectTimeout=8",
"-o", "StrictHostKeyChecking=accept-new", "-o", "StrictHostKeyChecking=accept-new",
} }
if d.cfg.SSHIdentityFile != "" { if cfgFile := d.resolveSSHConfigFile(); cfgFile != "" {
args = append(args, "-i", d.cfg.SSHIdentityFile) args = append(args, "-F", cfgFile)
}
if idFile := d.resolveSSHIdentityFile(); idFile != "" {
args = append(args, "-i", idFile)
} }
if d.cfg.SSHPort > 0 { if d.cfg.SSHPort > 0 {
args = append(args, "-p", strconv.Itoa(d.cfg.SSHPort)) args = append(args, "-p", strconv.Itoa(d.cfg.SSHPort))
@ -248,6 +252,38 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
return nil return nil
} }
// resolveSSHConfigFile returns the ssh client config path the daemon should
// use, or "" when none applies.
//
// The trimmed ssh_config_file setting is returned as-is when it is non-blank
// (no existence check). Otherwise each fixed per-user location is probed in
// order, and the first path that os.Stat succeeds on and that is not a
// directory is returned.
func (d *Daemon) resolveSSHConfigFile() string {
	explicit := strings.TrimSpace(d.cfg.SSHConfigFile)
	if explicit != "" {
		return explicit
	}
	defaults := [...]string{
		"/home/atlas/.ssh/config",
		"/home/tethys/.ssh/config",
	}
	for _, path := range defaults {
		fi, statErr := os.Stat(path)
		if statErr != nil {
			continue
		}
		if fi.IsDir() {
			continue
		}
		return path
	}
	return ""
}
// resolveSSHIdentityFile returns the private key path the daemon should use,
// or "" when none applies.
//
// The trimmed ssh_identity_file setting is returned as-is when it is non-blank
// (no existence check). Otherwise each fixed per-user key location is probed
// in order, and the first path that os.Stat succeeds on and that is not a
// directory is returned.
func (d *Daemon) resolveSSHIdentityFile() string {
	explicit := strings.TrimSpace(d.cfg.SSHIdentityFile)
	if explicit != "" {
		return explicit
	}
	defaults := [...]string{
		"/home/atlas/.ssh/id_ed25519",
		"/home/tethys/.ssh/id_ed25519",
	}
	for _, path := range defaults {
		fi, statErr := os.Stat(path)
		if statErr != nil {
			continue
		}
		if fi.IsDir() {
			continue
		}
		return path
	}
	return ""
}
func (d *Daemon) targetList() string { func (d *Daemon) targetList() string {
names := make([]string, 0, len(d.targets)) names := make([]string, 0, len(d.targets))
for _, t := range d.targets { for _, t := range d.targets {