hecate(ssh): add config/key fallback and scoped node orchestration

This commit is contained in:
Brad Stein 2026-04-04 12:56:58 -03:00
parent 3af6fe9f6f
commit 7ce729d810
7 changed files with 84 additions and 7 deletions

View File

@ -68,7 +68,7 @@ sudo systemctl start hecate-bootstrap.service
Optional SSH jump/bastion:
- Set `ssh_jump_host` (and optional `ssh_jump_user`) to route node SSH through a jump host like `titan-jh`; Hecate now falls back to direct SSH automatically if jump routing is unavailable.
- Set `ssh_port`, `ssh_identity_file`, and `ssh_node_hosts` so root-run systemd actions can actually reach node SSH daemons during cold-start recovery.
- Set `ssh_port`, `ssh_config_file`, `ssh_identity_file`, and `ssh_node_hosts` so root-run systemd actions can actually reach node SSH daemons during cold-start recovery.
- Use `ssh_node_users` for per-node username overrides (for example `titan-24: tethys`).
- Use `ssh_managed_nodes` to limit host-level SSH start/stop actions to nodes Hecate can actually authenticate to.

View File

@ -2,6 +2,7 @@
kubeconfig: /etc/hecate/kubeconfig
ssh_user: atlas
ssh_port: 2277
ssh_config_file: ""
ssh_identity_file: /home/atlas/.ssh/id_ed25519
ssh_node_hosts: {}
ssh_node_users: {}

View File

@ -2,9 +2,10 @@
kubeconfig: /etc/hecate/kubeconfig
ssh_user: atlas
ssh_port: 2277
ssh_config_file: /home/tethys/.ssh/config
ssh_identity_file: /home/tethys/.ssh/id_ed25519
ssh_node_hosts:
titan-db: 192.168.22.7
titan-db: 192.168.22.10
titan-0a: 192.168.22.11
titan-0b: 192.168.22.12
titan-0c: 192.168.22.13

View File

@ -2,9 +2,10 @@
kubeconfig: /etc/hecate/kubeconfig
ssh_user: atlas
ssh_port: 2277
ssh_config_file: /home/atlas/.ssh/config
ssh_identity_file: /home/atlas/.ssh/id_ed25519
ssh_node_hosts:
titan-db: 192.168.22.7
titan-db: 192.168.22.10
titan-0a: 192.168.22.11
titan-0b: 192.168.22.12
titan-0c: 192.168.22.13

View File

@ -572,13 +572,18 @@ func (o *Orchestrator) ssh(ctx context.Context, node string, command string) (st
if sshUser != "" {
target = sshUser + "@" + host
}
sshConfigFile := o.resolveSSHConfigFile()
sshIdentity := o.resolveSSHIdentityFile()
baseArgs := []string{
"-o", "BatchMode=yes",
"-o", "ConnectTimeout=8",
"-o", "StrictHostKeyChecking=accept-new",
}
if o.cfg.SSHIdentityFile != "" {
baseArgs = append(baseArgs, "-i", o.cfg.SSHIdentityFile)
if sshConfigFile != "" {
baseArgs = append(baseArgs, "-F", sshConfigFile)
}
if sshIdentity != "" {
baseArgs = append(baseArgs, "-i", sshIdentity)
}
if o.cfg.SSHPort > 0 {
baseArgs = append(baseArgs, "-p", strconv.Itoa(o.cfg.SSHPort))
@ -676,6 +681,38 @@ func (o *Orchestrator) sshManaged(node string) bool {
return false
}
// resolveSSHConfigFile returns the ssh client config path the orchestrator
// should use.
//
// A non-blank SSHConfigFile from the loaded config wins and is returned with
// surrounding whitespace trimmed; it is not checked for existence. Otherwise
// the built-in fallback paths are probed in order and the first one that
// os.Stat can read and that is not a directory is returned. An empty string
// means no config file was found.
func (o *Orchestrator) resolveSSHConfigFile() string {
	if configured := strings.TrimSpace(o.cfg.SSHConfigFile); configured != "" {
		return configured
	}
	for _, fallback := range [...]string{
		"/home/atlas/.ssh/config",
		"/home/tethys/.ssh/config",
	} {
		info, err := os.Stat(fallback)
		if err != nil || info.IsDir() {
			continue
		}
		return fallback
	}
	return ""
}
// resolveSSHIdentityFile returns the private key path the orchestrator should
// hand to ssh.
//
// An explicitly configured SSHIdentityFile (after trimming whitespace) takes
// precedence and is returned as-is, without an existence check. When it is
// blank, the built-in key locations are tried in order; the first path that
// stats successfully and is not a directory is returned. The result is ""
// when neither source yields a path.
func (o *Orchestrator) resolveSSHIdentityFile() string {
	explicit := strings.TrimSpace(o.cfg.SSHIdentityFile)
	if explicit != "" {
		return explicit
	}

	defaults := []string{
		"/home/atlas/.ssh/id_ed25519",
		"/home/tethys/.ssh/id_ed25519",
	}
	for i := range defaults {
		fi, statErr := os.Stat(defaults[i])
		if statErr == nil && !fi.IsDir() {
			return defaults[i]
		}
	}
	return ""
}
func (o *Orchestrator) bestEffort(name string, fn func() error) {
if err := fn(); err != nil {
o.log.Printf("warning: %s: %v", name, err)

View File

@ -11,6 +11,7 @@ type Config struct {
Kubeconfig string `yaml:"kubeconfig"`
SSHUser string `yaml:"ssh_user"`
SSHPort int `yaml:"ssh_port"`
SSHConfigFile string `yaml:"ssh_config_file"`
SSHIdentityFile string `yaml:"ssh_identity_file"`
SSHNodeHosts map[string]string `yaml:"ssh_node_hosts"`
SSHNodeUsers map[string]string `yaml:"ssh_node_users"`

View File

@ -6,6 +6,7 @@ import (
"log"
"math"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
@ -219,8 +220,11 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
"-o", "ConnectTimeout=8",
"-o", "StrictHostKeyChecking=accept-new",
}
if d.cfg.SSHIdentityFile != "" {
args = append(args, "-i", d.cfg.SSHIdentityFile)
if cfgFile := d.resolveSSHConfigFile(); cfgFile != "" {
args = append(args, "-F", cfgFile)
}
if idFile := d.resolveSSHIdentityFile(); idFile != "" {
args = append(args, "-i", idFile)
}
if d.cfg.SSHPort > 0 {
args = append(args, "-p", strconv.Itoa(d.cfg.SSHPort))
@ -248,6 +252,38 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
return nil
}
// resolveSSHConfigFile returns the ssh client config path the daemon should
// use.
//
// If the daemon config carries a non-blank SSHConfigFile, its trimmed value is
// returned without checking that the file exists. Otherwise the built-in
// fallback paths are examined in order, and the first one for which os.Stat
// succeeds and reports a non-directory is returned. "" signals that nothing
// usable was found.
func (d *Daemon) resolveSSHConfigFile() string {
	override := strings.TrimSpace(d.cfg.SSHConfigFile)
	if override != "" {
		return override
	}

	fallbacks := []string{
		"/home/atlas/.ssh/config",
		"/home/tethys/.ssh/config",
	}
	for _, path := range fallbacks {
		info, err := os.Stat(path)
		if err != nil {
			continue
		}
		if info.IsDir() {
			continue
		}
		return path
	}
	return ""
}
// resolveSSHIdentityFile returns the private key path the daemon should hand
// to ssh.
//
// The configured SSHIdentityFile is preferred: when it is non-blank, its
// whitespace-trimmed value is returned unverified. With no configured value,
// the built-in key paths are probed in order and the first that os.Stat
// resolves to something other than a directory is returned. An empty string
// means no identity file is available.
func (d *Daemon) resolveSSHIdentityFile() string {
	if keyPath := strings.TrimSpace(d.cfg.SSHIdentityFile); keyPath != "" {
		return keyPath
	}
	for _, keyPath := range [...]string{
		"/home/atlas/.ssh/id_ed25519",
		"/home/tethys/.ssh/id_ed25519",
	} {
		if fi, err := os.Stat(keyPath); err == nil && !fi.IsDir() {
			return keyPath
		}
	}
	return ""
}
func (d *Daemon) targetList() string {
names := make([]string, 0, len(d.targets))
for _, t := range d.targets {