hecate: retry ssh with known_hosts repair on silent 255

This commit is contained in:
Brad Stein 2026-04-04 22:40:39 -03:00
parent 75ad091898
commit b5f27a79e0
5 changed files with 25 additions and 4 deletions

View File

@ -553,7 +553,7 @@ func runSSHWithRecovery(ctx context.Context, logger *log.Logger, cfg config.Conf
if err == nil {
return out, nil
}
if !sshutil.IsHostKeyError(out, err) {
if !sshutil.ShouldAttemptKnownHostsRepair(out, err) {
return out, err
}

View File

@ -1296,8 +1296,8 @@ func (o *Orchestrator) ssh(ctx context.Context, node string, command string) (st
}
return out, nil
}
if sshutil.IsHostKeyError(out, err) {
o.log.Printf("warning: ssh host-key mismatch detected for %s via %s path; repairing known_hosts and retrying once", node, attemptNames[i])
if sshutil.ShouldAttemptKnownHostsRepair(out, err) {
o.log.Printf("warning: ssh failure on %s via %s path may be host-key related; repairing known_hosts and retrying once", node, attemptNames[i])
sshutil.RepairKnownHosts(ctx, o.log, knownHostsFiles, repairHosts, o.cfg.SSHPort)
retryOut, retryErr := o.run(ctx, 45*time.Second, "ssh", args...)
if retryErr == nil {

View File

@ -256,7 +256,7 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
}
out, err := try()
if err != nil && sshutil.IsHostKeyError(out, err) {
if err != nil && sshutil.ShouldAttemptKnownHostsRepair(out, err) {
repairHosts := []string{d.cfg.Coordination.ForwardShutdownHost, host}
if d.cfg.SSHJumpHost != "" {
repairHosts = append(repairHosts, d.cfg.SSHJumpHost)

View File

@ -34,6 +34,21 @@ func IsHostKeyError(output string, err error) bool {
return false
}
// ShouldAttemptKnownHostsRepair reports whether a failed SSH invocation is
// worth retrying after repairing known_hosts.
//
// It returns true when IsHostKeyError recognises the failure, or when err
// mentions "exit status 255" (case-insensitive) and output is empty or
// whitespace-only. It returns false when err is nil and IsHostKeyError did not
// match.
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
	switch {
	case IsHostKeyError(output, err):
		return true
	case err == nil:
		return false
	}

	// Under strict non-interactive configs SSH may exit with 255 without
	// forwarding the host-key mismatch text, so a silent 255 is treated as a
	// possible host-key problem.
	noOutput := strings.TrimSpace(output) == ""
	exited255 := strings.Contains(strings.ToLower(err.Error()), "exit status 255")
	return noOutput && exited255
}
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
seen := map[string]struct{}{}
add := func(path string) {

View File

@ -20,6 +20,12 @@ func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
}
}
// TestShouldAttemptKnownHostsRepairOnSilent255 verifies that an error whose
// text ends in "exit status 255", paired with empty output, triggers a
// known_hosts repair attempt.
func TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T) {
	silentFailure := errors.New("ssh ...: exit status 255")
	if ShouldAttemptKnownHostsRepair("", silentFailure) {
		return
	}
	t.Fatalf("expected silent exit status 255 to trigger known_hosts repair")
}
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
configFile := "/home/atlas/.ssh/config"
identityFile := "/home/tethys/.ssh/id_ed25519"