hecate: retry ssh with known_hosts repair on silent 255
This commit is contained in:
parent
75ad091898
commit
b5f27a79e0
@ -553,7 +553,7 @@ func runSSHWithRecovery(ctx context.Context, logger *log.Logger, cfg config.Conf
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
if !sshutil.IsHostKeyError(out, err) {
|
if !sshutil.ShouldAttemptKnownHostsRepair(out, err) {
|
||||||
return out, err
|
return out, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1296,8 +1296,8 @@ func (o *Orchestrator) ssh(ctx context.Context, node string, command string) (st
|
|||||||
}
|
}
|
||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
if sshutil.IsHostKeyError(out, err) {
|
if sshutil.ShouldAttemptKnownHostsRepair(out, err) {
|
||||||
o.log.Printf("warning: ssh host-key mismatch detected for %s via %s path; repairing known_hosts and retrying once", node, attemptNames[i])
|
o.log.Printf("warning: ssh failure on %s via %s path may be host-key related; repairing known_hosts and retrying once", node, attemptNames[i])
|
||||||
sshutil.RepairKnownHosts(ctx, o.log, knownHostsFiles, repairHosts, o.cfg.SSHPort)
|
sshutil.RepairKnownHosts(ctx, o.log, knownHostsFiles, repairHosts, o.cfg.SSHPort)
|
||||||
retryOut, retryErr := o.run(ctx, 45*time.Second, "ssh", args...)
|
retryOut, retryErr := o.run(ctx, 45*time.Second, "ssh", args...)
|
||||||
if retryErr == nil {
|
if retryErr == nil {
|
||||||
|
|||||||
@ -256,7 +256,7 @@ func (d *Daemon) forwardShutdown(ctx context.Context, reason string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
out, err := try()
|
out, err := try()
|
||||||
if err != nil && sshutil.IsHostKeyError(out, err) {
|
if err != nil && sshutil.ShouldAttemptKnownHostsRepair(out, err) {
|
||||||
repairHosts := []string{d.cfg.Coordination.ForwardShutdownHost, host}
|
repairHosts := []string{d.cfg.Coordination.ForwardShutdownHost, host}
|
||||||
if d.cfg.SSHJumpHost != "" {
|
if d.cfg.SSHJumpHost != "" {
|
||||||
repairHosts = append(repairHosts, d.cfg.SSHJumpHost)
|
repairHosts = append(repairHosts, d.cfg.SSHJumpHost)
|
||||||
|
|||||||
@ -34,6 +34,21 @@ func IsHostKeyError(output string, err error) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ShouldAttemptKnownHostsRepair(output string, err error) bool {
|
||||||
|
if IsHostKeyError(output, err) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Some SSH invocations (especially under strict non-interactive configs)
|
||||||
|
// return exit 255 without forwarding the host-key mismatch text.
|
||||||
|
if strings.Contains(strings.ToLower(err.Error()), "exit status 255") && strings.TrimSpace(output) == "" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
func KnownHostsFiles(sshConfigFile, sshIdentityFile string) []string {
|
||||||
seen := map[string]struct{}{}
|
seen := map[string]struct{}{}
|
||||||
add := func(path string) {
|
add := func(path string) {
|
||||||
|
|||||||
@ -20,6 +20,12 @@ func TestIsHostKeyErrorIgnoresGenericFailures(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestShouldAttemptKnownHostsRepairOnSilent255(t *testing.T) {
|
||||||
|
if !ShouldAttemptKnownHostsRepair("", errors.New("ssh ...: exit status 255")) {
|
||||||
|
t.Fatalf("expected silent exit status 255 to trigger known_hosts repair")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
|
func TestKnownHostsFilesIncludesDerivedPaths(t *testing.T) {
|
||||||
configFile := "/home/atlas/.ssh/config"
|
configFile := "/home/atlas/.ssh/config"
|
||||||
identityFile := "/home/tethys/.ssh/id_ed25519"
|
identityFile := "/home/tethys/.ssh/id_ed25519"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user