From e0c5f0a2f6a18ed6c735a26db2fde206645ff6e9 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 29 Jan 2026 06:01:23 -0300 Subject: [PATCH] feat(snapshot): expose pod/job detail lines --- atlasbot/snapshot/builder.py | 100 +++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/atlasbot/snapshot/builder.py b/atlasbot/snapshot/builder.py index 10d02ca..11f5c68 100644 --- a/atlasbot/snapshot/builder.py +++ b/atlasbot/snapshot/builder.py @@ -71,6 +71,8 @@ def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]: summary["nodes_summary"] = snapshot.get("nodes_summary") if metrics: summary["metrics"] = metrics + if isinstance(snapshot.get("jobs"), dict): + summary["jobs"] = snapshot.get("jobs") summary.update(_build_nodes(snapshot)) summary.update(_build_pressure(snapshot)) summary.update(_build_hardware(nodes_detail)) @@ -551,6 +553,12 @@ def _append_pod_issues(lines: list[str], summary: dict[str, Any]) -> None: top_line = _format_pod_issue_top(pod_issues) if top_line: lines.append(top_line) + pending_line = _format_pod_pending_oldest(pod_issues) + if pending_line: + lines.append(pending_line) + reasons_line = _format_pod_waiting_reasons(pod_issues) + if reasons_line: + lines.append(reasons_line) def _format_pod_issue_counts(pod_issues: dict[str, Any]) -> str: @@ -582,6 +590,34 @@ def _format_pod_issue_top(pod_issues: dict[str, Any]) -> str: return "pod_issues_top: " + "; ".join(top) if top else "" +def _format_pod_pending_oldest(pod_issues: dict[str, Any]) -> str: + pending = pod_issues.get("pending_oldest") if isinstance(pod_issues.get("pending_oldest"), list) else [] + if not pending: + return "" + parts = [] + for item in pending[:5]: + if not isinstance(item, dict): + continue + namespace = item.get("namespace") + pod = item.get("pod") + age = item.get("age_hours") + reason = item.get("reason") or "" + if namespace and pod and age is not None: + label = f"{namespace}/{pod}={_format_float(age)}h" + if reason: + label = f"{label} ({reason})" + parts.append(label) + return "pods_pending_oldest: " + "; ".join(parts) if parts else "" + + +def _format_pod_waiting_reasons(pod_issues: dict[str, Any]) -> str: + reasons = pod_issues.get("waiting_reasons") if isinstance(pod_issues.get("waiting_reasons"), dict) else {} + if not reasons: + return "" + pairs = sorted(reasons.items(), key=lambda item: (-item[1], item[0]))[:5] + return "pod_waiting_reasons: " + "; ".join([f"{key}={val}" for key, val in pairs]) + + def _append_workload_health(lines: list[str], summary: dict[str, Any]) -> None: health = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {} if not health: @@ -796,6 +832,69 @@ def _append_job_failures(lines: list[str], summary: dict[str, Any]) -> None: lines.append("job_failures_24h: " + "; ".join(parts)) +def _append_jobs(lines: list[str], summary: dict[str, Any]) -> None: + jobs = summary.get("jobs") if isinstance(summary.get("jobs"), dict) else {} + if not jobs: + return + totals_line = _format_jobs_totals(jobs) + if totals_line: + lines.append(totals_line) + failing_line = _format_jobs_failing(jobs) + if failing_line: + lines.append(failing_line) + active_line = _format_jobs_active_oldest(jobs) + if active_line: + lines.append(active_line) + + +def _format_jobs_totals(jobs: dict[str, Any]) -> str: + totals = jobs.get("totals") if isinstance(jobs.get("totals"), dict) else {} + if not totals: + return "" + return "jobs: total={total}, active={active}, failed={failed}, succeeded={succeeded}".format( + total=totals.get("total"), + active=totals.get("active"), + failed=totals.get("failed"), + succeeded=totals.get("succeeded"), + ) + + +def _format_jobs_failing(jobs: dict[str, Any]) -> str: + failing = jobs.get("failing") if isinstance(jobs.get("failing"), list) else [] + if not failing: + return "" + parts = [] + for item in failing[:5]: + if not isinstance(item, dict): + continue + namespace = item.get("namespace") + name = item.get("job") + failed = item.get("failed") + age = item.get("age_hours") + if namespace and name and failed is not None: + label = f"{namespace}/{name}={failed}" + if age is not None: + label = f"{label} ({_format_float(age)}h)" + parts.append(label) + return "jobs_failing_top: " + "; ".join(parts) if parts else "" + + +def _format_jobs_active_oldest(jobs: dict[str, Any]) -> str: + active_oldest = jobs.get("active_oldest") if isinstance(jobs.get("active_oldest"), list) else [] + if not active_oldest: + return "" + parts = [] + for item in active_oldest[:5]: + if not isinstance(item, dict): + continue + namespace = item.get("namespace") + name = item.get("job") + age = item.get("age_hours") + if namespace and name and age is not None: + parts.append(f"{namespace}/{name}={_format_float(age)}h") + return "jobs_active_oldest: " + "; ".join(parts) if parts else "" + + def _append_postgres(lines: list[str], summary: dict[str, Any]) -> None: postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {} if not postgres: @@ -903,6 +1002,7 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: _append_pod_usage(lines, summary) _append_restarts(lines, summary) _append_job_failures(lines, summary) + _append_jobs(lines, summary) _append_postgres(lines, summary) _append_hottest(lines, summary) _append_pvc_usage(lines, summary)