atlasbot: expose job failure metrics
This commit is contained in:
parent
2e86985fe4
commit
46855343ce
@ -660,6 +660,23 @@ def _append_restarts(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
lines.append("restarts_1h_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_job_failures(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||
failures = metrics.get("job_failures_24h") if isinstance(metrics.get("job_failures_24h"), list) else []
|
||||
if not failures:
|
||||
return
|
||||
parts = []
|
||||
for entry in failures:
|
||||
metric = entry.get("metric") if isinstance(entry, dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
job_name = metric.get("job_name") or metric.get("job")
|
||||
value = entry.get("value")
|
||||
if namespace and job_name and value is not None:
|
||||
parts.append(f"{namespace}/{job_name}={_format_float(value)}")
|
||||
if parts:
|
||||
lines.append("job_failures_24h: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_postgres(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
|
||||
if not postgres:
|
||||
@ -762,6 +779,7 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
|
||||
_append_namespace_usage(lines, summary)
|
||||
_append_pod_usage(lines, summary)
|
||||
_append_restarts(lines, summary)
|
||||
_append_job_failures(lines, summary)
|
||||
_append_postgres(lines, summary)
|
||||
_append_hottest(lines, summary)
|
||||
_append_pvc_usage(lines, summary)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user