atlasbot: enrich fact pack and selection

This commit is contained in:
Brad Stein 2026-01-28 01:02:14 -03:00
parent 6578a8b08a
commit 474c472b1d

View File

@ -936,6 +936,28 @@ def _node_usage_table(metrics: dict[str, Any]) -> list[dict[str, Any]]:
per_node.setdefault(node, {})[metric_name] = entry.get("value")
return [{"node": node, **vals} for node, vals in sorted(per_node.items())]
def _usage_extremes(usage_table: list[dict[str, Any]]) -> dict[str, tuple[str, float]]:
extremes: dict[str, tuple[str, float]] = {}
for metric in ("cpu", "ram", "net", "io"):
values: list[tuple[str, float]] = []
for entry in usage_table:
node = entry.get("node")
raw = entry.get(metric)
if not node or raw is None:
continue
try:
value = float(raw)
except (TypeError, ValueError):
continue
values.append((node, value))
if not values:
continue
lowest = min(values, key=lambda item: item[1])
highest = max(values, key=lambda item: item[1])
extremes[f"min_{metric}"] = lowest
extremes[f"max_{metric}"] = highest
return extremes
def _workloads_for_facts(workloads: list[dict[str, Any]], limit: int = 25) -> list[dict[str, Any]]:
cleaned: list[dict[str, Any]] = []
for entry in workloads:
@ -1023,6 +1045,13 @@ def facts_context(
lines.append(f"- arch {key}: {', '.join(nodes_list)}")
if control_plane_nodes:
lines.append(f"- control_plane_nodes: {', '.join(control_plane_nodes)}")
control_plane_by_hw: dict[str, list[str]] = collections.defaultdict(list)
for node in inv:
if node.get("name") in control_plane_nodes:
control_plane_by_hw[node.get("hardware") or "unknown"].append(node["name"])
parts = [f"{hw}={', '.join(sorted(nodes))}" for hw, nodes in sorted(control_plane_by_hw.items())]
if parts:
lines.append(f"- control_plane_by_hardware: {', '.join(parts)}")
if worker_nodes:
lines.append(f"- worker_nodes: {', '.join(worker_nodes)}")
if ready_workers or not_ready_workers:
@ -1068,6 +1097,22 @@ def facts_context(
if value is not None:
lines.append(f"- {key}: {value}")
top_restarts = metrics.get("top_restarts_1h") if isinstance(metrics.get("top_restarts_1h"), list) else []
if top_restarts:
items = []
for entry in top_restarts[:5]:
if not isinstance(entry, dict):
continue
metric = entry.get("metric") or {}
pod = metric.get("pod") or metric.get("name") or ""
ns = metric.get("namespace") or ""
value = entry.get("value")
label = f"{ns}/{pod}".strip("/")
if label and value is not None:
items.append(f"{label}={value}")
if items:
lines.append(f"- top_restarts_1h: {', '.join(items)}")
usage_table = _node_usage_table(metrics)
if usage_table:
lines.append("- node_usage (cpu/ram/net/io):")
@ -1088,6 +1133,18 @@ def facts_context(
else ""
)
lines.append(f" - {node}: cpu={cpu}, ram={ram}, net={net}, io={io_val}")
extremes = _usage_extremes(usage_table)
for metric in ("cpu", "ram", "net", "io"):
min_key = f"min_{metric}"
if min_key not in extremes:
continue
node, value = extremes[min_key]
value_fmt = _format_metric_value(
str(value),
percent=metric in ("cpu", "ram"),
rate=metric in ("net", "io"),
)
lines.append(f"- lowest_{metric}: {node} ({value_fmt})")
if nodes_in_query:
lines.append("- node_details:")
@ -1112,13 +1169,37 @@ def facts_context(
wl = entry.get("workload") or ""
primary = entry.get("primary_node") or ""
pods_total = entry.get("pods_total")
pods_running = entry.get("pods_running")
label = f"{ns}/{wl}" if ns and wl else (wl or ns)
if not label:
continue
if primary:
lines.append(f" - {label}: primary_node={primary}, pods_total={pods_total}")
lines.append(
f" - {label}: primary_node={primary}, pods_total={pods_total}, pods_running={pods_running}"
)
else:
lines.append(f" - {label}: pods_total={pods_total}")
lines.append(f" - {label}: pods_total={pods_total}, pods_running={pods_running}")
top = max(
(entry for entry in workload_entries if isinstance(entry.get("pods_total"), (int, float))),
key=lambda item: item.get("pods_total", 0),
default=None,
)
if isinstance(top, dict) and top.get("pods_total") is not None:
label = f"{top.get('namespace')}/{top.get('workload')}".strip("/")
lines.append(f"- workload_most_pods: {label} ({top.get('pods_total')})")
zero_running = [
entry
for entry in workload_entries
if isinstance(entry.get("pods_running"), (int, float)) and entry.get("pods_running") == 0
]
if zero_running:
labels = []
for entry in zero_running:
label = f"{entry.get('namespace')}/{entry.get('workload')}".strip("/")
if label:
labels.append(label)
if labels:
lines.append(f"- workloads_zero_running: {', '.join(labels)}")
rendered = "\n".join(lines)
return rendered[:MAX_FACTS_CHARS]
@ -2609,15 +2690,15 @@ def _fact_line_tags(line: str) -> set[str]:
tags.add("architecture")
if any(key in text for key in ("rpi", "jetson", "amd64", "arm64", "non_raspberry_pi")):
tags.update({"hardware", "inventory"})
if "control_plane_nodes" in text or "worker_nodes" in text:
if "control_plane_nodes" in text or "control_plane_by_hardware" in text or "worker_nodes" in text:
tags.add("inventory")
if any(key in text for key in ("hottest_", "node_usage", "cpu=", "ram=", "net=", "io=")):
if any(key in text for key in ("hottest_", "lowest_", "node_usage", "cpu=", "ram=", "net=", "io=")):
tags.add("utilization")
if "postgres_" in text or "postgres connections" in text:
tags.add("database")
if "pods_" in text or "pod phases" in text:
if "pods_" in text or "pod phases" in text or "restarts" in text:
tags.add("pods")
if "workloads" in text or "primary_node" in text:
if "workloads" in text or "primary_node" in text or "workload_" in text:
tags.add("workloads")
if "node_details" in text:
tags.add("node_detail")
@ -3140,8 +3221,15 @@ def _open_ended_select_facts(
selected.append(fid)
if len(selected) >= count:
break
if not selected:
selected = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
seed = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
if selected:
for fid in seed:
if fid not in selected:
selected.append(fid)
if len(selected) >= count:
break
else:
selected = seed
return selected