atlasbot: enrich fact pack and selection
This commit is contained in:
parent
6578a8b08a
commit
474c472b1d
@ -936,6 +936,28 @@ def _node_usage_table(metrics: dict[str, Any]) -> list[dict[str, Any]]:
|
|||||||
per_node.setdefault(node, {})[metric_name] = entry.get("value")
|
per_node.setdefault(node, {})[metric_name] = entry.get("value")
|
||||||
return [{"node": node, **vals} for node, vals in sorted(per_node.items())]
|
return [{"node": node, **vals} for node, vals in sorted(per_node.items())]
|
||||||
|
|
||||||
|
def _usage_extremes(usage_table: list[dict[str, Any]]) -> dict[str, tuple[str, float]]:
|
||||||
|
extremes: dict[str, tuple[str, float]] = {}
|
||||||
|
for metric in ("cpu", "ram", "net", "io"):
|
||||||
|
values: list[tuple[str, float]] = []
|
||||||
|
for entry in usage_table:
|
||||||
|
node = entry.get("node")
|
||||||
|
raw = entry.get(metric)
|
||||||
|
if not node or raw is None:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
value = float(raw)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
values.append((node, value))
|
||||||
|
if not values:
|
||||||
|
continue
|
||||||
|
lowest = min(values, key=lambda item: item[1])
|
||||||
|
highest = max(values, key=lambda item: item[1])
|
||||||
|
extremes[f"min_{metric}"] = lowest
|
||||||
|
extremes[f"max_{metric}"] = highest
|
||||||
|
return extremes
|
||||||
|
|
||||||
def _workloads_for_facts(workloads: list[dict[str, Any]], limit: int = 25) -> list[dict[str, Any]]:
|
def _workloads_for_facts(workloads: list[dict[str, Any]], limit: int = 25) -> list[dict[str, Any]]:
|
||||||
cleaned: list[dict[str, Any]] = []
|
cleaned: list[dict[str, Any]] = []
|
||||||
for entry in workloads:
|
for entry in workloads:
|
||||||
@ -1023,6 +1045,13 @@ def facts_context(
|
|||||||
lines.append(f"- arch {key}: {', '.join(nodes_list)}")
|
lines.append(f"- arch {key}: {', '.join(nodes_list)}")
|
||||||
if control_plane_nodes:
|
if control_plane_nodes:
|
||||||
lines.append(f"- control_plane_nodes: {', '.join(control_plane_nodes)}")
|
lines.append(f"- control_plane_nodes: {', '.join(control_plane_nodes)}")
|
||||||
|
control_plane_by_hw: dict[str, list[str]] = collections.defaultdict(list)
|
||||||
|
for node in inv:
|
||||||
|
if node.get("name") in control_plane_nodes:
|
||||||
|
control_plane_by_hw[node.get("hardware") or "unknown"].append(node["name"])
|
||||||
|
parts = [f"{hw}={', '.join(sorted(nodes))}" for hw, nodes in sorted(control_plane_by_hw.items())]
|
||||||
|
if parts:
|
||||||
|
lines.append(f"- control_plane_by_hardware: {', '.join(parts)}")
|
||||||
if worker_nodes:
|
if worker_nodes:
|
||||||
lines.append(f"- worker_nodes: {', '.join(worker_nodes)}")
|
lines.append(f"- worker_nodes: {', '.join(worker_nodes)}")
|
||||||
if ready_workers or not_ready_workers:
|
if ready_workers or not_ready_workers:
|
||||||
@ -1068,6 +1097,22 @@ def facts_context(
|
|||||||
if value is not None:
|
if value is not None:
|
||||||
lines.append(f"- {key}: {value}")
|
lines.append(f"- {key}: {value}")
|
||||||
|
|
||||||
|
top_restarts = metrics.get("top_restarts_1h") if isinstance(metrics.get("top_restarts_1h"), list) else []
|
||||||
|
if top_restarts:
|
||||||
|
items = []
|
||||||
|
for entry in top_restarts[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") or {}
|
||||||
|
pod = metric.get("pod") or metric.get("name") or ""
|
||||||
|
ns = metric.get("namespace") or ""
|
||||||
|
value = entry.get("value")
|
||||||
|
label = f"{ns}/{pod}".strip("/")
|
||||||
|
if label and value is not None:
|
||||||
|
items.append(f"{label}={value}")
|
||||||
|
if items:
|
||||||
|
lines.append(f"- top_restarts_1h: {', '.join(items)}")
|
||||||
|
|
||||||
usage_table = _node_usage_table(metrics)
|
usage_table = _node_usage_table(metrics)
|
||||||
if usage_table:
|
if usage_table:
|
||||||
lines.append("- node_usage (cpu/ram/net/io):")
|
lines.append("- node_usage (cpu/ram/net/io):")
|
||||||
@ -1088,6 +1133,18 @@ def facts_context(
|
|||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
lines.append(f" - {node}: cpu={cpu}, ram={ram}, net={net}, io={io_val}")
|
lines.append(f" - {node}: cpu={cpu}, ram={ram}, net={net}, io={io_val}")
|
||||||
|
extremes = _usage_extremes(usage_table)
|
||||||
|
for metric in ("cpu", "ram", "net", "io"):
|
||||||
|
min_key = f"min_{metric}"
|
||||||
|
if min_key not in extremes:
|
||||||
|
continue
|
||||||
|
node, value = extremes[min_key]
|
||||||
|
value_fmt = _format_metric_value(
|
||||||
|
str(value),
|
||||||
|
percent=metric in ("cpu", "ram"),
|
||||||
|
rate=metric in ("net", "io"),
|
||||||
|
)
|
||||||
|
lines.append(f"- lowest_{metric}: {node} ({value_fmt})")
|
||||||
|
|
||||||
if nodes_in_query:
|
if nodes_in_query:
|
||||||
lines.append("- node_details:")
|
lines.append("- node_details:")
|
||||||
@ -1112,13 +1169,37 @@ def facts_context(
|
|||||||
wl = entry.get("workload") or ""
|
wl = entry.get("workload") or ""
|
||||||
primary = entry.get("primary_node") or ""
|
primary = entry.get("primary_node") or ""
|
||||||
pods_total = entry.get("pods_total")
|
pods_total = entry.get("pods_total")
|
||||||
|
pods_running = entry.get("pods_running")
|
||||||
label = f"{ns}/{wl}" if ns and wl else (wl or ns)
|
label = f"{ns}/{wl}" if ns and wl else (wl or ns)
|
||||||
if not label:
|
if not label:
|
||||||
continue
|
continue
|
||||||
if primary:
|
if primary:
|
||||||
lines.append(f" - {label}: primary_node={primary}, pods_total={pods_total}")
|
lines.append(
|
||||||
|
f" - {label}: primary_node={primary}, pods_total={pods_total}, pods_running={pods_running}"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
lines.append(f" - {label}: pods_total={pods_total}")
|
lines.append(f" - {label}: pods_total={pods_total}, pods_running={pods_running}")
|
||||||
|
top = max(
|
||||||
|
(entry for entry in workload_entries if isinstance(entry.get("pods_total"), (int, float))),
|
||||||
|
key=lambda item: item.get("pods_total", 0),
|
||||||
|
default=None,
|
||||||
|
)
|
||||||
|
if isinstance(top, dict) and top.get("pods_total") is not None:
|
||||||
|
label = f"{top.get('namespace')}/{top.get('workload')}".strip("/")
|
||||||
|
lines.append(f"- workload_most_pods: {label} ({top.get('pods_total')})")
|
||||||
|
zero_running = [
|
||||||
|
entry
|
||||||
|
for entry in workload_entries
|
||||||
|
if isinstance(entry.get("pods_running"), (int, float)) and entry.get("pods_running") == 0
|
||||||
|
]
|
||||||
|
if zero_running:
|
||||||
|
labels = []
|
||||||
|
for entry in zero_running:
|
||||||
|
label = f"{entry.get('namespace')}/{entry.get('workload')}".strip("/")
|
||||||
|
if label:
|
||||||
|
labels.append(label)
|
||||||
|
if labels:
|
||||||
|
lines.append(f"- workloads_zero_running: {', '.join(labels)}")
|
||||||
|
|
||||||
rendered = "\n".join(lines)
|
rendered = "\n".join(lines)
|
||||||
return rendered[:MAX_FACTS_CHARS]
|
return rendered[:MAX_FACTS_CHARS]
|
||||||
@ -2609,15 +2690,15 @@ def _fact_line_tags(line: str) -> set[str]:
|
|||||||
tags.add("architecture")
|
tags.add("architecture")
|
||||||
if any(key in text for key in ("rpi", "jetson", "amd64", "arm64", "non_raspberry_pi")):
|
if any(key in text for key in ("rpi", "jetson", "amd64", "arm64", "non_raspberry_pi")):
|
||||||
tags.update({"hardware", "inventory"})
|
tags.update({"hardware", "inventory"})
|
||||||
if "control_plane_nodes" in text or "worker_nodes" in text:
|
if "control_plane_nodes" in text or "control_plane_by_hardware" in text or "worker_nodes" in text:
|
||||||
tags.add("inventory")
|
tags.add("inventory")
|
||||||
if any(key in text for key in ("hottest_", "node_usage", "cpu=", "ram=", "net=", "io=")):
|
if any(key in text for key in ("hottest_", "lowest_", "node_usage", "cpu=", "ram=", "net=", "io=")):
|
||||||
tags.add("utilization")
|
tags.add("utilization")
|
||||||
if "postgres_" in text or "postgres connections" in text:
|
if "postgres_" in text or "postgres connections" in text:
|
||||||
tags.add("database")
|
tags.add("database")
|
||||||
if "pods_" in text or "pod phases" in text:
|
if "pods_" in text or "pod phases" in text or "restarts" in text:
|
||||||
tags.add("pods")
|
tags.add("pods")
|
||||||
if "workloads" in text or "primary_node" in text:
|
if "workloads" in text or "primary_node" in text or "workload_" in text:
|
||||||
tags.add("workloads")
|
tags.add("workloads")
|
||||||
if "node_details" in text:
|
if "node_details" in text:
|
||||||
tags.add("node_detail")
|
tags.add("node_detail")
|
||||||
@ -3140,8 +3221,15 @@ def _open_ended_select_facts(
|
|||||||
selected.append(fid)
|
selected.append(fid)
|
||||||
if len(selected) >= count:
|
if len(selected) >= count:
|
||||||
break
|
break
|
||||||
if not selected:
|
seed = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
|
||||||
selected = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
|
if selected:
|
||||||
|
for fid in seed:
|
||||||
|
if fid not in selected:
|
||||||
|
selected.append(fid)
|
||||||
|
if len(selected) >= count:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
selected = seed
|
||||||
return selected
|
return selected
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user