diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml
index 377a076..7cb2d7d 100644
--- a/services/comms/atlasbot-deployment.yaml
+++ b/services/comms/atlasbot-deployment.yaml
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-31
+        checksum/atlasbot-configmap: manual-atlasbot-32
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index 3f05529..9e8e0dd 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -688,6 +688,20 @@ def _workloads_for_facts(workloads: list[dict[str, Any]], limit: int = 25) -> li
     )
     return cleaned[:limit]
 
+def _workloads_for_prompt(prompt: str, workloads: list[dict[str, Any]], limit: int = 12) -> list[dict[str, Any]]:
+    tokens = set(_tokens(prompt))
+    if tokens:
+        matched: list[dict[str, Any]] = []
+        for entry in workloads:
+            if not isinstance(entry, dict):
+                continue
+            entry_tokens = _workload_tokens(entry)
+            if entry_tokens & tokens:
+                matched.append(entry)
+        if matched:
+            return _workloads_for_facts(matched, limit=limit)
+    return _workloads_for_facts(workloads, limit=limit)
+
 def facts_context(
     prompt: str,
     *,
@@ -701,77 +715,91 @@
     summary = snapshot.get("nodes_summary") if isinstance(snapshot, dict) else {}
     expected_workers = expected_worker_nodes_from_metrics()
     ready_workers, not_ready_workers = worker_nodes_status(inv) if inv else ([], [])
+    total = summary.get("total") if isinstance(summary, dict) and summary.get("total") is not None else nodes.get("total")
+    ready = summary.get("ready") if isinstance(summary, dict) and summary.get("ready") is not None else nodes.get("ready")
+    not_ready = summary.get("not_ready") if isinstance(summary, dict) and summary.get("not_ready") is not None else nodes.get("not_ready")
+    not_ready_names = summary.get("not_ready_names") if isinstance(summary, dict) else nodes.get("not_ready_names")
+    by_hardware = _group_nodes(inv) if inv else {}
+    by_arch = _nodes_by_arch(inv) if inv else {}
 
-    facts: dict[str, Any] = {
-        "generated_at": snapshot.get("generated_at") if isinstance(snapshot, dict) else None,
-        "nodes": {
-            "total": summary.get("total") if isinstance(summary, dict) and summary.get("total") is not None else nodes.get("total"),
-            "ready": summary.get("ready") if isinstance(summary, dict) and summary.get("ready") is not None else nodes.get("ready"),
-            "not_ready": summary.get("not_ready") if isinstance(summary, dict) and summary.get("not_ready") is not None else nodes.get("not_ready"),
-            "not_ready_names": summary.get("not_ready_names") if isinstance(summary, dict) else nodes.get("not_ready_names"),
-            "by_hardware": _group_nodes(inv) if inv else {},
-            "by_arch": _nodes_by_arch(inv) if inv else {},
-            "workers_ready": ready_workers,
-            "workers_not_ready": not_ready_workers,
-            "expected_workers": expected_workers,
-        },
-        "metrics": {
-            "hottest_nodes": metrics.get("hottest_nodes") if isinstance(metrics, dict) else {},
-            "postgres_connections": metrics.get("postgres_connections") if isinstance(metrics, dict) else {},
-            "node_usage": _node_usage_table(metrics) if isinstance(metrics, dict) else [],
-        },
-        "workloads": _workloads_for_facts(workloads or []),
-    }
-
-    summary_lines: list[str] = []
-    nodes_info = facts.get("nodes") if isinstance(facts.get("nodes"), dict) else {}
-    if nodes_info.get("total") is not None:
-        summary_lines.append(
-            f"nodes_total={nodes_info.get('total')}, ready={nodes_info.get('ready')}, not_ready={nodes_info.get('not_ready')}"
+    lines: list[str] = ["Facts (live snapshot):"]
+    if total is not None:
+        lines.append(f"- nodes_total={total}, ready={ready}, not_ready={not_ready}")
+    if not_ready_names:
+        lines.append(f"- nodes_not_ready: {', '.join(not_ready_names)}")
+    for key in ("rpi5", "rpi4", "jetson", "amd64", "arm64-unknown", "unknown"):
+        nodes_list = by_hardware.get(key) or []
+        if nodes_list:
+            lines.append(f"- {key}: {', '.join(nodes_list)}")
+    for key, nodes_list in sorted(by_arch.items()):
+        if nodes_list:
+            lines.append(f"- arch {key}: {', '.join(nodes_list)}")
+    if ready_workers or not_ready_workers:
+        lines.append(f"- workers_ready: {', '.join(ready_workers) if ready_workers else 'none'}")
+    if not_ready_workers:
+        lines.append(f"- workers_not_ready: {', '.join(not_ready_workers)}")
+    if expected_workers:
+        missing = sorted(
+            set(expected_workers)
+            - {n.get("name") for n in inv if isinstance(n, dict) and n.get("name")}
         )
-    hottest = facts.get("metrics", {}).get("hottest_nodes") if isinstance(facts.get("metrics"), dict) else {}
-    if isinstance(hottest, dict) and hottest:
-        for key in ("cpu", "ram", "net", "io"):
-            entry = hottest.get(key) if isinstance(hottest.get(key), dict) else {}
-            node = entry.get("node")
-            value = entry.get("value")
-            if node and value is not None:
-                summary_lines.append(f"hottest_{key}={node} ({value})")
-    postgres = facts.get("metrics", {}).get("postgres_connections") if isinstance(facts.get("metrics"), dict) else {}
+        lines.append(f"- expected_workers: {', '.join(expected_workers)}")
+        if missing:
+            lines.append(f"- expected_workers_missing: {', '.join(missing)}")
+
+    hottest = metrics.get("hottest_nodes") if isinstance(metrics.get("hottest_nodes"), dict) else {}
+    for key in ("cpu", "ram", "net", "io"):
+        entry = hottest.get(key) if isinstance(hottest.get(key), dict) else {}
+        node = entry.get("node")
+        value = entry.get("value")
+        if node and value is not None:
+            lines.append(f"- hottest_{key}: {node} ({value})")
+
+    postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
     if isinstance(postgres, dict) and postgres:
         used = postgres.get("used")
         max_conn = postgres.get("max")
         if used is not None and max_conn is not None:
-            summary_lines.append(f"postgres_used={used}, postgres_max={max_conn}")
+            lines.append(f"- postgres_connections: {used} used / {max_conn} max")
         hottest_db = postgres.get("hottest_db") if isinstance(postgres.get("hottest_db"), dict) else {}
         if hottest_db.get("label"):
-            summary_lines.append(f"postgres_hottest_db={hottest_db.get('label')} ({hottest_db.get('value')})")
+            lines.append(
+                f"- postgres_hottest_db: {hottest_db.get('label')} ({hottest_db.get('value')})"
+            )
 
-    rendered = json.dumps(facts, ensure_ascii=False)
-    rendered_parts = []
-    if summary_lines:
-        rendered_parts.append("Facts summary:\n" + "\n".join(f"- {line}" for line in summary_lines))
-    rendered_parts.append("Facts (live snapshot JSON):\n" + rendered)
-    combined = "\n\n".join(rendered_parts)
-    if len(combined) <= MAX_FACTS_CHARS:
-        return combined
+    usage_table = _node_usage_table(metrics)
+    if usage_table:
+        lines.append("- node_usage (cpu/ram/net/io):")
+        for entry in usage_table:
+            node = entry.get("node")
+            if not node:
+                continue
+            cpu = entry.get("cpu")
+            ram = entry.get("ram")
+            net = entry.get("net")
+            io_val = entry.get("io")
+            lines.append(f"  - {node}: cpu={cpu}, ram={ram}, net={net}, io={io_val}")
 
-    trimmed = dict(facts)
-    trimmed.pop("workloads", None)
-    rendered = json.dumps(trimmed, ensure_ascii=False)
-    combined = "\n\n".join(rendered_parts[:-1] + ["Facts (live snapshot JSON):\n" + rendered])
-    if len(combined) <= MAX_FACTS_CHARS:
-        return combined
+    workload_entries = _workloads_for_prompt(prompt, workloads or [])
+    if workload_entries:
+        lines.append("- workloads:")
+        for entry in workload_entries:
+            if not isinstance(entry, dict):
+                continue
+            ns = entry.get("namespace") or ""
+            wl = entry.get("workload") or ""
+            primary = entry.get("primary_node") or ""
+            pods_total = entry.get("pods_total")
+            label = f"{ns}/{wl}" if ns and wl else (wl or ns)
+            if not label:
+                continue
+            if primary:
+                lines.append(f"  - {label}: primary_node={primary}, pods_total={pods_total}")
+            else:
+                lines.append(f"  - {label}: pods_total={pods_total}")
 
-    trimmed_metrics = dict(trimmed.get("metrics") or {})
-    trimmed_metrics.pop("node_usage", None)
-    trimmed["metrics"] = trimmed_metrics
-    rendered = json.dumps(trimmed, ensure_ascii=False)
-    combined = "\n\n".join(rendered_parts[:-1] + ["Facts (live snapshot JSON):\n" + rendered])
-    if len(combined) <= MAX_FACTS_CHARS:
-        return combined
-
-    return combined[:MAX_FACTS_CHARS]
+    rendered = "\n".join(lines)
+    return rendered[:MAX_FACTS_CHARS]
 
 def _inventory_sets(inventory: list[dict[str, Any]]) -> dict[str, Any]:
     names = [node["name"] for node in inventory]