atlasbot: refine cluster intent handling
parent fc10eed704
commit 23533e08ee
@@ -152,6 +152,16 @@ CLUSTER_HINT_WORDS = {
     "deployment",
     "daemonset",
     "statefulset",
+    "snapshot",
+    "anomaly",
+    "anomalies",
+    "monitor",
+    "monitoring",
+    "runbook",
+    "runbooks",
+    "documentation",
+    "docs",
+    "playbook",
     "grafana",
     "victoria",
     "prometheus",
@@ -203,6 +213,12 @@ _INSIGHT_HINT_WORDS = {
     "favorite",
     "favourite",
     "trivia",
+    "anomaly",
+    "anomalies",
+    "monitor",
+    "monitoring",
+    "alert",
+    "alerts",
     "stand out",
     "stands out",
 }
@@ -532,7 +548,14 @@ def _humanize_rate(value: str, *, unit: str) -> str:
     return f"{val:.2f} B/s"
 
 def _has_any(text: str, phrases: tuple[str, ...]) -> bool:
-    return any(p in text for p in phrases)
+    for phrase in phrases:
+        if " " in phrase:
+            if phrase in text:
+                return True
+        else:
+            if re.search(rf"\b{re.escape(phrase)}\b", text):
+                return True
+    return False
 
 def _detect_operation(q: str) -> str | None:
     if _has_any(q, OPERATION_HINTS["top"]):
@@ -552,6 +575,8 @@ def _detect_metric(q: str) -> str | None:
         part = part.strip()
         if len(part) >= 2:
             expanded.add(part)
+            if part.endswith("s") and len(part) >= 4:
+                expanded.add(part[:-1])
     tokens = expanded
     for metric, phrases in METRIC_HINTS.items():
         for phrase in phrases:
@@ -565,6 +590,8 @@ def _detect_metric(q: str) -> str | None:
 def _detect_hardware_filters(q: str) -> tuple[set[str], set[str]]:
     include: set[str] = set()
     exclude: set[str] = set()
+    if any(term in q for term in ("gpu", "gpus", "accelerator", "accelerators", "cuda", "nvidia")):
+        include.add("jetson")
     rpi_specific = any(
         phrase in q
         for phrase in (
@@ -1287,6 +1314,10 @@ def snapshot_metric_answer(
     failed = metrics.get("pods_failed")
     succeeded = metrics.get("pods_succeeded")
     status_terms = ("running", "pending", "failed", "succeeded", "completed")
+    if "not running" in q or "not in running" in q or "non running" in q:
+        parts = [v for v in (pending, failed, succeeded) if isinstance(v, (int, float))]
+        if parts:
+            return _format_confidence(f"Pods not running: {sum(parts):.0f}.", "high")
     if sum(1 for term in status_terms if term in q) > 1:
         parts = []
         if running is not None:
@@ -1350,6 +1381,8 @@ def structured_answer(
         op = "top"
     entity = _detect_entity(q)
     include_hw, exclude_hw = _detect_hardware_filters(q)
+    if entity is None and (include_hw or exclude_hw):
+        entity = "node"
     nodes_in_query = _extract_titan_nodes(q)
     only_workers = "worker" in q or "workers" in q
     role_filters = _detect_role_filters(q)
@@ -1385,6 +1418,20 @@ def structured_answer(
         if hw_line:
             return _format_confidence(hw_line, "medium")
 
+    if (
+        entity == "node"
+        and any(term in q for term in ("arm64", "amd64"))
+        and any(term in q for term in ("mostly", "majority", "more"))
+    ):
+        arm64_count = len([n for n in inventory if n.get("arch") == "arm64"])
+        amd64_count = len([n for n in inventory if n.get("arch") == "amd64"])
+        if arm64_count or amd64_count:
+            majority = "arm64" if arm64_count >= amd64_count else "amd64"
+            return _format_confidence(
+                f"arm64 nodes: {arm64_count}, amd64 nodes: {amd64_count}. Mostly {majority}.",
+                "high",
+            )
+
     if op == "top" and metric is None and not any(word in q for word in ("hardware", "architecture", "class")):
         metric = "cpu"
 
@@ -1491,6 +1538,27 @@ def structured_answer(
         )
 
     if op == "count":
+        if only_workers and "ready" in q and ("total" in q or "vs" in q or "versus" in q):
+            total_workers = _inventory_filter(
+                inventory,
+                include_hw=include_hw,
+                exclude_hw=exclude_hw,
+                only_workers=True,
+                only_ready=None,
+                nodes_in_query=nodes_in_query,
+            )
+            ready_workers = _inventory_filter(
+                inventory,
+                include_hw=include_hw,
+                exclude_hw=exclude_hw,
+                only_workers=True,
+                only_ready=True,
+                nodes_in_query=nodes_in_query,
+            )
+            return _format_confidence(
+                f"Worker nodes ready: {len(ready_workers)} / {len(total_workers)} total.",
+                "high",
+            )
         if expected_workers and ("expected" in q or "should" in q):
             missing = sorted(set(expected_workers) - {n["name"] for n in inventory})
             msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
@@ -1711,6 +1779,15 @@ def _doc_intent(query: str) -> bool:
             "how to",
             "instructions",
             "playbook",
+            "next step",
+            "next steps",
+            "what should",
+            "what do i",
+            "what to do",
+            "troubleshoot",
+            "triage",
+            "recover",
+            "remediate",
         )
     )
 
@@ -2615,10 +2692,13 @@ def _candidate_note(candidate: dict[str, Any]) -> str:
 def _ensure_scores(answer: str) -> str:
     text = answer.strip()
     lines = [line.strip() for line in text.splitlines() if line.strip()]
-    has_relevance = any(line.lower().startswith("relevance") for line in lines)
-    has_satisfaction = any(line.lower().startswith("satisfaction") for line in lines)
-    has_confidence = any(line.lower().startswith("confidence") for line in lines)
-    has_risk = any(line.lower().startswith("hallucinationrisk") for line in lines)
+    def _score_key(line: str) -> str:
+        cleaned = line.strip().lstrip("-•* ").strip()
+        return cleaned.lower()
+    has_relevance = any(_score_key(line).startswith("relevance") for line in lines)
+    has_satisfaction = any(_score_key(line).startswith("satisfaction") for line in lines)
+    has_confidence = any(_score_key(line).startswith("confidence") for line in lines)
+    has_risk = any(_score_key(line).startswith("hallucinationrisk") for line in lines)
     if not has_confidence:
         lines.append("Confidence: medium")
     if not has_relevance:
@@ -3004,6 +3084,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                 _is_subjective_query(cleaned)
                 or _knowledge_intent(cleaned)
                 or _is_overview_query(cleaned)
+                or _doc_intent(cleaned)
             )
             if open_ended:
                 answer = open_ended_answer(
@@ -3558,6 +3639,7 @@ def sync_loop(token: str, room_id: str):
                 _is_subjective_query(cleaned_body)
                 or _knowledge_intent(cleaned_body)
                 or _is_overview_query(cleaned_body)
+                or _doc_intent(cleaned_body)
             )
             if open_ended:
                 reply = open_ended_with_thinking(