atlasbot: ignore mentions and gate cluster context

This commit is contained in:
Brad Stein 2026-01-27 14:38:05 -03:00
parent d03c846779
commit cd45b7faba

View File

@ -121,6 +121,49 @@ METRIC_HINTS = {
"pods": ("pods", "pod"), "pods": ("pods", "pod"),
} }
# Keywords that mark a prompt as being about the Atlas/Othrys cluster:
# generic Kubernetes vocabulary plus the names of apps/services hosted there.
CLUSTER_HINT_WORDS = set(
    """
    atlas titan cluster k8s kubernetes node nodes pod pods namespace
    service deployment daemonset statefulset grafana victoria prometheus
    ariadne mailu nextcloud vaultwarden firefly wger jellyfin planka
    budget element synapse mas comms longhorn harbor jenkins gitea flux
    keycloak postgres database db atlasbot
    """.split()
)
_OLLAMA_LOCK = threading.Lock() _OLLAMA_LOCK = threading.Lock()
HARDWARE_HINTS = { HARDWARE_HINTS = {
@ -231,6 +274,18 @@ def is_mentioned(content: dict, body: str) -> bool:
return False return False
return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids)
def _strip_bot_mention(text: str) -> str:
    """Strip leading bot-mention prefixes (e.g. "@atlasbot: ") from *text*.

    Returns the remainder stripped of surrounding whitespace; if the message
    consisted only of the mention, the original stripped text is returned so
    callers never receive an empty string for a non-empty input.
    """
    if not text:
        return ""
    stripped = text.strip()
    # MENTION_LOCALPARTS is a module-level list of the bot's known names.
    escaped = [re.escape(n) for n in MENTION_LOCALPARTS if n] if MENTION_LOCALPARTS else []
    if not escaped:
        return stripped
    # One or more leading "@name:" / "name " prefixes, case-insensitively.
    mention_prefix = r"^(?:\s*@?(?:" + "|".join(escaped) + r")(?::)?\s+)+"
    remainder = re.sub(mention_prefix, "", text, flags=re.IGNORECASE).strip()
    return remainder or stripped
# Matrix HTTP helper. # Matrix HTTP helper.
def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None):
@ -1780,12 +1835,15 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if not prompt: if not prompt:
self._write_json(400, {"error": "missing_prompt"}) self._write_json(400, {"error": "missing_prompt"})
return return
cleaned = _strip_bot_mention(prompt)
snapshot = _snapshot_state() snapshot = _snapshot_state()
inventory = _snapshot_inventory(snapshot) or node_inventory_live() inventory = _snapshot_inventory(snapshot) or node_inventory_live()
workloads = _snapshot_workloads(snapshot) workloads = _snapshot_workloads(snapshot)
metrics_summary = snapshot_context(prompt, snapshot) cluster_query = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
metrics_summary = snapshot_context(cleaned, snapshot) if cluster_query else ""
if cluster_query:
structured = structured_answer( structured = structured_answer(
prompt, cleaned,
inventory=inventory, inventory=inventory,
metrics_summary=metrics_summary, metrics_summary=metrics_summary,
snapshot=snapshot, snapshot=snapshot,
@ -1794,19 +1852,21 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if structured: if structured:
self._write_json(200, {"answer": structured}) self._write_json(200, {"answer": structured})
return return
context = ""
if cluster_query:
context = build_context( context = build_context(
prompt, cleaned,
allow_tools=False, allow_tools=False,
targets=[], targets=[],
inventory=inventory, inventory=inventory,
snapshot=snapshot, snapshot=snapshot,
workloads=workloads, workloads=workloads,
) )
metrics_context, _metrics_fallback = metrics_query_context(prompt, allow_tools=True) metrics_context, _metrics_fallback = metrics_query_context(cleaned, allow_tools=True)
if metrics_context: if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
fallback = "I don't have enough data to answer that." fallback = "I don't have enough data to answer that."
answer = ollama_reply(("http", "internal"), prompt, context=context, fallback=fallback) answer = ollama_reply(("http", "internal"), cleaned, context=context, fallback=fallback)
self._write_json(200, {"answer": answer}) self._write_json(200, {"answer": answer})
@ -1920,6 +1980,37 @@ def _knowledge_intent(prompt: str) -> bool:
) )
def _is_cluster_query(
    prompt: str,
    *,
    inventory: list[dict[str, Any]] | None,
    workloads: list[dict[str, Any]] | None,
) -> bool:
    """Heuristically decide whether *prompt* is about the cluster.

    True when the normalized query mentions a titan node, any cluster hint
    word, a bstein.dev hostname, or shares tokens with a known workload or
    inventory node name; False otherwise (including for empty queries).
    """
    query = normalize_query(prompt)
    if not query:
        return False
    # Direct signals: a titan node reference or any cluster vocabulary word.
    if TITAN_NODE_RE.search(query) is not None:
        return True
    if any(hint in query for hint in CLUSTER_HINT_WORDS):
        return True
    # Any hostname under bstein.dev implies a cluster-hosted service.
    if any(
        m.group(1).lower().endswith("bstein.dev")
        for m in HOST_RE.finditer(query)
    ):
        return True
    words = set(_tokens(query))
    # Token overlap with a known workload ...
    if any(
        words & _workload_tokens(item)
        for item in (workloads or [])
        if isinstance(item, dict)
    ):
        return True
    # ... or with a node name from the inventory.
    node_names = {
        node.get("name")
        for node in (inventory or [])
        if isinstance(node, dict)
    }
    return bool(words & {name for name in node_names if name})
def _inventory_summary(inventory: list[dict[str, Any]]) -> str: def _inventory_summary(inventory: list[dict[str, Any]]) -> str:
if not inventory: if not inventory:
return "" return ""
@ -1958,7 +2049,8 @@ def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
"Do not suggest commands unless explicitly asked. " "Do not suggest commands unless explicitly asked. "
"Respond in plain sentences; do not return JSON or code fences unless explicitly asked. " "Respond in plain sentences; do not return JSON or code fences unless explicitly asked. "
"Translate metrics into natural language instead of echoing raw label/value pairs. " "Translate metrics into natural language instead of echoing raw label/value pairs. "
"Do not answer by only listing runbooks; summarize the cluster first and mention docs only if useful. " "Do not answer by only listing runbooks; if the question is about Atlas/Othrys, summarize the cluster first and mention docs only if useful. "
"If the question is not about Atlas/Othrys and no cluster context is provided, answer using general knowledge and say when you are unsure. "
"If the answer is not grounded in the provided context or tool data, say you do not know. " "If the answer is not grounded in the provided context or tool data, say you do not know. "
"End every response with a line: 'Confidence: high|medium|low'." "End every response with a line: 'Confidence: high|medium|low'."
) )
@ -2087,7 +2179,8 @@ def sync_loop(token: str, room_id: str):
if not (is_dm or mentioned): if not (is_dm or mentioned):
continue continue
lower_body = body.lower() cleaned_body = _strip_bot_mention(body)
lower_body = cleaned_body.lower()
# Only do live cluster introspection in DMs; metrics can be answered when mentioned. # Only do live cluster introspection in DMs; metrics can be answered when mentioned.
allow_tools = is_dm allow_tools = is_dm
@ -2101,7 +2194,7 @@ def sync_loop(token: str, room_id: str):
# Attempt to scope tools to the most likely workloads when hostnames are mentioned. # Attempt to scope tools to the most likely workloads when hostnames are mentioned.
targets: list[tuple[str, str]] = [] targets: list[tuple[str, str]] = []
for m in HOST_RE.finditer(body.lower()): for m in HOST_RE.finditer(lower_body):
host = m.group(1).lower() host = m.group(1).lower()
for ep in _HOST_INDEX.get(host, []): for ep in _HOST_INDEX.get(host, []):
backend = ep.get("backend") or {} backend = ep.get("backend") or {}
@ -2111,13 +2204,16 @@ def sync_loop(token: str, room_id: str):
targets.append((ns, str(w["name"]))) targets.append((ns, str(w["name"])))
snapshot = _snapshot_state() snapshot = _snapshot_state()
inventory = node_inventory_for_prompt(body) inventory = node_inventory_for_prompt(cleaned_body)
if not inventory: if not inventory:
inventory = _snapshot_inventory(snapshot) inventory = _snapshot_inventory(snapshot)
workloads = _snapshot_workloads(snapshot) workloads = _snapshot_workloads(snapshot)
metrics_summary = snapshot_context(body, snapshot) cluster_query = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
metrics_summary = snapshot_context(cleaned_body, snapshot) if cluster_query else ""
structured = ""
if cluster_query:
structured = structured_answer( structured = structured_answer(
body, cleaned_body,
inventory=inventory, inventory=inventory,
metrics_summary=metrics_summary, metrics_summary=metrics_summary,
snapshot=snapshot, snapshot=snapshot,
@ -2128,8 +2224,10 @@ def sync_loop(token: str, room_id: str):
history[hist_key] = history[hist_key][-80:] history[hist_key] = history[hist_key][-80:]
send_msg(token, rid, structured) send_msg(token, rid, structured)
continue continue
context = ""
if cluster_query:
context = build_context( context = build_context(
body, cleaned_body,
allow_tools=allow_tools, allow_tools=allow_tools,
targets=targets, targets=targets,
inventory=inventory, inventory=inventory,
@ -2141,7 +2239,8 @@ def sync_loop(token: str, room_id: str):
rendered = vm_render_result(res, limit=15) or "(no results)" rendered = vm_render_result(res, limit=15) or "(no results)"
extra = "VictoriaMetrics (PromQL result):\n" + rendered extra = "VictoriaMetrics (PromQL result):\n" + rendered
context = (context + "\n\n" + extra).strip() if context else extra context = (context + "\n\n" + extra).strip() if context else extra
metrics_context, _metrics_fallback = metrics_query_context(body, allow_tools=allow_metrics) if cluster_query:
metrics_context, _metrics_fallback = metrics_query_context(cleaned_body, allow_tools=allow_metrics)
if metrics_context: if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
@ -2151,7 +2250,7 @@ def sync_loop(token: str, room_id: str):
token, token,
rid, rid,
hist_key, hist_key,
body, cleaned_body,
context=context, context=context,
fallback=fallback, fallback=fallback,
) )