atlasbot: ignore mentions and gate cluster context

This commit is contained in:
Brad Stein 2026-01-27 14:38:05 -03:00
parent d03c846779
commit cd45b7faba

View File

@ -121,6 +121,49 @@ METRIC_HINTS = {
"pods": ("pods", "pod"),
}
CLUSTER_HINT_WORDS = {
"atlas",
"titan",
"cluster",
"k8s",
"kubernetes",
"node",
"nodes",
"pod",
"pods",
"namespace",
"service",
"deployment",
"daemonset",
"statefulset",
"grafana",
"victoria",
"prometheus",
"ariadne",
"mailu",
"nextcloud",
"vaultwarden",
"firefly",
"wger",
"jellyfin",
"planka",
"budget",
"element",
"synapse",
"mas",
"comms",
"longhorn",
"harbor",
"jenkins",
"gitea",
"flux",
"keycloak",
"postgres",
"database",
"db",
"atlasbot",
}
_OLLAMA_LOCK = threading.Lock()
HARDWARE_HINTS = {
@ -231,6 +274,18 @@ def is_mentioned(content: dict, body: str) -> bool:
return False
return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids)
def _strip_bot_mention(text: str) -> str:
if not text:
return ""
if not MENTION_LOCALPARTS:
return text.strip()
names = [re.escape(name) for name in MENTION_LOCALPARTS if name]
if not names:
return text.strip()
pattern = r"^(?:\s*@?(?:" + "|".join(names) + r")(?::)?\s+)+"
cleaned = re.sub(pattern, "", text, flags=re.IGNORECASE).strip()
return cleaned or text.strip()
# Matrix HTTP helper.
def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None):
@ -1780,12 +1835,15 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if not prompt:
self._write_json(400, {"error": "missing_prompt"})
return
cleaned = _strip_bot_mention(prompt)
snapshot = _snapshot_state()
inventory = _snapshot_inventory(snapshot) or node_inventory_live()
workloads = _snapshot_workloads(snapshot)
metrics_summary = snapshot_context(prompt, snapshot)
cluster_query = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
metrics_summary = snapshot_context(cleaned, snapshot) if cluster_query else ""
if cluster_query:
structured = structured_answer(
prompt,
cleaned,
inventory=inventory,
metrics_summary=metrics_summary,
snapshot=snapshot,
@ -1794,19 +1852,21 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if structured:
self._write_json(200, {"answer": structured})
return
context = ""
if cluster_query:
context = build_context(
prompt,
cleaned,
allow_tools=False,
targets=[],
inventory=inventory,
snapshot=snapshot,
workloads=workloads,
)
metrics_context, _metrics_fallback = metrics_query_context(prompt, allow_tools=True)
metrics_context, _metrics_fallback = metrics_query_context(cleaned, allow_tools=True)
if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
fallback = "I don't have enough data to answer that."
answer = ollama_reply(("http", "internal"), prompt, context=context, fallback=fallback)
answer = ollama_reply(("http", "internal"), cleaned, context=context, fallback=fallback)
self._write_json(200, {"answer": answer})
@ -1920,6 +1980,37 @@ def _knowledge_intent(prompt: str) -> bool:
)
def _is_cluster_query(
prompt: str,
*,
inventory: list[dict[str, Any]] | None,
workloads: list[dict[str, Any]] | None,
) -> bool:
q = normalize_query(prompt)
if not q:
return False
if TITAN_NODE_RE.search(q):
return True
if any(word in q for word in CLUSTER_HINT_WORDS):
return True
for host_match in HOST_RE.finditer(q):
host = host_match.group(1).lower()
if host.endswith("bstein.dev"):
return True
tokens = set(_tokens(q))
if workloads:
for entry in workloads:
if not isinstance(entry, dict):
continue
if tokens & _workload_tokens(entry):
return True
if inventory:
names = {node.get("name") for node in inventory if isinstance(node, dict)}
if tokens & {n for n in names if n}:
return True
return False
def _inventory_summary(inventory: list[dict[str, Any]]) -> str:
if not inventory:
return ""
@ -1958,7 +2049,8 @@ def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
"Do not suggest commands unless explicitly asked. "
"Respond in plain sentences; do not return JSON or code fences unless explicitly asked. "
"Translate metrics into natural language instead of echoing raw label/value pairs. "
"Do not answer by only listing runbooks; summarize the cluster first and mention docs only if useful. "
"Do not answer by only listing runbooks; if the question is about Atlas/Othrys, summarize the cluster first and mention docs only if useful. "
"If the question is not about Atlas/Othrys and no cluster context is provided, answer using general knowledge and say when you are unsure. "
"If the answer is not grounded in the provided context or tool data, say you do not know. "
"End every response with a line: 'Confidence: high|medium|low'."
)
@ -2087,7 +2179,8 @@ def sync_loop(token: str, room_id: str):
if not (is_dm or mentioned):
continue
lower_body = body.lower()
cleaned_body = _strip_bot_mention(body)
lower_body = cleaned_body.lower()
# Only do live cluster introspection in DMs; metrics can be answered when mentioned.
allow_tools = is_dm
@ -2101,7 +2194,7 @@ def sync_loop(token: str, room_id: str):
# Attempt to scope tools to the most likely workloads when hostnames are mentioned.
targets: list[tuple[str, str]] = []
for m in HOST_RE.finditer(body.lower()):
for m in HOST_RE.finditer(lower_body):
host = m.group(1).lower()
for ep in _HOST_INDEX.get(host, []):
backend = ep.get("backend") or {}
@ -2111,13 +2204,16 @@ def sync_loop(token: str, room_id: str):
targets.append((ns, str(w["name"])))
snapshot = _snapshot_state()
inventory = node_inventory_for_prompt(body)
inventory = node_inventory_for_prompt(cleaned_body)
if not inventory:
inventory = _snapshot_inventory(snapshot)
workloads = _snapshot_workloads(snapshot)
metrics_summary = snapshot_context(body, snapshot)
cluster_query = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
metrics_summary = snapshot_context(cleaned_body, snapshot) if cluster_query else ""
structured = ""
if cluster_query:
structured = structured_answer(
body,
cleaned_body,
inventory=inventory,
metrics_summary=metrics_summary,
snapshot=snapshot,
@ -2128,8 +2224,10 @@ def sync_loop(token: str, room_id: str):
history[hist_key] = history[hist_key][-80:]
send_msg(token, rid, structured)
continue
context = ""
if cluster_query:
context = build_context(
body,
cleaned_body,
allow_tools=allow_tools,
targets=targets,
inventory=inventory,
@ -2141,7 +2239,8 @@ def sync_loop(token: str, room_id: str):
rendered = vm_render_result(res, limit=15) or "(no results)"
extra = "VictoriaMetrics (PromQL result):\n" + rendered
context = (context + "\n\n" + extra).strip() if context else extra
metrics_context, _metrics_fallback = metrics_query_context(body, allow_tools=allow_metrics)
if cluster_query:
metrics_context, _metrics_fallback = metrics_query_context(cleaned_body, allow_tools=allow_metrics)
if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
@ -2151,7 +2250,7 @@ def sync_loop(token: str, room_id: str):
token,
rid,
hist_key,
body,
cleaned_body,
context=context,
fallback=fallback,
)