atlasbot: ignore mentions and gate cluster context

This commit is contained in:
Brad Stein 2026-01-27 14:38:05 -03:00
parent d03c846779
commit cd45b7faba

View File

@ -121,6 +121,49 @@ METRIC_HINTS = {
"pods": ("pods", "pod"), "pods": ("pods", "pod"),
} }
# Keywords that mark a prompt as being about the Atlas/Othrys cluster:
# generic Kubernetes vocabulary plus the names of apps/services hosted there.
CLUSTER_HINT_WORDS = set(
    """
    atlas titan cluster k8s kubernetes node nodes pod pods namespace
    service deployment daemonset statefulset grafana victoria prometheus
    ariadne mailu nextcloud vaultwarden firefly wger jellyfin planka
    budget element synapse mas comms longhorn harbor jenkins gitea flux
    keycloak postgres database db atlasbot
    """.split()
)
_OLLAMA_LOCK = threading.Lock() _OLLAMA_LOCK = threading.Lock()
HARDWARE_HINTS = { HARDWARE_HINTS = {
@ -231,6 +274,18 @@ def is_mentioned(content: dict, body: str) -> bool:
return False return False
return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids)
def _strip_bot_mention(text: str) -> str:
    """Strip leading bot-mention prefixes (e.g. "@atlasbot: ") from *text*.

    Returns the remainder stripped of surrounding whitespace; if the message
    consisted only of the mention, the original stripped text is returned so
    callers never receive an empty string for a non-empty input.
    """
    if not text:
        return ""
    stripped = text.strip()
    # MENTION_LOCALPARTS is a module-level list of the bot's known names.
    escaped = [re.escape(n) for n in MENTION_LOCALPARTS if n] if MENTION_LOCALPARTS else []
    if not escaped:
        return stripped
    # One or more leading "@name:" / "name " prefixes, case-insensitively.
    mention_prefix = r"^(?:\s*@?(?:" + "|".join(escaped) + r")(?::)?\s+)+"
    remainder = re.sub(mention_prefix, "", text, flags=re.IGNORECASE).strip()
    return remainder or stripped
# Matrix HTTP helper. # Matrix HTTP helper.
def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None):
@ -1780,12 +1835,15 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if not prompt: if not prompt:
self._write_json(400, {"error": "missing_prompt"}) self._write_json(400, {"error": "missing_prompt"})
return return
cleaned = _strip_bot_mention(prompt)
snapshot = _snapshot_state() snapshot = _snapshot_state()
inventory = _snapshot_inventory(snapshot) or node_inventory_live() inventory = _snapshot_inventory(snapshot) or node_inventory_live()
workloads = _snapshot_workloads(snapshot) workloads = _snapshot_workloads(snapshot)
metrics_summary = snapshot_context(prompt, snapshot) cluster_query = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
metrics_summary = snapshot_context(cleaned, snapshot) if cluster_query else ""
if cluster_query:
structured = structured_answer( structured = structured_answer(
prompt, cleaned,
inventory=inventory, inventory=inventory,
metrics_summary=metrics_summary, metrics_summary=metrics_summary,
snapshot=snapshot, snapshot=snapshot,
@ -1794,19 +1852,21 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if structured: if structured:
self._write_json(200, {"answer": structured}) self._write_json(200, {"answer": structured})
return return
context = ""
if cluster_query:
context = build_context( context = build_context(
prompt, cleaned,
allow_tools=False, allow_tools=False,
targets=[], targets=[],
inventory=inventory, inventory=inventory,
snapshot=snapshot, snapshot=snapshot,
workloads=workloads, workloads=workloads,
) )
metrics_context, _metrics_fallback = metrics_query_context(prompt, allow_tools=True) metrics_context, _metrics_fallback = metrics_query_context(cleaned, allow_tools=True)
if metrics_context: if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
fallback = "I don't have enough data to answer that." fallback = "I don't have enough data to answer that."
answer = ollama_reply(("http", "internal"), prompt, context=context, fallback=fallback) answer = ollama_reply(("http", "internal"), cleaned, context=context, fallback=fallback)
self._write_json(200, {"answer": answer}) self._write_json(200, {"answer": answer})
@ -1920,6 +1980,37 @@ def _knowledge_intent(prompt: str) -> bool:
) )
def _is_cluster_query(
    prompt: str,
    *,
    inventory: list[dict[str, Any]] | None,
    workloads: list[dict[str, Any]] | None,
) -> bool:
    """Heuristically decide whether *prompt* is about the cluster.

    True when the normalized query mentions a titan node, any cluster hint
    word, a bstein.dev hostname, or shares tokens with a known workload or
    inventory node name; False otherwise (including for empty queries).
    """
    query = normalize_query(prompt)
    if not query:
        return False
    # Direct signals: a titan node reference or any cluster vocabulary word.
    if TITAN_NODE_RE.search(query) is not None:
        return True
    if any(hint in query for hint in CLUSTER_HINT_WORDS):
        return True
    # Any hostname under bstein.dev implies a cluster-hosted service.
    if any(
        m.group(1).lower().endswith("bstein.dev")
        for m in HOST_RE.finditer(query)
    ):
        return True
    words = set(_tokens(query))
    # Token overlap with a known workload ...
    if any(
        words & _workload_tokens(item)
        for item in (workloads or [])
        if isinstance(item, dict)
    ):
        return True
    # ... or with a node name from the inventory.
    node_names = {
        node.get("name")
        for node in (inventory or [])
        if isinstance(node, dict)
    }
    return bool(words & {name for name in node_names if name})
def _inventory_summary(inventory: list[dict[str, Any]]) -> str: def _inventory_summary(inventory: list[dict[str, Any]]) -> str:
if not inventory: if not inventory:
return "" return ""
@ -1958,7 +2049,8 @@ def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
"Do not suggest commands unless explicitly asked. " "Do not suggest commands unless explicitly asked. "
"Respond in plain sentences; do not return JSON or code fences unless explicitly asked. " "Respond in plain sentences; do not return JSON or code fences unless explicitly asked. "
"Translate metrics into natural language instead of echoing raw label/value pairs. " "Translate metrics into natural language instead of echoing raw label/value pairs. "
"Do not answer by only listing runbooks; summarize the cluster first and mention docs only if useful. " "Do not answer by only listing runbooks; if the question is about Atlas/Othrys, summarize the cluster first and mention docs only if useful. "
"If the question is not about Atlas/Othrys and no cluster context is provided, answer using general knowledge and say when you are unsure. "
"If the answer is not grounded in the provided context or tool data, say you do not know. " "If the answer is not grounded in the provided context or tool data, say you do not know. "
"End every response with a line: 'Confidence: high|medium|low'." "End every response with a line: 'Confidence: high|medium|low'."
) )
@ -2087,7 +2179,8 @@ def sync_loop(token: str, room_id: str):
if not (is_dm or mentioned): if not (is_dm or mentioned):
continue continue
lower_body = body.lower() cleaned_body = _strip_bot_mention(body)
lower_body = cleaned_body.lower()
# Only do live cluster introspection in DMs; metrics can be answered when mentioned. # Only do live cluster introspection in DMs; metrics can be answered when mentioned.
allow_tools = is_dm allow_tools = is_dm
@ -2101,7 +2194,7 @@ def sync_loop(token: str, room_id: str):
# Attempt to scope tools to the most likely workloads when hostnames are mentioned. # Attempt to scope tools to the most likely workloads when hostnames are mentioned.
targets: list[tuple[str, str]] = [] targets: list[tuple[str, str]] = []
for m in HOST_RE.finditer(body.lower()): for m in HOST_RE.finditer(lower_body):
host = m.group(1).lower() host = m.group(1).lower()
for ep in _HOST_INDEX.get(host, []): for ep in _HOST_INDEX.get(host, []):
backend = ep.get("backend") or {} backend = ep.get("backend") or {}
@ -2111,13 +2204,16 @@ def sync_loop(token: str, room_id: str):
targets.append((ns, str(w["name"]))) targets.append((ns, str(w["name"])))
snapshot = _snapshot_state() snapshot = _snapshot_state()
inventory = node_inventory_for_prompt(body) inventory = node_inventory_for_prompt(cleaned_body)
if not inventory: if not inventory:
inventory = _snapshot_inventory(snapshot) inventory = _snapshot_inventory(snapshot)
workloads = _snapshot_workloads(snapshot) workloads = _snapshot_workloads(snapshot)
metrics_summary = snapshot_context(body, snapshot) cluster_query = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
metrics_summary = snapshot_context(cleaned_body, snapshot) if cluster_query else ""
structured = ""
if cluster_query:
structured = structured_answer( structured = structured_answer(
body, cleaned_body,
inventory=inventory, inventory=inventory,
metrics_summary=metrics_summary, metrics_summary=metrics_summary,
snapshot=snapshot, snapshot=snapshot,
@ -2128,8 +2224,10 @@ def sync_loop(token: str, room_id: str):
history[hist_key] = history[hist_key][-80:] history[hist_key] = history[hist_key][-80:]
send_msg(token, rid, structured) send_msg(token, rid, structured)
continue continue
context = ""
if cluster_query:
context = build_context( context = build_context(
body, cleaned_body,
allow_tools=allow_tools, allow_tools=allow_tools,
targets=targets, targets=targets,
inventory=inventory, inventory=inventory,
@ -2141,7 +2239,8 @@ def sync_loop(token: str, room_id: str):
rendered = vm_render_result(res, limit=15) or "(no results)" rendered = vm_render_result(res, limit=15) or "(no results)"
extra = "VictoriaMetrics (PromQL result):\n" + rendered extra = "VictoriaMetrics (PromQL result):\n" + rendered
context = (context + "\n\n" + extra).strip() if context else extra context = (context + "\n\n" + extra).strip() if context else extra
metrics_context, _metrics_fallback = metrics_query_context(body, allow_tools=allow_metrics) if cluster_query:
metrics_context, _metrics_fallback = metrics_query_context(cleaned_body, allow_tools=allow_metrics)
if metrics_context: if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
@ -2151,7 +2250,7 @@ def sync_loop(token: str, room_id: str):
token, token,
rid, rid,
hist_key, hist_key,
body, cleaned_body,
context=context, context=context,
fallback=fallback, fallback=fallback,
) )