Compare commits

..

No commits in common. "cb7141dfb63e74219a1852df458213daf3c3ec6e" and "d03c846779d4c4f217ecb789948e28ba5e6c753a" have entirely different histories.

2 changed files with 48 additions and 147 deletions

View File

@ -16,7 +16,7 @@ spec:
labels: labels:
app: atlasbot app: atlasbot
annotations: annotations:
checksum/atlasbot-configmap: manual-atlasbot-41 checksum/atlasbot-configmap: manual-atlasbot-40
vault.hashicorp.com/agent-inject: "true" vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "comms" vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"

View File

@ -121,49 +121,6 @@ METRIC_HINTS = {
"pods": ("pods", "pod"), "pods": ("pods", "pod"),
} }
# Vocabulary that marks a prompt as being about the cluster. _is_cluster_query
# checks every entry against the normalized prompt text.
CLUSTER_HINT_WORDS = {
    # Cluster names and generic Kubernetes terms.
    "atlas", "titan", "cluster", "k8s", "kubernetes",
    "node", "nodes", "pod", "pods", "namespace",
    "service", "deployment", "daemonset", "statefulset",
    # Tool and service names (presumably things hosted on or operating the
    # cluster -- confirm against the deployment inventory).
    "grafana", "victoria", "prometheus", "ariadne",
    "mailu", "nextcloud", "vaultwarden", "firefly",
    "wger", "jellyfin", "planka", "budget",
    "element", "synapse", "mas", "comms",
    "longhorn", "harbor", "jenkins", "gitea",
    "flux", "keycloak",
    # Data stores and the bot itself.
    "postgres", "database", "db", "atlasbot",
}
_OLLAMA_LOCK = threading.Lock() _OLLAMA_LOCK = threading.Lock()
HARDWARE_HINTS = { HARDWARE_HINTS = {
@ -274,18 +231,6 @@ def is_mentioned(content: dict, body: str) -> bool:
return False return False
return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids) return any(isinstance(uid, str) and uid.lower() in MENTION_USER_IDS for uid in user_ids)
def _strip_bot_mention(text: str) -> str:
    """Remove leading bot mentions from *text*.

    A mention is one of the names in ``MENTION_LOCALPARTS`` (matched
    case-insensitively), optionally prefixed with ``@`` and optionally
    followed by ``:``, and always followed by whitespace. Several
    consecutive mentions at the start of the message are all removed.

    Args:
        text: Raw message body; may be empty or ``None``.

    Returns:
        The message without its leading mentions, trimmed of surrounding
        whitespace. Returns ``""`` for falsy input. If stripping would leave
        nothing (the message consisted only of mentions), the trimmed
        original text is returned instead so callers never lose the message.
    """
    if not text:
        return ""
    trimmed = text.strip()
    # Escape each configured name so regex metacharacters in a localpart are
    # matched literally; skip empty entries, which would match everywhere.
    names = [re.escape(name) for name in (MENTION_LOCALPARTS or ()) if name]
    if not names:
        return trimmed
    # ^( optional-space  @?name  optional ':'  mandatory-space )+
    pattern = r"^(?:\s*@?(?:" + "|".join(names) + r")(?::)?\s+)+"
    cleaned = re.sub(pattern, "", text, flags=re.IGNORECASE).strip()
    return cleaned or trimmed
# Matrix HTTP helper. # Matrix HTTP helper.
def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None): def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None):
@ -1835,38 +1780,33 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
if not prompt: if not prompt:
self._write_json(400, {"error": "missing_prompt"}) self._write_json(400, {"error": "missing_prompt"})
return return
cleaned = _strip_bot_mention(prompt)
snapshot = _snapshot_state() snapshot = _snapshot_state()
inventory = _snapshot_inventory(snapshot) or node_inventory_live() inventory = _snapshot_inventory(snapshot) or node_inventory_live()
workloads = _snapshot_workloads(snapshot) workloads = _snapshot_workloads(snapshot)
cluster_query = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads) metrics_summary = snapshot_context(prompt, snapshot)
metrics_summary = snapshot_context(cleaned, snapshot) if cluster_query else "" structured = structured_answer(
if cluster_query: prompt,
structured = structured_answer( inventory=inventory,
cleaned, metrics_summary=metrics_summary,
inventory=inventory, snapshot=snapshot,
metrics_summary=metrics_summary, workloads=workloads,
snapshot=snapshot, )
workloads=workloads, if structured:
) self._write_json(200, {"answer": structured})
if structured: return
self._write_json(200, {"answer": structured}) context = build_context(
return prompt,
context = "" allow_tools=False,
if cluster_query: targets=[],
context = build_context( inventory=inventory,
cleaned, snapshot=snapshot,
allow_tools=False, workloads=workloads,
targets=[], )
inventory=inventory, metrics_context, _metrics_fallback = metrics_query_context(prompt, allow_tools=True)
snapshot=snapshot, if metrics_context:
workloads=workloads, context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
)
metrics_context, _metrics_fallback = metrics_query_context(cleaned, allow_tools=True)
if metrics_context:
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
fallback = "I don't have enough data to answer that." fallback = "I don't have enough data to answer that."
answer = ollama_reply(("http", "internal"), cleaned, context=context, fallback=fallback) answer = ollama_reply(("http", "internal"), prompt, context=context, fallback=fallback)
self._write_json(200, {"answer": answer}) self._write_json(200, {"answer": answer})
@ -1980,37 +1920,6 @@ def _knowledge_intent(prompt: str) -> bool:
) )
def _mentions_hint_word(q: str) -> bool:
    """Return True when *q* contains an entry of ``CLUSTER_HINT_WORDS``.

    Entries longer than three characters are matched as substrings, so
    plurals and compounds still match. Shorter entries ("db", "mas", "pod",
    "k8s") must appear as whole words; as raw substrings they also match
    unrelated words such as "feedback", "christmas" or "podcast".
    """
    for word in CLUSTER_HINT_WORDS:
        if len(word) > 3:
            if word in q:
                return True
        elif re.search(r"(?<![a-z0-9])" + re.escape(word) + r"(?![a-z0-9])", q):
            return True
    return False


def _is_cluster_query(
    prompt: str,
    *,
    inventory: list[dict[str, Any]] | None,
    workloads: list[dict[str, Any]] | None,
) -> bool:
    """Decide whether *prompt* is a question about the cluster.

    The checks run in order and the first hit wins:

    1. ``TITAN_NODE_RE`` matches the normalized prompt.
    2. The prompt mentions a ``CLUSTER_HINT_WORDS`` entry.
    3. ``HOST_RE`` finds a hostname that is ``bstein.dev`` or a subdomain.
    4. A prompt token matches the tokens of a known workload.
    5. A prompt token equals the name of a known node.

    Args:
        prompt: User text; normalized with ``normalize_query`` before
            matching (presumably lowercased -- confirm in that helper).
        inventory: Node records; only dict entries with a truthy ``"name"``
            are considered. ``None`` or empty disables check 5.
        workloads: Workload records; non-dict entries are ignored.
            ``None`` or empty disables check 4.

    Returns:
        True when any check matches, False otherwise (including when the
        normalized prompt is empty).
    """
    q = normalize_query(prompt)
    if not q:
        return False
    if TITAN_NODE_RE.search(q):
        return True
    if _mentions_hint_word(q):
        return True
    for host_match in HOST_RE.finditer(q):
        host = host_match.group(1).lower()
        # Require a label boundary so "notbstein.dev" is not treated as ours.
        if host == "bstein.dev" or host.endswith(".bstein.dev"):
            return True
    tokens = set(_tokens(q))
    for entry in workloads or ():
        if isinstance(entry, dict) and tokens & _workload_tokens(entry):
            return True
    if inventory:
        node_names = {
            name
            for node in inventory
            if isinstance(node, dict) and (name := node.get("name"))
        }
        if tokens & node_names:
            return True
    return False
def _inventory_summary(inventory: list[dict[str, Any]]) -> str: def _inventory_summary(inventory: list[dict[str, Any]]) -> str:
if not inventory: if not inventory:
return "" return ""
@ -2049,8 +1958,7 @@ def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
"Do not suggest commands unless explicitly asked. " "Do not suggest commands unless explicitly asked. "
"Respond in plain sentences; do not return JSON or code fences unless explicitly asked. " "Respond in plain sentences; do not return JSON or code fences unless explicitly asked. "
"Translate metrics into natural language instead of echoing raw label/value pairs. " "Translate metrics into natural language instead of echoing raw label/value pairs. "
"Do not answer by only listing runbooks; if the question is about Atlas/Othrys, summarize the cluster first and mention docs only if useful. " "Do not answer by only listing runbooks; summarize the cluster first and mention docs only if useful. "
"If the question is not about Atlas/Othrys and no cluster context is provided, answer using general knowledge and say when you are unsure. "
"If the answer is not grounded in the provided context or tool data, say you do not know. " "If the answer is not grounded in the provided context or tool data, say you do not know. "
"End every response with a line: 'Confidence: high|medium|low'." "End every response with a line: 'Confidence: high|medium|low'."
) )
@ -2179,8 +2087,7 @@ def sync_loop(token: str, room_id: str):
if not (is_dm or mentioned): if not (is_dm or mentioned):
continue continue
cleaned_body = _strip_bot_mention(body) lower_body = body.lower()
lower_body = cleaned_body.lower()
# Only do live cluster introspection in DMs; metrics can be answered when mentioned. # Only do live cluster introspection in DMs; metrics can be answered when mentioned.
allow_tools = is_dm allow_tools = is_dm
@ -2194,7 +2101,7 @@ def sync_loop(token: str, room_id: str):
# Attempt to scope tools to the most likely workloads when hostnames are mentioned. # Attempt to scope tools to the most likely workloads when hostnames are mentioned.
targets: list[tuple[str, str]] = [] targets: list[tuple[str, str]] = []
for m in HOST_RE.finditer(lower_body): for m in HOST_RE.finditer(body.lower()):
host = m.group(1).lower() host = m.group(1).lower()
for ep in _HOST_INDEX.get(host, []): for ep in _HOST_INDEX.get(host, []):
backend = ep.get("backend") or {} backend = ep.get("backend") or {}
@ -2204,45 +2111,39 @@ def sync_loop(token: str, room_id: str):
targets.append((ns, str(w["name"]))) targets.append((ns, str(w["name"])))
snapshot = _snapshot_state() snapshot = _snapshot_state()
inventory = node_inventory_for_prompt(cleaned_body) inventory = node_inventory_for_prompt(body)
if not inventory: if not inventory:
inventory = _snapshot_inventory(snapshot) inventory = _snapshot_inventory(snapshot)
workloads = _snapshot_workloads(snapshot) workloads = _snapshot_workloads(snapshot)
cluster_query = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads) metrics_summary = snapshot_context(body, snapshot)
metrics_summary = snapshot_context(cleaned_body, snapshot) if cluster_query else "" structured = structured_answer(
structured = "" body,
if cluster_query: inventory=inventory,
structured = structured_answer( metrics_summary=metrics_summary,
cleaned_body, snapshot=snapshot,
inventory=inventory, workloads=workloads,
metrics_summary=metrics_summary, )
snapshot=snapshot,
workloads=workloads,
)
if structured: if structured:
history[hist_key].append(f"Atlas: {structured}") history[hist_key].append(f"Atlas: {structured}")
history[hist_key] = history[hist_key][-80:] history[hist_key] = history[hist_key][-80:]
send_msg(token, rid, structured) send_msg(token, rid, structured)
continue continue
context = "" context = build_context(
if cluster_query: body,
context = build_context( allow_tools=allow_tools,
cleaned_body, targets=targets,
allow_tools=allow_tools, inventory=inventory,
targets=targets, snapshot=snapshot,
inventory=inventory, workloads=workloads,
snapshot=snapshot, )
workloads=workloads,
)
if allow_tools and promql: if allow_tools and promql:
res = vm_query(promql, timeout=20) res = vm_query(promql, timeout=20)
rendered = vm_render_result(res, limit=15) or "(no results)" rendered = vm_render_result(res, limit=15) or "(no results)"
extra = "VictoriaMetrics (PromQL result):\n" + rendered extra = "VictoriaMetrics (PromQL result):\n" + rendered
context = (context + "\n\n" + extra).strip() if context else extra context = (context + "\n\n" + extra).strip() if context else extra
if cluster_query: metrics_context, _metrics_fallback = metrics_query_context(body, allow_tools=allow_metrics)
metrics_context, _metrics_fallback = metrics_query_context(cleaned_body, allow_tools=allow_metrics) if metrics_context:
if metrics_context: context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
context = (context + "\n\n" + metrics_context).strip() if context else metrics_context
fallback = "I don't have enough data to answer that." fallback = "I don't have enough data to answer that."
@ -2250,7 +2151,7 @@ def sync_loop(token: str, room_id: str):
token, token,
rid, rid,
hist_key, hist_key,
cleaned_body, body,
context=context, context=context,
fallback=fallback, fallback=fallback,
) )