atlasbot: refine keyword focus and runbook references

2026-02-01 01:38:19 -03:00 · 2026-02-01 01:38:19 -03:00 · 583507b3e5
commit 583507b3e5
parent 89d6ba3a3d
1 changed files with 61 additions and 3 deletions
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@@ -295,7 +295,14 @@ class AnswerEngine:
            unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
            unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
            runbook_fix = _needs_runbook_fix(reply, runbook_paths)
-            if snapshot_context and (_needs_evidence_fix(reply, classify) or unknown_nodes or unknown_namespaces or runbook_fix):
+            runbook_needed = _needs_runbook_reference(normalized, runbook_paths, reply)
+            if snapshot_context and (
+                _needs_evidence_fix(reply, classify)
+                or unknown_nodes
+                or unknown_namespaces
+                or runbook_fix
+                or runbook_needed
+            ):
                if observer:
                    observer("evidence_fix", "repairing missing evidence")
                extra_bits = []
@@ -708,8 +715,9 @@ def _select_chunks(
    head = chunks[0]
    selected.append(head)
    keyword_hits: list[dict[str, Any]] = []
-    if keywords:
-        lowered = [kw.lower() for kw in keywords if kw]
+    focused = _focused_keywords(keywords or [])
+    if focused:
+        lowered = [kw.lower() for kw in focused if kw]
        for item in ranked:
            text = item.get("text", "").lower()
            if any(kw in text for kw in lowered):
@@ -895,6 +903,41 @@ def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[
    return list(dict.fromkeys(tokens))[:12]


+def _focused_keywords(tokens: list[str]) -> list[str]:
+    """Drop generic tokens, rank the rest by score, and keep at most six."""
+    # Tokens treated as generic; they are never scored.
+    generic = {
+        "atlas",
+        "cluster",
+        "node",
+        "nodes",
+        "pod",
+        "pods",
+        "namespace",
+        "namespaces",
+        "k8s",
+        "kubernetes",
+        "service",
+        "services",
+        "workload",
+        "workloads",
+    }
+    ranked: list[tuple[int, str]] = []
+    for word in tokens:
+        if word and word not in generic:
+            # Base weight 1; +2 for any digit, +1 for a hyphen, +1 for length >= 6.
+            weight = 1 + 2 * any(ch.isdigit() for ch in word)
+            weight += ("-" in word) + (len(word) >= 6)
+            ranked.append((weight, word))
+    if ranked:
+        # Highest weight first; equal weights are ordered alphabetically.
+        ordered = sorted(ranked, key=lambda pair: (-pair[0], pair[1]))
+        return [word for _, word in ordered][:6]
+    # NOTE(review): nothing scored means every token was empty or generic, so
+    # this filter can only pass falsy tokens through -- confirm the intent.
+    return [word for word in tokens if word not in generic][:6]
+
+
 def _allowed_nodes(summary: dict[str, Any]) -> list[str]:
    hardware = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
    if hardware:
@@ -944,6 +987,21 @@ def _needs_runbook_fix(reply: str, allowed: list[str]) -> bool:
    return any(path.lower() not in allowed_set for path in paths)


+def _needs_runbook_reference(question: str, allowed: list[str], reply: str) -> bool:
+    """Report whether a cue-bearing question got a reply citing no allowed runbook path."""
+    if not allowed or not question:
+        return False
+    lowered = question.lower()
+    cues = ("runbook", "checklist", "documented", "documentation", "where", "guide")
+    if not any(cue in lowered for cue in cues):
+        return False  # cues are matched as substrings of the lowercased question
+    if not reply:
+        return True
+    allowed_set = {path.lower() for path in allowed}  # built once, not per match
+    cited = re.findall(r"runbooks/[A-Za-z0-9._-]+", reply)
+    return not any(path.lower() in allowed_set for path in cited)
+
+
 def _resolve_path(data: Any, path: str) -> Any | None:
    cursor = data
    for part in re.split(r"\.(?![^\[]*\])", path):