diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py index 10e9d62..0388f81 100644 --- a/atlasbot/engine/answerer.py +++ b/atlasbot/engine/answerer.py @@ -295,7 +295,14 @@ class AnswerEngine: unknown_nodes = _find_unknown_nodes(reply, allowed_nodes) unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces) runbook_fix = _needs_runbook_fix(reply, runbook_paths) - if snapshot_context and (_needs_evidence_fix(reply, classify) or unknown_nodes or unknown_namespaces or runbook_fix): + runbook_needed = _needs_runbook_reference(normalized, runbook_paths, reply) + if snapshot_context and ( + _needs_evidence_fix(reply, classify) + or unknown_nodes + or unknown_namespaces + or runbook_fix + or runbook_needed + ): if observer: observer("evidence_fix", "repairing missing evidence") extra_bits = [] @@ -708,8 +715,9 @@ def _select_chunks( head = chunks[0] selected.append(head) keyword_hits: list[dict[str, Any]] = [] - if keywords: - lowered = [kw.lower() for kw in keywords if kw] + focused = _focused_keywords(keywords or []) + if focused: + lowered = [kw.lower() for kw in focused if kw] for item in ranked: text = item.get("text", "").lower() if any(kw in text for kw in lowered): @@ -895,6 +903,41 @@ def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[ return list(dict.fromkeys(tokens))[:12] +def _focused_keywords(tokens: list[str]) -> list[str]: + generic = { + "atlas", + "cluster", + "node", + "nodes", + "pod", + "pods", + "namespace", + "namespaces", + "k8s", + "kubernetes", + "service", + "services", + "workload", + "workloads", + } + scored: list[tuple[int, str]] = [] + for token in tokens: + if not token or token in generic: + continue + score = 1 + if any(ch.isdigit() for ch in token): + score += 2 + if "-" in token: + score += 1 + if len(token) >= 6: + score += 1 + scored.append((score, token)) + if not scored: + return [token for token in tokens if token not in generic][:6] + scored.sort(key=lambda item: (-item[0], item[1])) + return [token for _, token in scored][:6] + + def _allowed_nodes(summary: dict[str, Any]) -> list[str]: hardware = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {} if hardware: @@ -944,6 +987,21 @@ def _needs_runbook_fix(reply: str, allowed: list[str]) -> bool: return any(path.lower() not in allowed_set for path in paths) +def _needs_runbook_reference(question: str, allowed: list[str], reply: str) -> bool: + if not allowed or not question: + return False + lowered = question.lower() + cues = ("runbook", "checklist", "documented", "documentation", "where", "guide") + if not any(cue in lowered for cue in cues): + return False + if not reply: + return True + for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply): + if token.lower() in {p.lower() for p in allowed}: + return False + return True + + def _resolve_path(data: Any, path: str) -> Any | None: cursor = data for part in re.split(r"\.(?![^\[]*\])", path):