From a8e9bc53de64ad98e22c19837454fac7699a0803 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sun, 1 Feb 2026 05:09:32 -0300 Subject: [PATCH] atlasbot: keep raw keywords for metric fallback --- atlasbot/engine/answerer.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py index a4b862c..29efcb2 100644 --- a/atlasbot/engine/answerer.py +++ b/atlasbot/engine/answerer.py @@ -184,7 +184,7 @@ class AnswerEngine: normalized = str(normalize.get("normalized") or question).strip() or question keywords = normalize.get("keywords") or [] _debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords}) - keyword_tokens = _extract_keywords(normalized, sub_questions=[], keywords=keywords) + keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords) if observer: observer("route", "routing") @@ -250,7 +250,7 @@ class AnswerEngine: parts = _parse_json_list(decompose_raw) sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions) _debug_log("decompose_parsed", {"sub_questions": sub_questions}) - keyword_tokens = _extract_keywords(normalized, sub_questions=sub_questions, keywords=keywords) + keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords) snapshot_context = "" if classify.get("needs_snapshot"): @@ -407,9 +407,10 @@ class AnswerEngine: ) if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts and not re.search(r"\\d", reply): best_line = None - lowered = normalized.lower() + lowered_keywords = [kw.lower() for kw in keyword_tokens if kw] for line in metric_facts: - if any(token in line.lower() for token in lowered.split()): + line_lower = line.lower() + if any(kw in line_lower for kw in lowered_keywords): best_line = line break best_line = best_line or metric_facts[0] @@ -987,7 +988,12 @@ def _needs_focus_fix(question: str, reply: str, classify: 
dict[str, Any]) -> bool: return any(marker in reply.lower() for marker in extra_markers) -def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]: +def _extract_keywords( + raw_question: str, + normalized: str, + sub_questions: list[str], + keywords: list[Any] | None, +) -> list[str]: stopwords = { "the", "and", @@ -1011,7 +1017,7 @@ def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[ "now", } tokens: list[str] = [] - for source in [normalized, *sub_questions]: + for source in [raw_question, normalized, *sub_questions]: for part in re.split(r"[^a-zA-Z0-9_-]+", source.lower()): if len(part) < 3 or part in stopwords: continue