atlasbot: keep raw keywords for metric fallback

This commit is contained in:
Brad Stein 2026-02-01 05:09:32 -03:00
parent 677e31ba2d
commit a8e9bc53de

View File

@@ -184,7 +184,7 @@ class AnswerEngine:
normalized = str(normalize.get("normalized") or question).strip() or question
keywords = normalize.get("keywords") or []
_debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords})
-keyword_tokens = _extract_keywords(normalized, sub_questions=[], keywords=keywords)
+keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
if observer:
observer("route", "routing")
@@ -250,7 +250,7 @@ class AnswerEngine:
parts = _parse_json_list(decompose_raw)
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
_debug_log("decompose_parsed", {"sub_questions": sub_questions})
-keyword_tokens = _extract_keywords(normalized, sub_questions=sub_questions, keywords=keywords)
+keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
snapshot_context = ""
if classify.get("needs_snapshot"):
@@ -407,9 +407,10 @@ class AnswerEngine:
)
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts and not re.search(r"\\d", reply):
best_line = None
-lowered = normalized.lower()
+lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
 for line in metric_facts:
-if any(token in line.lower() for token in lowered.split()):
+line_lower = line.lower()
+if any(kw in line_lower for kw in lowered_keywords):
best_line = line
break
best_line = best_line or metric_facts[0]
@@ -987,7 +988,12 @@ def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> boo
return any(marker in reply.lower() for marker in extra_markers)
-def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
+def _extract_keywords(
+raw_question: str,
+normalized: str,
+sub_questions: list[str],
+keywords: list[Any] | None,
+) -> list[str]:
stopwords = {
"the",
"and",
@@ -1011,7 +1017,7 @@ def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[
"now",
}
tokens: list[str] = []
-for source in [normalized, *sub_questions]:
+for source in [raw_question, normalized, *sub_questions]:
for part in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
if len(part) < 3 or part in stopwords:
continue