1 changed files with 10 additions and 51 deletions
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@@ -184,7 +184,7 @@ class AnswerEngine:
            normalized = str(normalize.get("normalized") or question).strip() or question
            keywords = normalize.get("keywords") or []
            _debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords})
-            keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
+            keyword_tokens = _extract_keywords(normalized, sub_questions=[], keywords=keywords)

            if observer:
                observer("route", "routing")
@@ -250,7 +250,7 @@ class AnswerEngine:
            parts = _parse_json_list(decompose_raw)
            sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
            _debug_log("decompose_parsed", {"sub_questions": sub_questions})
-            keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
+            keyword_tokens = _extract_keywords(normalized, sub_questions=sub_questions, keywords=keywords)

            snapshot_context = ""
            if classify.get("needs_snapshot"):
@@ -260,7 +260,7 @@ class AnswerEngine:
                scored = await _score_chunks(call_llm, chunks, normalized, sub_questions, plan)
                selected = _select_chunks(chunks, scored, plan, keyword_tokens)
                key_facts = _key_fact_lines(summary_lines, keyword_tokens)
-                metric_facts = [line for line in key_facts if re.search(r"\d", line)]
+                metric_facts = [line for line in key_facts if re.search(r"\d", line)]
                if self._settings.debug_pipeline:
                    scored_preview = sorted(
                        [{"id": c["id"], "score": scored.get(c["id"], 0.0), "summary": c["summary"]} for c in chunks],
@@ -405,19 +405,15 @@ class AnswerEngine:
                    model=plan.model,
                    tag="focus_fix",
                )
-            if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
+            if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts and not re.search(r"\d", reply):
                best_line = None
-                lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
+                lowered = [kw.lower() for kw in keyword_tokens if kw]
                 for line in metric_facts:
-                    line_lower = line.lower()
-                    if any(kw in line_lower for kw in lowered_keywords):
+                    if any(kw in line.lower() for kw in lowered):
                        best_line = line
                        break
                best_line = best_line or metric_facts[0]
-                reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
-                fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
-                if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
-                    reply = f"From the latest snapshot: {best_line}."
+                reply = f"From the latest snapshot: {best_line}."

            if plan.use_critic:
                if observer:
@@ -447,9 +443,6 @@ class AnswerEngine:
                if note:
                    reply = f"{reply}\n\n{note}"

-            if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
-                reply = _metric_fact_guard(reply, metric_facts, keyword_tokens)
-
            scores = await self._score_answer(normalized, reply, plan, call_llm)
            claims = await self._extract_claims(normalized, reply, summary, call_llm)
        except LLMLimitReached:
@@ -880,45 +873,16 @@ def _key_fact_lines(lines: list[str], keywords: list[str] | None, limit: int = 6
    lowered = [kw.lower() for kw in keywords if kw]
    if not lowered:
        return []
-    focused = _focused_keywords(lowered)
-    primary = focused or lowered
    matches: list[str] = []
    for line in lines:
        line_lower = line.lower()
-        if any(kw in line_lower for kw in primary):
+        if any(kw in line_lower for kw in lowered):
            matches.append(line)
        if len(matches) >= limit:
            break
-    if len(matches) < limit and focused:
-        for line in lines:
-            if len(matches) >= limit:
-                break
-            if line in matches:
-                continue
-            line_lower = line.lower()
-            if any(kw in line_lower for kw in lowered):
-                matches.append(line)
    return matches


-def _metric_fact_guard(reply: str, metric_facts: list[str], keywords: list[str]) -> str:
-    if not metric_facts:
-        return reply
-    best_line = None
-    lowered_keywords = [kw.lower() for kw in keywords if kw]
-    for line in metric_facts:
-        line_lower = line.lower()
-        if any(kw in line_lower for kw in lowered_keywords):
-            best_line = line
-            break
-    best_line = best_line or metric_facts[0]
-    reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
-    fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
-    if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
-        return f"From the latest snapshot: {best_line}."
-    return reply
-
-
 def _lexicon_context(summary: dict[str, Any]) -> str:
    if not isinstance(summary, dict):
        return ""
@@ -1023,12 +987,7 @@ def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> boo
    return any(marker in reply.lower() for marker in extra_markers)


-def _extract_keywords(
-    raw_question: str,
-    normalized: str,
-    sub_questions: list[str],
-    keywords: list[Any] | None,
-) -> list[str]:
+def _extract_keywords(normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
    stopwords = {
        "the",
        "and",
@@ -1052,7 +1011,7 @@ def _extract_keywords(
        "now",
    }
    tokens: list[str] = []
-    for source in [raw_question, normalized, *sub_questions]:
+    for source in [normalized, *sub_questions]:
        for part in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
            if len(part) < 3 or part in stopwords:
                continue