From 9d90d32370c33e7bcb52e54f4d960df4b4f60a93 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 3 Feb 2026 10:05:43 -0300 Subject: [PATCH] atlasbot: enforce keyword overlap for metrics --- atlasbot/engine/answerer.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py index a468884..2f48f2e 100644 --- a/atlasbot/engine/answerer.py +++ b/atlasbot/engine/answerer.py @@ -377,6 +377,10 @@ class AnswerEngine: ) if not metric_facts and fallback_candidates: metric_facts = fallback_candidates[: max(2, plan.max_subquestions)] + if metric_facts and not _has_keyword_overlap(metric_facts, keyword_tokens): + best_line = _best_keyword_line(summary_lines, keyword_tokens) + if best_line: + metric_facts = _merge_fact_lines([best_line], metric_facts) if metric_facts: key_facts = _merge_fact_lines(metric_facts, key_facts) if self._settings.debug_pipeline: @@ -1468,6 +1472,36 @@ def _global_facts(lines: list[str]) -> list[str]: return _dedupe_lines(facts, limit=6) +def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool: + if not lines or not keywords: + return False + tokens = [kw.lower() for kw in keywords if kw] + if not tokens: + return False + for line in lines: + lower = line.lower() + if any(tok in lower for tok in tokens): + return True + return False + + +def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None: + if not lines or not keywords: + return None + tokens = [kw.lower() for kw in keywords if kw] + if not tokens: + return None + best = None + best_score = 0 + for line in lines: + lower = line.lower() + score = sum(1 for tok in tokens if tok in lower) + if score > best_score: + best_score = score + best = line + return best if best_score > 0 else None + + def _lexicon_context(summary: dict[str, Any]) -> str: # noqa: C901 if not isinstance(summary, dict): return ""