From 70c7712b603aac78e35bd582e3e75e51ebb70a0b Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Wed, 4 Feb 2026 13:57:58 -0300
Subject: [PATCH] atlasbot: rank metric fallback lines

---
 atlasbot/engine/answerer.py | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py
index 6610b5c..ffb6d0d 100644
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@@ -21,6 +21,7 @@ log = logging.getLogger(__name__)

 FOLLOWUP_SHORT_WORDS = 6
 TOKEN_MIN_LEN = 3
+GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}
 NS_ENTRY_MIN_LEN = 2
 DEDUP_MIN_SENTENCES = 3
 RUNBOOK_SIMILARITY_THRESHOLD = 0.4
@@ -371,11 +372,9 @@ class AnswerEngine:
             max_lines=min(2, max(1, plan.max_subquestions)),
         )
         if not global_metric_facts and (keyword_tokens or question_tokens):
-            global_metric_facts = _filter_lines_by_keywords(
-                global_facts,
-                keyword_tokens or question_tokens,
-                max_lines=2,
-            )
+            tokens = set(keyword_tokens or question_tokens)
+            tokens = {tok for tok in tokens if tok and tok not in GENERIC_METRIC_TOKENS}
+            global_metric_facts = _rank_metric_lines(global_facts, tokens, max_lines=2)
         if global_metric_facts:
             key_facts = _merge_fact_lines(global_metric_facts, key_facts)
         all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
@@ -434,7 +433,8 @@ class AnswerEngine:
         if not metric_facts:
             if observer:
                 observer("retrieve", "fallback metric selection")
-            fallback_candidates = _filter_lines_by_keywords(summary_lines, all_tokens, max_lines=200)
+            token_set = {tok for tok in all_tokens if tok and tok not in GENERIC_METRIC_TOKENS}
+            fallback_candidates = _rank_metric_lines(summary_lines, token_set, max_lines=200)
             if fallback_candidates:
                 metric_facts = await _select_fact_lines(
                     call_llm,
@@ -2018,6 +2018,32 @@ def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines:
     return (filtered or lines)[:max_lines]


+def _rank_metric_lines(lines: list[str], tokens: set[str], max_lines: int) -> list[str]:
+    """Return up to ``max_lines`` lines that mention a token, best match first.
+
+    A line qualifies when at least one token occurs in it as a
+    case-insensitive substring. Qualifying lines are ordered by: contains a
+    digit first, then more token hits, then line text (keeps output stable).
+    """
+    if not lines or not tokens or max_lines <= 0:
+        return []
+    # Lines are matched lowercased, so lowercase the tokens too; drop empty
+    # tokens because "" is a substring of every line.
+    needles = {tok.lower() for tok in tokens if tok}
+    if not needles:
+        return []
+    ranked: list[tuple[int, int, str]] = []
+    for line in lines:
+        lower = line.lower()
+        hits = sum(1 for tok in needles if tok in lower)
+        if not hits:
+            continue
+        has_number = 1 if re.search(r"\d", line) else 0
+        ranked.append((has_number, hits, line))
+    ranked.sort(key=lambda item: (-item[0], -item[1], item[2]))
+    return [item[2] for item in ranked[:max_lines]]
+
+
 def _global_facts(lines: list[str]) -> list[str]:
     if not lines:
         return []