atlasbot: rank metric fallback lines

This commit is contained in:
Brad Stein 2026-02-04 13:57:58 -03:00
parent 7aca249468
commit 70c7712b60

View File

@ -21,6 +21,7 @@ log = logging.getLogger(__name__)
FOLLOWUP_SHORT_WORDS = 6 FOLLOWUP_SHORT_WORDS = 6
TOKEN_MIN_LEN = 3 TOKEN_MIN_LEN = 3
GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}
NS_ENTRY_MIN_LEN = 2 NS_ENTRY_MIN_LEN = 2
DEDUP_MIN_SENTENCES = 3 DEDUP_MIN_SENTENCES = 3
RUNBOOK_SIMILARITY_THRESHOLD = 0.4 RUNBOOK_SIMILARITY_THRESHOLD = 0.4
@ -371,11 +372,9 @@ class AnswerEngine:
max_lines=min(2, max(1, plan.max_subquestions)), max_lines=min(2, max(1, plan.max_subquestions)),
) )
if not global_metric_facts and (keyword_tokens or question_tokens): if not global_metric_facts and (keyword_tokens or question_tokens):
global_metric_facts = _filter_lines_by_keywords( tokens = set(keyword_tokens or question_tokens)
global_facts, tokens = {tok for tok in tokens if tok and tok not in GENERIC_METRIC_TOKENS}
keyword_tokens or question_tokens, global_metric_facts = _rank_metric_lines(global_facts, tokens, max_lines=2)
max_lines=2,
)
if global_metric_facts: if global_metric_facts:
key_facts = _merge_fact_lines(global_metric_facts, key_facts) key_facts = _merge_fact_lines(global_metric_facts, key_facts)
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens) all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
@ -434,7 +433,8 @@ class AnswerEngine:
if not metric_facts: if not metric_facts:
if observer: if observer:
observer("retrieve", "fallback metric selection") observer("retrieve", "fallback metric selection")
fallback_candidates = _filter_lines_by_keywords(summary_lines, all_tokens, max_lines=200) token_set = {tok for tok in all_tokens if tok and tok not in GENERIC_METRIC_TOKENS}
fallback_candidates = _rank_metric_lines(summary_lines, token_set, max_lines=200)
if fallback_candidates: if fallback_candidates:
metric_facts = await _select_fact_lines( metric_facts = await _select_fact_lines(
call_llm, call_llm,
@ -2018,6 +2018,21 @@ def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines:
return (filtered or lines)[:max_lines] return (filtered or lines)[:max_lines]
def _rank_metric_lines(lines: list[str], tokens: set[str], max_lines: int) -> list[str]:
if not lines or not tokens:
return []
ranked: list[tuple[int, int, str]] = []
for line in lines:
lower = line.lower()
hits = sum(1 for tok in tokens if tok in lower)
if not hits:
continue
has_number = 1 if re.search(r"\d", line) else 0
ranked.append((has_number, hits, line))
ranked.sort(key=lambda item: (-item[0], -item[1], item[2]))
return [item[2] for item in ranked[:max_lines]]
def _global_facts(lines: list[str]) -> list[str]: def _global_facts(lines: list[str]) -> list[str]:
if not lines: if not lines:
return [] return []