atlasbot: expand keyword tokens

This commit is contained in:
Brad Stein 2026-02-03 11:19:33 -03:00
parent 13f1299af6
commit 875fe03aa3

View File

@@ -1464,7 +1464,7 @@ def _needs_evidence_guard(reply: str, facts: list[str]) -> bool:
def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]: def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]:
if not lines: if not lines:
return [] return []
tokens = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()] tokens = _expand_tokens(keywords)
if not tokens: if not tokens:
return lines[:max_lines] return lines[:max_lines]
filtered = [line for line in lines if any(tok in line.lower() for tok in tokens)] filtered = [line for line in lines if any(tok in line.lower() for tok in tokens)]
@@ -1486,7 +1486,7 @@ def _global_facts(lines: list[str]) -> list[str]:
def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool: def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
if not lines or not keywords: if not lines or not keywords:
return False return False
tokens = [kw.lower() for kw in keywords if kw] tokens = _expand_tokens(keywords)
if not tokens: if not tokens:
return False return False
for line in lines: for line in lines:
@@ -1506,6 +1506,21 @@ def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]:
return merged return merged
def _expand_tokens(tokens: list[str]) -> list[str]:
if not tokens:
return []
expanded: list[str] = []
for token in tokens:
if not isinstance(token, str):
continue
for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
if len(part) < 3:
continue
if part not in expanded:
expanded.append(part)
return expanded
def _ensure_token_coverage( def _ensure_token_coverage(
lines: list[str], lines: list[str],
tokens: list[str], tokens: list[str],
@@ -1535,7 +1550,7 @@ def _ensure_token_coverage(
def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None: def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
if not lines or not keywords: if not lines or not keywords:
return None return None
tokens = [kw.lower() for kw in keywords if kw] tokens = _expand_tokens(keywords)
if not tokens: if not tokens:
return None return None
best = None best = None