diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py index 7f5d84c..7936fd1 100644 --- a/atlasbot/engine/answerer.py +++ b/atlasbot/engine/answerer.py @@ -1464,7 +1464,7 @@ def _needs_evidence_guard(reply: str, facts: list[str]) -> bool: def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]: if not lines: return [] - tokens = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()] + tokens = _expand_tokens(keywords) if not tokens: return lines[:max_lines] filtered = [line for line in lines if any(tok in line.lower() for tok in tokens)] @@ -1486,7 +1486,7 @@ def _global_facts(lines: list[str]) -> list[str]: def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool: if not lines or not keywords: return False - tokens = [kw.lower() for kw in keywords if kw] + tokens = _expand_tokens(keywords) if not tokens: return False for line in lines: @@ -1506,6 +1506,21 @@ def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]: return merged +def _expand_tokens(tokens: list[str]) -> list[str]: + if not tokens: + return [] + expanded: list[str] = [] + for token in tokens: + if not isinstance(token, str): + continue + for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()): + if len(part) < 3: + continue + if part not in expanded: + expanded.append(part) + return expanded + + def _ensure_token_coverage( lines: list[str], tokens: list[str], @@ -1535,7 +1550,7 @@ def _ensure_token_coverage( def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None: if not lines or not keywords: return None - tokens = [kw.lower() for kw in keywords if kw] + tokens = _expand_tokens(keywords) if not tokens: return None best = None