atlasbot: always include question tokens

Brad Stein 2026-02-03 11:54:59 -03:00
parent dc2bb6229e
commit 81fa889a29


@@ -199,6 +199,7 @@ class AnswerEngine:
         keywords = normalize.get("keywords") or []
         _debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords})
         keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
+        question_tokens = _extract_question_tokens(normalized)
         if observer:
             observer("route", "routing")
@@ -328,7 +329,7 @@
         )
         if isinstance(signals, list):
             signal_tokens = [str(item) for item in signals if item]
-        all_tokens = _merge_tokens(signal_tokens, keyword_tokens)
+        all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
         if observer:
             observer("retrieve", "scanning chunks")
         candidate_lines: list[str] = []
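Note on this call site: with the third argument added, all_tokens always carries tokens derived from the user's normalized question, even when the model returns no usable signals. A rough illustration with invented values (_merge_tokens and _extract_question_tokens are the functions changed and added in the hunks below, so this snippet is not standalone):

# Illustrative values only, not from a real run.
signal_tokens = []                                    # model proposed nothing usable
keyword_tokens = ["atlasbot", "deploy"]               # from _extract_keywords
question_tokens = ["how", "does", "atlasbot", "deploy", "work"]  # from _extract_question_tokens

all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
# before this commit: ["atlasbot", "deploy"]
# after this commit:  ["atlasbot", "deploy", "how", "does", "work"]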
@@ -1502,9 +1503,9 @@ def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
     return False


-def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]:
+def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
     merged: list[str] = []
-    for token in primary + secondary:
+    for token in primary + secondary + (third or []):
         if not token:
             continue
         if token not in merged:
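For reference, the new _merge_tokens signature stays backward compatible: the third list defaults to None, so existing two-argument callers are unaffected, and the merge remains an order-preserving de-duplication that drops empty strings. A standalone sketch using the function body exactly as in the hunk above (the example inputs are invented):

def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
    # Order-preserving, de-duplicating merge; falsy tokens are dropped.
    merged: list[str] = []
    for token in primary + secondary + (third or []):
        if not token:
            continue
        if token not in merged:
            merged.append(token)
    return merged

# Existing two-argument call sites keep working (third defaults to None).
assert _merge_tokens(["alpha", "beta"], ["beta", ""]) == ["alpha", "beta"]
# New call sites pass question tokens as the third list.
assert _merge_tokens(["alpha"], ["beta"], ["alpha", "gamma"]) == ["alpha", "beta", "gamma"]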
@@ -1512,6 +1513,18 @@ def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]:
     return merged


+def _extract_question_tokens(question: str) -> list[str]:
+    if not question:
+        return []
+    tokens: list[str] = []
+    for part in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
+        if len(part) < TOKEN_MIN_LEN:
+            continue
+        if part not in tokens:
+            tokens.append(part)
+    return tokens
+
+
 def _expand_tokens(tokens: list[str]) -> list[str]:
     if not tokens:
         return []
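A standalone sketch of the new tokenizer, assuming TOKEN_MIN_LEN is 3 and that re is imported at module scope (both are defined elsewhere in the real module, so the values here are placeholders):

import re

TOKEN_MIN_LEN = 3  # assumed value; the real constant is defined elsewhere in the module

def _extract_question_tokens(question: str) -> list[str]:
    # Lowercase the question, split on any run of characters outside [a-zA-Z0-9_-],
    # drop fragments shorter than TOKEN_MIN_LEN, and de-duplicate preserving order.
    if not question:
        return []
    tokens: list[str] = []
    for part in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
        if len(part) < TOKEN_MIN_LEN:
            continue
        if part not in tokens:
            tokens.append(part)
    return tokens

print(_extract_question_tokens("How does AtlasBot rank retry_policy chunks?"))
# -> ['how', 'does', 'atlasbot', 'rank', 'retry_policy', 'chunks']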