atlasbot: always include question tokens
parent dc2bb6229e
commit 81fa889a29
@@ -199,6 +199,7 @@ class AnswerEngine:
         keywords = normalize.get("keywords") or []
         _debug_log("normalize_parsed", {"normalized": normalized, "keywords": keywords})
         keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
+        question_tokens = _extract_question_tokens(normalized)
 
         if observer:
             observer("route", "routing")
@@ -328,7 +329,7 @@ class AnswerEngine:
         )
         if isinstance(signals, list):
             signal_tokens = [str(item) for item in signals if item]
-        all_tokens = _merge_tokens(signal_tokens, keyword_tokens)
+        all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
         if observer:
             observer("retrieve", "scanning chunks")
         candidate_lines: list[str] = []
@@ -1502,9 +1503,9 @@ def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
     return False
 
 
-def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]:
+def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
     merged: list[str] = []
-    for token in primary + secondary:
+    for token in primary + secondary + (third or []):
         if not token:
             continue
         if token not in merged:
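For reference, a standalone sketch of the updated _merge_tokens as it reads after this hunk (the merged.append(token) line, which sits in the unchanged context between this hunk and the next, is restated here so the sketch runs on its own). The third list is optional and defaults to None, so existing two-argument call sites keep their behavior, and the order-preserving dedupe means tokens from earlier lists keep their position:

def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
    merged: list[str] = []
    for token in primary + secondary + (third or []):
        if not token:          # skip empty strings and other falsy entries
            continue
        if token not in merged:
            merged.append(token)  # restated from the unchanged context between the hunks
    return merged


# Two-argument calls behave exactly as before; a third list only appends tokens not already seen.
assert _merge_tokens(["atlas", "reset"], ["reset", "password"]) == ["atlas", "reset", "password"]
assert _merge_tokens(["atlas", "reset"], ["reset", "password"], ["password", "login"]) == [
    "atlas", "reset", "password", "login",
]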
@@ -1512,6 +1513,18 @@ def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]:
     return merged
 
 
+def _extract_question_tokens(question: str) -> list[str]:
+    if not question:
+        return []
+    tokens: list[str] = []
+    for part in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
+        if len(part) < TOKEN_MIN_LEN:
+            continue
+        if part not in tokens:
+            tokens.append(part)
+    return tokens
+
+
 def _expand_tokens(tokens: list[str]) -> list[str]:
     if not tokens:
         return []
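Taken together, retrieval now scans with tokens drawn directly from the normalized question text in addition to the extracted keywords and routing signals. A minimal sketch of the new token flow, assuming TOKEN_MIN_LEN is 3 (the real constant is defined elsewhere in the module) and using a hypothetical question plus made-up signal_tokens and keyword_tokens values; the helper body is restated from the diff above:

import re

TOKEN_MIN_LEN = 3  # assumption: the actual value lives elsewhere in the module


def _extract_question_tokens(question: str) -> list[str]:
    # Restated from the diff above: lowercase, split on non-word characters,
    # keep unique tokens at or above the minimum length, preserving order.
    if not question:
        return []
    tokens: list[str] = []
    for part in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
        if len(part) < TOKEN_MIN_LEN:
            continue
        if part not in tokens:
            tokens.append(part)
    return tokens


question_tokens = _extract_question_tokens("How do I rotate the Atlas API key?")
print(question_tokens)
# ['how', 'rotate', 'the', 'atlas', 'api', 'key']  ("do" and "I" fall below TOKEN_MIN_LEN)

signal_tokens = ["rotation", "credentials"]        # hypothetical routing signals
keyword_tokens = ["atlas", "api-key", "rotate"]    # hypothetical output of _extract_keywords

# Passing question_tokens as the new third argument to _merge_tokens (sketched after the
# previous hunk) would yield, preserving first-seen order across the three lists:
# ['rotation', 'credentials', 'atlas', 'api-key', 'rotate', 'how', 'the', 'api', 'key']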