atlasbot: expand keyword tokens
commit 875fe03aa3
parent 13f1299af6
@@ -1464,7 +1464,7 @@ def _needs_evidence_guard(reply: str, facts: list[str]) -> bool:
 def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]:
     if not lines:
         return []
-    tokens = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
+    tokens = _expand_tokens(keywords)
     if not tokens:
         return lines[:max_lines]
     filtered = [line for line in lines if any(tok in line.lower() for tok in tokens)]
@@ -1486,7 +1486,7 @@ def _global_facts(lines: list[str]) -> list[str]:
 def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
     if not lines or not keywords:
         return False
-    tokens = [kw.lower() for kw in keywords if kw]
+    tokens = _expand_tokens(keywords)
     if not tokens:
         return False
     for line in lines:
@@ -1506,6 +1506,21 @@ def _merge_tokens(primary: list[str], secondary: list[str]) -> list[str]:
     return merged
 
 
+def _expand_tokens(tokens: list[str]) -> list[str]:
+    if not tokens:
+        return []
+    expanded: list[str] = []
+    for token in tokens:
+        if not isinstance(token, str):
+            continue
+        for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
+            if len(part) < 3:
+                continue
+            if part not in expanded:
+                expanded.append(part)
+    return expanded
+
+
 def _ensure_token_coverage(
     lines: list[str],
     tokens: list[str],
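A minimal sketch (not part of this commit) of what the new _expand_tokens helper does, assuming `re` is imported at module level, which this diff does not show: keywords are lowercased, split on any character outside [a-zA-Z0-9_-], parts shorter than three characters are dropped, and first occurrences are kept in order. The `expand` name and the sample keywords below are hypothetical.

import re

# Hypothetical stand-in for the _expand_tokens helper added above.
def expand(tokens: list[str]) -> list[str]:
    out: list[str] = []
    for token in tokens:
        if not isinstance(token, str):
            continue
        for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
            # Keep parts of at least 3 characters, deduplicated in first-seen order.
            if len(part) >= 3 and part not in out:
                out.append(part)
    return out

print(expand(["Deploy/Atlas", "atlas-bot", "v2", "Atlas"]))
# ['deploy', 'atlas', 'atlas-bot']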
@@ -1535,7 +1550,7 @@ def _ensure_token_coverage(
 def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
     if not lines or not keywords:
         return None
-    tokens = [kw.lower() for kw in keywords if kw]
+    tokens = _expand_tokens(keywords)
     if not tokens:
         return None
     best = None
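An illustrative comparison (assumptions, not from the commit) of why the three call sites switch to _expand_tokens: previously a multi-word keyword had to appear in a line verbatim, whereas the expanded tokens let each word match on its own. The keyword and log line below are made up.

line = "atlasbot deployment failed on node-3"

# Before: the whole keyword string had to be a substring of the line.
old_tokens = ["atlas deployment"]
print(any(tok in line.lower() for tok in old_tokens))  # False

# After: _expand_tokens(["atlas deployment"]) yields ["atlas", "deployment"],
# so a match on either word is enough.
new_tokens = ["atlas", "deployment"]
print(any(tok in line.lower() for tok in new_tokens))  # True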