atlasbot: centralize token length

This commit is contained in:
Brad Stein 2026-02-03 11:38:02 -03:00
parent 875fe03aa3
commit 84b09f34ed

View File

@@ -18,6 +18,7 @@ from atlasbot.state.store import ClaimStore
 log = logging.getLogger(__name__)
 FOLLOWUP_SHORT_WORDS = 6
+TOKEN_MIN_LEN = 3
 NS_ENTRY_MIN_LEN = 2
 DEDUP_MIN_SENTENCES = 3
 TOKEN_MIN_LEN = 3
@@ -1514,7 +1515,7 @@ def _expand_tokens(tokens: list[str]) -> list[str]:
         if not isinstance(token, str):
             continue
         for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
-            if len(part) < 3:
+            if len(part) < TOKEN_MIN_LEN:
                 continue
             if part not in expanded:
                 expanded.append(part)