atlasbot: centralize token length
This commit is contained in:
parent
875fe03aa3
commit
84b09f34ed
@@ -18,6 +18,7 @@ from atlasbot.state.store import ClaimStore
|
|||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
FOLLOWUP_SHORT_WORDS = 6
|
FOLLOWUP_SHORT_WORDS = 6
|
||||||
|
TOKEN_MIN_LEN = 3
|
||||||
NS_ENTRY_MIN_LEN = 2
|
NS_ENTRY_MIN_LEN = 2
|
||||||
DEDUP_MIN_SENTENCES = 3
|
DEDUP_MIN_SENTENCES = 3
|
||||||
TOKEN_MIN_LEN = 3
|
TOKEN_MIN_LEN = 3
|
||||||
@@ -1514,7 +1515,7 @@ def _expand_tokens(tokens: list[str]) -> list[str]:
|
|||||||
if not isinstance(token, str):
|
if not isinstance(token, str):
|
||||||
continue
|
continue
|
||||||
for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
|
for part in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
|
||||||
if len(part) < 3:
|
if len(part) < TOKEN_MIN_LEN:
|
||||||
continue
|
continue
|
||||||
if part not in expanded:
|
if part not in expanded:
|
||||||
expanded.append(part)
|
expanded.append(part)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user