atlasbot: expand plural token variants
This commit is contained in:
parent
ed038ebb8d
commit
83e6a0fac2
@ -1511,8 +1511,9 @@ def _metric_ctx_values(ctx: dict[str, Any]) -> tuple[list[str], str, list[str],
|
||||
sub_questions = ctx.get("sub_questions") if isinstance(ctx, dict) else []
|
||||
keywords = ctx.get("keywords") if isinstance(ctx, dict) else []
|
||||
keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx, dict) else []
|
||||
token_set = set([str(token) for token in keyword_tokens if token])
|
||||
token_set |= set(_extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords))
|
||||
token_set = {str(token).lower() for token in keyword_tokens if token}
|
||||
token_set |= {token.lower() for token in _extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords)}
|
||||
token_set = _token_variants(token_set)
|
||||
return summary_lines, str(question), sub_questions, keywords, token_set
|
||||
|
||||
|
||||
@ -1529,6 +1530,22 @@ def _extract_metric_keys(lines: list[str]) -> list[str]:
|
||||
return keys
|
||||
|
||||
|
||||
def _token_variants(tokens: set[str]) -> set[str]:
    """Return ``tokens`` together with naive singular forms of plural-looking ones.

    Tokens whose length is at most ``TOKEN_MIN_LEN`` are kept in the result
    but are never expanded. Each longer token is checked against every
    suffix rule independently, so one token can contribute several variants
    (a token ending in "ies" also ends in "es" and in "s").

    Args:
        tokens: Keyword tokens to expand. The input is not modified.

    Returns:
        A new set holding the original tokens plus the derived variants;
        an empty set when ``tokens`` is empty or falsy.
    """
    if not tokens:
        return set()

    # (suffix to detect, replacement appended after the suffix is stripped)
    suffix_rules = (("ies", "y"), ("es", ""), ("s", ""))

    expanded = set(tokens)
    for word in tokens:
        if len(word) <= TOKEN_MIN_LEN:
            # Too short to strip a suffix from; keep only the original form.
            continue
        expanded.update(
            word[: -len(suffix)] + replacement
            for suffix, replacement in suffix_rules
            if word.endswith(suffix)
        )
    return expanded
|
||||
|
||||
|
||||
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
|
||||
parsed = _parse_json_block(raw, fallback={})
|
||||
if isinstance(parsed, list):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user