atlasbot: expand plural token variants
This commit is contained in:
parent
ed038ebb8d
commit
83e6a0fac2
@ -1511,8 +1511,9 @@ def _metric_ctx_values(ctx: dict[str, Any]) -> tuple[list[str], str, list[str],
|
|||||||
sub_questions = ctx.get("sub_questions") if isinstance(ctx, dict) else []
|
sub_questions = ctx.get("sub_questions") if isinstance(ctx, dict) else []
|
||||||
keywords = ctx.get("keywords") if isinstance(ctx, dict) else []
|
keywords = ctx.get("keywords") if isinstance(ctx, dict) else []
|
||||||
keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx, dict) else []
|
keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx, dict) else []
|
||||||
token_set = set([str(token) for token in keyword_tokens if token])
|
token_set = {str(token).lower() for token in keyword_tokens if token}
|
||||||
token_set |= set(_extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords))
|
token_set |= {token.lower() for token in _extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords)}
|
||||||
|
token_set = _token_variants(token_set)
|
||||||
return summary_lines, str(question), sub_questions, keywords, token_set
|
return summary_lines, str(question), sub_questions, keywords, token_set
|
||||||
|
|
||||||
|
|
||||||
@ -1529,6 +1530,22 @@ def _extract_metric_keys(lines: list[str]) -> list[str]:
|
|||||||
return keys
|
return keys
|
||||||
|
|
||||||
|
|
||||||
|
def _token_variants(tokens: set[str]) -> set[str]:
    """Return ``tokens`` plus naive singular forms of plural-looking tokens.

    Every input token is kept. Tokens longer than ``TOKEN_MIN_LEN`` (a
    module-level constant defined outside this block) are additionally
    expanded by suffix stripping:

    * ``...ies`` -> ``...y``        (``policies`` -> ``policy``)
    * ``...es``  -> drop ``es``     (``boxes`` -> ``box``)
    * ``...s``   -> drop ``s``      (``nodes`` -> ``node``)

    The rules are cumulative, not exclusive: a token ending in ``ies`` also
    ends in ``es`` and ``s``, so all three stripped forms are added
    (``movies`` -> ``movy``, ``movi``, ``movie``).

    Args:
        tokens: Keyword tokens to expand. Not mutated.

    Returns:
        A new set with the original tokens and their derived variants; an
        empty set when ``tokens`` is empty or otherwise falsy.
    """
    if not tokens:
        return set()

    # Work on a copy so the caller's set is untouched; this also makes it safe
    # to iterate ``tokens`` directly while adding to ``variants``.
    variants = set(tokens)
    for token in tokens:
        # Short tokens are kept as-is; stripping them would produce fragments.
        if len(token) <= TOKEN_MIN_LEN:
            continue
        # Independent ``if``s (not ``elif``) on purpose: see the docstring.
        if token.endswith("ies"):
            variants.add(token[:-3] + "y")
        if token.endswith("es"):
            variants.add(token[:-2])
        if token.endswith("s"):
            variants.add(token[:-1])
    return variants
|
||||||
|
|
||||||
|
|
||||||
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
|
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
|
||||||
parsed = _parse_json_block(raw, fallback={})
|
parsed = _parse_json_block(raw, fallback={})
|
||||||
if isinstance(parsed, list):
|
if isinstance(parsed, list):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user