def _token_variants(tokens: set[str]) -> set[str]:
    """Expand *tokens* with naive singular forms of plural-looking tokens.

    Every input token is kept unchanged. A token longer than
    ``TOKEN_MIN_LEN`` additionally contributes one candidate for each
    suffix rule it matches:

    * ``...ies`` -> ``...y``
    * ``...es``  -> the two-character suffix is dropped
    * ``...s``   -> the final character is dropped

    The rules are deliberately not mutually exclusive: a token ending in
    ``ies`` also ends in ``es`` and ``s``, so ``"policies"`` yields
    ``"policy"``, ``"polici"`` and ``"policie"``. Suffix matching uses
    ``str.endswith`` and is therefore case-sensitive.

    Args:
        tokens: Keyword tokens to expand. The set is not modified.

    Returns:
        A new set holding the original tokens plus any derived variants;
        an empty set when *tokens* is empty or otherwise falsy.
    """
    if not tokens:
        return set()
    variants = set(tokens)
    # Iterating ``tokens`` directly is safe: only ``variants`` is mutated.
    for token in tokens:
        # Short tokens are left alone so stripping cannot reduce them further.
        if len(token) <= TOKEN_MIN_LEN:
            continue
        # Independent ``if``s (not ``elif``) so every matching rule fires.
        if token.endswith("ies"):
            variants.add(token[:-3] + "y")
        if token.endswith("es"):
            variants.add(token[:-2])
        if token.endswith("s"):
            variants.add(token[:-1])
    return variants