From 46fdebbbfbd3ef16591bd03148a969c35b161642 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 3 Feb 2026 14:14:05 -0300 Subject: [PATCH] retriever: split keyword hit selection --- atlasbot/engine/answerer.py | 42 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py index 9eaf7e4..6010d4c 100644 --- a/atlasbot/engine/answerer.py +++ b/atlasbot/engine/answerer.py @@ -1082,6 +1082,26 @@ async def _score_chunk_group( return scored +def _keyword_hits( + ranked: list[dict[str, Any]], + head: dict[str, Any], + keywords: list[str] | None, +) -> list[dict[str, Any]]: + if not keywords: + return [] + lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()] + if not lowered: + return [] + hits: list[dict[str, Any]] = [] + for item in ranked: + if item is head: + continue + text = str(item.get("text") or "").lower() + if any(kw in text for kw in lowered): + hits.append(item) + return hits + + def _select_chunks( chunks: list[dict[str, Any]], scores: dict[str, float], @@ -1095,29 +1115,17 @@ def _select_chunks( head = chunks[0] selected.append(head) - keyword_hits: list[dict[str, Any]] = [] - if keywords: - lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()] - for item in ranked: - if item is head: - continue - text = str(item.get("text") or "").lower() - if any(kw in text for kw in lowered): - keyword_hits.append(item) - - for item in keyword_hits: + for item in _keyword_hits(ranked, head, keywords): if len(selected) >= plan.chunk_top: return selected - if item in selected: - continue - selected.append(item) + if item not in selected: + selected.append(item) for item in ranked: if len(selected) >= plan.chunk_top: break - if item in selected: - continue - selected.append(item) + if item not in selected: + selected.append(item) return selected