retriever: split keyword hit selection

This commit is contained in:
Brad Stein 2026-02-03 14:14:05 -03:00
parent 7e9b01915a
commit 46fdebbbfb

View File

@@ -1082,6 +1082,26 @@ async def _score_chunk_group(
return scored
def _keyword_hits(
ranked: list[dict[str, Any]],
head: dict[str, Any],
keywords: list[str] | None,
) -> list[dict[str, Any]]:
if not keywords:
return []
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
if not lowered:
return []
hits: list[dict[str, Any]] = []
for item in ranked:
if item is head:
continue
text = str(item.get("text") or "").lower()
if any(kw in text for kw in lowered):
hits.append(item)
return hits
def _select_chunks(
chunks: list[dict[str, Any]],
scores: dict[str, float],
@@ -1095,29 +1115,17 @@ def _select_chunks(
head = chunks[0]
selected.append(head)
keyword_hits: list[dict[str, Any]] = []
if keywords:
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
for item in ranked:
if item is head:
continue
text = str(item.get("text") or "").lower()
if any(kw in text for kw in lowered):
keyword_hits.append(item)
for item in keyword_hits:
for item in _keyword_hits(ranked, head, keywords):
if len(selected) >= plan.chunk_top:
return selected
if item in selected:
continue
selected.append(item)
if item not in selected:
selected.append(item)
for item in ranked:
if len(selected) >= plan.chunk_top:
break
if item in selected:
continue
selected.append(item)
if item not in selected:
selected.append(item)
return selected