atlasbot: improve evidence coverage

2026-02-03 10:42:01 -03:00 · 2026-02-03 10:42:01 -03:00 · a77e041d7c
commit a77e041d7c
parent eaa1fd96b5
2 changed files with 44 additions and 1 deletion
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@@ -377,12 +377,26 @@ class AnswerEngine:
                            )
                        if not metric_facts and fallback_candidates:
                            metric_facts = fallback_candidates[: max(2, plan.max_subquestions)]
+                    if metric_facts:
+                        metric_facts = _ensure_token_coverage(
+                            metric_facts,
+                            signal_tokens or keyword_tokens,
+                            summary_lines,
+                            max_add=plan.max_subquestions,
+                        )
                    if metric_facts and not _has_keyword_overlap(metric_facts, keyword_tokens):
                        best_line = _best_keyword_line(summary_lines, keyword_tokens)
                        if best_line:
                            metric_facts = _merge_fact_lines([best_line], metric_facts)
                        if metric_facts:
                            key_facts = _merge_fact_lines(metric_facts, key_facts)
+                if key_facts:
+                    key_facts = _ensure_token_coverage(
+                        key_facts,
+                        signal_tokens or keyword_tokens,
+                        summary_lines,
+                        max_add=plan.max_subquestions,
+                    )
                if self._settings.debug_pipeline:
                    scored_preview = sorted(
                        [{"id": c["id"], "score": scored.get(c["id"], 0.0), "summary": c["summary"]} for c in chunks],
@@ -1485,6 +1499,32 @@ def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
    return False


+def _ensure_token_coverage(
+    lines: list[str],
+    tokens: list[str],
+    summary_lines: list[str],
+    max_add: int = 4,
+) -> list[str]:
+    if not lines or not tokens or not summary_lines:
+        return lines
+    hay = " ".join(lines).lower()
+    missing = [tok for tok in tokens if tok and tok.lower() not in hay]
+    if not missing:
+        return lines
+    added: list[str] = []
+    for token in missing:
+        token_lower = token.lower()
+        for line in summary_lines:
+            if token_lower in line.lower() and line not in lines and line not in added:
+                added.append(line)
+                break
+        if len(added) >= max_add:
+            break
+    if not added:
+        return lines
+    return _merge_fact_lines(added, lines)
+
+
 def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
    if not lines or not keywords:
        return None
--- a/atlasbot/llm/prompts.py
+++ b/atlasbot/llm/prompts.py
@@ -134,8 +134,11 @@ EVIDENCE_GUARD_SYSTEM = (

 EVIDENCE_GUARD_PROMPT = (
    "Rewrite the draft to only include claims supported by FactsUsed. "
+    "If FactsUsed lists explicit values (for example hardware_nodes or node_arch), "
+    "use those exact values and do not invert or reinterpret them. "
    "If the draft mentions pressure/overload/headroom without evidence, remove it. "
-    "If the draft mentions nodes not in FactsUsed, remove those statements. "
+    "If the draft mentions nodes not present in FactsUsed, remove those statements. "
+    "If the draft contradicts FactsUsed, correct it to match FactsUsed. "
    "Return the corrected answer only."
 )