From 44c22e3d004ab81dd3b798c88a0422f0650ee39e Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Wed, 28 Jan 2026 00:22:32 -0300
Subject: [PATCH] atlasbot: improve multi-pass synthesis

---
 services/comms/scripts/atlasbot/bot.py | 307 +++++++++++++++++++------
 1 file changed, 239 insertions(+), 68 deletions(-)

diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index 9108478..df718e6 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -2559,8 +2559,13 @@ def _fact_pack_lines(
     return lines
 
 
-def _fact_pack_text(lines: list[str]) -> str:
-    labeled = [f"F{idx + 1}: {line}" for idx, line in enumerate(lines)]
+def _fact_pack_text(lines: list[str], fact_meta: dict[str, dict[str, Any]]) -> str:
+    labeled: list[str] = []
+    for idx, line in enumerate(lines):
+        fid = f"F{idx + 1}"
+        tags = fact_meta.get(fid, {}).get("tags") or []
+        tag_text = f" [tags: {', '.join(tags)}]" if tags else ""
+        labeled.append(f"{fid}{tag_text}: {line}")
     return "Fact pack:\n" + "\n".join(labeled)
 
 
@@ -2782,7 +2787,8 @@ def _open_ended_system() -> str:
         "Use ONLY the provided fact pack and recent chat as your evidence. "
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
-        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
         "Do not invent numbers or facts. "
@@ -2938,6 +2944,67 @@ def _open_ended_plan(
     return cleaned
 
 
+def _sanitize_focus_tags(raw_tags: list[Any]) -> list[str]:
+    tags: list[str] = []
+    for tag in raw_tags:
+        if not isinstance(tag, str):
+            continue
+        tag = tag.strip()
+        if tag in _ALLOWED_INSIGHT_TAGS and tag not in tags:
+            tags.append(tag)
+    return tags
+
+
+def _open_ended_interpret(
+    prompt: str,
+    *,
+    fact_pack: str,
+    history_lines: list[str],
+    state: ThoughtState | None,
+    model: str | None,
+) -> dict[str, Any]:
+    if state:
+        state.update("interpreting", step=1, note="reading question")
+    allowed_tags = ", ".join(sorted(_ALLOWED_INSIGHT_TAGS))
+    prompt_text = (
+        "Classify how to answer the question using only the fact pack. "
+        "Return JSON: {\"style\":\"objective|subjective\","
+        "\"tone\":\"neutral|curious|enthusiastic\","
+        "\"focus_tags\":[\"tag\"],"
+        "\"focus_label\":\"short phrase\","
+        "\"allow_list\":true|false}. "
+        "Use allow_list=true only if the question explicitly asks for names or lists. "
+        f"Only use tags from: {allowed_tags}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    if not isinstance(result, dict):
+        result = {}
+    style = str(result.get("style") or "").strip().lower()
+    if style not in ("objective", "subjective"):
+        style = "subjective" if _is_subjective_query(prompt) else "objective"
+    tone = str(result.get("tone") or "neutral").strip().lower()
+    if tone not in ("neutral", "curious", "enthusiastic"):
+        tone = "neutral"
+    focus_tags = _sanitize_focus_tags(result.get("focus_tags") or [])
+    focus_label = str(result.get("focus_label") or "").strip()
+    allow_list = result.get("allow_list")
+    if not isinstance(allow_list, bool):
+        q = normalize_query(prompt)
+        allow_list = any(phrase in q for phrase in ("list", "which", "what are", "names"))
+    return {
+        "style": style,
+        "tone": tone,
+        "focus_tags": focus_tags,
+        "focus_label": focus_label,
+        "allow_list": allow_list,
+    }
+
+
 def _preferred_tags_for_prompt(prompt: str) -> set[str]:
     q = normalize_query(prompt)
     tags: set[str] = set()
@@ -3013,6 +3080,71 @@ def _open_ended_insights(
     return cleaned
 
 
+def _fallback_fact_ids(
+    fact_meta: dict[str, dict[str, Any]],
+    *,
+    focus_tags: set[str],
+    count: int,
+) -> list[str]:
+    if not fact_meta:
+        return []
+    if focus_tags:
+        tagged = [
+            fid
+            for fid, meta in fact_meta.items()
+            if focus_tags & set(meta.get("tags") or [])
+        ]
+        if tagged:
+            return tagged[:count]
+    return list(fact_meta.keys())[:count]
+
+
+def _open_ended_select_facts(
+    prompt: str,
+    *,
+    fact_pack: str,
+    fact_meta: dict[str, dict[str, Any]],
+    history_lines: list[str],
+    focus_tags: set[str],
+    avoid_fact_ids: list[str],
+    count: int,
+    subjective: bool,
+    state: ThoughtState | None,
+    step: int,
+    model: str | None,
+) -> list[str]:
+    if state:
+        state.update("selecting facts", step=step, note="picking evidence")
+    focus_hint = ", ".join(sorted(focus_tags)) if focus_tags else "any"
+    avoid_hint = ", ".join(avoid_fact_ids) if avoid_fact_ids else "none"
+    prompt_text = (
+        "Select the fact IDs that best answer the question. "
+        f"Pick up to {count} fact IDs. "
+        f"Focus tags: {focus_hint}. "
+        f"Avoid these fact IDs: {avoid_hint}. "
+        "If the question is subjective, pick standout or unusual facts; "
+        "if objective, pick the minimal facts needed. "
+        "Return JSON: {\"fact_ids\":[\"F1\"...],\"note\":\"...\"}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    fact_ids = result.get("fact_ids") if isinstance(result, dict) else None
+    selected: list[str] = []
+    if isinstance(fact_ids, list):
+        for fid in fact_ids:
+            if isinstance(fid, str) and fid in fact_meta and fid not in selected:
+                selected.append(fid)
+            if len(selected) >= count:
+                break
+    if not selected:
+        selected = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
+    return selected
+
+
 def _normalize_score(value: Any, *, default: int = 60) -> int:
     if isinstance(value, (int, float)):
         return int(max(0, min(100, value)))
@@ -3043,6 +3175,9 @@ def _open_ended_candidate(
     focus: str,
     fact_pack: str,
     history_lines: list[str],
+    subjective: bool,
+    tone: str,
+    allow_list: bool,
     state: ThoughtState | None,
     step: int,
     fact_hints: list[str] | None = None,
@@ -3053,10 +3188,23 @@ def _open_ended_candidate(
     hint_text = ""
     if fact_hints:
         hint_text = " Prioritize these fact IDs if relevant: " + ", ".join(fact_hints) + "."
+    style_hint = (
+        "Offer a brief opinion grounded in facts and explain why it stands out. "
+        if subjective
+        else "Answer directly and succinctly. "
+    )
+    list_hint = (
+        "If a list is requested, embed it inline in a sentence (comma-separated). "
+        if allow_list
+        else "Avoid bullet lists. "
+    )
     prompt_text = (
         "Using ONLY the fact pack, answer the question focusing on this angle: "
         f"{focus}. "
-        "Write 2-4 sentences in plain prose (not a list)."
+        f"Tone: {tone}. "
+        + style_hint
+        + list_hint
+        + "Write 2-4 sentences in plain prose."
         + hint_text
         + " "
         "If you infer, label it as inference. "
@@ -3125,6 +3273,9 @@ def _open_ended_synthesize(
     fact_pack: str,
     history_lines: list[str],
     candidates: list[dict[str, Any]],
+    subjective: bool,
+    tone: str,
+    allow_list: bool,
     state: ThoughtState | None,
     step: int,
     model: str | None,
@@ -3133,6 +3284,16 @@ def _open_ended_synthesize(
     if state:
         state.update("synthesizing", step=step, note="composing answer")
     critique_block = f"\nCritique guidance: {critique}\n" if critique else "\n"
+    style_hint = (
+        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
+        if subjective
+        else "Answer directly without extra caveats. "
+    )
+    list_hint = (
+        "If a list is requested, embed it inline in a sentence (comma-separated). "
+        if allow_list
+        else "Avoid bullet lists. "
+    )
     synth_prompt = (
         "Compose the final answer to the question using the candidate answers below. "
         "Select the best 1-2 candidates, blend them if helpful, and keep 2-4 sentences. "
@@ -3140,7 +3301,10 @@ def _open_ended_synthesize(
         "If you infer, label it as inference. "
         "Do not claim nodes are missing or not ready unless the fact pack explicitly lists "
         "nodes_not_ready or expected_workers_missing. "
-        "Keep the tone conversational and answer the user's intent directly. "
+        f"Tone: {tone}. "
+        + style_hint
+        + list_hint
+        + "Keep the tone conversational and answer the user's intent directly. "
        "Avoid repeating the last response if possible. "
         "End with lines: Confidence, Relevance (0-100), Satisfaction (0-100), "
         "HallucinationRisk (low|medium|high).\n"
@@ -3202,85 +3366,90 @@ def _open_ended_multi(
 ) -> str:
     model = _model_for_mode(mode)
     if mode == "fast":
-        angle_count = 1
-        insight_count = 1
-        total_steps = 2
+        total_steps = 4
     else:
-        angle_count = 4
-        insight_count = 4
-        total_steps = 2 + angle_count + 2 + 1
+        total_steps = 7
     if state:
         state.total_steps = total_steps
 
-    angles: list[dict[str, Any]] = []
-    insights: list[dict[str, Any]] = []
-    if mode != "fast":
-        angles = _open_ended_plan(
-            prompt,
-            fact_pack=fact_pack,
-            history_lines=history_lines,
-            count=angle_count,
-            state=state,
-            model=model,
-        )
-        insights = _open_ended_insights(
+    interpretation = _open_ended_interpret(
+        prompt,
+        fact_pack=fact_pack,
+        history_lines=history_lines,
+        state=state,
+        model=model,
+    )
+    style = interpretation.get("style") or "objective"
+    subjective = style == "subjective" or _is_subjective_query(prompt)
+    tone = str(interpretation.get("tone") or "").strip().lower()
+    if tone not in ("neutral", "curious", "enthusiastic"):
+        tone = "curious" if subjective else "neutral"
+    allow_list = bool(interpretation.get("allow_list"))
+    focus_tags = set(interpretation.get("focus_tags") or []) or _preferred_tags_for_prompt(prompt)
+    if not focus_tags and subjective:
+        focus_tags = set(_ALLOWED_INSIGHT_TAGS)
+
+    primary_ids = _open_ended_select_facts(
+        prompt,
+        fact_pack=fact_pack,
+        fact_meta=fact_meta,
+        history_lines=history_lines,
+        focus_tags=focus_tags,
+        avoid_fact_ids=[],
+        count=4 if mode == "deep" else 3,
+        subjective=subjective,
+        state=state,
+        step=2,
+        model=model,
+    )
+    alternate_ids: list[str] = []
+    if mode == "deep":
+        alternate_ids = _open_ended_select_facts(
             prompt,
             fact_pack=fact_pack,
             fact_meta=fact_meta,
             history_lines=history_lines,
-            count=insight_count,
+            focus_tags=focus_tags,
+            avoid_fact_ids=primary_ids,
+            count=4,
+            subjective=subjective,
             state=state,
+            step=3,
             model=model,
         )
-    seeds = _seed_insights(fact_lines, fact_meta, limit=max(4, insight_count))
-    insight_candidates = insights + seeds
-    subjective = _is_subjective_query(prompt)
-    prefer_tags = _preferred_tags_for_prompt(prompt)
-    history_tags = _history_tags(history_lines)
-    avoid_tags = history_tags if subjective else set()
-    preference = "novelty" if subjective else "relevance"
-    selected_insights = _select_diverse_insights(
-        insight_candidates,
-        preference=preference,
-        prefer_tags=prefer_tags,
-        avoid_tags=avoid_tags,
-        history_tags=history_tags,
-        fact_meta=fact_meta,
-        count=1 if mode == "fast" else 2,
-    )
-    if state and selected_insights:
-        state.update("analyzing", note=_candidate_note(selected_insights[0]))
-
-    angle_inputs: list[dict[str, Any]] = []
-    for insight in selected_insights:
-        angle_inputs.append(
-            {
-                "focus": str(insight.get("summary") or "Direct answer"),
-                "fact_ids": insight.get("fact_ids") or [],
-            }
-        )
-    for angle in angles:
-        if len(angle_inputs) >= angle_count:
-            break
-        angle_inputs.append(
-            {
-                "focus": str(angle.get("focus") or "Direct answer"),
-                "fact_ids": [],
-            }
-        )
 
     candidates: list[dict[str, Any]] = []
-    step = 1 if mode == "fast" else 3
-    for angle in angle_inputs[:angle_count]:
+    focus_label = interpretation.get("focus_label") or "primary angle"
+    step = 3 if mode == "fast" else 4
+    candidates.append(
+        _open_ended_candidate(
+            prompt,
+            focus=str(focus_label),
+            fact_pack=fact_pack,
+            history_lines=history_lines,
+            subjective=subjective,
+            tone=str(tone),
+            allow_list=allow_list,
+            state=state,
+            step=step,
+            fact_hints=primary_ids,
+            model=model,
+        )
+    )
+    step += 1
+    if mode == "deep" and alternate_ids:
         candidates.append(
             _open_ended_candidate(
                 prompt,
-                focus=str(angle.get("focus") or "Direct answer"),
+                focus="alternate angle",
                 fact_pack=fact_pack,
                 history_lines=history_lines,
+                subjective=subjective,
+                tone=str(tone),
+                allow_list=allow_list,
                 state=state,
                 step=step,
-                fact_hints=angle.get("fact_ids") if isinstance(angle.get("fact_ids"), list) else None,
+                fact_hints=alternate_ids,
                 model=model,
             )
         )
@@ -3306,6 +3475,9 @@ def _open_ended_multi(
         fact_pack=fact_pack,
         history_lines=history_lines,
         candidates=selected or candidates,
+        subjective=subjective,
+        tone=str(tone),
+        allow_list=allow_list,
         state=state,
         step=step,
         model=model,
@@ -3318,9 +3490,8 @@ def _open_ended_multi(
 
 def _open_ended_total_steps(mode: str) -> int:
     if mode == "fast":
-        return 2
-    angle_count = 4
-    return 2 + angle_count + 2 + 1
+        return 4
+    return 7
 
 
 def _open_ended_fast(
@@ -3386,8 +3557,8 @@ def open_ended_answer(
         lines.extend(tool_lines)
     if not lines:
         return _ensure_scores("I don't have enough data to answer that.")
-    fact_pack = _fact_pack_text(lines)
     fact_meta = _fact_pack_meta(lines)
+    fact_pack = _fact_pack_text(lines, fact_meta)
    if mode == "fast":
         return _open_ended_fast(
             prompt,
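
Reviewer note (illustrative only, not part of the commit): a minimal sketch of the
new tagged fact-pack labelling in _fact_pack_text, assuming a toy lines/fact_meta
pair. In the bot itself fact_meta comes from _fact_pack_meta(lines) before
_fact_pack_text(lines, fact_meta) is called, as in the final hunk above; the fact
lines and tags below are made up.

    # Hypothetical fact lines and metadata, for illustration only.
    lines = [
        "node-a runs 2x RTX 4090 GPUs",
        "node-b median latency is 41ms",
    ]
    fact_meta = {
        "F1": {"tags": ["gpu", "hardware"]},
        "F2": {"tags": ["latency"]},
    }

    print(_fact_pack_text(lines, fact_meta))
    # Fact pack:
    # F1 [tags: gpu, hardware]: node-a runs 2x RTX 4090 GPUs
    # F2 [tags: latency]: node-b median latency is 41ms

The tag suffix keeps the F1/F2 citation scheme intact while giving the interpret
and fact-selection passes something concrete to match their focus_tags against.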