From 44c22e3d004ab81dd3b798c88a0422f0650ee39e Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Wed, 28 Jan 2026 00:22:32 -0300
Subject: [PATCH] atlasbot: improve multi-pass synthesis

---
 services/comms/scripts/atlasbot/bot.py | 307 +++++++++++++++++++------
 1 file changed, 239 insertions(+), 68 deletions(-)

diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index 9108478..df718e6 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -2559,8 +2559,13 @@ def _fact_pack_lines(
     return lines
 
 
-def _fact_pack_text(lines: list[str]) -> str:
-    labeled = [f"F{idx + 1}: {line}" for idx, line in enumerate(lines)]
+def _fact_pack_text(lines: list[str], fact_meta: dict[str, dict[str, Any]]) -> str:
+    labeled: list[str] = []
+    for idx, line in enumerate(lines):
+        fid = f"F{idx + 1}"
+        tags = fact_meta.get(fid, {}).get("tags") or []
+        tag_text = f" [tags: {', '.join(tags)}]" if tags else ""
+        labeled.append(f"{fid}{tag_text}: {line}")
     return "Fact pack:\n" + "\n".join(labeled)
 
 
@@ -2782,7 +2787,8 @@ def _open_ended_system() -> str:
         "Use ONLY the provided fact pack and recent chat as your evidence. "
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
-        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
         "Do not invent numbers or facts. "
@@ -2938,6 +2944,67 @@ def _open_ended_plan(
     return cleaned
 
 
+def _sanitize_focus_tags(raw_tags: list[Any]) -> list[str]:
+    tags: list[str] = []
+    for tag in raw_tags:
+        if not isinstance(tag, str):
+            continue
+        tag = tag.strip()
+        if tag in _ALLOWED_INSIGHT_TAGS and tag not in tags:
+            tags.append(tag)
+    return tags
+
+
+def _open_ended_interpret(
+    prompt: str,
+    *,
+    fact_pack: str,
+    history_lines: list[str],
+    state: ThoughtState | None,
+    model: str | None,
+) -> dict[str, Any]:
+    if state:
+        state.update("interpreting", step=1, note="reading question")
+    allowed_tags = ", ".join(sorted(_ALLOWED_INSIGHT_TAGS))
+    prompt_text = (
+        "Classify how to answer the question using only the fact pack. "
+        "Return JSON: {\"style\":\"objective|subjective\","
+        "\"tone\":\"neutral|curious|enthusiastic\","
+        "\"focus_tags\":[\"tag\"],"
+        "\"focus_label\":\"short phrase\","
+        "\"allow_list\":true|false}. "
+        "Use allow_list=true only if the question explicitly asks for names or lists. "
+        f"Only use tags from: {allowed_tags}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    if not isinstance(result, dict):
+        result = {}
+    style = str(result.get("style") or "").strip().lower()
+    if style not in ("objective", "subjective"):
+        style = "subjective" if _is_subjective_query(prompt) else "objective"
+    tone = str(result.get("tone") or "neutral").strip().lower()
+    if tone not in ("neutral", "curious", "enthusiastic"):
+        tone = "neutral"
+    focus_tags = _sanitize_focus_tags(result.get("focus_tags") or [])
+    focus_label = str(result.get("focus_label") or "").strip()
+    allow_list = result.get("allow_list")
+    if not isinstance(allow_list, bool):
+        q = normalize_query(prompt)
+        allow_list = any(phrase in q for phrase in ("list", "which", "what are", "names"))
+    return {
+        "style": style,
+        "tone": tone,
+        "focus_tags": focus_tags,
+        "focus_label": focus_label,
+        "allow_list": allow_list,
+    }
+
+
 def _preferred_tags_for_prompt(prompt: str) -> set[str]:
     q = normalize_query(prompt)
     tags: set[str] = set()
@@ -3013,6 +3080,71 @@ def _open_ended_insights(
     return cleaned
 
 
+def _fallback_fact_ids(
+    fact_meta: dict[str, dict[str, Any]],
+    *,
+    focus_tags: set[str],
+    count: int,
+) -> list[str]:
+    if not fact_meta:
+        return []
+    if focus_tags:
+        tagged = [
+            fid
+            for fid, meta in fact_meta.items()
+            if focus_tags & set(meta.get("tags") or [])
+        ]
+        if tagged:
+            return tagged[:count]
+    return list(fact_meta.keys())[:count]
+
+
+def _open_ended_select_facts(
+    prompt: str,
+    *,
+    fact_pack: str,
+    fact_meta: dict[str, dict[str, Any]],
+    history_lines: list[str],
+    focus_tags: set[str],
+    avoid_fact_ids: list[str],
+    count: int,
+    subjective: bool,
+    state: ThoughtState | None,
+    step: int,
+    model: str | None,
+) -> list[str]:
+    if state:
+        state.update("selecting facts", step=step, note="picking evidence")
+    focus_hint = ", ".join(sorted(focus_tags)) if focus_tags else "any"
+    avoid_hint = ", ".join(avoid_fact_ids) if avoid_fact_ids else "none"
+    prompt_text = (
+        "Select the fact IDs that best answer the question. "
+        f"Pick up to {count} fact IDs. "
+        f"Focus tags: {focus_hint}. "
+        f"Avoid these fact IDs: {avoid_hint}. "
+        "If the question is subjective, pick standout or unusual facts; "
+        "if objective, pick the minimal facts needed. "
+        "Return JSON: {\"fact_ids\":[\"F1\"...],\"note\":\"...\"}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    fact_ids = result.get("fact_ids") if isinstance(result, dict) else None
+    selected: list[str] = []
+    if isinstance(fact_ids, list):
+        for fid in fact_ids:
+            if isinstance(fid, str) and fid in fact_meta and fid not in selected:
+                selected.append(fid)
+            if len(selected) >= count:
+                break
+    if not selected:
+        selected = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
+    return selected
+
+
 def _normalize_score(value: Any, *, default: int = 60) -> int:
     if isinstance(value, (int, float)):
         return int(max(0, min(100, value)))
@@ -3043,6 +3175,9 @@ def _open_ended_candidate(
     focus: str,
     fact_pack: str,
     history_lines: list[str],
+    subjective: bool,
+    tone: str,
+    allow_list: bool,
     state: ThoughtState | None,
     step: int,
     fact_hints: list[str] | None = None,
@@ -3053,10 +3188,23 @@ def _open_ended_candidate(
     hint_text = ""
     if fact_hints:
         hint_text = " Prioritize these fact IDs if relevant: " + ", ".join(fact_hints) + "."
+    style_hint = (
+        "Offer a brief opinion grounded in facts and explain why it stands out. "
+        if subjective
+        else "Answer directly and succinctly. "
+    )
+    list_hint = (
+        "If a list is requested, embed it inline in a sentence (comma-separated). "
+        if allow_list
+        else "Avoid bullet lists. "
+    )
     prompt_text = (
         "Using ONLY the fact pack, answer the question focusing on this angle: "
         f"{focus}. "
-        "Write 2-4 sentences in plain prose (not a list)."
+        f"Tone: {tone}. "
+        + style_hint
+        + list_hint
+        + "Write 2-4 sentences in plain prose."
         + hint_text
         + " "
         "If you infer, label it as inference. "
@@ -3125,6 +3273,9 @@ def _open_ended_synthesize(
     fact_pack: str,
     history_lines: list[str],
     candidates: list[dict[str, Any]],
+    subjective: bool,
+    tone: str,
+    allow_list: bool,
     state: ThoughtState | None,
     step: int,
     model: str | None,
@@ -3133,6 +3284,16 @@ def _open_ended_synthesize(
     if state:
         state.update("synthesizing", step=step, note="composing answer")
     critique_block = f"\nCritique guidance: {critique}\n" if critique else "\n"
+    style_hint = (
+        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
+        if subjective
+        else "Answer directly without extra caveats. "
+    )
+    list_hint = (
+        "If a list is requested, embed it inline in a sentence (comma-separated). "
+        if allow_list
+        else "Avoid bullet lists. "
+    )
     synth_prompt = (
         "Compose the final answer to the question using the candidate answers below. "
         "Select the best 1-2 candidates, blend them if helpful, and keep 2-4 sentences. "
@@ -3140,7 +3301,10 @@ def _open_ended_synthesize(
         "If you infer, label it as inference. "
         "Do not claim nodes are missing or not ready unless the fact pack explicitly lists "
         "nodes_not_ready or expected_workers_missing. "
-        "Keep the tone conversational and answer the user's intent directly. "
+        f"Tone: {tone}. "
+        + style_hint
+        + list_hint
+        + "Keep the tone conversational and answer the user's intent directly. "
        "Avoid repeating the last response if possible. "
         "End with lines: Confidence, Relevance (0-100), Satisfaction (0-100), "
         "HallucinationRisk (low|medium|high).\n"
@@ -3202,85 +3366,90 @@ def _open_ended_multi(
 ) -> str:
     model = _model_for_mode(mode)
     if mode == "fast":
-        angle_count = 1
-        insight_count = 1
-        total_steps = 2
+        total_steps = 4
     else:
-        angle_count = 4
-        insight_count = 4
-        total_steps = 2 + angle_count + 2 + 1
+        total_steps = 7
     if state:
         state.total_steps = total_steps
 
-    angles: list[dict[str, Any]] = []
-    insights: list[dict[str, Any]] = []
-    if mode != "fast":
-        angles = _open_ended_plan(
-            prompt,
-            fact_pack=fact_pack,
-            history_lines=history_lines,
-            count=angle_count,
-            state=state,
-            model=model,
-        )
-        insights = _open_ended_insights(
+    interpretation = _open_ended_interpret(
+        prompt,
+        fact_pack=fact_pack,
+        history_lines=history_lines,
+        state=state,
+        model=model,
+    )
+    style = interpretation.get("style") or "objective"
+    subjective = style == "subjective" or _is_subjective_query(prompt)
+    tone = str(interpretation.get("tone") or "").strip().lower()
+    if tone not in ("neutral", "curious", "enthusiastic"):
+        tone = "curious" if subjective else "neutral"
+    allow_list = bool(interpretation.get("allow_list"))
+    focus_tags = set(interpretation.get("focus_tags") or []) or _preferred_tags_for_prompt(prompt)
+    if not focus_tags and subjective:
+        focus_tags = set(_ALLOWED_INSIGHT_TAGS)
+
+    primary_ids = _open_ended_select_facts(
+        prompt,
+        fact_pack=fact_pack,
+        fact_meta=fact_meta,
+        history_lines=history_lines,
+        focus_tags=focus_tags,
+        avoid_fact_ids=[],
+        count=4 if mode == "deep" else 3,
+        subjective=subjective,
+        state=state,
+        step=2,
+        model=model,
+    )
+    alternate_ids: list[str] = []
+    if mode == "deep":
+        alternate_ids = _open_ended_select_facts(
             prompt,
             fact_pack=fact_pack,
             fact_meta=fact_meta,
             history_lines=history_lines,
-            count=insight_count,
+            focus_tags=focus_tags,
+            avoid_fact_ids=primary_ids,
+            count=4,
+            subjective=subjective,
             state=state,
+            step=3,
             model=model,
         )
-    seeds = _seed_insights(fact_lines, fact_meta, limit=max(4, insight_count))
-    insight_candidates = insights + seeds
-    subjective = _is_subjective_query(prompt)
-    prefer_tags = _preferred_tags_for_prompt(prompt)
-    history_tags = _history_tags(history_lines)
-    avoid_tags = history_tags if subjective else set()
-    preference = "novelty" if subjective else "relevance"
-    selected_insights = _select_diverse_insights(
-        insight_candidates,
-        preference=preference,
-        prefer_tags=prefer_tags,
-        avoid_tags=avoid_tags,
-        history_tags=history_tags,
-        fact_meta=fact_meta,
-        count=1 if mode == "fast" else 2,
-    )
-    if state and selected_insights:
-        state.update("analyzing", note=_candidate_note(selected_insights[0]))
-
-    angle_inputs: list[dict[str, Any]] = []
-    for insight in selected_insights:
-        angle_inputs.append(
-            {
-                "focus": str(insight.get("summary") or "Direct answer"),
-                "fact_ids": insight.get("fact_ids") or [],
-            }
-        )
-    for angle in angles:
-        if len(angle_inputs) >= angle_count:
-            break
-        angle_inputs.append(
-            {
-                "focus": str(angle.get("focus") or "Direct answer"),
-                "fact_ids": [],
-            }
-        )
 
     candidates: list[dict[str, Any]] = []
-    step = 1 if mode == "fast" else 3
-    for angle in angle_inputs[:angle_count]:
+    focus_label = interpretation.get("focus_label") or "primary angle"
+    step = 3 if mode == "fast" else 4
+    candidates.append(
+        _open_ended_candidate(
+            prompt,
+            focus=str(focus_label),
+            fact_pack=fact_pack,
+            history_lines=history_lines,
+            subjective=subjective,
+            tone=str(tone),
+            allow_list=allow_list,
+            state=state,
+            step=step,
+            fact_hints=primary_ids,
+            model=model,
+        )
+    )
+    step += 1
+    if mode == "deep" and alternate_ids:
         candidates.append(
             _open_ended_candidate(
                 prompt,
-                focus=str(angle.get("focus") or "Direct answer"),
+                focus="alternate angle",
                 fact_pack=fact_pack,
                 history_lines=history_lines,
+                subjective=subjective,
+                tone=str(tone),
+                allow_list=allow_list,
                 state=state,
                 step=step,
-                fact_hints=angle.get("fact_ids") if isinstance(angle.get("fact_ids"), list) else None,
+                fact_hints=alternate_ids,
                 model=model,
             )
         )
@@ -3306,6 +3475,9 @@ def _open_ended_multi(
         fact_pack=fact_pack,
         history_lines=history_lines,
         candidates=selected or candidates,
+        subjective=subjective,
+        tone=str(tone),
+        allow_list=allow_list,
         state=state,
         step=step,
         model=model,
@@ -3318,9 +3490,8 @@ def _open_ended_multi(
 
 def _open_ended_total_steps(mode: str) -> int:
     if mode == "fast":
-        return 2
-    angle_count = 4
-    return 2 + angle_count + 2 + 1
+        return 4
+    return 7
 
 
 def _open_ended_fast(
@@ -3386,8 +3557,8 @@ def open_ended_answer(
         lines.extend(tool_lines)
     if not lines:
         return _ensure_scores("I don't have enough data to answer that.")
-    fact_pack = _fact_pack_text(lines)
     fact_meta = _fact_pack_meta(lines)
+    fact_pack = _fact_pack_text(lines, fact_meta)
    if mode == "fast":
         return _open_ended_fast(
             prompt,
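
Reviewer note (illustrative only, not part of the commit): a minimal sketch of the
new tagged fact-pack labelling in _fact_pack_text, assuming a toy lines/fact_meta
pair. In the bot itself fact_meta comes from _fact_pack_meta(lines) before
_fact_pack_text(lines, fact_meta) is called, as in the final hunk above; the fact
lines and tags below are made up.

    # Hypothetical fact lines and metadata, for illustration only.
    lines = [
        "node-a runs 2x RTX 4090 GPUs",
        "node-b median latency is 41ms",
    ]
    fact_meta = {
        "F1": {"tags": ["gpu", "hardware"]},
        "F2": {"tags": ["latency"]},
    }

    print(_fact_pack_text(lines, fact_meta))
    # Fact pack:
    # F1 [tags: gpu, hardware]: node-a runs 2x RTX 4090 GPUs
    # F2 [tags: latency]: node-b median latency is 41ms

The tag suffix keeps the F1/F2 citation scheme intact while giving the interpret
and fact-selection passes something concrete to match their focus_tags against.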