From 666dcb3faa09243eb010c8aac081f4a1ea007d6a Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Wed, 28 Jan 2026 02:21:42 -0300
Subject: [PATCH] atlasbot: rework reasoning pipeline

---
 services/comms/atlasbot-deployment.yaml |   2 +-
 services/comms/scripts/atlasbot/bot.py  | 286 +++++++++++++++++-------
 2 files changed, 210 insertions(+), 78 deletions(-)

diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml
index a7fbea9..c9602c3 100644
--- a/services/comms/atlasbot-deployment.yaml
+++ b/services/comms/atlasbot-deployment.yaml
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-89
+        checksum/atlasbot-configmap: manual-atlasbot-90
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index c44c7da..ffc8a5c 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -16,7 +16,7 @@ PASSWORD = os.environ["BOT_PASS"]
 
 ROOM_ALIAS = "#othrys:live.bstein.dev"
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
-MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0")
+MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:14b-instruct")
 MODEL_FAST = os.environ.get("ATLASBOT_MODEL_FAST", "")
 MODEL_DEEP = os.environ.get("ATLASBOT_MODEL_DEEP", "")
 FALLBACK_MODEL = os.environ.get("OLLAMA_FALLBACK_MODEL", "")
@@ -2895,6 +2895,7 @@ def _open_ended_system() -> str:
         "Use ONLY the provided fact pack and recent chat as your evidence. "
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
+        "Be willing to take a light stance; do not over-hedge. "
         "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
         "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
@@ -3002,20 +3003,27 @@ def _open_ended_plan(
     *,
     fact_pack: str,
     history_lines: list[str],
+    focus_tags: set[str],
+    avoid_tags: set[str],
     count: int,
     state: ThoughtState | None,
+    step: int,
     model: str | None,
 ) -> list[dict[str, Any]]:
     if state:
-        state.update("planning", step=1, note="mapping angles")
+        state.update("planning", step=step, note="mapping angles")
     count = max(1, count)
+    focus_hint = ", ".join(sorted(focus_tags)) if focus_tags else "any"
+    avoid_hint = ", ".join(sorted(avoid_tags)) if avoid_tags else "none"
     prompt_text = (
         "Analyze the question and propose up to "
         f"{count} distinct answer angles that can be supported by the fact pack. "
         "Keep them diverse (e.g., metrics, hardware, workload placement, recent changes). "
         "If the question is subjective, propose at least one angle that surfaces a standout detail. "
+        f"Prefer angles that align with these tags: {focus_hint}. "
+        f"Avoid angles that overlap these tags if possible: {avoid_hint}. "
         "Avoid repeating the same angle as the most recent response if possible. "
-        "Return JSON: {\"angles\":[{\"focus\":\"...\",\"reason\":\"...\",\"priority\":1-5}]}."
+        "Return JSON: {\"angles\":[{\"focus\":\"...\",\"reason\":\"...\",\"tags\":[\"tag\"],\"priority\":1-5}]}."
     )
     context = _append_history_context(fact_pack, history_lines)
     result = _ollama_json_call(
@@ -3037,10 +3045,12 @@ def _open_ended_plan(
         priority = item.get("priority")
         if not isinstance(priority, (int, float)):
             priority = 3
+        tags = _sanitize_focus_tags(item.get("tags") or [])
         cleaned.append(
             {
                 "focus": focus,
                 "reason": str(item.get("reason") or ""),
+                "tags": tags,
                 "priority": int(max(1, min(5, priority))),
             }
         )
@@ -3131,6 +3141,35 @@ def _preferred_tags_for_prompt(prompt: str) -> set[str]:
     return tags & _ALLOWED_INSIGHT_TAGS
 
 
+_TAG_KEYWORDS: dict[str, tuple[str, ...]] = {
+    "utilization": ("cpu", "ram", "memory", "net", "network", "io", "disk", "usage", "utilization", "hottest", "busy"),
+    "database": ("postgres", "db", "database", "connections"),
+    "pods": ("pod", "pods", "deployment", "daemonset", "job", "cron", "workload"),
+    "hardware": ("hardware", "architecture", "arch", "rpi", "raspberry", "jetson", "amd64", "arm64", "node", "nodes"),
+    "availability": ("ready", "not ready", "unready", "down", "missing"),
+    "workloads": ("workload", "service", "namespace", "app"),
+    "os": ("os", "kernel", "kubelet", "containerd", "runtime"),
+}
+
+
+def _tags_from_text(text: str) -> set[str]:
+    q = normalize_query(text)
+    if not q:
+        return set()
+    tags: set[str] = set()
+    for tag, keywords in _TAG_KEYWORDS.items():
+        if any(word in q for word in keywords):
+            tags.add(tag)
+    return tags & _ALLOWED_INSIGHT_TAGS
+
+
+def _history_focus_tags(history_lines: list[str]) -> set[str]:
+    if not history_lines:
+        return set()
+    recent = " ".join(line for line in history_lines[-6:] if isinstance(line, str))
+    return _tags_from_text(recent)
+
+
 def _open_ended_insights(
     prompt: str,
     *,
@@ -3139,10 +3178,11 @@ def _open_ended_insights(
     history_lines: list[str],
     count: int,
     state: ThoughtState | None,
+    step: int,
     model: str | None,
 ) -> list[dict[str, Any]]:
     if state:
-        state.update("analyzing", note="scouting insights")
+        state.update("analyzing", step=step, note="scouting insights")
     count = max(1, count)
     allowed_tags = ", ".join(sorted(_ALLOWED_INSIGHT_TAGS))
     prompt_text = (
@@ -3188,10 +3228,35 @@ def _open_ended_insights(
     return cleaned
 
 
+def _rank_insights(
+    insights: list[dict[str, Any]],
+    *,
+    focus_tags: set[str],
+    avoid_tags: set[str],
+    count: int,
+) -> list[dict[str, Any]]:
+    if not insights:
+        return []
+    ranked: list[tuple[float, dict[str, Any]]] = []
+    for insight in insights:
+        relevance = _normalize_fraction(insight.get("relevance"), default=0.6)
+        novelty = _normalize_fraction(insight.get("novelty"), default=0.5)
+        tags = set(insight.get("tags") or [])
+        score = relevance * 0.65 + novelty * 0.35
+        if focus_tags and tags & focus_tags:
+            score += 0.1
+        if avoid_tags and tags & avoid_tags:
+            score -= 0.2
+        ranked.append((score, insight))
+    ranked.sort(key=lambda item: item[0], reverse=True)
+    return [item for _, item in ranked[:count]]
+
+
 def _fallback_fact_ids(
     fact_meta: dict[str, dict[str, Any]],
     *,
     focus_tags: set[str],
+    avoid_tags: set[str],
     count: int,
 ) -> list[str]:
     if not fact_meta:
@@ -3202,9 +3267,16 @@ def _fallback_fact_ids(
         for fid, meta in fact_meta.items()
         if focus_tags & set(meta.get("tags") or [])
     ]
+    if avoid_tags:
+        tagged = [fid for fid in tagged if not (avoid_tags & set(fact_meta.get(fid, {}).get("tags") or []))]
     if tagged:
         return tagged[:count]
-    return list(fact_meta.keys())[:count]
+    all_ids = list(fact_meta.keys())
+    if avoid_tags:
+        filtered = [fid for fid in all_ids if not (avoid_tags & set(fact_meta.get(fid, {}).get("tags") or []))]
+        if filtered:
+            return filtered[:count]
+    return all_ids[:count]
 
 
 def _open_ended_select_facts(
@@ -3214,6 +3286,7 @@ def _open_ended_select_facts(
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     focus_tags: set[str],
+    avoid_tags: set[str],
     avoid_fact_ids: list[str],
     count: int,
     subjective: bool,
@@ -3224,11 +3297,13 @@ def _open_ended_select_facts(
     if state:
         state.update("selecting facts", step=step, note="picking evidence")
     focus_hint = ", ".join(sorted(focus_tags)) if focus_tags else "any"
+    avoid_tag_hint = ", ".join(sorted(avoid_tags)) if avoid_tags else "none"
     avoid_hint = ", ".join(avoid_fact_ids) if avoid_fact_ids else "none"
     prompt_text = (
         "Select the fact IDs that best answer the question. "
         f"Pick up to {count} fact IDs. "
         f"Focus tags: {focus_hint}. "
+        f"Avoid these tags if possible: {avoid_tag_hint}. "
         f"Avoid these fact IDs: {avoid_hint}. "
         "If the question is subjective, pick standout or unusual facts; "
         "if objective, pick the minimal facts needed. "
@@ -3248,7 +3323,18 @@ def _open_ended_select_facts(
             selected.append(fid)
             if len(selected) >= count:
                 break
-    seed = _fallback_fact_ids(fact_meta, focus_tags=focus_tags, count=count)
+    if avoid_tags:
+        selected = [
+            fid
+            for fid in selected
+            if not (avoid_tags & set(fact_meta.get(fid, {}).get("tags") or []))
+        ] or selected
+    seed = _fallback_fact_ids(
+        fact_meta,
+        focus_tags=focus_tags,
+        avoid_tags=avoid_tags,
+        count=count,
+    )
     if selected:
         for fid in seed:
             if fid not in selected:
@@ -3483,7 +3569,7 @@ def _open_ended_multi(
     if mode == "fast":
         total_steps = 4
     else:
-        total_steps = 7
+        total_steps = 9
     if state:
         state.total_steps = total_steps
 
@@ -3503,41 +3589,25 @@ def _open_ended_multi(
     focus_tags = set(interpretation.get("focus_tags") or []) or _preferred_tags_for_prompt(prompt)
     if not focus_tags and subjective:
         focus_tags = set(_ALLOWED_INSIGHT_TAGS)
+    avoid_tags = _history_focus_tags(history_lines) if (subjective or _is_followup_query(prompt)) else set()
 
-    primary_ids = _open_ended_select_facts(
-        prompt,
-        fact_pack=fact_pack,
-        fact_meta=fact_meta,
-        history_lines=history_lines,
-        focus_tags=focus_tags,
-        avoid_fact_ids=[],
-        count=4 if mode == "deep" else 3,
-        subjective=subjective,
-        state=state,
-        step=2,
-        model=model,
-    )
-    alternate_ids: list[str] = []
-    if mode == "deep":
-        alternate_ids = _open_ended_select_facts(
+    if mode == "fast":
+        primary_ids = _open_ended_select_facts(
             prompt,
             fact_pack=fact_pack,
             fact_meta=fact_meta,
             history_lines=history_lines,
             focus_tags=focus_tags,
-            avoid_fact_ids=primary_ids,
-            count=4,
+            avoid_tags=avoid_tags,
+            avoid_fact_ids=[],
+            count=3,
             subjective=subjective,
             state=state,
-            step=3,
+            step=2,
             model=model,
         )
-
-    candidates: list[dict[str, Any]] = []
-    focus_label = interpretation.get("focus_label") or "primary angle"
-    step = 3 if mode == "fast" else 4
-    candidates.append(
-        _open_ended_candidate(
+        focus_label = interpretation.get("focus_label") or "primary angle"
+        candidate = _open_ended_candidate(
             prompt,
             focus=str(focus_label),
             fact_pack=fact_pack,
@@ -3546,17 +3616,65 @@ def _open_ended_multi(
             tone=str(tone),
             allow_list=allow_list,
             state=state,
-            step=step,
+            step=3,
             fact_hints=primary_ids,
             model=model,
         )
+        reply = _open_ended_synthesize(
+            prompt,
+            fact_pack=fact_pack,
+            history_lines=history_lines,
+            candidates=[candidate],
+            subjective=subjective,
+            tone=str(tone),
+            allow_list=allow_list,
+            state=state,
+            step=4,
+            model=model,
+            critique=None,
+        )
+        if state:
+            state.update("done", step=total_steps)
+        return reply
+
+    angles = _open_ended_plan(
+        prompt,
+        fact_pack=fact_pack,
+        history_lines=history_lines,
+        focus_tags=focus_tags,
+        avoid_tags=avoid_tags,
+        count=5,
+        state=state,
+        step=2,
+        model=model,
     )
-    step += 1
-    if mode == "deep" and alternate_ids:
+    if state and avoid_tags:
+        state.update("planning", step=2, note=f"avoiding {', '.join(sorted(avoid_tags))}")
+
+    insights = _open_ended_insights(
+        prompt,
+        fact_pack=fact_pack,
+        fact_meta=fact_meta,
+        history_lines=history_lines,
+        count=7,
+        state=state,
+        step=3,
+        model=model,
+    )
+    ranked_insights = _rank_insights(
+        insights,
+        focus_tags=focus_tags,
+        avoid_tags=avoid_tags,
+        count=3,
+    )
+
+    candidates: list[dict[str, Any]] = []
+    step = 4
+    for insight in ranked_insights:
         candidates.append(
             _open_ended_candidate(
                 prompt,
-                focus="alternate angle",
+                focus=insight.get("summary") or "insight",
                 fact_pack=fact_pack,
                 history_lines=history_lines,
                 subjective=subjective,
@@ -3564,27 +3682,61 @@ def _open_ended_multi(
                 tone=str(tone),
                 allow_list=allow_list,
                 state=state,
                 step=step,
-                fact_hints=alternate_ids,
+                fact_hints=insight.get("fact_ids") or [],
                 model=model,
             )
         )
         step += 1
 
+    if not candidates and angles:
+        for angle in angles[:2]:
+            angle_tags = set(angle.get("tags") or []) or _tags_from_text(angle.get("focus") or "")
+            fact_ids = _open_ended_select_facts(
+                prompt,
+                fact_pack=fact_pack,
+                fact_meta=fact_meta,
+                history_lines=history_lines,
+                focus_tags=angle_tags or focus_tags,
+                avoid_tags=avoid_tags,
+                avoid_fact_ids=[],
+                count=4,
+                subjective=subjective,
+                state=state,
+                step=step,
+                model=model,
+            )
+            candidates.append(
+                _open_ended_candidate(
+                    prompt,
+                    focus=angle.get("focus") or "alternate angle",
+                    fact_pack=fact_pack,
+                    history_lines=history_lines,
+                    subjective=subjective,
+                    tone=str(tone),
+                    allow_list=allow_list,
+                    state=state,
+                    step=step,
+                    fact_hints=fact_ids,
+                    model=model,
+                )
+            )
+            step += 1
+            if len(candidates) >= 2:
+                break
+
     if state:
         state.update("evaluating", step=step, note="ranking candidates")
-    selected = _select_candidates(candidates, count=1 if mode == "fast" else 2)
+    selected = _select_candidates(candidates, count=2)
+    step += 1
+    critique = _open_ended_critique(
+        prompt,
+        fact_pack=fact_pack,
+        history_lines=history_lines,
+        candidates=selected or candidates,
+        state=state,
+        step=step,
+        model=model,
+    )
     step += 1
-    critique = ""
-    if mode == "deep":
-        critique = _open_ended_critique(
-            prompt,
-            fact_pack=fact_pack,
-            history_lines=history_lines,
-            candidates=selected or candidates,
-            state=state,
-            step=step,
-            model=model,
-        )
-        step += 1
     reply = _open_ended_synthesize(
         prompt,
         fact_pack=fact_pack,
@@ -3605,8 +3757,8 @@
 
 def _open_ended_total_steps(mode: str) -> int:
     if mode == "fast":
-        return 3
+        return 4
-    return 7
+    return 9
 
 
 def _fast_fact_lines(
@@ -3656,34 +3808,14 @@ def _open_ended_fast(
     history_lines: list[str],
     state: ThoughtState | None = None,
 ) -> str:
-    model = _model_for_mode("fast")
-    if state:
-        state.update("selecting", step=2, note="picking key facts")
-    subjective = _is_subjective_query(prompt)
-    focus_tags = _preferred_tags_for_prompt(prompt)
-    if not focus_tags and subjective:
-        focus_tags = set(_ALLOWED_INSIGHT_TAGS)
-    primary_ids = _open_ended_select_facts(
+    return _open_ended_multi(
         prompt,
         fact_pack=fact_pack,
+        fact_lines=fact_lines,
         fact_meta=fact_meta,
         history_lines=history_lines,
-        focus_tags=focus_tags,
-        avoid_fact_ids=[],
-        count=3,
-        subjective=subjective,
+        mode="fast",
         state=state,
-        step=2,
-        model=model,
-    )
-    selected_lines = _fast_fact_lines(fact_lines, fact_meta, primary_ids)
-    fact_pack = _fact_pack_text(selected_lines, fact_meta)
-    return _open_ended_fast_single(
-        prompt,
-        fact_pack=fact_pack,
-        history_lines=history_lines,
-        state=state,
-        model=model,
     )
 
 
@@ -3846,7 +3978,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
         if cluster_query:
             context = build_context(
                 cleaned,
-                allow_tools=False,
+                allow_tools=True,
                 targets=[],
                 inventory=inventory,
                 snapshot=snapshot,
@@ -3860,7 +3992,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                 workloads=workloads,
                 history_lines=history_lines,
                 mode=mode,
-                allow_tools=False,
+                allow_tools=True,
                 state=None,
             )
         else: