From 7194cad0a8fdb040ae5ca34a7c65c785f84f2f10 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Wed, 28 Jan 2026 03:46:06 -0300
Subject: [PATCH] atlasbot: refine fast fact selection and prompts

---
 services/comms/atlasbot-deployment.yaml |  2 +-
 services/comms/scripts/atlasbot/bot.py  | 56 +++++++++++++++++++++----
 2 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml
index 6fbd327..f007942 100644
--- a/services/comms/atlasbot-deployment.yaml
+++ b/services/comms/atlasbot-deployment.yaml
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
      annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-92
+        checksum/atlasbot-configmap: manual-atlasbot-93
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index e0f8417..5ce1984 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -253,11 +253,13 @@ def normalize_query(text: str) -> str:
     cleaned = (text or "").lower()
     for ch in _DASH_CHARS:
         cleaned = cleaned.replace(ch, "-")
+    cleaned = cleaned.replace("_", " ")
     cleaned = re.sub(r"\s+", " ", cleaned).strip()
     return cleaned
 
 
 def _tokens(text: str) -> list[str]:
-    toks = [t.lower() for t in TOKEN_RE.findall(text or "")]
+    cleaned = re.sub(r"[\\_/]", " ", text or "")
+    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
     return [t for t in toks if t not in STOPWORDS and len(t) >= 2]
 
@@ -2730,6 +2732,18 @@ _ALLOWED_INSIGHT_TAGS = {
 }
 _DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
 _INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
+_SUBJECTIVE_TAG_PRIORITY = (
+    "utilization",
+    "database",
+    "pods",
+    "workloads",
+    "availability",
+    "hardware",
+    "inventory",
+    "architecture",
+    "node_detail",
+    "os",
+)
 
 
 def _fact_line_tags(line: str) -> set[str]:
@@ -2922,7 +2936,8 @@ def _open_ended_system() -> str:
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
         "Be willing to take a light stance; do not over-hedge. "
-        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact, explain why it stands out, "
+        "and use 2-3 sentences. "
         "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
" @@ -3773,6 +3788,8 @@ def _fast_fact_lines( return [] primary_tags = primary_tags or set() scored: list[tuple[int, int, str]] = [] + priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)} + use_priority = not primary_tags and focus_tags == _ALLOWED_INSIGHT_TAGS for idx, line in enumerate(fact_lines): fid = f"F{idx + 1}" tags = set(fact_meta.get(fid, {}).get("tags") or []) @@ -3783,6 +3800,12 @@ def _fast_fact_lines( score += 4 * len(tags & primary_tags) if focus_tags: score += 2 * len(tags & focus_tags) + if use_priority and tags: + bonus = 0 + for tag in tags: + if tag in priority_map: + bonus = max(bonus, len(priority_map) - priority_map[tag]) + score += bonus scored.append((score, idx, line)) scored.sort(key=lambda item: (-item[0], item[1])) selected: list[str] = [] @@ -3845,13 +3868,27 @@ def _fallback_fact_answer(prompt: str, context: str) -> str: best_fact = "" best_score = -1 for fact in facts: - score = len(tokens & set(_tokens(fact))) + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", fact) + if not key_match: + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", fact) + key_tokens: set[str] = set() + if key_match: + key_tokens = set(_tokens(key_match.group(1))) + score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens) if score > best_score: best_score = score best_fact = fact if best_score <= 0: return "" - sentence = f"Based on the snapshot, {best_fact}" + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", best_fact) + if not key_match: + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", best_fact) + if key_match: + key = key_match.group(1).strip().replace("_", " ") + val = key_match.group(2).strip() + sentence = f"{key.capitalize()} is {val}" + else: + sentence = f"Based on the snapshot, {best_fact}" if not sentence.endswith((".", "!", "?")): sentence += "." 
     return sentence
@@ -3873,15 +3910,17 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
 ) -> str:
     if state:
         state.update("drafting", step=1, note="summarizing")
+    working_context = _append_history_context(context, history_lines or []) if history_lines else context
     reply = _ollama_call(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
-        context=context,
+        context=working_context,
         use_history=False,
         system_override=_open_ended_system(),
         model=model,
@@ -3890,7 +3929,7 @@
         reply = _ollama_call(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
-            context=context,
+            context=working_context,
             use_history=False,
             system_override=_open_ended_system(),
             model=model,
@@ -3933,6 +3972,7 @@
         return _open_ended_fast_single(
             prompt,
            context=selected_pack,
+            history_lines=history_lines,
            state=state,
            model=model,
        )
@@ -4089,7 +4129,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
         cleaned_q = normalize_query(cleaned)
         cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
@@ -4633,7 +4673,7 @@ def sync_loop(token: str, room_id: str):
         cleaned_q = normalize_query(cleaned_body)
         cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned_body)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
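-- 
Reviewer sketch, not part of the patch: the bonus added to _fast_fact_lines
only fires for subjective queries with no primary tags (use_priority), and it
rewards whichever of a line's tags sits earliest in _SUBJECTIVE_TAG_PRIORITY.
A minimal standalone Python sketch of that ranking, with made-up fact lines
and tags (illustrative stand-ins, not the module's real fact_meta):

    _SUBJECTIVE_TAG_PRIORITY = (
        "utilization", "database", "pods", "workloads", "availability",
        "hardware", "inventory", "architecture", "node_detail", "os",
    )

    def rank_facts(fact_lines, fact_tags):
        # Earlier tags in the tuple earn a larger bonus; untagged lines get 0.
        priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}
        scored = []
        for idx, line in enumerate(fact_lines):
            tags = fact_tags.get(f"F{idx + 1}", set())
            bonus = max(
                (len(priority_map) - priority_map[t] for t in tags if t in priority_map),
                default=0,
            )
            scored.append((bonus, idx, line))
        # Highest bonus first; ties keep snapshot order, mirroring the
        # (-score, idx) sort key in bot.py.
        scored.sort(key=lambda item: (-item[0], item[1]))
        return [line for _, _, line in scored]

    lines = ["os: Talos 1.9", "cpu_util: 41%", "postgres pods: 3"]
    tags = {"F1": {"os"}, "F2": {"utilization"}, "F3": {"database", "pods"}}
    print(rank_facts(lines, tags))
    # -> ['cpu_util: 41%', 'postgres pods: 3', 'os: Talos 1.9']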