atlasbot: refine fast fact selection and prompts
parent eb567fda06
commit 7194cad0a8
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-92
+        checksum/atlasbot-configmap: manual-atlasbot-93
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
@@ -253,11 +253,13 @@ def normalize_query(text: str) -> str:
     cleaned = (text or "").lower()
     for ch in _DASH_CHARS:
         cleaned = cleaned.replace(ch, "-")
     cleaned = cleaned.replace("_", " ")
     cleaned = re.sub(r"\s+", " ", cleaned).strip()
     return cleaned


 def _tokens(text: str) -> list[str]:
-    toks = [t.lower() for t in TOKEN_RE.findall(text or "")]
+    cleaned = re.sub(r"[\\_/]", " ", text or "")
+    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
     return [t for t in toks if t not in STOPWORDS and len(t) >= 2]


@@ -2730,6 +2732,18 @@ _ALLOWED_INSIGHT_TAGS = {

 _DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
 _INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
+_SUBJECTIVE_TAG_PRIORITY = (
+    "utilization",
+    "database",
+    "pods",
+    "workloads",
+    "availability",
+    "hardware",
+    "inventory",
+    "architecture",
+    "node_detail",
+    "os",
+)


 def _fact_line_tags(line: str) -> set[str]:
@@ -2922,7 +2936,8 @@ def _open_ended_system() -> str:
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
         "Be willing to take a light stance; do not over-hedge. "
-        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact, explain why it stands out, "
+        "and use 2-3 sentences. "
         "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
@@ -3773,6 +3788,8 @@ def _fast_fact_lines(
         return []
     primary_tags = primary_tags or set()
     scored: list[tuple[int, int, str]] = []
+    priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}
+    use_priority = not primary_tags and focus_tags == _ALLOWED_INSIGHT_TAGS
     for idx, line in enumerate(fact_lines):
         fid = f"F{idx + 1}"
         tags = set(fact_meta.get(fid, {}).get("tags") or [])
@@ -3783,6 +3800,12 @@ def _fast_fact_lines(
             score += 4 * len(tags & primary_tags)
         if focus_tags:
             score += 2 * len(tags & focus_tags)
+        if use_priority and tags:
+            bonus = 0
+            for tag in tags:
+                if tag in priority_map:
+                    bonus = max(bonus, len(priority_map) - priority_map[tag])
+            score += bonus
         scored.append((score, idx, line))
     scored.sort(key=lambda item: (-item[0], item[1]))
     selected: list[str] = []
@@ -3845,12 +3868,26 @@ def _fallback_fact_answer(prompt: str, context: str) -> str:
     best_fact = ""
     best_score = -1
     for fact in facts:
-        score = len(tokens & set(_tokens(fact)))
+        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", fact)
+        if not key_match:
+            key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", fact)
+        key_tokens: set[str] = set()
+        if key_match:
+            key_tokens = set(_tokens(key_match.group(1)))
+        score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens)
         if score > best_score:
             best_score = score
             best_fact = fact
     if best_score <= 0:
         return ""
+    key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", best_fact)
+    if not key_match:
+        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", best_fact)
+    if key_match:
+        key = key_match.group(1).strip().replace("_", " ")
+        val = key_match.group(2).strip()
+        sentence = f"{key.capitalize()} is {val}"
+    else:
         sentence = f"Based on the snapshot, {best_fact}"
     if not sentence.endswith((".", "!", "?")):
         sentence += "."
@@ -3873,15 +3910,17 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
 ) -> str:
     if state:
         state.update("drafting", step=1, note="summarizing")
+    working_context = _append_history_context(context, history_lines or []) if history_lines else context
     reply = _ollama_call(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
-        context=context,
+        context=working_context,
         use_history=False,
         system_override=_open_ended_system(),
         model=model,
@@ -3890,7 +3929,7 @@ def _open_ended_fast_single(
         reply = _ollama_call(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
-            context=context,
+            context=working_context,
             use_history=False,
             system_override=_open_ended_system(),
             model=model,
@@ -3933,6 +3972,7 @@ def _open_ended_fast(
     return _open_ended_fast_single(
         prompt,
         context=selected_pack,
+        history_lines=history_lines,
         state=state,
         model=model,
     )
@@ -4089,7 +4129,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
         cleaned_q = normalize_query(cleaned)
         cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
@@ -4633,7 +4673,7 @@ def sync_loop(token: str, room_id: str):
            cleaned_q = normalize_query(cleaned_body)
            cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
            subjective = _is_subjective_query(cleaned_body)
-           followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+           followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
            contextual = history_cluster and (followup or followup_affinity)
            cluster_query = cluster_affinity or contextual
            context = ""