From 7194cad0a8fdb040ae5ca34a7c65c785f84f2f10 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Wed, 28 Jan 2026 03:46:06 -0300
Subject: [PATCH] atlasbot: refine fast fact selection and prompts

---
 services/comms/atlasbot-deployment.yaml |  2 +-
 services/comms/scripts/atlasbot/bot.py  | 56 +++++++++++++++++++++----
 2 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml
index 6fbd327..f007942 100644
--- a/services/comms/atlasbot-deployment.yaml
+++ b/services/comms/atlasbot-deployment.yaml
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
      annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-92
+        checksum/atlasbot-configmap: manual-atlasbot-93
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index e0f8417..5ce1984 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -253,11 +253,13 @@ def normalize_query(text: str) -> str:
     cleaned = (text or "").lower()
     for ch in _DASH_CHARS:
         cleaned = cleaned.replace(ch, "-")
+    cleaned = cleaned.replace("_", " ")
     cleaned = re.sub(r"\s+", " ", cleaned).strip()
     return cleaned
 
 
 def _tokens(text: str) -> list[str]:
-    toks = [t.lower() for t in TOKEN_RE.findall(text or "")]
+    cleaned = re.sub(r"[\\_/]", " ", text or "")
+    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
     return [t for t in toks if t not in STOPWORDS and len(t) >= 2]
 
@@ -2730,6 +2732,18 @@ _ALLOWED_INSIGHT_TAGS = {
 }
 _DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
 _INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
+_SUBJECTIVE_TAG_PRIORITY = (
+    "utilization",
+    "database",
+    "pods",
+    "workloads",
+    "availability",
+    "hardware",
+    "inventory",
+    "architecture",
+    "node_detail",
+    "os",
+)
 
 
 def _fact_line_tags(line: str) -> set[str]:
@@ -2922,7 +2936,8 @@ def _open_ended_system() -> str:
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
         "Be willing to take a light stance; do not over-hedge. "
-        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact, explain why it stands out, "
+        "and use 2-3 sentences. "
         "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
" @@ -3773,6 +3788,8 @@ def _fast_fact_lines( return [] primary_tags = primary_tags or set() scored: list[tuple[int, int, str]] = [] + priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)} + use_priority = not primary_tags and focus_tags == _ALLOWED_INSIGHT_TAGS for idx, line in enumerate(fact_lines): fid = f"F{idx + 1}" tags = set(fact_meta.get(fid, {}).get("tags") or []) @@ -3783,6 +3800,12 @@ def _fast_fact_lines( score += 4 * len(tags & primary_tags) if focus_tags: score += 2 * len(tags & focus_tags) + if use_priority and tags: + bonus = 0 + for tag in tags: + if tag in priority_map: + bonus = max(bonus, len(priority_map) - priority_map[tag]) + score += bonus scored.append((score, idx, line)) scored.sort(key=lambda item: (-item[0], item[1])) selected: list[str] = [] @@ -3845,13 +3868,27 @@ def _fallback_fact_answer(prompt: str, context: str) -> str: best_fact = "" best_score = -1 for fact in facts: - score = len(tokens & set(_tokens(fact))) + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", fact) + if not key_match: + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", fact) + key_tokens: set[str] = set() + if key_match: + key_tokens = set(_tokens(key_match.group(1))) + score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens) if score > best_score: best_score = score best_fact = fact if best_score <= 0: return "" - sentence = f"Based on the snapshot, {best_fact}" + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", best_fact) + if not key_match: + key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", best_fact) + if key_match: + key = key_match.group(1).strip().replace("_", " ") + val = key_match.group(2).strip() + sentence = f"{key.capitalize()} is {val}" + else: + sentence = f"Based on the snapshot, {best_fact}" if not sentence.endswith((".", "!", "?")): sentence += "." 
     return sentence
@@ -3873,15 +3910,17 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
 ) -> str:
     if state:
         state.update("drafting", step=1, note="summarizing")
+    working_context = _append_history_context(context, history_lines or []) if history_lines else context
     reply = _ollama_call(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
-        context=context,
+        context=working_context,
         use_history=False,
         system_override=_open_ended_system(),
         model=model,
@@ -3890,7 +3929,7 @@
         reply = _ollama_call(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
-            context=context,
+            context=working_context,
             use_history=False,
             system_override=_open_ended_system(),
             model=model,
@@ -3933,6 +3972,7 @@
         return _open_ended_fast_single(
             prompt,
            context=selected_pack,
+            history_lines=history_lines,
            state=state,
            model=model,
        )
@@ -4089,7 +4129,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
         cleaned_q = normalize_query(cleaned)
         cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
@@ -4633,7 +4673,7 @@ def sync_loop(token: str, room_id: str):
         cleaned_q = normalize_query(cleaned_body)
         cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned_body)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
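-- 
Reviewer sketch, not part of the patch: the bonus added to _fast_fact_lines
only fires for subjective queries with no primary tags (use_priority), and it
rewards whichever of a line's tags sits earliest in _SUBJECTIVE_TAG_PRIORITY.
A minimal standalone Python sketch of that ranking, with made-up fact lines
and tags (illustrative stand-ins, not the module's real fact_meta):

    _SUBJECTIVE_TAG_PRIORITY = (
        "utilization", "database", "pods", "workloads", "availability",
        "hardware", "inventory", "architecture", "node_detail", "os",
    )

    def rank_facts(fact_lines, fact_tags):
        # Earlier tags in the tuple earn a larger bonus; untagged lines get 0.
        priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}
        scored = []
        for idx, line in enumerate(fact_lines):
            tags = fact_tags.get(f"F{idx + 1}", set())
            bonus = max(
                (len(priority_map) - priority_map[t] for t in tags if t in priority_map),
                default=0,
            )
            scored.append((bonus, idx, line))
        # Highest bonus first; ties keep snapshot order, mirroring the
        # (-score, idx) sort key in bot.py.
        scored.sort(key=lambda item: (-item[0], item[1]))
        return [line for _, _, line in scored]

    lines = ["os: Talos 1.9", "cpu_util: 41%", "postgres pods: 3"]
    tags = {"F1": {"os"}, "F2": {"utilization"}, "F3": {"database", "pods"}}
    print(rank_facts(lines, tags))
    # -> ['cpu_util: 41%', 'postgres pods: 3', 'os: Talos 1.9']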