atlasbot: refine fast fact selection and prompts

Brad Stein 2026-01-28 03:46:06 -03:00
parent eb567fda06
commit 7194cad0a8
2 changed files with 49 additions and 9 deletions


@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-92
+        checksum/atlasbot-configmap: manual-atlasbot-93
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
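(The only change in this file is the manual checksum bump from manual-atlasbot-92 to manual-atlasbot-93. Because the annotation sits on the pod template, bumping it makes the Deployment roll the atlasbot pods, presumably so they restart with the updated ConfigMap that carries the prompt and selection changes below.)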


@@ -253,11 +253,13 @@ def normalize_query(text: str) -> str:
     cleaned = (text or "").lower()
     for ch in _DASH_CHARS:
         cleaned = cleaned.replace(ch, "-")
+    cleaned = cleaned.replace("_", " ")
     cleaned = re.sub(r"\s+", " ", cleaned).strip()
     return cleaned
 
 
 def _tokens(text: str) -> list[str]:
-    toks = [t.lower() for t in TOKEN_RE.findall(text or "")]
+    cleaned = re.sub(r"[\\_/]", " ", text or "")
+    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
     return [t for t in toks if t not in STOPWORDS and len(t) >= 2]
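A quick sketch of what the new underscore/slash handling buys (not part of the diff; the _DASH_CHARS, TOKEN_RE and STOPWORDS definitions below are placeholder assumptions, the real ones live elsewhere in the module):

import re

_DASH_CHARS = "\u2013\u2014"             # assumed: en/em dashes folded to "-"
TOKEN_RE = re.compile(r"[A-Za-z0-9-]+")  # assumed token pattern
STOPWORDS = {"the", "is", "a", "of"}     # assumed stopword set

def normalize_query(text: str) -> str:
    cleaned = (text or "").lower()
    for ch in _DASH_CHARS:
        cleaned = cleaned.replace(ch, "-")
    cleaned = cleaned.replace("_", " ")           # new: underscores become spaces
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    return cleaned

def _tokens(text: str) -> list[str]:
    cleaned = re.sub(r"[\\_/]", " ", text or "")  # new: split on backslash, underscore, slash
    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
    return [t for t in toks if t not in STOPWORDS and len(t) >= 2]

print(normalize_query("how busy is worker_node-3?"))  # -> "how busy is worker node-3?"
print(_tokens("kv/data/atlas/comms"))                 # -> ['kv', 'data', 'atlas', 'comms']

So a query written with underscores or slashes now tokenizes the same way as one written with spaces.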
@@ -2730,6 +2732,18 @@ _ALLOWED_INSIGHT_TAGS = {
 _DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
 _INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
+_SUBJECTIVE_TAG_PRIORITY = (
+    "utilization",
+    "database",
+    "pods",
+    "workloads",
+    "availability",
+    "hardware",
+    "inventory",
+    "architecture",
+    "node_detail",
+    "os",
+)
 
 
 def _fact_line_tags(line: str) -> set[str]:
@@ -2922,7 +2936,8 @@ def _open_ended_system() -> str:
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
         "Be willing to take a light stance; do not over-hedge. "
-        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact, explain why it stands out, "
+        "and use 2-3 sentences. "
         "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
@@ -3773,6 +3788,8 @@ def _fast_fact_lines(
         return []
     primary_tags = primary_tags or set()
     scored: list[tuple[int, int, str]] = []
+    priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}
+    use_priority = not primary_tags and focus_tags == _ALLOWED_INSIGHT_TAGS
     for idx, line in enumerate(fact_lines):
         fid = f"F{idx + 1}"
         tags = set(fact_meta.get(fid, {}).get("tags") or [])
@@ -3783,6 +3800,12 @@ def _fast_fact_lines(
             score += 4 * len(tags & primary_tags)
         if focus_tags:
             score += 2 * len(tags & focus_tags)
+        if use_priority and tags:
+            bonus = 0
+            for tag in tags:
+                if tag in priority_map:
+                    bonus = max(bonus, len(priority_map) - priority_map[tag])
+            score += bonus
         scored.append((score, idx, line))
     scored.sort(key=lambda item: (-item[0], item[1]))
     selected: list[str] = []
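The bonus only kicks in for broad subjective questions (no primary tags, and the focus set equal to the full allowed set), and a fact is credited only for its highest-priority tag, with earlier entries in _SUBJECTIVE_TAG_PRIORITY worth more. A sketch of the arithmetic in isolation (not part of the diff):

_SUBJECTIVE_TAG_PRIORITY = (
    "utilization", "database", "pods", "workloads", "availability",
    "hardware", "inventory", "architecture", "node_detail", "os",
)
priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}

def priority_bonus(tags: set[str]) -> int:
    # Same arithmetic as the new branch above, pulled out for illustration.
    bonus = 0
    for tag in tags:
        if tag in priority_map:
            bonus = max(bonus, len(priority_map) - priority_map[tag])
    return bonus

print(priority_bonus({"utilization", "pods"}))  # 10 -- the top-ranked tag wins
print(priority_bonus({"os"}))                   # 1
print(priority_bonus({"workers"}))              # 0 -- tag not in the priority tuple

Ties still fall back to the original fact order, since the sort key remains (-score, idx).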
@@ -3845,13 +3868,27 @@ def _fallback_fact_answer(prompt: str, context: str) -> str:
     best_fact = ""
     best_score = -1
     for fact in facts:
-        score = len(tokens & set(_tokens(fact)))
+        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", fact)
+        if not key_match:
+            key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", fact)
+        key_tokens: set[str] = set()
+        if key_match:
+            key_tokens = set(_tokens(key_match.group(1)))
+        score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens)
         if score > best_score:
             best_score = score
             best_fact = fact
     if best_score <= 0:
         return ""
-    sentence = f"Based on the snapshot, {best_fact}"
+    key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", best_fact)
+    if not key_match:
+        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", best_fact)
+    if key_match:
+        key = key_match.group(1).strip().replace("_", " ")
+        val = key_match.group(2).strip()
+        sentence = f"{key.capitalize()} is {val}"
+    else:
+        sentence = f"Based on the snapshot, {best_fact}"
     if not sentence.endswith((".", "!", "?")):
         sentence += "."
     return sentence
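The loop now gives double weight to query tokens that appear in a fact's "key: value" or "key = value" prefix, and the winning fact is rephrased as a plain sentence instead of always getting the "Based on the snapshot, ..." preamble. A sketch of just the rewrite step, applied to assumed fact lines (not part of the diff):

import re

def _phrase(best_fact: str) -> str:
    # Mirrors the new rewrite step with the same two key patterns.
    key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", best_fact)
    if not key_match:
        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", best_fact)
    if key_match:
        key = key_match.group(1).strip().replace("_", " ")
        val = key_match.group(2).strip()
        sentence = f"{key.capitalize()} is {val}"
    else:
        sentence = f"Based on the snapshot, {best_fact}"
    if not sentence.endswith((".", "!", "?")):
        sentence += "."
    return sentence

print(_phrase("cpu_utilization: 41%"))    # "Cpu utilization is 41%."  (assumed fact line)
print(_phrase("worker nodes = 12"))       # "Worker nodes is 12."      (assumed fact line)
print(_phrase("etcd is healthy today."))  # "Based on the snapshot, etcd is healthy today."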
@@ -3873,15 +3910,17 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
 ) -> str:
     if state:
         state.update("drafting", step=1, note="summarizing")
+    working_context = _append_history_context(context, history_lines or []) if history_lines else context
     reply = _ollama_call(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
-        context=context,
+        context=working_context,
         use_history=False,
         system_override=_open_ended_system(),
         model=model,
@@ -3890,7 +3929,7 @@ def _open_ended_fast_single(
         reply = _ollama_call(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
-            context=context,
+            context=working_context,
             use_history=False,
             system_override=_open_ended_system(),
             model=model,
@@ -3933,6 +3972,7 @@ def _open_ended_fast(
         return _open_ended_fast_single(
             prompt,
             context=selected_pack,
+            history_lines=history_lines,
             state=state,
             model=model,
         )
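Taken together, these three hunks thread history_lines from _open_ended_fast into the single-shot call, so the fast path (and its retry) now sees recent conversation alongside the selected fact pack. _append_history_context itself is not shown in this diff; a hypothetical stand-in, assuming it simply appends a short labelled block of recent turns, would look roughly like:

def _append_history_context(context: str, history_lines: list[str]) -> str:
    # Hypothetical stand-in -- the real helper is defined elsewhere in this file.
    if not history_lines:
        return context
    recent = "\n".join(history_lines[-4:])  # assumed cap on how many turns are kept
    return f"{context}\n\nRecent conversation:\n{recent}"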
@@ -4089,7 +4129,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
         cleaned_q = normalize_query(cleaned)
         cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
@@ -4633,7 +4673,7 @@ def sync_loop(token: str, room_id: str):
         cleaned_q = normalize_query(cleaned_body)
         cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned_body)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
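Both call sites (the HTTP handler and the Matrix sync loop) drop the "subjective or" term, so a subjective follow-up on its own no longer pulls in cluster context; only metric hint words do, while explicit follow-up detection and _is_cluster_query still apply, and _is_subjective_query is still computed at both sites, presumably for use later in the flow. A tiny sketch with an assumed METRIC_HINT_WORDS set (not part of the diff):

METRIC_HINT_WORDS = {"cpu", "memory", "disk", "pods", "nodes"}  # assumed contents

def followup_affinity(cleaned_q: str) -> bool:
    # New gate: subjective wording alone no longer counts as a metric follow-up.
    return any(word in cleaned_q for word in METRIC_HINT_WORDS)

print(followup_affinity("anything cool about it?"))  # False
print(followup_affinity("and how is cpu doing?"))    # True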