atlasbot: refine fast fact selection and prompts
parent eb567fda06
commit 7194cad0a8
@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-92
+        checksum/atlasbot-configmap: manual-atlasbot-93
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
@@ -253,11 +253,13 @@ def normalize_query(text: str) -> str:
     cleaned = (text or "").lower()
     for ch in _DASH_CHARS:
         cleaned = cleaned.replace(ch, "-")
     cleaned = cleaned.replace("_", " ")
     cleaned = re.sub(r"\s+", " ", cleaned).strip()
     return cleaned


 def _tokens(text: str) -> list[str]:
-    toks = [t.lower() for t in TOKEN_RE.findall(text or "")]
+    cleaned = re.sub(r"[\\_/]", " ", text or "")
+    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
     return [t for t in toks if t not in STOPWORDS and len(t) >= 2]


@@ -2730,6 +2732,18 @@ _ALLOWED_INSIGHT_TAGS = {

 _DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
 _INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
+_SUBJECTIVE_TAG_PRIORITY = (
+    "utilization",
+    "database",
+    "pods",
+    "workloads",
+    "availability",
+    "hardware",
+    "inventory",
+    "architecture",
+    "node_detail",
+    "os",
+)


 def _fact_line_tags(line: str) -> set[str]:
@@ -2922,7 +2936,8 @@ def _open_ended_system() -> str:
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
         "Be willing to take a light stance; do not over-hedge. "
-        "If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
+        "If the question is subjective (cool/interesting/unconventional), pick a standout fact, explain why it stands out, "
+        "and use 2-3 sentences. "
         "If the question asks for a list, embed the list inline in a sentence (comma-separated). "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
         "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
@@ -3773,6 +3788,8 @@ def _fast_fact_lines(
         return []
     primary_tags = primary_tags or set()
     scored: list[tuple[int, int, str]] = []
+    priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}
+    use_priority = not primary_tags and focus_tags == _ALLOWED_INSIGHT_TAGS
     for idx, line in enumerate(fact_lines):
         fid = f"F{idx + 1}"
         tags = set(fact_meta.get(fid, {}).get("tags") or [])
@@ -3783,6 +3800,12 @@ def _fast_fact_lines(
             score += 4 * len(tags & primary_tags)
         if focus_tags:
             score += 2 * len(tags & focus_tags)
+        if use_priority and tags:
+            bonus = 0
+            for tag in tags:
+                if tag in priority_map:
+                    bonus = max(bonus, len(priority_map) - priority_map[tag])
+            score += bonus
         scored.append((score, idx, line))
     scored.sort(key=lambda item: (-item[0], item[1]))
     selected: list[str] = []
@@ -3845,12 +3868,26 @@ def _fallback_fact_answer(prompt: str, context: str) -> str:
     best_fact = ""
     best_score = -1
     for fact in facts:
-        score = len(tokens & set(_tokens(fact)))
+        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", fact)
+        if not key_match:
+            key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", fact)
+        key_tokens: set[str] = set()
+        if key_match:
+            key_tokens = set(_tokens(key_match.group(1)))
+        score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens)
         if score > best_score:
             best_score = score
             best_fact = fact
     if best_score <= 0:
         return ""
+    key_match = re.match(r"^([A-Za-z0-9_\-/ ]+):\s*(.+)$", best_fact)
+    if not key_match:
+        key_match = re.match(r"^([A-Za-z0-9_\-/ ]+)=\s*(.+)$", best_fact)
+    if key_match:
+        key = key_match.group(1).strip().replace("_", " ")
+        val = key_match.group(2).strip()
+        sentence = f"{key.capitalize()} is {val}"
+    else:
         sentence = f"Based on the snapshot, {best_fact}"
     if not sentence.endswith((".", "!", "?")):
         sentence += "."
@@ -3873,15 +3910,17 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
 ) -> str:
     if state:
         state.update("drafting", step=1, note="summarizing")
+    working_context = _append_history_context(context, history_lines or []) if history_lines else context
     reply = _ollama_call(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
-        context=context,
+        context=working_context,
         use_history=False,
         system_override=_open_ended_system(),
         model=model,
@@ -3890,7 +3929,7 @@ def _open_ended_fast_single(
         reply = _ollama_call(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
-            context=context,
+            context=working_context,
             use_history=False,
             system_override=_open_ended_system(),
             model=model,
@@ -3933,6 +3972,7 @@ def _open_ended_fast(
     return _open_ended_fast_single(
         prompt,
         context=selected_pack,
+        history_lines=history_lines,
         state=state,
         model=model,
     )
@@ -4089,7 +4129,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
         cleaned_q = normalize_query(cleaned)
         cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
         subjective = _is_subjective_query(cleaned)
-        followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+        followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
         contextual = history_cluster and (followup or followup_affinity)
         cluster_query = cluster_affinity or contextual
         context = ""
@@ -4633,7 +4673,7 @@ def sync_loop(token: str, room_id: str):
            cleaned_q = normalize_query(cleaned_body)
            cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
            subjective = _is_subjective_query(cleaned_body)
-           followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
+           followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
            contextual = history_cluster and (followup or followup_affinity)
            cluster_query = cluster_affinity or contextual
            context = ""