atlasbot: refine fast fact selection and prompts
This commit is contained in:
parent
eb567fda06
commit
7194cad0a8
@ -16,7 +16,7 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app: atlasbot
|
app: atlasbot
|
||||||
annotations:
|
annotations:
|
||||||
checksum/atlasbot-configmap: manual-atlasbot-92
|
checksum/atlasbot-configmap: manual-atlasbot-93
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "comms"
|
vault.hashicorp.com/role: "comms"
|
||||||
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
||||||
|
|||||||
@ -253,11 +253,13 @@ def normalize_query(text: str) -> str:
|
|||||||
cleaned = (text or "").lower()
|
cleaned = (text or "").lower()
|
||||||
for ch in _DASH_CHARS:
|
for ch in _DASH_CHARS:
|
||||||
cleaned = cleaned.replace(ch, "-")
|
cleaned = cleaned.replace(ch, "-")
|
||||||
|
cleaned = cleaned.replace("_", " ")
|
||||||
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
||||||
return cleaned
|
return cleaned
|
||||||
|
|
||||||
def _tokens(text: str) -> list[str]:
|
def _tokens(text: str) -> list[str]:
|
||||||
toks = [t.lower() for t in TOKEN_RE.findall(text or "")]
|
cleaned = re.sub(r"[\\_/]", " ", text or "")
|
||||||
|
toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]
|
||||||
return [t for t in toks if t not in STOPWORDS and len(t) >= 2]
|
return [t for t in toks if t not in STOPWORDS and len(t) >= 2]
|
||||||
|
|
||||||
|
|
||||||
@ -2730,6 +2732,18 @@ _ALLOWED_INSIGHT_TAGS = {
|
|||||||
|
|
||||||
_DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
|
_DYNAMIC_TAGS = {"availability", "database", "pods", "utilization", "workloads"}
|
||||||
_INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
|
_INVENTORY_TAGS = {"hardware", "architecture", "inventory", "workers", "node_detail", "os"}
|
||||||
|
_SUBJECTIVE_TAG_PRIORITY = (
|
||||||
|
"utilization",
|
||||||
|
"database",
|
||||||
|
"pods",
|
||||||
|
"workloads",
|
||||||
|
"availability",
|
||||||
|
"hardware",
|
||||||
|
"inventory",
|
||||||
|
"architecture",
|
||||||
|
"node_detail",
|
||||||
|
"os",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _fact_line_tags(line: str) -> set[str]:
|
def _fact_line_tags(line: str) -> set[str]:
|
||||||
@ -2922,7 +2936,8 @@ def _open_ended_system() -> str:
|
|||||||
"You may draw light inferences if you label them as such. "
|
"You may draw light inferences if you label them as such. "
|
||||||
"Write concise, human sentences with a helpful, calm tone (not a list). "
|
"Write concise, human sentences with a helpful, calm tone (not a list). "
|
||||||
"Be willing to take a light stance; do not over-hedge. "
|
"Be willing to take a light stance; do not over-hedge. "
|
||||||
"If the question is subjective (cool/interesting/unconventional), pick a standout fact and explain why it stands out. "
|
"If the question is subjective (cool/interesting/unconventional), pick a standout fact, explain why it stands out, "
|
||||||
|
"and use 2-3 sentences. "
|
||||||
"If the question asks for a list, embed the list inline in a sentence (comma-separated). "
|
"If the question asks for a list, embed the list inline in a sentence (comma-separated). "
|
||||||
"If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
|
"If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
|
||||||
"Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
|
"Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
|
||||||
@ -3773,6 +3788,8 @@ def _fast_fact_lines(
|
|||||||
return []
|
return []
|
||||||
primary_tags = primary_tags or set()
|
primary_tags = primary_tags or set()
|
||||||
scored: list[tuple[int, int, str]] = []
|
scored: list[tuple[int, int, str]] = []
|
||||||
|
priority_map = {tag: idx for idx, tag in enumerate(_SUBJECTIVE_TAG_PRIORITY)}
|
||||||
|
use_priority = not primary_tags and focus_tags == _ALLOWED_INSIGHT_TAGS
|
||||||
for idx, line in enumerate(fact_lines):
|
for idx, line in enumerate(fact_lines):
|
||||||
fid = f"F{idx + 1}"
|
fid = f"F{idx + 1}"
|
||||||
tags = set(fact_meta.get(fid, {}).get("tags") or [])
|
tags = set(fact_meta.get(fid, {}).get("tags") or [])
|
||||||
@ -3783,6 +3800,12 @@ def _fast_fact_lines(
|
|||||||
score += 4 * len(tags & primary_tags)
|
score += 4 * len(tags & primary_tags)
|
||||||
if focus_tags:
|
if focus_tags:
|
||||||
score += 2 * len(tags & focus_tags)
|
score += 2 * len(tags & focus_tags)
|
||||||
|
if use_priority and tags:
|
||||||
|
bonus = 0
|
||||||
|
for tag in tags:
|
||||||
|
if tag in priority_map:
|
||||||
|
bonus = max(bonus, len(priority_map) - priority_map[tag])
|
||||||
|
score += bonus
|
||||||
scored.append((score, idx, line))
|
scored.append((score, idx, line))
|
||||||
scored.sort(key=lambda item: (-item[0], item[1]))
|
scored.sort(key=lambda item: (-item[0], item[1]))
|
||||||
selected: list[str] = []
|
selected: list[str] = []
|
||||||
@ -3845,13 +3868,27 @@ def _fallback_fact_answer(prompt: str, context: str) -> str:
|
|||||||
best_fact = ""
|
best_fact = ""
|
||||||
best_score = -1
|
best_score = -1
|
||||||
for fact in facts:
|
for fact in facts:
|
||||||
score = len(tokens & set(_tokens(fact)))
|
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", fact)
|
||||||
|
if not key_match:
|
||||||
|
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", fact)
|
||||||
|
key_tokens: set[str] = set()
|
||||||
|
if key_match:
|
||||||
|
key_tokens = set(_tokens(key_match.group(1)))
|
||||||
|
score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens)
|
||||||
if score > best_score:
|
if score > best_score:
|
||||||
best_score = score
|
best_score = score
|
||||||
best_fact = fact
|
best_fact = fact
|
||||||
if best_score <= 0:
|
if best_score <= 0:
|
||||||
return ""
|
return ""
|
||||||
sentence = f"Based on the snapshot, {best_fact}"
|
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", best_fact)
|
||||||
|
if not key_match:
|
||||||
|
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", best_fact)
|
||||||
|
if key_match:
|
||||||
|
key = key_match.group(1).strip().replace("_", " ")
|
||||||
|
val = key_match.group(2).strip()
|
||||||
|
sentence = f"{key.capitalize()} is {val}"
|
||||||
|
else:
|
||||||
|
sentence = f"Based on the snapshot, {best_fact}"
|
||||||
if not sentence.endswith((".", "!", "?")):
|
if not sentence.endswith((".", "!", "?")):
|
||||||
sentence += "."
|
sentence += "."
|
||||||
return sentence
|
return sentence
|
||||||
@ -3873,15 +3910,17 @@ def _open_ended_fast_single(
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
*,
|
*,
|
||||||
context: str,
|
context: str,
|
||||||
|
history_lines: list[str] | None = None,
|
||||||
state: ThoughtState | None = None,
|
state: ThoughtState | None = None,
|
||||||
model: str,
|
model: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
if state:
|
if state:
|
||||||
state.update("drafting", step=1, note="summarizing")
|
state.update("drafting", step=1, note="summarizing")
|
||||||
|
working_context = _append_history_context(context, history_lines or []) if history_lines else context
|
||||||
reply = _ollama_call(
|
reply = _ollama_call(
|
||||||
("atlasbot_fast", "atlasbot_fast"),
|
("atlasbot_fast", "atlasbot_fast"),
|
||||||
prompt,
|
prompt,
|
||||||
context=context,
|
context=working_context,
|
||||||
use_history=False,
|
use_history=False,
|
||||||
system_override=_open_ended_system(),
|
system_override=_open_ended_system(),
|
||||||
model=model,
|
model=model,
|
||||||
@ -3890,7 +3929,7 @@ def _open_ended_fast_single(
|
|||||||
reply = _ollama_call(
|
reply = _ollama_call(
|
||||||
("atlasbot_fast", "atlasbot_fast"),
|
("atlasbot_fast", "atlasbot_fast"),
|
||||||
prompt + " Provide one clear sentence before the score lines.",
|
prompt + " Provide one clear sentence before the score lines.",
|
||||||
context=context,
|
context=working_context,
|
||||||
use_history=False,
|
use_history=False,
|
||||||
system_override=_open_ended_system(),
|
system_override=_open_ended_system(),
|
||||||
model=model,
|
model=model,
|
||||||
@ -3933,6 +3972,7 @@ def _open_ended_fast(
|
|||||||
return _open_ended_fast_single(
|
return _open_ended_fast_single(
|
||||||
prompt,
|
prompt,
|
||||||
context=selected_pack,
|
context=selected_pack,
|
||||||
|
history_lines=history_lines,
|
||||||
state=state,
|
state=state,
|
||||||
model=model,
|
model=model,
|
||||||
)
|
)
|
||||||
@ -4089,7 +4129,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
|
|||||||
cleaned_q = normalize_query(cleaned)
|
cleaned_q = normalize_query(cleaned)
|
||||||
cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
|
cluster_affinity = _is_cluster_query(cleaned, inventory=inventory, workloads=workloads)
|
||||||
subjective = _is_subjective_query(cleaned)
|
subjective = _is_subjective_query(cleaned)
|
||||||
followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
|
followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
|
||||||
contextual = history_cluster and (followup or followup_affinity)
|
contextual = history_cluster and (followup or followup_affinity)
|
||||||
cluster_query = cluster_affinity or contextual
|
cluster_query = cluster_affinity or contextual
|
||||||
context = ""
|
context = ""
|
||||||
@ -4633,7 +4673,7 @@ def sync_loop(token: str, room_id: str):
|
|||||||
cleaned_q = normalize_query(cleaned_body)
|
cleaned_q = normalize_query(cleaned_body)
|
||||||
cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
|
cluster_affinity = _is_cluster_query(cleaned_body, inventory=inventory, workloads=workloads)
|
||||||
subjective = _is_subjective_query(cleaned_body)
|
subjective = _is_subjective_query(cleaned_body)
|
||||||
followup_affinity = subjective or any(word in cleaned_q for word in METRIC_HINT_WORDS)
|
followup_affinity = any(word in cleaned_q for word in METRIC_HINT_WORDS)
|
||||||
contextual = history_cluster and (followup or followup_affinity)
|
contextual = history_cluster and (followup or followup_affinity)
|
||||||
cluster_query = cluster_affinity or contextual
|
cluster_query = cluster_affinity or contextual
|
||||||
context = ""
|
context = ""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user