atlasbot: improve fact parsing and fallback answers
This commit is contained in:
parent
436e56c5de
commit
aa608fbf0f
@ -16,7 +16,7 @@ spec:
|
||||
labels:
|
||||
app: atlasbot
|
||||
annotations:
|
||||
checksum/atlasbot-configmap: manual-atlasbot-98
|
||||
checksum/atlasbot-configmap: manual-atlasbot-101
|
||||
vault.hashicorp.com/agent-inject: "true"
|
||||
vault.hashicorp.com/role: "comms"
|
||||
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
||||
|
||||
@ -260,7 +260,24 @@ def normalize_query(text: str) -> str:
|
||||
def _tokens(text: str) -> list[str]:
    """Split *text* into lowercase search tokens and add simple variants.

    Backslashes, underscores and slashes are treated as word separators
    before ``TOKEN_RE`` is applied.  Every token found is then expanded with:

    * its non-empty hyphen-separated parts (``"control-plane"`` also yields
      ``"control"`` and ``"plane"``, provided ``TOKEN_RE`` keeps hyphenated
      words together -- TODO confirm against the ``TOKEN_RE`` definition);
    * a short-form/long-form synonym (``network``/``net``, ``memory``/``ram``);
    * a naive singular form for tokens longer than three characters that end
      in ``"s"``.

    Args:
        text: Raw prompt or fact text; ``None`` or ``""`` yields ``[]``.

    Returns:
        The expanded tokens, minus anything in ``STOPWORDS`` or shorter than
        two characters.  The list may contain duplicates.
    """
    # Fold "i/o" into "io" up front: the separator pass below would turn it
    # into the two one-letter tokens "i" and "o", which the length filter
    # drops, so a synonym-table entry keyed on "i/o" could never match.
    prepared = re.sub(r"\bi/o\b", "io", text or "", flags=re.IGNORECASE)
    cleaned = re.sub(r"[\\_/]", " ", prepared)
    toks = [t.lower() for t in TOKEN_RE.findall(cleaned)]

    synonyms = {
        "network": "net",
        "net": "network",
        "memory": "ram",
        "ram": "memory",
    }

    expanded: list[str] = []
    for token in toks:
        expanded.append(token)
        if "-" in token:
            expanded.extend(part for part in token.split("-") if part)

    # Iterate over a snapshot so the variants appended here are not themselves
    # expanded again.
    for token in list(expanded):
        if token in synonyms:
            expanded.append(synonyms[token])
        if token.endswith("s") and len(token) > 3:
            # Drop exactly one trailing "s".  str.rstrip("s") would remove
            # every trailing "s" and turn "class" into "cla".
            expanded.append(token[:-1])

    return [t for t in expanded if t not in STOPWORDS and len(t) >= 2]
|
||||
|
||||
|
||||
def _ensure_confidence(text: str) -> str:
|
||||
@ -1077,10 +1094,16 @@ def facts_context(
|
||||
lines.append(f"- expected_workers_missing: {', '.join(missing)}")
|
||||
|
||||
hottest = metrics.get("hottest_nodes") if isinstance(metrics.get("hottest_nodes"), dict) else {}
|
||||
usage_metrics = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
|
||||
for key in ("cpu", "ram", "net", "io"):
|
||||
entry = hottest.get(key) if isinstance(hottest.get(key), dict) else {}
|
||||
node = entry.get("node")
|
||||
value = entry.get("value")
|
||||
if not node or value is None:
|
||||
usage = usage_metrics.get(key) if isinstance(usage_metrics.get(key), list) else []
|
||||
pick = _node_usage_top(usage, allowed_nodes=None)
|
||||
if pick:
|
||||
node, value = pick
|
||||
if node and value is not None:
|
||||
value_fmt = _format_metric_value(
|
||||
str(value),
|
||||
@ -3001,6 +3024,7 @@ def _ensure_scores(answer: str) -> str:
|
||||
def _record_score(key: str, value: str):
    """Store the first usable *value* seen for *key* in ``score_map``.

    The value is trimmed and a trailing percent sign is removed before it is
    stored.  Values that are empty, or become empty after that normalization,
    are ignored.  An existing entry for *key* is never overwritten
    (``setdefault``).
    """
    if not value:
        return
    # Normalize before the emptiness check: recording "" for inputs such as
    # " " or "%" would occupy the slot and block a later real score.  The
    # second strip removes the space left behind by inputs like "85 %".
    value = value.strip().rstrip("%").strip()
    if not value:
        return
    score_map.setdefault(key, value)
|
||||
|
||||
for line in lines:
|
||||
@ -3010,10 +3034,10 @@ def _ensure_scores(answer: str) -> str:
|
||||
"confidence" in lowered and "relevance" in lowered and "satisfaction" in lowered
|
||||
):
|
||||
for key in ("confidence", "relevance", "satisfaction"):
|
||||
match = re.search(rf"{key}\\s*[:=]?\\s*(\\d{{1,3}}|high|medium|low)", lowered)
|
||||
match = re.search(rf"{key}\s*[:=]?\s*(\d{{1,3}}|high|medium|low)", lowered)
|
||||
if match:
|
||||
_record_score(key, match.group(1))
|
||||
risk_match = re.search(r"hallucination\\s*risk\\s*[:=]?\\s*(low|medium|high)", lowered)
|
||||
risk_match = re.search(r"hallucination\s*risk\s*[:=]?\s*(low|medium|high)", lowered)
|
||||
if risk_match:
|
||||
_record_score("hallucinationrisk", risk_match.group(1))
|
||||
continue
|
||||
@ -3032,11 +3056,18 @@ def _ensure_scores(answer: str) -> str:
|
||||
_record_score("hallucinationrisk", _extract_value(cleaned))
|
||||
continue
|
||||
cleaned_body = re.sub(
|
||||
r"\\bconfidence\\s*:\\s*(high|medium|low)\\b\\.?\\s*",
|
||||
r"\bconfidence\s*:\s*(high|medium|low)\b\.?\s*",
|
||||
"",
|
||||
line,
|
||||
flags=re.IGNORECASE,
|
||||
).strip()
|
||||
cleaned_body = re.sub(
|
||||
r"\bconfident\s*level\s*:\s*(high|medium|low)\b\.?\s*",
|
||||
"",
|
||||
cleaned_body,
|
||||
flags=re.IGNORECASE,
|
||||
).strip()
|
||||
cleaned_body = re.sub(r"\bF\d+\b", "", cleaned_body).strip()
|
||||
if cleaned_body:
|
||||
body_lines.append(cleaned_body)
|
||||
|
||||
@ -3860,41 +3891,195 @@ def _has_body_lines(answer: str) -> bool:
|
||||
|
||||
def _fallback_fact_answer(prompt: str, context: str) -> str:
|
||||
facts: list[str] = []
|
||||
parsed_facts: list[tuple[str, str | None, str | None]] = []
|
||||
q = normalize_query(prompt)
|
||||
tokens = set(_tokens(prompt))
|
||||
for line in (context or "").splitlines():
|
||||
trimmed = line.strip()
|
||||
if not trimmed.startswith("F"):
|
||||
if not trimmed:
|
||||
continue
|
||||
match = re.match(r"^F\\d+.*?\\]:\\s*(.*)$", trimmed)
|
||||
if trimmed.startswith("F"):
|
||||
match = re.match(r"^F\d+.*?\]:\s*(.*)$", trimmed)
|
||||
if not match:
|
||||
match = re.match(r"^F\\d+:\\s*(.*)$", trimmed)
|
||||
match = re.match(r"^F\d+:\s*(.*)$", trimmed)
|
||||
if not match:
|
||||
continue
|
||||
fact = match.group(1).strip()
|
||||
else:
|
||||
if trimmed.lower().startswith("fact pack") or trimmed.lower().startswith("facts"):
|
||||
continue
|
||||
if trimmed.startswith("-"):
|
||||
fact = trimmed.lstrip("-").strip()
|
||||
else:
|
||||
fact = trimmed
|
||||
if fact.startswith("-"):
|
||||
fact = fact.lstrip("-").strip()
|
||||
if fact:
|
||||
if fact and (":" in fact or "=" in fact):
|
||||
facts.append(fact)
|
||||
key_match = re.match(r"^([\w\s/.-]+):\s*(.+)$", fact)
|
||||
if not key_match:
|
||||
key_match = re.match(r"^([\w\s/.-]+)=\s*(.+)$", fact)
|
||||
if key_match:
|
||||
parsed_facts.append((fact, key_match.group(1).strip(), key_match.group(2).strip()))
|
||||
else:
|
||||
parsed_facts.append((fact, None, None))
|
||||
if not facts:
|
||||
return ""
|
||||
tokens = set(_tokens(prompt))
|
||||
|
||||
def _norm_key(text: str) -> str:
    """Return *text* passed through ``normalize_query`` with spaces turned into underscores."""
    normalized = normalize_query(text)
    # Splitting on a literal space and re-joining is the same as replacing
    # every space, including consecutive ones.
    return "_".join(normalized.split(" "))
|
||||
|
||||
def _find_value(target: str) -> str | None:
    """Return the value of the first parsed fact whose normalized key equals *target*.

    Facts without a key are skipped.  Returns ``None`` when no fact matches.
    """
    return next(
        (
            fact_value
            for _fact, fact_key, fact_value in parsed_facts
            if fact_key and _norm_key(fact_key) == target
        ),
        None,
    )
|
||||
|
||||
def _parse_counts(text: str) -> dict[str, int]:
|
||||
counts: dict[str, int] = {}
|
||||
for part in (text or "").split(","):
|
||||
if "=" not in part:
|
||||
continue
|
||||
k, v = part.split("=", 1)
|
||||
k = k.strip()
|
||||
v = v.strip()
|
||||
if not k or not v:
|
||||
continue
|
||||
try:
|
||||
counts[k] = int(float(v))
|
||||
except ValueError:
|
||||
continue
|
||||
return counts
|
||||
|
||||
def _parse_map(text: str) -> dict[str, str]:
|
||||
mapping: dict[str, str] = {}
|
||||
pattern = re.compile(r"(\w+)\s*=\s*([^=]+?)(?=(?:\s*,\s*\w+\s*=)|$)")
|
||||
for match in pattern.finditer(text or ""):
|
||||
mapping[match.group(1).strip()] = match.group(2).strip().strip(",")
|
||||
return mapping
|
||||
|
||||
list_intent = _is_list_prompt(prompt) or "name" in tokens
|
||||
count_intent = _is_quantitative_prompt(prompt) and ("how many" in q or "count" in tokens or "number" in tokens)
|
||||
hottest_intent = any(word in q for word in ("hottest", "highest", "most", "top", "busiest"))
|
||||
metric = _detect_metric(q)
|
||||
include_hw, _exclude_hw = _detect_hardware_filters(q)
|
||||
|
||||
if hottest_intent and metric in {"cpu", "ram", "net", "io"}:
|
||||
hottest_val = _find_value(f"hottest_{metric}")
|
||||
if hottest_val:
|
||||
return f"Hottest {metric} is {hottest_val}."
|
||||
if hottest_intent and tokens & {"postgres", "database", "db", "connections"}:
|
||||
hottest_db = _find_value("postgres_hottest_db")
|
||||
if hottest_db:
|
||||
return f"Hottest database is {hottest_db}."
|
||||
|
||||
if count_intent and tokens & {"pods", "pod"}:
|
||||
pending = _find_value("pods_pending")
|
||||
failed = _find_value("pods_failed")
|
||||
running = _find_value("pods_running")
|
||||
succeeded = _find_value("pods_succeeded")
|
||||
if "pending" in q and "failed" in q:
|
||||
try:
|
||||
total = float(pending or 0) + float(failed or 0)
|
||||
return f"Pods pending or failed: {total:.0f}."
|
||||
except ValueError:
|
||||
pass
|
||||
if "pending" in q and pending is not None:
|
||||
return f"Pods pending is {pending}."
|
||||
if "failed" in q and failed is not None:
|
||||
return f"Pods failed is {failed}."
|
||||
if "succeeded" in q and succeeded is not None:
|
||||
return f"Pods succeeded is {succeeded}."
|
||||
if "running" in q and running is not None:
|
||||
return f"Pods running is {running}."
|
||||
|
||||
if count_intent and tokens & {"nodes", "node"} and "not ready" in q:
|
||||
nodes_total = _find_value("nodes_total")
|
||||
if nodes_total and "not_ready" in nodes_total:
|
||||
match = re.search(r"not_ready=([0-9.]+)", nodes_total)
|
||||
if match:
|
||||
return f"Not ready nodes: {match.group(1)}."
|
||||
|
||||
if count_intent and include_hw:
|
||||
counts_line = _find_value("nodes_by_hardware_count")
|
||||
if counts_line:
|
||||
counts = _parse_counts(counts_line)
|
||||
for hw in include_hw:
|
||||
if hw in counts:
|
||||
return f"{hw} nodes: {counts[hw]}."
|
||||
for hw in include_hw:
|
||||
hw_line = _find_value(hw)
|
||||
if hw_line:
|
||||
items = [item.strip() for item in hw_line.split(",") if item.strip()]
|
||||
return f"{hw} nodes: {len(items)}."
|
||||
|
||||
if list_intent and include_hw:
|
||||
if "control" in q:
|
||||
cp_by_hw = _find_value("control_plane_by_hardware")
|
||||
if cp_by_hw:
|
||||
mapping = _parse_map(cp_by_hw)
|
||||
for hw in include_hw:
|
||||
if hw in mapping:
|
||||
return f"{hw} control-plane nodes: {mapping[hw]}."
|
||||
cp_nodes = _find_value("control_plane_nodes")
|
||||
if cp_nodes:
|
||||
return f"Control-plane nodes: {cp_nodes}."
|
||||
for hw in include_hw:
|
||||
hw_line = _find_value(hw)
|
||||
if hw_line:
|
||||
return f"{hw} nodes: {hw_line}."
|
||||
|
||||
if list_intent and "control" in q:
|
||||
cp_nodes = _find_value("control_plane_nodes")
|
||||
if cp_nodes:
|
||||
return f"Control-plane nodes: {cp_nodes}."
|
||||
|
||||
preferred = tokens & {
|
||||
"node",
|
||||
"nodes",
|
||||
"pod",
|
||||
"pods",
|
||||
"postgres",
|
||||
"db",
|
||||
"database",
|
||||
"namespace",
|
||||
"workload",
|
||||
"worker",
|
||||
"workers",
|
||||
"cpu",
|
||||
"ram",
|
||||
"memory",
|
||||
"net",
|
||||
"network",
|
||||
"io",
|
||||
"disk",
|
||||
"connection",
|
||||
"connections",
|
||||
}
|
||||
best_fact = ""
|
||||
best_score = -1
|
||||
for fact in facts:
|
||||
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", fact)
|
||||
key_match = re.match(r"^([\w\s/.-]+):\s*(.+)$", fact)
|
||||
if not key_match:
|
||||
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", fact)
|
||||
key_match = re.match(r"^([\w\s/.-]+)=\s*(.+)$", fact)
|
||||
key_tokens: set[str] = set()
|
||||
if key_match:
|
||||
key_tokens = set(_tokens(key_match.group(1)))
|
||||
score = len(tokens & set(_tokens(fact))) + 2 * len(tokens & key_tokens)
|
||||
if preferred:
|
||||
score += 3 * len(preferred & key_tokens)
|
||||
if not (preferred & key_tokens):
|
||||
score -= 1
|
||||
if list_intent and key_match and "count" in key_tokens:
|
||||
score -= 3
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_fact = fact
|
||||
if best_score <= 0:
|
||||
return ""
|
||||
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+):\\s*(.+)$", best_fact)
|
||||
key_match = re.match(r"^([\w\s/.-]+):\s*(.+)$", best_fact)
|
||||
if not key_match:
|
||||
key_match = re.match(r"^([A-Za-z0-9_\\-/ ]+)=\\s*(.+)$", best_fact)
|
||||
key_match = re.match(r"^([\w\s/.-]+)=\s*(.+)$", best_fact)
|
||||
if key_match:
|
||||
key = key_match.group(1).strip().replace("_", " ")
|
||||
val = key_match.group(2).strip()
|
||||
@ -3936,6 +4121,10 @@ def _needs_full_fact_pack(prompt: str) -> bool:
|
||||
return True
|
||||
if tokens & {"workload", "pods", "namespace", "worker", "workers"}:
|
||||
return True
|
||||
if tokens & {"arch", "architecture", "hardware"}:
|
||||
return True
|
||||
if tokens & METRIC_HINT_WORDS:
|
||||
return True
|
||||
if _NAME_INDEX and tokens & _NAME_INDEX:
|
||||
return True
|
||||
if any(phrase in q for phrase in ("where does", "where is", "where are", "running", "run on", "hosted on", "primary node")):
|
||||
@ -4104,7 +4293,7 @@ def _non_cluster_reply(prompt: str, *, history_lines: list[str], mode: str) -> s
|
||||
system_override=system,
|
||||
model=model,
|
||||
)
|
||||
reply = re.sub(r"\\bconfidence\\s*:\\s*(high|medium|low)\\b\\.?\\s*", "", reply, flags=re.IGNORECASE).strip()
|
||||
reply = re.sub(r"\bconfidence\s*:\s*(high|medium|low)\b\.?\s*", "", reply, flags=re.IGNORECASE).strip()
|
||||
return _ensure_scores(reply)
|
||||
|
||||
|
||||
@ -4405,6 +4594,8 @@ def _is_cluster_query(
|
||||
return True
|
||||
if any(word in q for word in CLUSTER_HINT_WORDS):
|
||||
return True
|
||||
if any(word in q for word in METRIC_HINT_WORDS):
|
||||
return True
|
||||
for host_match in HOST_RE.finditer(q):
|
||||
host = host_match.group(1).lower()
|
||||
if host.endswith("bstein.dev"):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user