quality(atlasbot): enforce strict gate split
This commit is contained in:
parent
6ecf531bac
commit
b7543d7e57
@ -6,11 +6,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY requirements.txt /app/requirements.txt
|
COPY requirements.txt /app/requirements.txt
|
||||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||||
|
COPY pyproject.toml /app/pyproject.toml
|
||||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
|
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
|
||||||
|
|
||||||
COPY atlasbot /app/atlasbot
|
COPY atlasbot /app/atlasbot
|
||||||
|
|
||||||
FROM base AS test
|
FROM base AS test
|
||||||
|
COPY testing /app/testing
|
||||||
COPY tests /app/tests
|
COPY tests /app/tests
|
||||||
COPY scripts /app/scripts
|
COPY scripts /app/scripts
|
||||||
|
|
||||||
|
|||||||
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@ -75,6 +75,10 @@ spec:
|
|||||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||||
}
|
}
|
||||||
|
options {
|
||||||
|
disableConcurrentBuilds()
|
||||||
|
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||||
|
}
|
||||||
stages {
|
stages {
|
||||||
stage('Checkout') {
|
stage('Checkout') {
|
||||||
steps {
|
steps {
|
||||||
|
|||||||
@ -1,7 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from collections.abc import Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from fastapi import FastAPI, Header, HTTPException
|
from fastapi import FastAPI, Header, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
@ -29,6 +28,16 @@ class AnswerResponse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class Api:
|
class Api:
|
||||||
|
"""Expose the answer API and enforce the shared internal token.
|
||||||
|
|
||||||
|
Input:
|
||||||
|
- `settings`: runtime configuration, including the optional internal token;
|
||||||
|
- `answer_handler`: async adapter that answers a normalized question.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- registers the HTTP routes on `self.app`.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
settings: Settings,
|
settings: Settings,
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
def _env_bool(name: str, default: str = "false") -> bool:
|
def _env_bool(name: str, default: str = "false") -> bool:
|
||||||
value = os.getenv(name, default).strip().lower()
|
value = os.getenv(name, default).strip().lower()
|
||||||
return value in {"1", "true", "yes", "y", "on"}
|
return value in {"1", "true", "yes", "y", "on"}
|
||||||
@ -121,6 +122,12 @@ def _load_matrix_bots(bot_mentions: tuple[str, ...]) -> tuple[MatrixBotConfig, .
|
|||||||
|
|
||||||
|
|
||||||
def load_settings() -> Settings:
|
def load_settings() -> Settings:
|
||||||
|
"""Load process settings from environment variables.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- a fully populated `Settings` instance with defaults for missing values.
|
||||||
|
"""
|
||||||
|
|
||||||
bot_mentions = tuple(
|
bot_mentions = tuple(
|
||||||
[
|
[
|
||||||
item.strip()
|
item.strip()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
12
atlasbot/engine/answerer/__init__.py
Normal file
12
atlasbot/engine/answerer/__init__.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
"""Answer engine package."""
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .common import *
|
||||||
|
from .engine import *
|
||||||
|
from .factsheet import *
|
||||||
|
from .post import *
|
||||||
|
from .post_ext import *
|
||||||
|
from .retrieval import *
|
||||||
|
from .retrieval_ext import *
|
||||||
|
from .spine import *
|
||||||
|
from .workflow import *
|
||||||
116
atlasbot/engine/answerer/_base.py
Normal file
116
atlasbot/engine/answerer/_base.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
FOLLOWUP_SHORT_WORDS = 6
|
||||||
|
TOKEN_MIN_LEN = 3
|
||||||
|
GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}
|
||||||
|
NS_ENTRY_MIN_LEN = 2
|
||||||
|
DEDUP_MIN_SENTENCES = 3
|
||||||
|
RUNBOOK_SIMILARITY_THRESHOLD = 0.4
|
||||||
|
BYTES_KB = 1024
|
||||||
|
BYTES_MB = 1024 * 1024
|
||||||
|
|
||||||
|
|
||||||
|
class LLMLimitReached(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class LLMTimeBudgetExceeded(RuntimeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class AnswerScores:
|
||||||
|
confidence: int
|
||||||
|
relevance: int
|
||||||
|
satisfaction: int
|
||||||
|
hallucination_risk: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class AnswerResult:
|
||||||
|
reply: str
|
||||||
|
scores: AnswerScores
|
||||||
|
meta: dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class InsightGuardInput:
|
||||||
|
question: str
|
||||||
|
reply: str
|
||||||
|
classify: dict[str, Any]
|
||||||
|
context: str
|
||||||
|
plan: ModePlan
|
||||||
|
call_llm: Callable[..., Awaitable[str]]
|
||||||
|
facts: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ContradictionContext:
|
||||||
|
call_llm: Callable[..., Awaitable[str]]
|
||||||
|
question: str
|
||||||
|
reply: str
|
||||||
|
facts: list[str]
|
||||||
|
plan: ModePlan
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class EvidenceItem:
|
||||||
|
path: str
|
||||||
|
reason: str
|
||||||
|
value: Any | None = None
|
||||||
|
value_at_claim: Any | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ClaimItem:
|
||||||
|
id: str
|
||||||
|
claim: str
|
||||||
|
evidence: list[EvidenceItem]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ConversationState:
|
||||||
|
updated_at: float
|
||||||
|
claims: list[ClaimItem]
|
||||||
|
snapshot_id: str | None = None
|
||||||
|
snapshot: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ModePlan:
|
||||||
|
model: str
|
||||||
|
fast_model: str
|
||||||
|
max_subquestions: int
|
||||||
|
chunk_lines: int
|
||||||
|
chunk_top: int
|
||||||
|
chunk_group: int
|
||||||
|
kb_max_chars: int
|
||||||
|
kb_max_files: int
|
||||||
|
use_raw_snapshot: bool
|
||||||
|
parallelism: int
|
||||||
|
score_retries: int
|
||||||
|
use_deep_retrieval: bool
|
||||||
|
use_tool: bool
|
||||||
|
use_critic: bool
|
||||||
|
use_gap: bool
|
||||||
|
use_scores: bool
|
||||||
|
drafts: int
|
||||||
|
metric_retries: int
|
||||||
|
subanswer_retries: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ScoreContext:
|
||||||
|
question: str
|
||||||
|
sub_questions: list[str]
|
||||||
|
retries: int
|
||||||
|
parallelism: int
|
||||||
|
select_best: bool
|
||||||
|
fast_model: str
|
||||||
395
atlasbot/engine/answerer/common.py
Normal file
395
atlasbot/engine/answerer/common.py
Normal file
@ -0,0 +1,395 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.config import Settings
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
from atlasbot.llm.client import parse_json
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .factsheet import *
|
||||||
|
from .post import *
|
||||||
|
from .post_ext import *
|
||||||
|
from .retrieval import _gather_limited
|
||||||
|
from .retrieval_ext import *
|
||||||
|
from .spine import *
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_followup_meta(reply: str) -> str:
|
||||||
|
cleaned = reply.strip()
|
||||||
|
if not cleaned:
|
||||||
|
return cleaned
|
||||||
|
prefixes = [
|
||||||
|
"The draft is correct based on the provided context.",
|
||||||
|
"The draft is correct based on the context.",
|
||||||
|
"The draft is correct based on the provided evidence.",
|
||||||
|
"The draft is correct.",
|
||||||
|
"Based on the provided context,",
|
||||||
|
"Based on the context,",
|
||||||
|
"Based on the provided evidence,",
|
||||||
|
]
|
||||||
|
for prefix in prefixes:
|
||||||
|
if cleaned.lower().startswith(prefix.lower()):
|
||||||
|
cleaned = cleaned[len(prefix) :].lstrip(" .")
|
||||||
|
break
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def _build_meta(mode: str, call_count: int, call_cap: int, limit_hit: bool, time_budget_hit: bool, time_budget_sec: float, classify: dict[str, Any], tool_hint: dict[str, Any] | None, started: float) -> dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"mode": mode,
|
||||||
|
"llm_calls": call_count,
|
||||||
|
"llm_limit": call_cap,
|
||||||
|
"llm_limit_hit": limit_hit,
|
||||||
|
"time_budget_sec": time_budget_sec,
|
||||||
|
"time_budget_hit": time_budget_hit,
|
||||||
|
"classify": classify,
|
||||||
|
"tool_hint": tool_hint,
|
||||||
|
"elapsed_sec": round(time.monotonic() - started, 2),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _debug_pipeline_log(settings: Settings, name: str, payload: Any) -> None:
|
||||||
|
"""Write a structured debug event when pipeline tracing is enabled."""
|
||||||
|
|
||||||
|
if not settings.debug_pipeline:
|
||||||
|
return
|
||||||
|
log.info("atlasbot_debug", extra={"extra": {"name": name, "payload": payload}})
|
||||||
|
|
||||||
|
|
||||||
|
def _mode_plan(settings: Settings, mode: str) -> ModePlan:
|
||||||
|
if mode == "genius":
|
||||||
|
return ModePlan(
|
||||||
|
model=settings.ollama_model_genius,
|
||||||
|
fast_model=settings.ollama_model_fast,
|
||||||
|
max_subquestions=6,
|
||||||
|
chunk_lines=6,
|
||||||
|
chunk_top=10,
|
||||||
|
chunk_group=4,
|
||||||
|
kb_max_chars=200000,
|
||||||
|
kb_max_files=200,
|
||||||
|
use_raw_snapshot=True,
|
||||||
|
parallelism=4,
|
||||||
|
score_retries=3,
|
||||||
|
use_deep_retrieval=True,
|
||||||
|
use_tool=True,
|
||||||
|
use_critic=True,
|
||||||
|
use_gap=True,
|
||||||
|
use_scores=True,
|
||||||
|
drafts=2,
|
||||||
|
metric_retries=3,
|
||||||
|
subanswer_retries=3,
|
||||||
|
)
|
||||||
|
if mode == "smart":
|
||||||
|
return ModePlan(
|
||||||
|
model=settings.ollama_model_smart,
|
||||||
|
fast_model=settings.ollama_model_fast,
|
||||||
|
max_subquestions=4,
|
||||||
|
chunk_lines=8,
|
||||||
|
chunk_top=8,
|
||||||
|
chunk_group=4,
|
||||||
|
kb_max_chars=3000,
|
||||||
|
kb_max_files=12,
|
||||||
|
use_raw_snapshot=False,
|
||||||
|
parallelism=2,
|
||||||
|
score_retries=2,
|
||||||
|
use_deep_retrieval=True,
|
||||||
|
use_tool=True,
|
||||||
|
use_critic=True,
|
||||||
|
use_gap=True,
|
||||||
|
use_scores=True,
|
||||||
|
drafts=1,
|
||||||
|
metric_retries=2,
|
||||||
|
subanswer_retries=2,
|
||||||
|
)
|
||||||
|
return ModePlan(
|
||||||
|
model=settings.ollama_model_fast,
|
||||||
|
fast_model=settings.ollama_model_fast,
|
||||||
|
max_subquestions=1,
|
||||||
|
chunk_lines=16,
|
||||||
|
chunk_top=3,
|
||||||
|
chunk_group=5,
|
||||||
|
kb_max_chars=800,
|
||||||
|
kb_max_files=4,
|
||||||
|
use_raw_snapshot=False,
|
||||||
|
parallelism=1,
|
||||||
|
score_retries=1,
|
||||||
|
use_deep_retrieval=False,
|
||||||
|
use_tool=False,
|
||||||
|
use_critic=False,
|
||||||
|
use_gap=False,
|
||||||
|
use_scores=False,
|
||||||
|
drafts=1,
|
||||||
|
metric_retries=1,
|
||||||
|
subanswer_retries=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _llm_call_limit(settings: Settings, mode: str) -> int:
|
||||||
|
if mode == "genius":
|
||||||
|
return settings.genius_llm_calls_max
|
||||||
|
if mode == "smart":
|
||||||
|
return settings.smart_llm_calls_max
|
||||||
|
return settings.fast_llm_calls_max
|
||||||
|
|
||||||
|
|
||||||
|
def _mode_time_budget(settings: Settings, mode: str) -> float:
|
||||||
|
if mode == "genius":
|
||||||
|
return max(0.0, settings.genius_time_budget_sec)
|
||||||
|
if mode == "smart":
|
||||||
|
return max(0.0, settings.smart_time_budget_sec)
|
||||||
|
return max(0.0, settings.quick_time_budget_sec)
|
||||||
|
|
||||||
|
|
||||||
|
def _select_subquestions(parts: list[dict[str, Any]], fallback: str, limit: int) -> list[str]:
|
||||||
|
if not parts:
|
||||||
|
return [fallback]
|
||||||
|
ranked = []
|
||||||
|
for entry in parts:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
question = str(entry.get("question") or "").strip()
|
||||||
|
if not question:
|
||||||
|
continue
|
||||||
|
priority = entry.get("priority")
|
||||||
|
try:
|
||||||
|
weight = float(priority)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
weight = 1.0
|
||||||
|
ranked.append((weight, question))
|
||||||
|
ranked.sort(key=lambda item: item[0], reverse=True)
|
||||||
|
questions = [item[1] for item in ranked][:limit]
|
||||||
|
return questions or [fallback]
|
||||||
|
|
||||||
|
|
||||||
|
def _chunk_lines(lines: list[str], lines_per_chunk: int) -> list[dict[str, Any]]:
|
||||||
|
chunks: list[dict[str, Any]] = []
|
||||||
|
if not lines:
|
||||||
|
return chunks
|
||||||
|
for idx in range(0, len(lines), lines_per_chunk):
|
||||||
|
chunk_lines = lines[idx : idx + lines_per_chunk]
|
||||||
|
text = "\n".join(chunk_lines)
|
||||||
|
summary = " | ".join(chunk_lines[:4])
|
||||||
|
chunks.append({"id": f"c{idx//lines_per_chunk}", "text": text, "summary": summary})
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def _raw_snapshot_chunks(snapshot: dict[str, Any] | None) -> list[dict[str, Any]]:
|
||||||
|
if not isinstance(snapshot, dict) or not snapshot:
|
||||||
|
return []
|
||||||
|
chunks: list[dict[str, Any]] = []
|
||||||
|
for key, value in snapshot.items():
|
||||||
|
try:
|
||||||
|
payload = json.dumps({key: value}, indent=2)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
summary = f"raw:{key}"
|
||||||
|
chunks.append({"id": f"r{key}", "text": payload, "summary": summary})
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
|
||||||
|
groups: list[list[dict[str, Any]]] = []
|
||||||
|
group: list[dict[str, Any]] = []
|
||||||
|
for chunk in chunks:
|
||||||
|
group.append({"id": chunk["id"], "summary": chunk["summary"]})
|
||||||
|
if len(group) >= group_size:
|
||||||
|
groups.append(group)
|
||||||
|
group = []
|
||||||
|
if group:
|
||||||
|
groups.append(group)
|
||||||
|
return groups
|
||||||
|
|
||||||
|
|
||||||
|
async def _score_chunks(call_llm: Callable[..., Any], chunks: list[dict[str, Any]], question: str, sub_questions: list[str], plan: ModePlan) -> dict[str, float]:
|
||||||
|
scores: dict[str, float] = {chunk["id"]: 0.0 for chunk in chunks}
|
||||||
|
if not chunks:
|
||||||
|
return scores
|
||||||
|
groups = _build_chunk_groups(chunks, plan.chunk_group)
|
||||||
|
ctx = ScoreContext(
|
||||||
|
question=question,
|
||||||
|
sub_questions=sub_questions,
|
||||||
|
retries=max(1, plan.score_retries),
|
||||||
|
parallelism=plan.parallelism,
|
||||||
|
select_best=plan.score_retries > 1,
|
||||||
|
fast_model=plan.fast_model,
|
||||||
|
)
|
||||||
|
if ctx.parallelism <= 1 or len(groups) * ctx.retries <= 1:
|
||||||
|
return await _score_groups_serial(call_llm, groups, ctx)
|
||||||
|
return await _score_groups_parallel(call_llm, groups, ctx)
|
||||||
|
|
||||||
|
|
||||||
|
async def _score_groups_serial(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
|
||||||
|
scores: dict[str, float] = {}
|
||||||
|
for grp in groups:
|
||||||
|
runs = [await _score_chunk_group(call_llm, grp, ctx.question, ctx.sub_questions) for _ in range(ctx.retries)]
|
||||||
|
if ctx.select_best and len(runs) > 1:
|
||||||
|
best = await _select_best_score_run(call_llm, grp, runs, ctx)
|
||||||
|
scores.update(best)
|
||||||
|
else:
|
||||||
|
scores.update(_merge_score_runs(runs))
|
||||||
|
return scores
|
||||||
|
|
||||||
|
|
||||||
|
async def _score_groups_parallel(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
|
||||||
|
coros: list[Awaitable[tuple[int, dict[str, float]]]] = []
|
||||||
|
for idx, grp in enumerate(groups):
|
||||||
|
for _ in range(ctx.retries):
|
||||||
|
coros.append(_score_chunk_group_run(call_llm, idx, grp, ctx.question, ctx.sub_questions))
|
||||||
|
results = await _gather_limited(coros, ctx.parallelism)
|
||||||
|
grouped: dict[int, list[dict[str, float]]] = {}
|
||||||
|
for idx, result in results:
|
||||||
|
grouped.setdefault(idx, []).append(result)
|
||||||
|
scores: dict[str, float] = {}
|
||||||
|
for idx, runs in grouped.items():
|
||||||
|
if ctx.select_best and len(runs) > 1:
|
||||||
|
group = groups[idx]
|
||||||
|
best = await _select_best_score_run(call_llm, group, runs, ctx)
|
||||||
|
scores.update(best)
|
||||||
|
else:
|
||||||
|
scores.update(_merge_score_runs(runs))
|
||||||
|
return scores
|
||||||
|
|
||||||
|
|
||||||
|
async def _score_chunk_group(call_llm: Callable[..., Any], group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> dict[str, float]:
|
||||||
|
prompt = (
|
||||||
|
prompts.CHUNK_SCORE_PROMPT
|
||||||
|
+ "\nQuestion: "
|
||||||
|
+ question
|
||||||
|
+ "\nSubQuestions: "
|
||||||
|
+ json.dumps(sub_questions)
|
||||||
|
+ "\nChunks: "
|
||||||
|
+ json.dumps(group)
|
||||||
|
)
|
||||||
|
raw = await call_llm(prompts.RETRIEVER_SYSTEM, prompt, model=None, tag="chunk_score")
|
||||||
|
data = _parse_json_list(raw)
|
||||||
|
scored: dict[str, float] = {}
|
||||||
|
for entry in data:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
cid = str(entry.get("id") or "").strip()
|
||||||
|
if not cid:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
score = float(entry.get("score") or 0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
score = 0.0
|
||||||
|
scored[cid] = score
|
||||||
|
return scored
|
||||||
|
|
||||||
|
|
||||||
|
async def _score_chunk_group_run(call_llm: Callable[..., Any], idx: int, group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> tuple[int, dict[str, float]]:
|
||||||
|
return idx, await _score_chunk_group(call_llm, group, question, sub_questions)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_score_runs(runs: list[dict[str, float]]) -> dict[str, float]:
|
||||||
|
if not runs:
|
||||||
|
return {}
|
||||||
|
totals: dict[str, float] = {}
|
||||||
|
counts: dict[str, int] = {}
|
||||||
|
for run in runs:
|
||||||
|
for key, value in run.items():
|
||||||
|
totals[key] = totals.get(key, 0.0) + float(value)
|
||||||
|
counts[key] = counts.get(key, 0) + 1
|
||||||
|
return {key: totals[key] / counts[key] for key in totals}
|
||||||
|
|
||||||
|
|
||||||
|
async def _select_best_score_run(call_llm: Callable[..., Any], group: list[dict[str, Any]], runs: list[dict[str, float]], ctx: ScoreContext) -> dict[str, float]:
|
||||||
|
if not runs:
|
||||||
|
return {}
|
||||||
|
prompt = (
|
||||||
|
prompts.RETRIEVER_SELECT_PROMPT
|
||||||
|
+ "\nQuestion: "
|
||||||
|
+ ctx.question
|
||||||
|
+ "\nSubQuestions: "
|
||||||
|
+ json.dumps(ctx.sub_questions)
|
||||||
|
+ "\nChunks: "
|
||||||
|
+ json.dumps(group)
|
||||||
|
+ "\nRuns: "
|
||||||
|
+ json.dumps(runs)
|
||||||
|
)
|
||||||
|
raw = await call_llm(prompts.RETRIEVER_SELECT_SYSTEM, prompt, model=ctx.fast_model, tag="chunk_select")
|
||||||
|
data = parse_json(raw)
|
||||||
|
idx = 0
|
||||||
|
if isinstance(data, dict):
|
||||||
|
try:
|
||||||
|
idx = int(data.get("selected_index") or 0)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
idx = 0
|
||||||
|
if idx < 0 or idx >= len(runs):
|
||||||
|
idx = 0
|
||||||
|
return runs[idx]
|
||||||
|
|
||||||
|
|
||||||
|
def _keyword_hits(ranked: list[dict[str, Any]], head: dict[str, Any], keywords: list[str] | None) -> list[dict[str, Any]]:
|
||||||
|
if not keywords:
|
||||||
|
return []
|
||||||
|
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
|
||||||
|
if not lowered:
|
||||||
|
return []
|
||||||
|
hits: list[dict[str, Any]] = []
|
||||||
|
for item in ranked:
|
||||||
|
if item is head:
|
||||||
|
continue
|
||||||
|
text = str(item.get("text") or "").lower()
|
||||||
|
if any(kw in text for kw in lowered):
|
||||||
|
hits.append(item)
|
||||||
|
return hits
|
||||||
|
|
||||||
|
|
||||||
|
def _select_chunks(chunks: list[dict[str, Any]], scores: dict[str, float], plan: ModePlan, keywords: list[str] | None = None, must_ids: list[str] | None = None) -> list[dict[str, Any]]:
|
||||||
|
if not chunks:
|
||||||
|
return []
|
||||||
|
ranked = sorted(chunks, key=lambda item: scores.get(item["id"], 0.0), reverse=True)
|
||||||
|
selected: list[dict[str, Any]] = [chunks[0]]
|
||||||
|
if _append_must_chunks(chunks, selected, must_ids, plan.chunk_top):
|
||||||
|
return selected
|
||||||
|
if _append_keyword_chunks(ranked, selected, keywords, plan.chunk_top):
|
||||||
|
return selected
|
||||||
|
_append_ranked_chunks(ranked, selected, plan.chunk_top)
|
||||||
|
return selected
|
||||||
|
|
||||||
|
|
||||||
|
def _append_must_chunks(chunks: list[dict[str, Any]], selected: list[dict[str, Any]], must_ids: list[str] | None, limit: int) -> bool:
|
||||||
|
if not must_ids:
|
||||||
|
return False
|
||||||
|
id_map = {item["id"]: item for item in chunks}
|
||||||
|
for cid in must_ids:
|
||||||
|
item = id_map.get(cid)
|
||||||
|
if item and item not in selected:
|
||||||
|
selected.append(item)
|
||||||
|
if len(selected) >= limit:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _append_keyword_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], keywords: list[str] | None, limit: int) -> bool:
|
||||||
|
if not ranked:
|
||||||
|
return False
|
||||||
|
head = ranked[0]
|
||||||
|
for item in _keyword_hits(ranked, head, keywords):
|
||||||
|
if item not in selected:
|
||||||
|
selected.append(item)
|
||||||
|
if len(selected) >= limit:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _append_ranked_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], limit: int) -> None:
|
||||||
|
for item in ranked:
|
||||||
|
if len(selected) >= limit:
|
||||||
|
break
|
||||||
|
if item not in selected:
|
||||||
|
selected.append(item)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_runbooks(runbooks: list[str]) -> str:
|
||||||
|
if not runbooks:
|
||||||
|
return ""
|
||||||
|
return "Relevant runbooks:\n" + "\n".join([f"- {item}" for item in runbooks])
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
267
atlasbot/engine/answerer/engine.py
Normal file
267
atlasbot/engine/answerer/engine.py
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.config import Settings
|
||||||
|
from atlasbot.knowledge.loader import KnowledgeBase
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
from atlasbot.llm.client import LLMClient, build_messages
|
||||||
|
from atlasbot.snapshot.builder import SnapshotProvider
|
||||||
|
from atlasbot.state.store import ClaimStore
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .common import *
|
||||||
|
from .factsheet import *
|
||||||
|
from .post import *
|
||||||
|
from .post_ext import *
|
||||||
|
from .retrieval import *
|
||||||
|
from .retrieval_ext import *
|
||||||
|
from .spine import *
|
||||||
|
from .workflow import run_answer
|
||||||
|
|
||||||
|
|
||||||
|
class AnswerEngine:
|
||||||
|
"""Coordinate Atlas question answering across snapshots, KB, and LLMs.
|
||||||
|
|
||||||
|
Why:
|
||||||
|
- keep the public answer surface in one place while the retrieval and
|
||||||
|
post-processing helpers stay split across smaller modules.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, settings: Settings, llm: LLMClient, kb: KnowledgeBase, snapshot: SnapshotProvider) -> None:
|
||||||
|
self._settings = settings
|
||||||
|
self._llm = llm
|
||||||
|
self._kb = kb
|
||||||
|
self._snapshot = snapshot
|
||||||
|
self._store = ClaimStore(settings.state_db_path, settings.conversation_ttl_sec)
|
||||||
|
|
||||||
|
async def answer(self, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult:
|
||||||
|
"""Answer a question by delegating to the staged workflow."""
|
||||||
|
|
||||||
|
return await run_answer(
|
||||||
|
self,
|
||||||
|
question,
|
||||||
|
mode=mode,
|
||||||
|
history=history,
|
||||||
|
observer=observer,
|
||||||
|
conversation_id=conversation_id,
|
||||||
|
snapshot_pin=snapshot_pin,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _answer_stock(self, question: str) -> AnswerResult:
|
||||||
|
messages = build_messages(prompts.STOCK_SYSTEM, question)
|
||||||
|
reply = await self._llm.chat(messages, model=self._settings.ollama_model)
|
||||||
|
return AnswerResult(reply, _default_scores(), {"mode": "stock"})
|
||||||
|
|
||||||
|
async def _synthesize_answer(self, question: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:
|
||||||
|
style_hint = _style_hint(classify)
|
||||||
|
if not subanswers:
|
||||||
|
prompt = (
|
||||||
|
prompts.SYNTHESIZE_PROMPT
|
||||||
|
+ "\nQuestion: "
|
||||||
|
+ question
|
||||||
|
+ "\nStyle: "
|
||||||
|
+ style_hint
|
||||||
|
+ "\nQuestionType: "
|
||||||
|
+ (classify.get("question_type") or "unknown")
|
||||||
|
)
|
||||||
|
return await call_llm(prompts.SYNTHESIZE_SYSTEM, prompt, context=context, model=plan.model, tag="synth")
|
||||||
|
draft_prompts = []
|
||||||
|
for idx in range(plan.drafts):
|
||||||
|
draft_prompts.append(
|
||||||
|
prompts.SYNTHESIZE_PROMPT
|
||||||
|
+ "\nQuestion: "
|
||||||
|
+ question
|
||||||
|
+ "\nStyle: "
|
||||||
|
+ style_hint
|
||||||
|
+ "\nQuestionType: "
|
||||||
|
+ (classify.get("question_type") or "unknown")
|
||||||
|
+ "\nSubanswers:\n"
|
||||||
|
+ "\n".join([f"- {item}" for item in subanswers])
|
||||||
|
+ f"\nDraftIndex: {idx + 1}"
|
||||||
|
)
|
||||||
|
drafts: list[str] = []
|
||||||
|
if plan.parallelism > 1 and len(draft_prompts) > 1:
|
||||||
|
drafts = await _gather_limited(
|
||||||
|
[
|
||||||
|
call_llm(
|
||||||
|
prompts.SYNTHESIZE_SYSTEM,
|
||||||
|
prompt,
|
||||||
|
context=context,
|
||||||
|
model=plan.model,
|
||||||
|
tag="synth",
|
||||||
|
)
|
||||||
|
for prompt in draft_prompts
|
||||||
|
],
|
||||||
|
plan.parallelism,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
for prompt in draft_prompts:
|
||||||
|
drafts.append(
|
||||||
|
await call_llm(
|
||||||
|
prompts.SYNTHESIZE_SYSTEM,
|
||||||
|
prompt,
|
||||||
|
context=context,
|
||||||
|
model=plan.model,
|
||||||
|
tag="synth",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if len(drafts) == 1:
|
||||||
|
return drafts[0]
|
||||||
|
select_prompt = (
|
||||||
|
prompts.DRAFT_SELECT_PROMPT
|
||||||
|
+ "\nQuestion: "
|
||||||
|
+ question
|
||||||
|
+ "\nDrafts:\n"
|
||||||
|
+ "\n\n".join([f"Draft {idx + 1}: {text}" for idx, text in enumerate(drafts)])
|
||||||
|
)
|
||||||
|
select_raw = await call_llm(prompts.CRITIC_SYSTEM, select_prompt, context=context, model=plan.fast_model, tag="draft_select")
|
||||||
|
selection = _parse_json_block(select_raw, fallback={})
|
||||||
|
idx = int(selection.get("best", 1)) - 1
|
||||||
|
if 0 <= idx < len(drafts):
|
||||||
|
return drafts[idx]
|
||||||
|
return drafts[0]
|
||||||
|
|
||||||
|
async def _score_answer(self, question: str, reply: str, plan: ModePlan, call_llm: Callable[..., Any]) -> AnswerScores:
|
||||||
|
if not plan.use_scores:
|
||||||
|
return _default_scores()
|
||||||
|
prompt = prompts.SCORE_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply
|
||||||
|
raw = await call_llm(prompts.SCORE_SYSTEM, prompt, model=plan.fast_model, tag="score")
|
||||||
|
data = _parse_json_block(raw, fallback={})
|
||||||
|
return _scores_from_json(data)
|
||||||
|
|
||||||
|
async def _extract_claims(self, question: str, reply: str, summary: dict[str, Any], facts_used: list[str], call_llm: Callable[..., Any]) -> list[ClaimItem]:
|
||||||
|
if not reply or not summary:
|
||||||
|
return []
|
||||||
|
summary_json = _json_excerpt(summary)
|
||||||
|
facts_used = [line.strip() for line in (facts_used or []) if line and line.strip()]
|
||||||
|
facts_block = ""
|
||||||
|
if facts_used:
|
||||||
|
facts_block = "\nFactsUsed:\n" + "\n".join([f"- {line}" for line in facts_used[:12]])
|
||||||
|
prompt = prompts.CLAIM_MAP_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply + facts_block
|
||||||
|
raw = await call_llm(
|
||||||
|
prompts.CLAIM_SYSTEM,
|
||||||
|
prompt,
|
||||||
|
context=f"SnapshotSummaryJson:{summary_json}",
|
||||||
|
model=self._settings.ollama_model_fast,
|
||||||
|
tag="claim_map",
|
||||||
|
)
|
||||||
|
data = _parse_json_block(raw, fallback={})
|
||||||
|
claims_raw = data.get("claims") if isinstance(data, dict) else None
|
||||||
|
claims: list[ClaimItem] = []
|
||||||
|
if isinstance(claims_raw, list):
|
||||||
|
for entry in claims_raw:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
claim_text = str(entry.get("claim") or "").strip()
|
||||||
|
claim_id = str(entry.get("id") or "").strip() or f"c{len(claims)+1}"
|
||||||
|
evidence_items: list[EvidenceItem] = []
|
||||||
|
for ev in entry.get("evidence") or []:
|
||||||
|
if not isinstance(ev, dict):
|
||||||
|
continue
|
||||||
|
path = str(ev.get("path") or "").strip()
|
||||||
|
if not path:
|
||||||
|
continue
|
||||||
|
reason = str(ev.get("reason") or "").strip()
|
||||||
|
value = _resolve_path(summary, path)
|
||||||
|
evidence_items.append(EvidenceItem(path=path, reason=reason, value=value, value_at_claim=value))
|
||||||
|
if claim_text and evidence_items:
|
||||||
|
claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
|
||||||
|
return claims
|
||||||
|
|
||||||
|
async def _dedup_reply(self, reply: str, plan: ModePlan, call_llm: Callable[..., Any], tag: str) -> str:
|
||||||
|
if not _needs_dedup(reply):
|
||||||
|
return reply
|
||||||
|
dedup_prompt = prompts.DEDUP_PROMPT + "\nDraft: " + reply
|
||||||
|
return await call_llm(prompts.DEDUP_SYSTEM, dedup_prompt, model=plan.fast_model, tag=tag)
|
||||||
|
|
||||||
|
    async def _answer_followup(self, question: str, state: ConversationState, summary: dict[str, Any], classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:  # noqa: C901, ARG002
        """Answer a follow-up question from claims stored earlier in the conversation.

        Pipeline: select relevant stored claims, rebuild their evidence against the
        current snapshot (noting value drift), draft a reply, repair any mention of
        unknown nodes/namespaces, de-duplicate, and strip meta text.
        """
        # Ask the fast model which stored claims the follow-up refers to;
        # fall back to the first two claims when selection returns nothing.
        claim_ids = await self._select_claims(question, state.claims, plan, call_llm)
        selected = [claim for claim in state.claims if claim.id in claim_ids] if claim_ids else state.claims[:2]
        evidence_lines = []
        lowered = question.lower()
        for claim in selected:
            evidence_lines.append(f"Claim: {claim.claim}")
            for ev in claim.evidence:
                # Re-resolve each evidence path against the *current* summary and
                # annotate when the value changed since the claim was recorded.
                current = _resolve_path(summary, ev.path)
                ev.value = current
                delta_note = ""
                if ev.value_at_claim is not None and current is not None and current != ev.value_at_claim:
                    delta_note = f" (now {current})"
                evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
        # Hardware/hotspot questions get the hotspot summary appended as extra evidence.
        if any(term in lowered for term in ("hotspot", "hot spot", "hottest", "jetson", "rpi", "amd64", "arm64", "hardware", "class")):
            hotspot_lines = _hotspot_evidence(summary)
            if hotspot_lines:
                evidence_lines.append("HotspotSummary:")
                evidence_lines.extend(hotspot_lines)
        evidence_ctx = "\n".join(evidence_lines)
        prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
        reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
        # Guard pass: detect node/namespace names in the draft that do not exist
        # in the snapshot and ask the model to rewrite using only allowed names.
        allowed_nodes = _allowed_nodes(summary)
        allowed_namespaces = _allowed_namespaces(summary)
        unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
        unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        if extra_bits:
            fix_prompt = (
                prompts.EVIDENCE_FIX_PROMPT
                + "\nQuestion: "
                + question
                + "\nDraft: "
                + reply
                + "\n"
                + "\n".join(extra_bits)
            )
            reply = await call_llm(
                prompts.EVIDENCE_FIX_SYSTEM,
                fix_prompt,
                context="Evidence:\n" + evidence_ctx,
                model=plan.model,
                tag="followup_fix",
            )
        # Final polish: remove repeated sentences, then strip meta/preamble text.
        reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
        reply = _strip_followup_meta(reply)
        return reply
|
||||||
|
|
||||||
|
async def _select_claims(self, question: str, claims: list[ClaimItem], plan: ModePlan, call_llm: Callable[..., Any]) -> list[str]:
|
||||||
|
if not claims:
|
||||||
|
return []
|
||||||
|
claims_brief = [{"id": claim.id, "claim": claim.claim} for claim in claims]
|
||||||
|
prompt = prompts.SELECT_CLAIMS_PROMPT + "\nFollow-up: " + question + "\nClaims: " + json.dumps(claims_brief)
|
||||||
|
raw = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.fast_model, tag="select_claims")
|
||||||
|
data = _parse_json_block(raw, fallback={})
|
||||||
|
ids = data.get("claim_ids") if isinstance(data, dict) else []
|
||||||
|
if isinstance(ids, list):
|
||||||
|
return [str(item) for item in ids if item]
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _get_state(self, conversation_id: str | None) -> ConversationState | None:
|
||||||
|
if not conversation_id:
|
||||||
|
return None
|
||||||
|
state_payload = self._store.get(conversation_id)
|
||||||
|
return _state_from_payload(state_payload) if state_payload else None
|
||||||
|
|
||||||
|
def _store_state(self, conversation_id: str, claims: list[ClaimItem], summary: dict[str, Any], snapshot: dict[str, Any] | None, pin_snapshot: bool) -> None:
|
||||||
|
snapshot_id = _snapshot_id(summary)
|
||||||
|
pinned_snapshot = snapshot if pin_snapshot else None
|
||||||
|
payload = {
|
||||||
|
"updated_at": time.monotonic(),
|
||||||
|
"claims": _claims_to_payload(claims),
|
||||||
|
"snapshot_id": snapshot_id,
|
||||||
|
"snapshot": pinned_snapshot,
|
||||||
|
}
|
||||||
|
self._store.set(conversation_id, payload)
|
||||||
|
|
||||||
|
    def _cleanup_state(self) -> None:
        """Delegate housekeeping to the backing store's cleanup()."""
        self._store.cleanup()
|
||||||
189
atlasbot/engine/answerer/factsheet.py
Normal file
189
atlasbot/engine/answerer/factsheet.py
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
|
||||||
|
MAX_FACT_LINE_CHARS = 180
|
||||||
|
MAX_KB_LINE_CHARS = 220
|
||||||
|
|
||||||
|
|
||||||
|
def _factsheet_kb_chars(mode: str, default_chars: int) -> int:
|
||||||
|
if mode == "genius":
|
||||||
|
return min(max(default_chars, 4000), 6000)
|
||||||
|
if mode == "smart":
|
||||||
|
return min(max(default_chars, 3000), 4500)
|
||||||
|
return max(1200, default_chars)
|
||||||
|
|
||||||
|
|
||||||
|
def _factsheet_line_limit(mode: str) -> int:
|
||||||
|
if mode == "genius":
|
||||||
|
return 30
|
||||||
|
if mode == "smart":
|
||||||
|
return 22
|
||||||
|
return 14
|
||||||
|
|
||||||
|
|
||||||
|
def _factsheet_instruction(mode: str) -> str:
|
||||||
|
if mode == "genius":
|
||||||
|
return (
|
||||||
|
"Start with a direct conclusion, then include the strongest supporting facts and one caveat. "
|
||||||
|
"Keep it to 4-8 sentences. If data is missing, name the missing metric explicitly."
|
||||||
|
)
|
||||||
|
if mode == "smart":
|
||||||
|
return (
|
||||||
|
"Start with a direct conclusion and support it with key facts. Keep it to 2-5 sentences. "
|
||||||
|
"If data is missing, say exactly what is missing and suggest atlas-genius."
|
||||||
|
)
|
||||||
|
return "Keep it to 1-3 sentences. If key data is missing, say what is missing and suggest atlas-smart."
|
||||||
|
|
||||||
|
|
||||||
|
def _factsheet_model(mode: str, plan: ModePlan) -> str:
|
||||||
|
if mode in {"quick", "fast"}:
|
||||||
|
return plan.fast_model
|
||||||
|
return plan.model
|
||||||
|
|
||||||
|
|
||||||
|
def _is_plain_math_question(question: str) -> bool:
|
||||||
|
lowered = question.lower().strip()
|
||||||
|
if not lowered:
|
||||||
|
return False
|
||||||
|
cluster_markers = (
|
||||||
|
"titan",
|
||||||
|
"atlas",
|
||||||
|
"cluster",
|
||||||
|
"node",
|
||||||
|
"pod",
|
||||||
|
"namespace",
|
||||||
|
"workload",
|
||||||
|
"grafana",
|
||||||
|
"alert",
|
||||||
|
"k8s",
|
||||||
|
"kubernetes",
|
||||||
|
"rpi",
|
||||||
|
"longhorn",
|
||||||
|
"postgres",
|
||||||
|
"victoria",
|
||||||
|
"ollama",
|
||||||
|
)
|
||||||
|
if any(token in lowered for token in cluster_markers):
|
||||||
|
return False
|
||||||
|
return bool(
|
||||||
|
re.fullmatch(r"[0-9\s+\-*/().=]+", lowered)
|
||||||
|
or re.search(r"\bwhat(?:'s| is)\s+\d+\s*[-+*/]\s*\d+\b", lowered)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _quick_fact_sheet_lines(question: str, summary_lines: list[str], kb_lines: list[str], *, limit: int) -> list[str]:  # noqa: C901
    """Select up to *limit* snapshot + KB lines most relevant to *question*.

    Summary lines are scored (priority markers +4, question-token overlap x3,
    short length +1); KB lines are filtered by token overlap or runbook/hardware
    markers. The two selections are merged, deduplicated, and capped at *limit*.
    """
    # Question tokens (3+ chars), minus generic metric vocabulary.
    tokens = {
        token
        for token in re.findall(r"[a-z0-9][a-z0-9_-]{2,}", question.lower())
        if token not in GENERIC_METRIC_TOKENS
    }
    # Substrings that always make a summary line interesting.
    priority_markers = (
        "snapshot:",
        "nodes_total",
        "nodes_ready",
        "nodes_not_ready",
        "workers_ready",
        "workers_not_ready",
        "control_plane",
        "worker_nodes",
        "hottest",
        "postgres",
        "pods",
        "longhorn",
        "titan-",
        "rpi5",
        "rpi4",
        "jetson",
        "amd64",
    )
    scored: list[tuple[int, str]] = []
    for raw in summary_lines:
        line = raw.strip()
        if not line:
            continue
        lowered = line.lower()
        score = 0
        if any(marker in lowered for marker in priority_markers):
            score += 4
        overlap = sum(1 for token in tokens if token in lowered)
        score += overlap * 3
        if len(line) <= MAX_FACT_LINE_CHARS:
            score += 1
        if score > 0:
            scored.append((score, line))

    scored.sort(key=lambda item: item[0], reverse=True)
    selected = [line for _, line in scored[:limit]]
    if not selected:
        # Nothing scored: fall back to the first non-blank summary lines.
        selected = [line.strip() for line in summary_lines if line.strip()][:limit]

    kb_selected: list[str] = []
    for raw in kb_lines:
        line = raw.strip()
        if not line or len(line) > MAX_KB_LINE_CHARS:
            continue
        lowered = line.lower()
        # Skip KB bookkeeping lines.
        if "kb file:" in lowered or "kb: atlas.json" in lowered:
            continue
        overlap = sum(1 for token in tokens if token in lowered)
        if overlap > 0 or any(marker in lowered for marker in ("runbook", "titan-", "rpi5", "rpi4", "amd64", "jetson")):
            kb_selected.append(line)
            # Cap KB lines at roughly a third of the overall budget.
            if len(kb_selected) >= max(4, limit // 3):
                break

    # Merge, preserving order and dropping duplicates, up to *limit* lines.
    merged = []
    seen: set[str] = set()
    for line in selected + kb_selected:
        if line not in seen:
            seen.add(line)
            merged.append(line)
            if len(merged) >= limit:
                break
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _quick_fact_sheet_text(lines: list[str]) -> str:
|
||||||
|
if not lines:
|
||||||
|
return "Fact Sheet:\n- No snapshot facts available."
|
||||||
|
body = "\n".join([f"- {line}" for line in lines])
|
||||||
|
return "Fact Sheet:\n" + body
|
||||||
|
|
||||||
|
|
||||||
|
def _quick_fact_sheet_heuristic_answer(question: str, fact_lines: list[str]) -> str:
|
||||||
|
lowered = question.lower()
|
||||||
|
if (
|
||||||
|
any(token in lowered for token in ("placement", "schedule", "last resort", "last-resort"))
|
||||||
|
and any(token in lowered for token in ("node", "workload", "worker", "titan"))
|
||||||
|
):
|
||||||
|
return (
|
||||||
|
"General workload placement is: prefer rpi5 workers first, then rpi4 workers. "
|
||||||
|
"titan-22 is the last-resort general compute node, and titan-24 is the absolute last resort "
|
||||||
|
"reserved for heavy one-offs."
|
||||||
|
)
|
||||||
|
|
||||||
|
for line in fact_lines:
|
||||||
|
compact = line.replace(" ", "")
|
||||||
|
match = re.search(r"nodes_total[:=](\d+),ready[:=](\d+),not_ready[:=](\d+)", compact)
|
||||||
|
if not match:
|
||||||
|
continue
|
||||||
|
total = match.group(1)
|
||||||
|
ready = match.group(2)
|
||||||
|
not_ready = match.group(3)
|
||||||
|
if "how many" in lowered and "ready" in lowered and "node" in lowered:
|
||||||
|
return f"The latest snapshot shows {ready} ready nodes out of {total} total ({not_ready} not ready)."
|
||||||
|
if ("not ready" in lowered or "unready" in lowered) and "node" in lowered:
|
||||||
|
return f"The latest snapshot shows {not_ready} not-ready nodes ({ready} ready out of {total} total)."
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _json_excerpt(summary: dict[str, Any], max_chars: int = 12000) -> str:
|
||||||
|
raw = json.dumps(summary, ensure_ascii=False)
|
||||||
|
return raw[:max_chars]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
459
atlasbot/engine/answerer/post.py
Normal file
459
atlasbot/engine/answerer/post.py
Normal file
@ -0,0 +1,459 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
from atlasbot.llm.client import parse_json
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .retrieval_ext import _dedupe_lines
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
|
||||||
|
merged: list[str] = []
|
||||||
|
for line in primary + fallback:
|
||||||
|
value = (line or "").strip()
|
||||||
|
if value and value not in merged:
|
||||||
|
merged.append(value)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_unknown_entities(reply: str, unknown_nodes: list[str], unknown_namespaces: list[str]) -> str:
|
||||||
|
if not reply:
|
||||||
|
return reply
|
||||||
|
if not unknown_nodes and not unknown_namespaces:
|
||||||
|
return reply
|
||||||
|
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", reply) if s.strip()]
|
||||||
|
if not sentences:
|
||||||
|
return reply
|
||||||
|
lowered_nodes = [node.lower() for node in unknown_nodes]
|
||||||
|
lowered_namespaces = [ns.lower() for ns in unknown_namespaces]
|
||||||
|
kept: list[str] = []
|
||||||
|
for sent in sentences:
|
||||||
|
lower = sent.lower()
|
||||||
|
if lowered_nodes and any(node in lower for node in lowered_nodes):
|
||||||
|
continue
|
||||||
|
if lowered_namespaces and any(f"namespace {ns}" in lower for ns in lowered_namespaces):
|
||||||
|
continue
|
||||||
|
kept.append(sent)
|
||||||
|
cleaned = " ".join(kept).strip()
|
||||||
|
return cleaned or reply
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_evidence_guard(reply: str, facts: list[str]) -> bool:
|
||||||
|
if not reply or not facts:
|
||||||
|
return False
|
||||||
|
lower_reply = reply.lower()
|
||||||
|
fact_text = " ".join(facts).lower()
|
||||||
|
node_pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
|
||||||
|
nodes = {m.group(1).lower() for m in node_pattern.finditer(reply)}
|
||||||
|
if nodes:
|
||||||
|
missing = [node for node in nodes if node not in fact_text]
|
||||||
|
if missing:
|
||||||
|
return True
|
||||||
|
pressure_terms = ("pressure", "diskpressure", "memorypressure", "pidpressure", "headroom")
|
||||||
|
if any(term in lower_reply for term in pressure_terms) and not any(term in fact_text for term in pressure_terms):
|
||||||
|
return True
|
||||||
|
arch_terms = ("amd64", "arm64", "rpi", "rpi4", "rpi5", "jetson")
|
||||||
|
return any(term in lower_reply for term in arch_terms) and not any(term in fact_text for term in arch_terms)
|
||||||
|
|
||||||
|
|
||||||
|
async def _contradiction_decision(ctx: ContradictionContext, attempts: int = 1) -> dict[str, Any]:
    """Ask the fast model whether the draft contradicts the facts.

    Runs up to *attempts* variants and keeps the verdict with the highest
    self-reported confidence. Returns {"use_facts": bool, "confidence": int};
    defaults to trusting the facts at confidence 50.
    """
    best = {"use_facts": True, "confidence": 50}
    facts_block = "\n".join(ctx.facts[:12])
    for idx in range(max(1, attempts)):
        # Label each retry so the model does not cache-identical prompts.
        variant = f"Variant: {idx + 1}" if attempts > 1 else ""
        prompt = (
            prompts.CONTRADICTION_PROMPT.format(question=ctx.question, draft=ctx.reply, facts=facts_block)
            + ("\n" + variant if variant else "")
        )
        raw = await ctx.call_llm(
            prompts.CONTRADICTION_SYSTEM,
            prompt,
            model=ctx.plan.fast_model,
            tag="contradiction",
        )
        data = _parse_json_block(raw, fallback={})
        try:
            confidence = int(data.get("confidence", 50))
        except Exception:
            # Malformed confidence from the model: keep the neutral default.
            confidence = 50
        use_facts = bool(data.get("use_facts", True))
        # >= keeps the most recent verdict on ties.
        if confidence >= best.get("confidence", 0):
            best = {"use_facts": use_facts, "confidence": confidence}
    return best
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]:
    """Keep lines matching expanded keyword tokens; fall back to the head of *lines*."""
    if not lines:
        return []
    tokens = _expand_tokens(keywords)
    if not tokens:
        return lines[:max_lines]
    matching = [line for line in lines if any(tok in line.lower() for tok in tokens)]
    return (matching or lines)[:max_lines]
|
||||||
|
|
||||||
|
|
||||||
|
def _rank_metric_lines(lines: list[str], tokens: set[str], max_lines: int) -> list[str]:
|
||||||
|
if not lines or not tokens:
|
||||||
|
return []
|
||||||
|
ranked: list[tuple[int, int, str]] = []
|
||||||
|
for line in lines:
|
||||||
|
lower = line.lower()
|
||||||
|
hits = sum(1 for tok in tokens if tok in lower)
|
||||||
|
if not hits:
|
||||||
|
continue
|
||||||
|
has_number = 1 if re.search(r"\d", line) else 0
|
||||||
|
ranked.append((has_number, hits, line))
|
||||||
|
ranked.sort(key=lambda item: (-item[0], -item[1], item[2]))
|
||||||
|
return [item[2] for item in ranked[:max_lines]]
|
||||||
|
|
||||||
|
|
||||||
|
def _select_metric_line(lines: list[str], question: str, tokens: list[str] | set[str]) -> str | None:
    """Pick the best metric line for *question*, preferring totals for count questions."""
    if not lines or not tokens:
        return None
    token_set = {str(tok).lower() for tok in tokens if tok}
    ranked = _rank_metric_lines(lines, token_set, max_lines=6)
    if not ranked:
        return None
    question_lower = (question or "").lower()
    if any(term in question_lower for term in ("how many", "count", "total")):
        for candidate in ranked:
            lowered = candidate.lower()
            if "total" in lowered or "count" in lowered:
                return candidate
    return ranked[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _format_direct_metric_line(line: str) -> str:
    """Render a raw metric line as prose, trying colon then equals formats."""
    if not line:
        return ""
    if ":" in line:
        colon_text = _format_colon_metric(line)
        if colon_text:
            return colon_text
    if "=" in line:
        equals_text = _format_equals_metric(line)
        if equals_text:
            return equals_text
    # No recognized structure: hand the line back unchanged.
    return line
|
||||||
|
|
||||||
|
|
||||||
|
def _format_colon_metric(line: str) -> str | None:
    """Turn 'key: value' into prose, with node-specific phrasings."""
    key, value = line.split(":", 1)
    key = key.strip().replace("_", " ")
    value = value.strip()
    if not value:
        return None
    if key == "nodes":
        nodes_text = _format_nodes_value(value)
        if nodes_text:
            return nodes_text
    if key in {"nodes total", "nodes_total"}:
        return f"Atlas has {value} total nodes."
    return f"{key} is {value}."
|
||||||
|
|
||||||
|
|
||||||
|
def _format_equals_metric(line: str) -> str | None:
|
||||||
|
pairs: list[str] = []
|
||||||
|
for part in line.split(","):
|
||||||
|
if "=" not in part:
|
||||||
|
continue
|
||||||
|
key, value = part.split("=", 1)
|
||||||
|
key = key.strip().replace("_", " ")
|
||||||
|
value = value.strip()
|
||||||
|
if not value:
|
||||||
|
continue
|
||||||
|
if key in {"nodes total", "nodes_total"}:
|
||||||
|
return f"Atlas has {value} total nodes."
|
||||||
|
pairs.append(f"{key} is {value}")
|
||||||
|
if not pairs:
|
||||||
|
return None
|
||||||
|
if len(pairs) == 1:
|
||||||
|
return f"{pairs[0]}."
|
||||||
|
return "; ".join(pairs) + "."
|
||||||
|
|
||||||
|
|
||||||
|
def _format_nodes_value(value: str) -> str | None:
|
||||||
|
parts = [p.strip() for p in value.split(",") if p.strip()]
|
||||||
|
total = None
|
||||||
|
rest: list[str] = []
|
||||||
|
for part in parts:
|
||||||
|
if part.startswith("total="):
|
||||||
|
total = part.split("=", 1)[1]
|
||||||
|
else:
|
||||||
|
rest.append(part.replace("_", " "))
|
||||||
|
if not total:
|
||||||
|
return None
|
||||||
|
if rest:
|
||||||
|
return f"Atlas has {total} total nodes ({'; '.join(rest)})."
|
||||||
|
return f"Atlas has {total} total nodes."
|
||||||
|
|
||||||
|
|
||||||
|
def _global_facts(lines: list[str]) -> list[str]:
    """Pull cluster-wide headline facts (node counts, cluster name) from *lines*."""
    if not lines:
        return []
    wanted = ("nodes_total", "nodes_ready", "cluster_name", "cluster", "nodes_not_ready")
    hits = [line for line in lines if any(key in line.lower() for key in wanted)]
    return _dedupe_lines(hits, limit=6)
|
||||||
|
|
||||||
|
|
||||||
|
def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
    """True when any line contains any expanded keyword token."""
    if not lines or not keywords:
        return False
    tokens = _expand_tokens(keywords)
    if not tokens:
        return False
    return any(any(tok in line.lower() for tok in tokens) for line in lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
|
||||||
|
merged: list[str] = []
|
||||||
|
for token in primary + secondary + (third or []):
|
||||||
|
if not token:
|
||||||
|
continue
|
||||||
|
if token not in merged:
|
||||||
|
merged.append(token)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_question_tokens(question: str) -> list[str]:
    """Lowercased unique tokens of length >= TOKEN_MIN_LEN, in order of appearance."""
    if not question:
        return []
    tokens: list[str] = []
    for piece in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
        if len(piece) >= TOKEN_MIN_LEN and piece not in tokens:
            tokens.append(piece)
    return tokens
|
||||||
|
|
||||||
|
|
||||||
|
def _expand_tokens(tokens: list[str]) -> list[str]:
    """Split raw keyword strings into normalized sub-tokens, deduplicated in order."""
    if not tokens:
        return []
    expanded: list[str] = []
    for raw in tokens:
        if not isinstance(raw, str):
            continue
        for piece in re.split(r"[^a-zA-Z0-9_-]+", raw.lower()):
            if len(piece) >= TOKEN_MIN_LEN and piece not in expanded:
                expanded.append(piece)
    return expanded
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_token_coverage(lines: list[str], tokens: list[str], summary_lines: list[str], max_add: int = 4) -> list[str]:
    """Prepend up to *max_add* summary lines covering tokens absent from *lines*."""
    if not lines or not tokens or not summary_lines:
        return lines
    haystack = " ".join(lines).lower()
    missing = [tok for tok in tokens if tok and tok.lower() not in haystack]
    if not missing:
        return lines
    additions: list[str] = []
    for token in missing:
        needle = token.lower()
        # First summary line mentioning the missing token, not already present.
        for candidate in summary_lines:
            if needle in candidate.lower() and candidate not in lines and candidate not in additions:
                additions.append(candidate)
                break
        if len(additions) >= max_add:
            break
    if not additions:
        return lines
    return _merge_fact_lines(additions, lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
    """Line with the highest expanded-keyword hit count; None when nothing matches."""
    if not lines or not keywords:
        return None
    tokens = _expand_tokens(keywords)
    if not tokens:
        return None
    winner: str | None = None
    winner_score = 0
    for candidate in lines:
        lowered = candidate.lower()
        hits = sum(1 for tok in tokens if tok in lowered)
        if hits > winner_score:
            winner_score = hits
            winner = candidate
    return winner
|
||||||
|
|
||||||
|
|
||||||
|
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
|
||||||
|
if not lines or not prefix:
|
||||||
|
return None
|
||||||
|
lower_prefix = prefix.lower()
|
||||||
|
for line in lines:
|
||||||
|
if str(line).lower().startswith(lower_prefix):
|
||||||
|
return line
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _non_rpi_nodes(summary: dict[str, Any]) -> dict[str, list[str]]:
|
||||||
|
hardware = summary.get("hardware_by_node") if isinstance(summary, dict) else None
|
||||||
|
if not isinstance(hardware, dict):
|
||||||
|
return {}
|
||||||
|
grouped: dict[str, list[str]] = {}
|
||||||
|
for node, hw in hardware.items():
|
||||||
|
if not isinstance(node, str) or not isinstance(hw, str):
|
||||||
|
continue
|
||||||
|
if hw.startswith("rpi"):
|
||||||
|
continue
|
||||||
|
grouped.setdefault(hw, []).append(node)
|
||||||
|
for nodes in grouped.values():
|
||||||
|
nodes.sort()
|
||||||
|
return grouped
|
||||||
|
|
||||||
|
|
||||||
|
def _format_hardware_groups(groups: dict[str, list[str]], label: str) -> str:
|
||||||
|
if not groups:
|
||||||
|
return ""
|
||||||
|
parts = []
|
||||||
|
for hw, nodes in sorted(groups.items()):
|
||||||
|
parts.append(f"{hw} ({', '.join(nodes)})")
|
||||||
|
return f"{label}: " + "; ".join(parts) + "."
|
||||||
|
|
||||||
|
|
||||||
|
def _lexicon_context(summary: dict[str, Any]) -> str: # noqa: C901
|
||||||
|
if not isinstance(summary, dict):
|
||||||
|
return ""
|
||||||
|
lexicon = summary.get("lexicon")
|
||||||
|
if not isinstance(lexicon, dict):
|
||||||
|
return ""
|
||||||
|
terms = lexicon.get("terms")
|
||||||
|
aliases = lexicon.get("aliases")
|
||||||
|
lines: list[str] = []
|
||||||
|
if isinstance(terms, list):
|
||||||
|
for entry in terms[:8]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
term = entry.get("term")
|
||||||
|
meaning = entry.get("meaning")
|
||||||
|
if term and meaning:
|
||||||
|
lines.append(f"{term}: {meaning}")
|
||||||
|
if isinstance(aliases, dict):
|
||||||
|
for key, value in list(aliases.items())[:6]:
|
||||||
|
if key and value:
|
||||||
|
lines.append(f"alias {key} -> {value}")
|
||||||
|
if not lines:
|
||||||
|
return ""
|
||||||
|
return "Lexicon:\n" + "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Extract the first {...} span from *text* and parse it as JSON."""
    stripped = text.strip()
    found = re.search(r"\{.*\}", stripped, flags=re.S)
    candidate = found.group(0) if found else stripped
    return parse_json(candidate, fallback=fallback)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_list(text: str) -> list[dict[str, Any]]:
    """Extract the first [...] span from *text*; return only its dict entries."""
    stripped = text.strip()
    found = re.search(r"\[.*\]", stripped, flags=re.S)
    data = parse_json(found.group(0) if found else stripped, fallback={})
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def _scores_from_json(data: dict[str, Any]) -> AnswerScores:
    """Build AnswerScores from an LLM JSON payload, defaulting missing fields to 60/medium."""
    risk = str(data.get("hallucination_risk") or "medium")
    return AnswerScores(
        confidence=_coerce_int(data.get("confidence"), 60),
        relevance=_coerce_int(data.get("relevance"), 60),
        satisfaction=_coerce_int(data.get("satisfaction"), 60),
        hallucination_risk=risk,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_int(value: Any, default: int) -> int:
|
||||||
|
try:
|
||||||
|
return int(float(value))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _default_scores() -> AnswerScores:
    """Neutral fallback AnswerScores (all 60, medium hallucination risk)."""
    return AnswerScores(
        confidence=60,
        relevance=60,
        satisfaction=60,
        hallucination_risk="medium",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _style_hint(classify: dict[str, Any]) -> str:
|
||||||
|
style = (classify.get("answer_style") or "").strip().lower()
|
||||||
|
qtype = (classify.get("question_type") or "").strip().lower()
|
||||||
|
if style == "insightful" or qtype in {"open_ended", "planning"}:
|
||||||
|
return "insightful"
|
||||||
|
return "direct"
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
|
||||||
|
if not reply:
|
||||||
|
return False
|
||||||
|
lowered = reply.lower()
|
||||||
|
missing_markers = (
|
||||||
|
"don't have",
|
||||||
|
"do not have",
|
||||||
|
"don't know",
|
||||||
|
"cannot",
|
||||||
|
"can't",
|
||||||
|
"need to",
|
||||||
|
"would need",
|
||||||
|
"does not provide",
|
||||||
|
"does not mention",
|
||||||
|
"not mention",
|
||||||
|
"not provided",
|
||||||
|
"not in context",
|
||||||
|
"not referenced",
|
||||||
|
"missing",
|
||||||
|
"no specific",
|
||||||
|
"no information",
|
||||||
|
)
|
||||||
|
if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):
|
||||||
|
return True
|
||||||
|
return classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply)
|
||||||
|
|
||||||
|
|
||||||
|
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
|
||||||
|
style = (classify.get("answer_style") or "").strip().lower()
|
||||||
|
qtype = (classify.get("question_type") or "").strip().lower()
|
||||||
|
return style == "insightful" or qtype in {"open_ended", "planning"}
|
||||||
|
|
||||||
|
|
||||||
|
async def _apply_insight_guard(inputs: InsightGuardInput) -> str:
    """Check an insightful-style answer with a guard model and rewrite it if flagged.

    Returns the reply unchanged when the guard does not apply or reports ok=True;
    otherwise runs an insight-fix pass (optionally with up to six fact lines).
    """
    if not inputs.reply or not _should_use_insight_guard(inputs.classify):
        return inputs.reply
    guard_prompt = prompts.INSIGHT_GUARD_PROMPT.format(question=inputs.question, answer=inputs.reply)
    # Cheap guard pass on the fast model.
    guard_raw = await inputs.call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        guard_prompt,
        context=inputs.context,
        model=inputs.plan.fast_model,
        tag="insight_guard",
    )
    guard = _parse_json_block(guard_raw, fallback={})
    # Strict check: only a literal true passes; malformed output triggers the fix.
    if guard.get("ok") is True:
        return inputs.reply
    fix_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=inputs.question, answer=inputs.reply)
    if inputs.facts:
        fix_prompt = fix_prompt + "\nFacts:\n" + "\n".join(inputs.facts[:6])
    # Rewrite on the full model.
    return await inputs.call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        fix_prompt,
        context=inputs.context,
        model=inputs.plan.model,
        tag="insight_fix",
    )
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
276
atlasbot/engine/answerer/post_ext.py
Normal file
276
atlasbot/engine/answerer/post_ext.py
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import difflib
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
|
||||||
|
|
||||||
|
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
|
||||||
|
if not reply or not metric_facts:
|
||||||
|
return True
|
||||||
|
reply_numbers = set(re.findall(r"\d+(?:\\.\d+)?", reply))
|
||||||
|
if not reply_numbers:
|
||||||
|
return False
|
||||||
|
fact_numbers: set[str] = set()
|
||||||
|
value_pattern = re.compile(r"(?:>=|<=|=|:)\s*(\d+(?:\.\d+)?)")
|
||||||
|
filtered = metric_facts
|
||||||
|
if tokens:
|
||||||
|
token_set = {str(tok).lower() for tok in tokens if tok}
|
||||||
|
focused = []
|
||||||
|
for line in metric_facts:
|
||||||
|
key = line.split(":", 1)[0].lower()
|
||||||
|
if any(tok in key for tok in token_set):
|
||||||
|
focused.append(line)
|
||||||
|
if focused:
|
||||||
|
filtered = focused
|
||||||
|
for line in filtered:
|
||||||
|
for match in value_pattern.findall(line):
|
||||||
|
fact_numbers.add(match)
|
||||||
|
if not fact_numbers:
|
||||||
|
return False
|
||||||
|
return bool(reply_numbers & fact_numbers)
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_dedup(reply: str) -> bool:
    """True when the reply repeats a normalized sentence verbatim."""
    if not reply:
        return False
    sentences = [part.strip() for part in re.split(r"(?<=[.!?])\s+", reply) if part.strip()]
    if len(sentences) < DEDUP_MIN_SENTENCES:
        return False
    normalized_seen: set[str] = set()
    for sentence in sentences:
        normalized = re.sub(r"\s+", " ", sentence.lower())
        if normalized in normalized_seen:
            return True
        normalized_seen.add(normalized)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
|
||||||
|
if not reply:
|
||||||
|
return False
|
||||||
|
q_lower = (question or "").lower()
|
||||||
|
if classify.get("question_type") not in {"metric", "diagnostic"} and not re.search(r"\b(how many|list|count)\b", q_lower):
|
||||||
|
return False
|
||||||
|
missing_markers = (
|
||||||
|
"does not provide",
|
||||||
|
"does not specify",
|
||||||
|
"not available",
|
||||||
|
"not provided",
|
||||||
|
"cannot determine",
|
||||||
|
"don't have",
|
||||||
|
"do not have",
|
||||||
|
"insufficient",
|
||||||
|
"no data",
|
||||||
|
)
|
||||||
|
if any(marker in reply.lower() for marker in missing_markers):
|
||||||
|
return True
|
||||||
|
if reply.count(".") <= 1:
|
||||||
|
return False
|
||||||
|
extra_markers = ("for more", "if you need", "additional", "based on")
|
||||||
|
return any(marker in reply.lower() for marker in extra_markers)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_keywords(raw_question: str, normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
    """Collect up to 12 deduplicated lowercase tokens from the question texts plus explicit keywords."""
    stopwords = {
        "the", "and", "for", "with", "that", "this", "what", "which", "when",
        "where", "who", "why", "how", "tell", "show", "list", "give", "about",
        "right", "now",
    }
    tokens: list[str] = []
    for text in [raw_question, normalized, *sub_questions]:
        for piece in re.split(r"[^a-zA-Z0-9_-]+", text.lower()):
            if len(piece) >= TOKEN_MIN_LEN and piece not in stopwords:
                tokens.append(piece)
    for kw in keywords or []:
        if not isinstance(kw, str):
            continue
        piece = kw.strip().lower()
        if piece and piece not in stopwords and piece not in tokens:
            tokens.append(piece)
    # dict.fromkeys keeps first-seen order while deduplicating.
    return list(dict.fromkeys(tokens))[:12]
|
||||||
|
|
||||||
|
|
||||||
|
def _allowed_nodes(summary: dict[str, Any]) -> list[str]:
|
||||||
|
hardware = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
|
||||||
|
if hardware:
|
||||||
|
return sorted([node for node in hardware if isinstance(node, str)])
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def _allowed_namespaces(summary: dict[str, Any]) -> list[str]:
|
||||||
|
namespaces: list[str] = []
|
||||||
|
for entry in summary.get("namespace_pods") or []:
|
||||||
|
if isinstance(entry, dict):
|
||||||
|
name = entry.get("namespace")
|
||||||
|
if name:
|
||||||
|
namespaces.append(str(name))
|
||||||
|
return sorted(set(namespaces))
|
||||||
|
|
||||||
|
|
||||||
|
def _find_unknown_nodes(reply: str, allowed: list[str]) -> list[str]:
|
||||||
|
if not reply or not allowed:
|
||||||
|
return []
|
||||||
|
pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
|
||||||
|
found = {m.group(1) for m in pattern.finditer(reply)}
|
||||||
|
if not found:
|
||||||
|
return []
|
||||||
|
allowed_set = {a.lower() for a in allowed}
|
||||||
|
return sorted({item for item in found if item.lower() not in allowed_set})
|
||||||
|
|
||||||
|
|
||||||
|
def _find_unknown_namespaces(reply: str, allowed: list[str]) -> list[str]:
|
||||||
|
if not reply or not allowed:
|
||||||
|
return []
|
||||||
|
pattern = re.compile(r"\bnamespace\s+([a-z0-9-]+)\b", re.IGNORECASE)
|
||||||
|
found = {m.group(1) for m in pattern.finditer(reply)}
|
||||||
|
if not found:
|
||||||
|
return []
|
||||||
|
allowed_set = {a.lower() for a in allowed}
|
||||||
|
return sorted({item for item in found if item.lower() not in allowed_set})
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_runbook_fix(reply: str, allowed: list[str]) -> bool:
|
||||||
|
if not reply or not allowed:
|
||||||
|
return False
|
||||||
|
paths = set(re.findall(r"runbooks/[A-Za-z0-9._-]+", reply))
|
||||||
|
if not paths:
|
||||||
|
return False
|
||||||
|
allowed_set = {p.lower() for p in allowed}
|
||||||
|
return any(path.lower() not in allowed_set for path in paths)
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_runbook_reference(question: str, allowed: list[str], reply: str) -> bool:
|
||||||
|
if not allowed or not question:
|
||||||
|
return False
|
||||||
|
lowered = question.lower()
|
||||||
|
cues = ("runbook", "checklist", "documented", "documentation", "where", "guide")
|
||||||
|
if not any(cue in lowered for cue in cues):
|
||||||
|
return False
|
||||||
|
if not reply:
|
||||||
|
return True
|
||||||
|
for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply):
|
||||||
|
if token.lower() in {p.lower() for p in allowed}:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _best_runbook_match(candidate: str, allowed: list[str]) -> str | None:
    """Fuzzy-match *candidate* against *allowed* paths; None when below the similarity threshold."""
    if not candidate or not allowed:
        return None
    needle = candidate.lower()
    top_path: str | None = None
    top_score = 0.0
    for path in allowed:
        ratio = difflib.SequenceMatcher(a=needle, b=path.lower()).ratio()
        if ratio > top_score:
            top_score = ratio
            top_path = path
    return top_path if top_score >= RUNBOOK_SIMILARITY_THRESHOLD else None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_path(data: Any, path: str) -> Any | None:
|
||||||
|
if path.startswith("line:"):
|
||||||
|
return path.split("line:", 1)[1].strip()
|
||||||
|
cursor = data
|
||||||
|
for part in re.split(r"\.(?![^\[]*\])", path):
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
match = re.match(r"^(\w+)(?:\[(\d+)\])?$", part)
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
key = match.group(1)
|
||||||
|
index = match.group(2)
|
||||||
|
if isinstance(cursor, dict):
|
||||||
|
cursor = cursor.get(key)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
if index is not None:
|
||||||
|
idx = int(index)
|
||||||
|
if isinstance(cursor, list) and 0 <= idx < len(cursor):
|
||||||
|
cursor = cursor[idx]
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
return cursor
|
||||||
|
|
||||||
|
|
||||||
|
def _snapshot_id(summary: dict[str, Any]) -> str | None:
|
||||||
|
if not summary:
|
||||||
|
return None
|
||||||
|
for key in ("generated_at", "snapshot_ts", "snapshot_id"):
|
||||||
|
value = summary.get(key)
|
||||||
|
if isinstance(value, str) and value:
|
||||||
|
return value
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _claims_to_payload(claims: list[ClaimItem]) -> list[dict[str, Any]]:
|
||||||
|
output: list[dict[str, Any]] = []
|
||||||
|
for claim in claims:
|
||||||
|
evidence = []
|
||||||
|
for ev in claim.evidence:
|
||||||
|
evidence.append(
|
||||||
|
{
|
||||||
|
"path": ev.path,
|
||||||
|
"reason": ev.reason,
|
||||||
|
"value_at_claim": ev.value_at_claim,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
output.append({"id": claim.id, "claim": claim.claim, "evidence": evidence})
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def _state_from_payload(payload: dict[str, Any] | None) -> ConversationState | None:
    """Rehydrate a ConversationState from a persisted payload dict.

    Malformed entries are skipped rather than raising: a claim is kept only
    when it has a non-empty id, non-empty text, and at least one evidence
    item with a non-empty path.
    """
    if not payload:
        return None
    claims_raw = payload.get("claims") if isinstance(payload, dict) else None
    claims: list[ClaimItem] = []
    if isinstance(claims_raw, list):
        for entry in claims_raw:
            if not isinstance(entry, dict):
                continue
            claim_text = str(entry.get("claim") or "").strip()
            claim_id = str(entry.get("id") or "").strip()
            if not claim_text or not claim_id:
                continue
            evidence_items: list[EvidenceItem] = []
            for ev in entry.get("evidence") or []:
                if not isinstance(ev, dict):
                    continue
                path = str(ev.get("path") or "").strip()
                if not path:
                    # Evidence without a path cannot be re-verified; drop it.
                    continue
                reason = str(ev.get("reason") or "").strip()
                value_at_claim = ev.get("value_at_claim")
                evidence_items.append(EvidenceItem(path=path, reason=reason, value_at_claim=value_at_claim))
            if evidence_items:
                claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
    return ConversationState(
        # Missing/zero timestamps fall back to "now" on the monotonic clock.
        updated_at=float(payload.get("updated_at") or time.monotonic()),
        claims=claims,
        snapshot_id=payload.get("snapshot_id"),
        snapshot=payload.get("snapshot"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
344
atlasbot/engine/answerer/retrieval.py
Normal file
344
atlasbot/engine/answerer/retrieval.py
Normal file
@ -0,0 +1,344 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from collections.abc import Awaitable
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
from atlasbot.llm.client import parse_json
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .post_ext import _extract_keywords
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Extract the first {...} region from *text* and parse it as JSON; use *fallback* on failure."""
    stripped = text.strip()
    embedded = re.search(r"\{.*\}", stripped, flags=re.S)
    candidate = embedded.group(0) if embedded else stripped
    return parse_json(candidate, fallback=fallback)
|
||||||
|
|
||||||
|
|
||||||
|
async def _select_metric_chunks(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    chunks: list[dict[str, Any]],
    plan: ModePlan,
) -> tuple[list[str], list[str]]:
    """Pick the metric summary keys relevant to the question and map them to chunk ids.

    Asks the fast model for key picks, sanity-checks them against keyword
    overlap, then runs a second "what's missing" validation pass.

    Returns:
        (selected_keys, chunk_ids); both empty when nothing can be selected.
    """
    summary_lines, question, sub_questions, keywords, token_set = _metric_ctx_values(ctx)
    if not summary_lines or not chunks:
        return [], []
    keys = _extract_metric_keys(summary_lines)
    if not keys:
        return [], []
    max_keys = max(4, plan.max_subquestions * 2)
    # Keyword-filtered keys are preferred; fall back to the full key list.
    candidate_keys = _filter_metric_keys(keys, token_set)
    available_keys = candidate_keys or keys
    prompt = prompts.METRIC_KEYS_PROMPT.format(available="\n".join(available_keys), max_keys=max_keys)
    raw = await call_llm(
        prompts.METRIC_KEYS_SYSTEM,
        prompt + "\nQuestion: " + str(question) + "\nSubQuestions:\n" + "\n".join([str(item) for item in sub_questions]),
        context="Keywords:\n" + ", ".join([str(item) for item in keywords if item]),
        model=plan.fast_model,
        tag="metric_keys",
    )
    selected = _parse_key_list(raw, available_keys, max_keys)
    if candidate_keys:
        selected = _merge_metric_keys(selected, candidate_keys, max_keys)
    # Distrust LLM picks that share no tokens with the question keywords.
    if selected and candidate_keys and not _metric_key_overlap(selected, token_set):
        selected = candidate_keys[:max_keys]
    if not selected and candidate_keys:
        selected = candidate_keys[:max_keys]
    if available_keys:
        # Second pass: ask which advertised keys the selection still misses.
        missing = await _validate_metric_keys(
            call_llm,
            {
                "question": question,
                "sub_questions": sub_questions,
                "selected": selected,
            },
            available_keys,
            plan,
        )
        if missing:
            selected = _merge_metric_keys(selected, missing, max_keys)
    if not selected:
        return [], []
    ids = _chunk_ids_for_keys(chunks, selected)
    return selected, ids
|
||||||
|
|
||||||
|
|
||||||
|
async def _validate_metric_keys(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    available: list[str],
    plan: ModePlan,
) -> list[str]:
    """Ask the fast model which *available* metric keys the current selection is missing.

    Returns only keys that actually appear in the (capped) available list;
    empty on parse failure or when nothing is reported missing.
    """
    if not available:
        return []
    question = str(ctx.get("question") or "")
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    selected = ctx.get("selected") if isinstance(ctx.get("selected"), list) else []
    # Cap the advertised key list so the prompt stays bounded.
    cap = max(12, plan.max_subquestions * 4)
    available_list = available[:cap]
    prompt = prompts.METRIC_KEYS_VALIDATE_PROMPT.format(
        question=question,
        sub_questions=json.dumps(sub_questions),
        selected=json.dumps(selected),
        available="\n".join(available_list),
    )
    raw = await call_llm(
        prompts.METRIC_KEYS_VALIDATE_SYSTEM,
        prompt,
        model=plan.fast_model,
        tag="metric_keys_validate",
    )
    parsed = _parse_json_block(raw, fallback={})
    items = parsed.get("missing") if isinstance(parsed, dict) else []
    if not isinstance(items, list):
        return []
    available_set = set(available_list)
    out: list[str] = []
    for item in items:
        # Keep only real, de-duplicated keys from the advertised list.
        if isinstance(item, str) and item in available_set and item not in out:
            out.append(item)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
async def _gather_limited(coros: list[Awaitable[Any]], limit: int) -> list[Any]:
|
||||||
|
if not coros:
|
||||||
|
return []
|
||||||
|
semaphore = asyncio.Semaphore(max(1, limit))
|
||||||
|
|
||||||
|
async def runner(coro: Awaitable[Any]) -> Any:
|
||||||
|
async with semaphore:
|
||||||
|
return await coro
|
||||||
|
|
||||||
|
return await asyncio.gather(*(runner(coro) for coro in coros))
|
||||||
|
|
||||||
|
|
||||||
|
def _metric_ctx_values(ctx: dict[str, Any]) -> tuple[list[str], str, list[str], list[str], set[str]]:
    """Unpack the retrieval context into (summary_lines, question, sub_questions, keywords, token_set)."""
    summary_lines = ctx.get("summary_lines") if isinstance(ctx, dict) else None
    if not isinstance(summary_lines, list):
        return [], "", [], [], set()
    question = ctx.get("question") if isinstance(ctx, dict) else ""
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    keywords = ctx.get("keywords") if isinstance(ctx.get("keywords"), list) else []
    keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx.get("keyword_tokens"), list) else []
    tokens = {str(tok).lower() for tok in keyword_tokens if tok}
    derived = _extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords)
    tokens.update(tok.lower() for tok in derived)
    return summary_lines, str(question), sub_questions, keywords, _token_variants(tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_metric_keys(lines: list[str]) -> list[str]:
|
||||||
|
keys: list[str] = []
|
||||||
|
for line in lines:
|
||||||
|
if ":" not in line:
|
||||||
|
continue
|
||||||
|
key = line.split(":", 1)[0].strip()
|
||||||
|
if not key or " " in key:
|
||||||
|
continue
|
||||||
|
if key not in keys:
|
||||||
|
keys.append(key)
|
||||||
|
return keys
|
||||||
|
|
||||||
|
|
||||||
|
def _token_variants(tokens: set[str]) -> set[str]:
    """Expand tokens with naive singular forms (ies->y, strip -es, strip -s) for loose matching."""
    if not tokens:
        return set()
    expanded = set(tokens)
    for token in list(tokens):
        # Tokens at or below the minimum length are kept but not expanded.
        if len(token) <= TOKEN_MIN_LEN:
            continue
        if token.endswith("ies"):
            expanded.add(token[:-3] + "y")
        if token.endswith("es"):
            expanded.add(token[:-2])
        if token.endswith("s"):
            expanded.add(token[:-1])
    return expanded
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
    """Parse LLM output into an ordered list of at most *max_keys* keys drawn from *allowed*."""
    parsed = _parse_json_block(raw, fallback={})
    if isinstance(parsed, list):
        items = parsed
    elif isinstance(parsed, dict):
        items = parsed.get("keys")
    else:
        items = []
    if not isinstance(items, list):
        return []
    allowed_set = set(allowed)
    picked: list[str] = []
    for item in items:
        if not isinstance(item, str):
            continue
        if item in allowed_set and item not in picked:
            picked.append(item)
        if len(picked) >= max_keys:
            break
    return picked
|
||||||
|
|
||||||
|
|
||||||
|
def _chunk_ids_for_keys(chunks: list[dict[str, Any]], keys: list[str]) -> list[str]:
|
||||||
|
if not keys:
|
||||||
|
return []
|
||||||
|
ids: list[str] = []
|
||||||
|
key_set = {f"{key}:" for key in keys}
|
||||||
|
for chunk in chunks:
|
||||||
|
text = str(chunk.get("text") or "")
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
for line in text.splitlines():
|
||||||
|
for key in key_set:
|
||||||
|
if line.startswith(key):
|
||||||
|
cid = chunk.get("id")
|
||||||
|
if cid and cid not in ids:
|
||||||
|
ids.append(cid)
|
||||||
|
break
|
||||||
|
return ids
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_metric_keys(keys: list[str], tokens: set[str]) -> list[str]:
    """Rank keys by how many keyword tokens their name parts hit; zero-hit keys are dropped."""
    if not keys or not tokens:
        return []
    wanted = {tok.lower() for tok in tokens if tok and len(tok) >= TOKEN_MIN_LEN}
    scored: list[tuple[int, str]] = []
    for key in keys:
        pieces = {piece for piece in re.split(r"[_\W]+", key.lower()) if piece}
        if not pieces:
            continue
        overlap = len(pieces & wanted)
        if overlap:
            scored.append((overlap, key))
    # Most hits first; ties broken alphabetically for determinism.
    scored.sort(key=lambda pair: (-pair[0], pair[1]))
    return [key for _, key in scored]
|
||||||
|
|
||||||
|
|
||||||
|
def _metric_key_overlap(keys: list[str], tokens: set[str]) -> bool:
    """True when any key shares a name part with the keyword tokens."""
    if not keys or not tokens:
        return False
    wanted = {tok.lower() for tok in tokens if tok and len(tok) >= TOKEN_MIN_LEN}
    return any(
        {piece for piece in re.split(r"[_\W]+", key.lower()) if piece} & wanted
        for key in keys
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _lines_for_metric_keys(lines: list[str], keys: list[str], max_lines: int = 0) -> list[str]:
|
||||||
|
if not lines or not keys:
|
||||||
|
return []
|
||||||
|
prefixes = {f"{key}:" for key in keys}
|
||||||
|
selected: list[str] = []
|
||||||
|
for line in lines:
|
||||||
|
for prefix in prefixes:
|
||||||
|
if prefix in line:
|
||||||
|
selected.append(line)
|
||||||
|
break
|
||||||
|
if max_lines and len(selected) >= max_lines:
|
||||||
|
break
|
||||||
|
return selected
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_metric_keys(current: list[str], candidates: list[str], max_keys: int) -> list[str]:
|
||||||
|
merged: list[str] = []
|
||||||
|
seen = set()
|
||||||
|
for key in current:
|
||||||
|
if key and key not in seen:
|
||||||
|
merged.append(key)
|
||||||
|
seen.add(key)
|
||||||
|
for key in candidates:
|
||||||
|
if key and key not in seen:
|
||||||
|
merged.append(key)
|
||||||
|
seen.add(key)
|
||||||
|
if len(merged) >= max_keys:
|
||||||
|
break
|
||||||
|
return merged[:max_keys]
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
|
||||||
|
seen = set()
|
||||||
|
merged: list[str] = []
|
||||||
|
for line in primary + fallback:
|
||||||
|
if line in seen:
|
||||||
|
continue
|
||||||
|
seen.add(line)
|
||||||
|
merged.append(line)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _expand_hottest_line(line: str) -> list[str]:
|
||||||
|
if not line:
|
||||||
|
return []
|
||||||
|
if not line.lower().startswith("hottest:"):
|
||||||
|
return []
|
||||||
|
expanded: list[str] = []
|
||||||
|
payload = line.split("hottest:", 1)[1]
|
||||||
|
for part in payload.split(";"):
|
||||||
|
part = part.strip()
|
||||||
|
if not part or "=" not in part:
|
||||||
|
continue
|
||||||
|
metric, rest = part.split("=", 1)
|
||||||
|
metric = metric.strip()
|
||||||
|
match = re.search(r"(?P<node>[^\s\[]+).*\((?P<value>[^)]+)\)", rest)
|
||||||
|
if not match:
|
||||||
|
continue
|
||||||
|
node = match.group("node").strip()
|
||||||
|
value = match.group("value").strip()
|
||||||
|
class_match = re.search(r"\[(?P<class>[^\]]+)\]", rest)
|
||||||
|
node_class = class_match.group("class").strip() if class_match else ""
|
||||||
|
if node_class:
|
||||||
|
expanded.append(f"hottest_{metric}_node: {node} [{node_class}] ({value})")
|
||||||
|
else:
|
||||||
|
expanded.append(f"hottest_{metric}_node: {node} ({value})")
|
||||||
|
return expanded
|
||||||
|
|
||||||
|
|
||||||
|
def _has_token(text: str, token: str) -> bool:
|
||||||
|
if not text or not token:
|
||||||
|
return False
|
||||||
|
if token == "io":
|
||||||
|
return "i/o" in text or re.search(r"\bio\b", text) is not None
|
||||||
|
return re.search(rf"\b{re.escape(token)}\b", text) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _hotspot_evidence(summary: dict[str, Any]) -> list[str]:
    """Build "hotspot.<metric>: ..." evidence lines from summary["hottest"].

    Each line carries the node, its hardware class (or 'unknown'), the metric
    value (floats formatted to 2 decimals), and the top namespaces on that
    node when available.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    if not hottest:
        return []
    hardware_by_node = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
    node_pods_top = summary.get("node_pods_top") if isinstance(summary.get("node_pods_top"), list) else []
    # Map node -> its "namespaces_top" entries for quick lookup below.
    ns_map = {}
    for item in node_pods_top:
        if not isinstance(item, dict):
            continue
        node = item.get("node")
        namespaces_top = item.get("namespaces_top") if isinstance(item.get("namespaces_top"), list) else []
        ns_map[node] = namespaces_top
    lines: list[str] = []
    for metric, info in hottest.items():
        if not isinstance(info, dict):
            continue
        node = info.get("node")
        value = info.get("value")
        if not node:
            continue
        node_class = hardware_by_node.get(node)
        ns_parts = []
        # Only the first three namespace entries are reported per node.
        for entry in ns_map.get(node, [])[:3]:
            # assumes entries are (namespace, count)-like pairs — TODO confirm
            if isinstance(entry, (list, tuple)) and len(entry) >= NS_ENTRY_MIN_LEN:
                ns_parts.append(f"{entry[0]}={entry[1]}")
        ns_text = ", ".join(ns_parts)
        value_text = f"{value:.2f}" if isinstance(value, (int, float)) else str(value)
        line = f"hotspot.{metric}: node={node} class={node_class or 'unknown'} value={value_text}"
        if ns_text:
            line += f" namespaces_top={ns_text}"
        lines.append(line)
    return lines
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
197
atlasbot/engine/answerer/retrieval_ext.py
Normal file
197
atlasbot/engine/answerer/retrieval_ext.py
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
from atlasbot.llm.client import parse_json
|
||||||
|
from ._base import *
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first brace-delimited JSON region of *text*, or the whole text, with *fallback*."""
    body = text.strip()
    found = re.search(r"\{.*\}", body, flags=re.S)
    if found is not None:
        body = found.group(0)
    return parse_json(body, fallback=fallback)
|
||||||
|
|
||||||
|
|
||||||
|
def _metric_key_tokens(summary_lines: list[str]) -> set[str]:
|
||||||
|
tokens: set[str] = set()
|
||||||
|
for line in summary_lines:
|
||||||
|
if not isinstance(line, str) or ":" not in line:
|
||||||
|
continue
|
||||||
|
key = line.split(":", 1)[0].strip().lower()
|
||||||
|
if not key:
|
||||||
|
continue
|
||||||
|
tokens.add(key)
|
||||||
|
for part in re.split(r"[_\s]+", key):
|
||||||
|
if part:
|
||||||
|
tokens.add(part)
|
||||||
|
return tokens
|
||||||
|
|
||||||
|
|
||||||
|
async def _select_best_candidate(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, tag: str) -> int:
    """Ask the model to pick the best candidate; returns a 0-based index (0 on any failure)."""
    if len(candidates) <= 1:
        return 0
    numbered = "\n".join(f"{idx + 1}) {cand}" for idx, cand in enumerate(candidates))
    prompt = f"{prompts.CANDIDATE_SELECT_PROMPT}\nQuestion: {question}\nCandidates:\n{numbered}"
    raw = await call_llm(prompts.CANDIDATE_SELECT_SYSTEM, prompt, model=plan.model, tag=tag)
    data = _parse_json_block(raw, fallback={})
    best = data.get("best") if isinstance(data, dict) else None
    # The model answers with a 1-based index; anything else falls back to 0.
    if isinstance(best, int) and 1 <= best <= len(candidates):
        return best - 1
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe_lines(lines: list[str], limit: int | None = None) -> list[str]:
|
||||||
|
seen: set[str] = set()
|
||||||
|
cleaned: list[str] = []
|
||||||
|
for line in lines:
|
||||||
|
value = (line or "").strip()
|
||||||
|
if not value or value in seen:
|
||||||
|
continue
|
||||||
|
if value.lower().startswith("lexicon_") or value.lower().startswith("units:"):
|
||||||
|
continue
|
||||||
|
cleaned.append(value)
|
||||||
|
seen.add(value)
|
||||||
|
if limit and len(cleaned) >= limit:
|
||||||
|
break
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_fact_candidates(selected: list[dict[str, Any]], limit: int) -> list[str]:
    """Flatten non-blank text lines from the selected chunks and dedupe them down to *limit*."""
    gathered: list[str] = []
    for chunk in selected:
        text = chunk.get("text") if isinstance(chunk, dict) else None
        if isinstance(text, str):
            gathered.extend(line for line in text.splitlines() if line.strip())
    return _dedupe_lines(gathered, limit=limit)
|
||||||
|
|
||||||
|
|
||||||
|
async def _select_best_list(call_llm: Callable[..., Any], question: str, candidates: list[list[str]], plan: ModePlan, tag: str) -> list[str]:
    """Pick one candidate list via the model; fall back to an order-preserving union when the pick is empty."""
    if not candidates:
        return []
    if len(candidates) == 1:
        return candidates[0]
    rendered = ["; ".join(items) for items in candidates]
    best_idx = await _select_best_candidate(call_llm, question, rendered, plan, tag)
    chosen = candidates[best_idx] if 0 <= best_idx < len(candidates) else candidates[0]
    if chosen:
        return chosen
    union: list[str] = []
    for items in candidates:
        for item in items:
            if item not in union:
                union.append(item)
    return union
|
||||||
|
|
||||||
|
|
||||||
|
async def _extract_fact_types(call_llm: Callable[..., Any], question: str, keywords: list[str], plan: ModePlan) -> list[str]:
    """Sample the fast model up to plan.metric_retries times for fact types, then pick the best list.

    Returns at most 10 cleaned fact-type strings; empty when every attempt
    fails to parse.
    """
    prompt = prompts.FACT_TYPES_PROMPT + "\nQuestion: " + question
    if keywords:
        prompt += "\nKeywords: " + ", ".join(keywords)
    candidates: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.FACT_TYPES_SYSTEM, prompt, model=plan.fast_model, tag="fact_types")
        data = _parse_json_block(raw, fallback={})
        items = data.get("fact_types") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Numbers are accepted and stringified; other item types are dropped.
        cleaned = _dedupe_lines([str(item) for item in items if isinstance(item, (str, int, float))], limit=10)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "fact_types_select")
    return chosen[:10]
|
||||||
|
|
||||||
|
|
||||||
|
async def _derive_signals(call_llm: Callable[..., Any], question: str, fact_types: list[str], plan: ModePlan) -> list[str]:
    """Sample the fast model for signals derived from *fact_types*, then pick the best list.

    Returns at most 12 cleaned signal strings; empty when fact_types is empty
    or every attempt fails to parse.
    """
    if not fact_types:
        return []
    prompt = prompts.SIGNAL_PROMPT.format(question=question, fact_types="; ".join(fact_types))
    candidates: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.SIGNAL_SYSTEM, prompt, model=plan.fast_model, tag="signals")
        data = _parse_json_block(raw, fallback={})
        items = data.get("signals") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Numbers are accepted and stringified; other item types are dropped.
        cleaned = _dedupe_lines([str(item) for item in items if isinstance(item, (str, int, float))], limit=12)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "signals_select")
    return chosen[:12]
|
||||||
|
|
||||||
|
|
||||||
|
async def _scan_chunk_for_signals(call_llm: Callable[..., Any], question: str, signals: list[str], chunk_lines: list[str], plan: ModePlan) -> list[str]:
    """Ask the fast model which chunk lines carry the given signals; best-of-N selection.

    Only lines that literally appear in *chunk_lines* are kept (the model
    cannot invent lines). Returns at most 15 lines.
    """
    if not signals or not chunk_lines:
        return []
    prompt = prompts.CHUNK_SCAN_PROMPT.format(
        signals="; ".join(signals),
        lines="\n".join(chunk_lines),
    )
    # Chunk scanning is capped at two attempts regardless of plan retries.
    attempts = max(1, min(plan.metric_retries, 2))
    candidates: list[list[str]] = []
    for _ in range(attempts):
        raw = await call_llm(prompts.CHUNK_SCAN_SYSTEM, prompt, model=plan.fast_model, tag="chunk_scan")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Filter through the original lines so hallucinated lines are discarded.
        cleaned = [line for line in chunk_lines if line in items]
        cleaned = _dedupe_lines(cleaned, limit=15)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "chunk_scan_select")
    return chosen[:15]
|
||||||
|
|
||||||
|
|
||||||
|
async def _prune_metric_candidates(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, attempts: int) -> list[str]:
    """Prune candidate fact lines down to at most 6 via repeated model passes plus best-of selection.

    Only lines present in *candidates* survive (model output is used as a
    filter, never as new content).
    """
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=6)
    picks: list[list[str]] = []
    for _ in range(max(attempts, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_prune")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Keep only genuine candidate lines, deduped and capped.
        cleaned = [line for line in candidates if line in items]
        cleaned = _dedupe_lines(cleaned, limit=6)
        if cleaned:
            picks.append(cleaned)
    chosen = await _select_best_list(call_llm, question, picks, plan, "fact_prune_select")
    return chosen[:6]
|
||||||
|
|
||||||
|
|
||||||
|
async def _select_fact_lines(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, max_lines: int) -> list[str]:
    """Select up to *max_lines* fact lines from *candidates* via repeated model passes plus best-of selection.

    Same prune prompt as _prune_metric_candidates but with a caller-supplied
    cap and plan-driven attempt count.
    """
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=max_lines)
    picks: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_select")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Keep only genuine candidate lines, deduped and capped.
        cleaned = [line for line in candidates if line in items]
        cleaned = _dedupe_lines(cleaned, limit=max_lines)
        if cleaned:
            picks.append(cleaned)
    chosen = await _select_best_list(call_llm, question, picks, plan, "fact_select_best")
    return chosen[:max_lines]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
404
atlasbot/engine/answerer/spine.py
Normal file
404
atlasbot/engine/answerer/spine.py
Normal file
@ -0,0 +1,404 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.engine.intent_router import IntentMatch
|
||||||
|
from atlasbot.snapshot.builder import summary_text
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
|
||||||
|
|
||||||
|
def _join_context(parts: list[str]) -> str:
|
||||||
|
text = "\n".join([part for part in parts if part])
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _format_metric_value(value: Any) -> str:
|
||||||
|
if isinstance(value, bool):
|
||||||
|
return str(value).lower()
|
||||||
|
if isinstance(value, int):
|
||||||
|
return str(value)
|
||||||
|
if isinstance(value, float):
|
||||||
|
return f"{value:.1f}".rstrip("0").rstrip(".")
|
||||||
|
return str(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_history(history: list[dict[str, str]] | None) -> str:
|
||||||
|
if not history:
|
||||||
|
return ""
|
||||||
|
lines = ["Recent conversation (non-authoritative):"]
|
||||||
|
for entry in history[-4:]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
question = entry.get("q")
|
||||||
|
answer = entry.get("a")
|
||||||
|
role = entry.get("role")
|
||||||
|
content = entry.get("content")
|
||||||
|
if question:
|
||||||
|
lines.append(f"Q: {question}")
|
||||||
|
if answer:
|
||||||
|
lines.append(f"A: {answer}")
|
||||||
|
if role and content:
|
||||||
|
prefix = "Q" if role == "user" else "A"
|
||||||
|
lines.append(f"{prefix}: {content}")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _summary_lines(snapshot: dict[str, Any] | None) -> list[str]:
    """Split the snapshot summary text into its non-blank lines."""
    text = summary_text(snapshot)
    return [line for line in text.splitlines() if line.strip()] if text else []
|
||||||
|
|
||||||
|
|
||||||
|
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
|
||||||
|
if not lines:
|
||||||
|
return None
|
||||||
|
for line in lines:
|
||||||
|
if line.lower().startswith(prefix.lower()):
|
||||||
|
return line
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_lines(lines: list[str]) -> dict[str, str]:
    """Build the fact spine from summary lines via the per-topic extractors."""
    spine: dict[str, str] = {}
    extractors = (
        _spine_nodes,
        _spine_hardware,
        _spine_hottest,
        _spine_postgres,
        _spine_namespaces,
        _spine_pressure,
    )
    for extract in extractors:
        extract(lines, spine)
    return spine
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_nodes(lines: list[str], spine: dict[str, str]) -> None:
    """Record node count/readiness facts, preferring the combined `nodes:` line
    over the split `nodes_total:` / `nodes_ready:` lines."""
    combined = _line_starting_with(lines, "nodes:")
    if combined:
        spine["nodes_count"] = combined
        spine["nodes_ready"] = combined
        return
    total_line = _line_starting_with(lines, "nodes_total:")
    ready_line = _line_starting_with(lines, "nodes_ready:")
    if total_line:
        spine["nodes_count"] = total_line
    if ready_line:
        spine["nodes_ready"] = ready_line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_hardware(lines: list[str], spine: dict[str, str]) -> None:
    """Record the hardware-mix line, preferring the `hardware_nodes:` form."""
    line = _line_starting_with(lines, "hardware_nodes:") or _line_starting_with(lines, "hardware:")
    if line:
        spine["nodes_non_rpi"] = line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_hottest(lines: list[str], spine: dict[str, str]) -> None:
    """Point every hottest_* spine key at the shared `hottest:` summary line."""
    line = _line_starting_with(lines, "hottest:")
    if not line:
        return
    for key in ("hottest_cpu", "hottest_ram", "hottest_net", "hottest_io", "hottest_disk"):
        spine[key] = line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_postgres(lines: list[str], spine: dict[str, str]) -> None:
    """Record postgres connection totals and the hottest-database line."""
    total_line = _line_starting_with(lines, "postgres_connections_total:")
    if total_line:
        spine["postgres_connections"] = total_line
    hottest_line = _line_starting_with(lines, "postgres:")
    if hottest_line:
        spine["postgres_hottest"] = hottest_line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_namespaces(lines: list[str], spine: dict[str, str]) -> None:
    """Record the namespaces-by-pod-count line, if present."""
    line = _line_starting_with(lines, "namespaces_top:")
    if line:
        spine["namespace_most_pods"] = line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_pressure(lines: list[str], spine: dict[str, str]) -> None:
    """Record node-pressure info, falling back to the load-top line."""
    for prefix in ("pressure_nodes:", "node_load_top:"):
        line = _line_starting_with(lines, prefix)
        if line:
            spine["pressure_summary"] = line
            return
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_group_line(line: str) -> dict[str, list[str]]:
|
||||||
|
groups: dict[str, list[str]] = {}
|
||||||
|
if not line:
|
||||||
|
return groups
|
||||||
|
payload = line.split(":", 1)[1] if ":" in line else line
|
||||||
|
for part in payload.split(";"):
|
||||||
|
part = part.strip()
|
||||||
|
if not part or "=" not in part:
|
||||||
|
continue
|
||||||
|
key, value = part.split("=", 1)
|
||||||
|
value = value.strip()
|
||||||
|
nodes: list[str] = []
|
||||||
|
if "(" in value and ")" in value:
|
||||||
|
inner = value[value.find("(") + 1 : value.rfind(")")]
|
||||||
|
nodes = [item.strip() for item in inner.split(",") if item.strip()]
|
||||||
|
if not nodes:
|
||||||
|
cleaned = re.sub(r"^[0-9]+", "", value).strip()
|
||||||
|
nodes = [item.strip() for item in cleaned.split(",") if item.strip()]
|
||||||
|
groups[key.strip()] = nodes
|
||||||
|
return groups
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_hottest(line: str, metric: str) -> str | None:
|
||||||
|
if not line:
|
||||||
|
return None
|
||||||
|
payload = line.split(":", 1)[1] if ":" in line else line
|
||||||
|
for part in payload.split(";"):
|
||||||
|
part = part.strip()
|
||||||
|
if part.startswith(f"{metric}="):
|
||||||
|
return part
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_answer(intent: IntentMatch, spine_line: str | None) -> str | None:
    """Turn a spine line into a direct answer for the routed intent.

    Falls back to returning the raw spine line when no specific handler exists.
    """
    if not spine_line:
        return None
    kind = intent.kind
    # All hottest_* intents share one metric-parameterised formatter.
    if kind.startswith("hottest_"):
        return _spine_hottest_answer(kind, spine_line)
    handlers = {
        "nodes_count": _spine_nodes_answer,
        "nodes_ready": _spine_nodes_answer,
        "nodes_non_rpi": _spine_non_rpi_answer,
        "hardware_mix": _spine_hardware_answer,
        "postgres_connections": _spine_postgres_answer,
        "postgres_hottest": _spine_postgres_answer,
        "namespace_most_pods": _spine_namespace_answer,
        "pressure_summary": _spine_pressure_answer,
    }
    handler = handlers.get(kind)
    return handler(spine_line) if handler else spine_line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_nodes_answer(line: str) -> str:
    """Node count/readiness spine lines are already phrased as the answer."""
    return line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_non_rpi_answer(line: str) -> str:
    """List the nodes from every non-"rpi*" hardware group, or echo the line."""
    groups = _parse_group_line(line)
    others = [
        node
        for group, nodes in groups.items()
        if not group.lower().startswith("rpi")
        for node in nodes
    ]
    if others:
        return "Non-Raspberry Pi nodes: " + ", ".join(others) + "."
    return line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_hardware_answer(line: str) -> str:
    """The hardware-mix spine line is returned verbatim."""
    return line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_hottest_answer(kind: str, line: str) -> str:
    """Pick the metric-specific segment for a hottest_* intent, else the full line."""
    metric = kind.split("_", 1)[1]
    segment = _parse_hottest(line, metric)
    return segment or line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_postgres_answer(line: str) -> str:
    """Postgres spine lines are already phrased as the answer."""
    return line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_namespace_answer(line: str) -> str:
|
||||||
|
payload = line.split(":", 1)[1] if ":" in line else line
|
||||||
|
top = payload.split(";")[0].strip()
|
||||||
|
if top:
|
||||||
|
return f"Namespace with most pods: {top}."
|
||||||
|
return line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_pressure_answer(line: str) -> str:
    """Pressure spine lines are returned verbatim."""
    return line
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_summary(summary: dict[str, Any]) -> dict[str, str]:
    """Assemble the fact spine from the structured summary dict."""
    if not isinstance(summary, dict) or not summary:
        return {}
    spine: dict[str, str] = {}
    builders = (
        _spine_from_counts,
        _spine_from_hardware,
        _spine_from_hottest,
        _spine_from_postgres,
        _spine_from_namespace_pods,
        _spine_from_pressure,
    )
    for build in builders:
        spine.update(build(summary))
    return spine
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_counts(summary: dict[str, Any]) -> dict[str, str]:
|
||||||
|
counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
|
||||||
|
inventory = summary.get("inventory") if isinstance(summary.get("inventory"), dict) else {}
|
||||||
|
nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
|
||||||
|
workers = inventory.get("workers") if isinstance(inventory.get("workers"), dict) else {}
|
||||||
|
total = nodes.get("total")
|
||||||
|
ready = nodes.get("ready")
|
||||||
|
not_ready = nodes.get("not_ready")
|
||||||
|
if total is None:
|
||||||
|
total = counts.get("nodes_total")
|
||||||
|
if ready is None:
|
||||||
|
ready = counts.get("nodes_ready")
|
||||||
|
if not_ready is None and isinstance(inventory.get("not_ready_names"), list):
|
||||||
|
not_ready = len(inventory.get("not_ready_names") or [])
|
||||||
|
workers_ready = workers.get("ready")
|
||||||
|
workers_total = workers.get("total")
|
||||||
|
if total is None and ready is None and not_ready is None:
|
||||||
|
return {}
|
||||||
|
parts = []
|
||||||
|
if total is not None:
|
||||||
|
parts.append(f"total={int(total)}")
|
||||||
|
if ready is not None:
|
||||||
|
parts.append(f"ready={int(ready)}")
|
||||||
|
if not_ready is not None:
|
||||||
|
parts.append(f"not_ready={int(not_ready)}")
|
||||||
|
if workers_total is not None and workers_ready is not None:
|
||||||
|
parts.append(f"workers_ready={int(workers_ready)}/{int(workers_total)}")
|
||||||
|
line = "nodes: " + ", ".join(parts)
|
||||||
|
return {"nodes_count": line, "nodes_ready": line}
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_hardware(summary: dict[str, Any]) -> dict[str, str]:
|
||||||
|
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
|
||||||
|
if not hardware:
|
||||||
|
return {}
|
||||||
|
parts = []
|
||||||
|
for key, nodes in hardware.items():
|
||||||
|
if not isinstance(nodes, list):
|
||||||
|
continue
|
||||||
|
node_list = ", ".join(str(n) for n in nodes if n)
|
||||||
|
if node_list:
|
||||||
|
parts.append(f"{key}=({node_list})")
|
||||||
|
if not parts:
|
||||||
|
return {}
|
||||||
|
return {"nodes_non_rpi": "hardware: " + "; ".join(parts)}
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_hottest(summary: dict[str, Any]) -> dict[str, str]:
    """Build hottest_* spine entries from summary["hottest"], merged with
    summary["top"]["node_hottest"] as a fallback source.

    Returns {"hottest_<metric>": "<metric>=<node> (<value>)", ...} for each
    metric whose entry names a node, or {} when no hottest data is present.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
    top_hottest = top.get("node_hottest") if isinstance(top.get("node_hottest"), dict) else {}
    # Merge into a fresh dict: the previous implementation wrote fallback keys
    # back into summary["hottest"], mutating the caller's summary in place.
    merged: dict[str, Any] = dict(hottest)
    for key, value in top_hottest.items():
        if key not in merged and value is not None:
            merged[key] = value
    if not merged:
        return {}
    mapping: dict[str, str] = {}
    for metric in ("cpu", "ram", "net", "io", "disk"):
        entry = merged.get(metric)
        if not isinstance(entry, dict):
            continue
        node = entry.get("node") or entry.get("label") or ""
        value = entry.get("value")
        if node:
            mapping[f"hottest_{metric}"] = f"{metric}={node} ({_format_metric_value(value)})"
    return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_postgres(summary: dict[str, Any]) -> dict[str, str]:
|
||||||
|
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
|
||||||
|
if not postgres:
|
||||||
|
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
|
||||||
|
postgres = top.get("postgres") if isinstance(top.get("postgres"), dict) else {}
|
||||||
|
if not postgres:
|
||||||
|
return {}
|
||||||
|
used = postgres.get("used")
|
||||||
|
max_conn = postgres.get("max")
|
||||||
|
hottest = postgres.get("hottest_db") if isinstance(postgres.get("hottest_db"), dict) else {}
|
||||||
|
hottest_label = hottest.get("label") or ""
|
||||||
|
facts: dict[str, str] = {}
|
||||||
|
if used is not None and max_conn is not None:
|
||||||
|
facts["postgres_connections"] = f"postgres_connections_total: used={int(used)}, max={int(max_conn)}"
|
||||||
|
if hottest_label:
|
||||||
|
facts["postgres_hottest"] = f"postgres_hottest_db: {hottest_label}"
|
||||||
|
return facts
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_namespace_pods(summary: dict[str, Any]) -> dict[str, str]:
|
||||||
|
pods = summary.get("namespace_pods") if isinstance(summary.get("namespace_pods"), list) else []
|
||||||
|
if not pods:
|
||||||
|
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
|
||||||
|
pods = top.get("namespace_pods") if isinstance(top.get("namespace_pods"), list) else []
|
||||||
|
if not pods:
|
||||||
|
return {}
|
||||||
|
best_name = ""
|
||||||
|
best_value = None
|
||||||
|
for entry in pods:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
name = entry.get("namespace") or entry.get("name") or entry.get("label") or ""
|
||||||
|
value = entry.get("pods")
|
||||||
|
if value is None:
|
||||||
|
value = entry.get("pods_total")
|
||||||
|
if value is None:
|
||||||
|
value = entry.get("value")
|
||||||
|
try:
|
||||||
|
numeric = float(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
numeric = None
|
||||||
|
if name and numeric is not None and (best_value is None or numeric > best_value):
|
||||||
|
best_name = name
|
||||||
|
best_value = numeric
|
||||||
|
if best_name:
|
||||||
|
return {"namespace_most_pods": f"namespace_most_pods: {best_name} ({int(best_value or 0)} pods)"}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_from_pressure(summary: dict[str, Any]) -> dict[str, str]:
|
||||||
|
pressure = summary.get("pressure_summary") if isinstance(summary.get("pressure_summary"), dict) else {}
|
||||||
|
if not pressure:
|
||||||
|
pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
|
||||||
|
if not pressure:
|
||||||
|
return {}
|
||||||
|
total = pressure.get("total")
|
||||||
|
unsched = pressure.get("unschedulable")
|
||||||
|
names = pressure.get("names") if isinstance(pressure.get("names"), list) else []
|
||||||
|
parts = []
|
||||||
|
if total is None and names:
|
||||||
|
total = len([name for name in names if name])
|
||||||
|
if total is not None:
|
||||||
|
parts.append(f"total={int(total)}")
|
||||||
|
if unsched is not None:
|
||||||
|
parts.append(f"unschedulable={int(unsched)}")
|
||||||
|
if parts:
|
||||||
|
return {"pressure_summary": "pressure_nodes: " + ", ".join(parts)}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _spine_fallback(intent: IntentMatch, lines: list[str]) -> str | None:
|
||||||
|
if not lines:
|
||||||
|
return None
|
||||||
|
keywords = {
|
||||||
|
"nodes_count": ("nodes:", "nodes_total:"),
|
||||||
|
"nodes_ready": ("nodes:", "nodes_ready:"),
|
||||||
|
"postgres_hottest": ("postgres_hottest", "hottest_db", "postgres"),
|
||||||
|
"namespace_most_pods": ("namespace", "pods", "namespaces_top"),
|
||||||
|
"pressure_summary": ("pressure", "node_load_top"),
|
||||||
|
}
|
||||||
|
for token in keywords.get(intent.kind, ("",)):
|
||||||
|
if not token:
|
||||||
|
continue
|
||||||
|
for line in lines:
|
||||||
|
if token in line:
|
||||||
|
return line
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Re-export every module-private helper (single-underscore names) so that the
# package facade's `from .spine import *` picks them up; dunders stay hidden.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||||
484
atlasbot/engine/answerer/workflow.py
Normal file
484
atlasbot/engine/answerer/workflow.py
Normal file
@ -0,0 +1,484 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.engine.intent_router import route_intent
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
from atlasbot.llm.client import build_messages
|
||||||
|
from atlasbot.snapshot.builder import build_summary
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .common import *
|
||||||
|
from .factsheet import *
|
||||||
|
from .post import *
|
||||||
|
from .post_ext import *
|
||||||
|
from .retrieval import *
|
||||||
|
from .retrieval_ext import *
|
||||||
|
from .spine import *
|
||||||
|
from .workflow_post import finalize_answer
|
||||||
|
|
||||||
|
async def run_answer(engine: Any, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult: # noqa: C901
|
||||||
|
"""Answer a question using the staged reasoning pipeline."""
|
||||||
|
|
||||||
|
settings = engine._settings
|
||||||
|
question = (question or "").strip()
|
||||||
|
if not question:
|
||||||
|
return AnswerResult("I need a question to answer.", _default_scores(), {"mode": mode})
|
||||||
|
if mode == "stock":
|
||||||
|
return await engine._answer_stock(question)
|
||||||
|
|
||||||
|
limitless = "run limitless" in question.lower()
|
||||||
|
if limitless:
|
||||||
|
question = re.sub(r"(?i)run limitless", "", question).strip()
|
||||||
|
|
||||||
|
plan = _mode_plan(settings, mode)
|
||||||
|
call_limit = _llm_call_limit(settings, mode)
|
||||||
|
call_cap = math.ceil(call_limit * settings.llm_limit_multiplier)
|
||||||
|
call_count = 0
|
||||||
|
limit_hit = False
|
||||||
|
time_budget_hit = False
|
||||||
|
started = time.monotonic()
|
||||||
|
time_budget_sec = _mode_time_budget(settings, mode) if not limitless else 0.0
|
||||||
|
|
||||||
|
debug_tags = {
|
||||||
|
"route",
|
||||||
|
"decompose",
|
||||||
|
"chunk_score",
|
||||||
|
"chunk_select",
|
||||||
|
"fact_select",
|
||||||
|
"synth",
|
||||||
|
"subanswer",
|
||||||
|
"tool",
|
||||||
|
"followup",
|
||||||
|
"select_claims",
|
||||||
|
"evidence_fix",
|
||||||
|
}
|
||||||
|
|
||||||
|
async def call_llm(system: str, prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
|
||||||
|
nonlocal call_count, limit_hit, time_budget_hit
|
||||||
|
if not limitless and call_count >= call_cap:
|
||||||
|
limit_hit = True
|
||||||
|
raise LLMLimitReached("llm_limit")
|
||||||
|
timeout_sec = None
|
||||||
|
if not limitless and time_budget_sec > 0:
|
||||||
|
time_left = time_budget_sec - (time.monotonic() - started)
|
||||||
|
if time_left <= 0:
|
||||||
|
time_budget_hit = True
|
||||||
|
raise LLMTimeBudgetExceeded("time_budget")
|
||||||
|
timeout_sec = min(settings.ollama_timeout_sec, time_left)
|
||||||
|
call_count += 1
|
||||||
|
messages = build_messages(system, prompt, context=context)
|
||||||
|
try:
|
||||||
|
llm_call = engine._llm.chat(messages, model=model or plan.model, timeout_sec=timeout_sec)
|
||||||
|
if timeout_sec is not None:
|
||||||
|
response = await asyncio.wait_for(llm_call, timeout=max(0.001, timeout_sec))
|
||||||
|
else:
|
||||||
|
response = await llm_call
|
||||||
|
except TimeoutError as exc:
|
||||||
|
time_budget_hit = True
|
||||||
|
raise LLMTimeBudgetExceeded("time_budget") from exc
|
||||||
|
log.info(
|
||||||
|
"atlasbot_llm_call",
|
||||||
|
extra={"extra": {"mode": mode, "tag": tag, "call": call_count, "limit": call_cap}},
|
||||||
|
)
|
||||||
|
if settings.debug_pipeline and tag in debug_tags:
|
||||||
|
_debug_pipeline_log(settings, f"llm_raw_{tag}", str(response)[:1200])
|
||||||
|
return response
|
||||||
|
|
||||||
|
state = engine._get_state(conversation_id)
|
||||||
|
pin_snapshot = bool(snapshot_pin) or settings.snapshot_pin_enabled
|
||||||
|
snapshot = engine._snapshot.get()
|
||||||
|
snapshot_used = state.snapshot if pin_snapshot and state and state.snapshot else snapshot
|
||||||
|
summary = build_summary(snapshot_used)
|
||||||
|
summary_lines = _summary_lines(snapshot_used)
|
||||||
|
allowed_nodes = _allowed_nodes(summary)
|
||||||
|
allowed_namespaces = _allowed_namespaces(summary)
|
||||||
|
spine = _spine_from_summary(summary) or _spine_lines(summary_lines)
|
||||||
|
metric_tokens = _metric_key_tokens(summary_lines)
|
||||||
|
global_facts = _global_facts(summary_lines)
|
||||||
|
kb_summary = engine._kb.summary()
|
||||||
|
runbooks = engine._kb.runbook_titles(limit=6)
|
||||||
|
runbook_paths = engine._kb.runbook_paths(limit=10)
|
||||||
|
history_ctx = _format_history(history)
|
||||||
|
lexicon_ctx = _lexicon_context(summary)
|
||||||
|
|
||||||
|
key_facts: list[str] = []
|
||||||
|
metric_facts: list[str] = []
|
||||||
|
facts_used: list[str] = []
|
||||||
|
reply = ""
|
||||||
|
scores = _default_scores()
|
||||||
|
claims: list[ClaimItem] = []
|
||||||
|
classify: dict[str, Any] = {}
|
||||||
|
tool_hint: dict[str, Any] | None = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
if mode in {"quick", "fast", "smart", "genius"} and not limitless:
|
||||||
|
if observer:
|
||||||
|
observer("factsheet", "building fact sheet")
|
||||||
|
if _is_plain_math_question(question):
|
||||||
|
reply = (
|
||||||
|
"I focus on Titan cluster operations. Ask me about cluster health, nodes, workloads, "
|
||||||
|
"namespaces, storage, or alerts."
|
||||||
|
)
|
||||||
|
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||||
|
kb_lines = (
|
||||||
|
engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=_factsheet_kb_chars(mode, plan.kb_max_chars))
|
||||||
|
if engine._kb
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
fact_lines = _quick_fact_sheet_lines(question, summary_lines, kb_lines, limit=_factsheet_line_limit(mode))
|
||||||
|
classify = {
|
||||||
|
"needs_snapshot": True,
|
||||||
|
"needs_kb": bool(kb_lines),
|
||||||
|
"question_type": f"{mode}_factsheet",
|
||||||
|
"answer_style": "direct" if mode in {"quick", "fast"} else "concise",
|
||||||
|
"follow_up": False,
|
||||||
|
}
|
||||||
|
heuristic_reply = _quick_fact_sheet_heuristic_answer(question, fact_lines)
|
||||||
|
if heuristic_reply:
|
||||||
|
return AnswerResult(heuristic_reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||||
|
if observer:
|
||||||
|
observer("quick", "answering from fact sheet")
|
||||||
|
quick_context = _quick_fact_sheet_text(fact_lines)
|
||||||
|
quick_prompt = "Question: " + question + "\nAnswer using only the Fact Sheet. " + _factsheet_instruction(mode)
|
||||||
|
reply = await call_llm(prompts.ANSWER_SYSTEM, quick_prompt, context=quick_context, model=_factsheet_model(mode, plan), tag=f"{mode}_factsheet")
|
||||||
|
reply = _strip_followup_meta(reply)
|
||||||
|
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||||
|
|
||||||
|
if observer:
|
||||||
|
observer("normalize", "normalizing")
|
||||||
|
normalize_prompt = prompts.NORMALIZE_PROMPT + "\nQuestion: " + question
|
||||||
|
normalize_raw = await call_llm(prompts.NORMALIZE_SYSTEM, normalize_prompt, context=lexicon_ctx, model=plan.fast_model, tag="normalize")
|
||||||
|
normalize = _parse_json_block(normalize_raw, fallback={"normalized": question, "keywords": []})
|
||||||
|
normalized = str(normalize.get("normalized") or question).strip() or question
|
||||||
|
keywords = normalize.get("keywords") or []
|
||||||
|
_debug_pipeline_log(settings, "normalize_parsed", {"normalized": normalized, "keywords": keywords})
|
||||||
|
keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
|
||||||
|
question_tokens = _extract_question_tokens(normalized)
|
||||||
|
|
||||||
|
if observer:
|
||||||
|
observer("route", "routing")
|
||||||
|
route_prompt = prompts.ROUTE_PROMPT + "\nQuestion: " + normalized + "\nKeywords: " + json.dumps(keywords)
|
||||||
|
route_raw = await call_llm(prompts.ROUTE_SYSTEM, route_prompt, context=_join_context([kb_summary, lexicon_ctx]), model=plan.fast_model, tag="route")
|
||||||
|
classify = _parse_json_block(route_raw, fallback={})
|
||||||
|
classify.setdefault("needs_snapshot", True)
|
||||||
|
classify.setdefault("answer_style", "direct")
|
||||||
|
classify.setdefault("follow_up", False)
|
||||||
|
classify.setdefault("focus_entity", "unknown")
|
||||||
|
classify.setdefault("focus_metric", "unknown")
|
||||||
|
if metric_tokens and keyword_tokens and any(token in metric_tokens for token in keyword_tokens):
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
intent = route_intent(normalized)
|
||||||
|
if intent:
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
_debug_pipeline_log(settings, "route_parsed", {"classify": classify, "normalized": normalized})
|
||||||
|
lowered_question = f"{question} {normalized}".lower()
|
||||||
|
force_metric = bool(re.search(r"\bhow many\b|\bcount\b|\btotal\b", lowered_question))
|
||||||
|
if any(term in lowered_question for term in ("postgres", "connections", "pvc", "ready")):
|
||||||
|
force_metric = True
|
||||||
|
|
||||||
|
if intent:
|
||||||
|
spine_line = spine.get(intent.kind) if isinstance(spine, dict) else None
|
||||||
|
if not spine_line:
|
||||||
|
spine_line = _spine_fallback(intent, summary_lines)
|
||||||
|
spine_answer = _spine_answer(intent, spine_line)
|
||||||
|
if spine_line:
|
||||||
|
key_facts = _merge_fact_lines([spine_line], key_facts)
|
||||||
|
metric_facts = _merge_fact_lines([spine_line], metric_facts)
|
||||||
|
if spine_answer and mode in {"fast", "quick"}:
|
||||||
|
return AnswerResult(spine_answer, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||||
|
|
||||||
|
cluster_terms = (
|
||||||
|
"atlas",
|
||||||
|
"cluster",
|
||||||
|
"node",
|
||||||
|
"nodes",
|
||||||
|
"namespace",
|
||||||
|
"pod",
|
||||||
|
"workload",
|
||||||
|
"k8s",
|
||||||
|
"kubernetes",
|
||||||
|
"postgres",
|
||||||
|
"database",
|
||||||
|
"db",
|
||||||
|
"connections",
|
||||||
|
"cpu",
|
||||||
|
"ram",
|
||||||
|
"memory",
|
||||||
|
"network",
|
||||||
|
"io",
|
||||||
|
"disk",
|
||||||
|
"pvc",
|
||||||
|
"storage",
|
||||||
|
)
|
||||||
|
has_cluster_terms = any(term in lowered_question for term in cluster_terms)
|
||||||
|
if has_cluster_terms:
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
lowered_norm = normalized.lower()
|
||||||
|
if ("namespace" in lowered_norm and ("pod" in lowered_norm or "pods" in lowered_norm)) or re.search(r"\bmost\s+pods\b", lowered_norm) or re.search(r"\bpods\s+running\b", lowered_norm):
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
if re.search(r"\b(how many|count|number of|list)\b", lowered_question):
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
if any(term in lowered_question for term in ("postgres", "connections", "db")):
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
if any(term in lowered_question for term in ("pvc", "persistentvolume", "persistent volume", "storage")):
|
||||||
|
if classify.get("question_type") not in {"metric", "diagnostic"}:
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
if "ready" in lowered_question and classify.get("question_type") not in {"metric", "diagnostic"}:
|
||||||
|
classify["question_type"] = "diagnostic"
|
||||||
|
hottest_terms = ("hottest", "highest", "lowest", "most")
|
||||||
|
metric_terms = ("cpu", "ram", "memory", "net", "network", "io", "disk", "load", "usage", "pod", "pods", "namespace")
|
||||||
|
if any(term in lowered_question for term in hottest_terms) and any(term in lowered_question for term in metric_terms):
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
baseline_terms = ("baseline", "delta", "trend", "increase", "decrease", "drop", "spike", "regression", "change")
|
||||||
|
if any(term in lowered_question for term in baseline_terms) and any(term in lowered_question for term in metric_terms):
|
||||||
|
classify["question_type"] = "metric"
|
||||||
|
classify["needs_snapshot"] = True
|
||||||
|
|
||||||
|
if not classify.get("follow_up") and state and state.claims:
|
||||||
|
follow_terms = ("there", "that", "those", "these", "it", "them", "that one", "this", "former", "latter")
|
||||||
|
is_metric_query = force_metric or classify.get("question_type") in {"metric", "diagnostic"}
|
||||||
|
if not is_metric_query and (
|
||||||
|
any(term in lowered_question for term in follow_terms)
|
||||||
|
or (len(normalized.split()) <= FOLLOWUP_SHORT_WORDS and not has_cluster_terms)
|
||||||
|
):
|
||||||
|
classify["follow_up"] = True
|
||||||
|
|
||||||
|
if classify.get("follow_up") and state and state.claims:
|
||||||
|
if observer:
|
||||||
|
observer("followup", "answering follow-up")
|
||||||
|
reply = await engine._answer_followup(question, state, summary, classify, plan, call_llm)
|
||||||
|
scores = await engine._score_answer(question, reply, plan, call_llm)
|
||||||
|
return AnswerResult(reply, scores, _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||||
|
|
||||||
|
if observer:
|
||||||
|
observer("decompose", "decomposing")
|
||||||
|
decompose_prompt = prompts.DECOMPOSE_PROMPT.format(max_parts=plan.max_subquestions * 2)
|
||||||
|
decompose_raw = await call_llm(prompts.DECOMPOSE_SYSTEM, decompose_prompt + "\nQuestion: " + normalized, context=lexicon_ctx, model=plan.fast_model if mode == "quick" else plan.model, tag="decompose")
|
||||||
|
parts = _parse_json_list(decompose_raw)
|
||||||
|
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
|
||||||
|
_debug_pipeline_log(settings, "decompose_parsed", {"sub_questions": sub_questions})
|
||||||
|
keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
|
||||||
|
|
||||||
|
snapshot_context = ""
|
||||||
|
signal_tokens: list[str] = []
|
||||||
|
if classify.get("needs_snapshot"):
|
||||||
|
if observer:
|
||||||
|
observer("retrieve", "scoring chunks")
|
||||||
|
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
||||||
|
if plan.use_raw_snapshot:
|
||||||
|
raw_chunks = _raw_snapshot_chunks(snapshot_used)
|
||||||
|
if raw_chunks:
|
||||||
|
chunks.extend(raw_chunks)
|
||||||
|
kb_lines = engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if engine._kb else []
|
||||||
|
if kb_lines:
|
||||||
|
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
|
||||||
|
for idx, chunk in enumerate(kb_chunks):
|
||||||
|
chunk["id"] = f"k{idx}"
|
||||||
|
chunks.extend(kb_chunks)
|
||||||
|
metric_keys: list[str] = []
|
||||||
|
must_chunk_ids: list[str] = []
|
||||||
|
metric_task = None
|
||||||
|
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and summary_lines:
|
||||||
|
metric_ctx = {"question": normalized, "sub_questions": sub_questions, "keywords": keywords, "keyword_tokens": keyword_tokens, "summary_lines": summary_lines}
|
||||||
|
metric_task = asyncio.create_task(_select_metric_chunks(call_llm, metric_ctx, chunks, plan))
|
||||||
|
scored_task = asyncio.create_task(_score_chunks(call_llm, chunks, normalized, sub_questions, plan))
|
||||||
|
if metric_task:
|
||||||
|
metric_keys, must_chunk_ids = await metric_task
|
||||||
|
scored = await scored_task
|
||||||
|
selected = _select_chunks(chunks, scored, plan, keyword_tokens, must_chunk_ids)
|
||||||
|
fact_candidates = _collect_fact_candidates(selected, limit=plan.max_subquestions * 12)
|
||||||
|
key_facts = await _select_fact_lines(call_llm, normalized, fact_candidates, plan, max_lines=max(4, plan.max_subquestions * 2))
|
||||||
|
metric_facts = []
|
||||||
|
if classify.get("question_type") in {"metric", "diagnostic"} or force_metric:
|
||||||
|
global_metric_facts: list[str] = []
|
||||||
|
if global_facts:
|
||||||
|
global_metric_facts = await _select_fact_lines(call_llm, normalized, global_facts, plan, max_lines=min(2, max(1, plan.max_subquestions)))
|
||||||
|
if not global_metric_facts and (keyword_tokens or question_tokens):
|
||||||
|
tokens = {tok for tok in (keyword_tokens or question_tokens) if tok and tok not in GENERIC_METRIC_TOKENS}
|
||||||
|
global_metric_facts = _rank_metric_lines(global_facts, tokens, max_lines=2)
|
||||||
|
if global_metric_facts:
|
||||||
|
key_facts = _merge_fact_lines(global_metric_facts, key_facts)
|
||||||
|
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
|
||||||
|
if plan.use_deep_retrieval:
|
||||||
|
if observer:
|
||||||
|
observer("retrieve", "extracting fact types")
|
||||||
|
fact_types = await _extract_fact_types(call_llm, normalized, keyword_tokens, plan)
|
||||||
|
if observer:
|
||||||
|
observer("retrieve", "deriving signals")
|
||||||
|
signals = await _derive_signals(call_llm, normalized, fact_types, plan)
|
||||||
|
if isinstance(signals, list):
|
||||||
|
signal_tokens = [str(item) for item in signals if item]
|
||||||
|
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
|
||||||
|
if observer:
|
||||||
|
observer("retrieve", "scanning chunks")
|
||||||
|
candidate_lines: list[str] = []
|
||||||
|
if signals:
|
||||||
|
for chunk in selected:
|
||||||
|
chunk_lines = chunk["text"].splitlines()
|
||||||
|
if not chunk_lines:
|
||||||
|
continue
|
||||||
|
hits = await _scan_chunk_for_signals(call_llm, normalized, signals, chunk_lines, plan)
|
||||||
|
if hits:
|
||||||
|
candidate_lines.extend(hits)
|
||||||
|
candidate_lines = list(dict.fromkeys(candidate_lines))
|
||||||
|
if candidate_lines:
|
||||||
|
if observer:
|
||||||
|
observer("retrieve", "pruning candidates")
|
||||||
|
metric_facts = await _prune_metric_candidates(call_llm, normalized, candidate_lines, plan, plan.metric_retries)
|
||||||
|
if metric_facts:
|
||||||
|
key_facts = _merge_fact_lines(metric_facts, key_facts)
|
||||||
|
if settings.debug_pipeline:
|
||||||
|
_debug_pipeline_log(settings, "metric_facts_selected", {"facts": metric_facts})
|
||||||
|
if not metric_facts:
|
||||||
|
if observer:
|
||||||
|
observer("retrieve", "fallback metric selection")
|
||||||
|
token_set = {tok for tok in all_tokens if tok and tok not in GENERIC_METRIC_TOKENS}
|
||||||
|
fallback_candidates = _rank_metric_lines(summary_lines, token_set, max_lines=200)
|
||||||
|
if fallback_candidates:
|
||||||
|
metric_facts = await _select_fact_lines(call_llm, normalized, fallback_candidates, plan, max_lines=max(2, plan.max_subquestions))
|
||||||
|
if not metric_facts and fallback_candidates:
|
||||||
|
metric_facts = fallback_candidates[: max(2, plan.max_subquestions)]
|
||||||
|
if metric_keys:
|
||||||
|
key_lines = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3)
|
||||||
|
if key_lines:
|
||||||
|
metric_facts = _merge_fact_lines(key_lines, metric_facts)
|
||||||
|
if metric_facts:
|
||||||
|
metric_cover_tokens = [tok for tok in keyword_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
|
||||||
|
if not metric_cover_tokens:
|
||||||
|
metric_cover_tokens = [tok for tok in question_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
|
||||||
|
metric_facts = _ensure_token_coverage(metric_facts, metric_cover_tokens or all_tokens, summary_lines, max_add=plan.max_subquestions)
|
||||||
|
if metric_cover_tokens:
|
||||||
|
ranked_metric_lines = _rank_metric_lines(summary_lines, set(metric_cover_tokens), max_lines=max(1, plan.max_subquestions))
|
||||||
|
if ranked_metric_lines:
|
||||||
|
metric_facts = _merge_fact_lines(ranked_metric_lines, metric_facts)
|
||||||
|
if metric_facts and not _has_keyword_overlap(metric_facts, keyword_tokens):
|
||||||
|
best_line = _best_keyword_line(summary_lines, keyword_tokens)
|
||||||
|
if best_line:
|
||||||
|
metric_facts = _merge_fact_lines([best_line], metric_facts)
|
||||||
|
if metric_facts:
|
||||||
|
key_facts = _merge_fact_lines(metric_facts, key_facts)
|
||||||
|
if global_metric_facts:
|
||||||
|
metric_facts = _merge_fact_lines(global_metric_facts, metric_facts)
|
||||||
|
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not metric_facts and key_facts:
|
||||||
|
metric_facts = key_facts
|
||||||
|
if key_facts:
|
||||||
|
key_facts = _ensure_token_coverage(key_facts, _merge_tokens(keyword_tokens, question_tokens), summary_lines, max_add=plan.max_subquestions)
|
||||||
|
facts_used = list(dict.fromkeys(key_facts)) if key_facts else list(dict.fromkeys(metric_facts))
|
||||||
|
snapshot_context = "ClusterSnapshot:\n" + "\n".join([chunk["text"] for chunk in selected])
|
||||||
|
combined_facts = _merge_fact_lines(global_facts, key_facts) if global_facts else key_facts
|
||||||
|
if combined_facts:
|
||||||
|
snapshot_context = "KeyFacts:\n" + "\n".join(combined_facts) + "\n\n" + snapshot_context
|
||||||
|
|
||||||
|
context = _join_context([kb_summary, _format_runbooks(runbooks), snapshot_context, history_ctx if classify.get("follow_up") else ""])
|
||||||
|
|
||||||
|
if plan.use_tool and classify.get("needs_tool"):
|
||||||
|
if observer:
|
||||||
|
observer("tool", "suggesting tools")
|
||||||
|
tool_prompt = prompts.TOOL_PROMPT + "\nQuestion: " + normalized
|
||||||
|
tool_raw = await call_llm(prompts.TOOL_SYSTEM, tool_prompt, context=context, model=plan.fast_model, tag="tool")
|
||||||
|
tool_hint = _parse_json_block(tool_raw, fallback={})
|
||||||
|
|
||||||
|
if observer:
|
||||||
|
observer("subanswers", "drafting subanswers")
|
||||||
|
async def _subanswer_for(subq: str) -> str:
|
||||||
|
sub_prompt = prompts.SUBANSWER_PROMPT + "\nQuestion: " + subq
|
||||||
|
if plan.subanswer_retries > 1:
|
||||||
|
candidates = await _gather_limited(
|
||||||
|
[call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer") for _ in range(plan.subanswer_retries)],
|
||||||
|
plan.parallelism,
|
||||||
|
)
|
||||||
|
best_idx = await _select_best_candidate(call_llm, subq, candidates, plan, "subanswer_select")
|
||||||
|
return candidates[best_idx]
|
||||||
|
return await call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer")
|
||||||
|
|
||||||
|
subanswers: list[str] = []
|
||||||
|
if plan.parallelism > 1 and len(sub_questions) > 1:
|
||||||
|
subanswers = await _gather_limited([_subanswer_for(subq) for subq in sub_questions], plan.parallelism)
|
||||||
|
else:
|
||||||
|
for subq in sub_questions:
|
||||||
|
subanswers.append(await _subanswer_for(subq))
|
||||||
|
|
||||||
|
if observer:
|
||||||
|
observer("synthesize", "synthesizing")
|
||||||
|
reply, scores, claims = await finalize_answer(
|
||||||
|
engine=engine,
|
||||||
|
call_llm=call_llm,
|
||||||
|
normalized=normalized,
|
||||||
|
subanswers=subanswers,
|
||||||
|
context=context,
|
||||||
|
classify=classify,
|
||||||
|
plan=plan,
|
||||||
|
summary=summary,
|
||||||
|
summary_lines=summary_lines,
|
||||||
|
metric_facts=metric_facts,
|
||||||
|
key_facts=key_facts,
|
||||||
|
facts_used=facts_used,
|
||||||
|
allowed_nodes=allowed_nodes,
|
||||||
|
allowed_namespaces=allowed_namespaces,
|
||||||
|
runbook_paths=runbook_paths,
|
||||||
|
lowered_question=lowered_question,
|
||||||
|
force_metric=force_metric,
|
||||||
|
keyword_tokens=keyword_tokens,
|
||||||
|
question_tokens=question_tokens,
|
||||||
|
snapshot_context=snapshot_context,
|
||||||
|
observer=observer,
|
||||||
|
mode=mode,
|
||||||
|
metric_keys=metric_keys if 'metric_keys' in locals() else None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
except LLMTimeBudgetExceeded:
|
||||||
|
time_budget_hit = True
|
||||||
|
if not reply:
|
||||||
|
budget = max(1, round(time_budget_sec)) if time_budget_sec > 0 else 0
|
||||||
|
budget_text = f"{budget}s" if budget else "its configured"
|
||||||
|
if mode in {"quick", "fast"}:
|
||||||
|
reply = f"Quick mode hit {budget_text} time budget before finishing. Try atlas-smart for a deeper answer."
|
||||||
|
elif mode == "smart":
|
||||||
|
reply = f"Smart mode hit {budget_text} time budget before finishing. Try atlas-genius or ask a narrower follow-up."
|
||||||
|
else:
|
||||||
|
reply = "I ran out of time before I could finish this answer."
|
||||||
|
scores = _default_scores()
|
||||||
|
except LLMLimitReached:
|
||||||
|
if not reply:
|
||||||
|
reply = "I started working on this but hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||||
|
scores = _default_scores()
|
||||||
|
finally:
|
||||||
|
elapsed = round(time.monotonic() - started, 2)
|
||||||
|
log.info(
|
||||||
|
"atlasbot_answer",
|
||||||
|
extra={
|
||||||
|
"extra": {
|
||||||
|
"mode": mode,
|
||||||
|
"seconds": elapsed,
|
||||||
|
"llm_calls": call_count,
|
||||||
|
"limit": call_cap,
|
||||||
|
"limit_hit": limit_hit,
|
||||||
|
"time_budget_sec": time_budget_sec,
|
||||||
|
"time_budget_hit": time_budget_hit,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if limit_hit and "run limitless" not in reply.lower():
|
||||||
|
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||||
|
|
||||||
|
if conversation_id and claims:
|
||||||
|
engine._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
|
||||||
|
|
||||||
|
return AnswerResult(
|
||||||
|
reply,
|
||||||
|
scores,
|
||||||
|
_build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started),
|
||||||
|
)
|
||||||
170
atlasbot/engine/answerer/workflow_post.py
Normal file
170
atlasbot/engine/answerer/workflow_post.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from atlasbot.llm import prompts
|
||||||
|
|
||||||
|
from ._base import *
|
||||||
|
from .common import *
|
||||||
|
from .post import *
|
||||||
|
from .post_ext import *
|
||||||
|
from .retrieval import *
|
||||||
|
from .spine import *
|
||||||
|
|
||||||
|
|
||||||
|
async def finalize_answer(*, engine: Any, call_llm: Callable[..., Any], normalized: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, summary: dict[str, Any], summary_lines: list[str], metric_facts: list[str], key_facts: list[str], facts_used: list[str], allowed_nodes: list[str], allowed_namespaces: list[str], runbook_paths: list[str], lowered_question: str, force_metric: bool, keyword_tokens: list[str], question_tokens: list[str], snapshot_context: str, observer: Callable[[str, str], None] | None, mode: str, metric_keys: list[str] | None = None) -> tuple[str, AnswerScores, list[ClaimItem]]:  # noqa: C901
    """Synthesize and post-process the final answer.

    Runs the post-synthesis gauntlet; each stage may rewrite ``reply``:

    1. synthesize a draft from ``subanswers``;
    2. evidence fix: repair unknown nodes/namespaces, runbook paths, and
       missing must-use metric facts;
    3. direct metric fallback when the reply still ignores the metric facts;
    4. hardware-question override, unknown-entity stripping, runbook-path
       enforcement;
    5. evidence guard, focus fix, metric-number consistency check, insight
       guard;
    6. optional critic revision (``plan.use_critic``) and evidence-gap note
       (``plan.use_gap``);
    7. dedup, scoring, and claim extraction via ``engine``.

    Input:
    - `engine`: answer engine exposing ``_synthesize_answer``, ``_dedup_reply``,
      ``_score_answer``, and ``_extract_claims``.
    - `call_llm`: awaitable LLM caller ``(system, prompt, *, context, model, tag)``.
    - `observer`: optional progress callback ``(stage, message)``.
    - remaining keywords: retrieval artifacts produced by the answer workflow
      (facts, token lists, allow-lists, snapshot context, mode plan).

    Output:
    - ``(reply, scores, claims)`` for the finished answer.
    """

    reply = await engine._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)

    # Detect fabricated entities and missing references in the first draft.
    unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
    unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
    runbook_fix = _needs_runbook_fix(reply, runbook_paths)
    runbook_needed = _needs_runbook_reference(normalized, runbook_paths, reply)
    needs_evidence = _needs_evidence_fix(reply, classify)
    hardware_terms = ("rpi", "raspberry", "jetson", "amd64", "arm64", "hardware")
    hardware_line = _line_starting_with(summary_lines, "hardware_nodes:")
    # Hardware questions must cite the hardware_nodes summary line when one exists.
    if any(term in lowered_question for term in hardware_terms) and hardware_line:
        needs_evidence = True
    # Metric/diagnostic answers must actually reflect the selected metric facts.
    if metric_facts and (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
        needs_evidence = True
    if classify.get("question_type") in {"open_ended", "planning"} and metric_facts:
        needs_evidence = True
    # Resolve the single runbook path to cite before attempting any repair.
    resolved_runbook = None
    if runbook_paths and (runbook_fix or runbook_needed):
        resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
        resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
        resolver = _parse_json_block(resolver_raw, fallback={})
        candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
        if candidate and candidate in runbook_paths:
            resolved_runbook = candidate

    # Evidence fix: rewrite the draft with explicit allow-lists and must-use facts.
    if (snapshot_context and needs_evidence) or unknown_nodes or unknown_namespaces or runbook_fix or runbook_needed:
        if observer:
            observer("evidence_fix", "repairing missing evidence")
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if runbook_paths:
            extra_bits.append("AllowedRunbooks: " + ", ".join(runbook_paths))
        if resolved_runbook:
            extra_bits.append("ResolvedRunbook: " + resolved_runbook)
        if metric_facts:
            extra_bits.append("MustUseFacts: " + "; ".join(metric_facts[:4]))
        if hardware_line:
            extra_bits.append("HardwareNodes: " + hardware_line)
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        fix_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + ("\n" + "\n".join(extra_bits) if extra_bits else "")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, fix_prompt, context=context, model=plan.model, tag="evidence_fix")
        # Second pass: explicitly enforce fact inclusion if the fix did not take.
        if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
            enforce_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nMustIncludeFacts: " + "; ".join(metric_facts[:6]) + "\nInstruction: The answer must include all MustIncludeFacts items."
            reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, enforce_prompt, context=context, model=plan.model, tag="evidence_fix_enforce")

    # Last-resort metric path: answer directly from a single summary line.
    if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
        # metric_keys is a keyword parameter, so the former "'metric_keys' in locals()"
        # guard was always true; truthiness is the only meaningful test.
        direct_candidates = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3) if metric_keys else summary_lines
        direct_line = _select_metric_line(direct_candidates, normalized, _merge_tokens(keyword_tokens, question_tokens))
        if direct_line:
            direct_prompt = f"Question: {normalized}\nFact: {direct_line}\nAnswer using the fact."
            reply = await call_llm(prompts.ANSWER_SYSTEM, direct_prompt, context="", model=plan.fast_model, tag="metric_direct")
            # Quick-mode count questions (or a still-mismatched reply) get the raw line.
            if (mode == "quick" and any(term in normalized.lower() for term in ("how many", "count", "total"))) or not _reply_matches_metric_facts(reply, [direct_line], _merge_tokens(keyword_tokens, question_tokens)):
                reply = _format_direct_metric_line(direct_line)

    # Deterministic override for "not raspberry pi" hardware questions.
    if "raspberry" in lowered_question and "not" in lowered_question:
        non_rpi = _non_rpi_nodes(summary)
        if non_rpi:
            reply = _format_hardware_groups(non_rpi, "Non-Raspberry Pi nodes")
    # Re-check for fabricated entities after the rewrites above.
    if unknown_nodes or unknown_namespaces:
        refreshed_nodes = _find_unknown_nodes(reply, allowed_nodes)
        refreshed_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        if refreshed_nodes or refreshed_namespaces:
            reply = _strip_unknown_entities(reply, refreshed_nodes, refreshed_namespaces)
    # Make sure the resolved runbook path actually appears when it is required.
    if runbook_paths and resolved_runbook and _needs_runbook_reference(normalized, runbook_paths, reply):
        if observer:
            observer("runbook_enforce", "enforcing runbook path")
        enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=resolved_runbook)
        reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
    # Replace any invented runbook path with a valid one (LLM pick, then fuzzy match).
    if runbook_paths:
        invalid = [token for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply) if token.lower() not in {p.lower() for p in runbook_paths}]
        if invalid:
            if observer:
                observer("runbook_enforce", "replacing invalid runbook path")
            resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
            resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
            resolver = _parse_json_block(resolver_raw, fallback={})
            candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
            if not (candidate and candidate in runbook_paths):
                candidate = _best_runbook_match(invalid[0], runbook_paths)
            if candidate and candidate in runbook_paths:
                enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=candidate)
                reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
    # Final safety net: strip the originally detected unknown entities unconditionally.
    reply = _strip_unknown_entities(reply, unknown_nodes, unknown_namespaces)

    # Evidence guard: tighten claims unsupported by the facts that were used.
    if facts_used and _needs_evidence_guard(reply, facts_used):
        if observer:
            observer("evidence_guard", "tightening unsupported claims")
        use_guard = True
        # Smart/genius modes first ask whether the facts actually contradict the reply.
        if mode in {"smart", "genius"}:
            decision = await _contradiction_decision(ContradictionContext(call_llm, normalized, reply, facts_used, plan), attempts=3 if mode == "genius" else 1)
            use_guard = decision.get("use_facts", True)
        if use_guard:
            guard_prompt = prompts.EVIDENCE_GUARD_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nFactsUsed:\n" + "\n".join(facts_used)
            reply = await call_llm(prompts.EVIDENCE_GUARD_SYSTEM, guard_prompt, context=context, model=plan.model, tag="evidence_guard")

    # Focus fix: trim a rambling answer, falling back to the best keyword line.
    if _needs_focus_fix(normalized, reply, classify):
        if observer:
            observer("focus_fix", "tightening answer")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, prompts.FOCUS_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply, context=context, model=plan.model, tag="focus_fix")
        if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
            best_line = _best_keyword_line(summary_lines, keyword_tokens)
            if best_line:
                reply = f"Latest metrics: {best_line}."
    # Number consistency: a metric answer must share at least one number with the facts.
    if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
        best_line = None
        lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
        for line in metric_facts:
            if any(kw in line.lower() for kw in lowered_keywords):
                best_line = line
                break
        best_line = best_line or metric_facts[0]
        reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
        fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
        if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
            reply = f"Latest metrics: {best_line}."

    # Insight guard: require concrete signals for insight-style questions.
    if _should_use_insight_guard(classify):
        if observer:
            observer("insight_guard", "checking for concrete signals")
        reply = await _apply_insight_guard(InsightGuardInput(question=normalized, reply=reply, classify=classify, context=context, plan=plan, call_llm=call_llm, facts=metric_facts or key_facts))

    # Critic pass: request a critique and revise only when issues are reported.
    if plan.use_critic:
        if observer:
            observer("critic", "reviewing")
        critic_prompt = prompts.CRITIC_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        critic_raw = await call_llm(prompts.CRITIC_SYSTEM, critic_prompt, context=context, model=plan.model, tag="critic")
        critic = _parse_json_block(critic_raw, fallback={})
        if critic.get("issues"):
            revise_prompt = prompts.REVISION_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nCritique: " + json.dumps(critic)
            reply = await call_llm(prompts.REVISION_SYSTEM, revise_prompt, context=context, model=plan.model, tag="revise")

    # Gap pass: append an evidence-gap note when the fast model reports one.
    if plan.use_gap:
        if observer:
            observer("gap", "checking gaps")
        gap_prompt = prompts.EVIDENCE_GAP_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        gap_raw = await call_llm(prompts.GAP_SYSTEM, gap_prompt, context=context, model=plan.fast_model, tag="gap")
        gap = _parse_json_block(gap_raw, fallback={})
        note = str(gap.get("note") or "").strip()
        if note:
            reply = f"{reply}\n\n{note}"

    # Final cleanup, scoring, and claim extraction.
    reply = await engine._dedup_reply(reply, plan, call_llm, tag="dedup")
    scores = await engine._score_answer(normalized, reply, plan, call_llm)
    claims = await engine._extract_claims(normalized, reply, summary, facts_used, call_llm)
    return reply, scores, claims
|
||||||
@ -1,35 +1,46 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
|
||||||
import re
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class IntentMatch:
    """Describe the best cluster intent match for a user question."""

    # Intent identifier consumed downstream, e.g. "nodes_count" or "hottest_cpu".
    kind: str
    # Relative match confidence; higher values take precedence when several
    # intents fire (values used in this module range roughly 75-90).
    score: int
|
||||||
|
|
||||||
|
|
||||||
# Regex alternation fragments matched against the lowered question text by
# route_intent(). Each constant groups the colloquial phrasings of one concept;
# intents are detected by requiring combinations of these fragments.

# Counting / quantity phrasings ("how many", "total", ...).
_COUNT_TERMS = r"(how\s+many|count|number\s+of|total|totals|tally|amount\s+of|quantity|sum\s+of|overall|in\s+total|all\s+up)"
# Ways of referring to cluster nodes / machines / the control plane.
_NODE_TERMS = r"(nodes?|workers?|worker\s+nodes?|cluster\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\s]?plane|control\s+plane)"
# Node readiness / availability states, including drain/cordon verbs.
_READY_TERMS = r"(ready|unready|not\s+ready|down|offline|not\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)"
# Superlative "which is the hottest/top/worst" phrasings.
_HOTTEST_TERMS = r"(hottest|hot|highest|max(?:imum)?|peak|top|most|worst|spikiest|heaviest|largest|biggest|noisiest|loudest)"
# Resource-domain fragments: CPU, memory, network, disk I/O, disk capacity.
_CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\s+avg|load\s+average|util(?:ization)?|usage)"
_RAM_TERMS = r"(ram|memory|mem|heap|rss|resident|swap)"
_NET_TERMS = r"(net|network|bandwidth|throughput|traffic|rx|tx|ingress|egress|bits|bytes|packets|pps|bps)"
_IO_TERMS = r"(\bio\b|i/o|disk\s+io|disk\s+activity|read/?write|storage\s+io|iops|latency)"
_DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\bspace\b|full|usage)"
# PostgreSQL / database fragments and connection-pool phrasings.
_PG_TERMS = r"(postgres|postgresql|pg\b|database|db|sql|psql)"
_CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\s+connections?|open\s+connections?)"
_DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\s+load)"
# Kubernetes namespace and workload-object fragments.
_NAMESPACE_TERMS = r"(namespace|namespaces|ns\b|tenant|workload\s+namespace)"
_PODS_TERMS = r"(pods?|workloads?|tasks?|containers?|deployments?|jobs?|cronjobs?|daemonsets?|statefulsets?)"
# Hardware-mix fragments: non-Raspberry-Pi filters, pressure/saturation wording,
# and generic hardware/architecture vocabulary.
_NON_RPI_TERMS = r"(non[-\s]?raspberry|not\s+raspberry|non[-\s]?rpi|not\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\b(?!.*rpi))"
_PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\s+capacity|near\s+limit)"
_HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\s+types?)"
|
||||||
|
|
||||||
|
|
||||||
def route_intent(question: str) -> IntentMatch | None:
|
def route_intent(question: str) -> IntentMatch | None:
|
||||||
|
"""Classify a question into a deterministic cluster intent.
|
||||||
|
|
||||||
|
Input:
|
||||||
|
- `question`: user text to inspect.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- the highest-confidence `IntentMatch`, or `None` when no intent fits.
|
||||||
|
"""
|
||||||
|
|
||||||
text = (question or "").lower()
|
text = (question or "").lower()
|
||||||
if not text:
|
if not text:
|
||||||
return None
|
return None
|
||||||
@ -44,13 +55,13 @@ def route_intent(question: str) -> IntentMatch | None:
|
|||||||
return any(_has(pat) for pat in patterns)
|
return any(_has(pat) for pat in patterns)
|
||||||
|
|
||||||
intents = [
|
intents = [
|
||||||
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
|
|
||||||
(
|
(
|
||||||
lambda: _all(_READY_TERMS) and (_any(_NODE_TERMS) or "cluster" in text or "workers" in text),
|
lambda: _all(_READY_TERMS) and (_any(_NODE_TERMS) or "cluster" in text or "workers" in text),
|
||||||
IntentMatch("nodes_ready", 85),
|
IntentMatch("nodes_ready", 85),
|
||||||
),
|
),
|
||||||
|
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
|
||||||
(lambda: _all(_NON_RPI_TERMS) and (_any(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_non_rpi", 80)),
|
(lambda: _all(_NON_RPI_TERMS) and (_any(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_non_rpi", 80)),
|
||||||
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("hardware_mix", 75)),
|
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text or "mix" in text), IntentMatch("hardware_mix", 75)),
|
||||||
(lambda: _all(_HOTTEST_TERMS, _CPU_TERMS), IntentMatch("hottest_cpu", 80)),
|
(lambda: _all(_HOTTEST_TERMS, _CPU_TERMS), IntentMatch("hottest_cpu", 80)),
|
||||||
(lambda: _all(_HOTTEST_TERMS, _RAM_TERMS), IntentMatch("hottest_ram", 80)),
|
(lambda: _all(_HOTTEST_TERMS, _RAM_TERMS), IntentMatch("hottest_ram", 80)),
|
||||||
(lambda: _all(_HOTTEST_TERMS, _NET_TERMS), IntentMatch("hottest_net", 80)),
|
(lambda: _all(_HOTTEST_TERMS, _NET_TERMS), IntentMatch("hottest_net", 80)),
|
||||||
|
|||||||
@ -7,6 +7,8 @@ log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class KnowledgeBase:
|
class KnowledgeBase:
|
||||||
|
"""Load Atlas knowledge-base files and expose summary snippets."""
|
||||||
|
|
||||||
def __init__(self, base_dir: str) -> None:
|
def __init__(self, base_dir: str) -> None:
|
||||||
self._base = Path(base_dir) if base_dir else None
|
self._base = Path(base_dir) if base_dir else None
|
||||||
self._atlas: dict[str, Any] = {}
|
self._atlas: dict[str, Any] = {}
|
||||||
@ -14,6 +16,8 @@ class KnowledgeBase:
|
|||||||
self._loaded = False
|
self._loaded = False
|
||||||
|
|
||||||
def load(self) -> None:
|
def load(self) -> None:
|
||||||
|
"""Load catalog files once so subsequent reads stay cheap."""
|
||||||
|
|
||||||
if self._loaded or not self._base:
|
if self._loaded or not self._base:
|
||||||
return
|
return
|
||||||
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
|
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
|
||||||
@ -30,6 +34,8 @@ class KnowledgeBase:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
def summary(self) -> str:
|
def summary(self) -> str:
|
||||||
|
"""Return a short human-readable KB summary for prompt context."""
|
||||||
|
|
||||||
self.load()
|
self.load()
|
||||||
if not self._atlas:
|
if not self._atlas:
|
||||||
return ""
|
return ""
|
||||||
@ -42,12 +48,14 @@ class KnowledgeBase:
|
|||||||
if services:
|
if services:
|
||||||
parts.append(f"Services indexed: {len(services)}.")
|
parts.append(f"Services indexed: {len(services)}.")
|
||||||
if isinstance(self._atlas, dict):
|
if isinstance(self._atlas, dict):
|
||||||
keys = [key for key in self._atlas.keys() if key not in {"sources"}]
|
keys = [key for key in self._atlas if key not in {"sources"}]
|
||||||
if keys:
|
if keys:
|
||||||
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
|
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
|
||||||
return " ".join(parts)
|
return " ".join(parts)
|
||||||
|
|
||||||
def runbook_titles(self, *, limit: int = 5) -> str:
|
def runbook_titles(self, *, limit: int = 5) -> str:
|
||||||
|
"""Render the top runbook titles for prompt context."""
|
||||||
|
|
||||||
self.load()
|
self.load()
|
||||||
if not self._runbooks:
|
if not self._runbooks:
|
||||||
return ""
|
return ""
|
||||||
@ -64,6 +72,8 @@ class KnowledgeBase:
|
|||||||
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
|
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
|
||||||
|
|
||||||
def runbook_paths(self, *, limit: int = 10) -> list[str]:
|
def runbook_paths(self, *, limit: int = 10) -> list[str]:
|
||||||
|
"""Return the runbook paths used for exact-path enforcement."""
|
||||||
|
|
||||||
self.load()
|
self.load()
|
||||||
if not self._runbooks:
|
if not self._runbooks:
|
||||||
return []
|
return []
|
||||||
@ -77,6 +87,8 @@ class KnowledgeBase:
|
|||||||
return paths[:limit]
|
return paths[:limit]
|
||||||
|
|
||||||
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
|
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
|
||||||
|
"""Collect KB excerpts into prompt-sized chunks."""
|
||||||
|
|
||||||
self.load()
|
self.load()
|
||||||
if not self._base:
|
if not self._base:
|
||||||
return []
|
return []
|
||||||
|
|||||||
@ -17,6 +17,8 @@ class LLMError(RuntimeError):
|
|||||||
|
|
||||||
|
|
||||||
class LLMClient:
|
class LLMClient:
|
||||||
|
"""Wrap the Ollama chat endpoint with retries and fallback-model support."""
|
||||||
|
|
||||||
def __init__(self, settings: Settings) -> None:
|
def __init__(self, settings: Settings) -> None:
|
||||||
self._settings = settings
|
self._settings = settings
|
||||||
self._timeout = settings.ollama_timeout_sec
|
self._timeout = settings.ollama_timeout_sec
|
||||||
@ -37,6 +39,8 @@ class LLMClient:
|
|||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
timeout_sec: float | None = None,
|
timeout_sec: float | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
"""Send a chat request and return the model content text."""
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model or self._settings.ollama_model,
|
"model": model or self._settings.ollama_model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
@ -77,6 +81,8 @@ class LLMClient:
|
|||||||
|
|
||||||
|
|
||||||
def build_messages(system: str, prompt: str, *, context: str | None = None) -> list[dict[str, str]]:
|
def build_messages(system: str, prompt: str, *, context: str | None = None) -> list[dict[str, str]]:
|
||||||
|
"""Assemble the minimal chat message list used by the answer pipeline."""
|
||||||
|
|
||||||
messages: list[dict[str, str]] = [{"role": "system", "content": system}]
|
messages: list[dict[str, str]] = [{"role": "system", "content": system}]
|
||||||
if context:
|
if context:
|
||||||
messages.append({"role": "user", "content": "Context (grounded facts):\n" + context})
|
messages.append({"role": "user", "content": "Context (grounded facts):\n" + context})
|
||||||
@ -85,6 +91,8 @@ def build_messages(system: str, prompt: str, *, context: str | None = None) -> l
|
|||||||
|
|
||||||
|
|
||||||
def parse_json(text: str, *, fallback: dict[str, Any] | None = None) -> dict[str, Any]:
|
def parse_json(text: str, *, fallback: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||||
|
"""Parse a JSON blob from model output and fall back to a safe default."""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
raw = text.strip()
|
raw = text.strip()
|
||||||
if raw.startswith("`"):
|
if raw.startswith("`"):
|
||||||
|
|||||||
@ -253,7 +253,7 @@ CONTRADICTION_PROMPT = (
|
|||||||
"Question: {question}\n"
|
"Question: {question}\n"
|
||||||
"Draft: {draft}\n"
|
"Draft: {draft}\n"
|
||||||
"FactsUsed:\n{facts}\n\n"
|
"FactsUsed:\n{facts}\n\n"
|
||||||
"Return JSON: {\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}"
|
"Return JSON: {{\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}}"
|
||||||
)
|
)
|
||||||
|
|
||||||
CANDIDATE_SELECT_SYSTEM = (
|
CANDIDATE_SELECT_SYSTEM = (
|
||||||
|
|||||||
@ -1,13 +1,17 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime, timezone
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
|
|
||||||
class JsonFormatter(logging.Formatter):
|
class JsonFormatter(logging.Formatter):
|
||||||
|
"""Emit structured log records for the atlasbot services."""
|
||||||
|
|
||||||
def format(self, record: logging.LogRecord) -> str:
|
def format(self, record: logging.LogRecord) -> str:
|
||||||
|
"""Render a log record as JSON for downstream ingestion."""
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
"timestamp": datetime.now(UTC).isoformat(),
|
||||||
"level": record.levelname.lower(),
|
"level": record.levelname.lower(),
|
||||||
"logger": record.name,
|
"logger": record.name,
|
||||||
"message": record.getMessage(),
|
"message": record.getMessage(),
|
||||||
@ -21,6 +25,8 @@ class JsonFormatter(logging.Formatter):
|
|||||||
|
|
||||||
|
|
||||||
def configure_logging(level: str = "INFO") -> None:
|
def configure_logging(level: str = "INFO") -> None:
|
||||||
|
"""Install JSON logging on the process root logger."""
|
||||||
|
|
||||||
root = logging.getLogger()
|
root = logging.getLogger()
|
||||||
root.setLevel(level.upper())
|
root.setLevel(level.upper())
|
||||||
handler = logging.StreamHandler(sys.stdout)
|
handler = logging.StreamHandler(sys.stdout)
|
||||||
|
|||||||
@ -17,6 +17,8 @@ log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def _build_engine(settings) -> AnswerEngine:
|
def _build_engine(settings) -> AnswerEngine:
|
||||||
|
"""Construct the answer engine from the configured backends."""
|
||||||
|
|
||||||
kb = KnowledgeBase(settings.kb_dir)
|
kb = KnowledgeBase(settings.kb_dir)
|
||||||
snapshot = SnapshotProvider(settings)
|
snapshot = SnapshotProvider(settings)
|
||||||
llm = LLMClient(settings)
|
llm = LLMClient(settings)
|
||||||
@ -24,6 +26,8 @@ def _build_engine(settings) -> AnswerEngine:
|
|||||||
|
|
||||||
|
|
||||||
async def main() -> None:
|
async def main() -> None:
|
||||||
|
"""Start the HTTP API, Matrix bots, and queue worker."""
|
||||||
|
|
||||||
settings = load_settings()
|
settings = load_settings()
|
||||||
configure_logging("INFO")
|
configure_logging("INFO")
|
||||||
|
|
||||||
@ -45,14 +49,7 @@ async def main() -> None:
|
|||||||
queue = QueueManager(settings, handler)
|
queue = QueueManager(settings, handler)
|
||||||
await queue.start()
|
await queue.start()
|
||||||
|
|
||||||
async def answer_handler( # noqa: PLR0913
|
async def answer_handler(question: str, mode: str, history=None, conversation_id=None, snapshot_pin: bool | None = None, observer=None) -> AnswerResult:
|
||||||
question: str,
|
|
||||||
mode: str,
|
|
||||||
history=None,
|
|
||||||
conversation_id=None,
|
|
||||||
snapshot_pin: bool | None = None,
|
|
||||||
observer=None,
|
|
||||||
) -> AnswerResult:
|
|
||||||
if settings.queue_enabled:
|
if settings.queue_enabled:
|
||||||
payload = await queue.submit(
|
payload = await queue.submit(
|
||||||
{
|
{
|
||||||
@ -86,6 +83,8 @@ async def main() -> None:
|
|||||||
|
|
||||||
|
|
||||||
def result_scores(payload: dict[str, object]) -> AnswerScores:
|
def result_scores(payload: dict[str, object]) -> AnswerScores:
|
||||||
|
"""Coerce a queue payload into the public `AnswerScores` shape."""
|
||||||
|
|
||||||
scores = payload.get("scores") if isinstance(payload, dict) else None
|
scores = payload.get("scores") if isinstance(payload, dict) else None
|
||||||
if isinstance(scores, dict):
|
if isinstance(scores, dict):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -15,11 +15,15 @@ log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class MatrixClient:
|
class MatrixClient:
|
||||||
|
"""Wrap the Matrix client endpoints used by the bot runtime."""
|
||||||
|
|
||||||
def __init__(self, settings: Settings, bot: MatrixBotConfig) -> None:
|
def __init__(self, settings: Settings, bot: MatrixBotConfig) -> None:
|
||||||
self._settings = settings
|
self._settings = settings
|
||||||
self._bot = bot
|
self._bot = bot
|
||||||
|
|
||||||
async def login(self) -> str:
|
async def login(self) -> str:
|
||||||
|
"""Exchange bot credentials for a Matrix access token."""
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"type": "m.login.password",
|
"type": "m.login.password",
|
||||||
"identifier": {"type": "m.id.user", "user": self._bot.username},
|
"identifier": {"type": "m.id.user", "user": self._bot.username},
|
||||||
@ -33,6 +37,8 @@ class MatrixClient:
|
|||||||
return data.get("access_token", "")
|
return data.get("access_token", "")
|
||||||
|
|
||||||
async def resolve_room(self, token: str) -> str:
|
async def resolve_room(self, token: str) -> str:
|
||||||
|
"""Resolve the configured room alias into a room id."""
|
||||||
|
|
||||||
alias = quote(self._settings.room_alias, safe="")
|
alias = quote(self._settings.room_alias, safe="")
|
||||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/directory/room/{alias}"
|
url = f"{self._settings.matrix_base}/_matrix/client/v3/directory/room/{alias}"
|
||||||
headers = {"Authorization": f"Bearer {token}"}
|
headers = {"Authorization": f"Bearer {token}"}
|
||||||
@ -50,12 +56,16 @@ class MatrixClient:
|
|||||||
return data.get("room_id", "")
|
return data.get("room_id", "")
|
||||||
|
|
||||||
async def join_room(self, token: str, room_id: str) -> None:
|
async def join_room(self, token: str, room_id: str) -> None:
|
||||||
|
"""Join the target room if the bot is not already present."""
|
||||||
|
|
||||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/join"
|
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/join"
|
||||||
headers = {"Authorization": f"Bearer {token}"}
|
headers = {"Authorization": f"Bearer {token}"}
|
||||||
async with httpx.AsyncClient(timeout=15.0) as client:
|
async with httpx.AsyncClient(timeout=15.0) as client:
|
||||||
await client.post(url, headers=headers)
|
await client.post(url, headers=headers)
|
||||||
|
|
||||||
async def send_message(self, token: str, room_id: str, text: str) -> None:
|
async def send_message(self, token: str, room_id: str, text: str) -> None:
|
||||||
|
"""Send a plain text message to the Matrix room."""
|
||||||
|
|
||||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
|
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
|
||||||
headers = {"Authorization": f"Bearer {token}"}
|
headers = {"Authorization": f"Bearer {token}"}
|
||||||
payload = {"msgtype": "m.text", "body": text}
|
payload = {"msgtype": "m.text", "body": text}
|
||||||
@ -63,6 +73,8 @@ class MatrixClient:
|
|||||||
await client.post(url, json=payload, headers=headers)
|
await client.post(url, json=payload, headers=headers)
|
||||||
|
|
||||||
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
|
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
|
||||||
|
"""Fetch the incremental Matrix sync payload."""
|
||||||
|
|
||||||
base = f"{self._settings.matrix_base}/_matrix/client/v3/sync"
|
base = f"{self._settings.matrix_base}/_matrix/client/v3/sync"
|
||||||
params = {"timeout": 30000}
|
params = {"timeout": 30000}
|
||||||
if since:
|
if since:
|
||||||
@ -75,17 +87,9 @@ class MatrixClient:
|
|||||||
|
|
||||||
|
|
||||||
class MatrixBot:
|
class MatrixBot:
|
||||||
def __init__(
|
"""Drive Matrix conversation handling and heartbeat replies."""
|
||||||
self,
|
|
||||||
settings: Settings,
|
def __init__(self, settings: Settings, bot: MatrixBotConfig, engine: AnswerEngine, answer_handler: Callable[[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None], Awaitable[AnswerResult]] | None = None) -> None:
|
||||||
bot: MatrixBotConfig,
|
|
||||||
engine: AnswerEngine,
|
|
||||||
answer_handler: Callable[
|
|
||||||
[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None],
|
|
||||||
Awaitable[AnswerResult],
|
|
||||||
]
|
|
||||||
| None = None,
|
|
||||||
) -> None:
|
|
||||||
self._settings = settings
|
self._settings = settings
|
||||||
self._bot = bot
|
self._bot = bot
|
||||||
self._engine = engine
|
self._engine = engine
|
||||||
@ -94,6 +98,8 @@ class MatrixBot:
|
|||||||
self._history: dict[str, list[dict[str, str]]] = {}
|
self._history: dict[str, list[dict[str, str]]] = {}
|
||||||
|
|
||||||
async def run(self) -> None:
|
async def run(self) -> None:
|
||||||
|
"""Continuously bootstrap, sync, and answer Matrix events."""
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
token = await self._client.login()
|
token = await self._client.login()
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
from nats.aio.client import Client as NATS
|
from nats.aio.client import Client as NATS
|
||||||
from nats.js.errors import NotFoundError
|
from nats.js.errors import NotFoundError
|
||||||
@ -12,6 +13,8 @@ log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class QueueManager:
|
class QueueManager:
|
||||||
|
"""Manage optional NATS-backed work queue processing."""
|
||||||
|
|
||||||
def __init__(self, settings: Settings, handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> None:
|
def __init__(self, settings: Settings, handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> None:
|
||||||
self._settings = settings
|
self._settings = settings
|
||||||
self._handler = handler
|
self._handler = handler
|
||||||
@ -20,6 +23,8 @@ class QueueManager:
|
|||||||
self._worker_task: asyncio.Task | None = None
|
self._worker_task: asyncio.Task | None = None
|
||||||
|
|
||||||
async def start(self) -> None:
|
async def start(self) -> None:
|
||||||
|
"""Connect to NATS and start the worker loop when queueing is enabled."""
|
||||||
|
|
||||||
if not self._settings.queue_enabled:
|
if not self._settings.queue_enabled:
|
||||||
return
|
return
|
||||||
self._nc = NATS()
|
self._nc = NATS()
|
||||||
@ -29,12 +34,16 @@ class QueueManager:
|
|||||||
self._worker_task = asyncio.create_task(self._worker_loop())
|
self._worker_task = asyncio.create_task(self._worker_loop())
|
||||||
|
|
||||||
async def stop(self) -> None:
|
async def stop(self) -> None:
|
||||||
|
"""Drain the NATS connection and cancel background work."""
|
||||||
|
|
||||||
if self._worker_task:
|
if self._worker_task:
|
||||||
self._worker_task.cancel()
|
self._worker_task.cancel()
|
||||||
if self._nc:
|
if self._nc:
|
||||||
await self._nc.drain()
|
await self._nc.drain()
|
||||||
|
|
||||||
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
|
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
"""Submit work to NATS or fall back to direct handling."""
|
||||||
|
|
||||||
if not self._settings.queue_enabled:
|
if not self._settings.queue_enabled:
|
||||||
return await self._handler(payload)
|
return await self._handler(payload)
|
||||||
if not self._nc or not self._js:
|
if not self._nc or not self._js:
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
8
atlasbot/snapshot/builder/__init__.py
Normal file
8
atlasbot/snapshot/builder/__init__.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
"""Snapshot summary builder and text render helpers."""
|
||||||
|
|
||||||
|
from .core_a import *
|
||||||
|
from .core_b import *
|
||||||
|
from .format_a import *
|
||||||
|
from .format_b import *
|
||||||
|
from .format_c import *
|
||||||
|
from .summary_text import *
|
||||||
492
atlasbot/snapshot/builder/core_a.py
Normal file
492
atlasbot/snapshot/builder/core_a.py
Normal file
@ -0,0 +1,492 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from atlasbot.config import Settings
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
PVC_USAGE_CRITICAL = 90
|
||||||
|
|
||||||
|
_BYTES_KB = 1024
|
||||||
|
_BYTES_MB = 1024 * 1024
|
||||||
|
_BYTES_GB = 1024 * 1024 * 1024
|
||||||
|
_VALUE_PAIR_LEN = 2
|
||||||
|
class SnapshotProvider:
|
||||||
|
"""Fetch and cache the Ariadne snapshot used by the answer engine."""
|
||||||
|
|
||||||
|
def __init__(self, settings: Settings) -> None:
|
||||||
|
self._settings = settings
|
||||||
|
self._cache: dict[str, Any] = {}
|
||||||
|
self._cache_ts = 0.0
|
||||||
|
|
||||||
|
def _cache_valid(self) -> bool:
|
||||||
|
return time.monotonic() - self._cache_ts < max(5, self._settings.snapshot_ttl_sec)
|
||||||
|
|
||||||
|
def get(self) -> dict[str, Any] | None:
|
||||||
|
"""Return the cached snapshot or refresh it from Ariadne."""
|
||||||
|
|
||||||
|
if self._cache and self._cache_valid():
|
||||||
|
return self._cache
|
||||||
|
if not self._settings.ariadne_state_url:
|
||||||
|
return self._cache or None
|
||||||
|
headers = {}
|
||||||
|
if self._settings.ariadne_state_token:
|
||||||
|
headers["x-internal-token"] = self._settings.ariadne_state_token
|
||||||
|
try:
|
||||||
|
resp = httpx.get(self._settings.ariadne_state_url, headers=headers, timeout=10.0)
|
||||||
|
resp.raise_for_status()
|
||||||
|
payload = resp.json()
|
||||||
|
if isinstance(payload, dict):
|
||||||
|
self._cache = payload
|
||||||
|
self._cache_ts = time.monotonic()
|
||||||
|
return payload
|
||||||
|
except Exception as exc:
|
||||||
|
log.warning("snapshot fetch failed", extra={"extra": {"error": str(exc)}})
|
||||||
|
return self._cache or None
|
||||||
|
|
||||||
|
|
||||||
|
def _node_usage_top(series: list[dict[str, Any]]) -> dict[str, Any] | None:
|
||||||
|
best = None
|
||||||
|
for entry in series or []:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
node = entry.get("node")
|
||||||
|
value = entry.get("value")
|
||||||
|
try:
|
||||||
|
numeric = float(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
if best is None or numeric > best["value"]:
|
||||||
|
best = {"node": node, "value": numeric}
|
||||||
|
return best
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
|
||||||
|
"""Condense a raw snapshot into the summary shape used for prompts."""
|
||||||
|
|
||||||
|
if not snapshot:
|
||||||
|
return {}
|
||||||
|
from .core_b import (
|
||||||
|
_build_flux,
|
||||||
|
_build_hottest,
|
||||||
|
_build_namespace_capacity,
|
||||||
|
_build_namespace_capacity_summary,
|
||||||
|
_build_node_load_summary,
|
||||||
|
_build_pvc,
|
||||||
|
_build_workloads,
|
||||||
|
)
|
||||||
|
from .format_c import _build_cluster_watchlist
|
||||||
|
|
||||||
|
nodes_detail = _nodes_detail(snapshot)
|
||||||
|
metrics = _metrics(snapshot)
|
||||||
|
summary: dict[str, Any] = {}
|
||||||
|
|
||||||
|
if isinstance(snapshot.get("nodes_summary"), dict):
|
||||||
|
summary["nodes_summary"] = snapshot.get("nodes_summary")
|
||||||
|
if metrics:
|
||||||
|
summary["metrics"] = metrics
|
||||||
|
if isinstance(snapshot.get("jobs"), dict):
|
||||||
|
summary["jobs"] = snapshot.get("jobs")
|
||||||
|
summary.update(_build_nodes(snapshot))
|
||||||
|
summary.update(_build_pressure(snapshot))
|
||||||
|
summary.update(_build_hardware(nodes_detail))
|
||||||
|
summary.update(_build_hardware_by_node(nodes_detail))
|
||||||
|
summary.update(_build_hardware_usage(metrics, summary.get("hardware_by_node")))
|
||||||
|
summary.update(_build_node_facts(nodes_detail))
|
||||||
|
summary.update(_build_node_ages(nodes_detail))
|
||||||
|
summary.update(_build_node_taints(nodes_detail))
|
||||||
|
summary.update(_build_capacity(metrics))
|
||||||
|
summary.update(_build_pods(metrics))
|
||||||
|
summary.update(_build_namespace_pods(snapshot))
|
||||||
|
summary.update(_build_namespace_nodes(snapshot))
|
||||||
|
summary.update(_build_node_pods(snapshot))
|
||||||
|
summary.update(_build_node_pods_top(metrics))
|
||||||
|
summary.update(_build_pod_issues(snapshot))
|
||||||
|
summary.update(_build_workload_health(snapshot))
|
||||||
|
summary.update(_build_events(snapshot))
|
||||||
|
summary.update(_build_event_summary(snapshot))
|
||||||
|
summary.update(_build_postgres(metrics))
|
||||||
|
summary.update(_build_hottest(metrics))
|
||||||
|
summary.update(_build_pvc(metrics))
|
||||||
|
summary.update(_build_namespace_capacity(metrics))
|
||||||
|
summary.update(_build_namespace_capacity_summary(metrics))
|
||||||
|
summary.update(_build_longhorn(snapshot))
|
||||||
|
summary.update(_build_root_disk_headroom(metrics))
|
||||||
|
summary.update(_build_node_load(metrics))
|
||||||
|
summary.update(_build_node_load_summary(metrics))
|
||||||
|
summary.update(_build_cluster_watchlist(summary))
|
||||||
|
summary.update(_build_workloads(snapshot))
|
||||||
|
summary.update(_build_flux(snapshot))
|
||||||
|
_merge_cluster_summary(snapshot, summary)
|
||||||
|
_augment_lexicon(summary)
|
||||||
|
return summary
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_cluster_summary(snapshot: dict[str, Any], summary: dict[str, Any]) -> None:
|
||||||
|
cluster_summary = snapshot.get("summary") if isinstance(snapshot.get("summary"), dict) else {}
|
||||||
|
if not cluster_summary:
|
||||||
|
return
|
||||||
|
_merge_cluster_fields(
|
||||||
|
summary,
|
||||||
|
cluster_summary,
|
||||||
|
{
|
||||||
|
"signals": list,
|
||||||
|
"profiles": dict,
|
||||||
|
"inventory": dict,
|
||||||
|
"topology": dict,
|
||||||
|
"lexicon": dict,
|
||||||
|
"cross_stats": dict,
|
||||||
|
"baseline_deltas": dict,
|
||||||
|
"pod_issue_summary": dict,
|
||||||
|
"trend_requests": dict,
|
||||||
|
"pod_waiting_trends": dict,
|
||||||
|
"pod_terminated_trends": dict,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_cluster_fields(summary: dict[str, Any], cluster_summary: dict[str, Any], field_types: dict[str, type]) -> None:
|
||||||
|
for key, expected in field_types.items():
|
||||||
|
value = cluster_summary.get(key)
|
||||||
|
if isinstance(value, expected):
|
||||||
|
summary[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
def _augment_lexicon(summary: dict[str, Any]) -> None:
|
||||||
|
lexicon = summary.get("lexicon")
|
||||||
|
if not isinstance(lexicon, dict):
|
||||||
|
lexicon = {"terms": [], "aliases": {}}
|
||||||
|
terms = list(lexicon.get("terms") or [])
|
||||||
|
aliases = dict(lexicon.get("aliases") or {})
|
||||||
|
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
|
||||||
|
hardware_map = {
|
||||||
|
"rpi5": "Raspberry Pi 5 nodes",
|
||||||
|
"rpi4": "Raspberry Pi 4 nodes",
|
||||||
|
"rpi": "Raspberry Pi nodes",
|
||||||
|
"jetson": "NVIDIA Jetson nodes",
|
||||||
|
"amd64": "AMD64 nodes",
|
||||||
|
}
|
||||||
|
existing_terms = {entry.get("term") for entry in terms if isinstance(entry, dict)}
|
||||||
|
for key, meaning in hardware_map.items():
|
||||||
|
if key not in hardware:
|
||||||
|
continue
|
||||||
|
if key not in existing_terms:
|
||||||
|
terms.append({"term": key, "meaning": meaning})
|
||||||
|
if key not in aliases:
|
||||||
|
aliases[key] = meaning
|
||||||
|
if "raspberry pi 5" not in aliases and "rpi5" in hardware:
|
||||||
|
aliases["raspberry pi 5"] = "rpi5"
|
||||||
|
if "raspberry pi 4" not in aliases and "rpi4" in hardware:
|
||||||
|
aliases["raspberry pi 4"] = "rpi4"
|
||||||
|
lexicon["terms"] = terms
|
||||||
|
lexicon["aliases"] = aliases
|
||||||
|
summary["lexicon"] = lexicon
|
||||||
|
|
||||||
|
|
||||||
|
def _nodes_detail(snapshot: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
items = snapshot.get("nodes_detail")
|
||||||
|
return items if isinstance(items, list) else []
|
||||||
|
|
||||||
|
|
||||||
|
def _metrics(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
metrics = snapshot.get("metrics")
|
||||||
|
return metrics if isinstance(metrics, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
|
||||||
|
if not nodes_summary:
|
||||||
|
return {}
|
||||||
|
return {
|
||||||
|
"nodes": {
|
||||||
|
"total": nodes_summary.get("total"),
|
||||||
|
"ready": nodes_summary.get("ready"),
|
||||||
|
"not_ready": nodes_summary.get("not_ready"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pressure(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
|
||||||
|
pressure = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary.get("pressure_nodes"), dict) else {}
|
||||||
|
if not pressure:
|
||||||
|
return {}
|
||||||
|
return {"pressure_nodes": pressure}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_hardware(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
|
hardware: dict[str, list[str]] = {}
|
||||||
|
for node in nodes_detail or []:
|
||||||
|
if not isinstance(node, dict):
|
||||||
|
continue
|
||||||
|
name = node.get("name")
|
||||||
|
hardware_class = node.get("hardware") or "unknown"
|
||||||
|
if name:
|
||||||
|
hardware.setdefault(hardware_class, []).append(name)
|
||||||
|
if not hardware:
|
||||||
|
return {}
|
||||||
|
return {"hardware": {key: sorted(value) for key, value in hardware.items()}}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_hardware_by_node(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
|
mapping: dict[str, str] = {}
|
||||||
|
for node in nodes_detail or []:
|
||||||
|
if not isinstance(node, dict):
|
||||||
|
continue
|
||||||
|
name = node.get("name")
|
||||||
|
if isinstance(name, str) and name:
|
||||||
|
hardware = node.get("hardware") or "unknown"
|
||||||
|
mapping[name] = str(hardware)
|
||||||
|
return {"hardware_by_node": mapping} if mapping else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_hardware_usage(metrics: dict[str, Any], hardware_by_node: dict[str, Any] | None) -> dict[str, Any]: # noqa: C901
|
||||||
|
if not isinstance(hardware_by_node, dict) or not hardware_by_node:
|
||||||
|
return {}
|
||||||
|
node_load = metrics.get("node_load") if isinstance(metrics.get("node_load"), list) else []
|
||||||
|
if not node_load:
|
||||||
|
return {}
|
||||||
|
buckets: dict[str, dict[str, list[float]]] = {}
|
||||||
|
for entry in node_load:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
node = entry.get("node")
|
||||||
|
if not isinstance(node, str) or not node:
|
||||||
|
continue
|
||||||
|
hardware = hardware_by_node.get(node, "unknown")
|
||||||
|
bucket = buckets.setdefault(str(hardware), {"load_index": [], "cpu": [], "ram": [], "net": [], "io": []})
|
||||||
|
for key in ("load_index", "cpu", "ram", "net", "io"):
|
||||||
|
value = entry.get(key)
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
bucket[key].append(float(value))
|
||||||
|
output: list[dict[str, Any]] = []
|
||||||
|
for hardware, metrics_bucket in buckets.items():
|
||||||
|
row: dict[str, Any] = {"hardware": hardware}
|
||||||
|
for key, values in metrics_bucket.items():
|
||||||
|
if values:
|
||||||
|
row[key] = sum(values) / len(values)
|
||||||
|
output.append(row)
|
||||||
|
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
|
||||||
|
return {"hardware_usage_avg": output}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_ages(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
|
ages: list[dict[str, Any]] = []
|
||||||
|
for node in nodes_detail or []:
|
||||||
|
if not isinstance(node, dict):
|
||||||
|
continue
|
||||||
|
name = node.get("name")
|
||||||
|
age = node.get("age_hours")
|
||||||
|
if name and isinstance(age, (int, float)):
|
||||||
|
ages.append({"name": name, "age_hours": age})
|
||||||
|
ages.sort(key=lambda item: -(item.get("age_hours") or 0))
|
||||||
|
return {"node_ages": ages[:5]} if ages else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _count_values(nodes_detail: list[dict[str, Any]], key: str) -> dict[str, int]:
|
||||||
|
counts: dict[str, int] = {}
|
||||||
|
for node in nodes_detail or []:
|
||||||
|
if not isinstance(node, dict):
|
||||||
|
continue
|
||||||
|
value = node.get(key)
|
||||||
|
if isinstance(value, str) and value:
|
||||||
|
counts[value] = counts.get(value, 0) + 1
|
||||||
|
return counts
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_facts(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
|
if not nodes_detail:
|
||||||
|
return {}
|
||||||
|
role_counts: dict[str, int] = {}
|
||||||
|
for node in nodes_detail:
|
||||||
|
if not isinstance(node, dict):
|
||||||
|
continue
|
||||||
|
if node.get("is_worker"):
|
||||||
|
role_counts["worker"] = role_counts.get("worker", 0) + 1
|
||||||
|
roles = node.get("roles")
|
||||||
|
if isinstance(roles, list):
|
||||||
|
for role in roles:
|
||||||
|
if isinstance(role, str) and role:
|
||||||
|
role_counts[role] = role_counts.get(role, 0) + 1
|
||||||
|
return {
|
||||||
|
"node_arch_counts": _count_values(nodes_detail, "arch"),
|
||||||
|
"node_os_counts": _count_values(nodes_detail, "os"),
|
||||||
|
"node_kubelet_versions": _count_values(nodes_detail, "kubelet"),
|
||||||
|
"node_kernel_versions": _count_values(nodes_detail, "kernel"),
|
||||||
|
"node_runtime_versions": _count_values(nodes_detail, "container_runtime"),
|
||||||
|
"node_role_counts": role_counts,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_taints(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
|
taints: dict[str, list[str]] = {}
|
||||||
|
for node in nodes_detail or []:
|
||||||
|
if not isinstance(node, dict):
|
||||||
|
continue
|
||||||
|
name = node.get("name")
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
entries = node.get("taints") if isinstance(node.get("taints"), list) else []
|
||||||
|
for entry in entries:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
key = entry.get("key")
|
||||||
|
effect = entry.get("effect")
|
||||||
|
if isinstance(key, str) and isinstance(effect, str):
|
||||||
|
label = f"{key}:{effect}"
|
||||||
|
taints.setdefault(label, []).append(name)
|
||||||
|
if not taints:
|
||||||
|
return {}
|
||||||
|
return {"node_taints": {key: sorted(names) for key, names in taints.items()}}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_root_disk_headroom(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
|
||||||
|
disk = node_usage.get("disk") if isinstance(node_usage.get("disk"), list) else []
|
||||||
|
if not disk:
|
||||||
|
return {}
|
||||||
|
entries = []
|
||||||
|
for entry in disk:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
node = entry.get("node")
|
||||||
|
try:
|
||||||
|
used_pct = float(entry.get("value"))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
headroom = max(0.0, 100.0 - used_pct)
|
||||||
|
if node:
|
||||||
|
entries.append({"node": node, "headroom_pct": headroom, "used_pct": used_pct})
|
||||||
|
entries.sort(key=lambda item: (item.get("headroom_pct") or 0.0, item.get("node") or ""))
|
||||||
|
return {"root_disk_low_headroom": entries[:5]} if entries else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_longhorn(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
longhorn = snapshot.get("longhorn")
|
||||||
|
return {"longhorn": longhorn} if isinstance(longhorn, dict) and longhorn else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_load(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
node_load = metrics.get("node_load")
|
||||||
|
if not isinstance(node_load, list) or not node_load:
|
||||||
|
return {}
|
||||||
|
return {"node_load": node_load}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pods(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
pods = {
|
||||||
|
"running": metrics.get("pods_running"),
|
||||||
|
"pending": metrics.get("pods_pending"),
|
||||||
|
"failed": metrics.get("pods_failed"),
|
||||||
|
"succeeded": metrics.get("pods_succeeded"),
|
||||||
|
}
|
||||||
|
if not any(value is not None for value in pods.values()):
|
||||||
|
return {}
|
||||||
|
return {"pods": pods}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
if not metrics:
|
||||||
|
return {}
|
||||||
|
capacity = {
|
||||||
|
"cpu": metrics.get("capacity_cpu"),
|
||||||
|
"allocatable_cpu": metrics.get("allocatable_cpu"),
|
||||||
|
"mem_bytes": metrics.get("capacity_mem_bytes"),
|
||||||
|
"allocatable_mem_bytes": metrics.get("allocatable_mem_bytes"),
|
||||||
|
"pods": metrics.get("capacity_pods"),
|
||||||
|
"allocatable_pods": metrics.get("allocatable_pods"),
|
||||||
|
}
|
||||||
|
if not any(value is not None for value in capacity.values()):
|
||||||
|
return {}
|
||||||
|
return {"capacity": capacity}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_namespace_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
namespaces = snapshot.get("namespace_pods")
|
||||||
|
if not isinstance(namespaces, list) or not namespaces:
|
||||||
|
return {}
|
||||||
|
return {"namespace_pods": namespaces}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_namespace_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
namespace_nodes = snapshot.get("namespace_nodes")
|
||||||
|
if not isinstance(namespace_nodes, list) or not namespace_nodes:
|
||||||
|
return {}
|
||||||
|
return {"namespace_nodes": namespace_nodes}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
node_pods = snapshot.get("node_pods")
|
||||||
|
if not isinstance(node_pods, list) or not node_pods:
|
||||||
|
return {}
|
||||||
|
return {"node_pods": node_pods}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_pods_top(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
top = metrics.get("node_pods_top")
|
||||||
|
if not isinstance(top, list) or not top:
|
||||||
|
return {}
|
||||||
|
return {"node_pods_top": top}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
pod_issues = snapshot.get("pod_issues")
|
||||||
|
if not isinstance(pod_issues, dict) or not pod_issues:
|
||||||
|
return {}
|
||||||
|
return {"pod_issues": pod_issues}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_workload_health(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
health = snapshot.get("workloads_health")
|
||||||
|
if not isinstance(health, dict) or not health:
|
||||||
|
return {}
|
||||||
|
deployments = health.get("deployments")
|
||||||
|
statefulsets = health.get("statefulsets")
|
||||||
|
daemonsets = health.get("daemonsets")
|
||||||
|
if not isinstance(deployments, dict) or not isinstance(statefulsets, dict) or not isinstance(daemonsets, dict):
|
||||||
|
return {}
|
||||||
|
return {
|
||||||
|
"workloads_health": {
|
||||||
|
"deployments": deployments,
|
||||||
|
"statefulsets": statefulsets,
|
||||||
|
"daemonsets": daemonsets,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_events(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
events = snapshot.get("events")
|
||||||
|
if not isinstance(events, dict) or not events:
|
||||||
|
return {}
|
||||||
|
return {"events": events}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_event_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
events = snapshot.get("events")
|
||||||
|
if not isinstance(events, dict) or not events:
|
||||||
|
return {}
|
||||||
|
summary = {}
|
||||||
|
if isinstance(events.get("warnings_top_reason"), dict):
|
||||||
|
summary["warnings_top_reason"] = events.get("warnings_top_reason")
|
||||||
|
if events.get("warnings_latest"):
|
||||||
|
summary["warnings_latest"] = events.get("warnings_latest")
|
||||||
|
return {"event_summary": summary} if summary else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
|
||||||
|
if not postgres:
|
||||||
|
return {}
|
||||||
|
return {
|
||||||
|
"postgres": {
|
||||||
|
"used": postgres.get("used"),
|
||||||
|
"max": postgres.get("max"),
|
||||||
|
"hottest_db": postgres.get("hottest_db"),
|
||||||
|
"by_db": postgres.get("by_db"),
|
||||||
|
}
|
||||||
|
}
|
||||||
57
atlasbot/snapshot/builder/core_b.py
Normal file
57
atlasbot/snapshot/builder/core_b.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .core_a import _node_usage_top
|
||||||
|
|
||||||
|
def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
    """Pick the top consumer per resource dimension via ``_node_usage_top``."""
    usage = metrics.get("node_usage")
    if not isinstance(usage, dict):
        usage = {}
    hottest = {
        dim: winner
        for dim in ("cpu", "ram", "net", "io", "disk")
        if (winner := _node_usage_top(usage.get(dim, [])))
    }
    return {"hottest": hottest} if hottest else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pvc(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
pvc_usage = metrics.get("pvc_usage_top") if isinstance(metrics.get("pvc_usage_top"), list) else []
|
||||||
|
if not pvc_usage:
|
||||||
|
return {}
|
||||||
|
return {"pvc_usage_top": pvc_usage}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_namespace_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
capacity = metrics.get("namespace_capacity")
|
||||||
|
if not isinstance(capacity, list) or not capacity:
|
||||||
|
return {}
|
||||||
|
return {"namespace_capacity": capacity}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_namespace_capacity_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
summary = metrics.get("namespace_capacity_summary")
|
||||||
|
if not isinstance(summary, dict) or not summary:
|
||||||
|
return {}
|
||||||
|
return {"namespace_capacity_summary": summary}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_node_load_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
summary = metrics.get("node_load_summary")
|
||||||
|
if not isinstance(summary, dict) or not summary:
|
||||||
|
return {}
|
||||||
|
return {"node_load_summary": summary}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else []
|
||||||
|
return {"workloads": workloads}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_flux(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
flux = snapshot.get("flux") if isinstance(snapshot.get("flux"), dict) else {}
|
||||||
|
return {"flux": flux}
|
||||||
|
|
||||||
|
|
||||||
|
# Re-export every module-level name, including the underscore-prefixed
# helpers, since sibling builder modules consume them via star-import
# (e.g. `from .core_b import *`). Dunder names stay private.
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||||
497
atlasbot/snapshot/builder/format_a.py
Normal file
497
atlasbot/snapshot/builder/format_a.py
Normal file
@ -0,0 +1,497 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .core_a import _BYTES_GB, _BYTES_KB, _BYTES_MB
|
||||||
|
from .core_b import *
|
||||||
|
|
||||||
|
|
||||||
|
def _format_float(value: Any) -> str:
|
||||||
|
try:
|
||||||
|
numeric = float(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return str(value)
|
||||||
|
return f"{numeric:.2f}".rstrip("0").rstrip(".")
|
||||||
|
|
||||||
|
|
||||||
|
def _format_rate_bytes(value: Any) -> str:
    """Render a bytes-per-second rate with an MB/s, KB/s or B/s unit."""
    try:
        rate = float(value)
    except (TypeError, ValueError):
        return str(value)
    # Largest unit whose threshold the rate reaches wins.
    for threshold, unit in ((_BYTES_MB, "MB/s"), (_BYTES_KB, "KB/s")):
        if rate >= threshold:
            return f"{rate / threshold:.2f} {unit}"
    return f"{rate:.2f} B/s"
|
||||||
|
|
||||||
|
|
||||||
|
def _format_bytes(value: Any) -> str:
    """Render a byte count with a GB, MB, KB or B unit."""
    try:
        size = float(value)
    except (TypeError, ValueError):
        return str(value)
    # Largest unit whose threshold the size reaches wins.
    for threshold, unit in ((_BYTES_GB, "GB"), (_BYTES_MB, "MB"), (_BYTES_KB, "KB")):
        if size >= threshold:
            return f"{size / threshold:.2f} {unit}"
    return f"{size:.2f} B"
|
||||||
|
|
||||||
|
|
||||||
|
def _format_kv_map(values: dict[str, Any]) -> str:
|
||||||
|
parts = []
|
||||||
|
for key, value in values.items():
|
||||||
|
parts.append(f"{key}={value}")
|
||||||
|
return ", ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_names(names: list[str]) -> str:
|
||||||
|
if not names:
|
||||||
|
return ""
|
||||||
|
return ", ".join(sorted(names))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_nodes(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append node readiness lines from *summary* to *lines* in place.

    Emits a combined "nodes:" line plus per-field lines, then (when a
    ``nodes_summary`` dict is present) not-ready names, arch counts and
    role counts. Does nothing when ``summary["nodes"]`` is absent/empty.
    """
    nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
    if not nodes:
        return
    # Worker readiness is optional and only shown when both counts exist.
    workers = {}
    if isinstance(summary.get("nodes_summary"), dict):
        workers = summary["nodes_summary"].get("workers") or {}
    workers_total = workers.get("total")
    workers_ready = workers.get("ready")
    workers_str = ""
    if workers_total is not None and workers_ready is not None:
        workers_str = f", workers_ready={workers_ready}/{workers_total}"
    total = nodes.get("total")
    ready = nodes.get("ready")
    not_ready = nodes.get("not_ready")
    # A missing not_ready count is reported as 0, unlike total/ready.
    if not_ready is None:
        not_ready = 0
    lines.append(f"nodes: total={total}, ready={ready}, not_ready={not_ready}{workers_str}")
    if total is not None:
        lines.append(f"nodes_total: {total}")
    if ready is not None:
        lines.append(f"nodes_ready: {ready}")
    if not_ready is not None:
        lines.append(f"nodes_not_ready_count: {not_ready}")
    # The remaining lines all come from the optional nodes_summary section.
    if not isinstance(summary.get("nodes_summary"), dict):
        return
    not_ready_names = summary["nodes_summary"].get("not_ready_names") or []
    if not_ready_names:
        lines.append("nodes_not_ready: " + _format_names(not_ready_names))
    by_arch = summary["nodes_summary"].get("by_arch") or {}
    if isinstance(by_arch, dict) and by_arch:
        lines.append("archs: " + _format_kv_map(by_arch))
    by_role = summary["nodes_summary"].get("by_role") or {}
    if isinstance(by_role, dict) and by_role:
        lines.append("roles: " + _format_kv_map(by_role))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_hardware(lines: list[str], summary: dict[str, Any]) -> None:
    """Append one 'hardware:' line with per-kind counts and node names."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    rendered = []
    for kind, names in hardware.items():
        if not isinstance(names, list):
            continue
        entry = f"{kind}={len(names)}"
        listing = _format_names([str(name) for name in names if name])
        if listing:
            entry = f"{entry} ({listing})"
        rendered.append(entry)
    if rendered:
        lines.append("hardware: " + "; ".join(sorted(rendered)))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_hardware_groups(lines: list[str], summary: dict[str, Any]) -> None:
    """Append one 'hardware_nodes:' line listing node names per hardware kind."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    rendered = []
    for kind, names in hardware.items():
        if not isinstance(names, list):
            continue
        listing = _format_names([str(name) for name in names if name])
        if listing:
            rendered.append(f"{kind}={listing}")
    if rendered:
        lines.append("hardware_nodes: " + "; ".join(sorted(rendered)))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_node_ages(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 'node_age_top' built from the first three node_ages entries."""
    ages = summary.get("node_ages")
    if not isinstance(ages, list) or not ages:
        return
    rendered = []
    for record in ages[:3]:
        if not isinstance(record, dict):
            continue
        node = record.get("name")
        hours = record.get("age_hours")
        if node and isinstance(hours, (int, float)):
            rendered.append(f"{node}={_format_float(hours)}h")
    if rendered:
        lines.append("node_age_top: " + "; ".join(rendered))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_node_taints(lines: list[str], summary: dict[str, Any]) -> None:
    """Append one 'node_taints:' line with per-taint counts and node names."""
    taints = summary.get("node_taints")
    if not isinstance(taints, dict) or not taints:
        return
    rendered = []
    for taint, nodes in taints.items():
        if not isinstance(nodes, list):
            continue
        listing = _format_names([str(node) for node in nodes if node])
        entry = f"{taint}={len(nodes)}"
        rendered.append(f"{entry} ({listing})" if listing else entry)
    if rendered:
        lines.append("node_taints: " + "; ".join(sorted(rendered)))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_node_facts(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
def top_counts(label: str, counts: dict[str, int], limit: int = 4) -> None:
|
||||||
|
if not counts:
|
||||||
|
return
|
||||||
|
top = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:limit]
|
||||||
|
rendered = "; ".join([f"{name}={count}" for name, count in top])
|
||||||
|
if rendered:
|
||||||
|
lines.append(f"{label}: {rendered}")
|
||||||
|
|
||||||
|
top_counts("node_arch", summary.get("node_arch_counts") or {})
|
||||||
|
top_counts("node_os", summary.get("node_os_counts") or {})
|
||||||
|
top_counts("node_kubelet_versions", summary.get("node_kubelet_versions") or {})
|
||||||
|
top_counts("node_kernel_versions", summary.get("node_kernel_versions") or {})
|
||||||
|
top_counts("node_runtime_versions", summary.get("node_runtime_versions") or {})
|
||||||
|
top_counts("node_roles", summary.get("node_role_counts") or {})
|
||||||
|
|
||||||
|
|
||||||
|
def _append_pressure(lines: list[str], summary: dict[str, Any]) -> None:
    """Append one 'node_pressure:' line with per-condition counts and names."""
    pressure = summary.get("pressure_nodes")
    if not isinstance(pressure, dict) or not pressure:
        return
    rendered = []
    for condition, nodes in sorted(pressure.items()):
        if not nodes:
            continue
        listing = _format_names([str(node) for node in nodes if node])
        entry = f"{condition}={len(nodes)}"
        rendered.append(f"{entry} ({listing})" if listing else entry)
    if rendered:
        lines.append("node_pressure: " + "; ".join(rendered))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_pods(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
pods = summary.get("pods") if isinstance(summary.get("pods"), dict) else {}
|
||||||
|
if not pods:
|
||||||
|
return
|
||||||
|
lines.append(
|
||||||
|
"pods: running={running}, pending={pending}, failed={failed}, succeeded={succeeded}".format(
|
||||||
|
running=pods.get("running"),
|
||||||
|
pending=pods.get("pending"),
|
||||||
|
failed=pods.get("failed"),
|
||||||
|
succeeded=pods.get("succeeded"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_capacity(lines: list[str], summary: dict[str, Any]) -> None:
    """Append one 'capacity:' line summarising cluster capacity figures."""
    capacity = summary.get("capacity")
    if not isinstance(capacity, dict) or not capacity:
        return
    # (source key, output label, renderer) in the order shown.
    specs = (
        ("cpu", "cpu", _format_float),
        ("allocatable_cpu", "alloc_cpu", _format_float),
        ("mem_bytes", "mem", _format_bytes),
        ("allocatable_mem_bytes", "alloc_mem", _format_bytes),
        ("pods", "pods", _format_float),
        ("allocatable_pods", "alloc_pods", _format_float),
    )
    rendered = [
        f"{label}={render(capacity.get(key))}"
        for key, label, render in specs
        if capacity.get(key) is not None
    ]
    if rendered:
        lines.append("capacity: " + "; ".join(rendered))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_pods(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
namespaces = summary.get("namespace_pods")
|
||||||
|
if not isinstance(namespaces, list) or not namespaces:
|
||||||
|
return
|
||||||
|
top = sorted(
|
||||||
|
(item for item in namespaces if isinstance(item, dict)),
|
||||||
|
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
|
||||||
|
)[:8]
|
||||||
|
parts = []
|
||||||
|
for item in top:
|
||||||
|
name = item.get("namespace")
|
||||||
|
total = item.get("pods_total")
|
||||||
|
running = item.get("pods_running")
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
label = f"{name}={total}"
|
||||||
|
if running is not None:
|
||||||
|
label = f"{label} (running={running})"
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append("namespaces_top: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_nodes(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
namespace_nodes = summary.get("namespace_nodes")
|
||||||
|
if not isinstance(namespace_nodes, list) or not namespace_nodes:
|
||||||
|
return
|
||||||
|
top = sorted(
|
||||||
|
(item for item in namespace_nodes if isinstance(item, dict)),
|
||||||
|
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
|
||||||
|
)[:8]
|
||||||
|
parts = []
|
||||||
|
for item in top:
|
||||||
|
namespace = item.get("namespace")
|
||||||
|
pods_total = item.get("pods_total")
|
||||||
|
primary = item.get("primary_node")
|
||||||
|
if namespace:
|
||||||
|
label = f"{namespace}={pods_total}"
|
||||||
|
if primary:
|
||||||
|
label = f"{label} (primary={primary})"
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append("namespace_nodes_top: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_node_pods(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append per-node pod-count lines to *lines* in place.

    Emits, when data is available: "node_pods_top" (eight busiest nodes,
    with their top namespaces), "node_pods_max" (the single busiest node)
    and one "node_namespaces_top" line per top node that has namespace data.
    """
    node_pods = summary.get("node_pods")
    if not isinstance(node_pods, list) or not node_pods:
        return
    # Pass 1: coerce pods_total to int (missing/falsy -> 0) for ranking.
    sortable: list[dict[str, Any]] = []
    for item in node_pods:
        if not isinstance(item, dict):
            continue
        try:
            pods_value = int(item.get("pods_total") or 0)
        except (TypeError, ValueError):
            continue
        sortable.append({**item, "pods_total": pods_value})
    top = sorted(sortable, key=lambda item: (-int(item.get("pods_total") or 0), item.get("node") or ""))[:8]
    # Pass 2: find the overall max. NOTE(review): unlike pass 1, this uses
    # int(pods_total) without the `or 0` fallback, so entries with a None
    # pods_total are skipped here but counted as 0 above — confirm intended.
    max_entry = None
    for entry in node_pods:
        if not isinstance(entry, dict):
            continue
        pods_total = entry.get("pods_total")
        try:
            pods_value = int(pods_total)
        except (TypeError, ValueError):
            continue
        if max_entry is None or pods_value > max_entry["pods_total"]:
            max_entry = {
                "node": entry.get("node"),
                "pods_total": pods_value,
                "namespaces_top": entry.get("namespaces_top") or [],
            }
    parts = []
    for item in top:
        node = item.get("node")
        pods_total = item.get("pods_total")
        namespaces = item.get("namespaces_top") or []
        ns_label = ""
        if namespaces:
            # Presumably (name, count) pairs — produced upstream of this view.
            ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
        if node:
            label = f"{node}={pods_total}"
            if ns_label:
                label = f"{label} ({ns_label})"
            parts.append(label)
    if parts:
        lines.append("node_pods_top: " + "; ".join(parts))
    if max_entry and isinstance(max_entry.get("node"), str):
        ns_label = ""
        namespaces = max_entry.get("namespaces_top") or []
        if namespaces:
            ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
        label = f"{max_entry.get('node')}={max_entry.get('pods_total')}"
        if ns_label:
            label = f"{label} ({ns_label})"
        lines.append("node_pods_max: " + label)
    # One namespace-breakdown line per top node that has namespace data.
    for item in top:
        node = item.get("node")
        namespaces = item.get("namespaces_top") or []
        if not node or not namespaces:
            continue
        ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
        lines.append(f"node_namespaces_top: {node} ({ns_label})")
|
||||||
|
|
||||||
|
|
||||||
|
def _append_pod_issues(lines: list[str], summary: dict[str, Any]) -> None:
    """Append every non-empty pod-issue line, in a fixed order."""
    pod_issues = summary.get("pod_issues")
    if not isinstance(pod_issues, dict) or not pod_issues:
        return
    # Order matters: counts, top offenders, oldest pending, >15m count, reasons.
    renderers = (
        _format_pod_issue_counts,
        _format_pod_issue_top,
        _format_pod_pending_oldest,
        _format_pod_pending_over_15m,
        _format_pod_waiting_reasons,
    )
    for render in renderers:
        line = render(pod_issues)
        if line:
            lines.append(line)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_pod_issue_counts(pod_issues: dict[str, Any]) -> str:
|
||||||
|
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
|
||||||
|
if not counts:
|
||||||
|
return ""
|
||||||
|
parts = []
|
||||||
|
for key in ("Failed", "Pending", "Unknown"):
|
||||||
|
if key in counts:
|
||||||
|
parts.append(f"{key}={counts.get(key)}")
|
||||||
|
return "pod_issues: " + "; ".join(parts) if parts else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _format_pod_issue_top(pod_issues: dict[str, Any]) -> str:
|
||||||
|
items = pod_issues.get("items") if isinstance(pod_issues.get("items"), list) else []
|
||||||
|
if not items:
|
||||||
|
return ""
|
||||||
|
top = []
|
||||||
|
for item in items[:5]:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
namespace = item.get("namespace")
|
||||||
|
pod = item.get("pod")
|
||||||
|
if not namespace or not pod:
|
||||||
|
continue
|
||||||
|
phase = item.get("phase") or ""
|
||||||
|
restarts = item.get("restarts") or 0
|
||||||
|
top.append(f"{namespace}/{pod}({phase},r={restarts})")
|
||||||
|
return "pod_issues_top: " + "; ".join(top) if top else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _format_pod_pending_oldest(pod_issues: dict[str, Any]) -> str:
    """Render up to five pending pods as ``ns/pod=<age>h (reason)``."""
    pending = pod_issues.get("pending_oldest")
    if not isinstance(pending, list) or not pending:
        return ""
    rendered = []
    for item in pending[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        pod = item.get("pod")
        age = item.get("age_hours")
        if not namespace or not pod or age is None:
            continue
        text = f"{namespace}/{pod}={_format_float(age)}h"
        reason = item.get("reason") or ""
        if reason:
            text = f"{text} ({reason})"
        rendered.append(text)
    return "pods_pending_oldest: " + "; ".join(rendered) if rendered else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _format_pod_waiting_reasons(pod_issues: dict[str, Any]) -> str:
|
||||||
|
reasons = pod_issues.get("waiting_reasons") if isinstance(pod_issues.get("waiting_reasons"), dict) else {}
|
||||||
|
if not reasons:
|
||||||
|
return ""
|
||||||
|
pairs = sorted(reasons.items(), key=lambda item: (-item[1], item[0]))[:5]
|
||||||
|
return "pod_waiting_reasons: " + "; ".join([f"{key}={val}" for key, val in pairs])
|
||||||
|
|
||||||
|
|
||||||
|
def _format_pod_pending_over_15m(pod_issues: dict[str, Any]) -> str:
|
||||||
|
count = pod_issues.get("pending_over_15m")
|
||||||
|
if count is None:
|
||||||
|
return ""
|
||||||
|
try:
|
||||||
|
count_val = int(count)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return ""
|
||||||
|
return f"pods_pending_over_15m: {count_val}"
|
||||||
|
|
||||||
|
|
||||||
|
def _append_workload_health(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
health = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
|
||||||
|
if not health:
|
||||||
|
return
|
||||||
|
deployments = health.get("deployments") if isinstance(health.get("deployments"), dict) else {}
|
||||||
|
statefulsets = health.get("statefulsets") if isinstance(health.get("statefulsets"), dict) else {}
|
||||||
|
daemonsets = health.get("daemonsets") if isinstance(health.get("daemonsets"), dict) else {}
|
||||||
|
total_not_ready = 0
|
||||||
|
for entry in (deployments, statefulsets, daemonsets):
|
||||||
|
total_not_ready += int(entry.get("not_ready") or 0)
|
||||||
|
lines.append(
|
||||||
|
"workloads_not_ready: "
|
||||||
|
f"deployments={deployments.get('not_ready', 0)}, "
|
||||||
|
f"statefulsets={statefulsets.get('not_ready', 0)}, "
|
||||||
|
f"daemonsets={daemonsets.get('not_ready', 0)} "
|
||||||
|
f"(total={total_not_ready})"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 'node_usage_avg' with per-dimension average usage."""
    metrics = summary.get("metrics")
    stats = metrics.get("node_usage_stats") if isinstance(metrics, dict) else None
    if not isinstance(stats, dict) or not stats:
        return
    rendered = []
    for dim in ("cpu", "ram", "net", "io", "disk"):
        entry = stats.get(dim)
        if not isinstance(entry, dict):
            continue
        avg = entry.get("avg")
        if avg is None:
            continue
        # net/io are byte rates; the remaining dimensions are plain numbers.
        formatted = _format_rate_bytes(avg) if dim in {"net", "io"} else _format_float(avg)
        rendered.append(f"{dim}={formatted}")
    if rendered:
        lines.append("node_usage_avg: " + "; ".join(rendered))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_events(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
events = summary.get("events") if isinstance(summary.get("events"), dict) else {}
|
||||||
|
if not events:
|
||||||
|
return
|
||||||
|
total = events.get("warnings_total")
|
||||||
|
by_reason = events.get("warnings_by_reason") if isinstance(events.get("warnings_by_reason"), dict) else {}
|
||||||
|
if total is None:
|
||||||
|
return
|
||||||
|
if by_reason:
|
||||||
|
top = sorted(by_reason.items(), key=lambda item: (-item[1], item[0]))[:3]
|
||||||
|
reasons = "; ".join([f"{reason}={count}" for reason, count in top])
|
||||||
|
lines.append(f"warnings: total={total}; top={reasons}")
|
||||||
|
else:
|
||||||
|
lines.append(f"warnings: total={total}")
|
||||||
|
def _append_pvc_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 'pvc_usage_top' with ``ns/pvc=<pct>%`` entries."""
    pvc_usage = summary.get("pvc_usage_top")
    if not isinstance(pvc_usage, list) or not pvc_usage:
        return
    rendered = []
    for entry in pvc_usage:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        if not isinstance(metric, dict):
            metric = {}
        namespace = metric.get("namespace")
        claim = metric.get("persistentvolumeclaim")
        if namespace and claim:
            rendered.append(f"{namespace}/{claim}={_format_float(entry.get('value'))}%")
    if rendered:
        lines.append("pvc_usage_top: " + "; ".join(rendered))
|
||||||
|
def _append_root_disk_headroom(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 'root_disk_low_headroom' with ``node=<pct>%`` entries."""
    headroom = summary.get("root_disk_low_headroom")
    if not isinstance(headroom, list) or not headroom:
        return
    rendered = []
    for entry in headroom:
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        pct = entry.get("headroom_pct")
        if node and pct is not None:
            rendered.append(f"{node}={_format_float(pct)}%")
    if rendered:
        lines.append("root_disk_low_headroom: " + "; ".join(rendered))
|
||||||
|
# Re-export every module-level name, including the underscore-prefixed
# helpers, since sibling builder modules consume them via star-import
# (e.g. `from .format_a import *`). Dunder names stay private.
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||||
435
atlasbot/snapshot/builder/format_b.py
Normal file
435
atlasbot/snapshot/builder/format_b.py
Normal file
@ -0,0 +1,435 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .core_a import _VALUE_PAIR_LEN
|
||||||
|
from .format_a import *
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_metric_series(
|
||||||
|
lines: list[str],
|
||||||
|
label: str,
|
||||||
|
entries: list[Any],
|
||||||
|
formatter: Any,
|
||||||
|
) -> None:
|
||||||
|
parts = []
|
||||||
|
for entry in entries:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
value = entry.get("value")
|
||||||
|
if namespace:
|
||||||
|
parts.append(f"{namespace}={formatter(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append(f"{label}: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_longhorn(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append Longhorn volume health lines to *lines* in place.

    Emits a "longhorn:" totals line (falling back to an unhealthy count
    when attach/detach/degraded counters are absent), optional state and
    robustness breakdowns, and up to five unhealthy volumes.
    """
    longhorn = summary.get("longhorn") if isinstance(summary.get("longhorn"), dict) else {}
    if not longhorn:
        return
    total = longhorn.get("total")
    attached = longhorn.get("attached_count")
    detached = longhorn.get("detached_count")
    degraded = longhorn.get("degraded_count")
    by_state = longhorn.get("by_state") if isinstance(longhorn.get("by_state"), dict) else {}
    by_robust = longhorn.get("by_robustness") if isinstance(longhorn.get("by_robustness"), dict) else {}
    if total is not None:
        # Older snapshots only carry unhealthy_count; prefer the detailed
        # attach/detach/degraded form when any of those counters exists.
        if attached is None and detached is None and degraded is None:
            unhealthy = longhorn.get("unhealthy_count")
            lines.append(f"longhorn: total={total}, unhealthy={unhealthy if unhealthy is not None else 0}")
        else:
            lines.append(
                f"longhorn: total={total}, attached={attached if attached is not None else 0}, "
                f"detached={detached if detached is not None else 0}, "
                f"degraded={degraded if degraded is not None else 0}"
            )
    if by_state:
        lines.append("longhorn_state: " + _format_kv_map(by_state))
    if by_robust:
        lines.append("longhorn_robustness: " + _format_kv_map(by_robust))
    unhealthy_items = longhorn.get("unhealthy")
    if isinstance(unhealthy_items, list) and unhealthy_items:
        parts = []
        for entry in unhealthy_items[:5]:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name")
            state = entry.get("state")
            robustness = entry.get("robustness")
            if name:
                label = name
                if state or robustness:
                    label = f"{label}({state},{robustness})"
                parts.append(label)
        if parts:
            lines.append("longhorn_unhealthy_top: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top namespace CPU and memory usage series."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    for key, formatter in (("namespace_cpu_top", _format_float), ("namespace_mem_top", _format_bytes)):
        series = metrics.get(key)
        _append_namespace_metric_series(lines, key, series if isinstance(series, list) else [], formatter)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_requests(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top namespace CPU and memory request series."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    for key, formatter in (
        ("namespace_cpu_requests_top", _format_float),
        ("namespace_mem_requests_top", _format_bytes),
    ):
        series = metrics.get(key)
        _append_namespace_metric_series(lines, key, series if isinstance(series, list) else [], formatter)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_io_net(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top namespace network and disk-I/O rate series."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    for key in ("namespace_net_top", "namespace_io_top"):
        series = metrics.get(key)
        _append_namespace_metric_series(lines, key, series if isinstance(series, list) else [], _format_rate_bytes)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_pod_usage(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
|
||||||
|
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||||
|
cpu_top = metrics.get("pod_cpu_top") if isinstance(metrics.get("pod_cpu_top"), list) else []
|
||||||
|
cpu_top_node = (
|
||||||
|
metrics.get("pod_cpu_top_node")
|
||||||
|
if isinstance(metrics.get("pod_cpu_top_node"), list)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
mem_top = metrics.get("pod_mem_top") if isinstance(metrics.get("pod_mem_top"), list) else []
|
||||||
|
mem_top_node = (
|
||||||
|
metrics.get("pod_mem_top_node")
|
||||||
|
if isinstance(metrics.get("pod_mem_top_node"), list)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
if cpu_top:
|
||||||
|
parts = []
|
||||||
|
for entry in cpu_top:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
pod = metric.get("pod")
|
||||||
|
value = entry.get("value")
|
||||||
|
if namespace and pod and value is not None:
|
||||||
|
parts.append(f"{namespace}/{pod}={_format_float(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append("pod_cpu_top: " + "; ".join(parts))
|
||||||
|
if cpu_top_node:
|
||||||
|
parts = []
|
||||||
|
for entry in cpu_top_node:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
pod = metric.get("pod")
|
||||||
|
node = metric.get("node")
|
||||||
|
value = entry.get("value")
|
||||||
|
if namespace and pod and node and value is not None:
|
||||||
|
parts.append(f"{node}:{namespace}/{pod}={_format_float(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append("pod_cpu_top_node: " + "; ".join(parts))
|
||||||
|
if mem_top:
|
||||||
|
parts = []
|
||||||
|
for entry in mem_top:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
pod = metric.get("pod")
|
||||||
|
value = entry.get("value")
|
||||||
|
if namespace and pod and value is not None:
|
||||||
|
parts.append(f"{namespace}/{pod}={_format_bytes(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append("pod_mem_top: " + "; ".join(parts))
|
||||||
|
if mem_top_node:
|
||||||
|
parts = []
|
||||||
|
for entry in mem_top_node:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
pod = metric.get("pod")
|
||||||
|
node = metric.get("node")
|
||||||
|
value = entry.get("value")
|
||||||
|
if namespace and pod and node and value is not None:
|
||||||
|
parts.append(f"{node}:{namespace}/{pod}={_format_bytes(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append("pod_mem_top_node: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_restarts(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||||
|
top_restarts = metrics.get("top_restarts_1h") or []
|
||||||
|
if not isinstance(top_restarts, list) or not top_restarts:
|
||||||
|
top_restarts = []
|
||||||
|
parts = []
|
||||||
|
for entry in top_restarts:
|
||||||
|
metric = entry.get("metric") if isinstance(entry, dict) else {}
|
||||||
|
value = entry.get("value") if isinstance(entry, dict) else []
|
||||||
|
if not isinstance(metric, dict) or not isinstance(value, list) or len(value) < _VALUE_PAIR_LEN:
|
||||||
|
continue
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
pod = metric.get("pod")
|
||||||
|
count = _format_float(value[1])
|
||||||
|
if namespace and pod:
|
||||||
|
parts.append(f"{namespace}/{pod}={count}")
|
||||||
|
if parts:
|
||||||
|
lines.append("restarts_1h_top: " + "; ".join(parts))
|
||||||
|
else:
|
||||||
|
lines.append("restarts_1h_top: none")
|
||||||
|
ns_top = metrics.get("restart_namespace_top") or []
|
||||||
|
if isinstance(ns_top, list) and ns_top:
|
||||||
|
ns_parts = []
|
||||||
|
for entry in ns_top:
|
||||||
|
metric = entry.get("metric") if isinstance(entry, dict) else {}
|
||||||
|
value = entry.get("value")
|
||||||
|
namespace = metric.get("namespace") if isinstance(metric, dict) else None
|
||||||
|
if namespace and value is not None:
|
||||||
|
ns_parts.append(f"{namespace}={_format_float(value)}")
|
||||||
|
if ns_parts:
|
||||||
|
lines.append("restarts_1h_namespace_top: " + "; ".join(ns_parts))
|
||||||
|
else:
|
||||||
|
lines.append("restarts_1h_namespace_top: none")
|
||||||
|
|
||||||
|
|
||||||
|
def _append_job_failures(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||||
|
failures = metrics.get("job_failures_24h") if isinstance(metrics.get("job_failures_24h"), list) else []
|
||||||
|
if not failures:
|
||||||
|
return
|
||||||
|
parts = []
|
||||||
|
for entry in failures:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
namespace = metric.get("namespace")
|
||||||
|
job_name = metric.get("job_name") or metric.get("job")
|
||||||
|
value = entry.get("value")
|
||||||
|
if namespace and job_name and value is not None:
|
||||||
|
parts.append(f"{namespace}/{job_name}={_format_float(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append("job_failures_24h: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_jobs(lines: list[str], summary: dict[str, Any]) -> None:
    """Append job totals, failing jobs, and the oldest active jobs."""
    jobs = summary.get("jobs")
    if not isinstance(jobs, dict) or not jobs:
        return
    for rendered in (
        _format_jobs_totals(jobs),
        _format_jobs_failing(jobs),
        _format_jobs_active_oldest(jobs),
    ):
        if rendered:
            lines.append(rendered)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_jobs_totals(jobs: dict[str, Any]) -> str:
|
||||||
|
totals = jobs.get("totals") if isinstance(jobs.get("totals"), dict) else {}
|
||||||
|
if not totals:
|
||||||
|
return ""
|
||||||
|
return "jobs: total={total}, active={active}, failed={failed}, succeeded={succeeded}".format(
|
||||||
|
total=totals.get("total"),
|
||||||
|
active=totals.get("active"),
|
||||||
|
failed=totals.get("failed"),
|
||||||
|
succeeded=totals.get("succeeded"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_jobs_failing(jobs: dict[str, Any]) -> str:
|
||||||
|
failing = jobs.get("failing") if isinstance(jobs.get("failing"), list) else []
|
||||||
|
if not failing:
|
||||||
|
return ""
|
||||||
|
parts = []
|
||||||
|
for item in failing[:5]:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
namespace = item.get("namespace")
|
||||||
|
name = item.get("job")
|
||||||
|
failed = item.get("failed")
|
||||||
|
age = item.get("age_hours")
|
||||||
|
if namespace and name and failed is not None:
|
||||||
|
label = f"{namespace}/{name}={failed}"
|
||||||
|
if age is not None:
|
||||||
|
label = f"{label} ({_format_float(age)}h)"
|
||||||
|
parts.append(label)
|
||||||
|
return "jobs_failing_top: " + "; ".join(parts) if parts else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _format_jobs_active_oldest(jobs: dict[str, Any]) -> str:
|
||||||
|
active_oldest = jobs.get("active_oldest") if isinstance(jobs.get("active_oldest"), list) else []
|
||||||
|
if not active_oldest:
|
||||||
|
return ""
|
||||||
|
parts = []
|
||||||
|
for item in active_oldest[:5]:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
namespace = item.get("namespace")
|
||||||
|
name = item.get("job")
|
||||||
|
age = item.get("age_hours")
|
||||||
|
if namespace and name and age is not None:
|
||||||
|
parts.append(f"{namespace}/{name}={_format_float(age)}h")
|
||||||
|
return "jobs_active_oldest: " + "; ".join(parts) if parts else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _append_postgres(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
|
||||||
|
if not postgres:
|
||||||
|
return
|
||||||
|
hottest = postgres.get("hottest_db") or ""
|
||||||
|
lines.append(
|
||||||
|
"postgres: used={used}, max={max}, hottest_db={hottest}".format(
|
||||||
|
used=postgres.get("used"),
|
||||||
|
max=postgres.get("max"),
|
||||||
|
hottest=hottest,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
used = postgres.get("used")
|
||||||
|
max_conn = postgres.get("max")
|
||||||
|
if used is not None or max_conn is not None:
|
||||||
|
lines.append(f"postgres_connections_total: used={_format_float(used)}, max={_format_float(max_conn)}")
|
||||||
|
by_db = postgres.get("by_db")
|
||||||
|
if isinstance(by_db, list) and by_db:
|
||||||
|
parts = []
|
||||||
|
for entry in by_db:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||||
|
value = entry.get("value")
|
||||||
|
if isinstance(value, list) and len(value) >= _VALUE_PAIR_LEN:
|
||||||
|
value = value[1]
|
||||||
|
name = metric.get("datname") if isinstance(metric, dict) else None
|
||||||
|
if name and value is not None:
|
||||||
|
parts.append(f"{name}={_format_float(value)}")
|
||||||
|
if parts:
|
||||||
|
lines.append("postgres_connections_by_db: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_hottest(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
|
||||||
|
if not hottest:
|
||||||
|
return
|
||||||
|
hardware_map = summary.get("hardware_by_node")
|
||||||
|
if not isinstance(hardware_map, dict):
|
||||||
|
hardware_map = {}
|
||||||
|
parts = []
|
||||||
|
for key, entry in hottest.items():
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
node = entry.get("node")
|
||||||
|
hardware = hardware_map.get(node) if node else None
|
||||||
|
if key in {"net", "io"}:
|
||||||
|
value = _format_rate_bytes(entry.get("value"))
|
||||||
|
else:
|
||||||
|
value = _format_float(entry.get("value"))
|
||||||
|
if value and key in {"cpu", "ram", "disk"}:
|
||||||
|
value = f"{value}%"
|
||||||
|
if node:
|
||||||
|
label = node
|
||||||
|
if hardware:
|
||||||
|
label = f"{label} [{hardware}]"
|
||||||
|
parts.append(f"{key}={label} ({value})")
|
||||||
|
if parts:
|
||||||
|
lines.append("hottest: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_workloads(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
workloads = summary.get("workloads")
|
||||||
|
if not isinstance(workloads, list) or not workloads:
|
||||||
|
return
|
||||||
|
lines.append(f"workloads: total={len(workloads)}")
|
||||||
|
top_workloads = sorted(
|
||||||
|
(item for item in workloads if isinstance(item, dict)),
|
||||||
|
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or ""),
|
||||||
|
)[:5]
|
||||||
|
if not top_workloads:
|
||||||
|
return
|
||||||
|
parts = []
|
||||||
|
for item in top_workloads:
|
||||||
|
namespace = item.get("namespace")
|
||||||
|
name = item.get("workload")
|
||||||
|
pods_total = item.get("pods_total")
|
||||||
|
primary = item.get("primary_node")
|
||||||
|
if namespace and name:
|
||||||
|
label = f"{namespace}/{name}={pods_total}"
|
||||||
|
if primary:
|
||||||
|
label = f"{label} (primary={primary})"
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append("workloads_top: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_topology(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
|
||||||
|
topology = summary.get("topology") if isinstance(summary.get("topology"), dict) else {}
|
||||||
|
if not topology:
|
||||||
|
return
|
||||||
|
nodes = topology.get("nodes") if isinstance(topology.get("nodes"), list) else []
|
||||||
|
workloads = topology.get("workloads") if isinstance(topology.get("workloads"), list) else []
|
||||||
|
if nodes:
|
||||||
|
parts = []
|
||||||
|
for entry in nodes[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
node = entry.get("node")
|
||||||
|
top = entry.get("workloads_top") if isinstance(entry.get("workloads_top"), list) else []
|
||||||
|
if not node or not top:
|
||||||
|
continue
|
||||||
|
items = ", ".join([f"{name}({count})" for name, count in top if name and count is not None])
|
||||||
|
if items:
|
||||||
|
parts.append(f"{node}={items}")
|
||||||
|
if parts:
|
||||||
|
lines.append("node_workloads_top: " + "; ".join(parts))
|
||||||
|
if workloads:
|
||||||
|
parts = []
|
||||||
|
for entry in workloads[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
namespace = entry.get("namespace")
|
||||||
|
name = entry.get("workload")
|
||||||
|
nodes_top = entry.get("nodes_top") if isinstance(entry.get("nodes_top"), list) else []
|
||||||
|
if not namespace or not name:
|
||||||
|
continue
|
||||||
|
nodes_label = ", ".join([f"{node}:{count}" for node, count in nodes_top if node])
|
||||||
|
label = f"{namespace}/{name}"
|
||||||
|
if nodes_label:
|
||||||
|
label = f"{label} [{nodes_label}]"
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append("workload_nodes_top: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_flux(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
|
||||||
|
if not flux:
|
||||||
|
return
|
||||||
|
not_ready = flux.get("not_ready")
|
||||||
|
if not_ready is not None:
|
||||||
|
lines.append(f"flux_not_ready: {not_ready}")
|
||||||
|
items = flux.get("items")
|
||||||
|
if isinstance(items, list) and items:
|
||||||
|
parts = []
|
||||||
|
for item in items[:10]:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
name = item.get("name") or ""
|
||||||
|
namespace = item.get("namespace") or ""
|
||||||
|
reason = item.get("reason") or ""
|
||||||
|
suspended = item.get("suspended")
|
||||||
|
label = f"{namespace}/{name}".strip("/")
|
||||||
|
if reason:
|
||||||
|
label = f"{label} ({reason})"
|
||||||
|
if suspended:
|
||||||
|
label = f"{label} [suspended]"
|
||||||
|
if label:
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append("flux_not_ready_items: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
# Export every module-level name except dunders — including _private helpers —
# so the next formatter module's `from .format_b import *` (which honors
# __all__) can aggregate them. Narrowing this list would break that chain.
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||||
448
atlasbot/snapshot/builder/format_c.py
Normal file
448
atlasbot/snapshot/builder/format_c.py
Normal file
@ -0,0 +1,448 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .core_a import PVC_USAGE_CRITICAL
|
||||||
|
from .format_b import *
|
||||||
|
def _append_signals(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
signals = summary.get("signals") if isinstance(summary.get("signals"), list) else []
|
||||||
|
if not signals:
|
||||||
|
return
|
||||||
|
lines.append("signals:")
|
||||||
|
for entry in signals[:8]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
scope = entry.get("scope") or ""
|
||||||
|
target = entry.get("target") or ""
|
||||||
|
metric = entry.get("metric") or ""
|
||||||
|
current = entry.get("current")
|
||||||
|
delta = entry.get("delta_pct")
|
||||||
|
severity = entry.get("severity") or ""
|
||||||
|
detail = f"{scope}:{target} {metric}={current}"
|
||||||
|
if delta is not None:
|
||||||
|
detail += f" delta={delta}%"
|
||||||
|
if severity:
|
||||||
|
detail += f" severity={severity}"
|
||||||
|
lines.append(f"- {detail}")
|
||||||
|
|
||||||
|
|
||||||
|
def _append_profiles(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
|
||||||
|
profiles = summary.get("profiles") if isinstance(summary.get("profiles"), dict) else {}
|
||||||
|
if not profiles:
|
||||||
|
return
|
||||||
|
nodes = profiles.get("nodes") if isinstance(profiles.get("nodes"), list) else []
|
||||||
|
namespaces = profiles.get("namespaces") if isinstance(profiles.get("namespaces"), list) else []
|
||||||
|
workloads = profiles.get("workloads") if isinstance(profiles.get("workloads"), list) else []
|
||||||
|
if nodes:
|
||||||
|
lines.append("node_profiles:")
|
||||||
|
for entry in nodes[:3]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
lines.append(
|
||||||
|
f"- {entry.get('node')}: load={entry.get('load_index')} cpu={entry.get('cpu')} ram={entry.get('ram')} "
|
||||||
|
f"pods={entry.get('pods_total')} hw={entry.get('hardware')}"
|
||||||
|
)
|
||||||
|
if namespaces:
|
||||||
|
lines.append("namespace_profiles:")
|
||||||
|
for entry in namespaces[:3]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
lines.append(
|
||||||
|
f"- {entry.get('namespace')}: pods={entry.get('pods_total')} cpu={entry.get('cpu_usage')} "
|
||||||
|
f"mem={entry.get('mem_usage')} primary={entry.get('primary_node')}"
|
||||||
|
)
|
||||||
|
if workloads:
|
||||||
|
lines.append("workload_profiles:")
|
||||||
|
for entry in workloads[:3]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
lines.append(
|
||||||
|
f"- {entry.get('namespace')}/{entry.get('workload')}: pods={entry.get('pods_total')} "
|
||||||
|
f"running={entry.get('pods_running')} node={entry.get('primary_node')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _append_units_windows(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||||
|
units = metrics.get("units") if isinstance(metrics.get("units"), dict) else {}
|
||||||
|
windows = metrics.get("windows") if isinstance(metrics.get("windows"), dict) else {}
|
||||||
|
if units:
|
||||||
|
lines.append("units: " + _format_kv_map(units))
|
||||||
|
else:
|
||||||
|
lines.append("units: cpu_pct, ram_pct, net=bytes_per_sec, io=bytes_per_sec")
|
||||||
|
if windows:
|
||||||
|
lines.append("windows: " + _format_kv_map(windows))
|
||||||
|
else:
|
||||||
|
lines.append("windows: rates=5m, restarts=1h")
|
||||||
|
|
||||||
|
|
||||||
|
def _append_node_load_summary(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
node_load = summary.get("node_load_summary")
|
||||||
|
if not isinstance(node_load, dict) or not node_load:
|
||||||
|
return
|
||||||
|
hardware_by_node = summary.get("hardware_by_node")
|
||||||
|
hardware_by_node = hardware_by_node if isinstance(hardware_by_node, dict) else {}
|
||||||
|
top = node_load.get("top")
|
||||||
|
if isinstance(top, list) and top:
|
||||||
|
parts = []
|
||||||
|
for entry in top[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
node = entry.get("node") or ""
|
||||||
|
load = entry.get("load_index")
|
||||||
|
cpu = entry.get("cpu")
|
||||||
|
ram = entry.get("ram")
|
||||||
|
io = entry.get("io")
|
||||||
|
net = entry.get("net")
|
||||||
|
pods_total = entry.get("pods_total")
|
||||||
|
label = f"{node} idx={_format_float(load)}"
|
||||||
|
if node and node in hardware_by_node:
|
||||||
|
label += f" hw={hardware_by_node.get(node)}"
|
||||||
|
if isinstance(pods_total, (int, float)):
|
||||||
|
label += f" pods={int(pods_total)}"
|
||||||
|
label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
|
||||||
|
label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append("node_load_top: " + "; ".join(parts))
|
||||||
|
outliers = node_load.get("outliers")
|
||||||
|
if isinstance(outliers, list) and outliers:
|
||||||
|
names = [entry.get("node") for entry in outliers if isinstance(entry, dict)]
|
||||||
|
names = [name for name in names if isinstance(name, str) and name]
|
||||||
|
if names:
|
||||||
|
lines.append("node_load_outliers: " + _format_names(names))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_hardware_usage(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
|
||||||
|
usage = summary.get("hardware_usage_avg")
|
||||||
|
if not isinstance(usage, list) or not usage:
|
||||||
|
return
|
||||||
|
parts = []
|
||||||
|
tops: dict[str, tuple[str, float]] = {}
|
||||||
|
for entry in usage[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
hardware = entry.get("hardware")
|
||||||
|
load = entry.get("load_index")
|
||||||
|
cpu = entry.get("cpu")
|
||||||
|
ram = entry.get("ram")
|
||||||
|
io = entry.get("io")
|
||||||
|
net = entry.get("net")
|
||||||
|
if not hardware:
|
||||||
|
continue
|
||||||
|
label = f"{hardware} idx={_format_float(load)}"
|
||||||
|
label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
|
||||||
|
label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
|
||||||
|
parts.append(label)
|
||||||
|
for metric, value in (("cpu", cpu), ("ram", ram), ("io", io), ("net", net), ("load", load)):
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
current = tops.get(metric)
|
||||||
|
if current is None or float(value) > current[1]:
|
||||||
|
tops[metric] = (hardware, float(value))
|
||||||
|
if parts:
|
||||||
|
lines.append("hardware_usage_avg: " + "; ".join(parts))
|
||||||
|
if tops:
|
||||||
|
top_parts = []
|
||||||
|
for metric in ("cpu", "ram", "io", "net", "load"):
|
||||||
|
entry = tops.get(metric)
|
||||||
|
if not entry:
|
||||||
|
continue
|
||||||
|
hardware, value = entry
|
||||||
|
if metric in {"io", "net"}:
|
||||||
|
rendered = _format_rate_bytes(value)
|
||||||
|
else:
|
||||||
|
rendered = _format_float(value)
|
||||||
|
top_parts.append(f"{metric}={hardware} ({rendered})")
|
||||||
|
if top_parts:
|
||||||
|
lines.append("hardware_usage_top: " + "; ".join(top_parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_cluster_watchlist(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
watchlist = summary.get("cluster_watchlist")
|
||||||
|
if not isinstance(watchlist, list) or not watchlist:
|
||||||
|
return
|
||||||
|
lines.append("cluster_watchlist: " + "; ".join(watchlist))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_baseline_deltas(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
deltas = summary.get("baseline_deltas") if isinstance(summary.get("baseline_deltas"), dict) else {}
|
||||||
|
nodes = deltas.get("nodes") if isinstance(deltas.get("nodes"), dict) else {}
|
||||||
|
namespaces = deltas.get("namespaces") if isinstance(deltas.get("namespaces"), dict) else {}
|
||||||
|
for scope, block in (("nodes", nodes), ("namespaces", namespaces)):
|
||||||
|
if not isinstance(block, dict):
|
||||||
|
continue
|
||||||
|
for metric, entries in block.items():
|
||||||
|
if not isinstance(entries, list) or not entries:
|
||||||
|
continue
|
||||||
|
parts: list[str] = []
|
||||||
|
for entry in entries[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
name = entry.get("node") if scope == "nodes" else entry.get("namespace")
|
||||||
|
delta = entry.get("delta")
|
||||||
|
severity = entry.get("severity")
|
||||||
|
if not isinstance(name, str) or not name or not isinstance(delta, (int, float)):
|
||||||
|
continue
|
||||||
|
suffix = f" ({severity})" if isinstance(severity, str) and severity else ""
|
||||||
|
parts.append(f"{name}={_format_float(delta)}%{suffix}")
|
||||||
|
if parts:
|
||||||
|
lines.append(f"{scope}_baseline_delta_{metric}: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_pod_issue_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append pod waiting/phase reason leaders and per-namespace issue counts."""
    issues = summary.get("pod_issue_summary")
    if not isinstance(issues, dict):
        issues = {}
    for key, label in (
        ("waiting_reasons_top", "pod_waiting_reasons_top"),
        ("phase_reasons_top", "pod_phase_reasons_top"),
    ):
        entries = issues.get(key)
        rendered = _reason_line(entries if isinstance(entries, list) else [], label)
        if rendered:
            lines.append(rendered)
    namespace_issue = issues.get("namespace_issue_top")
    if isinstance(namespace_issue, dict) and namespace_issue:
        _append_namespace_issue_lines(lines, namespace_issue)
|
||||||
|
|
||||||
|
|
||||||
|
def _reason_line(entries: list[dict[str, Any]], label: str) -> str:
|
||||||
|
parts = []
|
||||||
|
for entry in entries[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
reason = entry.get("reason")
|
||||||
|
count = entry.get("count")
|
||||||
|
if reason:
|
||||||
|
parts.append(f"{reason}={count}")
|
||||||
|
if parts:
|
||||||
|
return f"{label}: " + "; ".join(parts)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_issue_lines(lines: list[str], namespace_issue: dict[str, Any]) -> None:
|
||||||
|
for key, entries in namespace_issue.items():
|
||||||
|
if not isinstance(entries, list) or not entries:
|
||||||
|
continue
|
||||||
|
parts: list[str] = []
|
||||||
|
for entry in entries[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
ns = entry.get("namespace")
|
||||||
|
value = entry.get("value")
|
||||||
|
if ns:
|
||||||
|
parts.append(f"{ns}={value}")
|
||||||
|
if parts:
|
||||||
|
lines.append(f"namespace_issue_top_{key}: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _build_cluster_watchlist(summary: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
items: list[str] = []
|
||||||
|
nodes_summary = summary.get("nodes_summary") if isinstance(summary.get("nodes_summary"), dict) else {}
|
||||||
|
not_ready = int(nodes_summary.get("not_ready") or 0)
|
||||||
|
if not_ready > 0:
|
||||||
|
items.append(f"not_ready_nodes={not_ready}")
|
||||||
|
pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
|
||||||
|
pressure_nodes = pressure.get("names") if isinstance(pressure.get("names"), list) else []
|
||||||
|
if pressure_nodes:
|
||||||
|
items.append(f"pressure_nodes={len(pressure_nodes)}")
|
||||||
|
pod_issues = summary.get("pod_issues") if isinstance(summary.get("pod_issues"), dict) else {}
|
||||||
|
pending_over = int(pod_issues.get("pending_over_15m") or 0)
|
||||||
|
if pending_over > 0:
|
||||||
|
items.append(f"pods_pending_over_15m={pending_over}")
|
||||||
|
workloads = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
|
||||||
|
deployments = workloads.get("deployments") if isinstance(workloads.get("deployments"), dict) else {}
|
||||||
|
statefulsets = workloads.get("statefulsets") if isinstance(workloads.get("statefulsets"), dict) else {}
|
||||||
|
daemonsets = workloads.get("daemonsets") if isinstance(workloads.get("daemonsets"), dict) else {}
|
||||||
|
total_not_ready = int(deployments.get("not_ready") or 0) + int(statefulsets.get("not_ready") or 0) + int(daemonsets.get("not_ready") or 0)
|
||||||
|
if total_not_ready > 0:
|
||||||
|
items.append(f"workloads_not_ready={total_not_ready}")
|
||||||
|
flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
|
||||||
|
flux_not_ready = int(flux.get("not_ready") or 0)
|
||||||
|
if flux_not_ready > 0:
|
||||||
|
items.append(f"flux_not_ready={flux_not_ready}")
|
||||||
|
pvc_usage = summary.get("pvc_usage_top") if isinstance(summary.get("pvc_usage_top"), list) else []
|
||||||
|
high_pvc = [
|
||||||
|
entry for entry in pvc_usage if isinstance(entry, dict) and (entry.get("value") or 0) >= PVC_USAGE_CRITICAL
|
||||||
|
]
|
||||||
|
if high_pvc:
|
||||||
|
items.append(f"pvc_usage>={PVC_USAGE_CRITICAL}%")
|
||||||
|
return {"cluster_watchlist": items} if items else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _capacity_ratio_parts(entries: list[dict[str, Any]], ratio_key: str, usage_key: str, req_key: str) -> list[str]:
|
||||||
|
parts: list[str] = []
|
||||||
|
for entry in entries[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
ns = entry.get("namespace") or ""
|
||||||
|
ratio = entry.get(ratio_key)
|
||||||
|
usage = entry.get(usage_key)
|
||||||
|
req = entry.get(req_key)
|
||||||
|
if ns:
|
||||||
|
parts.append(
|
||||||
|
f"{ns}={_format_float(ratio)} (usage={_format_float(usage)} req={_format_float(req)})"
|
||||||
|
)
|
||||||
|
return parts
|
||||||
|
|
||||||
|
|
||||||
|
def _capacity_headroom_parts(entries: list[dict[str, Any]]) -> list[str]:
|
||||||
|
parts: list[str] = []
|
||||||
|
for entry in entries[:5]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
ns = entry.get("namespace") or ""
|
||||||
|
headroom = entry.get("headroom")
|
||||||
|
if ns:
|
||||||
|
parts.append(f"{ns}={_format_float(headroom)}")
|
||||||
|
return parts
|
||||||
|
|
||||||
|
|
||||||
|
def _append_namespace_capacity_summary(  # noqa: C901
    lines: list[str],
    summary: dict[str, Any],
) -> None:
    """Append namespace capacity ratio/headroom/overcommit lines to the prompt.

    Emits, in order: cpu/mem usage-ratio tops, cpu/mem headroom lows, an
    overcommit count line, and the named overcommitted namespaces. Sections
    with no usable data are omitted entirely.
    """
    cap = summary.get("namespace_capacity_summary")
    if not isinstance(cap, dict) or not cap:
        return
    # Ratio sections share one rendering path; the spec tuples keep the
    # source key, output label, and per-entry field keys together.
    for source_key, label, ratio_key, usage_key, req_key in (
        ("cpu_ratio_top", "namespace_cpu_ratio_top", "cpu_usage_ratio", "cpu_usage", "cpu_requests"),
        ("mem_ratio_top", "namespace_mem_ratio_top", "mem_usage_ratio", "mem_usage", "mem_requests"),
    ):
        candidates = cap.get(source_key)
        if isinstance(candidates, list):
            rendered = _capacity_ratio_parts(candidates, ratio_key, usage_key, req_key)
            if rendered:
                lines.append(label + ": " + "; ".join(rendered))
    for source_key, label in (
        ("cpu_headroom_low", "namespace_cpu_headroom_low"),
        ("mem_headroom_low", "namespace_mem_headroom_low"),
    ):
        candidates = cap.get(source_key)
        if isinstance(candidates, list):
            rendered = _capacity_headroom_parts(candidates)
            if rendered:
                lines.append(label + ": " + "; ".join(rendered))
    cpu_over = cap.get("cpu_overcommitted")
    mem_over = cap.get("mem_overcommitted")
    if cpu_over is not None or mem_over is not None:
        lines.append(f"namespace_overcommitted: cpu={cpu_over} mem={mem_over}")
    for source_key, label in (
        ("cpu_overcommitted_names", "namespace_cpu_overcommitted_names"),
        ("mem_overcommitted_names", "namespace_mem_overcommitted_names"),
    ):
        raw = cap.get(source_key)
        if isinstance(raw, list) and raw:
            names = [name for name in raw if isinstance(name, str) and name]
            if names:
                lines.append(label + ": " + _format_names(names))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_workloads_by_namespace(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
workloads = summary.get("workloads")
|
||||||
|
if not isinstance(workloads, list) or not workloads:
|
||||||
|
return
|
||||||
|
by_ns: dict[str, list[dict[str, Any]]] = {}
|
||||||
|
for item in workloads:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
ns = item.get("namespace") or ""
|
||||||
|
name = item.get("workload") or ""
|
||||||
|
if not ns or not name:
|
||||||
|
continue
|
||||||
|
by_ns.setdefault(ns, []).append(item)
|
||||||
|
for ns, items in sorted(by_ns.items()):
|
||||||
|
items.sort(
|
||||||
|
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or "")
|
||||||
|
)
|
||||||
|
parts = []
|
||||||
|
for entry in items[:2]:
|
||||||
|
name = entry.get("workload") or ""
|
||||||
|
pods = entry.get("pods_total")
|
||||||
|
primary = entry.get("primary_node")
|
||||||
|
label = f"{name}({pods})" if pods is not None else name
|
||||||
|
if primary:
|
||||||
|
label = f"{label}@{primary}"
|
||||||
|
if label:
|
||||||
|
parts.append(label)
|
||||||
|
if parts:
|
||||||
|
lines.append(f"workloads_top_{ns}: " + "; ".join(parts))
|
||||||
|
|
||||||
|
|
||||||
|
def _append_lexicon(lines: list[str], summary: dict[str, Any]) -> None:
|
||||||
|
lexicon = summary.get("lexicon")
|
||||||
|
if not isinstance(lexicon, dict):
|
||||||
|
return
|
||||||
|
terms = lexicon.get("terms") if isinstance(lexicon.get("terms"), list) else []
|
||||||
|
aliases = lexicon.get("aliases") if isinstance(lexicon.get("aliases"), dict) else {}
|
||||||
|
for entry in terms[:8]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
term = entry.get("term")
|
||||||
|
meaning = entry.get("meaning")
|
||||||
|
if term and meaning:
|
||||||
|
lines.append(f"lexicon_term: {term} => {meaning}")
|
||||||
|
for key, value in list(aliases.items())[:6]:
|
||||||
|
if key and value:
|
||||||
|
lines.append(f"lexicon_alias: {key} => {value}")
|
||||||
|
|
||||||
|
|
||||||
|
def _append_cross_stats(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append cross-sectional node, namespace, and PVC stat lines.

    Renders up to 10 node-metric entries, 10 namespace-metric entries, and
    5 PVC entries from summary["cross_stats"], skipping malformed records.
    """
    cross_stats = summary.get("cross_stats")
    if not isinstance(cross_stats, dict):
        return

    def _listed(key: str) -> list[Any]:
        # Normalize a possibly-missing/mistyped section to a list.
        raw = cross_stats.get(key)
        return raw if isinstance(raw, list) else []

    for entry in _listed("node_metric_top")[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        node = entry.get("node")
        if not (metric and node):
            continue
        fields = [
            f"value={_format_float(entry.get('value'))}",
            f"cpu={_format_float(entry.get('cpu'))}",
            f"ram={_format_float(entry.get('ram'))}",
            f"net={_format_float(entry.get('net'))}",
            f"io={_format_float(entry.get('io'))}",
        ]
        pods = entry.get("pods_total")
        if pods is not None:
            fields.append(f"pods={pods}")
        lines.append(f"cross_node_{metric}: {node} " + " ".join(fields))
    for entry in _listed("namespace_metric_top")[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        namespace = entry.get("namespace")
        if not (metric and namespace):
            continue
        fields = [
            f"value={_format_float(entry.get('value'))}",
            f"cpu_ratio={_format_float(entry.get('cpu_ratio'))}",
            f"mem_ratio={_format_float(entry.get('mem_ratio'))}",
        ]
        pods = entry.get("pods_total")
        if pods is not None:
            fields.append(f"pods={pods}")
        lines.append(f"cross_namespace_{metric}: {namespace} " + " ".join(fields))
    for entry in _listed("pvc_top")[:5]:
        if not isinstance(entry, dict):
            continue
        namespace = entry.get("namespace")
        pvc = entry.get("pvc")
        if namespace and pvc:
            used = _format_float(entry.get("used_percent"))
            lines.append(f"cross_pvc_usage: {namespace}/{pvc} used={used}")
|
||||||
|
|
||||||
|
|
||||||
|
# Export every non-dunder module-level name — including names imported into
# this module — presumably to support the package's `from .x import *`
# re-export pattern (ruff F403/F405 are ignored for builder modules).
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||||
72
atlasbot/snapshot/builder/summary_text.py
Normal file
72
atlasbot/snapshot/builder/summary_text.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .core_a import *
|
||||||
|
from .core_b import *
|
||||||
|
from .format_a import *
|
||||||
|
from .format_b import *
|
||||||
|
from .format_c import *
|
||||||
|
|
||||||
|
|
||||||
|
def summary_text(snapshot: dict[str, Any] | None) -> str:
    """Render the snapshot summary into deterministic prompt text.

    Builds the summary dict, emits a fixed header plus optional snapshot
    metadata, then runs every section appender in a fixed order so the
    output is stable for identical input.
    """
    summary = build_summary(snapshot)
    if not summary:
        return ""
    lines: list[str] = ["atlas_cluster: Titan Lab Atlas Kubernetes cluster (internal)."]
    meta: list[str] = []
    if isinstance(snapshot, dict):
        collected_at = snapshot.get("collected_at")
        version = snapshot.get("snapshot_version")
        if collected_at:
            meta.append(f"collected_at={collected_at}")
        if version:
            meta.append(f"version={version}")
    if meta:
        lines.append("snapshot: " + ", ".join(meta))
    # Section order is part of the contract: downstream prompts rely on a
    # deterministic layout, so do not reorder these appenders.
    appenders = (
        _append_nodes,
        _append_hardware,
        _append_hardware_groups,
        _append_lexicon,
        _append_pressure,
        _append_node_facts,
        _append_node_ages,
        _append_node_taints,
        _append_capacity,
        _append_pods,
        _append_namespace_pods,
        _append_namespace_nodes,
        _append_node_pods,
        _append_pod_issues,
        _append_pod_issue_summary,
        _append_workload_health,
        _append_events,
        _append_node_usage_stats,
        _append_namespace_usage,
        _append_namespace_requests,
        _append_namespace_io_net,
        _append_pod_usage,
        _append_restarts,
        _append_job_failures,
        _append_jobs,
        _append_postgres,
        _append_hottest,
        _append_pvc_usage,
        _append_root_disk_headroom,
        _append_namespace_capacity_summary,
        _append_baseline_deltas,
        _append_longhorn,
        _append_workloads,
        _append_topology,
        _append_workloads_by_namespace,
        _append_node_load_summary,
        _append_cluster_watchlist,
        _append_hardware_usage,
        _append_cross_stats,
        _append_flux,
        _append_signals,
        _append_profiles,
        _append_units_windows,
    )
    for append_section in appenders:
        append_section(lines, summary)
    return "\n".join(lines)
|
||||||
@ -6,6 +6,17 @@ from typing import Any
|
|||||||
|
|
||||||
|
|
||||||
class ClaimStore:
|
class ClaimStore:
|
||||||
|
"""Persist conversation claims for follow-up answers.
|
||||||
|
|
||||||
|
Why:
|
||||||
|
- keep short-lived conversation state durable across turns without
|
||||||
|
forcing the answer engine to own storage mechanics.
|
||||||
|
|
||||||
|
Input/Output:
|
||||||
|
- accepts a SQLite path and TTL, stores claim payloads, and returns
|
||||||
|
normalized payload dictionaries when queried.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, path: str, ttl_sec: int) -> None:
|
def __init__(self, path: str, ttl_sec: int) -> None:
|
||||||
self._path = path or ":memory:"
|
self._path = path or ":memory:"
|
||||||
self._ttl = max(60, ttl_sec)
|
self._ttl = max(60, ttl_sec)
|
||||||
|
|||||||
21
pyproject.toml
Normal file
21
pyproject.toml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests", "testing"]
|
||||||
|
pythonpath = ["."]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 100
|
||||||
|
target-version = "py312"
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "W", "B", "C90", "I", "PLR", "RUF", "SIM", "UP", "ARG"]
|
||||||
|
ignore = ["E501"]
|
||||||
|
|
||||||
|
[tool.ruff.lint.per-file-ignores]
|
||||||
|
"atlasbot/engine/answerer/*.py" = ["F403", "F405", "I001"]
|
||||||
|
"atlasbot/engine/answerer/__init__.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
|
||||||
|
"atlasbot/matrix/bot.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
|
||||||
|
"atlasbot/snapshot/builder/__init__.py" = ["F403", "F405", "I001"]
|
||||||
|
"atlasbot/snapshot/builder/*.py" = ["F403", "F405", "I001"]
|
||||||
|
"testing/*.py" = ["PLR0911", "ARG002", "PLR2004"]
|
||||||
|
"tests/*.py" = ["PLR2004", "I001", "ARG001", "ARG002", "ARG005", "C901", "PLR0915", "UP037"]
|
||||||
|
"scripts/*.py" = ["PLR0911", "PLR2004"]
|
||||||
42
scripts/check_coverage.py
Executable file
42
scripts/check_coverage.py
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Enforce per-file coverage thresholds from SlipCover JSON output."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
"""Check each production file against a minimum coverage percentage."""
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("coverage_json")
|
||||||
|
parser.add_argument("--root", default="atlasbot")
|
||||||
|
parser.add_argument("--threshold", type=float, default=95.0)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
data = json.loads(Path(args.coverage_json).read_text(encoding="utf-8"))
|
||||||
|
files = data.get("files") if isinstance(data, dict) else {}
|
||||||
|
violations: list[tuple[float, str]] = []
|
||||||
|
for path, payload in sorted(files.items()):
|
||||||
|
if not path.startswith(f"{args.root}/"):
|
||||||
|
continue
|
||||||
|
summary = payload.get("summary") if isinstance(payload, dict) else {}
|
||||||
|
percent = summary.get("percent_covered") if isinstance(summary, dict) else None
|
||||||
|
if not isinstance(percent, (int, float)):
|
||||||
|
continue
|
||||||
|
if float(percent) < args.threshold:
|
||||||
|
violations.append((float(percent), path))
|
||||||
|
|
||||||
|
if violations:
|
||||||
|
for percent, path in sorted(violations):
|
||||||
|
print(f"{path}: {percent:.2f}% < {args.threshold:.2f}%")
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
|
|
||||||
83
scripts/check_docstrings.py
Executable file
83
scripts/check_docstrings.py
Executable file
@ -0,0 +1,83 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Require docstrings on public production APIs."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import ast
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _needs_docstring(node: ast.AST, *, parent_class: str | None = None) -> bool:
|
||||||
|
"""Decide whether `node` should carry a contract docstring."""
|
||||||
|
|
||||||
|
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||||
|
name = node.name
|
||||||
|
if name.startswith("_") and name != "__init__":
|
||||||
|
return False
|
||||||
|
return not (parent_class and name.startswith("_"))
|
||||||
|
if isinstance(node, ast.ClassDef):
|
||||||
|
if node.name.startswith("_"):
|
||||||
|
return False
|
||||||
|
if any(
|
||||||
|
(isinstance(dec, ast.Name) and dec.id == "dataclass")
|
||||||
|
or (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name) and dec.func.id == "dataclass")
|
||||||
|
for dec in node.decorator_list
|
||||||
|
):
|
||||||
|
return False
|
||||||
|
if any(
|
||||||
|
isinstance(base, ast.Name) and base.id in {"Exception", "RuntimeError", "BaseException"}
|
||||||
|
for base in node.bases
|
||||||
|
):
|
||||||
|
return False
|
||||||
|
return not any(isinstance(base, ast.Name) and base.id == "BaseModel" for base in node.bases)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_nodes(tree: ast.AST) -> list[tuple[ast.AST, str | None]]:
|
||||||
|
"""Yield top-level public nodes only.
|
||||||
|
|
||||||
|
The gate focuses on the module surface area rather than every internal
|
||||||
|
method so we can keep contracts on the actual API seams.
|
||||||
|
"""
|
||||||
|
|
||||||
|
items: list[tuple[ast.AST, str | None]] = []
|
||||||
|
for node in getattr(tree, "body", []):
|
||||||
|
items.append((node, None))
|
||||||
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Check modules under the production package and report missing contracts.

    Walks every Python file under --root (skipping caches and virtualenvs),
    prints one line per missing docstring, and returns 1 on any violation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", default="atlasbot")
    args = parser.parse_args()

    problems: list[str] = []
    for source in sorted(Path(args.root).rglob("*.py")):
        if "__pycache__" in source.parts or ".venv" in source.parts:
            continue
        module = ast.parse(source.read_text(encoding="utf-8"))
        for node, parent_class in _iter_nodes(module):
            if not _needs_docstring(node, parent_class=parent_class):
                continue
            if ast.get_docstring(node):
                continue
            if isinstance(node, ast.ClassDef):
                problems.append(f"{source}: class {node.name} is missing a docstring")
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                owner = f"{parent_class}." if parent_class else ""
                problems.append(f"{source}: {owner}{node.name} is missing a docstring")

    for item in problems:
        print(item)
    return 1 if problems else 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
70
scripts/check_file_sizes.py
Executable file
70
scripts/check_file_sizes.py
Executable file
@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Fail when production Python files exceed the configured line budget.
|
||||||
|
|
||||||
|
The gate is intentionally narrow:
|
||||||
|
- it only checks the `atlasbot/` package tree;
|
||||||
|
- it treats each file independently;
|
||||||
|
- it keeps the threshold explicit so CI can ratchet without guesswork.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def _count_lines(path: Path) -> int:
|
||||||
|
"""Return the physical line count for `path`.
|
||||||
|
|
||||||
|
Input:
|
||||||
|
- `path`: a readable Python source file.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- The number of newline-delimited lines in the file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return len(path.read_text(encoding="utf-8").splitlines())
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_python_files(root: Path) -> list[Path]:
|
||||||
|
"""List production Python files under `root`.
|
||||||
|
|
||||||
|
Input:
|
||||||
|
- `root`: repository package root to scan.
|
||||||
|
|
||||||
|
Output:
|
||||||
|
- Sorted Python file paths, excluding bytecode and hidden caches.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return sorted(
|
||||||
|
path
|
||||||
|
for path in root.rglob("*.py")
|
||||||
|
if path.is_file() and "__pycache__" not in path.parts and ".venv" not in path.parts
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run the size gate and return a process exit code.

    Prints one line per file over --max-lines (largest first) and returns 1
    when any file is over budget, else 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", default="atlasbot")
    parser.add_argument("--max-lines", type=int, default=500)
    args = parser.parse_args()

    oversized: list[tuple[int, Path]] = []
    for source in _iter_python_files(Path(args.root)):
        count = _count_lines(source)
        if count > args.max_lines:
            oversized.append((count, source))

    if not oversized:
        return 0
    for count, source in sorted(oversized, reverse=True):
        print(f"{source}: {count} lines (limit {args.max_lines})")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
|
|
||||||
2
testing/__init__.py
Normal file
2
testing/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
"""Shared testing helpers for atlasbot."""
|
||||||
|
|
||||||
108
testing/fakes.py
Normal file
108
testing/fakes.py
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
"""Reusable test doubles and settings factories."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from atlasbot.config import Settings
|
||||||
|
|
||||||
|
|
||||||
|
class FakeLLM:
    """Deterministic LLM double for pipeline tests.

    Why:
    - keeps the answer engine tests fast and predictable.

    Input/Output:
    - accepts the same `chat()` signature as the real client;
    - returns canned JSON or text snippets based on the prompt content.
    """

    # Ordered (needles, response) table: the first entry whose needles all
    # appear in the prompt wins. Order matters ("sub-questions" must be
    # matched before its substring "sub-question").
    _CANNED: tuple[tuple[tuple[str, ...], str], ...] = (
        (("normalized", "keywords"), '{"normalized":"What is Atlas?","keywords":["atlas"]}'),
        (("needs_snapshot",), '{"needs_snapshot": true, "answer_style": "direct"}'),
        (("sub-questions",), '[{"id":"q1","question":"What is Atlas?","priority":1}]'),
        (("sub-question",), "Atlas has 22 nodes."),
        (("Answer using only the Fact Sheet",), "Atlas has 22 nodes."),
        (("final response",), "Atlas has 22 nodes."),
        (
            ("Score response quality",),
            '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}',
        ),
        (("claims list",), '{"claims": []}'),
    )

    def __init__(self) -> None:
        # Every prompt seen by chat(), in call order, for test assertions.
        self.calls: list[str] = []

    async def chat(self, messages, *, model=None, timeout_sec=None):
        """Return a prompt-shaped response and remember the last user prompt."""
        prompt = messages[-1]["content"]
        self.calls.append(prompt)
        for needles, canned in self._CANNED:
            if all(needle in prompt for needle in needles):
                return canned
        return "{}"
|
||||||
|
|
||||||
|
|
||||||
|
class SlowFakeLLM(FakeLLM):
    """Variant that sleeps briefly so timeout guards can be exercised."""

    async def chat(self, messages, *, model=None, timeout_sec=None):
        """Delay before answering to make budget handling deterministic."""
        # 20 ms is long enough to trip a sub-10ms budget yet keeps tests fast.
        await asyncio.sleep(0.02)
        response = await super().chat(messages, model=model, timeout_sec=timeout_sec)
        return response
|
||||||
|
|
||||||
|
|
||||||
|
def build_test_settings() -> Settings:
    """Create a fully populated `Settings` instance for unit tests.

    Every string endpoint/credential is blank, model names are short labels,
    and the numeric budgets are small so tests stay fast and offline.
    """
    # All fields whose test value is simply the empty string.
    blank_fields = dict.fromkeys(
        (
            "matrix_base",
            "auth_base",
            "bot_user",
            "bot_pass",
            "room_alias",
            "server_name",
            "ollama_url",
            "ollama_fallback_model",
            "ollama_api_key",
            "internal_token",
            "kb_dir",
            "vm_url",
            "ariadne_state_url",
            "ariadne_state_token",
            "nats_url",
            "nats_stream",
            "nats_subject",
            "nats_result_bucket",
        ),
        "",
    )
    return Settings(
        **blank_fields,
        bot_mentions=(),
        matrix_bots=(),
        ollama_model="base",
        ollama_model_fast="fast",
        ollama_model_smart="smart",
        ollama_model_genius="genius",
        ollama_timeout_sec=1.0,
        ollama_retries=0,
        http_port=8090,
        snapshot_ttl_sec=30,
        thinking_interval_sec=30,
        quick_time_budget_sec=15.0,
        smart_time_budget_sec=45.0,
        genius_time_budget_sec=180.0,
        conversation_ttl_sec=300,
        snapshot_pin_enabled=False,
        queue_enabled=False,
        fast_max_angles=1,
        smart_max_angles=1,
        genius_max_angles=1,
        fast_max_candidates=1,
        smart_max_candidates=1,
        genius_max_candidates=1,
        fast_llm_calls_max=9,
        smart_llm_calls_max=17,
        genius_llm_calls_max=32,
        llm_limit_multiplier=1.5,
        state_db_path="/tmp/atlasbot_test_state.db",
    )
|
||||||
@ -1,98 +1,21 @@
|
|||||||
|
"""Answer-engine regression tests."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import replace
|
from dataclasses import replace
|
||||||
|
|
||||||
from atlasbot.engine.answerer import AnswerEngine
|
from atlasbot.engine.answerer import AnswerEngine
|
||||||
from atlasbot.knowledge.loader import KnowledgeBase
|
from atlasbot.knowledge.loader import KnowledgeBase
|
||||||
from atlasbot.snapshot.builder import SnapshotProvider
|
from atlasbot.snapshot.builder import SnapshotProvider
|
||||||
from atlasbot.config import Settings
|
from testing.fakes import FakeLLM, SlowFakeLLM, build_test_settings
|
||||||
|
|
||||||
|
|
||||||
class FakeLLM:
|
def test_engine_answer_basic() -> None:
|
||||||
def __init__(self) -> None:
|
"""The quick path should answer from the fact sheet."""
|
||||||
self.calls: list[str] = []
|
|
||||||
|
|
||||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
|
||||||
prompt = messages[-1]["content"]
|
|
||||||
self.calls.append(prompt)
|
|
||||||
if "normalized" in prompt and "keywords" in prompt:
|
|
||||||
return '{"normalized":"What is Atlas?","keywords":["atlas"]}'
|
|
||||||
if "needs_snapshot" in prompt:
|
|
||||||
return '{"needs_snapshot": true, "answer_style": "direct"}'
|
|
||||||
if "sub-questions" in prompt:
|
|
||||||
return '[{"id":"q1","question":"What is Atlas?","priority":1}]'
|
|
||||||
if "sub-question" in prompt:
|
|
||||||
return "Atlas has 22 nodes."
|
|
||||||
if "Answer using only the Fact Sheet" in prompt:
|
|
||||||
return "Atlas has 22 nodes."
|
|
||||||
if "final response" in prompt:
|
|
||||||
return "Atlas has 22 nodes."
|
|
||||||
if "Score response quality" in prompt:
|
|
||||||
return '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}'
|
|
||||||
if "claims list" in prompt:
|
|
||||||
return '{"claims": []}'
|
|
||||||
return "{}"
|
|
||||||
|
|
||||||
|
|
||||||
class SlowFakeLLM(FakeLLM):
|
|
||||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
|
||||||
await asyncio.sleep(0.02)
|
|
||||||
return await super().chat(messages, model=model, timeout_sec=timeout_sec)
|
|
||||||
|
|
||||||
|
|
||||||
def _settings() -> Settings:
|
|
||||||
return Settings(
|
|
||||||
matrix_base="",
|
|
||||||
auth_base="",
|
|
||||||
bot_user="",
|
|
||||||
bot_pass="",
|
|
||||||
room_alias="",
|
|
||||||
server_name="",
|
|
||||||
bot_mentions=(),
|
|
||||||
matrix_bots=(),
|
|
||||||
ollama_url="",
|
|
||||||
ollama_model="base",
|
|
||||||
ollama_model_fast="fast",
|
|
||||||
ollama_model_smart="smart",
|
|
||||||
ollama_model_genius="genius",
|
|
||||||
ollama_fallback_model="",
|
|
||||||
ollama_timeout_sec=1.0,
|
|
||||||
ollama_retries=0,
|
|
||||||
ollama_api_key="",
|
|
||||||
http_port=8090,
|
|
||||||
internal_token="",
|
|
||||||
kb_dir="",
|
|
||||||
vm_url="",
|
|
||||||
ariadne_state_url="",
|
|
||||||
ariadne_state_token="",
|
|
||||||
snapshot_ttl_sec=30,
|
|
||||||
thinking_interval_sec=30,
|
|
||||||
quick_time_budget_sec=15.0,
|
|
||||||
smart_time_budget_sec=45.0,
|
|
||||||
genius_time_budget_sec=180.0,
|
|
||||||
conversation_ttl_sec=300,
|
|
||||||
snapshot_pin_enabled=False,
|
|
||||||
queue_enabled=False,
|
|
||||||
nats_url="",
|
|
||||||
nats_stream="",
|
|
||||||
nats_subject="",
|
|
||||||
nats_result_bucket="",
|
|
||||||
fast_max_angles=1,
|
|
||||||
smart_max_angles=1,
|
|
||||||
genius_max_angles=1,
|
|
||||||
fast_max_candidates=1,
|
|
||||||
smart_max_candidates=1,
|
|
||||||
genius_max_candidates=1,
|
|
||||||
fast_llm_calls_max=9,
|
|
||||||
smart_llm_calls_max=17,
|
|
||||||
genius_llm_calls_max=32,
|
|
||||||
llm_limit_multiplier=1.5,
|
|
||||||
state_db_path="/tmp/atlasbot_test_state.db",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_engine_answer_basic():
|
|
||||||
llm = FakeLLM()
|
llm = FakeLLM()
|
||||||
settings = _settings()
|
settings = build_test_settings()
|
||||||
kb = KnowledgeBase("")
|
kb = KnowledgeBase("")
|
||||||
snapshot = SnapshotProvider(settings)
|
snapshot = SnapshotProvider(settings)
|
||||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||||
@ -101,9 +24,11 @@ def test_engine_answer_basic():
|
|||||||
assert "Atlas has 22 nodes" in result.reply
|
assert "Atlas has 22 nodes" in result.reply
|
||||||
|
|
||||||
|
|
||||||
def test_smart_mode_uses_factsheet_path():
|
def test_smart_mode_uses_factsheet_path() -> None:
|
||||||
|
"""Smart mode should stay on the factsheet branch for direct cluster questions."""
|
||||||
|
|
||||||
llm = FakeLLM()
|
llm = FakeLLM()
|
||||||
settings = _settings()
|
settings = build_test_settings()
|
||||||
kb = KnowledgeBase("")
|
kb = KnowledgeBase("")
|
||||||
snapshot = SnapshotProvider(settings)
|
snapshot = SnapshotProvider(settings)
|
||||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||||
@ -113,9 +38,11 @@ def test_smart_mode_uses_factsheet_path():
|
|||||||
assert "time budget" not in result.reply.lower()
|
assert "time budget" not in result.reply.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_genius_mode_uses_factsheet_path():
|
def test_genius_mode_uses_factsheet_path() -> None:
|
||||||
|
"""Genius mode should also return the factsheet answer for the same query."""
|
||||||
|
|
||||||
llm = FakeLLM()
|
llm = FakeLLM()
|
||||||
settings = _settings()
|
settings = build_test_settings()
|
||||||
kb = KnowledgeBase("")
|
kb = KnowledgeBase("")
|
||||||
snapshot = SnapshotProvider(settings)
|
snapshot = SnapshotProvider(settings)
|
||||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||||
@ -125,9 +52,11 @@ def test_genius_mode_uses_factsheet_path():
|
|||||||
assert "time budget" not in result.reply.lower()
|
assert "time budget" not in result.reply.lower()
|
||||||
|
|
||||||
|
|
||||||
def test_plain_math_question_is_rejected_for_cluster_modes():
|
def test_plain_math_question_is_rejected_for_cluster_modes() -> None:
|
||||||
|
"""The bot should keep users on cluster questions instead of generic math."""
|
||||||
|
|
||||||
llm = FakeLLM()
|
llm = FakeLLM()
|
||||||
settings = _settings()
|
settings = build_test_settings()
|
||||||
kb = KnowledgeBase("")
|
kb = KnowledgeBase("")
|
||||||
snapshot = SnapshotProvider(settings)
|
snapshot = SnapshotProvider(settings)
|
||||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||||
@ -136,9 +65,11 @@ def test_plain_math_question_is_rejected_for_cluster_modes():
|
|||||||
assert "focus on Titan cluster operations" in result.reply
|
assert "focus on Titan cluster operations" in result.reply
|
||||||
|
|
||||||
|
|
||||||
def test_quick_mode_time_budget_guard():
|
def test_quick_mode_time_budget_guard() -> None:
|
||||||
|
"""A slow model call should trip the quick-mode budget guard."""
|
||||||
|
|
||||||
llm = SlowFakeLLM()
|
llm = SlowFakeLLM()
|
||||||
settings = replace(_settings(), quick_time_budget_sec=0.01)
|
settings = replace(build_test_settings(), quick_time_budget_sec=0.01)
|
||||||
kb = KnowledgeBase("")
|
kb = KnowledgeBase("")
|
||||||
snapshot = SnapshotProvider(settings)
|
snapshot = SnapshotProvider(settings)
|
||||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||||
|
|||||||
810
tests/test_quality_gate_paths.py
Normal file
810
tests/test_quality_gate_paths.py
Normal file
@ -0,0 +1,810 @@
|
|||||||
|
"""Targeted quality-gate coverage for runtime and answerer orchestration."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from dataclasses import replace
|
||||||
|
from pathlib import Path
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from atlasbot.api.http import Api, AnswerRequest
|
||||||
|
from atlasbot.config import MatrixBotConfig
|
||||||
|
from atlasbot.engine.answerer import (
|
||||||
|
AnswerEngine,
|
||||||
|
AnswerResult,
|
||||||
|
AnswerScores,
|
||||||
|
ClaimItem,
|
||||||
|
EvidenceItem,
|
||||||
|
ModePlan,
|
||||||
|
)
|
||||||
|
from atlasbot.engine.answerer.common import _mode_plan
|
||||||
|
from atlasbot.engine.answerer.engine import AnswerEngine as EngineClass
|
||||||
|
from atlasbot.engine.answerer.workflow import run_answer
|
||||||
|
from atlasbot.engine.answerer.workflow_post import finalize_answer
|
||||||
|
from atlasbot.knowledge.loader import KnowledgeBase
|
||||||
|
from atlasbot.llm.client import LLMClient, LLMError, parse_json
|
||||||
|
from atlasbot.main import result_scores
|
||||||
|
from atlasbot.matrix.bot import MatrixBot, MatrixClient
|
||||||
|
from atlasbot.queue.nats import QueueManager
|
||||||
|
from atlasbot.snapshot.builder import SnapshotProvider, build_summary
|
||||||
|
from testing.fakes import build_test_settings
|
||||||
|
from tests.test_support_modules import _rich_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
class StaticSnapshot:
|
||||||
|
"""Return a fixed snapshot for answer-engine tests."""
|
||||||
|
|
||||||
|
def __init__(self, payload: dict[str, Any]) -> None:
|
||||||
|
self._payload = payload
|
||||||
|
|
||||||
|
def get(self) -> dict[str, Any]:
|
||||||
|
"""Return the stored snapshot payload."""
|
||||||
|
|
||||||
|
return self._payload
|
||||||
|
|
||||||
|
|
||||||
|
class PromptLLM:
|
||||||
|
"""Map prompt fragments to canned responses for workflow tests."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.calls: list[tuple[str, str]] = []
|
||||||
|
|
||||||
|
async def chat(
|
||||||
|
self,
|
||||||
|
messages: list[dict[str, str]],
|
||||||
|
*,
|
||||||
|
model: str | None = None,
|
||||||
|
timeout_sec: float | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Return the scripted response for the latest user prompt."""
|
||||||
|
|
||||||
|
del timeout_sec
|
||||||
|
system = messages[0]["content"]
|
||||||
|
prompt = messages[-1]["content"]
|
||||||
|
self.calls.append((model or "", prompt))
|
||||||
|
if "Given chunk summaries, score relevance" in prompt:
|
||||||
|
items = []
|
||||||
|
for line in prompt.splitlines():
|
||||||
|
if line.startswith("- c"):
|
||||||
|
chunk_id = line.split()[1].rstrip(":")
|
||||||
|
score = 95 if "cpu" in line.lower() or "synapse" in line.lower() else 80
|
||||||
|
items.append({"id": chunk_id, "score": score, "reason": "relevant"})
|
||||||
|
return json.dumps(items or [{"id": "c0", "score": 90, "reason": "relevant"}])
|
||||||
|
direct = self._direct_response(prompt)
|
||||||
|
if direct is not None:
|
||||||
|
return direct
|
||||||
|
response = self._lookup_response(system, prompt)
|
||||||
|
if response is not None:
|
||||||
|
return response
|
||||||
|
raise AssertionError(f"Unhandled prompt:\nSYSTEM={system}\nPROMPT={prompt}")
|
||||||
|
|
||||||
|
def _direct_response(self, prompt: str) -> str | None:
|
||||||
|
"""Return direct string responses for a few prompt families."""
|
||||||
|
|
||||||
|
if "Answer the sub-question using the context" in prompt:
|
||||||
|
return "The best runbook path is runbooks/fix.md." if "runbook" in prompt.lower() else "synapse is hottest with cpu 95 on titan-01."
|
||||||
|
markers = [
|
||||||
|
("Write a final response to the user", "titan-99 is hottest and the runbook is runbooks/wrong.md."),
|
||||||
|
("Draft:", "synapse is hottest at cpu 95 on titan-01, and amd64 nodes remain separate from raspberry hardware."),
|
||||||
|
("Return JSON with fields: issues", '{"issues":["mention the exact runbook"],"missing_data":[],"risky_claims":[]}'),
|
||||||
|
("command (string), rationale", '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}'),
|
||||||
|
("confidence (0-100)", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
|
||||||
|
]
|
||||||
|
for marker, response in markers:
|
||||||
|
if marker in prompt:
|
||||||
|
if marker == "Draft:" and "If Facts are provided" not in prompt:
|
||||||
|
continue
|
||||||
|
return response
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _lookup_response(self, system: str, prompt: str) -> str | None:
|
||||||
|
"""Return canned responses for prompt markers."""
|
||||||
|
|
||||||
|
del system
|
||||||
|
markers = [
|
||||||
|
(
|
||||||
|
"normalized (string), keywords",
|
||||||
|
'{"normalized":"Which namespace is hottest on raspberry hardware and which runbook should I use?","keywords":["namespace","hottest","cpu","raspberry","runbook"]}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"needs_snapshot (bool)",
|
||||||
|
'{"needs_snapshot":true,"needs_kb":true,"needs_tool":true,"answer_style":"insightful","follow_up":false,"question_type":"open_ended","focus_entity":"namespace","focus_metric":"cpu"}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Generate up to",
|
||||||
|
'[{"id":"q1","question":"Which namespace is hottest?","priority":5,"kind":"metric"},{"id":"q2","question":"Which runbook applies?","priority":4,"kind":"context"}]',
|
||||||
|
),
|
||||||
|
("Choose the run that best aligns", '{"selected_index": 1}'),
|
||||||
|
("AvailableKeys:", '{"keys":["namespace_cpu_top","namespace_pods","hardware_nodes"]}'),
|
||||||
|
("Return JSON with field: missing", '{"missing":[]}'),
|
||||||
|
("Return JSON with fields: prefixes", '{"prefixes":["namespace","hottest"]}'),
|
||||||
|
("fact_types", '{"fact_types":["namespace_cpu_top","hardware_nodes"]}'),
|
||||||
|
("Return JSON with field: signals", '{"signals":["cpu","synapse","raspberry"]}'),
|
||||||
|
(
|
||||||
|
"Signals:",
|
||||||
|
'{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Return JSON with field: lines",
|
||||||
|
'{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"CandidateFacts:",
|
||||||
|
'{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"FactCandidates:",
|
||||||
|
'{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Suggest a safe, read-only command",
|
||||||
|
'{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}',
|
||||||
|
),
|
||||||
|
("Pick the best candidate for accuracy and grounding", '{"best": 1}'),
|
||||||
|
("Pick the best draft for accuracy", '{"best": 1}'),
|
||||||
|
("Pick the best runbook path", '{"path":"runbooks/fix.md"}'),
|
||||||
|
("Check the draft against the context", "synapse is hottest on titan-01, but see runbooks/wrong.md."),
|
||||||
|
("Answer using the fact", "Latest metrics: namespace_cpu_top: synapse=95."),
|
||||||
|
("Rewrite the draft to only include claims supported by FactsUsed", "synapse is hottest on titan-01."),
|
||||||
|
("Check if an open-ended answer includes at least two concrete signals", '{"ok": false, "reason": "needs more detail"}'),
|
||||||
|
("ok (bool), reason (string)", '{"ok": false, "reason": "needs more detail"}'),
|
||||||
|
("Rewrite the answer using the critique", "synapse is hottest at cpu 95 on titan-01. Use runbooks/fix.md."),
|
||||||
|
("Return JSON with field: note", '{"note":"The answer would benefit from per-pod CPU samples."}'),
|
||||||
|
("Score response quality", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
|
||||||
|
(
|
||||||
|
"Return JSON with fields: confidence (0-100), relevance (0-100), satisfaction (0-100), hallucination_risk (low|medium|high).",
|
||||||
|
'{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"claims list",
|
||||||
|
'{"claims":[{"id":"c1","claim":"synapse is hottest","evidence":[{"path":"hottest.cpu.node","reason":"snapshot"}]}]}',
|
||||||
|
),
|
||||||
|
("Select the claims most relevant", '{"claim_ids":["c1"]}'),
|
||||||
|
("Follow-up:", "titan-99 is still hottest."),
|
||||||
|
("Rewrite the answer to be concise and directly answer the question", "Latest metrics: namespace_cpu_top: synapse=95."),
|
||||||
|
("Deduplicate repeated statements", "Latest metrics: namespace_cpu_top: synapse=95."),
|
||||||
|
("Answer using only the Fact Sheet", "Fact sheet answer: namespace_cpu_top: synapse=95. Use runbooks/fix.md."),
|
||||||
|
]
|
||||||
|
for marker, response in markers:
|
||||||
|
if marker in prompt:
|
||||||
|
return response
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class TimeoutLLM:
|
||||||
|
"""Raise a timeout as soon as the workflow makes an LLM call."""
|
||||||
|
|
||||||
|
async def chat(
|
||||||
|
self,
|
||||||
|
messages: list[dict[str, str]],
|
||||||
|
*,
|
||||||
|
model: str | None = None,
|
||||||
|
timeout_sec: float | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""Trigger the workflow timeout handling branch."""
|
||||||
|
|
||||||
|
del messages, model, timeout_sec
|
||||||
|
raise TimeoutError("boom")
|
||||||
|
|
||||||
|
|
||||||
|
class LimitLLM(PromptLLM):
|
||||||
|
"""Reuse prompt handling while allowing the workflow to hit call caps."""
|
||||||
|
|
||||||
|
|
||||||
|
def _settings(tmp_path: Path, **overrides: Any):
|
||||||
|
"""Build settings with an isolated claim-store path."""
|
||||||
|
|
||||||
|
return replace(build_test_settings(), state_db_path=str(tmp_path / "state.db"), **overrides)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_engine(tmp_path: Path, llm: Any, **setting_overrides: Any) -> AnswerEngine:
|
||||||
|
"""Construct a real engine with static snapshot and KB doubles."""
|
||||||
|
|
||||||
|
settings = _settings(tmp_path, **setting_overrides)
|
||||||
|
snapshot = StaticSnapshot(_rich_snapshot())
|
||||||
|
kb = KnowledgeBase("")
|
||||||
|
kb.summary = lambda: "KB summary." # type: ignore[method-assign]
|
||||||
|
kb.runbook_titles = lambda limit=5: "Relevant runbooks:\n- Fix (runbooks/fix.md)" # type: ignore[method-assign]
|
||||||
|
kb.runbook_paths = lambda limit=10: ["runbooks/fix.md"] # type: ignore[method-assign]
|
||||||
|
kb.chunk_lines = lambda max_files=20, max_chars=6000: [ # type: ignore[method-assign]
|
||||||
|
"runbooks/fix.md",
|
||||||
|
"namespace_cpu_top: synapse=95",
|
||||||
|
"hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
|
||||||
|
]
|
||||||
|
return AnswerEngine(settings, llm, kb, snapshot) # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_engine_helper_methods_cover_state_and_followup(tmp_path: Path) -> None:
|
||||||
|
"""Cover answer-engine helper branches outside the main workflow."""
|
||||||
|
|
||||||
|
settings = _settings(tmp_path)
|
||||||
|
|
||||||
|
class StockLLM:
|
||||||
|
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||||
|
del messages, model, timeout_sec
|
||||||
|
return "stock reply"
|
||||||
|
|
||||||
|
engine = EngineClass(settings, StockLLM(), KnowledgeBase(""), StaticSnapshot(_rich_snapshot()))
|
||||||
|
|
||||||
|
async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
|
||||||
|
del _system, context, model
|
||||||
|
static = {
|
||||||
|
"draft_select": '{"best": 2}',
|
||||||
|
"score": '{"confidence":90,"relevance":91,"satisfaction":92,"hallucination_risk":"low"}',
|
||||||
|
"claim_map": '{"claims":[{"id":"c1","claim":"cpu is high","evidence":[{"path":"hottest.cpu.node","reason":"why"},{"path":"","reason":"skip"}]},"bad"]}',
|
||||||
|
"select_claims": '{"claim_ids":["c1"]}',
|
||||||
|
"followup": "titan-99 is hottest. The draft is correct.",
|
||||||
|
"followup_fix": "titan-01 is hottest.",
|
||||||
|
"dedup_followup": "The draft is correct. titan-01 is hottest.",
|
||||||
|
"dedup": "deduped",
|
||||||
|
}
|
||||||
|
if tag == "synth":
|
||||||
|
return "draft one" if "DraftIndex: 1" in _prompt else "draft two"
|
||||||
|
if tag in static:
|
||||||
|
return static[tag]
|
||||||
|
raise AssertionError(tag)
|
||||||
|
|
||||||
|
stock = asyncio.run(engine._answer_stock("hello"))
|
||||||
|
assert stock.reply == "stock reply"
|
||||||
|
|
||||||
|
plan = replace(_mode_plan(settings, "smart"), drafts=2, parallelism=2)
|
||||||
|
synth = asyncio.run(
|
||||||
|
engine._synthesize_answer(
|
||||||
|
"Which node is hottest?",
|
||||||
|
["draft one", "draft two"],
|
||||||
|
"ctx",
|
||||||
|
{"question_type": "metric", "answer_style": "direct"},
|
||||||
|
plan,
|
||||||
|
call_llm,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
synth_empty = asyncio.run(
|
||||||
|
engine._synthesize_answer(
|
||||||
|
"Which node is hottest?",
|
||||||
|
[],
|
||||||
|
"ctx",
|
||||||
|
{"question_type": "metric", "answer_style": "direct"},
|
||||||
|
replace(plan, drafts=1, parallelism=1),
|
||||||
|
call_llm,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert synth == "draft two"
|
||||||
|
assert synth_empty == "draft two"
|
||||||
|
|
||||||
|
scored = asyncio.run(engine._score_answer("q", "a", plan, call_llm))
|
||||||
|
assert scored.hallucination_risk == "low"
|
||||||
|
assert asyncio.run(engine._score_answer("q", "a", replace(plan, use_scores=False), call_llm)).confidence == 60
|
||||||
|
|
||||||
|
summary = build_summary(_rich_snapshot())
|
||||||
|
claims = asyncio.run(engine._extract_claims("q", "a", summary, ["fact"], call_llm))
|
||||||
|
assert claims and claims[0].evidence[0].path == "hottest.cpu.node"
|
||||||
|
assert asyncio.run(engine._extract_claims("q", "", summary, [], call_llm)) == []
|
||||||
|
assert asyncio.run(engine._dedup_reply("one. one. one.", plan, call_llm, "dedup")) == "deduped"
|
||||||
|
assert asyncio.run(engine._dedup_reply("single answer", plan, call_llm, "dedup")) == "single answer"
|
||||||
|
|
||||||
|
engine._store_state("conv-1", claims, summary, _rich_snapshot(), True)
|
||||||
|
state = engine._get_state("conv-1")
|
||||||
|
assert state and state.snapshot
|
||||||
|
assert engine._get_state(None) is None
|
||||||
|
engine._cleanup_state()
|
||||||
|
|
||||||
|
followup = asyncio.run(
|
||||||
|
engine._answer_followup(
|
||||||
|
"Which hardware hotspot is there?",
|
||||||
|
state,
|
||||||
|
summary,
|
||||||
|
{"question_type": "diagnostic"},
|
||||||
|
plan,
|
||||||
|
call_llm,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert "titan-01" in followup
|
||||||
|
assert asyncio.run(engine._select_claims("what about that?", claims, plan, call_llm)) == ["c1"]
|
||||||
|
assert asyncio.run(engine._select_claims("what about that?", [], plan, call_llm)) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_finalize_answer_covers_post_processing_branches(tmp_path: Path) -> None:
|
||||||
|
"""Exercise evidence-fix, runbook, guard, critic, and gap paths."""
|
||||||
|
|
||||||
|
settings = _settings(tmp_path)
|
||||||
|
plan = replace(_mode_plan(settings, "smart"), use_gap=True, use_critic=True)
|
||||||
|
summary = build_summary(_rich_snapshot())
|
||||||
|
summary_lines = [
|
||||||
|
"namespace_cpu_top: synapse=95",
|
||||||
|
"hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
|
||||||
|
"runbooks/fix.md",
|
||||||
|
]
|
||||||
|
observed: list[tuple[str, str]] = []
|
||||||
|
|
||||||
|
async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
|
||||||
|
del _system, context, model
|
||||||
|
responses = {
|
||||||
|
"runbook_select": '{"path":"runbooks/fix.md"}',
|
||||||
|
"evidence_fix": "titan-99 is hottest and see runbooks/wrong.md.",
|
||||||
|
"evidence_fix_enforce": "titan-99 is hottest and see runbooks/wrong.md.",
|
||||||
|
"metric_direct": "no numbers here",
|
||||||
|
"runbook_enforce": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
|
||||||
|
"evidence_guard": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
|
||||||
|
"focus_fix": "Latest metrics: namespace_cpu_top: synapse=95.",
|
||||||
|
"insight_guard": '{"ok": false, "reason": "needs more detail"}',
|
||||||
|
"insight_fix": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
|
||||||
|
"critic": '{"issues":["too vague"]}',
|
||||||
|
"revise": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
|
||||||
|
"gap": '{"note":"The answer would benefit from per-pod CPU samples."}',
|
||||||
|
}
|
||||||
|
if tag not in responses:
|
||||||
|
raise AssertionError(_prompt)
|
||||||
|
return responses[tag]
|
||||||
|
|
||||||
|
class FinalizeEngine:
|
||||||
|
async def _synthesize_answer(self, *args: Any) -> str:
|
||||||
|
return "titan-99 is hottest and see runbooks/wrong.md."
|
||||||
|
|
||||||
|
async def _dedup_reply(self, reply: str, _plan: ModePlan, _call_llm, tag: str) -> str:
|
||||||
|
assert tag == "dedup"
|
||||||
|
return reply
|
||||||
|
|
||||||
|
async def _score_answer(self, _question: str, _reply: str, _plan: ModePlan, _call_llm) -> AnswerScores:
|
||||||
|
return AnswerScores(80, 81, 82, "low")
|
||||||
|
|
||||||
|
async def _extract_claims(self, _question: str, _reply: str, _summary: dict[str, Any], _facts_used: list[str], _call_llm) -> list[ClaimItem]:
|
||||||
|
return [ClaimItem(id="c1", claim="cpu high", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot")])]
|
||||||
|
|
||||||
|
reply, scores, claims = asyncio.run(
|
||||||
|
finalize_answer(
|
||||||
|
engine=FinalizeEngine(),
|
||||||
|
call_llm=call_llm,
|
||||||
|
normalized="Which namespace is hottest on raspberry hardware and which runbook should I use?",
|
||||||
|
subanswers=["synapse is hottest"],
|
||||||
|
context="ctx",
|
||||||
|
classify={"question_type": "open_ended", "answer_style": "direct"},
|
||||||
|
plan=plan,
|
||||||
|
summary=summary,
|
||||||
|
summary_lines=summary_lines,
|
||||||
|
metric_facts=["namespace_cpu_top: synapse=95"],
|
||||||
|
key_facts=["namespace_cpu_top: synapse=95"],
|
||||||
|
facts_used=["hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"],
|
||||||
|
allowed_nodes=["titan-01", "titan-02"],
|
||||||
|
allowed_namespaces=["synapse"],
|
||||||
|
runbook_paths=["runbooks/fix.md"],
|
||||||
|
lowered_question="which namespace is hottest on raspberry hardware and which runbook should i use?",
|
||||||
|
force_metric=True,
|
||||||
|
keyword_tokens=["namespace", "cpu", "raspberry"],
|
||||||
|
question_tokens=["namespace", "cpu", "raspberry"],
|
||||||
|
snapshot_context="ClusterSnapshot:\nnamespace_cpu_top: synapse=95",
|
||||||
|
observer=lambda stage, note: observed.append((stage, note)),
|
||||||
|
mode="smart",
|
||||||
|
metric_keys=["namespace_cpu_top"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert "runbooks/fix.md" in reply
|
||||||
|
assert "synapse=95" in reply
|
||||||
|
assert scores.confidence == 80
|
||||||
|
assert claims and claims[0].id == "c1"
|
||||||
|
assert ("evidence_fix", "repairing missing evidence") in observed
|
||||||
|
assert ("critic", "reviewing") in observed
|
||||||
|
assert ("gap", "checking gaps") in observed
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_answer_deep_workflow_persists_state(tmp_path: Path) -> None:
|
||||||
|
"""Drive the full smart workflow through retrieval, synthesis, and post-processing."""
|
||||||
|
|
||||||
|
engine = _make_engine(tmp_path, PromptLLM())
|
||||||
|
observed: list[tuple[str, str]] = []
|
||||||
|
result = asyncio.run(
|
||||||
|
run_answer(
|
||||||
|
engine,
|
||||||
|
"Run limitless Which namespace is hottest on raspberry hardware and which runbook should I use?",
|
||||||
|
mode="smart",
|
||||||
|
history=[{"q": "before", "a": "earlier"}],
|
||||||
|
observer=lambda stage, note: observed.append((stage, note)),
|
||||||
|
conversation_id="room-1",
|
||||||
|
snapshot_pin=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert "runbooks/fix.md" in result.reply
|
||||||
|
assert result.meta["tool_hint"]["command"] == "kubectl top pods -n synapse"
|
||||||
|
state = engine._get_state("room-1")
|
||||||
|
assert state and state.claims and state.snapshot
|
||||||
|
stages = {stage for stage, _note in observed}
|
||||||
|
assert {"normalize", "route", "retrieve", "tool", "subanswers", "synthesize"} <= stages
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_answer_followup_and_limits(tmp_path: Path) -> None:
|
||||||
|
"""Cover follow-up routing, reasoning limit, and timeout fallbacks."""
|
||||||
|
|
||||||
|
class FollowupLLM(PromptLLM):
|
||||||
|
def _lookup_response(self, system: str, prompt: str) -> str | None:
|
||||||
|
if "normalized (string), keywords" in prompt:
|
||||||
|
return '{"normalized":"What about that?","keywords":["that"]}'
|
||||||
|
if "needs_snapshot (bool)" in prompt:
|
||||||
|
return '{"needs_snapshot":true,"needs_kb":false,"needs_tool":false,"answer_style":"direct","follow_up":false,"question_type":"open_ended","focus_entity":"unknown","focus_metric":"unknown"}'
|
||||||
|
if "Select the claims most relevant" in prompt:
|
||||||
|
return '{"claim_ids":["c1"]}'
|
||||||
|
if "Follow-up:" in prompt:
|
||||||
|
return "titan-99 is still hottest."
|
||||||
|
return super()._lookup_response(system, prompt)
|
||||||
|
|
||||||
|
engine = _make_engine(tmp_path, FollowupLLM())
|
||||||
|
summary = build_summary(_rich_snapshot())
|
||||||
|
engine._store_state(
|
||||||
|
"conv-1",
|
||||||
|
[ClaimItem(id="c1", claim="synapse is hottest", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot", value_at_claim="titan-01")])],
|
||||||
|
summary,
|
||||||
|
_rich_snapshot(),
|
||||||
|
True,
|
||||||
|
)
|
||||||
|
followup = asyncio.run(
|
||||||
|
run_answer(
|
||||||
|
engine,
|
||||||
|
"Run limitless What about that?",
|
||||||
|
mode="smart",
|
||||||
|
conversation_id="conv-1",
|
||||||
|
snapshot_pin=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assert "titan-01" in followup.reply
|
||||||
|
|
||||||
|
limit_engine = _make_engine(
|
||||||
|
tmp_path / "limit",
|
||||||
|
LimitLLM(),
|
||||||
|
fast_llm_calls_max=1,
|
||||||
|
llm_limit_multiplier=1.0,
|
||||||
|
)
|
||||||
|
limited = asyncio.run(run_answer(limit_engine, "tell me about cpu and runbooks", mode="custom"))
|
||||||
|
assert "reasoning limit" in limited.reply
|
||||||
|
assert limited.meta["llm_limit_hit"] is True
|
||||||
|
|
||||||
|
timeout_engine = _make_engine(
|
||||||
|
tmp_path / "timeout",
|
||||||
|
TimeoutLLM(),
|
||||||
|
smart_time_budget_sec=0.1,
|
||||||
|
ollama_timeout_sec=0.1,
|
||||||
|
)
|
||||||
|
timed_out = asyncio.run(run_answer(timeout_engine, "Run limitless tell me about cpu and runbooks", mode="smart"))
|
||||||
|
assert "time budget" in timed_out.reply.lower()
|
||||||
|
assert timed_out.meta["time_budget_hit"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_matrix_queue_main_and_store_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
|
||||||
|
"""Exercise remaining API, Matrix, queue, main, and store branches."""
|
||||||
|
|
||||||
|
settings = _settings(
|
||||||
|
tmp_path,
|
||||||
|
internal_token="secret",
|
||||||
|
queue_enabled=True,
|
||||||
|
matrix_bots=(MatrixBotConfig("bot", "pw", ("atlas",), "quick"),),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def handler(
|
||||||
|
question: str,
|
||||||
|
mode: str,
|
||||||
|
history: list[dict[str, str]] | None,
|
||||||
|
conversation_id: str | None,
|
||||||
|
snapshot_pin: bool | None,
|
||||||
|
) -> AnswerResult:
|
||||||
|
del history, conversation_id, snapshot_pin
|
||||||
|
return AnswerResult(question + ":" + mode, AnswerScores(1, 2, 3, "low"), {"mode": mode})
|
||||||
|
|
||||||
|
api = Api(settings, handler)
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
client = TestClient(api.app)
|
||||||
|
assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={}).status_code == 400
|
||||||
|
assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"content": "hi"}).json()["reply"] == "hi:quick"
|
||||||
|
assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"question": " "}).status_code == 400
|
||||||
|
assert AnswerRequest(message=" hello ").message == " hello "
|
||||||
|
|
||||||
|
class FakeResp:
|
||||||
|
def __init__(self, payload: dict[str, Any], *, status_code: int = 200) -> None:
|
||||||
|
self._payload = payload
|
||||||
|
self.status_code = status_code
|
||||||
|
|
||||||
|
def raise_for_status(self) -> None:
|
||||||
|
if self.status_code >= 400:
|
||||||
|
raise httpx.HTTPStatusError("bad", request=httpx.Request("GET", "http://x"), response=httpx.Response(self.status_code))
|
||||||
|
|
||||||
|
def json(self) -> dict[str, Any]:
|
||||||
|
return self._payload
|
||||||
|
|
||||||
|
class MatrixAsyncClient:
|
||||||
|
async def __aenter__(self) -> "MatrixAsyncClient":
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, *exc: object) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def post(self, url: str, json: dict[str, Any] | None = None, headers: dict[str, str] | None = None) -> FakeResp:
|
||||||
|
del json, headers
|
||||||
|
if "login" in url:
|
||||||
|
return FakeResp({"access_token": "tok"})
|
||||||
|
return FakeResp({})
|
||||||
|
|
||||||
|
async def get(self, url: str, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None) -> FakeResp:
|
||||||
|
del headers, params
|
||||||
|
if "directory/room" in url:
|
||||||
|
return FakeResp({}, status_code=404)
|
||||||
|
return FakeResp({"next_batch": "n1", "rooms": {"join": {}}})
|
||||||
|
|
||||||
|
monkeypatch.setattr("atlasbot.matrix.bot.httpx.AsyncClient", lambda timeout=None: MatrixAsyncClient())
|
||||||
|
matrix_client = MatrixClient(settings, settings.matrix_bots[0])
|
||||||
|
assert asyncio.run(matrix_client.login()) == "tok"
|
||||||
|
assert asyncio.run(matrix_client.resolve_room("tok")) == ""
|
||||||
|
|
||||||
|
bot = MatrixBot(settings, settings.matrix_bots[0], SimpleNamespace(answer=None), handler)
|
||||||
|
|
||||||
|
class BotClient:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.sent: list[str] = []
|
||||||
|
self.sync_calls = 0
|
||||||
|
|
||||||
|
async def login(self) -> str:
|
||||||
|
return "tok"
|
||||||
|
|
||||||
|
async def resolve_room(self, token: str) -> str:
|
||||||
|
del token
|
||||||
|
return "!room"
|
||||||
|
|
||||||
|
async def join_room(self, token: str, room_id: str) -> None:
|
||||||
|
del token, room_id
|
||||||
|
|
||||||
|
async def send_message(self, token: str, room_id: str, text: str) -> None:
|
||||||
|
del token, room_id
|
||||||
|
self.sent.append(text)
|
||||||
|
|
||||||
|
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
|
||||||
|
del token, since
|
||||||
|
self.sync_calls += 1
|
||||||
|
if self.sync_calls == 1:
|
||||||
|
return {
|
||||||
|
"next_batch": "n1",
|
||||||
|
"rooms": {
|
||||||
|
"join": {
|
||||||
|
"!room": {
|
||||||
|
"timeline": {
|
||||||
|
"events": [
|
||||||
|
{"type": "m.room.member", "sender": "user"},
|
||||||
|
{"type": "m.room.message", "sender": "bot", "content": {"body": "ignore"}},
|
||||||
|
{"type": "m.room.message", "sender": "user", "content": {"body": "atlas quick hi"}},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
raise RuntimeError("stop")
|
||||||
|
|
||||||
|
bot._client = BotClient()
|
||||||
|
async def run_bot_once() -> None:
|
||||||
|
task = asyncio.create_task(bot.run())
|
||||||
|
await asyncio.sleep(0.01)
|
||||||
|
task.cancel()
|
||||||
|
with pytest.raises(asyncio.CancelledError):
|
||||||
|
await task
|
||||||
|
|
||||||
|
asyncio.run(run_bot_once())
|
||||||
|
assert any("Thinking" in msg for msg in bot._client.sent)
|
||||||
|
|
||||||
|
timeout_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001, quick_time_budget_sec=0.01), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
|
||||||
|
timeout_bot._client = SimpleNamespace(
|
||||||
|
sent=[],
|
||||||
|
send_message=lambda token, room_id, text: asyncio.sleep(0, result=timeout_bot._client.sent.append(text)),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def sleepy_handler(question: str, mode: str, history, conversation_id, observer):
|
||||||
|
del question, mode, history, conversation_id, observer
|
||||||
|
await asyncio.sleep(1.2)
|
||||||
|
return AnswerResult("late", AnswerScores(1, 2, 3, "low"), {})
|
||||||
|
|
||||||
|
timeout_bot._answer_handler = sleepy_handler
|
||||||
|
asyncio.run(timeout_bot._answer_with_heartbeat("tok", "!room", "q", "quick"))
|
||||||
|
assert any("time budget" in msg for msg in timeout_bot._client.sent)
|
||||||
|
|
||||||
|
error_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
|
||||||
|
error_bot._client = SimpleNamespace(
|
||||||
|
sent=[],
|
||||||
|
send_message=lambda token, room_id, text: asyncio.sleep(0, result=error_bot._client.sent.append(text)),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def failing_handler(question: str, mode: str, history, conversation_id, observer):
|
||||||
|
del question, mode, history, conversation_id, observer
|
||||||
|
raise RuntimeError("boom")
|
||||||
|
|
||||||
|
error_bot._answer_handler = failing_handler
|
||||||
|
asyncio.run(error_bot._answer_with_heartbeat("tok", "!room", "q", "smart"))
|
||||||
|
assert any("internal error" in msg for msg in error_bot._client.sent)
|
||||||
|
|
||||||
|
class DirectQueue:
|
||||||
|
async def __call__(self, payload: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
return {"reply": payload["question"]}
|
||||||
|
|
||||||
|
direct_qm = QueueManager(replace(settings, queue_enabled=False), DirectQueue())
|
||||||
|
assert asyncio.run(direct_qm.submit({"question": "direct"})) == {"reply": "direct"}
|
||||||
|
|
||||||
|
class FakeSub:
|
||||||
|
async def next_msg(self, timeout: float) -> Any:
|
||||||
|
del timeout
|
||||||
|
return SimpleNamespace(data=json.dumps({"reply": "queued"}).encode())
|
||||||
|
|
||||||
|
async def unsubscribe(self) -> None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
class FakeMsg:
|
||||||
|
def __init__(self, raw: bytes, reply: str = "reply") -> None:
|
||||||
|
self.data = raw
|
||||||
|
self.reply = reply
|
||||||
|
self.acked = False
|
||||||
|
|
||||||
|
async def ack(self) -> None:
|
||||||
|
self.acked = True
|
||||||
|
|
||||||
|
published: list[tuple[str, bytes]] = []
|
||||||
|
|
||||||
|
class ExistingStreamJS:
|
||||||
|
async def stream_info(self, stream: str) -> None:
|
||||||
|
assert stream == settings.nats_stream
|
||||||
|
|
||||||
|
async def publish(self, subject: str, data: bytes) -> None:
|
||||||
|
published.append((subject, data))
|
||||||
|
|
||||||
|
async def pull_subscribe(self, subject: str, durable: str):
|
||||||
|
del subject, durable
|
||||||
|
|
||||||
|
class Pull:
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.calls = 0
|
||||||
|
|
||||||
|
async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
|
||||||
|
del count, timeout
|
||||||
|
self.calls += 1
|
||||||
|
if self.calls == 1:
|
||||||
|
raise RuntimeError("retry")
|
||||||
|
raise asyncio.CancelledError
|
||||||
|
|
||||||
|
return Pull()
|
||||||
|
|
||||||
|
class FakeNats:
    """Stub NATS client wired to the fake JetStream/subscription doubles.

    Verifies the queue manager connects to the configured URL, subscribes
    on the inbox it created, and records publishes and drain calls.
    """

    def __init__(self) -> None:
        # Set to True by drain(); tests assert shutdown drained the client.
        self.drained = False

    async def connect(self, url: str) -> None:
        # The manager must connect to the URL from settings.
        assert url == settings.nats_url

    def jetstream(self) -> ExistingStreamJS:
        return ExistingStreamJS()

    def new_inbox(self) -> str:
        # Deterministic inbox name so subscribe() below can assert on it.
        return "inbox"

    async def subscribe(self, reply: str) -> FakeSub:
        # Reply subscriptions must use the inbox handed out above.
        assert reply == "inbox"
        return FakeSub()

    async def publish(self, reply: str, data: bytes) -> None:
        # Capture core-NATS publishes alongside JetStream ones.
        published.append((reply, data))

    async def drain(self) -> None:
        self.drained = True
|
||||||
|
|
||||||
|
monkeypatch.setattr("atlasbot.queue.nats.NATS", FakeNats)
|
||||||
|
queue = QueueManager(settings, DirectQueue())
|
||||||
|
asyncio.run(queue.start())
|
||||||
|
assert asyncio.run(queue.submit({"question": "queued", "mode": "smart"})) == {"reply": "queued"}
|
||||||
|
|
||||||
|
invalid_msg = FakeMsg(b"not-json")
|
||||||
|
asyncio.run(queue._handle_message(invalid_msg))
|
||||||
|
assert invalid_msg.acked is True
|
||||||
|
handled_msg = FakeMsg(json.dumps({"payload": {"question": "x"}, "reply": "reply"}).encode())
|
||||||
|
asyncio.run(queue._handle_message(handled_msg))
|
||||||
|
assert handled_msg.acked is True
|
||||||
|
failing_queue = QueueManager(settings, lambda payload: (_ for _ in ()).throw(RuntimeError("boom")))
|
||||||
|
failing_queue._nc = FakeNats()
|
||||||
|
failing_queue._js = ExistingStreamJS()
|
||||||
|
failure_msg = FakeMsg(json.dumps({"payload": {"question": "x"}}).encode())
|
||||||
|
|
||||||
|
async def failing_handler(payload: dict[str, Any]) -> dict[str, Any]:
|
||||||
|
del payload
|
||||||
|
raise RuntimeError("boom")
|
||||||
|
|
||||||
|
failing_queue._handler = failing_handler
|
||||||
|
asyncio.run(failing_queue._handle_message(failure_msg))
|
||||||
|
assert failure_msg.acked is True
|
||||||
|
asyncio.run(queue.stop())
|
||||||
|
|
||||||
|
assert result_scores({"scores": {"confidence": "9", "relevance": "8", "satisfaction": "7", "hallucination_risk": "low"}}).confidence == 9
|
||||||
|
assert result_scores({"scores": "bad"}).confidence == 60
|
||||||
|
|
||||||
|
|
||||||
|
def test_kb_llm_snapshot_and_json_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Cover remaining KB, LLM, snapshot, and JSON parsing branches."""
    # --- KnowledgeBase: catalog with a malformed source entry, a runbook
    # entry missing its path, and a doc long enough to force chunk limits.
    base = tmp_path / "kb"
    catalog = base / "catalog"
    catalog.mkdir(parents=True)
    (catalog / "atlas.json").write_text(json.dumps({"cluster": "atlas", "sources": ["bad"]}), encoding="utf-8")
    (catalog / "runbooks.json").write_text(json.dumps([{"title": "Fix", "path": "runbooks/fix.md"}, {"title": "No path"}]), encoding="utf-8")
    (base / "docs.md").write_text("x" * 120, encoding="utf-8")
    kb = KnowledgeBase(str(base))
    # Only the entry with a path should surface, exactly once.
    assert kb.runbook_titles(limit=1).count("runbooks/fix.md") == 1
    assert kb.chunk_lines(max_files=1, max_chars=60)
    # A line longer than the remaining budget must be rejected.
    assert kb._extend_with_limit([], ["abcdef"], 3) is False

    # An empty base path yields no chunks rather than raising.
    empty_kb = KnowledgeBase("")
    assert empty_kb.chunk_lines() == []

    # --- LLMClient: endpoint passthrough, API-key header, JSON fence parsing.
    settings = _settings(tmp_path, ollama_url="http://example/api/chat", ollama_api_key="secret", ollama_retries=0, ollama_fallback_model="")
    client = LLMClient(settings)
    assert client._endpoint() == "http://example/api/chat"
    assert client._headers["x-api-key"] == "secret"
    assert parse_json("```{\"ok\": true}```") == {"ok": True}
    assert parse_json("not-json", fallback={"fallback": True}) == {"fallback": True}

    class FakeResponse:
        """Minimal httpx.Response stand-in with status and JSON payload."""

        def __init__(self, status_code: int, payload: Any) -> None:
            self.status_code = status_code
            self._payload = payload

        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("POST", "http://example"), response=httpx.Response(self.status_code))

        def json(self) -> Any:
            return self._payload

    # Three payload shapes: "response" field, "reply" field, and an empty
    # "message" dict (which must raise an empty-response error).
    responses = iter([FakeResponse(200, {"response": "plain"}), FakeResponse(200, {"reply": "fallback"}), FakeResponse(200, {"message": {}})])

    class FakeAsyncClient:
        """httpx.AsyncClient stand-in serving the shared `responses` iterator."""

        def __init__(self, timeout: float | None = None) -> None:
            self.timeout = timeout

        async def __aenter__(self) -> "FakeAsyncClient":
            return self

        async def __aexit__(self, *exc: object) -> None:
            return None

        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            item = next(responses)
            # Exceptions queued in the iterator are raised, not returned.
            if isinstance(item, Exception):
                raise item
            return item

    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
    # Each chat() call consumes one queued response, in order.
    assert asyncio.run(client.chat([{"role": "user", "content": "a"}], timeout_sec=1.0)) == "plain"
    assert asyncio.run(client.chat([{"role": "user", "content": "b"}], timeout_sec=1.0)) == "fallback"
    with pytest.raises(LLMError, match="empty response"):
        asyncio.run(client.chat([{"role": "user", "content": "c"}], timeout_sec=1.0))
    # With one retry allowed, two consecutive connection errors must exhaust
    # the retry budget and surface as LLMError.
    error_settings = replace(settings, ollama_retries=1)
    error_client = LLMClient(error_settings)
    error_responses = iter([httpx.ConnectError("nope"), httpx.ConnectError("still nope")])

    class ErrorAsyncClient(FakeAsyncClient):
        """Variant whose post() always raises the next queued ConnectError."""

        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            raise next(error_responses)

    monkeypatch.setattr(httpx, "AsyncClient", ErrorAsyncClient)
    with pytest.raises(LLMError):
        asyncio.run(error_client.chat([{"role": "user", "content": "d"}], timeout_sec=1.0))

    # --- SnapshotProvider: fetch path, then cache-hit path.
    provider = SnapshotProvider(replace(settings, ariadne_state_url="http://snapshot", ariadne_state_token="tok"))

    class SnapshotResp:
        """Stub httpx.get() response returning a fixed snapshot id."""

        def raise_for_status(self) -> None:
            return None

        def json(self) -> dict[str, Any]:
            return {"snapshot_id": "snap-1"}

    monkeypatch.setattr("atlasbot.snapshot.builder.httpx.get", lambda url, headers, timeout: SnapshotResp())
    assert provider.get() == {"snapshot_id": "snap-1"}
    # Prime the cache and advance the clock by only 1s so the cached value
    # is still fresh and no HTTP call is needed.
    provider._cache = {"snapshot_id": "cached"}
    provider._cache_ts = 10_000.0
    monkeypatch.setattr("atlasbot.snapshot.builder.time.monotonic", lambda: 10_001.0)
    assert provider.get() == {"snapshot_id": "cached"}
||||||
1749
tests/test_split_helper_coverage.py
Normal file
1749
tests/test_split_helper_coverage.py
Normal file
File diff suppressed because it is too large
Load Diff
1424
tests/test_support_modules.py
Normal file
1424
tests/test_support_modules.py
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user