Compare commits
42 Commits
codex/atla
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cf1e311e14 | ||
|
|
48c639e651 | ||
|
|
7908019fc7 | ||
|
|
f95f938cf1 | ||
|
|
c2dbeadc96 | ||
|
|
4d9c205ff2 | ||
|
|
7734b93180 | ||
|
|
0b10dcd897 | ||
|
|
51b9fd20e9 | ||
|
|
017cce884e | ||
|
|
111038a571 | ||
|
|
a1db462420 | ||
|
|
3944e7f0d8 | ||
|
|
8fa49bb34b | ||
|
|
6f955ad5a3 | ||
|
|
cd8d5397cb | ||
|
|
130fdc7eea | ||
|
|
ed2ebf266d | ||
|
|
3ba75ee234 | ||
|
|
603d2dcec5 | ||
|
|
ecd768818a | ||
|
|
dd077b0f92 | ||
|
|
b7543d7e57 | ||
|
|
f07373247a | ||
|
|
d750f21e80 | ||
|
|
b9970d3847 | ||
|
|
9e15badcb1 | ||
|
|
6ecf531bac | ||
|
|
4fa280ca8d | ||
|
|
8f3d3c2550 | ||
|
|
4f8c77e7b5 | ||
|
|
abcb38a3f7 | ||
|
|
2f42c176ba | ||
|
|
882d3a5986 | ||
|
|
6a5f9fb1ed | ||
| 5ae40c2116 | |||
| 12b3e14512 | |||
| 7f0d27288f | |||
| 6218c4593d | |||
| 5d7679f183 | |||
| e4014aba1c | |||
| 1713dd07c7 |
17
Dockerfile
17
Dockerfile
@ -1,19 +1,28 @@
|
||||
FROM python:3.12-slim AS base
|
||||
FROM registry.bstein.dev/bstein/python:3.12-slim AS base
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
PYTHONUNBUFFERED=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
PIP_DEFAULT_TIMEOUT=60
|
||||
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
|
||||
COPY pyproject.toml /app/pyproject.toml
|
||||
RUN pip install --no-cache-dir --retries 10 -r /app/requirements.txt
|
||||
|
||||
COPY atlasbot /app/atlasbot
|
||||
RUN addgroup --system atlasbot && \
|
||||
adduser --system --ingroup atlasbot --home /app atlasbot && \
|
||||
chown -R atlasbot:atlasbot /app
|
||||
|
||||
FROM base AS test
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir --retries 10 -r /app/requirements-dev.txt
|
||||
COPY testing /app/testing
|
||||
COPY tests /app/tests
|
||||
COPY scripts /app/scripts
|
||||
|
||||
FROM base AS runtime
|
||||
EXPOSE 8090
|
||||
USER atlasbot
|
||||
CMD ["python", "-m", "atlasbot.main"]
|
||||
|
||||
@ -6,4 +6,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
WORKDIR /app
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt -r /app/requirements-dev.txt && \
|
||||
addgroup --system atlasbot && \
|
||||
adduser --system --ingroup atlasbot --home /app atlasbot && \
|
||||
chown -R atlasbot:atlasbot /app
|
||||
|
||||
USER atlasbot
|
||||
|
||||
227
Jenkinsfile
vendored
227
Jenkinsfile
vendored
@ -11,7 +11,7 @@ spec:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: dind
|
||||
image: docker:27-dind
|
||||
image: registry.bstein.dev/bstein/docker:27-dind
|
||||
securityContext:
|
||||
privileged: true
|
||||
env:
|
||||
@ -21,13 +21,14 @@ spec:
|
||||
- "--mtu=1400"
|
||||
- "--host=unix:///var/run/docker.sock"
|
||||
- "--host=tcp://0.0.0.0:2375"
|
||||
- "--tls=false"
|
||||
volumeMounts:
|
||||
- name: dind-storage
|
||||
mountPath: /var/lib/docker
|
||||
- name: workspace-volume
|
||||
mountPath: /home/jenkins/agent
|
||||
- name: builder
|
||||
image: docker:27
|
||||
image: registry.bstein.dev/bstein/docker:27
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
@ -44,7 +45,15 @@ spec:
|
||||
- name: harbor-config
|
||||
mountPath: /docker-config
|
||||
- name: tester
|
||||
image: python:3.12-slim
|
||||
image: registry.bstein.dev/bstein/python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
volumeMounts:
|
||||
- name: workspace-volume
|
||||
mountPath: /home/jenkins/agent
|
||||
- name: quality-tools
|
||||
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
@ -72,6 +81,15 @@ spec:
|
||||
PYTHONUNBUFFERED = '1'
|
||||
SUITE_NAME = 'atlasbot'
|
||||
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
|
||||
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
|
||||
SONARQUBE_PROJECT_KEY = 'atlasbot'
|
||||
SONARQUBE_TOKEN = credentials('sonarqube-token')
|
||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||
}
|
||||
options {
|
||||
disableConcurrentBuilds()
|
||||
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
@ -79,6 +97,120 @@ spec:
|
||||
checkout scm
|
||||
}
|
||||
}
|
||||
stage('Collect SonarQube evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
args=(
|
||||
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
|
||||
"-Dsonar.login=${SONARQUBE_TOKEN}"
|
||||
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.sources=."
|
||||
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**"
|
||||
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
|
||||
)
|
||||
[ -f build/coverage.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage.xml")
|
||||
set +e
|
||||
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
|
||||
rc=${PIPESTATUS[0]}
|
||||
set -e
|
||||
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
|
||||
'''
|
||||
}
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
|
||||
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
|
||||
token = os.getenv('SONARQUBE_TOKEN', '').strip()
|
||||
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
|
||||
payload = {"status": "ERROR", "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY"}
|
||||
if host and project_key:
|
||||
query = urllib.parse.urlencode({"projectKey": project_key})
|
||||
request = urllib.request.Request(f"{host}/api/qualitygates/project_status?{query}", method="GET")
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=12) as response:
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception as exc: # noqa: BLE001
|
||||
payload = {"status": "ERROR", "error": str(exc)}
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
handle.write("\\n")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Collect Supply Chain evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
set +e
|
||||
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
|
||||
trivy_rc=$?
|
||||
set -e
|
||||
if [ ! -s build/trivy-fs.json ]; then
|
||||
cat > build/ironbank-compliance.json <<EOF
|
||||
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
|
||||
EOF
|
||||
exit 0
|
||||
fi
|
||||
critical="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="CRITICAL")] | length' build/trivy-fs.json)"
|
||||
high="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="HIGH")] | length' build/trivy-fs.json)"
|
||||
secrets="$(jq '[.Results[]? | .Secrets[]?] | length' build/trivy-fs.json)"
|
||||
misconfigs="$(jq '[.Results[]? | .Misconfigurations[]? | select(.Status=="FAIL" and (.Severity=="CRITICAL" or .Severity=="HIGH"))] | length' build/trivy-fs.json)"
|
||||
status=ok
|
||||
compliant=true
|
||||
if [ "${critical}" -gt 0 ] || [ "${secrets}" -gt 0 ] || [ "${misconfigs}" -gt 0 ]; then
|
||||
status=failed
|
||||
compliant=false
|
||||
fi
|
||||
jq -n --arg status "${status}" --argjson compliant "${compliant}" --argjson critical "${critical}" --argjson high "${high}" --argjson secrets "${secrets}" --argjson misconfigs "${misconfigs}" --argjson trivy_rc "${trivy_rc}" \
|
||||
'{status:$status, compliant:$compliant, category:"artifact_security", scan_type:"filesystem", scanner:"trivy", critical_vulnerabilities:$critical, high_vulnerabilities:$high, secrets:$secrets, high_or_critical_misconfigurations:$misconfigs, trivy_rc:$trivy_rc, high_vulnerability_policy:"observe"}' > build/ironbank-compliance.json
|
||||
'''
|
||||
}
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
|
||||
if report_path.exists():
|
||||
raise SystemExit(0)
|
||||
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
|
||||
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
|
||||
payload = {"status": status or "unknown", "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None}
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
if "status" not in payload:
|
||||
payload["status"] = "unknown"
|
||||
payload["note"] = "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Prep toolchain') {
|
||||
steps {
|
||||
container('builder') {
|
||||
@ -113,6 +245,7 @@ spec:
|
||||
container('builder') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
ready=0
|
||||
for _ in $(seq 1 10); do
|
||||
if docker info >/dev/null 2>&1; then
|
||||
@ -124,40 +257,91 @@ spec:
|
||||
if [ "${ready}" -ne 1 ]; then
|
||||
echo "docker daemon did not become ready on ${DOCKER_HOST}" >&2
|
||||
docker version || true
|
||||
exit 1
|
||||
printf '%s\n' 1 > build/buildx.rc
|
||||
exit 0
|
||||
fi
|
||||
BUILDER_NAME="atlasbot-${BUILD_NUMBER}"
|
||||
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
|
||||
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
|
||||
rc=1
|
||||
for attempt in 1 2 3; do
|
||||
if docker buildx create --name "${BUILDER_NAME}" --driver docker-container --driver-opt image=registry.bstein.dev/bstein/buildkit:buildx-stable-1 --bootstrap --use; then
|
||||
rc=0
|
||||
break
|
||||
fi
|
||||
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
|
||||
sleep $((attempt * 10))
|
||||
done
|
||||
printf '%s\n' "${rc}" > build/buildx.rc
|
||||
if [ "${rc}" -ne 0 ]; then
|
||||
echo "docker buildx bootstrap failed after retries; quality metrics will record the setup failure" >&2
|
||||
fi
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Unit tests') {
|
||||
stage('Run quality gate') {
|
||||
steps {
|
||||
container('builder') {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
docker buildx build --platform linux/arm64 --target test --load -t atlasbot-test .
|
||||
docker run --rm -v "$PWD/build:/out" atlasbot-test \
|
||||
python -m ruff check atlasbot --select E9,F63,F7,F82
|
||||
docker run --rm -v "$PWD/build:/out" atlasbot-test \
|
||||
python -m slipcover --json --out /out/coverage.json --source atlasbot --fail-under 90 \
|
||||
-m pytest -q --junitxml /out/junit.xml
|
||||
python3 -m pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt
|
||||
set +e
|
||||
docs_rc=1
|
||||
loc_rc=1
|
||||
tests_rc=1
|
||||
coverage_contract_rc=1
|
||||
gate_rc=1
|
||||
python -m ruff check atlasbot scripts --select E,F,W,B,C90,I,RUF,ARG --ignore E501
|
||||
ruff_rc=$?
|
||||
if [ "${ruff_rc}" -eq 0 ]; then
|
||||
python scripts/check_docstrings.py --root atlasbot
|
||||
docs_rc=$?
|
||||
else
|
||||
docs_rc=${ruff_rc}
|
||||
fi
|
||||
python scripts/check_file_sizes.py --root atlasbot --max-lines 500
|
||||
loc_rc=$?
|
||||
python -m slipcover --json --out build/coverage.json --source atlasbot --fail-under 95 \
|
||||
-m pytest -q --junitxml build/junit.xml
|
||||
tests_rc=$?
|
||||
python scripts/check_coverage.py build/coverage.json --root atlasbot --threshold 95
|
||||
coverage_contract_rc=$?
|
||||
printf '%s\n' "${docs_rc}" > build/docs-naming.rc
|
||||
gate_rc=0
|
||||
[ "${docs_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${loc_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${tests_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${coverage_contract_rc}" -eq 0 ] || gate_rc=1
|
||||
set -e
|
||||
printf '%s\n' "${gate_rc}" > build/quality-gate.rc
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Publish test metrics') {
|
||||
steps {
|
||||
container('builder') {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
docker run --rm -v "$PWD/build:/out" \
|
||||
-e JUNIT_PATH=/out/junit.xml \
|
||||
-e COVERAGE_PATH=/out/coverage.json \
|
||||
atlasbot-test python scripts/publish_test_metrics.py
|
||||
export JUNIT_PATH='build/junit.xml'
|
||||
export COVERAGE_PATH='build/coverage.json'
|
||||
export SOURCE_ROOT='atlasbot'
|
||||
export QUALITY_GATE_RC_PATH='build/quality-gate.rc'
|
||||
export QUALITY_GATE_DOCS_RC_PATH='build/docs-naming.rc'
|
||||
python scripts/publish_test_metrics.py || true
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Enforce quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
|
||||
'''
|
||||
}
|
||||
}
|
||||
@ -167,6 +351,7 @@ spec:
|
||||
container('builder') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
test "$(cat build/buildx.rc 2>/dev/null || echo 1)" -eq 0
|
||||
VERSION_TAG=$(cut -d= -f2 build.env)
|
||||
docker buildx build --platform linux/arm64 \
|
||||
--target runtime \
|
||||
@ -182,11 +367,11 @@ spec:
|
||||
always {
|
||||
script {
|
||||
if (fileExists('build.env')) {
|
||||
def env = readProperties file: 'build.env'
|
||||
echo "Build complete for ${env.SEMVER}"
|
||||
def envFile = readProperties file: 'build.env'
|
||||
echo "Build complete for ${envFile.SEMVER}"
|
||||
}
|
||||
}
|
||||
archiveArtifacts artifacts: 'build/*', allowEmptyArchive: true
|
||||
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI, Header, HTTPException
|
||||
from pydantic import BaseModel
|
||||
@ -29,6 +28,16 @@ class AnswerResponse(BaseModel):
|
||||
|
||||
|
||||
class Api:
|
||||
"""Expose the answer API and enforce the shared internal token.
|
||||
|
||||
Input:
|
||||
- `settings`: runtime configuration, including the optional internal token;
|
||||
- `answer_handler`: async adapter that answers a normalized question.
|
||||
|
||||
Output:
|
||||
- registers the HTTP routes on `self.app`.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
settings: Settings,
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str = "false") -> bool:
|
||||
value = os.getenv(name, default).strip().lower()
|
||||
return value in {"1", "true", "yes", "y", "on"}
|
||||
@ -121,6 +122,12 @@ def _load_matrix_bots(bot_mentions: tuple[str, ...]) -> tuple[MatrixBotConfig, .
|
||||
|
||||
|
||||
def load_settings() -> Settings:
|
||||
"""Load process settings from environment variables.
|
||||
|
||||
Output:
|
||||
- a fully populated `Settings` instance with defaults for missing values.
|
||||
"""
|
||||
|
||||
bot_mentions = tuple(
|
||||
[
|
||||
item.strip()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
12
atlasbot/engine/answerer/__init__.py
Normal file
12
atlasbot/engine/answerer/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""Answer engine package."""
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .engine import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
from .workflow import *
|
||||
116
atlasbot/engine/answerer/_base.py
Normal file
116
atlasbot/engine/answerer/_base.py
Normal file
@ -0,0 +1,116 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
FOLLOWUP_SHORT_WORDS = 6
|
||||
TOKEN_MIN_LEN = 3
|
||||
GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}
|
||||
NS_ENTRY_MIN_LEN = 2
|
||||
DEDUP_MIN_SENTENCES = 3
|
||||
RUNBOOK_SIMILARITY_THRESHOLD = 0.4
|
||||
BYTES_KB = 1024
|
||||
BYTES_MB = 1024 * 1024
|
||||
|
||||
|
||||
class LLMLimitReached(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class LLMTimeBudgetExceeded(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnswerScores:
|
||||
confidence: int
|
||||
relevance: int
|
||||
satisfaction: int
|
||||
hallucination_risk: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnswerResult:
|
||||
reply: str
|
||||
scores: AnswerScores
|
||||
meta: dict[str, Any]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InsightGuardInput:
|
||||
question: str
|
||||
reply: str
|
||||
classify: dict[str, Any]
|
||||
context: str
|
||||
plan: ModePlan
|
||||
call_llm: Callable[..., Awaitable[str]]
|
||||
facts: list[str]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ContradictionContext:
|
||||
call_llm: Callable[..., Awaitable[str]]
|
||||
question: str
|
||||
reply: str
|
||||
facts: list[str]
|
||||
plan: ModePlan
|
||||
|
||||
|
||||
@dataclass
|
||||
class EvidenceItem:
|
||||
path: str
|
||||
reason: str
|
||||
value: Any | None = None
|
||||
value_at_claim: Any | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ClaimItem:
|
||||
id: str
|
||||
claim: str
|
||||
evidence: list[EvidenceItem]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ConversationState:
|
||||
updated_at: float
|
||||
claims: list[ClaimItem]
|
||||
snapshot_id: str | None = None
|
||||
snapshot: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModePlan:
|
||||
model: str
|
||||
fast_model: str
|
||||
max_subquestions: int
|
||||
chunk_lines: int
|
||||
chunk_top: int
|
||||
chunk_group: int
|
||||
kb_max_chars: int
|
||||
kb_max_files: int
|
||||
use_raw_snapshot: bool
|
||||
parallelism: int
|
||||
score_retries: int
|
||||
use_deep_retrieval: bool
|
||||
use_tool: bool
|
||||
use_critic: bool
|
||||
use_gap: bool
|
||||
use_scores: bool
|
||||
drafts: int
|
||||
metric_retries: int
|
||||
subanswer_retries: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScoreContext:
|
||||
question: str
|
||||
sub_questions: list[str]
|
||||
retries: int
|
||||
parallelism: int
|
||||
select_best: bool
|
||||
fast_model: str
|
||||
395
atlasbot/engine/answerer/common.py
Normal file
395
atlasbot/engine/answerer/common.py
Normal file
@ -0,0 +1,395 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.config import Settings
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
|
||||
from ._base import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import _gather_limited
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
|
||||
|
||||
def _strip_followup_meta(reply: str) -> str:
|
||||
cleaned = reply.strip()
|
||||
if not cleaned:
|
||||
return cleaned
|
||||
prefixes = [
|
||||
"The draft is correct based on the provided context.",
|
||||
"The draft is correct based on the context.",
|
||||
"The draft is correct based on the provided evidence.",
|
||||
"The draft is correct.",
|
||||
"Based on the provided context,",
|
||||
"Based on the context,",
|
||||
"Based on the provided evidence,",
|
||||
]
|
||||
for prefix in prefixes:
|
||||
if cleaned.lower().startswith(prefix.lower()):
|
||||
cleaned = cleaned[len(prefix) :].lstrip(" .")
|
||||
break
|
||||
return cleaned
|
||||
|
||||
|
||||
def _build_meta(mode: str, call_count: int, call_cap: int, limit_hit: bool, time_budget_hit: bool, time_budget_sec: float, classify: dict[str, Any], tool_hint: dict[str, Any] | None, started: float) -> dict[str, Any]:
|
||||
return {
|
||||
"mode": mode,
|
||||
"llm_calls": call_count,
|
||||
"llm_limit": call_cap,
|
||||
"llm_limit_hit": limit_hit,
|
||||
"time_budget_sec": time_budget_sec,
|
||||
"time_budget_hit": time_budget_hit,
|
||||
"classify": classify,
|
||||
"tool_hint": tool_hint,
|
||||
"elapsed_sec": round(time.monotonic() - started, 2),
|
||||
}
|
||||
|
||||
|
||||
def _debug_pipeline_log(settings: Settings, name: str, payload: Any) -> None:
|
||||
"""Write a structured debug event when pipeline tracing is enabled."""
|
||||
|
||||
if not settings.debug_pipeline:
|
||||
return
|
||||
log.info("atlasbot_debug", extra={"extra": {"name": name, "payload": payload}})
|
||||
|
||||
|
||||
def _mode_plan(settings: Settings, mode: str) -> ModePlan:
|
||||
if mode == "genius":
|
||||
return ModePlan(
|
||||
model=settings.ollama_model_genius,
|
||||
fast_model=settings.ollama_model_fast,
|
||||
max_subquestions=6,
|
||||
chunk_lines=6,
|
||||
chunk_top=10,
|
||||
chunk_group=4,
|
||||
kb_max_chars=200000,
|
||||
kb_max_files=200,
|
||||
use_raw_snapshot=True,
|
||||
parallelism=4,
|
||||
score_retries=3,
|
||||
use_deep_retrieval=True,
|
||||
use_tool=True,
|
||||
use_critic=True,
|
||||
use_gap=True,
|
||||
use_scores=True,
|
||||
drafts=2,
|
||||
metric_retries=3,
|
||||
subanswer_retries=3,
|
||||
)
|
||||
if mode == "smart":
|
||||
return ModePlan(
|
||||
model=settings.ollama_model_smart,
|
||||
fast_model=settings.ollama_model_fast,
|
||||
max_subquestions=4,
|
||||
chunk_lines=8,
|
||||
chunk_top=8,
|
||||
chunk_group=4,
|
||||
kb_max_chars=3000,
|
||||
kb_max_files=12,
|
||||
use_raw_snapshot=False,
|
||||
parallelism=2,
|
||||
score_retries=2,
|
||||
use_deep_retrieval=True,
|
||||
use_tool=True,
|
||||
use_critic=True,
|
||||
use_gap=True,
|
||||
use_scores=True,
|
||||
drafts=1,
|
||||
metric_retries=2,
|
||||
subanswer_retries=2,
|
||||
)
|
||||
return ModePlan(
|
||||
model=settings.ollama_model_fast,
|
||||
fast_model=settings.ollama_model_fast,
|
||||
max_subquestions=1,
|
||||
chunk_lines=16,
|
||||
chunk_top=3,
|
||||
chunk_group=5,
|
||||
kb_max_chars=800,
|
||||
kb_max_files=4,
|
||||
use_raw_snapshot=False,
|
||||
parallelism=1,
|
||||
score_retries=1,
|
||||
use_deep_retrieval=False,
|
||||
use_tool=False,
|
||||
use_critic=False,
|
||||
use_gap=False,
|
||||
use_scores=False,
|
||||
drafts=1,
|
||||
metric_retries=1,
|
||||
subanswer_retries=1,
|
||||
)
|
||||
|
||||
|
||||
def _llm_call_limit(settings: Settings, mode: str) -> int:
|
||||
if mode == "genius":
|
||||
return settings.genius_llm_calls_max
|
||||
if mode == "smart":
|
||||
return settings.smart_llm_calls_max
|
||||
return settings.fast_llm_calls_max
|
||||
|
||||
|
||||
def _mode_time_budget(settings: Settings, mode: str) -> float:
|
||||
if mode == "genius":
|
||||
return max(0.0, settings.genius_time_budget_sec)
|
||||
if mode == "smart":
|
||||
return max(0.0, settings.smart_time_budget_sec)
|
||||
return max(0.0, settings.quick_time_budget_sec)
|
||||
|
||||
|
||||
def _select_subquestions(parts: list[dict[str, Any]], fallback: str, limit: int) -> list[str]:
|
||||
if not parts:
|
||||
return [fallback]
|
||||
ranked = []
|
||||
for entry in parts:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
question = str(entry.get("question") or "").strip()
|
||||
if not question:
|
||||
continue
|
||||
priority = entry.get("priority")
|
||||
try:
|
||||
weight = float(priority)
|
||||
except (TypeError, ValueError):
|
||||
weight = 1.0
|
||||
ranked.append((weight, question))
|
||||
ranked.sort(key=lambda item: item[0], reverse=True)
|
||||
questions = [item[1] for item in ranked][:limit]
|
||||
return questions or [fallback]
|
||||
|
||||
|
||||
def _chunk_lines(lines: list[str], lines_per_chunk: int) -> list[dict[str, Any]]:
|
||||
chunks: list[dict[str, Any]] = []
|
||||
if not lines:
|
||||
return chunks
|
||||
for idx in range(0, len(lines), lines_per_chunk):
|
||||
chunk_lines = lines[idx : idx + lines_per_chunk]
|
||||
text = "\n".join(chunk_lines)
|
||||
summary = " | ".join(chunk_lines[:4])
|
||||
chunks.append({"id": f"c{idx//lines_per_chunk}", "text": text, "summary": summary})
|
||||
return chunks
|
||||
|
||||
|
||||
def _raw_snapshot_chunks(snapshot: dict[str, Any] | None) -> list[dict[str, Any]]:
|
||||
if not isinstance(snapshot, dict) or not snapshot:
|
||||
return []
|
||||
chunks: list[dict[str, Any]] = []
|
||||
for key, value in snapshot.items():
|
||||
try:
|
||||
payload = json.dumps({key: value}, indent=2)
|
||||
except Exception:
|
||||
continue
|
||||
summary = f"raw:{key}"
|
||||
chunks.append({"id": f"r{key}", "text": payload, "summary": summary})
|
||||
return chunks
|
||||
|
||||
|
||||
def _build_chunk_groups(chunks: list[dict[str, Any]], group_size: int) -> list[list[dict[str, Any]]]:
|
||||
groups: list[list[dict[str, Any]]] = []
|
||||
group: list[dict[str, Any]] = []
|
||||
for chunk in chunks:
|
||||
group.append({"id": chunk["id"], "summary": chunk["summary"]})
|
||||
if len(group) >= group_size:
|
||||
groups.append(group)
|
||||
group = []
|
||||
if group:
|
||||
groups.append(group)
|
||||
return groups
|
||||
|
||||
|
||||
async def _score_chunks(call_llm: Callable[..., Any], chunks: list[dict[str, Any]], question: str, sub_questions: list[str], plan: ModePlan) -> dict[str, float]:
|
||||
scores: dict[str, float] = {chunk["id"]: 0.0 for chunk in chunks}
|
||||
if not chunks:
|
||||
return scores
|
||||
groups = _build_chunk_groups(chunks, plan.chunk_group)
|
||||
ctx = ScoreContext(
|
||||
question=question,
|
||||
sub_questions=sub_questions,
|
||||
retries=max(1, plan.score_retries),
|
||||
parallelism=plan.parallelism,
|
||||
select_best=plan.score_retries > 1,
|
||||
fast_model=plan.fast_model,
|
||||
)
|
||||
if ctx.parallelism <= 1 or len(groups) * ctx.retries <= 1:
|
||||
return await _score_groups_serial(call_llm, groups, ctx)
|
||||
return await _score_groups_parallel(call_llm, groups, ctx)
|
||||
|
||||
|
||||
async def _score_groups_serial(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
|
||||
scores: dict[str, float] = {}
|
||||
for grp in groups:
|
||||
runs = [await _score_chunk_group(call_llm, grp, ctx.question, ctx.sub_questions) for _ in range(ctx.retries)]
|
||||
if ctx.select_best and len(runs) > 1:
|
||||
best = await _select_best_score_run(call_llm, grp, runs, ctx)
|
||||
scores.update(best)
|
||||
else:
|
||||
scores.update(_merge_score_runs(runs))
|
||||
return scores
|
||||
|
||||
|
||||
async def _score_groups_parallel(call_llm: Callable[..., Any], groups: list[list[dict[str, Any]]], ctx: ScoreContext) -> dict[str, float]:
|
||||
coros: list[Awaitable[tuple[int, dict[str, float]]]] = []
|
||||
for idx, grp in enumerate(groups):
|
||||
for _ in range(ctx.retries):
|
||||
coros.append(_score_chunk_group_run(call_llm, idx, grp, ctx.question, ctx.sub_questions))
|
||||
results = await _gather_limited(coros, ctx.parallelism)
|
||||
grouped: dict[int, list[dict[str, float]]] = {}
|
||||
for idx, result in results:
|
||||
grouped.setdefault(idx, []).append(result)
|
||||
scores: dict[str, float] = {}
|
||||
for idx, runs in grouped.items():
|
||||
if ctx.select_best and len(runs) > 1:
|
||||
group = groups[idx]
|
||||
best = await _select_best_score_run(call_llm, group, runs, ctx)
|
||||
scores.update(best)
|
||||
else:
|
||||
scores.update(_merge_score_runs(runs))
|
||||
return scores
|
||||
|
||||
|
||||
async def _score_chunk_group(call_llm: Callable[..., Any], group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> dict[str, float]:
|
||||
prompt = (
|
||||
prompts.CHUNK_SCORE_PROMPT
|
||||
+ "\nQuestion: "
|
||||
+ question
|
||||
+ "\nSubQuestions: "
|
||||
+ json.dumps(sub_questions)
|
||||
+ "\nChunks: "
|
||||
+ json.dumps(group)
|
||||
)
|
||||
raw = await call_llm(prompts.RETRIEVER_SYSTEM, prompt, model=None, tag="chunk_score")
|
||||
data = _parse_json_list(raw)
|
||||
scored: dict[str, float] = {}
|
||||
for entry in data:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
cid = str(entry.get("id") or "").strip()
|
||||
if not cid:
|
||||
continue
|
||||
try:
|
||||
score = float(entry.get("score") or 0)
|
||||
except (TypeError, ValueError):
|
||||
score = 0.0
|
||||
scored[cid] = score
|
||||
return scored
|
||||
|
||||
|
||||
async def _score_chunk_group_run(call_llm: Callable[..., Any], idx: int, group: list[dict[str, Any]], question: str, sub_questions: list[str]) -> tuple[int, dict[str, float]]:
|
||||
return idx, await _score_chunk_group(call_llm, group, question, sub_questions)
|
||||
|
||||
|
||||
def _merge_score_runs(runs: list[dict[str, float]]) -> dict[str, float]:
|
||||
if not runs:
|
||||
return {}
|
||||
totals: dict[str, float] = {}
|
||||
counts: dict[str, int] = {}
|
||||
for run in runs:
|
||||
for key, value in run.items():
|
||||
totals[key] = totals.get(key, 0.0) + float(value)
|
||||
counts[key] = counts.get(key, 0) + 1
|
||||
return {key: totals[key] / counts[key] for key in totals}
|
||||
|
||||
|
||||
async def _select_best_score_run(call_llm: Callable[..., Any], group: list[dict[str, Any]], runs: list[dict[str, float]], ctx: ScoreContext) -> dict[str, float]:
|
||||
if not runs:
|
||||
return {}
|
||||
prompt = (
|
||||
prompts.RETRIEVER_SELECT_PROMPT
|
||||
+ "\nQuestion: "
|
||||
+ ctx.question
|
||||
+ "\nSubQuestions: "
|
||||
+ json.dumps(ctx.sub_questions)
|
||||
+ "\nChunks: "
|
||||
+ json.dumps(group)
|
||||
+ "\nRuns: "
|
||||
+ json.dumps(runs)
|
||||
)
|
||||
raw = await call_llm(prompts.RETRIEVER_SELECT_SYSTEM, prompt, model=ctx.fast_model, tag="chunk_select")
|
||||
data = parse_json(raw)
|
||||
idx = 0
|
||||
if isinstance(data, dict):
|
||||
try:
|
||||
idx = int(data.get("selected_index") or 0)
|
||||
except (TypeError, ValueError):
|
||||
idx = 0
|
||||
if idx < 0 or idx >= len(runs):
|
||||
idx = 0
|
||||
return runs[idx]
|
||||
|
||||
|
||||
def _keyword_hits(ranked: list[dict[str, Any]], head: dict[str, Any], keywords: list[str] | None) -> list[dict[str, Any]]:
|
||||
if not keywords:
|
||||
return []
|
||||
lowered = [kw.lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
|
||||
if not lowered:
|
||||
return []
|
||||
hits: list[dict[str, Any]] = []
|
||||
for item in ranked:
|
||||
if item is head:
|
||||
continue
|
||||
text = str(item.get("text") or "").lower()
|
||||
if any(kw in text for kw in lowered):
|
||||
hits.append(item)
|
||||
return hits
|
||||
|
||||
|
||||
def _select_chunks(chunks: list[dict[str, Any]], scores: dict[str, float], plan: ModePlan, keywords: list[str] | None = None, must_ids: list[str] | None = None) -> list[dict[str, Any]]:
    """Pick up to plan.chunk_top chunks: seed, must-have ids, keyword hits, then score order."""
    if not chunks:
        return []
    by_score = sorted(chunks, key=lambda entry: scores.get(entry["id"], 0.0), reverse=True)
    # NOTE(review): the seed is chunks[0] (original retrieval order), not the
    # top-scored by_score[0] — confirm that is intended.
    picked: list[dict[str, Any]] = [chunks[0]]
    if _append_must_chunks(chunks, picked, must_ids, plan.chunk_top):
        return picked
    if _append_keyword_chunks(by_score, picked, keywords, plan.chunk_top):
        return picked
    _append_ranked_chunks(by_score, picked, plan.chunk_top)
    return picked
|
||||
|
||||
|
||||
def _append_must_chunks(chunks: list[dict[str, Any]], selected: list[dict[str, Any]], must_ids: list[str] | None, limit: int) -> bool:
|
||||
if not must_ids:
|
||||
return False
|
||||
id_map = {item["id"]: item for item in chunks}
|
||||
for cid in must_ids:
|
||||
item = id_map.get(cid)
|
||||
if item and item not in selected:
|
||||
selected.append(item)
|
||||
if len(selected) >= limit:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _append_keyword_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], keywords: list[str] | None, limit: int) -> bool:
    """Append keyword-matching chunks (excluding the ranked head).

    Returns True once ``selected`` reaches ``limit``; mutates ``selected``.
    """
    if not ranked:
        return False
    for candidate in _keyword_hits(ranked, ranked[0], keywords):
        if candidate in selected:
            continue
        selected.append(candidate)
        if len(selected) >= limit:
            return True
    return False
|
||||
|
||||
|
||||
def _append_ranked_chunks(ranked: list[dict[str, Any]], selected: list[dict[str, Any]], limit: int) -> None:
|
||||
for item in ranked:
|
||||
if len(selected) >= limit:
|
||||
break
|
||||
if item not in selected:
|
||||
selected.append(item)
|
||||
|
||||
|
||||
def _format_runbooks(runbooks: list[str]) -> str:
|
||||
if not runbooks:
|
||||
return ""
|
||||
return "Relevant runbooks:\n" + "\n".join([f"- {item}" for item in runbooks])
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
267
atlasbot/engine/answerer/engine.py
Normal file
267
atlasbot/engine/answerer/engine.py
Normal file
@ -0,0 +1,267 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
import json
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.config import Settings
|
||||
from atlasbot.knowledge.loader import KnowledgeBase
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import LLMClient, build_messages
|
||||
from atlasbot.snapshot.builder import SnapshotProvider
|
||||
from atlasbot.state.store import ClaimStore
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
from .workflow import run_answer
|
||||
|
||||
|
||||
class AnswerEngine:
    """Coordinate Atlas question answering across snapshots, KB, and LLMs.

    Why:
    - keep the public answer surface in one place while the retrieval and
    post-processing helpers stay split across smaller modules.
    """

    def __init__(self, settings: Settings, llm: LLMClient, kb: KnowledgeBase, snapshot: SnapshotProvider) -> None:
        # Wire collaborators; ClaimStore persists per-conversation claims
        # with a TTL so follow-ups can reuse earlier evidence.
        self._settings = settings
        self._llm = llm
        self._kb = kb
        self._snapshot = snapshot
        self._store = ClaimStore(settings.state_db_path, settings.conversation_ttl_sec)

    async def answer(self, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult:
        """Answer a question by delegating to the staged workflow."""
        return await run_answer(
            self,
            question,
            mode=mode,
            history=history,
            observer=observer,
            conversation_id=conversation_id,
            snapshot_pin=snapshot_pin,
        )

    async def _answer_stock(self, question: str) -> AnswerResult:
        """Answer with no cluster context: a plain chat using the stock prompt."""
        messages = build_messages(prompts.STOCK_SYSTEM, question)
        reply = await self._llm.chat(messages, model=self._settings.ollama_model)
        return AnswerResult(reply, _default_scores(), {"mode": "stock"})

    async def _synthesize_answer(self, question: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:
        """Compose the final answer; multi-draft with LLM selection when planned."""
        style_hint = _style_hint(classify)
        if not subanswers:
            # No sub-answers available: a single synthesis pass from context.
            prompt = (
                prompts.SYNTHESIZE_PROMPT
                + "\nQuestion: "
                + question
                + "\nStyle: "
                + style_hint
                + "\nQuestionType: "
                + (classify.get("question_type") or "unknown")
            )
            return await call_llm(prompts.SYNTHESIZE_SYSTEM, prompt, context=context, model=plan.model, tag="synth")
        draft_prompts = []
        for idx in range(plan.drafts):
            # DraftIndex varies per prompt to encourage distinct drafts.
            draft_prompts.append(
                prompts.SYNTHESIZE_PROMPT
                + "\nQuestion: "
                + question
                + "\nStyle: "
                + style_hint
                + "\nQuestionType: "
                + (classify.get("question_type") or "unknown")
                + "\nSubanswers:\n"
                + "\n".join([f"- {item}" for item in subanswers])
                + f"\nDraftIndex: {idx + 1}"
            )
        drafts: list[str] = []
        if plan.parallelism > 1 and len(draft_prompts) > 1:
            # Fan out draft generation with bounded concurrency.
            drafts = await _gather_limited(
                [
                    call_llm(
                        prompts.SYNTHESIZE_SYSTEM,
                        prompt,
                        context=context,
                        model=plan.model,
                        tag="synth",
                    )
                    for prompt in draft_prompts
                ],
                plan.parallelism,
            )
        else:
            for prompt in draft_prompts:
                drafts.append(
                    await call_llm(
                        prompts.SYNTHESIZE_SYSTEM,
                        prompt,
                        context=context,
                        model=plan.model,
                        tag="synth",
                    )
                )
        if len(drafts) == 1:
            return drafts[0]
        # Ask the fast model to pick the best draft; fall back to the first
        # when the selection is missing or out of range.
        select_prompt = (
            prompts.DRAFT_SELECT_PROMPT
            + "\nQuestion: "
            + question
            + "\nDrafts:\n"
            + "\n\n".join([f"Draft {idx + 1}: {text}" for idx, text in enumerate(drafts)])
        )
        select_raw = await call_llm(prompts.CRITIC_SYSTEM, select_prompt, context=context, model=plan.fast_model, tag="draft_select")
        selection = _parse_json_block(select_raw, fallback={})
        # NOTE(review): int() raises ValueError on a non-numeric "best" —
        # presumably _parse_json_block's fallback keeps this safe; confirm.
        idx = int(selection.get("best", 1)) - 1
        if 0 <= idx < len(drafts):
            return drafts[idx]
        return drafts[0]

    async def _score_answer(self, question: str, reply: str, plan: ModePlan, call_llm: Callable[..., Any]) -> AnswerScores:
        """Self-score the reply via the fast model; neutral defaults when disabled."""
        if not plan.use_scores:
            return _default_scores()
        prompt = prompts.SCORE_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply
        raw = await call_llm(prompts.SCORE_SYSTEM, prompt, model=plan.fast_model, tag="score")
        data = _parse_json_block(raw, fallback={})
        return _scores_from_json(data)

    async def _extract_claims(self, question: str, reply: str, summary: dict[str, Any], facts_used: list[str], call_llm: Callable[..., Any]) -> list[ClaimItem]:
        """Map reply claims to snapshot evidence paths for later follow-ups.

        Only claims with non-empty text and at least one evidence path are kept.
        """
        if not reply or not summary:
            return []
        summary_json = _json_excerpt(summary)
        facts_used = [line.strip() for line in (facts_used or []) if line and line.strip()]
        facts_block = ""
        if facts_used:
            # Cap at 12 fact lines to keep the prompt bounded.
            facts_block = "\nFactsUsed:\n" + "\n".join([f"- {line}" for line in facts_used[:12]])
        prompt = prompts.CLAIM_MAP_PROMPT + "\nQuestion: " + question + "\nAnswer: " + reply + facts_block
        raw = await call_llm(
            prompts.CLAIM_SYSTEM,
            prompt,
            context=f"SnapshotSummaryJson:{summary_json}",
            model=self._settings.ollama_model_fast,
            tag="claim_map",
        )
        data = _parse_json_block(raw, fallback={})
        claims_raw = data.get("claims") if isinstance(data, dict) else None
        claims: list[ClaimItem] = []
        if isinstance(claims_raw, list):
            for entry in claims_raw:
                if not isinstance(entry, dict):
                    continue
                claim_text = str(entry.get("claim") or "").strip()
                # Synthesize an id (c1, c2, ...) when the model omits one.
                claim_id = str(entry.get("id") or "").strip() or f"c{len(claims)+1}"
                evidence_items: list[EvidenceItem] = []
                for ev in entry.get("evidence") or []:
                    if not isinstance(ev, dict):
                        continue
                    path = str(ev.get("path") or "").strip()
                    if not path:
                        continue
                    reason = str(ev.get("reason") or "").strip()
                    # Record the snapshot value at claim time so follow-ups can diff.
                    value = _resolve_path(summary, path)
                    evidence_items.append(EvidenceItem(path=path, reason=reason, value=value, value_at_claim=value))
                if claim_text and evidence_items:
                    claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
        return claims

    async def _dedup_reply(self, reply: str, plan: ModePlan, call_llm: Callable[..., Any], tag: str) -> str:
        """Run the dedup prompt only when the reply looks repetitive."""
        if not _needs_dedup(reply):
            return reply
        dedup_prompt = prompts.DEDUP_PROMPT + "\nDraft: " + reply
        return await call_llm(prompts.DEDUP_SYSTEM, dedup_prompt, model=plan.fast_model, tag=tag)

    async def _answer_followup(self, question: str, state: ConversationState, summary: dict[str, Any], classify: dict[str, Any], plan: ModePlan, call_llm: Callable[..., Any]) -> str:  # noqa: C901, ARG002
        """Answer a follow-up from stored claims, refreshing evidence values.

        Builds an evidence block from the selected claims, drafts a reply,
        then re-prompts once when the draft mentions nodes/namespaces missing
        from the snapshot.
        """
        claim_ids = await self._select_claims(question, state.claims, plan, call_llm)
        # Fall back to the first two stored claims when selection yields nothing.
        selected = [claim for claim in state.claims if claim.id in claim_ids] if claim_ids else state.claims[:2]
        evidence_lines = []
        lowered = question.lower()
        for claim in selected:
            evidence_lines.append(f"Claim: {claim.claim}")
            for ev in claim.evidence:
                # Refresh the live value; annotate when it drifted since claim time.
                current = _resolve_path(summary, ev.path)
                ev.value = current
                delta_note = ""
                if ev.value_at_claim is not None and current is not None and current != ev.value_at_claim:
                    delta_note = f" (now {current})"
                evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
        if any(term in lowered for term in ("hotspot", "hot spot", "hottest", "jetson", "rpi", "amd64", "arm64", "hardware", "class")):
            hotspot_lines = _hotspot_evidence(summary)
            if hotspot_lines:
                evidence_lines.append("HotspotSummary:")
                evidence_lines.extend(hotspot_lines)
        evidence_ctx = "\n".join(evidence_lines)
        prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
        reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
        allowed_nodes = _allowed_nodes(summary)
        allowed_namespaces = _allowed_namespaces(summary)
        unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
        unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        if extra_bits:
            # One corrective pass constrained to the allowed entity lists.
            fix_prompt = (
                prompts.EVIDENCE_FIX_PROMPT
                + "\nQuestion: "
                + question
                + "\nDraft: "
                + reply
                + "\n"
                + "\n".join(extra_bits)
            )
            reply = await call_llm(
                prompts.EVIDENCE_FIX_SYSTEM,
                fix_prompt,
                context="Evidence:\n" + evidence_ctx,
                model=plan.model,
                tag="followup_fix",
            )
        reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
        reply = _strip_followup_meta(reply)
        return reply

    async def _select_claims(self, question: str, claims: list[ClaimItem], plan: ModePlan, call_llm: Callable[..., Any]) -> list[str]:
        """Ask the fast model which stored claims the follow-up refers to."""
        if not claims:
            return []
        claims_brief = [{"id": claim.id, "claim": claim.claim} for claim in claims]
        prompt = prompts.SELECT_CLAIMS_PROMPT + "\nFollow-up: " + question + "\nClaims: " + json.dumps(claims_brief)
        raw = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.fast_model, tag="select_claims")
        data = _parse_json_block(raw, fallback={})
        ids = data.get("claim_ids") if isinstance(data, dict) else []
        if isinstance(ids, list):
            return [str(item) for item in ids if item]
        return []

    def _get_state(self, conversation_id: str | None) -> ConversationState | None:
        """Load stored conversation state, or None when absent/unidentified."""
        if not conversation_id:
            return None
        state_payload = self._store.get(conversation_id)
        return _state_from_payload(state_payload) if state_payload else None

    def _store_state(self, conversation_id: str, claims: list[ClaimItem], summary: dict[str, Any], snapshot: dict[str, Any] | None, pin_snapshot: bool) -> None:
        """Persist claims (and optionally the pinned snapshot) for follow-ups."""
        snapshot_id = _snapshot_id(summary)
        pinned_snapshot = snapshot if pin_snapshot else None
        payload = {
            # Monotonic clock: used for TTL math, not for display.
            "updated_at": time.monotonic(),
            "claims": _claims_to_payload(claims),
            "snapshot_id": snapshot_id,
            "snapshot": pinned_snapshot,
        }
        self._store.set(conversation_id, payload)

    def _cleanup_state(self) -> None:
        """Evict expired conversation state from the store."""
        self._store.cleanup()
|
||||
189
atlasbot/engine/answerer/factsheet.py
Normal file
189
atlasbot/engine/answerer/factsheet.py
Normal file
@ -0,0 +1,189 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from ._base import *
|
||||
|
||||
MAX_FACT_LINE_CHARS = 180
|
||||
MAX_KB_LINE_CHARS = 220
|
||||
|
||||
|
||||
def _factsheet_kb_chars(mode: str, default_chars: int) -> int:
|
||||
if mode == "genius":
|
||||
return min(max(default_chars, 4000), 6000)
|
||||
if mode == "smart":
|
||||
return min(max(default_chars, 3000), 4500)
|
||||
return max(1200, default_chars)
|
||||
|
||||
|
||||
def _factsheet_line_limit(mode: str) -> int:
|
||||
if mode == "genius":
|
||||
return 30
|
||||
if mode == "smart":
|
||||
return 22
|
||||
return 14
|
||||
|
||||
|
||||
def _factsheet_instruction(mode: str) -> str:
|
||||
if mode == "genius":
|
||||
return (
|
||||
"Start with a direct conclusion, then include the strongest supporting facts and one caveat. "
|
||||
"Keep it to 4-8 sentences. If data is missing, name the missing metric explicitly."
|
||||
)
|
||||
if mode == "smart":
|
||||
return (
|
||||
"Start with a direct conclusion and support it with key facts. Keep it to 2-5 sentences. "
|
||||
"If data is missing, say exactly what is missing and suggest atlas-genius."
|
||||
)
|
||||
return "Keep it to 1-3 sentences. If key data is missing, say what is missing and suggest atlas-smart."
|
||||
|
||||
|
||||
def _factsheet_model(mode: str, plan: ModePlan) -> str:
|
||||
if mode in {"quick", "fast"}:
|
||||
return plan.fast_model
|
||||
return plan.model
|
||||
|
||||
|
||||
def _is_plain_math_question(question: str) -> bool:
|
||||
lowered = question.lower().strip()
|
||||
if not lowered:
|
||||
return False
|
||||
cluster_markers = (
|
||||
"titan",
|
||||
"atlas",
|
||||
"cluster",
|
||||
"node",
|
||||
"pod",
|
||||
"namespace",
|
||||
"workload",
|
||||
"grafana",
|
||||
"alert",
|
||||
"k8s",
|
||||
"kubernetes",
|
||||
"rpi",
|
||||
"longhorn",
|
||||
"postgres",
|
||||
"victoria",
|
||||
"ollama",
|
||||
)
|
||||
if any(token in lowered for token in cluster_markers):
|
||||
return False
|
||||
return bool(
|
||||
re.fullmatch(r"[0-9\s+\-*/().=]+", lowered)
|
||||
or re.search(r"\bwhat(?:'s| is)\s+\d+\s*[-+*/]\s*\d+\b", lowered)
|
||||
)
|
||||
|
||||
|
||||
def _quick_fact_sheet_lines(question: str, summary_lines: list[str], kb_lines: list[str], *, limit: int) -> list[str]:  # noqa: C901
    """Select up to ``limit`` fact lines relevant to ``question``.

    Summary lines are scored by priority markers and question-token overlap;
    matching KB lines are appended (capped to roughly a third of the budget),
    then the combined list is deduped in order.
    """
    # Question tokens of 3+ chars, excluding generic metric vocabulary.
    tokens = {
        token
        for token in re.findall(r"[a-z0-9][a-z0-9_-]{2,}", question.lower())
        if token not in GENERIC_METRIC_TOKENS
    }
    # Substrings that make a summary line relevant regardless of token overlap.
    priority_markers = (
        "snapshot:",
        "nodes_total",
        "nodes_ready",
        "nodes_not_ready",
        "workers_ready",
        "workers_not_ready",
        "control_plane",
        "worker_nodes",
        "hottest",
        "postgres",
        "pods",
        "longhorn",
        "titan-",
        "rpi5",
        "rpi4",
        "jetson",
        "amd64",
    )
    scored: list[tuple[int, str]] = []
    for raw in summary_lines:
        line = raw.strip()
        if not line:
            continue
        lowered = line.lower()
        score = 0
        if any(marker in lowered for marker in priority_markers):
            score += 4
        overlap = sum(1 for token in tokens if token in lowered)
        score += overlap * 3
        # Small bonus for lines short enough to quote verbatim.
        if len(line) <= MAX_FACT_LINE_CHARS:
            score += 1
        if score > 0:
            scored.append((score, line))

    scored.sort(key=lambda item: item[0], reverse=True)
    selected = [line for _, line in scored[:limit]]
    if not selected:
        # Nothing matched: fall back to the first non-empty summary lines.
        selected = [line.strip() for line in summary_lines if line.strip()][:limit]

    kb_selected: list[str] = []
    for raw in kb_lines:
        line = raw.strip()
        if not line or len(line) > MAX_KB_LINE_CHARS:
            continue
        lowered = line.lower()
        # Skip KB bookkeeping/header lines.
        if "kb file:" in lowered or "kb: atlas.json" in lowered:
            continue
        overlap = sum(1 for token in tokens if token in lowered)
        if overlap > 0 or any(marker in lowered for marker in ("runbook", "titan-", "rpi5", "rpi4", "amd64", "jetson")):
            kb_selected.append(line)
        # KB lines are capped to roughly a third of the budget (min 4).
        if len(kb_selected) >= max(4, limit // 3):
            break

    # Merge summary and KB picks, deduping while preserving order.
    merged = []
    seen: set[str] = set()
    for line in selected + kb_selected:
        if line not in seen:
            seen.add(line)
            merged.append(line)
        if len(merged) >= limit:
            break
    return merged
|
||||
|
||||
|
||||
def _quick_fact_sheet_text(lines: list[str]) -> str:
|
||||
if not lines:
|
||||
return "Fact Sheet:\n- No snapshot facts available."
|
||||
body = "\n".join([f"- {line}" for line in lines])
|
||||
return "Fact Sheet:\n" + body
|
||||
|
||||
|
||||
def _quick_fact_sheet_heuristic_answer(question: str, fact_lines: list[str]) -> str:
|
||||
lowered = question.lower()
|
||||
if (
|
||||
any(token in lowered for token in ("placement", "schedule", "last resort", "last-resort"))
|
||||
and any(token in lowered for token in ("node", "workload", "worker", "titan"))
|
||||
):
|
||||
return (
|
||||
"General workload placement is: prefer rpi5 workers first, then rpi4 workers. "
|
||||
"titan-22 is the last-resort general compute node, and titan-24 is the absolute last resort "
|
||||
"reserved for heavy one-offs."
|
||||
)
|
||||
|
||||
for line in fact_lines:
|
||||
compact = line.replace(" ", "")
|
||||
match = re.search(r"nodes_total[:=](\d+),ready[:=](\d+),not_ready[:=](\d+)", compact)
|
||||
if not match:
|
||||
continue
|
||||
total = match.group(1)
|
||||
ready = match.group(2)
|
||||
not_ready = match.group(3)
|
||||
if "how many" in lowered and "ready" in lowered and "node" in lowered:
|
||||
return f"The latest snapshot shows {ready} ready nodes out of {total} total ({not_ready} not ready)."
|
||||
if ("not ready" in lowered or "unready" in lowered) and "node" in lowered:
|
||||
return f"The latest snapshot shows {not_ready} not-ready nodes ({ready} ready out of {total} total)."
|
||||
return ""
|
||||
|
||||
|
||||
def _json_excerpt(summary: dict[str, Any], max_chars: int = 12000) -> str:
|
||||
raw = json.dumps(summary, ensure_ascii=False)
|
||||
return raw[:max_chars]
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
459
atlasbot/engine/answerer/post.py
Normal file
459
atlasbot/engine/answerer/post.py
Normal file
@ -0,0 +1,459 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
|
||||
from ._base import *
|
||||
from .retrieval_ext import _dedupe_lines
|
||||
|
||||
|
||||
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
|
||||
merged: list[str] = []
|
||||
for line in primary + fallback:
|
||||
value = (line or "").strip()
|
||||
if value and value not in merged:
|
||||
merged.append(value)
|
||||
return merged
|
||||
|
||||
|
||||
def _strip_unknown_entities(reply: str, unknown_nodes: list[str], unknown_namespaces: list[str]) -> str:
|
||||
if not reply:
|
||||
return reply
|
||||
if not unknown_nodes and not unknown_namespaces:
|
||||
return reply
|
||||
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", reply) if s.strip()]
|
||||
if not sentences:
|
||||
return reply
|
||||
lowered_nodes = [node.lower() for node in unknown_nodes]
|
||||
lowered_namespaces = [ns.lower() for ns in unknown_namespaces]
|
||||
kept: list[str] = []
|
||||
for sent in sentences:
|
||||
lower = sent.lower()
|
||||
if lowered_nodes and any(node in lower for node in lowered_nodes):
|
||||
continue
|
||||
if lowered_namespaces and any(f"namespace {ns}" in lower for ns in lowered_namespaces):
|
||||
continue
|
||||
kept.append(sent)
|
||||
cleaned = " ".join(kept).strip()
|
||||
return cleaned or reply
|
||||
|
||||
|
||||
def _needs_evidence_guard(reply: str, facts: list[str]) -> bool:
|
||||
if not reply or not facts:
|
||||
return False
|
||||
lower_reply = reply.lower()
|
||||
fact_text = " ".join(facts).lower()
|
||||
node_pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
|
||||
nodes = {m.group(1).lower() for m in node_pattern.finditer(reply)}
|
||||
if nodes:
|
||||
missing = [node for node in nodes if node not in fact_text]
|
||||
if missing:
|
||||
return True
|
||||
pressure_terms = ("pressure", "diskpressure", "memorypressure", "pidpressure", "headroom")
|
||||
if any(term in lower_reply for term in pressure_terms) and not any(term in fact_text for term in pressure_terms):
|
||||
return True
|
||||
arch_terms = ("amd64", "arm64", "rpi", "rpi4", "rpi5", "jetson")
|
||||
return any(term in lower_reply for term in arch_terms) and not any(term in fact_text for term in arch_terms)
|
||||
|
||||
|
||||
async def _contradiction_decision(ctx: ContradictionContext, attempts: int = 1) -> dict[str, Any]:
    """Ask the fast model whether the facts should override the draft.

    Runs up to ``attempts`` prompt variants and keeps the verdict with the
    highest reported confidence; defaults to trusting facts at confidence 50.
    """
    best = {"use_facts": True, "confidence": 50}
    facts_block = "\n".join(ctx.facts[:12])
    for idx in range(max(1, attempts)):
        variant = f"Variant: {idx + 1}" if attempts > 1 else ""
        prompt = (
            prompts.CONTRADICTION_PROMPT.format(question=ctx.question, draft=ctx.reply, facts=facts_block)
            + ("\n" + variant if variant else "")
        )
        raw = await ctx.call_llm(
            prompts.CONTRADICTION_SYSTEM,
            prompt,
            model=ctx.plan.fast_model,
            tag="contradiction",
        )
        data = _parse_json_block(raw, fallback={})
        try:
            confidence = int(data.get("confidence", 50))
        except (TypeError, ValueError):
            # Narrowed from bare `except Exception`: only the coercion errors
            # int() can raise, so real bugs are no longer silently swallowed.
            confidence = 50
        use_facts = bool(data.get("use_facts", True))
        # >= keeps the most recent run on ties.
        if confidence >= best.get("confidence", 0):
            best = {"use_facts": use_facts, "confidence": confidence}
    return best
|
||||
|
||||
|
||||
def _filter_lines_by_keywords(lines: list[str], keywords: list[str], max_lines: int) -> list[str]:
    """Keep lines mentioning any keyword token; fall back to the list head."""
    if not lines:
        return []
    tokens = _expand_tokens(keywords)
    if not tokens:
        return lines[:max_lines]
    matching = [line for line in lines if any(token in line.lower() for token in tokens)]
    if matching:
        return matching[:max_lines]
    return lines[:max_lines]
|
||||
|
||||
|
||||
def _rank_metric_lines(lines: list[str], tokens: set[str], max_lines: int) -> list[str]:
|
||||
if not lines or not tokens:
|
||||
return []
|
||||
ranked: list[tuple[int, int, str]] = []
|
||||
for line in lines:
|
||||
lower = line.lower()
|
||||
hits = sum(1 for tok in tokens if tok in lower)
|
||||
if not hits:
|
||||
continue
|
||||
has_number = 1 if re.search(r"\d", line) else 0
|
||||
ranked.append((has_number, hits, line))
|
||||
ranked.sort(key=lambda item: (-item[0], -item[1], item[2]))
|
||||
return [item[2] for item in ranked[:max_lines]]
|
||||
|
||||
|
||||
def _select_metric_line(lines: list[str], question: str, tokens: list[str] | set[str]) -> str | None:
    """Pick the single best metric line; prefer totals for count-style questions."""
    if not lines or not tokens:
        return None
    token_set = {str(token).lower() for token in tokens if token}
    ranked = _rank_metric_lines(lines, token_set, max_lines=6)
    if not ranked:
        return None
    question_lower = (question or "").lower()
    if any(term in question_lower for term in ("how many", "count", "total")):
        for line in ranked:
            low = line.lower()
            if "total" in low or "count" in low:
                return line
    return ranked[0]
|
||||
|
||||
|
||||
def _format_direct_metric_line(line: str) -> str:
    """Turn a raw metric line into a sentence, trying colon then equals form.

    Falls back to the unmodified line when neither formatter produces text.
    """
    if not line:
        return ""
    if ":" in line:
        sentence = _format_colon_metric(line)
        if sentence:
            return sentence
    if "=" in line:
        sentence = _format_equals_metric(line)
        if sentence:
            return sentence
    return line
|
||||
|
||||
|
||||
def _format_colon_metric(line: str) -> str | None:
    """Render a 'key: value' metric line as a sentence; None when value is empty."""
    raw_key, _, raw_value = line.partition(":")
    key = raw_key.strip().replace("_", " ")
    value = raw_value.strip()
    if not value:
        return None
    if key == "nodes":
        nodes_sentence = _format_nodes_value(value)
        if nodes_sentence:
            return nodes_sentence
    # NOTE(review): "nodes_total" can never match here since underscores were
    # already replaced with spaces — presumably kept for symmetry; confirm.
    if key in {"nodes total", "nodes_total"}:
        return f"Atlas has {value} total nodes."
    return f"{key} is {value}."
|
||||
|
||||
|
||||
def _format_equals_metric(line: str) -> str | None:
|
||||
pairs: list[str] = []
|
||||
for part in line.split(","):
|
||||
if "=" not in part:
|
||||
continue
|
||||
key, value = part.split("=", 1)
|
||||
key = key.strip().replace("_", " ")
|
||||
value = value.strip()
|
||||
if not value:
|
||||
continue
|
||||
if key in {"nodes total", "nodes_total"}:
|
||||
return f"Atlas has {value} total nodes."
|
||||
pairs.append(f"{key} is {value}")
|
||||
if not pairs:
|
||||
return None
|
||||
if len(pairs) == 1:
|
||||
return f"{pairs[0]}."
|
||||
return "; ".join(pairs) + "."
|
||||
|
||||
|
||||
def _format_nodes_value(value: str) -> str | None:
|
||||
parts = [p.strip() for p in value.split(",") if p.strip()]
|
||||
total = None
|
||||
rest: list[str] = []
|
||||
for part in parts:
|
||||
if part.startswith("total="):
|
||||
total = part.split("=", 1)[1]
|
||||
else:
|
||||
rest.append(part.replace("_", " "))
|
||||
if not total:
|
||||
return None
|
||||
if rest:
|
||||
return f"Atlas has {total} total nodes ({'; '.join(rest)})."
|
||||
return f"Atlas has {total} total nodes."
|
||||
|
||||
|
||||
def _global_facts(lines: list[str]) -> list[str]:
    """Collect cluster-wide fact lines (node counts, cluster name), deduped to 6."""
    if not lines:
        return []
    wanted = ("nodes_total", "nodes_ready", "cluster_name", "cluster", "nodes_not_ready")
    picked = [line for line in lines if any(key in line.lower() for key in wanted)]
    return _dedupe_lines(picked, limit=6)
|
||||
|
||||
|
||||
def _has_keyword_overlap(lines: list[str], keywords: list[str]) -> bool:
    """True when any line contains any expanded keyword token."""
    if not lines or not keywords:
        return False
    tokens = _expand_tokens(keywords)
    if not tokens:
        return False
    return any(
        any(token in line.lower() for token in tokens)
        for line in lines
    )
|
||||
|
||||
|
||||
def _merge_tokens(primary: list[str], secondary: list[str], third: list[str] | None = None) -> list[str]:
|
||||
merged: list[str] = []
|
||||
for token in primary + secondary + (third or []):
|
||||
if not token:
|
||||
continue
|
||||
if token not in merged:
|
||||
merged.append(token)
|
||||
return merged
|
||||
|
||||
|
||||
def _extract_question_tokens(question: str) -> list[str]:
    """Split the question into unique lowercase tokens of at least TOKEN_MIN_LEN chars."""
    if not question:
        return []
    tokens: list[str] = []
    for piece in re.split(r"[^a-zA-Z0-9_-]+", question.lower()):
        if len(piece) >= TOKEN_MIN_LEN and piece not in tokens:
            tokens.append(piece)
    return tokens
|
||||
|
||||
|
||||
def _expand_tokens(tokens: list[str]) -> list[str]:
    """Split keyword strings into unique lowercase word tokens of minimum length.

    Non-string entries are ignored rather than raising.
    """
    if not tokens:
        return []
    expanded: list[str] = []
    for token in tokens:
        if not isinstance(token, str):
            continue
        for piece in re.split(r"[^a-zA-Z0-9_-]+", token.lower()):
            if len(piece) >= TOKEN_MIN_LEN and piece not in expanded:
                expanded.append(piece)
    return expanded
|
||||
|
||||
|
||||
def _ensure_token_coverage(lines: list[str], tokens: list[str], summary_lines: list[str], max_add: int = 4) -> list[str]:
    """Prepend up to ``max_add`` summary lines covering tokens missing from ``lines``."""
    if not lines or not tokens or not summary_lines:
        return lines
    haystack = " ".join(lines).lower()
    missing = [token for token in tokens if token and token.lower() not in haystack]
    if not missing:
        return lines
    additions: list[str] = []
    for token in missing:
        needle = token.lower()
        # Take the first summary line mentioning the missing token.
        for candidate in summary_lines:
            if needle in candidate.lower() and candidate not in lines and candidate not in additions:
                additions.append(candidate)
                break
        if len(additions) >= max_add:
            break
    if not additions:
        return lines
    return _merge_fact_lines(additions, lines)
|
||||
|
||||
|
||||
def _best_keyword_line(lines: list[str], keywords: list[str]) -> str | None:
    """Return the line with the most keyword-token hits; None when nothing matches."""
    if not lines or not keywords:
        return None
    tokens = _expand_tokens(keywords)
    if not tokens:
        return None
    winner: str | None = None
    winner_score = 0
    for line in lines:
        low = line.lower()
        hits = sum(1 for token in tokens if token in low)
        # Strictly-greater keeps the earliest line on ties.
        if hits > winner_score:
            winner_score = hits
            winner = line
    return winner
|
||||
|
||||
|
||||
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
|
||||
if not lines or not prefix:
|
||||
return None
|
||||
lower_prefix = prefix.lower()
|
||||
for line in lines:
|
||||
if str(line).lower().startswith(lower_prefix):
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
def _non_rpi_nodes(summary: dict[str, Any]) -> dict[str, list[str]]:
|
||||
hardware = summary.get("hardware_by_node") if isinstance(summary, dict) else None
|
||||
if not isinstance(hardware, dict):
|
||||
return {}
|
||||
grouped: dict[str, list[str]] = {}
|
||||
for node, hw in hardware.items():
|
||||
if not isinstance(node, str) or not isinstance(hw, str):
|
||||
continue
|
||||
if hw.startswith("rpi"):
|
||||
continue
|
||||
grouped.setdefault(hw, []).append(node)
|
||||
for nodes in grouped.values():
|
||||
nodes.sort()
|
||||
return grouped
|
||||
|
||||
|
||||
def _format_hardware_groups(groups: dict[str, list[str]], label: str) -> str:
|
||||
if not groups:
|
||||
return ""
|
||||
parts = []
|
||||
for hw, nodes in sorted(groups.items()):
|
||||
parts.append(f"{hw} ({', '.join(nodes)})")
|
||||
return f"{label}: " + "; ".join(parts) + "."
|
||||
|
||||
|
||||
def _lexicon_context(summary: dict[str, Any]) -> str: # noqa: C901
|
||||
if not isinstance(summary, dict):
|
||||
return ""
|
||||
lexicon = summary.get("lexicon")
|
||||
if not isinstance(lexicon, dict):
|
||||
return ""
|
||||
terms = lexicon.get("terms")
|
||||
aliases = lexicon.get("aliases")
|
||||
lines: list[str] = []
|
||||
if isinstance(terms, list):
|
||||
for entry in terms[:8]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
term = entry.get("term")
|
||||
meaning = entry.get("meaning")
|
||||
if term and meaning:
|
||||
lines.append(f"{term}: {meaning}")
|
||||
if isinstance(aliases, dict):
|
||||
for key, value in list(aliases.items())[:6]:
|
||||
if key and value:
|
||||
lines.append(f"alias {key} -> {value}")
|
||||
if not lines:
|
||||
return ""
|
||||
return "Lexicon:\n" + "\n".join(lines)
|
||||
|
||||
|
||||
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} blob found in *text*; parse the whole text if none."""
    stripped = text.strip()
    blob = re.search(r"\{.*\}", stripped, flags=re.S)
    candidate = blob.group(0) if blob else stripped
    return parse_json(candidate, fallback=fallback)
|
||||
|
||||
|
||||
def _parse_json_list(text: str) -> list[dict[str, Any]]:
    """Parse a JSON list from *text*, keeping only the dict entries."""
    stripped = text.strip()
    blob = re.search(r"\[.*\]", stripped, flags=re.S)
    parsed = parse_json(blob.group(0) if blob else stripped, fallback={})
    if not isinstance(parsed, list):
        return []
    return [item for item in parsed if isinstance(item, dict)]
|
||||
|
||||
|
||||
def _scores_from_json(data: dict[str, Any]) -> AnswerScores:
    """Build AnswerScores from a parsed JSON dict, defaulting to 60 / medium."""
    risk = str(data.get("hallucination_risk") or "medium")
    return AnswerScores(
        confidence=_coerce_int(data.get("confidence"), 60),
        relevance=_coerce_int(data.get("relevance"), 60),
        satisfaction=_coerce_int(data.get("satisfaction"), 60),
        hallucination_risk=risk,
    )
|
||||
|
||||
|
||||
def _coerce_int(value: Any, default: int) -> int:
|
||||
try:
|
||||
return int(float(value))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _default_scores() -> AnswerScores:
    """Return the neutral fallback scores: 60 across the board, medium risk."""
    return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")
|
||||
|
||||
|
||||
def _style_hint(classify: dict[str, Any]) -> str:
|
||||
style = (classify.get("answer_style") or "").strip().lower()
|
||||
qtype = (classify.get("question_type") or "").strip().lower()
|
||||
if style == "insightful" or qtype in {"open_ended", "planning"}:
|
||||
return "insightful"
|
||||
return "direct"
|
||||
|
||||
|
||||
def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
|
||||
if not reply:
|
||||
return False
|
||||
lowered = reply.lower()
|
||||
missing_markers = (
|
||||
"don't have",
|
||||
"do not have",
|
||||
"don't know",
|
||||
"cannot",
|
||||
"can't",
|
||||
"need to",
|
||||
"would need",
|
||||
"does not provide",
|
||||
"does not mention",
|
||||
"not mention",
|
||||
"not provided",
|
||||
"not in context",
|
||||
"not referenced",
|
||||
"missing",
|
||||
"no specific",
|
||||
"no information",
|
||||
)
|
||||
if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):
|
||||
return True
|
||||
return classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply)
|
||||
|
||||
|
||||
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
|
||||
style = (classify.get("answer_style") or "").strip().lower()
|
||||
qtype = (classify.get("question_type") or "").strip().lower()
|
||||
return style == "insightful" or qtype in {"open_ended", "planning"}
|
||||
|
||||
|
||||
async def _apply_insight_guard(inputs: InsightGuardInput) -> str:
    """Run the insight guard over a reply and rewrite it when the guard fails.

    The fast model judges the reply; unless it returns {"ok": true}, the main
    model rewrites the answer, optionally grounded in up to six facts.
    """
    if not inputs.reply or not _should_use_insight_guard(inputs.classify):
        return inputs.reply
    verdict_raw = await inputs.call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        prompts.INSIGHT_GUARD_PROMPT.format(question=inputs.question, answer=inputs.reply),
        context=inputs.context,
        model=inputs.plan.fast_model,
        tag="insight_guard",
    )
    verdict = _parse_json_block(verdict_raw, fallback={})
    if verdict.get("ok") is True:
        return inputs.reply
    rewrite_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=inputs.question, answer=inputs.reply)
    if inputs.facts:
        rewrite_prompt += "\nFacts:\n" + "\n".join(inputs.facts[:6])
    return await inputs.call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        rewrite_prompt,
        context=inputs.context,
        model=inputs.plan.model,
        tag="insight_fix",
    )
|
||||
|
||||
|
||||
# Export every module-private helper so sibling modules' star-imports see them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
276
atlasbot/engine/answerer/post_ext.py
Normal file
276
atlasbot/engine/answerer/post_ext.py
Normal file
@ -0,0 +1,276 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from ._base import *
|
||||
|
||||
|
||||
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
|
||||
if not reply or not metric_facts:
|
||||
return True
|
||||
reply_numbers = set(re.findall(r"\d+(?:\\.\d+)?", reply))
|
||||
if not reply_numbers:
|
||||
return False
|
||||
fact_numbers: set[str] = set()
|
||||
value_pattern = re.compile(r"(?:>=|<=|=|:)\s*(\d+(?:\.\d+)?)")
|
||||
filtered = metric_facts
|
||||
if tokens:
|
||||
token_set = {str(tok).lower() for tok in tokens if tok}
|
||||
focused = []
|
||||
for line in metric_facts:
|
||||
key = line.split(":", 1)[0].lower()
|
||||
if any(tok in key for tok in token_set):
|
||||
focused.append(line)
|
||||
if focused:
|
||||
filtered = focused
|
||||
for line in filtered:
|
||||
for match in value_pattern.findall(line):
|
||||
fact_numbers.add(match)
|
||||
if not fact_numbers:
|
||||
return False
|
||||
return bool(reply_numbers & fact_numbers)
|
||||
|
||||
|
||||
def _needs_dedup(reply: str) -> bool:
    """True when *reply* repeats a sentence verbatim (case/whitespace-insensitive)."""
    if not reply:
        return False
    sentences = [part.strip() for part in re.split(r"(?<=[.!?])\s+", reply) if part.strip()]
    if len(sentences) < DEDUP_MIN_SENTENCES:
        return False
    normalized = [re.sub(r"\s+", " ", sentence.lower()) for sentence in sentences]
    return len(set(normalized)) < len(normalized)
|
||||
|
||||
|
||||
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
|
||||
if not reply:
|
||||
return False
|
||||
q_lower = (question or "").lower()
|
||||
if classify.get("question_type") not in {"metric", "diagnostic"} and not re.search(r"\b(how many|list|count)\b", q_lower):
|
||||
return False
|
||||
missing_markers = (
|
||||
"does not provide",
|
||||
"does not specify",
|
||||
"not available",
|
||||
"not provided",
|
||||
"cannot determine",
|
||||
"don't have",
|
||||
"do not have",
|
||||
"insufficient",
|
||||
"no data",
|
||||
)
|
||||
if any(marker in reply.lower() for marker in missing_markers):
|
||||
return True
|
||||
if reply.count(".") <= 1:
|
||||
return False
|
||||
extra_markers = ("for more", "if you need", "additional", "based on")
|
||||
return any(marker in reply.lower() for marker in extra_markers)
|
||||
|
||||
|
||||
def _extract_keywords(raw_question: str, normalized: str, sub_questions: list[str], keywords: list[Any] | None) -> list[str]:
    """Collect up to 12 lowercase keyword tokens from the question texts.

    Tokens shorter than TOKEN_MIN_LEN and common stopwords are dropped;
    explicit *keywords* are appended after the derived tokens.
    """
    stopwords = {
        "the", "and", "for", "with", "that", "this", "what", "which",
        "when", "where", "who", "why", "how", "tell", "show", "list",
        "give", "about", "right", "now",
    }
    tokens: list[str] = []
    for source in (raw_question, normalized, *sub_questions):
        for piece in re.split(r"[^a-zA-Z0-9_-]+", source.lower()):
            if len(piece) >= TOKEN_MIN_LEN and piece not in stopwords:
                tokens.append(piece)
    for kw in keywords or []:
        if isinstance(kw, str):
            cleaned = kw.strip().lower()
            if cleaned and cleaned not in stopwords and cleaned not in tokens:
                tokens.append(cleaned)
    return list(dict.fromkeys(tokens))[:12]
|
||||
|
||||
|
||||
def _allowed_nodes(summary: dict[str, Any]) -> list[str]:
|
||||
hardware = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
|
||||
if hardware:
|
||||
return sorted([node for node in hardware if isinstance(node, str)])
|
||||
return []
|
||||
|
||||
|
||||
def _allowed_namespaces(summary: dict[str, Any]) -> list[str]:
|
||||
namespaces: list[str] = []
|
||||
for entry in summary.get("namespace_pods") or []:
|
||||
if isinstance(entry, dict):
|
||||
name = entry.get("namespace")
|
||||
if name:
|
||||
namespaces.append(str(name))
|
||||
return sorted(set(namespaces))
|
||||
|
||||
|
||||
def _find_unknown_nodes(reply: str, allowed: list[str]) -> list[str]:
|
||||
if not reply or not allowed:
|
||||
return []
|
||||
pattern = re.compile(r"\b(titan-[0-9a-z]+|node-?\d+)\b", re.IGNORECASE)
|
||||
found = {m.group(1) for m in pattern.finditer(reply)}
|
||||
if not found:
|
||||
return []
|
||||
allowed_set = {a.lower() for a in allowed}
|
||||
return sorted({item for item in found if item.lower() not in allowed_set})
|
||||
|
||||
|
||||
def _find_unknown_namespaces(reply: str, allowed: list[str]) -> list[str]:
|
||||
if not reply or not allowed:
|
||||
return []
|
||||
pattern = re.compile(r"\bnamespace\s+([a-z0-9-]+)\b", re.IGNORECASE)
|
||||
found = {m.group(1) for m in pattern.finditer(reply)}
|
||||
if not found:
|
||||
return []
|
||||
allowed_set = {a.lower() for a in allowed}
|
||||
return sorted({item for item in found if item.lower() not in allowed_set})
|
||||
|
||||
|
||||
def _needs_runbook_fix(reply: str, allowed: list[str]) -> bool:
|
||||
if not reply or not allowed:
|
||||
return False
|
||||
paths = set(re.findall(r"runbooks/[A-Za-z0-9._-]+", reply))
|
||||
if not paths:
|
||||
return False
|
||||
allowed_set = {p.lower() for p in allowed}
|
||||
return any(path.lower() not in allowed_set for path in paths)
|
||||
|
||||
|
||||
def _needs_runbook_reference(question: str, allowed: list[str], reply: str) -> bool:
|
||||
if not allowed or not question:
|
||||
return False
|
||||
lowered = question.lower()
|
||||
cues = ("runbook", "checklist", "documented", "documentation", "where", "guide")
|
||||
if not any(cue in lowered for cue in cues):
|
||||
return False
|
||||
if not reply:
|
||||
return True
|
||||
for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply):
|
||||
if token.lower() in {p.lower() for p in allowed}:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _best_runbook_match(candidate: str, allowed: list[str]) -> str | None:
    """Closest allowed runbook path by fuzzy ratio, or None below threshold."""
    if not candidate or not allowed:
        return None
    needle = candidate.lower()
    best: str | None = None
    best_score = 0.0
    for path in allowed:
        ratio = difflib.SequenceMatcher(a=needle, b=path.lower()).ratio()
        if ratio > best_score:
            best, best_score = path, ratio
    return best if best_score >= RUNBOOK_SIMILARITY_THRESHOLD else None
|
||||
|
||||
|
||||
def _resolve_path(data: Any, path: str) -> Any | None:
|
||||
if path.startswith("line:"):
|
||||
return path.split("line:", 1)[1].strip()
|
||||
cursor = data
|
||||
for part in re.split(r"\.(?![^\[]*\])", path):
|
||||
if not part:
|
||||
continue
|
||||
match = re.match(r"^(\w+)(?:\[(\d+)\])?$", part)
|
||||
if not match:
|
||||
return None
|
||||
key = match.group(1)
|
||||
index = match.group(2)
|
||||
if isinstance(cursor, dict):
|
||||
cursor = cursor.get(key)
|
||||
else:
|
||||
return None
|
||||
if index is not None:
|
||||
idx = int(index)
|
||||
if isinstance(cursor, list) and 0 <= idx < len(cursor):
|
||||
cursor = cursor[idx]
|
||||
else:
|
||||
return None
|
||||
return cursor
|
||||
|
||||
|
||||
def _snapshot_id(summary: dict[str, Any]) -> str | None:
|
||||
if not summary:
|
||||
return None
|
||||
for key in ("generated_at", "snapshot_ts", "snapshot_id"):
|
||||
value = summary.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _claims_to_payload(claims: list[ClaimItem]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for claim in claims:
|
||||
evidence = []
|
||||
for ev in claim.evidence:
|
||||
evidence.append(
|
||||
{
|
||||
"path": ev.path,
|
||||
"reason": ev.reason,
|
||||
"value_at_claim": ev.value_at_claim,
|
||||
}
|
||||
)
|
||||
output.append({"id": claim.id, "claim": claim.claim, "evidence": evidence})
|
||||
return output
|
||||
|
||||
|
||||
def _state_from_payload(payload: dict[str, Any] | None) -> ConversationState | None:
    """Rehydrate a ConversationState from a previously serialized payload.

    Claims missing an id, text, or any usable evidence path are silently
    dropped. Returns None when *payload* is falsy.
    """
    if not payload:
        return None
    claims_raw = payload.get("claims") if isinstance(payload, dict) else None
    claims: list[ClaimItem] = []
    if isinstance(claims_raw, list):
        for entry in claims_raw:
            if not isinstance(entry, dict):
                continue
            claim_text = str(entry.get("claim") or "").strip()
            claim_id = str(entry.get("id") or "").strip()
            if not claim_text or not claim_id:
                continue
            evidence_items: list[EvidenceItem] = []
            for ev in entry.get("evidence") or []:
                if not isinstance(ev, dict):
                    continue
                path = str(ev.get("path") or "").strip()
                if not path:
                    continue
                reason = str(ev.get("reason") or "").strip()
                value_at_claim = ev.get("value_at_claim")
                evidence_items.append(EvidenceItem(path=path, reason=reason, value_at_claim=value_at_claim))
            # Only claims backed by at least one evidence path are kept.
            if evidence_items:
                claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
    return ConversationState(
        # Missing/zero updated_at falls back to "now" on the monotonic clock.
        updated_at=float(payload.get("updated_at") or time.monotonic()),
        claims=claims,
        snapshot_id=payload.get("snapshot_id"),
        snapshot=payload.get("snapshot"),
    )
|
||||
|
||||
|
||||
# Export every module-private helper so sibling modules' star-imports see them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
344
atlasbot/engine/answerer/retrieval.py
Normal file
344
atlasbot/engine/answerer/retrieval.py
Normal file
@ -0,0 +1,344 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Awaitable
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
|
||||
from ._base import *
|
||||
from .post_ext import _extract_keywords
|
||||
|
||||
|
||||
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} region of *text*, or the stripped text when absent."""
    stripped = text.strip()
    found = re.search(r"\{.*\}", stripped, flags=re.S)
    return parse_json(found.group(0) if found else stripped, fallback=fallback)
|
||||
|
||||
|
||||
async def _select_metric_chunks(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    chunks: list[dict[str, Any]],
    plan: ModePlan,
) -> tuple[list[str], list[str]]:
    """Pick metric summary keys relevant to the question plus matching chunk ids.

    Returns (selected_keys, chunk_ids); both empty when there is nothing to
    select from or the model picks nothing usable.
    """
    summary_lines, question, sub_questions, keywords, token_set = _metric_ctx_values(ctx)
    if not summary_lines or not chunks:
        return [], []
    keys = _extract_metric_keys(summary_lines)
    if not keys:
        return [], []
    max_keys = max(4, plan.max_subquestions * 2)
    # Prefer keys that share tokens with the question; fall back to all keys.
    candidate_keys = _filter_metric_keys(keys, token_set)
    available_keys = candidate_keys or keys
    prompt = prompts.METRIC_KEYS_PROMPT.format(available="\n".join(available_keys), max_keys=max_keys)
    raw = await call_llm(
        prompts.METRIC_KEYS_SYSTEM,
        prompt + "\nQuestion: " + str(question) + "\nSubQuestions:\n" + "\n".join([str(item) for item in sub_questions]),
        context="Keywords:\n" + ", ".join([str(item) for item in keywords if item]),
        model=plan.fast_model,
        tag="metric_keys",
    )
    selected = _parse_key_list(raw, available_keys, max_keys)
    if candidate_keys:
        selected = _merge_metric_keys(selected, candidate_keys, max_keys)
    # If the model's picks share no tokens with the question, trust the
    # token-filtered candidates instead.
    if selected and candidate_keys and not _metric_key_overlap(selected, token_set):
        selected = candidate_keys[:max_keys]
    if not selected and candidate_keys:
        selected = candidate_keys[:max_keys]
    if available_keys:
        # Second LLM pass: ask which still-needed keys are missing and merge them in.
        missing = await _validate_metric_keys(
            call_llm,
            {
                "question": question,
                "sub_questions": sub_questions,
                "selected": selected,
            },
            available_keys,
            plan,
        )
        if missing:
            selected = _merge_metric_keys(selected, missing, max_keys)
    if not selected:
        return [], []
    ids = _chunk_ids_for_keys(chunks, selected)
    return selected, ids
|
||||
|
||||
|
||||
async def _validate_metric_keys(
    call_llm: Callable[..., Awaitable[str]],
    ctx: dict[str, Any],
    available: list[str],
    plan: ModePlan,
) -> list[str]:
    """Ask the fast model which available metric keys are missing from the selection.

    Returns the validated subset of *available* (order preserved, deduped);
    empty on parse failures or when nothing is missing.
    """
    if not available:
        return []
    question = str(ctx.get("question") or "")
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    selected = ctx.get("selected") if isinstance(ctx.get("selected"), list) else []
    # Cap the advertised key list so the prompt stays bounded.
    cap = max(12, plan.max_subquestions * 4)
    available_list = available[:cap]
    prompt = prompts.METRIC_KEYS_VALIDATE_PROMPT.format(
        question=question,
        sub_questions=json.dumps(sub_questions),
        selected=json.dumps(selected),
        available="\n".join(available_list),
    )
    raw = await call_llm(
        prompts.METRIC_KEYS_VALIDATE_SYSTEM,
        prompt,
        model=plan.fast_model,
        tag="metric_keys_validate",
    )
    parsed = _parse_json_block(raw, fallback={})
    items = parsed.get("missing") if isinstance(parsed, dict) else []
    if not isinstance(items, list):
        return []
    available_set = set(available_list)
    out: list[str] = []
    for item in items:
        # Keep only names the model was actually offered, without duplicates.
        if isinstance(item, str) and item in available_set and item not in out:
            out.append(item)
    return out
|
||||
|
||||
|
||||
async def _gather_limited(coros: list[Awaitable[Any]], limit: int) -> list[Any]:
|
||||
if not coros:
|
||||
return []
|
||||
semaphore = asyncio.Semaphore(max(1, limit))
|
||||
|
||||
async def runner(coro: Awaitable[Any]) -> Any:
|
||||
async with semaphore:
|
||||
return await coro
|
||||
|
||||
return await asyncio.gather(*(runner(coro) for coro in coros))
|
||||
|
||||
|
||||
def _metric_ctx_values(ctx: dict[str, Any]) -> tuple[list[str], str, list[str], list[str], set[str]]:
    """Unpack question/keyword context for metric-key selection.

    Returns (summary_lines, question, sub_questions, keywords, token_set);
    everything is empty when summary_lines is absent or malformed.
    """
    summary_lines = ctx.get("summary_lines") if isinstance(ctx, dict) else None
    if not isinstance(summary_lines, list):
        return [], "", [], [], set()
    question = ctx.get("question") if isinstance(ctx, dict) else ""
    sub_questions = ctx.get("sub_questions") if isinstance(ctx.get("sub_questions"), list) else []
    keywords = ctx.get("keywords") if isinstance(ctx.get("keywords"), list) else []
    keyword_tokens = ctx.get("keyword_tokens") if isinstance(ctx.get("keyword_tokens"), list) else []
    tokens = {str(token).lower() for token in keyword_tokens if token}
    derived = _extract_keywords(str(question), str(question), sub_questions=sub_questions, keywords=keywords)
    tokens |= {token.lower() for token in derived}
    return summary_lines, str(question), sub_questions, keywords, _token_variants(tokens)
|
||||
|
||||
|
||||
def _extract_metric_keys(lines: list[str]) -> list[str]:
|
||||
keys: list[str] = []
|
||||
for line in lines:
|
||||
if ":" not in line:
|
||||
continue
|
||||
key = line.split(":", 1)[0].strip()
|
||||
if not key or " " in key:
|
||||
continue
|
||||
if key not in keys:
|
||||
keys.append(key)
|
||||
return keys
|
||||
|
||||
|
||||
def _token_variants(tokens: set[str]) -> set[str]:
    """Augment tokens with naive singular forms (-ies -> -y, -es and -s stripped)."""
    if not tokens:
        return set()
    variants = set(tokens)
    for token in tokens:
        if len(token) <= TOKEN_MIN_LEN:
            continue
        if token.endswith("ies"):
            variants.add(token[:-3] + "y")
        if token.endswith("es"):
            variants.add(token[:-2])
        if token.endswith("s"):
            variants.add(token[:-1])
    return variants
|
||||
|
||||
|
||||
def _parse_key_list(raw: str, allowed: list[str], max_keys: int) -> list[str]:
    """Extract up to *max_keys* allowed key names from an LLM JSON reply."""
    parsed = _parse_json_block(raw, fallback={})
    if isinstance(parsed, list):
        items = parsed
    elif isinstance(parsed, dict):
        items = parsed.get("keys")
    else:
        items = []
    if not isinstance(items, list):
        return []
    allowed_set = set(allowed)
    out: list[str] = []
    for item in items:
        if isinstance(item, str) and item in allowed_set and item not in out:
            out.append(item)
            if len(out) >= max_keys:
                break
    return out
|
||||
|
||||
|
||||
def _chunk_ids_for_keys(chunks: list[dict[str, Any]], keys: list[str]) -> list[str]:
|
||||
if not keys:
|
||||
return []
|
||||
ids: list[str] = []
|
||||
key_set = {f"{key}:" for key in keys}
|
||||
for chunk in chunks:
|
||||
text = str(chunk.get("text") or "")
|
||||
if not text:
|
||||
continue
|
||||
for line in text.splitlines():
|
||||
for key in key_set:
|
||||
if line.startswith(key):
|
||||
cid = chunk.get("id")
|
||||
if cid and cid not in ids:
|
||||
ids.append(cid)
|
||||
break
|
||||
return ids
|
||||
|
||||
|
||||
def _filter_metric_keys(keys: list[str], tokens: set[str]) -> list[str]:
    """Rank keys by how many question tokens appear among their word parts."""
    if not keys or not tokens:
        return []
    wanted = {token.lower() for token in tokens if token and len(token) >= TOKEN_MIN_LEN}
    ranked: list[tuple[int, str]] = []
    for key in keys:
        parts = {part for part in re.split(r"[_\W]+", key.lower()) if part}
        if not parts:
            continue
        overlap = len(parts & wanted)
        if overlap:
            ranked.append((overlap, key))
    ranked.sort(key=lambda pair: (-pair[0], pair[1]))
    return [key for _, key in ranked]
|
||||
|
||||
|
||||
def _metric_key_overlap(keys: list[str], tokens: set[str]) -> bool:
    """True when any key shares a word part with the question tokens."""
    if not keys or not tokens:
        return False
    wanted = {token.lower() for token in tokens if token and len(token) >= TOKEN_MIN_LEN}
    return any(
        {part for part in re.split(r"[_\W]+", key.lower()) if part} & wanted
        for key in keys
    )
|
||||
|
||||
|
||||
def _lines_for_metric_keys(lines: list[str], keys: list[str], max_lines: int = 0) -> list[str]:
|
||||
if not lines or not keys:
|
||||
return []
|
||||
prefixes = {f"{key}:" for key in keys}
|
||||
selected: list[str] = []
|
||||
for line in lines:
|
||||
for prefix in prefixes:
|
||||
if prefix in line:
|
||||
selected.append(line)
|
||||
break
|
||||
if max_lines and len(selected) >= max_lines:
|
||||
break
|
||||
return selected
|
||||
|
||||
|
||||
def _merge_metric_keys(current: list[str], candidates: list[str], max_keys: int) -> list[str]:
|
||||
merged: list[str] = []
|
||||
seen = set()
|
||||
for key in current:
|
||||
if key and key not in seen:
|
||||
merged.append(key)
|
||||
seen.add(key)
|
||||
for key in candidates:
|
||||
if key and key not in seen:
|
||||
merged.append(key)
|
||||
seen.add(key)
|
||||
if len(merged) >= max_keys:
|
||||
break
|
||||
return merged[:max_keys]
|
||||
|
||||
|
||||
def _merge_fact_lines(primary: list[str], fallback: list[str]) -> list[str]:
|
||||
seen = set()
|
||||
merged: list[str] = []
|
||||
for line in primary + fallback:
|
||||
if line in seen:
|
||||
continue
|
||||
seen.add(line)
|
||||
merged.append(line)
|
||||
return merged
|
||||
|
||||
|
||||
def _expand_hottest_line(line: str) -> list[str]:
|
||||
if not line:
|
||||
return []
|
||||
if not line.lower().startswith("hottest:"):
|
||||
return []
|
||||
expanded: list[str] = []
|
||||
payload = line.split("hottest:", 1)[1]
|
||||
for part in payload.split(";"):
|
||||
part = part.strip()
|
||||
if not part or "=" not in part:
|
||||
continue
|
||||
metric, rest = part.split("=", 1)
|
||||
metric = metric.strip()
|
||||
match = re.search(r"(?P<node>[^\s\[]+).*\((?P<value>[^)]+)\)", rest)
|
||||
if not match:
|
||||
continue
|
||||
node = match.group("node").strip()
|
||||
value = match.group("value").strip()
|
||||
class_match = re.search(r"\[(?P<class>[^\]]+)\]", rest)
|
||||
node_class = class_match.group("class").strip() if class_match else ""
|
||||
if node_class:
|
||||
expanded.append(f"hottest_{metric}_node: {node} [{node_class}] ({value})")
|
||||
else:
|
||||
expanded.append(f"hottest_{metric}_node: {node} ({value})")
|
||||
return expanded
|
||||
|
||||
|
||||
def _has_token(text: str, token: str) -> bool:
|
||||
if not text or not token:
|
||||
return False
|
||||
if token == "io":
|
||||
return "i/o" in text or re.search(r"\bio\b", text) is not None
|
||||
return re.search(rf"\b{re.escape(token)}\b", text) is not None
|
||||
|
||||
|
||||
def _hotspot_evidence(summary: dict[str, Any]) -> list[str]:
    """Render 'hotspot.<metric>' fact lines from the snapshot's hottest map.

    Each line carries the node, its hardware class, the metric value, and —
    when known — the node's busiest namespaces.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    if not hottest:
        return []
    hardware_by_node = summary.get("hardware_by_node") if isinstance(summary.get("hardware_by_node"), dict) else {}
    node_pods_top = summary.get("node_pods_top") if isinstance(summary.get("node_pods_top"), list) else []
    # Map node -> its top namespaces, used to enrich each hotspot line below.
    ns_map = {}
    for item in node_pods_top:
        if not isinstance(item, dict):
            continue
        node = item.get("node")
        namespaces_top = item.get("namespaces_top") if isinstance(item.get("namespaces_top"), list) else []
        ns_map[node] = namespaces_top
    lines: list[str] = []
    for metric, info in hottest.items():
        if not isinstance(info, dict):
            continue
        node = info.get("node")
        value = info.get("value")
        if not node:
            continue
        node_class = hardware_by_node.get(node)
        ns_parts = []
        for entry in ns_map.get(node, [])[:3]:
            # entry is presumably a (namespace, count) pair-like sequence — TODO confirm
            if isinstance(entry, (list, tuple)) and len(entry) >= NS_ENTRY_MIN_LEN:
                ns_parts.append(f"{entry[0]}={entry[1]}")
        ns_text = ", ".join(ns_parts)
        # Numeric values are normalized to two decimals; anything else is stringified.
        value_text = f"{value:.2f}" if isinstance(value, (int, float)) else str(value)
        line = f"hotspot.{metric}: node={node} class={node_class or 'unknown'} value={value_text}"
        if ns_text:
            line += f" namespaces_top={ns_text}"
        lines.append(line)
    return lines
|
||||
|
||||
|
||||
# Export every module-private helper so sibling modules' star-imports see them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
197
atlasbot/engine/answerer/retrieval_ext.py
Normal file
197
atlasbot/engine/answerer/retrieval_ext.py
Normal file
@ -0,0 +1,197 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import parse_json
|
||||
from ._base import *
|
||||
|
||||
|
||||
def _parse_json_block(text: str, *, fallback: dict[str, Any]) -> dict[str, Any]:
    """Parse the first {...} span in *text*; fall back to parsing the whole text."""
    trimmed = text.strip()
    span = re.search(r"\{.*\}", trimmed, flags=re.S)
    return parse_json(span.group(0) if span else trimmed, fallback=fallback)
|
||||
|
||||
|
||||
def _metric_key_tokens(summary_lines: list[str]) -> set[str]:
|
||||
tokens: set[str] = set()
|
||||
for line in summary_lines:
|
||||
if not isinstance(line, str) or ":" not in line:
|
||||
continue
|
||||
key = line.split(":", 1)[0].strip().lower()
|
||||
if not key:
|
||||
continue
|
||||
tokens.add(key)
|
||||
for part in re.split(r"[_\s]+", key):
|
||||
if part:
|
||||
tokens.add(part)
|
||||
return tokens
|
||||
|
||||
|
||||
async def _select_best_candidate(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, tag: str) -> int:
    """Ask the model to pick the best candidate; return its 0-based index.

    Falls back to index 0 when there is at most one candidate or the reply
    cannot be parsed into a valid selection.
    """
    if len(candidates) <= 1:
        return 0
    prompt = (
        prompts.CANDIDATE_SELECT_PROMPT
        + "\nQuestion: "
        + question
        + "\nCandidates:\n"
        + "\n".join([f"{idx+1}) {cand}" for idx, cand in enumerate(candidates)])
    )
    raw = await call_llm(prompts.CANDIDATE_SELECT_SYSTEM, prompt, model=plan.model, tag=tag)
    data = _parse_json_block(raw, fallback={})
    best = data.get("best") if isinstance(data, dict) else None
    # The model answers 1-based; convert and bounds-check before trusting it.
    if isinstance(best, int) and 1 <= best <= len(candidates):
        return best - 1
    return 0
|
||||
|
||||
|
||||
def _dedupe_lines(lines: list[str], limit: int | None = None) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
cleaned: list[str] = []
|
||||
for line in lines:
|
||||
value = (line or "").strip()
|
||||
if not value or value in seen:
|
||||
continue
|
||||
if value.lower().startswith("lexicon_") or value.lower().startswith("units:"):
|
||||
continue
|
||||
cleaned.append(value)
|
||||
seen.add(value)
|
||||
if limit and len(cleaned) >= limit:
|
||||
break
|
||||
return cleaned
|
||||
|
||||
|
||||
def _collect_fact_candidates(selected: list[dict[str, Any]], limit: int) -> list[str]:
    """Flatten chunk texts into non-empty lines, deduped and capped at *limit*."""
    gathered: list[str] = []
    for chunk in selected:
        text = chunk.get("text") if isinstance(chunk, dict) else None
        if isinstance(text, str):
            gathered.extend(line for line in text.splitlines() if line.strip())
    return _dedupe_lines(gathered, limit=limit)
|
||||
|
||||
|
||||
async def _select_best_list(call_llm: Callable[..., Any], question: str, candidates: list[list[str]], plan: ModePlan, tag: str) -> list[str]:
    """Choose the best candidate list via the LLM selector.

    Zero or one candidates is trivial; if the chosen list is empty, fall back
    to the order-preserving union of all candidates.
    """
    if not candidates:
        return []
    if len(candidates) == 1:
        return candidates[0]
    render = ["; ".join(items) for items in candidates]
    best_idx = await _select_best_candidate(call_llm, question, render, plan, tag)
    chosen = candidates[best_idx] if 0 <= best_idx < len(candidates) else candidates[0]
    if not chosen:
        # Safety net: merge every candidate, keeping first-seen order.
        merged: list[str] = []
        for entry in candidates:
            for item in entry:
                if item not in merged:
                    merged.append(item)
        chosen = merged
    return chosen
|
||||
|
||||
|
||||
async def _extract_fact_types(call_llm: Callable[..., Any], question: str, keywords: list[str], plan: ModePlan) -> list[str]:
    """Sample the fast model for fact-type lists and keep the best one (max 10).

    Runs up to plan.metric_retries attempts, then lets the selector choose
    among the non-empty parses.
    """
    prompt = prompts.FACT_TYPES_PROMPT + "\nQuestion: " + question
    if keywords:
        prompt += "\nKeywords: " + ", ".join(keywords)
    candidates: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.FACT_TYPES_SYSTEM, prompt, model=plan.fast_model, tag="fact_types")
        data = _parse_json_block(raw, fallback={})
        items = data.get("fact_types") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        cleaned = _dedupe_lines([str(item) for item in items if isinstance(item, (str, int, float))], limit=10)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "fact_types_select")
    return chosen[:10]
|
||||
|
||||
|
||||
async def _derive_signals(call_llm: Callable[..., Any], question: str, fact_types: list[str], plan: ModePlan) -> list[str]:
    """Derive up to 12 concrete signals from the chosen fact types.

    Same sample-then-select pattern as _extract_fact_types; returns [] when
    there are no fact types to work from.
    """
    if not fact_types:
        return []
    prompt = prompts.SIGNAL_PROMPT.format(question=question, fact_types="; ".join(fact_types))
    candidates: list[list[str]] = []
    attempts = max(plan.metric_retries, 1)
    for _ in range(attempts):
        raw = await call_llm(prompts.SIGNAL_SYSTEM, prompt, model=plan.fast_model, tag="signals")
        data = _parse_json_block(raw, fallback={})
        items = data.get("signals") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        cleaned = _dedupe_lines([str(item) for item in items if isinstance(item, (str, int, float))], limit=12)
        if cleaned:
            candidates.append(cleaned)
    chosen = await _select_best_list(call_llm, question, candidates, plan, "signals_select")
    return chosen[:12]
|
||||
|
||||
|
||||
async def _scan_chunk_for_signals(call_llm: Callable[..., Any], question: str, signals: list[str], chunk_lines: list[str], plan: ModePlan) -> list[str]:
    """Ask the fast model which chunk lines match the signals.

    Only lines present verbatim in ``chunk_lines`` are kept, so the model
    cannot introduce fabricated facts. At most two attempts are made and
    at most 15 lines returned.
    """
    if not signals or not chunk_lines:
        return []
    prompt = prompts.CHUNK_SCAN_PROMPT.format(
        signals="; ".join(signals),
        lines="\n".join(chunk_lines),
    )
    gathered: list[list[str]] = []
    for _ in range(max(1, min(plan.metric_retries, 2))):
        raw = await call_llm(prompts.CHUNK_SCAN_SYSTEM, prompt, model=plan.fast_model, tag="chunk_scan")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        # Filter against the original chunk so only real lines survive.
        kept = _dedupe_lines([line for line in chunk_lines if line in items], limit=15)
        if kept:
            gathered.append(kept)
    best = await _select_best_list(call_llm, question, gathered, plan, "chunk_scan_select")
    return best[:15]
|
||||
|
||||
|
||||
async def _prune_metric_candidates(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, attempts: int) -> list[str]:
    """Prune metric candidate lines down to at most six via the fast model.

    Each attempt keeps only lines that exist verbatim in ``candidates``;
    the best attempt is selected by ``_select_best_list``.
    """
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=6)
    gathered: list[list[str]] = []
    for _ in range(max(attempts, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_prune")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        kept = _dedupe_lines([line for line in candidates if line in items], limit=6)
        if kept:
            gathered.append(kept)
    best = await _select_best_list(call_llm, question, gathered, plan, "fact_prune_select")
    return best[:6]
|
||||
|
||||
|
||||
async def _select_fact_lines(call_llm: Callable[..., Any], question: str, candidates: list[str], plan: ModePlan, max_lines: int) -> list[str]:
    """Select up to ``max_lines`` fact lines from ``candidates`` via the fast model.

    Like ``_prune_metric_candidates`` but with a caller-supplied cap and the
    retry count taken from the plan.
    """
    if not candidates:
        return []
    prompt = prompts.FACT_PRUNE_PROMPT.format(question=question, candidates="\n".join(candidates), max_lines=max_lines)
    gathered: list[list[str]] = []
    for _ in range(max(plan.metric_retries, 1)):
        raw = await call_llm(prompts.FACT_PRUNE_SYSTEM, prompt, model=plan.fast_model, tag="fact_select")
        data = _parse_json_block(raw, fallback={})
        items = data.get("lines") if isinstance(data, dict) else None
        if not isinstance(items, list):
            continue
        kept = _dedupe_lines([line for line in candidates if line in items], limit=max_lines)
        if kept:
            gathered.append(kept)
    best = await _select_best_list(call_llm, question, gathered, plan, "fact_select_best")
    return best[:max_lines]
|
||||
|
||||
|
||||
# Re-export every single-underscore helper so sibling modules can star-import them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
404
atlasbot/engine/answerer/spine.py
Normal file
404
atlasbot/engine/answerer/spine.py
Normal file
@ -0,0 +1,404 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.engine.intent_router import IntentMatch
|
||||
from atlasbot.snapshot.builder import summary_text
|
||||
|
||||
from ._base import *
|
||||
|
||||
|
||||
def _join_context(parts: list[str]) -> str:
|
||||
text = "\n".join([part for part in parts if part])
|
||||
return text.strip()
|
||||
|
||||
|
||||
def _format_metric_value(value: Any) -> str:
|
||||
if isinstance(value, bool):
|
||||
return str(value).lower()
|
||||
if isinstance(value, int):
|
||||
return str(value)
|
||||
if isinstance(value, float):
|
||||
return f"{value:.1f}".rstrip("0").rstrip(".")
|
||||
return str(value)
|
||||
|
||||
|
||||
def _format_history(history: list[dict[str, str]] | None) -> str:
|
||||
if not history:
|
||||
return ""
|
||||
lines = ["Recent conversation (non-authoritative):"]
|
||||
for entry in history[-4:]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
question = entry.get("q")
|
||||
answer = entry.get("a")
|
||||
role = entry.get("role")
|
||||
content = entry.get("content")
|
||||
if question:
|
||||
lines.append(f"Q: {question}")
|
||||
if answer:
|
||||
lines.append(f"A: {answer}")
|
||||
if role and content:
|
||||
prefix = "Q" if role == "user" else "A"
|
||||
lines.append(f"{prefix}: {content}")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _summary_lines(snapshot: dict[str, Any] | None) -> list[str]:
    """Split the snapshot's summary text into its non-blank lines."""
    text = summary_text(snapshot)
    return [line for line in text.splitlines() if line.strip()] if text else []
|
||||
|
||||
|
||||
def _line_starting_with(lines: list[str], prefix: str) -> str | None:
|
||||
if not lines:
|
||||
return None
|
||||
for line in lines:
|
||||
if line.lower().startswith(prefix.lower()):
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
def _spine_lines(lines: list[str]) -> dict[str, str]:
    """Build the fact 'spine' by running every extractor over the summary lines."""
    spine: dict[str, str] = {}
    extractors = (
        _spine_nodes,
        _spine_hardware,
        _spine_hottest,
        _spine_postgres,
        _spine_namespaces,
        _spine_pressure,
    )
    for extract in extractors:
        extract(lines, spine)
    return spine
|
||||
|
||||
|
||||
def _spine_nodes(lines: list[str], spine: dict[str, str]) -> None:
    """Fill the node-count facts in *spine* from the summary lines.

    A combined "nodes:" line serves both keys; otherwise the separate
    total/ready lines are used when present.
    """
    combined = _line_starting_with(lines, "nodes:")
    if combined:
        spine["nodes_count"] = combined
        spine["nodes_ready"] = combined
        return
    total_line = _line_starting_with(lines, "nodes_total:")
    if total_line:
        spine["nodes_count"] = total_line
    ready_line = _line_starting_with(lines, "nodes_ready:")
    if ready_line:
        spine["nodes_ready"] = ready_line
|
||||
|
||||
|
||||
def _spine_hardware(lines: list[str], spine: dict[str, str]) -> None:
    """Record the hardware-grouping line, preferring "hardware_nodes:"."""
    line = _line_starting_with(lines, "hardware_nodes:") or _line_starting_with(lines, "hardware:")
    if line:
        spine["nodes_non_rpi"] = line
|
||||
|
||||
|
||||
def _spine_hottest(lines: list[str], spine: dict[str, str]) -> None:
    """Point every hottest_* fact at the single combined "hottest:" line."""
    line = _line_starting_with(lines, "hottest:")
    if not line:
        return
    for fact_key in ("hottest_cpu", "hottest_ram", "hottest_net", "hottest_io", "hottest_disk"):
        spine[fact_key] = line
|
||||
|
||||
|
||||
def _spine_postgres(lines: list[str], spine: dict[str, str]) -> None:
    """Record postgres connection totals and the hottest-database line."""
    total_line = _line_starting_with(lines, "postgres_connections_total:")
    if total_line:
        spine["postgres_connections"] = total_line
    db_line = _line_starting_with(lines, "postgres:")
    if db_line:
        spine["postgres_hottest"] = db_line
|
||||
|
||||
|
||||
def _spine_namespaces(lines: list[str], spine: dict[str, str]) -> None:
    """Record the namespaces-by-pod-count line, when present."""
    top_line = _line_starting_with(lines, "namespaces_top:")
    if top_line:
        spine["namespace_most_pods"] = top_line
|
||||
|
||||
|
||||
def _spine_pressure(lines: list[str], spine: dict[str, str]) -> None:
    """Record a pressure summary line, falling back to the node-load line."""
    for prefix in ("pressure_nodes:", "node_load_top:"):
        line = _line_starting_with(lines, prefix)
        if line:
            spine["pressure_summary"] = line
            return
|
||||
|
||||
|
||||
def _parse_group_line(line: str) -> dict[str, list[str]]:
|
||||
groups: dict[str, list[str]] = {}
|
||||
if not line:
|
||||
return groups
|
||||
payload = line.split(":", 1)[1] if ":" in line else line
|
||||
for part in payload.split(";"):
|
||||
part = part.strip()
|
||||
if not part or "=" not in part:
|
||||
continue
|
||||
key, value = part.split("=", 1)
|
||||
value = value.strip()
|
||||
nodes: list[str] = []
|
||||
if "(" in value and ")" in value:
|
||||
inner = value[value.find("(") + 1 : value.rfind(")")]
|
||||
nodes = [item.strip() for item in inner.split(",") if item.strip()]
|
||||
if not nodes:
|
||||
cleaned = re.sub(r"^[0-9]+", "", value).strip()
|
||||
nodes = [item.strip() for item in cleaned.split(",") if item.strip()]
|
||||
groups[key.strip()] = nodes
|
||||
return groups
|
||||
|
||||
|
||||
def _parse_hottest(line: str, metric: str) -> str | None:
|
||||
if not line:
|
||||
return None
|
||||
payload = line.split(":", 1)[1] if ":" in line else line
|
||||
for part in payload.split(";"):
|
||||
part = part.strip()
|
||||
if part.startswith(f"{metric}="):
|
||||
return part
|
||||
return None
|
||||
|
||||
|
||||
def _spine_answer(intent: IntentMatch, spine_line: str | None) -> str | None:
    """Turn a spine fact line into a direct answer for the routed intent.

    hottest_* intents are dispatched to the hottest-segment parser; other
    kinds go through a handler table, falling back to the raw line.
    """
    if not spine_line:
        return None
    kind = intent.kind
    if kind.startswith("hottest_"):
        return _spine_hottest_answer(kind, spine_line)
    dispatch = {
        "nodes_count": _spine_nodes_answer,
        "nodes_ready": _spine_nodes_answer,
        "nodes_non_rpi": _spine_non_rpi_answer,
        "hardware_mix": _spine_hardware_answer,
        "postgres_connections": _spine_postgres_answer,
        "postgres_hottest": _spine_postgres_answer,
        "namespace_most_pods": _spine_namespace_answer,
        "pressure_summary": _spine_pressure_answer,
    }
    handler = dispatch.get(kind)
    return handler(spine_line) if handler else spine_line
|
||||
|
||||
|
||||
def _spine_nodes_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_non_rpi_answer(line: str) -> str:
    """List nodes from every hardware group whose key is not rpi*; else echo."""
    groups = _parse_group_line(line)
    others = [
        node
        for key, nodes in groups.items()
        if not key.lower().startswith("rpi")
        for node in nodes
    ]
    if others:
        return "Non-Raspberry Pi nodes: " + ", ".join(others) + "."
    return line
|
||||
|
||||
|
||||
def _spine_hardware_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_hottest_answer(kind: str, line: str) -> str:
    """Answer a hottest_<metric> intent with the matching segment, else echo."""
    metric = kind.split("_", 1)[1]
    return _parse_hottest(line, metric) or line
|
||||
|
||||
|
||||
def _spine_postgres_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_namespace_answer(line: str) -> str:
|
||||
payload = line.split(":", 1)[1] if ":" in line else line
|
||||
top = payload.split(";")[0].strip()
|
||||
if top:
|
||||
return f"Namespace with most pods: {top}."
|
||||
return line
|
||||
|
||||
|
||||
def _spine_pressure_answer(line: str) -> str:
|
||||
return line
|
||||
|
||||
|
||||
def _spine_from_summary(summary: dict[str, Any]) -> dict[str, str]:
    """Build the fact spine straight from the structured summary dict."""
    if not isinstance(summary, dict) or not summary:
        return {}
    spine: dict[str, str] = {}
    extractors = (
        _spine_from_counts,
        _spine_from_hardware,
        _spine_from_hottest,
        _spine_from_postgres,
        _spine_from_namespace_pods,
        _spine_from_pressure,
    )
    for extract in extractors:
        spine.update(extract(summary))
    return spine
|
||||
|
||||
|
||||
def _spine_from_counts(summary: dict[str, Any]) -> dict[str, str]:
|
||||
counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
|
||||
inventory = summary.get("inventory") if isinstance(summary.get("inventory"), dict) else {}
|
||||
nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
|
||||
workers = inventory.get("workers") if isinstance(inventory.get("workers"), dict) else {}
|
||||
total = nodes.get("total")
|
||||
ready = nodes.get("ready")
|
||||
not_ready = nodes.get("not_ready")
|
||||
if total is None:
|
||||
total = counts.get("nodes_total")
|
||||
if ready is None:
|
||||
ready = counts.get("nodes_ready")
|
||||
if not_ready is None and isinstance(inventory.get("not_ready_names"), list):
|
||||
not_ready = len(inventory.get("not_ready_names") or [])
|
||||
workers_ready = workers.get("ready")
|
||||
workers_total = workers.get("total")
|
||||
if total is None and ready is None and not_ready is None:
|
||||
return {}
|
||||
parts = []
|
||||
if total is not None:
|
||||
parts.append(f"total={int(total)}")
|
||||
if ready is not None:
|
||||
parts.append(f"ready={int(ready)}")
|
||||
if not_ready is not None:
|
||||
parts.append(f"not_ready={int(not_ready)}")
|
||||
if workers_total is not None and workers_ready is not None:
|
||||
parts.append(f"workers_ready={int(workers_ready)}/{int(workers_total)}")
|
||||
line = "nodes: " + ", ".join(parts)
|
||||
return {"nodes_count": line, "nodes_ready": line}
|
||||
|
||||
|
||||
def _spine_from_hardware(summary: dict[str, Any]) -> dict[str, str]:
|
||||
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
|
||||
if not hardware:
|
||||
return {}
|
||||
parts = []
|
||||
for key, nodes in hardware.items():
|
||||
if not isinstance(nodes, list):
|
||||
continue
|
||||
node_list = ", ".join(str(n) for n in nodes if n)
|
||||
if node_list:
|
||||
parts.append(f"{key}=({node_list})")
|
||||
if not parts:
|
||||
return {}
|
||||
return {"nodes_non_rpi": "hardware: " + "; ".join(parts)}
|
||||
|
||||
|
||||
def _spine_from_hottest(summary: dict[str, Any]) -> dict[str, str]:
    """Build hottest_<metric> fact lines from the summary's hottest data.

    Entries from ``summary["top"]["node_hottest"]`` fill in metrics missing
    from ``summary["hottest"]``. Works on a copy so the caller's summary is
    never mutated (the original merged into ``summary["hottest"]`` in place).
    Only dict entries with a node/label produce output lines.
    """
    hottest = summary.get("hottest") if isinstance(summary.get("hottest"), dict) else {}
    top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
    top_hottest = top.get("node_hottest") if isinstance(top.get("node_hottest"), dict) else {}
    # Merge into a fresh dict: never mutate the caller's summary structure.
    merged = dict(hottest)
    for key, value in top_hottest.items():
        if key not in merged and value is not None:
            merged[key] = value
    if not merged:
        return {}
    mapping: dict[str, str] = {}
    for key in ("cpu", "ram", "net", "io", "disk"):
        entry = merged.get(key)
        if not isinstance(entry, dict):
            continue
        node = entry.get("node") or entry.get("label") or ""
        value = entry.get("value")
        if node:
            mapping[f"hottest_{key}"] = f"{key}={node} ({_format_metric_value(value)})"
    return mapping
|
||||
|
||||
|
||||
def _spine_from_postgres(summary: dict[str, Any]) -> dict[str, str]:
|
||||
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
|
||||
if not postgres:
|
||||
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
|
||||
postgres = top.get("postgres") if isinstance(top.get("postgres"), dict) else {}
|
||||
if not postgres:
|
||||
return {}
|
||||
used = postgres.get("used")
|
||||
max_conn = postgres.get("max")
|
||||
hottest = postgres.get("hottest_db") if isinstance(postgres.get("hottest_db"), dict) else {}
|
||||
hottest_label = hottest.get("label") or ""
|
||||
facts: dict[str, str] = {}
|
||||
if used is not None and max_conn is not None:
|
||||
facts["postgres_connections"] = f"postgres_connections_total: used={int(used)}, max={int(max_conn)}"
|
||||
if hottest_label:
|
||||
facts["postgres_hottest"] = f"postgres_hottest_db: {hottest_label}"
|
||||
return facts
|
||||
|
||||
|
||||
def _spine_from_namespace_pods(summary: dict[str, Any]) -> dict[str, str]:
|
||||
pods = summary.get("namespace_pods") if isinstance(summary.get("namespace_pods"), list) else []
|
||||
if not pods:
|
||||
top = summary.get("top") if isinstance(summary.get("top"), dict) else {}
|
||||
pods = top.get("namespace_pods") if isinstance(top.get("namespace_pods"), list) else []
|
||||
if not pods:
|
||||
return {}
|
||||
best_name = ""
|
||||
best_value = None
|
||||
for entry in pods:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = entry.get("namespace") or entry.get("name") or entry.get("label") or ""
|
||||
value = entry.get("pods")
|
||||
if value is None:
|
||||
value = entry.get("pods_total")
|
||||
if value is None:
|
||||
value = entry.get("value")
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
numeric = None
|
||||
if name and numeric is not None and (best_value is None or numeric > best_value):
|
||||
best_name = name
|
||||
best_value = numeric
|
||||
if best_name:
|
||||
return {"namespace_most_pods": f"namespace_most_pods: {best_name} ({int(best_value or 0)} pods)"}
|
||||
return {}
|
||||
|
||||
|
||||
def _spine_from_pressure(summary: dict[str, Any]) -> dict[str, str]:
|
||||
pressure = summary.get("pressure_summary") if isinstance(summary.get("pressure_summary"), dict) else {}
|
||||
if not pressure:
|
||||
pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
|
||||
if not pressure:
|
||||
return {}
|
||||
total = pressure.get("total")
|
||||
unsched = pressure.get("unschedulable")
|
||||
names = pressure.get("names") if isinstance(pressure.get("names"), list) else []
|
||||
parts = []
|
||||
if total is None and names:
|
||||
total = len([name for name in names if name])
|
||||
if total is not None:
|
||||
parts.append(f"total={int(total)}")
|
||||
if unsched is not None:
|
||||
parts.append(f"unschedulable={int(unsched)}")
|
||||
if parts:
|
||||
return {"pressure_summary": "pressure_nodes: " + ", ".join(parts)}
|
||||
return {}
|
||||
|
||||
|
||||
def _spine_fallback(intent: IntentMatch, lines: list[str]) -> str | None:
|
||||
if not lines:
|
||||
return None
|
||||
keywords = {
|
||||
"nodes_count": ("nodes:", "nodes_total:"),
|
||||
"nodes_ready": ("nodes:", "nodes_ready:"),
|
||||
"postgres_hottest": ("postgres_hottest", "hottest_db", "postgres"),
|
||||
"namespace_most_pods": ("namespace", "pods", "namespaces_top"),
|
||||
"pressure_summary": ("pressure", "node_load_top"),
|
||||
}
|
||||
for token in keywords.get(intent.kind, ("",)):
|
||||
if not token:
|
||||
continue
|
||||
for line in lines:
|
||||
if token in line:
|
||||
return line
|
||||
return None
|
||||
|
||||
|
||||
# Re-export every single-underscore helper so sibling modules can star-import them.
__all__ = [name for name in globals() if name.startswith("_") and not name.startswith("__")]
|
||||
484
atlasbot/engine/answerer/workflow.py
Normal file
484
atlasbot/engine/answerer/workflow.py
Normal file
@ -0,0 +1,484 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.engine.intent_router import route_intent
|
||||
from atlasbot.llm import prompts
|
||||
from atlasbot.llm.client import build_messages
|
||||
from atlasbot.snapshot.builder import build_summary
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .factsheet import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .retrieval_ext import *
|
||||
from .spine import *
|
||||
from .workflow_post import finalize_answer
|
||||
|
||||
async def run_answer(engine: Any, question: str, *, mode: str, history: list[dict[str, str]] | None = None, observer: Callable[[str, str], None] | None = None, conversation_id: str | None = None, snapshot_pin: bool | None = None) -> AnswerResult: # noqa: C901
|
||||
"""Answer a question using the staged reasoning pipeline."""
|
||||
|
||||
settings = engine._settings
|
||||
question = (question or "").strip()
|
||||
if not question:
|
||||
return AnswerResult("I need a question to answer.", _default_scores(), {"mode": mode})
|
||||
if mode == "stock":
|
||||
return await engine._answer_stock(question)
|
||||
|
||||
limitless = "run limitless" in question.lower()
|
||||
if limitless:
|
||||
question = re.sub(r"(?i)run limitless", "", question).strip()
|
||||
|
||||
plan = _mode_plan(settings, mode)
|
||||
call_limit = _llm_call_limit(settings, mode)
|
||||
call_cap = math.ceil(call_limit * settings.llm_limit_multiplier)
|
||||
call_count = 0
|
||||
limit_hit = False
|
||||
time_budget_hit = False
|
||||
started = time.monotonic()
|
||||
time_budget_sec = _mode_time_budget(settings, mode) if not limitless else 0.0
|
||||
|
||||
debug_tags = {
|
||||
"route",
|
||||
"decompose",
|
||||
"chunk_score",
|
||||
"chunk_select",
|
||||
"fact_select",
|
||||
"synth",
|
||||
"subanswer",
|
||||
"tool",
|
||||
"followup",
|
||||
"select_claims",
|
||||
"evidence_fix",
|
||||
}
|
||||
|
||||
async def call_llm(system: str, prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
|
||||
nonlocal call_count, limit_hit, time_budget_hit
|
||||
if not limitless and call_count >= call_cap:
|
||||
limit_hit = True
|
||||
raise LLMLimitReached("llm_limit")
|
||||
timeout_sec = None
|
||||
if not limitless and time_budget_sec > 0:
|
||||
time_left = time_budget_sec - (time.monotonic() - started)
|
||||
if time_left <= 0:
|
||||
time_budget_hit = True
|
||||
raise LLMTimeBudgetExceeded("time_budget")
|
||||
timeout_sec = min(settings.ollama_timeout_sec, time_left)
|
||||
call_count += 1
|
||||
messages = build_messages(system, prompt, context=context)
|
||||
try:
|
||||
llm_call = engine._llm.chat(messages, model=model or plan.model, timeout_sec=timeout_sec)
|
||||
if timeout_sec is not None:
|
||||
response = await asyncio.wait_for(llm_call, timeout=max(0.001, timeout_sec))
|
||||
else:
|
||||
response = await llm_call
|
||||
except TimeoutError as exc:
|
||||
time_budget_hit = True
|
||||
raise LLMTimeBudgetExceeded("time_budget") from exc
|
||||
log.info(
|
||||
"atlasbot_llm_call",
|
||||
extra={"extra": {"mode": mode, "tag": tag, "call": call_count, "limit": call_cap}},
|
||||
)
|
||||
if settings.debug_pipeline and tag in debug_tags:
|
||||
_debug_pipeline_log(settings, f"llm_raw_{tag}", str(response)[:1200])
|
||||
return response
|
||||
|
||||
state = engine._get_state(conversation_id)
|
||||
pin_snapshot = bool(snapshot_pin) or settings.snapshot_pin_enabled
|
||||
snapshot = engine._snapshot.get()
|
||||
snapshot_used = state.snapshot if pin_snapshot and state and state.snapshot else snapshot
|
||||
summary = build_summary(snapshot_used)
|
||||
summary_lines = _summary_lines(snapshot_used)
|
||||
allowed_nodes = _allowed_nodes(summary)
|
||||
allowed_namespaces = _allowed_namespaces(summary)
|
||||
spine = _spine_from_summary(summary) or _spine_lines(summary_lines)
|
||||
metric_tokens = _metric_key_tokens(summary_lines)
|
||||
global_facts = _global_facts(summary_lines)
|
||||
kb_summary = engine._kb.summary()
|
||||
runbooks = engine._kb.runbook_titles(limit=6)
|
||||
runbook_paths = engine._kb.runbook_paths(limit=10)
|
||||
history_ctx = _format_history(history)
|
||||
lexicon_ctx = _lexicon_context(summary)
|
||||
|
||||
key_facts: list[str] = []
|
||||
metric_facts: list[str] = []
|
||||
facts_used: list[str] = []
|
||||
reply = ""
|
||||
scores = _default_scores()
|
||||
claims: list[ClaimItem] = []
|
||||
classify: dict[str, Any] = {}
|
||||
tool_hint: dict[str, Any] | None = None
|
||||
|
||||
try:
|
||||
if mode in {"quick", "fast", "smart", "genius"} and not limitless:
|
||||
if observer:
|
||||
observer("factsheet", "building fact sheet")
|
||||
if _is_plain_math_question(question):
|
||||
reply = (
|
||||
"I focus on Titan cluster operations. Ask me about cluster health, nodes, workloads, "
|
||||
"namespaces, storage, or alerts."
|
||||
)
|
||||
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
kb_lines = (
|
||||
engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=_factsheet_kb_chars(mode, plan.kb_max_chars))
|
||||
if engine._kb
|
||||
else []
|
||||
)
|
||||
fact_lines = _quick_fact_sheet_lines(question, summary_lines, kb_lines, limit=_factsheet_line_limit(mode))
|
||||
classify = {
|
||||
"needs_snapshot": True,
|
||||
"needs_kb": bool(kb_lines),
|
||||
"question_type": f"{mode}_factsheet",
|
||||
"answer_style": "direct" if mode in {"quick", "fast"} else "concise",
|
||||
"follow_up": False,
|
||||
}
|
||||
heuristic_reply = _quick_fact_sheet_heuristic_answer(question, fact_lines)
|
||||
if heuristic_reply:
|
||||
return AnswerResult(heuristic_reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
if observer:
|
||||
observer("quick", "answering from fact sheet")
|
||||
quick_context = _quick_fact_sheet_text(fact_lines)
|
||||
quick_prompt = "Question: " + question + "\nAnswer using only the Fact Sheet. " + _factsheet_instruction(mode)
|
||||
reply = await call_llm(prompts.ANSWER_SYSTEM, quick_prompt, context=quick_context, model=_factsheet_model(mode, plan), tag=f"{mode}_factsheet")
|
||||
reply = _strip_followup_meta(reply)
|
||||
return AnswerResult(reply, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
|
||||
if observer:
|
||||
observer("normalize", "normalizing")
|
||||
normalize_prompt = prompts.NORMALIZE_PROMPT + "\nQuestion: " + question
|
||||
normalize_raw = await call_llm(prompts.NORMALIZE_SYSTEM, normalize_prompt, context=lexicon_ctx, model=plan.fast_model, tag="normalize")
|
||||
normalize = _parse_json_block(normalize_raw, fallback={"normalized": question, "keywords": []})
|
||||
normalized = str(normalize.get("normalized") or question).strip() or question
|
||||
keywords = normalize.get("keywords") or []
|
||||
_debug_pipeline_log(settings, "normalize_parsed", {"normalized": normalized, "keywords": keywords})
|
||||
keyword_tokens = _extract_keywords(question, normalized, sub_questions=[], keywords=keywords)
|
||||
question_tokens = _extract_question_tokens(normalized)
|
||||
|
||||
if observer:
|
||||
observer("route", "routing")
|
||||
route_prompt = prompts.ROUTE_PROMPT + "\nQuestion: " + normalized + "\nKeywords: " + json.dumps(keywords)
|
||||
route_raw = await call_llm(prompts.ROUTE_SYSTEM, route_prompt, context=_join_context([kb_summary, lexicon_ctx]), model=plan.fast_model, tag="route")
|
||||
classify = _parse_json_block(route_raw, fallback={})
|
||||
classify.setdefault("needs_snapshot", True)
|
||||
classify.setdefault("answer_style", "direct")
|
||||
classify.setdefault("follow_up", False)
|
||||
classify.setdefault("focus_entity", "unknown")
|
||||
classify.setdefault("focus_metric", "unknown")
|
||||
if metric_tokens and keyword_tokens and any(token in metric_tokens for token in keyword_tokens):
|
||||
classify["needs_snapshot"] = True
|
||||
intent = route_intent(normalized)
|
||||
if intent:
|
||||
classify["needs_snapshot"] = True
|
||||
classify["question_type"] = "metric"
|
||||
_debug_pipeline_log(settings, "route_parsed", {"classify": classify, "normalized": normalized})
|
||||
lowered_question = f"{question} {normalized}".lower()
|
||||
force_metric = bool(re.search(r"\bhow many\b|\bcount\b|\btotal\b", lowered_question))
|
||||
if any(term in lowered_question for term in ("postgres", "connections", "pvc", "ready")):
|
||||
force_metric = True
|
||||
|
||||
if intent:
|
||||
spine_line = spine.get(intent.kind) if isinstance(spine, dict) else None
|
||||
if not spine_line:
|
||||
spine_line = _spine_fallback(intent, summary_lines)
|
||||
spine_answer = _spine_answer(intent, spine_line)
|
||||
if spine_line:
|
||||
key_facts = _merge_fact_lines([spine_line], key_facts)
|
||||
metric_facts = _merge_fact_lines([spine_line], metric_facts)
|
||||
if spine_answer and mode in {"fast", "quick"}:
|
||||
return AnswerResult(spine_answer, _default_scores(), _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
|
||||
cluster_terms = (
|
||||
"atlas",
|
||||
"cluster",
|
||||
"node",
|
||||
"nodes",
|
||||
"namespace",
|
||||
"pod",
|
||||
"workload",
|
||||
"k8s",
|
||||
"kubernetes",
|
||||
"postgres",
|
||||
"database",
|
||||
"db",
|
||||
"connections",
|
||||
"cpu",
|
||||
"ram",
|
||||
"memory",
|
||||
"network",
|
||||
"io",
|
||||
"disk",
|
||||
"pvc",
|
||||
"storage",
|
||||
)
|
||||
has_cluster_terms = any(term in lowered_question for term in cluster_terms)
|
||||
if has_cluster_terms:
|
||||
classify["needs_snapshot"] = True
|
||||
lowered_norm = normalized.lower()
|
||||
if ("namespace" in lowered_norm and ("pod" in lowered_norm or "pods" in lowered_norm)) or re.search(r"\bmost\s+pods\b", lowered_norm) or re.search(r"\bpods\s+running\b", lowered_norm):
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
if re.search(r"\b(how many|count|number of|list)\b", lowered_question):
|
||||
classify["question_type"] = "metric"
|
||||
if any(term in lowered_question for term in ("postgres", "connections", "db")):
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
if any(term in lowered_question for term in ("pvc", "persistentvolume", "persistent volume", "storage")):
|
||||
if classify.get("question_type") not in {"metric", "diagnostic"}:
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
if "ready" in lowered_question and classify.get("question_type") not in {"metric", "diagnostic"}:
|
||||
classify["question_type"] = "diagnostic"
|
||||
hottest_terms = ("hottest", "highest", "lowest", "most")
|
||||
metric_terms = ("cpu", "ram", "memory", "net", "network", "io", "disk", "load", "usage", "pod", "pods", "namespace")
|
||||
if any(term in lowered_question for term in hottest_terms) and any(term in lowered_question for term in metric_terms):
|
||||
classify["question_type"] = "metric"
|
||||
baseline_terms = ("baseline", "delta", "trend", "increase", "decrease", "drop", "spike", "regression", "change")
|
||||
if any(term in lowered_question for term in baseline_terms) and any(term in lowered_question for term in metric_terms):
|
||||
classify["question_type"] = "metric"
|
||||
classify["needs_snapshot"] = True
|
||||
|
||||
if not classify.get("follow_up") and state and state.claims:
|
||||
follow_terms = ("there", "that", "those", "these", "it", "them", "that one", "this", "former", "latter")
|
||||
is_metric_query = force_metric or classify.get("question_type") in {"metric", "diagnostic"}
|
||||
if not is_metric_query and (
|
||||
any(term in lowered_question for term in follow_terms)
|
||||
or (len(normalized.split()) <= FOLLOWUP_SHORT_WORDS and not has_cluster_terms)
|
||||
):
|
||||
classify["follow_up"] = True
|
||||
|
||||
if classify.get("follow_up") and state and state.claims:
|
||||
if observer:
|
||||
observer("followup", "answering follow-up")
|
||||
reply = await engine._answer_followup(question, state, summary, classify, plan, call_llm)
|
||||
scores = await engine._score_answer(question, reply, plan, call_llm)
|
||||
return AnswerResult(reply, scores, _build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started))
|
||||
|
||||
if observer:
|
||||
observer("decompose", "decomposing")
|
||||
decompose_prompt = prompts.DECOMPOSE_PROMPT.format(max_parts=plan.max_subquestions * 2)
|
||||
decompose_raw = await call_llm(prompts.DECOMPOSE_SYSTEM, decompose_prompt + "\nQuestion: " + normalized, context=lexicon_ctx, model=plan.fast_model if mode == "quick" else plan.model, tag="decompose")
|
||||
parts = _parse_json_list(decompose_raw)
|
||||
sub_questions = _select_subquestions(parts, normalized, plan.max_subquestions)
|
||||
_debug_pipeline_log(settings, "decompose_parsed", {"sub_questions": sub_questions})
|
||||
keyword_tokens = _extract_keywords(question, normalized, sub_questions=sub_questions, keywords=keywords)
|
||||
|
||||
snapshot_context = ""
|
||||
signal_tokens: list[str] = []
|
||||
if classify.get("needs_snapshot"):
|
||||
if observer:
|
||||
observer("retrieve", "scoring chunks")
|
||||
chunks = _chunk_lines(summary_lines, plan.chunk_lines)
|
||||
if plan.use_raw_snapshot:
|
||||
raw_chunks = _raw_snapshot_chunks(snapshot_used)
|
||||
if raw_chunks:
|
||||
chunks.extend(raw_chunks)
|
||||
kb_lines = engine._kb.chunk_lines(max_files=plan.kb_max_files, max_chars=plan.kb_max_chars) if engine._kb else []
|
||||
if kb_lines:
|
||||
kb_chunks = _chunk_lines(kb_lines, plan.chunk_lines)
|
||||
for idx, chunk in enumerate(kb_chunks):
|
||||
chunk["id"] = f"k{idx}"
|
||||
chunks.extend(kb_chunks)
|
||||
metric_keys: list[str] = []
|
||||
must_chunk_ids: list[str] = []
|
||||
metric_task = None
|
||||
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and summary_lines:
|
||||
metric_ctx = {"question": normalized, "sub_questions": sub_questions, "keywords": keywords, "keyword_tokens": keyword_tokens, "summary_lines": summary_lines}
|
||||
metric_task = asyncio.create_task(_select_metric_chunks(call_llm, metric_ctx, chunks, plan))
|
||||
scored_task = asyncio.create_task(_score_chunks(call_llm, chunks, normalized, sub_questions, plan))
|
||||
if metric_task:
|
||||
metric_keys, must_chunk_ids = await metric_task
|
||||
scored = await scored_task
|
||||
selected = _select_chunks(chunks, scored, plan, keyword_tokens, must_chunk_ids)
|
||||
fact_candidates = _collect_fact_candidates(selected, limit=plan.max_subquestions * 12)
|
||||
key_facts = await _select_fact_lines(call_llm, normalized, fact_candidates, plan, max_lines=max(4, plan.max_subquestions * 2))
|
||||
metric_facts = []
|
||||
if classify.get("question_type") in {"metric", "diagnostic"} or force_metric:
|
||||
global_metric_facts: list[str] = []
|
||||
if global_facts:
|
||||
global_metric_facts = await _select_fact_lines(call_llm, normalized, global_facts, plan, max_lines=min(2, max(1, plan.max_subquestions)))
|
||||
if not global_metric_facts and (keyword_tokens or question_tokens):
|
||||
tokens = {tok for tok in (keyword_tokens or question_tokens) if tok and tok not in GENERIC_METRIC_TOKENS}
|
||||
global_metric_facts = _rank_metric_lines(global_facts, tokens, max_lines=2)
|
||||
if global_metric_facts:
|
||||
key_facts = _merge_fact_lines(global_metric_facts, key_facts)
|
||||
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
|
||||
if plan.use_deep_retrieval:
|
||||
if observer:
|
||||
observer("retrieve", "extracting fact types")
|
||||
fact_types = await _extract_fact_types(call_llm, normalized, keyword_tokens, plan)
|
||||
if observer:
|
||||
observer("retrieve", "deriving signals")
|
||||
signals = await _derive_signals(call_llm, normalized, fact_types, plan)
|
||||
if isinstance(signals, list):
|
||||
signal_tokens = [str(item) for item in signals if item]
|
||||
all_tokens = _merge_tokens(signal_tokens, keyword_tokens, question_tokens)
|
||||
if observer:
|
||||
observer("retrieve", "scanning chunks")
|
||||
candidate_lines: list[str] = []
|
||||
if signals:
|
||||
for chunk in selected:
|
||||
chunk_lines = chunk["text"].splitlines()
|
||||
if not chunk_lines:
|
||||
continue
|
||||
hits = await _scan_chunk_for_signals(call_llm, normalized, signals, chunk_lines, plan)
|
||||
if hits:
|
||||
candidate_lines.extend(hits)
|
||||
candidate_lines = list(dict.fromkeys(candidate_lines))
|
||||
if candidate_lines:
|
||||
if observer:
|
||||
observer("retrieve", "pruning candidates")
|
||||
metric_facts = await _prune_metric_candidates(call_llm, normalized, candidate_lines, plan, plan.metric_retries)
|
||||
if metric_facts:
|
||||
key_facts = _merge_fact_lines(metric_facts, key_facts)
|
||||
if settings.debug_pipeline:
|
||||
_debug_pipeline_log(settings, "metric_facts_selected", {"facts": metric_facts})
|
||||
if not metric_facts:
|
||||
if observer:
|
||||
observer("retrieve", "fallback metric selection")
|
||||
token_set = {tok for tok in all_tokens if tok and tok not in GENERIC_METRIC_TOKENS}
|
||||
fallback_candidates = _rank_metric_lines(summary_lines, token_set, max_lines=200)
|
||||
if fallback_candidates:
|
||||
metric_facts = await _select_fact_lines(call_llm, normalized, fallback_candidates, plan, max_lines=max(2, plan.max_subquestions))
|
||||
if not metric_facts and fallback_candidates:
|
||||
metric_facts = fallback_candidates[: max(2, plan.max_subquestions)]
|
||||
if metric_keys:
|
||||
key_lines = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3)
|
||||
if key_lines:
|
||||
metric_facts = _merge_fact_lines(key_lines, metric_facts)
|
||||
if metric_facts:
|
||||
metric_cover_tokens = [tok for tok in keyword_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
|
||||
if not metric_cover_tokens:
|
||||
metric_cover_tokens = [tok for tok in question_tokens if tok and tok not in GENERIC_METRIC_TOKENS]
|
||||
metric_facts = _ensure_token_coverage(metric_facts, metric_cover_tokens or all_tokens, summary_lines, max_add=plan.max_subquestions)
|
||||
if metric_cover_tokens:
|
||||
ranked_metric_lines = _rank_metric_lines(summary_lines, set(metric_cover_tokens), max_lines=max(1, plan.max_subquestions))
|
||||
if ranked_metric_lines:
|
||||
metric_facts = _merge_fact_lines(ranked_metric_lines, metric_facts)
|
||||
if metric_facts and not _has_keyword_overlap(metric_facts, keyword_tokens):
|
||||
best_line = _best_keyword_line(summary_lines, keyword_tokens)
|
||||
if best_line:
|
||||
metric_facts = _merge_fact_lines([best_line], metric_facts)
|
||||
if metric_facts:
|
||||
key_facts = _merge_fact_lines(metric_facts, key_facts)
|
||||
if global_metric_facts:
|
||||
metric_facts = _merge_fact_lines(global_metric_facts, metric_facts)
|
||||
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not metric_facts and key_facts:
|
||||
metric_facts = key_facts
|
||||
if key_facts:
|
||||
key_facts = _ensure_token_coverage(key_facts, _merge_tokens(keyword_tokens, question_tokens), summary_lines, max_add=plan.max_subquestions)
|
||||
facts_used = list(dict.fromkeys(key_facts)) if key_facts else list(dict.fromkeys(metric_facts))
|
||||
snapshot_context = "ClusterSnapshot:\n" + "\n".join([chunk["text"] for chunk in selected])
|
||||
combined_facts = _merge_fact_lines(global_facts, key_facts) if global_facts else key_facts
|
||||
if combined_facts:
|
||||
snapshot_context = "KeyFacts:\n" + "\n".join(combined_facts) + "\n\n" + snapshot_context
|
||||
|
||||
context = _join_context([kb_summary, _format_runbooks(runbooks), snapshot_context, history_ctx if classify.get("follow_up") else ""])
|
||||
|
||||
if plan.use_tool and classify.get("needs_tool"):
|
||||
if observer:
|
||||
observer("tool", "suggesting tools")
|
||||
tool_prompt = prompts.TOOL_PROMPT + "\nQuestion: " + normalized
|
||||
tool_raw = await call_llm(prompts.TOOL_SYSTEM, tool_prompt, context=context, model=plan.fast_model, tag="tool")
|
||||
tool_hint = _parse_json_block(tool_raw, fallback={})
|
||||
|
||||
if observer:
|
||||
observer("subanswers", "drafting subanswers")
|
||||
async def _subanswer_for(subq: str) -> str:
|
||||
sub_prompt = prompts.SUBANSWER_PROMPT + "\nQuestion: " + subq
|
||||
if plan.subanswer_retries > 1:
|
||||
candidates = await _gather_limited(
|
||||
[call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer") for _ in range(plan.subanswer_retries)],
|
||||
plan.parallelism,
|
||||
)
|
||||
best_idx = await _select_best_candidate(call_llm, subq, candidates, plan, "subanswer_select")
|
||||
return candidates[best_idx]
|
||||
return await call_llm(prompts.ANSWER_SYSTEM, sub_prompt, context=context, model=plan.model, tag="subanswer")
|
||||
|
||||
subanswers: list[str] = []
|
||||
if plan.parallelism > 1 and len(sub_questions) > 1:
|
||||
subanswers = await _gather_limited([_subanswer_for(subq) for subq in sub_questions], plan.parallelism)
|
||||
else:
|
||||
for subq in sub_questions:
|
||||
subanswers.append(await _subanswer_for(subq))
|
||||
|
||||
if observer:
|
||||
observer("synthesize", "synthesizing")
|
||||
reply, scores, claims = await finalize_answer(
|
||||
engine=engine,
|
||||
call_llm=call_llm,
|
||||
normalized=normalized,
|
||||
subanswers=subanswers,
|
||||
context=context,
|
||||
classify=classify,
|
||||
plan=plan,
|
||||
summary=summary,
|
||||
summary_lines=summary_lines,
|
||||
metric_facts=metric_facts,
|
||||
key_facts=key_facts,
|
||||
facts_used=facts_used,
|
||||
allowed_nodes=allowed_nodes,
|
||||
allowed_namespaces=allowed_namespaces,
|
||||
runbook_paths=runbook_paths,
|
||||
lowered_question=lowered_question,
|
||||
force_metric=force_metric,
|
||||
keyword_tokens=keyword_tokens,
|
||||
question_tokens=question_tokens,
|
||||
snapshot_context=snapshot_context,
|
||||
observer=observer,
|
||||
mode=mode,
|
||||
metric_keys=metric_keys if 'metric_keys' in locals() else None,
|
||||
)
|
||||
|
||||
|
||||
except LLMTimeBudgetExceeded:
|
||||
time_budget_hit = True
|
||||
if not reply:
|
||||
budget = max(1, round(time_budget_sec)) if time_budget_sec > 0 else 0
|
||||
budget_text = f"{budget}s" if budget else "its configured"
|
||||
if mode in {"quick", "fast"}:
|
||||
reply = f"Quick mode hit {budget_text} time budget before finishing. Try atlas-smart for a deeper answer."
|
||||
elif mode == "smart":
|
||||
reply = f"Smart mode hit {budget_text} time budget before finishing. Try atlas-genius or ask a narrower follow-up."
|
||||
else:
|
||||
reply = "I ran out of time before I could finish this answer."
|
||||
scores = _default_scores()
|
||||
except LLMLimitReached:
|
||||
if not reply:
|
||||
reply = "I started working on this but hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||
scores = _default_scores()
|
||||
finally:
|
||||
elapsed = round(time.monotonic() - started, 2)
|
||||
log.info(
|
||||
"atlasbot_answer",
|
||||
extra={
|
||||
"extra": {
|
||||
"mode": mode,
|
||||
"seconds": elapsed,
|
||||
"llm_calls": call_count,
|
||||
"limit": call_cap,
|
||||
"limit_hit": limit_hit,
|
||||
"time_budget_sec": time_budget_sec,
|
||||
"time_budget_hit": time_budget_hit,
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
if limit_hit and "run limitless" not in reply.lower():
|
||||
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||
|
||||
if conversation_id and claims:
|
||||
engine._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
|
||||
|
||||
return AnswerResult(
|
||||
reply,
|
||||
scores,
|
||||
_build_meta(mode, call_count, call_cap, limit_hit, time_budget_hit, time_budget_sec, classify, tool_hint, started),
|
||||
)
|
||||
170
atlasbot/engine/answerer/workflow_post.py
Normal file
170
atlasbot/engine/answerer/workflow_post.py
Normal file
@ -0,0 +1,170 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from atlasbot.llm import prompts
|
||||
|
||||
from ._base import *
|
||||
from .common import *
|
||||
from .post import *
|
||||
from .post_ext import *
|
||||
from .retrieval import *
|
||||
from .spine import *
|
||||
|
||||
|
||||
async def finalize_answer(*, engine: Any, call_llm: Callable[..., Any], normalized: str, subanswers: list[str], context: str, classify: dict[str, Any], plan: ModePlan, summary: dict[str, Any], summary_lines: list[str], metric_facts: list[str], key_facts: list[str], facts_used: list[str], allowed_nodes: list[str], allowed_namespaces: list[str], runbook_paths: list[str], lowered_question: str, force_metric: bool, keyword_tokens: list[str], question_tokens: list[str], snapshot_context: str, observer: Callable[[str, str], None] | None, mode: str, metric_keys: list[str] | None = None) -> tuple[str, AnswerScores, list[ClaimItem]]:  # noqa: C901
    """Synthesize and post-process the final answer.

    Input:
    - `engine`: answer engine exposing `_synthesize_answer`, `_dedup_reply`,
      `_score_answer`, and `_extract_claims`.
    - `call_llm`: awaitable LLM call `(system, prompt, *, context, model, tag)`.
    - `normalized`: normalized user question; `subanswers`: drafted sub-answers.
    - `context`: grounded prompt context; `classify`: question classification dict.
    - `plan`: mode plan (models, feature flags); `summary`/`summary_lines`: snapshot data.
    - `metric_facts`/`key_facts`/`facts_used`: selected evidence lines.
    - `allowed_nodes`/`allowed_namespaces`/`runbook_paths`: entity allow-lists.
    - `lowered_question`: lowercased raw question; `force_metric`: metric override.
    - `keyword_tokens`/`question_tokens`: retrieval tokens.
    - `snapshot_context`: formatted snapshot block; `observer`: optional progress callback.
    - `mode`: answer mode name; `metric_keys`: optional metric key hints.

    Output:
    - `(reply, scores, claims)` after evidence repair, runbook enforcement,
      guard passes, critic/gap review, and claim extraction.
    """

    reply = await engine._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)

    # Detect hallucinated entities and missing-evidence conditions in the draft.
    unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
    unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
    runbook_fix = _needs_runbook_fix(reply, runbook_paths)
    runbook_needed = _needs_runbook_reference(normalized, runbook_paths, reply)
    needs_evidence = _needs_evidence_fix(reply, classify)
    hardware_terms = ("rpi", "raspberry", "jetson", "amd64", "arm64", "hardware")
    hardware_line = _line_starting_with(summary_lines, "hardware_nodes:")
    if any(term in lowered_question for term in hardware_terms) and hardware_line:
        needs_evidence = True
    if metric_facts and (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
        needs_evidence = True
    if classify.get("question_type") in {"open_ended", "planning"} and metric_facts:
        needs_evidence = True

    # Resolve a concrete runbook path up front so fix prompts can cite it.
    resolved_runbook = None
    if runbook_paths and (runbook_fix or runbook_needed):
        resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
        resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
        resolver = _parse_json_block(resolver_raw, fallback={})
        candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
        if candidate and candidate in runbook_paths:
            resolved_runbook = candidate

    if (snapshot_context and needs_evidence) or unknown_nodes or unknown_namespaces or runbook_fix or runbook_needed:
        if observer:
            observer("evidence_fix", "repairing missing evidence")
        # Feed the fixer everything it needs to stay inside the allow-lists.
        extra_bits = []
        if unknown_nodes:
            extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
        if unknown_namespaces:
            extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
        if runbook_paths:
            extra_bits.append("AllowedRunbooks: " + ", ".join(runbook_paths))
        if resolved_runbook:
            extra_bits.append("ResolvedRunbook: " + resolved_runbook)
        if metric_facts:
            extra_bits.append("MustUseFacts: " + "; ".join(metric_facts[:4]))
        if hardware_line:
            extra_bits.append("HardwareNodes: " + hardware_line)
        if allowed_nodes:
            extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
        if allowed_namespaces:
            extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
        fix_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + ("\n" + "\n".join(extra_bits) if extra_bits else "")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, fix_prompt, context=context, model=plan.model, tag="evidence_fix")
        # Second, stricter pass when the repaired draft still omits the facts.
        if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
            enforce_prompt = prompts.EVIDENCE_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nMustIncludeFacts: " + "; ".join(metric_facts[:6]) + "\nInstruction: The answer must include all MustIncludeFacts items."
            reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, enforce_prompt, context=context, model=plan.model, tag="evidence_fix_enforce")

    # Last-resort metric path: answer directly from a single summary line.
    if metric_facts and not _reply_matches_metric_facts(reply, metric_facts, _merge_tokens(keyword_tokens, question_tokens)):
        # BUGFIX: the original guarded on `'metric_keys' in locals()`, which is
        # always true because `metric_keys` is a parameter of this function;
        # the only meaningful check is the truthiness of `metric_keys` itself.
        direct_candidates = _lines_for_metric_keys(summary_lines, metric_keys, max_lines=plan.max_subquestions * 3) if metric_keys else summary_lines
        direct_line = _select_metric_line(direct_candidates, normalized, _merge_tokens(keyword_tokens, question_tokens))
        if direct_line:
            direct_prompt = f"Question: {normalized}\nFact: {direct_line}\nAnswer using the fact."
            reply = await call_llm(prompts.ANSWER_SYSTEM, direct_prompt, context="", model=plan.fast_model, tag="metric_direct")
            # Count-style quick questions (or a still-mismatched reply) get the
            # deterministic formatted line instead of the model's prose.
            if (mode == "quick" and any(term in normalized.lower() for term in ("how many", "count", "total"))) or not _reply_matches_metric_facts(reply, [direct_line], _merge_tokens(keyword_tokens, question_tokens)):
                reply = _format_direct_metric_line(direct_line)

    # Deterministic override for "which nodes are NOT Raspberry Pi" questions.
    if "raspberry" in lowered_question and "not" in lowered_question:
        non_rpi = _non_rpi_nodes(summary)
        if non_rpi:
            reply = _format_hardware_groups(non_rpi, "Non-Raspberry Pi nodes")
    if unknown_nodes or unknown_namespaces:
        # Re-check against the latest reply text before stripping entities.
        refreshed_nodes = _find_unknown_nodes(reply, allowed_nodes)
        refreshed_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
        if refreshed_nodes or refreshed_namespaces:
            reply = _strip_unknown_entities(reply, refreshed_nodes, refreshed_namespaces)
    if runbook_paths and resolved_runbook and _needs_runbook_reference(normalized, runbook_paths, reply):
        if observer:
            observer("runbook_enforce", "enforcing runbook path")
        enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=resolved_runbook)
        reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
    if runbook_paths:
        # Replace any runbook path the model invented with a real one.
        invalid = [token for token in re.findall(r"runbooks/[A-Za-z0-9._-]+", reply) if token.lower() not in {p.lower() for p in runbook_paths}]
        if invalid:
            if observer:
                observer("runbook_enforce", "replacing invalid runbook path")
            resolver_prompt = prompts.RUNBOOK_SELECT_PROMPT + "\nQuestion: " + normalized
            resolver_raw = await call_llm(prompts.RUNBOOK_SELECT_SYSTEM, resolver_prompt, context="AllowedRunbooks:\n" + "\n".join(runbook_paths), model=plan.fast_model, tag="runbook_select")
            resolver = _parse_json_block(resolver_raw, fallback={})
            candidate = resolver.get("path") if isinstance(resolver.get("path"), str) else None
            if not (candidate and candidate in runbook_paths):
                candidate = _best_runbook_match(invalid[0], runbook_paths)
            if candidate and candidate in runbook_paths:
                enforce_prompt = prompts.RUNBOOK_ENFORCE_PROMPT.format(path=candidate)
                reply = await call_llm(prompts.RUNBOOK_ENFORCE_SYSTEM, enforce_prompt + "\nAnswer: " + reply, context=context, model=plan.model, tag="runbook_enforce")
            reply = _strip_unknown_entities(reply, unknown_nodes, unknown_namespaces)

    # Evidence guard: tighten claims that are unsupported by the facts used.
    if facts_used and _needs_evidence_guard(reply, facts_used):
        if observer:
            observer("evidence_guard", "tightening unsupported claims")
        use_guard = True
        if mode in {"smart", "genius"}:
            # Smarter modes may vote that the draft is fine as-is.
            decision = await _contradiction_decision(ContradictionContext(call_llm, normalized, reply, facts_used, plan), attempts=3 if mode == "genius" else 1)
            use_guard = decision.get("use_facts", True)
        if use_guard:
            guard_prompt = prompts.EVIDENCE_GUARD_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nFactsUsed:\n" + "\n".join(facts_used)
            reply = await call_llm(prompts.EVIDENCE_GUARD_SYSTEM, guard_prompt, context=context, model=plan.model, tag="evidence_guard")

    if _needs_focus_fix(normalized, reply, classify):
        if observer:
            observer("focus_fix", "tightening answer")
        reply = await call_llm(prompts.EVIDENCE_FIX_SYSTEM, prompts.FOCUS_FIX_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply, context=context, model=plan.model, tag="focus_fix")
        # NOTE(review): fallback nested under the focus-fix branch, matching the
        # original flow as recovered — confirm against upstream indentation.
        if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
            best_line = _best_keyword_line(summary_lines, keyword_tokens)
            if best_line:
                reply = f"Latest metrics: {best_line}."
    if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
        # If the reply's numbers don't intersect the fact numbers, fall back to
        # quoting the best-matching fact line verbatim.
        best_line = None
        lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
        for line in metric_facts:
            if any(kw in line.lower() for kw in lowered_keywords):
                best_line = line
                break
        best_line = best_line or metric_facts[0]
        reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
        fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
        if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
            reply = f"Latest metrics: {best_line}."

    if _should_use_insight_guard(classify):
        if observer:
            observer("insight_guard", "checking for concrete signals")
        reply = await _apply_insight_guard(InsightGuardInput(question=normalized, reply=reply, classify=classify, context=context, plan=plan, call_llm=call_llm, facts=metric_facts or key_facts))

    # Optional critic pass: revise the draft when the critic finds issues.
    if plan.use_critic:
        if observer:
            observer("critic", "reviewing")
        critic_prompt = prompts.CRITIC_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        critic_raw = await call_llm(prompts.CRITIC_SYSTEM, critic_prompt, context=context, model=plan.model, tag="critic")
        critic = _parse_json_block(critic_raw, fallback={})
        if critic.get("issues"):
            revise_prompt = prompts.REVISION_PROMPT + "\nQuestion: " + normalized + "\nDraft: " + reply + "\nCritique: " + json.dumps(critic)
            reply = await call_llm(prompts.REVISION_SYSTEM, revise_prompt, context=context, model=plan.model, tag="revise")

    # Optional gap pass: append a caveat note when evidence gaps are detected.
    if plan.use_gap:
        if observer:
            observer("gap", "checking gaps")
        gap_prompt = prompts.EVIDENCE_GAP_PROMPT + "\nQuestion: " + normalized + "\nAnswer: " + reply
        gap_raw = await call_llm(prompts.GAP_SYSTEM, gap_prompt, context=context, model=plan.fast_model, tag="gap")
        gap = _parse_json_block(gap_raw, fallback={})
        note = str(gap.get("note") or "").strip()
        if note:
            reply = f"{reply}\n\n{note}"

    # Final clean-up, scoring, and claim extraction for conversation state.
    reply = await engine._dedup_reply(reply, plan, call_llm, tag="dedup")
    scores = await engine._score_answer(normalized, reply, plan, call_llm)
    claims = await engine._extract_claims(normalized, reply, summary, facts_used, call_llm)
    return reply, scores, claims
|
||||
@ -1,35 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class IntentMatch:
    """Describe the best cluster intent match for a user question."""

    # Intent identifier consumed by the routing table, e.g. "nodes_count",
    # "nodes_ready", "hottest_cpu", "hardware_mix".
    kind: str
    # Relative confidence of the match; when several intents fire, the
    # highest-scoring one is returned by `route_intent`.
    score: int
|
||||
|
||||
|
||||
_COUNT_TERMS = r"(how\\s+many|count|number\\s+of|total|totals|tally|amount\\s+of|quantity|sum\\s+of|overall|in\\s+total|all\\s+up)"
|
||||
_NODE_TERMS = r"(nodes?|workers?|worker\\s+nodes?|cluster\\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\\s]?plane|control\\s+plane)"
|
||||
_READY_TERMS = r"(ready|unready|not\\s+ready|down|offline|not\\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)"
|
||||
_COUNT_TERMS = r"(how\s+many|count|number\s+of|total|totals|tally|amount\s+of|quantity|sum\s+of|overall|in\s+total|all\s+up)"
|
||||
_NODE_TERMS = r"(nodes?|workers?|worker\s+nodes?|cluster\s+nodes?|machines?|hosts?|members?|instances?|servers?|agents?|control[-\s]?plane|control\s+plane)"
|
||||
_READY_TERMS = r"(ready|unready|not\s+ready|down|offline|not\s+responding|missing|lost|gone|drain(?:ed|ing)?|cordon(?:ed|ing)?)"
|
||||
_HOTTEST_TERMS = r"(hottest|hot|highest|max(?:imum)?|peak|top|most|worst|spikiest|heaviest|largest|biggest|noisiest|loudest)"
|
||||
_CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\\s+avg|load\\s+average|util(?:ization)?|usage)"
|
||||
_CPU_TERMS = r"(cpu|processor|processors|compute|core|cores|load|load\s+avg|load\s+average|util(?:ization)?|usage)"
|
||||
_RAM_TERMS = r"(ram|memory|mem|heap|rss|resident|swap)"
|
||||
_NET_TERMS = r"(net|network|bandwidth|throughput|traffic|rx|tx|ingress|egress|bits|bytes|packets|pps|bps)"
|
||||
_IO_TERMS = r"(\\bio\\b|i/o|disk\\s+io|disk\\s+activity|read/?write|storage\\s+io|iops|latency)"
|
||||
_DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\\bspace\\b|full|usage)"
|
||||
_PG_TERMS = r"(postgres|postgresql|pg\\b|database|db|sql|psql)"
|
||||
_CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\\s+connections?|open\\s+connections?)"
|
||||
_DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\\s+load)"
|
||||
_NAMESPACE_TERMS = r"(namespace|namespaces|ns\\b|tenant|workload\\s+namespace)"
|
||||
_IO_TERMS = r"(\bio\b|i/o|disk\s+io|disk\s+activity|read/?write|storage\s+io|iops|latency)"
|
||||
_DISK_TERMS = r"(disk|storage|volume|pvc|filesystem|fs|capacity|\bspace\b|full|usage)"
|
||||
_PG_TERMS = r"(postgres|postgresql|pg\b|database|db|sql|psql)"
|
||||
_CONN_TERMS = r"(connections?|conn|pool|sessions?|clients?|active\s+connections?|open\s+connections?)"
|
||||
_DB_HOT_TERMS = r"(hottest|busiest|most|largest|top|heaviest|noisiest|highest\s+load)"
|
||||
_NAMESPACE_TERMS = r"(namespace|namespaces|ns\b|tenant|workload\s+namespace)"
|
||||
_PODS_TERMS = r"(pods?|workloads?|tasks?|containers?|deployments?|jobs?|cronjobs?|daemonsets?|statefulsets?)"
|
||||
_NON_RPI_TERMS = r"(non[-\\s]?raspberry|not\\s+raspberry|non[-\\s]?rpi|not\\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\\b(?!.*rpi))"
|
||||
_PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\\s+capacity|near\\s+limit)"
|
||||
_HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\\s+types?)"
|
||||
_NON_RPI_TERMS = r"(non[-\s]?raspberry|not\s+raspberry|non[-\s]?rpi|not\s+rpi|amd64|x86|x86_64|intel|ryzen|jetson|arm64\b(?!.*rpi))"
|
||||
_PRESSURE_TERMS = r"(pressure|overload|hotspot|bottleneck|saturation|headroom|strain|stress|critical|warning|at\s+capacity|near\s+limit)"
|
||||
_HARDWARE_TERMS = r"(hardware|arch(?:itecture)?|platform|mix|profile|node\s+types?)"
|
||||
|
||||
|
||||
def route_intent(question: str) -> IntentMatch | None:
|
||||
"""Classify a question into a deterministic cluster intent.
|
||||
|
||||
Input:
|
||||
- `question`: user text to inspect.
|
||||
|
||||
Output:
|
||||
- the highest-confidence `IntentMatch`, or `None` when no intent fits.
|
||||
"""
|
||||
|
||||
text = (question or "").lower()
|
||||
if not text:
|
||||
return None
|
||||
@ -44,13 +55,13 @@ def route_intent(question: str) -> IntentMatch | None:
|
||||
return any(_has(pat) for pat in patterns)
|
||||
|
||||
intents = [
|
||||
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
|
||||
(
|
||||
lambda: _all(_READY_TERMS) and (_any(_NODE_TERMS) or "cluster" in text or "workers" in text),
|
||||
IntentMatch("nodes_ready", 85),
|
||||
),
|
||||
(lambda: _all(_COUNT_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_count", 90)),
|
||||
(lambda: _all(_NON_RPI_TERMS) and (_any(_NODE_TERMS) or "cluster" in text), IntentMatch("nodes_non_rpi", 80)),
|
||||
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text), IntentMatch("hardware_mix", 75)),
|
||||
(lambda: _all(_HARDWARE_TERMS) and (_has(_NODE_TERMS) or "cluster" in text or "mix" in text), IntentMatch("hardware_mix", 75)),
|
||||
(lambda: _all(_HOTTEST_TERMS, _CPU_TERMS), IntentMatch("hottest_cpu", 80)),
|
||||
(lambda: _all(_HOTTEST_TERMS, _RAM_TERMS), IntentMatch("hottest_ram", 80)),
|
||||
(lambda: _all(_HOTTEST_TERMS, _NET_TERMS), IntentMatch("hottest_net", 80)),
|
||||
|
||||
@ -7,6 +7,8 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KnowledgeBase:
|
||||
"""Load Atlas knowledge-base files and expose summary snippets."""
|
||||
|
||||
def __init__(self, base_dir: str) -> None:
|
||||
self._base = Path(base_dir) if base_dir else None
|
||||
self._atlas: dict[str, Any] = {}
|
||||
@ -14,6 +16,8 @@ class KnowledgeBase:
|
||||
self._loaded = False
|
||||
|
||||
def load(self) -> None:
|
||||
"""Load catalog files once so subsequent reads stay cheap."""
|
||||
|
||||
if self._loaded or not self._base:
|
||||
return
|
||||
self._atlas = self._read_json(self._base / "catalog" / "atlas.json")
|
||||
@ -30,6 +34,8 @@ class KnowledgeBase:
|
||||
return {}
|
||||
|
||||
def summary(self) -> str:
|
||||
"""Return a short human-readable KB summary for prompt context."""
|
||||
|
||||
self.load()
|
||||
if not self._atlas:
|
||||
return ""
|
||||
@ -42,12 +48,14 @@ class KnowledgeBase:
|
||||
if services:
|
||||
parts.append(f"Services indexed: {len(services)}.")
|
||||
if isinstance(self._atlas, dict):
|
||||
keys = [key for key in self._atlas.keys() if key not in {"sources"}]
|
||||
keys = [key for key in self._atlas if key not in {"sources"}]
|
||||
if keys:
|
||||
parts.append(f"Atlas keys: {', '.join(sorted(keys)[:8])}.")
|
||||
return " ".join(parts)
|
||||
|
||||
def runbook_titles(self, *, limit: int = 5) -> str:
|
||||
"""Render the top runbook titles for prompt context."""
|
||||
|
||||
self.load()
|
||||
if not self._runbooks:
|
||||
return ""
|
||||
@ -64,6 +72,8 @@ class KnowledgeBase:
|
||||
return "Relevant runbooks:\n" + "\n".join(titles[:limit])
|
||||
|
||||
def runbook_paths(self, *, limit: int = 10) -> list[str]:
|
||||
"""Return the runbook paths used for exact-path enforcement."""
|
||||
|
||||
self.load()
|
||||
if not self._runbooks:
|
||||
return []
|
||||
@ -77,6 +87,8 @@ class KnowledgeBase:
|
||||
return paths[:limit]
|
||||
|
||||
def chunk_lines(self, *, max_files: int = 20, max_chars: int = 6000) -> list[str]:
|
||||
"""Collect KB excerpts into prompt-sized chunks."""
|
||||
|
||||
self.load()
|
||||
if not self._base:
|
||||
return []
|
||||
|
||||
@ -17,6 +17,8 @@ class LLMError(RuntimeError):
|
||||
|
||||
|
||||
class LLMClient:
|
||||
"""Wrap the Ollama chat endpoint with retries and fallback-model support."""
|
||||
|
||||
def __init__(self, settings: Settings) -> None:
|
||||
self._settings = settings
|
||||
self._timeout = settings.ollama_timeout_sec
|
||||
@ -37,6 +39,8 @@ class LLMClient:
|
||||
model: str | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
) -> str:
|
||||
"""Send a chat request and return the model content text."""
|
||||
|
||||
payload = {
|
||||
"model": model or self._settings.ollama_model,
|
||||
"messages": messages,
|
||||
@ -77,6 +81,8 @@ class LLMClient:
|
||||
|
||||
|
||||
def build_messages(system: str, prompt: str, *, context: str | None = None) -> list[dict[str, str]]:
|
||||
"""Assemble the minimal chat message list used by the answer pipeline."""
|
||||
|
||||
messages: list[dict[str, str]] = [{"role": "system", "content": system}]
|
||||
if context:
|
||||
messages.append({"role": "user", "content": "Context (grounded facts):\n" + context})
|
||||
@ -85,6 +91,8 @@ def build_messages(system: str, prompt: str, *, context: str | None = None) -> l
|
||||
|
||||
|
||||
def parse_json(text: str, *, fallback: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||
"""Parse a JSON blob from model output and fall back to a safe default."""
|
||||
|
||||
try:
|
||||
raw = text.strip()
|
||||
if raw.startswith("`"):
|
||||
|
||||
@ -253,7 +253,7 @@ CONTRADICTION_PROMPT = (
|
||||
"Question: {question}\n"
|
||||
"Draft: {draft}\n"
|
||||
"FactsUsed:\n{facts}\n\n"
|
||||
"Return JSON: {\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}"
|
||||
"Return JSON: {{\"use_facts\": true|false, \"confidence\": 0-100, \"reason\": \"...\"}}"
|
||||
)
|
||||
|
||||
CANDIDATE_SELECT_SYSTEM = (
|
||||
|
||||
@ -1,13 +1,17 @@
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from datetime import UTC, datetime
|
||||
|
||||
|
||||
class JsonFormatter(logging.Formatter):
|
||||
"""Emit structured log records for the atlasbot services."""
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
"""Render a log record as JSON for downstream ingestion."""
|
||||
|
||||
payload = {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"timestamp": datetime.now(UTC).isoformat(),
|
||||
"level": record.levelname.lower(),
|
||||
"logger": record.name,
|
||||
"message": record.getMessage(),
|
||||
@ -21,6 +25,8 @@ class JsonFormatter(logging.Formatter):
|
||||
|
||||
|
||||
def configure_logging(level: str = "INFO") -> None:
|
||||
"""Install JSON logging on the process root logger."""
|
||||
|
||||
root = logging.getLogger()
|
||||
root.setLevel(level.upper())
|
||||
handler = logging.StreamHandler(sys.stdout)
|
||||
|
||||
@ -17,6 +17,8 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _build_engine(settings) -> AnswerEngine:
|
||||
"""Construct the answer engine from the configured backends."""
|
||||
|
||||
kb = KnowledgeBase(settings.kb_dir)
|
||||
snapshot = SnapshotProvider(settings)
|
||||
llm = LLMClient(settings)
|
||||
@ -24,6 +26,8 @@ def _build_engine(settings) -> AnswerEngine:
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""Start the HTTP API, Matrix bots, and queue worker."""
|
||||
|
||||
settings = load_settings()
|
||||
configure_logging("INFO")
|
||||
|
||||
@ -45,14 +49,7 @@ async def main() -> None:
|
||||
queue = QueueManager(settings, handler)
|
||||
await queue.start()
|
||||
|
||||
async def answer_handler( # noqa: PLR0913
|
||||
question: str,
|
||||
mode: str,
|
||||
history=None,
|
||||
conversation_id=None,
|
||||
snapshot_pin: bool | None = None,
|
||||
observer=None,
|
||||
) -> AnswerResult:
|
||||
async def answer_handler(question: str, mode: str, history=None, conversation_id=None, snapshot_pin: bool | None = None, observer=None) -> AnswerResult:
|
||||
if settings.queue_enabled:
|
||||
payload = await queue.submit(
|
||||
{
|
||||
@ -86,6 +83,8 @@ async def main() -> None:
|
||||
|
||||
|
||||
def result_scores(payload: dict[str, object]) -> AnswerScores:
|
||||
"""Coerce a queue payload into the public `AnswerScores` shape."""
|
||||
|
||||
scores = payload.get("scores") if isinstance(payload, dict) else None
|
||||
if isinstance(scores, dict):
|
||||
try:
|
||||
|
||||
@ -15,11 +15,15 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MatrixClient:
|
||||
"""Wrap the Matrix client endpoints used by the bot runtime."""
|
||||
|
||||
def __init__(self, settings: Settings, bot: MatrixBotConfig) -> None:
|
||||
self._settings = settings
|
||||
self._bot = bot
|
||||
|
||||
async def login(self) -> str:
|
||||
"""Exchange bot credentials for a Matrix access token."""
|
||||
|
||||
payload = {
|
||||
"type": "m.login.password",
|
||||
"identifier": {"type": "m.id.user", "user": self._bot.username},
|
||||
@ -33,6 +37,8 @@ class MatrixClient:
|
||||
return data.get("access_token", "")
|
||||
|
||||
async def resolve_room(self, token: str) -> str:
|
||||
"""Resolve the configured room alias into a room id."""
|
||||
|
||||
alias = quote(self._settings.room_alias, safe="")
|
||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/directory/room/{alias}"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
@ -50,12 +56,16 @@ class MatrixClient:
|
||||
return data.get("room_id", "")
|
||||
|
||||
async def join_room(self, token: str, room_id: str) -> None:
|
||||
"""Join the target room if the bot is not already present."""
|
||||
|
||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/join"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
async with httpx.AsyncClient(timeout=15.0) as client:
|
||||
await client.post(url, headers=headers)
|
||||
|
||||
async def send_message(self, token: str, room_id: str, text: str) -> None:
|
||||
"""Send a plain text message to the Matrix room."""
|
||||
|
||||
url = f"{self._settings.matrix_base}/_matrix/client/v3/rooms/{room_id}/send/m.room.message"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
payload = {"msgtype": "m.text", "body": text}
|
||||
@ -63,6 +73,8 @@ class MatrixClient:
|
||||
await client.post(url, json=payload, headers=headers)
|
||||
|
||||
async def sync(self, token: str, since: str | None) -> dict[str, Any]:
|
||||
"""Fetch the incremental Matrix sync payload."""
|
||||
|
||||
base = f"{self._settings.matrix_base}/_matrix/client/v3/sync"
|
||||
params = {"timeout": 30000}
|
||||
if since:
|
||||
@ -75,17 +87,9 @@ class MatrixClient:
|
||||
|
||||
|
||||
class MatrixBot:
|
||||
def __init__(
|
||||
self,
|
||||
settings: Settings,
|
||||
bot: MatrixBotConfig,
|
||||
engine: AnswerEngine,
|
||||
answer_handler: Callable[
|
||||
[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None],
|
||||
Awaitable[AnswerResult],
|
||||
]
|
||||
| None = None,
|
||||
) -> None:
|
||||
"""Drive Matrix conversation handling and heartbeat replies."""
|
||||
|
||||
def __init__(self, settings: Settings, bot: MatrixBotConfig, engine: AnswerEngine, answer_handler: Callable[[str, str, list[dict[str, str]] | None, str | None, Callable[[str, str], None] | None], Awaitable[AnswerResult]] | None = None) -> None:
|
||||
self._settings = settings
|
||||
self._bot = bot
|
||||
self._engine = engine
|
||||
@ -94,6 +98,8 @@ class MatrixBot:
|
||||
self._history: dict[str, list[dict[str, str]]] = {}
|
||||
|
||||
async def run(self) -> None:
|
||||
"""Continuously bootstrap, sync, and answer Matrix events."""
|
||||
|
||||
while True:
|
||||
try:
|
||||
token = await self._client.login()
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Awaitable, Callable
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any
|
||||
|
||||
from nats.aio.client import Client as NATS
|
||||
from nats.js.errors import NotFoundError
|
||||
@ -12,6 +13,8 @@ log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class QueueManager:
|
||||
"""Manage optional NATS-backed work queue processing."""
|
||||
|
||||
def __init__(self, settings: Settings, handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> None:
|
||||
self._settings = settings
|
||||
self._handler = handler
|
||||
@ -20,6 +23,8 @@ class QueueManager:
|
||||
self._worker_task: asyncio.Task | None = None
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Connect to NATS and start the worker loop when queueing is enabled."""
|
||||
|
||||
if not self._settings.queue_enabled:
|
||||
return
|
||||
self._nc = NATS()
|
||||
@ -29,12 +34,16 @@ class QueueManager:
|
||||
self._worker_task = asyncio.create_task(self._worker_loop())
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Drain the NATS connection and cancel background work."""
|
||||
|
||||
if self._worker_task:
|
||||
self._worker_task.cancel()
|
||||
if self._nc:
|
||||
await self._nc.drain()
|
||||
|
||||
async def submit(self, payload: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Submit work to NATS or fall back to direct handling."""
|
||||
|
||||
if not self._settings.queue_enabled:
|
||||
return await self._handler(payload)
|
||||
if not self._nc or not self._js:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
8
atlasbot/snapshot/builder/__init__.py
Normal file
8
atlasbot/snapshot/builder/__init__.py
Normal file
@ -0,0 +1,8 @@
|
||||
"""Snapshot summary builder and text render helpers."""
|
||||
|
||||
from .core_a import *
|
||||
from .core_b import *
|
||||
from .format_a import *
|
||||
from .format_b import *
|
||||
from .format_c import *
|
||||
from .summary_text import *
|
||||
492
atlasbot/snapshot/builder/core_a.py
Normal file
492
atlasbot/snapshot/builder/core_a.py
Normal file
@ -0,0 +1,492 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from atlasbot.config import Settings
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
PVC_USAGE_CRITICAL = 90
|
||||
|
||||
_BYTES_KB = 1024
|
||||
_BYTES_MB = 1024 * 1024
|
||||
_BYTES_GB = 1024 * 1024 * 1024
|
||||
_VALUE_PAIR_LEN = 2
|
||||
class SnapshotProvider:
    """Fetch and cache the Ariadne snapshot used by the answer engine."""

    def __init__(self, settings: Settings) -> None:
        """Store settings and start with an empty, immediately-stale cache."""
        self._settings = settings
        # Last snapshot payload fetched from Ariadne (empty until first success).
        self._cache: dict[str, Any] = {}
        # Monotonic timestamp of the last successful fetch; 0.0 means "never".
        self._cache_ts = 0.0

    def _cache_valid(self) -> bool:
        # TTL is clamped to at least 5 seconds so a misconfigured/zero TTL
        # cannot force an upstream fetch on every call.
        return time.monotonic() - self._cache_ts < max(5, self._settings.snapshot_ttl_sec)

    def get(self) -> dict[str, Any] | None:
        """Return the cached snapshot or refresh it from Ariadne.

        Returns the fresh payload on success, the previous (possibly stale)
        cache on fetch failure, and None when nothing was ever fetched.
        """

        if self._cache and self._cache_valid():
            return self._cache
        if not self._settings.ariadne_state_url:
            # No upstream configured: serve whatever we already have, or None.
            return self._cache or None
        headers = {}
        if self._settings.ariadne_state_token:
            headers["x-internal-token"] = self._settings.ariadne_state_token
        try:
            resp = httpx.get(self._settings.ariadne_state_url, headers=headers, timeout=10.0)
            resp.raise_for_status()
            payload = resp.json()
            # Only dict payloads are cached; any other JSON shape falls
            # through to the stale-cache return below.
            if isinstance(payload, dict):
                self._cache = payload
                self._cache_ts = time.monotonic()
                return payload
        except Exception as exc:
            # Deliberately broad: snapshot refresh is best-effort and must not
            # propagate network/JSON errors to the caller; log and fall back.
            log.warning("snapshot fetch failed", extra={"extra": {"error": str(exc)}})
        return self._cache or None
|
||||
|
||||
|
||||
def _node_usage_top(series: list[dict[str, Any]]) -> dict[str, Any] | None:
|
||||
best = None
|
||||
for entry in series or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
value = entry.get("value")
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if best is None or numeric > best["value"]:
|
||||
best = {"node": node, "value": numeric}
|
||||
return best
|
||||
|
||||
|
||||
def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
    """Condense a raw snapshot into the summary shape used for prompts."""

    if not snapshot:
        return {}
    # Imported lazily: these sibling builder modules import names from this
    # module, so a top-level import would be circular.
    from .core_b import (
        _build_flux,
        _build_hottest,
        _build_namespace_capacity,
        _build_namespace_capacity_summary,
        _build_node_load_summary,
        _build_pvc,
        _build_workloads,
    )
    from .format_c import _build_cluster_watchlist

    nodes_detail = _nodes_detail(snapshot)
    metrics = _metrics(snapshot)
    summary: dict[str, Any] = {}

    # Raw passthrough sections copied verbatim when present.
    if isinstance(snapshot.get("nodes_summary"), dict):
        summary["nodes_summary"] = snapshot.get("nodes_summary")
    if metrics:
        summary["metrics"] = metrics
    if isinstance(snapshot.get("jobs"), dict):
        summary["jobs"] = snapshot.get("jobs")
    # Each _build_* helper returns {} or a one-key dict, so update() only
    # adds the sections that are actually present in this snapshot.
    summary.update(_build_nodes(snapshot))
    summary.update(_build_pressure(snapshot))
    summary.update(_build_hardware(nodes_detail))
    summary.update(_build_hardware_by_node(nodes_detail))
    summary.update(_build_hardware_usage(metrics, summary.get("hardware_by_node")))
    summary.update(_build_node_facts(nodes_detail))
    summary.update(_build_node_ages(nodes_detail))
    summary.update(_build_node_taints(nodes_detail))
    summary.update(_build_capacity(metrics))
    summary.update(_build_pods(metrics))
    summary.update(_build_namespace_pods(snapshot))
    summary.update(_build_namespace_nodes(snapshot))
    summary.update(_build_node_pods(snapshot))
    summary.update(_build_node_pods_top(metrics))
    summary.update(_build_pod_issues(snapshot))
    summary.update(_build_workload_health(snapshot))
    summary.update(_build_events(snapshot))
    summary.update(_build_event_summary(snapshot))
    summary.update(_build_postgres(metrics))
    summary.update(_build_hottest(metrics))
    summary.update(_build_pvc(metrics))
    summary.update(_build_namespace_capacity(metrics))
    summary.update(_build_namespace_capacity_summary(metrics))
    summary.update(_build_longhorn(snapshot))
    summary.update(_build_root_disk_headroom(metrics))
    summary.update(_build_node_load(metrics))
    summary.update(_build_node_load_summary(metrics))
    # Unlike the builders above, this one receives the partially built
    # summary itself, so it must run after the sections it derives from.
    summary.update(_build_cluster_watchlist(summary))
    summary.update(_build_workloads(snapshot))
    summary.update(_build_flux(snapshot))
    # In-place enrichment of the assembled summary.
    _merge_cluster_summary(snapshot, summary)
    _augment_lexicon(summary)
    return summary
|
||||
|
||||
|
||||
def _merge_cluster_summary(snapshot: dict[str, Any], summary: dict[str, Any]) -> None:
    """Fold typed cluster-level summary fields into *summary* in place."""
    raw = snapshot.get("summary")
    if not isinstance(raw, dict) or not raw:
        return
    # Field name -> type it must have to be copied over.
    expected: dict[str, type] = {
        "signals": list,
        "profiles": dict,
        "inventory": dict,
        "topology": dict,
        "lexicon": dict,
        "cross_stats": dict,
        "baseline_deltas": dict,
        "pod_issue_summary": dict,
        "trend_requests": dict,
        "pod_waiting_trends": dict,
        "pod_terminated_trends": dict,
    }
    _merge_cluster_fields(summary, raw, expected)
|
||||
|
||||
|
||||
def _merge_cluster_fields(summary: dict[str, Any], cluster_summary: dict[str, Any], field_types: dict[str, type]) -> None:
|
||||
for key, expected in field_types.items():
|
||||
value = cluster_summary.get(key)
|
||||
if isinstance(value, expected):
|
||||
summary[key] = value
|
||||
|
||||
|
||||
def _augment_lexicon(summary: dict[str, Any]) -> None:
    """Seed the summary lexicon with hardware terms/aliases present in the cluster."""
    lexicon = summary.get("lexicon")
    if not isinstance(lexicon, dict):
        lexicon = {"terms": [], "aliases": {}}
    # Copy before mutating so a lexicon dict shared with the raw snapshot
    # is never modified in place.
    terms = list(lexicon.get("terms") or [])
    aliases = dict(lexicon.get("aliases") or {})
    hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
    # Short hardware-class key -> human-readable meaning to expose.
    hardware_map = {
        "rpi5": "Raspberry Pi 5 nodes",
        "rpi4": "Raspberry Pi 4 nodes",
        "rpi": "Raspberry Pi nodes",
        "jetson": "NVIDIA Jetson nodes",
        "amd64": "AMD64 nodes",
    }
    existing_terms = {entry.get("term") for entry in terms if isinstance(entry, dict)}
    for key, meaning in hardware_map.items():
        # Only advertise hardware classes that actually exist in this cluster,
        # and never overwrite terms/aliases the lexicon already defines.
        if key not in hardware:
            continue
        if key not in existing_terms:
            terms.append({"term": key, "meaning": meaning})
        if key not in aliases:
            aliases[key] = meaning
    # Long-form aliases map the spelled-out name back to the short key.
    if "raspberry pi 5" not in aliases and "rpi5" in hardware:
        aliases["raspberry pi 5"] = "rpi5"
    if "raspberry pi 4" not in aliases and "rpi4" in hardware:
        aliases["raspberry pi 4"] = "rpi4"
    lexicon["terms"] = terms
    lexicon["aliases"] = aliases
    summary["lexicon"] = lexicon
|
||||
|
||||
|
||||
def _nodes_detail(snapshot: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
items = snapshot.get("nodes_detail")
|
||||
return items if isinstance(items, list) else []
|
||||
|
||||
|
||||
def _metrics(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
metrics = snapshot.get("metrics")
|
||||
return metrics if isinstance(metrics, dict) else {}
|
||||
|
||||
|
||||
def _build_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
|
||||
if not nodes_summary:
|
||||
return {}
|
||||
return {
|
||||
"nodes": {
|
||||
"total": nodes_summary.get("total"),
|
||||
"ready": nodes_summary.get("ready"),
|
||||
"not_ready": nodes_summary.get("not_ready"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_pressure(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
|
||||
pressure = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary.get("pressure_nodes"), dict) else {}
|
||||
if not pressure:
|
||||
return {}
|
||||
return {"pressure_nodes": pressure}
|
||||
|
||||
|
||||
def _build_hardware(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
hardware: dict[str, list[str]] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
hardware_class = node.get("hardware") or "unknown"
|
||||
if name:
|
||||
hardware.setdefault(hardware_class, []).append(name)
|
||||
if not hardware:
|
||||
return {}
|
||||
return {"hardware": {key: sorted(value) for key, value in hardware.items()}}
|
||||
|
||||
|
||||
def _build_hardware_by_node(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
mapping: dict[str, str] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
hardware = node.get("hardware") or "unknown"
|
||||
mapping[name] = str(hardware)
|
||||
return {"hardware_by_node": mapping} if mapping else {}
|
||||
|
||||
|
||||
def _build_hardware_usage(metrics: dict[str, Any], hardware_by_node: dict[str, Any] | None) -> dict[str, Any]:  # noqa: C901
    """Average per-node load metrics into one row per hardware class.

    Returns {"hardware_usage_avg": [...]} sorted by descending load_index
    (hardware name breaks ties), or {} when either input is missing.
    """
    if not isinstance(hardware_by_node, dict) or not hardware_by_node:
        return {}
    node_load = metrics.get("node_load") if isinstance(metrics.get("node_load"), list) else []
    if not node_load:
        return {}
    # hardware class -> metric name -> list of per-node samples
    buckets: dict[str, dict[str, list[float]]] = {}
    for entry in node_load:
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        if not isinstance(node, str) or not node:
            continue
        # Nodes missing from the mapping are grouped under "unknown".
        hardware = hardware_by_node.get(node, "unknown")
        bucket = buckets.setdefault(str(hardware), {"load_index": [], "cpu": [], "ram": [], "net": [], "io": []})
        for key in ("load_index", "cpu", "ram", "net", "io"):
            value = entry.get(key)
            # NOTE(review): isinstance(..., (int, float)) also admits bool;
            # presumably upstream values are always numeric — confirm.
            if isinstance(value, (int, float)):
                bucket[key].append(float(value))
    output: list[dict[str, Any]] = []
    for hardware, metrics_bucket in buckets.items():
        row: dict[str, Any] = {"hardware": hardware}
        for key, values in metrics_bucket.items():
            # Metrics with no samples are omitted from the row entirely.
            if values:
                row[key] = sum(values) / len(values)
        output.append(row)
    output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
    return {"hardware_usage_avg": output}
|
||||
|
||||
|
||||
def _build_node_ages(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
ages: list[dict[str, Any]] = []
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
age = node.get("age_hours")
|
||||
if name and isinstance(age, (int, float)):
|
||||
ages.append({"name": name, "age_hours": age})
|
||||
ages.sort(key=lambda item: -(item.get("age_hours") or 0))
|
||||
return {"node_ages": ages[:5]} if ages else {}
|
||||
|
||||
|
||||
def _count_values(nodes_detail: list[dict[str, Any]], key: str) -> dict[str, int]:
|
||||
counts: dict[str, int] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
value = node.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
counts[value] = counts.get(value, 0) + 1
|
||||
return counts
|
||||
|
||||
|
||||
def _build_node_facts(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
    """Summarise arch/OS/version/role distributions across the node fleet."""
    if not nodes_detail:
        return {}
    role_counts: dict[str, int] = {}
    for node in nodes_detail:
        if not isinstance(node, dict):
            continue
        # The is_worker flag counts toward the synthetic "worker" role.
        if node.get("is_worker"):
            role_counts["worker"] = role_counts.get("worker", 0) + 1
        roles = node.get("roles")
        if not isinstance(roles, list):
            continue
        for role in roles:
            if isinstance(role, str) and role:
                role_counts[role] = role_counts.get(role, 0) + 1
    facts: dict[str, Any] = {
        field: _count_values(nodes_detail, source)
        for field, source in (
            ("node_arch_counts", "arch"),
            ("node_os_counts", "os"),
            ("node_kubelet_versions", "kubelet"),
            ("node_kernel_versions", "kernel"),
            ("node_runtime_versions", "container_runtime"),
        )
    }
    facts["node_role_counts"] = role_counts
    return facts
|
||||
|
||||
|
||||
def _build_node_taints(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
taints: dict[str, list[str]] = {}
|
||||
for node in nodes_detail or []:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
if not name:
|
||||
continue
|
||||
entries = node.get("taints") if isinstance(node.get("taints"), list) else []
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
key = entry.get("key")
|
||||
effect = entry.get("effect")
|
||||
if isinstance(key, str) and isinstance(effect, str):
|
||||
label = f"{key}:{effect}"
|
||||
taints.setdefault(label, []).append(name)
|
||||
if not taints:
|
||||
return {}
|
||||
return {"node_taints": {key: sorted(names) for key, names in taints.items()}}
|
||||
|
||||
|
||||
def _build_root_disk_headroom(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
|
||||
disk = node_usage.get("disk") if isinstance(node_usage.get("disk"), list) else []
|
||||
if not disk:
|
||||
return {}
|
||||
entries = []
|
||||
for entry in disk:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
try:
|
||||
used_pct = float(entry.get("value"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
headroom = max(0.0, 100.0 - used_pct)
|
||||
if node:
|
||||
entries.append({"node": node, "headroom_pct": headroom, "used_pct": used_pct})
|
||||
entries.sort(key=lambda item: (item.get("headroom_pct") or 0.0, item.get("node") or ""))
|
||||
return {"root_disk_low_headroom": entries[:5]} if entries else {}
|
||||
|
||||
|
||||
def _build_longhorn(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
longhorn = snapshot.get("longhorn")
|
||||
return {"longhorn": longhorn} if isinstance(longhorn, dict) and longhorn else {}
|
||||
|
||||
|
||||
def _build_node_load(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
node_load = metrics.get("node_load")
|
||||
if not isinstance(node_load, list) or not node_load:
|
||||
return {}
|
||||
return {"node_load": node_load}
|
||||
|
||||
|
||||
def _build_pods(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
pods = {
|
||||
"running": metrics.get("pods_running"),
|
||||
"pending": metrics.get("pods_pending"),
|
||||
"failed": metrics.get("pods_failed"),
|
||||
"succeeded": metrics.get("pods_succeeded"),
|
||||
}
|
||||
if not any(value is not None for value in pods.values()):
|
||||
return {}
|
||||
return {"pods": pods}
|
||||
|
||||
|
||||
def _build_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
if not metrics:
|
||||
return {}
|
||||
capacity = {
|
||||
"cpu": metrics.get("capacity_cpu"),
|
||||
"allocatable_cpu": metrics.get("allocatable_cpu"),
|
||||
"mem_bytes": metrics.get("capacity_mem_bytes"),
|
||||
"allocatable_mem_bytes": metrics.get("allocatable_mem_bytes"),
|
||||
"pods": metrics.get("capacity_pods"),
|
||||
"allocatable_pods": metrics.get("allocatable_pods"),
|
||||
}
|
||||
if not any(value is not None for value in capacity.values()):
|
||||
return {}
|
||||
return {"capacity": capacity}
|
||||
|
||||
|
||||
def _build_namespace_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
namespaces = snapshot.get("namespace_pods")
|
||||
if not isinstance(namespaces, list) or not namespaces:
|
||||
return {}
|
||||
return {"namespace_pods": namespaces}
|
||||
|
||||
|
||||
def _build_namespace_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
namespace_nodes = snapshot.get("namespace_nodes")
|
||||
if not isinstance(namespace_nodes, list) or not namespace_nodes:
|
||||
return {}
|
||||
return {"namespace_nodes": namespace_nodes}
|
||||
|
||||
|
||||
def _build_node_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
node_pods = snapshot.get("node_pods")
|
||||
if not isinstance(node_pods, list) or not node_pods:
|
||||
return {}
|
||||
return {"node_pods": node_pods}
|
||||
|
||||
|
||||
def _build_node_pods_top(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
top = metrics.get("node_pods_top")
|
||||
if not isinstance(top, list) or not top:
|
||||
return {}
|
||||
return {"node_pods_top": top}
|
||||
|
||||
|
||||
def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
pod_issues = snapshot.get("pod_issues")
|
||||
if not isinstance(pod_issues, dict) or not pod_issues:
|
||||
return {}
|
||||
return {"pod_issues": pod_issues}
|
||||
|
||||
|
||||
def _build_workload_health(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
health = snapshot.get("workloads_health")
|
||||
if not isinstance(health, dict) or not health:
|
||||
return {}
|
||||
deployments = health.get("deployments")
|
||||
statefulsets = health.get("statefulsets")
|
||||
daemonsets = health.get("daemonsets")
|
||||
if not isinstance(deployments, dict) or not isinstance(statefulsets, dict) or not isinstance(daemonsets, dict):
|
||||
return {}
|
||||
return {
|
||||
"workloads_health": {
|
||||
"deployments": deployments,
|
||||
"statefulsets": statefulsets,
|
||||
"daemonsets": daemonsets,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_events(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
events = snapshot.get("events")
|
||||
if not isinstance(events, dict) or not events:
|
||||
return {}
|
||||
return {"events": events}
|
||||
|
||||
|
||||
def _build_event_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
events = snapshot.get("events")
|
||||
if not isinstance(events, dict) or not events:
|
||||
return {}
|
||||
summary = {}
|
||||
if isinstance(events.get("warnings_top_reason"), dict):
|
||||
summary["warnings_top_reason"] = events.get("warnings_top_reason")
|
||||
if events.get("warnings_latest"):
|
||||
summary["warnings_latest"] = events.get("warnings_latest")
|
||||
return {"event_summary": summary} if summary else {}
|
||||
|
||||
|
||||
def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
|
||||
if not postgres:
|
||||
return {}
|
||||
return {
|
||||
"postgres": {
|
||||
"used": postgres.get("used"),
|
||||
"max": postgres.get("max"),
|
||||
"hottest_db": postgres.get("hottest_db"),
|
||||
"by_db": postgres.get("by_db"),
|
||||
}
|
||||
}
|
||||
57
atlasbot/snapshot/builder/core_b.py
Normal file
57
atlasbot/snapshot/builder/core_b.py
Normal file
@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import _node_usage_top
|
||||
|
||||
def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
    """Pick the hottest node for each usage dimension."""
    raw = metrics.get("node_usage")
    node_usage = raw if isinstance(raw, dict) else {}
    hottest: dict[str, Any] = {}
    for dimension in ("cpu", "ram", "net", "io", "disk"):
        top = _node_usage_top(node_usage.get(dimension, []))
        if top:
            hottest[dimension] = top
    return {"hottest": hottest} if hottest else {}
|
||||
|
||||
|
||||
def _build_pvc(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
pvc_usage = metrics.get("pvc_usage_top") if isinstance(metrics.get("pvc_usage_top"), list) else []
|
||||
if not pvc_usage:
|
||||
return {}
|
||||
return {"pvc_usage_top": pvc_usage}
|
||||
|
||||
|
||||
def _build_namespace_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
capacity = metrics.get("namespace_capacity")
|
||||
if not isinstance(capacity, list) or not capacity:
|
||||
return {}
|
||||
return {"namespace_capacity": capacity}
|
||||
|
||||
|
||||
def _build_namespace_capacity_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
summary = metrics.get("namespace_capacity_summary")
|
||||
if not isinstance(summary, dict) or not summary:
|
||||
return {}
|
||||
return {"namespace_capacity_summary": summary}
|
||||
|
||||
|
||||
def _build_node_load_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
summary = metrics.get("node_load_summary")
|
||||
if not isinstance(summary, dict) or not summary:
|
||||
return {}
|
||||
return {"node_load_summary": summary}
|
||||
|
||||
|
||||
def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else []
|
||||
return {"workloads": workloads}
|
||||
|
||||
|
||||
def _build_flux(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
flux = snapshot.get("flux") if isinstance(snapshot.get("flux"), dict) else {}
|
||||
return {"flux": flux}
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
497
atlasbot/snapshot/builder/format_a.py
Normal file
497
atlasbot/snapshot/builder/format_a.py
Normal file
@ -0,0 +1,497 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import _BYTES_GB, _BYTES_KB, _BYTES_MB
|
||||
from .core_b import *
|
||||
|
||||
|
||||
def _format_float(value: Any) -> str:
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return str(value)
|
||||
return f"{numeric:.2f}".rstrip("0").rstrip(".")
|
||||
|
||||
|
||||
def _format_rate_bytes(value: Any) -> str:
    """Format a bytes-per-second rate as B/s, KB/s, or MB/s."""
    try:
        rate = float(value)
    except (TypeError, ValueError):
        return str(value)
    if rate >= _BYTES_MB:
        return f"{rate / _BYTES_MB:.2f} MB/s"
    if rate >= _BYTES_KB:
        return f"{rate / _BYTES_KB:.2f} KB/s"
    return f"{rate:.2f} B/s"
|
||||
|
||||
|
||||
def _format_bytes(value: Any) -> str:
    """Format a byte count as B, KB, MB, or GB, two decimals."""
    try:
        size = float(value)
    except (TypeError, ValueError):
        return str(value)
    if size >= _BYTES_GB:
        return f"{size / _BYTES_GB:.2f} GB"
    if size >= _BYTES_MB:
        return f"{size / _BYTES_MB:.2f} MB"
    if size >= _BYTES_KB:
        return f"{size / _BYTES_KB:.2f} KB"
    return f"{size:.2f} B"
|
||||
|
||||
|
||||
def _format_kv_map(values: dict[str, Any]) -> str:
|
||||
parts = []
|
||||
for key, value in values.items():
|
||||
parts.append(f"{key}={value}")
|
||||
return ", ".join(parts)
|
||||
|
||||
|
||||
def _format_names(names: list[str]) -> str:
|
||||
if not names:
|
||||
return ""
|
||||
return ", ".join(sorted(names))
|
||||
|
||||
|
||||
def _append_nodes(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append node-count lines (totals, readiness, archs, roles) to *lines*.

    Mutates *lines* in place; appends nothing when the summary has no
    "nodes" section.
    """
    nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
    if not nodes:
        return
    workers = {}
    if isinstance(summary.get("nodes_summary"), dict):
        workers = summary["nodes_summary"].get("workers") or {}
    workers_total = workers.get("total")
    workers_ready = workers.get("ready")
    # Worker counts are only rendered when both numbers are available.
    workers_str = ""
    if workers_total is not None and workers_ready is not None:
        workers_str = f", workers_ready={workers_ready}/{workers_total}"
    total = nodes.get("total")
    ready = nodes.get("ready")
    not_ready = nodes.get("not_ready")
    # A missing not_ready count is rendered as 0 rather than "None".
    if not_ready is None:
        not_ready = 0
    lines.append(f"nodes: total={total}, ready={ready}, not_ready={not_ready}{workers_str}")
    # Individual one-value lines mirror the combined line for easy grepping.
    if total is not None:
        lines.append(f"nodes_total: {total}")
    if ready is not None:
        lines.append(f"nodes_ready: {ready}")
    if not_ready is not None:
        lines.append(f"nodes_not_ready_count: {not_ready}")
    # The remaining lines all come from nodes_summary details.
    if not isinstance(summary.get("nodes_summary"), dict):
        return
    not_ready_names = summary["nodes_summary"].get("not_ready_names") or []
    if not_ready_names:
        lines.append("nodes_not_ready: " + _format_names(not_ready_names))
    by_arch = summary["nodes_summary"].get("by_arch") or {}
    if isinstance(by_arch, dict) and by_arch:
        lines.append("archs: " + _format_kv_map(by_arch))
    by_role = summary["nodes_summary"].get("by_role") or {}
    if isinstance(by_role, dict) and by_role:
        lines.append("roles: " + _format_kv_map(by_role))
|
||||
|
||||
|
||||
def _append_hardware(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``hardware:`` line counting nodes per hardware class."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    labels = []
    for kind, members in hardware.items():
        if not isinstance(members, list):
            continue
        rendered = _format_names([str(member) for member in members if member])
        count_label = f"{kind}={len(members)}"
        labels.append(f"{count_label} ({rendered})" if rendered else count_label)
    if labels:
        lines.append("hardware: " + "; ".join(sorted(labels)))
|
||||
|
||||
|
||||
def _append_hardware_groups(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``hardware_nodes:`` line listing node names per hardware class."""
    hardware = summary.get("hardware")
    if not isinstance(hardware, dict) or not hardware:
        return
    rendered = [
        f"{kind}={names}"
        for kind, members in hardware.items()
        if isinstance(members, list)
        and (names := _format_names([str(member) for member in members if member]))
    ]
    if rendered:
        lines.append("hardware_nodes: " + "; ".join(sorted(rendered)))
|
||||
|
||||
|
||||
def _append_node_ages(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the first three ``node_ages`` entries as ``name=<hours>h``."""
    ages = summary.get("node_ages")
    if not isinstance(ages, list) or not ages:
        return
    rendered = []
    for record in ages[:3]:
        if not isinstance(record, dict):
            continue
        node = record.get("name")
        hours = record.get("age_hours")
        if node and isinstance(hours, (int, float)):
            rendered.append(f"{node}={_format_float(hours)}h")
    if rendered:
        lines.append("node_age_top: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_node_taints(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``node_taints:`` line with per-taint counts and node names."""
    taints = summary.get("node_taints")
    if not isinstance(taints, dict) or not taints:
        return
    rendered = []
    for taint, members in taints.items():
        if not isinstance(members, list):
            continue
        names = _format_names([str(member) for member in members if member])
        suffix = f" ({names})" if names else ""
        rendered.append(f"{taint}={len(members)}{suffix}")
    if rendered:
        lines.append("node_taints: " + "; ".join(sorted(rendered)))
|
||||
|
||||
|
||||
def _append_node_facts(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
def top_counts(label: str, counts: dict[str, int], limit: int = 4) -> None:
|
||||
if not counts:
|
||||
return
|
||||
top = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:limit]
|
||||
rendered = "; ".join([f"{name}={count}" for name, count in top])
|
||||
if rendered:
|
||||
lines.append(f"{label}: {rendered}")
|
||||
|
||||
top_counts("node_arch", summary.get("node_arch_counts") or {})
|
||||
top_counts("node_os", summary.get("node_os_counts") or {})
|
||||
top_counts("node_kubelet_versions", summary.get("node_kubelet_versions") or {})
|
||||
top_counts("node_kernel_versions", summary.get("node_kernel_versions") or {})
|
||||
top_counts("node_runtime_versions", summary.get("node_runtime_versions") or {})
|
||||
top_counts("node_roles", summary.get("node_role_counts") or {})
|
||||
|
||||
|
||||
def _append_pressure(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``node_pressure:`` line listing nodes per pressure condition."""
    pressure = summary.get("pressure_nodes")
    if not isinstance(pressure, dict) or not pressure:
        return
    rendered = []
    for condition, members in sorted(pressure.items()):
        if not members:
            continue
        names = _format_names([str(member) for member in members if member])
        suffix = f" ({names})" if names else ""
        rendered.append(f"{condition}={len(members)}{suffix}")
    if rendered:
        lines.append("node_pressure: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_pods(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
pods = summary.get("pods") if isinstance(summary.get("pods"), dict) else {}
|
||||
if not pods:
|
||||
return
|
||||
lines.append(
|
||||
"pods: running={running}, pending={pending}, failed={failed}, succeeded={succeeded}".format(
|
||||
running=pods.get("running"),
|
||||
pending=pods.get("pending"),
|
||||
failed=pods.get("failed"),
|
||||
succeeded=pods.get("succeeded"),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _append_capacity(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``capacity:`` line with cluster cpu/mem/pod totals."""
    capacity = summary.get("capacity")
    if not isinstance(capacity, dict) or not capacity:
        return
    # (source key, output label, formatter) — order fixes the output order.
    fields = (
        ("cpu", "cpu", _format_float),
        ("allocatable_cpu", "alloc_cpu", _format_float),
        ("mem_bytes", "mem", _format_bytes),
        ("allocatable_mem_bytes", "alloc_mem", _format_bytes),
        ("pods", "pods", _format_float),
        ("allocatable_pods", "alloc_pods", _format_float),
    )
    rendered = [
        f"{label}={fmt(capacity.get(key))}"
        for key, label, fmt in fields
        if capacity.get(key) is not None
    ]
    if rendered:
        lines.append("capacity: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_namespace_pods(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
namespaces = summary.get("namespace_pods")
|
||||
if not isinstance(namespaces, list) or not namespaces:
|
||||
return
|
||||
top = sorted(
|
||||
(item for item in namespaces if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
|
||||
)[:8]
|
||||
parts = []
|
||||
for item in top:
|
||||
name = item.get("namespace")
|
||||
total = item.get("pods_total")
|
||||
running = item.get("pods_running")
|
||||
if not name:
|
||||
continue
|
||||
label = f"{name}={total}"
|
||||
if running is not None:
|
||||
label = f"{label} (running={running})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("namespaces_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_namespace_nodes(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
namespace_nodes = summary.get("namespace_nodes")
|
||||
if not isinstance(namespace_nodes, list) or not namespace_nodes:
|
||||
return
|
||||
top = sorted(
|
||||
(item for item in namespace_nodes if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("namespace") or ""),
|
||||
)[:8]
|
||||
parts = []
|
||||
for item in top:
|
||||
namespace = item.get("namespace")
|
||||
pods_total = item.get("pods_total")
|
||||
primary = item.get("primary_node")
|
||||
if namespace:
|
||||
label = f"{namespace}={pods_total}"
|
||||
if primary:
|
||||
label = f"{label} (primary={primary})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("namespace_nodes_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_node_pods(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901
|
||||
node_pods = summary.get("node_pods")
|
||||
if not isinstance(node_pods, list) or not node_pods:
|
||||
return
|
||||
sortable: list[dict[str, Any]] = []
|
||||
for item in node_pods:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
try:
|
||||
pods_value = int(item.get("pods_total") or 0)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
sortable.append({**item, "pods_total": pods_value})
|
||||
top = sorted(sortable, key=lambda item: (-int(item.get("pods_total") or 0), item.get("node") or ""))[:8]
|
||||
max_entry = None
|
||||
for entry in node_pods:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
pods_total = entry.get("pods_total")
|
||||
try:
|
||||
pods_value = int(pods_total)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if max_entry is None or pods_value > max_entry["pods_total"]:
|
||||
max_entry = {
|
||||
"node": entry.get("node"),
|
||||
"pods_total": pods_value,
|
||||
"namespaces_top": entry.get("namespaces_top") or [],
|
||||
}
|
||||
parts = []
|
||||
for item in top:
|
||||
node = item.get("node")
|
||||
pods_total = item.get("pods_total")
|
||||
namespaces = item.get("namespaces_top") or []
|
||||
ns_label = ""
|
||||
if namespaces:
|
||||
ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
|
||||
if node:
|
||||
label = f"{node}={pods_total}"
|
||||
if ns_label:
|
||||
label = f"{label} ({ns_label})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("node_pods_top: " + "; ".join(parts))
|
||||
if max_entry and isinstance(max_entry.get("node"), str):
|
||||
ns_label = ""
|
||||
namespaces = max_entry.get("namespaces_top") or []
|
||||
if namespaces:
|
||||
ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
|
||||
label = f"{max_entry.get('node')}={max_entry.get('pods_total')}"
|
||||
if ns_label:
|
||||
label = f"{label} ({ns_label})"
|
||||
lines.append("node_pods_max: " + label)
|
||||
for item in top:
|
||||
node = item.get("node")
|
||||
namespaces = item.get("namespaces_top") or []
|
||||
if not node or not namespaces:
|
||||
continue
|
||||
ns_label = ", ".join([f"{name}={count}" for name, count in namespaces])
|
||||
lines.append(f"node_namespaces_top: {node} ({ns_label})")
|
||||
|
||||
|
||||
def _append_pod_issues(lines: list[str], summary: dict[str, Any]) -> None:
    """Append pod-issue lines: counts, worst pods, and pending statistics."""
    pod_issues = summary.get("pod_issues")
    if not isinstance(pod_issues, dict) or not pod_issues:
        return
    # Order matters: it fixes the order of the emitted lines.
    builders = (
        _format_pod_issue_counts,
        _format_pod_issue_top,
        _format_pod_pending_oldest,
        _format_pod_pending_over_15m,
        _format_pod_waiting_reasons,
    )
    for build in builders:
        line = build(pod_issues)
        if line:
            lines.append(line)
|
||||
|
||||
|
||||
def _format_pod_issue_counts(pod_issues: dict[str, Any]) -> str:
|
||||
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
|
||||
if not counts:
|
||||
return ""
|
||||
parts = []
|
||||
for key in ("Failed", "Pending", "Unknown"):
|
||||
if key in counts:
|
||||
parts.append(f"{key}={counts.get(key)}")
|
||||
return "pod_issues: " + "; ".join(parts) if parts else ""
|
||||
|
||||
|
||||
def _format_pod_issue_top(pod_issues: dict[str, Any]) -> str:
|
||||
items = pod_issues.get("items") if isinstance(pod_issues.get("items"), list) else []
|
||||
if not items:
|
||||
return ""
|
||||
top = []
|
||||
for item in items[:5]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
namespace = item.get("namespace")
|
||||
pod = item.get("pod")
|
||||
if not namespace or not pod:
|
||||
continue
|
||||
phase = item.get("phase") or ""
|
||||
restarts = item.get("restarts") or 0
|
||||
top.append(f"{namespace}/{pod}({phase},r={restarts})")
|
||||
return "pod_issues_top: " + "; ".join(top) if top else ""
|
||||
|
||||
|
||||
def _format_pod_pending_oldest(pod_issues: dict[str, Any]) -> str:
    """Render up to five ``pending_oldest`` entries as ``ns/pod=<age>h (reason)``."""
    pending = pod_issues.get("pending_oldest")
    if not isinstance(pending, list) or not pending:
        return ""
    rendered = []
    for item in pending[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        pod = item.get("pod")
        age = item.get("age_hours")
        if not namespace or not pod or age is None:
            continue
        label = f"{namespace}/{pod}={_format_float(age)}h"
        reason = item.get("reason") or ""
        if reason:
            label = f"{label} ({reason})"
        rendered.append(label)
    return "pods_pending_oldest: " + "; ".join(rendered) if rendered else ""
|
||||
|
||||
|
||||
def _format_pod_waiting_reasons(pod_issues: dict[str, Any]) -> str:
|
||||
reasons = pod_issues.get("waiting_reasons") if isinstance(pod_issues.get("waiting_reasons"), dict) else {}
|
||||
if not reasons:
|
||||
return ""
|
||||
pairs = sorted(reasons.items(), key=lambda item: (-item[1], item[0]))[:5]
|
||||
return "pod_waiting_reasons: " + "; ".join([f"{key}={val}" for key, val in pairs])
|
||||
|
||||
|
||||
def _format_pod_pending_over_15m(pod_issues: dict[str, Any]) -> str:
|
||||
count = pod_issues.get("pending_over_15m")
|
||||
if count is None:
|
||||
return ""
|
||||
try:
|
||||
count_val = int(count)
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
return f"pods_pending_over_15m: {count_val}"
|
||||
|
||||
|
||||
def _append_workload_health(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
health = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
|
||||
if not health:
|
||||
return
|
||||
deployments = health.get("deployments") if isinstance(health.get("deployments"), dict) else {}
|
||||
statefulsets = health.get("statefulsets") if isinstance(health.get("statefulsets"), dict) else {}
|
||||
daemonsets = health.get("daemonsets") if isinstance(health.get("daemonsets"), dict) else {}
|
||||
total_not_ready = 0
|
||||
for entry in (deployments, statefulsets, daemonsets):
|
||||
total_not_ready += int(entry.get("not_ready") or 0)
|
||||
lines.append(
|
||||
"workloads_not_ready: "
|
||||
f"deployments={deployments.get('not_ready', 0)}, "
|
||||
f"statefulsets={statefulsets.get('not_ready', 0)}, "
|
||||
f"daemonsets={daemonsets.get('not_ready', 0)} "
|
||||
f"(total={total_not_ready})"
|
||||
)
|
||||
|
||||
|
||||
def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
    """Append average node usage (cpu/ram/net/io/disk) from metrics."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    stats = metrics.get("node_usage_stats")
    if not isinstance(stats, dict) or not stats:
        return
    rendered = []
    for resource in ("cpu", "ram", "net", "io", "disk"):
        entry = stats.get(resource)
        if not isinstance(entry, dict):
            entry = {}
        avg = entry.get("avg")
        if avg is None:
            continue
        # net/io are formatted as byte rates; the rest as plain numbers.
        formatted = _format_rate_bytes(avg) if resource in {"net", "io"} else _format_float(avg)
        rendered.append(f"{resource}={formatted}")
    if rendered:
        lines.append("node_usage_avg: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_events(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
events = summary.get("events") if isinstance(summary.get("events"), dict) else {}
|
||||
if not events:
|
||||
return
|
||||
total = events.get("warnings_total")
|
||||
by_reason = events.get("warnings_by_reason") if isinstance(events.get("warnings_by_reason"), dict) else {}
|
||||
if total is None:
|
||||
return
|
||||
if by_reason:
|
||||
top = sorted(by_reason.items(), key=lambda item: (-item[1], item[0]))[:3]
|
||||
reasons = "; ".join([f"{reason}={count}" for reason, count in top])
|
||||
lines.append(f"warnings: total={total}; top={reasons}")
|
||||
else:
|
||||
lines.append(f"warnings: total={total}")
|
||||
def _append_pvc_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append ``pvc_usage_top:`` entries as ``namespace/claim=<value>%``."""
    pvc_usage = summary.get("pvc_usage_top")
    if not isinstance(pvc_usage, list) or not pvc_usage:
        return
    rendered = []
    for entry in pvc_usage:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        if not isinstance(metric, dict):
            metric = {}
        namespace = metric.get("namespace")
        claim = metric.get("persistentvolumeclaim")
        if namespace and claim:
            rendered.append(f"{namespace}/{claim}={_format_float(entry.get('value'))}%")
    if rendered:
        lines.append("pvc_usage_top: " + "; ".join(rendered))
|
||||
def _append_root_disk_headroom(lines: list[str], summary: dict[str, Any]) -> None:
    """Append ``root_disk_low_headroom:`` entries as ``node=<pct>%``."""
    headroom = summary.get("root_disk_low_headroom")
    if not isinstance(headroom, list) or not headroom:
        return
    rendered = []
    for entry in headroom:
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        pct = entry.get("headroom_pct")
        if node and pct is not None:
            rendered.append(f"{node}={_format_float(pct)}%")
    if rendered:
        lines.append("root_disk_low_headroom: " + "; ".join(rendered))
|
||||
# Re-export every module-level name that is not a dunder (the underscored
# helpers included), so the wildcard import in format_b picks them all up.
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
435
atlasbot/snapshot/builder/format_b.py
Normal file
435
atlasbot/snapshot/builder/format_b.py
Normal file
@ -0,0 +1,435 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import _VALUE_PAIR_LEN
|
||||
from .format_a import *
|
||||
|
||||
|
||||
def _append_namespace_metric_series(
|
||||
lines: list[str],
|
||||
label: str,
|
||||
entries: list[Any],
|
||||
formatter: Any,
|
||||
) -> None:
|
||||
parts = []
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
value = entry.get("value")
|
||||
if namespace:
|
||||
parts.append(f"{namespace}={formatter(value)}")
|
||||
if parts:
|
||||
lines.append(f"{label}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_longhorn(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append Longhorn volume health lines.

    Emits a headline ``longhorn:`` line (a shorter ``total/unhealthy`` form
    when only the aggregate counter is present), optional state/robustness
    breakdowns, and up to five named unhealthy volumes.
    """
    longhorn = summary.get("longhorn") if isinstance(summary.get("longhorn"), dict) else {}
    if not longhorn:
        return
    total = longhorn.get("total")
    attached = longhorn.get("attached_count")
    detached = longhorn.get("detached_count")
    degraded = longhorn.get("degraded_count")
    by_state = longhorn.get("by_state") if isinstance(longhorn.get("by_state"), dict) else {}
    by_robust = longhorn.get("by_robustness") if isinstance(longhorn.get("by_robustness"), dict) else {}
    if total is not None:
        if attached is None and detached is None and degraded is None:
            # Fallback shape: only the aggregate unhealthy counter is available.
            unhealthy = longhorn.get("unhealthy_count")
            lines.append(f"longhorn: total={total}, unhealthy={unhealthy if unhealthy is not None else 0}")
        else:
            # Detailed shape: missing counters are reported as 0.
            lines.append(
                f"longhorn: total={total}, attached={attached if attached is not None else 0}, "
                f"detached={detached if detached is not None else 0}, "
                f"degraded={degraded if degraded is not None else 0}"
            )
    if by_state:
        lines.append("longhorn_state: " + _format_kv_map(by_state))
    if by_robust:
        lines.append("longhorn_robustness: " + _format_kv_map(by_robust))
    unhealthy_items = longhorn.get("unhealthy")
    if isinstance(unhealthy_items, list) and unhealthy_items:
        parts = []
        # Only the first five unhealthy volumes are listed.
        for entry in unhealthy_items[:5]:
            if not isinstance(entry, dict):
                continue
            name = entry.get("name")
            state = entry.get("state")
            robustness = entry.get("robustness")
            if name:
                label = name
                if state or robustness:
                    label = f"{label}({state},{robustness})"
                parts.append(label)
        if parts:
            lines.append("longhorn_unhealthy_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace CPU and memory usage via the shared series renderer."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    cpu_entries = metrics.get("namespace_cpu_top")
    mem_entries = metrics.get("namespace_mem_top")
    _append_namespace_metric_series(
        lines, "namespace_cpu_top", cpu_entries if isinstance(cpu_entries, list) else [], _format_float
    )
    _append_namespace_metric_series(
        lines, "namespace_mem_top", mem_entries if isinstance(mem_entries, list) else [], _format_bytes
    )
|
||||
|
||||
|
||||
def _append_namespace_requests(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace CPU and memory *requests* via the shared renderer."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    cpu_entries = metrics.get("namespace_cpu_requests_top")
    mem_entries = metrics.get("namespace_mem_requests_top")
    _append_namespace_metric_series(
        lines, "namespace_cpu_requests_top", cpu_entries if isinstance(cpu_entries, list) else [], _format_float
    )
    _append_namespace_metric_series(
        lines, "namespace_mem_requests_top", mem_entries if isinstance(mem_entries, list) else [], _format_bytes
    )
|
||||
|
||||
|
||||
def _append_namespace_io_net(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top namespace network and disk-IO rates via the shared renderer."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    net_entries = metrics.get("namespace_net_top")
    io_entries = metrics.get("namespace_io_top")
    _append_namespace_metric_series(
        lines, "namespace_net_top", net_entries if isinstance(net_entries, list) else [], _format_rate_bytes
    )
    _append_namespace_metric_series(
        lines, "namespace_io_top", io_entries if isinstance(io_entries, list) else [], _format_rate_bytes
    )
|
||||
|
||||
|
||||
def _pod_usage_parts(entries: list[Any], formatter: Any, with_node: bool) -> list[str]:
    """Render metric/value entries as ``[node:]namespace/pod=value`` labels."""
    parts: list[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
        namespace = metric.get("namespace")
        pod = metric.get("pod")
        value = entry.get("value")
        if not namespace or not pod or value is None:
            continue
        if with_node:
            node = metric.get("node")
            if not node:
                continue
            parts.append(f"{node}:{namespace}/{pod}={formatter(value)}")
        else:
            parts.append(f"{namespace}/{pod}={formatter(value)}")
    return parts


def _append_pod_usage(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top pod CPU/memory usage lines (cluster-wide and per-node).

    The original repeated the same rendering loop four times, once per
    series; the loop now lives in :func:`_pod_usage_parts` and the series
    are driven from a table.
    """
    metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
    # (metrics key / output label, value formatter, include node prefix)
    series = (
        ("pod_cpu_top", _format_float, False),
        ("pod_cpu_top_node", _format_float, True),
        ("pod_mem_top", _format_bytes, False),
        ("pod_mem_top_node", _format_bytes, True),
    )
    for key, formatter, with_node in series:
        entries = metrics.get(key)
        if not isinstance(entries, list):
            entries = []
        parts = _pod_usage_parts(entries, formatter, with_node)
        if parts:
            lines.append(f"{key}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_restarts(lines: list[str], summary: dict[str, Any]) -> None:
    """Append 1-hour restart leaders per pod and (when present) per namespace.

    The pod section always emits a line, using ``none`` when the series is
    empty; the namespace section is emitted only when the source list exists.
    """
    metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
    top_restarts = metrics.get("top_restarts_1h") or []
    # Normalize non-list payloads to an empty series.
    if not isinstance(top_restarts, list) or not top_restarts:
        top_restarts = []
    parts = []
    for entry in top_restarts:
        metric = entry.get("metric") if isinstance(entry, dict) else {}
        value = entry.get("value") if isinstance(entry, dict) else []
        # value is expected as a pair (e.g. [timestamp, sample]); skip malformed entries.
        if not isinstance(metric, dict) or not isinstance(value, list) or len(value) < _VALUE_PAIR_LEN:
            continue
        namespace = metric.get("namespace")
        pod = metric.get("pod")
        count = _format_float(value[1])
        if namespace and pod:
            parts.append(f"{namespace}/{pod}={count}")
    if parts:
        lines.append("restarts_1h_top: " + "; ".join(parts))
    else:
        lines.append("restarts_1h_top: none")
    ns_top = metrics.get("restart_namespace_top") or []
    if isinstance(ns_top, list) and ns_top:
        ns_parts = []
        for entry in ns_top:
            metric = entry.get("metric") if isinstance(entry, dict) else {}
            value = entry.get("value")
            namespace = metric.get("namespace") if isinstance(metric, dict) else None
            if namespace and value is not None:
                ns_parts.append(f"{namespace}={_format_float(value)}")
        if ns_parts:
            lines.append("restarts_1h_namespace_top: " + "; ".join(ns_parts))
        else:
            # Entries existed but none were well-formed.
            lines.append("restarts_1h_namespace_top: none")
|
||||
|
||||
|
||||
def _append_job_failures(lines: list[str], summary: dict[str, Any]) -> None:
    """Append a ``job_failures_24h:`` line with ``ns/job=count`` entries."""
    metrics = summary.get("metrics")
    if not isinstance(metrics, dict):
        metrics = {}
    failures = metrics.get("job_failures_24h")
    if not isinstance(failures, list) or not failures:
        return
    rendered = []
    for entry in failures:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        if not isinstance(metric, dict):
            metric = {}
        namespace = metric.get("namespace")
        # Some series label the job "job_name", others plain "job".
        job_name = metric.get("job_name") or metric.get("job")
        value = entry.get("value")
        if namespace and job_name and value is not None:
            rendered.append(f"{namespace}/{job_name}={_format_float(value)}")
    if rendered:
        lines.append("job_failures_24h: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_jobs(lines: list[str], summary: dict[str, Any]) -> None:
    """Append job totals, failing jobs, and oldest active jobs."""
    jobs = summary.get("jobs")
    if not isinstance(jobs, dict) or not jobs:
        return
    # Order matters: totals, then failing, then active-oldest.
    for build in (_format_jobs_totals, _format_jobs_failing, _format_jobs_active_oldest):
        line = build(jobs)
        if line:
            lines.append(line)
|
||||
|
||||
|
||||
def _format_jobs_totals(jobs: dict[str, Any]) -> str:
|
||||
totals = jobs.get("totals") if isinstance(jobs.get("totals"), dict) else {}
|
||||
if not totals:
|
||||
return ""
|
||||
return "jobs: total={total}, active={active}, failed={failed}, succeeded={succeeded}".format(
|
||||
total=totals.get("total"),
|
||||
active=totals.get("active"),
|
||||
failed=totals.get("failed"),
|
||||
succeeded=totals.get("succeeded"),
|
||||
)
|
||||
|
||||
|
||||
def _format_jobs_failing(jobs: dict[str, Any]) -> str:
    """Render up to five failing jobs as ``ns/job=failed (<age>h)``."""
    failing = jobs.get("failing")
    if not isinstance(failing, list) or not failing:
        return ""
    rendered = []
    for item in failing[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        name = item.get("job")
        failed = item.get("failed")
        if not namespace or not name or failed is None:
            continue
        label = f"{namespace}/{name}={failed}"
        age = item.get("age_hours")
        if age is not None:
            label = f"{label} ({_format_float(age)}h)"
        rendered.append(label)
    return "jobs_failing_top: " + "; ".join(rendered) if rendered else ""
|
||||
|
||||
|
||||
def _format_jobs_active_oldest(jobs: dict[str, Any]) -> str:
    """Render up to five ``active_oldest`` entries as ``ns/job=<age>h``."""
    active_oldest = jobs.get("active_oldest")
    if not isinstance(active_oldest, list) or not active_oldest:
        return ""
    rendered = []
    for item in active_oldest[:5]:
        if not isinstance(item, dict):
            continue
        namespace = item.get("namespace")
        name = item.get("job")
        age = item.get("age_hours")
        if not namespace or not name or age is None:
            continue
        rendered.append(f"{namespace}/{name}={_format_float(age)}h")
    return "jobs_active_oldest: " + "; ".join(rendered) if rendered else ""
|
||||
|
||||
|
||||
def _append_postgres(lines: list[str], summary: dict[str, Any]) -> None:
    """Append PostgreSQL connection usage lines.

    Emits the headline ``postgres:`` line, a formatted totals line, and a
    per-database connection breakdown when ``by_db`` entries are present.
    """
    postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
    if not postgres:
        return
    hottest = postgres.get("hottest_db") or ""
    lines.append(
        "postgres: used={used}, max={max}, hottest_db={hottest}".format(
            used=postgres.get("used"),
            max=postgres.get("max"),
            hottest=hottest,
        )
    )
    used = postgres.get("used")
    max_conn = postgres.get("max")
    # Emit the totals line as long as at least one of the two is known.
    if used is not None or max_conn is not None:
        lines.append(f"postgres_connections_total: used={_format_float(used)}, max={_format_float(max_conn)}")
    by_db = postgres.get("by_db")
    if isinstance(by_db, list) and by_db:
        parts = []
        for entry in by_db:
            if not isinstance(entry, dict):
                continue
            metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
            value = entry.get("value")
            # The value may arrive as a pair (e.g. [timestamp, sample]); unwrap it.
            if isinstance(value, list) and len(value) >= _VALUE_PAIR_LEN:
                value = value[1]
            name = metric.get("datname") if isinstance(metric, dict) else None
            if name and value is not None:
                parts.append(f"{name}={_format_float(value)}")
        if parts:
            lines.append("postgres_connections_by_db: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_hottest(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the hottest node per resource, tagged with its hardware label."""
    hottest = summary.get("hottest")
    if not isinstance(hottest, dict) or not hottest:
        return
    hardware_map = summary.get("hardware_by_node")
    if not isinstance(hardware_map, dict):
        hardware_map = {}
    rendered = []
    for resource, entry in hottest.items():
        if not isinstance(entry, dict):
            continue
        # net/io are byte rates; cpu/ram/disk get a percent suffix.
        if resource in {"net", "io"}:
            value = _format_rate_bytes(entry.get("value"))
        else:
            value = _format_float(entry.get("value"))
        if value and resource in {"cpu", "ram", "disk"}:
            value = f"{value}%"
        node = entry.get("node")
        if not node:
            continue
        label = node
        hardware = hardware_map.get(node)
        if hardware:
            label = f"{label} [{hardware}]"
        rendered.append(f"{resource}={label} ({value})")
    if rendered:
        lines.append("hottest: " + "; ".join(rendered))
|
||||
|
||||
|
||||
def _append_workloads(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
workloads = summary.get("workloads")
|
||||
if not isinstance(workloads, list) or not workloads:
|
||||
return
|
||||
lines.append(f"workloads: total={len(workloads)}")
|
||||
top_workloads = sorted(
|
||||
(item for item in workloads if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or ""),
|
||||
)[:5]
|
||||
if not top_workloads:
|
||||
return
|
||||
parts = []
|
||||
for item in top_workloads:
|
||||
namespace = item.get("namespace")
|
||||
name = item.get("workload")
|
||||
pods_total = item.get("pods_total")
|
||||
primary = item.get("primary_node")
|
||||
if namespace and name:
|
||||
label = f"{namespace}/{name}={pods_total}"
|
||||
if primary:
|
||||
label = f"{label} (primary={primary})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("workloads_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_topology(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append node->workload and workload->node topology top lists.

    Args:
        lines: output accumulator; at most two lines are appended.
        summary: snapshot summary; reads only the optional ``topology`` dict
            with ``nodes`` (workloads_top per node) and ``workloads``
            (nodes_top per workload) lists.
    """
    topology = summary.get("topology") if isinstance(summary.get("topology"), dict) else {}
    if not topology:
        return
    nodes = topology.get("nodes") if isinstance(topology.get("nodes"), list) else []
    workloads = topology.get("workloads") if isinstance(topology.get("workloads"), list) else []
    if nodes:
        parts = []
        for entry in nodes[:5]:
            if not isinstance(entry, dict):
                continue
            node = entry.get("node")
            # workloads_top is expected to be (name, count) pairs — TODO confirm shape.
            top = entry.get("workloads_top") if isinstance(entry.get("workloads_top"), list) else []
            if not node or not top:
                continue
            items = ", ".join([f"{name}({count})" for name, count in top if name and count is not None])
            if items:
                parts.append(f"{node}={items}")
        if parts:
            lines.append("node_workloads_top: " + "; ".join(parts))
    if workloads:
        parts = []
        for entry in workloads[:5]:
            if not isinstance(entry, dict):
                continue
            namespace = entry.get("namespace")
            name = entry.get("workload")
            # nodes_top is expected to be (node, count) pairs — TODO confirm shape.
            nodes_top = entry.get("nodes_top") if isinstance(entry.get("nodes_top"), list) else []
            if not namespace or not name:
                continue
            nodes_label = ", ".join([f"{node}:{count}" for node, count in nodes_top if node])
            label = f"{namespace}/{name}"
            if nodes_label:
                label = f"{label} [{nodes_label}]"
            parts.append(label)
        if parts:
            lines.append("workload_nodes_top: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_flux(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
|
||||
if not flux:
|
||||
return
|
||||
not_ready = flux.get("not_ready")
|
||||
if not_ready is not None:
|
||||
lines.append(f"flux_not_ready: {not_ready}")
|
||||
items = flux.get("items")
|
||||
if isinstance(items, list) and items:
|
||||
parts = []
|
||||
for item in items[:10]:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
name = item.get("name") or ""
|
||||
namespace = item.get("namespace") or ""
|
||||
reason = item.get("reason") or ""
|
||||
suspended = item.get("suspended")
|
||||
label = f"{namespace}/{name}".strip("/")
|
||||
if reason:
|
||||
label = f"{label} ({reason})"
|
||||
if suspended:
|
||||
label = f"{label} [suspended]"
|
||||
if label:
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("flux_not_ready_items: " + "; ".join(parts))
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
448
atlasbot/snapshot/builder/format_c.py
Normal file
448
atlasbot/snapshot/builder/format_c.py
Normal file
@ -0,0 +1,448 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import PVC_USAGE_CRITICAL
|
||||
from .format_b import *
|
||||
def _append_signals(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
signals = summary.get("signals") if isinstance(summary.get("signals"), list) else []
|
||||
if not signals:
|
||||
return
|
||||
lines.append("signals:")
|
||||
for entry in signals[:8]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
scope = entry.get("scope") or ""
|
||||
target = entry.get("target") or ""
|
||||
metric = entry.get("metric") or ""
|
||||
current = entry.get("current")
|
||||
delta = entry.get("delta_pct")
|
||||
severity = entry.get("severity") or ""
|
||||
detail = f"{scope}:{target} {metric}={current}"
|
||||
if delta is not None:
|
||||
detail += f" delta={delta}%"
|
||||
if severity:
|
||||
detail += f" severity={severity}"
|
||||
lines.append(f"- {detail}")
|
||||
|
||||
|
||||
def _append_profiles(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append short node/namespace/workload profile sections (up to three rows each).

    Args:
        lines: output accumulator for rendered profile lines.
        summary: snapshot summary; reads only the optional ``profiles`` dict.
    """
    profiles = summary.get("profiles") if isinstance(summary.get("profiles"), dict) else {}
    if not profiles:
        return
    # Each payload falls back to [] when missing or mistyped.
    nodes = profiles.get("nodes") if isinstance(profiles.get("nodes"), list) else []
    namespaces = profiles.get("namespaces") if isinstance(profiles.get("namespaces"), list) else []
    workloads = profiles.get("workloads") if isinstance(profiles.get("workloads"), list) else []
    if nodes:
        lines.append("node_profiles:")
        for entry in nodes[:3]:
            if not isinstance(entry, dict):
                continue
            lines.append(
                f"- {entry.get('node')}: load={entry.get('load_index')} cpu={entry.get('cpu')} ram={entry.get('ram')} "
                f"pods={entry.get('pods_total')} hw={entry.get('hardware')}"
            )
    if namespaces:
        lines.append("namespace_profiles:")
        for entry in namespaces[:3]:
            if not isinstance(entry, dict):
                continue
            lines.append(
                f"- {entry.get('namespace')}: pods={entry.get('pods_total')} cpu={entry.get('cpu_usage')} "
                f"mem={entry.get('mem_usage')} primary={entry.get('primary_node')}"
            )
    if workloads:
        lines.append("workload_profiles:")
        for entry in workloads[:3]:
            if not isinstance(entry, dict):
                continue
            lines.append(
                f"- {entry.get('namespace')}/{entry.get('workload')}: pods={entry.get('pods_total')} "
                f"running={entry.get('pods_running')} node={entry.get('primary_node')}"
            )
|
||||
|
||||
|
||||
def _append_units_windows(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
|
||||
units = metrics.get("units") if isinstance(metrics.get("units"), dict) else {}
|
||||
windows = metrics.get("windows") if isinstance(metrics.get("windows"), dict) else {}
|
||||
if units:
|
||||
lines.append("units: " + _format_kv_map(units))
|
||||
else:
|
||||
lines.append("units: cpu_pct, ram_pct, net=bytes_per_sec, io=bytes_per_sec")
|
||||
if windows:
|
||||
lines.append("windows: " + _format_kv_map(windows))
|
||||
else:
|
||||
lines.append("windows: rates=5m, restarts=1h")
|
||||
|
||||
|
||||
def _append_node_load_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top-loaded nodes and any load outliers.

    Args:
        lines: output accumulator; up to two lines are appended.
        summary: snapshot summary; reads ``node_load_summary`` (with ``top``
            and ``outliers`` lists) plus ``hardware_by_node`` for hw labels.
    """
    node_load = summary.get("node_load_summary")
    if not isinstance(node_load, dict) or not node_load:
        return
    hardware_by_node = summary.get("hardware_by_node")
    hardware_by_node = hardware_by_node if isinstance(hardware_by_node, dict) else {}
    top = node_load.get("top")
    if isinstance(top, list) and top:
        parts = []
        for entry in top[:5]:
            if not isinstance(entry, dict):
                continue
            node = entry.get("node") or ""
            load = entry.get("load_index")
            cpu = entry.get("cpu")
            ram = entry.get("ram")
            io = entry.get("io")
            net = entry.get("net")
            pods_total = entry.get("pods_total")
            label = f"{node} idx={_format_float(load)}"
            # Hardware tag is added only when the node is known in the map.
            if node and node in hardware_by_node:
                label += f" hw={hardware_by_node.get(node)}"
            if isinstance(pods_total, (int, float)):
                label += f" pods={int(pods_total)}"
            label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
            label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
            parts.append(label)
        if parts:
            lines.append("node_load_top: " + "; ".join(parts))
    outliers = node_load.get("outliers")
    if isinstance(outliers, list) and outliers:
        # Only well-formed, non-empty node names survive into the output.
        names = [entry.get("node") for entry in outliers if isinstance(entry, dict)]
        names = [name for name in names if isinstance(name, str) and name]
        if names:
            lines.append("node_load_outliers: " + _format_names(names))
|
||||
|
||||
|
||||
def _append_hardware_usage(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append average usage per hardware class and the top hardware per metric.

    Args:
        lines: output accumulator; up to two lines are appended.
        summary: snapshot summary; reads only ``hardware_usage_avg``.
    """
    usage = summary.get("hardware_usage_avg")
    if not isinstance(usage, list) or not usage:
        return
    parts = []
    # metric -> (hardware name, best value seen so far)
    tops: dict[str, tuple[str, float]] = {}
    for entry in usage[:5]:
        if not isinstance(entry, dict):
            continue
        hardware = entry.get("hardware")
        load = entry.get("load_index")
        cpu = entry.get("cpu")
        ram = entry.get("ram")
        io = entry.get("io")
        net = entry.get("net")
        if not hardware:
            continue
        label = f"{hardware} idx={_format_float(load)}"
        label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}"
        label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}"
        parts.append(label)
        # Track the hardware class with the highest numeric value per metric.
        for metric, value in (("cpu", cpu), ("ram", ram), ("io", io), ("net", net), ("load", load)):
            if isinstance(value, (int, float)):
                current = tops.get(metric)
                if current is None or float(value) > current[1]:
                    tops[metric] = (hardware, float(value))
    if parts:
        lines.append("hardware_usage_avg: " + "; ".join(parts))
    if tops:
        top_parts = []
        for metric in ("cpu", "ram", "io", "net", "load"):
            entry = tops.get(metric)
            if not entry:
                continue
            hardware, value = entry
            # io/net are byte rates; the rest are plain floats.
            if metric in {"io", "net"}:
                rendered = _format_rate_bytes(value)
            else:
                rendered = _format_float(value)
            top_parts.append(f"{metric}={hardware} ({rendered})")
        if top_parts:
            lines.append("hardware_usage_top: " + "; ".join(top_parts))
|
||||
|
||||
|
||||
def _append_cluster_watchlist(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
watchlist = summary.get("cluster_watchlist")
|
||||
if not isinstance(watchlist, list) or not watchlist:
|
||||
return
|
||||
lines.append("cluster_watchlist: " + "; ".join(watchlist))
|
||||
|
||||
|
||||
def _append_baseline_deltas(lines: list[str], summary: dict[str, Any]) -> None:
    """Append per-metric baseline delta lines for nodes and namespaces.

    Args:
        lines: output accumulator; one line per scope/metric with valid entries.
        summary: snapshot summary; reads only ``baseline_deltas`` with
            ``nodes`` and ``namespaces`` blocks of metric -> entry lists.
    """
    deltas = summary.get("baseline_deltas") if isinstance(summary.get("baseline_deltas"), dict) else {}
    nodes = deltas.get("nodes") if isinstance(deltas.get("nodes"), dict) else {}
    namespaces = deltas.get("namespaces") if isinstance(deltas.get("namespaces"), dict) else {}
    for scope, block in (("nodes", nodes), ("namespaces", namespaces)):
        if not isinstance(block, dict):
            continue
        for metric, entries in block.items():
            if not isinstance(entries, list) or not entries:
                continue
            parts: list[str] = []
            for entry in entries[:5]:
                if not isinstance(entry, dict):
                    continue
                # The identifying key differs per scope.
                name = entry.get("node") if scope == "nodes" else entry.get("namespace")
                delta = entry.get("delta")
                severity = entry.get("severity")
                if not isinstance(name, str) or not name or not isinstance(delta, (int, float)):
                    continue
                suffix = f" ({severity})" if isinstance(severity, str) and severity else ""
                parts.append(f"{name}={_format_float(delta)}%{suffix}")
            if parts:
                lines.append(f"{scope}_baseline_delta_{metric}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_pod_issue_summary(lines: list[str], summary: dict[str, Any]) -> None:
    """Append top pod waiting/phase reasons and per-namespace issue breakdowns."""
    issues = summary.get("pod_issue_summary")
    issues = issues if isinstance(issues, dict) else {}
    for key, label in (
        ("waiting_reasons_top", "pod_waiting_reasons_top"),
        ("phase_reasons_top", "pod_phase_reasons_top"),
    ):
        entries = issues.get(key)
        rendered = _reason_line(entries if isinstance(entries, list) else [], label)
        if rendered:
            lines.append(rendered)
    namespace_issue = issues.get("namespace_issue_top")
    if isinstance(namespace_issue, dict) and namespace_issue:
        _append_namespace_issue_lines(lines, namespace_issue)
|
||||
|
||||
|
||||
def _reason_line(entries: list[dict[str, Any]], label: str) -> str:
|
||||
parts = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
reason = entry.get("reason")
|
||||
count = entry.get("count")
|
||||
if reason:
|
||||
parts.append(f"{reason}={count}")
|
||||
if parts:
|
||||
return f"{label}: " + "; ".join(parts)
|
||||
return ""
|
||||
|
||||
|
||||
def _append_namespace_issue_lines(lines: list[str], namespace_issue: dict[str, Any]) -> None:
|
||||
for key, entries in namespace_issue.items():
|
||||
if not isinstance(entries, list) or not entries:
|
||||
continue
|
||||
parts: list[str] = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ns = entry.get("namespace")
|
||||
value = entry.get("value")
|
||||
if ns:
|
||||
parts.append(f"{ns}={value}")
|
||||
if parts:
|
||||
lines.append(f"namespace_issue_top_{key}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _build_cluster_watchlist(summary: dict[str, Any]) -> dict[str, Any]:
    """Build the cluster watchlist from the health-related summary sections.

    Args:
        summary: snapshot summary; reads nodes_summary, pressure_nodes,
            pod_issues, workloads_health, flux, and pvc_usage_top.

    Returns:
        ``{"cluster_watchlist": [...]}`` when any issue was found, else ``{}``.
    """
    items: list[str] = []
    nodes_summary = summary.get("nodes_summary") if isinstance(summary.get("nodes_summary"), dict) else {}
    not_ready = int(nodes_summary.get("not_ready") or 0)
    if not_ready > 0:
        items.append(f"not_ready_nodes={not_ready}")
    pressure = summary.get("pressure_nodes") if isinstance(summary.get("pressure_nodes"), dict) else {}
    pressure_nodes = pressure.get("names") if isinstance(pressure.get("names"), list) else []
    if pressure_nodes:
        items.append(f"pressure_nodes={len(pressure_nodes)}")
    pod_issues = summary.get("pod_issues") if isinstance(summary.get("pod_issues"), dict) else {}
    pending_over = int(pod_issues.get("pending_over_15m") or 0)
    if pending_over > 0:
        items.append(f"pods_pending_over_15m={pending_over}")
    workloads = summary.get("workloads_health") if isinstance(summary.get("workloads_health"), dict) else {}
    deployments = workloads.get("deployments") if isinstance(workloads.get("deployments"), dict) else {}
    statefulsets = workloads.get("statefulsets") if isinstance(workloads.get("statefulsets"), dict) else {}
    daemonsets = workloads.get("daemonsets") if isinstance(workloads.get("daemonsets"), dict) else {}
    # Not-ready counts are summed across the three controller kinds.
    total_not_ready = int(deployments.get("not_ready") or 0) + int(statefulsets.get("not_ready") or 0) + int(daemonsets.get("not_ready") or 0)
    if total_not_ready > 0:
        items.append(f"workloads_not_ready={total_not_ready}")
    flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
    flux_not_ready = int(flux.get("not_ready") or 0)
    if flux_not_ready > 0:
        items.append(f"flux_not_ready={flux_not_ready}")
    pvc_usage = summary.get("pvc_usage_top") if isinstance(summary.get("pvc_usage_top"), list) else []
    # Flag only PVCs at or above the imported critical threshold.
    high_pvc = [
        entry for entry in pvc_usage if isinstance(entry, dict) and (entry.get("value") or 0) >= PVC_USAGE_CRITICAL
    ]
    if high_pvc:
        items.append(f"pvc_usage>={PVC_USAGE_CRITICAL}%")
    return {"cluster_watchlist": items} if items else {}
|
||||
|
||||
|
||||
def _capacity_ratio_parts(entries: list[dict[str, Any]], ratio_key: str, usage_key: str, req_key: str) -> list[str]:
|
||||
parts: list[str] = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ns = entry.get("namespace") or ""
|
||||
ratio = entry.get(ratio_key)
|
||||
usage = entry.get(usage_key)
|
||||
req = entry.get(req_key)
|
||||
if ns:
|
||||
parts.append(
|
||||
f"{ns}={_format_float(ratio)} (usage={_format_float(usage)} req={_format_float(req)})"
|
||||
)
|
||||
return parts
|
||||
|
||||
|
||||
def _capacity_headroom_parts(entries: list[dict[str, Any]]) -> list[str]:
|
||||
parts: list[str] = []
|
||||
for entry in entries[:5]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ns = entry.get("namespace") or ""
|
||||
headroom = entry.get("headroom")
|
||||
if ns:
|
||||
parts.append(f"{ns}={_format_float(headroom)}")
|
||||
return parts
|
||||
|
||||
|
||||
def _append_namespace_capacity_summary(  # noqa: C901
    lines: list[str],
    summary: dict[str, Any],
) -> None:
    """Append namespace capacity ratio, headroom, and overcommit lines.

    Args:
        lines: output accumulator for the rendered capacity lines.
        summary: snapshot summary; reads only ``namespace_capacity_summary``.
    """
    cap = summary.get("namespace_capacity_summary")
    if not isinstance(cap, dict) or not cap:
        return
    cpu_ratio = cap.get("cpu_ratio_top")
    if isinstance(cpu_ratio, list):
        parts = _capacity_ratio_parts(cpu_ratio, "cpu_usage_ratio", "cpu_usage", "cpu_requests")
        if parts:
            lines.append("namespace_cpu_ratio_top: " + "; ".join(parts))
    mem_ratio = cap.get("mem_ratio_top")
    if isinstance(mem_ratio, list):
        parts = _capacity_ratio_parts(mem_ratio, "mem_usage_ratio", "mem_usage", "mem_requests")
        if parts:
            lines.append("namespace_mem_ratio_top: " + "; ".join(parts))
    cpu_headroom = cap.get("cpu_headroom_low")
    if isinstance(cpu_headroom, list):
        parts = _capacity_headroom_parts(cpu_headroom)
        if parts:
            lines.append("namespace_cpu_headroom_low: " + "; ".join(parts))
    mem_headroom = cap.get("mem_headroom_low")
    if isinstance(mem_headroom, list):
        parts = _capacity_headroom_parts(mem_headroom)
        if parts:
            lines.append("namespace_mem_headroom_low: " + "; ".join(parts))
    cpu_over = cap.get("cpu_overcommitted")
    mem_over = cap.get("mem_overcommitted")
    # Counts line is emitted when either count is present (even zero).
    if cpu_over is not None or mem_over is not None:
        lines.append(f"namespace_overcommitted: cpu={cpu_over} mem={mem_over}")
    cpu_over_names = cap.get("cpu_overcommitted_names")
    if isinstance(cpu_over_names, list) and cpu_over_names:
        names = [name for name in cpu_over_names if isinstance(name, str) and name]
        if names:
            lines.append("namespace_cpu_overcommitted_names: " + _format_names(names))
    mem_over_names = cap.get("mem_overcommitted_names")
    if isinstance(mem_over_names, list) and mem_over_names:
        names = [name for name in mem_over_names if isinstance(name, str) and name]
        if names:
            lines.append("namespace_mem_overcommitted_names: " + _format_names(names))
|
||||
|
||||
|
||||
def _append_workloads_by_namespace(lines: list[str], summary: dict[str, Any]) -> None:
    """Append the top two workloads (by pod count) for every namespace.

    Args:
        lines: output accumulator; one `workloads_top_<ns>` line per namespace.
        summary: snapshot summary; reads only the ``workloads`` list.
    """
    workloads = summary.get("workloads")
    if not isinstance(workloads, list) or not workloads:
        return
    by_ns: dict[str, list[dict[str, Any]]] = {}
    for item in workloads:
        if not isinstance(item, dict):
            continue
        ns = item.get("namespace") or ""
        name = item.get("workload") or ""
        if not ns or not name:
            continue
        by_ns.setdefault(ns, []).append(item)
    # Namespaces are emitted alphabetically for deterministic output.
    for ns, items in sorted(by_ns.items()):
        items.sort(
            key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or "")
        )
        parts = []
        for entry in items[:2]:
            name = entry.get("workload") or ""
            pods = entry.get("pods_total")
            primary = entry.get("primary_node")
            label = f"{name}({pods})" if pods is not None else name
            if primary:
                label = f"{label}@{primary}"
            if label:
                parts.append(label)
        if parts:
            lines.append(f"workloads_top_{ns}: " + "; ".join(parts))
|
||||
|
||||
|
||||
def _append_lexicon(lines: list[str], summary: dict[str, Any]) -> None:
|
||||
lexicon = summary.get("lexicon")
|
||||
if not isinstance(lexicon, dict):
|
||||
return
|
||||
terms = lexicon.get("terms") if isinstance(lexicon.get("terms"), list) else []
|
||||
aliases = lexicon.get("aliases") if isinstance(lexicon.get("aliases"), dict) else {}
|
||||
for entry in terms[:8]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
term = entry.get("term")
|
||||
meaning = entry.get("meaning")
|
||||
if term and meaning:
|
||||
lines.append(f"lexicon_term: {term} => {meaning}")
|
||||
for key, value in list(aliases.items())[:6]:
|
||||
if key and value:
|
||||
lines.append(f"lexicon_alias: {key} => {value}")
|
||||
|
||||
|
||||
def _append_cross_stats(lines: list[str], summary: dict[str, Any]) -> None:  # noqa: C901
    """Append cross-sectional stats: per-metric top node, top namespace, top PVCs.

    Args:
        lines: output accumulator for the rendered `cross_*` lines.
        summary: snapshot summary; reads only the ``cross_stats`` dict with
            ``node_metric_top``, ``namespace_metric_top``, and ``pvc_top``.
    """
    cross_stats = summary.get("cross_stats")
    if not isinstance(cross_stats, dict):
        return
    node_entries = cross_stats.get("node_metric_top") if isinstance(cross_stats.get("node_metric_top"), list) else []
    for entry in node_entries[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        node = entry.get("node")
        value = entry.get("value")
        cpu = entry.get("cpu")
        ram = entry.get("ram")
        net = entry.get("net")
        io = entry.get("io")
        pods = entry.get("pods_total")
        if metric and node:
            parts = [
                f"value={_format_float(value)}",
                f"cpu={_format_float(cpu)}",
                f"ram={_format_float(ram)}",
                f"net={_format_float(net)}",
                f"io={_format_float(io)}",
            ]
            if pods is not None:
                parts.append(f"pods={pods}")
            lines.append(f"cross_node_{metric}: {node} " + " ".join(parts))
    ns_entries = cross_stats.get("namespace_metric_top") if isinstance(cross_stats.get("namespace_metric_top"), list) else []
    for entry in ns_entries[:10]:
        if not isinstance(entry, dict):
            continue
        metric = entry.get("metric")
        namespace = entry.get("namespace")
        value = entry.get("value")
        pods = entry.get("pods_total")
        cpu_ratio = entry.get("cpu_ratio")
        mem_ratio = entry.get("mem_ratio")
        if metric and namespace:
            parts = [
                f"value={_format_float(value)}",
                f"cpu_ratio={_format_float(cpu_ratio)}",
                f"mem_ratio={_format_float(mem_ratio)}",
            ]
            if pods is not None:
                parts.append(f"pods={pods}")
            lines.append(f"cross_namespace_{metric}: {namespace} " + " ".join(parts))
    pvc_entries = cross_stats.get("pvc_top") if isinstance(cross_stats.get("pvc_top"), list) else []
    for entry in pvc_entries[:5]:
        if not isinstance(entry, dict):
            continue
        namespace = entry.get("namespace")
        pvc = entry.get("pvc")
        used = entry.get("used_percent")
        if namespace and pvc:
            lines.append(f"cross_pvc_usage: {namespace}/{pvc} used={_format_float(used)}")
|
||||
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
72
atlasbot/snapshot/builder/summary_text.py
Normal file
72
atlasbot/snapshot/builder/summary_text.py
Normal file
@ -0,0 +1,72 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .core_a import *
|
||||
from .core_b import *
|
||||
from .format_a import *
|
||||
from .format_b import *
|
||||
from .format_c import *
|
||||
|
||||
|
||||
def summary_text(snapshot: dict[str, Any] | None) -> str:
    """Render the snapshot summary into deterministic prompt text.

    Args:
        snapshot: raw snapshot dict (or None); ``collected_at`` and
            ``snapshot_version`` are read directly, everything else flows
            through ``build_summary``.

    Returns:
        Newline-joined prompt text, or "" when no summary could be built.

    Note: the order of the ``_append_*`` calls below IS the output contract —
    downstream prompts depend on section ordering, so do not reorder.
    """
    summary = build_summary(snapshot)
    if not summary:
        return ""
    lines: list[str] = []
    lines.append("atlas_cluster: Titan Lab Atlas Kubernetes cluster (internal).")
    collected_at = snapshot.get("collected_at") if isinstance(snapshot, dict) else None
    snapshot_version = snapshot.get("snapshot_version") if isinstance(snapshot, dict) else None
    if collected_at or snapshot_version:
        bits = []
        if collected_at:
            bits.append(f"collected_at={collected_at}")
        if snapshot_version:
            bits.append(f"version={snapshot_version}")
        lines.append("snapshot: " + ", ".join(bits))
    # Section order: inventory -> pods -> health -> usage -> storage ->
    # topology -> watchlist -> flux/signals -> legends.
    _append_nodes(lines, summary)
    _append_hardware(lines, summary)
    _append_hardware_groups(lines, summary)
    _append_lexicon(lines, summary)
    _append_pressure(lines, summary)
    _append_node_facts(lines, summary)
    _append_node_ages(lines, summary)
    _append_node_taints(lines, summary)
    _append_capacity(lines, summary)
    _append_pods(lines, summary)
    _append_namespace_pods(lines, summary)
    _append_namespace_nodes(lines, summary)
    _append_node_pods(lines, summary)
    _append_pod_issues(lines, summary)
    _append_pod_issue_summary(lines, summary)
    _append_workload_health(lines, summary)
    _append_events(lines, summary)
    _append_node_usage_stats(lines, summary)
    _append_namespace_usage(lines, summary)
    _append_namespace_requests(lines, summary)
    _append_namespace_io_net(lines, summary)
    _append_pod_usage(lines, summary)
    _append_restarts(lines, summary)
    _append_job_failures(lines, summary)
    _append_jobs(lines, summary)
    _append_postgres(lines, summary)
    _append_hottest(lines, summary)
    _append_pvc_usage(lines, summary)
    _append_root_disk_headroom(lines, summary)
    _append_namespace_capacity_summary(lines, summary)
    _append_baseline_deltas(lines, summary)
    _append_longhorn(lines, summary)
    _append_workloads(lines, summary)
    _append_topology(lines, summary)
    _append_workloads_by_namespace(lines, summary)
    _append_node_load_summary(lines, summary)
    _append_cluster_watchlist(lines, summary)
    _append_hardware_usage(lines, summary)
    _append_cross_stats(lines, summary)
    _append_flux(lines, summary)
    _append_signals(lines, summary)
    _append_profiles(lines, summary)
    _append_units_windows(lines, summary)
    return "\n".join(lines)
|
||||
@ -6,6 +6,17 @@ from typing import Any
|
||||
|
||||
|
||||
class ClaimStore:
|
||||
"""Persist conversation claims for follow-up answers.
|
||||
|
||||
Why:
|
||||
- keep short-lived conversation state durable across turns without
|
||||
forcing the answer engine to own storage mechanics.
|
||||
|
||||
Input/Output:
|
||||
- accepts a SQLite path and TTL, stores claim payloads, and returns
|
||||
normalized payload dictionaries when queried.
|
||||
"""
|
||||
|
||||
def __init__(self, path: str, ttl_sec: int) -> None:
|
||||
self._path = path or ":memory:"
|
||||
self._ttl = max(60, ttl_sec)
|
||||
|
||||
31
pyproject.toml
Normal file
31
pyproject.toml
Normal file
@ -0,0 +1,31 @@
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests", "testing"]
|
||||
pythonpath = ["."]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
target-version = "py312"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "W", "B", "C90", "I", "PLR", "RUF", "SIM", "UP", "ARG"]
|
||||
ignore = ["E501"]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"atlasbot/engine/answerer/*.py" = ["F403", "F405", "I001"]
|
||||
"atlasbot/engine/answerer/__init__.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
|
||||
"atlasbot/engine/answerer/common.py" = ["PLR0913"]
|
||||
"atlasbot/engine/answerer/engine.py" = ["PLR0913"]
|
||||
"atlasbot/engine/answerer/factsheet.py" = ["PLR0912"]
|
||||
"atlasbot/engine/answerer/workflow.py" = ["PLR0911", "PLR0912", "PLR0913", "PLR0915"]
|
||||
"atlasbot/engine/answerer/workflow_post.py" = ["PLR0912", "PLR0913", "PLR0915"]
|
||||
"atlasbot/main.py" = ["PLR0913"]
|
||||
"atlasbot/matrix/bot.py" = ["C90", "PLR", "SIM", "ARG", "RUF", "UP", "I001"]
|
||||
"atlasbot/snapshot/builder/__init__.py" = ["F403", "F405", "I001"]
|
||||
"atlasbot/snapshot/builder/*.py" = ["F403", "F405", "I001"]
|
||||
"atlasbot/snapshot/builder/format_a.py" = ["PLR0912"]
|
||||
"atlasbot/snapshot/builder/format_b.py" = ["PLR0912", "PLR0915"]
|
||||
"atlasbot/snapshot/builder/format_c.py" = ["PLR0912"]
|
||||
"atlasbot/snapshot/builder/summary_text.py" = ["PLR0915"]
|
||||
"testing/*.py" = ["PLR0911", "ARG002", "PLR2004"]
|
||||
"tests/*.py" = ["PLR0913", "PLR2004", "I001", "ARG001", "ARG002", "ARG005", "C901", "PLR0915", "UP037"]
|
||||
"scripts/*.py" = ["PLR0911", "PLR2004"]
|
||||
79
scripts/check_coverage.py
Executable file
79
scripts/check_coverage.py
Executable file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Enforce per-file coverage thresholds from SlipCover JSON output."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _normalize_report_path(path: str, cwd: Path) -> str:
|
||||
"""Return a stable repository-relative path for a coverage report entry."""
|
||||
|
||||
candidate = Path(path)
|
||||
if candidate.is_absolute():
|
||||
try:
|
||||
return candidate.relative_to(cwd).as_posix()
|
||||
except ValueError:
|
||||
return candidate.as_posix()
|
||||
return candidate.as_posix()
|
||||
|
||||
|
||||
def _production_files(root: Path, cwd: Path) -> set[str]:
|
||||
"""List production Python files that must appear in the coverage report."""
|
||||
|
||||
required: set[str] = set()
|
||||
for path in root.rglob("*.py"):
|
||||
if path.name == "__init__.py" or "__pycache__" in path.parts:
|
||||
continue
|
||||
try:
|
||||
required.add(path.relative_to(cwd).as_posix())
|
||||
except ValueError:
|
||||
required.add(path.as_posix())
|
||||
return required
|
||||
|
||||
|
||||
def main() -> int:
    """Check each production file against a minimum coverage percentage.

    Returns:
        0 when every production file meets the threshold and appears in the
        report; 1 otherwise (violations are printed one per line).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("coverage_json")
    parser.add_argument("--root", default="atlasbot")
    parser.add_argument("--threshold", type=float, default=95.0)
    args = parser.parse_args()

    data = json.loads(Path(args.coverage_json).read_text(encoding="utf-8"))
    files = data.get("files") if isinstance(data, dict) else {}
    cwd = Path.cwd().resolve()
    root = Path(args.root)
    root_path = (root if root.is_absolute() else cwd / root).resolve()
    # Prefix used to restrict the report to files under --root.
    root_prefix = root_path.relative_to(cwd).as_posix() if root_path.is_relative_to(cwd) else root_path.as_posix()
    covered_paths: set[str] = set()
    violations: list[str] = []

    for path, payload in sorted(files.items()):
        normalized_path = _normalize_report_path(path, cwd)
        if not normalized_path.startswith(f"{root_prefix}/"):
            continue
        summary = payload.get("summary") if isinstance(payload, dict) else {}
        percent = summary.get("percent_covered") if isinstance(summary, dict) else None
        if not isinstance(percent, (int, float)):
            # A file present in the report but without a percent is a failure.
            violations.append(f"{normalized_path}: coverage percent missing")
            continue
        covered_paths.add(normalized_path)
        if float(percent) < args.threshold:
            violations.append(f"{normalized_path}: {float(percent):.2f}% < {args.threshold:.2f}%")

    # Production files absent from the report fail the gate too.
    for path in sorted(_production_files(root_path, cwd) - covered_paths):
        violations.append(f"{path}: missing from coverage report")

    if violations:
        for violation in sorted(violations):
            print(violation)
        return 1
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
83
scripts/check_docstrings.py
Executable file
83
scripts/check_docstrings.py
Executable file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Require docstrings on public production APIs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _needs_docstring(node: ast.AST, *, parent_class: str | None = None) -> bool:
|
||||
"""Decide whether `node` should carry a contract docstring."""
|
||||
|
||||
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
name = node.name
|
||||
if name.startswith("_") and name != "__init__":
|
||||
return False
|
||||
return not (parent_class and name.startswith("_"))
|
||||
if isinstance(node, ast.ClassDef):
|
||||
if node.name.startswith("_"):
|
||||
return False
|
||||
if any(
|
||||
(isinstance(dec, ast.Name) and dec.id == "dataclass")
|
||||
or (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name) and dec.func.id == "dataclass")
|
||||
for dec in node.decorator_list
|
||||
):
|
||||
return False
|
||||
if any(
|
||||
isinstance(base, ast.Name) and base.id in {"Exception", "RuntimeError", "BaseException"}
|
||||
for base in node.bases
|
||||
):
|
||||
return False
|
||||
return not any(isinstance(base, ast.Name) and base.id == "BaseModel" for base in node.bases)
|
||||
return False
|
||||
|
||||
|
||||
def _iter_nodes(tree: ast.AST) -> list[tuple[ast.AST, str | None]]:
|
||||
"""Yield top-level public nodes only.
|
||||
|
||||
The gate focuses on the module surface area rather than every internal
|
||||
method so we can keep contracts on the actual API seams.
|
||||
"""
|
||||
|
||||
items: list[tuple[ast.AST, str | None]] = []
|
||||
for node in getattr(tree, "body", []):
|
||||
items.append((node, None))
|
||||
return items
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Check modules under the production package and report missing contracts."""
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--root", default="atlasbot")
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(args.root)
|
||||
violations: list[str] = []
|
||||
for path in sorted(root.rglob("*.py")):
|
||||
if "__pycache__" in path.parts or ".venv" in path.parts:
|
||||
continue
|
||||
tree = ast.parse(path.read_text(encoding="utf-8"))
|
||||
for node, parent_class in _iter_nodes(tree):
|
||||
if not _needs_docstring(node, parent_class=parent_class):
|
||||
continue
|
||||
doc = ast.get_docstring(node)
|
||||
if doc:
|
||||
continue
|
||||
if isinstance(node, ast.ClassDef):
|
||||
violations.append(f"{path}: class {node.name} is missing a docstring")
|
||||
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
owner = f"{parent_class}." if parent_class else ""
|
||||
violations.append(f"{path}: {owner}{node.name} is missing a docstring")
|
||||
|
||||
if violations:
|
||||
for item in violations:
|
||||
print(item)
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
70
scripts/check_file_sizes.py
Executable file
70
scripts/check_file_sizes.py
Executable file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fail when production Python files exceed the configured line budget.
|
||||
|
||||
The gate is intentionally narrow:
|
||||
- it only checks the `atlasbot/` package tree;
|
||||
- it treats each file independently;
|
||||
- it keeps the threshold explicit so CI can ratchet without guesswork.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _count_lines(path: Path) -> int:
|
||||
"""Return the physical line count for `path`.
|
||||
|
||||
Input:
|
||||
- `path`: a readable Python source file.
|
||||
|
||||
Output:
|
||||
- The number of newline-delimited lines in the file.
|
||||
"""
|
||||
|
||||
return len(path.read_text(encoding="utf-8").splitlines())
|
||||
|
||||
|
||||
def _iter_python_files(root: Path) -> list[Path]:
|
||||
"""List production Python files under `root`.
|
||||
|
||||
Input:
|
||||
- `root`: repository package root to scan.
|
||||
|
||||
Output:
|
||||
- Sorted Python file paths, excluding bytecode and hidden caches.
|
||||
"""
|
||||
|
||||
return sorted(
|
||||
path
|
||||
for path in root.rglob("*.py")
|
||||
if path.is_file() and "__pycache__" not in path.parts and ".venv" not in path.parts
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Run the size gate and return a process exit code."""
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--root", default="atlasbot")
|
||||
parser.add_argument("--max-lines", type=int, default=500)
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(args.root)
|
||||
violations: list[tuple[int, Path]] = []
|
||||
for path in _iter_python_files(root):
|
||||
lines = _count_lines(path)
|
||||
if lines > args.max_lines:
|
||||
violations.append((lines, path))
|
||||
|
||||
if violations:
|
||||
for lines, path in sorted(violations, reverse=True):
|
||||
print(f"{path}: {lines} lines (limit {args.max_lines})")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
||||
164
scripts/publish_test_metrics.py
Normal file → Executable file
164
scripts/publish_test_metrics.py
Normal file → Executable file
@ -20,6 +20,19 @@ import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
QUALITY_SUCCESS_STATES = {"ok", "pass", "passed", "success", "compliant"}
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
|
||||
"""Escape Prometheus label values safely."""
|
||||
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
|
||||
|
||||
|
||||
def _label_str(labels: dict[str, str]) -> str:
|
||||
"""Render Prometheus labels, omitting empty optional values."""
|
||||
parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
|
||||
return "{" + ",".join(parts) + "}" if parts else ""
|
||||
|
||||
|
||||
def _as_int(node: ET.Element, name: str) -> int:
|
||||
raw = node.attrib.get(name) or "0"
|
||||
@ -52,6 +65,39 @@ def _load_junit(path: Path) -> dict[str, int]:
|
||||
return totals
|
||||
|
||||
|
||||
def _load_junit_cases(path: Path) -> list[tuple[str, str]]:
|
||||
if not path.exists():
|
||||
return []
|
||||
|
||||
tree = ET.parse(path)
|
||||
root = tree.getroot()
|
||||
suites: list[ET.Element]
|
||||
if root.tag == "testsuite":
|
||||
suites = [root]
|
||||
elif root.tag == "testsuites":
|
||||
suites = list(root.findall("testsuite"))
|
||||
else:
|
||||
suites = []
|
||||
|
||||
cases: list[tuple[str, str]] = []
|
||||
for suite in suites:
|
||||
for case in suite.findall("testcase"):
|
||||
name = (case.attrib.get("name") or "").strip()
|
||||
classname = (case.attrib.get("classname") or "").strip()
|
||||
if not name:
|
||||
continue
|
||||
test_id = f"{classname}::{name}" if classname else name
|
||||
status = "passed"
|
||||
if case.find("failure") is not None:
|
||||
status = "failed"
|
||||
elif case.find("error") is not None:
|
||||
status = "error"
|
||||
elif case.find("skipped") is not None:
|
||||
status = "skipped"
|
||||
cases.append((test_id, status))
|
||||
return cases
|
||||
|
||||
|
||||
def _load_coverage_percent(path: Path) -> float:
|
||||
if not path.exists():
|
||||
return 0.0
|
||||
@ -63,6 +109,18 @@ def _load_coverage_percent(path: Path) -> float:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _load_gate_rc(path: Path) -> int | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
raw = path.read_text(encoding="utf-8").strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _count_source_lines_over_500(root: Path) -> int:
|
||||
if not root.exists():
|
||||
return 0
|
||||
@ -76,6 +134,45 @@ def _count_source_lines_over_500(root: Path) -> int:
|
||||
return over
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def _sonarqube_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", str(build_dir / "sonarqube-quality-gate.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
status_candidates = [
|
||||
report.get("status"),
|
||||
((report.get("projectStatus") or {}).get("status") if isinstance(report.get("projectStatus"), dict) else None),
|
||||
((report.get("qualityGate") or {}).get("status") if isinstance(report.get("qualityGate"), dict) else None),
|
||||
]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _supply_chain_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_IRONBANK_REPORT", str(build_dir / "ironbank-compliance.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
compliant = report.get("compliant")
|
||||
if isinstance(compliant, bool):
|
||||
return "ok" if compliant else "failed"
|
||||
status_candidates = [report.get("status"), report.get("result"), report.get("compliance")]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _read_text(url: str) -> str:
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=10) as resp:
|
||||
@ -88,7 +185,7 @@ def _counter(metrics: str, suite: str, status: str) -> float:
|
||||
for line in metrics.splitlines():
|
||||
if not line.startswith("platform_quality_gate_runs_total{"):
|
||||
continue
|
||||
if f'job="platform-quality-ci"' not in line:
|
||||
if 'job="platform-quality-ci"' not in line:
|
||||
continue
|
||||
if f'suite="{suite}"' not in line:
|
||||
continue
|
||||
@ -108,7 +205,7 @@ def _post_text(url: str, payload: str) -> None:
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
data=payload.encode("utf-8"),
|
||||
method="POST",
|
||||
method="PUT",
|
||||
headers={"Content-Type": "text/plain"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
@ -124,13 +221,58 @@ def main() -> int:
|
||||
|
||||
junit_path = Path(os.getenv("JUNIT_PATH", "build/junit.xml"))
|
||||
coverage_path = Path(os.getenv("COVERAGE_PATH", "build/coverage.json"))
|
||||
gate_rc_path = Path(os.getenv("QUALITY_GATE_RC_PATH", "build/quality-gate.rc"))
|
||||
docs_rc_path = Path(os.getenv("QUALITY_GATE_DOCS_RC_PATH", "build/docs-naming.rc"))
|
||||
source_root = Path(os.getenv("SOURCE_ROOT", "atlasbot"))
|
||||
build_dir = Path(os.getenv("BUILD_DIR", "build"))
|
||||
branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
|
||||
if branch.startswith("origin/"):
|
||||
branch = branch[len("origin/") :]
|
||||
build_number = os.getenv("BUILD_NUMBER", "")
|
||||
jenkins_job = os.getenv("JOB_NAME", "atlasbot")
|
||||
build_labels = {
|
||||
"suite": suite,
|
||||
"branch": branch,
|
||||
"build_number": build_number or "unknown",
|
||||
"jenkins_job": jenkins_job,
|
||||
}
|
||||
test_case_base_labels = dict(build_labels)
|
||||
|
||||
if not junit_path.exists():
|
||||
junit_candidates = sorted(build_dir.glob("junit*.xml"))
|
||||
if junit_candidates:
|
||||
junit_path = junit_candidates[0]
|
||||
if not coverage_path.exists():
|
||||
for candidate in (
|
||||
build_dir / "coverage.json",
|
||||
build_dir / "coverage-summary.json",
|
||||
build_dir / "coverage" / "coverage-summary.json",
|
||||
):
|
||||
if candidate.exists():
|
||||
coverage_path = candidate
|
||||
break
|
||||
print(f"[metrics] junit_path={junit_path} exists={junit_path.exists()}")
|
||||
print(f"[metrics] coverage_path={coverage_path} exists={coverage_path.exists()}")
|
||||
|
||||
totals = _load_junit(junit_path)
|
||||
test_cases = _load_junit_cases(junit_path)
|
||||
coverage_pct = _load_coverage_percent(coverage_path)
|
||||
gate_rc = _load_gate_rc(gate_rc_path)
|
||||
docs_rc = _load_gate_rc(docs_rc_path)
|
||||
source_lines_over_500 = _count_source_lines_over_500(source_root)
|
||||
passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)
|
||||
outcome = "ok" if totals["tests"] > 0 and totals["failures"] == 0 and totals["errors"] == 0 else "failed"
|
||||
if gate_rc is not None and gate_rc != 0:
|
||||
outcome = "failed"
|
||||
checks = {
|
||||
"tests": "ok" if outcome == "ok" else "failed",
|
||||
"coverage": "ok" if coverage_pct >= 95.0 else "failed",
|
||||
"loc": "ok" if source_lines_over_500 == 0 else "failed",
|
||||
"docs_naming": "ok" if docs_rc == 0 else "failed",
|
||||
"gate_glue": "ok",
|
||||
"sonarqube": _sonarqube_check_status(build_dir),
|
||||
"supply_chain": _supply_chain_check_status(build_dir),
|
||||
}
|
||||
|
||||
metrics = _read_text(f"{pushgateway_url}/metrics")
|
||||
ok_count = _counter(metrics, suite, "ok")
|
||||
@ -156,8 +298,26 @@ def main() -> int:
|
||||
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {coverage_pct:.3f}',
|
||||
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
|
||||
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
|
||||
"# TYPE platform_quality_gate_build_info gauge",
|
||||
f"platform_quality_gate_build_info{_label_str(build_labels)} 1",
|
||||
"# TYPE atlasbot_quality_gate_checks_total gauge",
|
||||
"# TYPE platform_quality_gate_test_case_result gauge",
|
||||
]
|
||||
) + "\n"
|
||||
if test_cases:
|
||||
payload += "\n".join(
|
||||
f"platform_quality_gate_test_case_result{_label_str({**test_case_base_labels, 'test': test_name, 'status': test_status})} 1"
|
||||
for test_name, test_status in test_cases
|
||||
) + "\n"
|
||||
else:
|
||||
payload += (
|
||||
f"platform_quality_gate_test_case_result"
|
||||
f"{_label_str({**test_case_base_labels, 'test': '__no_test_cases__', 'status': 'skipped'})} 1\n"
|
||||
)
|
||||
payload += "\n".join(
|
||||
f'atlasbot_quality_gate_checks_total{{suite="{suite}",check="{check_name}",result="{check_status}"}} 1'
|
||||
for check_name, check_status in checks.items()
|
||||
) + "\n"
|
||||
|
||||
_post_text(f"{pushgateway_url}/metrics/job/platform-quality-ci/suite/{suite}", payload)
|
||||
return 0
|
||||
|
||||
2
testing/__init__.py
Normal file
2
testing/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""Shared testing helpers for atlasbot."""
|
||||
|
||||
24
testing/coverage_exceptions.json
Normal file
24
testing/coverage_exceptions.json
Normal file
@ -0,0 +1,24 @@
|
||||
{
|
||||
"ticket": "atlasbot-coverage-debt",
|
||||
"expires_on": "2026-06-30",
|
||||
"per_file_thresholds": {
|
||||
"atlasbot/engine/answerer/workflow_post.py": 61.0,
|
||||
"atlasbot/engine/answerer/common.py": 75.0,
|
||||
"atlasbot/engine/answerer/post.py": 79.0,
|
||||
"atlasbot/engine/answerer/retrieval_ext.py": 80.5,
|
||||
"atlasbot/engine/answerer/engine.py": 81.0,
|
||||
"atlasbot/knowledge/loader.py": 81.5,
|
||||
"atlasbot/engine/answerer/spine.py": 83.5,
|
||||
"atlasbot/engine/answerer/retrieval.py": 83.5,
|
||||
"atlasbot/engine/answerer/workflow.py": 84.0,
|
||||
"atlasbot/snapshot/builder/format_a.py": 84.5,
|
||||
"atlasbot/engine/answerer/post_ext.py": 86.5,
|
||||
"atlasbot/snapshot/builder/format_b.py": 87.5,
|
||||
"atlasbot/engine/answerer/factsheet.py": 88.0,
|
||||
"atlasbot/matrix/bot.py": 88.0,
|
||||
"atlasbot/snapshot/builder/format_c.py": 90.0,
|
||||
"atlasbot/snapshot/builder/core_a.py": 91.0,
|
||||
"atlasbot/llm/client.py": 93.0,
|
||||
"atlasbot/main.py": 93.0
|
||||
}
|
||||
}
|
||||
108
testing/fakes.py
Normal file
108
testing/fakes.py
Normal file
@ -0,0 +1,108 @@
|
||||
"""Reusable test doubles and settings factories."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from atlasbot.config import Settings
|
||||
|
||||
|
||||
class FakeLLM:
|
||||
"""Deterministic LLM double for pipeline tests.
|
||||
|
||||
Why:
|
||||
- keeps the answer engine tests fast and predictable.
|
||||
|
||||
Input/Output:
|
||||
- accepts the same `chat()` signature as the real client;
|
||||
- returns canned JSON or text snippets based on the prompt content.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[str] = []
|
||||
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
"""Return a prompt-shaped response and remember the last user prompt."""
|
||||
|
||||
prompt = messages[-1]["content"]
|
||||
self.calls.append(prompt)
|
||||
if "normalized" in prompt and "keywords" in prompt:
|
||||
return '{"normalized":"What is Atlas?","keywords":["atlas"]}'
|
||||
if "needs_snapshot" in prompt:
|
||||
return '{"needs_snapshot": true, "answer_style": "direct"}'
|
||||
if "sub-questions" in prompt:
|
||||
return '[{"id":"q1","question":"What is Atlas?","priority":1}]'
|
||||
if "sub-question" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Answer using only the Fact Sheet" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "final response" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Score response quality" in prompt:
|
||||
return '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}'
|
||||
if "claims list" in prompt:
|
||||
return '{"claims": []}'
|
||||
return "{}"
|
||||
|
||||
|
||||
class SlowFakeLLM(FakeLLM):
|
||||
"""Variant that sleeps briefly so timeout guards can be exercised."""
|
||||
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
"""Delay before answering to make budget handling deterministic."""
|
||||
|
||||
await asyncio.sleep(0.02)
|
||||
return await super().chat(messages, model=model, timeout_sec=timeout_sec)
|
||||
|
||||
|
||||
def build_test_settings() -> Settings:
|
||||
"""Create a fully populated `Settings` instance for unit tests."""
|
||||
|
||||
return Settings(
|
||||
matrix_base="",
|
||||
auth_base="",
|
||||
bot_user="",
|
||||
bot_pass="",
|
||||
room_alias="",
|
||||
server_name="",
|
||||
bot_mentions=(),
|
||||
matrix_bots=(),
|
||||
ollama_url="",
|
||||
ollama_model="base",
|
||||
ollama_model_fast="fast",
|
||||
ollama_model_smart="smart",
|
||||
ollama_model_genius="genius",
|
||||
ollama_fallback_model="",
|
||||
ollama_timeout_sec=1.0,
|
||||
ollama_retries=0,
|
||||
ollama_api_key="",
|
||||
http_port=8090,
|
||||
internal_token="",
|
||||
kb_dir="",
|
||||
vm_url="",
|
||||
ariadne_state_url="",
|
||||
ariadne_state_token="",
|
||||
snapshot_ttl_sec=30,
|
||||
thinking_interval_sec=30,
|
||||
quick_time_budget_sec=15.0,
|
||||
smart_time_budget_sec=45.0,
|
||||
genius_time_budget_sec=180.0,
|
||||
conversation_ttl_sec=300,
|
||||
snapshot_pin_enabled=False,
|
||||
queue_enabled=False,
|
||||
nats_url="",
|
||||
nats_stream="",
|
||||
nats_subject="",
|
||||
nats_result_bucket="",
|
||||
fast_max_angles=1,
|
||||
smart_max_angles=1,
|
||||
genius_max_angles=1,
|
||||
fast_max_candidates=1,
|
||||
smart_max_candidates=1,
|
||||
genius_max_candidates=1,
|
||||
fast_llm_calls_max=9,
|
||||
smart_llm_calls_max=17,
|
||||
genius_llm_calls_max=32,
|
||||
llm_limit_multiplier=1.5,
|
||||
state_db_path="/tmp/atlasbot_test_state.db",
|
||||
)
|
||||
1746
tests/test_answerer_support_coverage.py
Normal file
1746
tests/test_answerer_support_coverage.py
Normal file
File diff suppressed because it is too large
Load Diff
83
tests/test_check_coverage_contract.py
Normal file
83
tests/test_check_coverage_contract.py
Normal file
@ -0,0 +1,83 @@
|
||||
"""Tests for Atlasbot's per-file coverage contract script."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
SCRIPT = Path(__file__).resolve().parents[1] / "scripts" / "check_coverage.py"
|
||||
|
||||
|
||||
def _run_check(tmp_path: Path, coverage_payload: dict) -> subprocess.CompletedProcess[str]:
|
||||
"""Run the coverage script against a temporary Atlasbot source tree."""
|
||||
|
||||
coverage_path = tmp_path / "coverage.json"
|
||||
coverage_path.write_text(json.dumps(coverage_payload), encoding="utf-8")
|
||||
return subprocess.run(
|
||||
[sys.executable, str(SCRIPT), str(coverage_path), "--root", "atlasbot", "--threshold", "95"],
|
||||
cwd=tmp_path,
|
||||
text=True,
|
||||
capture_output=True,
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
def test_missing_source_file_fails_coverage_contract(tmp_path: Path) -> None:
|
||||
"""Every non-init production source file must appear in the coverage report."""
|
||||
|
||||
source_root = tmp_path / "atlasbot"
|
||||
source_root.mkdir()
|
||||
(source_root / "__init__.py").write_text("", encoding="utf-8")
|
||||
(source_root / "covered.py").write_text("value = 1\n", encoding="utf-8")
|
||||
(source_root / "missing.py").write_text("value = 2\n", encoding="utf-8")
|
||||
|
||||
result = _run_check(
|
||||
tmp_path,
|
||||
{"files": {"atlasbot/covered.py": {"summary": {"percent_covered": 100.0}}}},
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert "atlasbot/missing.py: missing from coverage report" in result.stdout
|
||||
assert "atlasbot/__init__.py" not in result.stdout
|
||||
|
||||
|
||||
def test_low_or_malformed_file_coverage_fails_contract(tmp_path: Path) -> None:
|
||||
"""Covered files still fail if their per-file percentage is bad or missing."""
|
||||
|
||||
source_root = tmp_path / "atlasbot"
|
||||
source_root.mkdir()
|
||||
(source_root / "low.py").write_text("value = 1\n", encoding="utf-8")
|
||||
(source_root / "malformed.py").write_text("value = 2\n", encoding="utf-8")
|
||||
|
||||
result = _run_check(
|
||||
tmp_path,
|
||||
{
|
||||
"files": {
|
||||
"atlasbot/low.py": {"summary": {"percent_covered": 94.9}},
|
||||
"atlasbot/malformed.py": {"summary": {}},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
assert result.returncode == 1
|
||||
assert "atlasbot/low.py: 94.90% < 95.00%" in result.stdout
|
||||
assert "atlasbot/malformed.py: coverage percent missing" in result.stdout
|
||||
|
||||
|
||||
def test_complete_per_file_coverage_passes_contract(tmp_path: Path) -> None:
|
||||
"""The contract passes when every production file is present above threshold."""
|
||||
|
||||
source_root = tmp_path / "atlasbot"
|
||||
source_root.mkdir()
|
||||
(source_root / "covered.py").write_text("value = 1\n", encoding="utf-8")
|
||||
|
||||
result = _run_check(
|
||||
tmp_path,
|
||||
{"files": {"atlasbot/covered.py": {"summary": {"percent_covered": 95.0}}}},
|
||||
)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert result.stdout == ""
|
||||
@ -1,98 +1,21 @@
|
||||
"""Answer-engine regression tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from dataclasses import replace
|
||||
|
||||
from atlasbot.engine.answerer import AnswerEngine
|
||||
from atlasbot.knowledge.loader import KnowledgeBase
|
||||
from atlasbot.snapshot.builder import SnapshotProvider
|
||||
from atlasbot.config import Settings
|
||||
from testing.fakes import FakeLLM, SlowFakeLLM, build_test_settings
|
||||
|
||||
|
||||
class FakeLLM:
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[str] = []
|
||||
def test_engine_answer_basic() -> None:
|
||||
"""The quick path should answer from the fact sheet."""
|
||||
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
prompt = messages[-1]["content"]
|
||||
self.calls.append(prompt)
|
||||
if "normalized" in prompt and "keywords" in prompt:
|
||||
return '{"normalized":"What is Atlas?","keywords":["atlas"]}'
|
||||
if "needs_snapshot" in prompt:
|
||||
return '{"needs_snapshot": true, "answer_style": "direct"}'
|
||||
if "sub-questions" in prompt:
|
||||
return '[{"id":"q1","question":"What is Atlas?","priority":1}]'
|
||||
if "sub-question" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Answer using only the Fact Sheet" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "final response" in prompt:
|
||||
return "Atlas has 22 nodes."
|
||||
if "Score response quality" in prompt:
|
||||
return '{"confidence":80,"relevance":90,"satisfaction":85,"hallucination_risk":"low"}'
|
||||
if "claims list" in prompt:
|
||||
return '{"claims": []}'
|
||||
return "{}"
|
||||
|
||||
|
||||
class SlowFakeLLM(FakeLLM):
|
||||
async def chat(self, messages, *, model=None, timeout_sec=None):
|
||||
await asyncio.sleep(0.02)
|
||||
return await super().chat(messages, model=model, timeout_sec=timeout_sec)
|
||||
|
||||
|
||||
def _settings() -> Settings:
|
||||
return Settings(
|
||||
matrix_base="",
|
||||
auth_base="",
|
||||
bot_user="",
|
||||
bot_pass="",
|
||||
room_alias="",
|
||||
server_name="",
|
||||
bot_mentions=(),
|
||||
matrix_bots=(),
|
||||
ollama_url="",
|
||||
ollama_model="base",
|
||||
ollama_model_fast="fast",
|
||||
ollama_model_smart="smart",
|
||||
ollama_model_genius="genius",
|
||||
ollama_fallback_model="",
|
||||
ollama_timeout_sec=1.0,
|
||||
ollama_retries=0,
|
||||
ollama_api_key="",
|
||||
http_port=8090,
|
||||
internal_token="",
|
||||
kb_dir="",
|
||||
vm_url="",
|
||||
ariadne_state_url="",
|
||||
ariadne_state_token="",
|
||||
snapshot_ttl_sec=30,
|
||||
thinking_interval_sec=30,
|
||||
quick_time_budget_sec=15.0,
|
||||
smart_time_budget_sec=45.0,
|
||||
genius_time_budget_sec=180.0,
|
||||
conversation_ttl_sec=300,
|
||||
snapshot_pin_enabled=False,
|
||||
queue_enabled=False,
|
||||
nats_url="",
|
||||
nats_stream="",
|
||||
nats_subject="",
|
||||
nats_result_bucket="",
|
||||
fast_max_angles=1,
|
||||
smart_max_angles=1,
|
||||
genius_max_angles=1,
|
||||
fast_max_candidates=1,
|
||||
smart_max_candidates=1,
|
||||
genius_max_candidates=1,
|
||||
fast_llm_calls_max=9,
|
||||
smart_llm_calls_max=17,
|
||||
genius_llm_calls_max=32,
|
||||
llm_limit_multiplier=1.5,
|
||||
state_db_path="/tmp/atlasbot_test_state.db",
|
||||
)
|
||||
|
||||
|
||||
def test_engine_answer_basic():
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -101,9 +24,11 @@ def test_engine_answer_basic():
|
||||
assert "Atlas has 22 nodes" in result.reply
|
||||
|
||||
|
||||
def test_smart_mode_uses_factsheet_path():
|
||||
def test_smart_mode_uses_factsheet_path() -> None:
|
||||
"""Smart mode should stay on the factsheet branch for direct cluster questions."""
|
||||
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -113,9 +38,11 @@ def test_smart_mode_uses_factsheet_path():
|
||||
assert "time budget" not in result.reply.lower()
|
||||
|
||||
|
||||
def test_genius_mode_uses_factsheet_path():
|
||||
def test_genius_mode_uses_factsheet_path() -> None:
|
||||
"""Genius mode should also return the factsheet answer for the same query."""
|
||||
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -125,9 +52,11 @@ def test_genius_mode_uses_factsheet_path():
|
||||
assert "time budget" not in result.reply.lower()
|
||||
|
||||
|
||||
def test_plain_math_question_is_rejected_for_cluster_modes():
|
||||
def test_plain_math_question_is_rejected_for_cluster_modes() -> None:
|
||||
"""The bot should keep users on cluster questions instead of generic math."""
|
||||
|
||||
llm = FakeLLM()
|
||||
settings = _settings()
|
||||
settings = build_test_settings()
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
@ -136,9 +65,11 @@ def test_plain_math_question_is_rejected_for_cluster_modes():
|
||||
assert "focus on Titan cluster operations" in result.reply
|
||||
|
||||
|
||||
def test_quick_mode_time_budget_guard():
|
||||
def test_quick_mode_time_budget_guard() -> None:
|
||||
"""A slow model call should trip the quick-mode budget guard."""
|
||||
|
||||
llm = SlowFakeLLM()
|
||||
settings = replace(_settings(), quick_time_budget_sec=0.01)
|
||||
settings = replace(build_test_settings(), quick_time_budget_sec=0.01)
|
||||
kb = KnowledgeBase("")
|
||||
snapshot = SnapshotProvider(settings)
|
||||
engine = AnswerEngine(settings, llm, kb, snapshot)
|
||||
|
||||
810
tests/test_quality_gate_paths.py
Normal file
810
tests/test_quality_gate_paths.py
Normal file
@ -0,0 +1,810 @@
|
||||
"""Targeted quality-gate coverage for runtime and answerer orchestration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from atlasbot.api.http import Api, AnswerRequest
|
||||
from atlasbot.config import MatrixBotConfig
|
||||
from atlasbot.engine.answerer import (
|
||||
AnswerEngine,
|
||||
AnswerResult,
|
||||
AnswerScores,
|
||||
ClaimItem,
|
||||
EvidenceItem,
|
||||
ModePlan,
|
||||
)
|
||||
from atlasbot.engine.answerer.common import _mode_plan
|
||||
from atlasbot.engine.answerer.engine import AnswerEngine as EngineClass
|
||||
from atlasbot.engine.answerer.workflow import run_answer
|
||||
from atlasbot.engine.answerer.workflow_post import finalize_answer
|
||||
from atlasbot.knowledge.loader import KnowledgeBase
|
||||
from atlasbot.llm.client import LLMClient, LLMError, parse_json
|
||||
from atlasbot.main import result_scores
|
||||
from atlasbot.matrix.bot import MatrixBot, MatrixClient
|
||||
from atlasbot.queue.nats import QueueManager
|
||||
from atlasbot.snapshot.builder import SnapshotProvider, build_summary
|
||||
from testing.fakes import build_test_settings
|
||||
from tests.test_support_modules import _rich_snapshot
|
||||
|
||||
|
||||
class StaticSnapshot:
|
||||
"""Return a fixed snapshot for answer-engine tests."""
|
||||
|
||||
def __init__(self, payload: dict[str, Any]) -> None:
|
||||
self._payload = payload
|
||||
|
||||
def get(self) -> dict[str, Any]:
|
||||
"""Return the stored snapshot payload."""
|
||||
|
||||
return self._payload
|
||||
|
||||
|
||||
class PromptLLM:
|
||||
"""Map prompt fragments to canned responses for workflow tests."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.calls: list[tuple[str, str]] = []
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, str]],
|
||||
*,
|
||||
model: str | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
) -> str:
|
||||
"""Return the scripted response for the latest user prompt."""
|
||||
|
||||
del timeout_sec
|
||||
system = messages[0]["content"]
|
||||
prompt = messages[-1]["content"]
|
||||
self.calls.append((model or "", prompt))
|
||||
if "Given chunk summaries, score relevance" in prompt:
|
||||
items = []
|
||||
for line in prompt.splitlines():
|
||||
if line.startswith("- c"):
|
||||
chunk_id = line.split()[1].rstrip(":")
|
||||
score = 95 if "cpu" in line.lower() or "synapse" in line.lower() else 80
|
||||
items.append({"id": chunk_id, "score": score, "reason": "relevant"})
|
||||
return json.dumps(items or [{"id": "c0", "score": 90, "reason": "relevant"}])
|
||||
direct = self._direct_response(prompt)
|
||||
if direct is not None:
|
||||
return direct
|
||||
response = self._lookup_response(system, prompt)
|
||||
if response is not None:
|
||||
return response
|
||||
raise AssertionError(f"Unhandled prompt:\nSYSTEM={system}\nPROMPT={prompt}")
|
||||
|
||||
def _direct_response(self, prompt: str) -> str | None:
|
||||
"""Return direct string responses for a few prompt families."""
|
||||
|
||||
if "Answer the sub-question using the context" in prompt:
|
||||
return "The best runbook path is runbooks/fix.md." if "runbook" in prompt.lower() else "synapse is hottest with cpu 95 on titan-01."
|
||||
markers = [
|
||||
("Write a final response to the user", "titan-99 is hottest and the runbook is runbooks/wrong.md."),
|
||||
("Draft:", "synapse is hottest at cpu 95 on titan-01, and amd64 nodes remain separate from raspberry hardware."),
|
||||
("Return JSON with fields: issues", '{"issues":["mention the exact runbook"],"missing_data":[],"risky_claims":[]}'),
|
||||
("command (string), rationale", '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}'),
|
||||
("confidence (0-100)", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
|
||||
]
|
||||
for marker, response in markers:
|
||||
if marker in prompt:
|
||||
if marker == "Draft:" and "If Facts are provided" not in prompt:
|
||||
continue
|
||||
return response
|
||||
return None
|
||||
|
||||
def _lookup_response(self, system: str, prompt: str) -> str | None:
    """Return canned responses for prompt markers.

    Scans an ordered (marker, response) table and returns the response of
    the FIRST marker contained in *prompt*, or ``None`` when nothing
    matches. The ``system`` message is ignored: routing is purely on the
    user-prompt text.
    """
    del system
    # NOTE(review): ordering matters for overlapping markers (e.g. several
    # entries could match a "Return JSON with field: lines" prompt), so keep
    # more specific markers ahead of broader ones when editing this table.
    markers = [
        (
            "normalized (string), keywords",
            '{"normalized":"Which namespace is hottest on raspberry hardware and which runbook should I use?","keywords":["namespace","hottest","cpu","raspberry","runbook"]}',
        ),
        (
            "needs_snapshot (bool)",
            '{"needs_snapshot":true,"needs_kb":true,"needs_tool":true,"answer_style":"insightful","follow_up":false,"question_type":"open_ended","focus_entity":"namespace","focus_metric":"cpu"}',
        ),
        (
            "Generate up to",
            '[{"id":"q1","question":"Which namespace is hottest?","priority":5,"kind":"metric"},{"id":"q2","question":"Which runbook applies?","priority":4,"kind":"context"}]',
        ),
        ("Choose the run that best aligns", '{"selected_index": 1}'),
        ("AvailableKeys:", '{"keys":["namespace_cpu_top","namespace_pods","hardware_nodes"]}'),
        ("Return JSON with field: missing", '{"missing":[]}'),
        ("Return JSON with fields: prefixes", '{"prefixes":["namespace","hottest"]}'),
        ("fact_types", '{"fact_types":["namespace_cpu_top","hardware_nodes"]}'),
        ("Return JSON with field: signals", '{"signals":["cpu","synapse","raspberry"]}'),
        # The next four prompt families all share the same fact lines so the
        # engine sees a consistent snapshot regardless of which path asks.
        (
            "Signals:",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "Return JSON with field: lines",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "CandidateFacts:",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "FactCandidates:",
            '{"lines":["namespace_cpu_top: synapse=95","hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"]}',
        ),
        (
            "Suggest a safe, read-only command",
            '{"command":"kubectl top pods -n synapse","rationale":"verify namespace cpu"}',
        ),
        ("Pick the best candidate for accuracy and grounding", '{"best": 1}'),
        ("Pick the best draft for accuracy", '{"best": 1}'),
        ("Pick the best runbook path", '{"path":"runbooks/fix.md"}'),
        # Deliberately wrong runbook here so downstream repair passes have
        # something to correct (see the finalize/evidence tests).
        ("Check the draft against the context", "synapse is hottest on titan-01, but see runbooks/wrong.md."),
        ("Answer using the fact", "Latest metrics: namespace_cpu_top: synapse=95."),
        ("Rewrite the draft to only include claims supported by FactsUsed", "synapse is hottest on titan-01."),
        ("Check if an open-ended answer includes at least two concrete signals", '{"ok": false, "reason": "needs more detail"}'),
        ("ok (bool), reason (string)", '{"ok": false, "reason": "needs more detail"}'),
        ("Rewrite the answer using the critique", "synapse is hottest at cpu 95 on titan-01. Use runbooks/fix.md."),
        ("Return JSON with field: note", '{"note":"The answer would benefit from per-pod CPU samples."}'),
        ("Score response quality", '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}'),
        (
            "Return JSON with fields: confidence (0-100), relevance (0-100), satisfaction (0-100), hallucination_risk (low|medium|high).",
            '{"confidence":88,"relevance":91,"satisfaction":86,"hallucination_risk":"low"}',
        ),
        (
            "claims list",
            '{"claims":[{"id":"c1","claim":"synapse is hottest","evidence":[{"path":"hottest.cpu.node","reason":"snapshot"}]}]}',
        ),
        ("Select the claims most relevant", '{"claim_ids":["c1"]}'),
        ("Follow-up:", "titan-99 is still hottest."),
        ("Rewrite the answer to be concise and directly answer the question", "Latest metrics: namespace_cpu_top: synapse=95."),
        ("Deduplicate repeated statements", "Latest metrics: namespace_cpu_top: synapse=95."),
        ("Answer using only the Fact Sheet", "Fact sheet answer: namespace_cpu_top: synapse=95. Use runbooks/fix.md."),
    ]
    for marker, response in markers:
        if marker in prompt:
            return response
    # No marker matched; the caller decides how to treat an unknown prompt.
    return None
|
||||
|
||||
|
||||
class TimeoutLLM:
|
||||
"""Raise a timeout as soon as the workflow makes an LLM call."""
|
||||
|
||||
async def chat(
|
||||
self,
|
||||
messages: list[dict[str, str]],
|
||||
*,
|
||||
model: str | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
) -> str:
|
||||
"""Trigger the workflow timeout handling branch."""
|
||||
|
||||
del messages, model, timeout_sec
|
||||
raise TimeoutError("boom")
|
||||
|
||||
|
||||
class LimitLLM(PromptLLM):
    """Prompt-driven LLM double for tests that exhaust the engine's call budget.

    Inherits all canned-response behavior from ``PromptLLM`` unchanged; the
    distinct type only exists so budget-limit tests read clearly.
    """
|
||||
|
||||
|
||||
def _settings(tmp_path: Path, **overrides: Any):
    """Build settings with an isolated claim-store path.

    Starts from the shared test settings, points the state database at a
    file under *tmp_path*, and applies any caller-supplied overrides.
    """
    state_db = tmp_path / "state.db"
    base = build_test_settings()
    return replace(base, state_db_path=str(state_db), **overrides)
|
||||
|
||||
|
||||
def _make_engine(tmp_path: Path, llm: Any, **setting_overrides: Any) -> AnswerEngine:
    """Construct a real engine with static snapshot and KB doubles.

    The knowledge base's read methods are stubbed out with lambdas so the
    engine sees a fixed runbook and fact set without touching the disk.
    """
    kb = KnowledgeBase("")
    # Fixed chunk fixture: one runbook path plus the two fact lines the
    # canned LLM responses reference.
    chunk_fixture = [
        "runbooks/fix.md",
        "namespace_cpu_top: synapse=95",
        "hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
    ]
    kb.summary = lambda: "KB summary."  # type: ignore[method-assign]
    kb.runbook_titles = lambda limit=5: "Relevant runbooks:\n- Fix (runbooks/fix.md)"  # type: ignore[method-assign]
    kb.runbook_paths = lambda limit=10: ["runbooks/fix.md"]  # type: ignore[method-assign]
    kb.chunk_lines = lambda max_files=20, max_chars=6000: list(chunk_fixture)  # type: ignore[method-assign]
    return AnswerEngine(
        _settings(tmp_path, **setting_overrides),
        llm,
        kb,
        StaticSnapshot(_rich_snapshot()),
    )  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_engine_helper_methods_cover_state_and_followup(tmp_path: Path) -> None:
    """Cover answer-engine helper branches outside the main workflow.

    Exercises ``_answer_stock``, ``_synthesize_answer``, ``_score_answer``,
    ``_extract_claims``, ``_dedup_reply``, the conversation-state store, and
    the follow-up path, all through a scripted ``call_llm`` double.
    """
    settings = _settings(tmp_path)

    # Minimal LLM double: every chat call returns the same canned string.
    class StockLLM:
        async def chat(self, messages, *, model=None, timeout_sec=None):
            del messages, model, timeout_sec
            return "stock reply"

    engine = EngineClass(settings, StockLLM(), KnowledgeBase(""), StaticSnapshot(_rich_snapshot()))

    # Scripted stand-in for the engine's internal LLM helper. Responses are
    # keyed on the ``tag`` each helper passes; only the "synth" tag varies
    # with the prompt (to yield two distinguishable drafts).
    async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
        del _system, context, model
        static = {
            "draft_select": '{"best": 2}',
            "score": '{"confidence":90,"relevance":91,"satisfaction":92,"hallucination_risk":"low"}',
            # Includes an empty evidence path and a bare string on purpose:
            # malformed entries must be dropped by claim extraction.
            "claim_map": '{"claims":[{"id":"c1","claim":"cpu is high","evidence":[{"path":"hottest.cpu.node","reason":"why"},{"path":"","reason":"skip"}]},"bad"]}',
            "select_claims": '{"claim_ids":["c1"]}',
            "followup": "titan-99 is hottest. The draft is correct.",
            "followup_fix": "titan-01 is hottest.",
            "dedup_followup": "The draft is correct. titan-01 is hottest.",
            "dedup": "deduped",
        }
        if tag == "synth":
            return "draft one" if "DraftIndex: 1" in _prompt else "draft two"
        if tag in static:
            return static[tag]
        # Any unexpected tag is a test bug; fail loudly with the tag name.
        raise AssertionError(tag)

    stock = asyncio.run(engine._answer_stock("hello"))
    assert stock.reply == "stock reply"

    # "draft_select" answers {"best": 2}, which lines up with "draft two"
    # being the expected winner below.
    plan = replace(_mode_plan(settings, "smart"), drafts=2, parallelism=2)
    synth = asyncio.run(
        engine._synthesize_answer(
            "Which node is hottest?",
            ["draft one", "draft two"],
            "ctx",
            {"question_type": "metric", "answer_style": "direct"},
            plan,
            call_llm,
        )
    )
    # Empty draft list: the engine generates drafts itself via the "synth" tag.
    synth_empty = asyncio.run(
        engine._synthesize_answer(
            "Which node is hottest?",
            [],
            "ctx",
            {"question_type": "metric", "answer_style": "direct"},
            replace(plan, drafts=1, parallelism=1),
            call_llm,
        )
    )
    assert synth == "draft two"
    assert synth_empty == "draft two"

    scored = asyncio.run(engine._score_answer("q", "a", plan, call_llm))
    assert scored.hallucination_risk == "low"
    # With scoring disabled, confidence falls back to 60 (no LLM call).
    assert asyncio.run(engine._score_answer("q", "a", replace(plan, use_scores=False), call_llm)).confidence == 60

    summary = build_summary(_rich_snapshot())
    claims = asyncio.run(engine._extract_claims("q", "a", summary, ["fact"], call_llm))
    # Only the well-formed evidence entry from "claim_map" should survive.
    assert claims and claims[0].evidence[0].path == "hottest.cpu.node"
    assert asyncio.run(engine._extract_claims("q", "", summary, [], call_llm)) == []
    # Repeated sentences trigger the dedup LLM call; a single sentence is
    # passed through unchanged.
    assert asyncio.run(engine._dedup_reply("one. one. one.", plan, call_llm, "dedup")) == "deduped"
    assert asyncio.run(engine._dedup_reply("single answer", plan, call_llm, "dedup")) == "single answer"

    # Conversation-state store round trip, the None-key miss, and cleanup.
    engine._store_state("conv-1", claims, summary, _rich_snapshot(), True)
    state = engine._get_state("conv-1")
    assert state and state.snapshot
    assert engine._get_state(None) is None
    engine._cleanup_state()

    # Follow-up flow: "followup" says titan-99 but the final text must name
    # titan-01 — presumably the followup_fix/dedup passes correct the draft
    # (verified only through the assertion below).
    followup = asyncio.run(
        engine._answer_followup(
            "Which hardware hotspot is there?",
            state,
            summary,
            {"question_type": "diagnostic"},
            plan,
            call_llm,
        )
    )
    assert "titan-01" in followup
    assert asyncio.run(engine._select_claims("what about that?", claims, plan, call_llm)) == ["c1"]
    assert asyncio.run(engine._select_claims("what about that?", [], plan, call_llm)) == []
|
||||
|
||||
|
||||
def test_finalize_answer_covers_post_processing_branches(tmp_path: Path) -> None:
    """Exercise evidence-fix, runbook, guard, critic, and gap paths.

    ``finalize_answer`` is driven with a deliberately wrong synthesized
    draft (wrong node, wrong runbook) so each post-processing stage has
    something to repair, and an observer records which stages ran.
    """
    settings = _settings(tmp_path)
    plan = replace(_mode_plan(settings, "smart"), use_gap=True, use_critic=True)
    summary = build_summary(_rich_snapshot())
    summary_lines = [
        "namespace_cpu_top: synapse=95",
        "hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)",
        "runbooks/fix.md",
    ]
    observed: list[tuple[str, str]] = []

    # Tag-keyed responses for every post-processing stage; an unknown tag
    # fails with the offending prompt so new stages are caught immediately.
    async def call_llm(_system: str, _prompt: str, *, context: str | None = None, model: str | None = None, tag: str = "") -> str:
        del _system, context, model
        responses = {
            "runbook_select": '{"path":"runbooks/fix.md"}',
            # Evidence-fix stages still answer wrongly, forcing later
            # enforcement/guard stages to do the real repair.
            "evidence_fix": "titan-99 is hottest and see runbooks/wrong.md.",
            "evidence_fix_enforce": "titan-99 is hottest and see runbooks/wrong.md.",
            "metric_direct": "no numbers here",
            "runbook_enforce": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
            "evidence_guard": "Non-Raspberry Pi nodes: amd64 (titan-02). Use runbooks/fix.md.",
            "focus_fix": "Latest metrics: namespace_cpu_top: synapse=95.",
            "insight_guard": '{"ok": false, "reason": "needs more detail"}',
            "insight_fix": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
            "critic": '{"issues":["too vague"]}',
            "revise": "Latest metrics: namespace_cpu_top: synapse=95. Use runbooks/fix.md.",
            "gap": '{"note":"The answer would benefit from per-pod CPU samples."}',
        }
        if tag not in responses:
            raise AssertionError(_prompt)
        return responses[tag]

    # Engine double exposing only the helpers finalize_answer calls back
    # into; synthesis returns the "bad" draft that starts the repair chain.
    class FinalizeEngine:
        async def _synthesize_answer(self, *args: Any) -> str:
            return "titan-99 is hottest and see runbooks/wrong.md."

        async def _dedup_reply(self, reply: str, _plan: ModePlan, _call_llm, tag: str) -> str:
            assert tag == "dedup"
            return reply

        async def _score_answer(self, _question: str, _reply: str, _plan: ModePlan, _call_llm) -> AnswerScores:
            return AnswerScores(80, 81, 82, "low")

        async def _extract_claims(self, _question: str, _reply: str, _summary: dict[str, Any], _facts_used: list[str], _call_llm) -> list[ClaimItem]:
            return [ClaimItem(id="c1", claim="cpu high", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot")])]

    reply, scores, claims = asyncio.run(
        finalize_answer(
            engine=FinalizeEngine(),
            call_llm=call_llm,
            normalized="Which namespace is hottest on raspberry hardware and which runbook should I use?",
            subanswers=["synapse is hottest"],
            context="ctx",
            classify={"question_type": "open_ended", "answer_style": "direct"},
            plan=plan,
            summary=summary,
            summary_lines=summary_lines,
            metric_facts=["namespace_cpu_top: synapse=95"],
            key_facts=["namespace_cpu_top: synapse=95"],
            facts_used=["hardware_nodes: rpi5=(titan-01) | amd64=(titan-02)"],
            allowed_nodes=["titan-01", "titan-02"],
            allowed_namespaces=["synapse"],
            runbook_paths=["runbooks/fix.md"],
            lowered_question="which namespace is hottest on raspberry hardware and which runbook should i use?",
            force_metric=True,
            keyword_tokens=["namespace", "cpu", "raspberry"],
            question_tokens=["namespace", "cpu", "raspberry"],
            snapshot_context="ClusterSnapshot:\nnamespace_cpu_top: synapse=95",
            observer=lambda stage, note: observed.append((stage, note)),
            mode="smart",
            metric_keys=["namespace_cpu_top"],
        )
    )
    # The final reply must carry the correct runbook and the real metric,
    # i.e. the wrong draft was repaired.
    assert "runbooks/fix.md" in reply
    assert "synapse=95" in reply
    assert scores.confidence == 80
    assert claims and claims[0].id == "c1"
    # Observer proves the repair, critic, and gap stages actually ran.
    assert ("evidence_fix", "repairing missing evidence") in observed
    assert ("critic", "reviewing") in observed
    assert ("gap", "checking gaps") in observed
|
||||
|
||||
|
||||
def test_run_answer_deep_workflow_persists_state(tmp_path: Path) -> None:
    """Drive the full smart workflow through retrieval, synthesis, and post-processing."""
    engine = _make_engine(tmp_path, PromptLLM())
    stage_notes: list[tuple[str, str]] = []
    question = "Run limitless Which namespace is hottest on raspberry hardware and which runbook should I use?"
    result = asyncio.run(
        run_answer(
            engine,
            question,
            mode="smart",
            history=[{"q": "before", "a": "earlier"}],
            observer=lambda stage, note: stage_notes.append((stage, note)),
            conversation_id="room-1",
            snapshot_pin=True,
        )
    )

    # The final answer names the correct runbook and surfaces the tool hint.
    assert "runbooks/fix.md" in result.reply
    assert result.meta["tool_hint"]["command"] == "kubectl top pods -n synapse"

    # Conversation state (claims + pinned snapshot) must have been persisted.
    persisted = engine._get_state("room-1")
    assert persisted is not None
    assert persisted.claims
    assert persisted.snapshot

    # Every major workflow stage reported progress to the observer.
    seen_stages = {stage for stage, _ in stage_notes}
    for expected in ("normalize", "route", "retrieve", "tool", "subanswers", "synthesize"):
        assert expected in seen_stages
|
||||
|
||||
|
||||
def test_run_answer_followup_and_limits(tmp_path: Path) -> None:
    """Cover follow-up routing, reasoning limit, and timeout fallbacks."""

    # Overrides a few markers so the question is classified as a follow-up
    # of stored state; everything else falls through to PromptLLM's table.
    class FollowupLLM(PromptLLM):
        def _lookup_response(self, system: str, prompt: str) -> str | None:
            if "normalized (string), keywords" in prompt:
                return '{"normalized":"What about that?","keywords":["that"]}'
            if "needs_snapshot (bool)" in prompt:
                return '{"needs_snapshot":true,"needs_kb":false,"needs_tool":false,"answer_style":"direct","follow_up":false,"question_type":"open_ended","focus_entity":"unknown","focus_metric":"unknown"}'
            if "Select the claims most relevant" in prompt:
                return '{"claim_ids":["c1"]}'
            if "Follow-up:" in prompt:
                return "titan-99 is still hottest."
            return super()._lookup_response(system, prompt)

    engine = _make_engine(tmp_path, FollowupLLM())
    summary = build_summary(_rich_snapshot())
    # Pre-seed conversation state so "What about that?" resolves against a
    # stored claim whose evidence value is titan-01.
    engine._store_state(
        "conv-1",
        [ClaimItem(id="c1", claim="synapse is hottest", evidence=[EvidenceItem(path="hottest.cpu.node", reason="snapshot", value_at_claim="titan-01")])],
        summary,
        _rich_snapshot(),
        True,
    )
    followup = asyncio.run(
        run_answer(
            engine,
            "Run limitless What about that?",
            mode="smart",
            conversation_id="conv-1",
            snapshot_pin=True,
        )
    )
    # The LLM double says titan-99, but the grounded evidence (titan-01)
    # must win in the reply.
    assert "titan-01" in followup.reply

    # Call cap of 1 with no multiplier headroom: the engine should bail out
    # with its reasoning-limit message and flag the limit in meta.
    limit_engine = _make_engine(
        tmp_path / "limit",
        LimitLLM(),
        fast_llm_calls_max=1,
        llm_limit_multiplier=1.0,
    )
    limited = asyncio.run(run_answer(limit_engine, "tell me about cpu and runbooks", mode="custom"))
    assert "reasoning limit" in limited.reply
    assert limited.meta["llm_limit_hit"] is True

    # TimeoutLLM raises immediately; with a tiny budget the workflow should
    # fall back to its time-budget message and flag it in meta.
    timeout_engine = _make_engine(
        tmp_path / "timeout",
        TimeoutLLM(),
        smart_time_budget_sec=0.1,
        ollama_timeout_sec=0.1,
    )
    timed_out = asyncio.run(run_answer(timeout_engine, "Run limitless tell me about cpu and runbooks", mode="smart"))
    assert "time budget" in timed_out.reply.lower()
    assert timed_out.meta["time_budget_hit"] is True
|
||||
|
||||
|
||||
def test_api_matrix_queue_main_and_store_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Exercise remaining API, Matrix, queue, main, and store branches.

    One large end-to-end sweep: FastAPI request validation, Matrix
    login/sync/heartbeat handling (including handler timeout and error
    paths), the NATS-backed queue manager with fakes, and score parsing.
    """
    settings = _settings(
        tmp_path,
        internal_token="secret",
        queue_enabled=True,
        matrix_bots=(MatrixBotConfig("bot", "pw", ("atlas",), "quick"),),
    )

    # Echo handler: reply is "<question>:<mode>" so routing is observable.
    async def handler(
        question: str,
        mode: str,
        history: list[dict[str, str]] | None,
        conversation_id: str | None,
        snapshot_pin: bool | None,
    ) -> AnswerResult:
        del history, conversation_id, snapshot_pin
        return AnswerResult(question + ":" + mode, AnswerScores(1, 2, 3, "low"), {"mode": mode})

    api = Api(settings, handler)
    from fastapi.testclient import TestClient

    client = TestClient(api.app)
    # Empty body and blank question are 400s; "content" is accepted as an
    # alias for the question and answered in the bot's default "quick" mode.
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={}).status_code == 400
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"content": "hi"}).json()["reply"] == "hi:quick"
    assert client.post("/v1/answer", headers={"X-Internal-Token": "secret"}, json={"question": " "}).status_code == 400
    # The request model must not strip whitespace from the message field.
    assert AnswerRequest(message=" hello ").message == " hello "

    # Minimal httpx response stand-in used by the Matrix client fakes below.
    class FakeResp:
        def __init__(self, payload: dict[str, Any], *, status_code: int = 200) -> None:
            self._payload = payload
            self.status_code = status_code

        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("GET", "http://x"), response=httpx.Response(self.status_code))

        def json(self) -> dict[str, Any]:
            return self._payload

    # Fake Matrix homeserver: login returns a token; room directory lookup
    # 404s so resolve_room exercises its not-found branch.
    class MatrixAsyncClient:
        async def __aenter__(self) -> "MatrixAsyncClient":
            return self

        async def __aexit__(self, *exc: object) -> None:
            return None

        async def post(self, url: str, json: dict[str, Any] | None = None, headers: dict[str, str] | None = None) -> FakeResp:
            del json, headers
            if "login" in url:
                return FakeResp({"access_token": "tok"})
            return FakeResp({})

        async def get(self, url: str, headers: dict[str, str] | None = None, params: dict[str, Any] | None = None) -> FakeResp:
            del headers, params
            if "directory/room" in url:
                return FakeResp({}, status_code=404)
            return FakeResp({"next_batch": "n1", "rooms": {"join": {}}})

    monkeypatch.setattr("atlasbot.matrix.bot.httpx.AsyncClient", lambda timeout=None: MatrixAsyncClient())
    matrix_client = MatrixClient(settings, settings.matrix_bots[0])
    assert asyncio.run(matrix_client.login()) == "tok"
    # 404 on directory lookup resolves to an empty room id.
    assert asyncio.run(matrix_client.resolve_room("tok")) == ""

    bot = MatrixBot(settings, settings.matrix_bots[0], SimpleNamespace(answer=None), handler)

    # Scripted bot client: the first sync delivers one addressed message
    # (plus events the bot must ignore); the second sync raises to stop.
    class BotClient:
        def __init__(self) -> None:
            self.sent: list[str] = []
            self.sync_calls = 0

        async def login(self) -> str:
            return "tok"

        async def resolve_room(self, token: str) -> str:
            del token
            return "!room"

        async def join_room(self, token: str, room_id: str) -> None:
            del token, room_id

        async def send_message(self, token: str, room_id: str, text: str) -> None:
            del token, room_id
            self.sent.append(text)

        async def sync(self, token: str, since: str | None) -> dict[str, Any]:
            del token, since
            self.sync_calls += 1
            if self.sync_calls == 1:
                return {
                    "next_batch": "n1",
                    "rooms": {
                        "join": {
                            "!room": {
                                "timeline": {
                                    "events": [
                                        {"type": "m.room.member", "sender": "user"},
                                        {"type": "m.room.message", "sender": "bot", "content": {"body": "ignore"}},
                                        {"type": "m.room.message", "sender": "user", "content": {"body": "atlas quick hi"}},
                                    ]
                                }
                            }
                        }
                    },
                }
            raise RuntimeError("stop")

    bot._client = BotClient()

    # Run the bot loop briefly, then cancel it; the addressed message should
    # at least have produced a "Thinking" heartbeat.
    async def run_bot_once() -> None:
        task = asyncio.create_task(bot.run())
        await asyncio.sleep(0.01)
        task.cancel()
        with pytest.raises(asyncio.CancelledError):
            await task

    asyncio.run(run_bot_once())
    assert any("Thinking" in msg for msg in bot._client.sent)

    # Handler slower than the quick-mode budget: heartbeat must emit the
    # time-budget apology rather than the late answer.
    timeout_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001, quick_time_budget_sec=0.01), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
    timeout_bot._client = SimpleNamespace(
        sent=[],
        send_message=lambda token, room_id, text: asyncio.sleep(0, result=timeout_bot._client.sent.append(text)),
    )

    async def sleepy_handler(question: str, mode: str, history, conversation_id, observer):
        del question, mode, history, conversation_id, observer
        await asyncio.sleep(1.2)
        return AnswerResult("late", AnswerScores(1, 2, 3, "low"), {})

    timeout_bot._answer_handler = sleepy_handler
    asyncio.run(timeout_bot._answer_with_heartbeat("tok", "!room", "q", "quick"))
    assert any("time budget" in msg for msg in timeout_bot._client.sent)

    # Handler that raises: the bot should report an internal error message.
    error_bot = MatrixBot(replace(settings, thinking_interval_sec=0.001), settings.matrix_bots[0], SimpleNamespace(answer=None), None)
    error_bot._client = SimpleNamespace(
        sent=[],
        send_message=lambda token, room_id, text: asyncio.sleep(0, result=error_bot._client.sent.append(text)),
    )

    async def failing_handler(question: str, mode: str, history, conversation_id, observer):
        del question, mode, history, conversation_id, observer
        raise RuntimeError("boom")

    error_bot._answer_handler = failing_handler
    asyncio.run(error_bot._answer_with_heartbeat("tok", "!room", "q", "smart"))
    assert any("internal error" in msg for msg in error_bot._client.sent)

    # Queue disabled: submit() must call the handler directly.
    class DirectQueue:
        async def __call__(self, payload: dict[str, Any]) -> dict[str, Any]:
            return {"reply": payload["question"]}

    direct_qm = QueueManager(replace(settings, queue_enabled=False), DirectQueue())
    assert asyncio.run(direct_qm.submit({"question": "direct"})) == {"reply": "direct"}

    # NATS fakes: a subscription that immediately yields a canned reply, a
    # message carrier that records acks, and a JetStream whose consumer
    # fails once (retry path) then cancels (loop exit path).
    class FakeSub:
        async def next_msg(self, timeout: float) -> Any:
            del timeout
            return SimpleNamespace(data=json.dumps({"reply": "queued"}).encode())

        async def unsubscribe(self) -> None:
            return None

    class FakeMsg:
        def __init__(self, raw: bytes, reply: str = "reply") -> None:
            self.data = raw
            self.reply = reply
            self.acked = False

        async def ack(self) -> None:
            self.acked = True

    published: list[tuple[str, bytes]] = []

    class ExistingStreamJS:
        async def stream_info(self, stream: str) -> None:
            # Stream already exists, so start() must not try to create it.
            assert stream == settings.nats_stream

        async def publish(self, subject: str, data: bytes) -> None:
            published.append((subject, data))

        async def pull_subscribe(self, subject: str, durable: str):
            del subject, durable

            class Pull:
                def __init__(self) -> None:
                    self.calls = 0

                async def fetch(self, count: int, timeout: float) -> list[FakeMsg]:
                    del count, timeout
                    self.calls += 1
                    if self.calls == 1:
                        raise RuntimeError("retry")
                    raise asyncio.CancelledError

            return Pull()

    class FakeNats:
        def __init__(self) -> None:
            self.drained = False

        async def connect(self, url: str) -> None:
            assert url == settings.nats_url

        def jetstream(self) -> ExistingStreamJS:
            return ExistingStreamJS()

        def new_inbox(self) -> str:
            return "inbox"

        async def subscribe(self, reply: str) -> FakeSub:
            assert reply == "inbox"
            return FakeSub()

        async def publish(self, reply: str, data: bytes) -> None:
            published.append((reply, data))

        async def drain(self) -> None:
            self.drained = True

    monkeypatch.setattr("atlasbot.queue.nats.NATS", FakeNats)
    queue = QueueManager(settings, DirectQueue())
    asyncio.run(queue.start())
    # Round trip through the fake JetStream: submit publishes and awaits
    # the canned inbox reply.
    assert asyncio.run(queue.submit({"question": "queued", "mode": "smart"})) == {"reply": "queued"}

    # Malformed, well-formed, and handler-failure messages must all be acked
    # so they are never redelivered.
    invalid_msg = FakeMsg(b"not-json")
    asyncio.run(queue._handle_message(invalid_msg))
    assert invalid_msg.acked is True
    handled_msg = FakeMsg(json.dumps({"payload": {"question": "x"}, "reply": "reply"}).encode())
    asyncio.run(queue._handle_message(handled_msg))
    assert handled_msg.acked is True
    failing_queue = QueueManager(settings, lambda payload: (_ for _ in ()).throw(RuntimeError("boom")))
    failing_queue._nc = FakeNats()
    failing_queue._js = ExistingStreamJS()
    failure_msg = FakeMsg(json.dumps({"payload": {"question": "x"}}).encode())

    async def failing_handler(payload: dict[str, Any]) -> dict[str, Any]:
        del payload
        raise RuntimeError("boom")

    failing_queue._handler = failing_handler
    asyncio.run(failing_queue._handle_message(failure_msg))
    assert failure_msg.acked is True
    asyncio.run(queue.stop())

    # Score parsing: string numbers are coerced; junk falls back to 60.
    assert result_scores({"scores": {"confidence": "9", "relevance": "8", "satisfaction": "7", "hallucination_risk": "low"}}).confidence == 9
    assert result_scores({"scores": "bad"}).confidence == 60
|
||||
|
||||
|
||||
def test_kb_llm_snapshot_and_json_edge_paths(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Cover remaining KB, LLM, snapshot, and JSON parsing branches."""
    # On-disk KB fixture: a catalog with one valid runbook entry, one entry
    # missing its path, and an oversized doc to exercise chunk limits.
    base = tmp_path / "kb"
    catalog = base / "catalog"
    catalog.mkdir(parents=True)
    (catalog / "atlas.json").write_text(json.dumps({"cluster": "atlas", "sources": ["bad"]}), encoding="utf-8")
    (catalog / "runbooks.json").write_text(json.dumps([{"title": "Fix", "path": "runbooks/fix.md"}, {"title": "No path"}]), encoding="utf-8")
    (base / "docs.md").write_text("x" * 120, encoding="utf-8")
    kb = KnowledgeBase(str(base))
    # Only the entry that has a path should be listed, exactly once.
    assert kb.runbook_titles(limit=1).count("runbooks/fix.md") == 1
    assert kb.chunk_lines(max_files=1, max_chars=60)
    # A line longer than the remaining budget must be rejected.
    assert kb._extend_with_limit([], ["abcdef"], 3) is False

    # No base path: chunking yields nothing instead of raising.
    empty_kb = KnowledgeBase("")
    assert empty_kb.chunk_lines() == []

    settings = _settings(tmp_path, ollama_url="http://example/api/chat", ollama_api_key="secret", ollama_retries=0, ollama_fallback_model="")
    client = LLMClient(settings)
    assert client._endpoint() == "http://example/api/chat"
    assert client._headers["x-api-key"] == "secret"
    # parse_json strips code fences and honors the fallback on junk input.
    assert parse_json("```{\"ok\": true}```") == {"ok": True}
    assert parse_json("not-json", fallback={"fallback": True}) == {"fallback": True}

    class FakeResponse:
        def __init__(self, status_code: int, payload: Any) -> None:
            self.status_code = status_code
            self._payload = payload

        def raise_for_status(self) -> None:
            if self.status_code >= 400:
                raise httpx.HTTPStatusError("bad", request=httpx.Request("POST", "http://example"), response=httpx.Response(self.status_code))

        def json(self) -> Any:
            return self._payload

    # Three payload shapes, consumed in order: "response" field, "reply"
    # fallback field, then an empty message that must raise LLMError.
    responses = iter([FakeResponse(200, {"response": "plain"}), FakeResponse(200, {"reply": "fallback"}), FakeResponse(200, {"message": {}})])

    class FakeAsyncClient:
        def __init__(self, timeout: float | None = None) -> None:
            self.timeout = timeout

        async def __aenter__(self) -> "FakeAsyncClient":
            return self

        async def __aexit__(self, *exc: object) -> None:
            return None

        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            item = next(responses)
            if isinstance(item, Exception):
                raise item
            return item

    monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
    assert asyncio.run(client.chat([{"role": "user", "content": "a"}], timeout_sec=1.0)) == "plain"
    assert asyncio.run(client.chat([{"role": "user", "content": "b"}], timeout_sec=1.0)) == "fallback"
    with pytest.raises(LLMError, match="empty response"):
        asyncio.run(client.chat([{"role": "user", "content": "c"}], timeout_sec=1.0))
    # One retry configured, two consecutive connection failures: the retry
    # path runs and still ends in LLMError.
    error_settings = replace(settings, ollama_retries=1)
    error_client = LLMClient(error_settings)
    error_responses = iter([httpx.ConnectError("nope"), httpx.ConnectError("still nope")])

    class ErrorAsyncClient(FakeAsyncClient):
        async def post(self, _url: str, *, json: dict[str, Any], headers: dict[str, str]) -> FakeResponse:
            del _url, json, headers
            raise next(error_responses)

    monkeypatch.setattr(httpx, "AsyncClient", ErrorAsyncClient)
    with pytest.raises(LLMError):
        asyncio.run(error_client.chat([{"role": "user", "content": "d"}], timeout_sec=1.0))

    # Snapshot provider: first call fetches over HTTP, second call within
    # the cache window returns the cached snapshot without fetching.
    provider = SnapshotProvider(replace(settings, ariadne_state_url="http://snapshot", ariadne_state_token="tok"))

    class SnapshotResp:
        def raise_for_status(self) -> None:
            return None

        def json(self) -> dict[str, Any]:
            return {"snapshot_id": "snap-1"}

    monkeypatch.setattr("atlasbot.snapshot.builder.httpx.get", lambda url, headers, timeout: SnapshotResp())
    assert provider.get() == {"snapshot_id": "snap-1"}
    provider._cache = {"snapshot_id": "cached"}
    provider._cache_ts = 10_000.0
    # One second after the cache timestamp — presumably inside the TTL, so
    # the cached value is served (TODO confirm TTL against SnapshotProvider).
    monkeypatch.setattr("atlasbot.snapshot.builder.time.monotonic", lambda: 10_001.0)
    assert provider.get() == {"snapshot_id": "cached"}
|
||||
1424
tests/test_support_modules.py
Normal file
1424
tests/test_support_modules.py
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user