# 117 lines
# 2.1 KiB
# Python

from __future__ import annotations
import logging
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
# Module logger; handlers and level are configured by the application, not here.
log = logging.getLogger(__name__)

# Heuristic thresholds and limits. Their usage sites are outside this chunk, so
# the meanings noted below are inferred from the names — confirm against callers.
FOLLOWUP_SHORT_WORDS = 6  # word-count cutoff for treating a follow-up question as "short"
TOKEN_MIN_LEN = 3  # presumably the minimum character length for a token to count
GENERIC_METRIC_TOKENS = {"atlas", "cluster", "kubernetes", "k8s", "titan", "lab"}  # tokens too generic to identify a specific metric
NS_ENTRY_MIN_LEN = 2  # presumably the minimum length of a namespace entry
DEDUP_MIN_SENTENCES = 3  # presumably the sentence count at which deduplication kicks in
RUNBOOK_SIMILARITY_THRESHOLD = 0.4  # similarity score treated as a runbook match — verify at call site
BYTES_KB = 1024  # bytes per KiB
BYTES_MB = 1024 * 1024  # bytes per MiB
class LLMLimitReached(RuntimeError):
    """Raised when a limit on LLM calls has been reached.

    The exact budget being enforced is set by the caller that raises this;
    it is not visible in this module.
    """
class LLMTimeBudgetExceeded(RuntimeError):
    """Raised when the time budget allotted to LLM work is exceeded.

    The budget itself is managed by the caller that raises this; it is not
    visible in this module.
    """
@dataclass
class AnswerScores:
    """Quality scores assigned to a generated answer.

    Score scales are not visible in this chunk — confirm the valid ranges
    (e.g. 0-10 vs 0-100) against the code that produces these.
    """

    confidence: int
    relevance: int
    satisfaction: int
    # Categorical label (e.g. "low"/"high"?) — exact vocabulary not visible here.
    hallucination_risk: str
@dataclass
class AnswerResult:
    """A generated reply bundled with its scores and free-form metadata."""

    reply: str
    scores: AnswerScores
    # Arbitrary metadata about how the answer was produced; keys not visible here.
    meta: dict[str, Any]
@dataclass(frozen=True)
class InsightGuardInput:
    """Immutable input bundle for an insight guard/validation step.

    NOTE(review): field semantics below are inferred from names — confirm
    against the guard implementation and its callers.
    """

    question: str
    reply: str
    # Classification payload for the question; schema not visible in this chunk.
    classify: dict[str, Any]
    context: str
    plan: ModePlan
    # Async callable that performs an LLM call and returns the raw reply text.
    call_llm: Callable[..., Awaitable[str]]
    facts: list[str]
@dataclass
class ContradictionContext:
    """Inputs for a contradiction check of a reply against known facts.

    NOTE(review): purpose inferred from the class name — the checking code is
    outside this chunk; confirm at the call site.
    """

    # Async callable that performs an LLM call and returns the raw reply text.
    call_llm: Callable[..., Awaitable[str]]
    question: str
    reply: str
    facts: list[str]
    plan: ModePlan
@dataclass
class EvidenceItem:
    """One piece of evidence backing a claim, addressed by a path.

    The addressing scheme of ``path`` (presumably a path into a data
    snapshot) is not visible in this chunk — confirm against callers.
    """

    path: str
    reason: str
    # Current value at `path`; None when not (yet) resolved.
    value: Any | None = None
    # Value observed at the time the claim was made; None when not recorded.
    value_at_claim: Any | None = None
@dataclass
class ClaimItem:
    """A single claim together with the evidence items that support it."""

    # Identifier for the claim; format/uniqueness scope not visible here.
    id: str
    claim: str
    evidence: list[EvidenceItem]
@dataclass
class ConversationState:
    """Per-conversation state carried between turns."""

    # Presumably a Unix timestamp of the last update — confirm at call sites.
    updated_at: float
    claims: list[ClaimItem]
    # Snapshot reference and its materialized data; both optional.
    snapshot_id: str | None = None
    snapshot: dict[str, Any] | None = None
@dataclass
class ModePlan:
    """Tuning knobs for one answer-generation mode.

    NOTE(review): field meanings are inferred from the names; the pipeline
    that consumes the plan is outside this chunk — confirm against it.
    """

    # Model selection: primary model and a cheaper/faster one for auxiliary calls.
    model: str
    fast_model: str
    # Decomposition / chunking limits.
    max_subquestions: int
    chunk_lines: int
    chunk_top: int
    chunk_group: int
    # Knowledge-base retrieval budgets.
    kb_max_chars: int
    kb_max_files: int
    use_raw_snapshot: bool
    # Degree of concurrent work.
    parallelism: int
    score_retries: int
    # Feature toggles for optional pipeline stages.
    use_deep_retrieval: bool
    use_tool: bool
    use_critic: bool
    use_gap: bool
    use_scores: bool
    # Draft and retry counts.
    drafts: int
    metric_retries: int
    subanswer_retries: int
@dataclass
class ScoreContext:
    """Inputs controlling how candidate answers are scored.

    NOTE(review): semantics inferred from names — the scoring code is outside
    this chunk; confirm against it.
    """

    question: str
    sub_questions: list[str]
    retries: int
    parallelism: int
    # Whether to select the single best-scoring candidate — confirm at call site.
    select_best: bool
    fast_model: str