# File-viewer header residue (not part of the module): 122 lines, 2.8 KiB, Python.
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
# ---------------------------------------------------------------------------
# Module-level tuning constants.
#
# NOTE(review): the code that consumes these values is not visible from this
# block, so the comments below describe only what the literals themselves
# show; anything about intent is marked as an assumption to confirm.
# Definition order is preserved on purpose: the module's ``__all__`` is built
# from the namespace in insertion order.
# ---------------------------------------------------------------------------

# Presumably the expected length of a two-element sample pair — confirm
# against the parser that checks it.
_VALUE_PAIR_LEN = 2

# Duration strings (they look like query range windows — confirm against the
# query builders that interpolate them).
_RATE_WINDOW = "5m"
_RESTARTS_WINDOW = "1h"
_BASELINE_WINDOW = "24h"
_TREND_WINDOWS = ("1h", "6h", "24h")

# Integer caps for the per-kind trend sections.
_TREND_NODE_LIMIT = 30
_TREND_NAMESPACE_LIMIT = 20
_TREND_PVC_LIMIT = 10
_TREND_JOB_LIMIT = 10
_TREND_POD_LIMIT = 15

# Node alert thresholds (presumably utilisation percentages — confirm units).
_NODE_DISK_ALERT = 80.0
_NODE_CPU_ALERT = 80.0
_NODE_RAM_ALERT = 80.0

# Multipliers used for spike detection (presumably relative to a baseline —
# confirm against the comparison code).
_NET_SPIKE_MULTIPLIER = 2.0
_IO_SPIKE_MULTIPLIER = 2.0

# Metric selector text: ``node_uname_info`` with a non-empty ``nodename`` matcher.
_NODE_UNAME_LABEL = 'node_uname_info{nodename!=""}'

# Label keys, kept as an ordered tuple (presumably tried in this order when
# naming a workload — confirm against the lookup code).
_WORKLOAD_LABEL_KEYS = (
    "app.kubernetes.io/name",
    "app",
    "k8s-app",
    "app.kubernetes.io/instance",
    "release",
)

_SYSTEM_NAMESPACES = {
    "kube-system",
    "kube-public",
    "kube-node-lease",
    "flux-system",
    "monitoring",
    "logging",
    "traefik",
    "cert-manager",
    "maintenance",
    "postgres",
    "vault",
}

# "maintenance" also appears in _SYSTEM_NAMESPACES; presumably this set is an
# exception list that re-admits it for workload reporting — confirm.
_WORKLOAD_ALLOWED_NAMESPACES = {
    "maintenance",
}

# Baseline-delta thresholds (presumably percent change — confirm units).
_BASELINE_DELTA_WARN = 50.0
_BASELINE_DELTA_CRIT = 100.0

# Integer caps for various summary sections.
_SIGNAL_LIMIT = 15
_PROFILE_LIMIT = 6
_WORKLOAD_INDEX_LIMIT = 20
_NODE_WORKLOAD_LIMIT = 12
_NODE_WORKLOAD_TOP = 3
_EVENTS_SUMMARY_LIMIT = 5

_PVC_CRITICAL_THRESHOLD = 90.0

# Resource names of interest in node capacity data.
_CAPACITY_KEYS = {
    "cpu",
    "memory",
    "pods",
    "ephemeral-storage",
}

# Node condition type names treated as "pressure" conditions.
_PRESSURE_TYPES = {
    "MemoryPressure",
    "DiskPressure",
    "PIDPressure",
    "NetworkUnavailable",
}

_EVENTS_MAX = 20
_EVENT_WARNING = "Warning"

# Phase name -> numeric rank; "Failed" carries the largest value.
_PHASE_SEVERITY = {
    "Failed": 3,
    "Pending": 2,
    "Unknown": 1,
}

# 0.25 hours is 15 minutes, matching the "15M" in the name.
_PENDING_15M_HOURS = 0.25

_LOAD_TOP_COUNT = 5
_NAMESPACE_TOP_COUNT = 5
_PVC_PRESSURE_THRESHOLD = 80.0
_ALERT_TOP_LIMIT = 10
_POD_REASON_LIMIT = 10
_POD_REASON_TREND_LIMIT = 10
_NAMESPACE_ISSUE_LIMIT = 8
_CROSS_NODE_TOP = 3
_CROSS_NAMESPACE_TOP = 3
_CROSS_PVC_TOP = 3

# snake_case key -> container "terminated" reason string.
_POD_TERMINATED_REASONS = {
    "oom_killed": "OOMKilled",
    "error": "Error",
}

# snake_case key -> container "waiting" reason string.
_POD_WAITING_REASONS = {
    "crash_loop": "CrashLoopBackOff",
    "image_pull_backoff": "ImagePullBackOff",
    "err_image_pull": "ErrImagePull",
    "create_config_error": "CreateContainerConfigError",
}

_DELTA_TOP_LIMIT = 6
_REASON_TOP_LIMIT = 5
|
|
|
|
|
|
@dataclass(frozen=True)
class ClusterStateSummary:
    """Immutable (frozen) record of headline cluster counts.

    The four count fields accept ``None`` in addition to an ``int``.
    NOTE(review): presumably ``None`` marks a value that could not be
    collected — confirm against the code that builds this summary.
    """

    # Node counts: total, and the subset reported ready.
    nodes_total: int | None
    nodes_ready: int | None
    # Count of running pods.
    pods_running: int | None
    # Count of kustomizations that are not ready.
    kustomizations_not_ready: int | None
    # Error count; unlike the fields above this one is always an int.
    errors: int
|
|
|
|
|
|
@dataclass(frozen=True)
class SignalContext:
    """Immutable (frozen) bundle of the inputs used for signal building.

    Freezing only blocks attribute rebinding; the dicts and lists held by
    the fields are still mutable objects.
    NOTE(review): the schema of each mapping is not visible from this block —
    confirm key layouts against the producers.
    """

    # Free-form metrics mapping.
    metrics: dict[str, Any]
    # One dict per node / per namespace.
    node_context: list[dict[str, Any]]
    namespace_context: list[dict[str, Any]]
    # Workload health, pod issue and kustomization mappings.
    workloads_health: dict[str, Any]
    pod_issues: dict[str, Any]
    kustomizations: dict[str, Any]
|
|
|
|
|
|
def _items(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict entries found under ``payload["items"]``.

    Args:
        payload: Mapping that may carry an ``"items"`` list.

    Returns:
        A new list holding, in order, every element of ``payload["items"]``
        that is a ``dict``. The result is empty when the key is missing or
        its value is not a ``list``.
    """
    entries = payload.get("items")  # single lookup instead of two
    if not isinstance(entries, list):
        # Missing key, None, tuple, str, ... are all treated as "no items".
        return []
    return [entry for entry in entries if isinstance(entry, dict)]
|
|
|
|
# Export list, computed from the names bound so far: every name that starts
# with a single underscore (dunder names are excluded) plus the two dataclasses.
# Names bound after this statement are not picked up.
__all__ = [
    name
    for name in tuple(globals())  # snapshot the keys; never iterate the live namespace dict
    if (name.startswith("_") and not name.startswith("__"))
    or name in {"ClusterStateSummary", "SignalContext"}
]
|