161 lines
5.9 KiB
Python
161 lines
5.9 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from .cluster_state_anomalies import *
|
|
from .cluster_state_contract import *
|
|
from .cluster_state_health import *
|
|
from .cluster_state_relationships import *
|
|
|
|
def _pod_issue_summary(pod_issues: dict[str, Any], metrics: dict[str, Any]) -> dict[str, Any]:
    """Condense pod issue data into top waiting/phase reasons and namespaces.

    Args:
        pod_issues: Pod issue payload. ``waiting_reasons`` and
            ``phase_reasons`` are read from it; when it is not a dict, empty
            dicts are used instead.
        metrics: Metrics payload. ``namespace_issue_top`` is read from it;
            when it is not a dict, the entry is treated as absent.

    Returns:
        A dict with ``waiting_reasons_top`` and ``phase_reasons_top`` (each the
        result of ``_reason_top``) and ``namespace_issue_top`` (``{}`` when the
        value is missing or falsy).
    """
    if isinstance(pod_issues, dict):
        waiting = pod_issues.get("waiting_reasons")
        phase = pod_issues.get("phase_reasons")
    else:
        waiting, phase = {}, {}
    # Guard ``metrics`` the same way ``pod_issues`` is guarded, so a missing
    # metrics payload does not raise AttributeError here.
    namespace_top = metrics.get("namespace_issue_top") if isinstance(metrics, dict) else None
    return {
        "waiting_reasons_top": _reason_top(waiting),
        "phase_reasons_top": _reason_top(phase),
        "namespace_issue_top": namespace_top or {},
    }
|
|
|
|
|
|
def _delta_hit(delta: Any) -> bool:
|
|
if not isinstance(delta, (int, float)):
|
|
return False
|
|
return abs(float(delta)) >= _BASELINE_DELTA_WARN
|
|
|
|
|
|
def _node_delta_signals(node_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
signals: list[dict[str, Any]] = []
|
|
for entry in node_context:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
node = entry.get("node")
|
|
deltas = entry.get("baseline_delta") if isinstance(entry.get("baseline_delta"), dict) else {}
|
|
baseline = entry.get("baseline") if isinstance(entry.get("baseline"), dict) else {}
|
|
if not isinstance(node, str) or not node:
|
|
continue
|
|
for metric in ("cpu", "ram", "net", "io", "disk"):
|
|
delta = deltas.get(metric)
|
|
if not _delta_hit(delta):
|
|
continue
|
|
avg = baseline.get(metric, {}).get("avg") if isinstance(baseline.get(metric), dict) else None
|
|
signals.append(
|
|
{
|
|
"scope": "node",
|
|
"target": node,
|
|
"metric": metric,
|
|
"current": entry.get(metric),
|
|
"baseline_avg": avg,
|
|
"delta_pct": delta,
|
|
"severity": _delta_severity(float(delta)),
|
|
}
|
|
)
|
|
return signals
|
|
|
|
|
|
def _namespace_delta_signals(namespace_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
signals: list[dict[str, Any]] = []
|
|
for entry in namespace_context:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
namespace = entry.get("namespace")
|
|
deltas = entry.get("baseline_delta") if isinstance(entry.get("baseline_delta"), dict) else {}
|
|
baseline = entry.get("baseline") if isinstance(entry.get("baseline"), dict) else {}
|
|
if not isinstance(namespace, str) or not namespace:
|
|
continue
|
|
for metric, current_key in (("cpu", "cpu_usage"), ("mem", "mem_usage")):
|
|
delta = deltas.get(metric)
|
|
if not _delta_hit(delta):
|
|
continue
|
|
avg = baseline.get(metric, {}).get("avg") if isinstance(baseline.get(metric), dict) else None
|
|
signals.append(
|
|
{
|
|
"scope": "namespace",
|
|
"target": namespace,
|
|
"metric": metric,
|
|
"current": entry.get(current_key),
|
|
"baseline_avg": avg,
|
|
"delta_pct": delta,
|
|
"severity": _delta_severity(float(delta)),
|
|
}
|
|
)
|
|
return signals
|
|
|
|
|
|
def _kustomization_signals(kustomizations: dict[str, Any]) -> list[dict[str, Any]]:
|
|
count = int(kustomizations.get("not_ready") or 0) if isinstance(kustomizations, dict) else 0
|
|
if count <= 0:
|
|
return []
|
|
return [
|
|
{
|
|
"scope": "flux",
|
|
"target": "kustomizations",
|
|
"metric": "not_ready",
|
|
"current": count,
|
|
"severity": "warning",
|
|
}
|
|
]
|
|
|
|
|
|
def _pod_issue_signals(pod_issues: dict[str, Any]) -> list[dict[str, Any]]:
|
|
if not isinstance(pod_issues, dict):
|
|
return []
|
|
signals: list[dict[str, Any]] = []
|
|
pending_over = int(pod_issues.get("pending_over_15m") or 0)
|
|
if pending_over > 0:
|
|
signals.append(
|
|
{
|
|
"scope": "pods",
|
|
"target": "pending_over_15m",
|
|
"metric": "count",
|
|
"current": pending_over,
|
|
"severity": "warning",
|
|
}
|
|
)
|
|
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
|
|
failed = int(counts.get("Failed") or 0) if isinstance(counts, dict) else 0
|
|
if failed > 0:
|
|
signals.append(
|
|
{
|
|
"scope": "pods",
|
|
"target": "failed",
|
|
"metric": "count",
|
|
"current": failed,
|
|
"severity": "critical",
|
|
}
|
|
)
|
|
return signals
|
|
|
|
|
|
def _workload_health_signals(workloads_health: dict[str, Any]) -> list[dict[str, Any]]:
    """Emit warning signals for the first five not-ready workloads.

    The not-ready entries come from ``_workload_not_ready_items``; when that
    returns nothing, no signals are produced. Each signal targets
    ``<namespace>/<workload>`` and reports the ``ready`` and ``desired`` values
    of the entry, each falling back to 0 when missing or falsy.
    """
    pending = _workload_not_ready_items(workloads_health)
    if not pending:
        return []
    return [
        {
            "scope": "workload",
            "target": f"{item.get('namespace')}/{item.get('workload')}",
            "metric": "not_ready",
            "current": item.get("ready") or 0,
            "desired": item.get("desired") or 0,
            "severity": "warning",
        }
        for item in pending[:5]
    ]
|
|
|
|
|
|
def _build_signals(context: SignalContext) -> list[dict[str, Any]]:
    """Gather every signal family, order them, and cap the result.

    Signals are collected from node deltas, namespace deltas, workload health,
    pod issues, kustomizations and PVC pressure, in that order. They are then
    sorted by ``_severity_rank`` of their severity and, within the same rank,
    by scope name; the sort is stable, so collection order breaks remaining
    ties. At most ``_SIGNAL_LIMIT`` signals are returned.
    """

    def _order(item: dict[str, Any]) -> tuple[Any, str]:
        return (_severity_rank(item.get("severity")), item.get("scope") or "")

    combined: list[dict[str, Any]] = []
    for batch in (
        _node_delta_signals(context.node_context),
        _namespace_delta_signals(context.namespace_context),
        _workload_health_signals(context.workloads_health),
        _pod_issue_signals(context.pod_issues),
        _kustomization_signals(context.kustomizations),
        _pvc_pressure_signals(context.metrics),
    ):
        combined.extend(batch)

    ranked = sorted(combined, key=_order)
    return ranked[:_SIGNAL_LIMIT]
|
|
|
|
# Export every single-underscore name present in this module's globals at this
# point, plus the two explicitly listed public names. Dunder names are
# excluded. Kept as one inline expression: any module-level helper named with a
# leading underscore would itself end up in the export list.
__all__ = [
    symbol
    for symbol in globals()
    if symbol in {"ClusterStateSummary", "SignalContext"}
    or (symbol.startswith("_") and not symbol.startswith("__"))
]
|