# (extraction metadata, not source: 188 lines, 7.8 KiB, Python)
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from .cluster_state_contract import *
|
|
from .cluster_state_relationships import *
|
|
from .cluster_state_vm_client import *
|
|
|
|
def _pod_reason_totals(
    reasons: dict[str, str],
    series: str,
) -> dict[str, dict[str, dict[str, float | None]]]:
    """Return scalar trends per reason alias for *series*.

    For each ``(key, reason)`` pair, the reason-filtered series is summed
    cluster-wide and evaluated over every configured trend window.
    """
    return {
        key: _scalar_trends(f'sum({series}{{reason="{reason}"}})', _TREND_WINDOWS)
        for key, reason in reasons.items()
    }
def _node_usage_exprs() -> dict[str, str]:
    """Build per-node usage query expressions (cpu/ram/net/io/disk).

    Every expression maps instance-level node_exporter series onto the
    Kubernetes node name via ``node_uname_info`` so results are keyed by
    node rather than scrape instance.
    """
    # Shared tail: join exporter "instance" onto the k8s node name.
    node_join = (
        '* on(instance) group_left(node) '
        'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
    )
    return {
        # CPU busy % = 100 * (1 - idle rate), averaged per node.
        "cpu": (
            'avg by (node) (((1 - avg by (instance) '
            f'(rate(node_cpu_seconds_total{{mode="idle"}}[{_RATE_WINDOW}]))) * 100) '
            + node_join
        ),
        # RAM used % from MemTotal/MemAvailable.
        "ram": (
            'avg by (node) ((avg by (instance) '
            '((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
            '/ node_memory_MemTotal_bytes * 100)) '
            + node_join
        ),
        # Network throughput (rx + tx bytes/s), loopback excluded.
        "net": (
            'avg by (node) ((sum by (instance) '
            f'(rate(node_network_receive_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]) '
            f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) '
            + node_join
        ),
        # Disk throughput (read + written bytes/s).
        "io": (
            'avg by (node) ((sum by (instance) '
            f'(rate(node_disk_read_bytes_total[{_RATE_WINDOW}]) '
            f'+ rate(node_disk_written_bytes_total[{_RATE_WINDOW}]))) '
            + node_join
        ),
        # Root filesystem used %, tmpfs/overlay excluded.
        "disk": (
            'avg by (node) (((1 - avg by (instance) '
            '(node_filesystem_avail_bytes{mountpoint="/",fstype!~"tmpfs|overlay"} '
            '/ node_filesystem_size_bytes{mountpoint="/",fstype!~"tmpfs|overlay"})) * 100) '
            + node_join
        ),
    }
def _namespace_usage_exprs() -> dict[str, str]:
    """Per-namespace usage expressions: CPU (cores) and working-set memory."""
    exprs: dict[str, str] = {}
    exprs["cpu"] = (
        'sum by (namespace) '
        f'(rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}]))'
    )
    exprs["mem"] = 'sum by (namespace) (container_memory_working_set_bytes{namespace!=""})'
    return exprs
def _namespace_request_exprs() -> dict[str, str]:
|
|
return {
|
|
"cpu_requests": "sum by (namespace) (kube_pod_container_resource_requests_cpu_cores)",
|
|
"mem_requests": "sum by (namespace) (kube_pod_container_resource_requests_memory_bytes)",
|
|
}
|
|
|
|
|
|
def _restart_namespace_trend(window: str) -> list[dict[str, Any]]:
    """Top namespaces by container restarts accumulated over *window*."""
    query = (
        f"topk({_TREND_NAMESPACE_LIMIT}, sum by (namespace) "
        f"(increase(kube_pod_container_status_restarts_total[{window}])))"
    )
    raw = _vm_vector(query)
    # Drop namespaces the shared filter excludes before shaping rows.
    filtered = _filter_namespace_vector(raw)
    return _vector_to_named(filtered, "namespace", "namespace")
def _job_failure_trend(window: str) -> list[dict[str, Any]]:
    """Top failing jobs over *window* as ``{namespace, job, value}`` rows.

    Entries without string ``namespace``/``job_name`` labels are dropped;
    rows are ordered by descending value, then namespace, then job.
    """
    query = (
        f"topk({_TREND_JOB_LIMIT}, sum by (namespace,job_name) "
        f"(increase(kube_job_status_failed[{window}])))"
    )
    rows: list[dict[str, Any]] = []
    for entry in _vm_vector(query):
        if not isinstance(entry, dict):
            continue
        labels = entry.get("metric")
        if not isinstance(labels, dict):
            labels = {}
        ns = labels.get("namespace")
        job_name = labels.get("job_name")
        if not isinstance(ns, str) or not isinstance(job_name, str):
            continue
        rows.append({"namespace": ns, "job": job_name, "value": entry.get("value")})
    rows.sort(
        key=lambda row: (
            -(row.get("value") or 0),
            row.get("namespace") or "",
            row.get("job") or "",
        )
    )
    return rows
def _pod_reason_entries(expr: str, limit: int) -> list[dict[str, Any]]:
    """Top pods for *expr* as ``{namespace, pod, value}`` rows.

    Sums *expr* per (namespace, pod), keeps the top *limit* series, drops
    entries lacking string namespace/pod labels, and sorts by descending
    value, then namespace, then pod.
    """
    rows: list[dict[str, Any]] = []
    for entry in _vm_vector(f"topk({limit}, sum by (namespace,pod) ({expr}))"):
        if not isinstance(entry, dict):
            continue
        labels = entry.get("metric")
        if not isinstance(labels, dict):
            labels = {}
        ns = labels.get("namespace")
        pod_name = labels.get("pod")
        if not isinstance(ns, str) or not isinstance(pod_name, str):
            continue
        rows.append({"namespace": ns, "pod": pod_name, "value": entry.get("value")})
    rows.sort(
        key=lambda row: (
            -(row.get("value") or 0),
            row.get("namespace") or "",
            row.get("pod") or "",
        )
    )
    return rows
def _namespace_reason_entries(expr: str, limit: int) -> list[dict[str, Any]]:
    """Top namespaces for *expr*, filtered and shaped into named rows."""
    vector = _vm_vector(f"topk({limit}, sum by (namespace) ({expr}))")
    return _vector_to_named(_filter_namespace_vector(vector), "namespace", "namespace")
def _pod_waiting_now() -> dict[str, list[dict[str, Any]]]:
    """Current top pods per container-waiting reason, keyed by reason alias."""
    return {
        key: _pod_reason_entries(
            f'kube_pod_container_status_waiting_reason{{reason="{reason}"}}',
            _POD_REASON_LIMIT,
        )
        for key, reason in _POD_WAITING_REASONS.items()
    }
def _pod_waiting_trends() -> dict[str, dict[str, list[dict[str, Any]]]]:
    """Per-reason, per-window peaks of container-waiting counts.

    For each waiting reason, takes ``max_over_time`` of the reason series
    over every trend window and returns the top offending pods per window.
    """
    result: dict[str, dict[str, list[dict[str, Any]]]] = {}
    for key, reason in _POD_WAITING_REASONS.items():
        base = f'kube_pod_container_status_waiting_reason{{reason="{reason}"}}'
        per_window: dict[str, list[dict[str, Any]]] = {}
        for window in _TREND_WINDOWS:
            per_window[window] = _pod_reason_entries(
                f"max_over_time(({base})[{window}])", _POD_REASON_TREND_LIMIT
            )
        result[key] = per_window
    return result
def _pod_terminated_now() -> dict[str, list[dict[str, Any]]]:
    """Current top pods per container-terminated reason, keyed by alias."""
    return {
        key: _pod_reason_entries(
            f'kube_pod_container_status_terminated_reason{{reason="{reason}"}}',
            _POD_REASON_LIMIT,
        )
        for key, reason in _POD_TERMINATED_REASONS.items()
    }
def _pod_terminated_trends() -> dict[str, dict[str, list[dict[str, Any]]]]:
    """Per-reason, per-window peaks of container-terminated counts.

    Mirrors the waiting-reason trends: ``max_over_time`` of each terminated
    reason series over every trend window, top pods per window.
    """
    result: dict[str, dict[str, list[dict[str, Any]]]] = {}
    for key, reason in _POD_TERMINATED_REASONS.items():
        base = f'kube_pod_container_status_terminated_reason{{reason="{reason}"}}'
        per_window: dict[str, list[dict[str, Any]]] = {}
        for window in _TREND_WINDOWS:
            per_window[window] = _pod_reason_entries(
                f"max_over_time(({base})[{window}])", _POD_REASON_TREND_LIMIT
            )
        result[key] = per_window
    return result
def _pods_phase_trends() -> dict[str, dict[str, dict[str, float | None]]]:
    """avg/max cluster pod counts per phase, per trend window.

    Returns ``{window: {phase: {"avg": ..., "max": ...}}}`` for the
    Running/Pending/Failed phases.
    """
    phase_exprs = {
        "running": 'sum(kube_pod_status_phase{phase="Running"})',
        "pending": 'sum(kube_pod_status_phase{phase="Pending"})',
        "failed": 'sum(kube_pod_status_phase{phase="Failed"})',
    }
    return {
        window: {
            name: {
                "avg": _vm_scalar_window(expr, window, "avg_over_time"),
                "max": _vm_scalar_window(expr, window, "max_over_time"),
            }
            for name, expr in phase_exprs.items()
        }
        for window in _TREND_WINDOWS
    }
def _pvc_usage_trends() -> dict[str, list[dict[str, Any]]]:
    """Top PVCs by peak used-capacity percentage for each trend window."""
    usage_pct = "kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes * 100"
    result: dict[str, list[dict[str, Any]]] = {}
    for window in _TREND_WINDOWS:
        query = f"topk({_TREND_PVC_LIMIT}, max_over_time(({usage_pct})[{window}]))"
        result[window] = _pvc_top(_vm_vector(query))
    return result
# Re-export every single-underscore helper (dunders excluded) plus the two
# public names callers import from this module.
__all__ = [
    name
    for name in globals()
    if (name.startswith("_") and not name.startswith("__"))
    or name in {"ClusterStateSummary", "SignalContext"}
]