Compare commits
No commits in common. "cacaaaad53c21598d204fbc928712e6b1f5dfa4a" and "558f5c1270905a6fd363e5ad7bf1dbb99eed6780" have entirely different histories.
cacaaaad53
...
558f5c1270
@ -41,25 +41,6 @@ _SYSTEM_NAMESPACES = {
|
|||||||
_WORKLOAD_ALLOWED_NAMESPACES = {
|
_WORKLOAD_ALLOWED_NAMESPACES = {
|
||||||
"maintenance",
|
"maintenance",
|
||||||
}
|
}
|
||||||
_CAPACITY_KEYS = {
|
|
||||||
"cpu",
|
|
||||||
"memory",
|
|
||||||
"pods",
|
|
||||||
"ephemeral-storage",
|
|
||||||
}
|
|
||||||
_PRESSURE_TYPES = {
|
|
||||||
"MemoryPressure",
|
|
||||||
"DiskPressure",
|
|
||||||
"PIDPressure",
|
|
||||||
"NetworkUnavailable",
|
|
||||||
}
|
|
||||||
_EVENTS_MAX = 20
|
|
||||||
_EVENT_WARNING = "Warning"
|
|
||||||
_PHASE_SEVERITY = {
|
|
||||||
"Failed": 3,
|
|
||||||
"Pending": 2,
|
|
||||||
"Unknown": 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@ -147,7 +128,6 @@ def _node_details(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
|||||||
details: list[dict[str, Any]] = []
|
details: list[dict[str, Any]] = []
|
||||||
for node in _items(payload):
|
for node in _items(payload):
|
||||||
metadata = node.get("metadata") if isinstance(node.get("metadata"), dict) else {}
|
metadata = node.get("metadata") if isinstance(node.get("metadata"), dict) else {}
|
||||||
spec = node.get("spec") if isinstance(node.get("spec"), dict) else {}
|
|
||||||
status = node.get("status") if isinstance(node.get("status"), dict) else {}
|
status = node.get("status") if isinstance(node.get("status"), dict) else {}
|
||||||
node_info = status.get("nodeInfo") if isinstance(status.get("nodeInfo"), dict) else {}
|
node_info = status.get("nodeInfo") if isinstance(status.get("nodeInfo"), dict) else {}
|
||||||
labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {}
|
labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {}
|
||||||
@ -155,9 +135,6 @@ def _node_details(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
|||||||
if not name:
|
if not name:
|
||||||
continue
|
continue
|
||||||
roles = _node_roles(labels)
|
roles = _node_roles(labels)
|
||||||
conditions = _node_pressure_conditions(status.get("conditions"))
|
|
||||||
created_at = metadata.get("creationTimestamp") if isinstance(metadata.get("creationTimestamp"), str) else ""
|
|
||||||
taints = _node_taints(spec.get("taints"))
|
|
||||||
details.append(
|
details.append(
|
||||||
{
|
{
|
||||||
"name": name,
|
"name": name,
|
||||||
@ -172,50 +149,12 @@ def _node_details(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
|||||||
"kubelet": node_info.get("kubeletVersion") or "",
|
"kubelet": node_info.get("kubeletVersion") or "",
|
||||||
"container_runtime": node_info.get("containerRuntimeVersion") or "",
|
"container_runtime": node_info.get("containerRuntimeVersion") or "",
|
||||||
"addresses": _node_addresses(status),
|
"addresses": _node_addresses(status),
|
||||||
"created_at": created_at,
|
|
||||||
"age_hours": _age_hours(created_at),
|
|
||||||
"taints": taints,
|
|
||||||
"unschedulable": bool(spec.get("unschedulable")),
|
|
||||||
"capacity": _node_capacity(status.get("capacity")),
|
|
||||||
"allocatable": _node_capacity(status.get("allocatable")),
|
|
||||||
"pressure": conditions,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
details.sort(key=lambda item: item.get("name") or "")
|
details.sort(key=lambda item: item.get("name") or "")
|
||||||
return details
|
return details
|
||||||
|
|
||||||
|
|
||||||
def _age_hours(timestamp: str) -> float | None:
|
|
||||||
if not timestamp:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
parsed = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
return round((datetime.now(timezone.utc) - parsed).total_seconds() / 3600, 1)
|
|
||||||
|
|
||||||
|
|
||||||
def _node_taints(raw: Any) -> list[dict[str, str]]:
|
|
||||||
if not isinstance(raw, list):
|
|
||||||
return []
|
|
||||||
taints: list[dict[str, str]] = []
|
|
||||||
for entry in raw:
|
|
||||||
if not isinstance(entry, dict):
|
|
||||||
continue
|
|
||||||
key = entry.get("key")
|
|
||||||
effect = entry.get("effect")
|
|
||||||
value = entry.get("value")
|
|
||||||
if isinstance(key, str) and isinstance(effect, str):
|
|
||||||
taints.append(
|
|
||||||
{
|
|
||||||
"key": key,
|
|
||||||
"value": value if isinstance(value, str) else "",
|
|
||||||
"effect": effect,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return taints
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_inventory(details: list[dict[str, Any]]) -> dict[str, Any]:
|
def _summarize_inventory(details: list[dict[str, Any]]) -> dict[str, Any]:
|
||||||
summary = {
|
summary = {
|
||||||
"total": 0,
|
"total": 0,
|
||||||
@ -225,75 +164,33 @@ def _summarize_inventory(details: list[dict[str, Any]]) -> dict[str, Any]:
|
|||||||
"by_arch": {},
|
"by_arch": {},
|
||||||
"by_role": {},
|
"by_role": {},
|
||||||
"not_ready_names": [],
|
"not_ready_names": [],
|
||||||
"pressure_nodes": {key: [] for key in _PRESSURE_TYPES},
|
|
||||||
}
|
}
|
||||||
not_ready: list[str] = []
|
not_ready: list[str] = []
|
||||||
for node in details:
|
for node in details:
|
||||||
name = _apply_node_summary(summary, node)
|
name = node.get("name") if isinstance(node, dict) else ""
|
||||||
if name and not node.get("ready"):
|
if not isinstance(name, str) or not name:
|
||||||
|
continue
|
||||||
|
summary["total"] += 1
|
||||||
|
ready = bool(node.get("ready"))
|
||||||
|
if ready:
|
||||||
|
summary["ready"] += 1
|
||||||
|
else:
|
||||||
not_ready.append(name)
|
not_ready.append(name)
|
||||||
|
if node.get("is_worker"):
|
||||||
|
summary["workers"]["total"] += 1
|
||||||
|
if ready:
|
||||||
|
summary["workers"]["ready"] += 1
|
||||||
|
hardware = node.get("hardware") or "unknown"
|
||||||
|
arch = node.get("arch") or "unknown"
|
||||||
|
summary["by_hardware"][hardware] = summary["by_hardware"].get(hardware, 0) + 1
|
||||||
|
summary["by_arch"][arch] = summary["by_arch"].get(arch, 0) + 1
|
||||||
|
for role in node.get("roles") or []:
|
||||||
|
summary["by_role"][role] = summary["by_role"].get(role, 0) + 1
|
||||||
not_ready.sort()
|
not_ready.sort()
|
||||||
summary["not_ready_names"] = not_ready
|
summary["not_ready_names"] = not_ready
|
||||||
for cond_type in summary["pressure_nodes"]:
|
|
||||||
summary["pressure_nodes"][cond_type].sort()
|
|
||||||
return summary
|
return summary
|
||||||
|
|
||||||
|
|
||||||
def _apply_node_summary(summary: dict[str, Any], node: dict[str, Any]) -> str:
    """Fold one node detail record into the running inventory ``summary``.

    Updates the total / ready / worker counters and the per-hardware,
    per-arch and per-role tallies in place, then records pressure flags via
    ``_apply_pressure``.

    Returns:
        The node's name, or ``""`` when ``node`` is not a dict or has no
        non-empty string name (in which case ``summary`` is left untouched).
    """
    if not isinstance(node, dict):
        return ""
    node_name = node.get("name")
    if not (isinstance(node_name, str) and node_name):
        return ""

    def bump(tally: dict[str, int], label: str) -> None:
        tally[label] = tally.get(label, 0) + 1

    is_ready = bool(node.get("ready"))
    summary["total"] += 1
    if is_ready:
        summary["ready"] += 1
    if node.get("is_worker"):
        worker_counts = summary["workers"]
        worker_counts["total"] += 1
        if is_ready:
            worker_counts["ready"] += 1

    # Missing or empty hardware / arch values are grouped under "unknown".
    bump(summary["by_hardware"], node.get("hardware") or "unknown")
    bump(summary["by_arch"], node.get("arch") or "unknown")
    for role in node.get("roles") or []:
        bump(summary["by_role"], role)

    _apply_pressure(summary, node, node_name)
    return node_name
|
|
||||||
|
|
||||||
|
|
||||||
def _apply_pressure(summary: dict[str, Any], node: dict[str, Any], name: str) -> None:
|
|
||||||
pressure = node.get("pressure") or {}
|
|
||||||
if not isinstance(pressure, dict):
|
|
||||||
return
|
|
||||||
for cond_type, active in pressure.items():
|
|
||||||
if active and cond_type in summary["pressure_nodes"]:
|
|
||||||
summary["pressure_nodes"][cond_type].append(name)
|
|
||||||
|
|
||||||
|
|
||||||
def _node_capacity(raw: Any) -> dict[str, str]:
    """Extract the tracked resource quantities from a capacity-style mapping.

    Args:
        raw: A mapping of resource name to quantity (any other type yields
            an empty result).

    Returns:
        A dict holding, for each name in ``_CAPACITY_KEYS`` that is present
        with a non-empty string or numeric value, that value as a string.
        Keys appear in sorted order.
    """
    if not isinstance(raw, dict):
        return {}
    output: dict[str, str] = {}
    # _CAPACITY_KEYS is a set, whose iteration order can change between
    # interpreter runs; sorting keeps the serialized output stable.
    for key in sorted(_CAPACITY_KEYS):
        value = raw.get(key)
        if isinstance(value, bool):
            # bool is a subclass of int but is never a valid quantity.
            continue
        if isinstance(value, (str, int, float)) and value != "":
            output[key] = str(value)
    return output
|
|
||||||
|
|
||||||
|
|
||||||
def _node_pressure_conditions(conditions: Any) -> dict[str, bool]:
    """Map each tracked pressure condition type to whether it is active.

    Args:
        conditions: A list of condition dicts carrying ``type`` and
            ``status`` entries (any other type yields an empty result).

    Returns:
        ``{type: status == "True"}`` for every condition whose ``type`` is a
        string contained in ``_PRESSURE_TYPES``. If a type occurs more than
        once, the last occurrence wins.
    """
    if not isinstance(conditions, list):
        return {}
    pressure: dict[str, bool] = {}
    for condition in conditions:
        if not isinstance(condition, dict):
            continue
        cond_type = condition.get("type")
        # Check for a string first: testing an unhashable value (list/dict)
        # for membership in a set raises TypeError.
        if isinstance(cond_type, str) and cond_type in _PRESSURE_TYPES:
            pressure[cond_type] = condition.get("status") == "True"
    return pressure
|
|
||||||
|
|
||||||
|
|
||||||
def _node_roles(labels: dict[str, Any]) -> list[str]:
|
def _node_roles(labels: dict[str, Any]) -> list[str]:
|
||||||
roles: list[str] = []
|
roles: list[str] = []
|
||||||
for key in labels.keys():
|
for key in labels.keys():
|
||||||
@ -394,67 +291,6 @@ def _namespace_allowed(namespace: str) -> bool:
|
|||||||
return namespace not in _SYSTEM_NAMESPACES
|
return namespace not in _SYSTEM_NAMESPACES
|
||||||
|
|
||||||
|
|
||||||
def _event_timestamp(event: dict[str, Any]) -> str:
|
|
||||||
for key in ("eventTime", "lastTimestamp", "firstTimestamp"):
|
|
||||||
value = event.get(key)
|
|
||||||
if isinstance(value, str) and value:
|
|
||||||
return value
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _event_sort_key(timestamp: str) -> float:
|
|
||||||
if not timestamp:
|
|
||||||
return 0.0
|
|
||||||
try:
|
|
||||||
return datetime.fromisoformat(timestamp.replace("Z", "+00:00")).timestamp()
|
|
||||||
except ValueError:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_events(payload: dict[str, Any]) -> dict[str, Any]:
    """Summarize warning events from an event list payload.

    Only items whose namespace passes ``_namespace_allowed`` and whose
    ``type`` equals ``_EVENT_WARNING`` are considered.

    Returns:
        A dict with ``warnings_total`` (number of matching events),
        ``warnings_by_reason`` and ``warnings_by_namespace`` (sums of each
        event's ``count``, defaulting to 1), and ``warnings_recent`` (the
        newest ``_EVENTS_MAX`` events, most recent first).
    """

    def text(source: dict[str, Any], field: str) -> str:
        found = source.get(field)
        return found if isinstance(found, str) else ""

    def section(source: dict[str, Any], field: str) -> dict[str, Any]:
        found = source.get(field)
        return found if isinstance(found, dict) else {}

    collected: list[dict[str, Any]] = []
    reason_totals: dict[str, int] = {}
    namespace_totals: dict[str, int] = {}

    for event in _items(payload):
        namespace = text(section(event, "metadata"), "namespace")
        if not _namespace_allowed(namespace):
            continue
        if text(event, "type") != _EVENT_WARNING:
            continue

        reason = text(event, "reason")
        raw_count = event.get("count")
        occurrences = raw_count if isinstance(raw_count, int) else 1
        target = section(event, "involvedObject")

        collected.append(
            {
                "namespace": namespace,
                "reason": reason,
                "message": text(event, "message"),
                "count": occurrences,
                "last_seen": _event_timestamp(event),
                "object_kind": target.get("kind") or "",
                "object_name": target.get("name") or "",
            }
        )
        if reason:
            reason_totals[reason] = reason_totals.get(reason, 0) + occurrences
        if namespace:
            namespace_totals[namespace] = namespace_totals.get(namespace, 0) + occurrences

    # Newest first; events without a usable timestamp get key 0.0 and sink.
    collected.sort(
        key=lambda warning: _event_sort_key(warning.get("last_seen") or ""),
        reverse=True,
    )
    return {
        "warnings_total": len(collected),
        "warnings_by_reason": reason_totals,
        "warnings_by_namespace": namespace_totals,
        "warnings_recent": collected[:_EVENTS_MAX],
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _workload_from_labels(labels: dict[str, Any]) -> tuple[str, str]:
|
def _workload_from_labels(labels: dict[str, Any]) -> tuple[str, str]:
|
||||||
for key in _WORKLOAD_LABEL_KEYS:
|
for key in _WORKLOAD_LABEL_KEYS:
|
||||||
value = labels.get(key)
|
value = labels.get(key)
|
||||||
@ -561,344 +397,6 @@ def _summarize_namespace_pods(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
def _summarize_namespace_nodes(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Summarize, per namespace, how its scheduled pods spread over nodes.

    Pods whose namespace is rejected by ``_namespace_allowed`` or that have
    no node name are skipped.

    Returns:
        One dict per namespace with ``namespace``, ``pods_total``,
        ``pods_running``, ``nodes`` (node name -> pod count) and
        ``primary_node`` (the node with the most pods, ties broken by name),
        ordered by descending ``pods_total`` and then by namespace.
    """
    by_namespace: dict[str, dict[str, Any]] = {}

    for pod in _items(payload):
        meta = pod.get("metadata")
        namespace = meta.get("namespace") if isinstance(meta, dict) else ""
        if not isinstance(namespace, str):
            namespace = ""
        if not _namespace_allowed(namespace):
            continue

        spec = pod.get("spec")
        node_name = spec.get("nodeName") if isinstance(spec, dict) else None
        if not isinstance(node_name, str) or not node_name:
            continue

        status = pod.get("status")
        phase = status.get("phase") if isinstance(status, dict) else None
        if not isinstance(phase, str):
            phase = ""

        if namespace not in by_namespace:
            by_namespace[namespace] = {
                "namespace": namespace,
                "pods_total": 0,
                "pods_running": 0,
                "nodes": {},
            }
        bucket = by_namespace[namespace]
        bucket["pods_total"] += 1
        if phase == "Running":
            bucket["pods_running"] += 1
        placement = bucket["nodes"]
        placement[node_name] = placement.get(node_name, 0) + 1

    for bucket in by_namespace.values():
        placement = bucket.get("nodes") or {}
        if isinstance(placement, dict) and placement:
            # Highest pod count wins; the node name breaks ties.
            top_node, _ = min(placement.items(), key=lambda pair: (-pair[1], pair[0]))
            bucket["primary_node"] = top_node
        else:
            bucket["primary_node"] = ""

    return sorted(
        by_namespace.values(),
        key=lambda bucket: (-bucket.get("pods_total", 0), bucket.get("namespace") or ""),
    )
|
|
||||||
|
|
||||||
|
|
||||||
_NODE_PHASE_KEYS = {
|
|
||||||
"Running": "pods_running",
|
|
||||||
"Pending": "pods_pending",
|
|
||||||
"Failed": "pods_failed",
|
|
||||||
"Succeeded": "pods_succeeded",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_node_pods(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Build per-node pod counters from a pod list payload.

    Each item is reduced to a ``(node, namespace, phase)`` context by
    ``_node_pod_context``; items without a context are skipped. The result
    is whatever ``_node_pod_finalize`` returns for the collected entries.
    """
    per_node: dict[str, dict[str, Any]] = {}
    contexts = (_node_pod_context(pod) for pod in _items(payload))
    for node_name, namespace, phase in filter(None, contexts):
        _node_pod_apply(_node_pod_entry(per_node, node_name), namespace, phase)
    return _node_pod_finalize(per_node)
|
|
||||||
|
|
||||||
|
|
||||||
def _node_pod_context(pod: dict[str, Any]) -> tuple[str, str, str] | None:
    """Extract ``(node, namespace, phase)`` from a pod object.

    Returns ``None`` when the namespace is rejected by ``_namespace_allowed``
    or when the pod has no non-empty string ``spec.nodeName``. A missing or
    non-string phase is reported as ``""``.
    """

    def field(part: str, key: str) -> str:
        holder = pod.get(part)
        found = holder.get(key) if isinstance(holder, dict) else None
        return found if isinstance(found, str) else ""

    namespace = field("metadata", "namespace")
    if not _namespace_allowed(namespace):
        return None
    node_name = field("spec", "nodeName")
    if not node_name:
        return None
    return node_name, namespace, field("status", "phase")
|
|
||||||
|
|
||||||
|
|
||||||
def _node_pod_entry(nodes: dict[str, dict[str, Any]], node: str) -> dict[str, Any]:
|
|
||||||
return nodes.setdefault(
|
|
||||||
node,
|
|
||||||
{
|
|
||||||
"node": node,
|
|
||||||
"pods_total": 0,
|
|
||||||
"pods_running": 0,
|
|
||||||
"pods_pending": 0,
|
|
||||||
"pods_failed": 0,
|
|
||||||
"pods_succeeded": 0,
|
|
||||||
"namespaces": {},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _node_pod_apply(entry: dict[str, Any], namespace: str, phase: str) -> None:
    """Count one pod on a node record.

    Always increments ``pods_total``; increments the phase-specific counter
    when ``phase`` is listed in ``_NODE_PHASE_KEYS``; and, for a non-empty
    ``namespace``, increments that namespace's tally under ``namespaces``.
    """
    entry["pods_total"] += 1
    if phase in _NODE_PHASE_KEYS:
        entry[_NODE_PHASE_KEYS[phase]] += 1
    if not namespace:
        return
    per_namespace = entry["namespaces"]
    per_namespace[namespace] = per_namespace.get(namespace, 0) + 1
|
|
||||||
|
|
||||||
|
|
||||||
def _node_pod_finalize(nodes: dict[str, dict[str, Any]]) -> list[dict[str, Any]]:
|
|
||||||
output: list[dict[str, Any]] = []
|
|
||||||
for entry in nodes.values():
|
|
||||||
namespaces = entry.get("namespaces") or {}
|
|
||||||
if isinstance(namespaces, dict):
|
|
||||||
entry["namespaces_top"] = sorted(
|
|
||||||
namespaces.items(), key=lambda item: (-item[1], item[0])
|
|
||||||
)[:3]
|
|
||||||
output.append(entry)
|
|
||||||
output.sort(key=lambda item: (-item.get("pods_total", 0), item.get("node") or ""))
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_pod_issues(payload: dict[str, Any]) -> dict[str, Any]:
    """Collect pods that are in a problematic phase or have restarted.

    Returns:
        ``{"counts": ..., "items": ...}`` where ``counts`` tallies pods per
        phase listed in ``_PHASE_SEVERITY`` and ``items`` holds at most 20
        records (namespace, pod, node, phase, reason, restarts,
        waiting_reasons), ordered by phase severity, then restart count
        (both descending), then namespace and pod name.
    """

    def section(source: dict[str, Any], key: str) -> dict[str, Any]:
        found = source.get(key)
        return found if isinstance(found, dict) else {}

    def text(source: dict[str, Any], key: str) -> str:
        found = source.get(key)
        return found if isinstance(found, str) else ""

    def rank(issue: dict[str, Any]) -> tuple[int, int, str, str]:
        return (
            -_PHASE_SEVERITY.get(issue.get("phase") or "", 0),
            -(issue.get("restarts") or 0),
            issue.get("namespace") or "",
            issue.get("pod") or "",
        )

    flagged: list[dict[str, Any]] = []
    counts: dict[str, int] = dict.fromkeys(_PHASE_SEVERITY, 0)

    for pod in _items(payload):
        meta = section(pod, "metadata")
        status = section(pod, "status")
        spec = section(pod, "spec")
        namespace = text(meta, "namespace")
        pod_name = text(meta, "name")
        if not (pod_name and namespace):
            continue

        phase = text(status, "phase")
        restart_total = 0
        waiting: set[str] = set()
        for container in status.get("containerStatuses") or []:
            if not isinstance(container, dict):
                continue
            restart_total += int(container.get("restartCount") or 0)
            why = section(section(container, "state"), "waiting").get("reason")
            if isinstance(why, str) and why:
                waiting.add(why)

        severe = phase in counts
        if severe:
            counts[phase] += 1
        if severe or restart_total > 0:
            flagged.append(
                {
                    "namespace": namespace,
                    "pod": pod_name,
                    "node": spec.get("nodeName") or "",
                    "phase": phase,
                    "reason": status.get("reason") or "",
                    "restarts": restart_total,
                    "waiting_reasons": sorted(waiting),
                }
            )

    flagged.sort(key=rank)
    return {"counts": counts, "items": flagged[:20]}
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_deployments(payload: dict[str, Any]) -> dict[str, Any]:
    """Report deployments whose ready or available replicas lag the desired count.

    Deployments with a desired replica count of zero or less are ignored.

    Returns:
        ``total`` (all items in the payload), ``not_ready`` (number of
        lagging deployments) and ``items`` (their records, sorted by
        namespace and name).
    """

    def section(source: dict[str, Any], key: str) -> dict[str, Any]:
        found = source.get(key)
        return found if isinstance(found, dict) else {}

    def text(source: dict[str, Any], key: str) -> str:
        found = source.get(key)
        return found if isinstance(found, str) else ""

    def number(source: dict[str, Any], key: str) -> int:
        return int(source.get(key) or 0)

    deployments = _items(payload)
    lagging: list[dict[str, Any]] = []
    for deployment in deployments:
        meta = section(deployment, "metadata")
        spec = section(deployment, "spec")
        status = section(deployment, "status")
        record = {
            "name": text(meta, "name"),
            "namespace": text(meta, "namespace"),
            "desired": number(spec, "replicas"),
            "ready": number(status, "readyReplicas"),
            "available": number(status, "availableReplicas"),
            "updated": number(status, "updatedReplicas"),
        }
        wanted = record["desired"]
        if wanted <= 0:
            continue
        if record["ready"] >= wanted and record["available"] >= wanted:
            continue
        lagging.append(record)

    lagging.sort(key=lambda rec: (rec.get("namespace") or "", rec.get("name") or ""))
    return {"total": len(deployments), "not_ready": len(lagging), "items": lagging}
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_statefulsets(payload: dict[str, Any]) -> dict[str, Any]:
    """Report statefulsets with fewer ready replicas than desired.

    Statefulsets with a desired replica count of zero or less are ignored.

    Returns:
        ``total`` (all items in the payload), ``not_ready`` (number of
        lagging statefulsets) and ``items`` (their records, sorted by
        namespace and name).
    """

    def as_dict(candidate: Any) -> dict[str, Any]:
        return candidate if isinstance(candidate, dict) else {}

    def as_text(candidate: Any) -> str:
        return candidate if isinstance(candidate, str) else ""

    statefulsets = _items(payload)
    lagging: list[dict[str, Any]] = []
    for statefulset in statefulsets:
        meta = as_dict(statefulset.get("metadata"))
        spec = as_dict(statefulset.get("spec"))
        status = as_dict(statefulset.get("status"))

        wanted = int(spec.get("replicas") or 0)
        ready_now = int(status.get("readyReplicas") or 0)
        current_now = int(status.get("currentReplicas") or 0)
        updated_now = int(status.get("updatedReplicas") or 0)

        if wanted > 0 and ready_now < wanted:
            lagging.append(
                {
                    "name": as_text(meta.get("name")),
                    "namespace": as_text(meta.get("namespace")),
                    "desired": wanted,
                    "ready": ready_now,
                    "current": current_now,
                    "updated": updated_now,
                }
            )

    lagging.sort(key=lambda rec: (rec.get("namespace") or "", rec.get("name") or ""))
    return {"total": len(statefulsets), "not_ready": len(lagging), "items": lagging}
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_daemonsets(payload: dict[str, Any]) -> dict[str, Any]:
    """Report daemonsets with fewer ready pods than desired.

    Daemonsets whose ``desiredNumberScheduled`` is zero or less are ignored.

    Returns:
        ``total`` (all items in the payload), ``not_ready`` (number of
        lagging daemonsets) and ``items`` (their records, sorted by
        namespace and name).
    """
    daemonsets = _items(payload)
    lagging: list[dict[str, Any]] = []
    for daemonset in daemonsets:
        meta = daemonset.get("metadata")
        if not isinstance(meta, dict):
            meta = {}
        status = daemonset.get("status")
        if not isinstance(status, dict):
            status = {}

        ds_name = meta.get("name")
        ds_namespace = meta.get("namespace")
        wanted, ready_now, updated_now = (
            int(status.get(field) or 0)
            for field in ("desiredNumberScheduled", "numberReady", "updatedNumberScheduled")
        )
        if wanted <= 0 or ready_now >= wanted:
            continue
        lagging.append(
            {
                "name": ds_name if isinstance(ds_name, str) else "",
                "namespace": ds_namespace if isinstance(ds_namespace, str) else "",
                "desired": wanted,
                "ready": ready_now,
                "updated": updated_now,
            }
        )

    lagging.sort(key=lambda rec: (rec.get("namespace") or "", rec.get("name") or ""))
    return {"total": len(daemonsets), "not_ready": len(lagging), "items": lagging}
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_workload_health(
|
|
||||||
deployments: dict[str, Any],
|
|
||||||
statefulsets: dict[str, Any],
|
|
||||||
daemonsets: dict[str, Any],
|
|
||||||
) -> dict[str, Any]:
|
|
||||||
return {
|
|
||||||
"deployments": deployments,
|
|
||||||
"statefulsets": statefulsets,
|
|
||||||
"daemonsets": daemonsets,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_nodes(errors: list[str]) -> tuple[dict[str, Any], list[dict[str, Any]], dict[str, Any]]:
    """Fetch ``/api/v1/nodes`` and derive the node summary, details and inventory.

    Best-effort: any exception is recorded in ``errors`` as ``"nodes: ..."``
    and whatever was computed before the failure is returned (empty values
    for the rest).
    """
    node_summary: dict[str, Any] = {}
    node_details: list[dict[str, Any]] = []
    inventory: dict[str, Any] = {}
    try:
        raw = get_json("/api/v1/nodes")
        node_summary = _summarize_nodes(raw)
        node_details = _node_details(raw)
        inventory = _summarize_inventory(node_details)
    except Exception as err:
        errors.append(f"nodes: {err}")
    return node_summary, node_details, inventory
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_flux(errors: list[str]) -> dict[str, Any]:
    """Fetch the ``flux-system`` kustomizations and summarize them.

    Best-effort: on any exception ``"flux: ..."`` is appended to ``errors``
    and an empty dict is returned.
    """
    flux_summary: dict[str, Any] = {}
    try:
        flux_summary = _summarize_kustomizations(
            get_json("/apis/kustomize.toolkit.fluxcd.io/v1/namespaces/flux-system/kustomizations")
        )
    except Exception as err:
        errors.append(f"flux: {err}")
        flux_summary = {}
    return flux_summary
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_pods(
    errors: list[str],
) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]:
    """Fetch the cluster-wide pod list once and derive every pod-based summary.

    Returns ``(workloads, namespace_pods, namespace_nodes, node_pods,
    pod_issues)``. Best-effort: any exception is recorded in ``errors`` as
    ``"pods: ..."`` and the summaries not yet computed stay empty.
    """
    workloads, namespace_pods, namespace_nodes, node_pods = [], [], [], []
    pod_issues: dict[str, Any] = {}
    try:
        raw = get_json("/api/v1/pods?limit=5000")
        workloads = _summarize_workloads(raw)
        namespace_pods = _summarize_namespace_pods(raw)
        namespace_nodes = _summarize_namespace_nodes(raw)
        node_pods = _summarize_node_pods(raw)
        pod_issues = _summarize_pod_issues(raw)
    except Exception as err:
        errors.append(f"pods: {err}")
    return workloads, namespace_pods, namespace_nodes, node_pods, pod_issues
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_workload_health(errors: list[str]) -> dict[str, Any]:
    """Fetch deployments, statefulsets and daemonsets and summarize their health.

    All three lists are fetched before any is summarized. Best-effort: on
    any exception ``"workloads_health: ..."`` is appended to ``errors`` and
    an empty dict is returned.
    """
    health: dict[str, Any] = {}
    try:
        raw_deployments = get_json("/apis/apps/v1/deployments?limit=2000")
        raw_statefulsets = get_json("/apis/apps/v1/statefulsets?limit=2000")
        raw_daemonsets = get_json("/apis/apps/v1/daemonsets?limit=2000")
        deployment_summary = _summarize_deployments(raw_deployments)
        statefulset_summary = _summarize_statefulsets(raw_statefulsets)
        daemonset_summary = _summarize_daemonsets(raw_daemonsets)
        health = _summarize_workload_health(
            deployment_summary, statefulset_summary, daemonset_summary
        )
    except Exception as err:
        errors.append(f"workloads_health: {err}")
        health = {}
    return health
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_events(errors: list[str]) -> dict[str, Any]:
    """Fetch the cluster event list and summarize its warnings.

    Best-effort: on any exception ``"events: ..."`` is appended to
    ``errors`` and an empty dict is returned.
    """
    event_summary: dict[str, Any] = {}
    try:
        event_summary = _summarize_events(get_json("/api/v1/events?limit=2000"))
    except Exception as err:
        errors.append(f"events: {err}")
        event_summary = {}
    return event_summary
|
|
||||||
|
|
||||||
|
|
||||||
def _vm_query(expr: str) -> list[dict[str, Any]] | None:
|
def _vm_query(expr: str) -> list[dict[str, Any]] | None:
|
||||||
base = settings.vm_url
|
base = settings.vm_url
|
||||||
if not base:
|
if not base:
|
||||||
@ -1050,29 +548,11 @@ def _node_usage(errors: list[str]) -> dict[str, Any]:
|
|||||||
'* on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))',
|
'* on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))',
|
||||||
"node",
|
"node",
|
||||||
)
|
)
|
||||||
usage["disk"] = _vm_node_metric(
|
|
||||||
'avg by (node) (((1 - avg by (instance) (node_filesystem_avail_bytes{mountpoint="/",fstype!~"tmpfs|overlay"} '
|
|
||||||
'/ node_filesystem_size_bytes{mountpoint="/",fstype!~"tmpfs|overlay"})) * 100) * on(instance) group_left(node) '
|
|
||||||
'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))',
|
|
||||||
"node",
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append(f"node_usage: {exc}")
|
errors.append(f"node_usage: {exc}")
|
||||||
return usage
|
return usage
|
||||||
|
|
||||||
|
|
||||||
def _pvc_usage(errors: list[str]) -> list[dict[str, Any]]:
    """Query the five fullest persistent volume claims (used / capacity, in percent).

    The query result is passed through ``_filter_namespace_vector``.
    Best-effort: on any exception ``"pvc_usage: ..."`` is appended to
    ``errors`` and an empty list is returned.
    """
    query = (
        "topk(5, max by (namespace,persistentvolumeclaim) "
        "(kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes * 100))"
    )
    try:
        return _filter_namespace_vector(_vm_vector(query))
    except Exception as err:
        errors.append(f"pvc_usage: {err}")
    return []
|
|
||||||
|
|
||||||
|
|
||||||
def _usage_stats(series: list[dict[str, Any]]) -> dict[str, float]:
|
def _usage_stats(series: list[dict[str, Any]]) -> dict[str, float]:
|
||||||
values: list[float] = []
|
values: list[float] = []
|
||||||
for entry in series:
|
for entry in series:
|
||||||
@ -1098,12 +578,6 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
metrics["nodes_ready"] = _vm_scalar(
|
metrics["nodes_ready"] = _vm_scalar(
|
||||||
"count(kube_node_status_condition{condition=\"Ready\",status=\"true\"})"
|
"count(kube_node_status_condition{condition=\"Ready\",status=\"true\"})"
|
||||||
)
|
)
|
||||||
metrics["capacity_cpu"] = _vm_scalar("sum(kube_node_status_capacity_cpu_cores)")
|
|
||||||
metrics["allocatable_cpu"] = _vm_scalar("sum(kube_node_status_allocatable_cpu_cores)")
|
|
||||||
metrics["capacity_mem_bytes"] = _vm_scalar("sum(kube_node_status_capacity_memory_bytes)")
|
|
||||||
metrics["allocatable_mem_bytes"] = _vm_scalar("sum(kube_node_status_allocatable_memory_bytes)")
|
|
||||||
metrics["capacity_pods"] = _vm_scalar("sum(kube_node_status_capacity_pods)")
|
|
||||||
metrics["allocatable_pods"] = _vm_scalar("sum(kube_node_status_allocatable_pods)")
|
|
||||||
metrics["pods_running"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Running\"})")
|
metrics["pods_running"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Running\"})")
|
||||||
metrics["pods_pending"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Pending\"})")
|
metrics["pods_pending"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Pending\"})")
|
||||||
metrics["pods_failed"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Failed\"})")
|
metrics["pods_failed"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Failed\"})")
|
||||||
@ -1111,19 +585,6 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
metrics["top_restarts_1h"] = _vm_vector(
|
metrics["top_restarts_1h"] = _vm_vector(
|
||||||
f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
||||||
)
|
)
|
||||||
metrics["pod_cpu_top"] = _filter_namespace_vector(
|
|
||||||
_vm_vector(
|
|
||||||
f'topk(5, sum by (namespace,pod) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}])))'
|
|
||||||
)
|
|
||||||
)
|
|
||||||
metrics["pod_mem_top"] = _filter_namespace_vector(
|
|
||||||
_vm_vector(
|
|
||||||
"topk(5, sum by (namespace,pod) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
metrics["job_failures_24h"] = _vm_vector(
|
|
||||||
"topk(5, sum by (namespace,job_name) (increase(kube_job_status_failed[24h])))"
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append(f"vm: {exc}")
|
errors.append(f"vm: {exc}")
|
||||||
metrics["postgres_connections"] = _postgres_connections(errors)
|
metrics["postgres_connections"] = _postgres_connections(errors)
|
||||||
@ -1134,7 +595,6 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
"ram": _usage_stats(metrics.get("node_usage", {}).get("ram", [])),
|
"ram": _usage_stats(metrics.get("node_usage", {}).get("ram", [])),
|
||||||
"net": _usage_stats(metrics.get("node_usage", {}).get("net", [])),
|
"net": _usage_stats(metrics.get("node_usage", {}).get("net", [])),
|
||||||
"io": _usage_stats(metrics.get("node_usage", {}).get("io", [])),
|
"io": _usage_stats(metrics.get("node_usage", {}).get("io", [])),
|
||||||
"disk": _usage_stats(metrics.get("node_usage", {}).get("disk", [])),
|
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
metrics["namespace_cpu_top"] = _filter_namespace_vector(
|
metrics["namespace_cpu_top"] = _filter_namespace_vector(
|
||||||
@ -1147,52 +607,16 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
"topk(5, sum by (namespace) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
"topk(5, sum by (namespace) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
metrics["namespace_cpu_requests_top"] = _filter_namespace_vector(
|
|
||||||
_vm_vector(
|
|
||||||
"topk(5, sum by (namespace) (kube_pod_container_resource_requests_cpu_cores))"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
metrics["namespace_mem_requests_top"] = _filter_namespace_vector(
|
|
||||||
_vm_vector(
|
|
||||||
"topk(5, sum by (namespace) (kube_pod_container_resource_requests_memory_bytes))"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
metrics["namespace_net_top"] = _filter_namespace_vector(
|
|
||||||
_vm_vector(
|
|
||||||
f"topk(5, sum by (namespace) (rate(container_network_receive_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}]) + rate(container_network_transmit_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}])))"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
metrics["namespace_io_top"] = _filter_namespace_vector(
|
|
||||||
_vm_vector(
|
|
||||||
f"topk(5, sum by (namespace) (rate(container_fs_reads_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}]) + rate(container_fs_writes_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}])))"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append(f"namespace_usage: {exc}")
|
errors.append(f"namespace_usage: {exc}")
|
||||||
metrics["pvc_usage_top"] = _pvc_usage(errors)
|
|
||||||
metrics["units"] = {
|
metrics["units"] = {
|
||||||
"cpu": "percent",
|
"cpu": "percent",
|
||||||
"ram": "percent",
|
"ram": "percent",
|
||||||
"net": "bytes_per_sec",
|
"net": "bytes_per_sec",
|
||||||
"io": "bytes_per_sec",
|
"io": "bytes_per_sec",
|
||||||
"disk": "percent",
|
|
||||||
"restarts": "count",
|
"restarts": "count",
|
||||||
"pod_cpu": "cores",
|
|
||||||
"pod_mem": "bytes",
|
|
||||||
"job_failures_24h": "count",
|
|
||||||
"namespace_cpu": "cores",
|
"namespace_cpu": "cores",
|
||||||
"namespace_mem": "bytes",
|
"namespace_mem": "bytes",
|
||||||
"namespace_cpu_requests": "cores",
|
|
||||||
"namespace_mem_requests": "bytes",
|
|
||||||
"namespace_net": "bytes_per_sec",
|
|
||||||
"namespace_io": "bytes_per_sec",
|
|
||||||
"pvc_used_percent": "percent",
|
|
||||||
"capacity_cpu": "cores",
|
|
||||||
"allocatable_cpu": "cores",
|
|
||||||
"capacity_mem_bytes": "bytes",
|
|
||||||
"allocatable_mem_bytes": "bytes",
|
|
||||||
"capacity_pods": "count",
|
|
||||||
"allocatable_pods": "count",
|
|
||||||
}
|
}
|
||||||
metrics["windows"] = {
|
metrics["windows"] = {
|
||||||
"rates": _RATE_WINDOW,
|
"rates": _RATE_WINDOW,
|
||||||
@ -1205,27 +629,45 @@ def collect_cluster_state() -> tuple[dict[str, Any], ClusterStateSummary]:
|
|||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
collected_at = datetime.now(timezone.utc)
|
collected_at = datetime.now(timezone.utc)
|
||||||
|
|
||||||
nodes, node_details, node_summary = _fetch_nodes(errors)
|
nodes: dict[str, Any] | None = None
|
||||||
kustomizations = _fetch_flux(errors)
|
node_details: list[dict[str, Any]] = []
|
||||||
workloads, namespace_pods, namespace_nodes, node_pods, pod_issues = _fetch_pods(errors)
|
node_summary: dict[str, Any] = {}
|
||||||
workload_health = _fetch_workload_health(errors)
|
try:
|
||||||
events = _fetch_events(errors)
|
payload = get_json("/api/v1/nodes")
|
||||||
|
nodes = _summarize_nodes(payload)
|
||||||
|
node_details = _node_details(payload)
|
||||||
|
node_summary = _summarize_inventory(node_details)
|
||||||
|
except Exception as exc:
|
||||||
|
errors.append(f"nodes: {exc}")
|
||||||
|
|
||||||
|
kustomizations: dict[str, Any] | None = None
|
||||||
|
try:
|
||||||
|
payload = get_json(
|
||||||
|
"/apis/kustomize.toolkit.fluxcd.io/v1/namespaces/flux-system/kustomizations"
|
||||||
|
)
|
||||||
|
kustomizations = _summarize_kustomizations(payload)
|
||||||
|
except Exception as exc:
|
||||||
|
errors.append(f"flux: {exc}")
|
||||||
|
|
||||||
|
workloads: list[dict[str, Any]] = []
|
||||||
|
namespace_pods: list[dict[str, Any]] = []
|
||||||
|
try:
|
||||||
|
pods_payload = get_json("/api/v1/pods?limit=5000")
|
||||||
|
workloads = _summarize_workloads(pods_payload)
|
||||||
|
namespace_pods = _summarize_namespace_pods(pods_payload)
|
||||||
|
except Exception as exc:
|
||||||
|
errors.append(f"pods: {exc}")
|
||||||
|
|
||||||
metrics = _summarize_metrics(errors)
|
metrics = _summarize_metrics(errors)
|
||||||
|
|
||||||
snapshot = {
|
snapshot = {
|
||||||
"collected_at": collected_at.isoformat(),
|
"collected_at": collected_at.isoformat(),
|
||||||
"nodes": nodes,
|
"nodes": nodes or {},
|
||||||
"nodes_summary": node_summary,
|
"nodes_summary": node_summary,
|
||||||
"nodes_detail": node_details,
|
"nodes_detail": node_details,
|
||||||
"flux": kustomizations,
|
"flux": kustomizations or {},
|
||||||
"workloads": workloads,
|
"workloads": workloads,
|
||||||
"namespace_pods": namespace_pods,
|
"namespace_pods": namespace_pods,
|
||||||
"namespace_nodes": namespace_nodes,
|
|
||||||
"node_pods": node_pods,
|
|
||||||
"pod_issues": pod_issues,
|
|
||||||
"workloads_health": workload_health,
|
|
||||||
"events": events,
|
|
||||||
"metrics": metrics,
|
"metrics": metrics,
|
||||||
"errors": errors,
|
"errors": errors,
|
||||||
}
|
}
|
||||||
|
|||||||
@ -31,16 +31,7 @@ def test_collect_cluster_state(monkeypatch) -> None:
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"metadata": {
|
"metadata": {"name": "node-b", "labels": {"kubernetes.io/arch": "amd64"}},
|
||||||
"name": "node-b",
|
|
||||||
"labels": {"kubernetes.io/arch": "amd64"},
|
|
||||||
"creationTimestamp": "2026-01-01T00:00:00Z",
|
|
||||||
},
|
|
||||||
"spec": {
|
|
||||||
"taints": [
|
|
||||||
{"key": "node-role.kubernetes.io/control-plane", "effect": "NoSchedule"}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"status": {
|
"status": {
|
||||||
"conditions": [{"type": "Ready", "status": "False"}],
|
"conditions": [{"type": "Ready", "status": "False"}],
|
||||||
"nodeInfo": {"architecture": "amd64"},
|
"nodeInfo": {"architecture": "amd64"},
|
||||||
@ -62,37 +53,6 @@ def test_collect_cluster_state(monkeypatch) -> None:
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
if path.startswith("/api/v1/events"):
|
|
||||||
return {"items": []}
|
|
||||||
if path.startswith("/apis/apps/v1/deployments"):
|
|
||||||
return {
|
|
||||||
"items": [
|
|
||||||
{
|
|
||||||
"metadata": {"name": "api", "namespace": "apps"},
|
|
||||||
"spec": {"replicas": 2},
|
|
||||||
"status": {"readyReplicas": 1, "availableReplicas": 1, "updatedReplicas": 1},
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
if path.startswith("/apis/apps/v1/statefulsets"):
|
|
||||||
return {
|
|
||||||
"items": [
|
|
||||||
{
|
|
||||||
"metadata": {"name": "db", "namespace": "apps"},
|
|
||||||
"spec": {"replicas": 1},
|
|
||||||
"status": {"readyReplicas": 1, "currentReplicas": 1, "updatedReplicas": 1},
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
if path.startswith("/apis/apps/v1/daemonsets"):
|
|
||||||
return {
|
|
||||||
"items": [
|
|
||||||
{
|
|
||||||
"metadata": {"name": "agent", "namespace": "apps"},
|
|
||||||
"status": {"desiredNumberScheduled": 3, "numberReady": 3, "updatedNumberScheduled": 3},
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
return {
|
return {
|
||||||
"items": [
|
"items": [
|
||||||
{
|
{
|
||||||
@ -118,31 +78,13 @@ def test_collect_cluster_state(monkeypatch) -> None:
|
|||||||
assert snapshot["flux"]["not_ready"] == 1
|
assert snapshot["flux"]["not_ready"] == 1
|
||||||
assert snapshot["nodes_summary"]["total"] == 2
|
assert snapshot["nodes_summary"]["total"] == 2
|
||||||
assert snapshot["nodes_summary"]["ready"] == 1
|
assert snapshot["nodes_summary"]["ready"] == 1
|
||||||
assert "pressure_nodes" in snapshot["nodes_summary"]
|
|
||||||
assert snapshot["nodes_detail"]
|
assert snapshot["nodes_detail"]
|
||||||
assert snapshot["nodes_detail"][1]["age_hours"] is not None
|
|
||||||
assert snapshot["nodes_detail"][1]["taints"]
|
|
||||||
assert snapshot["workloads"]
|
assert snapshot["workloads"]
|
||||||
assert snapshot["namespace_pods"]
|
assert snapshot["namespace_pods"]
|
||||||
assert snapshot["namespace_pods"][0]["namespace"] == "media"
|
assert snapshot["namespace_pods"][0]["namespace"] == "media"
|
||||||
assert snapshot["namespace_nodes"]
|
|
||||||
assert snapshot["node_pods"]
|
|
||||||
assert "pod_issues" in snapshot
|
|
||||||
assert "workloads_health" in snapshot
|
|
||||||
assert snapshot["workloads_health"]["deployments"]["total"] == 1
|
|
||||||
assert snapshot["workloads_health"]["deployments"]["not_ready"] == 1
|
|
||||||
assert snapshot["events"]["warnings_total"] == 0
|
|
||||||
assert "node_usage_stats" in snapshot["metrics"]
|
assert "node_usage_stats" in snapshot["metrics"]
|
||||||
assert snapshot["metrics"]["namespace_cpu_top"] == []
|
assert snapshot["metrics"]["namespace_cpu_top"] == []
|
||||||
assert snapshot["metrics"]["namespace_mem_top"] == []
|
assert snapshot["metrics"]["namespace_mem_top"] == []
|
||||||
assert snapshot["metrics"]["namespace_cpu_requests_top"] == []
|
|
||||||
assert snapshot["metrics"]["namespace_mem_requests_top"] == []
|
|
||||||
assert snapshot["metrics"]["namespace_net_top"] == []
|
|
||||||
assert snapshot["metrics"]["namespace_io_top"] == []
|
|
||||||
assert snapshot["metrics"]["pod_cpu_top"] == []
|
|
||||||
assert snapshot["metrics"]["pod_mem_top"] == []
|
|
||||||
assert snapshot["metrics"]["job_failures_24h"] == []
|
|
||||||
assert snapshot["metrics"]["pvc_usage_top"] == []
|
|
||||||
assert summary.nodes_total == 2
|
assert summary.nodes_total == 2
|
||||||
assert summary.nodes_ready == 1
|
assert summary.nodes_ready == 1
|
||||||
assert summary.pods_running == 5.0
|
assert summary.pods_running == 5.0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user