Reduce Atlas availability query density

This commit is contained in:
Brad Stein 2025-12-19 14:56:29 -03:00
parent 8be89cbd53
commit 89f95157d8
3 changed files with 5 additions and 3 deletions

View File

@ -205,6 +205,8 @@ STUCK_TERMINATING_EXPR = (
"or on() vector(0)"
)
UPTIME_WINDOW = "365d"
# Keep the subquery step coarse so we don't request an excessive number of points.
UPTIME_STEP = "1h"
TRAEFIK_READY_EXPR = (
"("
'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})'
@ -225,7 +227,7 @@ NODE_TIEBREAKER = " + ".join(
f"({node_filter(node)}) * 1e-6 * {idx}"
for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1)
)
UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:5m])"
UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:{UPTIME_STEP}])"
UPTIME_PERCENT_EXPR = UPTIME_AVG_EXPR
UPTIME_NINES_EXPR = f"-log10(1 - clamp_max({UPTIME_AVG_EXPR}, 0.999999999))"
UPTIME_THRESHOLDS = {

View File

@ -226,7 +226,7 @@
},
"targets": [
{
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])",
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])",
"refId": "A"
}
],

View File

@ -235,7 +235,7 @@ data:
},
"targets": [
{
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:5m])",
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])",
"refId": "A"
}
],