diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 7ca8194..a895bd8 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -214,7 +214,7 @@ UPTIME_AVAIL_EXPR = ( f"min(({CONTROL_READY_FRACTION_EXPR}), ({TRAEFIK_READY_EXPR}))" ) UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:5m])" -UPTIME_PERCENT_EXPR = f"({UPTIME_AVG_EXPR}) * 100" +UPTIME_PERCENT_EXPR = UPTIME_AVG_EXPR UPTIME_NINES_EXPR = f"-log10(1 - clamp_max({UPTIME_AVG_EXPR}, 0.999999999))" UPTIME_THRESHOLDS = { "mode": "absolute", @@ -229,9 +229,9 @@ UPTIME_PERCENT_THRESHOLDS = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, - {"color": "orange", "value": 99}, - {"color": "yellow", "value": 99.9}, - {"color": "green", "value": 99.99}, + {"color": "orange", "value": 0.99}, + {"color": "yellow", "value": 0.999}, + {"color": "green", "value": 0.9999}, ], } PROBLEM_TABLE_EXPR = ( @@ -641,7 +641,6 @@ def build_overview(): "expr": UPTIME_PERCENT_EXPR, "kind": "stat", "thresholds": UPTIME_PERCENT_THRESHOLDS, - "value_suffix": "%", "unit": "percentunit", "decimals": 3, "text_mode": "value", diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 3dec2aa..b4416c7 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -226,7 +226,7 @@ }, "targets": [ { - "expr": "(avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])) * 100", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])", "refId": "A" } ], @@ -245,22 +245,21 @@ }, { "color": "orange", - "value": 99 + "value": 0.99 }, { "color": "yellow", - "value": 99.9 + "value": 0.999 }, { "color": "green", - "value": 99.99 + "value": 0.9999 } ] }, "unit": "percentunit", "custom": { - "displayMode": "auto", - "valueSuffix": "%" + "displayMode": "auto" }, "decimals": 3 }, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 507a127..2dbcbb5 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -235,7 +235,7 @@ data: }, "targets": [ { - "expr": "(avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])) * 100", + "expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])", "refId": "A" } ], @@ -254,22 +254,21 @@ data: }, { "color": "orange", - "value": 99 + "value": 0.99 }, { "color": "yellow", - "value": 99.9 + "value": 0.999 }, { "color": "green", - "value": 99.99 + "value": 0.9999 } ] }, "unit": "percentunit", "custom": { - "displayMode": "auto", - "valueSuffix": "%" + "displayMode": "auto" }, "decimals": 3 },