monitoring: ignore availability scrape gaps

This commit is contained in:
jenkins 2026-05-10 16:38:05 -03:00
parent eb57c1fe0f
commit dad9e4e8f2
5 changed files with 60 additions and 6 deletions

View File

@@ -1442,7 +1442,7 @@ def build_overview():
"decimals": 4, "decimals": 4,
"text_mode": "value", "text_mode": "value",
"instant": True, "instant": True,
"description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; missing slots after that first sample count as down.", "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; observed down samples count as down, while scrape gaps are ignored.",
}, },
{ {
"id": 4, "id": 4,

View File

@@ -59,7 +59,7 @@ def test_overview_availability_panel_uses_recorded_365d_rollup():
assert panel["targets"][0]["expr"] == 'last_over_time(atlas:availability:ratio_365d{scope="atlas"}[30m])' assert panel["targets"][0]["expr"] == 'last_over_time(atlas:availability:ratio_365d{scope="atlas"}[30m])'
assert panel["targets"][0]["instant"] is True assert panel["targets"][0]["instant"] is True
assert "precomputed" in panel["description"] assert "precomputed" in panel["description"]
assert "after that first sample count as down" in panel["description"] assert "scrape gaps are ignored" in panel["description"]
def test_render_configmap_writes(tmp_path): def test_render_configmap_writes(tmp_path):

View File

@@ -283,7 +283,7 @@
}, },
"textMode": "value" "textMode": "value"
}, },
"description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; missing slots after that first sample count as down." "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; observed down samples count as down, while scrape gaps are ignored."
}, },
{ {
"id": 4, "id": 4,

View File

@@ -292,7 +292,7 @@ data:
}, },
"textMode": "value" "textMode": "value"
}, },
"description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; missing slots after that first sample count as down." "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; observed down samples count as down, while scrape gaps are ignored."
}, },
{ {
"id": 4, "id": 4,

View File

@@ -81,7 +81,61 @@ data:
0 0
) )
) )
/ 8761, 1) /
clamp_min(
(
(
sum(count_over_time((
min(
(
sum(kube_node_status_condition{condition="Ready",status="true",node=~"titan-0a|titan-0b|titan-0c"})
/ 3
),
(
sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})
/ clamp_min(sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)
)
)
)[365d:1h]))
or on() vector(0)
)
+
clamp_min(
8761
-
(
clamp_min(
floor(
(
time()
-
(
min(min_over_time(timestamp(
min(
(
sum(kube_node_status_condition{condition="Ready",status="true",node=~"titan-0a|titan-0b|titan-0c"})
/ 3
),
(
sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})
/ clamp_min(sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)
)
)
)[365d:1h]))
or on() vector(time() + 3600)
)
)
/ 3600
)
+ 1,
0
)
),
0
)
),
1
), 1)
labels: labels:
scope: atlas scope: atlas
rollup: yearly rollup: yearly
@@ -114,7 +168,7 @@ spec:
labels: labels:
app: vmalert-atlas-availability app: vmalert-atlas-availability
annotations: annotations:
bstein.dev/rules-revision: "2026-05-10-availability-rollup-v5" bstein.dev/rules-revision: "2026-05-10-availability-rollup-v6"
spec: spec:
serviceAccountName: vmalert-atlas-availability serviceAccountName: vmalert-atlas-availability
affinity: affinity: