monitoring: ignore availability scrape gaps

This commit is contained in:
jenkins 2026-05-10 16:38:05 -03:00
parent eb57c1fe0f
commit dad9e4e8f2
5 changed files with 60 additions and 6 deletions

View File

@@ -1442,7 +1442,7 @@ def build_overview():
"decimals": 4,
"text_mode": "value",
"instant": True,
- "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; missing slots after that first sample count as down.",
+ "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; observed down samples count as down, while scrape gaps are ignored.",
},
{
"id": 4,

View File

@@ -59,7 +59,7 @@ def test_overview_availability_panel_uses_recorded_365d_rollup():
assert panel["targets"][0]["expr"] == 'last_over_time(atlas:availability:ratio_365d{scope="atlas"}[30m])'
assert panel["targets"][0]["instant"] is True
assert "precomputed" in panel["description"]
- assert "after that first sample count as down" in panel["description"]
+ assert "scrape gaps are ignored" in panel["description"]
def test_render_configmap_writes(tmp_path):

View File

@@ -283,7 +283,7 @@
},
"textMode": "value"
},
- "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; missing slots after that first sample count as down."
+ "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; observed down samples count as down, while scrape gaps are ignored."
},
{
"id": 4,

View File

@@ -292,7 +292,7 @@ data:
},
"textMode": "value"
},
- "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; missing slots after that first sample count as down."
+ "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. Missing slots before the first raw availability sample are filled as 100% up; observed down samples count as down, while scrape gaps are ignored."
},
{
"id": 4,

View File

@@ -81,7 +81,61 @@ data:
0
)
)
- / 8761, 1)
+ /
+ clamp_min(
+ (
+ (
+ sum(count_over_time((
+ min(
+ (
+ sum(kube_node_status_condition{condition="Ready",status="true",node=~"titan-0a|titan-0b|titan-0c"})
+ / 3
+ ),
+ (
+ sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})
+ / clamp_min(sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)
+ )
+ )
+ )[365d:1h]))
+ or on() vector(0)
+ )
+ +
+ clamp_min(
+ 8761
+ -
+ (
+ clamp_min(
+ floor(
+ (
+ time()
+ -
+ (
+ min(min_over_time(timestamp(
+ min(
+ (
+ sum(kube_node_status_condition{condition="Ready",status="true",node=~"titan-0a|titan-0b|titan-0c"})
+ / 3
+ ),
+ (
+ sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})
+ / clamp_min(sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)
+ )
+ )
+ )[365d:1h]))
+ or on() vector(time() + 3600)
+ )
+ )
+ / 3600
+ )
+ + 1,
+ 0
+ )
+ ),
+ 0
+ )
+ ),
+ 1
+ ), 1)
labels:
scope: atlas
rollup: yearly
@@ -114,7 +168,7 @@ spec:
labels:
app: vmalert-atlas-availability
annotations:
- bstein.dev/rules-revision: "2026-05-10-availability-rollup-v5"
+ bstein.dev/rules-revision: "2026-05-10-availability-rollup-v6"
spec:
serviceAccountName: vmalert-atlas-availability
affinity: