monitoring(overview): fix pvc backup health/age panel query
This commit is contained in:
parent
db701b89c2
commit
a2172f56ec
@ -581,7 +581,11 @@ QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE = (
|
||||
QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE_WITH_MISSING = (
|
||||
f"({QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE}) or on(suite) (0 * ({QUALITY_GATE_SUITE_INDEX_30D}) - 1)"
|
||||
)
|
||||
PVC_BACKUP_AGE_HOURS_BY_PVC = "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))"
|
||||
PVC_BACKUP_AGE_HOURS_BY_PVC = (
|
||||
'sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver="restic"}) / 3600) '
|
||||
'or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver="restic",reason=~"missing|no_completed|lookup_failed|unknown_timestamp"} > 0) '
|
||||
'* (pvc_backup_count{driver="restic"} > bool 0)) * 999))))'
|
||||
)
|
||||
ANANKE_SELECTOR = 'job="ananke-power"'
|
||||
ANANKE_UPS_DB_NAME = "Pyrphoros"
|
||||
ANANKE_UPS_DB_NODE = "titan-db"
|
||||
@ -2168,7 +2172,8 @@ def build_overview():
|
||||
)
|
||||
panels[-1]["links"] = link_to("atlas-storage")
|
||||
panels[-1]["description"] = (
|
||||
"Oldest successful backup age in hours by PVC. PVCs with missing or unhealthy backup state are forced to 999h so the red bars stay visible in the overview."
|
||||
"Backup age in hours computed from last-success timestamps for restic-managed PVCs. "
|
||||
"PVCs that have backup history but currently no successful backup (missing/no_completed/lookup_failed/unknown_timestamp) are pinned to 999h for visibility."
|
||||
)
|
||||
panels.append(
|
||||
jenkins_weather_bargauge_panel(
|
||||
|
||||
@ -2642,7 +2642,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
|
||||
"expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{pvc}}",
|
||||
"instant": true
|
||||
@ -2706,7 +2706,7 @@
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Oldest successful backup age in hours by PVC. PVCs with missing or unhealthy backup state are forced to 999h so the red bars stay visible in the overview."
|
||||
"description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs. PVCs that have backup history but currently no successful backup (missing/no_completed/lookup_failed/unknown_timestamp) are pinned to 999h for visibility."
|
||||
},
|
||||
{
|
||||
"id": 142,
|
||||
|
||||
@ -2651,7 +2651,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
|
||||
"expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{pvc}}",
|
||||
"instant": true
|
||||
@ -2715,7 +2715,7 @@ data:
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
"description": "Oldest successful backup age in hours by PVC. PVCs with missing or unhealthy backup state are forced to 999h so the red bars stay visible in the overview."
|
||||
"description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs. PVCs that have backup history but currently no successful backup (missing/no_completed/lookup_failed/unknown_timestamp) are pinned to 999h for visibility."
|
||||
},
|
||||
{
|
||||
"id": 142,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user