monitoring: trial overview health timelines

This commit is contained in:
jenkins 2026-05-16 05:08:09 -03:00
parent 2ede953580
commit 5d01b3a60d
6 changed files with 275 additions and 210 deletions

View File

@ -1087,6 +1087,7 @@ def apply_bar_timeseries_style(panel, *, stacked=False, fill_opacity=70):
panel["fieldConfig"]["defaults"]["custom"] = { panel["fieldConfig"]["defaults"]["custom"] = {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": fill_opacity, "fillOpacity": fill_opacity,
"gradientMode": "none", "gradientMode": "none",
@ -1515,8 +1516,8 @@ DASHBOARD_LINK_TITLES = {
"atlas-storage": "Open Atlas Storage", "atlas-storage": "Open Atlas Storage",
"atlas-network": "Open Atlas Network", "atlas-network": "Open Atlas Network",
"atlas-mail": "Open Atlas Mail", "atlas-mail": "Open Atlas Mail",
"atlas-jobs": "Open Atlas Testing", "atlas-jobs": "Atlas Testing",
"atlas-testing": "Open Atlas Testing", "atlas-testing": "Atlas Testing",
"atlas-power": "Open Atlas Power", "atlas-power": "Open Atlas Power",
"atlas-gitops": "Open Atlas GitOps", "atlas-gitops": "Open Atlas GitOps",
"atlas-gpu": "Open Atlas GPU", "atlas-gpu": "Open Atlas GPU",
@ -1550,10 +1551,18 @@ def build_overview():
climate_temp_series = f"max without ({climate_drop_labels}) (typhon_temperature_celsius != 0)" climate_temp_series = f"max without ({climate_drop_labels}) (typhon_temperature_celsius != 0)"
climate_humidity_series = f"max without ({climate_drop_labels}) (typhon_relative_humidity_percent != 0)" climate_humidity_series = f"max without ({climate_drop_labels}) (typhon_relative_humidity_percent != 0)"
climate_pressure_series = f"max without ({climate_drop_labels}) (typhon_vpd_kpa != 0)" climate_pressure_series = f"max without ({climate_drop_labels}) (typhon_vpd_kpa != 0)"
overview_pvc_backup_metric_presence = (
'count({__name__=~"pvc_backup_(count|last_success_timestamp_seconds|health_reason)",driver="restic"})'
)
overview_pvc_backup_missing = (
'label_replace(label_replace(vector(999), "namespace", "maintenance", "__name__", ".*"), '
'"pvc", "backup-telemetry-missing", "__name__", ".*")'
)
overview_pvc_backup_age = ( overview_pvc_backup_age = (
'max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver="restic"}) / 3600) ' 'max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver="restic"}) / 3600) '
'or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver="restic",reason=~"missing|no_completed|lookup_failed|unknown_timestamp"} > 0) ' 'or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver="restic",reason=~"missing|no_completed|lookup_failed|unknown_timestamp"} > 0) '
'* (pvc_backup_count{driver="restic"} > bool 0)) * 999))) or on() vector(0)' f'* (pvc_backup_count{{driver="restic"}} > bool 0)) * 999))) or on() '
f'(({overview_pvc_backup_missing}) unless on() (({overview_pvc_backup_metric_presence}) > 0))'
) )
def overview_metric_pair_expr(first_expr, first_name, second_expr, second_name): def overview_metric_pair_expr(first_expr, first_name, second_expr, second_name):
@ -1822,6 +1831,28 @@ def build_overview():
{"color": dark_blue, "value": 100}, {"color": dark_blue, "value": 100},
], ],
} }
fan_intensity_thresholds = {
"mode": "absolute",
"steps": [
{"color": dark_blue, "value": None},
{"color": dark_green, "value": 3},
{"color": dark_yellow, "value": 6},
{"color": dark_orange, "value": 8},
{"color": dark_red, "value": 10},
],
}
fan_intensity_expr = (
f'label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="1"}}), "fan", "Outlet", "__name__", ".*") '
f'or label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="2"}}), "fan", "Inlet - Inside", "__name__", ".*") '
f'or label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="3"}}), "fan", "Inlet - Outside", "__name__", ".*") '
f'or label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="4"}}), "fan", "Interior", "__name__", ".*")'
)
gitops_health_history_expr = (
f'label_replace({GITOPS_KUSTOMIZATION_READY_PCT}, "signal", "Kustomizations Ready", "__name__", ".*") '
f'or label_replace({GITOPS_HELM_READY_PCT}, "signal", "HelmReleases Ready", "__name__", ".*") '
f'or label_replace({GITOPS_KUSTOMIZATION_NOT_SUSPENDED_PCT}, "signal", "Kustomizations Not Suspended", "__name__", ".*") '
f'or label_replace({GITOPS_HELM_NOT_SUSPENDED_PCT}, "signal", "HelmReleases Not Suspended", "__name__", ".*")'
)
compact_current_text = {"titleSize": 11, "valueSize": 20} compact_current_text = {"titleSize": 11, "valueSize": 20}
perfect_count_thresholds = { perfect_count_thresholds = {
"mode": "absolute", "mode": "absolute",
@ -1891,6 +1922,7 @@ def build_overview():
links=overview_link("atlas-power"), links=overview_link("atlas-power"),
), ),
stacked=False, stacked=False,
fill_opacity=55,
) )
) )
temp_panel = stat_panel( temp_panel = stat_panel(
@ -2049,25 +2081,22 @@ def build_overview():
"showPoints": "never", "showPoints": "never",
"spanNulls": True, "spanNulls": True,
} }
panels.append( fan_panel = state_timeline_panel(
timeseries_panel( 141,
141, "Fan Intensity History",
"Fan History (0-10)", fan_intensity_expr,
None, {"h": 6, "w": 6, "x": 9, "y": 13},
{"h": 6, "w": 6, "x": 9, "y": 13}, unit="none",
unit="none", min_value=0,
max_value=10, max_value=10,
targets=[ legend="{{fan}}",
{"refId": "A", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="1"}})', "legendFormat": "Outlet"}, thresholds=fan_intensity_thresholds,
{"refId": "B", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="2"}})', "legendFormat": "Inlet - Inside"}, links=overview_link("atlas-power"),
{"refId": "C", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="3"}})', "legendFormat": "Inlet - Outside"}, description="Fan intensity lanes on the 0-10 controller scale. Cooler colors are quiet/low intensity; warmer colors mean the enclosure is pushing harder.",
{"refId": "D", "expr": f'max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="4"}})', "legendFormat": "Interior"},
],
legend_display="table",
legend_placement="right",
links=overview_link("atlas-power"),
)
) )
fan_panel["options"]["legend"] = {"displayMode": "table", "placement": "right"}
fan_panel["options"]["tooltip"] = {"mode": "multi", "sort": "none"}
panels.append(fan_panel)
flux_source = stat_panel( flux_source = stat_panel(
140, 140,
"Flux Source", "Flux Source",
@ -2118,24 +2147,18 @@ def build_overview():
rail_panel["options"]["text"] = {"titleSize": 10, "valueSize": 19} rail_panel["options"]["text"] = {"titleSize": 10, "valueSize": 19}
panels.append(rail_panel) panels.append(rail_panel)
panels.append( panels.append(
bargauge_panel( state_timeline_panel(
150, 150,
"GitOps Health", "GitOps Health",
( gitops_health_history_expr,
f'label_replace({GITOPS_KUSTOMIZATION_READY_PCT}, "signal", "Kustomizations Ready", "__name__", ".*") ' {"h": 6, "w": 6, "x": 15, "y": 7},
f'or label_replace({GITOPS_HELM_READY_PCT}, "signal", "HelmReleases Ready", "__name__", ".*") '
f'or label_replace({GITOPS_KUSTOMIZATION_NOT_SUSPENDED_PCT}, "signal", "Kustomizations Not Suspended", "__name__", ".*") '
f'or label_replace({GITOPS_HELM_NOT_SUSPENDED_PCT}, "signal", "HelmReleases Not Suspended", "__name__", ".*")'
),
{"h": 6, "w": 6, "x": 15, "y": 13},
unit="percent", unit="percent",
instant=True, min_value=0,
max_value=100,
legend="{{signal}}", legend="{{signal}}",
sort_order="asc",
thresholds=test_success_thresholds, thresholds=test_success_thresholds,
decimals=0,
links=overview_link("atlas-gitops"), links=overview_link("atlas-gitops"),
description="Compact GitOps health: readiness plus suspension health for Kustomizations and HelmReleases.", description="GitOps readiness and suspension health over time. Blue means perfect; warmer colors mean a readiness or suspension problem appeared.",
) )
) )
@ -2174,23 +2197,20 @@ def build_overview():
) )
panels.append(apply_bar_timeseries_style(ariadne_volume, stacked=False)) panels.append(apply_bar_timeseries_style(ariadne_volume, stacked=False))
panels.append( panels.append(
bargauge_panel( state_timeline_panel(
46, 46,
"Gate Checks Passing by Suite", "Gate Checks Passing by Suite",
PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE, PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE,
{"h": 6, "w": 6, "x": 15, "y": 7}, {"h": 6, "w": 6, "x": 15, "y": 13},
unit="percent", unit="percent",
instant=True, min_value=0,
max_value=100,
legend="{{suite}}", legend="{{suite}}",
sort_order="asc",
thresholds=test_success_thresholds, thresholds=test_success_thresholds,
decimals=0,
links=overview_link("atlas-testing"), links=overview_link("atlas-testing"),
description="Percent of current gate dimensions passing per suite over time. There are seven gate dimensions, so 85.7% means one gate is failing.",
) )
) )
panels[-1]["description"] = (
"Percent of current gate dimensions passing per suite. There are seven gate dimensions, so 85.7% means one gate is failing."
)
for panel_id, title, metric, x_pos, description in [ for panel_id, title, metric, x_pos, description in [
( (
142, 142,
@ -4773,7 +4793,7 @@ def build_power_dashboard():
panels.append( panels.append(
timeseries_panel( timeseries_panel(
6, 6,
"Fan History (0-10)", "Fan Intensity History",
None, None,
{"h": 8, "w": 12, "x": 12, "y": 16}, {"h": 8, "w": 12, "x": 12, "y": 16},
unit="none", unit="none",

View File

@ -68,19 +68,36 @@ def test_overview_uses_readable_quality_power_and_gitops_panels():
panels_by_title = {panel["title"]: panel for panel in flatten_panels(dashboard["panels"])} panels_by_title = {panel["title"]: panel for panel in flatten_panels(dashboard["panels"])}
assert dashboard["links"] == [ assert dashboard["links"] == [
{"title": "Open Atlas Testing", "url": "/d/atlas-testing", "targetBlank": True} {"title": "Atlas Testing", "url": "/d/atlas-testing", "targetBlank": True}
] ]
assert "atlas-jobs" not in repr(dashboard) assert "atlas-jobs" not in repr(dashboard)
assert "Platform Test Success Rate" not in panels_by_title assert "Platform Test Success Rate" not in panels_by_title
assert panels_by_title["Gate Checks Passing by Suite"]["type"] == "bargauge" assert panels_by_title["Gate Checks Passing by Suite"]["type"] == "state-timeline"
assert panels_by_title["Gate Checks Passing by Suite"]["options"]["displayMode"] == "basic" assert panels_by_title["Gate Checks Passing by Suite"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 13}
assert panels_by_title["Gate Checks Passing by Suite"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 7} assert panels_by_title["Gate Checks Passing by Suite"]["targets"][0]["legendFormat"] == "{{suite}}"
assert panels_by_title["UPS History (Power Draw)"]["gridPos"] == {"h": 6, "w": 6, "x": 3, "y": 7} assert panels_by_title["UPS History (Power Draw)"]["gridPos"] == {"h": 6, "w": 6, "x": 3, "y": 7}
assert panels_by_title["Ariadne Run Volume"]["gridPos"] == {"h": 6, "w": 6, "x": 9, "y": 7} assert panels_by_title["Ariadne Run Volume"]["gridPos"] == {"h": 6, "w": 6, "x": 9, "y": 7}
assert panels_by_title["Pyrphoros UPS Current"]["gridPos"]["w"] == 3 assert panels_by_title["Pyrphoros UPS Current"]["gridPos"]["w"] == 3
assert panels_by_title["Current Enclosure Climate"]["gridPos"]["w"] == 3 assert panels_by_title["Current Enclosure Climate"]["gridPos"]["w"] == 3
assert panels_by_title["UPS History (Power Draw)"]["fieldConfig"]["defaults"]["custom"]["drawStyle"] == "bars" assert panels_by_title["UPS History (Power Draw)"]["fieldConfig"]["defaults"]["custom"]["drawStyle"] == "bars"
assert panels_by_title["UPS History (Power Draw)"]["fieldConfig"]["defaults"]["custom"]["barWidthFactor"] == 0.72
ups_overrides = panels_by_title["UPS History (Power Draw)"]["fieldConfig"]["overrides"]
assert any(
override["matcher"]["options"] == "Pyrphoros"
and override["properties"][0]["value"] == {"mode": "fixed", "fixedColor": "dark-blue"}
for override in ups_overrides
)
assert any(
override["matcher"]["options"] == "Statera"
and override["properties"][0]["value"] == {"mode": "fixed", "fixedColor": "dark-yellow"}
for override in ups_overrides
)
assert panels_by_title["Ariadne Run Volume"]["fieldConfig"]["defaults"]["custom"]["drawStyle"] == "bars" assert panels_by_title["Ariadne Run Volume"]["fieldConfig"]["defaults"]["custom"]["drawStyle"] == "bars"
assert "Fan History (0-10)" not in panels_by_title
assert panels_by_title["Fan Intensity History"]["type"] == "state-timeline"
assert panels_by_title["Fan Intensity History"]["gridPos"] == {"h": 6, "w": 6, "x": 9, "y": 13}
assert panels_by_title["Fan Intensity History"]["fieldConfig"]["defaults"]["max"] == 10
assert panels_by_title["Fan Intensity History"]["targets"][0]["legendFormat"] == "{{fan}}"
assert panels_by_title["Flux Source"]["type"] == "stat" assert panels_by_title["Flux Source"]["type"] == "stat"
assert panels_by_title["Flux Source"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 7} assert panels_by_title["Flux Source"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 7}
@ -88,12 +105,16 @@ def test_overview_uses_readable_quality_power_and_gitops_panels():
assert panels_by_title["Run Reliability (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 9} assert panels_by_title["Run Reliability (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 9}
assert panels_by_title["Fresh Suites (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 13} assert panels_by_title["Fresh Suites (24h)"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 13}
assert panels_by_title["LOC Clean Suites"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17} assert panels_by_title["LOC Clean Suites"]["gridPos"] == {"h": 2, "w": 3, "x": 21, "y": 17}
assert panels_by_title["GitOps Health"]["type"] == "bargauge" assert panels_by_title["GitOps Health"]["type"] == "state-timeline"
assert panels_by_title["GitOps Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 13} assert panels_by_title["GitOps Health"]["gridPos"] == {"h": 6, "w": 6, "x": 15, "y": 7}
gitops_expr = panels_by_title["GitOps Health"]["targets"][0]["expr"] gitops_expr = panels_by_title["GitOps Health"]["targets"][0]["expr"]
assert "Kustomizations Not Suspended" in gitops_expr assert "Kustomizations Not Suspended" in gitops_expr
assert "HelmReleases Not Suspended" in gitops_expr assert "HelmReleases Not Suspended" in gitops_expr
pvc_backup_expr = panels_by_title["PVC Backup Health / Age"]["targets"][0]["expr"]
assert "backup-telemetry-missing" in pvc_backup_expr
assert 'pvc_backup_(count|last_success_timestamp_seconds|health_reason)' in pvc_backup_expr
def test_render_configmap_writes(tmp_path): def test_render_configmap_writes(tmp_path):
mod = load_module() mod = load_module()

View File

@ -1300,8 +1300,9 @@
"custom": { "custom": {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": 70, "fillOpacity": 55,
"gradientMode": "none", "gradientMode": "none",
"showPoints": "never", "showPoints": "never",
"spanNulls": true "spanNulls": true
@ -1862,8 +1863,9 @@
}, },
{ {
"id": 141, "id": 141,
"type": "timeseries", "type": "state-timeline",
"title": "Fan History (0-10)", "title": "Fan Intensity History",
"description": "Fan intensity lanes on the 0-10 controller scale. Cooler colors are quiet/low intensity; warmer colors mean the enclosure is pushing harder.",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -1876,40 +1878,62 @@
}, },
"targets": [ "targets": [
{ {
"expr": "label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"}), \"fan\", \"Outlet\", \"__name__\", \".*\") or label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"}), \"fan\", \"Inlet - Inside\", \"__name__\", \".*\") or label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"}), \"fan\", \"Inlet - Outside\", \"__name__\", \".*\") or label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"}), \"fan\", \"Interior\", \"__name__\", \".*\")",
"refId": "A", "refId": "A",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})", "legendFormat": "{{fan}}"
"legendFormat": "Outlet"
},
{
"refId": "B",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})",
"legendFormat": "Inlet - Inside"
},
{
"refId": "C",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})",
"legendFormat": "Inlet - Outside"
},
{
"refId": "D",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})",
"legendFormat": "Interior"
} }
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": {
"mode": "thresholds"
},
"unit": "none", "unit": "none",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "dark-blue",
"value": null
},
{
"color": "dark-green",
"value": 3
},
{
"color": "dark-yellow",
"value": 6
},
{
"color": "dark-orange",
"value": 8
},
{
"color": "dark-red",
"value": 10
}
]
},
"custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": true
},
"min": 0,
"max": 10 "max": 10
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"mergeValues": true,
"showValue": "never",
"legend": { "legend": {
"displayMode": "table", "displayMode": "table",
"placement": "right" "placement": "right"
}, },
"tooltip": { "tooltip": {
"mode": "multi" "mode": "multi",
"sort": "none"
} }
}, },
"links": [ "links": [
@ -2409,8 +2433,9 @@
}, },
{ {
"id": 150, "id": 150,
"type": "bargauge", "type": "state-timeline",
"title": "GitOps Health", "title": "GitOps Health",
"description": "GitOps readiness and suspension health over time. Blue means perfect; warmer colors mean a readiness or suspension problem appeared.",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -2419,14 +2444,13 @@
"h": 6, "h": 6,
"w": 6, "w": 6,
"x": 15, "x": 15,
"y": 13 "y": 7
}, },
"targets": [ "targets": [
{ {
"expr": "sort(label_replace(100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1), \"signal\", \"Kustomizations Ready\", \"__name__\", \".*\") or label_replace(100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1), \"signal\", \"HelmReleases Ready\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"Kustomizations Not Suspended\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"HelmReleases Not Suspended\", \"__name__\", \".*\"))", "expr": "label_replace(100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1), \"signal\", \"Kustomizations Ready\", \"__name__\", \".*\") or label_replace(100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1), \"signal\", \"HelmReleases Ready\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"Kustomizations Not Suspended\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"HelmReleases Not Suspended\", \"__name__\", \".*\")",
"refId": "A", "refId": "A",
"legendFormat": "{{signal}}", "legendFormat": "{{signal}}"
"instant": true
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -2435,8 +2459,6 @@
"mode": "thresholds" "mode": "thresholds"
}, },
"unit": "percent", "unit": "percent",
"min": 0,
"max": 100,
"thresholds": { "thresholds": {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
@ -2462,19 +2484,26 @@
} }
] ]
}, },
"decimals": 0 "custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": true
},
"min": 0,
"max": 100
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"displayMode": "basic", "mergeValues": true,
"orientation": "horizontal", "showValue": "never",
"reduceOptions": { "legend": {
"calcs": [ "displayMode": "list",
"lastNotNull" "placement": "bottom"
], },
"fields": "", "tooltip": {
"values": false "mode": "single",
"sort": "none"
} }
}, },
"links": [ "links": [
@ -2483,18 +2512,6 @@
"url": "/d/atlas-gitops", "url": "/d/atlas-gitops",
"targetBlank": true "targetBlank": true
} }
],
"description": "Compact GitOps health: readiness plus suspension health for Kustomizations and HelmReleases.",
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "asc"
}
}
] ]
}, },
{ {
@ -2617,6 +2634,7 @@
"custom": { "custom": {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": 70, "fillOpacity": 70,
"gradientMode": "none", "gradientMode": "none",
@ -2676,8 +2694,9 @@
}, },
{ {
"id": 46, "id": 46,
"type": "bargauge", "type": "state-timeline",
"title": "Gate Checks Passing by Suite", "title": "Gate Checks Passing by Suite",
"description": "Percent of current gate dimensions passing per suite over time. There are seven gate dimensions, so 85.7% means one gate is failing.",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -2686,14 +2705,13 @@
"h": 6, "h": 6,
"w": 6, "w": 6,
"x": 15, "x": 15,
"y": 7 "y": 13
}, },
"targets": [ "targets": [
{ {
"expr": "sort((100 * sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))) / clamp_min(sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"} > bool 0))), 1)))", "expr": "(100 * sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))) / clamp_min(sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"} > bool 0))), 1))",
"refId": "A", "refId": "A",
"legendFormat": "{{suite}}", "legendFormat": "{{suite}}"
"instant": true
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -2702,8 +2720,6 @@
"mode": "thresholds" "mode": "thresholds"
}, },
"unit": "percent", "unit": "percent",
"min": 0,
"max": 100,
"thresholds": { "thresholds": {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
@ -2729,19 +2745,26 @@
} }
] ]
}, },
"decimals": 0 "custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": true
},
"min": 0,
"max": 100
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"displayMode": "basic", "mergeValues": true,
"orientation": "horizontal", "showValue": "never",
"reduceOptions": { "legend": {
"calcs": [ "displayMode": "list",
"lastNotNull" "placement": "bottom"
], },
"fields": "", "tooltip": {
"values": false "mode": "single",
"sort": "none"
} }
}, },
"links": [ "links": [
@ -2750,19 +2773,7 @@
"url": "/d/atlas-testing", "url": "/d/atlas-testing",
"targetBlank": true "targetBlank": true
} }
], ]
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "asc"
}
}
],
"description": "Percent of current gate dimensions passing per suite. There are seven gate dimensions, so 85.7% means one gate is failing."
}, },
{ {
"id": 142, "id": 142,
@ -2994,7 +3005,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))) or on() vector(0))", "expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))) or on() ((label_replace(label_replace(vector(999), \"namespace\", \"maintenance\", \"__name__\", \".*\"), \"pvc\", \"backup-telemetry-missing\", \"__name__\", \".*\")) unless on() ((count({__name__=~\"pvc_backup_(count|last_success_timestamp_seconds|health_reason)\",driver=\"restic\"})) > 0)))",
"refId": "A", "refId": "A",
"legendFormat": "{{namespace}}/{{pvc}}", "legendFormat": "{{namespace}}/{{pvc}}",
"instant": true "instant": true
@ -4365,7 +4376,7 @@
"refresh": "1m", "refresh": "1m",
"links": [ "links": [
{ {
"title": "Open Atlas Testing", "title": "Atlas Testing",
"url": "/d/atlas-testing", "url": "/d/atlas-testing",
"targetBlank": true "targetBlank": true
} }

View File

@ -253,6 +253,7 @@
"custom": { "custom": {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": 70, "fillOpacity": 70,
"gradientMode": "none", "gradientMode": "none",
@ -618,7 +619,7 @@
{ {
"id": 6, "id": 6,
"type": "timeseries", "type": "timeseries",
"title": "Fan History (0-10)", "title": "Fan Intensity History",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"

View File

@ -1309,8 +1309,9 @@ data:
"custom": { "custom": {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": 70, "fillOpacity": 55,
"gradientMode": "none", "gradientMode": "none",
"showPoints": "never", "showPoints": "never",
"spanNulls": true "spanNulls": true
@ -1871,8 +1872,9 @@ data:
}, },
{ {
"id": 141, "id": 141,
"type": "timeseries", "type": "state-timeline",
"title": "Fan History (0-10)", "title": "Fan Intensity History",
"description": "Fan intensity lanes on the 0-10 controller scale. Cooler colors are quiet/low intensity; warmer colors mean the enclosure is pushing harder.",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -1885,40 +1887,62 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"}), \"fan\", \"Outlet\", \"__name__\", \".*\") or label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"}), \"fan\", \"Inlet - Inside\", \"__name__\", \".*\") or label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"}), \"fan\", \"Inlet - Outside\", \"__name__\", \".*\") or label_replace(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"}), \"fan\", \"Interior\", \"__name__\", \".*\")",
"refId": "A", "refId": "A",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})", "legendFormat": "{{fan}}"
"legendFormat": "Outlet"
},
{
"refId": "B",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})",
"legendFormat": "Inlet - Inside"
},
{
"refId": "C",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})",
"legendFormat": "Inlet - Outside"
},
{
"refId": "D",
"expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})",
"legendFormat": "Interior"
} }
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": {
"mode": "thresholds"
},
"unit": "none", "unit": "none",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "dark-blue",
"value": null
},
{
"color": "dark-green",
"value": 3
},
{
"color": "dark-yellow",
"value": 6
},
{
"color": "dark-orange",
"value": 8
},
{
"color": "dark-red",
"value": 10
}
]
},
"custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": true
},
"min": 0,
"max": 10 "max": 10
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"mergeValues": true,
"showValue": "never",
"legend": { "legend": {
"displayMode": "table", "displayMode": "table",
"placement": "right" "placement": "right"
}, },
"tooltip": { "tooltip": {
"mode": "multi" "mode": "multi",
"sort": "none"
} }
}, },
"links": [ "links": [
@ -2418,8 +2442,9 @@ data:
}, },
{ {
"id": 150, "id": 150,
"type": "bargauge", "type": "state-timeline",
"title": "GitOps Health", "title": "GitOps Health",
"description": "GitOps readiness and suspension health over time. Blue means perfect; warmer colors mean a readiness or suspension problem appeared.",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -2428,14 +2453,13 @@ data:
"h": 6, "h": 6,
"w": 6, "w": 6,
"x": 15, "x": 15,
"y": 13 "y": 7
}, },
"targets": [ "targets": [
{ {
"expr": "sort(label_replace(100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1), \"signal\", \"Kustomizations Ready\", \"__name__\", \".*\") or label_replace(100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1), \"signal\", \"HelmReleases Ready\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"Kustomizations Not Suspended\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"HelmReleases Not Suspended\", \"__name__\", \".*\"))", "expr": "label_replace(100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1), \"signal\", \"Kustomizations Ready\", \"__name__\", \".*\") or label_replace(100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1), \"signal\", \"HelmReleases Ready\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"Kustomizations Not Suspended\", \"__name__\", \".*\") or label_replace(100 * (1 - (sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)) / clamp_min((count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) or on() vector(0)), 1)), \"signal\", \"HelmReleases Not Suspended\", \"__name__\", \".*\")",
"refId": "A", "refId": "A",
"legendFormat": "{{signal}}", "legendFormat": "{{signal}}"
"instant": true
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -2444,8 +2468,6 @@ data:
"mode": "thresholds" "mode": "thresholds"
}, },
"unit": "percent", "unit": "percent",
"min": 0,
"max": 100,
"thresholds": { "thresholds": {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
@ -2471,19 +2493,26 @@ data:
} }
] ]
}, },
"decimals": 0 "custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": true
},
"min": 0,
"max": 100
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"displayMode": "basic", "mergeValues": true,
"orientation": "horizontal", "showValue": "never",
"reduceOptions": { "legend": {
"calcs": [ "displayMode": "list",
"lastNotNull" "placement": "bottom"
], },
"fields": "", "tooltip": {
"values": false "mode": "single",
"sort": "none"
} }
}, },
"links": [ "links": [
@ -2492,18 +2521,6 @@ data:
"url": "/d/atlas-gitops", "url": "/d/atlas-gitops",
"targetBlank": true "targetBlank": true
} }
],
"description": "Compact GitOps health: readiness plus suspension health for Kustomizations and HelmReleases.",
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "asc"
}
}
] ]
}, },
{ {
@ -2626,6 +2643,7 @@ data:
"custom": { "custom": {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": 70, "fillOpacity": 70,
"gradientMode": "none", "gradientMode": "none",
@ -2685,8 +2703,9 @@ data:
}, },
{ {
"id": 46, "id": 46,
"type": "bargauge", "type": "state-timeline",
"title": "Gate Checks Passing by Suite", "title": "Gate Checks Passing by Suite",
"description": "Percent of current gate dimensions passing per suite over time. There are seven gate dimensions, so 85.7% means one gate is failing.",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -2695,14 +2714,13 @@ data:
"h": 6, "h": 6,
"w": 6, "w": 6,
"x": 15, "x": 15,
"y": 7 "y": 13
}, },
"targets": [ "targets": [
{ {
"expr": "sort((100 * sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))) / clamp_min(sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"} > bool 0))), 1)))", "expr": "(100 * sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))) / clamp_min(sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper\",exported_job=\"platform-quality-ci\"} > bool 0))), 1))",
"refId": "A", "refId": "A",
"legendFormat": "{{suite}}", "legendFormat": "{{suite}}"
"instant": true
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -2711,8 +2729,6 @@ data:
"mode": "thresholds" "mode": "thresholds"
}, },
"unit": "percent", "unit": "percent",
"min": 0,
"max": 100,
"thresholds": { "thresholds": {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
@ -2738,19 +2754,26 @@ data:
} }
] ]
}, },
"decimals": 0 "custom": {
"fillOpacity": 70,
"lineWidth": 0,
"spanNulls": true
},
"min": 0,
"max": 100
}, },
"overrides": [] "overrides": []
}, },
"options": { "options": {
"displayMode": "basic", "mergeValues": true,
"orientation": "horizontal", "showValue": "never",
"reduceOptions": { "legend": {
"calcs": [ "displayMode": "list",
"lastNotNull" "placement": "bottom"
], },
"fields": "", "tooltip": {
"values": false "mode": "single",
"sort": "none"
} }
}, },
"links": [ "links": [
@ -2759,19 +2782,7 @@ data:
"url": "/d/atlas-testing", "url": "/d/atlas-testing",
"targetBlank": true "targetBlank": true
} }
], ]
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "asc"
}
}
],
"description": "Percent of current gate dimensions passing per suite. There are seven gate dimensions, so 85.7% means one gate is failing."
}, },
{ {
"id": 142, "id": 142,
@ -3003,7 +3014,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))) or on() vector(0))", "expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))) or on() ((label_replace(label_replace(vector(999), \"namespace\", \"maintenance\", \"__name__\", \".*\"), \"pvc\", \"backup-telemetry-missing\", \"__name__\", \".*\")) unless on() ((count({__name__=~\"pvc_backup_(count|last_success_timestamp_seconds|health_reason)\",driver=\"restic\"})) > 0)))",
"refId": "A", "refId": "A",
"legendFormat": "{{namespace}}/{{pvc}}", "legendFormat": "{{namespace}}/{{pvc}}",
"instant": true "instant": true
@ -4374,7 +4385,7 @@ data:
"refresh": "1m", "refresh": "1m",
"links": [ "links": [
{ {
"title": "Open Atlas Testing", "title": "Atlas Testing",
"url": "/d/atlas-testing", "url": "/d/atlas-testing",
"targetBlank": true "targetBlank": true
} }

View File

@ -262,6 +262,7 @@ data:
"custom": { "custom": {
"drawStyle": "bars", "drawStyle": "bars",
"barAlignment": 0, "barAlignment": 0,
"barWidthFactor": 0.72,
"lineWidth": 0, "lineWidth": 0,
"fillOpacity": 70, "fillOpacity": 70,
"gradientMode": "none", "gradientMode": "none",
@ -627,7 +628,7 @@ data:
{ {
"id": 6, "id": 6,
"type": "timeseries", "type": "timeseries",
"title": "Fan History (0-10)", "title": "Fan Intensity History",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"