diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index e732523b..98e18aca 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -624,6 +624,28 @@ ANANKE_UPS_RUNTIME_HEADROOM_PERCENT = ( f"clamp_min(max(ananke_ups_threshold_seconds{{{ANANKE_SELECTOR}}}), 1)" ) ANANKE_UPS_TRIGGER_COUNT_1D = f"increase(ananke_shutdown_triggers_total{{{ANANKE_SELECTOR}}}[1d]) or on() vector(0)" +GITOPS_SELECTOR = ANANKE_SELECTOR +GITOPS_SOURCE_INFO = ( + f'max by (branch, revision) (ananke_gitops_flux_source_info{{{GITOPS_SELECTOR},namespace="flux-system",name="flux-system"}})' +) +GITOPS_KUSTOMIZATION_READY_PCT = ( + f"100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})) " + f"/ clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})), 1)" +) +GITOPS_KUSTOMIZATION_SUSPENDED = ( + f"sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{{{GITOPS_SELECTOR}}})) or on() vector(0)" +) +GITOPS_HELM_READY_PCT = ( + f"100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})) " + f"/ clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})), 1)" +) +GITOPS_HELM_SUSPENDED = ( + f"sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{{{GITOPS_SELECTOR}}})) or on() vector(0)" +) +GITOPS_SCRAPE_SUCCESS = f"min(ananke_gitops_scrape_success{{{GITOPS_SELECTOR}}}) or on() vector(0)" +GITOPS_LAST_SCRAPE_AGE = ( + f"(time() - max(ananke_gitops_last_scrape_timestamp_seconds{{{GITOPS_SELECTOR}}})) or on() vector(0)" +) ANANKE_UPS_RUNTIME_DB = ( f'max(ananke_ups_runtime_seconds{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)' ) @@ -973,6 +995,7 @@ def table_panel( footer=None, format=None, description=None, + field_overrides=None, ): """Return a Grafana table panel definition.""" # Optional PromQL subquery helpers in expr: share(), etc. @@ -992,7 +1015,7 @@ def table_panel( "datasource": PROM_DS, "gridPos": grid, "targets": [target], - "fieldConfig": {"defaults": field_defaults, "overrides": []}, + "fieldConfig": {"defaults": field_defaults, "overrides": field_overrides or []}, "options": panel_options, } if transformations: @@ -1360,6 +1383,7 @@ DASHBOARD_LINK_TITLES = { "atlas-mail": "Open Atlas Mail", "atlas-jobs": "Open Atlas Testing", "atlas-power": "Open Atlas Power", + "atlas-gitops": "Open Atlas GitOps", "atlas-gpu": "Open Atlas GPU", } @@ -1849,47 +1873,12 @@ def build_overview(): "showPoints": "never", "spanNulls": True, } - fan_current = ( - f'label_replace((round(max({climate_drop_labels and "max without (" + climate_drop_labels + ")"} (typhon_fan_speed_level{{port="1"}})) or on() vector(0))), "metric", "Outlet", "__name__", ".*") ' - f'or label_replace((round(max({"max without (" + climate_drop_labels + ")"} (typhon_fan_speed_level{{port="2"}})) or on() vector(0))), "metric", "Inlet - In", "__name__", ".*") ' - f'or label_replace((round(max({"max without (" + climate_drop_labels + ")"} (typhon_fan_speed_level{{port="3"}})) or on() vector(0))), "metric", "Inlet - Out", "__name__", ".*") ' - f'or label_replace((round(max({"max without (" + climate_drop_labels + ")"} (typhon_fan_speed_level{{port="4"}})) or on() vector(0))), "metric", "Interior", "__name__", ".*")' - ) - fan_panel = stat_panel( - 140, - "Fan Activity", - None, - {"h": 6, "w": 6, "x": 12, "y": 13}, - unit="none", - decimals=0, - text_mode="name_and_value", - targets=[{"expr": fan_current, "refId": "A", "legendFormat": "{{metric}}", "instant": True}], - field_overrides=[ - {"matcher": {"id": "byName", "options": "Outlet"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Inlet - In"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Inlet - Out"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Interior"}, "properties": [{"id": "decimals", "value": 0}]}, - ], - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 7}, - {"color": "red", "value": 9}, - ], - }, - orientation="vertical", - wide_layout=False, - links=overview_link("atlas-power"), - ) - fan_panel["options"]["text"] = {"valueSize": 26} - panels.append(fan_panel) panels.append( timeseries_panel( 141, "Fan History (0-10)", None, - {"h": 6, "w": 6, "x": 18, "y": 13}, + {"h": 6, "w": 6, "x": 12, "y": 13}, unit="none", max_value=10, targets=[ @@ -1903,6 +1892,77 @@ def build_overview(): links=overview_link("atlas-power"), ) ) + gitops_panel = stat_panel( + 140, + "GitOps Status", + None, + {"h": 6, "w": 6, "x": 18, "y": 13}, + unit="none", + text_mode="name_and_value", + decimals=0, + targets=[ + {"expr": f"{GITOPS_SOURCE_INFO} or on() vector(0)", "refId": "A", "legendFormat": "Flux {{branch}} · {{revision}}", "instant": True}, + {"expr": GITOPS_KUSTOMIZATION_READY_PCT, "refId": "B", "legendFormat": "Kustomizations Ready %", "instant": True}, + {"expr": GITOPS_KUSTOMIZATION_SUSPENDED, "refId": "C", "legendFormat": "Kustomizations Suspended", "instant": True}, + {"expr": GITOPS_HELM_READY_PCT, "refId": "D", "legendFormat": "Helm Ready %", "instant": True}, + {"expr": GITOPS_HELM_SUSPENDED, "refId": "E", "legendFormat": "Helm Suspended", "instant": True}, + ], + field_overrides=[ + { + "matcher": {"id": "byRegexp", "options": ".*Ready %"}, + "properties": [ + {"id": "unit", "value": "percent"}, + {"id": "decimals", "value": 1}, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "yellow", "value": 99}, + {"color": "blue", "value": 100}, + ], + }, + }, + ], + }, + { + "matcher": {"id": "byRegexp", "options": ".*Suspended"}, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + {"color": "blue", "value": None}, + {"color": "red", "value": 1}, + ], + }, + } + ], + }, + { + "matcher": {"id": "byRegexp", "options": "Flux .*"}, + "properties": [ + {"id": "color", "value": {"mode": "fixed", "fixedColor": "gray"}}, + {"id": "decimals", "value": 0}, + ], + }, + ], + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "blue", "value": 100}, + ], + }, + orientation="vertical", + wide_layout=False, + links=overview_link("atlas-gitops"), + description="Flux branch/revision plus compact readiness and suspended counts from Ananke's GitOps object-state exporter.", + ) + gitops_panel["options"]["text"] = {"valueSize": 18} + panels.append(gitops_panel) panels.append( bargauge_panel( @@ -4137,6 +4197,215 @@ def build_testing_dashboard(): return dashboard +def build_gitops_dashboard(): + gitops_value_overrides = [ + { + "matcher": {"id": "byName", "options": "Value"}, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "blue", "value": 1}, + ], + }, + } + ], + } + ] + kustomization_table = ( + f"max by (namespace, name, path, source_namespace, source_name, revision, ready, reason) " + f"(ananke_gitops_kustomization_info{{{GITOPS_SELECTOR}}}) " + f"* on(namespace, name) group_left() max by (namespace, name) " + f"(ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})" + ) + helm_table = ( + f"max by (namespace, name, chart, version, app_version, revision, ready, reason) " + f"(ananke_gitops_helmrelease_info{{{GITOPS_SELECTOR}}}) " + f"* on(namespace, name) group_left() max by (namespace, name) " + f"(ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})" + ) + source_table = ( + f"max by (namespace, name, url, branch, revision, ready, reason) " + f"(ananke_gitops_flux_source_info{{{GITOPS_SELECTOR}}}) " + f"* on(namespace, name) group_left() max by (namespace, name) " + f"(ananke_gitops_flux_source_ready{{{GITOPS_SELECTOR}}})" + ) + + panels = [ + stat_panel( + 1, + "Flux Source", + f"{GITOPS_SOURCE_INFO} or on() vector(0)", + {"h": 4, "w": 8, "x": 0, "y": 0}, + text_mode="name", + targets=[{"expr": f"{GITOPS_SOURCE_INFO} or on() vector(0)", "refId": "A", "legendFormat": "{{branch}} · {{revision}}", "instant": True}], + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "blue", "value": 1}, + ], + }, + description="Branch and revision currently reported by Flux's GitRepository source.", + ), + stat_panel( + 2, + "Kustomizations Ready", + GITOPS_KUSTOMIZATION_READY_PCT, + {"h": 4, "w": 4, "x": 8, "y": 0}, + unit="percent", + decimals=1, + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "yellow", "value": 99}, + {"color": "blue", "value": 100}, + ], + }, + ), + stat_panel( + 3, + "Kustomizations Suspended", + GITOPS_KUSTOMIZATION_SUSPENDED, + {"h": 4, "w": 4, "x": 12, "y": 0}, + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "blue", "value": None}, + {"color": "red", "value": 1}, + ], + }, + ), + stat_panel( + 4, + "HelmReleases Ready", + GITOPS_HELM_READY_PCT, + {"h": 4, "w": 4, "x": 16, "y": 0}, + unit="percent", + decimals=1, + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "yellow", "value": 99}, + {"color": "blue", "value": 100}, + ], + }, + ), + stat_panel( + 5, + "HelmReleases Suspended", + GITOPS_HELM_SUSPENDED, + {"h": 4, "w": 4, "x": 20, "y": 0}, + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "blue", "value": None}, + {"color": "red", "value": 1}, + ], + }, + ), + stat_panel( + 6, + "GitOps Exporter", + None, + {"h": 4, "w": 8, "x": 0, "y": 4}, + text_mode="name_and_value", + targets=[ + {"expr": GITOPS_SCRAPE_SUCCESS, "refId": "A", "legendFormat": "Scrape Success", "instant": True}, + {"expr": GITOPS_LAST_SCRAPE_AGE, "refId": "B", "legendFormat": "Sample Age", "instant": True}, + ], + field_overrides=[ + {"matcher": {"id": "byName", "options": "Sample Age"}, "properties": [{"id": "unit", "value": "s"}]}, + { + "matcher": {"id": "byName", "options": "Scrape Success"}, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "blue", "value": 1}, + ], + }, + } + ], + }, + ], + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "blue", "value": 1}, + ], + }, + ), + timeseries_panel( + 7, + "Readiness History", + None, + {"h": 4, "w": 16, "x": 8, "y": 4}, + unit="percent", + targets=[ + {"expr": GITOPS_KUSTOMIZATION_READY_PCT, "refId": "A", "legendFormat": "Kustomizations"}, + {"expr": GITOPS_HELM_READY_PCT, "refId": "B", "legendFormat": "HelmReleases"}, + ], + legend_display="table", + legend_placement="right", + ), + table_panel( + 8, + "Flux Sources", + source_table, + {"h": 8, "w": 24, "x": 0, "y": 8}, + instant=True, + format="table", + transformations=[{"id": "labelsToFields", "options": {}}], + field_overrides=gitops_value_overrides, + description="A Value of 1 means Ready; 0 means not Ready.", + ), + table_panel( + 9, + "Kustomizations", + kustomization_table, + {"h": 12, "w": 24, "x": 0, "y": 16}, + instant=True, + format="table", + transformations=[{"id": "labelsToFields", "options": {}}], + field_overrides=gitops_value_overrides, + description="A Value of 1 means Ready; 0 means not Ready. The ready/reason labels come from Flux status.conditions.", + ), + table_panel( + 10, + "HelmReleases", + helm_table, + {"h": 12, "w": 24, "x": 0, "y": 28}, + instant=True, + format="table", + transformations=[{"id": "labelsToFields", "options": {}}], + field_overrides=gitops_value_overrides, + description="A Value of 1 means Ready; 0 means not Ready. Chart/version/app_version are included when Flux reports them.", + ), + ] + return { + "uid": "atlas-gitops", + "title": "Atlas GitOps", + "folderUid": PRIVATE_FOLDER, + "editable": True, + "panels": panels, + "time": {"from": "now-12h", "to": "now"}, + "annotations": {"list": []}, + "schemaVersion": 39, + "style": "dark", + "tags": ["atlas", "gitops", "flux"], + } + + def build_power_dashboard(): panels = [] status_mapping = [ @@ -4439,6 +4708,10 @@ DASHBOARDS = { "builder": build_testing_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml", }, + "atlas-gitops": { + "builder": build_gitops_dashboard, + "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gitops.yaml", + }, "atlas-power": { "builder": build_power_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-power.yaml", diff --git a/services/monitoring/dashboards/atlas-gitops.json b/services/monitoring/dashboards/atlas-gitops.json new file mode 100644 index 00000000..13391827 --- /dev/null +++ b/services/monitoring/dashboards/atlas-gitops.json @@ -0,0 +1,686 @@ +{ + "uid": "atlas-gitops", + "title": "Atlas GitOps", + "folderUid": "atlas-internal", + "editable": true, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Flux Source", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 0 + }, + "targets": [ + { + "expr": "max by (branch, revision) (ananke_gitops_flux_source_info{job=\"ananke-power\",namespace=\"flux-system\",name=\"flux-system\"}) or on() vector(0)", + "refId": "A", + "legendFormat": "{{branch}} \u00b7 {{revision}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name" + }, + "description": "Branch and revision currently reported by Flux's GitRepository source." + }, + { + "id": 2, + "type": "stat", + "title": "Kustomizations Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "targets": [ + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 99 + }, + { + "color": "blue", + "value": 100 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Kustomizations Suspended", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "HelmReleases Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "targets": [ + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 99 + }, + { + "color": "blue", + "value": 100 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 5, + "type": "stat", + "title": "HelmReleases Suspended", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 0 + }, + "targets": [ + { + "expr": "sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 6, + "type": "stat", + "title": "GitOps Exporter", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 4 + }, + "targets": [ + { + "expr": "min(ananke_gitops_scrape_success{job=\"ananke-power\"}) or on() vector(0)", + "refId": "A", + "legendFormat": "Scrape Success", + "instant": true + }, + { + "expr": "(time() - max(ananke_gitops_last_scrape_timestamp_seconds{job=\"ananke-power\"})) or on() vector(0)", + "refId": "B", + "legendFormat": "Sample Age", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Sample Age" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Scrape Success" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + } + }, + { + "id": 7, + "type": "timeseries", + "title": "Readiness History", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 16, + "x": 8, + "y": 4 + }, + "targets": [ + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1)", + "refId": "A", + "legendFormat": "Kustomizations" + }, + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1)", + "refId": "B", + "legendFormat": "HelmReleases" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 8, + "type": "table", + "title": "Flux Sources", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "max by (namespace, name, url, branch, revision, ready, reason) (ananke_gitops_flux_source_info{job=\"ananke-power\"}) * on(namespace, name) group_left() max by (namespace, name) (ananke_gitops_flux_source_ready{job=\"ananke-power\"})", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + } + ], + "description": "A Value of 1 means Ready; 0 means not Ready." + }, + { + "id": 9, + "type": "table", + "title": "Kustomizations", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 16 + }, + "targets": [ + { + "expr": "max by (namespace, name, path, source_namespace, source_name, revision, ready, reason) (ananke_gitops_kustomization_info{job=\"ananke-power\"}) * on(namespace, name) group_left() max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + } + ], + "description": "A Value of 1 means Ready; 0 means not Ready. The ready/reason labels come from Flux status.conditions." + }, + { + "id": 10, + "type": "table", + "title": "HelmReleases", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 28 + }, + "targets": [ + { + "expr": "max by (namespace, name, chart, version, app_version, revision, ready, reason) (ananke_gitops_helmrelease_info{job=\"ananke-power\"}) * on(namespace, name) group_left() max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + } + ], + "description": "A Value of 1 means Ready; 0 means not Ready. Chart/version/app_version are included when Flux reports them." + } + ], + "time": { + "from": "now-12h", + "to": "now" + }, + "annotations": { + "list": [] + }, + "schemaVersion": 39, + "style": "dark", + "tags": [ + "atlas", + "gitops", + "flux" + ] +} diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 9df0eb83..1e487af7 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1820,134 +1820,6 @@ ], "description": "Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible." }, - { - "id": 140, - "type": "stat", - "title": "Fan Activity", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 13 - }, - "targets": [ - { - "expr": "label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})) or on() vector(0))), \"metric\", \"Outlet\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})) or on() vector(0))), \"metric\", \"Inlet - In\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})) or on() vector(0))), \"metric\", \"Inlet - Out\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})) or on() vector(0))), \"metric\", \"Interior\", \"__name__\", \".*\")", - "refId": "A", - "legendFormat": "{{metric}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 7 - }, - { - "color": "red", - "value": 9 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Outlet" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - In" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - Out" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Interior" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": false, - "text": { - "valueSize": 26 - } - }, - "links": [ - { - "title": "Open atlas-power dashboard", - "url": "/d/atlas-power", - "targetBlank": true - } - ] - }, { "id": 141, "type": "timeseries", @@ -1959,7 +1831,7 @@ "gridPos": { "h": 6, "w": 6, - "x": 18, + "x": 12, "y": 13 }, "targets": [ @@ -2008,6 +1880,186 @@ } ] }, + { + "id": 140, + "type": "stat", + "title": "GitOps Status", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 13 + }, + "targets": [ + { + "expr": "max by (branch, revision) (ananke_gitops_flux_source_info{job=\"ananke-power\",namespace=\"flux-system\",name=\"flux-system\"}) or on() vector(0)", + "refId": "A", + "legendFormat": "Flux {{branch}} \u00b7 {{revision}}", + "instant": true + }, + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1)", + "refId": "B", + "legendFormat": "Kustomizations Ready %", + "instant": true + }, + { + "expr": "sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "C", + "legendFormat": "Kustomizations Suspended", + "instant": true + }, + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1)", + "refId": "D", + "legendFormat": "Helm Ready %", + "instant": true + }, + { + "expr": "sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "E", + "legendFormat": "Helm Suspended", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Ready %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 99 + }, + { + "color": "blue", + "value": 100 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*Suspended" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Flux .*" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "gray" + } + }, + { + "id": "decimals", + "value": 0 + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value", + "orientation": "vertical", + "wideLayout": false, + "text": { + "valueSize": 18 + } + }, + "links": [ + { + "title": "Open atlas-gitops dashboard", + "url": "/d/atlas-gitops", + "targetBlank": true + } + ], + "description": "Flux branch/revision plus compact readiness and suspended counts from Ananke's GitOps object-state exporter." + }, { "id": 44, "type": "bargauge", diff --git a/services/monitoring/grafana-dashboard-gitops.yaml b/services/monitoring/grafana-dashboard-gitops.yaml new file mode 100644 index 00000000..caef98c9 --- /dev/null +++ b/services/monitoring/grafana-dashboard-gitops.yaml @@ -0,0 +1,695 @@ +# services/monitoring/grafana-dashboard-gitops.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-gitops + labels: + grafana_dashboard: "1" +data: + atlas-gitops.json: | + { + "uid": "atlas-gitops", + "title": "Atlas GitOps", + "folderUid": "atlas-internal", + "editable": true, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Flux Source", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 0 + }, + "targets": [ + { + "expr": "max by (branch, revision) (ananke_gitops_flux_source_info{job=\"ananke-power\",namespace=\"flux-system\",name=\"flux-system\"}) or on() vector(0)", + "refId": "A", + "legendFormat": "{{branch}} \u00b7 {{revision}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name" + }, + "description": "Branch and revision currently reported by Flux's GitRepository source." + }, + { + "id": 2, + "type": "stat", + "title": "Kustomizations Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "targets": [ + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 99 + }, + { + "color": "blue", + "value": 100 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "Kustomizations Suspended", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "HelmReleases Ready", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "targets": [ + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 99 + }, + { + "color": "blue", + "value": 100 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 5, + "type": "stat", + "title": "HelmReleases Suspended", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 0 + }, + "targets": [ + { + "expr": "sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 6, + "type": "stat", + "title": "GitOps Exporter", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 4 + }, + "targets": [ + { + "expr": "min(ananke_gitops_scrape_success{job=\"ananke-power\"}) or on() vector(0)", + "refId": "A", + "legendFormat": "Scrape Success", + "instant": true + }, + { + "expr": "(time() - max(ananke_gitops_last_scrape_timestamp_seconds{job=\"ananke-power\"})) or on() vector(0)", + "refId": "B", + "legendFormat": "Sample Age", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Sample Age" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Scrape Success" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + } + }, + { + "id": 7, + "type": "timeseries", + "title": "Readiness History", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 16, + "x": 8, + "y": 4 + }, + "targets": [ + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1)", + "refId": "A", + "legendFormat": "Kustomizations" + }, + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1)", + "refId": "B", + "legendFormat": "HelmReleases" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 8, + "type": "table", + "title": "Flux Sources", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "max by (namespace, name, url, branch, revision, ready, reason) (ananke_gitops_flux_source_info{job=\"ananke-power\"}) * on(namespace, name) group_left() max by (namespace, name) (ananke_gitops_flux_source_ready{job=\"ananke-power\"})", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + } + ], + "description": "A Value of 1 means Ready; 0 means not Ready." + }, + { + "id": 9, + "type": "table", + "title": "Kustomizations", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 16 + }, + "targets": [ + { + "expr": "max by (namespace, name, path, source_namespace, source_name, revision, ready, reason) (ananke_gitops_kustomization_info{job=\"ananke-power\"}) * on(namespace, name) group_left() max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + } + ], + "description": "A Value of 1 means Ready; 0 means not Ready. The ready/reason labels come from Flux status.conditions." + }, + { + "id": 10, + "type": "table", + "title": "HelmReleases", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 28 + }, + "targets": [ + { + "expr": "max by (namespace, name, chart, version, app_version, revision, ready, reason) (ananke_gitops_helmrelease_info{job=\"ananke-power\"}) * on(namespace, name) group_left() max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})", + "refId": "A", + "instant": true, + "format": "table" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + } + ], + "description": "A Value of 1 means Ready; 0 means not Ready. Chart/version/app_version are included when Flux reports them." + } + ], + "time": { + "from": "now-12h", + "to": "now" + }, + "annotations": { + "list": [] + }, + "schemaVersion": 39, + "style": "dark", + "tags": [ + "atlas", + "gitops", + "flux" + ] + } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 3013cd35..3a1434eb 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1829,134 +1829,6 @@ data: ], "description": "Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible." }, - { - "id": 140, - "type": "stat", - "title": "Fan Activity", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 13 - }, - "targets": [ - { - "expr": "label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})) or on() vector(0))), \"metric\", \"Outlet\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})) or on() vector(0))), \"metric\", \"Inlet - In\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})) or on() vector(0))), \"metric\", \"Inlet - Out\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})) or on() vector(0))), \"metric\", \"Interior\", \"__name__\", \".*\")", - "refId": "A", - "legendFormat": "{{metric}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 7 - }, - { - "color": "red", - "value": 9 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - }, - "decimals": 0 - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Outlet" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - In" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - Out" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Interior" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": false, - "text": { - "valueSize": 26 - } - }, - "links": [ - { - "title": "Open atlas-power dashboard", - "url": "/d/atlas-power", - "targetBlank": true - } - ] - }, { "id": 141, "type": "timeseries", @@ -1968,7 +1840,7 @@ data: "gridPos": { "h": 6, "w": 6, - "x": 18, + "x": 12, "y": 13 }, "targets": [ @@ -2017,6 +1889,186 @@ data: } ] }, + { + "id": 140, + "type": "stat", + "title": "GitOps Status", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 13 + }, + "targets": [ + { + "expr": "max by (branch, revision) (ananke_gitops_flux_source_info{job=\"ananke-power\",namespace=\"flux-system\",name=\"flux-system\"}) or on() vector(0)", + "refId": "A", + "legendFormat": "Flux {{branch}} \u00b7 {{revision}}", + "instant": true + }, + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{job=\"ananke-power\"})), 1)", + "refId": "B", + "legendFormat": "Kustomizations Ready %", + "instant": true + }, + { + "expr": "sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "C", + "legendFormat": "Kustomizations Suspended", + "instant": true + }, + { + "expr": "100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})) / clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{job=\"ananke-power\"})), 1)", + "refId": "D", + "legendFormat": "Helm Ready %", + "instant": true + }, + { + "expr": "sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{job=\"ananke-power\"})) or on() vector(0)", + "refId": "E", + "legendFormat": "Helm Suspended", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "blue", + "value": 100 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*Ready %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 99 + }, + { + "color": "blue", + "value": 100 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*Suspended" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Flux .*" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "gray" + } + }, + { + "id": "decimals", + "value": 0 + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value", + "orientation": "vertical", + "wideLayout": false, + "text": { + "valueSize": 18 + } + }, + "links": [ + { + "title": "Open atlas-gitops dashboard", + "url": "/d/atlas-gitops", + "targetBlank": true + } + ], + "description": "Flux branch/revision plus compact readiness and suspended counts from Ananke's GitOps object-state exporter." + }, { "id": 44, "type": "bargauge", diff --git a/services/monitoring/kustomization.yaml b/services/monitoring/kustomization.yaml index 9b8ec822..3e381468 100644 --- a/services/monitoring/kustomization.yaml +++ b/services/monitoring/kustomization.yaml @@ -13,6 +13,7 @@ resources: - grafana-dashboard-storage.yaml - grafana-dashboard-network.yaml - grafana-dashboard-gpu.yaml + - grafana-dashboard-gitops.yaml - grafana-dashboard-power.yaml - grafana-dashboard-mail.yaml - grafana-dashboard-jobs.yaml