From d509dfaa22af75e0eff55a0a40aec1de57515a6d Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Thu, 22 Jan 2026 15:23:23 -0300 Subject: [PATCH] ops: restore portal/ariadne and add postgres panels --- scripts/dashboards_render_atlas.py | 36 ++++- .../bstein-dev-home/backend-deployment.yaml | 2 +- .../bstein-dev-home/frontend-deployment.yaml | 2 +- .../vault-sync-deployment.yaml | 2 +- services/maintenance/ariadne-deployment.yaml | 2 +- .../monitoring/dashboards/atlas-overview.json | 138 +++++++++++++++++- .../grafana-dashboard-overview.yaml | 138 +++++++++++++++++- 7 files changed, 298 insertions(+), 22 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 1f28489..f55896a 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -371,6 +371,10 @@ ARIADNE_TEST_SUCCESS_RATE = ( ARIADNE_TEST_FAILURES_24H = ( 'sum by (result) (max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"failed|error"}[24h]))' ) +POSTGRES_CONN_USED_PCT = ( + "100 * sum(pg_stat_activity_count) / clamp_min(max(pg_settings_max_connections), 1)" +) +POSTGRES_CONN_HOTTEST = 'topk(1, sum by (datname) (pg_stat_activity_count))' ONEOFF_JOB_OWNER = ( 'label_replace(kube_job_owner{owner_kind="CronJob"}, "owner_name", "$1", "job_name", "(.*)")' ) @@ -1057,7 +1061,7 @@ def build_overview(): 30, "Mail Sent (1d)", 'max(postmark_outbound_sent{window="1d"})', - {"h": 3, "w": 6, "x": 0, "y": 8}, + {"h": 3, "w": 4, "x": 0, "y": 8}, unit="none", links=link_to("atlas-mail"), ) @@ -1068,7 +1072,7 @@ def build_overview(): "type": "stat", "title": "Mail Bounces (1d)", "datasource": PROM_DS, - "gridPos": {"h": 3, "w": 6, "x": 12, "y": 8}, + "gridPos": {"h": 3, "w": 4, "x": 8, "y": 8}, "targets": [ { "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', @@ -1114,7 +1118,7 @@ def build_overview(): 32, "Mail Success Rate (1d)", 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', - {"h": 3, "w": 6, "x": 6, "y": 8}, + {"h": 3, "w": 4, "x": 4, "y": 8}, unit="percent", thresholds=mail_success_thresholds, decimals=1, @@ -1126,13 +1130,37 @@ def build_overview(): 33, "Mail Limit Used (30d)", "max(postmark_sending_limit_used_percent)", - {"h": 3, "w": 6, "x": 18, "y": 8}, + {"h": 3, "w": 4, "x": 12, "y": 8}, unit="percent", thresholds=mail_limit_thresholds, decimals=1, links=link_to("atlas-mail"), ) ) + panels.append( + gauge_panel( + 34, + "Postgres Connections Used", + POSTGRES_CONN_USED_PCT, + {"h": 3, "w": 4, "x": 16, "y": 8}, + min_value=0, + max_value=100, + thresholds=PERCENT_THRESHOLDS, + ) + ) + panels.append( + stat_panel( + 35, + "Postgres Hottest Connections", + POSTGRES_CONN_HOTTEST, + {"h": 3, "w": 4, "x": 20, "y": 8}, + unit="none", + decimals=0, + text_mode="name_and_value", + legend="{{datname}}", + instant=True, + ) + ) storage_panels = [ (23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"), diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 100c3eb..2170396 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -5,7 +5,7 @@ metadata: name: bstein-dev-home-backend namespace: bstein-dev-home spec: - replicas: 0 + replicas: 1 revisionHistoryLimit: 3 selector: matchLabels: diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index bbe5981..ef26e73 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -5,7 +5,7 @@ metadata: name: bstein-dev-home-frontend namespace: bstein-dev-home spec: - replicas: 0 + replicas: 1 revisionHistoryLimit: 3 selector: matchLabels: diff --git a/services/bstein-dev-home/vault-sync-deployment.yaml b/services/bstein-dev-home/vault-sync-deployment.yaml index 2f2ddbb..ad50f1e 100644 --- a/services/bstein-dev-home/vault-sync-deployment.yaml +++ b/services/bstein-dev-home/vault-sync-deployment.yaml @@ -5,7 +5,7 @@ metadata: name: bstein-dev-home-vault-sync namespace: bstein-dev-home spec: - replicas: 0 + replicas: 1 selector: matchLabels: app: bstein-dev-home-vault-sync diff --git a/services/maintenance/ariadne-deployment.yaml b/services/maintenance/ariadne-deployment.yaml index e11f8db..581947c 100644 --- a/services/maintenance/ariadne-deployment.yaml +++ b/services/maintenance/ariadne-deployment.yaml @@ -5,7 +5,7 @@ metadata: name: ariadne namespace: maintenance spec: - replicas: 0 + replicas: 1 revisionHistoryLimit: 3 selector: matchLabels: diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 78744da..93a2d80 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -796,7 +796,7 @@ }, "gridPos": { "h": 3, - "w": 6, + "w": 4, "x": 0, "y": 8 }, @@ -863,8 +863,8 @@ }, "gridPos": { "h": 3, - "w": 6, - "x": 12, + "w": 4, + "x": 8, "y": 8 }, "targets": [ @@ -968,8 +968,8 @@ }, "gridPos": { "h": 3, - "w": 6, - "x": 6, + "w": 4, + "x": 4, "y": 8 }, "targets": [ @@ -1044,8 +1044,8 @@ }, "gridPos": { "h": 3, - "w": 6, - "x": 18, + "w": 4, + "x": 12, "y": 8 }, "targets": [ @@ -1110,6 +1110,130 @@ } ] }, + { + "id": 34, + "type": "gauge", + "title": "Postgres Connections Used", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 8 + }, + "targets": [ + { + "expr": "100 * sum(pg_stat_activity_count) / clamp_min(max(pg_settings_max_connections), 1)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 75 + }, + { + "color": "red", + "value": 91.5 + } + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "orientation": "auto", + "showThresholdMarkers": false, + "showThresholdLabels": false + } + }, + { + "id": 35, + "type": "stat", + "title": "Postgres Hottest Connections", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 8 + }, + "targets": [ + { + "expr": "topk(1, sum by (datname) (pg_stat_activity_count))", + "refId": "A", + "legendFormat": "{{datname}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + } + }, { "id": 23, "type": "stat", diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index fa19911..0e9526e 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -805,7 +805,7 @@ data: }, "gridPos": { "h": 3, - "w": 6, + "w": 4, "x": 0, "y": 8 }, @@ -872,8 +872,8 @@ data: }, "gridPos": { "h": 3, - "w": 6, - "x": 12, + "w": 4, + "x": 8, "y": 8 }, "targets": [ @@ -977,8 +977,8 @@ data: }, "gridPos": { "h": 3, - "w": 6, - "x": 6, + "w": 4, + "x": 4, "y": 8 }, "targets": [ @@ -1053,8 +1053,8 @@ data: }, "gridPos": { "h": 3, - "w": 6, - "x": 18, + "w": 4, + "x": 12, "y": 8 }, "targets": [ @@ -1119,6 +1119,130 @@ data: } ] }, + { + "id": 34, + "type": "gauge", + "title": "Postgres Connections Used", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 8 + }, + "targets": [ + { + "expr": "100 * sum(pg_stat_activity_count) / clamp_min(max(pg_settings_max_connections), 1)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "orange", + "value": 75 + }, + { + "color": "red", + "value": 91.5 + } + ] + } + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "orientation": "auto", + "showThresholdMarkers": false, + "showThresholdLabels": false + } + }, + { + "id": 35, + "type": "stat", + "title": "Postgres Hottest Connections", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 8 + }, + "targets": [ + { + "expr": "topk(1, sum by (datname) (pg_stat_activity_count))", + "refId": "A", + "legendFormat": "{{datname}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value" + } + }, { "id": 23, "type": "stat",