diff --git a/infrastructure/postgres/service.yaml b/infrastructure/postgres/service.yaml index 3dcab3c..b695045 100644 --- a/infrastructure/postgres/service.yaml +++ b/infrastructure/postgres/service.yaml @@ -4,6 +4,10 @@ kind: Service metadata: name: postgres-service namespace: postgres + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9187" + prometheus.io/path: "/metrics" spec: clusterIP: None ports: @@ -11,5 +15,9 @@ spec: port: 5432 protocol: TCP targetPort: 5432 + - name: metrics + port: 9187 + protocol: TCP + targetPort: 9187 selector: app: postgres diff --git a/infrastructure/postgres/statefulset.yaml b/infrastructure/postgres/statefulset.yaml index e1a1921..2c79248 100644 --- a/infrastructure/postgres/statefulset.yaml +++ b/infrastructure/postgres/statefulset.yaml @@ -58,6 +58,23 @@ spec: - name: vault-secrets mountPath: /mnt/vault readOnly: true + - name: postgres-exporter + image: quay.io/prometheuscommunity/postgres-exporter:v0.15.0 + ports: + - name: metrics + containerPort: 9187 + protocol: TCP + env: + - name: DATA_SOURCE_URI + value: "localhost:5432/postgres?sslmode=disable" + - name: DATA_SOURCE_USER + value: postgres + - name: DATA_SOURCE_PASS_FILE + value: /mnt/vault/postgres_password + volumeMounts: + - name: vault-secrets + mountPath: /mnt/vault + readOnly: true volumes: - name: vault-secrets csi: diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index f55896a..11479d9 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -371,8 +371,9 @@ ARIADNE_TEST_SUCCESS_RATE = ( ARIADNE_TEST_FAILURES_24H = ( 'sum by (result) (max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"failed|error"}[24h]))' ) -POSTGRES_CONN_USED_PCT = ( - "100 * sum(pg_stat_activity_count) / clamp_min(max(pg_settings_max_connections), 1)" +POSTGRES_CONN_USED = ( + 'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") ' + 'or label_replace(max(pg_settings_max_connections), "conn", "max", "__name__", ".*")' ) POSTGRES_CONN_HOTTEST = 'topk(1, sum by (datname) (pg_stat_activity_count))' ONEOFF_JOB_OWNER = ( @@ -1138,14 +1139,15 @@ def build_overview(): ) ) panels.append( - gauge_panel( + stat_panel( 34, "Postgres Connections Used", - POSTGRES_CONN_USED_PCT, + POSTGRES_CONN_USED, {"h": 3, "w": 4, "x": 16, "y": 8}, - min_value=0, - max_value=100, - thresholds=PERCENT_THRESHOLDS, + decimals=0, + text_mode="name_and_value", + legend="{{conn}}", + instant=True, ) ) panels.append( diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 93a2d80..2d7f3e5 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1112,7 +1112,7 @@ }, { "id": 34, - "type": "gauge", + "type": "stat", "title": "Postgres Connections Used", "datasource": { "type": "prometheus", @@ -1126,39 +1126,43 @@ }, "targets": [ { - "expr": "100 * sum(pg_stat_activity_count) / clamp_min(max(pg_settings_max_connections), 1)", - "refId": "A" + "expr": "label_replace(sum(pg_stat_activity_count), \"conn\", \"used\", \"__name__\", \".*\") or label_replace(max(pg_settings_max_connections), \"conn\", \"max\", \"__name__\", \".*\")", + "refId": "A", + "legendFormat": "{{conn}}", + "instant": true } ], "fieldConfig": { "defaults": { - "min": 0, - "max": 100, + "color": { + "mode": "thresholds" + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", + "color": "rgba(115, 115, 115, 1)", "value": null }, { - "color": "yellow", - "value": 50 - }, - { - "color": "orange", - "value": 75 - }, - { - "color": "red", - "value": 91.5 + "color": "green", + "value": 1 } ] - } + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 }, "overrides": [] }, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1166,9 +1170,7 @@ "fields": "", "values": false }, - "orientation": "auto", - "showThresholdMarkers": false, - "showThresholdLabels": false + "textMode": "name_and_value" } }, { diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 0e9526e..5336134 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1121,7 +1121,7 @@ data: }, { "id": 34, - "type": "gauge", + "type": "stat", "title": "Postgres Connections Used", "datasource": { "type": "prometheus", @@ -1135,39 +1135,43 @@ data: }, "targets": [ { - "expr": "100 * sum(pg_stat_activity_count) / clamp_min(max(pg_settings_max_connections), 1)", - "refId": "A" + "expr": "label_replace(sum(pg_stat_activity_count), \"conn\", \"used\", \"__name__\", \".*\") or label_replace(max(pg_settings_max_connections), \"conn\", \"max\", \"__name__\", \".*\")", + "refId": "A", + "legendFormat": "{{conn}}", + "instant": true } ], "fieldConfig": { "defaults": { - "min": 0, - "max": 100, + "color": { + "mode": "thresholds" + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", + "color": "rgba(115, 115, 115, 1)", "value": null }, { - "color": "yellow", - "value": 50 - }, - { - "color": "orange", - "value": 75 - }, - { - "color": "red", - "value": 91.5 + "color": "green", + "value": 1 } ] - } + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 }, "overrides": [] }, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1175,9 +1179,7 @@ data: "fields": "", "values": false }, - "orientation": "auto", - "showThresholdMarkers": false, - "showThresholdLabels": false + "textMode": "name_and_value" } }, {