From f0265d6b949fd4b31d03ef9294baa9ca5975aef6 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 13 Dec 2025 03:57:20 -0300 Subject: [PATCH] atlas pods: add namespace plurality by node table --- scripts/dashboards_render_atlas.py | 14 ++++++ services/keycloak/README.md | 27 ----------- services/monitoring/README.md | 28 ------------ .../monitoring/dashboards/atlas-pods.json | 45 +++++++++++++++++++ .../monitoring/grafana-dashboard-pods.yaml | 45 +++++++++++++++++++ 5 files changed, 104 insertions(+), 55 deletions(-) delete mode 100644 services/keycloak/README.md delete mode 100644 services/monitoring/README.md diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index f997b7d..181fb38 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -1171,6 +1171,20 @@ def build_pods_dashboard(): instant=True, ) ) + panels.append( + table_panel( + 10, + "Namespace Plurality by Node", + 'topk(1, sum by (namespace,node) (kube_pod_info{pod!=""}) ' + '/ ignoring(node) sum by (namespace) (kube_pod_info{pod!=""}))', + {"h": 8, "w": 24, "x": 0, "y": 42}, + unit="percent", + transformations=[ + {"id": "labelsToFields", "options": {}}, + {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}, + ], + ) + ) return { "uid": "atlas-pods", "title": "Atlas Pods", diff --git a/services/keycloak/README.md b/services/keycloak/README.md deleted file mode 100644 index bf7c21b..0000000 --- a/services/keycloak/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# services/keycloak - -Keycloak is deployed via raw manifests and backed by the shared Postgres (`postgres-service.postgres.svc.cluster.local:5432`). Create these secrets before applying: - -```bash -# DB creds (per-service DB/user in shared Postgres) -kubectl -n sso create secret generic keycloak-db \ - --from-literal=username=keycloak \ - --from-literal=password='' \ - --from-literal=database=keycloak - -# Admin console creds (maps to KC admin user) -kubectl -n sso create secret generic keycloak-admin \ - --from-literal=username=brad@bstein.dev \ - --from-literal=password='' -``` - -Apply: - -```bash -kubectl apply -k services/keycloak -``` - -Notes -- Service: `keycloak.sso.svc:80` (Ingress `sso.bstein.dev`, TLS via cert-manager). -- Uses Postgres schema `public`; DB/user should be provisioned in the shared Postgres instance. -- Health endpoints on :9000 are wired for probes. diff --git a/services/monitoring/README.md b/services/monitoring/README.md deleted file mode 100644 index 835ae1d..0000000 --- a/services/monitoring/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# services/monitoring - -## Grafana admin secret - -The Grafana Helm release expects a pre-existing secret named `grafana-admin` -in the `monitoring` namespace. Create or rotate it with: - -```bash -kubectl create secret generic grafana-admin \ - --namespace monitoring \ - --from-literal=admin-user=admin \ - --from-literal=admin-password='REPLACE_ME' -``` - -Update the password whenever you rotate credentials. - -## DCGM exporter image - -The NVIDIA GPU metrics DaemonSet expects `registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04`, mirrored from `docker.io/nvidia/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04`. Refresh it in Zot when bumping versions: - -```bash -skopeo copy \ - --all \ - docker://docker.io/nvidia/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 \ - docker://registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 -``` - -When finished mirroring from the control-plane, you can remove temporary tooling with `sudo apt-get purge -y skopeo && sudo apt-get autoremove -y` and clear `~/.config/containers/auth.json`. diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index bf7569d..3bb2c47 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -491,6 +491,51 @@ } } ] + }, + { + "id": 10, + "type": "table", + "title": "Namespace Plurality by Node", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 42 + }, + "targets": [ + { + "expr": "topk(1, sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / ignoring(node) sum by (namespace) (kube_pod_info{pod!=\"\"}))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "showHeader": true + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] } ], "time": { diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 3fefa1d..ddffec2 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -500,6 +500,51 @@ data: } } ] + }, + { + "id": 10, + "type": "table", + "title": "Namespace Plurality by Node", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 42 + }, + "targets": [ + { + "expr": "topk(1, sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / ignoring(node) sum by (namespace) (kube_pod_info{pod!=\"\"}))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "showHeader": true + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] } ], "time": {