monitoring: split overview org

This commit is contained in:
Brad Stein 2026-01-01 17:54:01 -03:00
parent c72e1e1f9b
commit 100a11e0de
3 changed files with 123 additions and 1 deletions

View File

@ -0,0 +1,110 @@
# services/monitoring/grafana-org-bootstrap.yaml
# Job that drives the Grafana HTTP API from an inline Python script: it waits
# for /api/health, looks up (or creates) the organization named by
# OVERVIEW_ORG_NAME, makes sure the VictoriaMetrics datasource (uid atlas-vm)
# exists in that organization, and finally asks Grafana to reload its
# provisioned datasources and dashboards.
apiVersion: batch/v1
kind: Job
metadata:
name: grafana-org-bootstrap-1
namespace: monitoring
spec:
# Retry budget for the Job as a whole.
backoffLimit: 2
template:
spec:
# Re-run the container when the script exits with a non-zero status.
restartPolicy: OnFailure
containers:
- name: bootstrap
image: python:3.11-alpine
env:
# Base URL of the Grafana API; the script strips any trailing slash.
# NOTE(review): presumably resolves to a Service called "grafana" in this
# namespace - confirm.
- name: GRAFANA_URL
value: http://grafana
# Organization the script looks up by exact name and creates when missing.
- name: OVERVIEW_ORG_NAME
value: Overview
# Admin credentials are taken from the grafana-admin Secret; the script
# sends them as an HTTP Basic Authorization header on every request.
- name: GRAFANA_USER
valueFrom:
secretKeyRef:
name: grafana-admin
key: admin-user
- name: GRAFANA_PASSWORD
valueFrom:
secretKeyRef:
name: grafana-admin
key: admin-password
# The shell gets the block below as its single -c argument: it enables
# strict mode and feeds the Python program to `python -` through a quoted
# heredoc (quoted, so the shell expands nothing inside the Python source).
# NOTE(review): `pipefail` is not a POSIX sh option - confirm the image's
# /bin/sh accepts it.
command: ["/bin/sh", "-c"]
args:
- |
set -euo pipefail
python - <<'PY'
import base64
import json
import os
import time
import urllib.error
import urllib.request
import http.client

# Failures that only mean "Grafana is not answering yet": connection refused,
# DNS errors, timeouts and HTTP error statuses are all OSError subclasses
# (URLError/HTTPError included); HTTPException covers malformed responses.
# Anything else is a bug in this script and must not be retried silently.
RETRYABLE_ERRORS = (OSError, http.client.HTTPException)

# Datasource that has to exist inside the overview organization.
VICTORIA_METRICS_DATASOURCE = {
    "name": "VictoriaMetrics",
    "type": "prometheus",
    "access": "proxy",
    "url": "http://victoria-metrics-single-server:8428",
    "isDefault": True,
    "uid": "atlas-vm",
    "jsonData": {"timeInterval": "15s"},
}


def basic_auth_headers(user, password):
    """Return the headers sent with every call: HTTP Basic auth plus JSON content type."""
    token = base64.b64encode(f"{user}:{password}".encode()).decode()
    return {
        "Authorization": f"Basic {token}",
        "Content-Type": "application/json",
    }


def build_request(base_url, base_headers, path, method="GET", data=None, org_id=None):
    """Build, without sending, the urllib request for one Grafana API call.

    Args:
        base_url: Grafana base URL without a trailing slash.
        base_headers: Headers common to all calls; copied, never mutated.
        path: API path starting with "/".
        method: HTTP verb.
        data: Optional JSON-serialisable body.
        org_id: When given, sent as X-Grafana-Org-Id so the call is scoped
            to that organization.

    Returns:
        A ready-to-open ``urllib.request.Request``.
    """
    headers = dict(base_headers)
    if org_id is not None:
        headers["X-Grafana-Org-Id"] = str(org_id)
    payload = None if data is None else json.dumps(data).encode()
    return urllib.request.Request(
        f"{base_url}{path}",
        data=payload,
        headers=headers,
        method=method,
    )


def find_org_id(orgs, name):
    """Return the ``id`` of the first org whose ``name`` equals *name*, else None."""
    return next((org["id"] for org in orgs if org["name"] == name), None)


def wait_until_ready(probe, attempts=60, delay=2, sleep=time.sleep):
    """Call *probe* until it returns a truthy value or *attempts* run out.

    A probe that raises one of RETRYABLE_ERRORS, or that answers falsy,
    counts as "not ready"; every such attempt is followed by a pause of
    *delay* seconds so a reachable-but-unready server is not hammered.
    Other exceptions propagate immediately.

    Returns:
        True once the probe succeeded, False when all attempts failed (the
        last failure is printed so the Job log shows why).
    """
    last_failure = None
    for _ in range(attempts):
        try:
            if probe():
                return True
            last_failure = "health endpoint answered with a non-200 status"
        except RETRYABLE_ERRORS as err:
            last_failure = err
        sleep(delay)
    print(f"Last readiness failure: {last_failure}", flush=True)
    return False


def ensure_org(request, name):
    """Return the ID of the org called *name*, creating the org when absent.

    Raises:
        SystemExit: If neither the listing nor the create call yields an ID.
    """
    with request("/api/orgs") as resp:
        org_id = find_org_id(json.load(resp), name)
    if org_id is None:
        with request("/api/orgs", method="POST", data={"name": name}) as resp:
            org_id = json.load(resp).get("orgId")
    if org_id is None:
        raise SystemExit(f"Unable to resolve org ID for {name}")
    return org_id


def ensure_datasource(request, org_id, datasource):
    """Create *datasource* in org *org_id* unless its uid already exists there.

    Only a 404 on the lookup triggers creation; any other HTTP error is
    re-raised so the Job fails visibly.
    """
    try:
        with request(f"/api/datasources/uid/{datasource['uid']}", org_id=org_id) as resp:
            if resp.status != 200:
                raise urllib.error.HTTPError(resp.url, resp.status, resp.reason, resp.headers, None)
    except urllib.error.HTTPError as err:
        if err.code != 404:
            raise
        with request("/api/datasources", method="POST", data=datasource, org_id=org_id):
            pass


def reload_provisioning(request):
    """Ask Grafana to re-read provisioned datasources, then dashboards."""
    for kind in ("datasources", "dashboards"):
        with request(f"/api/admin/provisioning/{kind}/reload", method="POST"):
            pass


def main():
    """Bootstrap the overview org and its datasource from environment settings.

    Reads GRAFANA_URL, OVERVIEW_ORG_NAME, GRAFANA_USER and GRAFANA_PASSWORD
    (a missing variable raises KeyError), waits for Grafana to become healthy,
    then ensures the org and datasource exist and reloads provisioning.
    """
    grafana_url = os.environ["GRAFANA_URL"].rstrip("/")
    org_name = os.environ["OVERVIEW_ORG_NAME"]
    base_headers = basic_auth_headers(
        os.environ["GRAFANA_USER"],
        os.environ["GRAFANA_PASSWORD"],
    )

    def request(path, method="GET", data=None, org_id=None):
        """Send one API call and return the open response (use as a context manager)."""
        req = build_request(grafana_url, base_headers, path, method, data, org_id)
        return urllib.request.urlopen(req, timeout=10)

    def grafana_is_healthy():
        with request("/api/health") as resp:
            return resp.status == 200

    if not wait_until_ready(grafana_is_healthy):
        raise SystemExit("Grafana API did not become ready in time")

    org_id = ensure_org(request, org_name)
    ensure_datasource(request, org_id, VICTORIA_METRICS_DATASOURCE)
    reload_provisioning(request)


# `python -` executes stdin as the __main__ module, so the Job still runs this.
if __name__ == "__main__":
    main()
PY

View File

@ -251,6 +251,7 @@ spec:
GF_AUTH_GENERIC_OAUTH_CLIENT_ID: "grafana"
GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: ""
GF_AUTH_ANONYMOUS_ENABLED: "true"
# Organization used for anonymous sessions. This is the same string as
# OVERVIEW_ORG_NAME in grafana-org-bootstrap.yaml, which finds or creates the
# org by exact name, so the two values have to be changed together.
GF_AUTH_ANONYMOUS_ORG_NAME: "Overview"
GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer"
GF_SECURITY_ALLOW_EMBEDDING: "true"
GF_AUTH_GENERIC_OAUTH_ENABLED: "true"
@ -298,12 +299,22 @@ spec:
jsonData:
timeInterval: "15s"
uid: atlas-vm
orgId: 1
# Datasource entry for org 2, using the same uid (atlas-vm) as the entry that
# ends just above.
# NOTE(review): the org ID is hard-coded to 2 here, whereas
# grafana-org-bootstrap.yaml resolves the "Overview" org by name - confirm
# that org really receives ID 2 on every install.
- name: VictoriaMetrics
type: prometheus
access: proxy
url: http://victoria-metrics-single-server:8428
isDefault: true
jsonData:
timeInterval: "15s"
uid: atlas-vm
orgId: 2
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: overview
# NOTE(review): the two orgId lines below come from a diff view whose +/-
# markers are missing. The hunk header (-298,12 +299,22) implies one removed
# and one added line in this hunk - presumably "orgId: 1" is the old value and
# "orgId: 2" the new one; confirm against the real helmrelease.yaml.
orgId: 1
orgId: 2
folder: Overview
type: file
disableDeletion: false

View File

@ -14,3 +14,4 @@ resources:
- dcgm-exporter.yaml
- grafana-folders.yaml
- helmrelease.yaml
- grafana-org-bootstrap.yaml