# services/monitoring/oneoffs/grafana-org-bootstrap.yaml
# One-off job for monitoring/grafana-org-bootstrap-3.
# Purpose: wait for the Grafana API to report healthy, make sure the
# organization named by OVERVIEW_ORG_NAME exists, make sure that organization
# has the "VictoriaMetrics" datasource (uid atlas-vm), then ask Grafana to
# reload its datasource and dashboard provisioning.
# Run by setting spec.suspend to false, reconcile, then set it back to true.
# Safe to delete the finished Job/pod; it should not run continuously.
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-org-bootstrap-3
  namespace: monitoring
spec:
  suspend: true
  backoffLimit: 2
  template:
    metadata:
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "monitoring"
        vault.hashicorp.com/agent-inject-secret-grafana-env: "kv/data/atlas/monitoring/grafana-admin"
        # Rendered to /vault/secrets/grafana-env and sourced by the container's
        # shell. Values are single-quoted, with embedded single quotes escaped
        # as '\'' so the shell takes them literally: a password containing
        # $, backticks, backslashes or double quotes is neither expanded nor
        # executed when the file is sourced.
        vault.hashicorp.com/agent-inject-template-grafana-env: |
          {{- with secret "kv/data/atlas/monitoring/grafana-admin" -}}
          export GRAFANA_USER='{{ index .Data.data "admin-user" | replaceAll "'" "'\\''" }}'
          export GRAFANA_PASSWORD='{{ index .Data.data "admin-password" | replaceAll "'" "'\\''" }}'
          {{- end -}}
    spec:
      restartPolicy: OnFailure
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      serviceAccountName: monitoring-vault-sync
      containers:
        - name: bootstrap
          image: python:3.11-alpine
          env:
            - name: GRAFANA_URL
              value: http://grafana
            - name: OVERVIEW_ORG_NAME
              value: Overview
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -euo pipefail
              # Load GRAFANA_USER / GRAFANA_PASSWORD rendered by the Vault agent.
              . /vault/secrets/grafana-env
              python - <<'PY'
              import base64
              import json
              import os
              import time
              import urllib.error
              import urllib.request

              grafana_url = os.environ["GRAFANA_URL"].rstrip("/")
              org_name = os.environ["OVERVIEW_ORG_NAME"]
              user = os.environ["GRAFANA_USER"]
              password = os.environ["GRAFANA_PASSWORD"]

              # Every call authenticates with HTTP basic auth as the admin user.
              auth = base64.b64encode(f"{user}:{password}".encode()).decode()
              base_headers = {
                  "Authorization": f"Basic {auth}",
                  "Content-Type": "application/json",
              }


              def request(path, method="GET", data=None, org_id=None):
                  """Send one request to the Grafana API and return the open response.

                  path:   API path appended to GRAFANA_URL (for example "/api/orgs").
                  method: HTTP method.
                  data:   optional JSON-serialisable request body.
                  org_id: when given, sent as the X-Grafana-Org-Id header.

                  urllib raises urllib.error.HTTPError for error statuses; the
                  caller is responsible for closing the returned response.
                  """
                  headers = dict(base_headers)
                  if org_id is not None:
                      headers["X-Grafana-Org-Id"] = str(org_id)
                  payload = None
                  if data is not None:
                      payload = json.dumps(data).encode()
                  req = urllib.request.Request(
                      f"{grafana_url}{path}",
                      data=payload,
                      headers=headers,
                      method=method,
                  )
                  return urllib.request.urlopen(req, timeout=10)


              # Wait for Grafana: up to 60 attempts, pausing 2s after every
              # attempt that did not return HTTP 200 (errors included).
              last_error = None
              for _ in range(60):
                  try:
                      with request("/api/health") as resp:
                          if resp.status == 200:
                              break
                          last_error = f"unexpected HTTP status {resp.status}"
                  except Exception as err:
                      # Best-effort polling: any failure means "not ready yet".
                      # Keep the reason so a timeout can be diagnosed.
                      last_error = err
                  time.sleep(2)
              else:
                  raise SystemExit(
                      f"Grafana API did not become ready in time (last error: {last_error})"
                  )

              # Look the organization up by name and create it if it is missing.
              with request("/api/orgs") as resp:
                  orgs = json.load(resp)
              org_id = next((org["id"] for org in orgs if org["name"] == org_name), None)
              if org_id is None:
                  with request("/api/orgs", method="POST", data={"name": org_name}) as resp:
                      org_id = json.load(resp).get("orgId")
              if org_id is None:
                  raise SystemExit(f"Unable to resolve org ID for {org_name}")

              datasource = {
                  "name": "VictoriaMetrics",
                  "type": "prometheus",
                  "access": "proxy",
                  "url": "http://victoria-metrics-single-server:8428",
                  "isDefault": True,
                  "uid": "atlas-vm",
                  "jsonData": {"timeInterval": "15s"},
              }

              # Create the datasource only when the lookup answers 404; an
              # existing datasource is left untouched and any other HTTP error
              # fails the job.
              try:
                  with request("/api/datasources/uid/atlas-vm", org_id=org_id) as resp:
                      if resp.status != 200:
                          raise urllib.error.HTTPError(resp.url, resp.status, resp.reason, resp.headers, None)
              except urllib.error.HTTPError as err:
                  if err.code != 404:
                      raise
                  with request("/api/datasources", method="POST", data=datasource, org_id=org_id):
                      pass

              # Ask Grafana to re-read its provisioning configuration.
              with request("/api/admin/provisioning/datasources/reload", method="POST"):
                  pass
              with request("/api/admin/provisioning/dashboards/reload", method="POST"):
                  pass
              PY