#!/usr/bin/env python3
"""Generate Atlas Grafana dashboards and render them into ConfigMaps.

Usage:
  scripts/dashboards_render_atlas.py --build   # rebuild JSON + ConfigMaps
  scripts/dashboards_render_atlas.py           # re-render ConfigMaps from JSON
"""

import argparse
import json
import textwrap
import urllib.parse
from pathlib import Path

# ---------------------------------------------------------------------------
# Paths, folders, and shared metadata
# ---------------------------------------------------------------------------

# Directory one level above the folder that holds this script. The usage text
# invokes it as scripts/dashboards_render_atlas.py, so this is presumably the
# repository root.
ROOT = Path(__file__).resolve().parents[1]
# Dashboard directory under the monitoring service tree.
DASHBOARD_DIR = ROOT / "services" / "monitoring" / "dashboards"
# YAML skeleton for a single ConfigMap with placeholders {relative_path},
# {name}, {key} and {payload}. {payload} sits at column 0 under a block
# scalar, so the caller presumably pre-indents it (the rendering code is
# outside this section; confirm there).
CONFIG_TEMPLATE = textwrap.dedent(
    """# {relative_path}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {name}
  labels:
    grafana_dashboard: "1"
data:
  {key}: |
{payload}
"""
)

# Datasource reference attached to every query-backed panel built below.
PROM_DS = {"type": "prometheus", "uid": "atlas-vm"}
# Folder identifiers; NOTE(review): their consumers are outside this section.
PUBLIC_FOLDER = "overview"
PRIVATE_FOLDER = "atlas-internal"

# Threshold steps for 0-100 percentage values (green -> yellow -> orange -> red).
# A step value of None marks the base step with no lower bound.
PERCENT_THRESHOLDS = {
    "mode": "absolute",
    "steps": [
        {"color": "green", "value": None},
        {"color": "yellow", "value": 50},
        {"color": "orange", "value": 75},
        {"color": "red", "value": 91.5},
    ],
}

# Range-vector window used by namespace_cpu_raw() for the CPU rate().
NAMESPACE_CPU_WINDOW = "1m"

# ---------------------------------------------------------------------------
# Cluster metadata
# ---------------------------------------------------------------------------

# Node names of the control plane proper.
CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"]
# Additional hosts grouped with the control plane in CONTROL_ALL.
CONTROL_DEPENDENCIES = ["titan-db", "titan-jh"]
CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES
# Worker node names. The list order also feeds NODE_TIEBREAKER, so reordering
# it changes the generated tie-breaker weights.
WORKER_NODES = [
    "titan-04",
    "titan-05",
    "titan-06",
    "titan-07",
    "titan-08",
    "titan-09",
    "titan-10",
    "titan-11",
    "titan-20",
    "titan-21",
    "titan-12",
    "titan-13",
    "titan-14",
    "titan-15",
    "titan-17",
    "titan-18",
    "titan-19",
    "titan-22",
    "titan-24",
]

# Alternation regexes and counts derived from the node lists above.
CONTROL_REGEX = "|".join(CONTROL_PLANE_NODES)
CONTROL_ALL_REGEX = "|".join(CONTROL_ALL)
WORKER_REGEX = "|".join(WORKER_NODES)
CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
WORKER_TOTAL = len(WORKER_NODES)
# "/<total>" strings; NOTE(review): presumably display suffixes such as "3/3",
# but their consumers are outside this section.
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
# Namespaces considered infrastructure (excluded from workload counts)
INFRA_PATTERNS = [
    "kube-.*",
    ".*-system",
    "traefik",
    "monitoring",
    "logging",
    "cert-manager",
    "maintenance",
    "postgres",
]
# Single anchored regex matching any of the infrastructure patterns.
INFRA_REGEX = f"^({'|'.join(INFRA_PATTERNS)})$"
# Namespaces allowed on control plane without counting as workloads
CP_ALLOWED_NS = INFRA_REGEX
# Matches titan-12..titan-19 plus titan-22 and titan-24.
# NOTE(review): this also matches titan-16, which is not in WORKER_NODES.
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
# Grid widths for the top row of the overview dashboard; they sum to 24.
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
# Count of pods on control-plane nodes outside the allowed namespaces;
# "or on() vector(0)" yields 0 instead of an empty result when none match.
CONTROL_WORKLOADS_EXPR = (
    f'sum(kube_pod_info{{node=~"{CONTROL_REGEX}",namespace!~"{CP_ALLOWED_NS}"}}) or on() vector(0)'
)

# ---------------------------------------------------------------------------
# PromQL helpers
# ---------------------------------------------------------------------------

# node_uname_info for every series with a non-empty nodename, with that
# nodename copied into a "node" label. scoped_node_expr() joins against it on
# "instance" to attach node names to per-instance metrics.
NODE_INFO = 'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)")'


def node_filter(regex):
    """Build a ``node_uname_info`` selector restricted to matching nodenames.

    The selected series carry a ``node`` label copied from ``nodename`` so the
    result can be joined against other expressions on ``node``.
    """
    selector = f'node_uname_info{{nodename=~"{regex}"}}'
    return f'label_replace({selector}, "node", "$1", "nodename", "(.*)")'


def scoped_node_expr(base, scope=""):
    """Average a per-instance expression by node name, optionally scoped.

    ``base`` is joined with ``NODE_INFO`` on ``instance`` to pick up the
    ``node`` label and then averaged by ``node``. When ``scope`` is a
    non-empty regex, the result is additionally multiplied by
    ``node_filter(scope)`` on ``node`` so only matching nodes remain.
    """
    per_node = f"avg by (node) (({base}) * on(instance) group_left(node) {NODE_INFO})"
    if not scope:
        return per_node
    return f"({per_node}) * on(node) group_left() {node_filter(scope)}"


def node_cpu_expr(scope=""):
    """Return per-node CPU usage in percent (100 minus the 5m idle share)."""
    idle_fraction = 'avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))'
    busy_percent = f"(1 - {idle_fraction}) * 100"
    return scoped_node_expr(busy_percent, scope)


def node_mem_expr(scope=""):
    """Return per-node memory usage in percent of MemTotal."""
    used_bytes = "(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)"
    used_percent = f"avg by (instance) ({used_bytes} / node_memory_MemTotal_bytes * 100)"
    return scoped_node_expr(used_percent, scope)


def filesystem_usage_expr(mount, scope=""):
    """Return per-node usage (percent) of the filesystem mounted at ``mount``.

    tmpfs and overlay filesystems are excluded from the selection.
    """
    labels = f'{{mountpoint="{mount}",fstype!~"tmpfs|overlay"}}'
    free_ratio = f"node_filesystem_avail_bytes{labels} / node_filesystem_size_bytes{labels}"
    used_percent = f"avg by (instance) ((1 - ({free_ratio})) * 100)"
    return scoped_node_expr(used_percent, scope)


def root_usage_expr(scope=""):
    """Return per-node usage (percent) of the root filesystem ("/")."""
    root_mount = "/"
    return filesystem_usage_expr(root_mount, scope)


def astreae_usage_expr(mount):
    """Return usage (percent) of ``mount`` summed across all reporting series.

    Available and total bytes are each summed first, so the result is one
    aggregate percentage rather than a per-node value.
    """
    labels = f'{{mountpoint="{mount}",fstype!~"tmpfs|overlay"}}'
    avail_total = f"sum(node_filesystem_avail_bytes{labels})"
    size_total = f"sum(node_filesystem_size_bytes{labels})"
    return f"100 - ({avail_total} / {size_total} * 100)"


def astreae_free_expr(mount):
    """Return the summed available bytes of ``mount`` (tmpfs/overlay excluded)."""
    labels = f'{{mountpoint="{mount}",fstype!~"tmpfs|overlay"}}'
    return f"sum(node_filesystem_avail_bytes{labels})"


def topk_with_node(expr):
    """Wrap ``expr`` to keep only its top series, named after its node.

    The ``node`` label of the single highest series is copied into
    ``__name__``.
    """
    hottest = f"topk(1, {expr})"
    return f'label_replace({hottest}, "__name__", "$1", "node", "(.*)")'


def node_net_expr(scope=""):
    """Return per-node network throughput (receive + transmit, 5m rate).

    The loopback device ("lo") is excluded.
    """
    received, transmitted = (
        f'rate(node_network_{direction}_bytes_total{{device!~"lo"}}[5m])'
        for direction in ("receive", "transmit")
    )
    combined = f"sum by (instance) ({received} + {transmitted})"
    return scoped_node_expr(combined, scope)


def node_io_expr(scope=""):
    """Return per-node disk throughput (bytes read + written, 5m rate)."""
    reads = "rate(node_disk_read_bytes_total[5m])"
    writes = "rate(node_disk_written_bytes_total[5m])"
    combined = f"sum by (instance) ({reads} + {writes})"
    return scoped_node_expr(combined, scope)


def namespace_selector(scope_var):
    """Return label matchers for container series, ending with ``scope_var``.

    Series without a namespace, pod or container label are dropped, as is the
    "POD" container; ``scope_var`` is appended verbatim as the final matcher.
    """
    matchers = [
        'namespace!=""',
        'pod!=""',
        'container!=""',
        'container!="POD"',
        f"{scope_var}",
    ]
    return ",".join(matchers)


def namespace_gpu_selector(scope_var):
    """Return label matchers for GPU series, ending with ``scope_var``.

    Only non-empty ``namespace`` and ``pod`` labels are required here; no
    container matcher is applied.
    """
    matchers = ['namespace!=""', 'pod!=""', f"{scope_var}"]
    return ",".join(matchers)


def namespace_cpu_raw(scope_var):
    """Return CPU usage rate summed per namespace over NAMESPACE_CPU_WINDOW."""
    selector = namespace_selector(scope_var)
    usage_rate = (
        f"rate(container_cpu_usage_seconds_total{{{selector}}}[{NAMESPACE_CPU_WINDOW}])"
    )
    return f"sum({usage_rate}) by (namespace)"


def namespace_ram_raw(scope_var):
    """Return working-set memory bytes summed per namespace."""
    selector = namespace_selector(scope_var)
    return f"sum(container_memory_working_set_bytes{{{selector}}}) by (namespace)"


def namespace_gpu_usage_instant(scope_var):
    """Return the DCGM GPU utilisation gauge summed per namespace."""
    selector = namespace_gpu_selector(scope_var)
    return f"sum(DCGM_FI_DEV_GPU_UTIL{{{selector}}}) by (namespace)"


def namespace_share_expr(resource_expr):
    """Express each series of ``resource_expr`` as a percentage of the total.

    The denominator is clamped to a minimum of 1 so an all-zero total does not
    divide by zero.
    """
    denominator = f"clamp_min(sum( {resource_expr} ), 1)"
    numerator = f"100 * ( {resource_expr} )"
    return f"{numerator} / {denominator}"


def namespace_cpu_share_expr(scope_var):
    """Return each namespace's CPU usage as a percentage of the scoped total."""
    cpu_by_namespace = namespace_cpu_raw(scope_var)
    return namespace_share_expr(cpu_by_namespace)


def namespace_ram_share_expr(scope_var):
    """Return each namespace's memory usage as a percentage of the scoped total."""
    ram_by_namespace = namespace_ram_raw(scope_var)
    return namespace_share_expr(ram_by_namespace)


def namespace_gpu_share_expr(scope_var):
    """Return per-namespace GPU share in percent, with an "idle" fallback.

    When total GPU utilisation is zero (or no series exist), the expression
    yields a single 100% series labelled ``namespace="idle"`` instead of an
    empty result.
    """
    gpu_by_namespace = namespace_gpu_usage_instant(scope_var)
    # Falls back to 0 so the "== 0" idle check still has a sample to match.
    overall = f"(sum({gpu_by_namespace}) or on() vector(0))"
    busy_share = f"100 * ({gpu_by_namespace}) / clamp_min({overall}, 1)"
    idle_slice = (
        f'label_replace(vector(100), "namespace", "idle", "", "") and on() ({overall} == 0)'
    )
    return f"({busy_share}) or ({idle_slice})"


# Each expression below counts distinct (namespace, pod) pairs and ends with
# "or on() vector(0)" so it yields 0 instead of an empty result.

# Pods whose phase is anything other than Running or Succeeded.
PROBLEM_PODS_EXPR = (
    'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"})) '
    "or on() vector(0)"
)
# Pods with a container waiting in CrashLoopBackOff or ImagePullBackOff.
CRASHLOOP_EXPR = (
    'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason'
    '{reason=~"CrashLoopBackOff|ImagePullBackOff"})) '
    "or on() vector(0)"
)
# Pods whose deletion timestamp is more than 600 seconds in the past.
STUCK_TERMINATING_EXPR = (
    'sum(max by (namespace,pod) ('
    '((time() - kube_pod_deletion_timestamp{pod!=""}) > bool 600)'
    ' and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=""} > bool 0)'
    ')) '
    "or on() vector(0)"
)
# Look-back window for the availability average.
UPTIME_WINDOW = "365d"
# Keep the subquery step coarse so we don't request an excessive number of points.
UPTIME_STEP = "1h"
# Fraction of desired traefik replicas that are available; the denominator is
# clamped to at least 1 to avoid dividing by zero.
TRAEFIK_READY_EXPR = (
    "("
    'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})'
    " / clamp_min("
    'sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)'
    ")"
)
# Fraction of control-plane nodes reporting Ready.
CONTROL_READY_FRACTION_EXPR = (
    f"(sum(kube_node_status_condition{{condition=\"Ready\",status=\"true\",node=~\"{CONTROL_REGEX}\"}})"
    f" / {CONTROL_TOTAL})"
)
# Availability is the lower of the two fractions above.
# NOTE(review): a two-argument min() is not standard PromQL; this presumably
# relies on the datasource (uid "atlas-vm") accepting it. Confirm.
UPTIME_AVAIL_EXPR = (
    f"min(({CONTROL_READY_FRACTION_EXPR}), ({TRAEFIK_READY_EXPR}))"
)

# Tie-breaker to deterministically pick one node per namespace when shares tie.
# Each node contributes its 1-based list position times 1e-6.
NODE_TIEBREAKER = " + ".join(
    f"({node_filter(node)}) * 1e-6 * {idx}"
    for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1)
)
# Availability averaged over UPTIME_WINDOW, sampled every UPTIME_STEP.
UPTIME_AVG_EXPR = f"avg_over_time(({UPTIME_AVAIL_EXPR})[{UPTIME_WINDOW}:{UPTIME_STEP}])"
# A 0-1 fraction; the overview panel displays it with unit "percentunit".
UPTIME_PERCENT_EXPR = UPTIME_AVG_EXPR
# Availability as a "number of nines"; the clamp caps the result at 9 and
# keeps log10 away from zero.
UPTIME_NINES_EXPR = f"-log10(1 - clamp_max({UPTIME_AVG_EXPR}, 0.999999999))"
# Colour steps on the "nines" scale.
UPTIME_THRESHOLDS = {
    "mode": "absolute",
    "steps": [
        {"color": "red", "value": None},
        {"color": "orange", "value": 2},
        {"color": "yellow", "value": 3},
        {"color": "green", "value": 3.5},
    ],
}
# Colour steps on the 0-1 availability fraction.
UPTIME_PERCENT_THRESHOLDS = {
    "mode": "absolute",
    "steps": [
        {"color": "red", "value": None},
        {"color": "orange", "value": 0.99},
        {"color": "yellow", "value": 0.999},
        {"color": "green", "value": 0.9999},
        {"color": "blue", "value": 0.99999},
    ],
}
# Table queries: the value is an age in seconds, and the joins add descriptive
# labels (node, phase, reason) from kube_pod_info and the status metrics.

# Pod age for pods not in Running/Succeeded, with node and phase labels.
PROBLEM_TABLE_EXPR = (
    "(time() - kube_pod_created{pod!=\"\"}) "
    "* on(namespace,pod) group_left(node) kube_pod_info "
    "* on(namespace,pod) group_left(phase) "
    "max by (namespace,pod,phase) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})"
)
# Pod age for containers waiting in CrashLoopBackOff/ImagePullBackOff, with
# node and reason labels.
CRASHLOOP_TABLE_EXPR = (
    "(time() - kube_pod_created{pod!=\"\"}) "
    "* on(namespace,pod) group_left(node) kube_pod_info "
    "* on(namespace,pod,container) group_left(reason) "
    "max by (namespace,pod,container,reason) "
    "(kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})"
)
# Seconds since the deletion timestamp for pods that have one, with node
# label. Unlike STUCK_TERMINATING_EXPR there is no 600-second cut-off here.
STUCK_TABLE_EXPR = (
    "("
    "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) "
    "and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) "
    "* on(namespace,pod) group_left(node) kube_pod_info"
    ")"
)

# Namespace label matchers offered as the values of the scope variables.
NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
# Names of the dashboard variables that each hold one of the matchers above.
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
# "Glue" cronjobs are those carrying this label on kube_cronjob_labels.
GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"'
GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}"
# Suffix that restricts a cronjob metric to the glue-labelled jobs.
GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}"
GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})"
GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})"
GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1"
GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})"
# Ages in seconds, then the same values converted to hours.
GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})"
GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})"
GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600"
GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600"
# A job is stale when its last success is older than 36 hours.
GLUE_STALE_WINDOW_SEC = 36 * 3600
# "> bool" yields 1/0 per job rather than filtering, so sum() counts stale jobs.
GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
# Glue jobs with no last-successful-time series at all.
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)"
# The *_ACTIVE variants drop suspended jobs.
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
# NOTE(review): count() of an empty vector returns no sample, so this sum is
# presumably empty whenever no job is missing. Confirm whether an
# "or on() vector(0)" fallback is wanted here.
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE}))"
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})"
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
# Ariadne task runs per task over the last 24h, split by status.
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
# Hours since each Ariadne schedule last succeeded.
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
# GPU-equipped nodes and the alternation regex matching them.
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES)
# Request rate per Traefik router (5m).
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
# Bytes/s received and sent by the traefik pods; 0 when no series exist.
TRAEFIK_NET_INGRESS = (
    'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
    " or on() vector(0)"
)
TRAEFIK_NET_EGRESS = (
    'sum(rate(container_network_transmit_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
    " or on() vector(0)"
)
# Cluster-wide container network receive/transmit rates.
NET_CLUSTER_RX = (
    'sum(rate(container_network_receive_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
    " or on() vector(0)"
)
NET_CLUSTER_TX = (
    'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
    " or on() vector(0)"
)
# Excludes loopback and virtual/overlay interfaces so only physical NICs count.
PHYSICAL_NET_FILTER = 'device!~"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*"'
NET_NODE_RX_PHYS = (
    f'sum(rate(node_network_receive_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
)
NET_NODE_TX_PHYS = (
    f'sum(rate(node_network_transmit_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
)
# NOTE(review): the "total" aliases the transmit-only expression, identical to
# NET_EGRESS_EXPR. Confirm whether rx + tx was intended.
NET_TOTAL_EXPR = NET_NODE_TX_PHYS
NET_INGRESS_EXPR = NET_NODE_RX_PHYS
NET_EGRESS_EXPR = NET_NODE_TX_PHYS
# Receive + transmit rate of all pods outside the traefik namespace.
NET_INTERNAL_EXPR = (
    'sum(rate(container_network_receive_bytes_total{namespace!="traefik",pod!=""}[5m]) '
    '+ rate(container_network_transmit_bytes_total{namespace!="traefik",pod!=""}[5m]))'
    ' or on() vector(0)'
)
# API server 5xx response rate and p99 latencies (seconds converted to ms).
APISERVER_5XX_RATE = 'sum(rate(apiserver_request_total{code=~"5.."}[5m]))'
APISERVER_P99_LATENCY_MS = (
    "histogram_quantile(0.99, sum by (le) (rate(apiserver_request_duration_seconds_bucket[5m]))) * 1000"
)
ETCD_P99_LATENCY_MS = (
    "histogram_quantile(0.99, sum by (le) (rate(etcd_request_duration_seconds_bucket[5m]))) * 1000"
)
# Traefik entrypoint request rates; "success" means any non-5xx response code.
TRAEFIK_TOTAL_5M = "sum(rate(traefik_entrypoint_requests_total[5m]))"
TRAEFIK_SUCCESS_5M = 'sum(rate(traefik_entrypoint_requests_total{code!~"5.."}[5m]))'
# Success ratio over 5m; the denominator is clamped to at least 1.
TRAEFIK_SLI_5M = f"({TRAEFIK_SUCCESS_5M}) / clamp_min({TRAEFIK_TOTAL_5M}, 1)"
# Traefik entrypoint latency quantiles (seconds converted to ms).
TRAEFIK_P99_LATENCY_MS = (
    "histogram_quantile(0.99, sum by (le) (rate(traefik_entrypoint_request_duration_seconds_bucket[5m]))) * 1000"
)
TRAEFIK_P95_LATENCY_MS = (
    "histogram_quantile(0.95, sum by (le) (rate(traefik_entrypoint_request_duration_seconds_bucket[5m]))) * 1000"
)
# Availability target used as the basis of the error budget in traefik_burn().
SLO_AVAILABILITY = 0.999


def traefik_sli(window):
    """Return the Traefik success ratio (non-5xx / all requests) over ``window``.

    ``window`` is a range-vector duration such as "5m" or "1h". The
    denominator is clamped to a minimum of 1 so the ratio stays defined when
    there is no traffic.
    """
    total = f'sum(rate(traefik_entrypoint_requests_total[{window}]))'
    success = f'sum(rate(traefik_entrypoint_requests_total{{code!~"5.."}}[{window}]))'
    return f"({success}) / clamp_min({total}, 1)"


def traefik_burn(window):
    """Return the error-budget burn rate over ``window``.

    The burn rate is the observed error ratio (1 - SLI) divided by the error
    budget (1 - SLO_AVAILABILITY); a value of 1 means errors arrive exactly
    at the budgeted pace.
    """
    sli = traefik_sli(window)
    # 1 - 0.999 is 0.0010000000000000009 in binary floating point; round it
    # so the generated query carries the clean literal (0.001), not the
    # float artefact.
    error_budget = round(1 - SLO_AVAILABILITY, 12)
    return f"(1 - ({sli})) / {error_budget}"

# ---------------------------------------------------------------------------
# Panel factories
# ---------------------------------------------------------------------------


def stat_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    decimals=None,
    thresholds=None,
    text_mode="value",
    legend=None,
    instant=False,
    value_suffix=None,
    links=None,
):
    """Build the dict for a Grafana ``stat`` panel with a single query.

    Args:
        panel_id: Value stored under the panel's "id" key.
        title: Panel title.
        expr: Query expression for target "A".
        grid: ``gridPos`` dict (h/w/x/y).
        unit: Field unit.
        decimals: Decimal places; omitted from the panel when ``None``.
        thresholds: Threshold config; a grey/green default (green from 1) is
            used when falsy.
        text_mode: Value for the panel's ``textMode`` option.
        legend: Optional ``legendFormat`` for the query.
        instant: Mark the query as instant when true.
        value_suffix: Optional suffix stored under ``custom.valueSuffix``.
        links: Optional list of panel links.

    Returns:
        The panel definition as a plain dict.
    """
    fallback_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "rgba(115, 115, 115, 1)", "value": None},
            {"color": "green", "value": 1},
        ],
    }

    custom = {"displayMode": "auto"}
    if value_suffix:
        custom["valueSuffix"] = value_suffix

    field_defaults = {
        "color": {"mode": "thresholds"},
        "mappings": [],
        "thresholds": thresholds or fallback_thresholds,
        "unit": unit,
        "custom": custom,
    }
    if decimals is not None:
        field_defaults["decimals"] = decimals

    query = {"expr": expr, "refId": "A"}
    if legend:
        query["legendFormat"] = legend
    if instant:
        query["instant"] = True

    display_options = {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
        "textMode": text_mode,
    }

    panel = {
        "id": panel_id,
        "type": "stat",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [query],
        "fieldConfig": {"defaults": field_defaults, "overrides": []},
        "options": display_options,
    }
    if links:
        panel["links"] = links
    return panel


def gauge_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    min_value=0,
    max_value=1,
    thresholds=None,
    links=None,
):
    """Build the dict for a Grafana ``gauge`` panel with a single query.

    Args:
        panel_id: Value stored under the panel's "id" key.
        title: Panel title.
        expr: Query expression for target "A".
        grid: ``gridPos`` dict (h/w/x/y).
        min_value: Lower bound of the gauge.
        max_value: Upper bound of the gauge.
        thresholds: Threshold config; when falsy, a green base step turning
            red at ``max_value`` is used.
        links: Optional list of panel links.

    Returns:
        The panel definition as a plain dict.
    """
    if not thresholds:
        thresholds = {
            "mode": "absolute",
            "steps": [
                {"color": "green", "value": None},
                {"color": "red", "value": max_value},
            ],
        }

    field_defaults = {
        "min": min_value,
        "max": max_value,
        "thresholds": thresholds,
    }
    display_options = {
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
        "orientation": "auto",
        "showThresholdMarkers": False,
        "showThresholdLabels": False,
    }

    panel = {
        "id": panel_id,
        "type": "gauge",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [{"expr": expr, "refId": "A"}],
        "fieldConfig": {"defaults": field_defaults, "overrides": []},
        "options": display_options,
    }
    if links:
        panel["links"] = links
    return panel


def timeseries_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    legend=None,
    legend_display="table",
    legend_placement="bottom",
    legend_calcs=None,
    time_from=None,
    links=None,
):
    """Build the dict for a Grafana ``timeseries`` panel with a single query.

    Args:
        panel_id: Value stored under the panel's "id" key.
        title: Panel title.
        expr: Query expression for target "A".
        grid: ``gridPos`` dict (h/w/x/y).
        unit: Field unit.
        legend: Optional ``legendFormat`` for the query.
        legend_display: Legend ``displayMode``.
        legend_placement: Legend ``placement``.
        legend_calcs: Optional list stored as the legend's ``calcs``.
        time_from: Optional value stored under the panel's ``timeFrom`` key.
        links: Optional list of panel links.

    Returns:
        The panel definition as a plain dict.
    """
    query = {"expr": expr, "refId": "A"}
    if legend:
        query["legendFormat"] = legend

    legend_options = {"displayMode": legend_display, "placement": legend_placement}
    if legend_calcs:
        legend_options["calcs"] = legend_calcs

    panel = {
        "id": panel_id,
        "type": "timeseries",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [query],
        "fieldConfig": {"defaults": {"unit": unit}, "overrides": []},
        "options": {"legend": legend_options, "tooltip": {"mode": "multi"}},
    }
    # Optional keys are appended in a fixed order so generated JSON is stable.
    if time_from:
        panel["timeFrom"] = time_from
    if links:
        panel["links"] = links
    return panel


def table_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    transformations=None,
    instant=False,
    options=None,
    filterable=True,
    footer=None,
    format=None,
):
    """Build the dict for a Grafana ``table`` panel with a single query.

    Args:
        panel_id: Value stored under the panel's "id" key.
        title: Panel title.
        expr: Query expression for target "A".
        grid: ``gridPos`` dict (h/w/x/y).
        unit: Field unit.
        transformations: Optional list of transformation dicts.
        instant: Mark the query as instant when true.
        options: Extra panel options merged over the defaults
            (``showHeader`` on, ``columnFilters`` off).
        filterable: Value stored under ``custom.filterable``.
        footer: Optional footer config; it overrides any "footer" key
            supplied through ``options``.
        format: Optional query ``format``. The name shadows the builtin but
            is kept because callers pass it by keyword.

    Returns:
        The panel definition as a plain dict.
    """
    display_options = {"showHeader": True, "columnFilters": False}
    if options:
        display_options.update(options)
    if footer is not None:
        display_options["footer"] = footer

    query = {"expr": expr, "refId": "A"}
    if instant:
        query["instant"] = True
    if format:
        query["format"] = format

    panel = {
        "id": panel_id,
        "type": "table",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [query],
        "fieldConfig": {
            "defaults": {"unit": unit, "custom": {"filterable": filterable}},
            "overrides": [],
        },
        "options": display_options,
    }
    if transformations:
        panel["transformations"] = transformations
    return panel


def pie_panel(panel_id, title, expr, grid, *, links=None, description=None):
    """Build the dict for a Grafana ``piechart`` panel keyed by namespace.

    The query's legend is fixed to ``{{namespace}}`` and values are shown in
    percent. ``links`` and ``description`` are added only when truthy.
    """
    query = {"expr": expr, "refId": "A", "legendFormat": "{{namespace}}"}
    field_defaults = {
        "unit": "percent",
        "color": {"mode": "palette-classic"},
    }
    display_options = {
        "legend": {"displayMode": "list", "placement": "right"},
        "pieType": "pie",
        "displayLabels": [],
        "tooltip": {"mode": "single"},
        "colorScheme": "interpolateSpectral",
        "colorBy": "value",
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
    }

    panel = {
        "id": panel_id,
        "type": "piechart",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [query],
        "fieldConfig": {"defaults": field_defaults, "overrides": []},
        "options": display_options,
    }
    for key, value in (("links", links), ("description", description)):
        if value:
            panel[key] = value
    return panel


def namespace_scope_variable(var_name, label):
    """Build a hidden custom dashboard variable holding a namespace matcher.

    The variable offers three choices (workload-only, all, infrastructure
    only) and defaults to the workload-only matcher. The ``query`` string and
    the ``options`` list are derived from the same table so they stay in sync.
    """
    choices = (
        ("workload namespaces only", NAMESPACE_SCOPE_WORKLOAD),
        ("all namespaces", NAMESPACE_SCOPE_ALL),
        ("infrastructure namespaces only", NAMESPACE_SCOPE_INFRA),
    )
    # Only the first choice starts out selected.
    options = [
        {"text": text, "value": value, "selected": position == 0}
        for position, (text, value) in enumerate(choices)
    ]
    query = ",".join(f"{text} : {value}" for text, value in choices)
    default_text, default_value = choices[0]

    return {
        "name": var_name,
        "label": label,
        "type": "custom",
        "query": query,
        "current": {"text": default_text, "value": default_value, "selected": True},
        "options": options,
        "hide": 2,
        "multi": False,
        "includeAll": False,
        "refresh": 1,
        "sort": 0,
        "skipUrlSync": False,
    }


def namespace_scope_links(var_name):
    """Build panel links that switch ``var_name`` between the three scopes.

    Each link sets ``var_name`` to a URL-encoded matcher and passes every
    other scope variable through as ``${name}``.
    """

    def scope_url(selected_scope):
        quoted = urllib.parse.quote(selected_scope, safe="")
        pairs = [
            f"var-{name}={quoted}" if name == var_name else f"var-{name}=${{{name}}}"
            for name in NAMESPACE_SCOPE_VARS
        ]
        return "?" + "&".join(pairs)

    menu = (
        ("Workload namespaces only", NAMESPACE_SCOPE_WORKLOAD),
        ("All namespaces", NAMESPACE_SCOPE_ALL),
        ("Infrastructure namespaces only", NAMESPACE_SCOPE_INFRA),
    )
    return [
        {"title": link_title, "url": scope_url(scope), "targetBlank": False}
        for link_title, scope in menu
    ]


def bargauge_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    links=None,
    limit=None,
    thresholds=None,
    decimals=None,
    instant=False,
):
    """Build the dict for a horizontal Grafana ``bargauge`` panel.

    Bars are labelled by ``{{node}}`` and sorted by value, highest first.

    Args:
        panel_id: Value stored under the panel's "id" key.
        title: Panel title.
        expr: Query expression for target "A".
        grid: ``gridPos`` dict (h/w/x/y).
        unit: Field unit; "percent" also pins the maximum to 100.
        links: Optional list of panel links.
        limit: When truthy, keep only this many rows after sorting.
        thresholds: Threshold config; a green/yellow/orange/red default
            (50/70/85) is used when falsy.
        decimals: Decimal places; omitted from the panel when ``None``.
        instant: Mark the query as instant when true.

    Returns:
        The panel definition as a plain dict.
    """
    fallback_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 50},
            {"color": "orange", "value": 70},
            {"color": "red", "value": 85},
        ],
    }

    upper_bound = None
    if unit == "percent":
        upper_bound = 100

    field_defaults = {
        "unit": unit,
        "min": 0,
        "max": upper_bound,
        "thresholds": thresholds or fallback_thresholds,
    }
    if decimals is not None:
        field_defaults["decimals"] = decimals

    query = {"expr": expr, "refId": "A", "legendFormat": "{{node}}"}
    if instant:
        query["instant"] = True

    # Keep bars ordered by value descending for readability.
    transformations = [
        {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
    ]
    if limit:
        transformations.append({"id": "limit", "options": {"limit": limit}})

    panel = {
        "id": panel_id,
        "type": "bargauge",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [query],
        "fieldConfig": {"defaults": field_defaults, "overrides": []},
        "options": {
            "displayMode": "gradient",
            "orientation": "horizontal",
            "reduceOptions": {
                "calcs": ["lastNotNull"],
                "fields": "",
                "values": False,
            },
        },
    }
    # "links" goes before "transformations" to keep the emitted key order.
    if links:
        panel["links"] = links
    panel["transformations"] = transformations
    return panel


def text_panel(panel_id, title, content, grid):
    """Build the dict for a markdown ``text`` panel with no datasource."""
    panel = {"id": panel_id, "type": "text", "title": title}
    panel["gridPos"] = grid
    panel["datasource"] = None
    panel["options"] = {"mode": "markdown", "content": content}
    return panel


def link_to(uid):
    """Return a one-item link list opening dashboard ``uid`` in a new tab."""
    link = {
        "title": f"Open {uid} dashboard",
        "url": f"/d/{uid}",
        "targetBlank": True,
    }
    return [link]


# ---------------------------------------------------------------------------
# Dashboard builders
# ---------------------------------------------------------------------------


def build_overview():
    panels = []

    count_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 1},
            {"color": "orange", "value": 2},
            {"color": "red", "value": 3},
        ],
    }

    row1_stats = [
        {
            "id": 2,
            "title": "Control Plane Ready",
            "expr": f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{CONTROL_REGEX}"}})',
            "kind": "gauge",
            "max_value": CONTROL_TOTAL,
            "thresholds": {
                "mode": "absolute",
                "steps": [
                    {"color": "red", "value": None},
                    {"color": "green", "value": CONTROL_TOTAL},
                ],
            },
        },
        {
            "id": 3,
            "title": "Control Plane Workloads",
            "expr": CONTROL_WORKLOADS_EXPR,
            "kind": "stat",
            "thresholds": {
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 1},
                    {"color": "orange", "value": 2},
                    {"color": "red", "value": 3},
                ],
            },
            "links": link_to("atlas-pods"),
        },
        {
            "id": 5,
            "title": "Stuck Terminating",
            "expr": STUCK_TERMINATING_EXPR,
            "kind": "stat",
            "thresholds": {
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 1},
                    {"color": "orange", "value": 2},
                    {"color": "red", "value": 3},
                ],
            },
            "links": link_to("atlas-pods"),
        },
        {
            "id": 27,
            "title": "Atlas Availability",
            "expr": UPTIME_PERCENT_EXPR,
            "kind": "stat",
            "thresholds": UPTIME_PERCENT_THRESHOLDS,
            "unit": "percentunit",
            "decimals": 4,
            "text_mode": "value",
        },
        {
            "id": 4,
            "title": "Problem Pods",
            "expr": PROBLEM_PODS_EXPR,
            "kind": "stat",
            "thresholds": {
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 1},
                    {"color": "orange", "value": 2},
                    {"color": "red", "value": 3},
                ],
            },
            "links": link_to("atlas-pods"),
        },
        {
            "id": 6,
            "title": "CrashLoop / ImagePull",
            "expr": CRASHLOOP_EXPR,
            "kind": "stat",
            "thresholds": {
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 1},
                    {"color": "orange", "value": 2},
                    {"color": "red", "value": 3},
                ],
            },
            "links": link_to("atlas-pods"),
        },
        {
            "id": 1,
            "title": "Workers Ready",
            "expr": f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})',
            "kind": "gauge",
            "max_value": WORKER_TOTAL,
            "thresholds": {
                "mode": "absolute",
                "steps": [
                    {"color": "red", "value": None},
                    {"color": "orange", "value": WORKER_TOTAL - 2},
                    {"color": "yellow", "value": WORKER_TOTAL - 1},
                    {"color": "green", "value": WORKER_TOTAL},
                ],
            },
        },
    ]

    def gauge_grid(idx):
        width = GAUGE_WIDTHS[idx] if idx < len(GAUGE_WIDTHS) else 4
        x = sum(GAUGE_WIDTHS[:idx])
        return width, x

    for idx, item in enumerate(row1_stats):
        panel_id = item["id"]
        width, x = gauge_grid(idx)
        grid = {"h": 5, "w": width, "x": x, "y": 0}
        kind = item.get("kind", "gauge")
        if kind == "stat":
            panels.append(
                stat_panel(
                    panel_id,
                    item["title"],
                    item["expr"],
                    grid,
                    thresholds=item.get("thresholds"),
            legend=None,
            links=item.get("links"),
            text_mode=item.get("text_mode", "value"),
            value_suffix=item.get("value_suffix"),
            unit=item.get("unit", "none"),
            decimals=item.get("decimals"),
        )
    )
        else:
            panels.append(
                gauge_panel(
                    panel_id,
                    item["title"],
                    item["expr"],
                    grid,
                    min_value=0,
                    max_value=item.get("max_value", 5),
                    thresholds=item.get("thresholds"),
                    links=item.get("links"),
                )
            )

    hottest = [
        (7, "Hottest node: CPU", topk_with_node(node_cpu_expr()), "percent"),
        (8, "Hottest node: RAM", topk_with_node(node_mem_expr()), "percent"),
        (9, "Hottest node: NET (rx+tx)", topk_with_node(node_net_expr()), "Bps"),
        (10, "Hottest node: I/O (r+w)", topk_with_node(node_io_expr()), "Bps"),
    ]
    for idx, (panel_id, title, expr, unit) in enumerate(hottest):
        panels.append(
            stat_panel(
                panel_id,
                title,
                f"{expr}",
                {"h": 3, "w": 6, "x": 6 * idx, "y": 5},
                unit=unit,
                thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
                text_mode="name_and_value",
                legend="{{node}}",
                instant=True,
                links=link_to("atlas-nodes"),
            )
        )

    mail_bounce_rate_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 5},
            {"color": "orange", "value": 8},
            {"color": "red", "value": 10},
        ],
    }
    mail_limit_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 70},
            {"color": "orange", "value": 85},
            {"color": "red", "value": 95},
        ],
    }
    mail_success_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "red", "value": None},
            {"color": "orange", "value": 90},
            {"color": "yellow", "value": 95},
            {"color": "green", "value": 98},
        ],
    }
    panels.append(
        stat_panel(
            30,
            "Mail Sent (1d)",
            'max(postmark_outbound_sent{window="1d"})',
            {"h": 2, "w": 5, "x": 0, "y": 8},
            unit="none",
            links=link_to("atlas-mail"),
        )
    )
    panels.append(
        {
            "id": 31,
            "type": "stat",
            "title": "Mail Bounces (1d)",
            "datasource": PROM_DS,
            "gridPos": {"h": 2, "w": 5, "x": 10, "y": 8},
            "targets": [
                {
                    "expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
                    "refId": "A",
                    "legendFormat": "Rate",
                },
                {
                    "expr": 'max(postmark_outbound_bounced{window="1d"})',
                    "refId": "B",
                    "legendFormat": "Count",
                },
            ],
            "fieldConfig": {
                "defaults": {
                    "color": {"mode": "thresholds"},
                    "custom": {"displayMode": "auto"},
                    "thresholds": mail_bounce_rate_thresholds,
                    "unit": "none",
                },
                "overrides": [
                    {
                        "matcher": {"id": "byName", "options": "Rate"},
                        "properties": [{"id": "unit", "value": "percent"}],
                    },
                    {
                        "matcher": {"id": "byName", "options": "Count"},
                        "properties": [{"id": "unit", "value": "none"}],
                    },
                ],
            },
            "options": {
                "colorMode": "value",
                "graphMode": "area",
                "justifyMode": "center",
                "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
                "textMode": "name_and_value",
            },
            "links": link_to("atlas-mail"),
        }
    )
    panels.append(
        stat_panel(
            32,
            "Mail Success Rate (1d)",
            'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
            {"h": 2, "w": 5, "x": 5, "y": 8},
            unit="percent",
            thresholds=mail_success_thresholds,
            decimals=1,
            links=link_to("atlas-mail"),
        )
    )
    panels.append(
        stat_panel(
            33,
            "Mail Limit Used (30d)",
            "max(postmark_sending_limit_used_percent)",
            {"h": 2, "w": 5, "x": 15, "y": 8},
            unit="percent",
            thresholds=mail_limit_thresholds,
            decimals=1,
            links=link_to("atlas-mail"),
        )
    )

    storage_panels = [
        (23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
        (24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"),
        (25, "Astreae Free", astreae_free_expr("/mnt/astreae"), "decbytes"),
        (26, "Asteria Free", astreae_free_expr("/mnt/asteria"), "decbytes"),
    ]
    for idx, (panel_id, title, expr, unit) in enumerate(storage_panels):
        panels.append(
            stat_panel(
                panel_id,
                title,
                expr,
                {"h": 6, "w": 6, "x": 6 * idx, "y": 10},
                unit=unit,
                thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
                links=link_to("atlas-storage"),
            )
        )

    cpu_scope = "$namespace_scope_cpu"
    gpu_scope = "$namespace_scope_gpu"
    ram_scope = "$namespace_scope_ram"

    panels.append(
        pie_panel(
            11,
            "Namespace CPU Share",
            namespace_cpu_share_expr(cpu_scope),
            {"h": 9, "w": 8, "x": 0, "y": 16},
            links=namespace_scope_links("namespace_scope_cpu"),
            description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
        )
    )
    panels.append(
        pie_panel(
            12,
            "Namespace GPU Share",
            namespace_gpu_share_expr(gpu_scope),
            {"h": 9, "w": 8, "x": 8, "y": 16},
            links=namespace_scope_links("namespace_scope_gpu"),
            description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
        )
    )
    panels.append(
        pie_panel(
            13,
            "Namespace RAM Share",
            namespace_ram_share_expr(ram_scope),
            {"h": 9, "w": 8, "x": 16, "y": 16},
            links=namespace_scope_links("namespace_scope_ram"),
            description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
        )
    )

    worker_filter = f"{WORKER_REGEX}"
    panels.append(
        timeseries_panel(
            14,
            "Worker Node CPU",
            node_cpu_expr(worker_filter),
            {"h": 12, "w": 12, "x": 0, "y": 32},
            unit="percent",
            legend="{{node}}",
            legend_calcs=["last"],
            legend_display="table",
            legend_placement="right",
            links=link_to("atlas-nodes"),
        )
    )
    panels.append(
        timeseries_panel(
            15,
            "Worker Node RAM",
            node_mem_expr(worker_filter),
            {"h": 12, "w": 12, "x": 12, "y": 32},
            unit="percent",
            legend="{{node}}",
            legend_calcs=["last"],
            legend_display="table",
            legend_placement="right",
            links=link_to("atlas-nodes"),
        )
    )

    panels.append(
        timeseries_panel(
            16,
            "Control plane CPU",
            node_cpu_expr(CONTROL_ALL_REGEX),
            {"h": 10, "w": 12, "x": 0, "y": 44},
            unit="percent",
            legend="{{node}}",
            legend_display="table",
            legend_placement="right",
        )
    )
    panels.append(
        timeseries_panel(
            17,
            "Control plane RAM",
            node_mem_expr(CONTROL_ALL_REGEX),
            {"h": 10, "w": 12, "x": 12, "y": 44},
            unit="percent",
            legend="{{node}}",
            legend_display="table",
            legend_placement="right",
        )
    )

    panels.append(
        pie_panel(
            28,
            "Node Pod Share",
            '(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100',
            {"h": 10, "w": 12, "x": 0, "y": 54},
        )
    )
    panels.append(
        bargauge_panel(
            29,
            "Top Nodes by Pod Count",
            'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))',
            {"h": 10, "w": 12, "x": 12, "y": 54},
            unit="none",
            limit=12,
            decimals=0,
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 50},
                    {"color": "orange", "value": 75},
                    {"color": "red", "value": 100},
                ],
            },
            instant=True,
        )
    )

    panels.append(
        timeseries_panel(
            18,
            "Cluster Ingress Throughput",
            NET_INGRESS_EXPR,
            {"h": 7, "w": 8, "x": 0, "y": 25},
            unit="Bps",
            legend="Ingress (Traefik)",
            legend_display="list",
            legend_placement="bottom",
            links=link_to("atlas-network"),
        )
    )
    panels.append(
        timeseries_panel(
            19,
            "Cluster Egress Throughput",
            NET_EGRESS_EXPR,
            {"h": 7, "w": 8, "x": 8, "y": 25},
            unit="Bps",
            legend="Egress (Traefik)",
            legend_display="list",
            legend_placement="bottom",
            links=link_to("atlas-network"),
        )
    )
    panels.append(
        timeseries_panel(
            20,
            "Intra-Cluster Throughput",
            NET_INTERNAL_EXPR,
            {"h": 7, "w": 8, "x": 16, "y": 25},
            unit="Bps",
            legend="Internal traffic",
            legend_display="list",
            legend_placement="bottom",
            links=link_to("atlas-network"),
        )
    )

    panels.append(
        timeseries_panel(
            21,
            "Root Filesystem Usage",
            root_usage_expr(),
            {"h": 16, "w": 12, "x": 0, "y": 64},
            unit="percent",
            legend="{{node}}",
            legend_calcs=["last"],
            legend_display="table",
            legend_placement="right",
            time_from="30d",
            links=link_to("atlas-storage"),
        )
    )
    panels.append(
        bargauge_panel(
            22,
            "Nodes Closest to Full Root Disks",
            f"topk(12, {root_usage_expr()})",
            {"h": 16, "w": 12, "x": 12, "y": 64},
            unit="percent",
            thresholds=PERCENT_THRESHOLDS,
            links=link_to("atlas-storage"),
        )
    )
    return {
        "uid": "atlas-overview",
        "title": "Atlas Overview",
        "folderUid": PUBLIC_FOLDER,
        "editable": False,
        "annotations": {"list": []},
        "panels": panels,
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "overview"],
        "templating": {
            "list": [
                namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
                namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
                namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
            ]
        },
        "time": {"from": "now-1h", "to": "now"},
        "refresh": "1m",
        "links": [],
    }


def build_pods_dashboard():
    """Build the "Atlas Pods" dashboard definition (uid ``atlas-pods``).

    Layout, top to bottom:
      * four stat tiles that turn red as soon as their value reaches 1,
      * three full-width tables (not running, crash-looping, terminating),
      * a pie / bar-gauge pair describing pod distribution across nodes,
      * a table reporting, per namespace, the node holding the largest
        share of that namespace's pods.

    Returns:
        dict: dashboard definition filed under ``PRIVATE_FOLDER``.
    """

    def grid(h, w, x, y):
        """Return a gridPos mapping for a panel."""
        return {"h": h, "w": w, "x": x, "y": y}

    def red_when_nonzero():
        """Return a fresh threshold set: green baseline, red from 1 upward."""
        return {
            "mode": "absolute",
            "steps": [
                {"color": "green", "value": None},
                {"color": "red", "value": 1},
            ],
        }

    def labels_to_fields():
        """Return a fresh ``labelsToFields`` transformation."""
        return {"id": "labelsToFields", "options": {}}

    # --- Row 0: headline counters, six grid columns each --------------------
    headline_stats = (
        ("Problem Pods", PROBLEM_PODS_EXPR),
        ("CrashLoop / ImagePull", CRASHLOOP_EXPR),
        ("Stuck Terminating (>10m)", STUCK_TERMINATING_EXPR),
        (
            "Control Plane Workloads",
            f'sum(kube_pod_info{{node=~"{CONTROL_REGEX}",namespace!~"{CP_ALLOWED_NS}"}})',
        ),
    )
    panels = [
        stat_panel(
            slot + 1,
            title,
            query,
            grid(4, 6, 6 * slot, 0),
            thresholds=red_when_nonzero(),
        )
        for slot, (title, query) in enumerate(headline_stats)
    ]

    # --- Detail tables: one per problem class, stacked vertically -----------
    over_ten_minutes = {
        "id": "filterByValue",
        "options": {"match": "Value", "operator": "gt", "value": 600},
    }
    detail_tables = (
        (5, "Pods Not Running", PROBLEM_TABLE_EXPR, 4, []),
        (6, "CrashLoop / ImagePull", CRASHLOOP_TABLE_EXPR, 14, []),
        (7, "Terminating >10m", STUCK_TABLE_EXPR, 24, [over_ten_minutes]),
    )
    for panel_id, title, query, row, extra_steps in detail_tables:
        panels.append(
            table_panel(
                panel_id,
                title,
                query,
                grid(10, 24, 0, row),
                unit="s",
                transformations=[labels_to_fields(), *extra_steps],
            )
        )

    # --- Pod distribution across nodes --------------------------------------
    pods_total = 'sum(kube_pod_info{pod!="" , node!=""})'
    panels.append(
        pie_panel(
            8,
            "Node Pod Share",
            f"({pods_total} by (node) / clamp_min({pods_total}, 1)) * 100",
            grid(8, 12, 12, 34),
        )
    )
    panels.append(
        bargauge_panel(
            9,
            "Top Nodes by Pod Count",
            f"topk(12, {pods_total} by (node))",
            grid(8, 12, 0, 34),
            unit="none",
            limit=12,
            decimals=0,
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 50},
                    {"color": "orange", "value": 75},
                    {"color": "red", "value": 100},
                ],
            },
            instant=True,
        )
    )

    # --- Namespace plurality -------------------------------------------------
    # Percentage of each namespace's pods that live on each node.
    ns_node_share = (
        '(sum by (namespace,node) (kube_pod_info{pod!="" , node!=""}) '
        '/ on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=""}), 1) * 100)'
    )
    # Each known node contributes a distinct small offset (rank * 1e-3);
    # presumably this breaks ties so one node wins per namespace.
    tie_breakers = []
    for rank, node_name in enumerate(CONTROL_ALL + WORKER_NODES, start=1):
        tie_breakers.append(
            f'(sum by (node) (kube_node_info{{node="{node_name}"}}) * 0 + {rank * 1e-3})'
        )
    tie_break_expr = " or ".join(tie_breakers)
    scored = f"{ns_node_share} + on(node) group_left() ({tie_break_expr})"
    # 1 where a (namespace, node) pair carries the namespace's top score, else 0.
    winner_mask = (
        f"{scored} == bool on(namespace) group_left() (max by (namespace) ({scored}))"
    )
    plurality_query = (
        f"{ns_node_share} * on(namespace,node) group_left() ({winner_mask})"
    )
    panels.append(
        table_panel(
            10,
            "Namespace Plurality by Node v27",
            plurality_query,
            grid(8, 24, 0, 42),
            unit="percent",
            transformations=[
                labels_to_fields(),
                {"id": "organize", "options": {"excludeByName": {"Time": True}}},
                {
                    "id": "filterByValue",
                    "options": {"match": "Value", "operator": "gt", "value": 0},
                },
                {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
                {
                    "id": "groupBy",
                    "options": {
                        "fields": {
                            "namespace": {
                                "aggregations": [
                                    {"field": "Value", "operation": "max"},
                                    {"field": "node", "operation": "first"},
                                ]
                            }
                        },
                        "rowBy": ["namespace"],
                    },
                },
            ],
            instant=True,
            options={"showColumnFilters": False},
            filterable=False,
            footer={"show": False, "fields": "", "calcs": []},
            format="table",
        )
    )

    return {
        "uid": "atlas-pods",
        "title": "Atlas Pods",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "pods"],
    }


def build_nodes_dashboard():
    """Build the "Atlas Nodes" dashboard definition (uid ``atlas-nodes``).

    Layout, top to bottom:
      * readiness counts for workers and control plane, plus a count of
        pods on control-plane nodes outside ``CP_ALLOWED_NS``,
      * API server / etcd health tiles with yellow/orange/red thresholds,
      * per-node CPU, RAM and root-filesystem time series.

    Returns:
        dict: dashboard definition filed under ``PRIVATE_FOLDER``.
    """

    def grid(h, w, x, y):
        """Return a gridPos mapping for a panel."""
        return {"h": h, "w": w, "x": x, "y": y}

    def ladder(yellow, orange, red):
        """Return absolute thresholds: green baseline, then the three bounds."""
        return {
            "mode": "absolute",
            "steps": [
                {"color": "green", "value": None},
                {"color": "yellow", "value": yellow},
                {"color": "orange", "value": orange},
                {"color": "red", "value": red},
            ],
        }

    def ready_count(node_regex):
        """Return PromQL counting Ready=true nodes whose name matches the regex."""
        return (
            "sum(kube_node_status_condition"
            f'{{condition="Ready",status="true",node=~"{node_regex}"}})'
        )

    def node_legend(with_last):
        """Return the shared per-node series options, in call-site order."""
        opts = {"unit": "percent", "legend": "{{node}}"}
        if with_last:
            opts["legend_calcs"] = ["last"]
        opts["legend_display"] = "table"
        opts["legend_placement"] = "right"
        return opts

    # --- Row 0: readiness and placement counters ----------------------------
    panels = [
        stat_panel(
            1,
            "Worker Nodes Ready",
            ready_count(WORKER_REGEX),
            grid(4, 8, 0, 0),
            value_suffix=WORKER_SUFFIX,
        ),
        stat_panel(
            2,
            "Control Plane Ready",
            ready_count(CONTROL_REGEX),
            grid(4, 8, 8, 0),
            value_suffix=CONTROL_SUFFIX,
        ),
        stat_panel(
            3,
            "Control Plane Workloads",
            f'sum(kube_pod_info{{node=~"{CONTROL_REGEX}",namespace!~"{CP_ALLOWED_NS}"}})',
            grid(4, 8, 16, 0),
        ),
    ]

    # --- Row 4: control-plane service health --------------------------------
    # (panel id, title, query, unit, (yellow, orange, red), decimals)
    control_plane_slis = (
        (9, "API Server 5xx rate", APISERVER_5XX_RATE, "req/s", (0.05, 0.2, 0.5), 3),
        (10, "API Server P99 latency", APISERVER_P99_LATENCY_MS, "ms", (250, 400, 600), 1),
        (11, "etcd P99 latency", ETCD_P99_LATENCY_MS, "ms", (50, 100, 200), 1),
    )
    for column, (panel_id, title, query, unit, bounds, places) in enumerate(control_plane_slis):
        panels.append(
            stat_panel(
                panel_id,
                title,
                query,
                grid(4, 8, 8 * column, 4),
                unit=unit,
                thresholds=ladder(*bounds),
                decimals=places,
            )
        )

    # --- Time series: whole fleet first, then the control-plane group -------
    panels.extend(
        [
            timeseries_panel(
                4,
                "Node CPU",
                node_cpu_expr(),
                grid(9, 24, 0, 8),
                **node_legend(True),
            ),
            timeseries_panel(
                5,
                "Node RAM",
                node_mem_expr(),
                grid(9, 24, 0, 17),
                **node_legend(True),
            ),
            timeseries_panel(
                6,
                "Control Plane (incl. titan-db) CPU",
                node_cpu_expr(CONTROL_ALL_REGEX),
                grid(9, 12, 0, 26),
                **node_legend(False),
            ),
            timeseries_panel(
                7,
                "Control Plane (incl. titan-db) RAM",
                node_mem_expr(CONTROL_ALL_REGEX),
                grid(9, 12, 12, 26),
                **node_legend(False),
            ),
            # Root filesystem panel overrides the dashboard range with 30 days.
            timeseries_panel(
                8,
                "Root Filesystem Usage",
                root_usage_expr(),
                grid(9, 24, 0, 35),
                **node_legend(False),
                time_from="30d",
            ),
        ]
    )

    return {
        "uid": "atlas-nodes",
        "title": "Atlas Nodes",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "nodes"],
    }


def build_storage_dashboard():
    """Build the "Atlas Storage" dashboard definition (uid ``atlas-storage``).

    Layout, top to bottom:
      * usage (percent) and free-space (bytes) tiles for ``/mnt/astreae``
        and ``/mnt/asteria``,
      * per-node usage series for both mounts over a 30-day panel range,
      * aggregate usage history for both mounts over a 90-day panel range,
      * two maintenance tiles: image-sweeper DaemonSet readiness and time
        since the image-sweeper CronJob last succeeded.

    Returns:
        dict: dashboard definition filed under ``PRIVATE_FOLDER``.
    """
    # (display label, mount point); order fixes panel ids and columns.
    mounts = (("Astreae", "/mnt/astreae"), ("Asteria", "/mnt/asteria"))

    # Readiness is a "higher is better" percentage. PERCENT_THRESHOLDS is a
    # usage palette (red from 91.5 upward) and would paint a fully ready
    # DaemonSet red, so this tile uses an inverted ladder instead.
    readiness_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "red", "value": None},
            {"color": "orange", "value": 90},
            {"color": "yellow", "value": 95},
            {"color": "green", "value": 98},
        ],
    }

    panels = []

    # --- Row 0: usage tiles (ids 1-2), then free-space tiles (ids 3-4) ------
    for column, (label, mount) in enumerate(mounts):
        panels.append(
            stat_panel(
                1 + column,
                f"{label} Usage",
                astreae_usage_expr(mount),
                {"h": 5, "w": 6, "x": 6 * column, "y": 0},
                unit="percent",
                thresholds=PERCENT_THRESHOLDS,
            )
        )
    for column, (label, mount) in enumerate(mounts):
        panels.append(
            stat_panel(
                3 + column,
                f"{label} Free",
                astreae_free_expr(mount),
                {"h": 5, "w": 6, "x": 12 + 6 * column, "y": 0},
                unit="decbytes",
            )
        )

    # --- Row 5: per-node usage, restricted to LONGHORN_NODE_REGEX -----------
    for column, (label, mount) in enumerate(mounts):
        panels.append(
            timeseries_panel(
                5 + column,
                f"{label} Per-Node Usage",
                filesystem_usage_expr(mount, LONGHORN_NODE_REGEX),
                {"h": 9, "w": 12, "x": 12 * column, "y": 5},
                unit="percent",
                legend="{{node}}",
                legend_display="table",
                legend_placement="right",
                time_from="30d",
            )
        )

    # --- Row 14: aggregate usage history -------------------------------------
    for column, (label, mount) in enumerate(mounts):
        panels.append(
            timeseries_panel(
                7 + column,
                f"{label} Usage History",
                astreae_usage_expr(mount),
                {"h": 9, "w": 12, "x": 12 * column, "y": 14},
                unit="percent",
                time_from="90d",
            )
        )

    # --- Row 44: maintenance health -------------------------------------------
    panels.append(
        stat_panel(
            30,
            "Maintenance Sweepers Ready",
            'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100',
            {"h": 4, "w": 12, "x": 0, "y": 44},
            unit="percent",
            thresholds=readiness_thresholds,
        )
    )
    panels.append(
        stat_panel(
            31,
            "Maintenance Cron Freshness (s)",
            'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"})',
            {"h": 4, "w": 12, "x": 12, "y": 44},
            unit="s",
            # Seconds since last success: yellow after 1 h, red after 3 h.
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 3600},
                    {"color": "red", "value": 10800},
                ],
            },
        )
    )

    return {
        "uid": "atlas-storage",
        "title": "Atlas Storage",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "storage"],
    }


def build_network_dashboard():
    """Build the "Atlas Network" dashboard definition (uid ``atlas-network``).

    Layout, top to bottom:
      * Traefik SLI tiles: success rate, 1h / 6h error-budget burn, P99 latency,
      * ingress / egress / intra-cluster throughput tiles,
      * per-node throughput time series,
      * top-10 namespace and pod tables by combined transmit + receive rate,
      * Traefik router and entrypoint request-rate time series.

    Returns:
        dict: dashboard definition filed under ``PRIVATE_FOLDER``.
    """

    def grid(h, w, x, y):
        """Return a gridPos mapping for a panel."""
        return {"h": h, "w": w, "x": x, "y": y}

    def bands(base_color, *stops):
        """Return absolute thresholds: a base colour, then (colour, bound) stops."""
        steps = [{"color": base_color, "value": None}]
        steps.extend({"color": color, "value": bound} for color, bound in stops)
        return {"mode": "absolute", "steps": steps}

    def burn_bands():
        """Return a fresh threshold set for the burn-rate tiles."""
        return bands("green", ("yellow", 1), ("orange", 2), ("red", 4))

    def top_talkers(selector, group_by):
        """Return PromQL for the ten largest tx+rx byte rates over 5m."""
        return (
            f"topk(10, sum(rate(container_network_transmit_bytes_total{{{selector}}}[5m]) "
            f"+ rate(container_network_receive_bytes_total{{{selector}}}[5m])) by ({group_by}))"
        )

    # --- Row 0: edge SLIs -----------------------------------------------------
    panels = [
        stat_panel(
            1,
            "Ingress Success Rate (5m)",
            TRAEFIK_SLI_5M,
            grid(4, 6, 0, 0),
            unit="percentunit",
            decimals=2,
            # Higher is better, so the ladder runs red -> green.
            thresholds=bands(
                "red", ("orange", 0.995), ("yellow", 0.999), ("green", 0.9995)
            ),
        )
    ]
    for slot, window in enumerate(("1h", "6h"), start=1):
        panels.append(
            stat_panel(
                slot + 1,
                f"Error Budget Burn ({window})",
                traefik_burn(window),
                grid(4, 6, 6 * slot, 0),
                thresholds=burn_bands(),
                decimals=2,
            )
        )
    panels.append(
        stat_panel(
            4,
            "Edge P99 Latency (ms)",
            TRAEFIK_P99_LATENCY_MS,
            grid(4, 6, 18, 0),
            unit="ms",
            thresholds=bands("green", ("yellow", 200), ("orange", 350), ("red", 500)),
            decimals=1,
        )
    )

    # --- Row 4: throughput tiles ----------------------------------------------
    traffic_tiles = (
        (5, "Ingress Traffic", NET_INGRESS_EXPR),
        (6, "Egress Traffic", NET_EGRESS_EXPR),
        (7, "Intra-Cluster Traffic", NET_INTERNAL_EXPR),
    )
    for column, (panel_id, title, query) in enumerate(traffic_tiles):
        panels.append(
            stat_panel(panel_id, title, query, grid(4, 8, 8 * column, 4), unit="Bps")
        )

    # --- Row 8: per-node throughput ---------------------------------------------
    per_node_query = (
        f"avg by (node) (({NET_NODE_TX_PHYS} + {NET_NODE_RX_PHYS}) "
        f"* on(instance) group_left(node) {NODE_INFO})"
    )
    panels.append(
        timeseries_panel(
            8,
            "Per-Node Throughput",
            per_node_query,
            grid(8, 24, 0, 8),
            unit="Bps",
            legend="{{node}}",
            legend_display="table",
            legend_placement="right",
        )
    )

    # --- Row 16: heaviest namespaces and pods -----------------------------------
    talker_tables = (
        (9, "Top Namespaces", top_talkers('namespace!=""', "namespace"), 0),
        (10, "Top Pods", top_talkers('pod!=""', "namespace,pod"), 12),
    )
    for panel_id, title, query, left in talker_tables:
        panels.append(
            table_panel(
                panel_id,
                title,
                query,
                grid(9, 12, left, 16),
                unit="Bps",
                transformations=[{"id": "labelsToFields", "options": {}}],
            )
        )

    # --- Row 25: Traefik request rates -------------------------------------------
    panels.append(
        timeseries_panel(
            11,
            "Traefik Routers (req/s)",
            f"topk(10, {TRAEFIK_ROUTER_EXPR})",
            grid(9, 12, 0, 25),
            unit="req/s",
            legend="{{router}}",
            legend_display="table",
            legend_placement="right",
        )
    )
    panels.append(
        timeseries_panel(
            12,
            "Traefik Entrypoints (req/s)",
            "sum by (entrypoint) (rate(traefik_entrypoint_requests_total[5m]))",
            grid(9, 12, 12, 25),
            unit="req/s",
            legend="{{entrypoint}}",
            legend_display="table",
            legend_placement="right",
        )
    )

    return {
        "uid": "atlas-network",
        "title": "Atlas Network",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "network"],
    }


def build_mail_dashboard():
    """Assemble the "Atlas Mail" dashboard from the ``postmark_*`` metrics.

    The layout is two rows of four stat tiles (ids 1-8) followed by four
    time-series panels (ids 13-16).

    Returns:
        The dashboard definition as a plain dict.
    """

    def absolute_thresholds(*tiers):
        # Each tier is (color, lower bound); a bound of None marks the base step.
        return {
            "mode": "absolute",
            "steps": [{"color": color, "value": bound} for color, bound in tiers],
        }

    bounce_rate_thresholds = absolute_thresholds(
        ("green", None), ("yellow", 5), ("orange", 8), ("red", 10)
    )
    limit_thresholds = absolute_thresholds(
        ("green", None), ("yellow", 70), ("orange", 85), ("red", 95)
    )
    # Palette runs red -> green here: for a success rate the low values are bad.
    success_thresholds = absolute_thresholds(
        ("red", None), ("orange", 90), ("yellow", 95), ("green", 98)
    )

    # The bounce tile is hand-built because it carries two queries; the legend
    # names used here are what the per-series unit overrides match on.
    bounce_targets = [
        {"expr": expr, "refId": ref_id, "legendFormat": series}
        for ref_id, series, expr in (
            ("A", "Rate", 'max(postmark_outbound_bounce_rate{window="1d"})'),
            ("B", "Count", 'max(postmark_outbound_bounced{window="1d"})'),
        )
    ]
    bounce_unit_overrides = [
        {
            "matcher": {"id": "byName", "options": series},
            "properties": [{"id": "unit", "value": unit}],
        }
        for series, unit in (("Rate", "percent"), ("Count", "none"))
    ]

    panels = [
        # --- Row 1 (y=0): volume, bounces, success rate ---
        stat_panel(
            1,
            "Sent (1d)",
            'max(postmark_outbound_sent{window="1d"})',
            {"h": 4, "w": 6, "x": 0, "y": 0},
            decimals=0,
        ),
        stat_panel(
            2,
            "Sent (7d)",
            'max(postmark_outbound_sent{window="7d"})',
            {"h": 4, "w": 6, "x": 6, "y": 0},
            decimals=0,
        ),
        {
            "id": 3,
            "type": "stat",
            "title": "Mail Bounces (1d)",
            "datasource": PROM_DS,
            "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
            "targets": bounce_targets,
            "fieldConfig": {
                "defaults": {
                    "color": {"mode": "thresholds"},
                    "custom": {"displayMode": "auto"},
                    "thresholds": bounce_rate_thresholds,
                    "unit": "none",
                },
                "overrides": bounce_unit_overrides,
            },
            "options": {
                "colorMode": "value",
                "graphMode": "area",
                "justifyMode": "center",
                "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
                "textMode": "name_and_value",
            },
        },
        stat_panel(
            4,
            "Success Rate (1d)",
            'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
            {"h": 4, "w": 6, "x": 18, "y": 0},
            unit="percent",
            thresholds=success_thresholds,
            decimals=1,
        ),
        # --- Row 2 (y=4): sending limit and exporter health ---
        stat_panel(
            5,
            "Limit Used (30d)",
            "max(postmark_sending_limit_used_percent)",
            {"h": 4, "w": 6, "x": 0, "y": 4},
            thresholds=limit_thresholds,
            unit="percent",
            decimals=1,
        ),
        stat_panel(
            6,
            "Send Limit (30d)",
            "max(postmark_sending_limit)",
            {"h": 4, "w": 6, "x": 6, "y": 4},
            decimals=0,
        ),
        stat_panel(
            7,
            "Last Success",
            "max(postmark_last_success_timestamp_seconds)",
            {"h": 4, "w": 6, "x": 12, "y": 4},
            unit="dateTimeAsIso",
            decimals=0,
        ),
        stat_panel(
            8,
            "Exporter Errors",
            "sum(postmark_request_errors_total)",
            {"h": 4, "w": 6, "x": 18, "y": 4},
            decimals=0,
        ),
    ]

    # NOTE(review): the stat rows end at y=8 but the charts start at y=12, and
    # ids jump from 8 to 13 - looks like a removed row; confirm this is intended.
    windowed_series = (
        (13, "Bounce Rate (1d vs 7d)", "max by (window) (postmark_outbound_bounce_rate)", 0, 12, "percent"),
        (14, "Bounced (1d vs 7d)", "max by (window) (postmark_outbound_bounced)", 12, 12, "none"),
        (15, "Sent (1d vs 7d)", "max by (window) (postmark_outbound_sent)", 0, 20, "none"),
    )
    for panel_id, title, expr, x, y, unit in windowed_series:
        panels.append(
            timeseries_panel(
                panel_id,
                title,
                expr,
                {"h": 8, "w": 12, "x": x, "y": y},
                unit=unit,
                legend="{{window}}",
                legend_display="table",
                legend_placement="right",
            )
        )
    panels.append(
        timeseries_panel(
            16,
            "Exporter Errors",
            "sum(postmark_request_errors_total)",
            {"h": 8, "w": 12, "x": 12, "y": 20},
            unit="none",
        )
    )

    dashboard = {
        "uid": "atlas-mail",
        "title": "Atlas Mail",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-30d", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "mail"],
    }
    return dashboard


def build_testing_dashboard():
    """Assemble the "Atlas Testing" dashboard (Glue job and Ariadne panels).

    One stat tile counts stale Glue jobs; every other panel is an instant-query
    table that shares the same transformation chain.

    Returns:
        The dashboard definition as a plain dict.
    """
    # Shared by all tables: labelsToFields, then a descending sortBy on "Value".
    sort_desc = [
        {"id": "labelsToFields", "options": {}},
        {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
    ]

    stale_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 1},
            {"color": "orange", "value": 2},
            {"color": "red", "value": 3},
        ],
    }
    panels = [
        stat_panel(
            1,
            "Glue Jobs Stale (>36h)",
            GLUE_STALE_COUNT,
            {"h": 4, "w": 6, "x": 0, "y": 0},
            unit="none",
            thresholds=stale_thresholds,
        )
    ]

    # (id, title, expression, (h, w, x, y), unit) for each table, in display order.
    table_specs = (
        (2, "Glue Jobs Missing Success", GLUE_MISSING_ACTIVE, (4, 6, 6, 0), "none"),
        (3, "Glue Jobs Suspended", GLUE_SUSPENDED, (4, 6, 12, 0), "none"),
        (4, "Glue Jobs Active Runs", GLUE_ACTIVE, (4, 6, 18, 0), "none"),
        (5, "Glue Jobs Last Success (hours ago)", GLUE_LAST_SUCCESS_AGE_HOURS, (8, 12, 0, 4), "h"),
        (6, "Glue Jobs Last Schedule (hours ago)", GLUE_LAST_SCHEDULE_AGE_HOURS, (8, 12, 12, 4), "h"),
        (7, "Ariadne Task Errors (24h)", ARIADNE_TASK_ERRORS_24H, (6, 12, 0, 12), "none"),
        (8, "Ariadne Schedule Last Success (hours ago)", ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS, (6, 12, 12, 12), "h"),
        (9, "Ariadne Access Requests", ARIADNE_ACCESS_REQUESTS, (4, 24, 0, 18), "none"),
    )
    for panel_id, title, expr, (h, w, x, y), unit in table_specs:
        panels.append(
            table_panel(
                panel_id,
                title,
                expr,
                {"h": h, "w": w, "x": x, "y": y},
                unit=unit,
                transformations=sort_desc,
                instant=True,
            )
        )

    dashboard = {
        "uid": "atlas-testing",
        "title": "Atlas Testing",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-7d", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "testing"],
    }
    return dashboard


def build_gpu_dashboard():
    """Assemble the "Atlas GPU" dashboard from the DCGM utilisation metric.

    Four panels in a 2x2 grid: namespace share (pie), utilisation by namespace
    and by node (time series), and the top pods (table). The dashboard declares
    the CPU, GPU and RAM namespace-scope variables.

    Returns:
        The dashboard definition as a plain dict.
    """
    gpu_scope = "$namespace_scope_gpu"
    # Both time-series panels render their legend as a table on the right.
    side_legend = {"legend_display": "table", "legend_placement": "right"}

    panels = [
        pie_panel(
            1,
            "Namespace GPU Share",
            namespace_gpu_share_expr(gpu_scope),
            {"h": 8, "w": 12, "x": 0, "y": 0},
            links=namespace_scope_links("namespace_scope_gpu"),
            description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
        ),
        timeseries_panel(
            2,
            "GPU Util by Namespace",
            namespace_gpu_usage_instant(gpu_scope),
            {"h": 8, "w": 12, "x": 12, "y": 0},
            unit="percent",
            legend="{{namespace}}",
            **side_legend,
        ),
        timeseries_panel(
            3,
            "GPU Util by Node",
            'sum by (Hostname) (DCGM_FI_DEV_GPU_UTIL{pod!=""})',
            {"h": 8, "w": 12, "x": 0, "y": 8},
            unit="percent",
            legend="{{Hostname}}",
            **side_legend,
        ),
        table_panel(
            4,
            "Top Pods by GPU Util",
            'topk(10, sum(DCGM_FI_DEV_GPU_UTIL{pod!=""}) by (namespace,pod,Hostname))',
            {"h": 8, "w": 12, "x": 12, "y": 8},
            unit="percent",
            transformations=[{"id": "labelsToFields", "options": {}}],
        ),
    ]

    scope_variables = [
        namespace_scope_variable(variable, label)
        for variable, label in (
            ("namespace_scope_cpu", "CPU namespace filter"),
            ("namespace_scope_gpu", "GPU namespace filter"),
            ("namespace_scope_ram", "RAM namespace filter"),
        )
    ]
    dashboard = {
        "uid": "atlas-gpu",
        "title": "Atlas GPU",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "gpu"],
        "templating": {"list": scope_variables},
    }
    return dashboard


# Registry of every dashboard: uid -> the builder that produces its JSON and
# the ConfigMap manifest the JSON is rendered into. Each uid is
# "atlas-<slug>" and each manifest is "grafana-dashboard-<slug>.yaml".
DASHBOARDS = {
    f"atlas-{slug}": {
        "builder": builder,
        "configmap": ROOT / "services" / "monitoring" / f"grafana-dashboard-{slug}.yaml",
    }
    for slug, builder in (
        ("overview", build_overview),
        ("pods", build_pods_dashboard),
        ("nodes", build_nodes_dashboard),
        ("storage", build_storage_dashboard),
        ("network", build_network_dashboard),
        ("mail", build_mail_dashboard),
        ("testing", build_testing_dashboard),
        ("gpu", build_gpu_dashboard),
    )
}


def write_json(uid, data):
    """Write *data* as ``<uid>.json`` inside DASHBOARD_DIR.

    The directory is created if needed. The file is two-space-indented JSON
    terminated by a single newline.
    """
    DASHBOARD_DIR.mkdir(parents=True, exist_ok=True)
    target = DASHBOARD_DIR / f"{uid}.json"
    serialized = json.dumps(data, indent=2)
    target.write_text(f"{serialized}\n")


def render_configmap(uid, info):
    """Render the stored JSON for dashboard *uid* into its ConfigMap manifest.

    Reads ``DASHBOARD_DIR/<uid>.json``, re-serializes it with two-space
    indentation, indents every line by four spaces so it nests under the
    block scalar in CONFIG_TEMPLATE, and writes the manifest to
    ``info["configmap"]``. A one-line summary is printed on success.

    Args:
        uid: Dashboard uid; selects the ``<uid>.json`` input file.
        info: Registry entry; only its ``"configmap"`` Path is used.

    Raises:
        FileNotFoundError: If the JSON file has not been generated yet.
        json.JSONDecodeError: If the JSON file is not valid JSON.
    """
    json_path = DASHBOARD_DIR / f"{uid}.json"
    # Decode explicitly as UTF-8 rather than with the platform locale, so a
    # JSON file containing raw non-ASCII text parses the same everywhere.
    dashboard = json.loads(json_path.read_text(encoding="utf-8"))
    payload = json.dumps(dashboard, indent=2)
    indented = "\n".join("    " + line for line in payload.splitlines())
    output_path = info["configmap"]
    content = CONFIG_TEMPLATE.format(
        relative_path=output_path.relative_to(ROOT),
        name=output_path.stem,
        key=json_path.name,
        payload=indented,
    )
    output_path.write_text(content, encoding="utf-8")
    print(f"Rendered {json_path.name} -> {output_path.relative_to(ROOT)}")


def main():
    """Command-line entry point.

    With ``--build`` every dashboard's JSON is regenerated from its builder
    first. The ConfigMaps are then rendered from the JSON files in either case.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring has a hand-aligned "Usage:" block; the default
        # formatter would re-wrap it into a single paragraph in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--build", action="store_true", help="Regenerate dashboard JSON files from builders")
    args = parser.parse_args()

    if args.build:
        # Finish all JSON generation before rendering, so a failing builder
        # aborts the run before any ConfigMap is rewritten.
        for uid, info in DASHBOARDS.items():
            write_json(uid, info["builder"]())

    for uid, info in DASHBOARDS.items():
        render_configmap(uid, info)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()