#!/usr/bin/env python3 """Generate Atlas Grafana dashboards and render them into ConfigMaps. Usage: scripts/dashboards_render_atlas.py --build # rebuild JSON + ConfigMaps scripts/dashboards_render_atlas.py # re-render ConfigMaps from JSON """ import argparse import json import textwrap import urllib.parse from pathlib import Path # --------------------------------------------------------------------------- # Paths, folders, and shared metadata # --------------------------------------------------------------------------- ROOT = Path(__file__).resolve().parents[1] DASHBOARD_DIR = ROOT / "services" / "monitoring" / "dashboards" CONFIG_TEMPLATE = textwrap.dedent( """# {relative_path} apiVersion: v1 kind: ConfigMap metadata: name: {name} labels: grafana_dashboard: "1" data: {key}: | {payload} """ ) PROM_DS = {"type": "prometheus", "uid": "atlas-vm"} PUBLIC_FOLDER = "overview" PUBLIC_DASHBOARD_FOLDER = "atlas-public" PRIVATE_FOLDER = "atlas-internal" ASTRAIOS_MOUNTPOINT = "/mnt/astraios" GLOBAL_STATUS_COLOR_TONES = { "blue": "dark-blue", "green": "dark-green", "yellow": "dark-yellow", "orange": "dark-orange", "red": "dark-red", } COLOR_VALUE_KEYS = {"color", "fixedColor"} def apply_global_status_palette(value, parent_key=None): """Normalize generated Grafana status colors to the shared Atlas tones.""" if isinstance(value, dict): return {key: apply_global_status_palette(item, key) for key, item in value.items()} if isinstance(value, list): return [apply_global_status_palette(item, parent_key) for item in value] if parent_key in COLOR_VALUE_KEYS and isinstance(value, str): return GLOBAL_STATUS_COLOR_TONES.get(value, value) return value PERCENT_THRESHOLDS = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 50}, {"color": "orange", "value": 75}, {"color": "red", "value": 91.5}, ], } NAMESPACE_CPU_WINDOW = "1m" # --------------------------------------------------------------------------- # Cluster metadata # 
# ---------------------------------------------------------------------------
# Control-plane and worker node inventories.  NOTE: list order matters for
# NODE_TIEBREAKER below, which weights nodes by their position here.
CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"]
CONTROL_DEPENDENCIES = ["titan-db", "titan-jh"]
CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES
WORKER_NODES = [
    "titan-04",
    "titan-05",
    "titan-06",
    "titan-07",
    "titan-08",
    "titan-09",
    "titan-10",
    "titan-11",
    "titan-20",
    "titan-21",
    "titan-12",
    "titan-13",
    "titan-14",
    "titan-15",
    "titan-16",
    "titan-17",
    "titan-18",
    "titan-19",
    "titan-22",
    "titan-24",
]
# Regex alternations and totals derived from the inventories above.
CONTROL_REGEX = "|".join(CONTROL_PLANE_NODES)
CONTROL_ALL_REGEX = "|".join(CONTROL_ALL)
WORKER_REGEX = "|".join(WORKER_NODES)
CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
WORKER_TOTAL = len(WORKER_NODES)
# "/N" suffixes rendered next to ready counts on stat panels.
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}"

# Namespaces considered infrastructure (excluded from workload counts)
INFRA_PATTERNS = [
    "kube-.*",
    ".*-system",
    "traefik",
    "monitoring",
    "logging",
    "cert-manager",
    "maintenance",
    "postgres",
]
INFRA_REGEX = f"^({'|'.join(INFRA_PATTERNS)})$"
# Namespaces allowed on control plane without counting as workloads
CP_ALLOWED_NS = INFRA_REGEX
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
# Gauge panel grid widths used when laying out gauge rows (sums to 24).
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
# Count of non-infra pods scheduled onto control-plane nodes (0 when none).
CONTROL_WORKLOADS_EXPR = (
    f'sum(kube_pod_info{{node=~"{CONTROL_REGEX}",namespace!~"{CP_ALLOWED_NS}"}}) or on() vector(0)'
)

# ---------------------------------------------------------------------------
# PromQL helpers
# ---------------------------------------------------------------------------
# node_exporter series are keyed by instance; this label_replace copies the
# nodename label to "node" so node-level joins work.
NODE_INFO = 'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)")'


def node_filter(regex):
    """Return a selector that evaluates to 1 for nodes matching the regex."""
    return (
        f'label_replace(node_uname_info{{nodename=~"{regex}"}}, '
        '"node", "$1", "nodename", "(.*)")'
    )


def scoped_node_expr(base, scope=""):
    """Attach nodename metadata and optionally filter to a scope regex."""
    expr = f"avg by (node) (({base}) * on(instance) group_left(node) {NODE_INFO})"
    if scope:
        # Multiplying by the (value-1) node filter keeps only in-scope nodes.
        expr = f"({expr}) * on(node) group_left() {node_filter(scope)}"
    return expr


def node_cpu_expr(scope=""):
    """Per-node CPU busy percent (100 * (1 - idle rate)) over 5m."""
    idle = 'avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))'
    base = f"(1 - {idle}) * 100"
    return scoped_node_expr(base, scope)


def node_mem_expr(scope=""):
    """Per-node memory usage percent based on MemTotal/MemAvailable."""
    usage = (
        "avg by (instance) ("
        "(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) "
        "/ node_memory_MemTotal_bytes * 100)"
    )
    return scoped_node_expr(usage, scope)


def filesystem_usage_expr(mount, scope=""):
    """Per-node usage percent of the filesystem mounted at *mount*."""
    base = (
        f'avg by (instance) ('
        f'(1 - (node_filesystem_avail_bytes{{mountpoint="{mount}",fstype!~"tmpfs|overlay"}} '
        f'/ node_filesystem_size_bytes{{mountpoint="{mount}",fstype!~"tmpfs|overlay"}})) * 100)'
    )
    return scoped_node_expr(base, scope)


def root_usage_expr(scope=""):
    """Per-node root filesystem usage percent."""
    return filesystem_usage_expr("/", scope)


def astraios_usage_expr(scope=""):
    """Per-node usage percent of the shared Astraios mount."""
    return filesystem_usage_expr(ASTRAIOS_MOUNTPOINT, scope)


def astreae_usage_expr(mount):
    """Cluster-wide usage percent of *mount*, aggregated across all nodes."""
    return (
        f"100 - (sum(node_filesystem_avail_bytes{{mountpoint=\"{mount}\",fstype!~\"tmpfs|overlay\"}}) / "
        f"sum(node_filesystem_size_bytes{{mountpoint=\"{mount}\",fstype!~\"tmpfs|overlay\"}}) * 100)"
    )


def astreae_free_expr(mount):
    """Total free bytes for *mount* summed across all nodes."""
    return f"sum(node_filesystem_avail_bytes{{mountpoint=\"{mount}\",fstype!~\"tmpfs|overlay\"}})"


def topk_with_node(expr):
    """Take the top-1 series and surface its node label as the series name."""
    return f'label_replace(topk(1, {expr}), "__name__", "$1", "node", "(.*)")'


def node_net_expr(scope=""):
    """Per-node total network throughput (rx+tx bytes/s, excluding lo)."""
    base = (
        'sum by (instance) ('
        'rate(node_network_receive_bytes_total{device!~"lo"}[5m]) '
        '+ rate(node_network_transmit_bytes_total{device!~"lo"}[5m]))'
    )
    return scoped_node_expr(base, scope)


def node_io_expr(scope=""):
    """Per-node disk throughput (read+write bytes/s) over 5m."""
    base = (
        "sum by (instance) (rate(node_disk_read_bytes_total[5m]) "
        "+ rate(node_disk_written_bytes_total[5m]))"
    )
    return scoped_node_expr(base, scope)


def namespace_selector(scope_var):
    """cAdvisor selector for real containers (skips the pause "POD" container)."""
    return f'namespace!="",pod!="",container!="",container!="POD",{scope_var}'


def namespace_gpu_selector(scope_var):
    """Looser pod-level selector used by GPU queries."""
    return f'namespace!="",pod!="",{scope_var}'


def namespace_cpu_raw(scope_var):
    """Per-namespace CPU usage (cores) over NAMESPACE_CPU_WINDOW."""
    return (
        "sum(rate(container_cpu_usage_seconds_total"
        f"{{{namespace_selector(scope_var)}}}[{NAMESPACE_CPU_WINDOW}])) by (namespace)"
    )


def namespace_ram_raw(scope_var):
    """Per-namespace working-set memory in bytes."""
    return f"sum(container_memory_working_set_bytes{{{namespace_selector(scope_var)}}}) by (namespace)"


def namespace_gpu_usage_instant(scope_var):
    """Alias for gpu_usage_by_namespace, kept for call-site symmetry."""
    return gpu_usage_by_namespace(scope_var)


def jetson_gpu_util_by_node():
    """GPU utilization on Jetson nodes (GR3D frequency percent)."""
    return 'max by (node) (jetson_gr3d_freq_percent{node!=""})'


def dcgm_gpu_util_by_node():
    """GPU utilization from DCGM, joined to the node via kube_pod_info.

    DCGM exposes the node name as "Hostname"; it is copied into a pod label
    and a fixed namespace label is injected so the join against
    kube_pod_info{namespace="monitoring"} can resolve the node.
    """
    dcgm_pod = 'label_replace(DCGM_FI_DEV_GPU_UTIL, "pod", "$1", "Hostname", "(.*)")'
    dcgm_ns = 'label_replace(' + dcgm_pod + ', "namespace", "monitoring", "", "")'
    return (
        "avg by (node) ("
        f"{dcgm_ns} * on(namespace,pod) group_left(node) "
        'kube_pod_info{namespace="monitoring"}'
        ")"
    )


def gpu_util_by_node():
    """Per-node GPU utilization: DCGM (discrete) with Jetson fallback."""
    return f"{dcgm_gpu_util_by_node()} or {jetson_gpu_util_by_node()}"


def gpu_util_by_hostname():
    """Same as gpu_util_by_node but re-exposed under the Hostname label."""
    return 'label_replace(' + gpu_util_by_node() + ', "Hostname", "$1", "node", "(.*)")'


# Matches both label-sanitized and raw forms of the nvidia.com/gpu resource.
GPU_RESOURCE_REGEX = "nvidia(_com_|[.]com/)gpu.*"


def gpu_node_labels():
    """1 for every node that advertises allocatable NVIDIA GPUs."""
    return f'max by (node) (kube_node_status_allocatable{{resource=~"{GPU_RESOURCE_REGEX}"}} > bool 0)'


def gpu_requests_by_namespace_node(scope_var):
    """GPU resource requests grouped by (namespace, node), on GPU nodes only."""
    return (
        "sum by (namespace,node) ("
        f'kube_pod_container_resource_requests{{resource=~"{GPU_RESOURCE_REGEX}",{scope_var}}} '
        "* on(namespace,pod) group_left(node) kube_pod_info "
        f"* on(node) group_left() ({gpu_node_labels()})"
        ")"
    )


def gpu_usage_by_namespace(scope_var):
    """Apportion each node's GPU utilization to namespaces by request share."""
    requests_by_ns = gpu_requests_by_namespace_node(scope_var)
    total_by_node = f"sum by (node) ({requests_by_ns})"
    return (
        "sum by (namespace) ("
        # clamp_min avoids division by zero when a node has no GPU requests.
        f"({requests_by_ns}) / on(node) group_left() clamp_min({total_by_node}, 1) "
        f"* on(node) group_left() ({gpu_util_by_node()})"
        ")"
    )


def jetson_gpu_usage_by_namespace(scope_var):
    """Like gpu_usage_by_namespace but restricted to Jetson utilization."""
    requests_by_ns = gpu_requests_by_namespace_node(scope_var)
    total_by_node = f"sum by (node) ({requests_by_ns})"
    return (
        "sum by (namespace) ("
        f"({requests_by_ns}) / on(node) group_left() clamp_min({total_by_node}, 1) "
        f"* on(node) group_left() {jetson_gpu_util_by_node()}"
        ")"
    )


def namespace_share_expr(resource_expr):
    """Turn a per-namespace resource expr into a percent-of-total share."""
    total = f"clamp_min(sum( {resource_expr} ), 1)"
    return f"100 * ( {resource_expr} ) / {total}"


def namespace_cpu_share_expr(scope_var):
    """Per-namespace share of cluster CPU usage (percent)."""
    return namespace_share_expr(namespace_cpu_raw(scope_var))


def namespace_ram_share_expr(scope_var):
    """Per-namespace share of cluster RAM usage (percent)."""
    return namespace_share_expr(namespace_ram_raw(scope_var))


def namespace_gpu_share_expr(scope_var):
    """Per-namespace GPU share; shows a synthetic 100% "idle" row when no
    namespace is using any GPU."""
    usage = namespace_gpu_usage_instant(scope_var)
    total = f"(sum({usage}) or on() vector(0))"
    share = f"100 * ({usage}) / clamp_min({total}, 1)"
    idle = 'label_replace(vector(100), "namespace", "idle", "", "") * scalar(' + total + " == bool 0)"
    return f"({share}) or ({idle})"


# Counts of unhealthy pods; "or on() vector(0)" keeps panels at 0 when empty.
PROBLEM_PODS_EXPR = (
    'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"})) '
    "or on() vector(0)"
)
CRASHLOOP_EXPR = (
    'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason'
    '{reason=~"CrashLoopBackOff|ImagePullBackOff"})) '
    "or on() vector(0)"
)
# Pods whose deletion timestamp is set and older than 10 minutes.
STUCK_TERMINATING_EXPR = (
    'sum(max by (namespace,pod) ('
    '((time() - kube_pod_deletion_timestamp{pod!=""}) > bool 600)'
    ' and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=""} > bool 0)'
    ')) '
    "or on() vector(0)"
)
UPTIME_WINDOW = "365d"
# vmalert precomputes the expensive long-window rollup so Grafana only reads one compact series.
# Recording rule emitted by vmalert; last_over_time smooths scrape gaps.
UPTIME_RECORDING_METRIC = f'atlas:availability:ratio_{UPTIME_WINDOW}{{scope="atlas"}}'
UPTIME_RECORDING_EXPR = f"last_over_time({UPTIME_RECORDING_METRIC}[24h])"
# Fraction of desired Traefik replicas that are available (0..1).
TRAEFIK_READY_EXPR = (
    "("
    'sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})'
    " / clamp_min("
    'sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)'
    ")"
)
# Fraction of control-plane nodes reporting Ready (0..1).
CONTROL_READY_FRACTION_EXPR = (
    f"(sum(kube_node_status_condition{{condition=\"Ready\",status=\"true\",node=~\"{CONTROL_REGEX}\"}})"
    f" / {CONTROL_TOTAL})"
)
# Instantaneous availability = worse of control-plane readiness and ingress readiness.
UPTIME_AVAIL_EXPR = (
    f"min(({CONTROL_READY_FRACTION_EXPR}), ({TRAEFIK_READY_EXPR}))"
)
# Tie-breaker to deterministically pick one node per namespace when shares tie.
NODE_TIEBREAKER = " + ".join(
    f"({node_filter(node)}) * 1e-6 * {idx}"
    for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1)
)
UPTIME_AVG_EXPR = UPTIME_RECORDING_EXPR
UPTIME_PERCENT_EXPR = UPTIME_AVG_EXPR
# "Number of nines": clamp keeps log10 finite when availability is exactly 1.
UPTIME_NINES_EXPR = f"-log10(1 - clamp_max({UPTIME_AVG_EXPR}, 0.999999999))"
# Thresholds for the nines stat (>=3.5 nines is green).
UPTIME_THRESHOLDS = {
    "mode": "absolute",
    "steps": [
        {"color": "red", "value": None},
        {"color": "orange", "value": 2},
        {"color": "yellow", "value": 3},
        {"color": "green", "value": 3.5},
    ],
}
# Thresholds for the raw availability ratio (0..1 scale).
UPTIME_PERCENT_THRESHOLDS = {
    "mode": "absolute",
    "steps": [
        {"color": "red", "value": None},
        {"color": "orange", "value": 0.99},
        {"color": "yellow", "value": 0.999},
        {"color": "green", "value": 0.9999},
        {"color": "blue", "value": 0.99999},
    ],
}
# Table rows: pod age (seconds) with node and phase/reason labels joined on.
PROBLEM_TABLE_EXPR = (
    "(time() - kube_pod_created{pod!=\"\"}) "
    "* on(namespace,pod) group_left(node) kube_pod_info "
    "* on(namespace,pod) group_left(phase) "
    "max by (namespace,pod,phase) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})"
)
CRASHLOOP_TABLE_EXPR = (
    "(time() - kube_pod_created{pod!=\"\"}) "
    "* on(namespace,pod) group_left(node) kube_pod_info "
    "* on(namespace,pod,container) group_left(reason) "
    "max by (namespace,pod,container,reason) "
    "(kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})"
)
# Seconds each terminating pod has been stuck, with node joined on.
STUCK_TABLE_EXPR = (
    "("
    "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) "
    "and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) "
    "* on(namespace,pod) group_left(node) kube_pod_info"
    ")"
)
# Namespace scopes plugged into the selectors above via dashboard variables.
NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]


def promql_task_regex(tasks):
    """Return a PromQL-safe regex alternation for the provided task names."""
    return "|".join(tasks)


# Every Ariadne scheduled task exported to Prometheus.
ARIADNE_ALL_SCHEDULE_TASKS = [
    "schedule.mailu_sync",
    "schedule.nextcloud_sync",
    "schedule.nextcloud_cron",
    "schedule.nextcloud_maintenance",
    "schedule.vaultwarden_sync",
    "schedule.wger_user_sync",
    "schedule.wger_admin",
    "schedule.firefly_user_sync",
    "schedule.firefly_cron",
    "schedule.vault_k8s_auth",
    "schedule.vault_oidc",
    "schedule.comms_guest_name",
    "schedule.comms_pin_invite",
    "schedule.comms_reset_room",
    "schedule.comms_seed_room",
    "schedule.pod_cleaner",
    "schedule.opensearch_prune",
    "schedule.image_sweeper",
    "schedule.metis_k3s_token_sync",
    "schedule.platform_quality_suite_probe",
]
# "Fast" tasks exclude the two slow comms room jobs.
ARIADNE_FAST_SCHEDULE_TASKS = [
    task
    for task in ARIADNE_ALL_SCHEDULE_TASKS
    if task not in {"schedule.comms_pin_invite", "schedule.comms_reset_room"}
]
# Subset of tasks whose success-age feeds the schedule health panels.
ARIADNE_SCHEDULE_HEALTH_TASKS = [
    "schedule.nextcloud_sync",
    "schedule.nextcloud_cron",
    "schedule.vaultwarden_sync",
    "schedule.wger_user_sync",
    "schedule.firefly_user_sync",
    "schedule.comms_guest_name",
    "schedule.comms_seed_room",
    "schedule.pod_cleaner",
    "schedule.image_sweeper",
    "schedule.metis_k3s_token_sync",
    "schedule.platform_quality_suite_probe",
]
ARIADNE_ALL_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_ALL_SCHEDULE_TASKS)})$"'
ARIADNE_FAST_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_FAST_SCHEDULE_TASKS)})$"'
ARIADNE_SCHEDULE_HEALTH_FILTER = f'task=~"^({promql_task_regex(ARIADNE_SCHEDULE_HEALTH_TASKS)})$"'
# Raw Ariadne schedule gauges, filtered to the task sets defined above.
ARIADNE_ALL_SCHEDULE_NEXT_RUN = f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
ARIADNE_ALL_SCHEDULE_LAST_SUCCESS = (
    f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
)
ARIADNE_ALL_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
ARIADNE_ALL_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
ARIADNE_FAST_SCHEDULE_LAST_SUCCESS = (
    f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}"
)
ARIADNE_FAST_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}"
ARIADNE_FAST_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_FAST_SCHEDULE_FILTER}}}"
ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS = (
    f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}"
)
ARIADNE_HEALTH_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}"
# Ages (seconds / hours) since last success and last error.
ARIADNE_SCHEDULE_LAST_SUCCESS_AGE = f"(time() - {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})"
ARIADNE_SCHEDULE_LAST_ERROR_AGE = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR})"
ARIADNE_SCHEDULE_LAST_SUCCESS_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) / 3600"
ARIADNE_SCHEDULE_LAST_ERROR_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_ERROR_AGE}) / 3600"
# A schedule counts as stale after 36h without a success.
ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 3600
ARIADNE_SCHEDULE_STALE = f"(({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC})"
# "Missing" = scheduled to run but has never recorded a success.
ARIADNE_SCHEDULE_MISSING = (
    f"({ARIADNE_ALL_SCHEDULE_NEXT_RUN} unless on(task) {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})"
)
ARIADNE_SCHEDULE_FAILED = f"((1 - {ARIADNE_HEALTH_SCHEDULE_LAST_STATUS}) > bool 0)"
ARIADNE_SCHEDULE_STALE_COUNT = f"sum({ARIADNE_SCHEDULE_STALE}) or on() vector(0)"
ARIADNE_SCHEDULE_MISSING_COUNT = f"count({ARIADNE_SCHEDULE_MISSING}) or on() vector(0)"
ARIADNE_SCHEDULE_FAILED_COUNT = f"sum({ARIADNE_SCHEDULE_FAILED}) or on() vector(0)"
# Task run counters over various windows ($__range/$__interval are Grafana vars).
ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))'
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))'
ARIADNE_TASK_ERRORS_30D = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[30d]))'
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_total[1h]))'
ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))'
ARIADNE_TASK_ATTEMPTS_SERIES = 'sum(increase(ariadne_task_runs_total[5m]))'
ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="error"}[5m]))'
# Anything neither ok nor error is surfaced as a "warning" status.
ARIADNE_TASK_WARNINGS_SERIES = (
    'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)'
)
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}) / 3600"
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR}) / 3600"
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
    f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600"
)
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
    f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_ERROR}[$__range])) / 3600"
)
ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
    f"(time() - max_over_time({ARIADNE_FAST_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600"
)
ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS = f"(({ARIADNE_ALL_SCHEDULE_NEXT_RUN} - time()) / 3600)"
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
# ---------------------------------------------------------------------------
# Platform quality-gate (CI test suite) expressions
# ---------------------------------------------------------------------------
PLATFORM_TEST_SUITE_NAMES = [
    "ariadne",
    "metis",
    "ananke",
    "atlasbot",
    "pegasus",
    "soteria",
    "titan_iac",
    "bstein_home",
    "data_prepper",
]
PLATFORM_TEST_SUCCESS_STATUS = "ok|passed|success"
PLATFORM_TEST_NON_FAILURE_STATUS = f"{PLATFORM_TEST_SUCCESS_STATUS}|not_applicable|skipped|na|n/a"
PLATFORM_TEST_CI_JOB = "platform-quality-ci"
PLATFORM_TEST_EXPORT_FILTER = f'exported_job="{PLATFORM_TEST_CI_JOB}"'
# Canonical suite name -> regex matching all label spellings seen in metrics.
PLATFORM_TEST_SUITE_VALUE_BY_NAME = {
    "ariadne": "ariadne",
    "metis": "metis",
    "ananke": "ananke",
    "atlasbot": "atlasbot",
    "pegasus": "pegasus|pegasus-health|pegasus_health",
    "soteria": "soteria",
    "titan_iac": "titan_iac|titan-iac",
    "bstein_home": "bstein_home|bstein-home",
    "data_prepper": "data_prepper|data-prepper",
}
# Canonical suite name -> Jenkins job name (used for panel links).
PLATFORM_TEST_JENKINS_JOB_BY_SUITE = {
    "ariadne": "ariadne",
    "metis": "metis",
    "ananke": "ananke",
    "atlasbot": "atlasbot",
    "pegasus": "pegasus",
    "soteria": "Soteria",
    "titan_iac": "titan-iac",
    "bstein_home": "bstein-dev-home",
    "data_prepper": "data-prepper",
}
JENKINS_UI_BASE_DEFAULT = "https://ci.bstein.dev"
PLATFORM_TEST_SUITE_MATCHER = "|".join(
    PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite) for suite in PLATFORM_TEST_SUITE_NAMES
)
PLATFORM_TEST_SUITE_CANONICAL_MATCHER = "|".join(PLATFORM_TEST_SUITE_NAMES)
PLATFORM_TEST_SUITE_VARIABLE_ALL_MATCHER = PLATFORM_TEST_SUITE_CANONICAL_MATCHER
# Success / total run counts over 30d, 7d, and 24h windows.
PLATFORM_TEST_SUCCESS_EVENTS_30D = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d])) or on() vector(0))'
)
PLATFORM_TEST_TOTAL_EVENTS_30D = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d])) or on() vector(0))'
)
PLATFORM_TEST_SUCCESS_EVENTS_7D = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[7d])) or on() vector(0))'
)
PLATFORM_TEST_TOTAL_EVENTS_7D = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[7d])) or on() vector(0))'
)
PLATFORM_TEST_SUCCESS_EVENTS_24H = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) or on() vector(0))'
)
PLATFORM_TEST_TOTAL_EVENTS_24H = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) or on() vector(0))'
)
# Success rates in percent; clamp_min guards empty-window division by zero.
TEST_SUCCESS_RATE = (
    f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_30D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_30D}), 1)"
)
TEST_SUCCESS_RATE_7D = (
    f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_7D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_7D}), 1)"
)
TEST_SUCCESS_RATE_24H = (
    f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_24H}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_24H}), 1)"
)
TEST_FAILURES_24H_TOTAL = (
    f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) or on() vector(0))'
)
PLATFORM_TEST_FAILURES_24H_BY_SUITE = (
    f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])))'
)
PLATFORM_TEST_ACTIVITY_30D = (
    f'sum by (suite, status) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d]))'
)
PLATFORM_TEST_RUNS_24H_TOTAL = PLATFORM_TEST_TOTAL_EVENTS_24H
# Number of suites with at least one run in the last 24h.
PLATFORM_TEST_ACTIVE_SUITES_24H = (
    f'sum((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) > 0)) '
    "or on() vector(0)"
)
PLATFORM_TEST_POINT_WINDOW = "1h"
# One Grafana target per suite: rolling 1h success-rate series, refIds A, B, C, ...
PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [
    {
        "refId": chr(ord("A") + index),
        "expr": (
            f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}'
            f'[{PLATFORM_TEST_POINT_WINDOW}]))) / '
            f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}",{PLATFORM_TEST_EXPORT_FILTER}}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1))'
        ),
        "legendFormat": suite,
    }
    for index, suite in enumerate(PLATFORM_TEST_SUITE_NAMES)
]
PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = (
    f'sort_desc(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h]))) '
    f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h]))), 1))'
)
# Index of suites that reported anything in 30d; used to synthesize -1
# "missing" rows for suites without coverage/smell metrics below.
QUALITY_GATE_SUITE_INDEX_30D = (
    f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d]))'
)
QUALITY_GATE_COVERAGE_BY_SUITE = (
    f'(max by (suite) ({{__name__=~".*_quality_gate_coverage_percent",{PLATFORM_TEST_EXPORT_FILTER}}})) '
    f'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{PLATFORM_TEST_EXPORT_FILTER}}}))'
)
QUALITY_GATE_COVERAGE_BY_SUITE_WITH_MISSING = (
    f"({QUALITY_GATE_COVERAGE_BY_SUITE}) or on(suite) (0 * ({QUALITY_GATE_SUITE_INDEX_30D}) - 1)"
)
# Shortfall versus the 95% coverage target (0 when already above target).
QUALITY_GATE_COVERAGE_GAP_BY_SUITE = (
    f"clamp_min(95 - ({QUALITY_GATE_COVERAGE_BY_SUITE}), 0)"
)
QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE = (
    f"max by (suite) (platform_quality_gate_source_lines_over_500_total{{{PLATFORM_TEST_EXPORT_FILTER}}})"
)
QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE_WITH_MISSING = (
    f"({QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE}) or on(suite) (0 * ({QUALITY_GATE_SUITE_INDEX_30D}) - 1)"
)
PLATFORM_TEST_CHECKS_SELECTOR = (
    f'__name__=~".*_quality_gate_checks_total",suite=~"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}",'
    f"{PLATFORM_TEST_EXPORT_FILTER}"
)
# Percent of each suite's distinct checks whose latest result is non-failing.
PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE = (
    f'(100 * sum by (suite) (max by (suite, check) '
    f'(({{{PLATFORM_TEST_CHECKS_SELECTOR},result=~"{PLATFORM_TEST_NON_FAILURE_STATUS}"}} > bool 0))) '
    f'/ clamp_min(sum by (suite) (max by (suite, check) '
    f'(({{{PLATFORM_TEST_CHECKS_SELECTOR}}} > bool 0))), 1))'
)
# PVC backup age; unhealthy backups are penalized with a synthetic 999h age.
PVC_BACKUP_AGE_HOURS_BY_PVC = "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))"
# ---------------------------------------------------------------------------
# Ananke power / UPS expressions
# ---------------------------------------------------------------------------
ANANKE_SELECTOR = 'job="ananke-power"'
# The two monitored UPS units and the nodes they protect.
ANANKE_UPS_DB_NAME = "Pyrphoros"
ANANKE_UPS_DB_NODE = "titan-db"
ANANKE_UPS_TETHYS_NAME = "Statera"
ANANKE_UPS_TETHYS_NODE = "titan-24"
ANANKE_UPS_DB_SELECTOR = f'{ANANKE_SELECTOR},source="{ANANKE_UPS_DB_NAME}"'
ANANKE_UPS_TETHYS_SELECTOR = f'{ANANKE_SELECTOR},source="{ANANKE_UPS_TETHYS_NAME}"'
ANANKE_UPS_ON_BATTERY = f"sum(ananke_ups_on_battery{{{ANANKE_SELECTOR}}}) or on() vector(0)"
ANANKE_UPS_LOW_BATTERY = f"sum(ananke_ups_low_battery{{{ANANKE_SELECTOR}}}) or on() vector(0)"
ANANKE_UPS_RUNTIME_MIN = f"min(ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}}) or on() vector(0)"
# Worst-case runtime as a percent of the configured shutdown threshold.
ANANKE_UPS_RUNTIME_HEADROOM_PERCENT = (
    f"100 * min(ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}}) / "
    f"clamp_min(max(ananke_ups_threshold_seconds{{{ANANKE_SELECTOR}}}), 1)"
)
ANANKE_UPS_TRIGGER_COUNT_1D = f"increase(ananke_shutdown_triggers_total{{{ANANKE_SELECTOR}}}[1d]) or on() vector(0)"
# ---------------------------------------------------------------------------
# GitOps (Flux) health, scraped by the same Ananke exporter.
# ---------------------------------------------------------------------------
GITOPS_SELECTOR = ANANKE_SELECTOR
GITOPS_SOURCE_INFO = (
    f'max by (branch, revision) (ananke_gitops_flux_source_info{{{GITOPS_SELECTOR},namespace="flux-system",name="flux-system"}})'
)
GITOPS_KUSTOMIZATION_READY_PCT = (
    f"100 * sum(max by (namespace, name) (ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})) "
    f"/ clamp_min(count(max by (namespace, name) (ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})), 1)"
)
GITOPS_KUSTOMIZATION_READY_COUNT = (
    f"sum(max by (namespace, name) (ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
GITOPS_KUSTOMIZATION_TOTAL_COUNT = (
    f"count(max by (namespace, name) (ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
GITOPS_KUSTOMIZATION_SUSPENDED = (
    f"sum(max by (namespace, name) (ananke_gitops_kustomization_suspended{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
GITOPS_KUSTOMIZATION_NOT_SUSPENDED_PCT = (
    f"100 * (1 - ({GITOPS_KUSTOMIZATION_SUSPENDED}) / clamp_min(({GITOPS_KUSTOMIZATION_TOTAL_COUNT}), 1))"
)
GITOPS_HELM_READY_PCT = (
    f"100 * sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})) "
    f"/ clamp_min(count(max by (namespace, name) (ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})), 1)"
)
GITOPS_HELM_READY_COUNT = (
    f"sum(max by (namespace, name) (ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
GITOPS_HELM_TOTAL_COUNT = (
    f"count(max by (namespace, name) (ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
GITOPS_HELM_SUSPENDED = (
    f"sum(max by (namespace, name) (ananke_gitops_helmrelease_suspended{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
GITOPS_HELM_NOT_SUSPENDED_PCT = (
    f"100 * (1 - ({GITOPS_HELM_SUSPENDED}) / clamp_min(({GITOPS_HELM_TOTAL_COUNT}), 1))"
)
GITOPS_SCRAPE_SUCCESS = f"min(ananke_gitops_scrape_success{{{GITOPS_SELECTOR}}}) or on() vector(0)"
GITOPS_LAST_SCRAPE_AGE = (
    f"(time() - max(ananke_gitops_last_scrape_timestamp_seconds{{{GITOPS_SELECTOR}}})) or on() vector(0)"
)
# Per-UPS breakdowns of runtime, on-battery flag, charge, load, and draw.
ANANKE_UPS_RUNTIME_DB = (
    f'max(ananke_ups_runtime_seconds{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_RUNTIME_TETHYS = (
    f'max(ananke_ups_runtime_seconds{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_ON_BATTERY_DB = (
    f'max(ananke_ups_on_battery{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_ON_BATTERY_TETHYS = (
    f'max(ananke_ups_on_battery{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_BATTERY_CHARGE_DB = (
    f'max(ananke_ups_battery_charge_percent{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_BATTERY_CHARGE_TETHYS = (
    f'max(ananke_ups_battery_charge_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_LOAD_DB = (
    f'max(ananke_ups_load_percent{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)'
)
ANANKE_UPS_LOAD_TETHYS = (
    f'max(ananke_ups_load_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)'
)
# Estimated draw in watts = load percent * nominal watts / 100.
ANANKE_UPS_DRAW_WATTS_DB = (
    f'max((ananke_ups_load_percent{{{ANANKE_UPS_DB_SELECTOR}}} '
    f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_DB_SELECTOR}}}) / 100) or on() vector(0)'
)
ANANKE_UPS_DRAW_WATTS_TETHYS = (
    f'max((ananke_ups_load_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}} '
    f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_TETHYS_SELECTOR}}}) / 100) or on() vector(0)'
)
ANANKE_UPS_DRAW_WATTS_DB_SERIES = (
    f'((ananke_ups_load_percent{{{ANANKE_UPS_DB_SELECTOR}}} '
    f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_DB_SELECTOR}}}) / 100)'
)
ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES = (
    f'((ananke_ups_load_percent{{{ANANKE_UPS_TETHYS_SELECTOR}}} '
    f'* ananke_ups_power_nominal_watts{{{ANANKE_UPS_TETHYS_SELECTOR}}}) / 100)'
)
ANANKE_UPS_RUNTIME_BY_SOURCE = f"ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}}"
ANANKE_UPS_LOAD_BY_SOURCE = f"ananke_ups_load_percent{{{ANANKE_SELECTOR}}}"
ANANKE_UPS_CHARGE_BY_SOURCE = f"ananke_ups_battery_charge_percent{{{ANANKE_SELECTOR}}}"
ANANKE_UPS_TRIGGER_BY_SOURCE = f"ananke_ups_trigger_active{{{ANANKE_SELECTOR}}}"
# ---------------------------------------------------------------------------
# Typhon climate sensor expressions
# ---------------------------------------------------------------------------
CLIMATE_SENSOR_COUNT = "count(typhon_temperature_celsius) or on() vector(0)"
CLIMATE_TEMP_MAX = "max(typhon_temperature_celsius) or on() vector(0)"
CLIMATE_PRESSURE_CURRENT = "max(typhon_vpd_kpa) or on() vector(0)"
CLIMATE_HUMIDITY_MAX = "max(typhon_relative_humidity_percent) or on() vector(0)"
CLIMATE_TEMP_SERIES = "typhon_temperature_celsius"
CLIMATE_PRESSURE_SERIES = "typhon_vpd_kpa"
CLIMATE_HUMIDITY_SERIES = "typhon_relative_humidity_percent"
# Dew point from temperature + relative humidity (Magnus-style formula;
# clamp_min keeps ln() defined when humidity reads 0).
CLIMATE_DEWPOINT_SERIES = (
    "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + "
    "(17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / "
    "(17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + "
    "(17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))"
)
CLIMATE_DEWPOINT_CURRENT = f"max({CLIMATE_DEWPOINT_SERIES}) or on() vector(0)"
# Current level and raw series per fan group.
CLIMATE_FAN_OUTLET_CURRENT = (
    'max(typhon_fan_speed_level{fan_group="outlet"}) or on() vector(0)'
)
CLIMATE_FAN_INSIDE_INLET_CURRENT = (
    'max(typhon_fan_speed_level{fan_group="inside_inlet"}) or on() vector(0)'
)
CLIMATE_FAN_OUTSIDE_INLET_CURRENT = (
    'max(typhon_fan_speed_level{fan_group="outside_inlet"}) or on() vector(0)'
)
CLIMATE_FAN_INTERIOR_CURRENT = (
    'max(typhon_fan_speed_level{fan_group="interior"}) or on() vector(0)'
)
CLIMATE_FAN_OUTLET_SERIES = (
    'typhon_fan_speed_level{fan_group="outlet"}'
)
CLIMATE_FAN_INSIDE_INLET_SERIES = (
    'typhon_fan_speed_level{fan_group="inside_inlet"}'
)
CLIMATE_FAN_OUTSIDE_INLET_SERIES = (
    'typhon_fan_speed_level{fan_group="outside_inlet"}'
)
CLIMATE_FAN_INTERIOR_SERIES = (
    'typhon_fan_speed_level{fan_group="interior"}'
)
# Postgres: used vs max connections rendered as two labeled series.
POSTGRES_CONN_USED = (
    'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") '
    'or label_replace(max(pg_settings_max_connections), "conn", "max", "__name__", ".*")'
)
POSTGRES_CONN_HOTTEST = 'topk(1, sum by (datname) (pg_stat_activity_count))'
# One-off Jobs: Jobs NOT owned by a CronJob, and the age of their pods.
ONEOFF_JOB_OWNER = (
    'label_replace(kube_job_owner{owner_kind="CronJob"}, "owner_name", "$1", "job_name", "(.*)")'
)
ONEOFF_JOB_PODS = f'(kube_pod_owner{{owner_kind="Job"}} unless on(namespace, owner_name) {ONEOFF_JOB_OWNER})'
ONEOFF_JOB_POD_AGE_HOURS = (
    '((time() - kube_pod_start_time{pod!=""}) / 3600) '
    f'* on(namespace,pod) group_left(owner_name) {ONEOFF_JOB_PODS} '
    '* on(namespace,pod) group_left(phase) '
    'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})'
)
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES)
# ---------------------------------------------------------------------------
# Network / ingress / control-plane latency expressions
# ---------------------------------------------------------------------------
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
TRAEFIK_NET_INGRESS = (
    'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
    " or on() vector(0)"
)
TRAEFIK_NET_EGRESS = (
    'sum(rate(container_network_transmit_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
    " or on() vector(0)"
)
NET_CLUSTER_RX = (
    'sum(rate(container_network_receive_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
    " or on() vector(0)"
)
NET_CLUSTER_TX = (
    'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
    " or on() vector(0)"
)
# Exclude loopback and all virtual/overlay devices: physical NICs only.
PHYSICAL_NET_FILTER = 'device!~"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*"'
NET_NODE_RX_PHYS = (
    f'sum(rate(node_network_receive_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
)
NET_NODE_TX_PHYS = (
    f'sum(rate(node_network_transmit_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
)
NET_TOTAL_EXPR = NET_NODE_TX_PHYS
NET_INGRESS_EXPR = NET_NODE_RX_PHYS
NET_EGRESS_EXPR = NET_NODE_TX_PHYS
NET_INTERNAL_EXPR = (
    'sum(rate(container_network_receive_bytes_total{namespace!="traefik",pod!=""}[5m]) '
    '+ rate(container_network_transmit_bytes_total{namespace!="traefik",pod!=""}[5m]))'
    ' or on() vector(0)'
)
APISERVER_5XX_RATE = 'sum(rate(apiserver_request_total{code=~"5.."}[5m]))'
APISERVER_P99_LATENCY_MS = (
    "histogram_quantile(0.99, sum by (le) (rate(apiserver_request_duration_seconds_bucket[5m]))) * 1000"
)
ETCD_P99_LATENCY_MS = (
    "histogram_quantile(0.99, sum by (le) (rate(etcd_request_duration_seconds_bucket[5m]))) * 1000"
)
# Traefik SLI over a fixed 5m window (non-5xx fraction of all requests).
TRAEFIK_TOTAL_5M = "sum(rate(traefik_entrypoint_requests_total[5m]))"
TRAEFIK_SUCCESS_5M = 'sum(rate(traefik_entrypoint_requests_total{code!~"5.."}[5m]))'
TRAEFIK_SLI_5M = f"({TRAEFIK_SUCCESS_5M}) / clamp_min({TRAEFIK_TOTAL_5M}, 1)"
TRAEFIK_P99_LATENCY_MS = (
    "histogram_quantile(0.99, sum by (le) (rate(traefik_entrypoint_request_duration_seconds_bucket[5m]))) * 1000"
)
TRAEFIK_P95_LATENCY_MS = (
    "histogram_quantile(0.95, sum by (le) (rate(traefik_entrypoint_request_duration_seconds_bucket[5m]))) * 1000"
)
# Availability SLO target used for error-budget burn calculations.
SLO_AVAILABILITY = 0.999


def traefik_sli(window):
    """Return the Traefik availability SLI (non-5xx share) for a rate window.

    clamp_min keeps the denominator at >= 1 so zero traffic does not divide by zero.
    """
    total = f'sum(rate(traefik_entrypoint_requests_total[{window}]))'
    success = f'sum(rate(traefik_entrypoint_requests_total{{code!~"5.."}}[{window}]))'
    return f"({success}) / clamp_min({total}, 1)"


def traefik_burn(window):
    """Return the error-budget burn rate for the given window.

    Burn = error ratio divided by the allowed error budget (1 - SLO_AVAILABILITY).
    Note the budget is interpolated into the PromQL string as a Python float.
    """
    sli = traefik_sli(window)
    return f"(1 - ({sli})) / {1 - SLO_AVAILABILITY}"


# ---------------------------------------------------------------------------
# Panel factories
# ---------------------------------------------------------------------------


def stat_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    decimals=None,
    thresholds=None,
    text_mode="value",
    legend=None,
    instant=False,
    value_suffix=None,
    links=None,
    targets=None,
    field_overrides=None,
    description=None,
    orientation=None,
    wide_layout=None,
):
    """Return a Grafana stat panel definition.

    Either pass a single PromQL string as ``expr`` or a full ``targets`` list;
    an explicit ``targets`` wins over ``expr``.
    """
    defaults = {
        "color": {"mode": "thresholds"},
        "mappings": [],
        # Default thresholds: grey at baseline, green from 1 upward.
        "thresholds": thresholds
        or {
            "mode": "absolute",
            "steps": [
                {"color": "rgba(115, 115, 115, 1)", "value": None},
                {"color": "green", "value": 1},
            ],
        },
        "unit": unit,
        "custom": {"displayMode": "auto"},
    }
    if value_suffix:
        defaults["custom"]["valueSuffix"] = value_suffix
    if decimals is not None:
        defaults["decimals"] = decimals
    target_list = targets if targets is not None else [{"expr": expr, "refId": "A"}]
    panel = {
        "id": panel_id,
        "type": "stat",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": target_list,
        "fieldConfig": {"defaults": defaults, "overrides": field_overrides or []},
        "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
            "textMode": text_mode,
        },
    }
    if orientation:
        panel["options"]["orientation"] = orientation
    if wide_layout is not None:
        panel["options"]["wideLayout"] = wide_layout
    # A legend format only makes sense for a single target.
    if legend and len(panel["targets"]) == 1:
        panel["targets"][0]["legendFormat"] = legend
    if instant:
        # setdefault keeps any explicit per-target instant flag intact.
        for t in panel["targets"]:
            t.setdefault("instant", True)
    if links:
        panel["links"] = links
    if description:
        panel["description"] = description
    return panel


def gauge_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    min_value=0,
    max_value=1,
    thresholds=None,
    links=None,
):
    """Return a Grafana gauge panel definition (green until max_value, then red)."""
    return {
        "id": panel_id,
        "type": "gauge",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [{"expr": expr, "refId": "A"}],
        "fieldConfig": {
            "defaults": {
                "min": min_value,
                "max": max_value,
                "thresholds": thresholds
                or {
                    "mode": "absolute",
                    "steps": [
                        {"color": "green", "value": None},
                        {"color": "red", "value": max_value},
                    ],
                },
            },
            "overrides": [],
        },
        "options": {
            "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
            "orientation": "auto",
            "showThresholdMarkers": False,
            "showThresholdLabels": False,
        },
        # Only attach "links" when some were provided.
        **({"links": links} if links else {}),
    }


def timeseries_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    max_value=None,
    legend=None,
    legend_display="table",
    legend_placement="bottom",
    legend_calcs=None,
    time_from=None,
    links=None,
    targets=None,
    field_overrides=None,
    description=None,
    data_links=None,
):
    """Return a Grafana time-series panel definition.

    As with stat_panel, an explicit ``targets`` list overrides the single ``expr``.
    """
    target_list = targets if targets is not None else [{"expr": expr, "refId": "A"}]
    panel = {
        "id": panel_id,
        "type": "timeseries",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": target_list,
        "fieldConfig": {"defaults": {"unit": unit}, "overrides": field_overrides or []},
        "options": {
            "legend": {
                "displayMode": legend_display,
                "placement": legend_placement,
            },
            "tooltip": {"mode": "multi"},
        },
    }
    if max_value is not None:
        panel["fieldConfig"]["defaults"]["max"] = max_value
    # A legend format only makes sense for a single target.
    if legend and len(panel["targets"]) == 1:
        panel["targets"][0]["legendFormat"] = legend
    if legend_calcs:
        panel["options"]["legend"]["calcs"] = legend_calcs
    if time_from:
        panel["timeFrom"] = time_from
    if links:
        panel["links"] = links
    if data_links:
        panel["fieldConfig"]["defaults"]["links"] = data_links
    if description:
        panel["description"] = description
    return panel


def state_timeline_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    description,
    thresholds,
    unit="percent",
    min_value=0,
    max_value=100,
    legend="{{suite}}",
    links=None,
    data_links=None,
):
    """Return a lane-style state timeline panel for categorical health over time."""
    defaults = {
        "color": {"mode": "thresholds"},
        "unit": unit,
        "thresholds": thresholds,
        "custom": {
            "fillOpacity": 70,
            "lineWidth": 0,
            "spanNulls": True,
        },
    }
    # min/max are optional; pass None to leave the axis unbounded.
    if min_value is not None:
        defaults["min"] = min_value
    if max_value is not None:
        defaults["max"] = max_value
    panel = {
        "id": panel_id,
        "type": "state-timeline",
        "title": title,
        "description": description,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [{"expr": expr, "refId": "A", "legendFormat": legend}],
        "fieldConfig": {"defaults": defaults, "overrides": []},
        "options": {
            "mergeValues": True,
            "showValue": "never",
            "legend": {"displayMode": "list", "placement": "bottom"},
            "tooltip": {"mode": "single", "sort": "none"},
        },
    }
    if links:
        panel["links"] = links
    if data_links:
        panel["fieldConfig"]["defaults"]["links"] = data_links
    return panel


def apply_bar_timeseries_style(panel, *, stacked=False, fill_opacity=70):
    """Make a time-series panel read as volume bars instead of interpolated lines.

    Mutates and returns ``panel``.
    """
    panel["fieldConfig"]["defaults"]["custom"] = {
        "drawStyle": "bars",
        "barAlignment": 0,
        "barWidthFactor": 0.72,
        "lineWidth": 0,
        "fillOpacity": fill_opacity,
        "gradientMode": "none",
        "showPoints": "never",
        "spanNulls": True,
    }
    if stacked:
        panel["fieldConfig"]["defaults"]["custom"]["stacking"] = {"mode": "normal", "group": "A"}
    return panel


def fixed_color_overrides(series_colors):
    """Return fixed-color overrides keyed by exact series name.

    ``series_colors`` maps legend/series name -> Grafana color name.
    """
    return [
        {
            "matcher": {"id": "byName", "options": series_name},
            "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": color}}],
        }
        for series_name, color in series_colors.items()
    ]


def table_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    transformations=None,
    instant=False,
    options=None,
    filterable=True,
    footer=None,
    format=None,
    description=None,
    field_overrides=None,
    links=None,
):
    """Return a Grafana table panel definition."""
    # Optional PromQL subquery helpers in expr: share(), etc.
    panel_options = {"showHeader": True, "columnFilters": False}
    if options:
        panel_options.update(options)
    if footer is not None:
        panel_options["footer"] = footer
    field_defaults = {"unit": unit, "custom": {"filterable": filterable}}
    target = {"expr": expr, "refId": "A", **({"instant": True} if instant else {})}
    if format:
        # e.g. "table" to ask the datasource for tabular framing.
        target["format"] = format
    panel = {
        "id": panel_id,
        "type": "table",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [target],
        "fieldConfig": {"defaults": field_defaults, "overrides": field_overrides or []},
        "options": panel_options,
    }
    if transformations:
        panel["transformations"] = transformations
    if description:
        panel["description"] = description
    if links:
        panel["links"] = links
    return panel


def pie_panel(panel_id, title, expr, grid, *, links=None, description=None):
    """Return a pie chart panel with readable namespace labels."""
    panel = {
        "id": panel_id,
        "type": "piechart",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [{"expr": expr, "refId": "A", "legendFormat": "{{namespace}}"}],
        "fieldConfig": {
            "defaults": {
                "unit": "percent",
                "color": {"mode": "palette-classic"},
            },
            "overrides": [],
        },
        "options": {
            "legend": {"displayMode": "list", "placement": "right"},
            "pieType": "pie",
            "displayLabels": [],
            "tooltip": {"mode": "single"},
            "colorScheme": "interpolateSpectral",
            "colorBy": "value",
            "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
        },
    }
    if links:
        panel["links"] = links
    if description:
        panel["description"] = description
    return panel


def namespace_scope_variable(var_name, label):
    """Return a hidden custom template variable for the namespace-scope filter.

    NAMESPACE_SCOPE_* values are defined elsewhere in this file; the default
    selection is "workload namespaces only".
    """
    options = [
        {
            "text": "workload namespaces only",
            "value": NAMESPACE_SCOPE_WORKLOAD,
            "selected": True,
        },
        {"text": "all namespaces", "value": NAMESPACE_SCOPE_ALL, "selected": False},
        {
            "text": "infrastructure namespaces only",
            "value": NAMESPACE_SCOPE_INFRA,
            "selected": False,
        },
    ]
    # Grafana custom-variable query syntax: "text : value" pairs, comma-separated.
    query = (
        "workload namespaces only : "
        + NAMESPACE_SCOPE_WORKLOAD
        + ",all namespaces : "
        + NAMESPACE_SCOPE_ALL
        + ",infrastructure namespaces only : "
        + NAMESPACE_SCOPE_INFRA
    )
    return {
        "name": var_name,
        "label": label,
        "type": "custom",
        "query": query,
        "current": {"text": options[0]["text"], "value": options[0]["value"], "selected": True},
        "options": options,
        # hide=2 hides both label and value in the dashboard toolbar.
        "hide": 2,
        "multi": False,
        "includeAll": False,
        "refresh": 1,
        "sort": 0,
        "skipUrlSync": False,
    }


def namespace_scope_links(var_name):
    """Return panel links that switch the namespace-scope variable in place.

    The link for ``var_name`` pins a concrete value; every other scope variable
    (from NAMESPACE_SCOPE_VARS, defined elsewhere in this file) is carried over
    via Grafana's ${var} interpolation.
    """
    def with_value(value):
        encoded = urllib.parse.quote(value, safe="")
        params = []
        for other in NAMESPACE_SCOPE_VARS:
            if other == var_name:
                params.append(f"var-{other}={encoded}")
            else:
                params.append(f"var-{other}=${{{other}}}")
        return "?" + "&".join(params)

    return [
        {"title": "Workload namespaces only", "url": with_value(NAMESPACE_SCOPE_WORKLOAD), "targetBlank": False},
        {"title": "All namespaces", "url": with_value(NAMESPACE_SCOPE_ALL), "targetBlank": False},
        {
            "title": "Infrastructure namespaces only",
            "url": with_value(NAMESPACE_SCOPE_INFRA),
            "targetBlank": False,
        },
    ]


def testing_suite_variable():
    """Return the "suite" custom variable listing all platform test suites.

    PLATFORM_TEST_SUITE_NAMES and the all-matcher constant are defined elsewhere
    in this file.
    """
    options = [
        {
            "text": suite,
            "value": suite,
            "selected": False,
        }
        for suite in PLATFORM_TEST_SUITE_NAMES
    ]
    query = ",".join(f"{suite} : {suite}" for suite in PLATFORM_TEST_SUITE_NAMES)
    return {
        "name": "suite",
        "label": "Suite",
        "type": "custom",
        "query": query,
        "current": {"text": "All", "value": "$__all", "selected": True},
        "options": options,
        "hide": 0,
        "multi": False,
        "includeAll": True,
        "allValue": PLATFORM_TEST_SUITE_VARIABLE_ALL_MATCHER,
        "refresh": 1,
        "sort": 1,
        "skipUrlSync": False,
    }


def testing_case_variable():
    """Return the "test" query variable, scoped by the suite and branch variables."""
    return {
        "name": "test",
        "label": "Test Case",
        "type": "query",
        "query": f'label_values(platform_quality_gate_test_case_result{{suite=~"${{suite:regex}}",branch!="",branch=~"${{branch:regex}}",test!="",test!="__no_test_cases__",{PLATFORM_TEST_EXPORT_FILTER}}}, test)',
        "current": {"text": "All", "value": "$__all", "selected": True},
        "options": [],
        "hide": 0,
        "multi": False,
        "includeAll": True,
        "allValue": ".*",
        # refresh=2: re-query on time-range change.
        "refresh": 2,
        "sort": 1,
        "skipUrlSync": False,
    }


def testing_branch_variable():
    """Return the "branch" query variable, scoped by the suite variable."""
    return {
        "name": "branch",
        "label": "Branch",
        "type": "query",
        "query": f'label_values(platform_quality_gate_build_info{{suite=~"${{suite:regex}}",branch!="",{PLATFORM_TEST_EXPORT_FILTER}}}, branch)',
        "current": {"text": "All", "value": "$__all", "selected": True},
        "options": [],
        "hide": 0,
        "multi": False,
        "includeAll": True,
        "allValue": ".*",
        "refresh": 2,
        "sort": 1,
        "skipUrlSync": False,
    }


def jenkins_base_variable():
    """Return the editable textbox variable holding the Jenkins base URL."""
    return {
        "name": "jenkins_base",
        "label": "Jenkins Base URL",
        "type": "textbox",
        "query": JENKINS_UI_BASE_DEFAULT,
        "current": {
            "text": JENKINS_UI_BASE_DEFAULT,
            "value": JENKINS_UI_BASE_DEFAULT,
            "selected": True,
        },
        "hide": 0,
        "skipUrlSync": False,
    }


def jenkins_suite_links(base_var="${jenkins_base}"):
    """Return Jenkins links: the root UI plus per-suite job and artifact links.

    PLATFORM_TEST_JENKINS_JOB_BY_SUITE (defined elsewhere in this file) maps a
    suite to its Jenkins job name; the suite name itself is the fallback.
    """
    links = [{"title": "Open Jenkins", "url": f"{base_var}/", "targetBlank": True}]
    for suite in PLATFORM_TEST_SUITE_NAMES:
        job = PLATFORM_TEST_JENKINS_JOB_BY_SUITE.get(suite, suite)
        # Jenkins job names may contain characters that need escaping in URLs.
        encoded_job = urllib.parse.quote(job, safe="")
        links.append(
            {
                "title": f"{suite}: Job",
                "url": f"{base_var}/job/{encoded_job}/",
                "targetBlank": True,
            }
        )
        links.append(
            {
                "title": f"{suite}: Last Artifacts",
                "url": f"{base_var}/job/{encoded_job}/lastCompletedBuild/artifact/",
                "targetBlank": True,
            }
        )
    return links


def jenkins_artifact_data_links(base_var="${jenkins_base}"):
    """Return data links that resolve a specific build via series labels.

    Relies on the series carrying ``jenkins_job`` and ``build_number`` labels.
    """
    return [
        {
            "title": "Open build artifacts",
            "url": f"{base_var}/job/${{__field.labels.jenkins_job}}/${{__field.labels.build_number}}/artifact/",
            "targetBlank": True,
        },
        {
            "title": "Open build",
            "url": f"{base_var}/job/${{__field.labels.jenkins_job}}/${{__field.labels.build_number}}/",
            "targetBlank": True,
        },
    ]


def jenkins_latest_artifact_data_links(base_var="${jenkins_base}"):
    """Return data links pointing at the last completed build of the labelled job."""
    return [
        {
            "title": "Open latest artifacts",
            "url": f"{base_var}/job/${{__field.labels.jenkins_job}}/lastCompletedBuild/artifact/",
            "targetBlank": True,
        },
        {
            "title": "Open Jenkins job",
            "url": f"{base_var}/job/${{__field.labels.jenkins_job}}/",
            "targetBlank": True,
        },
    ]


def bargauge_panel(
    panel_id,
    title,
    expr,
    grid,
    *,
    unit="none",
    legend=None,
    links=None,
    limit=None,
    sort_order="desc",
    thresholds=None,
    decimals=None,
    instant=False,
    overrides=None,
    data_links=None,
    include_color=True,
    description=None,
):
    """Return a bar gauge panel with label-aware reduction."""
    # Wrap in sort()/sort_desc() unless the caller already sorted the expression.
    cleaned_expr = expr.strip()
    if not cleaned_expr.startswith(("sort(", "sort_desc(")):
        if sort_order == "desc":
            expr = f"sort_desc({expr})"
        elif sort_order == "asc":
            expr = f"sort({expr})"
    defaults = {}
    if include_color:
        defaults["color"] = {"mode": "thresholds"}
    defaults.update(
        {
            "unit": unit,
            "min": 0,
            "max": 100 if unit == "percent" else None,
            "thresholds": thresholds
            or {
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 50},
                    {"color": "orange", "value": 70},
                    {"color": "red", "value": 85},
                ],
            },
        }
    )
    panel = {
        "id": panel_id,
        "type": "bargauge",
        "title": title,
        "datasource": PROM_DS,
        "gridPos": grid,
        "targets": [
            {
                "expr": expr,
                "refId": "A",
                "legendFormat": legend or "{{node}}",
                **({"instant": True} if instant else {}),
            }
        ],
        "fieldConfig": {
            "defaults": defaults,
            "overrides": [],
        },
        "options": {
            "displayMode": "basic",
            "orientation": "horizontal",
            "reduceOptions": {
                "calcs": ["lastNotNull"],
                "fields": "",
                "values": False,
            },
        },
    }
    if overrides:
        panel["fieldConfig"]["overrides"].extend(overrides)
    if decimals is not None:
        panel["fieldConfig"]["defaults"]["decimals"] = decimals
    if links:
        panel["links"] = links
    if description:
        panel["description"] = description
    if data_links:
        panel["fieldConfig"]["defaults"]["links"] = data_links
    # Keep bars ordered by value descending for readability.
    panel["transformations"] = [
        {
            "id": "sortBy",
            "options": {"fields": ["Value"], "order": sort_order},
        }
    ]
    if limit:
        panel["transformations"].append({"id": "limit", "options": {"limit": limit}})
    return panel


def set_bargauge_display_mode(panels, display_mode):
    """Apply a display mode to bar gauges, including gauges inside collapsed rows."""
    for panel in panels:
        if panel.get("type") == "bargauge":
            panel["options"]["displayMode"] = display_mode
        # Collapsed rows carry their child panels under "panels"; recurse into them.
        if panel.get("panels"):
            set_bargauge_display_mode(panel["panels"], display_mode)


def text_panel(panel_id, title, content, grid):
    """Return a markdown text panel (no datasource)."""
    return {
        "id": panel_id,
        "type": "text",
        "title": title,
        "gridPos": grid,
        "datasource": None,
        "options": {"mode": "markdown", "content": content},
    }


def row_panel(panel_id, title, y, *, collapsed=True, panels=None):
    """Return a Grafana row, optionally carrying collapsed child panels."""
    return {
        "id": panel_id,
        "type": "row",
        "title": title,
        "gridPos": {"h": 1, "w": 24, "x": 0, "y": y},
        "collapsed": collapsed,
        # Only collapsed rows embed their children; expanded rows list them flat.
        **({"panels": panels or []} if collapsed else {}),
    }


# Human-readable titles for cross-dashboard links, keyed by dashboard UID.
DASHBOARD_LINK_TITLES = {
    "atlas-overview": "Open Atlas Overview",
    "atlas-pods": "Open Atlas Pods",
    "atlas-nodes": "Open Atlas Nodes",
    "atlas-storage": "Open Atlas Storage",
    "atlas-network": "Open Atlas Network",
    "atlas-mail": "Open Atlas Mail",
    "atlas-jobs": "Atlas Testing",
    "atlas-testing": "Atlas Testing",
    "atlas-power": "Open Atlas Power",
    "atlas-gitops": "Open Atlas GitOps",
    "atlas-gpu": "Open Atlas GPU",
}


def link_to(uid):
    """Return a one-element links list pointing at the dashboard with this UID."""
    return [
        {
            "title": DASHBOARD_LINK_TITLES.get(uid, f"Open {uid} dashboard"),
            "url": f"/d/{uid}",
            "targetBlank": True,
        }
    ]


def overview_link_to(uid):
    """Return the historical Overview dashboard link label."""
    return [{"title": f"Open {uid} dashboard", "url": f"/d/{uid}", "targetBlank": True}]


# ---------------------------------------------------------------------------
# Dashboard builders
# ---------------------------------------------------------------------------


def build_overview():
    """Build the panel list for the Atlas overview dashboard."""
    panels = []
overview_link = overview_link_to climate_drop_labels = "job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group" climate_temp_series = f"max without ({climate_drop_labels}) (typhon_temperature_celsius != 0)" climate_humidity_series = f"max without ({climate_drop_labels}) (typhon_relative_humidity_percent != 0)" climate_pressure_series = f"max without ({climate_drop_labels}) (typhon_vpd_kpa != 0)" overview_pvc_backup_metric_presence = ( 'count({__name__=~"pvc_backup_(count|last_success_timestamp_seconds|health_reason)",driver="restic"})' ) overview_pvc_backup_missing = ( 'label_replace(label_replace(vector(999), "namespace", "maintenance", "__name__", ".*"), ' '"pvc", "backup-telemetry-missing", "__name__", ".*")' ) overview_pvc_backup_age = ( 'max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver="restic"}) / 3600) ' 'or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver="restic",reason=~"missing|no_completed|lookup_failed|unknown_timestamp"} > 0) ' f'* (pvc_backup_count{{driver="restic"}} > bool 0)) * 999))) or on() ' f'(({overview_pvc_backup_missing}) unless on() (({overview_pvc_backup_metric_presence}) > 0))' ) def overview_metric_pair_expr(first_expr, first_name, second_expr, second_name): return ( f'label_replace({first_expr}, "metric", "{first_name}", "__name__", ".*") ' f'or label_replace({second_expr}, "metric", "{second_name}", "__name__", ".*")' ) def overview_platform_test_success_targets(): suites = [ ("ariadne", "ariadne"), ("metis", "metis"), ("ananke", "ananke"), ("atlasbot", "atlasbot"), ("lesavka", "lesavka"), ("pegasus", "pegasus|pegasus-health|pegasus_health"), ("soteria", "soteria"), ("titan-iac", "titan-iac|titan_iac"), ("bstein-home", "bstein-home|bstein_home"), ("arcanagon", "arcanagon"), ("data-prepper", "data-prepper|data_prepper"), ] targets = [] for index, (legend, suite_regex) in enumerate(suites): total = 
f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_regex}"}}[1h]))' passed = ( f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_regex}",' f'status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[1h]))' ) targets.append( { "refId": chr(ord("A") + index), "expr": f"(100 * ({passed}) / clamp_min(({total}), 1)) and on() (({total}) > 0) or on() vector(0)", "legendFormat": legend, } ) return targets age_thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 6}, {"color": "orange", "value": 24}, {"color": "red", "value": 48}, ], } row1_stats = [ { "id": 2, "title": "Control Plane Ready", "expr": f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{CONTROL_REGEX}"}})', "kind": "gauge", "max_value": CONTROL_TOTAL, "thresholds": { "mode": "absolute", "steps": [ {"color": "red", "value": None}, {"color": "green", "value": CONTROL_TOTAL}, ], }, }, { "id": 3, "title": "Control Plane Workloads", "expr": CONTROL_WORKLOADS_EXPR, "kind": "stat", "thresholds": { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 2}, {"color": "red", "value": 3}, ], }, "links": overview_link("atlas-pods"), }, { "id": 5, "title": "Stuck Terminating", "expr": STUCK_TERMINATING_EXPR, "kind": "stat", "thresholds": { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 2}, {"color": "red", "value": 3}, ], }, "links": overview_link("atlas-pods"), }, { "id": 27, "title": "Atlas Availability (365d)", "expr": UPTIME_PERCENT_EXPR, "kind": "stat", "thresholds": UPTIME_PERCENT_THRESHOLDS, "unit": "percentunit", "decimals": 4, "text_mode": "value", "instant": True, "description": "Rolling 365-day availability from vmalert's precomputed atlas:availability:ratio_365d series. 
Grafana keeps the last successful rollup for up to 24h so one missed long-window evaluation does not render as No data.", }, { "id": 4, "title": "Problem Pods", "expr": PROBLEM_PODS_EXPR, "kind": "stat", "thresholds": { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 2}, {"color": "red", "value": 3}, ], }, "links": overview_link("atlas-pods"), }, { "id": 6, "title": "CrashLoop / ImagePull", "expr": CRASHLOOP_EXPR, "kind": "stat", "thresholds": { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 2}, {"color": "red", "value": 3}, ], }, "links": overview_link("atlas-pods"), }, { "id": 1, "title": "Workers Ready", "expr": f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})', "kind": "gauge", "max_value": WORKER_TOTAL, "thresholds": { "mode": "absolute", "steps": [ {"color": "red", "value": None}, {"color": "orange", "value": WORKER_TOTAL - 2}, {"color": "yellow", "value": WORKER_TOTAL - 1}, {"color": "green", "value": WORKER_TOTAL}, ], }, }, ] def gauge_grid(idx): width = GAUGE_WIDTHS[idx] if idx < len(GAUGE_WIDTHS) else 4 x = sum(GAUGE_WIDTHS[:idx]) return width, x for idx, item in enumerate(row1_stats): panel_id = item["id"] width, x = gauge_grid(idx) grid = {"h": 5, "w": width, "x": x, "y": 0} kind = item.get("kind", "gauge") if kind == "stat": panels.append( stat_panel( panel_id, item["title"], item["expr"], grid, thresholds=item.get("thresholds"), legend=None, links=item.get("links"), text_mode=item.get("text_mode", "value"), value_suffix=item.get("value_suffix"), unit=item.get("unit", "none"), decimals=item.get("decimals"), instant=item.get("instant", False), description=item.get("description"), ) ) else: panels.append( gauge_panel( panel_id, item["title"], item["expr"], grid, min_value=0, max_value=item.get("max_value", 5), thresholds=item.get("thresholds"), 
links=item.get("links"), ) ) top_health_panels = [ (7, "Hottest node: CPU", topk_with_node(node_cpu_expr()), "percent"), (8, "Hottest node: RAM", topk_with_node(node_mem_expr()), "percent"), (9, "Hottest node: NET (rx+tx)", topk_with_node(node_net_expr()), "Bps"), (10, "Hottest node: I/O (r+w)", topk_with_node(node_io_expr()), "Bps"), (23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"), (24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"), (25, "Astreae Free", astreae_free_expr("/mnt/astreae"), "decbytes"), (26, "Asteria Free", astreae_free_expr("/mnt/asteria"), "decbytes"), ] for idx, (panel_id, title, expr, unit) in enumerate(top_health_panels): is_hottest_panel = panel_id in {7, 8, 9, 10} panels.append( stat_panel( panel_id, title, f"{expr}", {"h": 2, "w": 3, "x": 3 * idx, "y": 5}, unit=unit, thresholds=PERCENT_THRESHOLDS if unit == "percent" else None, text_mode="name_and_value" if is_hottest_panel else "value", legend="{{node}}" if is_hottest_panel else None, instant=is_hottest_panel, links=overview_link("atlas-storage" if panel_id in {23, 24, 25, 26} else "atlas-nodes"), ) ) mail_bounce_rate_thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 5}, {"color": "orange", "value": 8}, {"color": "red", "value": 10}, ], } mail_limit_thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 70}, {"color": "orange", "value": 85}, {"color": "red", "value": 95}, ], } mail_success_thresholds = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, {"color": "orange", "value": 90}, {"color": "yellow", "value": 95}, {"color": "green", "value": 98}, ], } dark_red = "dark-red" dark_orange = "dark-orange" dark_yellow = "dark-yellow" dark_green = "dark-green" dark_blue = "dark-blue" test_success_thresholds = { "mode": "absolute", "steps": [ {"color": dark_red, "value": None}, {"color": dark_orange, "value": 70}, {"color": 
dark_yellow, "value": 85}, {"color": dark_green, "value": 95}, {"color": dark_blue, "value": 100}, ], } fan_intensity_thresholds = { "mode": "absolute", "steps": [ {"color": "#1f60c4", "value": None}, {"color": "#2870b8", "value": 1}, {"color": "#2f8599", "value": 2}, {"color": "#2f9e44", "value": 3}, {"color": "#76a935", "value": 4}, {"color": "#d4b106", "value": 5}, {"color": "#d69605", "value": 6}, {"color": "#e06c00", "value": 7}, {"color": "#d95718", "value": 8}, {"color": "#c92a2a", "value": 9}, {"color": "#8f1d1d", "value": 10}, ], } fan_intensity_expr = ( f'label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="1"}}), "fan", "Outlet", "__name__", ".*") ' f'or label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="2"}}), "fan", "Inlet - Inside", "__name__", ".*") ' f'or label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="3"}}), "fan", "Inlet - Outside", "__name__", ".*") ' f'or label_replace(max without ({climate_drop_labels}) (typhon_fan_speed_level{{port="4"}}), "fan", "Interior", "__name__", ".*")' ) gitops_health_history_expr = ( f'label_replace({GITOPS_KUSTOMIZATION_READY_PCT}, "signal", "Kustomizations Ready", "__name__", ".*") ' f'or label_replace({GITOPS_HELM_READY_PCT}, "signal", "HelmReleases Ready", "__name__", ".*") ' f'or label_replace({GITOPS_KUSTOMIZATION_NOT_SUSPENDED_PCT}, "signal", "Kustomizations Not Suspended", "__name__", ".*") ' f'or label_replace({GITOPS_HELM_NOT_SUSPENDED_PCT}, "signal", "HelmReleases Not Suspended", "__name__", ".*")' ) compact_current_text = {"titleSize": 11, "valueSize": 20} perfect_count_thresholds = { "mode": "absolute", "steps": [ {"color": dark_red, "value": None}, {"color": dark_yellow, "value": max(len(PLATFORM_TEST_SUITE_NAMES) - 2, 1)}, {"color": dark_green, "value": len(PLATFORM_TEST_SUITE_NAMES) - 1}, {"color": dark_blue, "value": len(PLATFORM_TEST_SUITE_NAMES)}, ], } failure_count_thresholds = { "mode": "absolute", "steps": 
[ {"color": dark_blue, "value": None}, {"color": dark_yellow, "value": 1}, {"color": dark_orange, "value": 3}, {"color": dark_red, "value": 5}, ], } overview_avg_coverage = f"(avg(({QUALITY_GATE_COVERAGE_BY_SUITE})) or on() vector(0))" overview_loc_clean_suites = f"(sum(({QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE}) == bool 0) or on() vector(0))" for panel_id, title, draw_expr, runtime_expr, y_pos in [ (40, "Pyrphoros UPS Current", ANANKE_UPS_DRAW_WATTS_DB, ANANKE_UPS_RUNTIME_DB, 7), (144, "Statera UPS Current", ANANKE_UPS_DRAW_WATTS_TETHYS, ANANKE_UPS_RUNTIME_TETHYS, 10), ]: panel = stat_panel( panel_id, title, None, {"h": 3, "w": 3, "x": 0, "y": y_pos}, unit="none", text_mode="name_and_value", targets=[ { "expr": overview_metric_pair_expr(draw_expr, "Draw", runtime_expr, "Runtime"), "refId": "A", "legendFormat": "{{metric}}", "instant": True, } ], field_overrides=[ {"matcher": {"id": "byName", "options": "Draw"}, "properties": [{"id": "unit", "value": "watt"}]}, {"matcher": {"id": "byName", "options": "Runtime"}, "properties": [{"id": "unit", "value": "s"}]}, ], links=overview_link("atlas-power"), ) panel["options"]["text"] = compact_current_text panels.append(panel) ups_history = timeseries_panel( 41, "UPS History (Power Draw)", None, {"h": 6, "w": 6, "x": 3, "y": 7}, unit="watt", targets=[ {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME}, {"refId": "B", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": ANANKE_UPS_TETHYS_NAME}, ], field_overrides=fixed_color_overrides( {ANANKE_UPS_DB_NAME: dark_blue, ANANKE_UPS_TETHYS_NAME: dark_yellow} ), legend_display="list", legend_placement="bottom", links=overview_link("atlas-power"), ) ups_history["fieldConfig"]["defaults"]["custom"] = { "drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "fillOpacity": 18, "showPoints": "never", "spanNulls": True, } panels.append(ups_history) temp_panel = stat_panel( 42, "Current Enclosure Temperature", None, {"h": 3, 
"w": 3, "x": 0, "y": 13}, unit="none", text_mode="name_and_value", targets=[ { "expr": overview_metric_pair_expr( f"max({climate_temp_series}) or on() vector(0)", "°C", f"max(({climate_temp_series}) * 9 / 5 + 32) or on() vector(0)", "°F", ), "refId": "A", "legendFormat": "{{metric}}", "instant": True, } ], field_overrides=[ {"matcher": {"id": "byName", "options": "°C"}, "properties": [{"id": "unit", "value": "celsius"}]}, {"matcher": {"id": "byName", "options": "°F"}, "properties": [{"id": "unit", "value": "fahrenheit"}]}, ], links=overview_link("atlas-power"), ) temp_panel["options"]["text"] = compact_current_text panels.append(temp_panel) climate_panel = stat_panel( 143, "Current Enclosure Climate", None, {"h": 3, "w": 3, "x": 0, "y": 16}, unit="none", text_mode="name_and_value", targets=[ { "expr": overview_metric_pair_expr( f"max({climate_humidity_series}) or on() vector(0)", "%RH", f"max({climate_pressure_series}) or on() vector(0)", "kPa", ), "refId": "A", "legendFormat": "{{metric}}", "instant": True, } ], field_overrides=[ {"matcher": {"id": "byName", "options": "%RH"}, "properties": [{"id": "unit", "value": "suffix:%RH"}]}, {"matcher": {"id": "byName", "options": "kPa"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]}, ], links=overview_link("atlas-power"), ) climate_panel["options"]["text"] = compact_current_text panels.append(climate_panel) panels.append( timeseries_panel( 43, "Enclosure Climate History", None, {"h": 6, "w": 6, "x": 3, "y": 13}, unit="none", targets=[ {"refId": "A", "expr": climate_temp_series, "legendFormat": "C"}, {"refId": "B", "expr": climate_humidity_series, "legendFormat": "RH"}, {"refId": "C", "expr": climate_pressure_series, "legendFormat": "P"}, {"refId": "D", "expr": f"(min_over_time({climate_temp_series}[$__range]) - 0.08)", "legendFormat": "C bound min"}, {"refId": "E", "expr": f"(max_over_time({climate_temp_series}[$__range]) + 0.08)", "legendFormat": "C bound max"}, {"refId": "F", "expr": 
f"clamp_min((min_over_time({climate_humidity_series}[$__range]) - 0.35), 0)", "legendFormat": "RH bound min"}, {"refId": "G", "expr": f"clamp_max((max_over_time({climate_humidity_series}[$__range]) + 0.35), 100)", "legendFormat": "RH bound max"}, {"refId": "H", "expr": f"clamp_min((min_over_time({climate_pressure_series}[$__range]) - 0.03), 0)", "legendFormat": "P bound min"}, {"refId": "I", "expr": f"(max_over_time({climate_pressure_series}[$__range]) + 0.03)", "legendFormat": "P bound max"}, ], field_overrides=[ { "matcher": {"id": "byName", "options": "C"}, "properties": [ {"id": "unit", "value": "suffix:°C"}, {"id": "decimals", "value": 2}, {"id": "custom.axisPlacement", "value": "left"}, {"id": "custom.axisCenteredZero", "value": False}, ], }, { "matcher": {"id": "byRegexp", "options": "C bound .*"}, "properties": [ {"id": "unit", "value": "suffix:°C"}, {"id": "custom.axisPlacement", "value": "left"}, {"id": "custom.axisCenteredZero", "value": False}, {"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}}, {"id": "custom.lineWidth", "value": 0}, {"id": "custom.fillOpacity", "value": 0}, {"id": "custom.showPoints", "value": "never"}, {"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}}, ], }, { "matcher": {"id": "byName", "options": "RH"}, "properties": [ {"id": "unit", "value": "suffix:%"}, {"id": "decimals", "value": 2}, {"id": "custom.axisPlacement", "value": "right"}, {"id": "custom.axisCenteredZero", "value": False}, ], }, { "matcher": {"id": "byRegexp", "options": "RH bound .*"}, "properties": [ {"id": "unit", "value": "suffix:%"}, {"id": "custom.axisPlacement", "value": "right"}, {"id": "custom.axisCenteredZero", "value": False}, {"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}}, {"id": "custom.lineWidth", "value": 0}, {"id": "custom.fillOpacity", "value": 0}, {"id": "custom.showPoints", "value": "never"}, {"id": "color", "value": {"mode": "fixed", "fixedColor": 
"transparent"}}, ], }, { "matcher": {"id": "byName", "options": "P"}, "properties": [ {"id": "unit", "value": "suffix:kPa"}, {"id": "custom.axisPlacement", "value": "right"}, {"id": "decimals", "value": 2}, {"id": "custom.axisCenteredZero", "value": False}, ], }, { "matcher": {"id": "byRegexp", "options": "P bound .*"}, "properties": [ {"id": "unit", "value": "suffix:kPa"}, {"id": "custom.axisPlacement", "value": "right"}, {"id": "custom.axisCenteredZero", "value": False}, {"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}}, {"id": "custom.lineWidth", "value": 0}, {"id": "custom.fillOpacity", "value": 0}, {"id": "custom.showPoints", "value": "never"}, {"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}}, ], }, ], legend_display="list", legend_placement="bottom", links=overview_link("atlas-power"), description="Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible.", ) ) panels[-1]["fieldConfig"]["defaults"]["custom"] = { "drawStyle": "line", "lineInterpolation": "linear", "lineWidth": 2, "fillOpacity": 10, "showPoints": "never", "spanNulls": True, } fan_panel = state_timeline_panel( 141, "Fan Intensity History", fan_intensity_expr, {"h": 6, "w": 6, "x": 9, "y": 13}, unit="none", min_value=0, max_value=10, legend="{{fan}}", thresholds=fan_intensity_thresholds, links=overview_link("atlas-power"), description="Fan intensity lanes on the 0-10 controller scale. 
Cooler colors are quiet/low intensity; warmer colors mean the enclosure is pushing harder.", ) fan_panel["options"]["legend"] = {"displayMode": "list", "placement": "bottom"} fan_panel["options"]["mergeValues"] = False fan_panel["options"]["showValue"] = "auto" fan_panel["options"]["tooltip"] = {"mode": "multi", "sort": "none"} panels.append(fan_panel) flux_source = stat_panel( 140, "Flux Source", None, {"h": 2, "w": 3, "x": 21, "y": 7}, unit="none", text_mode="name", thresholds={ "mode": "absolute", "steps": [ {"color": dark_red, "value": None}, {"color": dark_blue, "value": 1}, ], }, targets=[ { "expr": f"{GITOPS_SOURCE_INFO} or on() vector(0)", "refId": "A", "legendFormat": "{{branch}}", "instant": True, } ], links=overview_link("atlas-gitops"), description="Flux GitRepository branch reported by Ananke. Revision and object detail live in Atlas GitOps.", ) flux_source["options"]["graphMode"] = "none" flux_source["options"]["text"] = {"titleSize": 10, "valueSize": 14} panels.append(flux_source) for panel_id, title, expr, y_pos, unit, decimals, thresholds, links in [ (151, "Run Reliability (24h)", TEST_SUCCESS_RATE_24H, 9, "percent", 1, test_success_thresholds, "atlas-testing"), (152, "Failed Runs (24h)", TEST_FAILURES_24H_TOTAL, 11, "none", 0, failure_count_thresholds, "atlas-testing"), (153, "Fresh Suites (24h)", PLATFORM_TEST_ACTIVE_SUITES_24H, 13, "none", 0, perfect_count_thresholds, "atlas-testing"), (154, "Avg Coverage", overview_avg_coverage, 15, "percent", 1, test_success_thresholds, "atlas-testing"), (155, "LOC Clean Suites", overview_loc_clean_suites, 17, "none", 0, perfect_count_thresholds, "atlas-testing"), ]: rail_panel = stat_panel( panel_id, title, expr, {"h": 2, "w": 3, "x": 21, "y": y_pos}, unit=unit, decimals=decimals, instant=True, thresholds=thresholds, links=overview_link(links), ) rail_panel["options"]["graphMode"] = "none" rail_panel["options"]["text"] = {"titleSize": 10, "valueSize": 19} panels.append(rail_panel) panels.append( 
state_timeline_panel( 150, "GitOps Health", gitops_health_history_expr, {"h": 6, "w": 6, "x": 15, "y": 7}, unit="percent", min_value=0, max_value=100, legend="{{signal}}", thresholds=test_success_thresholds, links=overview_link("atlas-gitops"), description="GitOps readiness and suspension health over time. Blue means perfect; warmer colors mean a readiness or suspension problem appeared.", ) ) panels.append( bargauge_panel( 44, "One-off Job Pods (age hours)", ONEOFF_JOB_POD_AGE_HOURS, {"h": 5, "w": 8, "x": 0, "y": 32}, unit="h", instant=True, legend="{{namespace}}/{{pod}}", thresholds=age_thresholds, limit=12, decimals=2, links=overview_link("atlas-testing"), include_color=False, ) ) ariadne_volume = timeseries_panel( 45, "Ariadne Run Volume", None, {"h": 6, "w": 6, "x": 9, "y": 7}, unit="none", targets=[ {"expr": f"{ARIADNE_TASK_ATTEMPTS_SERIES} or on() vector(0)", "refId": "A", "legendFormat": "Attempts"}, {"expr": f"{ARIADNE_TASK_FAILURES_SERIES} or on() vector(0)", "refId": "B", "legendFormat": "Failures"}, ], legend_display="list", legend_placement="bottom", links=overview_link("atlas-testing"), ) ariadne_volume["fieldConfig"]["overrides"] = fixed_color_overrides( {"Attempts": dark_blue, "Failures": dark_red} ) panels.append(apply_bar_timeseries_style(ariadne_volume, stacked=False)) panels.append( state_timeline_panel( 46, "Gate Checks Passing by Suite", PLATFORM_TEST_CURRENT_GATE_HEALTH_BY_SUITE, {"h": 6, "w": 6, "x": 15, "y": 13}, unit="percent", min_value=0, max_value=100, legend="{{suite}}", thresholds=test_success_thresholds, links=overview_link("atlas-testing"), description="Percent of current gate dimensions passing per suite over time. 
There are seven gate dimensions, so 85.7% means one gate is failing.", ) ) panels[-1]["options"]["legend"] = {"displayMode": "hidden", "placement": "bottom"} panels[-1]["options"]["mergeValues"] = False panels[-1]["options"]["showValue"] = "auto" for panel_id, title, metric, x_pos, description in [ ( 142, "Jenkins Last Success (h, newest first)", "ariadne_jenkins_build_weather_job_last_success_timestamp_seconds", 8, "Top 6 most recent Jenkins successes by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list.", ), ( 243, "Jenkins Last Failure (h, newest first)", "ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds", 12, "Top 6 most recent Jenkins failures by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list.", ), ]: base_expr = f"min by (exported_job,job_url,weather_icon) ((time() - {metric}) / 3600)" topk_expr = f"sort(bottomk(6, {base_expr}))" success_expr = ( f'label_replace(({topk_expr}) and on(exported_job,job_url,weather_icon) ' '(max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1), ' '"run_state", "ok", "exported_job", ".*")' ) failure_expr = ( f'label_replace(({topk_expr}) and on(exported_job,job_url,weather_icon) ' '(max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) != 1), ' '"run_state", "bad", "exported_job", ".*")' ) panels.append( { "id": panel_id, "type": "stat", "title": title, "datasource": PROM_DS, "gridPos": {"h": 5, "w": 4, "x": x_pos, "y": 32}, "targets": [ { "refId": "A", "expr": f"sort(({success_expr}) or ({failure_expr}))", "instant": True, } ], "fieldConfig": { "defaults": { "unit": "h", "decimals": 1, "min": 0, "displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}", "links": [ { "title": "Open Jenkins job", "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", 
"targetBlank": True, } ], }, "overrides": [ { "matcher": {"id": "byRegexp", "options": '.*run_state="ok".*'}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}], }, { "matcher": {"id": "byRegexp", "options": '.*run_state="bad".*'}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], }, ], }, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "left", "orientation": "horizontal", "wideLayout": True, "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, "textMode": "name_and_value", "text": {"titleSize": 11, "valueSize": 11}, }, "transformations": [{"id": "sortBy", "options": {"fields": ["Value"], "order": "asc"}}], "links": overview_link("atlas-testing"), "description": description, } ) panels.append( bargauge_panel( 47, "PVC Backup Health / Age", overview_pvc_backup_age, {"h": 5, "w": 8, "x": 16, "y": 32}, unit="h", instant=True, legend="{{namespace}}/{{pvc}}", sort_order="desc", thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 20}, {"color": "orange", "value": 40}, {"color": "red", "value": 50}, ], }, include_color=False, ) ) panels[-1]["links"] = overview_link("atlas-storage") panels[-1]["description"] = ( "Backup age in hours computed from last-success timestamps for restic-managed PVCs (nightly target: <=20h green, <40h yellow, <50h orange, >=50h red). PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." 
) panels.append( stat_panel( 30, "Mail Sent (1d)", 'max(postmark_outbound_sent{window="1d"})', {"h": 2, "w": 4, "x": 0, "y": 19}, unit="none", links=overview_link("atlas-mail"), ) ) panels.append( { "id": 31, "type": "stat", "title": "Mail Bounces (1d)", "datasource": PROM_DS, "gridPos": {"h": 2, "w": 4, "x": 8, "y": 19}, "targets": [ { "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', "refId": "A", "legendFormat": "Rate", }, { "expr": 'max(postmark_outbound_bounced{window="1d"})', "refId": "B", "legendFormat": "Count", }, ], "fieldConfig": { "defaults": { "color": {"mode": "thresholds"}, "custom": {"displayMode": "auto"}, "thresholds": mail_bounce_rate_thresholds, "unit": "none", }, "overrides": [ { "matcher": {"id": "byName", "options": "Rate"}, "properties": [{"id": "unit", "value": "percent"}], }, { "matcher": {"id": "byName", "options": "Count"}, "properties": [{"id": "unit", "value": "none"}], }, ], }, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "center", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, "textMode": "name_and_value", }, "links": overview_link("atlas-mail"), } ) panels.append( stat_panel( 32, "Mail Success Rate (1d)", 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', {"h": 2, "w": 4, "x": 4, "y": 19}, unit="percent", thresholds=mail_success_thresholds, decimals=1, links=overview_link("atlas-mail"), ) ) panels.append( stat_panel( 33, "Mail Limit Used (30d)", "max(postmark_sending_limit_used_percent)", {"h": 2, "w": 4, "x": 12, "y": 19}, unit="percent", thresholds=mail_limit_thresholds, decimals=1, links=overview_link("atlas-mail"), ) ) panels.append( stat_panel( 34, "Postgres Connections Used", POSTGRES_CONN_USED, {"h": 2, "w": 4, "x": 16, "y": 19}, decimals=0, text_mode="name_and_value", legend="{{conn}}", instant=True, ) ) panels.append( stat_panel( 35, "Postgres Hottest Connections", POSTGRES_CONN_HOTTEST, {"h": 2, "w": 4, "x": 20, "y": 19}, unit="none", 
decimals=0, text_mode="name_and_value", legend="{{datname}}", instant=True, ) ) cpu_scope = "$namespace_scope_cpu" gpu_scope = "$namespace_scope_gpu" ram_scope = "$namespace_scope_ram" panels.append( pie_panel( 11, "Namespace CPU Share", namespace_cpu_share_expr(cpu_scope), {"h": 9, "w": 8, "x": 0, "y": 23}, links=namespace_scope_links("namespace_scope_cpu"), description="Shares are normalized within the selected filter. Switching scope changes the denominator.", ) ) panels.append( pie_panel( 12, "Namespace GPU Share", namespace_gpu_share_expr(gpu_scope), {"h": 9, "w": 8, "x": 8, "y": 23}, links=namespace_scope_links("namespace_scope_gpu"), description="Shares are normalized within the selected filter. Switching scope changes the denominator.", ) ) panels.append( pie_panel( 13, "Namespace RAM Share", namespace_ram_share_expr(ram_scope), {"h": 9, "w": 8, "x": 16, "y": 23}, links=namespace_scope_links("namespace_scope_ram"), description="Shares are normalized within the selected filter. Switching scope changes the denominator.", ) ) worker_filter = f"{WORKER_REGEX}" panels.append( timeseries_panel( 14, "Worker Node CPU", node_cpu_expr(worker_filter), {"h": 12, "w": 12, "x": 0, "y": 44}, unit="percent", legend="{{node}}", legend_calcs=["last"], legend_display="table", legend_placement="right", links=overview_link("atlas-nodes"), ) ) panels.append( timeseries_panel( 15, "Worker Node RAM", node_mem_expr(worker_filter), {"h": 12, "w": 12, "x": 12, "y": 44}, unit="percent", legend="{{node}}", legend_calcs=["last"], legend_display="table", legend_placement="right", links=overview_link("atlas-nodes"), ) ) panels.append( timeseries_panel( 16, "Control plane CPU", node_cpu_expr(CONTROL_ALL_REGEX), {"h": 10, "w": 12, "x": 0, "y": 56}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 17, "Control plane RAM", node_mem_expr(CONTROL_ALL_REGEX), {"h": 10, "w": 12, "x": 12, "y": 56}, unit="percent", 
legend="{{node}}", legend_display="table", legend_placement="right", ) ) panels.append( pie_panel( 28, "Node Pod Share", '(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100', {"h": 10, "w": 12, "x": 0, "y": 66}, ) ) panels.append( bargauge_panel( 29, "Top Nodes by Pod Count", 'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))', {"h": 10, "w": 12, "x": 12, "y": 66}, unit="none", limit=12, decimals=0, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 50}, {"color": "orange", "value": 75}, {"color": "red", "value": 100}, ], }, instant=True, include_color=False, ) ) panels.append( timeseries_panel( 18, "Cluster Ingress Throughput", NET_INGRESS_EXPR, {"h": 7, "w": 8, "x": 0, "y": 37}, unit="Bps", legend="Ingress (Traefik)", legend_display="list", legend_placement="bottom", links=overview_link("atlas-network"), ) ) panels.append( timeseries_panel( 19, "Cluster Egress Throughput", NET_EGRESS_EXPR, {"h": 7, "w": 8, "x": 8, "y": 37}, unit="Bps", legend="Egress (Traefik)", legend_display="list", legend_placement="bottom", links=overview_link("atlas-network"), ) ) panels.append( timeseries_panel( 20, "Intra-Cluster Throughput", NET_INTERNAL_EXPR, {"h": 7, "w": 8, "x": 16, "y": 37}, unit="Bps", legend="Internal traffic", legend_display="list", legend_placement="bottom", links=overview_link("atlas-network"), ) ) panels.append( timeseries_panel( 21, "Root Filesystem Usage", root_usage_expr(), {"h": 16, "w": 12, "x": 0, "y": 76}, unit="percent", legend="{{node}}", legend_calcs=["last"], legend_display="table", legend_placement="right", time_from="30d", links=overview_link("atlas-storage"), ) ) panels.append( timeseries_panel( 22, "Nodes Closest to Full Astraios Disks", astraios_usage_expr(), {"h": 16, "w": 12, "x": 12, "y": 76}, unit="percent", legend="{{node}}", legend_calcs=["last"], legend_display="table", legend_placement="right", time_from="1w", 
links=overview_link("atlas-storage"), ) ) return { "uid": "atlas-overview", "title": "Atlas Overview", "folderUid": PUBLIC_FOLDER, "editable": False, "annotations": {"list": []}, "panels": panels, "schemaVersion": 39, "style": "dark", "tags": ["atlas", "overview"], "templating": { "list": [ namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"), namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"), namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"), ] }, "time": {"from": "now-1h", "to": "now"}, "refresh": "1m", "links": link_to("atlas-testing"), } def build_pods_dashboard(): panels = [] panels.append( stat_panel( 1, "Problem Pods", PROBLEM_PODS_EXPR, {"h": 4, "w": 6, "x": 0, "y": 0}, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "red", "value": 1}, ], }, ) ) panels.append( stat_panel( 2, "CrashLoop / ImagePull", CRASHLOOP_EXPR, {"h": 4, "w": 6, "x": 6, "y": 0}, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "red", "value": 1}, ], }, ) ) panels.append( stat_panel( 3, "Stuck Terminating (>10m)", STUCK_TERMINATING_EXPR, {"h": 4, "w": 6, "x": 12, "y": 0}, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "red", "value": 1}, ], }, ) ) panels.append( stat_panel( 4, "Control Plane Workloads", f'sum(kube_pod_info{{node=~"{CONTROL_REGEX}",namespace!~"{CP_ALLOWED_NS}"}})', {"h": 4, "w": 6, "x": 18, "y": 0}, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "red", "value": 1}, ], }, ) ) panels.append( table_panel( 5, "Pods Not Running", PROBLEM_TABLE_EXPR, {"h": 10, "w": 24, "x": 0, "y": 4}, unit="s", transformations=[{"id": "labelsToFields", "options": {}}], ) ) panels.append( table_panel( 6, "CrashLoop / ImagePull", CRASHLOOP_TABLE_EXPR, {"h": 10, "w": 24, "x": 0, "y": 14}, unit="s", transformations=[{"id": "labelsToFields", "options": {}}], ) ) 
panels.append( table_panel( 7, "Terminating >10m", STUCK_TABLE_EXPR, {"h": 10, "w": 24, "x": 0, "y": 24}, unit="s", transformations=[ {"id": "labelsToFields", "options": {}}, {"id": "filterByValue", "options": {"match": "Value", "operator": "gt", "value": 600}}, ], ) ) panels.append( pie_panel( 8, "Node Pod Share", '(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100', {"h": 8, "w": 12, "x": 12, "y": 34}, ) ) panels.append( bargauge_panel( 9, "Top Nodes by Pod Count", 'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))', {"h": 8, "w": 12, "x": 0, "y": 34}, unit="none", limit=12, decimals=0, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 50}, {"color": "orange", "value": 75}, {"color": "red", "value": 100}, ], }, instant=True, ) ) share_expr = ( '(sum by (namespace,node) (kube_pod_info{pod!="" , node!=""}) ' '/ on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=""}), 1) * 100)' ) rank_terms = [ f"(sum by (node) (kube_node_info{{node=\"{node}\"}}) * 0 + {idx * 1e-3})" for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1) ] rank_expr = " or ".join(rank_terms) score_expr = f"{share_expr} + on(node) group_left() ({rank_expr})" mask_expr = ( f"{score_expr} == bool on(namespace) group_left() " f"(max by (namespace) ({score_expr}))" ) panels.append( table_panel( 10, "Namespace Plurality by Node v27", ( f"{share_expr} * on(namespace,node) group_left() " f"({mask_expr})" ), {"h": 8, "w": 24, "x": 0, "y": 42}, unit="percent", transformations=[ {"id": "labelsToFields", "options": {}}, {"id": "organize", "options": {"excludeByName": {"Time": True}}}, {"id": "filterByValue", "options": {"match": "Value", "operator": "gt", "value": 0}}, { "id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}, }, { "id": "groupBy", "options": { "fields": { "namespace": { "aggregations": [ {"field": "Value", "operation": 
"max"}, {"field": "node", "operation": "first"}, ] } }, "rowBy": ["namespace"], }, }, ], instant=True, options={"showColumnFilters": False}, filterable=False, footer={"show": False, "fields": "", "calcs": []}, format="table", ) ) return { "uid": "atlas-pods", "title": "Atlas Pods", "folderUid": PRIVATE_FOLDER, "editable": True, "panels": panels, "time": {"from": "now-12h", "to": "now"}, "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", "tags": ["atlas", "pods"], } def build_nodes_dashboard(): panels = [] panels.append( stat_panel( 1, "Worker Nodes Ready", f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})', {"h": 4, "w": 8, "x": 0, "y": 0}, value_suffix=WORKER_SUFFIX, ) ) panels.append( stat_panel( 2, "Control Plane Ready", f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{CONTROL_REGEX}"}})', {"h": 4, "w": 8, "x": 8, "y": 0}, value_suffix=CONTROL_SUFFIX, ) ) panels.append( stat_panel( 3, "Control Plane Workloads", f'sum(kube_pod_info{{node=~"{CONTROL_REGEX}",namespace!~"{CP_ALLOWED_NS}"}})', {"h": 4, "w": 8, "x": 16, "y": 0}, ) ) panels.append( stat_panel( 9, "API Server 5xx rate", APISERVER_5XX_RATE, {"h": 4, "w": 8, "x": 0, "y": 4}, unit="req/s", thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 0.05}, {"color": "orange", "value": 0.2}, {"color": "red", "value": 0.5}, ], }, decimals=3, ) ) panels.append( stat_panel( 10, "API Server P99 latency", APISERVER_P99_LATENCY_MS, {"h": 4, "w": 8, "x": 8, "y": 4}, unit="ms", thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 250}, {"color": "orange", "value": 400}, {"color": "red", "value": 600}, ], }, decimals=1, ) ) panels.append( stat_panel( 11, "etcd P99 latency", ETCD_P99_LATENCY_MS, {"h": 4, "w": 8, "x": 16, "y": 4}, unit="ms", thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": 
"yellow", "value": 50}, {"color": "orange", "value": 100}, {"color": "red", "value": 200}, ], }, decimals=1, ) ) panels.append( timeseries_panel( 4, "Node CPU", node_cpu_expr(), {"h": 9, "w": 24, "x": 0, "y": 8}, unit="percent", legend="{{node}}", legend_calcs=["last"], legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 5, "Node RAM", node_mem_expr(), {"h": 9, "w": 24, "x": 0, "y": 17}, unit="percent", legend="{{node}}", legend_calcs=["last"], legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 6, "Control Plane (incl. titan-db) CPU", node_cpu_expr(CONTROL_ALL_REGEX), {"h": 9, "w": 12, "x": 0, "y": 26}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 7, "Control Plane (incl. titan-db) RAM", node_mem_expr(CONTROL_ALL_REGEX), {"h": 9, "w": 12, "x": 12, "y": 26}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 8, "Root Filesystem Usage", root_usage_expr(), {"h": 9, "w": 24, "x": 0, "y": 35}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", time_from="30d", ) ) panels.append( timeseries_panel( 9, "Astraios Usage", astraios_usage_expr(), {"h": 9, "w": 24, "x": 0, "y": 44}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", time_from="30d", ) ) return { "uid": "atlas-nodes", "title": "Atlas Nodes", "folderUid": PRIVATE_FOLDER, "editable": True, "panels": panels, "time": {"from": "now-12h", "to": "now"}, "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", "tags": ["atlas", "nodes"], } def build_storage_dashboard(): panels = [] panels.append( stat_panel( 1, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), {"h": 5, "w": 6, "x": 0, "y": 0}, unit="percent", thresholds=PERCENT_THRESHOLDS, ) ) panels.append( stat_panel( 2, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), 
{"h": 5, "w": 6, "x": 6, "y": 0}, unit="percent", thresholds=PERCENT_THRESHOLDS, ) ) panels.append( stat_panel( 3, "Astreae Free", astreae_free_expr("/mnt/astreae"), {"h": 5, "w": 6, "x": 12, "y": 0}, unit="decbytes", ) ) panels.append( stat_panel( 4, "Asteria Free", astreae_free_expr("/mnt/asteria"), {"h": 5, "w": 6, "x": 18, "y": 0}, unit="decbytes", ) ) panels.append( timeseries_panel( 5, "Astreae Per-Node Usage", filesystem_usage_expr("/mnt/astreae", LONGHORN_NODE_REGEX), {"h": 9, "w": 12, "x": 0, "y": 5}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", time_from="30d", ) ) panels.append( timeseries_panel( 6, "Asteria Per-Node Usage", filesystem_usage_expr("/mnt/asteria", LONGHORN_NODE_REGEX), {"h": 9, "w": 12, "x": 12, "y": 5}, unit="percent", legend="{{node}}", legend_display="table", legend_placement="right", time_from="30d", ) ) panels.append( timeseries_panel( 7, "Astreae Usage History", astreae_usage_expr("/mnt/astreae"), {"h": 9, "w": 12, "x": 0, "y": 14}, unit="percent", time_from="90d", ) ) panels.append( timeseries_panel( 8, "Asteria Usage History", astreae_usage_expr("/mnt/asteria"), {"h": 9, "w": 12, "x": 12, "y": 14}, unit="percent", time_from="90d", ) ) panels.append( stat_panel( 30, "Maintenance Sweepers Ready", 'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100', {"h": 4, "w": 12, "x": 0, "y": 44}, unit="percent", thresholds=PERCENT_THRESHOLDS, ) ) panels.append( stat_panel( 31, "Maintenance Cron Freshness (s)", 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"})', {"h": 4, "w": 12, "x": 12, "y": 44}, unit="s", thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 3600}, {"color": "red", "value": 10800}, ], }, ) ) 
return { "uid": "atlas-storage", "title": "Atlas Storage", "folderUid": PRIVATE_FOLDER, "editable": True, "panels": panels, "time": {"from": "now-12h", "to": "now"}, "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", "tags": ["atlas", "storage"], } def build_network_dashboard(): panels = [] panels.append( stat_panel( 1, "Ingress Success Rate (5m)", TRAEFIK_SLI_5M, {"h": 4, "w": 6, "x": 0, "y": 0}, unit="percentunit", decimals=2, thresholds={ "mode": "absolute", "steps": [ {"color": "red", "value": None}, {"color": "orange", "value": 0.995}, {"color": "yellow", "value": 0.999}, {"color": "green", "value": 0.9995}, ], }, ) ) panels.append( stat_panel( 2, "Error Budget Burn (1h)", traefik_burn("1h"), {"h": 4, "w": 6, "x": 6, "y": 0}, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 2}, {"color": "red", "value": 4}, ], }, decimals=2, ) ) panels.append( stat_panel( 3, "Error Budget Burn (6h)", traefik_burn("6h"), {"h": 4, "w": 6, "x": 12, "y": 0}, thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 1}, {"color": "orange", "value": 2}, {"color": "red", "value": 4}, ], }, decimals=2, ) ) panels.append( stat_panel( 4, "Edge P99 Latency (ms)", TRAEFIK_P99_LATENCY_MS, {"h": 4, "w": 6, "x": 18, "y": 0}, unit="ms", thresholds={ "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 200}, {"color": "orange", "value": 350}, {"color": "red", "value": 500}, ], }, decimals=1, ) ) panels.append( stat_panel( 5, "Ingress Traffic", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 4}, unit="Bps", ) ) panels.append( stat_panel( 6, "Egress Traffic", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 4}, unit="Bps", ) ) panels.append( stat_panel( 7, "Intra-Cluster Traffic", NET_INTERNAL_EXPR, {"h": 4, "w": 8, "x": 16, "y": 4}, unit="Bps", ) ) panels.append( timeseries_panel( 8, "Per-Node 
Throughput", f'avg by (node) (({NET_NODE_TX_PHYS} + {NET_NODE_RX_PHYS}) * on(instance) group_left(node) {NODE_INFO})', {"h": 8, "w": 24, "x": 0, "y": 8}, unit="Bps", legend="{{node}}", legend_display="table", legend_placement="right", ) ) panels.append( table_panel( 9, "Top Namespaces", 'topk(10, sum(rate(container_network_transmit_bytes_total{namespace!=""}[5m]) ' '+ rate(container_network_receive_bytes_total{namespace!=""}[5m])) by (namespace))', {"h": 9, "w": 12, "x": 0, "y": 16}, unit="Bps", transformations=[{"id": "labelsToFields", "options": {}}], ) ) panels.append( table_panel( 10, "Top Pods", 'topk(10, sum(rate(container_network_transmit_bytes_total{pod!=""}[5m]) ' '+ rate(container_network_receive_bytes_total{pod!=""}[5m])) by (namespace,pod))', {"h": 9, "w": 12, "x": 12, "y": 16}, unit="Bps", transformations=[{"id": "labelsToFields", "options": {}}], ) ) panels.append( timeseries_panel( 11, "Traefik Routers (req/s)", f"topk(10, {TRAEFIK_ROUTER_EXPR})", {"h": 9, "w": 12, "x": 0, "y": 25}, unit="req/s", legend="{{router}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 12, "Traefik Entrypoints (req/s)", 'sum by (entrypoint) (rate(traefik_entrypoint_requests_total[5m]))', {"h": 9, "w": 12, "x": 12, "y": 25}, unit="req/s", legend="{{entrypoint}}", legend_display="table", legend_placement="right", ) ) return { "uid": "atlas-network", "title": "Atlas Network", "folderUid": PRIVATE_FOLDER, "editable": True, "panels": panels, "time": {"from": "now-12h", "to": "now"}, "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", "tags": ["atlas", "network"], } def build_mail_dashboard(): panels = [] bounce_rate_thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 5}, {"color": "orange", "value": 8}, {"color": "red", "value": 10}, ], } limit_thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, {"color": "yellow", "value": 70}, {"color": 
"orange", "value": 85}, {"color": "red", "value": 95}, ], } success_thresholds = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, {"color": "orange", "value": 90}, {"color": "yellow", "value": 95}, {"color": "green", "value": 98}, ], } panels.append( stat_panel( 1, "Sent (1d)", 'max(postmark_outbound_sent{window="1d"})', {"h": 4, "w": 6, "x": 0, "y": 0}, decimals=0, ) ) panels.append( stat_panel( 2, "Sent (7d)", 'max(postmark_outbound_sent{window="7d"})', {"h": 4, "w": 6, "x": 6, "y": 0}, decimals=0, ) ) panels.append( { "id": 3, "type": "stat", "title": "Mail Bounces (1d)", "datasource": PROM_DS, "gridPos": {"h": 4, "w": 6, "x": 12, "y": 0}, "targets": [ { "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', "refId": "A", "legendFormat": "Rate", }, { "expr": 'max(postmark_outbound_bounced{window="1d"})', "refId": "B", "legendFormat": "Count", }, ], "fieldConfig": { "defaults": { "color": {"mode": "thresholds"}, "custom": {"displayMode": "auto"}, "thresholds": bounce_rate_thresholds, "unit": "none", }, "overrides": [ { "matcher": {"id": "byName", "options": "Rate"}, "properties": [{"id": "unit", "value": "percent"}], }, { "matcher": {"id": "byName", "options": "Count"}, "properties": [{"id": "unit", "value": "none"}], }, ], }, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "center", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, "textMode": "name_and_value", }, } ) panels.append( stat_panel( 4, "Success Rate (1d)", 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', {"h": 4, "w": 6, "x": 18, "y": 0}, unit="percent", thresholds=success_thresholds, decimals=1, ) ) panels.append( stat_panel( 5, "Limit Used (30d)", "max(postmark_sending_limit_used_percent)", {"h": 4, "w": 6, "x": 0, "y": 4}, thresholds=limit_thresholds, unit="percent", decimals=1, ) ) panels.append( stat_panel( 6, "Send Limit (30d)", "max(postmark_sending_limit)", {"h": 4, "w": 6, "x": 6, "y": 4}, decimals=0, 
) ) panels.append( stat_panel( 7, "Last Success", "max(postmark_last_success_timestamp_seconds)", {"h": 4, "w": 6, "x": 12, "y": 4}, unit="dateTimeAsIso", decimals=0, ) ) panels.append( stat_panel( 8, "Exporter Errors", "sum(postmark_request_errors_total)", {"h": 4, "w": 6, "x": 18, "y": 4}, decimals=0, ) ) panels.append( timeseries_panel( 13, "Bounce Rate (1d vs 7d)", "max by (window) (postmark_outbound_bounce_rate)", {"h": 8, "w": 12, "x": 0, "y": 12}, unit="percent", legend="{{window}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 14, "Bounced (1d vs 7d)", "max by (window) (postmark_outbound_bounced)", {"h": 8, "w": 12, "x": 12, "y": 12}, unit="none", legend="{{window}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 15, "Sent (1d vs 7d)", "max by (window) (postmark_outbound_sent)", {"h": 8, "w": 12, "x": 0, "y": 20}, unit="none", legend="{{window}}", legend_display="table", legend_placement="right", ) ) panels.append( timeseries_panel( 16, "Exporter Errors", "sum(postmark_request_errors_total)", {"h": 8, "w": 12, "x": 12, "y": 20}, unit="none", ) ) return { "uid": "atlas-mail", "title": "Atlas Mail", "folderUid": PRIVATE_FOLDER, "editable": True, "panels": panels, "time": {"from": "now-30d", "to": "now"}, "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", "tags": ["atlas", "mail"], } def build_jobs_dashboard(): panels = [] suite_var = "${suite:regex}" test_var = "${test:regex}" branch_var = "${branch:regex}" success = PLATFORM_TEST_SUCCESS_STATUS exported = PLATFORM_TEST_EXPORT_FILTER runs_selector = f'suite=~"{suite_var}",{exported}' runs_success_selector = f'{runs_selector},status=~"{success}"' runs_failure_selector = f'{runs_selector},status!~"{success}"' checks_selector = f'__name__=~".*_quality_gate_checks_total",suite=~"{suite_var}",{exported}' coverage_metric_selector = f'__name__=~".*_quality_gate_coverage_percent",suite=~"{suite_var}",{exported}' 
    # --- PromQL selector fragments scoped by the dashboard template variables ---
    # Workspace coverage / LOC metrics are filtered only by suite + export filter.
    workspace_coverage_selector = f'suite=~"{suite_var}",{exported}'
    smell_selector = f'suite=~"{suite_var}",{exported}'
    # build_info additionally requires a non-empty branch label matching the branch variable.
    build_info_selector = f'suite=~"{suite_var}",branch!="",branch=~"{branch_var}",{exported}'
    # Always-true (value 1) series per currently-selected suite, derived from build_info.
    selected_suite_universe = (
        f'(count by (suite) (platform_quality_gate_build_info{{{build_info_selector}}}) >= bool 0)'
    )
    # Zero-valued series per selected suite; used as an `or` fallback so suites
    # with no matching data still render a 0 instead of disappearing.
    selected_suite_zero = f"(0 * {selected_suite_universe})"
    # Synthetic value-1 series for every known suite name (independent of scraped data).
    suite_universe = " or ".join(
        f'label_replace(vector(1), "suite", "{suite}", "__name__", ".*")'
        for suite in PLATFORM_TEST_SUITE_NAMES
    )
    # Run counters over 24h/30d windows; `or on() vector(0)` keeps empty results at 0.
    runs_24h = f'(sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[24h])) or on() vector(0))'
    runs_30d = f'(sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])) or on() vector(0))'
    success_24h = (
        f'(sum(increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[24h])) or on() vector(0))'
    )
    success_30d = (
        f'(sum(increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[30d])) or on() vector(0))'
    )
    failures_24h = (
        f'(sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[24h])) or on() vector(0))'
    )
    # Success percentages; clamp_min(..., 1) avoids division by zero when there are no runs.
    success_rate_24h = f"100 * ({success_24h}) / clamp_min(({runs_24h}), 1)"
    success_rate_30d = f"100 * ({success_30d}) / clamp_min(({runs_30d}), 1)"
    runs_by_suite_24h = f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[24h]))'
    success_by_suite_24h = (
        f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[24h]))'
    )
    # Per-suite success rate, restricted to suites with >0 runs; suites without
    # runs are emitted as -1 (the panel maps -1 to a "no runs" label).
    success_rate_by_suite_24h = (
        f'sort_desc(((100 * ({success_by_suite_24h}) / clamp_min(({runs_by_suite_24h}), 1)) '
        f'and on(suite) (({runs_by_suite_24h}) > 0)) '
        f'or on(suite) ((0 * ({runs_by_suite_24h})) - 1))'
    )
    non_failure = PLATFORM_TEST_NON_FAILURE_STATUS
    # Percent of distinct checks per suite whose result label is non-failure;
    # falls back to 0 for selected suites with no check evidence.
    current_gate_health_by_suite = (
        f'(100 * sum by (suite) (max by (suite, check) (({{{checks_selector},result=~"{non_failure}"}} > bool 0))) '
        f'/ clamp_min(sum by (suite) (max by (suite, check) (({{{checks_selector}}} > bool 0))), 1)) '
        f'or on(suite) ({selected_suite_zero})'
    )
    success_history_runs = f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[7d]))'
    # 7d rolling success rate, only defined where the suite actually had runs in the window.
    success_history_by_suite = (
        f'(100 * sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[7d])) '
        f'/ ({success_history_runs})) and on(suite) (({success_history_runs}) > 0)'
    )
    daily_success_volume = (
        f'sum(increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[24h])) or on() vector(0)'
    )
    daily_failure_volume = (
        f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[24h])) or on() vector(0)'
    )
    # Coverage: prefer the suite-specific *_quality_gate_coverage_percent metric and
    # fall back to the workspace line-coverage metric (history prefers the reverse order).
    coverage_by_suite = (
        f'(max by (suite) ({{{coverage_metric_selector}}})) '
        f'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}))'
    )
    coverage_history_by_suite = (
        f'(max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}})) '
        f'or on(suite) (max by (suite) ({{{coverage_metric_selector}}}))'
    )
    # -1 marks suites that ran in the last 30d but report no coverage (panel maps -1 to "missing").
    coverage_with_missing = (
        f"({coverage_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
    )
    # LOC-limit telemetry: files over 500 lines vs. total gated files per suite.
    smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})'
    loc_files_by_suite = f'max by (suite) (platform_quality_gate_source_files_total{{{smell_selector}}})'
    smell_with_missing = (
        f"({smell_by_suite}) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
    )
    # Percent of gated files at or under the limit, defined only where a file count exists.
    loc_limit_compliance_by_suite = (
        f"(100 * clamp_min(({loc_files_by_suite}) - ({smell_by_suite}), 0) / ({loc_files_by_suite})) "
        f"and on(suite) (({loc_files_by_suite}) > 0)"
    )
    # Fallbacks: a 100%/0% estimate from the over-limit count alone (older payloads
    # without a file-count metric), then -1 for suites with 30d runs but no LOC data.
    loc_limit_compliance_with_missing = (
        f"({loc_limit_compliance_by_suite}) "
        f"or on(suite) (100 * (1 - clamp_max(({smell_by_suite}), 1))) "
        f"or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])))) - 1)"
    )
    # History variant of LOC compliance: same expression minus the -1 "missing"
    # fallback so timeline panels simply show gaps instead of a sentinel value.
    loc_limit_compliance_history = (
        f"({loc_limit_compliance_by_suite}) "
        f"or on(suite) (100 * (1 - clamp_max(({smell_by_suite}), 1)))"
    )
    average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))"
    # Count of suites currently reporting at least one file over the 500-line limit.
    suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))'
    # Regexes grouping individual gate check names into the dashboard's check families.
    check_regex_tests = "tests|unit|build"
    check_regex_coverage = "coverage"
    check_regex_loc = "loc|smell"
    check_regex_style = "docs|naming|hygiene|lint|docs_naming|style"
    check_regex_gate_glue = "gate|glue|gate_glue"
    check_regex_sonarqube = "sonarqube|sonar"
    check_regex_supply_chain = "ironbank|supply_chain|image_compliance|artifact_security"

    def _check_state_percent_series(regex: str, failed: bool) -> str:
        """Return a PromQL expression for the percentage of checks in the family
        matched by *regex* that are in the failed (failed=True) or non-failure
        (failed=False) state, per suite, with a 0 fallback for selected suites
        that have no check evidence."""
        state = f'result!~"{non_failure}"' if failed else f'result=~"{non_failure}"'
        state_checks = (
            f'sum by (suite) (max by (suite, check) (({{{checks_selector},check=~"{regex}",{state}}} > bool 0)))'
        )
        total_checks = (
            f'sum by (suite) (max by (suite, check) (({{{checks_selector},check=~"{regex}"}} > bool 0)))'
        )
        state_percent = f"(100 * ({state_checks}) / clamp_min(({total_checks}), 1))"
        return f"(({state_percent}) or on(suite) ({selected_suite_zero}))"

    # Failed test cases: prefer the memoized hourly rollup series, falling back to
    # the raw per-test metric; blank test labels and the "__no_test_cases__"
    # placeholder rows are excluded in both.
    rollup_failed_tests = (
        f'sum by (suite, test) (platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch!="",branch=~"{branch_var}",test!="",test!="__no_test_cases__",status="failed"}})'
    )
    raw_failed_tests = (
        f'sum by (suite, test) (max_over_time(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch!="",branch=~"{branch_var}",test!="",test!="__no_test_cases__",{exported},status="failed"}}[$__interval]))'
    )
    # Top 12 failing (suite, test) pairs for the history timeline panel.
    problematic_tests_history_core = f"topk(12, (({rollup_failed_tests}) or on(suite, test) ({raw_failed_tests})))"
    problematic_tests_history = problematic_tests_history_core
    # 30-day failure totals via a subquery over the hourly rollup.
    rollup_failed_tests_30d = (
        f'sum by (suite, test) (sum_over_time(platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch!="",branch=~"{branch_var}",test!="",test!="__no_test_cases__",status="failed"}}[30d:1h]))'
    )
raw_failed_tests_30d = ( f'sum by (suite, test) (increase(platform_quality_gate_test_case_result{{suite=~"{suite_var}",branch!="",branch=~"{branch_var}",test!="",test!="__no_test_cases__",{exported},status="failed"}}[30d]))' ) worst_test_per_suite_core = ( f"topk by (suite) (1, (({rollup_failed_tests_30d}) or on(suite, test) ({raw_failed_tests_30d})))" ) worst_test_per_suite = worst_test_per_suite_core def _selected_status_volume(status: str) -> str: return ( f'(sum(platform_quality:test_case_status:count_1h{{suite=~"{suite_var}",branch!="",' f'branch=~"{branch_var}",test!="",test=~"{test_var}",test!="__no_test_cases__",' f'status="{status}"}}) or on() vector(0))' ) selected_test_pass_fail = [ { "refId": "A", "expr": _selected_status_volume("passed"), "legendFormat": "Passed", }, { "refId": "B", "expr": _selected_status_volume("failed"), "legendFormat": "Failed", }, { "refId": "C", "expr": _selected_status_volume("skipped"), "legendFormat": "Skipped", }, ] selected_test_pass_rate = ( f'avg by (suite) (platform_quality:test_case_pass_rate:percent_1h{{suite=~"{suite_var}",' f'branch!="",branch=~"{branch_var}",test!="",test=~"{test_var}",test!="__no_test_cases__"}})' ) recent_branch_evidence = ( f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))' ) non_primary_branch_evidence = ( f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector},branch!~"main|master|origin/main|origin/master|unknown"}}[30d]))' ) branch_evidence_by_suite = ( f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d]))' ) primary_branch_clean_by_suite = ( f'sort_desc((100 * ((({branch_evidence_by_suite}) > bool 0) ' f'unless on(suite) (({non_primary_branch_evidence}) > bool 0))) ' f'or on(suite) (0 * (({branch_evidence_by_suite}) > bool 0)))' ) def _missing_suite_series(presence_expr: str) -> str: missing = f"(({suite_universe}) unless on(suite) 
{presence_expr})" return f"({missing}) or on(suite) (0 * ({suite_universe}))" def _present_suite_percent(presence_expr: str) -> str: present = f"(({suite_universe}) and on(suite) {presence_expr})" return f"(100 * ({present})) or on(suite) (0 * ({suite_universe}))" present_tests_by_suite = _present_suite_percent( f'count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}})' ) present_checks_by_suite = _present_suite_percent( f'count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}})' ) present_coverage_by_suite = _present_suite_percent( f"count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}})" ) present_loc_by_suite = _present_suite_percent( f"count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}) " f"and on(suite) count by (suite) (platform_quality_gate_source_files_total{{{exported}}})" ) present_test_case_by_suite = _present_suite_percent( f"count by (suite) (platform_quality_gate_test_case_result{{{exported}}})" ) real_test_case_by_suite = _present_suite_percent( f'count by (suite) (platform_quality_gate_test_case_result{{{exported},test!="__no_test_cases__"}})' ) dark_red = "dark-red" dark_orange = "dark-orange" dark_yellow = "dark-yellow" dark_green = "dark-green" dark_blue = "dark-blue" success_thresholds = { "mode": "absolute", "steps": [ {"color": dark_red, "value": None}, {"color": dark_orange, "value": 90}, {"color": dark_yellow, "value": 93}, {"color": dark_green, "value": 95}, {"color": dark_blue, "value": 100}, ], } coverage_thresholds = success_thresholds failures_thresholds = { "mode": "absolute", "steps": [ {"color": dark_blue, "value": None}, {"color": dark_green, "value": 0.01}, {"color": dark_yellow, "value": 1}, {"color": dark_orange, "value": 3}, {"color": dark_red, "value": 5}, ], } smell_thresholds = { "mode": "absolute", "steps": [ {"color": dark_red, "value": None}, {"color": dark_green, "value": 0}, {"color": dark_yellow, "value": 1}, {"color": 
dark_orange, "value": 3}, {"color": dark_red, "value": 5}, ], } missing_thresholds = { "mode": "absolute", "steps": [ {"color": dark_green, "value": None}, {"color": dark_red, "value": 1}, ], } panels.append( stat_panel( 2, "Run Reliability (24h)", success_rate_24h, {"h": 5, "w": 4, "x": 0, "y": 0}, unit="percent", decimals=2, instant=True, thresholds=success_thresholds, ) ) panels.append( stat_panel( 3, "Run Reliability (30d)", success_rate_30d, {"h": 5, "w": 4, "x": 4, "y": 0}, unit="percent", decimals=2, instant=True, thresholds=success_thresholds, ) ) panels.append( stat_panel( 4, "Failed Runs (24h)", failures_24h, {"h": 5, "w": 4, "x": 8, "y": 0}, unit="none", instant=True, thresholds=failures_thresholds, ) ) panels.append( stat_panel( 5, "Runs (24h)", runs_24h, {"h": 5, "w": 4, "x": 12, "y": 0}, unit="none", instant=True, thresholds={ "mode": "absolute", "steps": [{"color": dark_red, "value": None}, {"color": dark_green, "value": 1}], }, ) ) panels.append( stat_panel( 6, "Avg Coverage (%)", average_coverage, {"h": 5, "w": 4, "x": 16, "y": 0}, unit="percent", decimals=2, instant=True, thresholds=success_thresholds, ) ) panels.append( stat_panel( 7, "Suites with LOC >500", suites_loc_violating, {"h": 5, "w": 4, "x": 20, "y": 0}, unit="none", instant=True, thresholds=smell_thresholds, ) ) panels.append( bargauge_panel( 8, "Current Gate Health by Suite", current_gate_health_by_suite, {"h": 8, "w": 8, "x": 0, "y": 5}, unit="percent", instant=True, legend="{{suite}}", sort_order="asc", thresholds=success_thresholds, decimals=2, ) ) panels[-1]["description"] = ( "Current pass percentage across the required gate dimensions reported by each suite. " "This is the fastest place to answer whether the latest suite quality signal is healthy." 
) reliability_suite_panel = bargauge_panel( 9, "Run Reliability by Suite (24h)", success_rate_by_suite_24h, {"h": 8, "w": 8, "x": 8, "y": 5}, unit="percent", instant=True, legend="{{suite}}", sort_order="asc", thresholds=success_thresholds, decimals=2, ) reliability_suite_panel["description"] = ( "Rolling CI run success rate. This can stay low after failed/debug runs even when " "Current Gate Health is green." ) reliability_suite_panel["fieldConfig"]["defaults"]["mappings"] = [ {"type": "value", "options": {"-1": {"text": "no runs"}}} ] panels.append(reliability_suite_panel) history_panel = state_timeline_panel( 11, "Run Reliability by Suite (7d rolling)", success_history_by_suite, {"h": 8, "w": 24, "x": 0, "y": 13}, thresholds=success_thresholds, description=( "Seven-day rolling run success rate per suite. Each suite gets its own lane, " "so brief failed/debug runs lower the lane color without creating unreadable 0/100 spikes." ), ) panels.append(history_panel) run_volume_panel = timeseries_panel( 12, "Daily Run Volume (Selected Scope)", None, {"h": 8, "w": 8, "x": 0, "y": 21}, unit="none", targets=[ {"refId": "A", "expr": daily_success_volume, "legendFormat": "Success"}, {"refId": "B", "expr": daily_failure_volume, "legendFormat": "Failure"}, ], legend_display="list", legend_placement="bottom", legend_calcs=[], ) run_volume_panel["description"] = ( "Twenty-four-hour rolling run counts for the selected suite/branch scope. " "This is volume, not a pass-rate percentage." ) run_volume_panel["fieldConfig"]["defaults"]["min"] = 0 run_volume_panel["fieldConfig"]["defaults"]["custom"] = { "drawStyle": "bars", "barAlignment": 0, "lineWidth": 0, "fillOpacity": 70, "stacking": {"mode": "normal", "group": "A"}, } panels.append(run_volume_panel) panels.append( state_timeline_panel( 13, "Coverage History by Suite", coverage_history_by_suite, {"h": 8, "w": 8, "x": 8, "y": 21}, thresholds=coverage_thresholds, description=( "Latest reported line coverage per suite over time. 
Coverage is separate " "from LOC compliance so one signal cannot hide the other." ), ) ) panels.append( state_timeline_panel( 14, "Files <=500 LOC History by Suite", loc_limit_compliance_history, {"h": 8, "w": 8, "x": 16, "y": 21}, thresholds=success_thresholds, description=( "Percent of LOC-gated source files at or under the 500-line limit. " "This uses the existing file-count telemetry; longest-file history needs a new publisher metric." ), ) ) check_dimensions = [ ("Tests", check_regex_tests), ("Coverage", check_regex_coverage), ("LOC", check_regex_loc), ("Style", check_regex_style), ("Gate Glue", check_regex_gate_glue), ("SonarQube", check_regex_sonarqube), ("Supply Chain", check_regex_supply_chain), ] def _append_check_trends(start_id: int, title_prefix: str, failed: bool, y: int) -> None: trend_thresholds = failures_thresholds if failed else success_thresholds trend_description = ( "Current bad-state percentage for this check family, evaluated over time. " "Higher means more of the selected suites/checks are failing right now; this is not an event-count spike chart." if failed else "Current acceptable-state percentage for this check family, evaluated over time. " "Higher means more of the selected suites/checks are healthy right now; gaps mean there was no check evidence." 
) for index, (label, regex) in enumerate(check_dimensions[:4]): panel = state_timeline_panel( start_id + index, f"{label} {title_prefix}", _check_state_percent_series(regex, failed), {"h": 7, "w": 6, "x": index * 6, "y": y}, thresholds=trend_thresholds, description=trend_description, ) panels.append(panel) for index, (label, regex) in enumerate(check_dimensions[4:]): panel = state_timeline_panel( start_id + 4 + index, f"{label} {title_prefix}", _check_state_percent_series(regex, failed), {"h": 7, "w": 8, "x": index * 8, "y": y + 7}, thresholds=trend_thresholds, description=trend_description, ) panels.append(panel) _append_check_trends(130, "Failure Rate", True, 29) _append_check_trends(138, "Healthy Rate", False, 43) panels.append( state_timeline_panel( 145, "Problematic Tests Over Time (Top failures)", problematic_tests_history, {"h": 8, "w": 12, "x": 0, "y": 57}, thresholds=failures_thresholds, unit="none", min_value=0, max_value=None, legend="{{suite}} - {{test}}", description=( "Top failing test cases over time, using memoized hourly rollups. " "Blank branch/test labels and placeholder no-test-case rows are excluded." 
), ) ) panels[-1]["links"] = jenkins_suite_links() panels[-1]["fieldConfig"]["defaults"]["links"] = jenkins_latest_artifact_data_links() panels.append( bargauge_panel( 147, "Most Problematic Test by Suite (30d)", worst_test_per_suite, {"h": 8, "w": 12, "x": 12, "y": 57}, unit="none", instant=True, legend="{{suite}} · {{test}}", sort_order="desc", thresholds=failures_thresholds, limit=9, links=jenkins_suite_links(), data_links=jenkins_latest_artifact_data_links(), ) ) panels.append( timeseries_panel( 146, "Selected Test Pass/Fail History", None, {"h": 8, "w": 12, "x": 0, "y": 65}, unit="none", targets=selected_test_pass_fail, legend_display="list", legend_placement="bottom", legend_calcs=[], links=jenkins_suite_links(), data_links=jenkins_artifact_data_links(), ) ) panels[-1]["description"] = ( "Stacked hourly outcome volume for the selected suite/branch/test scope. " "This uses vmalert rollups only, avoiding expensive raw 30-day per-test scans." ) panels[-1]["fieldConfig"]["defaults"]["min"] = 0 panels[-1]["fieldConfig"]["defaults"]["custom"] = { "drawStyle": "bars", "barAlignment": 0, "lineWidth": 0, "fillOpacity": 70, "stacking": {"mode": "normal", "group": "A"}, } selected_pass_rate_panel = state_timeline_panel( 152, "Selected Test Pass Rate History", selected_test_pass_rate, {"h": 8, "w": 12, "x": 12, "y": 65}, thresholds=success_thresholds, legend="{{suite}}", description=( "Average pass rate per suite for the selected test filter, using memoized hourly " "test-case pass-rate rollups instead of raw historical scans." 
), ) selected_pass_rate_panel["links"] = jenkins_suite_links() selected_pass_rate_panel["fieldConfig"]["defaults"]["links"] = jenkins_artifact_data_links() panels.append(selected_pass_rate_panel) coverage_panel = bargauge_panel( 17, "Coverage by Suite (Latest, gate 95)", coverage_with_missing, {"h": 8, "w": 12, "x": 0, "y": 73}, unit="percent", instant=True, legend="{{suite}}", sort_order="asc", thresholds=coverage_thresholds, decimals=2, ) coverage_panel["fieldConfig"]["defaults"]["mappings"] = [ {"type": "value", "options": {"-1": {"text": "missing"}}} ] panels.append(coverage_panel) smell_panel = bargauge_panel( 18, "Files <=500 LOC by Suite (Latest)", loc_limit_compliance_with_missing, {"h": 8, "w": 12, "x": 12, "y": 73}, unit="percent", instant=True, legend="{{suite}}", sort_order="asc", thresholds=success_thresholds, decimals=0, ) smell_panel["fieldConfig"]["defaults"]["mappings"] = [ {"type": "value", "options": {"-1": {"text": "missing"}}} ] smell_panel["description"] = "Percent of managed LOC-gated files at or under 500 lines. Older suite payloads fall back to 100%/0% until they emit platform_quality_gate_source_files_total." 
    panels.append(smell_panel)
    # Telemetry-completeness row: each gauge shows 100% when the suite exports the
    # metric family and 0% when it is absent (see _present_suite_percent).
    panels.append(
        bargauge_panel(
            27,
            "Tests Metrics Present by Suite",
            present_tests_by_suite,
            {"h": 7, "w": 6, "x": 0, "y": 81},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
        )
    )
    panels.append(
        bargauge_panel(
            28,
            "Checks Metrics Present by Suite",
            present_checks_by_suite,
            {"h": 7, "w": 6, "x": 6, "y": 81},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
        )
    )
    panels.append(
        bargauge_panel(
            29,
            "Coverage Metrics Present by Suite",
            present_coverage_by_suite,
            {"h": 7, "w": 6, "x": 12, "y": 81},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
        )
    )
    panels.append(
        bargauge_panel(
            30,
            "LOC Compliance Metrics Present by Suite",
            present_loc_by_suite,
            {"h": 7, "w": 6, "x": 18, "y": 81},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
        )
    )
    # SonarQube exporter health row.
    panels.append(
        stat_panel(
            31,
            "SonarQube API Up",
            "(max(sonarqube_up) or on() vector(0))",
            {"h": 6, "w": 4, "x": 0, "y": 88},
            unit="none",
            instant=True,
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": dark_red, "value": None},
                    {"color": dark_green, "value": 1},
                ],
            },
        )
    )
    panels.append(
        stat_panel(
            32,
            "Sonar Projects (Selected)",
            f'(count(max by (project_key) (sonarqube_project_quality_gate_pass{{project_key=~"{suite_var}"}})) or on() vector(0))',
            {"h": 6, "w": 4, "x": 4, "y": 88},
            unit="none",
            instant=True,
            thresholds=failures_thresholds,
        )
    )
    panels.append(
        stat_panel(
            33,
            "Sonar Gate Fetch Errors",
            "(max(sonarqube_quality_gate_fetch_errors_total) or on() vector(0))",
            {"h": 6, "w": 4, "x": 8, "y": 88},
            unit="none",
            instant=True,
            thresholds=failures_thresholds,
        )
    )
    sonar_status_mix_panel = pie_panel(
        34,
        "Sonar Gate Status Mix (Selected)",
        f'count by (status) (max by (project_key, status) (sonarqube_project_quality_gate_pass{{project_key=~"{suite_var}"}}))',
        {"h": 6, "w": 4, "x": 12, "y": 88},
    )
    # Label pie slices by the Sonar gate status label instead of the series name.
    sonar_status_mix_panel["targets"][0]["legendFormat"] = "{{status}}"
    panels.append(sonar_status_mix_panel)
    panels.append(
        state_timeline_panel(
            35,
            "Sonar Gate Health by Project",
            f'100 * max by (project_key) (sonarqube_project_quality_gate_pass{{project_key=~"{suite_var}"}})',
            {"h": 6, "w": 8, "x": 16, "y": 88},
            thresholds=success_thresholds,
            unit="percent",
            min_value=0,
            max_value=100,
            legend="{{project_key}}",
            description=(
                "SonarQube gate status over time by project. OK projects render as full healthy lanes; "
                "non-OK projects drop to red without disappearing."
            ),
        )
    )
    panels.append(
        bargauge_panel(
            148,
            "Test-Case Metrics Present by Suite",
            present_test_case_by_suite,
            {"h": 6, "w": 12, "x": 0, "y": 94},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
        )
    )
    panels.append(
        bargauge_panel(
            151,
            "Real Test Cases Present by Suite",
            real_test_case_by_suite,
            {"h": 6, "w": 12, "x": 12, "y": 94},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
        )
    )
    panels.append(
        bargauge_panel(
            149,
            "Recent Branch Evidence by Suite (30d)",
            recent_branch_evidence,
            {"h": 7, "w": 12, "x": 0, "y": 100},
            unit="none",
            instant=True,
            legend="{{suite}} · {{branch}}",
            sort_order="desc",
            thresholds=missing_thresholds,
            decimals=0,
            links=jenkins_suite_links(),
        )
    )
    panels.append(
        bargauge_panel(
            150,
            "Primary Branch Clean by Suite (30d)",
            primary_branch_clean_by_suite,
            {"h": 7, "w": 12, "x": 12, "y": 100},
            unit="percent",
            instant=True,
            legend="{{suite}}",
            sort_order="desc",
            thresholds=success_thresholds,
            decimals=0,
            links=jenkins_suite_links(),
        )
    )
    # Keep the first paint intentionally light. The detailed matrices remain
    # available, but they stay collapsed so browsers do not render every series
    # and legend before the operator asks for them.
panel_by_id = {panel["id"]: panel for panel in panels} visible_layout = { 2: {"h": 4, "w": 4, "x": 0, "y": 0}, 3: {"h": 4, "w": 4, "x": 4, "y": 0}, 4: {"h": 4, "w": 4, "x": 8, "y": 0}, 5: {"h": 4, "w": 4, "x": 12, "y": 0}, 6: {"h": 4, "w": 4, "x": 16, "y": 0}, 7: {"h": 4, "w": 4, "x": 20, "y": 0}, 8: {"h": 7, "w": 12, "x": 0, "y": 4}, 9: {"h": 7, "w": 12, "x": 12, "y": 4}, 17: {"h": 7, "w": 12, "x": 0, "y": 11}, 18: {"h": 7, "w": 12, "x": 12, "y": 11}, } compact_panels = [] for panel_id, grid in visible_layout.items(): panel = panel_by_id[panel_id] panel["gridPos"] = grid compact_panels.append(panel) def children(ids): return [panel_by_id[panel_id] for panel_id in ids] row_layout = { 11: {"h": 8, "w": 12, "x": 0, "y": 19}, 12: {"h": 8, "w": 12, "x": 12, "y": 19}, 13: {"h": 8, "w": 12, "x": 0, "y": 27}, 14: {"h": 8, "w": 12, "x": 12, "y": 27}, 145: {"h": 8, "w": 24, "x": 0, "y": 74}, 147: {"h": 8, "w": 8, "x": 0, "y": 83}, 146: {"h": 8, "w": 8, "x": 8, "y": 83}, 152: {"h": 8, "w": 8, "x": 16, "y": 83}, 27: {"h": 7, "w": 6, "x": 0, "y": 94}, 28: {"h": 7, "w": 6, "x": 6, "y": 94}, 29: {"h": 7, "w": 6, "x": 12, "y": 94}, 30: {"h": 7, "w": 6, "x": 18, "y": 94}, 148: {"h": 7, "w": 6, "x": 0, "y": 101}, 151: {"h": 7, "w": 6, "x": 6, "y": 101}, 149: {"h": 7, "w": 6, "x": 12, "y": 101}, 150: {"h": 7, "w": 6, "x": 18, "y": 101}, 31: {"h": 6, "w": 4, "x": 0, "y": 111}, 32: {"h": 6, "w": 4, "x": 4, "y": 111}, 33: {"h": 6, "w": 4, "x": 8, "y": 111}, 34: {"h": 6, "w": 4, "x": 12, "y": 111}, 35: {"h": 6, "w": 8, "x": 16, "y": 111}, } for panel_id, grid in row_layout.items(): panel_by_id[panel_id]["gridPos"] = grid compact_panels.extend( [ row_panel(500, "Reliability And Run History", 18, panels=children([11, 12, 13, 14])), row_panel( 501, "Check Failure Rates By Suite", 19, panels=children([130, 131, 132, 133, 134, 135, 136]), ), row_panel( 502, "Check Healthy Rates By Suite", 20, panels=children([138, 139, 140, 141, 142, 143, 144]), ), row_panel( 503, "Test Drilldowns And 
Problem Tests", 21, panels=children([145, 147, 146, 152]), ),
            # Collapsed row: telemetry-presence and branch-evidence gauges.
            row_panel(
                504,
                "Telemetry Completeness And Branches",
                22,
                panels=children([27, 28, 29, 30, 148, 151, 149, 150]),
            ),
            # Collapsed row: SonarQube exporter and project-health panels.
            row_panel(
                505,
                "SonarQube Project Health",
                23,
                panels=children([31, 32, 33, 34, 35]),
            ),
        ]
    )
    panels = compact_panels
    # With rows collapsed, render the bar gauges in the lighter "basic" mode.
    set_bargauge_display_mode(panels, "basic")
    return {
        "uid": "atlas-jobs",
        "title": "Atlas Testing",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-30d", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "testing", "quality-gate", "ci"],
        "templating": {
            "list": [
                testing_suite_variable(),
                testing_branch_variable(),
                testing_case_variable(),
                jenkins_base_variable(),
            ]
        },
    }


def build_testing_dashboard():
    """Public, read-only copy of the jobs dashboard in the public folder."""
    dashboard = build_jobs_dashboard()
    dashboard["uid"] = "atlas-testing"
    dashboard["folderUid"] = PUBLIC_DASHBOARD_FOLDER
    dashboard["editable"] = False
    return dashboard


def build_gitops_dashboard():
    """Build the private Atlas GitOps dashboard: Flux source/Kustomization/
    HelmRelease readiness stats, readiness history, and detail tables."""
    gitops_success_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "red", "value": None},
            {"color": "yellow", "value": 99},
            {"color": "blue", "value": 100},
        ],
    }
    # Table override: color the Ready (0/1) "Value" column red/blue.
    gitops_value_overrides = [
        {
            "matcher": {"id": "byName", "options": "Value"},
            "properties": [
                {
                    "id": "thresholds",
                    "value": {
                        "mode": "absolute",
                        "steps": [
                            {"color": "red", "value": None},
                            {"color": "blue", "value": 1},
                        ],
                    },
                }
            ],
        }
    ]
    # Join the info metric (metadata labels) with the ready metric (0/1 value)
    # so each table row carries both metadata and readiness.
    kustomization_table = (
        f"max by (namespace, name, path, source_namespace, source_name, revision, ready, reason) "
        f"(ananke_gitops_kustomization_info{{{GITOPS_SELECTOR}}}) "
        f"* on(namespace, name) group_left() max by (namespace, name) "
        f"(ananke_gitops_kustomization_ready{{{GITOPS_SELECTOR}}})"
    )
    helm_table = (
        f"max by (namespace, name, chart, version, app_version, revision, ready, reason) "
        f"(ananke_gitops_helmrelease_info{{{GITOPS_SELECTOR}}}) "
        f"* on(namespace, name) group_left() max by (namespace, name) "
        f"(ananke_gitops_helmrelease_ready{{{GITOPS_SELECTOR}}})"
    )
    source_table = (
        f"max by (namespace, name, url, branch, revision, ready, reason) "
        f"(ananke_gitops_flux_source_info{{{GITOPS_SELECTOR}}}) "
        f"* on(namespace, name) group_left() max by (namespace, name) "
        f"(ananke_gitops_flux_source_ready{{{GITOPS_SELECTOR}}})"
    )
    panels = [
        stat_panel(
            1,
            "Flux Source",
            f"{GITOPS_SOURCE_INFO} or on() vector(0)",
            {"h": 4, "w": 8, "x": 0, "y": 0},
            text_mode="name",
            targets=[{"expr": f"{GITOPS_SOURCE_INFO} or on() vector(0)", "refId": "A", "legendFormat": "{{branch}} · {{revision}}", "instant": True}],
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "red", "value": None},
                    {"color": "blue", "value": 1},
                ],
            },
            description="Branch and revision currently reported by Flux's GitRepository source.",
        ),
        stat_panel(
            2,
            "Kustomizations Ready",
            GITOPS_KUSTOMIZATION_READY_PCT,
            {"h": 4, "w": 4, "x": 8, "y": 0},
            unit="percent",
            decimals=1,
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "red", "value": None},
                    {"color": "yellow", "value": 99},
                    {"color": "blue", "value": 100},
                ],
            },
        ),
        stat_panel(
            3,
            "Kustomizations Suspended",
            GITOPS_KUSTOMIZATION_SUSPENDED,
            {"h": 4, "w": 4, "x": 12, "y": 0},
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "blue", "value": None},
                    {"color": "red", "value": 1},
                ],
            },
        ),
        stat_panel(
            4,
            "HelmReleases Ready",
            GITOPS_HELM_READY_PCT,
            {"h": 4, "w": 4, "x": 16, "y": 0},
            unit="percent",
            decimals=1,
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "red", "value": None},
                    {"color": "yellow", "value": 99},
                    {"color": "blue", "value": 100},
                ],
            },
        ),
        stat_panel(
            5,
            "HelmReleases Suspended",
            GITOPS_HELM_SUSPENDED,
            {"h": 4, "w": 4, "x": 20, "y": 0},
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "blue", "value": None},
                    {"color": "red", "value": 1},
                ],
            },
        ),
        stat_panel(
            6,
            "GitOps Exporter",
            None,
            {"h": 4, "w": 8, "x": 0, "y": 4},
            text_mode="name_and_value",
            targets=[
                {"expr": GITOPS_SCRAPE_SUCCESS, "refId": "A", "legendFormat": "Scrape Success", "instant": True},
                {"expr": GITOPS_LAST_SCRAPE_AGE, "refId": "B", "legendFormat": "Sample Age", "instant": True},
            ],
            field_overrides=[
                {"matcher": {"id": "byName", "options": "Sample Age"}, "properties": [{"id": "unit", "value": "s"}]},
                {
                    "matcher": {"id": "byName", "options": "Scrape Success"},
                    "properties": [
                        {
                            "id": "thresholds",
                            "value": {
                                "mode": "absolute",
                                "steps": [
                                    {"color": "red", "value": None},
                                    {"color": "blue", "value": 1},
                                ],
                            },
                        }
                    ],
                },
            ],
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "red", "value": None},
                    {"color": "blue", "value": 1},
                ],
            },
        ),
        state_timeline_panel(
            7,
            "Readiness History",
            (
                f'label_replace({GITOPS_KUSTOMIZATION_READY_PCT}, "kind", "Kustomizations", "__name__", ".*") '
                f'or label_replace({GITOPS_HELM_READY_PCT}, "kind", "HelmReleases", "__name__", ".*")'
            ),
            {"h": 4, "w": 16, "x": 8, "y": 4},
            thresholds=gitops_success_thresholds,
            legend="{{kind}}",
            description="Ready percentage over time for Flux Kustomizations and HelmReleases.",
        ),
        table_panel(
            8,
            "Flux Sources",
            source_table,
            {"h": 8, "w": 24, "x": 0, "y": 8},
            instant=True,
            format="table",
            transformations=[{"id": "labelsToFields", "options": {}}],
            field_overrides=gitops_value_overrides,
            description="A Value of 1 means Ready; 0 means not Ready.",
        ),
        table_panel(
            9,
            "Kustomizations",
            kustomization_table,
            {"h": 12, "w": 24, "x": 0, "y": 16},
            instant=True,
            format="table",
            transformations=[{"id": "labelsToFields", "options": {}}],
            field_overrides=gitops_value_overrides,
            description="A Value of 1 means Ready; 0 means not Ready. The ready/reason labels come from Flux status.conditions.",
        ),
        table_panel(
            10,
            "HelmReleases",
            helm_table,
            {"h": 12, "w": 24, "x": 0, "y": 28},
            instant=True,
            format="table",
            transformations=[{"id": "labelsToFields", "options": {}}],
            field_overrides=gitops_value_overrides,
            description="A Value of 1 means Ready; 0 means not Ready. Chart/version/app_version are included when Flux reports them.",
        ),
    ]
    return {
        "uid": "atlas-gitops",
        "title": "Atlas GitOps",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "gitops", "flux"],
    }


def build_power_dashboard():
    """Build the Atlas power/environment dashboard (UPS load and history,
    tent climate, fan activity)."""
    panels = []
    # Map the on-battery metric value (0/1) to human-readable status text.
    status_mapping = [
        {
            "type": "value",
            "options": {
                "0": {"text": "⚡ Charging"},
                "1": {"text": "🔋 Discharging"},
            },
        }
    ]
    panels.append(
        stat_panel(
            1,
            "UPS Current Load",
            None,
            {"h": 8, "w": 12, "x": 0, "y": 0},
            unit="none",
            decimals=1,
            text_mode="name_and_value",
            targets=[
                {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True},
                {"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge", "instant": True},
                {"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True},
                {"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True},
                {"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge", "instant": True},
                {"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True},
            ],
            field_overrides=[
                {
                    "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Draw (W)"},
                    "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
                },
                {
                    "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)"},
                    "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}],
                },
                {
                    "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge"},
                    "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}],
                },
{ "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge"}, "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, { "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Status"}, "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], }, { "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Status"}, "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], }, ], orientation="horizontal", wide_layout=True, description=( "Per-UPS live snapshot: current draw in watts, estimated battery runtime if discharge started now, and charging/discharging status." ), ) ) panels.append( apply_bar_timeseries_style( timeseries_panel( 2, "UPS History (Power Draw)", None, {"h": 8, "w": 12, "x": 12, "y": 0}, unit="watt", targets=[ {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME}, {"refId": "B", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": ANANKE_UPS_TETHYS_NAME}, ], field_overrides=fixed_color_overrides( {ANANKE_UPS_DB_NAME: "dark-blue", ANANKE_UPS_TETHYS_NAME: "dark-yellow"} ), legend_display="table", legend_placement="right", description="Historical UPS power consumption in watts for titan-db and tethys.", ), stacked=False, ) ) panels.append( stat_panel( 3, "Current Climate", None, {"h": 8, "w": 12, "x": 0, "y": 8}, unit="none", decimals=2, text_mode="name_and_value", targets=[ {"refId": "A", "expr": CLIMATE_TEMP_MAX, "legendFormat": "Tent Temp (°C)", "instant": True}, {"refId": "B", "expr": CLIMATE_PRESSURE_CURRENT, "legendFormat": "Tent VPD (kPa)", "instant": True}, {"refId": "C", "expr": CLIMATE_HUMIDITY_MAX, "legendFormat": "Tent RH (%)", "instant": True}, {"refId": "D", "expr": CLIMATE_DEWPOINT_CURRENT, "legendFormat": "Dew Point (°C)", "instant": True}, 
], field_overrides=[ {"matcher": {"id": "byName", "options": "Tent Temp (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]}, {"matcher": {"id": "byName", "options": "Tent VPD (kPa)"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]}, {"matcher": {"id": "byName", "options": "Tent RH (%)"}, "properties": [{"id": "unit", "value": "percent"}]}, {"matcher": {"id": "byName", "options": "Dew Point (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]}, ], orientation="horizontal", wide_layout=True, description="Current tent temperature, humidity, VPD, and dew point. These render once Typhon climate telemetry is online.", ) ) panels.append( timeseries_panel( 4, "Climate History", None, {"h": 8, "w": 12, "x": 12, "y": 8}, unit="celsius", targets=[ {"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "Temperature (°C)"}, {"refId": "B", "expr": CLIMATE_HUMIDITY_SERIES, "legendFormat": "Humidity (%)"}, {"refId": "C", "expr": CLIMATE_PRESSURE_SERIES, "legendFormat": "VPD (kPa)"}, {"refId": "D", "expr": CLIMATE_DEWPOINT_SERIES, "legendFormat": "Dew Point (°C)"}, ], field_overrides=[ { "matcher": {"id": "byName", "options": "Humidity (%)"}, "properties": [ {"id": "unit", "value": "percent"}, ], }, { "matcher": {"id": "byName", "options": "VPD (kPa)"}, "properties": [ {"id": "unit", "value": "none"}, {"id": "custom.axisPlacement", "value": "right"}, {"id": "custom.axisLabel", "value": "kPa"}, {"id": "decimals", "value": 2}, ], } ], legend_display="table", legend_placement="right", description="Two-axis chart: tent temperature/humidity/dew point (left axis) and tent VPD in kPa (right axis).", ) ) panels.append( stat_panel( 5, "Fan Activity", None, {"h": 8, "w": 12, "x": 0, "y": 16}, unit="none", decimals=0, text_mode="name_and_value", targets=[ {"refId": "A", "expr": f"round({CLIMATE_FAN_OUTLET_CURRENT})", "legendFormat": "Inside Outlet", "instant": True}, {"refId": "B", "expr": f"round({CLIMATE_FAN_INSIDE_INLET_CURRENT})", "legendFormat": "Inside Inlet", 
                "instant": True},
                {"refId": "C", "expr": f"round({CLIMATE_FAN_OUTSIDE_INLET_CURRENT})", "legendFormat": "Outside Inlet",
                 "instant": True},
                {"refId": "D", "expr": f"round({CLIMATE_FAN_INTERIOR_CURRENT})", "legendFormat": "Interior Fans",
                 "instant": True},
            ],
            # Fan levels run 0-10: green below 7, yellow from 7, red from 9.
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 7},
                    {"color": "red", "value": 9},
                ],
            },
            orientation="horizontal",
            wide_layout=True,
            description="Current fan activity levels (0-10): inside outlet, inside inlet, outside inlet, and interior fans.",
        )
    )
    # Panel 6: history companion to the fan snapshot panel above, capped at 10.
    panels.append(
        timeseries_panel(
            6,
            "Fan Intensity History",
            None,
            {"h": 8, "w": 12, "x": 12, "y": 16},
            unit="none",
            max_value=10,
            targets=[
                {"refId": "A", "expr": CLIMATE_FAN_OUTLET_SERIES, "legendFormat": "Inside Outlet"},
                {"refId": "B", "expr": CLIMATE_FAN_INSIDE_INLET_SERIES, "legendFormat": "Inside Inlet"},
                {"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Outside Inlet"},
                {"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior Fans"},
            ],
            legend_display="table",
            legend_placement="right",
            description="Historical fan activity for all four fan groups (0-10 scale).",
        )
    )
    # Dashboard envelope; serialized to JSON by write_json() below.
    return {
        "uid": "atlas-power",
        "title": "Atlas Power",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-24h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "power", "climate"],
    }


def build_gpu_dashboard() -> dict:
    """Build the private Atlas GPU dashboard (namespace and node utilisation).

    Returns the Grafana dashboard JSON model as a dict; placed in the
    PRIVATE_FOLDER and driven by the $namespace_scope_gpu template variable.
    """
    panels = []
    # Namespace-scoped panels interpolate this Grafana template variable.
    gpu_scope = "$namespace_scope_gpu"
    # Panel 1: pie of GPU share per namespace within the selected scope.
    panels.append(
        pie_panel(
            1,
            "Namespace GPU Share",
            namespace_gpu_share_expr(gpu_scope),
            {"h": 8, "w": 12, "x": 0, "y": 0},
            links=namespace_scope_links("namespace_scope_gpu"),
            description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
        )
    )
    # Panel 2: utilisation over time, one series per namespace.
    panels.append(
        timeseries_panel(
            2,
            "GPU Util by Namespace",
            namespace_gpu_usage_instant(gpu_scope),
            {"h": 8, "w": 12, "x": 12, "y": 0},
            unit="percent",
            legend="{{namespace}}",
            legend_display="table",
            legend_placement="right",
        )
    )
    # Panel 3: utilisation over time, one series per node (DCGM "Hostname" label).
    panels.append(
        timeseries_panel(
            3,
            "GPU Util by Node",
            gpu_util_by_hostname(),
            {"h": 8, "w": 12, "x": 0, "y": 8},
            unit="percent",
            legend="{{Hostname}}",
            legend_display="table",
            legend_placement="right",
        )
    )
    # Panel 4: top-10 pods by summed DCGM GPU utilisation.
    panels.append(
        table_panel(
            4,
            "Top Pods by GPU Util",
            'topk(10, sum(DCGM_FI_DEV_GPU_UTIL{pod!=""}) by (namespace,pod,Hostname))',
            {"h": 8, "w": 12, "x": 12, "y": 8},
            unit="percent",
            transformations=[{"id": "labelsToFields", "options": {}}],
        )
    )
    return {
        "uid": "atlas-gpu",
        "title": "Atlas GPU",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-12h", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "gpu"],
        "templating": {
            "list": [
                # NOTE(review): only namespace_scope_gpu is referenced by the panels
                # above; the cpu/ram variables look copied from a sibling dashboard
                # -- confirm whether they are needed here.
                namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
                namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
                namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
            ]
        },
    }


# Registry: dashboard UID -> its builder function and target ConfigMap path.
# main() iterates this to (re)build JSON and render ConfigMaps.
DASHBOARDS = {
    "atlas-overview": {
        "builder": build_overview,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-overview.yaml",
    },
    "atlas-pods": {
        "builder": build_pods_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-pods.yaml",
    },
    "atlas-nodes": {
        "builder": build_nodes_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-nodes.yaml",
    },
    "atlas-storage": {
        "builder": build_storage_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-storage.yaml",
    },
    "atlas-network": {
        "builder": build_network_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-network.yaml",
    },
    "atlas-mail": {
        "builder": build_mail_dashboard,
        "configmap": ROOT /
        "services" / "monitoring" / "grafana-dashboard-mail.yaml",
    },
    "atlas-testing": {
        "builder": build_testing_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml",
    },
    "atlas-gitops": {
        "builder": build_gitops_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gitops.yaml",
    },
    "atlas-power": {
        "builder": build_power_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-power.yaml",
    },
    "atlas-gpu": {
        "builder": build_gpu_dashboard,
        "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
    },
}


def write_json(uid: str, data: dict) -> None:
    """Serialize a freshly built dashboard dict to DASHBOARD_DIR/<uid>.json.

    The shared status palette is applied before writing so the on-disk JSON
    is already normalized to the Atlas color tones.
    """
    DASHBOARD_DIR.mkdir(parents=True, exist_ok=True)
    path = DASHBOARD_DIR / f"{uid}.json"
    data = apply_global_status_palette(data)
    path.write_text(json.dumps(data, indent=2) + "\n")


def render_configmap(uid: str, info: dict) -> None:
    """Render DASHBOARD_DIR/<uid>.json into the ConfigMap at info["configmap"].

    The palette pass is applied again here (it is idempotent) so that JSON
    edited by hand -- i.e. a run without --build -- is still normalized
    before being embedded in the ConfigMap.
    """
    json_path = DASHBOARD_DIR / f"{uid}.json"
    payload = json.dumps(apply_global_status_palette(json.loads(json_path.read_text())), indent=2)
    # Indent every payload line so it nests under the YAML block scalar
    # ("<key>: |") in CONFIG_TEMPLATE.
    indented = "\n".join("    " + line for line in payload.splitlines())
    output_path = info["configmap"]
    content = CONFIG_TEMPLATE.format(
        relative_path=output_path.relative_to(ROOT),
        name=output_path.stem,
        key=json_path.name,
        payload=indented,
    )
    output_path.write_text(content)
    print(f"Rendered {json_path.name} -> {output_path.relative_to(ROOT)}")


def main() -> None:
    """CLI entry point: with --build, regenerate JSON first; always re-render ConfigMaps."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--build", action="store_true", help="Regenerate dashboard JSON files from builders")
    args = parser.parse_args()
    if args.build:
        for uid, info in DASHBOARDS.items():
            write_json(uid, info["builder"]())
    for uid, info in DASHBOARDS.items():
        render_configmap(uid, info)


if __name__ == "__main__":
    main()