diff --git a/Jenkinsfile b/Jenkinsfile index fe976922..0b4a40c2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -7,7 +7,6 @@ pipeline { apiVersion: v1 kind: Pod spec: - serviceAccountName: "jenkins" nodeSelector: hardware: rpi5 kubernetes.io/arch: arm64 @@ -24,9 +23,13 @@ spec: environment { PIP_DISABLE_PIP_VERSION_CHECK = '1' PYTHONUNBUFFERED = '1' - SUITE_NAME = 'titan-iac' + SUITE_NAME = 'titan_iac' PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' - VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428' + QUALITY_GATE_SONARQUBE_ENFORCE = '1' + QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json' + QUALITY_GATE_IRONBANK_ENFORCE = '1' + QUALITY_GATE_IRONBANK_REQUIRED = '0' + QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json' } stages { stage('Checkout') { @@ -39,6 +42,83 @@ spec: sh 'pip install --no-cache-dir -r ci/requirements.txt' } } + stage('Collect SonarQube evidence') { + steps { + sh ''' + set -eu + mkdir -p build + python3 - <<'PY' +import base64 +import json +import os +import urllib.parse +import urllib.request + +host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/') +project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip() +token = os.getenv('SONARQUBE_TOKEN', '').strip() +report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json') + +payload = { + "status": "ERROR", + "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY", +} +if host and project_key: + query = urllib.parse.urlencode({"projectKey": project_key}) + request = urllib.request.Request( + f"{host}/api/qualitygates/project_status?{query}", + method="GET", + ) + if token: + encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8") + request.add_header("Authorization", f"Basic {encoded}") + try: + with urllib.request.urlopen(request, timeout=12) as response: + payload = json.loads(response.read().decode("utf-8")) + except Exception as exc: # noqa: BLE001 + payload = {"status": "ERROR", "error": str(exc)} + +with open(report_path, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\\n") +PY + ''' + } + } + stage('Collect IronBank evidence') { + steps { + sh ''' + set -eu + mkdir -p build + python3 - <<'PY' +import json +import os +from pathlib import Path + +report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json')) +if report_path.exists(): + raise SystemExit(0) + +status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip() +compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower() +payload = { + "status": status or "unknown", + "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None, +} +payload = {k: v for k, v in payload.items() if v is not None} +if "status" not in payload: + payload["status"] = "unknown" +payload["note"] = ( + "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT " + "or write build/ironbank-compliance.json in image-building repos." +) + +report_path.parent.mkdir(parents=True, exist_ok=True) +report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8") +PY + ''' + } + } stage('Run quality gate') { steps { sh ''' @@ -76,7 +156,7 @@ spec: script { env.FLUX_BRANCH = sh( returnStdout: true, - script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml''' + script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml" ).trim() if (!env.FLUX_BRANCH) { error('Flux branch not found in gotk-sync.yaml') @@ -93,16 +173,14 @@ spec: } } steps { - container('jnlp') { - withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { - sh ''' - set +x - git config user.email "jenkins@bstein.dev" - git config user.name "jenkins" - git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git - git push origin HEAD:${FLUX_BRANCH} - ''' - } + withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { + sh ''' + set +x + git config user.email "jenkins@bstein.dev" + git config user.name "jenkins" + git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git + git push origin HEAD:${FLUX_BRANCH} + ''' } } } diff --git a/ci/Jenkinsfile.titan-iac b/ci/Jenkinsfile.titan-iac index bf639480..88b94106 100644 --- a/ci/Jenkinsfile.titan-iac +++ b/ci/Jenkinsfile.titan-iac @@ -6,7 +6,6 @@ pipeline { apiVersion: v1 kind: Pod spec: - serviceAccountName: "jenkins" nodeSelector: hardware: rpi5 kubernetes.io/arch: arm64 @@ -23,9 +22,13 @@ spec: environment { PIP_DISABLE_PIP_VERSION_CHECK = '1' PYTHONUNBUFFERED = '1' - SUITE_NAME = 'titan-iac' + SUITE_NAME = 'titan_iac' PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' - VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428' + QUALITY_GATE_SONARQUBE_ENFORCE = '1' + QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json' + QUALITY_GATE_IRONBANK_ENFORCE = '1' + QUALITY_GATE_IRONBANK_REQUIRED = '0' + QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json' } stages { stage('Checkout') { @@ -38,6 +41,83 @@ spec: sh 'pip install --no-cache-dir -r ci/requirements.txt' } } + stage('Collect SonarQube evidence') { + steps { + sh ''' + set -eu + mkdir -p build + python3 - <<'PY' +import base64 +import json +import os +import urllib.parse +import urllib.request + +host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/') +project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip() +token = os.getenv('SONARQUBE_TOKEN', '').strip() +report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json') + +payload = { + "status": "ERROR", + "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY", +} +if host and project_key: + query = urllib.parse.urlencode({"projectKey": project_key}) + request = urllib.request.Request( + f"{host}/api/qualitygates/project_status?{query}", + method="GET", + ) + if token: + encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8") + request.add_header("Authorization", f"Basic {encoded}") + try: + with urllib.request.urlopen(request, timeout=12) as response: + payload = json.loads(response.read().decode("utf-8")) + except Exception as exc: # noqa: BLE001 + payload = {"status": "ERROR", "error": str(exc)} + +with open(report_path, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\\n") +PY + ''' + } + } + stage('Collect IronBank evidence') { + steps { + sh ''' + set -eu + mkdir -p build + python3 - <<'PY' +import json +import os +from pathlib import Path + +report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json')) +if report_path.exists(): + raise SystemExit(0) + +status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip() +compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower() +payload = { + "status": status or "unknown", + "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None, +} +payload = {k: v for k, v in payload.items() if v is not None} +if "status" not in payload: + payload["status"] = "unknown" +payload["note"] = ( + "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT " + "or write build/ironbank-compliance.json in image-building repos." +) + +report_path.parent.mkdir(parents=True, exist_ok=True) +report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8") +PY + ''' + } + } stage('Run quality gate') { steps { sh ''' @@ -75,7 +155,7 @@ spec: script { env.FLUX_BRANCH = sh( returnStdout: true, - script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml''' + script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml" ).trim() if (!env.FLUX_BRANCH) { error('Flux branch not found in gotk-sync.yaml') @@ -92,16 +172,14 @@ spec: } } steps { - container('jnlp') { - withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { - sh ''' - set +x - git config user.email "jenkins@bstein.dev" - git config user.name "jenkins" - git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git - git push origin HEAD:${FLUX_BRANCH} - ''' - } + withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { + sh ''' + set +x + git config user.email "jenkins@bstein.dev" + git config user.name "jenkins" + git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git + git push origin HEAD:${FLUX_BRANCH} + ''' } } } diff --git a/ci/scripts/publish_test_metrics.py b/ci/scripts/publish_test_metrics.py index 9ac417ee..514d7af2 100644 --- a/ci/scripts/publish_test_metrics.py +++ b/ci/scripts/publish_test_metrics.py @@ -12,20 +12,24 @@ import xml.etree.ElementTree as ET def _escape_label(value: str) -> str: + """Escape a Prometheus label value without changing its content.""" return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"') def _label_str(labels: dict[str, str]) -> str: + """Render a stable Prometheus label set from a mapping.""" parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val] return "{" + ",".join(parts) + "}" if parts else "" def _read_text(url: str) -> str: + """Fetch a plain-text response body from the given URL.""" with urllib.request.urlopen(url, timeout=10) as response: return response.read().decode("utf-8") def _post_text(url: str, payload: str) -> None: + """POST a plain-text payload and fail on any 4xx/5xx response.""" request = urllib.request.Request( url, data=payload.encode("utf-8"), @@ -38,6 +42,7 @@ def _post_text(url: str, payload: str) -> None: def _parse_junit(path: str) -> dict[str, int]: + """Parse a JUnit XML file into aggregate test counters.""" if not os.path.exists(path): return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0} @@ -64,6 +69,7 @@ def _parse_junit(path: str) -> dict[str, int]: def _collect_junit_totals(pattern: str) -> dict[str, int]: + """Sum JUnit counters across every XML file matching the pattern.""" totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0} for path in sorted(glob(pattern)): parsed = _parse_junit(path) @@ -73,6 +79,7 @@ def _collect_junit_totals(pattern: str) -> dict[str, int]: def _read_exit_code(path: str) -> int: + """Read the quality-gate exit code, defaulting to failure if missing.""" try: with open(path, "r", encoding="utf-8") as handle: return int(handle.read().strip()) @@ -81,6 +88,7 @@ def _read_exit_code(path: str) -> int: def _load_summary(path: str) -> dict: + """Load the JSON quality-gate summary, returning an empty mapping on error.""" try: with open(path, "r", encoding="utf-8") as handle: return json.load(handle) @@ -88,40 +96,26 @@ def _load_summary(path: str) -> dict: return {} -def _summary_coverage_percent(summary: dict | None) -> float: - if not isinstance(summary, dict): - return 0.0 - results = summary.get("results", []) - if not isinstance(results, list): - return 0.0 - for result in results: - if not isinstance(result, dict): - continue - if result.get("name") != "coverage": - continue - return 95.0 if result.get("status") == "ok" else 0.0 +def _summary_float(summary: dict, key: str) -> float: + """Extract a float-like value from the summary, defaulting to 0.0.""" + value = summary.get(key) + if isinstance(value, (int, float)): + return float(value) return 0.0 -def _summary_source_lines_over_500(summary: dict | None) -> int: - if not isinstance(summary, dict): - return 0 - results = summary.get("results", []) - if not isinstance(results, list): - return 0 - for result in results: - if not isinstance(result, dict): - continue - if result.get("name") != "hygiene": - continue - issues = result.get("issues", []) - if not isinstance(issues, list): - return 0 - return sum(1 for issue in issues if isinstance(issue, str) and "500" in issue) +def _summary_int(summary: dict, key: str) -> int: + """Extract an int-like value from the summary, defaulting to 0.""" + value = summary.get(key) + if isinstance(value, int): + return value + if isinstance(value, float): + return int(value) return 0 def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float: + """Return the current counter value for a labeled metric if present.""" text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics") for line in text.splitlines(): if not line.startswith(metric + "{"): @@ -146,10 +140,11 @@ def _build_payload( failed_count: int, branch: str, build_number: str, - workspace_coverage_percent: float, - source_lines_over_500: int, summary: dict | None = None, + workspace_line_coverage_percent: float = 0.0, + source_lines_over_500: int = 0, ) -> str: + """Build the Pushgateway payload for the current suite run.""" passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0) build_labels = _label_str( { @@ -173,9 +168,9 @@ def _build_payload( "# TYPE titan_iac_quality_gate_build_info gauge", f"titan_iac_quality_gate_build_info{build_labels} 1", "# TYPE platform_quality_gate_workspace_line_coverage_percent gauge", - f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_coverage_percent:.3f}', + f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_line_coverage_percent:.3f}', "# TYPE platform_quality_gate_source_lines_over_500_total gauge", - f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {max(source_lines_over_500, 0)}', + f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}', ] results = summary.get("results", []) if isinstance(summary, dict) else [] if results: @@ -192,7 +187,8 @@ def _build_payload( def main() -> int: - suite = os.getenv("SUITE_NAME", "titan-iac") + """Publish the quality-gate metrics and print a compact run summary.""" + suite = os.getenv("SUITE_NAME", "titan_iac") pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091") job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci") junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml")) @@ -205,8 +201,8 @@ def main() -> int: exit_code = _read_exit_code(exit_code_path) status = "ok" if exit_code == 0 else "failed" summary = _load_summary(summary_path) - workspace_coverage_percent = _summary_coverage_percent(summary) - source_lines_over_500 = _summary_source_lines_over_500(summary) + workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent") + source_lines_over_500 = _summary_int(summary, "source_lines_over_500") ok_count = int( _fetch_existing_counter( @@ -235,9 +231,9 @@ def main() -> int: failed_count=failed_count, branch=branch, build_number=build_number, - workspace_coverage_percent=workspace_coverage_percent, - source_lines_over_500=source_lines_over_500, summary=summary, + workspace_line_coverage_percent=workspace_line_coverage_percent, + source_lines_over_500=source_lines_over_500, ) push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}" _post_text(push_url, payload) @@ -249,11 +245,11 @@ def main() -> int: "tests_failed": tests["failures"], "tests_error": tests["errors"], "tests_skipped": tests["skipped"], - "workspace_coverage_percent": round(workspace_coverage_percent, 3), - "source_lines_over_500": source_lines_over_500, "ok_count": ok_count, "failed_count": failed_count, "checks_recorded": len(summary.get("results", [])) if isinstance(summary, dict) else 0, + "workspace_line_coverage_percent": workspace_line_coverage_percent, + "source_lines_over_500": source_lines_over_500, } print(json.dumps(summary, sort_keys=True)) return 0 diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index c2494381..1c22753c 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -21,6 +21,7 @@ resources: - sui-metrics/kustomization.yaml - openldap/kustomization.yaml - keycloak/kustomization.yaml + - quality/kustomization.yaml - oauth2-proxy/kustomization.yaml - mailu/kustomization.yaml - jenkins/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/quality/kustomization.yaml b/clusters/atlas/flux-system/applications/quality/kustomization.yaml new file mode 100644 index 00000000..fb9e154d --- /dev/null +++ b/clusters/atlas/flux-system/applications/quality/kustomization.yaml @@ -0,0 +1,35 @@ +# clusters/atlas/flux-system/applications/quality/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: quality + namespace: flux-system +spec: + interval: 10m + path: ./services/quality + prune: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: quality + dependsOn: + - name: traefik + - name: cert-manager + - name: keycloak + - name: vault + - name: postgres + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: sonarqube + namespace: quality + - apiVersion: apps/v1 + kind: Deployment + name: sonarqube-exporter + namespace: quality + - apiVersion: apps/v1 + kind: Deployment + name: oauth2-proxy-sonarqube + namespace: quality + wait: false + timeout: 20m diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 8853f903..a1c867b2 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -382,25 +382,84 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"' NAMESPACE_SCOPE_ALL = 'namespace=~".*"' NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"' NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"] -GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"' -GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}" -GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}" -GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})" -GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})" -GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1" -GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})" -GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})" -GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})" -GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600" -GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600" -GLUE_STALE_WINDOW_SEC = 36 * 3600 -GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})" -GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)" -GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})" -GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})" -GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE})) or on() vector(0)" -GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE}) or on() vector(0)" -GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED}) or on() vector(0)" + + +def promql_task_regex(tasks): + """Return a PromQL-safe regex alternation for the provided task names.""" + return "|".join(tasks) + + +ARIADNE_ALL_SCHEDULE_TASKS = [ + "schedule.mailu_sync", + "schedule.nextcloud_sync", + "schedule.nextcloud_cron", + "schedule.nextcloud_maintenance", + "schedule.vaultwarden_sync", + "schedule.wger_user_sync", + "schedule.wger_admin", + "schedule.firefly_user_sync", + "schedule.firefly_cron", + "schedule.vault_k8s_auth", + "schedule.vault_oidc", + "schedule.comms_guest_name", + "schedule.comms_pin_invite", + "schedule.comms_reset_room", + "schedule.comms_seed_room", + "schedule.pod_cleaner", + "schedule.opensearch_prune", + "schedule.image_sweeper", + "schedule.metis_k3s_token_sync", + "schedule.platform_quality_suite_probe", +] +ARIADNE_FAST_SCHEDULE_TASKS = [ + task + for task in ARIADNE_ALL_SCHEDULE_TASKS + if task not in {"schedule.comms_pin_invite", "schedule.comms_reset_room"} +] +ARIADNE_SCHEDULE_HEALTH_TASKS = [ + "schedule.nextcloud_sync", + "schedule.nextcloud_cron", + "schedule.vaultwarden_sync", + "schedule.wger_user_sync", + "schedule.firefly_user_sync", + "schedule.comms_guest_name", + "schedule.comms_seed_room", + "schedule.pod_cleaner", + "schedule.image_sweeper", + "schedule.metis_k3s_token_sync", + "schedule.platform_quality_suite_probe", +] +ARIADNE_ALL_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_ALL_SCHEDULE_TASKS)})$"' +ARIADNE_FAST_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_FAST_SCHEDULE_TASKS)})$"' +ARIADNE_SCHEDULE_HEALTH_FILTER = f'task=~"^({promql_task_regex(ARIADNE_SCHEDULE_HEALTH_TASKS)})$"' +ARIADNE_ALL_SCHEDULE_NEXT_RUN = f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +ARIADNE_ALL_SCHEDULE_LAST_SUCCESS = ( + f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +) +ARIADNE_ALL_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +ARIADNE_ALL_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +ARIADNE_FAST_SCHEDULE_LAST_SUCCESS = ( + f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}" +) +ARIADNE_FAST_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}" +ARIADNE_FAST_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_FAST_SCHEDULE_FILTER}}}" +ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS = ( + f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}" +) +ARIADNE_HEALTH_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}" +ARIADNE_SCHEDULE_LAST_SUCCESS_AGE = f"(time() - {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})" +ARIADNE_SCHEDULE_LAST_ERROR_AGE = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR})" +ARIADNE_SCHEDULE_LAST_SUCCESS_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) / 3600" +ARIADNE_SCHEDULE_LAST_ERROR_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_ERROR_AGE}) / 3600" +ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 3600 +ARIADNE_SCHEDULE_STALE = f"(({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC})" +ARIADNE_SCHEDULE_MISSING = ( + f"({ARIADNE_ALL_SCHEDULE_NEXT_RUN} unless on(task) {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})" +) +ARIADNE_SCHEDULE_FAILED = f"((1 - {ARIADNE_HEALTH_SCHEDULE_LAST_STATUS}) > bool 0)" +ARIADNE_SCHEDULE_STALE_COUNT = f"sum({ARIADNE_SCHEDULE_STALE}) or on() vector(0)" +ARIADNE_SCHEDULE_MISSING_COUNT = f"count({ARIADNE_SCHEDULE_MISSING}) or on() vector(0)" +ARIADNE_SCHEDULE_FAILED_COUNT = f"sum({ARIADNE_SCHEDULE_FAILED}) or on() vector(0)" ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))' ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))' ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))' @@ -410,195 +469,118 @@ ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_to ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))' ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))' ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))' -ARIADNE_TASK_ATTEMPTS_SERIES = 'sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)' -ARIADNE_TASK_FAILURES_SERIES = ( - 'sum(increase(ariadne_task_runs_total{status="error"}[$__interval])) or on() vector(0)' -) +ARIADNE_TASK_ATTEMPTS_SERIES = 'sum(increase(ariadne_task_runs_total[$__interval]))' +ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="error"}[$__interval]))' ARIADNE_TASK_WARNINGS_SERIES = ( 'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)' ) -ARIADNE_SCHEDULE_TASK_FILTER = 'task=~"^schedule\\..+$"' -ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = ( - f"(time() - ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) / 3600" -) -ARIADNE_SCHEDULE_LAST_ERROR_HOURS = ( - f"(time() - ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) / 3600" -) +ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}) / 3600" +ARIADNE_SCHEDULE_LAST_ERROR_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR}) / 3600" ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = ( - f"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600" + f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600" ) ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = ( - f"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600" + f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_ERROR}[$__range])) / 3600" ) -ARIADNE_SCHEDULE_NEXT_RUN_HOURS = ( - f"((ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}} - time()) / 3600)" -) -ARIADNE_SCHEDULE_TASK_INDEX = f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}" -ARIADNE_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}" -ARIADNE_SCHEDULE_SIGNAL_COUNT = ( - f"count(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) or on() vector(0)" -) -ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 3600 -ARIADNE_SCHEDULE_STALE_COUNT = ( - f"sum(((time() - ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC}))" - " or on() vector(0)" -) -ARIADNE_SCHEDULE_MISSING_SUCCESS_COUNT = ( - f"count((ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}} unless on(task) " - f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}})) or on() vector(0)" -) -ARIADNE_SCHEDULE_FAILED_LAST_COUNT = ( - f"sum(((1 - ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) > bool 0)) or on() vector(0)" -) -ARIADNE_SCHEDULE_RUNS_RANGE = ( - f'sum by (task) (increase(ariadne_task_runs_total{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))' -) -ARIADNE_SCHEDULE_ERRORS_RANGE = ( - f'sum by (task) (increase(ariadne_task_runs_total{{status="error",{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))' -) -ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS_FALLBACK = ( - f"({ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS}) or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX} + 999)" -) -ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS_FALLBACK = ( - f"({ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS}) or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX} + 999)" -) -ARIADNE_SCHEDULE_LAST_STATUS_FALLBACK = ( - f"({ARIADNE_SCHEDULE_LAST_STATUS}) or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX} - 1)" -) -ARIADNE_SCHEDULE_RUNS_RANGE_FALLBACK = ( - f"({ARIADNE_SCHEDULE_RUNS_RANGE}) or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX})" -) -ARIADNE_SCHEDULE_ERRORS_RANGE_FALLBACK = ( - f"({ARIADNE_SCHEDULE_ERRORS_RANGE}) or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX})" -) -JENKINS_CLEANUP_SIGNAL_COUNT = ( - "count(ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) or on() vector(0)" -) -JENKINS_CLEANUP_RUNS_RANGE = ( - "sum by (mode, status) (increase(ariadne_jenkins_workspace_cleanup_runs_total[$__range]))" -) -JENKINS_CLEANUP_OBJECTS_RANGE = ( - "sum by (kind, action, mode) (increase(ariadne_jenkins_workspace_cleanup_objects_total[$__range]))" -) -JENKINS_CLEANUP_LAST_RUN_AGE_HOURS = ( - "((time() - ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) / 3600) or on() vector(999)" -) -JENKINS_CLEANUP_LAST_SUCCESS_AGE_HOURS = ( - "((time() - ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds) / 3600) or on() vector(999)" -) -JENKINS_CLEANUP_LAST_DELETED = "ariadne_jenkins_workspace_cleanup_last_deleted_total or on() vector(0)" -JENKINS_CLEANUP_LAST_PLANNED = "ariadne_jenkins_workspace_cleanup_last_planned_total or on() vector(0)" -JENKINS_BUILD_WEATHER_LAST_STATUS = "ariadne_jenkins_build_weather_job_last_status" -JENKINS_BUILD_WEATHER_LAST_RUN_AGE_HOURS = ( - "(time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600" -) -JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS = ( - "(time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600" -) -JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS = ( - "(time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600" -) -JENKINS_BUILD_WEATHER_LAST_STATUS_BY_JOB = ( - f"max by (exported_job,job_url,weather_icon) ({JENKINS_BUILD_WEATHER_LAST_STATUS})" -) -JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS_BY_JOB = ( - f"min by (exported_job,job_url,weather_icon) ({JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS})" -) -JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS_BY_JOB = ( - f"min by (exported_job,job_url,weather_icon) ({JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS})" -) -JENKINS_BUILD_WEATHER_LAST_DURATION_MINUTES = ( - "ariadne_jenkins_build_weather_job_last_duration_seconds / 60" -) -JENKINS_WORKSPACE_PV_STALE_COUNT = ( - 'sum((kube_persistentvolume_status_phase{phase=~"Released|Failed"} > bool 0) ' - '* on(persistentvolume) group_left(claim_namespace,name) ' - 'kube_persistentvolume_claim_ref{claim_namespace="jenkins",name=~"pvc-workspace-.*"}) or on() vector(0)' -) -JENKINS_WORKSPACE_PV_STALE_AGE_HOURS = ( - '((time() - kube_persistentvolume_created) / 3600) ' - '* on(persistentvolume) group_left(claim_namespace,name) ' - 'kube_persistentvolume_claim_ref{claim_namespace="jenkins",name=~"pvc-workspace-.*"} ' - '* on(persistentvolume) group_left() (kube_persistentvolume_status_phase{phase=~"Released|Failed"} > bool 0)' +ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = ( + f"(time() - max_over_time({ARIADNE_FAST_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600" ) +ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS = f"(({ARIADNE_ALL_SCHEDULE_NEXT_RUN} - time()) / 3600)" ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total" PLATFORM_TEST_SUITE_NAMES = [ "ariadne", "metis", "ananke", "atlasbot", - "lesavka", "pegasus", "soteria", - "titan-iac", - "bstein-home", - "arcanagon", - "data-prepper", + "titan_iac", + "bstein_home", + "data_prepper", ] PLATFORM_TEST_SUCCESS_STATUS = "ok|passed|success" +PLATFORM_TEST_CI_JOB = "platform-quality-ci" +PLATFORM_TEST_EXPORT_FILTER = f'exported_job="{PLATFORM_TEST_CI_JOB}"' PLATFORM_TEST_SUITE_VALUE_BY_NAME = { "ariadne": "ariadne", "metis": "metis", "ananke": "ananke", "atlasbot": "atlasbot", - "lesavka": "lesavka", "pegasus": "pegasus|pegasus-health|pegasus_health", "soteria": "soteria", - "titan-iac": "titan-iac|titan_iac", - "bstein-home": "bstein-home|bstein_home", - "arcanagon": "arcanagon", - "data-prepper": "data-prepper|data_prepper", + "titan_iac": "titan_iac|titan-iac", + "bstein_home": "bstein_home|bstein-home", + "data_prepper": "data_prepper|data-prepper", } PLATFORM_TEST_SUITE_MATCHER = "|".join( PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite) for suite in PLATFORM_TEST_SUITE_NAMES ) -PLATFORM_TEST_SUITE_VARIABLE_ALL_MATCHER = "|".join( - PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite) for suite in PLATFORM_TEST_SUITE_NAMES -) +PLATFORM_TEST_SUITE_CANONICAL_MATCHER = "|".join(PLATFORM_TEST_SUITE_NAMES) +PLATFORM_TEST_SUITE_VARIABLE_ALL_MATCHER = PLATFORM_TEST_SUITE_CANONICAL_MATCHER PLATFORM_TEST_SUCCESS_EVENTS_30D = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[30d])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d])) or on() vector(0))' ) PLATFORM_TEST_TOTAL_EVENTS_30D = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d])) or on() vector(0))' +) +PLATFORM_TEST_SUCCESS_EVENTS_7D = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[7d])) or on() vector(0))' +) +PLATFORM_TEST_TOTAL_EVENTS_7D = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[7d])) or on() vector(0))' +) +PLATFORM_TEST_SUCCESS_EVENTS_24H = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) or on() vector(0))' +) +PLATFORM_TEST_TOTAL_EVENTS_24H = ( + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) or on() vector(0))' ) TEST_SUCCESS_RATE = ( f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_30D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_30D}), 1)" ) +TEST_SUCCESS_RATE_7D = ( + f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_7D}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_7D}), 1)" +) +TEST_SUCCESS_RATE_24H = ( + f"100 * ({PLATFORM_TEST_SUCCESS_EVENTS_24H}) / clamp_min(({PLATFORM_TEST_TOTAL_EVENTS_24H}), 1)" +) TEST_FAILURES_24H_TOTAL = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) or on() vector(0))' ) PLATFORM_TEST_FAILURES_24H_BY_SUITE = ( - f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h])))' + f'sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])))' ) PLATFORM_TEST_ACTIVITY_30D = ( - f'sum by (suite, status) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d]))' + f'sum by (suite, status) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d]))' +) +PLATFORM_TEST_RUNS_24H_TOTAL = PLATFORM_TEST_TOTAL_EVENTS_24H +PLATFORM_TEST_ACTIVE_SUITES_24H = ( + f'sum((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h])) > 0)) ' + "or on() vector(0)" ) PLATFORM_TEST_POINT_WINDOW = "1h" PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [ { "refId": chr(ord("A") + index), "expr": ( - f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}' + f'(100 * (sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}' f'[{PLATFORM_TEST_POINT_WINDOW}]))) / ' - f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}"}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1)) ' - f'and on() ((sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}"}}[{PLATFORM_TEST_POINT_WINDOW}]))) > 0) ' - "or on() vector(0)" + f'clamp_min((sum(increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_VALUE_BY_NAME.get(suite, suite)}",{PLATFORM_TEST_EXPORT_FILTER}}}[{PLATFORM_TEST_POINT_WINDOW}]))), 1))' ), "legendFormat": suite, } for index, suite in enumerate(PLATFORM_TEST_SUITE_NAMES) ] PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = ( - f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}"}}[24h]))) ' - f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))), 1)) ' - f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[24h]))) > 0))' + f'sort_desc(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status=~"{PLATFORM_TEST_SUCCESS_STATUS}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h]))) ' + f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[24h]))), 1))' ) QUALITY_GATE_SUITE_INDEX_30D = ( - f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}"}}[30d]))' + f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",{PLATFORM_TEST_EXPORT_FILTER}}}[30d]))' ) QUALITY_GATE_COVERAGE_BY_SUITE = ( - '(max by (suite) ({__name__=~".*_quality_gate_coverage_percent"})) ' - 'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent))' + f'(max by (suite) ({{__name__=~".*_quality_gate_coverage_percent",{PLATFORM_TEST_EXPORT_FILTER}}})) ' + f'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{PLATFORM_TEST_EXPORT_FILTER}}}))' ) QUALITY_GATE_COVERAGE_BY_SUITE_WITH_MISSING = ( f"({QUALITY_GATE_COVERAGE_BY_SUITE}) or on(suite) (0 * ({QUALITY_GATE_SUITE_INDEX_30D}) - 1)" @@ -607,16 +589,12 @@ QUALITY_GATE_COVERAGE_GAP_BY_SUITE = ( f"clamp_min(95 - ({QUALITY_GATE_COVERAGE_BY_SUITE}), 0)" ) QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE = ( - "max by (suite) (platform_quality_gate_source_lines_over_500_total)" + f"max by (suite) (platform_quality_gate_source_lines_over_500_total{{{PLATFORM_TEST_EXPORT_FILTER}}})" ) QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE_WITH_MISSING = ( f"({QUALITY_GATE_SMELL_INFRACTIONS_BY_SUITE}) or on(suite) (0 * ({QUALITY_GATE_SUITE_INDEX_30D}) - 1)" ) -PVC_BACKUP_AGE_HOURS_BY_PVC = ( - 'sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver="restic"}) / 3600) ' - 'or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver="restic",reason=~"missing|no_completed|lookup_failed|unknown_timestamp"} > 0) ' - '* (pvc_backup_count{driver="restic"} > bool 0)) * 999))) or on() vector(0))' -) +PVC_BACKUP_AGE_HOURS_BY_PVC = "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))" ANANKE_SELECTOR = 'job="ananke-power"' ANANKE_UPS_DB_NAME = "Pyrphoros" ANANKE_UPS_DB_NODE = "titan-db" @@ -644,12 +622,6 @@ ANANKE_UPS_ON_BATTERY_DB = ( ANANKE_UPS_ON_BATTERY_TETHYS = ( f'max(ananke_ups_on_battery{{{ANANKE_UPS_TETHYS_SELECTOR}}}) or on() vector(0)' ) -ANANKE_UPS_THRESHOLD_DB = ( - f'clamp_min(max(ananke_ups_threshold_seconds{{{ANANKE_UPS_DB_SELECTOR}}}), 1)' -) -ANANKE_UPS_THRESHOLD_TETHYS = ( - f'clamp_min(max(ananke_ups_threshold_seconds{{{ANANKE_UPS_TETHYS_SELECTOR}}}), 1)' -) ANANKE_UPS_BATTERY_CHARGE_DB = ( f'max(ananke_ups_battery_charge_percent{{{ANANKE_UPS_DB_SELECTOR}}}) or on() vector(0)' ) @@ -689,122 +661,43 @@ ANANKE_UPS_RUNTIME_BY_SOURCE = f"ananke_ups_runtime_seconds{{{ANANKE_SELECTOR}}} ANANKE_UPS_LOAD_BY_SOURCE = f"ananke_ups_load_percent{{{ANANKE_SELECTOR}}}" ANANKE_UPS_CHARGE_BY_SOURCE = f"ananke_ups_battery_charge_percent{{{ANANKE_SELECTOR}}}" ANANKE_UPS_TRIGGER_BY_SOURCE = f"ananke_ups_trigger_active{{{ANANKE_SELECTOR}}}" - - -def ups_discharge_risk_expr(on_battery_expr, runtime_expr, shutdown_threshold_expr): - return ( - f"((({on_battery_expr}) > bool 0) * (" - f"1 + (({runtime_expr}) < bool (3 * ({shutdown_threshold_expr}))) + " - f"(({runtime_expr}) < bool (2 * ({shutdown_threshold_expr})))" - f")) or on() vector(0)" - ) - - -ANANKE_UPS_DISCHARGE_RISK_DB = ups_discharge_risk_expr( - ANANKE_UPS_ON_BATTERY_DB, - ANANKE_UPS_RUNTIME_DB, - ANANKE_UPS_THRESHOLD_DB, -) -ANANKE_UPS_DISCHARGE_RISK_TETHYS = ups_discharge_risk_expr( - ANANKE_UPS_ON_BATTERY_TETHYS, - ANANKE_UPS_RUNTIME_TETHYS, - ANANKE_UPS_THRESHOLD_TETHYS, -) CLIMATE_SENSOR_COUNT = "count(typhon_temperature_celsius) or on() vector(0)" -# Drop volatile labels so historical series stay merged when controller naming/group metadata changes. -CLIMATE_DEDUP_LABELS = ( - "job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group" -) -CLIMATE_TEMP_SERIES = ( - f"max without ({CLIMATE_DEDUP_LABELS}) (typhon_temperature_celsius != 0)" -) -CLIMATE_TEMP_FAHRENHEIT_SERIES = f"({CLIMATE_TEMP_SERIES}) * 9 / 5 + 32" -CLIMATE_PRESSURE_SERIES = ( - f"max without ({CLIMATE_DEDUP_LABELS}) (typhon_vpd_kpa != 0)" -) -CLIMATE_HUMIDITY_SERIES = ( - f"max without ({CLIMATE_DEDUP_LABELS}) (typhon_relative_humidity_percent != 0)" -) -CLIMATE_TEMP_MAX = f"max({CLIMATE_TEMP_SERIES}) or on() vector(0)" -CLIMATE_TEMP_FAHRENHEIT_MAX = f"max({CLIMATE_TEMP_FAHRENHEIT_SERIES}) or on() vector(0)" -CLIMATE_PRESSURE_CURRENT = f"max({CLIMATE_PRESSURE_SERIES}) or on() vector(0)" -CLIMATE_HUMIDITY_MAX = f"max({CLIMATE_HUMIDITY_SERIES}) or on() vector(0)" -CLIMATE_TEMP_MIN_BOUND_SERIES = f"(min_over_time({CLIMATE_TEMP_SERIES}[$__range]) - 0.08)" -CLIMATE_TEMP_MAX_BOUND_SERIES = f"(max_over_time({CLIMATE_TEMP_SERIES}[$__range]) + 0.08)" -CLIMATE_HUMIDITY_MIN_BOUND_SERIES = ( - f"clamp_min((min_over_time({CLIMATE_HUMIDITY_SERIES}[$__range]) - 0.35), 0)" -) -CLIMATE_HUMIDITY_MAX_BOUND_SERIES = ( - f"clamp_max((max_over_time({CLIMATE_HUMIDITY_SERIES}[$__range]) + 0.35), 100)" -) -CLIMATE_PRESSURE_MIN_BOUND_SERIES = ( - f"clamp_min((min_over_time({CLIMATE_PRESSURE_SERIES}[$__range]) - 0.03), 0)" -) -CLIMATE_PRESSURE_MAX_BOUND_SERIES = ( - f"(max_over_time({CLIMATE_PRESSURE_SERIES}[$__range]) + 0.03)" +CLIMATE_TEMP_MAX = "max(typhon_temperature_celsius) or on() vector(0)" +CLIMATE_PRESSURE_CURRENT = "max(typhon_vpd_kpa) or on() vector(0)" +CLIMATE_HUMIDITY_MAX = "max(typhon_relative_humidity_percent) or on() vector(0)" +CLIMATE_TEMP_SERIES = "typhon_temperature_celsius" +CLIMATE_PRESSURE_SERIES = "typhon_vpd_kpa" +CLIMATE_HUMIDITY_SERIES = "typhon_relative_humidity_percent" +CLIMATE_DEWPOINT_SERIES = ( + "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + " + "(17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / " + "(17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + " + "(17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))" ) +CLIMATE_DEWPOINT_CURRENT = f"max({CLIMATE_DEWPOINT_SERIES}) or on() vector(0)" CLIMATE_FAN_OUTLET_CURRENT = ( - f'max(max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="1"}})) or on() vector(0)' + 'max(typhon_fan_speed_level{fan_group="outlet"}) or on() vector(0)' ) CLIMATE_FAN_INSIDE_INLET_CURRENT = ( - f'max(max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="2"}})) or on() vector(0)' + 'max(typhon_fan_speed_level{fan_group="inside_inlet"}) or on() vector(0)' ) CLIMATE_FAN_OUTSIDE_INLET_CURRENT = ( - f'max(max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="3"}})) or on() vector(0)' + 'max(typhon_fan_speed_level{fan_group="outside_inlet"}) or on() vector(0)' ) CLIMATE_FAN_INTERIOR_CURRENT = ( - f'max(max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="4"}})) or on() vector(0)' + 'max(typhon_fan_speed_level{fan_group="interior"}) or on() vector(0)' ) CLIMATE_FAN_OUTLET_SERIES = ( - f'max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="1"}})' + 'typhon_fan_speed_level{fan_group="outlet"}' ) CLIMATE_FAN_INSIDE_INLET_SERIES = ( - f'max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="2"}})' + 'typhon_fan_speed_level{fan_group="inside_inlet"}' ) CLIMATE_FAN_OUTSIDE_INLET_SERIES = ( - f'max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="3"}})' + 'typhon_fan_speed_level{fan_group="outside_inlet"}' ) CLIMATE_FAN_INTERIOR_SERIES = ( - f'max without ({CLIMATE_DEDUP_LABELS}) (typhon_fan_speed_level{{port="4"}})' -) - - -def with_metric_label(expr, metric): - return f'label_replace(({expr}), "metric", "{metric}", "__name__", ".*")' - - -def with_ups_metric_labels(expr, ups, metric): - return ( - f'label_replace(' - f'label_replace(({expr}), "ups", "{ups}", "__name__", ".*"), ' - f'"metric", "{metric}", "__name__", ".*"' - f')' - ) - - -CLIMATE_CURRENT_ROW_EXPR = " or ".join( - [ - with_metric_label(CLIMATE_TEMP_MAX, "Temp °C"), - with_metric_label(CLIMATE_TEMP_FAHRENHEIT_MAX, "Temp °F"), - with_metric_label(CLIMATE_HUMIDITY_MAX, "Humidity"), - with_metric_label(CLIMATE_PRESSURE_CURRENT, "Pressure"), - ] -) -CLIMATE_FAN_CURRENT_ROW_EXPR = " or ".join( - [ - with_metric_label(f"round({CLIMATE_FAN_OUTLET_CURRENT})", "Outlet"), - with_metric_label(f"round({CLIMATE_FAN_INSIDE_INLET_CURRENT})", "Inlet - In"), - with_metric_label(f"round({CLIMATE_FAN_OUTSIDE_INLET_CURRENT})", "Inlet - Out"), - with_metric_label(f"round({CLIMATE_FAN_INTERIOR_CURRENT})", "Interior"), - ] -) -UPS_CURRENT_ROW_EXPR = " or ".join( - [ - with_ups_metric_labels(ANANKE_UPS_DRAW_WATTS_DB, ANANKE_UPS_DB_NAME, "Draw"), - with_ups_metric_labels(ANANKE_UPS_RUNTIME_DB, ANANKE_UPS_DB_NAME, "Runtime"), - with_ups_metric_labels(ANANKE_UPS_DRAW_WATTS_TETHYS, ANANKE_UPS_TETHYS_NAME, "Draw"), - with_ups_metric_labels(ANANKE_UPS_RUNTIME_TETHYS, ANANKE_UPS_TETHYS_NAME, "Runtime"), - ] + 'typhon_fan_speed_level{fan_group="interior"}' ) POSTGRES_CONN_USED = ( 'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") ' @@ -821,8 +714,6 @@ ONEOFF_JOB_POD_AGE_HOURS = ( '* on(namespace,pod) group_left(phase) ' 'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})' ) -GLUE_LAST_SUCCESS_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SUCCESS}[$__range])) / 3600" -GLUE_LAST_SCHEDULE_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SCHEDULE}[$__range])) / 3600" GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" @@ -910,10 +801,6 @@ def stat_panel( description=None, orientation=None, wide_layout=None, - graph_mode="area", - justify_mode="center", - title_size=None, - value_size=None, ): """Return a Grafana stat panel definition.""" defaults = { @@ -946,8 +833,8 @@ def stat_panel( "fieldConfig": {"defaults": defaults, "overrides": field_overrides or []}, "options": { "colorMode": "value", - "graphMode": graph_mode, - "justifyMode": justify_mode, + "graphMode": "area", + "justifyMode": "center", "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, "textMode": text_mode, }, @@ -956,12 +843,6 @@ def stat_panel( panel["options"]["orientation"] = orientation if wide_layout is not None: panel["options"]["wideLayout"] = wide_layout - if title_size is not None or value_size is not None: - panel["options"]["text"] = {} - if title_size is not None: - panel["options"]["text"]["titleSize"] = title_size - if value_size is not None: - panel["options"]["text"]["valueSize"] = value_size if legend and len(panel["targets"]) == 1: panel["targets"][0]["legendFormat"] = legend if instant: @@ -1068,129 +949,6 @@ def timeseries_panel( return panel -def canvas_metric_grid_panel( - panel_id, - title, - grid, - *, - targets, - field_overrides=None, - links=None, - description=None, - metric_size=26, - label_size=12, - color_fields=None, - thresholds=None, -): - """Return a canvas panel with a deterministic 2x2 metric layout.""" - - if color_fields is None: - color_fields = [targets[0]["legendFormat"], targets[1]["legendFormat"], targets[2]["legendFormat"], targets[3]["legendFormat"]] - - def text_element(name, text, left, top): - return { - "type": "text", - "name": name, - "constraint": {"horizontal": "left", "vertical": "top"}, - "placement": { - "left": left, - "top": top, - "width": 146, - "height": 14, - }, - "background": {"color": {"fixed": "transparent"}}, - "border": {"color": {"fixed": "transparent"}}, - "config": { - "align": "center", - "valign": "middle", - "size": label_size, - "color": {"fixed": "text"}, - "text": { - "fixed": text, - }, - }, - "links": [], - } - - def metric_element(name, field, left, top): - return { - "type": "metric-value", - "name": name, - "constraint": {"horizontal": "left", "vertical": "top"}, - "placement": { - "left": left, - "top": top, - "width": 146, - "height": 42, - }, - "background": {"color": {"fixed": "transparent"}}, - "border": {"color": {"fixed": "transparent"}}, - "config": { - "align": "center", - "valign": "middle", - "size": metric_size, - "color": {"field": color_fields[int(name.split()[-1]) - 1], "fixed": "text"}, - "text": { - "mode": "field", - "field": field, - "fixed": "", - }, - }, - "links": [], - } - - panel = { - "id": panel_id, - "type": "canvas", - "title": title, - "datasource": PROM_DS, - "gridPos": grid, - "targets": targets, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": thresholds - or { - "mode": "absolute", - "steps": [ - {"color": "rgba(115, 115, 115, 1)", "value": None}, - {"color": "green", "value": 1}, - ], - }, - "color": {"mode": "thresholds"}, - }, - "overrides": field_overrides or [], - }, - "options": { - "inlineEditing": False, - "showAdvancedTypes": True, - "panZoom": False, - "infinitePan": False, - "root": { - "type": "frame", - "name": f"{title} frame", - "elements": [ - text_element("Cell 1 label", targets[0]["legendFormat"], 12, 18), - metric_element("Cell 1", targets[0]["legendFormat"], 12, 32), - text_element("Cell 2 label", targets[1]["legendFormat"], 168, 18), - metric_element("Cell 2", targets[1]["legendFormat"], 168, 32), - text_element("Cell 3 label", targets[2]["legendFormat"], 12, 76), - metric_element("Cell 3", targets[2]["legendFormat"], 12, 90), - text_element("Cell 4 label", targets[3]["legendFormat"], 168, 76), - metric_element("Cell 4", targets[3]["legendFormat"], 168, 90), - ], - "background": {"color": {"fixed": "transparent"}}, - "border": {"color": {"fixed": "transparent"}}, - }, - }, - } - if links: - panel["links"] = links - if description: - panel["description"] = description - return panel - - def table_panel( panel_id, title, @@ -1204,9 +962,6 @@ def table_panel( filterable=True, footer=None, format=None, - targets=None, - field_overrides=None, - links=None, description=None, ): """Return a Grafana table panel definition.""" @@ -1217,33 +972,27 @@ def table_panel( if footer is not None: panel_options["footer"] = footer field_defaults = {"unit": unit, "custom": {"filterable": filterable}} - target_list = targets if targets is not None else [{"expr": expr, "refId": "A"}] - if instant: - for target in target_list: - target.setdefault("instant", True) + target = {"expr": expr, "refId": "A", **({"instant": True} if instant else {})} if format: - for target in target_list: - target.setdefault("format", format) + target["format"] = format panel = { "id": panel_id, "type": "table", "title": title, "datasource": PROM_DS, "gridPos": grid, - "targets": target_list, - "fieldConfig": {"defaults": field_defaults, "overrides": field_overrides or []}, + "targets": [target], + "fieldConfig": {"defaults": field_defaults, "overrides": []}, "options": panel_options, } if transformations: panel["transformations"] = transformations - if links: - panel["links"] = links if description: panel["description"] = description return panel -def pie_panel(panel_id, title, expr, grid, *, links=None, description=None, legend="{{namespace}}", unit="percent", instant=False): +def pie_panel(panel_id, title, expr, grid, *, links=None, description=None): """Return a pie chart panel with readable namespace labels.""" panel = { "id": panel_id, @@ -1251,10 +1000,10 @@ def pie_panel(panel_id, title, expr, grid, *, links=None, description=None, lege "title": title, "datasource": PROM_DS, "gridPos": grid, - "targets": [{"expr": expr, "refId": "A", "legendFormat": legend, **({"instant": True} if instant else {})}], + "targets": [{"expr": expr, "refId": "A", "legendFormat": "{{namespace}}"}], "fieldConfig": { "defaults": { - "unit": unit, + "unit": "percent", "color": {"mode": "palette-classic"}, }, "overrides": [], @@ -1314,6 +1063,28 @@ def namespace_scope_variable(var_name, label): } +def namespace_scope_links(var_name): + def with_value(value): + encoded = urllib.parse.quote(value, safe="") + params = [] + for other in NAMESPACE_SCOPE_VARS: + if other == var_name: + params.append(f"var-{other}={encoded}") + else: + params.append(f"var-{other}=${{{other}}}") + return "?" + "&".join(params) + + return [ + {"title": "Workload namespaces only", "url": with_value(NAMESPACE_SCOPE_WORKLOAD), "targetBlank": False}, + {"title": "All namespaces", "url": with_value(NAMESPACE_SCOPE_ALL), "targetBlank": False}, + { + "title": "Infrastructure namespaces only", + "url": with_value(NAMESPACE_SCOPE_INFRA), + "targetBlank": False, + }, + ] + + def testing_suite_variable(): options = [ { @@ -1344,28 +1115,6 @@ def testing_suite_variable(): } -def namespace_scope_links(var_name): - def with_value(value): - encoded = urllib.parse.quote(value, safe="") - params = [] - for other in NAMESPACE_SCOPE_VARS: - if other == var_name: - params.append(f"var-{other}={encoded}") - else: - params.append(f"var-{other}=${{{other}}}") - return "?" + "&".join(params) - - return [ - {"title": "Workload namespaces only", "url": with_value(NAMESPACE_SCOPE_WORKLOAD), "targetBlank": False}, - {"title": "All namespaces", "url": with_value(NAMESPACE_SCOPE_ALL), "targetBlank": False}, - { - "title": "Infrastructure namespaces only", - "url": with_value(NAMESPACE_SCOPE_INFRA), - "targetBlank": False, - }, - ] - - def bargauge_panel( panel_id, title, @@ -1449,311 +1198,6 @@ def bargauge_panel( return panel -def _jenkins_weather_status_expr(base_expr, comparator): - return ( - f"({base_expr}) and on(exported_job,job_url,weather_icon) " - f"({JENKINS_BUILD_WEATHER_LAST_STATUS_BY_JOB} {comparator})" - ) - - -def _jenkins_weather_topk_expr(base_expr, topk_n=6): - return f"sort(bottomk({topk_n}, {base_expr}))" - - -def _jenkins_weather_topk_with_status_label_expr(base_expr, topk_n=6): - topk_expr = _jenkins_weather_topk_expr(base_expr, topk_n=topk_n) - success_expr = ( - f'label_replace(({topk_expr}) and on(exported_job,job_url,weather_icon) ' - f'({JENKINS_BUILD_WEATHER_LAST_STATUS_BY_JOB} == 1), ' - '"run_state", "ok", "exported_job", ".*")' - ) - failure_expr = ( - f'label_replace(({topk_expr}) and on(exported_job,job_url,weather_icon) ' - f'({JENKINS_BUILD_WEATHER_LAST_STATUS_BY_JOB} != 1), ' - '"run_state", "bad", "exported_job", ".*")' - ) - return f"sort(({success_expr}) or ({failure_expr}))" - - -def jenkins_weather_bargauge_panel( - panel_id, - title, - expr, - grid, - *, - unit="h", - decimals=2, - sort_order="asc", - limit=12, - thresholds=None, - links=None, - description=None, -): - panel = { - "id": panel_id, - "type": "bargauge", - "title": title, - "datasource": PROM_DS, - "gridPos": grid, - "targets": [ - { - "refId": "A", - "expr": _jenkins_weather_status_expr(expr, "== 1"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - { - "refId": "B", - "expr": _jenkins_weather_status_expr(expr, "== 0"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - { - "refId": "C", - "expr": _jenkins_weather_status_expr(expr, "== 2"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - { - "refId": "D", - "expr": _jenkins_weather_status_expr(expr, "< 0"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - ], - "fieldConfig": { - "defaults": { - "unit": unit, - "min": 0, - "thresholds": thresholds - or { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 6}, - {"color": "orange", "value": 24}, - {"color": "red", "value": 72}, - ], - }, - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": True, - } - ], - }, - "overrides": [ - { - "matcher": {"id": "byFrameRefID", "options": "A"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}], - }, - { - "matcher": {"id": "byFrameRefID", "options": "B"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - { - "matcher": {"id": "byFrameRefID", "options": "C"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - { - "matcher": {"id": "byFrameRefID", "options": "D"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - ], - }, - "options": { - "displayMode": "basic", - "orientation": "horizontal", - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, - }, - "transformations": [ - {"id": "sortBy", "options": {"fields": ["Value"], "order": sort_order}}, - ], - } - if decimals is not None: - panel["fieldConfig"]["defaults"]["decimals"] = decimals - if limit: - panel["transformations"].append({"id": "limit", "options": {"limit": limit}}) - if links: - panel["links"] = links - if description: - panel["description"] = description - return panel - - -def jenkins_weather_statlist_panel( - panel_id, - title, - expr, - grid, - *, - unit="h", - decimals=2, - sort_order="asc", - limit=12, - title_size=12, - value_size=12, - links=None, - description=None, -): - panel = { - "id": panel_id, - "type": "stat", - "title": title, - "datasource": PROM_DS, - "gridPos": grid, - "targets": [ - { - "refId": "A", - "expr": _jenkins_weather_status_expr(expr, "== 1"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - { - "refId": "B", - "expr": _jenkins_weather_status_expr(expr, "== 0"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - { - "refId": "C", - "expr": _jenkins_weather_status_expr(expr, "== 2"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - { - "refId": "D", - "expr": _jenkins_weather_status_expr(expr, "< 0"), - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": True, - }, - ], - "fieldConfig": { - "defaults": { - "unit": unit, - "decimals": decimals, - "min": 0, - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": True, - } - ], - }, - "overrides": [ - { - "matcher": {"id": "byFrameRefID", "options": "A"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}], - }, - { - "matcher": {"id": "byFrameRefID", "options": "B"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - { - "matcher": {"id": "byFrameRefID", "options": "C"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - { - "matcher": {"id": "byFrameRefID", "options": "D"}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - ], - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": True, - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, - "textMode": "name_and_value", - "text": {"titleSize": title_size, "valueSize": value_size}, - }, - "transformations": [{"id": "sortBy", "options": {"fields": ["Value"], "order": sort_order}}], - } - if limit: - panel["transformations"].append({"id": "limit", "options": {"limit": limit}}) - if links: - panel["links"] = links - if description: - panel["description"] = description - return panel - - -def jenkins_weather_statlist_topk_panel( - panel_id, - title, - base_expr, - grid, - *, - topk_n=6, - unit="h", - decimals=1, - title_size=11, - value_size=11, - links=None, - description=None, -): - expr = _jenkins_weather_topk_with_status_label_expr(base_expr, topk_n=topk_n) - panel = { - "id": panel_id, - "type": "stat", - "title": title, - "datasource": PROM_DS, - "gridPos": grid, - "targets": [ - { - "refId": "A", - "expr": expr, - "instant": True, - } - ], - "fieldConfig": { - "defaults": { - "unit": unit, - "decimals": decimals, - "min": 0, - "displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}", - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": True, - } - ], - }, - "overrides": [ - { - "matcher": {"id": "byRegexp", "options": '.*run_state="ok".*'}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}], - }, - { - "matcher": {"id": "byRegexp", "options": '.*run_state="bad".*'}, - "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}], - }, - ], - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": True, - "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, - "textMode": "name_and_value", - "text": {"titleSize": title_size, "valueSize": value_size}, - }, - "transformations": [{"id": "sortBy", "options": {"fields": ["Value"], "order": "asc"}}], - } - if links: - panel["links"] = links - if description: - panel["description"] = description - return panel - - def text_panel(panel_id, title, content, grid): return { "id": panel_id, @@ -1765,8 +1209,27 @@ def text_panel(panel_id, title, content, grid): } +DASHBOARD_LINK_TITLES = { + "atlas-overview": "Open Atlas Overview", + "atlas-pods": "Open Atlas Pods", + "atlas-nodes": "Open Atlas Nodes", + "atlas-storage": "Open Atlas Storage", + "atlas-network": "Open Atlas Network", + "atlas-mail": "Open Atlas Mail", + "atlas-jobs": "Open Atlas Testing", + "atlas-power": "Open Atlas Power", + "atlas-gpu": "Open Atlas GPU", +} + + def link_to(uid): - return [{"title": f"Open {uid} dashboard", "url": f"/d/{uid}", "targetBlank": True}] + return [ + { + "title": DASHBOARD_LINK_TITLES.get(uid, f"Open {uid} dashboard"), + "url": f"/d/{uid}", + "targetBlank": True, + } + ] # --------------------------------------------------------------------------- @@ -1997,52 +1460,63 @@ def build_overview(): {"color": "green", "value": 98}, ], } + status_mapping = [ + { + "type": "value", + "options": { + "0": {"text": "⚡ Charging"}, + "1": {"text": "🔋 Discharging"}, + }, + } + ] + panels.append( stat_panel( 40, - f"{ANANKE_UPS_DB_NAME} UPS Current", - ( - 'label_replace(' - + ANANKE_UPS_DRAW_WATTS_DB - + ', "metric", "Draw", "__name__", ".*") or label_replace(' - + ANANKE_UPS_RUNTIME_DB - + ', "metric", "Runtime", "__name__", ".*")' - ), - {"h": 3, "w": 6, "x": 0, "y": 7}, + "UPS Current Load", + None, + {"h": 6, "w": 4, "x": 0, "y": 12}, + unit="none", + decimals=1, text_mode="name_and_value", - legend="{{metric}}", - instant=True, - title_size=14, - value_size=30, - field_overrides=[ - {"matcher": {"id": "byName", "options": "Draw"}, "properties": [{"id": "unit", "value": "watt"}]}, - {"matcher": {"id": "byName", "options": "Runtime"}, "properties": [{"id": "unit", "value": "s"}]}, + targets=[ + {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True}, + {"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge", "instant": True}, + {"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True}, + {"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True}, + {"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge", "instant": True}, + {"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True}, ], - links=link_to("atlas-power"), - ) - ) - panels.append( - stat_panel( - 144, - f"{ANANKE_UPS_TETHYS_NAME} UPS Current", - ( - 'label_replace(' - + ANANKE_UPS_DRAW_WATTS_TETHYS - + ', "metric", "Draw", "__name__", ".*") or label_replace(' - + ANANKE_UPS_RUNTIME_TETHYS - + ', "metric", "Runtime", "__name__", ".*")' - ), - {"h": 3, "w": 6, "x": 0, "y": 10}, - text_mode="name_and_value", - legend="{{metric}}", - instant=True, - title_size=14, - value_size=30, field_overrides=[ - {"matcher": {"id": "byName", "options": "Draw"}, "properties": [{"id": "unit", "value": "watt"}]}, - {"matcher": {"id": "byName", "options": "Runtime"}, "properties": [{"id": "unit", "value": "s"}]}, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], + }, ], + orientation="horizontal", + wide_layout=True, links=link_to("atlas-power"), + description="Per-UPS live snapshot: current draw, discharge, and charging/discharging status.", ) ) panels.append( @@ -2050,176 +1524,96 @@ def build_overview(): 41, "UPS History (Power Draw)", None, - {"h": 6, "w": 6, "x": 6, "y": 7}, + {"h": 6, "w": 4, "x": 4, "y": 12}, unit="watt", targets=[ {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB_SERIES, "legendFormat": ANANKE_UPS_DB_NAME}, {"refId": "B", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS_SERIES, "legendFormat": ANANKE_UPS_TETHYS_NAME}, {"refId": "C", "expr": ANANKE_UPS_DRAW_WATTS_TOTAL_SERIES, "legendFormat": "combined"}, ], - legend_display="table", - legend_placement="right", + legend_display="list", + legend_placement="bottom", links=link_to("atlas-power"), ) ) panels.append( stat_panel( 42, - "Current Enclosure Temperature", - ( - 'label_replace(' - + CLIMATE_TEMP_MAX - + ', "metric", "°C", "__name__", ".*") or label_replace(' - + CLIMATE_TEMP_FAHRENHEIT_MAX - + ', "metric", "°F", "__name__", ".*")' - ), - {"h": 3, "w": 6, "x": 0, "y": 13}, - text_mode="name_and_value", - legend="{{metric}}", - instant=True, - title_size=14, - value_size=30, + "Current Climate", + None, + {"h": 6, "w": 4, "x": 8, "y": 12}, + unit="none", + decimals=2, + text_mode="value", + targets=[ + {"refId": "A", "expr": CLIMATE_TEMP_MAX, "legendFormat": "Tent Temp (°C)", "instant": True}, + {"refId": "B", "expr": CLIMATE_PRESSURE_CURRENT, "legendFormat": "Tent VPD (kPa)", "instant": True}, + {"refId": "C", "expr": CLIMATE_HUMIDITY_MAX, "legendFormat": "Tent RH (%)", "instant": True}, + {"refId": "D", "expr": CLIMATE_DEWPOINT_CURRENT, "legendFormat": "Dew Point (°C)", "instant": True}, + ], field_overrides=[ - {"matcher": {"id": "byName", "options": "°C"}, "properties": [{"id": "unit", "value": "celsius"}]}, - {"matcher": {"id": "byName", "options": "°F"}, "properties": [{"id": "unit", "value": "fahrenheit"}]}, + {"matcher": {"id": "byName", "options": "Tent Temp (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]}, + {"matcher": {"id": "byName", "options": "Tent VPD (kPa)"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]}, + {"matcher": {"id": "byName", "options": "Tent RH (%)"}, "properties": [{"id": "unit", "value": "percent"}]}, + {"matcher": {"id": "byName", "options": "Dew Point (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]}, ], links=link_to("atlas-power"), + description="Current tent temperature, humidity, VPD, and dew point.", + orientation="horizontal", + wide_layout=True, ) ) panels.append( - stat_panel( - 143, - "Current Enclosure Climate", - ( - 'label_replace(' - + CLIMATE_HUMIDITY_MAX - + ', "metric", "%RH", "__name__", ".*") or label_replace(' - + CLIMATE_PRESSURE_CURRENT - + ', "metric", "kPa", "__name__", ".*")' - ), - {"h": 3, "w": 6, "x": 0, "y": 16}, - text_mode="name_and_value", - legend="{{metric}}", - instant=True, - title_size=14, - value_size=30, - field_overrides=[ - {"matcher": {"id": "byName", "options": "%RH"}, "properties": [{"id": "unit", "value": "suffix:%RH"}]}, - {"matcher": {"id": "byName", "options": "kPa"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]}, - ], - links=link_to("atlas-power"), - ) - ) - climate_history_panel = timeseries_panel( + timeseries_panel( 43, - "Enclosure Climate History", + "Climate History", None, - {"h": 6, "w": 6, "x": 6, "y": 13}, - unit="none", + {"h": 6, "w": 4, "x": 12, "y": 12}, + unit="celsius", targets=[ - {"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "C"}, - {"refId": "B", "expr": CLIMATE_HUMIDITY_SERIES, "legendFormat": "RH"}, - {"refId": "C", "expr": CLIMATE_PRESSURE_SERIES, "legendFormat": "P"}, - {"refId": "D", "expr": CLIMATE_TEMP_MIN_BOUND_SERIES, "legendFormat": "C bound min"}, - {"refId": "E", "expr": CLIMATE_TEMP_MAX_BOUND_SERIES, "legendFormat": "C bound max"}, - {"refId": "F", "expr": CLIMATE_HUMIDITY_MIN_BOUND_SERIES, "legendFormat": "RH bound min"}, - {"refId": "G", "expr": CLIMATE_HUMIDITY_MAX_BOUND_SERIES, "legendFormat": "RH bound max"}, - {"refId": "H", "expr": CLIMATE_PRESSURE_MIN_BOUND_SERIES, "legendFormat": "P bound min"}, - {"refId": "I", "expr": CLIMATE_PRESSURE_MAX_BOUND_SERIES, "legendFormat": "P bound max"}, + {"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "Temperature (°C)"}, + {"refId": "B", "expr": CLIMATE_HUMIDITY_SERIES, "legendFormat": "Humidity (%)"}, + {"refId": "C", "expr": CLIMATE_PRESSURE_SERIES, "legendFormat": "VPD (kPa)"}, + {"refId": "D", "expr": CLIMATE_DEWPOINT_SERIES, "legendFormat": "Dew Point (°C)"}, ], field_overrides=[ { - "matcher": {"id": "byName", "options": "C"}, + "matcher": {"id": "byName", "options": "Humidity (%)"}, "properties": [ - {"id": "unit", "value": "suffix:°C"}, + {"id": "unit", "value": "percent"}, + ], + }, + { + "matcher": {"id": "byName", "options": "VPD (kPa)"}, + "properties": [ + {"id": "unit", "value": "none"}, + {"id": "custom.axisPlacement", "value": "right"}, + {"id": "custom.axisLabel", "value": "kPa"}, {"id": "decimals", "value": 2}, - {"id": "custom.axisPlacement", "value": "left"}, - {"id": "custom.axisCenteredZero", "value": False}, ], - }, - { - "matcher": {"id": "byRegexp", "options": "C bound .*"}, - "properties": [ - {"id": "unit", "value": "suffix:°C"}, - {"id": "custom.axisPlacement", "value": "left"}, - {"id": "custom.axisCenteredZero", "value": False}, - {"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}}, - {"id": "custom.lineWidth", "value": 0}, - {"id": "custom.fillOpacity", "value": 0}, - {"id": "custom.showPoints", "value": "never"}, - {"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}}, - ], - }, - { - "matcher": {"id": "byName", "options": "RH"}, - "properties": [ - {"id": "unit", "value": "suffix:%"}, - {"id": "decimals", "value": 2}, - {"id": "custom.axisPlacement", "value": "right"}, - {"id": "custom.axisCenteredZero", "value": False}, - ], - }, - { - "matcher": {"id": "byRegexp", "options": "RH bound .*"}, - "properties": [ - {"id": "unit", "value": "suffix:%"}, - {"id": "custom.axisPlacement", "value": "right"}, - {"id": "custom.axisCenteredZero", "value": False}, - {"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}}, - {"id": "custom.lineWidth", "value": 0}, - {"id": "custom.fillOpacity", "value": 0}, - {"id": "custom.showPoints", "value": "never"}, - {"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}}, - ], - }, - { - "matcher": {"id": "byName", "options": "P"}, - "properties": [ - {"id": "unit", "value": "suffix:kPa"}, - {"id": "custom.axisPlacement", "value": "right"}, - {"id": "decimals", "value": 2}, - {"id": "custom.axisCenteredZero", "value": False}, - ], - }, - { - "matcher": {"id": "byRegexp", "options": "P bound .*"}, - "properties": [ - {"id": "unit", "value": "suffix:kPa"}, - {"id": "custom.axisPlacement", "value": "right"}, - {"id": "custom.axisCenteredZero", "value": False}, - {"id": "custom.hideFrom", "value": {"legend": True, "tooltip": True, "viz": False}}, - {"id": "custom.lineWidth", "value": 0}, - {"id": "custom.fillOpacity", "value": 0}, - {"id": "custom.showPoints", "value": "never"}, - {"id": "color", "value": {"mode": "fixed", "fixedColor": "transparent"}}, - ], - }, + } ], legend_display="list", legend_placement="bottom", links=link_to("atlas-power"), - description="Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible.", + description="Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis).", ) - climate_history_panel["fieldConfig"]["defaults"]["custom"] = { - "drawStyle": "line", - "lineInterpolation": "linear", - "lineWidth": 2, - "fillOpacity": 10, - "showPoints": "never", - "spanNulls": True, - } - panels.append(climate_history_panel) + ) panels.append( stat_panel( 140, "Fan Activity", - CLIMATE_FAN_CURRENT_ROW_EXPR, - {"h": 6, "w": 6, "x": 12, "y": 13}, + None, + {"h": 6, "w": 4, "x": 16, "y": 12}, unit="none", decimals=0, text_mode="name_and_value", - legend="{{metric}}", - instant=True, + targets=[ + {"refId": "A", "expr": f"round({CLIMATE_FAN_OUTLET_CURRENT})", "legendFormat": "Inside Outlet", "instant": True}, + {"refId": "B", "expr": f"round({CLIMATE_FAN_INSIDE_INLET_CURRENT})", "legendFormat": "Inside Inlet", "instant": True}, + {"refId": "C", "expr": f"round({CLIMATE_FAN_OUTSIDE_INLET_CURRENT})", "legendFormat": "Outside Inlet", "instant": True}, + {"refId": "D", "expr": f"round({CLIMATE_FAN_INTERIOR_CURRENT})", "legendFormat": "Interior Fans", "instant": True}, + ], thresholds={ "mode": "absolute", "steps": [ @@ -2228,16 +1622,9 @@ def build_overview(): {"color": "red", "value": 9}, ], }, - field_overrides=[ - {"matcher": {"id": "byName", "options": "Outlet"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Inlet - In"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Inlet - Out"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Interior"}, "properties": [{"id": "decimals", "value": 0}]}, - ], + orientation="horizontal", + wide_layout=True, links=link_to("atlas-power"), - orientation="vertical", - wide_layout=False, - value_size=26, ) ) panels.append( @@ -2245,17 +1632,17 @@ def build_overview(): 141, "Fan History (0-10)", None, - {"h": 6, "w": 6, "x": 18, "y": 13}, + {"h": 6, "w": 4, "x": 20, "y": 12}, unit="none", max_value=10, targets=[ - {"refId": "A", "expr": CLIMATE_FAN_OUTLET_SERIES, "legendFormat": "Outlet"}, - {"refId": "B", "expr": CLIMATE_FAN_INSIDE_INLET_SERIES, "legendFormat": "Inlet - Inside"}, - {"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Inlet - Outside"}, - {"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior"}, + {"refId": "A", "expr": CLIMATE_FAN_OUTLET_SERIES, "legendFormat": "Inside Outlet"}, + {"refId": "B", "expr": CLIMATE_FAN_INSIDE_INLET_SERIES, "legendFormat": "Inside Inlet"}, + {"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Outside Inlet"}, + {"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior Fans"}, ], - legend_display="table", - legend_placement="right", + legend_display="list", + legend_placement="bottom", links=link_to("atlas-power"), ) ) @@ -2265,7 +1652,7 @@ def build_overview(): 44, "One-off Job Pods (age hours)", ONEOFF_JOB_POD_AGE_HOURS, - {"h": 5, "w": 8, "x": 0, "y": 32}, + {"h": 5, "w": 6, "x": 0, "y": 7}, unit="h", instant=True, legend="{{namespace}}/{{pod}}", @@ -2281,7 +1668,7 @@ def build_overview(): "type": "timeseries", "title": "Ariadne Attempts / Failures", "datasource": PROM_DS, - "gridPos": {"h": 6, "w": 6, "x": 12, "y": 7}, + "gridPos": {"h": 5, "w": 6, "x": 6, "y": 7}, "targets": [ {"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"}, {"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"}, @@ -2314,13 +1701,13 @@ def build_overview(): 46, "Platform Test Success Rate", None, - {"h": 6, "w": 6, "x": 18, "y": 7}, + {"h": 5, "w": 6, "x": 12, "y": 7}, unit="percent", targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS, legend_display="table", legend_placement="right", legend_calcs=["lastNotNull"], - links=link_to("atlas-testing"), + links=link_to("atlas-jobs"), ) test_success["fieldConfig"]["defaults"]["min"] = 0 test_success["fieldConfig"]["defaults"]["max"] = 100 @@ -2338,50 +1725,12 @@ def build_overview(): "Per-run interval pass points (0-100) for each software suite over the last 7 days. Points are connected to show trend; missing-run intervals are ignored." ) panels.append(test_success) - panels.append( - jenkins_weather_statlist_topk_panel( - 142, - "Jenkins Last Success (h, newest first)", - JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS_BY_JOB, - {"h": 5, "w": 4, "x": 8, "y": 32}, - topk_n=6, - unit="h", - decimals=1, - title_size=11, - value_size=11, - links=link_to("atlas-jobs"), - description=( - "Top 6 most recent Jenkins successes by age (newest first). " - "Green means last run succeeded; red means last run did not succeed. " - "Use Atlas Jobs for the full list." - ), - ) - ) - panels.append( - jenkins_weather_statlist_topk_panel( - 243, - "Jenkins Last Failure (h, newest first)", - JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS_BY_JOB, - {"h": 5, "w": 4, "x": 12, "y": 32}, - topk_n=6, - unit="h", - decimals=1, - title_size=11, - value_size=11, - links=link_to("atlas-jobs"), - description=( - "Top 6 most recent Jenkins failures by age (newest first). " - "Green means last run succeeded; red means last run did not succeed. " - "Use Atlas Jobs for the full list." - ), - ) - ) panels.append( bargauge_panel( 47, "PVC Backup Health / Age", PVC_BACKUP_AGE_HOURS_BY_PVC, - {"h": 5, "w": 8, "x": 16, "y": 32}, + {"h": 5, "w": 6, "x": 18, "y": 7}, unit="h", instant=True, legend="{{namespace}}/{{pvc}}", @@ -2399,10 +1748,7 @@ def build_overview(): ) panels[-1]["links"] = link_to("atlas-storage") panels[-1]["description"] = ( - "Backup age in hours computed from last-success timestamps for restic-managed PVCs " - "(nightly target: <=20h green, <40h yellow, <50h orange, >=50h red). " - "PVCs that have backup history but currently no successful backup (missing/no_completed/error) " - "are pinned to 999h for visibility." + "Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible." ) panels.append( @@ -2410,7 +1756,7 @@ def build_overview(): 30, "Mail Sent (1d)", 'max(postmark_outbound_sent{window="1d"})', - {"h": 2, "w": 4, "x": 0, "y": 19}, + {"h": 2, "w": 4, "x": 0, "y": 18}, unit="none", links=link_to("atlas-mail"), ) @@ -2421,7 +1767,7 @@ def build_overview(): "type": "stat", "title": "Mail Bounces (1d)", "datasource": PROM_DS, - "gridPos": {"h": 2, "w": 4, "x": 8, "y": 19}, + "gridPos": {"h": 2, "w": 4, "x": 8, "y": 18}, "targets": [ { "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', @@ -2467,7 +1813,7 @@ def build_overview(): 32, "Mail Success Rate (1d)", 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', - {"h": 2, "w": 4, "x": 4, "y": 19}, + {"h": 2, "w": 4, "x": 4, "y": 18}, unit="percent", thresholds=mail_success_thresholds, decimals=1, @@ -2479,7 +1825,7 @@ def build_overview(): 33, "Mail Limit Used (30d)", "max(postmark_sending_limit_used_percent)", - {"h": 2, "w": 4, "x": 12, "y": 19}, + {"h": 2, "w": 4, "x": 12, "y": 18}, unit="percent", thresholds=mail_limit_thresholds, decimals=1, @@ -2491,7 +1837,7 @@ def build_overview(): 34, "Postgres Connections Used", POSTGRES_CONN_USED, - {"h": 2, "w": 4, "x": 16, "y": 19}, + {"h": 2, "w": 4, "x": 16, "y": 18}, decimals=0, text_mode="name_and_value", legend="{{conn}}", @@ -2503,7 +1849,7 @@ def build_overview(): 35, "Postgres Hottest Connections", POSTGRES_CONN_HOTTEST, - {"h": 2, "w": 4, "x": 20, "y": 19}, + {"h": 2, "w": 4, "x": 20, "y": 18}, unit="none", decimals=0, text_mode="name_and_value", @@ -2553,7 +1899,7 @@ def build_overview(): 14, "Worker Node CPU", node_cpu_expr(worker_filter), - {"h": 12, "w": 12, "x": 0, "y": 44}, + {"h": 12, "w": 12, "x": 0, "y": 39}, unit="percent", legend="{{node}}", legend_calcs=["last"], @@ -2567,7 +1913,7 @@ def build_overview(): 15, "Worker Node RAM", node_mem_expr(worker_filter), - {"h": 12, "w": 12, "x": 12, "y": 44}, + {"h": 12, "w": 12, "x": 12, "y": 39}, unit="percent", legend="{{node}}", legend_calcs=["last"], @@ -2582,7 +1928,7 @@ def build_overview(): 16, "Control plane CPU", node_cpu_expr(CONTROL_ALL_REGEX), - {"h": 10, "w": 12, "x": 0, "y": 56}, + {"h": 10, "w": 12, "x": 0, "y": 51}, unit="percent", legend="{{node}}", legend_display="table", @@ -2594,7 +1940,7 @@ def build_overview(): 17, "Control plane RAM", node_mem_expr(CONTROL_ALL_REGEX), - {"h": 10, "w": 12, "x": 12, "y": 56}, + {"h": 10, "w": 12, "x": 12, "y": 51}, unit="percent", legend="{{node}}", legend_display="table", @@ -2607,7 +1953,7 @@ def build_overview(): 28, "Node Pod Share", '(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100', - {"h": 10, "w": 12, "x": 0, "y": 66}, + {"h": 10, "w": 12, "x": 0, "y": 61}, ) ) panels.append( @@ -2615,7 +1961,7 @@ def build_overview(): 29, "Top Nodes by Pod Count", 'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))', - {"h": 10, "w": 12, "x": 12, "y": 66}, + {"h": 10, "w": 12, "x": 12, "y": 61}, unit="none", limit=12, decimals=0, @@ -2637,7 +1983,7 @@ def build_overview(): 18, "Cluster Ingress Throughput", NET_INGRESS_EXPR, - {"h": 7, "w": 8, "x": 0, "y": 37}, + {"h": 7, "w": 8, "x": 0, "y": 32}, unit="Bps", legend="Ingress (Traefik)", legend_display="list", @@ -2650,7 +1996,7 @@ def build_overview(): 19, "Cluster Egress Throughput", NET_EGRESS_EXPR, - {"h": 7, "w": 8, "x": 8, "y": 37}, + {"h": 7, "w": 8, "x": 8, "y": 32}, unit="Bps", legend="Egress (Traefik)", legend_display="list", @@ -2663,7 +2009,7 @@ def build_overview(): 20, "Intra-Cluster Throughput", NET_INTERNAL_EXPR, - {"h": 7, "w": 8, "x": 16, "y": 37}, + {"h": 7, "w": 8, "x": 16, "y": 32}, unit="Bps", legend="Internal traffic", legend_display="list", @@ -2677,7 +2023,7 @@ def build_overview(): 21, "Root Filesystem Usage", root_usage_expr(), - {"h": 16, "w": 12, "x": 0, "y": 76}, + {"h": 16, "w": 12, "x": 0, "y": 71}, unit="percent", legend="{{node}}", legend_calcs=["last"], @@ -2692,7 +2038,7 @@ def build_overview(): 22, "Nodes Closest to Full Astraios Disks", astraios_usage_expr(), - {"h": 16, "w": 12, "x": 12, "y": 76}, + {"h": 16, "w": 12, "x": 12, "y": 71}, unit="percent", legend="{{node}}", legend_calcs=["last"], @@ -2721,7 +2067,13 @@ def build_overview(): }, "time": {"from": "now-1h", "to": "now"}, "refresh": "1m", - "links": [], + "links": [ + { + "title": "Atlas Testing (Internal)", + "url": "/d/atlas-jobs", + "targetBlank": False, + } + ], } @@ -3605,514 +2957,100 @@ def build_mail_dashboard(): def build_jobs_dashboard(): - panels = [] - age_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 6}, - {"color": "orange", "value": 24}, - {"color": "red", "value": 48}, - ], - } - old_age_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 24}, - {"color": "orange", "value": 72}, - {"color": "red", "value": 168}, - ], - } - recent_error_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "red", "value": None}, - {"color": "orange", "value": 1}, - {"color": "yellow", "value": 6}, - {"color": "green", "value": 24}, - ], - } - task_error_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, - {"color": "orange", "value": 3}, - {"color": "red", "value": 5}, - ], - } - count_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, - {"color": "orange", "value": 2}, - {"color": "red", "value": 3}, - ], - } - schedule_status_thresholds = { - "mode": "absolute", - "steps": [ - {"color": "red", "value": None}, - {"color": "yellow", "value": 0.5}, - {"color": "green", "value": 1}, - ], - } - - panels.append( - stat_panel( - 1, - "Schedule Metrics Exported", - ARIADNE_SCHEDULE_SIGNAL_COUNT, - {"h": 4, "w": 4, "x": 0, "y": 0}, - unit="none", - instant=True, - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "red", "value": None}, - {"color": "green", "value": 1}, - ], - }, - ) - ) - panels.append( - stat_panel( - 2, - "Schedule Tasks Stale (>36h)", - ARIADNE_SCHEDULE_STALE_COUNT, - {"h": 4, "w": 4, "x": 4, "y": 0}, - unit="none", - thresholds=count_thresholds, - ) - ) - panels.append( - stat_panel( - 3, - "Schedule Tasks Missing Success", - ARIADNE_SCHEDULE_MISSING_SUCCESS_COUNT, - {"h": 4, "w": 4, "x": 8, "y": 0}, - unit="none", - thresholds=count_thresholds, - ) - ) - panels.append( - stat_panel( - 4, - "Schedule Tasks Failed Last Run", - ARIADNE_SCHEDULE_FAILED_LAST_COUNT, - {"h": 4, "w": 4, "x": 12, "y": 0}, - unit="none", - thresholds=count_thresholds, - ) - ) - panels.append( - stat_panel( - 5, - "Glue Jobs Stale (>36h)", - GLUE_STALE_COUNT, - {"h": 4, "w": 4, "x": 16, "y": 0}, - unit="none", - thresholds=count_thresholds, - ) - ) - panels.append( - stat_panel( - 6, - "Jenkins Workspace PV Backlog", - JENKINS_WORKSPACE_PV_STALE_COUNT, - {"h": 4, "w": 4, "x": 20, "y": 0}, - unit="none", - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "green", "value": None}, - {"color": "yellow", "value": 1}, - {"color": "orange", "value": 10}, - {"color": "red", "value": 25}, - ], - }, - ) - ) - schedule_list_panel = table_panel( - 7, - "Ariadne Schedules: Last Success (h, newest first)", - ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS_FALLBACK, - {"h": 8, "w": 12, "x": 0, "y": 4}, - unit="h", - transformations=[ - {"id": "labelsToFields", "options": {}}, - {"id": "sortBy", "options": {"fields": ["Value"], "order": "asc"}}, - ], - instant=True, - ) - schedule_list_panel["description"] = "Primary schedule inventory ordered by recency so fresh jobs stay at the top." - panels.append(schedule_list_panel) - panels.append( - bargauge_panel( - 8, - "Ariadne Schedule Last Error (hours ago)", - ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS_FALLBACK, - {"h": 8, "w": 12, "x": 12, "y": 4}, - unit="h", - instant=True, - legend="{{task}}", - sort_order="asc", - thresholds=recent_error_thresholds, - decimals=2, - ) - ) - status_panel = bargauge_panel( - 9, - "Ariadne Schedule Last Status", - ARIADNE_SCHEDULE_LAST_STATUS_FALLBACK, - {"h": 8, "w": 8, "x": 0, "y": 12}, - unit="none", - instant=True, - legend="{{task}}", - sort_order="asc", - thresholds=schedule_status_thresholds, - decimals=0, - ) - status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error." - status_panel["fieldConfig"]["defaults"]["mappings"] = [ - { - "type": "value", - "options": { - "-1": {"text": "pending"}, - "0": {"text": "error"}, - "1": {"text": "ok"}, - }, - } - ] - panels.append(status_panel) - schedule_runs_panel = bargauge_panel( - 10, - "Ariadne Schedule Runs (range)", - ARIADNE_SCHEDULE_RUNS_RANGE_FALLBACK, - {"h": 8, "w": 8, "x": 8, "y": 12}, - unit="none", - instant=True, - legend="{{task}}", - thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]}, - ) - schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range." - panels.append(schedule_runs_panel) - schedule_errors_panel = bargauge_panel( - 11, - "Ariadne Schedule Errors (range)", - ARIADNE_SCHEDULE_ERRORS_RANGE_FALLBACK, - {"h": 8, "w": 8, "x": 16, "y": 12}, - unit="none", - instant=True, - legend="{{task}}", - thresholds=task_error_thresholds, - ) - schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range." - panels.append(schedule_errors_panel) - panels.append( - { - "id": 12, - "type": "timeseries", - "title": "Ariadne Attempts / Failures", - "datasource": PROM_DS, - "gridPos": {"h": 7, "w": 12, "x": 0, "y": 20}, - "targets": [ - {"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"}, - {"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"}, - ], - "fieldConfig": { - "defaults": {"unit": "none"}, - "overrides": [ - { - "matcher": {"id": "byName", "options": "Attempts"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}} - ], - }, - { - "matcher": {"id": "byName", "options": "Failures"}, - "properties": [ - {"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}} - ], - }, - ], - }, - "options": { - "legend": {"displayMode": "table", "placement": "right"}, - "tooltip": {"mode": "multi"}, - }, - } - ) - panels.append( - bargauge_panel( - 13, - "One-off Job Pods (age hours)", - ONEOFF_JOB_POD_AGE_HOURS, - {"h": 7, "w": 12, "x": 12, "y": 20}, - unit="h", - instant=True, - legend="{{namespace}}/{{pod}}", - thresholds=age_thresholds, - limit=12, - decimals=2, - ) - ) - panels.append( - bargauge_panel( - 14, - "Glue Jobs Last Success (hours ago)", - GLUE_LAST_SUCCESS_RANGE_HOURS, - {"h": 6, "w": 12, "x": 0, "y": 27}, - unit="h", - instant=True, - legend="{{namespace}}/{{cronjob}}", - thresholds=age_thresholds, - decimals=2, - ) - ) - panels.append( - bargauge_panel( - 15, - "Glue Jobs Last Schedule (hours ago)", - GLUE_LAST_SCHEDULE_RANGE_HOURS, - {"h": 6, "w": 12, "x": 12, "y": 27}, - unit="h", - instant=True, - legend="{{namespace}}/{{cronjob}}", - thresholds=age_thresholds, - decimals=2, - ) - ) - panels.append( - stat_panel( - 16, - "Jenkins Cleanup Signal Present", - JENKINS_CLEANUP_SIGNAL_COUNT, - {"h": 4, "w": 4, "x": 0, "y": 33}, - unit="none", - text_mode="name_and_value", - legend="Signal", - instant=True, - thresholds={ - "mode": "absolute", - "steps": [ - {"color": "red", "value": None}, - {"color": "green", "value": 1}, - ], - }, - orientation="vertical", - wide_layout=True, - graph_mode="none", - justify_mode="auto", - ) - ) - panels.append( - stat_panel( - 17, - "Jenkins Cleanup Last Run Age (h)", - JENKINS_CLEANUP_LAST_RUN_AGE_HOURS, - {"h": 4, "w": 4, "x": 4, "y": 33}, - unit="h", - decimals=2, - text_mode="name_and_value", - legend="Last Run", - instant=True, - thresholds=age_thresholds, - orientation="vertical", - wide_layout=True, - graph_mode="none", - justify_mode="auto", - ) - ) - panels.append( - stat_panel( - 18, - "Jenkins Cleanup Last Success Age (h)", - JENKINS_CLEANUP_LAST_SUCCESS_AGE_HOURS, - {"h": 4, "w": 4, "x": 8, "y": 33}, - unit="h", - decimals=2, - text_mode="name_and_value", - legend="Last Success", - instant=True, - thresholds=age_thresholds, - orientation="vertical", - wide_layout=True, - graph_mode="none", - justify_mode="auto", - ) - ) - panels.append( - stat_panel( - 19, - "Jenkins Cleanup Planned (last run)", - JENKINS_CLEANUP_LAST_PLANNED, - {"h": 4, "w": 4, "x": 12, "y": 33}, - unit="none", - text_mode="name_and_value", - legend="Planned", - instant=True, - orientation="vertical", - wide_layout=True, - graph_mode="none", - justify_mode="auto", - ) - ) - panels.append( - stat_panel( - 20, - "Jenkins Cleanup Deleted (last run)", - JENKINS_CLEANUP_LAST_DELETED, - {"h": 4, "w": 4, "x": 16, "y": 33}, - unit="none", - text_mode="name_and_value", - legend="Deleted", - instant=True, - orientation="vertical", - wide_layout=True, - graph_mode="none", - justify_mode="auto", - ) - ) - panels.append( - stat_panel( - 21, - "Ariadne Access Requests", - ARIADNE_ACCESS_REQUESTS, - {"h": 4, "w": 4, "x": 20, "y": 33}, - unit="none", - instant=True, - ) - ) - panels.append( - timeseries_panel( - 22, - "Jenkins Cleanup Runs (range)", - None, - {"h": 7, "w": 12, "x": 0, "y": 37}, - unit="none", - targets=[ - {"refId": "A", "expr": JENKINS_CLEANUP_RUNS_RANGE, "legendFormat": "{{mode}}/{{status}}"}, - ], - legend_display="table", - legend_placement="right", - ) - ) - panels.append( - timeseries_panel( - 23, - "Jenkins Cleanup Objects (range)", - None, - {"h": 7, "w": 12, "x": 12, "y": 37}, - unit="none", - targets=[ - {"refId": "A", "expr": JENKINS_CLEANUP_OBJECTS_RANGE, "legendFormat": "{{kind}}/{{action}}/{{mode}}"}, - ], - legend_display="table", - legend_placement="right", - ) - ) - panels.append( - jenkins_weather_statlist_panel( - 24, - "Jenkins Build Weather (last run h, newest first)", - JENKINS_BUILD_WEATHER_LAST_RUN_AGE_HOURS, - {"h": 9, "w": 24, "x": 0, "y": 44}, - unit="h", - decimals=1, - sort_order="asc", - limit=20, - description=( - "Jenkins homepage-style list from Ariadne with weather icons and status-colored job names. " - "Click a name to open the Jenkins job page." - ), - ) - ) - stale_volume_panel = bargauge_panel( - 27, - "Jenkins Workspace PV Age (h, detached only)", - JENKINS_WORKSPACE_PV_STALE_AGE_HOURS, - {"h": 10, "w": 24, "x": 0, "y": 53}, - unit="h", - instant=True, - legend="{{name}} -> {{persistentvolume}}", - thresholds=old_age_thresholds, - decimals=1, - limit=40, - ) - stale_volume_panel["description"] = ( - "Oldest detached Jenkins workspace volumes first. This is the direct cleanup backlog view." - ) - panels.append(stale_volume_panel) - - return { - "uid": "atlas-jobs", - "title": "Atlas Jobs", - "folderUid": PRIVATE_FOLDER, - "editable": True, - "panels": panels, - "time": {"from": "now-7d", "to": "now"}, - "annotations": {"list": []}, - "schemaVersion": 39, - "style": "dark", - "tags": ["atlas", "jobs", "glue"], - } - - -def build_testing_dashboard(): panels = [] suite_var = "${suite}" success = PLATFORM_TEST_SUCCESS_STATUS - runs_24h = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[24h])) or on() vector(0))' - ) - runs_30d = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[30d])) or on() vector(0))' + exported = PLATFORM_TEST_EXPORT_FILTER + runs_selector = f'suite=~"{suite_var}",{exported}' + runs_success_selector = f'{runs_selector},status=~"{success}"' + runs_failure_selector = f'{runs_selector},status!~"{success}"' + checks_selector = f'__name__=~".*_quality_gate_checks_total",suite=~"{suite_var}",{exported}' + tests_selector = f'__name__=~".*_quality_gate_tests_total",suite=~"{suite_var}",{exported}' + coverage_metric_selector = f'__name__=~".*_quality_gate_coverage_percent",suite=~"{suite_var}",{exported}' + workspace_coverage_selector = f'suite=~"{suite_var}",{exported}' + smell_selector = f'suite=~"{suite_var}",{exported}' + + suite_universe = " or ".join( + f'label_replace(vector(1), "suite", "{suite}", "__name__", ".*")' + for suite in PLATFORM_TEST_SUITE_NAMES ) + + runs_24h = f'(sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[24h])) or on() vector(0))' + runs_30d = f'(sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d])) or on() vector(0))' success_24h = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[24h])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[24h])) or on() vector(0))' ) success_30d = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[30d])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[30d])) or on() vector(0))' ) failures_24h = ( - f'(sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{success}"}}[24h])) or on() vector(0))' + f'(sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[24h])) or on() vector(0))' ) success_rate_24h = f"100 * ({success_24h}) / clamp_min(({runs_24h}), 1)" success_rate_30d = f"100 * ({success_30d}) / clamp_min(({runs_30d}), 1)" - suite_index_30d = ( - f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[30d]))' + success_rate_by_suite_24h = ( + f'sort_desc(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[24h]))) ' + f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[24h]))), 1))' + ) + failures_by_suite_24h = ( + f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[24h]))' + ) + success_history_by_suite = ( + f'100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) ' + f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval]))), 1))' ) coverage_by_suite = ( - f'(max by (suite) ({{__name__=~".*_quality_gate_coverage_percent",suite=~"{suite_var}"}})) ' - f'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{suite=~"{suite_var}"}}))' + f'(max by (suite) ({{{coverage_metric_selector}}})) ' + f'or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}))' ) coverage_with_missing = ( - f"({coverage_by_suite}) or on(suite) (0 * ({suite_index_30d}) - 1)" + f"({coverage_by_suite}) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d]))) - 1)" ) coverage_gap = f"clamp_min(95 - ({coverage_by_suite}), 0)" - smell_by_suite = ( - f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{suite=~"{suite_var}"}})' - ) + smell_by_suite = f'max by (suite) (platform_quality_gate_source_lines_over_500_total{{{smell_selector}}})' smell_with_missing = ( - f"({smell_by_suite}) or on(suite) (0 * ({suite_index_30d}) - 1)" + f"({smell_by_suite}) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d]))) - 1)" ) average_coverage = f"(avg(({coverage_by_suite})) or on() vector(0))" suites_loc_violating = f'(sum((({smell_by_suite}) > bool 0)) or on() vector(0))' - success_rate_by_suite_24h = ( - f'sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[24h]))) ' - f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[24h]))), 1)) ' - f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[24h]))) > 0))' + + checks_failed_total = f'(sum({{{checks_selector},result!~"{success}"}}) or on() vector(0))' + checks_failed_tests = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"tests|unit|build",result!~"{success}"}})) or on() vector(0))' ) - success_history_by_suite = ( - f'(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[$__interval]))) ' - f'/ clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval]))), 1)) ' - f'and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval]))) > 0))' + checks_failed_coverage = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"coverage",result!~"{success}"}})) or on() vector(0))' + ) + checks_failed_loc = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"loc|smell",result!~"{success}"}})) or on() vector(0))' + ) + checks_failed_docs = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"docs|naming|hygiene|lint|docs_naming",result!~"{success}"}})) or on() vector(0))' + ) + checks_failed_gate = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"gate|glue|gate_glue",result!~"{success}"}})) or on() vector(0))' + ) + checks_failed_sonarqube = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"sonarqube|sonar",result!~"{success}"}})) or on() vector(0))' + ) + checks_failed_supply_chain = ( + f'(sum(count by (suite) ({{{checks_selector},check=~"ironbank|supply_chain|image_compliance|artifact_security",result!~"{success}"}})) or on() vector(0))' ) - pass_rate_thresholds = { + missing_tests_by_suite = ( + f'(({suite_universe}) unless on(suite) count by (suite) ({{__name__=~".*_quality_gate_tests_total",{exported}}}))' + ) + missing_checks_by_suite = ( + f'(({suite_universe}) unless on(suite) count by (suite) ({{__name__=~".*_quality_gate_checks_total",{exported}}}))' + ) + missing_coverage_by_suite = ( + f'(({suite_universe}) unless on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{{{exported}}}))' + ) + missing_loc_by_suite = ( + f'(({suite_universe}) unless on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{{{exported}}}))' + ) + + success_thresholds = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, @@ -4149,30 +3087,24 @@ def build_testing_dashboard(): {"color": "red", "value": 5}, ], } + missing_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "red", "value": 1}, + ], + } - panels.append( - text_panel( - 1, - "Testing Modes", - ( - "### Atlas Testing\n" - "- **Overview mode**: keep `Suite=All` to compare every project.\n" - "- **Drilldown mode**: choose one suite to isolate quality checks, failures, and trends.\n" - "- Goal line: **95% coverage** and **0 files over 500 LOC** for every suite." - ), - {"h": 3, "w": 24, "x": 0, "y": 0}, - ) - ) panels.append( stat_panel( 2, "Success Rate (24h)", success_rate_24h, - {"h": 5, "w": 4, "x": 0, "y": 3}, + {"h": 5, "w": 4, "x": 0, "y": 0}, unit="percent", decimals=2, instant=True, - thresholds=pass_rate_thresholds, + thresholds=success_thresholds, ) ) panels.append( @@ -4180,11 +3112,11 @@ def build_testing_dashboard(): 3, "Success Rate (30d)", success_rate_30d, - {"h": 5, "w": 4, "x": 4, "y": 3}, + {"h": 5, "w": 4, "x": 4, "y": 0}, unit="percent", decimals=2, instant=True, - thresholds=pass_rate_thresholds, + thresholds=success_thresholds, ) ) panels.append( @@ -4192,7 +3124,7 @@ def build_testing_dashboard(): 4, "Failures (24h)", failures_24h, - {"h": 5, "w": 4, "x": 8, "y": 3}, + {"h": 5, "w": 4, "x": 8, "y": 0}, unit="none", instant=True, thresholds=failures_thresholds, @@ -4203,7 +3135,7 @@ def build_testing_dashboard(): 5, "Runs (24h)", runs_24h, - {"h": 5, "w": 4, "x": 12, "y": 3}, + {"h": 5, "w": 4, "x": 12, "y": 0}, unit="none", instant=True, thresholds={ @@ -4217,11 +3149,11 @@ def build_testing_dashboard(): 6, "Avg Coverage (%)", average_coverage, - {"h": 5, "w": 4, "x": 16, "y": 3}, + {"h": 5, "w": 4, "x": 16, "y": 0}, unit="percent", decimals=2, instant=True, - thresholds=pass_rate_thresholds, + thresholds=success_thresholds, ) ) panels.append( @@ -4229,18 +3161,108 @@ def build_testing_dashboard(): 7, "Suites with LOC >500", suites_loc_violating, - {"h": 5, "w": 4, "x": 20, "y": 3}, + {"h": 5, "w": 4, "x": 20, "y": 0}, unit="none", instant=True, thresholds=smell_thresholds, ) ) + + panels.append( + stat_panel( + 19, + "Failing Tests", + checks_failed_tests, + {"h": 4, "w": 3, "x": 0, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 20, + "Failing Coverage", + checks_failed_coverage, + {"h": 4, "w": 3, "x": 3, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 21, + "Failing LOC", + checks_failed_loc, + {"h": 4, "w": 3, "x": 6, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 22, + "Failing Docs/Naming", + checks_failed_docs, + {"h": 4, "w": 3, "x": 9, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 23, + "Failing Gate/Glue", + checks_failed_gate, + {"h": 4, "w": 3, "x": 12, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 24, + "Failing SonarQube", + checks_failed_sonarqube, + {"h": 4, "w": 3, "x": 15, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 25, + "Failing Supply Chain", + checks_failed_supply_chain, + {"h": 4, "w": 3, "x": 18, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 26, + "Total Failing Checks", + checks_failed_total, + {"h": 4, "w": 3, "x": 21, "y": 5}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( bargauge_panel( 8, "Failures by Suite (24h)", - f'sum by (suite) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{success}"}}[24h]))', - {"h": 8, "w": 8, "x": 0, "y": 8}, + failures_by_suite_24h, + {"h": 8, "w": 8, "x": 0, "y": 9}, unit="none", instant=True, legend="{{suite}}", @@ -4252,12 +3274,12 @@ def build_testing_dashboard(): 9, "Success Rate by Suite (24h)", success_rate_by_suite_24h, - {"h": 8, "w": 8, "x": 8, "y": 8}, + {"h": 8, "w": 8, "x": 8, "y": 9}, unit="percent", instant=True, legend="{{suite}}", sort_order="asc", - thresholds=pass_rate_thresholds, + thresholds=success_thresholds, decimals=2, ) ) @@ -4265,7 +3287,7 @@ def build_testing_dashboard(): 10, "Coverage Gap to 95% by Suite", coverage_gap, - {"h": 8, "w": 8, "x": 16, "y": 8}, + {"h": 8, "w": 8, "x": 16, "y": 9}, unit="percent", instant=True, legend="{{suite}}", @@ -4275,11 +3297,12 @@ def build_testing_dashboard(): ) coverage_gap_panel["description"] = "Gap from the 95% target. 0 means the suite is at or above target." panels.append(coverage_gap_panel) + history_panel = timeseries_panel( 11, "Success History by Suite", success_history_by_suite, - {"h": 8, "w": 24, "x": 0, "y": 16}, + {"h": 8, "w": 24, "x": 0, "y": 17}, unit="percent", legend="{{suite}}", legend_display="list", @@ -4296,29 +3319,29 @@ def build_testing_dashboard(): "pointSize": 3, "spanNulls": True, } - history_panel["description"] = "Trend per suite. In drilldown mode this becomes the selected suite history." panels.append(history_panel) + panels.append( timeseries_panel( 12, "Run Outcomes (Selected Scope)", None, - {"h": 8, "w": 8, "x": 0, "y": 24}, + {"h": 8, "w": 8, "x": 0, "y": 25}, unit="none", targets=[ { "refId": "A", - "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status=~"{success}"}}[$__interval])) or on() vector(0)', + "expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[$__interval])) or on() vector(0)', "legendFormat": "Success", }, { "refId": "B", - "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}",status!~"{success}"}}[$__interval])) or on() vector(0)', + "expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_failure_selector}}}[$__interval])) or on() vector(0)', "legendFormat": "Failure", }, { "refId": "C", - "expr": f'sum(increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[$__interval])) or on() vector(0)', + "expr": f'sum(increase(platform_quality_gate_runs_total{{{runs_selector}}}[$__interval])) or on() vector(0)', "legendFormat": "Total", }, ], @@ -4332,17 +3355,17 @@ def build_testing_dashboard(): 13, "Coverage & LOC History (Selected Scope)", None, - {"h": 8, "w": 8, "x": 8, "y": 24}, + {"h": 8, "w": 8, "x": 8, "y": 25}, unit="none", targets=[ { "refId": "A", - "expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{suite=~"{suite_var}"}}[$__interval])', + "expr": f'max_over_time(platform_quality_gate_workspace_line_coverage_percent{{{workspace_coverage_selector}}}[$__interval])', "legendFormat": "{{suite}} coverage %", }, { "refId": "B", - "expr": f'max_over_time(platform_quality_gate_source_lines_over_500_total{{suite=~"{suite_var}"}}[$__interval])', + "expr": f'max_over_time(platform_quality_gate_source_lines_over_500_total{{{smell_selector}}}[$__interval])', "legendFormat": "{{suite}} files >500 LOC", }, ], @@ -4351,41 +3374,34 @@ def build_testing_dashboard(): legend_calcs=["lastNotNull", "max"], ) ) + run_mix_panel = pie_panel( + 14, + "Run Status Mix (30d)", + f'sum by (status) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[30d]))', + {"h": 8, "w": 8, "x": 16, "y": 25}, + ) + run_mix_panel["targets"][0]["legendFormat"] = "{{status}}" + run_mix_panel["fieldConfig"]["defaults"]["unit"] = "none" + panels.append(run_mix_panel) + panels.append( - pie_panel( - 14, - "Run Status Mix (30d)", - f'sum by (status) (increase(platform_quality_gate_runs_total{{suite=~"{suite_var}"}}[30d]))', - {"h": 8, "w": 8, "x": 16, "y": 24}, - legend="{{status}}", + bargauge_panel( + 15, + "Latest Test Counters (Suite + Result)", + f'sum by (suite, result) ({{{tests_selector}}})', + {"h": 8, "w": 12, "x": 0, "y": 33}, unit="none", instant=True, + legend="{{suite}} · {{result}}", + sort_order="desc", + limit=24, ) ) - latest_tests_panel = bargauge_panel( - 15, - "Latest Test Counters (Suite + Result)", - f'sum by (suite, result) ({{__name__=~".*_quality_gate_tests_total",suite=~"{suite_var}"}})', - {"h": 8, "w": 12, "x": 0, "y": 32}, - unit="none", - instant=True, - legend="{{suite}} · {{result}}", - sort_order="desc", - limit=24, - ) - latest_tests_panel["description"] = ( - "Latest emitted test counters per suite/result. " - "If a suite is missing here, that exporter is not sending *_quality_gate_tests_total." - ) - panels.append(latest_tests_panel) failing_checks_panel = bargauge_panel( 16, "Failing Checks (Suite + Check)", - ( - f'sum by (suite, check) ' - f'({{__name__=~".*_quality_gate_checks_total",suite=~"{suite_var}",result!~"{success}"}})' - ), - {"h": 8, "w": 12, "x": 12, "y": 32}, + f'sum by (suite, check) ({{{checks_selector},result!~"{success}"}})', + {"h": 8, "w": 12, "x": 12, "y": 33}, unit="none", instant=True, legend="{{suite}} · {{check}}", @@ -4400,23 +3416,24 @@ def build_testing_dashboard(): 17, "Coverage by Suite (Latest, gate 95)", coverage_with_missing, - {"h": 8, "w": 12, "x": 0, "y": 40}, + {"h": 8, "w": 12, "x": 0, "y": 41}, unit="percent", instant=True, legend="{{suite}}", sort_order="asc", - thresholds=pass_rate_thresholds, + thresholds=success_thresholds, decimals=2, ) coverage_panel["fieldConfig"]["defaults"]["mappings"] = [ {"type": "value", "options": {"-1": {"text": "missing"}}} ] panels.append(coverage_panel) + smell_panel = bargauge_panel( 18, "Files >500 LOC by Suite (Latest)", smell_with_missing, - {"h": 8, "w": 12, "x": 12, "y": 40}, + {"h": 8, "w": 12, "x": 12, "y": 41}, unit="none", instant=True, legend="{{suite}}", @@ -4428,8 +3445,125 @@ def build_testing_dashboard(): ] panels.append(smell_panel) + panels.append( + bargauge_panel( + 27, + "Missing Tests Metrics by Suite", + missing_tests_by_suite, + {"h": 7, "w": 6, "x": 0, "y": 49}, + unit="none", + instant=True, + legend="{{suite}}", + sort_order="desc", + thresholds=missing_thresholds, + decimals=0, + ) + ) + panels.append( + bargauge_panel( + 28, + "Missing Checks Metrics by Suite", + missing_checks_by_suite, + {"h": 7, "w": 6, "x": 6, "y": 49}, + unit="none", + instant=True, + legend="{{suite}}", + sort_order="desc", + thresholds=missing_thresholds, + decimals=0, + ) + ) + panels.append( + bargauge_panel( + 29, + "Missing Coverage Metrics by Suite", + missing_coverage_by_suite, + {"h": 7, "w": 6, "x": 12, "y": 49}, + unit="none", + instant=True, + legend="{{suite}}", + sort_order="desc", + thresholds=missing_thresholds, + decimals=0, + ) + ) + panels.append( + bargauge_panel( + 30, + "Missing LOC Metrics by Suite", + missing_loc_by_suite, + {"h": 7, "w": 6, "x": 18, "y": 49}, + unit="none", + instant=True, + legend="{{suite}}", + sort_order="desc", + thresholds=missing_thresholds, + decimals=0, + ) + ) + panels.append( + stat_panel( + 31, + "SonarQube API Up", + "(max(sonarqube_up) or on() vector(0))", + {"h": 6, "w": 4, "x": 0, "y": 56}, + unit="none", + instant=True, + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "green", "value": 1}, + ], + }, + ) + ) + panels.append( + stat_panel( + 32, + "Sonar Projects (Selected)", + f'(count(sonarqube_project_quality_gate_pass{{project_key=~"{suite_var}"}}) or on() vector(0))', + {"h": 6, "w": 4, "x": 4, "y": 56}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + panels.append( + stat_panel( + 33, + "Sonar Gate Fetch Errors", + "(max(sonarqube_quality_gate_fetch_errors_total) or on() vector(0))", + {"h": 6, "w": 4, "x": 8, "y": 56}, + unit="none", + instant=True, + thresholds=failures_thresholds, + ) + ) + sonar_status_mix_panel = pie_panel( + 34, + "Sonar Gate Status Mix (Selected)", + f'count by (status) (sonarqube_project_quality_gate_pass{{project_key=~"{suite_var}"}})', + {"h": 6, "w": 6, "x": 12, "y": 56}, + ) + sonar_status_mix_panel["targets"][0]["legendFormat"] = "{{status}}" + panels.append(sonar_status_mix_panel) + panels.append( + bargauge_panel( + 35, + "Projects Failing Sonar Gate", + f'sort_desc(count by (project_key) (sonarqube_project_quality_gate_pass{{project_key=~"{suite_var}",status!~"OK|ok"}}))', + {"h": 6, "w": 6, "x": 18, "y": 56}, + unit="none", + instant=True, + legend="{{project_key}}", + sort_order="desc", + thresholds=failures_thresholds, + ) + ) + return { - "uid": "atlas-testing", + "uid": "atlas-jobs", "title": "Atlas Testing", "folderUid": PRIVATE_FOLDER, "editable": True, @@ -4438,35 +3572,74 @@ def build_testing_dashboard(): "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", - "tags": ["atlas", "testing", "quality"], - "templating": {"list": [testing_suite_variable()]}, + "tags": ["atlas", "testing", "quality-gate", "ci"], + "templating": { + "list": [ + testing_suite_variable(), + ] + }, } def build_power_dashboard(): panels = [] + status_mapping = [ + { + "type": "value", + "options": { + "0": {"text": "⚡ Charging"}, + "1": {"text": "🔋 Discharging"}, + }, + } + ] panels.append( stat_panel( 1, "UPS Current Load", - UPS_CURRENT_ROW_EXPR, + None, {"h": 8, "w": 12, "x": 0, "y": 0}, unit="none", decimals=1, text_mode="name_and_value", - legend="{{ups}} {{metric}}", - instant=True, - field_overrides=[ - {"matcher": {"id": "byRegexp", "options": ".*Draw$"}, "properties": [{"id": "unit", "value": "watt"}]}, - {"matcher": {"id": "byRegexp", "options": ".*Runtime$"}, "properties": [{"id": "unit", "value": "s"}]}, + targets=[ + {"refId": "A", "expr": ANANKE_UPS_DRAW_WATTS_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Draw (W)", "instant": True}, + {"refId": "B", "expr": ANANKE_UPS_RUNTIME_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Discharge", "instant": True}, + {"refId": "C", "expr": ANANKE_UPS_ON_BATTERY_DB, "legendFormat": f"{ANANKE_UPS_DB_NAME} Status", "instant": True}, + {"refId": "D", "expr": ANANKE_UPS_DRAW_WATTS_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)", "instant": True}, + {"refId": "E", "expr": ANANKE_UPS_RUNTIME_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Discharge", "instant": True}, + {"refId": "F", "expr": ANANKE_UPS_ON_BATTERY_TETHYS, "legendFormat": f"{ANANKE_UPS_TETHYS_NAME} Status", "instant": True}, ], - orientation="vertical", - wide_layout=False, - title_size=14, - value_size=24, + field_overrides=[ + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Draw (W)"}, + "properties": [{"id": "unit", "value": "watt"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Discharge"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Discharge"}, + "properties": [{"id": "unit", "value": "s"}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_DB_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_DB_NODE}"}], + }, + { + "matcher": {"id": "byName", "options": f"{ANANKE_UPS_TETHYS_NAME} Status"}, + "properties": [{"id": "mappings", "value": status_mapping}, {"id": "description", "value": f"Attached node: {ANANKE_UPS_TETHYS_NODE}"}], + }, + ], + orientation="horizontal", + wide_layout=True, description=( - "Per-UPS live snapshot: draw, discharge runtime, and charging/discharging status." + "Per-UPS live snapshot: current draw in watts, estimated battery runtime if discharge started now, and charging/discharging status." ), ) ) @@ -4491,24 +3664,26 @@ def build_power_dashboard(): stat_panel( 3, "Current Climate", - CLIMATE_CURRENT_ROW_EXPR, + None, {"h": 8, "w": 12, "x": 0, "y": 8}, unit="none", decimals=2, text_mode="name_and_value", - legend="{{metric}}", - instant=True, - field_overrides=[ - {"matcher": {"id": "byName", "options": "Temp °C"}, "properties": [{"id": "unit", "value": "celsius"}]}, - {"matcher": {"id": "byName", "options": "Temp °F"}, "properties": [{"id": "unit", "value": "fahrenheit"}]}, - {"matcher": {"id": "byName", "options": "Humidity"}, "properties": [{"id": "unit", "value": "percent"}]}, - {"matcher": {"id": "byName", "options": "Pressure"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]}, + targets=[ + {"refId": "A", "expr": CLIMATE_TEMP_MAX, "legendFormat": "Tent Temp (°C)", "instant": True}, + {"refId": "B", "expr": CLIMATE_PRESSURE_CURRENT, "legendFormat": "Tent VPD (kPa)", "instant": True}, + {"refId": "C", "expr": CLIMATE_HUMIDITY_MAX, "legendFormat": "Tent RH (%)", "instant": True}, + {"refId": "D", "expr": CLIMATE_DEWPOINT_CURRENT, "legendFormat": "Dew Point (°C)", "instant": True}, ], - orientation="vertical", - wide_layout=False, - title_size=16, - value_size=28, - description="Current tent values: Temp °C, Temp °F, Humidity, Pressure.", + field_overrides=[ + {"matcher": {"id": "byName", "options": "Tent Temp (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]}, + {"matcher": {"id": "byName", "options": "Tent VPD (kPa)"}, "properties": [{"id": "unit", "value": "suffix:kPa"}]}, + {"matcher": {"id": "byName", "options": "Tent RH (%)"}, "properties": [{"id": "unit", "value": "percent"}]}, + {"matcher": {"id": "byName", "options": "Dew Point (°C)"}, "properties": [{"id": "unit", "value": "celsius"}]}, + ], + orientation="horizontal", + wide_layout=True, + description="Current tent temperature, humidity, VPD, and dew point. These render once Typhon climate telemetry is online.", ) ) panels.append( @@ -4517,65 +3692,50 @@ def build_power_dashboard(): "Climate History", None, {"h": 8, "w": 12, "x": 12, "y": 8}, - unit="none", + unit="celsius", targets=[ {"refId": "A", "expr": CLIMATE_TEMP_SERIES, "legendFormat": "Temperature (°C)"}, - {"refId": "B", "expr": CLIMATE_TEMP_FAHRENHEIT_SERIES, "legendFormat": "Temperature (°F)"}, - {"refId": "C", "expr": CLIMATE_HUMIDITY_SERIES, "legendFormat": "Humidity (%)"}, - {"refId": "D", "expr": CLIMATE_PRESSURE_SERIES, "legendFormat": "Pressure (VPD kPa)"}, + {"refId": "B", "expr": CLIMATE_HUMIDITY_SERIES, "legendFormat": "Humidity (%)"}, + {"refId": "C", "expr": CLIMATE_PRESSURE_SERIES, "legendFormat": "VPD (kPa)"}, + {"refId": "D", "expr": CLIMATE_DEWPOINT_SERIES, "legendFormat": "Dew Point (°C)"}, ], field_overrides=[ - { - "matcher": {"id": "byName", "options": "Temperature (°C)"}, - "properties": [ - {"id": "unit", "value": "suffix:°C"}, - {"id": "decimals", "value": 2}, - {"id": "custom.axisCenteredZero", "value": False}, - ], - }, - { - "matcher": {"id": "byName", "options": "Temperature (°F)"}, - "properties": [ - {"id": "unit", "value": "suffix:°F"}, - {"id": "decimals", "value": 2}, - {"id": "custom.axisCenteredZero", "value": False}, - ], - }, { "matcher": {"id": "byName", "options": "Humidity (%)"}, "properties": [ - {"id": "unit", "value": "suffix:%"}, - {"id": "decimals", "value": 2}, - {"id": "custom.axisPlacement", "value": "right"}, - {"id": "custom.axisCenteredZero", "value": False}, + {"id": "unit", "value": "percent"}, ], }, { - "matcher": {"id": "byName", "options": "Pressure (VPD kPa)"}, + "matcher": {"id": "byName", "options": "VPD (kPa)"}, "properties": [ - {"id": "unit", "value": "suffix:kPa"}, + {"id": "unit", "value": "none"}, {"id": "custom.axisPlacement", "value": "right"}, + {"id": "custom.axisLabel", "value": "kPa"}, {"id": "decimals", "value": 2}, - {"id": "custom.axisCenteredZero", "value": False}, ], } ], legend_display="table", legend_placement="right", - description="Historical tent temperature (C/F), humidity, and pressure proxy (VPD kPa).", + description="Two-axis chart: tent temperature/humidity/dew point (left axis) and tent VPD in kPa (right axis).", ) ) panels.append( stat_panel( 5, "Fan Activity", - CLIMATE_FAN_CURRENT_ROW_EXPR, + None, {"h": 8, "w": 12, "x": 0, "y": 16}, unit="none", decimals=0, text_mode="name_and_value", - legend="{{metric}}", - instant=True, + targets=[ + {"refId": "A", "expr": f"round({CLIMATE_FAN_OUTLET_CURRENT})", "legendFormat": "Inside Outlet", "instant": True}, + {"refId": "B", "expr": f"round({CLIMATE_FAN_INSIDE_INLET_CURRENT})", "legendFormat": "Inside Inlet", "instant": True}, + {"refId": "C", "expr": f"round({CLIMATE_FAN_OUTSIDE_INLET_CURRENT})", "legendFormat": "Outside Inlet", "instant": True}, + {"refId": "D", "expr": f"round({CLIMATE_FAN_INTERIOR_CURRENT})", "legendFormat": "Interior Fans", "instant": True}, + ], thresholds={ "mode": "absolute", "steps": [ @@ -4584,15 +3744,9 @@ def build_power_dashboard(): {"color": "red", "value": 9}, ], }, - field_overrides=[ - {"matcher": {"id": "byName", "options": "Outlet"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Inlet - In"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Inlet - Out"}, "properties": [{"id": "decimals", "value": 0}]}, - {"matcher": {"id": "byName", "options": "Interior"}, "properties": [{"id": "decimals", "value": 0}]}, - ], - orientation="vertical", - wide_layout=False, - description="Current fan activity levels: outlet, inlet in, inlet out, interior (0-10).", + orientation="horizontal", + wide_layout=True, + description="Current fan activity levels (0-10): inside outlet, inside inlet, outside inlet, and interior fans.", ) ) panels.append( @@ -4604,10 +3758,10 @@ def build_power_dashboard(): unit="none", max_value=10, targets=[ - {"refId": "A", "expr": CLIMATE_FAN_OUTLET_SERIES, "legendFormat": "Outlet"}, - {"refId": "B", "expr": CLIMATE_FAN_INSIDE_INLET_SERIES, "legendFormat": "Inlet - Inside"}, - {"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Inlet - Outside"}, - {"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior"}, + {"refId": "A", "expr": CLIMATE_FAN_OUTLET_SERIES, "legendFormat": "Inside Outlet"}, + {"refId": "B", "expr": CLIMATE_FAN_INSIDE_INLET_SERIES, "legendFormat": "Inside Inlet"}, + {"refId": "C", "expr": CLIMATE_FAN_OUTSIDE_INLET_SERIES, "legendFormat": "Outside Inlet"}, + {"refId": "D", "expr": CLIMATE_FAN_INTERIOR_SERIES, "legendFormat": "Interior Fans"}, ], legend_display="table", legend_placement="right", @@ -4726,10 +3880,6 @@ DASHBOARDS = { "builder": build_jobs_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-jobs.yaml", }, - "atlas-testing": { - "builder": build_testing_dashboard, - "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml", - }, "atlas-power": { "builder": build_power_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-power.yaml", diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index a25f0c9e..3bc78f81 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -203,6 +203,32 @@ data: } } } + pipelineJob('arcanagon') { + properties { + pipelineTriggers { + triggers { + scmTrigger { + scmpoll_spec('H/5 * * * *') + ignorePostCommitHooks(false) + } + } + } + } + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/arcanagon.git') + credentials('gitea-pat') + } + branches('*/master') + } + } + scriptPath('Jenkinsfile') + } + } + } pipelineJob('pegasus') { properties { pipelineTriggers { @@ -425,8 +451,10 @@ data: - name: "default" namespace: "jenkins" workspaceVolume: - emptyDirWorkspaceVolume: - memory: false + dynamicPVC: + accessModes: "ReadWriteOnce" + requestsSize: "20Gi" + storageClassName: "astreae" containers: - name: "jnlp" args: "^${computer.jnlpmac} ^${computer.name}" @@ -444,7 +472,7 @@ data: workingDir: /home/jenkins/agent idleMinutes: 0 instanceCap: 2147483647 - label: "jenkins-jenkins-agent" + label: "jenkins-jenkins-agent " nodeUsageMode: "NORMAL" podRetention: Never serviceAccount: "jenkins" diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index 07b372bd..24d897a8 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -24,6 +24,7 @@ resources: - oneoffs/logs-oidc-secret-ensure-job.yaml - oneoffs/metis-oidc-secret-ensure-job.yaml - oneoffs/soteria-oidc-secret-ensure-job.yaml + - oneoffs/quality-oidc-secret-ensure-job.yaml - oneoffs/metis-ssh-keys-secret-ensure-job.yaml - oneoffs/harbor-oidc-secret-ensure-job.yaml - oneoffs/vault-oidc-secret-ensure-job.yaml diff --git a/services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml b/services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml new file mode 100644 index 00000000..e0a0919c --- /dev/null +++ b/services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml @@ -0,0 +1,198 @@ +# services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml +# One-off job for sso/quality-oidc-secret-ensure-1. +# Purpose: ensure the SonarQube oauth2-proxy OIDC client and Vault secret exist. +# Keep this completed Job around; bump the suffix if it ever needs to be rerun. +apiVersion: batch/v1 +kind: Job +metadata: + name: quality-oidc-secret-ensure-1 + namespace: sso +spec: + backoffLimit: 0 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso-secrets" + vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + containers: + - name: apply + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + . /vault/secrets/keycloak-admin-env.sh + KC_URL="http://keycloak.sso.svc.cluster.local" + ACCESS_TOKEN="" + for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done + if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Failed to fetch Keycloak admin token" >&2 + exit 1 + fi + + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + + if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + create_payload='{"clientId":"sonarqube","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}' + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${create_payload}" \ + "$KC_URL/admin/realms/atlas/clients")" + if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then + echo "Keycloak client create failed (status ${status})" >&2 + exit 1 + fi + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + fi + + if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client sonarqube not found" >&2 + exit 1 + fi + + SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)" + if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then + echo "Keycloak client scope groups not found" >&2 + exit 1 + fi + + DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)" + OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)" + + if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \ + && ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then + status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")" + if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")" + if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then + echo "Failed to attach groups client scope to sonarqube (status ${status})" >&2 + exit 1 + fi + fi + fi + + update_payload='{"enabled":true,"clientId":"sonarqube","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}' + status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${update_payload}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}")" + if [ "$status" != "204" ]; then + echo "Keycloak client update failed (status ${status})" >&2 + exit 1 + fi + + CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" + if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 + fi + + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-sso-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 + fi + + read_status="$(curl -sS -o /tmp/sonarqube-oidc-read.json -w "%{http_code}" \ + -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)" + COOKIE_SECRET="" + if [ "${read_status}" = "200" ]; then + COOKIE_SECRET="$(jq -r '.data.data.cookie_secret // empty' /tmp/sonarqube-oidc-read.json)" + elif [ "${read_status}" != "404" ]; then + echo "Vault read failed (status ${read_status})" >&2 + cat /tmp/sonarqube-oidc-read.json >&2 || true + exit 1 + fi + if [ -n "${COOKIE_SECRET}" ]; then + length="$(printf '%s' "${COOKIE_SECRET}" | wc -c | tr -d ' ')" + if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then + COOKIE_SECRET="" + fi + fi + if [ -z "${COOKIE_SECRET}" ]; then + COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')" + fi + + payload="$(jq -nc \ + --arg client_id "sonarqube" \ + --arg client_secret "${CLIENT_SECRET}" \ + --arg cookie_secret "${COOKIE_SECRET}" \ + '{data:{client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret}}')" + write_status="$(curl -sS -o /tmp/sonarqube-oidc-write.json -w "%{http_code}" -X POST \ + -H "X-Vault-Token: ${vault_token}" \ + -H 'Content-Type: application/json' \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc")" + if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then + echo "Vault write failed (status ${write_status})" >&2 + cat /tmp/sonarqube-oidc-write.json >&2 || true + exit 1 + fi + + verify_status="$(curl -sS -o /tmp/sonarqube-oidc-verify.json -w "%{http_code}" \ + -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)" + if [ "${verify_status}" != "200" ]; then + echo "Vault verify failed (status ${verify_status})" >&2 + cat /tmp/sonarqube-oidc-verify.json >&2 || true + exit 1 + fi + + echo "SonarQube OIDC secret ready in Vault" diff --git a/services/logging/Jenkinsfile.data-prepper b/services/logging/Jenkinsfile.data-prepper index 742197bf..1e9b1a3b 100644 --- a/services/logging/Jenkinsfile.data-prepper +++ b/services/logging/Jenkinsfile.data-prepper @@ -32,13 +32,14 @@ spec: } } environment { - SUITE_NAME = 'data-prepper' + SUITE_NAME = 'data_prepper' PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' + QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json' + QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json' } parameters { - string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/monitoring/data-prepper', description: 'Docker repository for Data Prepper') + string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/data-prepper', description: 'Docker repository for Data Prepper') string(name: 'IMAGE_TAG', defaultValue: '2.8.0', description: 'Image tag to publish') - booleanParam(name: 'PUSH_IMAGE', defaultValue: false, description: 'Publish image artifacts (manual release only)') booleanParam(name: 'PUSH_LATEST', defaultValue: true, description: 'Also push the latest tag') } stages { @@ -49,19 +50,88 @@ spec: } } } - stage('Build & Push (optional)') { - when { - expression { return params.PUSH_IMAGE } + stage('Collect quality evidence') { + steps { + container('git') { + sh ''' + set -euo pipefail + apk add --no-cache curl jq >/dev/null 2>&1 || true + mkdir -p build + + sonar_report="${QUALITY_GATE_SONARQUBE_REPORT:-build/sonarqube-quality-gate.json}" + if [ ! -f "${sonar_report}" ]; then + if [ -n "${SONARQUBE_HOST_URL:-}" ] && [ -n "${SONARQUBE_PROJECT_KEY:-}" ]; then + host="${SONARQUBE_HOST_URL%/}" + query="$(printf '%s' "${SONARQUBE_PROJECT_KEY}" | sed 's/ /%20/g')" + sonar_ok=0 + if [ -n "${SONARQUBE_TOKEN:-}" ]; then + auth="$(printf '%s:' "${SONARQUBE_TOKEN}" | base64 | tr -d '\\n')" + if curl -fsS -H "Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}"; then + sonar_ok=1 + fi + else + if curl -fsS "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}"; then + sonar_ok=1 + fi + fi + if [ "${sonar_ok}" -ne 1 ]; then + cat > "${sonar_report}" < "${sonar_report}" < "${ironbank_report}" < "${ironbank_report}" </dev/null 2>&1 || true - suite="${SUITE_NAME}" - gateway="${PUSHGATEWAY_URL}" - fetch_counter() { - status="$1" - line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status}" ' - /platform_quality_gate_runs_total/ { - if (index($0, "job=\\"platform-quality-ci\\"") && index($0, "suite=\\"" suite "\\"") && index($0, "status=\\"" status "\\"")) { - print $2 - exit - } - } - ' || true)" - [ -n "${line}" ] && printf '%s\n' "${line}" || printf '0\n' - } - ok_count="$(fetch_counter ok)" - failed_count="$(fetch_counter failed)" - ok_count=$((ok_count + 1)) - tests_passed=1 - tests_failed=0 - cat </dev/null -# TYPE platform_quality_gate_runs_total counter -platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count} -platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count} -# TYPE data_prepper_quality_gate_tests_total gauge -data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${tests_passed} -data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${tests_failed} -data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} 0 -data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} 0 -# TYPE platform_quality_gate_workspace_line_coverage_percent gauge -platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 100 -# TYPE platform_quality_gate_source_lines_over_500_total gauge -platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0 -# TYPE data_prepper_quality_gate_checks_total gauge -data_prepper_quality_gate_checks_total{suite="${suite}",check="build",result="ok"} 1 -data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="ok"} 1 -data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="ok"} 1 -METRICS - ''' + always { + script { + env.QUALITY_OUTCOME = currentBuild.currentResult == 'SUCCESS' ? 'ok' : 'failed' } - } - failure { container('git') { sh ''' set -euo pipefail - apk add --no-cache curl >/dev/null 2>&1 || true + apk add --no-cache curl jq >/dev/null 2>&1 || true suite="${SUITE_NAME}" gateway="${PUSHGATEWAY_URL}" + status="${QUALITY_OUTCOME:-failed}" fetch_counter() { - status="$1" - line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status}" ' + status_name="$1" + line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status_name}" ' /platform_quality_gate_runs_total/ { if (index($0, "job=\\"platform-quality-ci\\"") && index($0, "suite=\\"" suite "\\"") && index($0, "status=\\"" status "\\"")) { print $2 @@ -170,26 +185,66 @@ METRICS } ok_count="$(fetch_counter ok)" failed_count="$(fetch_counter failed)" - failed_count=$((failed_count + 1)) - tests_passed=0 - tests_failed=1 - cat </dev/null + if [ "${status}" = "ok" ]; then + ok_count=$((ok_count + 1)) + else + failed_count=$((failed_count + 1)) + fi + sonarqube_check="not_applicable" + if [ -f build/sonarqube-quality-gate.json ]; then + sonar_status="$(jq -r '.status // .projectStatus.status // .qualityGate.status // empty' build/sonarqube-quality-gate.json 2>/dev/null | tr '[:upper:]' '[:lower:]')" + if [ -n "${sonar_status}" ]; then + case "${sonar_status}" in + ok|pass|passed|success) sonarqube_check="ok" ;; + *) sonarqube_check="failed" ;; + esac + else + sonarqube_check="failed" + fi + fi + supply_chain_check="not_applicable" + if [ -f build/ironbank-compliance.json ]; then + compliant="$(jq -r '.compliant // empty' build/ironbank-compliance.json 2>/dev/null)" + if [ "${compliant}" = "true" ]; then + supply_chain_check="ok" + elif [ "${compliant}" = "false" ]; then + supply_chain_check="failed" + else + ironbank_status="$(jq -r '.status // .result // .compliance // empty' build/ironbank-compliance.json 2>/dev/null | tr '[:upper:]' '[:lower:]')" + case "${ironbank_status}" in + ok|pass|passed|success|compliant) supply_chain_check="ok" ;; + "") supply_chain_check="failed" ;; + *) supply_chain_check="failed" ;; + esac + fi + fi + gate_glue_check="ok" + if [ "${status}" != "ok" ]; then + gate_glue_check="failed" + fi + if ! cat </dev/null; then + echo "warning: metrics push failed for suite=${suite}" >&2 + fi # TYPE platform_quality_gate_runs_total counter platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count} platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count} # TYPE data_prepper_quality_gate_tests_total gauge -data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${tests_passed} -data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${tests_failed} +data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} 0 +data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} 0 data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} 0 data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} 0 # TYPE platform_quality_gate_workspace_line_coverage_percent gauge platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 0 # TYPE platform_quality_gate_source_lines_over_500_total gauge -platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 1 +platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0 # TYPE data_prepper_quality_gate_checks_total gauge -data_prepper_quality_gate_checks_total{suite="${suite}",check="build",result="failed"} 1 -data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="failed"} 1 -data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="failed"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="not_applicable"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="not_applicable"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="not_applicable"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="docs_naming",result="not_applicable"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="gate_glue",result="${gate_glue_check}"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="sonarqube",result="${sonarqube_check}"} 1 +data_prepper_quality_gate_checks_total{suite="${suite}",check="supply_chain",result="${supply_chain_check}"} 1 METRICS ''' } diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index 5ba0e008..fa87c5c9 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -1,26 +1,26 @@ { "uid": "atlas-jobs", - "title": "Atlas Jobs", + "title": "Atlas Testing", "folderUid": "atlas-internal", "editable": true, "panels": [ { - "id": 1, + "id": 2, "type": "stat", - "title": "Schedule Metrics Exported", + "title": "Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, "y": 0 }, "targets": [ { - "expr": "count(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}) or on() vector(0)", + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[24h])) or on() vector(0))), 1)", "refId": "A", "instant": true } @@ -39,83 +39,24 @@ "value": null }, { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 2, - "type": "stat", - "title": "Schedule Tasks Stale (>36h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 0 - }, - "targets": [ - { - "expr": "sum(((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}) > bool 129600)) or on() vector(0)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "color": "orange", + "value": 80 }, { "color": "yellow", - "value": 1 + "value": 95 }, { - "color": "orange", - "value": 2 - }, - { - "color": "red", - "value": 3 + "color": "green", + "value": 99 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 }, "overrides": [] }, @@ -136,21 +77,22 @@ { "id": 3, "type": "stat", - "title": "Schedule Tasks Missing Success", + "title": "Success Rate (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, + "x": 4, "y": 0 }, "targets": [ { - "expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"})) or on() vector(0)", - "refId": "A" + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -163,27 +105,28 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, - { - "color": "yellow", - "value": 1 - }, { "color": "orange", - "value": 2 + "value": 80 }, { - "color": "red", - "value": 3 + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 }, "overrides": [] }, @@ -204,21 +147,22 @@ { "id": 4, "type": "stat", - "title": "Schedule Tasks Failed Last Run", + "title": "Failures (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 12, + "x": 8, "y": 0 }, "targets": [ { - "expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^schedule\\..+$\"}) > bool 0)) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -240,11 +184,11 @@ }, { "color": "orange", - "value": 2 + "value": 3 }, { "color": "red", - "value": 3 + "value": 5 } ] }, @@ -272,21 +216,22 @@ { "id": 5, "type": "stat", - "title": "Glue Jobs Stale (>36h)", + "title": "Runs (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 16, + "x": 12, "y": 0 }, "targets": [ { - "expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -299,20 +244,12 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, { - "color": "yellow", + "color": "green", "value": 1 - }, - { - "color": "orange", - "value": 2 - }, - { - "color": "red", - "value": 3 } ] }, @@ -340,21 +277,22 @@ { "id": 6, "type": "stat", - "title": "Jenkins Workspace PV Backlog", + "title": "Avg Coverage (%)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 20, + "x": 16, "y": 0 }, "targets": [ { - "expr": "sum((kube_persistentvolume_status_phase{phase=~\"Released|Failed\"} > bool 0) * on(persistentvolume) group_left(claim_namespace,name) kube_persistentvolume_claim_ref{claim_namespace=\"jenkins\",name=~\"pvc-workspace-.*\"}) or on() vector(0)", - "refId": "A" + "expr": "(avg(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})))) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -367,20 +305,94 @@ "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 7, + "type": "stat", + "title": "Suites with LOC >500", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "targets": [ + { + "expr": "(sum(((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) > bool 0)) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 0 + }, { "color": "yellow", "value": 1 }, { "color": "orange", - "value": 10 + "value": 3 }, { "color": "red", - "value": 25 + "value": 5 } ] }, @@ -406,135 +418,561 @@ } }, { - "id": 7, - "type": "table", - "title": "Ariadne Schedules: Last Success (h, newest first)", + "id": 19, + "type": "stat", + "title": "Failing Tests", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 8, - "w": 12, + "h": 4, + "w": 3, "x": 0, - "y": 4 + "y": 5 }, "targets": [ { - "expr": "((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999)", + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success\"})) or on() vector(0))", "refId": "A", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", "custom": { - "filterable": true + "displayMode": "auto" } }, "overrides": [] }, "options": { - "showHeader": true, - "columnFilters": false - }, - "transformations": [ - { - "id": "labelsToFields", - "options": {} + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "textMode": "value" + } + }, + { + "id": 20, + "type": "stat", + "title": "Failing Coverage", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 5 + }, + "targets": [ { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true } ], - "description": "Primary schedule inventory ordered by recency so fresh jobs stay at the top." + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 21, + "type": "stat", + "title": "Failing LOC", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 22, + "type": "stat", + "title": "Failing Docs/Naming", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 23, + "type": "stat", + "title": "Failing Gate/Glue", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 24, + "type": "stat", + "title": "Failing SonarQube", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 25, + "type": "stat", + "title": "Failing Supply Chain", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 26, + "type": "stat", + "title": "Total Failing Checks", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 5 + }, + "targets": [ + { + "expr": "(sum({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",result!~\"ok|passed|success\"}) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } }, { "id": 8, "type": "bargauge", - "title": "Ariadne Schedule Last Error (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 4 - }, - "targets": [ - { - "expr": "sort(((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "green", - "value": 24 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ] - }, - { - "id": 9, - "type": "bargauge", - "title": "Ariadne Schedule Last Status", + "title": "Failures by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -543,164 +981,13 @@ "h": 8, "w": 8, "x": 0, - "y": 12 + "y": 9 }, "targets": [ { - "expr": "sort((ariadne_schedule_last_status{task=~\"^schedule\\..+$\"}) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} - 1))", + "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status!~\"ok|passed|success\"}[24h])))", "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 0.5 - }, - { - "color": "green", - "value": 1 - } - ] - }, - "decimals": 0, - "mappings": [ - { - "type": "value", - "options": { - "-1": { - "text": "pending" - }, - "0": { - "text": "error" - }, - "1": { - "text": "ok" - } - } - } - ] - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ], - "description": "1 means the last run was ok. 0 means the last run ended in error." - }, - { - "id": 10, - "type": "bargauge", - "title": "Ariadne Schedule Runs (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ], - "description": "Number of runs by schedule task over the selected dashboard time range." - }, - { - "id": 11, - "type": "bargauge", - "title": "Ariadne Schedule Errors (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))", - "refId": "A", - "legendFormat": "{{task}}", + "legendFormat": "{{suite}}", "instant": true } ], @@ -754,76 +1041,252 @@ "order": "desc" } } - ], - "description": "Error run count by schedule task over the selected dashboard time range." + ] }, { - "id": 12, - "type": "timeseries", - "title": "Ariadne Attempts / Failures", + "id": 9, + "type": "bargauge", + "title": "Success Rate by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 20 + "h": 8, + "w": 8, + "x": 8, + "y": 9 }, "targets": [ { - "expr": "sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)", + "expr": "sort_desc(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[24h]))), 1))", "refId": "A", - "legendFormat": "Attempts" + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ] + }, + { + "id": 10, + "type": "bargauge", + "title": "Coverage Gap to 95% by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 9 + }, + "targets": [ + { + "expr": "sort_desc(clamp_min(95 - ((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}))), 0))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Gap from the 95% target. 0 means the suite is at or above target." + }, + { + "id": 11, + "type": "timeseries", + "title": "Success History by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "targets": [ + { + "expr": "100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[$__interval])) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval]))), 1))", + "refId": "A", + "legendFormat": "{{suite}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "custom": { + "drawStyle": "line", + "lineInterpolation": "linear", + "lineWidth": 2, + "fillOpacity": 8, + "showPoints": "always", + "pointSize": 3, + "spanNulls": true + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 12, + "type": "timeseries", + "title": "Run Outcomes (Selected Scope)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 25 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Success" }, { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval])) or on() vector(0)", "refId": "B", - "legendFormat": "Failures" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status!~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Failure" + }, + { + "refId": "C", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval])) or on() vector(0)", + "legendFormat": "Total" } ], "fieldConfig": { "defaults": { "unit": "none" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Attempts" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Failures" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] + "overrides": [] }, "options": { "legend": { - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "sum" + ] }, "tooltip": { "mode": "multi" @@ -832,29 +1295,126 @@ }, { "id": 13, - "type": "bargauge", - "title": "One-off Job Pods (age hours)", + "type": "timeseries", + "title": "Coverage & LOC History (Selected Scope)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 20 + "h": 8, + "w": 8, + "x": 8, + "y": 25 }, "targets": [ { - "expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))", "refId": "A", - "legendFormat": "{{namespace}}/{{pod}}", + "expr": "max_over_time(platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval])", + "legendFormat": "{{suite}} coverage %" + }, + { + "refId": "B", + "expr": "max_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval])", + "legendFormat": "{{suite}} files >500 LOC" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "max" + ] + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 14, + "type": "piechart", + "title": "Run Status Mix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 25 + }, + "targets": [ + { + "expr": "sum by (status) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d]))", + "refId": "A", + "legendFormat": "{{status}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "displayLabels": [], + "tooltip": { + "mode": "single" + }, + "colorScheme": "interpolateSpectral", + "colorBy": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "id": 15, + "type": "bargauge", + "title": "Latest Test Counters (Suite + Result)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "targets": [ + { + "expr": "sort_desc(sum by (suite, result) ({__name__=~\".*_quality_gate_tests_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}))", + "refId": "A", + "legendFormat": "{{suite}} \u00b7 {{result}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "unit": "none", "min": 0, "max": null, "thresholds": { @@ -866,19 +1426,18 @@ }, { "color": "yellow", - "value": 6 + "value": 50 }, { "color": "orange", - "value": 24 + "value": 70 }, { "color": "red", - "value": 48 + "value": 85 } ] - }, - "decimals": 2 + } }, "overrides": [] }, @@ -906,516 +1465,550 @@ { "id": "limit", "options": { - "limit": 12 - } - } - ] - }, - { - "id": 14, - "type": "bargauge", - "title": "Glue Jobs Last Success (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 27 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 15, - "type": "bargauge", - "title": "Glue Jobs Last Schedule (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 27 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" + "limit": 24 } } ] }, { "id": 16, - "type": "stat", - "title": "Jenkins Cleanup Signal Present", + "type": "bargauge", + "title": "Failing Checks (Suite + Check)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 33 - }, - "targets": [ - { - "expr": "count(ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) or on() vector(0)", - "refId": "A", - "legendFormat": "Signal", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } - }, - { - "id": 17, - "type": "stat", - "title": "Jenkins Cleanup Last Run Age (h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 33 - }, - "targets": [ - { - "expr": "((time() - ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) / 3600) or on() vector(999)", - "refId": "A", - "legendFormat": "Last Run", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "unit": "h", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } - }, - { - "id": 18, - "type": "stat", - "title": "Jenkins Cleanup Last Success Age (h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 33 - }, - "targets": [ - { - "expr": "((time() - ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds) / 3600) or on() vector(999)", - "refId": "A", - "legendFormat": "Last Success", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "unit": "h", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } - }, - { - "id": 19, - "type": "stat", - "title": "Jenkins Cleanup Planned (last run)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, + "h": 8, + "w": 12, "x": 12, "y": 33 }, "targets": [ { - "expr": "ariadne_jenkins_workspace_cleanup_last_planned_total or on() vector(0)", + "expr": "sort_desc(sum by (suite, check) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",result!~\"ok|passed|success\"}))", "refId": "A", - "legendFormat": "Planned", + "legendFormat": "{{suite}} \u00b7 {{check}}", "instant": true } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "none", + "min": 0, + "max": null, "thresholds": { "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 } ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" } }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", + "displayMode": "gradient", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } + { + "id": "limit", + "options": { + "limit": 24 + } + } + ], + "description": "Top failing checks in the selected scope. Empty is healthy." }, { - "id": 20, - "type": "stat", - "title": "Jenkins Cleanup Deleted (last run)", + "id": 17, + "type": "bargauge", + "title": "Coverage by Suite (Latest, gate 95)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 33 + "h": 8, + "w": 12, + "x": 0, + "y": 41 }, "targets": [ { - "expr": "ariadne_jenkins_workspace_cleanup_last_deleted_total or on() vector(0)", + "expr": "sort(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}))) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d]))) - 1))", "refId": "A", - "legendFormat": "Deleted", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "percent", + "min": 0, + "max": 100, "thresholds": { "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, { "color": "green", - "value": 1 + "value": 99 } ] }, - "unit": "none", - "custom": { - "displayMode": "auto" - } + "decimals": 2, + "mappings": [ + { + "type": "value", + "options": { + "-1": { + "text": "missing" + } + } + } + ] }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", + "displayMode": "gradient", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ] }, { - "id": 21, - "type": "stat", - "title": "Ariadne Access Requests", + "id": 18, + "type": "bargauge", + "title": "Files >500 LOC by Suite (Latest)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 33 + "h": 8, + "w": 12, + "x": 12, + "y": 41 }, "targets": [ { - "expr": "ariadne_access_requests_total", + "expr": "sort_desc((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d]))) - 1))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "-1": { + "text": "missing" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 27, + "type": "bargauge", + "title": "Missing Tests Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) ({__name__=~\".*_quality_gate_tests_total\",exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 28, + "type": "bargauge", + "title": "Missing Checks Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) ({__name__=~\".*_quality_gate_checks_total\",exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 29, + "type": "bargauge", + "title": "Missing Coverage Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 30, + "type": "bargauge", + "title": "Missing LOC Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 31, + "type": "stat", + "title": "SonarQube API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 56 + }, + "targets": [ + { + "expr": "(max(sonarqube_up) or on() vector(0))", "refId": "A", "instant": true } @@ -1430,7 +2023,7 @@ "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, { @@ -1461,201 +2054,64 @@ } }, { - "id": 22, - "type": "timeseries", - "title": "Jenkins Cleanup Runs (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 37 - }, - "targets": [ - { - "refId": "A", - "expr": "sum by (mode, status) (increase(ariadne_jenkins_workspace_cleanup_runs_total[$__range]))", - "legendFormat": "{{mode}}/{{status}}" - } - ], - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [] - }, - "options": { - "legend": { - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "multi" - } - } - }, - { - "id": 23, - "type": "timeseries", - "title": "Jenkins Cleanup Objects (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 37 - }, - "targets": [ - { - "refId": "A", - "expr": "sum by (kind, action, mode) (increase(ariadne_jenkins_workspace_cleanup_objects_total[$__range]))", - "legendFormat": "{{kind}}/{{action}}/{{mode}}" - } - ], - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [] - }, - "options": { - "legend": { - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "multi" - } - } - }, - { - "id": 24, + "id": 32, "type": "stat", - "title": "Jenkins Build Weather (last run h, newest first)", + "title": "Sonar Projects (Selected)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 44 + "h": 6, + "w": 4, + "x": 4, + "y": 56 }, "targets": [ { + "expr": "(count(sonarqube_project_quality_gate_pass{project_key=~\"${suite}\"}) or on() vector(0))", "refId": "A", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": true - }, - { - "refId": "B", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": true - }, - { - "refId": "C", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": true - }, - { - "refId": "D", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", - "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", - "decimals": 1, - "min": 0, - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": true - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byFrameRefID", - "options": "A" - }, - "properties": [ + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 } ] }, - { - "matcher": { - "id": "byFrameRefID", - "options": "B" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "C" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "D" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] + "unit": "none", + "custom": { + "displayMode": "auto" } - ] + }, + "overrides": [] }, "options": { "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": true, + "graphMode": "area", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1663,56 +2119,154 @@ "fields": "", "values": false }, - "textMode": "name_and_value", - "text": { - "titleSize": 12, - "valueSize": 12 - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - }, - { - "id": "limit", - "options": { - "limit": 20 - } - } - ], - "description": "Jenkins homepage-style list from Ariadne with weather icons and status-colored job names. Click a name to open the Jenkins job page." + "textMode": "value" + } }, { - "id": 27, - "type": "bargauge", - "title": "Jenkins Workspace PV Age (h, detached only)", + "id": 33, + "type": "stat", + "title": "Sonar Gate Fetch Errors", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 53 + "h": 6, + "w": 4, + "x": 8, + "y": 56 }, "targets": [ { - "expr": "sort_desc(((time() - kube_persistentvolume_created) / 3600) * on(persistentvolume) group_left(claim_namespace,name) kube_persistentvolume_claim_ref{claim_namespace=\"jenkins\",name=~\"pvc-workspace-.*\"} * on(persistentvolume) group_left() (kube_persistentvolume_status_phase{phase=~\"Released|Failed\"} > bool 0))", + "expr": "(max(sonarqube_quality_gate_fetch_errors_total) or on() vector(0))", "refId": "A", - "legendFormat": "{{name}} -> {{persistentvolume}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 34, + "type": "piechart", + "title": "Sonar Gate Status Mix (Selected)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 56 + }, + "targets": [ + { + "expr": "count by (status) (sonarqube_project_quality_gate_pass{project_key=~\"${suite}\"})", + "refId": "A", + "legendFormat": "{{status}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "displayLabels": [], + "tooltip": { + "mode": "single" + }, + "colorScheme": "interpolateSpectral", + "colorBy": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "id": 35, + "type": "bargauge", + "title": "Projects Failing Sonar Gate", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 56 + }, + "targets": [ + { + "expr": "sort_desc(count by (project_key) (sonarqube_project_quality_gate_pass{project_key=~\"${suite}\",status!~\"OK|ok\"}))", + "refId": "A", + "legendFormat": "{{project_key}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", "min": 0, "max": null, "thresholds": { @@ -1724,19 +2278,18 @@ }, { "color": "yellow", - "value": 24 + "value": 1 }, { "color": "orange", - "value": 72 + "value": 3 }, { "color": "red", - "value": 168 + "value": 5 } ] - }, - "decimals": 1 + } }, "overrides": [] }, @@ -1760,19 +2313,12 @@ ], "order": "desc" } - }, - { - "id": "limit", - "options": { - "limit": 40 - } } - ], - "description": "Oldest detached Jenkins workspace volumes first. This is the direct cleanup backlog view." + ] } ], "time": { - "from": "now-7d", + "from": "now-30d", "to": "now" }, "annotations": { @@ -1782,7 +2328,77 @@ "style": "dark", "tags": [ "atlas", - "jobs", - "glue" - ] + "testing", + "quality-gate", + "ci" + ], + "templating": { + "list": [ + { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": "ariadne : ariadne,metis : metis,ananke : ananke,atlasbot : atlasbot,pegasus : pegasus|pegasus-health|pegasus_health,soteria : soteria,titan_iac : titan_iac|titan-iac,bstein_home : bstein_home|bstein-home,data_prepper : data_prepper|data-prepper", + "current": { + "text": "All", + "value": "$__all", + "selected": true + }, + "options": [ + { + "text": "ariadne", + "value": "ariadne", + "selected": false + }, + { + "text": "metis", + "value": "metis", + "selected": false + }, + { + "text": "ananke", + "value": "ananke", + "selected": false + }, + { + "text": "atlasbot", + "value": "atlasbot", + "selected": false + }, + { + "text": "pegasus", + "value": "pegasus|pegasus-health|pegasus_health", + "selected": false + }, + { + "text": "soteria", + "value": "soteria", + "selected": false + }, + { + "text": "titan_iac", + "value": "titan_iac|titan-iac", + "selected": false + }, + { + "text": "bstein_home", + "value": "bstein_home|bstein-home", + "selected": false + }, + { + "text": "data_prepper", + "value": "data_prepper|data-prepper", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": true, + "allValue": "ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper", + "refresh": 1, + "sort": 1, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 09a42a40..162b2541 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -129,7 +129,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -204,7 +204,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -352,7 +352,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -427,7 +427,7 @@ }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -565,7 +565,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -642,7 +642,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -711,7 +711,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -780,7 +780,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -855,7 +855,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -930,7 +930,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -997,7 +997,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1064,7 +1064,7 @@ }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1073,22 +1073,52 @@ { "id": 40, "type": "stat", - "title": "Pyrphoros UPS Current", + "title": "UPS Current Load", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 3, - "w": 6, + "h": 6, + "w": 4, "x": 0, - "y": 7 + "y": 12 }, "targets": [ { - "expr": "label_replace(max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0), \"metric\", \"Draw\", \"__name__\", \".*\") or label_replace(max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0), \"metric\", \"Runtime\", \"__name__\", \".*\")", "refId": "A", - "legendFormat": "{{metric}}", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)", + "legendFormat": "Pyrphoros Draw (W)", + "instant": true + }, + { + "refId": "B", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", + "legendFormat": "Pyrphoros Discharge", + "instant": true + }, + { + "refId": "C", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", + "legendFormat": "Pyrphoros Status", + "instant": true + }, + { + "refId": "D", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)", + "legendFormat": "Statera Draw (W)", + "instant": true + }, + { + "refId": "E", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", + "legendFormat": "Statera Discharge", + "instant": true + }, + { + "refId": "F", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", + "legendFormat": "Statera Status", "instant": true } ], @@ -1114,128 +1144,127 @@ "unit": "none", "custom": { "displayMode": "auto" - } + }, + "decimals": 1 }, "overrides": [ { "matcher": { "id": "byName", - "options": "Draw" + "options": "Pyrphoros Draw (W)" }, "properties": [ { "id": "unit", "value": "watt" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Runtime" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } - }, - "links": [ - { - "title": "Open atlas-power dashboard", - "url": "/d/atlas-power", - "targetBlank": true - } - ] - }, - { - "id": 144, - "type": "stat", - "title": "Statera UPS Current", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 3, - "w": 6, - "x": 0, - "y": 10 - }, - "targets": [ - { - "expr": "label_replace(max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0), \"metric\", \"Draw\", \"__name__\", \".*\") or label_replace(max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0), \"metric\", \"Runtime\", \"__name__\", \".*\")", - "refId": "A", - "legendFormat": "{{metric}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null }, { - "color": "green", - "value": 1 + "id": "description", + "value": "Attached node: titan-db" } ] }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [ { "matcher": { "id": "byName", - "options": "Draw" + "options": "Statera Draw (W)" }, "properties": [ { "id": "unit", "value": "watt" + }, + { + "id": "description", + "value": "Attached node: titan-24" } ] }, { "matcher": { "id": "byName", - "options": "Runtime" + "options": "Pyrphoros Discharge" }, "properties": [ { "id": "unit", "value": "s" + }, + { + "id": "description", + "value": "Attached node: titan-db" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Statera Discharge" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "description", + "value": "Attached node: titan-24" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pyrphoros Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { + "text": "\u26a1 Charging" + }, + "1": { + "text": "\ud83d\udd0b Discharging" + } + } + } + ] + }, + { + "id": "description", + "value": "Attached node: titan-db" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Statera Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { + "text": "\u26a1 Charging" + }, + "1": { + "text": "\ud83d\udd0b Discharging" + } + } + } + ] + }, + { + "id": "description", + "value": "Attached node: titan-24" } ] } @@ -1253,18 +1282,17 @@ "values": false }, "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } + "orientation": "horizontal", + "wideLayout": true }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Per-UPS live snapshot: current draw, discharge, and charging/discharging status." }, { "id": 41, @@ -1276,9 +1304,9 @@ }, "gridPos": { "h": 6, - "w": 6, - "x": 6, - "y": 7 + "w": 4, + "x": 4, + "y": 12 }, "targets": [ { @@ -1305,8 +1333,8 @@ }, "options": { "legend": { - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom" }, "tooltip": { "mode": "multi" @@ -1314,7 +1342,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1323,22 +1351,40 @@ { "id": 42, "type": "stat", - "title": "Current Enclosure Temperature", + "title": "Current Climate", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 3, - "w": 6, - "x": 0, - "y": 13 + "h": 6, + "w": 4, + "x": 8, + "y": 12 }, "targets": [ { - "expr": "label_replace(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)) or on() vector(0), \"metric\", \"\u00b0C\", \"__name__\", \".*\") or label_replace(max((max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)) * 9 / 5 + 32) or on() vector(0), \"metric\", \"\u00b0F\", \"__name__\", \".*\")", "refId": "A", - "legendFormat": "{{metric}}", + "expr": "max(typhon_temperature_celsius) or on() vector(0)", + "legendFormat": "Tent Temp (\u00b0C)", + "instant": true + }, + { + "refId": "B", + "expr": "max(typhon_vpd_kpa) or on() vector(0)", + "legendFormat": "Tent VPD (kPa)", + "instant": true + }, + { + "refId": "C", + "expr": "max(typhon_relative_humidity_percent) or on() vector(0)", + "legendFormat": "Tent RH (%)", + "instant": true + }, + { + "refId": "D", + "expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)", + "legendFormat": "Dew Point (\u00b0C)", "instant": true } ], @@ -1364,13 +1410,14 @@ "unit": "none", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 }, "overrides": [ { "matcher": { "id": "byName", - "options": "\u00b0C" + "options": "Tent Temp (\u00b0C)" }, "properties": [ { @@ -1382,105 +1429,7 @@ { "matcher": { "id": "byName", - "options": "\u00b0F" - }, - "properties": [ - { - "id": "unit", - "value": "fahrenheit" - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } - }, - "links": [ - { - "title": "Open atlas-power dashboard", - "url": "/d/atlas-power", - "targetBlank": true - } - ] - }, - { - "id": 143, - "type": "stat", - "title": "Current Enclosure Climate", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 3, - "w": 6, - "x": 0, - "y": 16 - }, - "targets": [ - { - "expr": "label_replace(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)) or on() vector(0), \"metric\", \"%RH\", \"__name__\", \".*\") or label_replace(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)) or on() vector(0), \"metric\", \"kPa\", \"__name__\", \".*\")", - "refId": "A", - "legendFormat": "{{metric}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "%RH" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:%RH" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "kPa" + "options": "Tent VPD (kPa)" }, "properties": [ { @@ -1488,6 +1437,30 @@ "value": "suffix:kPa" } ] + }, + { + "matcher": { + "id": "byName", + "options": "Tent RH (%)" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dew Point (\u00b0C)" + }, + "properties": [ + { + "id": "unit", + "value": "celsius" + } + ] } ] }, @@ -1502,304 +1475,93 @@ "fields": "", "values": false }, - "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } + "textMode": "value", + "orientation": "horizontal", + "wideLayout": true }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Current tent temperature, humidity, VPD, and dew point." }, { "id": 43, "type": "timeseries", - "title": "Enclosure Climate History", + "title": "Climate History", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 6, - "w": 6, - "x": 6, - "y": 13 + "w": 4, + "x": 12, + "y": 12 }, "targets": [ { "refId": "A", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)", - "legendFormat": "C" + "expr": "typhon_temperature_celsius", + "legendFormat": "Temperature (\u00b0C)" }, { "refId": "B", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)", - "legendFormat": "RH" + "expr": "typhon_relative_humidity_percent", + "legendFormat": "Humidity (%)" }, { "refId": "C", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)", - "legendFormat": "P" + "expr": "typhon_vpd_kpa", + "legendFormat": "VPD (kPa)" }, { "refId": "D", - "expr": "(min_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)[$__range]) - 0.08)", - "legendFormat": "C bound min" - }, - { - "refId": "E", - "expr": "(max_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)[$__range]) + 0.08)", - "legendFormat": "C bound max" - }, - { - "refId": "F", - "expr": "clamp_min((min_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)[$__range]) - 0.35), 0)", - "legendFormat": "RH bound min" - }, - { - "refId": "G", - "expr": "clamp_max((max_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)[$__range]) + 0.35), 100)", - "legendFormat": "RH bound max" - }, - { - "refId": "H", - "expr": "clamp_min((min_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)[$__range]) - 0.03), 0)", - "legendFormat": "P bound min" - }, - { - "refId": "I", - "expr": "(max_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)[$__range]) + 0.03)", - "legendFormat": "P bound max" + "expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))", + "legendFormat": "Dew Point (\u00b0C)" } ], "fieldConfig": { "defaults": { - "unit": "none", - "custom": { - "drawStyle": "line", - "lineInterpolation": "linear", - "lineWidth": 2, - "fillOpacity": 10, - "showPoints": "never", - "spanNulls": true - } + "unit": "celsius" }, "overrides": [ { "matcher": { "id": "byName", - "options": "C" + "options": "Humidity (%)" }, "properties": [ { "id": "unit", - "value": "suffix:\u00b0C" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.axisPlacement", - "value": "left" - }, - { - "id": "custom.axisCenteredZero", - "value": false - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "C bound .*" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:\u00b0C" - }, - { - "id": "custom.axisPlacement", - "value": "left" - }, - { - "id": "custom.axisCenteredZero", - "value": false - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - }, - { - "id": "custom.lineWidth", - "value": 0 - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "transparent" - } + "value": "percent" } ] }, { "matcher": { "id": "byName", - "options": "RH" + "options": "VPD (kPa)" }, "properties": [ { "id": "unit", - "value": "suffix:%" + "value": "none" + }, + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisLabel", + "value": "kPa" }, { "id": "decimals", "value": 2 - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "custom.axisCenteredZero", - "value": false - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "RH bound .*" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:%" - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "custom.axisCenteredZero", - "value": false - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - }, - { - "id": "custom.lineWidth", - "value": 0 - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "transparent" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "P" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:kPa" - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.axisCenteredZero", - "value": false - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "P bound .*" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:kPa" - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "custom.axisCenteredZero", - "value": false - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - }, - { - "id": "custom.lineWidth", - "value": 0 - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "transparent" - } } ] } @@ -1816,12 +1578,12 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } ], - "description": "Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible." + "description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis)." }, { "id": 140, @@ -1833,15 +1595,33 @@ }, "gridPos": { "h": 6, - "w": 6, - "x": 12, - "y": 13 + "w": 4, + "x": 16, + "y": 12 }, "targets": [ { - "expr": "label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})) or on() vector(0))), \"metric\", \"Outlet\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})) or on() vector(0))), \"metric\", \"Inlet - In\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})) or on() vector(0))), \"metric\", \"Inlet - Out\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})) or on() vector(0))), \"metric\", \"Interior\", \"__name__\", \".*\")", "refId": "A", - "legendFormat": "{{metric}}", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))", + "legendFormat": "Inside Outlet", + "instant": true + }, + { + "refId": "B", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))", + "legendFormat": "Inside Inlet", + "instant": true + }, + { + "refId": "C", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))", + "legendFormat": "Outside Inlet", + "instant": true + }, + { + "refId": "D", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))", + "legendFormat": "Interior Fans", "instant": true } ], @@ -1874,56 +1654,7 @@ }, "decimals": 0 }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Outlet" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - In" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - Out" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Interior" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - } - ] + "overrides": [] }, "options": { "colorMode": "value", @@ -1937,15 +1668,12 @@ "values": false }, "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": false, - "text": { - "valueSize": 26 - } + "orientation": "horizontal", + "wideLayout": true }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1961,30 +1689,30 @@ }, "gridPos": { "h": 6, - "w": 6, - "x": 18, - "y": 13 + "w": 4, + "x": 20, + "y": 12 }, "targets": [ { "refId": "A", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})", - "legendFormat": "Outlet" + "expr": "typhon_fan_speed_level{fan_group=\"outlet\"}", + "legendFormat": "Inside Outlet" }, { "refId": "B", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})", - "legendFormat": "Inlet - Inside" + "expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}", + "legendFormat": "Inside Inlet" }, { "refId": "C", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})", - "legendFormat": "Inlet - Outside" + "expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}", + "legendFormat": "Outside Inlet" }, { "refId": "D", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})", - "legendFormat": "Interior" + "expr": "typhon_fan_speed_level{fan_group=\"interior\"}", + "legendFormat": "Interior Fans" } ], "fieldConfig": { @@ -1996,8 +1724,8 @@ }, "options": { "legend": { - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom" }, "tooltip": { "mode": "multi" @@ -2005,7 +1733,7 @@ }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -2021,9 +1749,9 @@ }, "gridPos": { "h": 5, - "w": 8, + "w": 6, "x": 0, - "y": 32 + "y": 7 }, "targets": [ { @@ -2076,7 +1804,7 @@ }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -2108,19 +1836,19 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 6, + "h": 5, "w": 6, - "x": 12, + "x": 6, "y": 7 }, "targets": [ { - "expr": "sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)", + "expr": "sum(increase(ariadne_task_runs_total[$__interval]))", "refId": "A", "legendFormat": "Attempts" }, { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval])) or on() vector(0)", + "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))", "refId": "B", "legendFormat": "Failures" } @@ -2173,7 +1901,7 @@ }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -2188,66 +1916,56 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 6, + "h": 5, "w": 6, - "x": 18, + "x": 12, "y": 7 }, "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "ananke" }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "lesavka" - }, - { - "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "pegasus" }, { - "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))) > 0) or on() vector(0)", + "refId": "F", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "soteria" }, + { + "refId": "G", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan_iac|titan-iac\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan_iac|titan-iac\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", + "legendFormat": "titan_iac" + }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "titan-iac" + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein_home|bstein-home\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein_home|bstein-home\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", + "legendFormat": "bstein_home" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "bstein-home" - }, - { - "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "arcanagon" - }, - { - "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "data-prepper" + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data_prepper|data-prepper\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data_prepper|data-prepper\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", + "legendFormat": "data_prepper" } ], "fieldConfig": { @@ -2281,228 +1999,14 @@ }, "links": [ { - "title": "Open atlas-testing dashboard", - "url": "/d/atlas-testing", + "title": "Open Atlas Testing", + "url": "/d/atlas-jobs", "targetBlank": true } ], "timeFrom": "7d", "description": "Per-run interval pass points (0-100) for each software suite over the last 7 days. Points are connected to show trend; missing-run intervals are ignored." }, - { - "id": 142, - "type": "stat", - "title": "Jenkins Last Success (h, newest first)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 32 - }, - "targets": [ - { - "refId": "A", - "expr": "sort((label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1), \"run_state\", \"ok\", \"exported_job\", \".*\")) or (label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) != 1), \"run_state\", \"bad\", \"exported_job\", \".*\")))", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "decimals": 1, - "min": 0, - "displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}", - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": true - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"ok\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"bad\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": true, - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 11, - "valueSize": 11 - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ], - "links": [ - { - "title": "Open atlas-jobs dashboard", - "url": "/d/atlas-jobs", - "targetBlank": true - } - ], - "description": "Top 6 most recent Jenkins successes by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list." - }, - { - "id": 243, - "type": "stat", - "title": "Jenkins Last Failure (h, newest first)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 32 - }, - "targets": [ - { - "refId": "A", - "expr": "sort((label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1), \"run_state\", \"ok\", \"exported_job\", \".*\")) or (label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) != 1), \"run_state\", \"bad\", \"exported_job\", \".*\")))", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "decimals": 1, - "min": 0, - "displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}", - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": true - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"ok\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"bad\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": true, - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 11, - "valueSize": 11 - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ], - "links": [ - { - "title": "Open atlas-jobs dashboard", - "url": "/d/atlas-jobs", - "targetBlank": true - } - ], - "description": "Top 6 most recent Jenkins failures by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list." - }, { "id": 47, "type": "bargauge", @@ -2513,13 +2017,13 @@ }, "gridPos": { "h": 5, - "w": 8, - "x": 16, - "y": 32 + "w": 6, + "x": 18, + "y": 7 }, "targets": [ { - "expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))) or on() vector(0))", + "expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))", "refId": "A", "legendFormat": "{{namespace}}/{{pvc}}", "instant": true @@ -2578,12 +2082,12 @@ ], "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } ], - "description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs (nightly target: <=20h green, <40h yellow, <50h orange, >=50h red). PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." + "description": "Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible." }, { "id": 30, @@ -2597,7 +2101,7 @@ "h": 2, "w": 4, "x": 0, - "y": 19 + "y": 18 }, "targets": [ { @@ -2646,7 +2150,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2664,7 +2168,7 @@ "h": 2, "w": 4, "x": 8, - "y": 19 + "y": 18 }, "targets": [ { @@ -2751,7 +2255,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2769,7 +2273,7 @@ "h": 2, "w": 4, "x": 4, - "y": 19 + "y": 18 }, "targets": [ { @@ -2827,7 +2331,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2845,7 +2349,7 @@ "h": 2, "w": 4, "x": 12, - "y": 19 + "y": 18 }, "targets": [ { @@ -2903,7 +2407,7 @@ }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2921,7 +2425,7 @@ "h": 2, "w": 4, "x": 16, - "y": 19 + "y": 18 }, "targets": [ { @@ -2984,7 +2488,7 @@ "h": 2, "w": 4, "x": 20, - "y": 19 + "y": 18 }, "targets": [ { @@ -3254,7 +2758,7 @@ "h": 12, "w": 12, "x": 0, - "y": 44 + "y": 39 }, "targets": [ { @@ -3283,7 +2787,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -3301,7 +2805,7 @@ "h": 12, "w": 12, "x": 12, - "y": 44 + "y": 39 }, "targets": [ { @@ -3330,7 +2834,7 @@ }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -3348,7 +2852,7 @@ "h": 10, "w": 12, "x": 0, - "y": 56 + "y": 51 }, "targets": [ { @@ -3385,7 +2889,7 @@ "h": 10, "w": 12, "x": 12, - "y": 56 + "y": 51 }, "targets": [ { @@ -3422,7 +2926,7 @@ "h": 10, "w": 12, "x": 0, - "y": 66 + "y": 61 }, "targets": [ { @@ -3473,7 +2977,7 @@ "h": 10, "w": 12, "x": 12, - "y": 66 + "y": 61 }, "targets": [ { @@ -3554,7 +3058,7 @@ "h": 7, "w": 8, "x": 0, - "y": 37 + "y": 32 }, "targets": [ { @@ -3580,7 +3084,7 @@ }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3598,7 +3102,7 @@ "h": 7, "w": 8, "x": 8, - "y": 37 + "y": 32 }, "targets": [ { @@ -3624,7 +3128,7 @@ }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3642,7 +3146,7 @@ "h": 7, "w": 8, "x": 16, - "y": 37 + "y": 32 }, "targets": [ { @@ -3668,7 +3172,7 @@ }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3686,7 +3190,7 @@ "h": 16, "w": 12, "x": 0, - "y": 76 + "y": 71 }, "targets": [ { @@ -3716,7 +3220,7 @@ "timeFrom": "30d", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3734,7 +3238,7 @@ "h": 16, "w": 12, "x": 12, - "y": 76 + "y": 71 }, "targets": [ { @@ -3764,7 +3268,7 @@ "timeFrom": "1w", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3888,5 +3392,11 @@ "to": "now" }, "refresh": "1m", - "links": [] + "links": [ + { + "title": "Atlas Testing (Internal)", + "url": "/d/atlas-jobs", + "targetBlank": false + } + ] } diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index 12fca0cb..b54c8be0 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -9,27 +9,27 @@ data: atlas-jobs.json: | { "uid": "atlas-jobs", - "title": "Atlas Jobs", + "title": "Atlas Testing", "folderUid": "atlas-internal", "editable": true, "panels": [ { - "id": 1, + "id": 2, "type": "stat", - "title": "Schedule Metrics Exported", + "title": "Success Rate (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, "y": 0 }, "targets": [ { - "expr": "count(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}) or on() vector(0)", + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[24h])) or on() vector(0))), 1)", "refId": "A", "instant": true } @@ -48,83 +48,24 @@ data: "value": null }, { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - } - }, - { - "id": 2, - "type": "stat", - "title": "Schedule Tasks Stale (>36h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 0 - }, - "targets": [ - { - "expr": "sum(((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}) > bool 129600)) or on() vector(0)", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "color": "orange", + "value": 80 }, { "color": "yellow", - "value": 1 + "value": 95 }, { - "color": "orange", - "value": 2 - }, - { - "color": "red", - "value": 3 + "color": "green", + "value": 99 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 }, "overrides": [] }, @@ -145,21 +86,22 @@ data: { "id": 3, "type": "stat", - "title": "Schedule Tasks Missing Success", + "title": "Success Rate (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, + "x": 4, "y": 0 }, "targets": [ { - "expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"})) or on() vector(0)", - "refId": "A" + "expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d])) or on() vector(0))), 1)", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -172,27 +114,28 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, - { - "color": "yellow", - "value": 1 - }, { "color": "orange", - "value": 2 + "value": 80 }, { - "color": "red", - "value": 3 + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 }, "overrides": [] }, @@ -213,21 +156,22 @@ data: { "id": 4, "type": "stat", - "title": "Schedule Tasks Failed Last Run", + "title": "Failures (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 12, + "x": 8, "y": 0 }, "targets": [ { - "expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^schedule\\..+$\"}) > bool 0)) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -249,11 +193,11 @@ data: }, { "color": "orange", - "value": 2 + "value": 3 }, { "color": "red", - "value": 3 + "value": 5 } ] }, @@ -281,21 +225,22 @@ data: { "id": 5, "type": "stat", - "title": "Glue Jobs Stale (>36h)", + "title": "Runs (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 16, + "x": 12, "y": 0 }, "targets": [ { - "expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)", - "refId": "A" + "expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[24h])) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -308,20 +253,12 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, { - "color": "yellow", + "color": "green", "value": 1 - }, - { - "color": "orange", - "value": 2 - }, - { - "color": "red", - "value": 3 } ] }, @@ -349,21 +286,22 @@ data: { "id": 6, "type": "stat", - "title": "Jenkins Workspace PV Backlog", + "title": "Avg Coverage (%)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 20, + "x": 16, "y": 0 }, "targets": [ { - "expr": "sum((kube_persistentvolume_status_phase{phase=~\"Released|Failed\"} > bool 0) * on(persistentvolume) group_left(claim_namespace,name) kube_persistentvolume_claim_ref{claim_namespace=\"jenkins\",name=~\"pvc-workspace-.*\"}) or on() vector(0)", - "refId": "A" + "expr": "(avg(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})))) or on() vector(0))", + "refId": "A", + "instant": true } ], "fieldConfig": { @@ -376,20 +314,94 @@ data: "mode": "absolute", "steps": [ { - "color": "green", + "color": "red", "value": null }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 7, + "type": "stat", + "title": "Suites with LOC >500", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "targets": [ + { + "expr": "(sum(((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) > bool 0)) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 0 + }, { "color": "yellow", "value": 1 }, { "color": "orange", - "value": 10 + "value": 3 }, { "color": "red", - "value": 25 + "value": 5 } ] }, @@ -415,135 +427,561 @@ data: } }, { - "id": 7, - "type": "table", - "title": "Ariadne Schedules: Last Success (h, newest first)", + "id": 19, + "type": "stat", + "title": "Failing Tests", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 8, - "w": 12, + "h": 4, + "w": 3, "x": 0, - "y": 4 + "y": 5 }, "targets": [ { - "expr": "((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999)", + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!~\"ok|passed|success\"})) or on() vector(0))", "refId": "A", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", "custom": { - "filterable": true + "displayMode": "auto" } }, "overrides": [] }, "options": { - "showHeader": true, - "columnFilters": false - }, - "transformations": [ - { - "id": "labelsToFields", - "options": {} + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "textMode": "value" + } + }, + { + "id": 20, + "type": "stat", + "title": "Failing Coverage", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 5 + }, + "targets": [ { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true } ], - "description": "Primary schedule inventory ordered by recency so fresh jobs stay at the top." + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 21, + "type": "stat", + "title": "Failing LOC", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 22, + "type": "stat", + "title": "Failing Docs/Naming", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 23, + "type": "stat", + "title": "Failing Gate/Glue", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 24, + "type": "stat", + "title": "Failing SonarQube", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 25, + "type": "stat", + "title": "Failing Supply Chain", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 5 + }, + "targets": [ + { + "expr": "(sum(count by (suite) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!~\"ok|passed|success\"})) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 26, + "type": "stat", + "title": "Total Failing Checks", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 5 + }, + "targets": [ + { + "expr": "(sum({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",result!~\"ok|passed|success\"}) or on() vector(0))", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } }, { "id": 8, "type": "bargauge", - "title": "Ariadne Schedule Last Error (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 4 - }, - "targets": [ - { - "expr": "sort(((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 1 - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "green", - "value": 24 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ] - }, - { - "id": 9, - "type": "bargauge", - "title": "Ariadne Schedule Last Status", + "title": "Failures by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -552,164 +990,13 @@ data: "h": 8, "w": 8, "x": 0, - "y": 12 + "y": 9 }, "targets": [ { - "expr": "sort((ariadne_schedule_last_status{task=~\"^schedule\\..+$\"}) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} - 1))", + "expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status!~\"ok|passed|success\"}[24h])))", "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 0.5 - }, - { - "color": "green", - "value": 1 - } - ] - }, - "decimals": 0, - "mappings": [ - { - "type": "value", - "options": { - "-1": { - "text": "pending" - }, - "0": { - "text": "error" - }, - "1": { - "text": "ok" - } - } - } - ] - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ], - "description": "1 means the last run was ok. 0 means the last run ended in error." - }, - { - "id": 10, - "type": "bargauge", - "title": "Ariadne Schedule Runs (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 12 - }, - "targets": [ - { - "expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))", - "refId": "A", - "legendFormat": "{{task}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "none", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ], - "description": "Number of runs by schedule task over the selected dashboard time range." - }, - { - "id": 11, - "type": "bargauge", - "title": "Ariadne Schedule Errors (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 12 - }, - "targets": [ - { - "expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))", - "refId": "A", - "legendFormat": "{{task}}", + "legendFormat": "{{suite}}", "instant": true } ], @@ -763,76 +1050,252 @@ data: "order": "desc" } } - ], - "description": "Error run count by schedule task over the selected dashboard time range." + ] }, { - "id": 12, - "type": "timeseries", - "title": "Ariadne Attempts / Failures", + "id": 9, + "type": "bargauge", + "title": "Success Rate by Suite (24h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 20 + "h": 8, + "w": 8, + "x": 8, + "y": 9 }, "targets": [ { - "expr": "sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)", + "expr": "sort_desc(100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[24h]))), 1))", "refId": "A", - "legendFormat": "Attempts" + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, + { + "color": "green", + "value": 99 + } + ] + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ] + }, + { + "id": 10, + "type": "bargauge", + "title": "Coverage Gap to 95% by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 9 + }, + "targets": [ + { + "expr": "sort_desc(clamp_min(95 - ((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}))), 0))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "decimals": 2 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ], + "description": "Gap from the 95% target. 0 means the suite is at or above target." + }, + { + "id": 11, + "type": "timeseries", + "title": "Success History by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "targets": [ + { + "expr": "100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[$__interval])) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval]))), 1))", + "refId": "A", + "legendFormat": "{{suite}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "min": 0, + "max": 100, + "custom": { + "drawStyle": "line", + "lineInterpolation": "linear", + "lineWidth": 2, + "fillOpacity": 8, + "showPoints": "always", + "pointSize": 3, + "spanNulls": true + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 12, + "type": "timeseries", + "title": "Run Outcomes (Selected Scope)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 25 + }, + "targets": [ + { + "refId": "A", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Success" }, { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval])) or on() vector(0)", "refId": "B", - "legendFormat": "Failures" + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\",status!~\"ok|passed|success\"}[$__interval])) or on() vector(0)", + "legendFormat": "Failure" + }, + { + "refId": "C", + "expr": "sum(increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval])) or on() vector(0)", + "legendFormat": "Total" } ], "fieldConfig": { "defaults": { "unit": "none" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Attempts" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Failures" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] + "overrides": [] }, "options": { "legend": { - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "sum" + ] }, "tooltip": { "mode": "multi" @@ -841,29 +1304,126 @@ data: }, { "id": 13, - "type": "bargauge", - "title": "One-off Job Pods (age hours)", + "type": "timeseries", + "title": "Coverage & LOC History (Selected Scope)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 20 + "h": 8, + "w": 8, + "x": 8, + "y": 25 }, "targets": [ { - "expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))", "refId": "A", - "legendFormat": "{{namespace}}/{{pod}}", + "expr": "max_over_time(platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval])", + "legendFormat": "{{suite}} coverage %" + }, + { + "refId": "B", + "expr": "max_over_time(platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[$__interval])", + "legendFormat": "{{suite}} files >500 LOC" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "calcs": [ + "lastNotNull", + "max" + ] + }, + "tooltip": { + "mode": "multi" + } + } + }, + { + "id": 14, + "type": "piechart", + "title": "Run Status Mix (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 25 + }, + "targets": [ + { + "expr": "sum by (status) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d]))", + "refId": "A", + "legendFormat": "{{status}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "displayLabels": [], + "tooltip": { + "mode": "single" + }, + "colorScheme": "interpolateSpectral", + "colorBy": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "id": 15, + "type": "bargauge", + "title": "Latest Test Counters (Suite + Result)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "targets": [ + { + "expr": "sort_desc(sum by (suite, result) ({__name__=~\".*_quality_gate_tests_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}))", + "refId": "A", + "legendFormat": "{{suite}} \u00b7 {{result}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "unit": "none", "min": 0, "max": null, "thresholds": { @@ -875,19 +1435,18 @@ data: }, { "color": "yellow", - "value": 6 + "value": 50 }, { "color": "orange", - "value": 24 + "value": 70 }, { "color": "red", - "value": 48 + "value": 85 } ] - }, - "decimals": 2 + } }, "overrides": [] }, @@ -915,516 +1474,550 @@ data: { "id": "limit", "options": { - "limit": 12 - } - } - ] - }, - { - "id": 14, - "type": "bargauge", - "title": "Glue Jobs Last Success (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 27 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] - }, - { - "id": 15, - "type": "bargauge", - "title": "Glue Jobs Last Schedule (hours ago)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 27 - }, - "targets": [ - { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", - "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "min": 0, - "max": null, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" + "limit": 24 } } ] }, { "id": 16, - "type": "stat", - "title": "Jenkins Cleanup Signal Present", + "type": "bargauge", + "title": "Failing Checks (Suite + Check)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 33 - }, - "targets": [ - { - "expr": "count(ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) or on() vector(0)", - "refId": "A", - "legendFormat": "Signal", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } - }, - { - "id": 17, - "type": "stat", - "title": "Jenkins Cleanup Last Run Age (h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 33 - }, - "targets": [ - { - "expr": "((time() - ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) / 3600) or on() vector(999)", - "refId": "A", - "legendFormat": "Last Run", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "unit": "h", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } - }, - { - "id": 18, - "type": "stat", - "title": "Jenkins Cleanup Last Success Age (h)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 33 - }, - "targets": [ - { - "expr": "((time() - ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds) / 3600) or on() vector(999)", - "refId": "A", - "legendFormat": "Last Success", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 6 - }, - { - "color": "orange", - "value": 24 - }, - { - "color": "red", - "value": 48 - } - ] - }, - "unit": "h", - "custom": { - "displayMode": "auto" - }, - "decimals": 2 - }, - "overrides": [] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } - }, - { - "id": 19, - "type": "stat", - "title": "Jenkins Cleanup Planned (last run)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 4, - "w": 4, + "h": 8, + "w": 12, "x": 12, "y": 33 }, "targets": [ { - "expr": "ariadne_jenkins_workspace_cleanup_last_planned_total or on() vector(0)", + "expr": "sort_desc(sum by (suite, check) ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\",result!~\"ok|passed|success\"}))", "refId": "A", - "legendFormat": "Planned", + "legendFormat": "{{suite}} \u00b7 {{check}}", "instant": true } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "none", + "min": 0, + "max": null, "thresholds": { "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 } ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" } }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", + "displayMode": "gradient", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } + { + "id": "limit", + "options": { + "limit": 24 + } + } + ], + "description": "Top failing checks in the selected scope. Empty is healthy." }, { - "id": 20, - "type": "stat", - "title": "Jenkins Cleanup Deleted (last run)", + "id": 17, + "type": "bargauge", + "title": "Coverage by Suite (Latest, gate 95)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 33 + "h": 8, + "w": 12, + "x": 0, + "y": 41 }, "targets": [ { - "expr": "ariadne_jenkins_workspace_cleanup_last_deleted_total or on() vector(0)", + "expr": "sort(((max by (suite) ({__name__=~\".*_quality_gate_coverage_percent\",suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (max by (suite) (platform_quality_gate_workspace_line_coverage_percent{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}))) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d]))) - 1))", "refId": "A", - "legendFormat": "Deleted", + "legendFormat": "{{suite}}", "instant": true } ], "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], + "unit": "percent", + "min": 0, + "max": 100, "thresholds": { "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, + { + "color": "orange", + "value": 80 + }, + { + "color": "yellow", + "value": 95 + }, { "color": "green", - "value": 1 + "value": 99 } ] }, - "unit": "none", - "custom": { - "displayMode": "auto" - } + "decimals": 2, + "mappings": [ + { + "type": "value", + "options": { + "-1": { + "text": "missing" + } + } + } + ] }, "overrides": [] }, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", + "displayMode": "gradient", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false - }, - "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": true - } + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ] }, { - "id": 21, - "type": "stat", - "title": "Ariadne Access Requests", + "id": 18, + "type": "bargauge", + "title": "Files >500 LOC by Suite (Latest)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 33 + "h": 8, + "w": 12, + "x": 12, + "y": 41 }, "targets": [ { - "expr": "ariadne_access_requests_total", + "expr": "sort_desc((max by (suite) (platform_quality_gate_source_lines_over_500_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"})) or on(suite) (0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite}\",exported_job=\"platform-quality-ci\"}[30d]))) - 1))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "mappings": [ + { + "type": "value", + "options": { + "-1": { + "text": "missing" + } + } + } + ] + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 27, + "type": "bargauge", + "title": "Missing Tests Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) ({__name__=~\".*_quality_gate_tests_total\",exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 28, + "type": "bargauge", + "title": "Missing Checks Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) ({__name__=~\".*_quality_gate_checks_total\",exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 29, + "type": "bargauge", + "title": "Missing Coverage Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 30, + "type": "bargauge", + "title": "Missing LOC Metrics by Suite", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 49 + }, + "targets": [ + { + "expr": "sort_desc(((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) unless on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{exported_job=\"platform-quality-ci\"})))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "min": 0, + "max": null, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 31, + "type": "stat", + "title": "SonarQube API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 56 + }, + "targets": [ + { + "expr": "(max(sonarqube_up) or on() vector(0))", "refId": "A", "instant": true } @@ -1439,7 +2032,7 @@ data: "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "red", "value": null }, { @@ -1470,201 +2063,64 @@ data: } }, { - "id": 22, - "type": "timeseries", - "title": "Jenkins Cleanup Runs (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 37 - }, - "targets": [ - { - "refId": "A", - "expr": "sum by (mode, status) (increase(ariadne_jenkins_workspace_cleanup_runs_total[$__range]))", - "legendFormat": "{{mode}}/{{status}}" - } - ], - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [] - }, - "options": { - "legend": { - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "multi" - } - } - }, - { - "id": 23, - "type": "timeseries", - "title": "Jenkins Cleanup Objects (range)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 37 - }, - "targets": [ - { - "refId": "A", - "expr": "sum by (kind, action, mode) (increase(ariadne_jenkins_workspace_cleanup_objects_total[$__range]))", - "legendFormat": "{{kind}}/{{action}}/{{mode}}" - } - ], - "fieldConfig": { - "defaults": { - "unit": "none" - }, - "overrides": [] - }, - "options": { - "legend": { - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "multi" - } - } - }, - { - "id": 24, + "id": 32, "type": "stat", - "title": "Jenkins Build Weather (last run h, newest first)", + "title": "Sonar Projects (Selected)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 44 + "h": 6, + "w": 4, + "x": 4, + "y": 56 }, "targets": [ { + "expr": "(count(sonarqube_project_quality_gate_pass{project_key=~\"${suite}\"}) or on() vector(0))", "refId": "A", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": true - }, - { - "refId": "B", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": true - }, - { - "refId": "C", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", - "legendFormat": "{{weather_icon}} {{exported_job}}", - "instant": true - }, - { - "refId": "D", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", - "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", - "decimals": 1, - "min": 0, - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": true - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byFrameRefID", - "options": "A" - }, - "properties": [ + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 } ] }, - { - "matcher": { - "id": "byFrameRefID", - "options": "B" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "C" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "D" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] + "unit": "none", + "custom": { + "displayMode": "auto" } - ] + }, + "overrides": [] }, "options": { "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": true, + "graphMode": "area", + "justifyMode": "center", "reduceOptions": { "calcs": [ "lastNotNull" @@ -1672,56 +2128,154 @@ data: "fields": "", "values": false }, - "textMode": "name_and_value", - "text": { - "titleSize": 12, - "valueSize": 12 - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - }, - { - "id": "limit", - "options": { - "limit": 20 - } - } - ], - "description": "Jenkins homepage-style list from Ariadne with weather icons and status-colored job names. Click a name to open the Jenkins job page." + "textMode": "value" + } }, { - "id": 27, - "type": "bargauge", - "title": "Jenkins Workspace PV Age (h, detached only)", + "id": 33, + "type": "stat", + "title": "Sonar Gate Fetch Errors", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 53 + "h": 6, + "w": 4, + "x": 8, + "y": 56 }, "targets": [ { - "expr": "sort_desc(((time() - kube_persistentvolume_created) / 3600) * on(persistentvolume) group_left(claim_namespace,name) kube_persistentvolume_claim_ref{claim_namespace=\"jenkins\",name=~\"pvc-workspace-.*\"} * on(persistentvolume) group_left() (kube_persistentvolume_status_phase{phase=~\"Released|Failed\"} > bool 0))", + "expr": "(max(sonarqube_quality_gate_fetch_errors_total) or on() vector(0))", "refId": "A", - "legendFormat": "{{name}} -> {{persistentvolume}}", "instant": true } ], "fieldConfig": { "defaults": { - "unit": "h", + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 3 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 34, + "type": "piechart", + "title": "Sonar Gate Status Mix (Selected)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 56 + }, + "targets": [ + { + "expr": "count by (status) (sonarqube_project_quality_gate_pass{project_key=~\"${suite}\"})", + "refId": "A", + "legendFormat": "{{status}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percent", + "color": { + "mode": "palette-classic" + } + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "right" + }, + "pieType": "pie", + "displayLabels": [], + "tooltip": { + "mode": "single" + }, + "colorScheme": "interpolateSpectral", + "colorBy": "value", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "id": 35, + "type": "bargauge", + "title": "Projects Failing Sonar Gate", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 56 + }, + "targets": [ + { + "expr": "sort_desc(count by (project_key) (sonarqube_project_quality_gate_pass{project_key=~\"${suite}\",status!~\"OK|ok\"}))", + "refId": "A", + "legendFormat": "{{project_key}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", "min": 0, "max": null, "thresholds": { @@ -1733,19 +2287,18 @@ data: }, { "color": "yellow", - "value": 24 + "value": 1 }, { "color": "orange", - "value": 72 + "value": 3 }, { "color": "red", - "value": 168 + "value": 5 } ] - }, - "decimals": 1 + } }, "overrides": [] }, @@ -1769,19 +2322,12 @@ data: ], "order": "desc" } - }, - { - "id": "limit", - "options": { - "limit": 40 - } } - ], - "description": "Oldest detached Jenkins workspace volumes first. This is the direct cleanup backlog view." + ] } ], "time": { - "from": "now-7d", + "from": "now-30d", "to": "now" }, "annotations": { @@ -1791,7 +2337,77 @@ data: "style": "dark", "tags": [ "atlas", - "jobs", - "glue" - ] + "testing", + "quality-gate", + "ci" + ], + "templating": { + "list": [ + { + "name": "suite", + "label": "Suite", + "type": "custom", + "query": "ariadne : ariadne,metis : metis,ananke : ananke,atlasbot : atlasbot,pegasus : pegasus|pegasus-health|pegasus_health,soteria : soteria,titan_iac : titan_iac|titan-iac,bstein_home : bstein_home|bstein-home,data_prepper : data_prepper|data-prepper", + "current": { + "text": "All", + "value": "$__all", + "selected": true + }, + "options": [ + { + "text": "ariadne", + "value": "ariadne", + "selected": false + }, + { + "text": "metis", + "value": "metis", + "selected": false + }, + { + "text": "ananke", + "value": "ananke", + "selected": false + }, + { + "text": "atlasbot", + "value": "atlasbot", + "selected": false + }, + { + "text": "pegasus", + "value": "pegasus|pegasus-health|pegasus_health", + "selected": false + }, + { + "text": "soteria", + "value": "soteria", + "selected": false + }, + { + "text": "titan_iac", + "value": "titan_iac|titan-iac", + "selected": false + }, + { + "text": "bstein_home", + "value": "bstein_home|bstein-home", + "selected": false + }, + { + "text": "data_prepper", + "value": "data_prepper|data-prepper", + "selected": false + } + ], + "hide": 0, + "multi": false, + "includeAll": true, + "allValue": "ariadne|metis|ananke|atlasbot|pegasus|soteria|titan_iac|bstein_home|data_prepper", + "refresh": 1, + "sort": 1, + "skipUrlSync": false + } + ] + } } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index a223f180..2dc0c8d0 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -138,7 +138,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -213,7 +213,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -361,7 +361,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -436,7 +436,7 @@ data: }, "links": [ { - "title": "Open atlas-pods dashboard", + "title": "Open Atlas Pods", "url": "/d/atlas-pods", "targetBlank": true } @@ -574,7 +574,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -651,7 +651,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -720,7 +720,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -789,7 +789,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -864,7 +864,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -939,7 +939,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1006,7 +1006,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1073,7 +1073,7 @@ data: }, "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -1082,22 +1082,52 @@ data: { "id": 40, "type": "stat", - "title": "Pyrphoros UPS Current", + "title": "UPS Current Load", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 3, - "w": 6, + "h": 6, + "w": 4, "x": 0, - "y": 7 + "y": 12 }, "targets": [ { - "expr": "label_replace(max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0), \"metric\", \"Draw\", \"__name__\", \".*\") or label_replace(max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0), \"metric\", \"Runtime\", \"__name__\", \".*\")", "refId": "A", - "legendFormat": "{{metric}}", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)", + "legendFormat": "Pyrphoros Draw (W)", + "instant": true + }, + { + "refId": "B", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", + "legendFormat": "Pyrphoros Discharge", + "instant": true + }, + { + "refId": "C", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)", + "legendFormat": "Pyrphoros Status", + "instant": true + }, + { + "refId": "D", + "expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)", + "legendFormat": "Statera Draw (W)", + "instant": true + }, + { + "refId": "E", + "expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", + "legendFormat": "Statera Discharge", + "instant": true + }, + { + "refId": "F", + "expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)", + "legendFormat": "Statera Status", "instant": true } ], @@ -1123,128 +1153,127 @@ data: "unit": "none", "custom": { "displayMode": "auto" - } + }, + "decimals": 1 }, "overrides": [ { "matcher": { "id": "byName", - "options": "Draw" + "options": "Pyrphoros Draw (W)" }, "properties": [ { "id": "unit", "value": "watt" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Runtime" - }, - "properties": [ - { - "id": "unit", - "value": "s" - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } - }, - "links": [ - { - "title": "Open atlas-power dashboard", - "url": "/d/atlas-power", - "targetBlank": true - } - ] - }, - { - "id": 144, - "type": "stat", - "title": "Statera UPS Current", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 3, - "w": 6, - "x": 0, - "y": 10 - }, - "targets": [ - { - "expr": "label_replace(max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0), \"metric\", \"Draw\", \"__name__\", \".*\") or label_replace(max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0), \"metric\", \"Runtime\", \"__name__\", \".*\")", - "refId": "A", - "legendFormat": "{{metric}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null }, { - "color": "green", - "value": 1 + "id": "description", + "value": "Attached node: titan-db" } ] }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [ { "matcher": { "id": "byName", - "options": "Draw" + "options": "Statera Draw (W)" }, "properties": [ { "id": "unit", "value": "watt" + }, + { + "id": "description", + "value": "Attached node: titan-24" } ] }, { "matcher": { "id": "byName", - "options": "Runtime" + "options": "Pyrphoros Discharge" }, "properties": [ { "id": "unit", "value": "s" + }, + { + "id": "description", + "value": "Attached node: titan-db" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Statera Discharge" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "description", + "value": "Attached node: titan-24" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pyrphoros Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { + "text": "\u26a1 Charging" + }, + "1": { + "text": "\ud83d\udd0b Discharging" + } + } + } + ] + }, + { + "id": "description", + "value": "Attached node: titan-db" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Statera Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "type": "value", + "options": { + "0": { + "text": "\u26a1 Charging" + }, + "1": { + "text": "\ud83d\udd0b Discharging" + } + } + } + ] + }, + { + "id": "description", + "value": "Attached node: titan-24" } ] } @@ -1262,18 +1291,17 @@ data: "values": false }, "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } + "orientation": "horizontal", + "wideLayout": true }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Per-UPS live snapshot: current draw, discharge, and charging/discharging status." }, { "id": 41, @@ -1285,9 +1313,9 @@ data: }, "gridPos": { "h": 6, - "w": 6, - "x": 6, - "y": 7 + "w": 4, + "x": 4, + "y": 12 }, "targets": [ { @@ -1314,8 +1342,8 @@ data: }, "options": { "legend": { - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom" }, "tooltip": { "mode": "multi" @@ -1323,7 +1351,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1332,22 +1360,40 @@ data: { "id": 42, "type": "stat", - "title": "Current Enclosure Temperature", + "title": "Current Climate", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { - "h": 3, - "w": 6, - "x": 0, - "y": 13 + "h": 6, + "w": 4, + "x": 8, + "y": 12 }, "targets": [ { - "expr": "label_replace(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)) or on() vector(0), \"metric\", \"\u00b0C\", \"__name__\", \".*\") or label_replace(max((max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)) * 9 / 5 + 32) or on() vector(0), \"metric\", \"\u00b0F\", \"__name__\", \".*\")", "refId": "A", - "legendFormat": "{{metric}}", + "expr": "max(typhon_temperature_celsius) or on() vector(0)", + "legendFormat": "Tent Temp (\u00b0C)", + "instant": true + }, + { + "refId": "B", + "expr": "max(typhon_vpd_kpa) or on() vector(0)", + "legendFormat": "Tent VPD (kPa)", + "instant": true + }, + { + "refId": "C", + "expr": "max(typhon_relative_humidity_percent) or on() vector(0)", + "legendFormat": "Tent RH (%)", + "instant": true + }, + { + "refId": "D", + "expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)", + "legendFormat": "Dew Point (\u00b0C)", "instant": true } ], @@ -1373,13 +1419,14 @@ data: "unit": "none", "custom": { "displayMode": "auto" - } + }, + "decimals": 2 }, "overrides": [ { "matcher": { "id": "byName", - "options": "\u00b0C" + "options": "Tent Temp (\u00b0C)" }, "properties": [ { @@ -1391,105 +1438,7 @@ data: { "matcher": { "id": "byName", - "options": "\u00b0F" - }, - "properties": [ - { - "id": "unit", - "value": "fahrenheit" - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } - }, - "links": [ - { - "title": "Open atlas-power dashboard", - "url": "/d/atlas-power", - "targetBlank": true - } - ] - }, - { - "id": 143, - "type": "stat", - "title": "Current Enclosure Climate", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 3, - "w": 6, - "x": 0, - "y": 16 - }, - "targets": [ - { - "expr": "label_replace(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)) or on() vector(0), \"metric\", \"%RH\", \"__name__\", \".*\") or label_replace(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)) or on() vector(0), \"metric\", \"kPa\", \"__name__\", \".*\")", - "refId": "A", - "legendFormat": "{{metric}}", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(115, 115, 115, 1)", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "none", - "custom": { - "displayMode": "auto" - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "%RH" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:%RH" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "kPa" + "options": "Tent VPD (kPa)" }, "properties": [ { @@ -1497,6 +1446,30 @@ data: "value": "suffix:kPa" } ] + }, + { + "matcher": { + "id": "byName", + "options": "Tent RH (%)" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dew Point (\u00b0C)" + }, + "properties": [ + { + "id": "unit", + "value": "celsius" + } + ] } ] }, @@ -1511,304 +1484,93 @@ data: "fields": "", "values": false }, - "textMode": "name_and_value", - "text": { - "titleSize": 14, - "valueSize": 30 - } + "textMode": "value", + "orientation": "horizontal", + "wideLayout": true }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Current tent temperature, humidity, VPD, and dew point." }, { "id": 43, "type": "timeseries", - "title": "Enclosure Climate History", + "title": "Climate History", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 6, - "w": 6, - "x": 6, - "y": 13 + "w": 4, + "x": 12, + "y": 12 }, "targets": [ { "refId": "A", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)", - "legendFormat": "C" + "expr": "typhon_temperature_celsius", + "legendFormat": "Temperature (\u00b0C)" }, { "refId": "B", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)", - "legendFormat": "RH" + "expr": "typhon_relative_humidity_percent", + "legendFormat": "Humidity (%)" }, { "refId": "C", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)", - "legendFormat": "P" + "expr": "typhon_vpd_kpa", + "legendFormat": "VPD (kPa)" }, { "refId": "D", - "expr": "(min_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)[$__range]) - 0.08)", - "legendFormat": "C bound min" - }, - { - "refId": "E", - "expr": "(max_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_temperature_celsius != 0)[$__range]) + 0.08)", - "legendFormat": "C bound max" - }, - { - "refId": "F", - "expr": "clamp_min((min_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)[$__range]) - 0.35), 0)", - "legendFormat": "RH bound min" - }, - { - "refId": "G", - "expr": "clamp_max((max_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_relative_humidity_percent != 0)[$__range]) + 0.35), 100)", - "legendFormat": "RH bound max" - }, - { - "refId": "H", - "expr": "clamp_min((min_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)[$__range]) - 0.03), 0)", - "legendFormat": "P bound min" - }, - { - "refId": "I", - "expr": "(max_over_time(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_vpd_kpa != 0)[$__range]) + 0.03)", - "legendFormat": "P bound max" + "expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))", + "legendFormat": "Dew Point (\u00b0C)" } ], "fieldConfig": { "defaults": { - "unit": "none", - "custom": { - "drawStyle": "line", - "lineInterpolation": "linear", - "lineWidth": 2, - "fillOpacity": 10, - "showPoints": "never", - "spanNulls": true - } + "unit": "celsius" }, "overrides": [ { "matcher": { "id": "byName", - "options": "C" + "options": "Humidity (%)" }, "properties": [ { "id": "unit", - "value": "suffix:\u00b0C" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.axisPlacement", - "value": "left" - }, - { - "id": "custom.axisCenteredZero", - "value": false - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "C bound .*" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:\u00b0C" - }, - { - "id": "custom.axisPlacement", - "value": "left" - }, - { - "id": "custom.axisCenteredZero", - "value": false - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - }, - { - "id": "custom.lineWidth", - "value": 0 - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "transparent" - } + "value": "percent" } ] }, { "matcher": { "id": "byName", - "options": "RH" + "options": "VPD (kPa)" }, "properties": [ { "id": "unit", - "value": "suffix:%" + "value": "none" + }, + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisLabel", + "value": "kPa" }, { "id": "decimals", "value": 2 - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "custom.axisCenteredZero", - "value": false - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "RH bound .*" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:%" - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "custom.axisCenteredZero", - "value": false - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - }, - { - "id": "custom.lineWidth", - "value": 0 - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "transparent" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "P" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:kPa" - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.axisCenteredZero", - "value": false - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "P bound .*" - }, - "properties": [ - { - "id": "unit", - "value": "suffix:kPa" - }, - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "custom.axisCenteredZero", - "value": false - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - }, - { - "id": "custom.lineWidth", - "value": 0 - }, - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.showPoints", - "value": "never" - }, - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "transparent" - } } ] } @@ -1825,12 +1587,12 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } ], - "description": "Temperature on left axis, humidity and pressure on right axis with dynamic bound series so small swings remain visible." + "description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis)." }, { "id": 140, @@ -1842,15 +1604,33 @@ data: }, "gridPos": { "h": 6, - "w": 6, - "x": 12, - "y": 13 + "w": 4, + "x": 16, + "y": 12 }, "targets": [ { - "expr": "label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})) or on() vector(0))), \"metric\", \"Outlet\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})) or on() vector(0))), \"metric\", \"Inlet - In\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})) or on() vector(0))), \"metric\", \"Inlet - Out\", \"__name__\", \".*\") or label_replace((round(max(max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})) or on() vector(0))), \"metric\", \"Interior\", \"__name__\", \".*\")", "refId": "A", - "legendFormat": "{{metric}}", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))", + "legendFormat": "Inside Outlet", + "instant": true + }, + { + "refId": "B", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))", + "legendFormat": "Inside Inlet", + "instant": true + }, + { + "refId": "C", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))", + "legendFormat": "Outside Inlet", + "instant": true + }, + { + "refId": "D", + "expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))", + "legendFormat": "Interior Fans", "instant": true } ], @@ -1883,56 +1663,7 @@ data: }, "decimals": 0 }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Outlet" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - In" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inlet - Out" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Interior" - }, - "properties": [ - { - "id": "decimals", - "value": 0 - } - ] - } - ] + "overrides": [] }, "options": { "colorMode": "value", @@ -1946,15 +1677,12 @@ data: "values": false }, "textMode": "name_and_value", - "orientation": "vertical", - "wideLayout": false, - "text": { - "valueSize": 26 - } + "orientation": "horizontal", + "wideLayout": true }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -1970,30 +1698,30 @@ data: }, "gridPos": { "h": 6, - "w": 6, - "x": 18, - "y": 13 + "w": 4, + "x": 20, + "y": 12 }, "targets": [ { "refId": "A", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"1\"})", - "legendFormat": "Outlet" + "expr": "typhon_fan_speed_level{fan_group=\"outlet\"}", + "legendFormat": "Inside Outlet" }, { "refId": "B", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"2\"})", - "legendFormat": "Inlet - Inside" + "expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}", + "legendFormat": "Inside Inlet" }, { "refId": "C", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"3\"})", - "legendFormat": "Inlet - Outside" + "expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}", + "legendFormat": "Outside Inlet" }, { "refId": "D", - "expr": "max without (job,instance,pod,service,endpoint,namespace,controller_name,port_name,fan_group) (typhon_fan_speed_level{port=\"4\"})", - "legendFormat": "Interior" + "expr": "typhon_fan_speed_level{fan_group=\"interior\"}", + "legendFormat": "Interior Fans" } ], "fieldConfig": { @@ -2005,8 +1733,8 @@ data: }, "options": { "legend": { - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom" }, "tooltip": { "mode": "multi" @@ -2014,7 +1742,7 @@ data: }, "links": [ { - "title": "Open atlas-power dashboard", + "title": "Open Atlas Power", "url": "/d/atlas-power", "targetBlank": true } @@ -2030,9 +1758,9 @@ data: }, "gridPos": { "h": 5, - "w": 8, + "w": 6, "x": 0, - "y": 32 + "y": 7 }, "targets": [ { @@ -2085,7 +1813,7 @@ data: }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -2117,19 +1845,19 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 6, + "h": 5, "w": 6, - "x": 12, + "x": 6, "y": 7 }, "targets": [ { - "expr": "sum(increase(ariadne_task_runs_total[$__interval])) or on() vector(0)", + "expr": "sum(increase(ariadne_task_runs_total[$__interval]))", "refId": "A", "legendFormat": "Attempts" }, { - "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval])) or on() vector(0)", + "expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))", "refId": "B", "legendFormat": "Failures" } @@ -2182,7 +1910,7 @@ data: }, "links": [ { - "title": "Open atlas-jobs dashboard", + "title": "Open Atlas Testing", "url": "/d/atlas-jobs", "targetBlank": true } @@ -2197,66 +1925,56 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 6, + "h": 5, "w": 6, - "x": 18, + "x": 12, "y": 7 }, "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "ananke" }, { "refId": "D", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "atlasbot" }, { "refId": "E", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "lesavka" - }, - { - "refId": "F", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))) > 0) or on() vector(0)", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "pegasus" }, { - "refId": "G", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))) > 0) or on() vector(0)", + "refId": "F", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", "legendFormat": "soteria" }, + { + "refId": "G", + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan_iac|titan-iac\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan_iac|titan-iac\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", + "legendFormat": "titan_iac" + }, { "refId": "H", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "titan-iac" + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein_home|bstein-home\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein_home|bstein-home\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", + "legendFormat": "bstein_home" }, { "refId": "I", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "bstein-home" - }, - { - "refId": "J", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "arcanagon" - }, - { - "refId": "K", - "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))) > 0) or on() vector(0)", - "legendFormat": "data-prepper" + "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data_prepper|data-prepper\",status=~\"ok|passed|success\",exported_job=\"platform-quality-ci\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data_prepper|data-prepper\",exported_job=\"platform-quality-ci\"}[1h]))), 1))", + "legendFormat": "data_prepper" } ], "fieldConfig": { @@ -2290,228 +2008,14 @@ data: }, "links": [ { - "title": "Open atlas-testing dashboard", - "url": "/d/atlas-testing", + "title": "Open Atlas Testing", + "url": "/d/atlas-jobs", "targetBlank": true } ], "timeFrom": "7d", "description": "Per-run interval pass points (0-100) for each software suite over the last 7 days. Points are connected to show trend; missing-run intervals are ignored." }, - { - "id": 142, - "type": "stat", - "title": "Jenkins Last Success (h, newest first)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 32 - }, - "targets": [ - { - "refId": "A", - "expr": "sort((label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1), \"run_state\", \"ok\", \"exported_job\", \".*\")) or (label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) != 1), \"run_state\", \"bad\", \"exported_job\", \".*\")))", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "decimals": 1, - "min": 0, - "displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}", - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": true - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"ok\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"bad\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": true, - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 11, - "valueSize": 11 - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ], - "links": [ - { - "title": "Open atlas-jobs dashboard", - "url": "/d/atlas-jobs", - "targetBlank": true - } - ], - "description": "Top 6 most recent Jenkins successes by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list." - }, - { - "id": 243, - "type": "stat", - "title": "Jenkins Last Failure (h, newest first)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 32 - }, - "targets": [ - { - "refId": "A", - "expr": "sort((label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1), \"run_state\", \"ok\", \"exported_job\", \".*\")) or (label_replace((sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) != 1), \"run_state\", \"bad\", \"exported_job\", \".*\")))", - "instant": true - } - ], - "fieldConfig": { - "defaults": { - "unit": "h", - "decimals": 1, - "min": 0, - "displayName": "${__field.labels.weather_icon} ${__field.labels.exported_job}", - "links": [ - { - "title": "Open Jenkins job", - "url": "https://ci.bstein.dev/job/${__field.labels.exported_job}/", - "targetBlank": true - } - ] - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"ok\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "green" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": ".*run_state=\"bad\".*" - }, - "properties": [ - { - "id": "color", - "value": { - "mode": "fixed", - "fixedColor": "red" - } - } - ] - } - ] - }, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "left", - "orientation": "horizontal", - "wideLayout": true, - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "name_and_value", - "text": { - "titleSize": 11, - "valueSize": 11 - } - }, - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "asc" - } - } - ], - "links": [ - { - "title": "Open atlas-jobs dashboard", - "url": "/d/atlas-jobs", - "targetBlank": true - } - ], - "description": "Top 6 most recent Jenkins failures by age (newest first). Green means last run succeeded; red means last run did not succeed. Use Atlas Jobs for the full list." - }, { "id": 47, "type": "bargauge", @@ -2522,13 +2026,13 @@ data: }, "gridPos": { "h": 5, - "w": 8, - "x": 16, - "y": 32 + "w": 6, + "x": 18, + "y": 7 }, "targets": [ { - "expr": "sort_desc(max by (namespace, pvc) (((time() - pvc_backup_last_success_timestamp_seconds{driver=\"restic\"}) / 3600) or on(namespace,pvc,volume,driver) ((((pvc_backup_health_reason{driver=\"restic\",reason=~\"missing|no_completed|lookup_failed|unknown_timestamp\"} > 0) * (pvc_backup_count{driver=\"restic\"} > bool 0)) * 999))) or on() vector(0))", + "expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))", "refId": "A", "legendFormat": "{{namespace}}/{{pvc}}", "instant": true @@ -2587,12 +2091,12 @@ data: ], "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } ], - "description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs (nightly target: <=20h green, <40h yellow, <50h orange, >=50h red). PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." + "description": "Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible." }, { "id": 30, @@ -2606,7 +2110,7 @@ data: "h": 2, "w": 4, "x": 0, - "y": 19 + "y": 18 }, "targets": [ { @@ -2655,7 +2159,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2673,7 +2177,7 @@ data: "h": 2, "w": 4, "x": 8, - "y": 19 + "y": 18 }, "targets": [ { @@ -2760,7 +2264,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2778,7 +2282,7 @@ data: "h": 2, "w": 4, "x": 4, - "y": 19 + "y": 18 }, "targets": [ { @@ -2836,7 +2340,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2854,7 +2358,7 @@ data: "h": 2, "w": 4, "x": 12, - "y": 19 + "y": 18 }, "targets": [ { @@ -2912,7 +2416,7 @@ data: }, "links": [ { - "title": "Open atlas-mail dashboard", + "title": "Open Atlas Mail", "url": "/d/atlas-mail", "targetBlank": true } @@ -2930,7 +2434,7 @@ data: "h": 2, "w": 4, "x": 16, - "y": 19 + "y": 18 }, "targets": [ { @@ -2993,7 +2497,7 @@ data: "h": 2, "w": 4, "x": 20, - "y": 19 + "y": 18 }, "targets": [ { @@ -3263,7 +2767,7 @@ data: "h": 12, "w": 12, "x": 0, - "y": 44 + "y": 39 }, "targets": [ { @@ -3292,7 +2796,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -3310,7 +2814,7 @@ data: "h": 12, "w": 12, "x": 12, - "y": 44 + "y": 39 }, "targets": [ { @@ -3339,7 +2843,7 @@ data: }, "links": [ { - "title": "Open atlas-nodes dashboard", + "title": "Open Atlas Nodes", "url": "/d/atlas-nodes", "targetBlank": true } @@ -3357,7 +2861,7 @@ data: "h": 10, "w": 12, "x": 0, - "y": 56 + "y": 51 }, "targets": [ { @@ -3394,7 +2898,7 @@ data: "h": 10, "w": 12, "x": 12, - "y": 56 + "y": 51 }, "targets": [ { @@ -3431,7 +2935,7 @@ data: "h": 10, "w": 12, "x": 0, - "y": 66 + "y": 61 }, "targets": [ { @@ -3482,7 +2986,7 @@ data: "h": 10, "w": 12, "x": 12, - "y": 66 + "y": 61 }, "targets": [ { @@ -3563,7 +3067,7 @@ data: "h": 7, "w": 8, "x": 0, - "y": 37 + "y": 32 }, "targets": [ { @@ -3589,7 +3093,7 @@ data: }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3607,7 +3111,7 @@ data: "h": 7, "w": 8, "x": 8, - "y": 37 + "y": 32 }, "targets": [ { @@ -3633,7 +3137,7 @@ data: }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3651,7 +3155,7 @@ data: "h": 7, "w": 8, "x": 16, - "y": 37 + "y": 32 }, "targets": [ { @@ -3677,7 +3181,7 @@ data: }, "links": [ { - "title": "Open atlas-network dashboard", + "title": "Open Atlas Network", "url": "/d/atlas-network", "targetBlank": true } @@ -3695,7 +3199,7 @@ data: "h": 16, "w": 12, "x": 0, - "y": 76 + "y": 71 }, "targets": [ { @@ -3725,7 +3229,7 @@ data: "timeFrom": "30d", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3743,7 +3247,7 @@ data: "h": 16, "w": 12, "x": 12, - "y": 76 + "y": 71 }, "targets": [ { @@ -3773,7 +3277,7 @@ data: "timeFrom": "1w", "links": [ { - "title": "Open atlas-storage dashboard", + "title": "Open Atlas Storage", "url": "/d/atlas-storage", "targetBlank": true } @@ -3897,5 +3401,11 @@ data: "to": "now" }, "refresh": "1m", - "links": [] + "links": [ + { + "title": "Atlas Testing (Internal)", + "url": "/d/atlas-jobs", + "targetBlank": false + } + ] } diff --git a/services/monitoring/scripts/platform_quality_suite_probe.sh b/services/monitoring/scripts/platform_quality_suite_probe.sh index 0991d676..df5fff36 100755 --- a/services/monitoring/scripts/platform_quality_suite_probe.sh +++ b/services/monitoring/scripts/platform_quality_suite_probe.sh @@ -74,7 +74,7 @@ failures=0 check_http_suite "atlasbot" "http://atlasbot.comms.svc.cluster.local:8090/health" "200" '"status": "ok"' || failures=$((failures + 1)) check_http_suite "pegasus" "http://pegasus.jellyfin.svc.cluster.local/healthz" "200" || failures=$((failures + 1)) -check_http_suite "bstein-home" "http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local/api/healthz" "200" || failures=$((failures + 1)) +check_http_suite "bstein_home" "http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local/api/healthz" "200" || failures=$((failures + 1)) if [ "${failures}" -gt 0 ]; then printf '[probe] completed with %s suite failure(s)\n' "${failures}" >&2 diff --git a/services/quality/kustomization.yaml b/services/quality/kustomization.yaml new file mode 100644 index 00000000..61ebb2a1 --- /dev/null +++ b/services/quality/kustomization.yaml @@ -0,0 +1,16 @@ +# services/quality/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - sonarqube-serviceaccount.yaml + - quality-vault-serviceaccount.yaml + - sonarqube-pvc.yaml + - sonarqube-service.yaml + - sonarqube-deployment.yaml + - sonarqube-exporter-configmap.yaml + - sonarqube-exporter-service.yaml + - sonarqube-exporter-deployment.yaml + - oauth2-proxy-sonarqube.yaml + - sonarqube-certificate.yaml + - sonarqube-ingress.yaml diff --git a/services/quality/namespace.yaml b/services/quality/namespace.yaml new file mode 100644 index 00000000..fc0b7405 --- /dev/null +++ b/services/quality/namespace.yaml @@ -0,0 +1,6 @@ +# services/quality/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: quality + diff --git a/services/quality/oauth2-proxy-sonarqube.yaml b/services/quality/oauth2-proxy-sonarqube.yaml new file mode 100644 index 00000000..ff958962 --- /dev/null +++ b/services/quality/oauth2-proxy-sonarqube.yaml @@ -0,0 +1,118 @@ +# services/quality/oauth2-proxy-sonarqube.yaml +apiVersion: v1 +kind: Service +metadata: + name: oauth2-proxy-sonarqube + namespace: quality + labels: + app: oauth2-proxy-sonarqube +spec: + ports: + - name: http + port: 80 + targetPort: 4180 + selector: + app: oauth2-proxy-sonarqube + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: oauth2-proxy-sonarqube + namespace: quality + labels: + app: oauth2-proxy-sonarqube +spec: + replicas: 2 + selector: + matchLabels: + app: oauth2-proxy-sonarqube + template: + metadata: + labels: + app: oauth2-proxy-sonarqube + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "quality" + vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/quality/sonarqube-oidc" + vault.hashicorp.com/agent-inject-template-oidc-config: | + {{- with secret "kv/data/atlas/quality/sonarqube-oidc" -}} + client_id = "{{ .Data.data.client_id }}" + client_secret = "{{ .Data.data.client_secret }}" + cookie_secret = "{{ .Data.data.cookie_secret }}" + {{- end -}} + spec: + serviceAccountName: quality-vault-sync + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: hardware + operator: In + values: ["rpi5"] + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + containers: + - name: oauth2-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + imagePullPolicy: IfNotPresent + args: + - --provider=oidc + - --config=/vault/secrets/oidc-config + - --redirect-url=https://quality.bstein.dev/oauth2/callback + - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas + - --scope=openid profile email groups + - --email-domain=* + - --allowed-group=admin + - --allowed-group=/admin + - --allowed-group=dev + - --allowed-group=/dev + - --set-xauthrequest=true + - --pass-access-token=true + - --set-authorization-header=true + - --cookie-secure=true + - --cookie-samesite=lax + - --cookie-refresh=20m + - --cookie-expire=168h + - --insecure-oidc-allow-unverified-email=true + - --upstream=http://sonarqube.quality.svc.cluster.local:9000 + - --http-address=0.0.0.0:4180 + - --skip-provider-button=true + - --approval-prompt=auto + - --skip-jwt-bearer-tokens=true + - --oidc-groups-claim=groups + - --cookie-domain=quality.bstein.dev + ports: + - containerPort: 4180 + name: http + readinessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ping + port: 4180 + initialDelaySeconds: 20 + periodSeconds: 20 + resources: + requests: + cpu: 25m + memory: 64Mi + limits: + cpu: 250m + memory: 256Mi diff --git a/services/quality/quality-vault-serviceaccount.yaml b/services/quality/quality-vault-serviceaccount.yaml new file mode 100644 index 00000000..dfb2db10 --- /dev/null +++ b/services/quality/quality-vault-serviceaccount.yaml @@ -0,0 +1,7 @@ +# services/quality/quality-vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: quality-vault-sync + namespace: quality + diff --git a/services/quality/sonarqube-certificate.yaml b/services/quality/sonarqube-certificate.yaml new file mode 100644 index 00000000..f082bee6 --- /dev/null +++ b/services/quality/sonarqube-certificate.yaml @@ -0,0 +1,14 @@ +# services/quality/sonarqube-certificate.yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: quality-tls + namespace: quality +spec: + secretName: quality-tls + issuerRef: + kind: ClusterIssuer + name: letsencrypt + dnsNames: + - quality.bstein.dev + diff --git a/services/quality/sonarqube-deployment.yaml b/services/quality/sonarqube-deployment.yaml new file mode 100644 index 00000000..2c2f2bdd --- /dev/null +++ b/services/quality/sonarqube-deployment.yaml @@ -0,0 +1,122 @@ +# services/quality/sonarqube-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sonarqube + namespace: quality + labels: + app: sonarqube +spec: + replicas: 1 + selector: + matchLabels: + app: sonarqube + template: + metadata: + labels: + app: sonarqube + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "quality" + vault.hashicorp.com/agent-inject-secret-sonarqube-db-env.sh: "kv/data/atlas/quality/sonarqube-db" + vault.hashicorp.com/agent-inject-template-sonarqube-db-env.sh: | + {{- with secret "kv/data/atlas/quality/sonarqube-db" -}} + export SONAR_JDBC_USERNAME="{{ .Data.data.username }}" + export SONAR_JDBC_PASSWORD="{{ .Data.data.password }}" + {{- end -}} + spec: + serviceAccountName: sonarqube + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: hardware + operator: In + values: ["rpi5"] + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + initContainers: + - name: prepare-volume-permissions + image: busybox:1.36 + command: + - /bin/sh + - -ec + - | + mkdir -p /opt/sonarqube/data /opt/sonarqube/extensions /opt/sonarqube/logs /opt/sonarqube/temp + chown -R 1000:1000 /opt/sonarqube + volumeMounts: + - name: sonarqube-data + mountPath: /opt/sonarqube + containers: + - name: sonarqube + image: sonarqube:lts-community + imagePullPolicy: IfNotPresent + command: + - /bin/bash + - -ec + args: + - | + set -euo pipefail + . /vault/secrets/sonarqube-db-env.sh + exec /opt/sonarqube/docker/entrypoint.sh + env: + - name: SONAR_JDBC_URL + value: jdbc:postgresql://postgres-service.postgres.svc.cluster.local:5432/sonarqube + - name: SONAR_ES_BOOTSTRAP_CHECKS_DISABLE + value: "true" + - name: SONAR_WEB_HOST + value: "0.0.0.0" + ports: + - containerPort: 9000 + name: http + readinessProbe: + httpGet: + path: /api/system/status + port: 9000 + initialDelaySeconds: 60 + timeoutSeconds: 5 + periodSeconds: 10 + failureThreshold: 12 + livenessProbe: + httpGet: + path: /api/system/status + port: 9000 + initialDelaySeconds: 120 + timeoutSeconds: 5 + periodSeconds: 20 + failureThreshold: 6 + resources: + requests: + cpu: 500m + memory: 2Gi + limits: + cpu: "2" + memory: 4Gi + volumeMounts: + - name: sonarqube-data + mountPath: /opt/sonarqube/data + subPath: data + - name: sonarqube-data + mountPath: /opt/sonarqube/extensions + subPath: extensions + - name: sonarqube-data + mountPath: /opt/sonarqube/logs + subPath: logs + - name: sonarqube-data + mountPath: /opt/sonarqube/temp + subPath: temp + volumes: + - name: sonarqube-data + persistentVolumeClaim: + claimName: sonarqube-data diff --git a/services/quality/sonarqube-exporter-configmap.yaml b/services/quality/sonarqube-exporter-configmap.yaml new file mode 100644 index 00000000..8c7dbdaa --- /dev/null +++ b/services/quality/sonarqube-exporter-configmap.yaml @@ -0,0 +1,192 @@ +# services/quality/sonarqube-exporter-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: sonarqube-exporter-script + namespace: quality +data: + exporter.py: | + #!/usr/bin/env python3 + import base64 + import json + import os + import threading + import time + import urllib.error + import urllib.parse + import urllib.request + from http.server import BaseHTTPRequestHandler, HTTPServer + + SONARQUBE_URL = os.getenv("SONARQUBE_URL", "http://sonarqube.quality.svc.cluster.local:9000").strip().rstrip("/") + SONARQUBE_TOKEN = os.getenv("SONARQUBE_TOKEN", "").strip() + SONARQUBE_TIMEOUT_SECONDS = float(os.getenv("SONARQUBE_TIMEOUT_SECONDS", "10")) + SONARQUBE_EXPORTER_PORT = int(os.getenv("SONARQUBE_EXPORTER_PORT", "9798")) + SONARQUBE_EXPORTER_CACHE_TTL_SECONDS = int(os.getenv("SONARQUBE_EXPORTER_CACHE_TTL_SECONDS", "45")) + SONARQUBE_PROJECT_LIMIT = int(os.getenv("SONARQUBE_PROJECT_LIMIT", "200")) + + CACHE_LOCK = threading.Lock() + CACHE_EXPIRES_AT = 0.0 + CACHE_BODY = "" + + + def _escape(value: str) -> str: + return value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") + + + def _fetch_json(path: str): + url = f"{SONARQUBE_URL}{path}" + req = urllib.request.Request(url, method="GET") + if SONARQUBE_TOKEN: + encoded = base64.b64encode(f"{SONARQUBE_TOKEN}:".encode("utf-8")).decode("utf-8") + req.add_header("Authorization", f"Basic {encoded}") + try: + with urllib.request.urlopen(req, timeout=SONARQUBE_TIMEOUT_SECONDS) as resp: + payload = json.loads(resp.read().decode("utf-8")) + return payload, "" + except urllib.error.HTTPError as exc: + return None, f"http_{exc.code}" + except Exception as exc: # noqa: BLE001 + return None, exc.__class__.__name__ + + + def _metrics_body() -> str: + lines = [] + now = time.time() + scrape_success = 1 + + lines.append("# HELP sonarqube_exporter_last_scrape_timestamp_seconds Unix timestamp when exporter last refreshed data.") + lines.append("# TYPE sonarqube_exporter_last_scrape_timestamp_seconds gauge") + lines.append(f"sonarqube_exporter_last_scrape_timestamp_seconds {now:.3f}") + + system_payload, system_error = _fetch_json("/api/system/status") + system_status = "unknown" + sonarqube_up = 0 + if isinstance(system_payload, dict): + system_status = str(system_payload.get("status") or "unknown") + elif system_error: + system_status = system_error + scrape_success = 0 + + if system_status.upper() in { + "UP", + "STARTING", + "DB_MIGRATION_NEEDED", + "DB_MIGRATION_RUNNING", + }: + sonarqube_up = 1 + + lines.append("# HELP sonarqube_up SonarQube API reachability and health (1=reachable/healthy-ish, 0=down).") + lines.append("# TYPE sonarqube_up gauge") + lines.append(f"sonarqube_up {sonarqube_up}") + + lines.append("# HELP sonarqube_system_status Current SonarQube system status label.") + lines.append("# TYPE sonarqube_system_status gauge") + lines.append(f'sonarqube_system_status{{status="{_escape(system_status)}"}} 1') + + projects_payload, projects_error = _fetch_json("/api/projects/search?ps=500&p=1") + project_items = [] + projects_total = 0 + if isinstance(projects_payload, dict): + paging = projects_payload.get("paging") or {} + projects_total = int(paging.get("total") or 0) + project_items = list(projects_payload.get("components") or []) + else: + scrape_success = 0 + + lines.append("# HELP sonarqube_projects_total Total discovered SonarQube projects.") + lines.append("# TYPE sonarqube_projects_total gauge") + lines.append(f"sonarqube_projects_total {projects_total}") + + gate_counts = {} + gate_fetch_errors = 0 + inspected = 0 + project_samples = [] + for project in project_items: + if inspected >= SONARQUBE_PROJECT_LIMIT: + break + key = str(project.get("key") or "").strip() + if not key: + continue + inspected += 1 + + gate_payload, gate_error = _fetch_json( + "/api/qualitygates/project_status?projectKey=" + urllib.parse.quote_plus(key) + ) + if not isinstance(gate_payload, dict): + gate_fetch_errors += 1 + continue + project_status = gate_payload.get("projectStatus") or {} + gate_status = str(project_status.get("status") or "UNKNOWN").upper() + gate_counts[gate_status] = gate_counts.get(gate_status, 0) + 1 + is_ok = 1 if gate_status == "OK" else 0 + project_samples.append( + f'sonarqube_project_quality_gate_pass{{project_key="{_escape(key)}",status="{_escape(gate_status)}"}} {is_ok}' + ) + + lines.append("# HELP sonarqube_project_quality_gate_pass Project quality gate pass state (1=OK, 0=not OK).") + lines.append("# TYPE sonarqube_project_quality_gate_pass gauge") + lines.extend(project_samples) + lines.append("# HELP sonarqube_quality_gate_projects_total Number of projects by quality gate status.") + lines.append("# TYPE sonarqube_quality_gate_projects_total gauge") + for status, count in sorted(gate_counts.items()): + lines.append(f'sonarqube_quality_gate_projects_total{{status="{_escape(status)}"}} {count}') + + lines.append("# HELP sonarqube_quality_gate_fetch_errors_total Number of project gate API fetch failures in the last scrape.") + lines.append("# TYPE sonarqube_quality_gate_fetch_errors_total gauge") + lines.append(f"sonarqube_quality_gate_fetch_errors_total {gate_fetch_errors}") + + lines.append("# HELP sonarqube_exporter_scrape_success Exporter scrape success (1=success, 0=partial/error).") + lines.append("# TYPE sonarqube_exporter_scrape_success gauge") + lines.append(f"sonarqube_exporter_scrape_success {scrape_success}") + + if projects_error: + lines.append("# HELP sonarqube_exporter_projects_error Indicates projects API failure on the most recent scrape.") + lines.append("# TYPE sonarqube_exporter_projects_error gauge") + lines.append(f'sonarqube_exporter_projects_error{{error="{_escape(projects_error)}"}} 1') + + return "\n".join(lines) + "\n" + + + def _get_metrics() -> str: + global CACHE_BODY, CACHE_EXPIRES_AT + now = time.time() + with CACHE_LOCK: + if CACHE_BODY and now < CACHE_EXPIRES_AT: + return CACHE_BODY + CACHE_BODY = _metrics_body() + CACHE_EXPIRES_AT = now + max(5, SONARQUBE_EXPORTER_CACHE_TTL_SECONDS) + return CACHE_BODY + + + class Handler(BaseHTTPRequestHandler): + def do_GET(self): # noqa: N802 + if self.path in ("/-/healthy", "/healthz"): + body = b"ok\n" + self.send_response(200) + self.send_header("Content-Type", "text/plain; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + return + if self.path == "/metrics": + body = _get_metrics().encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + return + self.send_response(404) + self.end_headers() + + def log_message(self, fmt, *args): # noqa: A003 + return + + + def main(): + server = HTTPServer(("0.0.0.0", SONARQUBE_EXPORTER_PORT), Handler) + server.serve_forever() + + + if __name__ == "__main__": + main() diff --git a/services/quality/sonarqube-exporter-deployment.yaml b/services/quality/sonarqube-exporter-deployment.yaml new file mode 100644 index 00000000..f794211c --- /dev/null +++ b/services/quality/sonarqube-exporter-deployment.yaml @@ -0,0 +1,97 @@ +# services/quality/sonarqube-exporter-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sonarqube-exporter + namespace: quality + labels: + app: sonarqube-exporter +spec: + replicas: 1 + selector: + matchLabels: + app: sonarqube-exporter + template: + metadata: + labels: + app: sonarqube-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9798" + prometheus.io/path: /metrics + spec: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: hardware + operator: In + values: ["rpi5"] + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + containers: + - name: exporter + image: python:3.12-slim + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -ec + args: + - | + cp /config/exporter.py /app/exporter.py + python /app/exporter.py + env: + - name: SONARQUBE_URL + value: http://sonarqube.quality.svc.cluster.local:9000 + - name: SONARQUBE_EXPORTER_PORT + value: "9798" + - name: SONARQUBE_EXPORTER_CACHE_TTL_SECONDS + value: "45" + - name: SONARQUBE_PROJECT_LIMIT + value: "250" + ports: + - name: metrics + containerPort: 9798 + readinessProbe: + httpGet: + path: /-/healthy + port: 9798 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /-/healthy + port: 9798 + initialDelaySeconds: 20 + periodSeconds: 20 + resources: + requests: + cpu: 25m + memory: 96Mi + limits: + cpu: 250m + memory: 256Mi + volumeMounts: + - name: exporter-script + mountPath: /config + readOnly: true + - name: app-tmp + mountPath: /app + volumes: + - name: exporter-script + configMap: + name: sonarqube-exporter-script + defaultMode: 493 + - name: app-tmp + emptyDir: {} diff --git a/services/quality/sonarqube-exporter-service.yaml b/services/quality/sonarqube-exporter-service.yaml new file mode 100644 index 00000000..af53a2a2 --- /dev/null +++ b/services/quality/sonarqube-exporter-service.yaml @@ -0,0 +1,19 @@ +# services/quality/sonarqube-exporter-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: sonarqube-exporter + namespace: quality + labels: + app: sonarqube-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9798" + prometheus.io/path: /metrics +spec: + selector: + app: sonarqube-exporter + ports: + - name: metrics + port: 9798 + targetPort: metrics diff --git a/services/quality/sonarqube-ingress.yaml b/services/quality/sonarqube-ingress.yaml new file mode 100644 index 00000000..4f1f6c2e --- /dev/null +++ b/services/quality/sonarqube-ingress.yaml @@ -0,0 +1,28 @@ +# services/quality/sonarqube-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: sonarqube + namespace: quality + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" +spec: + ingressClassName: traefik + rules: + - host: quality.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: oauth2-proxy-sonarqube + port: + number: 80 + tls: + - hosts: + - quality.bstein.dev + secretName: quality-tls + diff --git a/services/quality/sonarqube-pvc.yaml b/services/quality/sonarqube-pvc.yaml new file mode 100644 index 00000000..a62d70d4 --- /dev/null +++ b/services/quality/sonarqube-pvc.yaml @@ -0,0 +1,14 @@ +# services/quality/sonarqube-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: sonarqube-data + namespace: quality +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + storageClassName: astreae + diff --git a/services/quality/sonarqube-service.yaml b/services/quality/sonarqube-service.yaml new file mode 100644 index 00000000..96d785de --- /dev/null +++ b/services/quality/sonarqube-service.yaml @@ -0,0 +1,15 @@ +# services/quality/sonarqube-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: sonarqube + namespace: quality + labels: + app: sonarqube +spec: + selector: + app: sonarqube + ports: + - name: http + port: 9000 + targetPort: 9000 diff --git a/services/quality/sonarqube-serviceaccount.yaml b/services/quality/sonarqube-serviceaccount.yaml new file mode 100644 index 00000000..ec2bddfd --- /dev/null +++ b/services/quality/sonarqube-serviceaccount.yaml @@ -0,0 +1,7 @@ +# services/quality/sonarqube-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sonarqube + namespace: quality +