monitoring: refine jobs/overview panels

This commit is contained in:
Brad Stein 2026-01-21 14:30:55 -03:00
parent 2138b93242
commit b0996e9a4f
9 changed files with 446 additions and 236 deletions

View File

@ -70,6 +70,7 @@ WORKER_NODES = [
"titan-13",
"titan-14",
"titan-15",
"titan-16",
"titan-17",
"titan-18",
"titan-19",
@ -333,9 +334,10 @@ GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)"
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE}))"
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})"
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE})) or on() vector(0)"
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE}) or on() vector(0)"
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED}) or on() vector(0)"
ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))'
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))'
ARIADNE_TASK_ERRORS_30D = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[30d]))'
@ -344,10 +346,19 @@ ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_to
ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))'
ARIADNE_TASK_ATTEMPTS_1H = 'sum(increase(ariadne_task_runs_total[1h]))'
ARIADNE_TASK_FAILURES_1H = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
ARIADNE_TASK_ATTEMPTS_SERIES = 'sum(increase(ariadne_task_runs_total[$__interval]))'
ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="error"}[$__interval]))'
ARIADNE_TASK_WARNINGS_SERIES = (
'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)'
)
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600"
)
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
)
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}'
ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}'
@ -370,6 +381,8 @@ ONEOFF_JOB_POD_AGE_HOURS = (
'* on(namespace,pod) group_left(phase) '
'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})'
)
GLUE_LAST_SUCCESS_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SUCCESS}[$__range])) / 3600"
GLUE_LAST_SCHEDULE_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SCHEDULE}[$__range])) / 3600"
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES)
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
@ -1032,7 +1045,7 @@ def build_overview():
30,
"Mail Sent (1d)",
'max(postmark_outbound_sent{window="1d"})',
{"h": 3, "w": 5, "x": 0, "y": 8},
{"h": 3, "w": 6, "x": 0, "y": 8},
unit="none",
links=link_to("atlas-mail"),
)
@ -1043,7 +1056,7 @@ def build_overview():
"type": "stat",
"title": "Mail Bounces (1d)",
"datasource": PROM_DS,
"gridPos": {"h": 3, "w": 5, "x": 10, "y": 8},
"gridPos": {"h": 3, "w": 6, "x": 12, "y": 8},
"targets": [
{
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
@ -1089,7 +1102,7 @@ def build_overview():
32,
"Mail Success Rate (1d)",
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
{"h": 3, "w": 5, "x": 5, "y": 8},
{"h": 3, "w": 6, "x": 6, "y": 8},
unit="percent",
thresholds=mail_success_thresholds,
decimals=1,
@ -1101,7 +1114,7 @@ def build_overview():
33,
"Mail Limit Used (30d)",
"max(postmark_sending_limit_used_percent)",
{"h": 3, "w": 5, "x": 15, "y": 8},
{"h": 3, "w": 6, "x": 18, "y": 8},
unit="percent",
thresholds=mail_limit_thresholds,
decimals=1,
@ -1121,7 +1134,7 @@ def build_overview():
panel_id,
title,
expr,
{"h": 5, "w": 6, "x": 6 * idx, "y": 11},
{"h": 3, "w": 6, "x": 6 * idx, "y": 11},
unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
links=link_to("atlas-storage"),
@ -1133,26 +1146,44 @@ def build_overview():
40,
"One-off Job Pods (age hours)",
ONEOFF_JOB_POD_AGE_HOURS,
{"h": 6, "w": 4, "x": 0, "y": 16},
{"h": 6, "w": 6, "x": 0, "y": 14},
unit="h",
instant=True,
legend="{{namespace}}/{{pod}}",
thresholds=age_thresholds,
limit=8,
decimals=2,
)
)
panels.append(
{
"id": 41,
"type": "timeseries",
"title": "Ariadne Attempts vs Failures (1h)",
"title": "Ariadne Attempts / Warnings / Failures",
"datasource": PROM_DS,
"gridPos": {"h": 6, "w": 8, "x": 4, "y": 16},
"gridPos": {"h": 6, "w": 6, "x": 6, "y": 14},
"targets": [
{"expr": ARIADNE_TASK_ATTEMPTS_1H, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_FAILURES_1H, "refId": "B", "legendFormat": "Failures"},
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_WARNINGS_SERIES, "refId": "B", "legendFormat": "Warnings"},
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "C", "legendFormat": "Failures"},
],
"fieldConfig": {"defaults": {"unit": "none"}, "overrides": []},
"fieldConfig": {
"defaults": {"unit": "none"},
"overrides": [
{
"matcher": {"id": "byName", "options": "Warnings"},
"properties": [
{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}
],
},
{
"matcher": {"id": "byName", "options": "Failures"},
"properties": [
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
],
},
],
},
"options": {
"legend": {"displayMode": "table", "placement": "right"},
"tooltip": {"mode": "multi"},
@ -1164,7 +1195,7 @@ def build_overview():
42,
"Ariadne Test Success Rate",
ARIADNE_TEST_SUCCESS_RATE,
{"h": 6, "w": 8, "x": 12, "y": 16},
{"h": 6, "w": 6, "x": 12, "y": 14},
unit="percent",
legend=None,
legend_display="list",
@ -1175,7 +1206,7 @@ def build_overview():
43,
"Tests with Failures (24h)",
ARIADNE_TEST_FAILURES_24H,
{"h": 6, "w": 4, "x": 20, "y": 16},
{"h": 6, "w": 6, "x": 18, "y": 14},
unit="none",
instant=True,
legend="{{result}}",
@ -1200,7 +1231,7 @@ def build_overview():
11,
"Namespace CPU Share",
namespace_cpu_share_expr(cpu_scope),
{"h": 9, "w": 8, "x": 0, "y": 22},
{"h": 9, "w": 8, "x": 0, "y": 20},
links=namespace_scope_links("namespace_scope_cpu"),
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
@ -1210,7 +1241,7 @@ def build_overview():
12,
"Namespace GPU Share",
namespace_gpu_share_expr(gpu_scope),
{"h": 9, "w": 8, "x": 8, "y": 22},
{"h": 9, "w": 8, "x": 8, "y": 20},
links=namespace_scope_links("namespace_scope_gpu"),
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
@ -1220,7 +1251,7 @@ def build_overview():
13,
"Namespace RAM Share",
namespace_ram_share_expr(ram_scope),
{"h": 9, "w": 8, "x": 16, "y": 22},
{"h": 9, "w": 8, "x": 16, "y": 20},
links=namespace_scope_links("namespace_scope_ram"),
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
@ -1232,7 +1263,7 @@ def build_overview():
14,
"Worker Node CPU",
node_cpu_expr(worker_filter),
{"h": 12, "w": 12, "x": 0, "y": 38},
{"h": 12, "w": 12, "x": 0, "y": 36},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -1246,7 +1277,7 @@ def build_overview():
15,
"Worker Node RAM",
node_mem_expr(worker_filter),
{"h": 12, "w": 12, "x": 12, "y": 38},
{"h": 12, "w": 12, "x": 12, "y": 36},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -1261,7 +1292,7 @@ def build_overview():
16,
"Control plane CPU",
node_cpu_expr(CONTROL_ALL_REGEX),
{"h": 10, "w": 12, "x": 0, "y": 50},
{"h": 10, "w": 12, "x": 0, "y": 48},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -1273,7 +1304,7 @@ def build_overview():
17,
"Control plane RAM",
node_mem_expr(CONTROL_ALL_REGEX),
{"h": 10, "w": 12, "x": 12, "y": 50},
{"h": 10, "w": 12, "x": 12, "y": 48},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -1286,7 +1317,7 @@ def build_overview():
28,
"Node Pod Share",
'(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100',
{"h": 10, "w": 12, "x": 0, "y": 60},
{"h": 10, "w": 12, "x": 0, "y": 58},
)
)
panels.append(
@ -1294,7 +1325,7 @@ def build_overview():
29,
"Top Nodes by Pod Count",
'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))',
{"h": 10, "w": 12, "x": 12, "y": 60},
{"h": 10, "w": 12, "x": 12, "y": 58},
unit="none",
limit=12,
decimals=0,
@ -1316,7 +1347,7 @@ def build_overview():
18,
"Cluster Ingress Throughput",
NET_INGRESS_EXPR,
{"h": 7, "w": 8, "x": 0, "y": 31},
{"h": 7, "w": 8, "x": 0, "y": 29},
unit="Bps",
legend="Ingress (Traefik)",
legend_display="list",
@ -1329,7 +1360,7 @@ def build_overview():
19,
"Cluster Egress Throughput",
NET_EGRESS_EXPR,
{"h": 7, "w": 8, "x": 8, "y": 31},
{"h": 7, "w": 8, "x": 8, "y": 29},
unit="Bps",
legend="Egress (Traefik)",
legend_display="list",
@ -1342,7 +1373,7 @@ def build_overview():
20,
"Intra-Cluster Throughput",
NET_INTERNAL_EXPR,
{"h": 7, "w": 8, "x": 16, "y": 31},
{"h": 7, "w": 8, "x": 16, "y": 29},
unit="Bps",
legend="Internal traffic",
legend_display="list",
@ -1356,7 +1387,7 @@ def build_overview():
21,
"Root Filesystem Usage",
root_usage_expr(),
{"h": 16, "w": 12, "x": 0, "y": 70},
{"h": 16, "w": 12, "x": 0, "y": 68},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -1371,7 +1402,7 @@ def build_overview():
22,
"Nodes Closest to Full Root Disks",
f"topk(12, {root_usage_expr()})",
{"h": 16, "w": 12, "x": 12, "y": 70},
{"h": 16, "w": 12, "x": 12, "y": 68},
unit="percent",
thresholds=PERCENT_THRESHOLDS,
links=link_to("atlas-storage"),
@ -2300,9 +2331,9 @@ def build_jobs_dashboard():
panels.append(
bargauge_panel(
1,
"Ariadne Task Errors (24h)",
ARIADNE_TASK_ERRORS_24H,
{"h": 7, "w": 6, "x": 0, "y": 0},
"Ariadne Task Errors (range)",
ARIADNE_TASK_ERRORS_RANGE,
{"h": 7, "w": 8, "x": 0, "y": 0},
unit="none",
instant=True,
legend="{{task}}",
@ -2313,14 +2344,31 @@ def build_jobs_dashboard():
{
"id": 2,
"type": "timeseries",
"title": "Ariadne Attempts vs Failures (1h)",
"title": "Ariadne Attempts / Warnings / Failures",
"datasource": PROM_DS,
"gridPos": {"h": 7, "w": 12, "x": 6, "y": 0},
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
"targets": [
{"expr": ARIADNE_TASK_ATTEMPTS_1H, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_FAILURES_1H, "refId": "B", "legendFormat": "Failures"},
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_WARNINGS_SERIES, "refId": "B", "legendFormat": "Warnings"},
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "C", "legendFormat": "Failures"},
],
"fieldConfig": {"defaults": {"unit": "none"}, "overrides": []},
"fieldConfig": {
"defaults": {"unit": "none"},
"overrides": [
{
"matcher": {"id": "byName", "options": "Warnings"},
"properties": [
{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}
],
},
{
"matcher": {"id": "byName", "options": "Failures"},
"properties": [
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
],
},
],
},
"options": {
"legend": {"displayMode": "table", "placement": "right"},
"tooltip": {"mode": "multi"},
@ -2332,12 +2380,13 @@ def build_jobs_dashboard():
3,
"One-off Job Pods (age hours)",
ONEOFF_JOB_POD_AGE_HOURS,
{"h": 7, "w": 6, "x": 18, "y": 0},
{"h": 7, "w": 8, "x": 16, "y": 0},
unit="h",
instant=True,
legend="{{namespace}}/{{pod}}",
thresholds=age_thresholds,
limit=12,
decimals=2,
)
)
panels.append(
@ -2407,48 +2456,53 @@ def build_jobs_dashboard():
bargauge_panel(
10,
"Ariadne Schedule Last Error (hours ago)",
ARIADNE_SCHEDULE_LAST_ERROR_HOURS,
{"h": 8, "w": 12, "x": 0, "y": 11},
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
{"h": 6, "w": 12, "x": 0, "y": 17},
unit="h",
instant=True,
legend="{{task}}",
thresholds=recent_error_thresholds,
sort_order="asc",
decimals=2,
)
)
panels.append(
bargauge_panel(
11,
"Ariadne Schedule Last Success (hours ago)",
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
{"h": 8, "w": 12, "x": 12, "y": 11},
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 17},
unit="h",
instant=True,
legend="{{task}}",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
bargauge_panel(
12,
"Glue Jobs Last Success (hours ago)",
GLUE_LAST_SUCCESS_AGE_HOURS,
{"h": 8, "w": 12, "x": 0, "y": 19},
GLUE_LAST_SUCCESS_RANGE_HOURS,
{"h": 6, "w": 12, "x": 0, "y": 23},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
bargauge_panel(
13,
"Glue Jobs Last Schedule (hours ago)",
GLUE_LAST_SCHEDULE_AGE_HOURS,
{"h": 8, "w": 12, "x": 12, "y": 19},
GLUE_LAST_SCHEDULE_RANGE_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 23},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
@ -2456,7 +2510,7 @@ def build_jobs_dashboard():
14,
"Ariadne Task Errors (1h)",
ARIADNE_TASK_ERRORS_1H,
{"h": 8, "w": 12, "x": 0, "y": 27},
{"h": 6, "w": 12, "x": 0, "y": 29},
unit="none",
instant=True,
legend="{{task}}",
@ -2468,7 +2522,7 @@ def build_jobs_dashboard():
15,
"Ariadne Task Errors (30d)",
ARIADNE_TASK_ERRORS_30D,
{"h": 8, "w": 12, "x": 12, "y": 27},
{"h": 6, "w": 12, "x": 12, "y": 29},
unit="none",
instant=True,
legend="{{task}}",
@ -2480,7 +2534,7 @@ def build_jobs_dashboard():
16,
"Ariadne Access Requests",
ARIADNE_ACCESS_REQUESTS,
{"h": 6, "w": 8, "x": 0, "y": 35},
{"h": 6, "w": 8, "x": 0, "y": 11},
unit="none",
instant=True,
legend="{{status}}",
@ -2491,7 +2545,7 @@ def build_jobs_dashboard():
17,
"Ariadne CI Coverage (%)",
ARIADNE_CI_COVERAGE,
{"h": 6, "w": 4, "x": 8, "y": 35},
{"h": 6, "w": 4, "x": 8, "y": 11},
unit="percent",
decimals=1,
instant=True,
@ -2503,7 +2557,7 @@ def build_jobs_dashboard():
18,
"Ariadne CI Tests (latest)",
ARIADNE_CI_TESTS,
{"h": 6, "w": 12, "x": 12, "y": 35},
{"h": 6, "w": 12, "x": 12, "y": 11},
unit="none",
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
instant=True,

View File

@ -7,20 +7,20 @@
{
"id": 1,
"type": "bargauge",
"title": "Ariadne Task Errors (24h)",
"title": "Ariadne Task Errors (range)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 7,
"w": 6,
"w": 8,
"x": 0,
"y": 0
},
"targets": [
{
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[24h]))",
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range]))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -81,26 +81,31 @@
{
"id": 2,
"type": "timeseries",
"title": "Ariadne Attempts vs Failures (1h)",
"title": "Ariadne Attempts / Warnings / Failures",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 7,
"w": 12,
"x": 6,
"w": 8,
"x": 8,
"y": 0
},
"targets": [
{
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
"refId": "A",
"legendFormat": "Attempts"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
"refId": "B",
"legendFormat": "Warnings"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
"refId": "C",
"legendFormat": "Failures"
}
],
@ -108,7 +113,38 @@
"defaults": {
"unit": "none"
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Warnings"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "yellow"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Failures"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "red"
}
}
]
}
]
},
"options": {
"legend": {
@ -130,8 +166,8 @@
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"w": 8,
"x": 16,
"y": 0
},
"targets": [
@ -167,7 +203,8 @@
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -216,7 +253,7 @@
},
"targets": [
{
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)))",
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)",
"refId": "A"
}
],
@ -284,7 +321,7 @@
},
"targets": [
{
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))",
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)",
"refId": "A"
}
],
@ -344,7 +381,7 @@
},
"targets": [
{
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)",
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)",
"refId": "A"
}
],
@ -577,14 +614,14 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 11
"y": 17
},
"targets": [
{
"expr": "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600",
"expr": "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -615,7 +652,8 @@
"value": 24
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -637,7 +675,7 @@
"fields": [
"Value"
],
"order": "desc"
"order": "asc"
}
}
]
@ -651,14 +689,14 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 12,
"y": 11
"y": 17
},
"targets": [
{
"expr": "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600",
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -689,7 +727,8 @@
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -725,14 +764,14 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 19
"y": 23
},
"targets": [
{
"expr": "((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
"expr": "(time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"instant": true
@ -763,7 +802,8 @@
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -799,14 +839,14 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 12,
"y": 19
"y": 23
},
"targets": [
{
"expr": "((time() - (kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
"expr": "(time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"instant": true
@ -837,7 +877,8 @@
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -873,10 +914,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 27
"y": 29
},
"targets": [
{
@ -947,10 +988,10 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 12,
"y": 27
"y": 29
},
"targets": [
{
@ -1024,7 +1065,7 @@
"h": 6,
"w": 8,
"x": 0,
"y": 35
"y": 11
},
"targets": [
{
@ -1098,7 +1139,7 @@
"h": 6,
"w": 4,
"x": 8,
"y": 35
"y": 11
},
"targets": [
{
@ -1161,7 +1202,7 @@
"h": 6,
"w": 12,
"x": 12,
"y": 35
"y": 11
},
"targets": [
{

View File

@ -20,7 +20,7 @@
},
"targets": [
{
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"refId": "A"
}
],
@ -46,7 +46,7 @@
"unit": "none",
"custom": {
"displayMode": "auto",
"valueSuffix": "/19"
"valueSuffix": "/20"
}
},
"overrides": []

View File

@ -449,14 +449,14 @@
},
"targets": [
{
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"min": 0,
"max": 19,
"max": 20,
"thresholds": {
"mode": "absolute",
"steps": [
@ -466,15 +466,15 @@
},
{
"color": "orange",
"value": 17
},
{
"color": "yellow",
"value": 18
},
{
"color": "green",
"color": "yellow",
"value": 19
},
{
"color": "green",
"value": 20
}
]
}
@ -796,7 +796,7 @@
},
"gridPos": {
"h": 3,
"w": 5,
"w": 6,
"x": 0,
"y": 8
},
@ -863,8 +863,8 @@
},
"gridPos": {
"h": 3,
"w": 5,
"x": 10,
"w": 6,
"x": 12,
"y": 8
},
"targets": [
@ -968,8 +968,8 @@
},
"gridPos": {
"h": 3,
"w": 5,
"x": 5,
"w": 6,
"x": 6,
"y": 8
},
"targets": [
@ -1044,8 +1044,8 @@
},
"gridPos": {
"h": 3,
"w": 5,
"x": 15,
"w": 6,
"x": 18,
"y": 8
},
"targets": [
@ -1119,7 +1119,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 0,
"y": 11
@ -1194,7 +1194,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 6,
"y": 11
@ -1269,7 +1269,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 12,
"y": 11
@ -1336,7 +1336,7 @@
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 18,
"y": 11
@ -1404,9 +1404,9 @@
},
"gridPos": {
"h": 6,
"w": 4,
"w": 6,
"x": 0,
"y": 16
"y": 14
},
"targets": [
{
@ -1441,7 +1441,8 @@
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -1477,26 +1478,31 @@
{
"id": 41,
"type": "timeseries",
"title": "Ariadne Attempts vs Failures (1h)",
"title": "Ariadne Attempts / Warnings / Failures",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"w": 8,
"x": 4,
"y": 16
"w": 6,
"x": 6,
"y": 14
},
"targets": [
{
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
"refId": "A",
"legendFormat": "Attempts"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
"refId": "B",
"legendFormat": "Warnings"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
"refId": "C",
"legendFormat": "Failures"
}
],
@ -1504,7 +1510,38 @@
"defaults": {
"unit": "none"
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Warnings"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "yellow"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Failures"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "red"
}
}
]
}
]
},
"options": {
"legend": {
@ -1526,9 +1563,9 @@
},
"gridPos": {
"h": 6,
"w": 8,
"w": 6,
"x": 12,
"y": 16
"y": 14
},
"targets": [
{
@ -1562,9 +1599,9 @@
},
"gridPos": {
"h": 6,
"w": 4,
"x": 20,
"y": 16
"w": 6,
"x": 18,
"y": 14
},
"targets": [
{
@ -1638,7 +1675,7 @@
"h": 9,
"w": 8,
"x": 0,
"y": 22
"y": 20
},
"targets": [
{
@ -1707,7 +1744,7 @@
"h": 9,
"w": 8,
"x": 8,
"y": 22
"y": 20
},
"targets": [
{
@ -1776,7 +1813,7 @@
"h": 9,
"w": 8,
"x": 16,
"y": 22
"y": 20
},
"targets": [
{
@ -1845,11 +1882,11 @@
"h": 12,
"w": 12,
"x": 0,
"y": 38
"y": 36
},
"targets": [
{
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}
@ -1892,11 +1929,11 @@
"h": 12,
"w": 12,
"x": 12,
"y": 38
"y": 36
},
"targets": [
{
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}
@ -1939,7 +1976,7 @@
"h": 10,
"w": 12,
"x": 0,
"y": 50
"y": 48
},
"targets": [
{
@ -1976,7 +2013,7 @@
"h": 10,
"w": 12,
"x": 12,
"y": 50
"y": 48
},
"targets": [
{
@ -2013,7 +2050,7 @@
"h": 10,
"w": 12,
"x": 0,
"y": 60
"y": 58
},
"targets": [
{
@ -2064,7 +2101,7 @@
"h": 10,
"w": 12,
"x": 12,
"y": 60
"y": 58
},
"targets": [
{
@ -2145,7 +2182,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 31
"y": 29
},
"targets": [
{
@ -2189,7 +2226,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 31
"y": 29
},
"targets": [
{
@ -2233,7 +2270,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 31
"y": 29
},
"targets": [
{
@ -2277,7 +2314,7 @@
"h": 16,
"w": 12,
"x": 0,
"y": 70
"y": 68
},
"targets": [
{
@ -2325,7 +2362,7 @@
"h": 16,
"w": 12,
"x": 12,
"y": 70
"y": 68
},
"targets": [
{

View File

@ -520,7 +520,7 @@
},
"targets": [
{
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)))))",
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))",
"refId": "A",
"instant": true,
"format": "table"

View File

@ -16,20 +16,20 @@ data:
{
"id": 1,
"type": "bargauge",
"title": "Ariadne Task Errors (24h)",
"title": "Ariadne Task Errors (range)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 7,
"w": 6,
"w": 8,
"x": 0,
"y": 0
},
"targets": [
{
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[24h]))",
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range]))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -90,26 +90,31 @@ data:
{
"id": 2,
"type": "timeseries",
"title": "Ariadne Attempts vs Failures (1h)",
"title": "Ariadne Attempts / Warnings / Failures",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 7,
"w": 12,
"x": 6,
"w": 8,
"x": 8,
"y": 0
},
"targets": [
{
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
"refId": "A",
"legendFormat": "Attempts"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
"refId": "B",
"legendFormat": "Warnings"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
"refId": "C",
"legendFormat": "Failures"
}
],
@ -117,7 +122,38 @@ data:
"defaults": {
"unit": "none"
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Warnings"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "yellow"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Failures"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "red"
}
}
]
}
]
},
"options": {
"legend": {
@ -139,8 +175,8 @@ data:
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"w": 8,
"x": 16,
"y": 0
},
"targets": [
@ -176,7 +212,8 @@ data:
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -225,7 +262,7 @@ data:
},
"targets": [
{
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)))",
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)",
"refId": "A"
}
],
@ -293,7 +330,7 @@ data:
},
"targets": [
{
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))",
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)",
"refId": "A"
}
],
@ -353,7 +390,7 @@ data:
},
"targets": [
{
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)",
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)",
"refId": "A"
}
],
@ -586,14 +623,14 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 11
"y": 17
},
"targets": [
{
"expr": "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600",
"expr": "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -624,7 +661,8 @@ data:
"value": 24
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -646,7 +684,7 @@ data:
"fields": [
"Value"
],
"order": "desc"
"order": "asc"
}
}
]
@ -660,14 +698,14 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 12,
"y": 11
"y": 17
},
"targets": [
{
"expr": "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600",
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -698,7 +736,8 @@ data:
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -734,14 +773,14 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 19
"y": 23
},
"targets": [
{
"expr": "((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
"expr": "(time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"instant": true
@ -772,7 +811,8 @@ data:
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -808,14 +848,14 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 12,
"y": 19
"y": 23
},
"targets": [
{
"expr": "((time() - (kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
"expr": "(time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"instant": true
@ -846,7 +886,8 @@ data:
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -882,10 +923,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 27
"y": 29
},
"targets": [
{
@ -956,10 +997,10 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"h": 6,
"w": 12,
"x": 12,
"y": 27
"y": 29
},
"targets": [
{
@ -1033,7 +1074,7 @@ data:
"h": 6,
"w": 8,
"x": 0,
"y": 35
"y": 11
},
"targets": [
{
@ -1107,7 +1148,7 @@ data:
"h": 6,
"w": 4,
"x": 8,
"y": 35
"y": 11
},
"targets": [
{
@ -1170,7 +1211,7 @@ data:
"h": 6,
"w": 12,
"x": 12,
"y": 35
"y": 11
},
"targets": [
{

View File

@ -29,7 +29,7 @@ data:
},
"targets": [
{
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"refId": "A"
}
],
@ -55,7 +55,7 @@ data:
"unit": "none",
"custom": {
"displayMode": "auto",
"valueSuffix": "/19"
"valueSuffix": "/20"
}
},
"overrides": []

View File

@ -458,14 +458,14 @@ data:
},
"targets": [
{
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"min": 0,
"max": 19,
"max": 20,
"thresholds": {
"mode": "absolute",
"steps": [
@ -475,15 +475,15 @@ data:
},
{
"color": "orange",
"value": 17
},
{
"color": "yellow",
"value": 18
},
{
"color": "green",
"color": "yellow",
"value": 19
},
{
"color": "green",
"value": 20
}
]
}
@ -805,7 +805,7 @@ data:
},
"gridPos": {
"h": 3,
"w": 5,
"w": 6,
"x": 0,
"y": 8
},
@ -872,8 +872,8 @@ data:
},
"gridPos": {
"h": 3,
"w": 5,
"x": 10,
"w": 6,
"x": 12,
"y": 8
},
"targets": [
@ -977,8 +977,8 @@ data:
},
"gridPos": {
"h": 3,
"w": 5,
"x": 5,
"w": 6,
"x": 6,
"y": 8
},
"targets": [
@ -1053,8 +1053,8 @@ data:
},
"gridPos": {
"h": 3,
"w": 5,
"x": 15,
"w": 6,
"x": 18,
"y": 8
},
"targets": [
@ -1128,7 +1128,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 0,
"y": 11
@ -1203,7 +1203,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 6,
"y": 11
@ -1278,7 +1278,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 12,
"y": 11
@ -1345,7 +1345,7 @@ data:
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"h": 3,
"w": 6,
"x": 18,
"y": 11
@ -1413,9 +1413,9 @@ data:
},
"gridPos": {
"h": 6,
"w": 4,
"w": 6,
"x": 0,
"y": 16
"y": 14
},
"targets": [
{
@ -1450,7 +1450,8 @@ data:
"value": 48
}
]
}
},
"decimals": 2
},
"overrides": []
},
@ -1486,26 +1487,31 @@ data:
{
"id": 41,
"type": "timeseries",
"title": "Ariadne Attempts vs Failures (1h)",
"title": "Ariadne Attempts / Warnings / Failures",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
"w": 8,
"x": 4,
"y": 16
"w": 6,
"x": 6,
"y": 14
},
"targets": [
{
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
"refId": "A",
"legendFormat": "Attempts"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
"refId": "B",
"legendFormat": "Warnings"
},
{
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
"refId": "C",
"legendFormat": "Failures"
}
],
@ -1513,7 +1519,38 @@ data:
"defaults": {
"unit": "none"
},
"overrides": []
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Warnings"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "yellow"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Failures"
},
"properties": [
{
"id": "color",
"value": {
"mode": "fixed",
"fixedColor": "red"
}
}
]
}
]
},
"options": {
"legend": {
@ -1535,9 +1572,9 @@ data:
},
"gridPos": {
"h": 6,
"w": 8,
"w": 6,
"x": 12,
"y": 16
"y": 14
},
"targets": [
{
@ -1571,9 +1608,9 @@ data:
},
"gridPos": {
"h": 6,
"w": 4,
"x": 20,
"y": 16
"w": 6,
"x": 18,
"y": 14
},
"targets": [
{
@ -1647,7 +1684,7 @@ data:
"h": 9,
"w": 8,
"x": 0,
"y": 22
"y": 20
},
"targets": [
{
@ -1716,7 +1753,7 @@ data:
"h": 9,
"w": 8,
"x": 8,
"y": 22
"y": 20
},
"targets": [
{
@ -1785,7 +1822,7 @@ data:
"h": 9,
"w": 8,
"x": 16,
"y": 22
"y": 20
},
"targets": [
{
@ -1854,11 +1891,11 @@ data:
"h": 12,
"w": 12,
"x": 0,
"y": 38
"y": 36
},
"targets": [
{
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}
@ -1901,11 +1938,11 @@ data:
"h": 12,
"w": 12,
"x": 12,
"y": 38
"y": 36
},
"targets": [
{
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}
@ -1948,7 +1985,7 @@ data:
"h": 10,
"w": 12,
"x": 0,
"y": 50
"y": 48
},
"targets": [
{
@ -1985,7 +2022,7 @@ data:
"h": 10,
"w": 12,
"x": 12,
"y": 50
"y": 48
},
"targets": [
{
@ -2022,7 +2059,7 @@ data:
"h": 10,
"w": 12,
"x": 0,
"y": 60
"y": 58
},
"targets": [
{
@ -2073,7 +2110,7 @@ data:
"h": 10,
"w": 12,
"x": 12,
"y": 60
"y": 58
},
"targets": [
{
@ -2154,7 +2191,7 @@ data:
"h": 7,
"w": 8,
"x": 0,
"y": 31
"y": 29
},
"targets": [
{
@ -2198,7 +2235,7 @@ data:
"h": 7,
"w": 8,
"x": 8,
"y": 31
"y": 29
},
"targets": [
{
@ -2242,7 +2279,7 @@ data:
"h": 7,
"w": 8,
"x": 16,
"y": 31
"y": 29
},
"targets": [
{
@ -2286,7 +2323,7 @@ data:
"h": 16,
"w": 12,
"x": 0,
"y": 70
"y": 68
},
"targets": [
{
@ -2334,7 +2371,7 @@ data:
"h": 16,
"w": 12,
"x": 12,
"y": 70
"y": 68
},
"targets": [
{

View File

@ -529,7 +529,7 @@ data:
},
"targets": [
{
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)))))",
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))",
"refId": "A",
"instant": true,
"format": "table"