monitoring: refine jobs/overview panels
This commit is contained in:
parent
2138b93242
commit
b0996e9a4f
@ -70,6 +70,7 @@ WORKER_NODES = [
|
||||
"titan-13",
|
||||
"titan-14",
|
||||
"titan-15",
|
||||
"titan-16",
|
||||
"titan-17",
|
||||
"titan-18",
|
||||
"titan-19",
|
||||
@ -333,9 +334,10 @@ GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
|
||||
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)"
|
||||
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
|
||||
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
|
||||
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE}))"
|
||||
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})"
|
||||
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
|
||||
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE})) or on() vector(0)"
|
||||
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE}) or on() vector(0)"
|
||||
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED}) or on() vector(0)"
|
||||
ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))'
|
||||
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||
ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||
ARIADNE_TASK_ERRORS_30D = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[30d]))'
|
||||
@ -344,10 +346,19 @@ ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_to
|
||||
ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||
ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||
ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))'
|
||||
ARIADNE_TASK_ATTEMPTS_1H = 'sum(increase(ariadne_task_runs_total[1h]))'
|
||||
ARIADNE_TASK_FAILURES_1H = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||
ARIADNE_TASK_ATTEMPTS_SERIES = 'sum(increase(ariadne_task_runs_total[$__interval]))'
|
||||
ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="error"}[$__interval]))'
|
||||
ARIADNE_TASK_WARNINGS_SERIES = (
|
||||
'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)'
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
|
||||
"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600"
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
|
||||
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
||||
)
|
||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||
ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}'
|
||||
ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}'
|
||||
@ -370,6 +381,8 @@ ONEOFF_JOB_POD_AGE_HOURS = (
|
||||
'* on(namespace,pod) group_left(phase) '
|
||||
'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})'
|
||||
)
|
||||
GLUE_LAST_SUCCESS_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SUCCESS}[$__range])) / 3600"
|
||||
GLUE_LAST_SCHEDULE_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SCHEDULE}[$__range])) / 3600"
|
||||
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
||||
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||
@ -1032,7 +1045,7 @@ def build_overview():
|
||||
30,
|
||||
"Mail Sent (1d)",
|
||||
'max(postmark_outbound_sent{window="1d"})',
|
||||
{"h": 3, "w": 5, "x": 0, "y": 8},
|
||||
{"h": 3, "w": 6, "x": 0, "y": 8},
|
||||
unit="none",
|
||||
links=link_to("atlas-mail"),
|
||||
)
|
||||
@ -1043,7 +1056,7 @@ def build_overview():
|
||||
"type": "stat",
|
||||
"title": "Mail Bounces (1d)",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 3, "w": 5, "x": 10, "y": 8},
|
||||
"gridPos": {"h": 3, "w": 6, "x": 12, "y": 8},
|
||||
"targets": [
|
||||
{
|
||||
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
||||
@ -1089,7 +1102,7 @@ def build_overview():
|
||||
32,
|
||||
"Mail Success Rate (1d)",
|
||||
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
||||
{"h": 3, "w": 5, "x": 5, "y": 8},
|
||||
{"h": 3, "w": 6, "x": 6, "y": 8},
|
||||
unit="percent",
|
||||
thresholds=mail_success_thresholds,
|
||||
decimals=1,
|
||||
@ -1101,7 +1114,7 @@ def build_overview():
|
||||
33,
|
||||
"Mail Limit Used (30d)",
|
||||
"max(postmark_sending_limit_used_percent)",
|
||||
{"h": 3, "w": 5, "x": 15, "y": 8},
|
||||
{"h": 3, "w": 6, "x": 18, "y": 8},
|
||||
unit="percent",
|
||||
thresholds=mail_limit_thresholds,
|
||||
decimals=1,
|
||||
@ -1121,7 +1134,7 @@ def build_overview():
|
||||
panel_id,
|
||||
title,
|
||||
expr,
|
||||
{"h": 5, "w": 6, "x": 6 * idx, "y": 11},
|
||||
{"h": 3, "w": 6, "x": 6 * idx, "y": 11},
|
||||
unit=unit,
|
||||
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
||||
links=link_to("atlas-storage"),
|
||||
@ -1133,26 +1146,44 @@ def build_overview():
|
||||
40,
|
||||
"One-off Job Pods (age hours)",
|
||||
ONEOFF_JOB_POD_AGE_HOURS,
|
||||
{"h": 6, "w": 4, "x": 0, "y": 16},
|
||||
{"h": 6, "w": 6, "x": 0, "y": 14},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{pod}}",
|
||||
thresholds=age_thresholds,
|
||||
limit=8,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
{
|
||||
"id": 41,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts vs Failures (1h)",
|
||||
"title": "Ariadne Attempts / Warnings / Failures",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 6, "w": 8, "x": 4, "y": 16},
|
||||
"gridPos": {"h": 6, "w": 6, "x": 6, "y": 14},
|
||||
"targets": [
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_1H, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_1H, "refId": "B", "legendFormat": "Failures"},
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_WARNINGS_SERIES, "refId": "B", "legendFormat": "Warnings"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "C", "legendFormat": "Failures"},
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "none"}, "overrides": []},
|
||||
"fieldConfig": {
|
||||
"defaults": {"unit": "none"},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Warnings"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Failures"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
"options": {
|
||||
"legend": {"displayMode": "table", "placement": "right"},
|
||||
"tooltip": {"mode": "multi"},
|
||||
@ -1164,7 +1195,7 @@ def build_overview():
|
||||
42,
|
||||
"Ariadne Test Success Rate",
|
||||
ARIADNE_TEST_SUCCESS_RATE,
|
||||
{"h": 6, "w": 8, "x": 12, "y": 16},
|
||||
{"h": 6, "w": 6, "x": 12, "y": 14},
|
||||
unit="percent",
|
||||
legend=None,
|
||||
legend_display="list",
|
||||
@ -1175,7 +1206,7 @@ def build_overview():
|
||||
43,
|
||||
"Tests with Failures (24h)",
|
||||
ARIADNE_TEST_FAILURES_24H,
|
||||
{"h": 6, "w": 4, "x": 20, "y": 16},
|
||||
{"h": 6, "w": 6, "x": 18, "y": 14},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{result}}",
|
||||
@ -1200,7 +1231,7 @@ def build_overview():
|
||||
11,
|
||||
"Namespace CPU Share",
|
||||
namespace_cpu_share_expr(cpu_scope),
|
||||
{"h": 9, "w": 8, "x": 0, "y": 22},
|
||||
{"h": 9, "w": 8, "x": 0, "y": 20},
|
||||
links=namespace_scope_links("namespace_scope_cpu"),
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
@ -1210,7 +1241,7 @@ def build_overview():
|
||||
12,
|
||||
"Namespace GPU Share",
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 9, "w": 8, "x": 8, "y": 22},
|
||||
{"h": 9, "w": 8, "x": 8, "y": 20},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
@ -1220,7 +1251,7 @@ def build_overview():
|
||||
13,
|
||||
"Namespace RAM Share",
|
||||
namespace_ram_share_expr(ram_scope),
|
||||
{"h": 9, "w": 8, "x": 16, "y": 22},
|
||||
{"h": 9, "w": 8, "x": 16, "y": 20},
|
||||
links=namespace_scope_links("namespace_scope_ram"),
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
@ -1232,7 +1263,7 @@ def build_overview():
|
||||
14,
|
||||
"Worker Node CPU",
|
||||
node_cpu_expr(worker_filter),
|
||||
{"h": 12, "w": 12, "x": 0, "y": 38},
|
||||
{"h": 12, "w": 12, "x": 0, "y": 36},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
@ -1246,7 +1277,7 @@ def build_overview():
|
||||
15,
|
||||
"Worker Node RAM",
|
||||
node_mem_expr(worker_filter),
|
||||
{"h": 12, "w": 12, "x": 12, "y": 38},
|
||||
{"h": 12, "w": 12, "x": 12, "y": 36},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
@ -1261,7 +1292,7 @@ def build_overview():
|
||||
16,
|
||||
"Control plane CPU",
|
||||
node_cpu_expr(CONTROL_ALL_REGEX),
|
||||
{"h": 10, "w": 12, "x": 0, "y": 50},
|
||||
{"h": 10, "w": 12, "x": 0, "y": 48},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_display="table",
|
||||
@ -1273,7 +1304,7 @@ def build_overview():
|
||||
17,
|
||||
"Control plane RAM",
|
||||
node_mem_expr(CONTROL_ALL_REGEX),
|
||||
{"h": 10, "w": 12, "x": 12, "y": 50},
|
||||
{"h": 10, "w": 12, "x": 12, "y": 48},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_display="table",
|
||||
@ -1286,7 +1317,7 @@ def build_overview():
|
||||
28,
|
||||
"Node Pod Share",
|
||||
'(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100',
|
||||
{"h": 10, "w": 12, "x": 0, "y": 60},
|
||||
{"h": 10, "w": 12, "x": 0, "y": 58},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1294,7 +1325,7 @@ def build_overview():
|
||||
29,
|
||||
"Top Nodes by Pod Count",
|
||||
'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))',
|
||||
{"h": 10, "w": 12, "x": 12, "y": 60},
|
||||
{"h": 10, "w": 12, "x": 12, "y": 58},
|
||||
unit="none",
|
||||
limit=12,
|
||||
decimals=0,
|
||||
@ -1316,7 +1347,7 @@ def build_overview():
|
||||
18,
|
||||
"Cluster Ingress Throughput",
|
||||
NET_INGRESS_EXPR,
|
||||
{"h": 7, "w": 8, "x": 0, "y": 31},
|
||||
{"h": 7, "w": 8, "x": 0, "y": 29},
|
||||
unit="Bps",
|
||||
legend="Ingress (Traefik)",
|
||||
legend_display="list",
|
||||
@ -1329,7 +1360,7 @@ def build_overview():
|
||||
19,
|
||||
"Cluster Egress Throughput",
|
||||
NET_EGRESS_EXPR,
|
||||
{"h": 7, "w": 8, "x": 8, "y": 31},
|
||||
{"h": 7, "w": 8, "x": 8, "y": 29},
|
||||
unit="Bps",
|
||||
legend="Egress (Traefik)",
|
||||
legend_display="list",
|
||||
@ -1342,7 +1373,7 @@ def build_overview():
|
||||
20,
|
||||
"Intra-Cluster Throughput",
|
||||
NET_INTERNAL_EXPR,
|
||||
{"h": 7, "w": 8, "x": 16, "y": 31},
|
||||
{"h": 7, "w": 8, "x": 16, "y": 29},
|
||||
unit="Bps",
|
||||
legend="Internal traffic",
|
||||
legend_display="list",
|
||||
@ -1356,7 +1387,7 @@ def build_overview():
|
||||
21,
|
||||
"Root Filesystem Usage",
|
||||
root_usage_expr(),
|
||||
{"h": 16, "w": 12, "x": 0, "y": 70},
|
||||
{"h": 16, "w": 12, "x": 0, "y": 68},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
@ -1371,7 +1402,7 @@ def build_overview():
|
||||
22,
|
||||
"Nodes Closest to Full Root Disks",
|
||||
f"topk(12, {root_usage_expr()})",
|
||||
{"h": 16, "w": 12, "x": 12, "y": 70},
|
||||
{"h": 16, "w": 12, "x": 12, "y": 68},
|
||||
unit="percent",
|
||||
thresholds=PERCENT_THRESHOLDS,
|
||||
links=link_to("atlas-storage"),
|
||||
@ -2300,9 +2331,9 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
1,
|
||||
"Ariadne Task Errors (24h)",
|
||||
ARIADNE_TASK_ERRORS_24H,
|
||||
{"h": 7, "w": 6, "x": 0, "y": 0},
|
||||
"Ariadne Task Errors (range)",
|
||||
ARIADNE_TASK_ERRORS_RANGE,
|
||||
{"h": 7, "w": 8, "x": 0, "y": 0},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
@ -2313,14 +2344,31 @@ def build_jobs_dashboard():
|
||||
{
|
||||
"id": 2,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts vs Failures (1h)",
|
||||
"title": "Ariadne Attempts / Warnings / Failures",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 7, "w": 12, "x": 6, "y": 0},
|
||||
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
|
||||
"targets": [
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_1H, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_1H, "refId": "B", "legendFormat": "Failures"},
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_WARNINGS_SERIES, "refId": "B", "legendFormat": "Warnings"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "C", "legendFormat": "Failures"},
|
||||
],
|
||||
"fieldConfig": {"defaults": {"unit": "none"}, "overrides": []},
|
||||
"fieldConfig": {
|
||||
"defaults": {"unit": "none"},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Warnings"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Failures"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
"options": {
|
||||
"legend": {"displayMode": "table", "placement": "right"},
|
||||
"tooltip": {"mode": "multi"},
|
||||
@ -2332,12 +2380,13 @@ def build_jobs_dashboard():
|
||||
3,
|
||||
"One-off Job Pods (age hours)",
|
||||
ONEOFF_JOB_POD_AGE_HOURS,
|
||||
{"h": 7, "w": 6, "x": 18, "y": 0},
|
||||
{"h": 7, "w": 8, "x": 16, "y": 0},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{pod}}",
|
||||
thresholds=age_thresholds,
|
||||
limit=12,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -2407,48 +2456,53 @@ def build_jobs_dashboard():
|
||||
bargauge_panel(
|
||||
10,
|
||||
"Ariadne Schedule Last Error (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 11},
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 17},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=recent_error_thresholds,
|
||||
sort_order="asc",
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
11,
|
||||
"Ariadne Schedule Last Success (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 11},
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 17},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
12,
|
||||
"Glue Jobs Last Success (hours ago)",
|
||||
GLUE_LAST_SUCCESS_AGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 19},
|
||||
GLUE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 23},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
13,
|
||||
"Glue Jobs Last Schedule (hours ago)",
|
||||
GLUE_LAST_SCHEDULE_AGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 19},
|
||||
GLUE_LAST_SCHEDULE_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 23},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -2456,7 +2510,7 @@ def build_jobs_dashboard():
|
||||
14,
|
||||
"Ariadne Task Errors (1h)",
|
||||
ARIADNE_TASK_ERRORS_1H,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 27},
|
||||
{"h": 6, "w": 12, "x": 0, "y": 29},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
@ -2468,7 +2522,7 @@ def build_jobs_dashboard():
|
||||
15,
|
||||
"Ariadne Task Errors (30d)",
|
||||
ARIADNE_TASK_ERRORS_30D,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 27},
|
||||
{"h": 6, "w": 12, "x": 12, "y": 29},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
@ -2480,7 +2534,7 @@ def build_jobs_dashboard():
|
||||
16,
|
||||
"Ariadne Access Requests",
|
||||
ARIADNE_ACCESS_REQUESTS,
|
||||
{"h": 6, "w": 8, "x": 0, "y": 35},
|
||||
{"h": 6, "w": 8, "x": 0, "y": 11},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{status}}",
|
||||
@ -2491,7 +2545,7 @@ def build_jobs_dashboard():
|
||||
17,
|
||||
"Ariadne CI Coverage (%)",
|
||||
ARIADNE_CI_COVERAGE,
|
||||
{"h": 6, "w": 4, "x": 8, "y": 35},
|
||||
{"h": 6, "w": 4, "x": 8, "y": 11},
|
||||
unit="percent",
|
||||
decimals=1,
|
||||
instant=True,
|
||||
@ -2503,7 +2557,7 @@ def build_jobs_dashboard():
|
||||
18,
|
||||
"Ariadne CI Tests (latest)",
|
||||
ARIADNE_CI_TESTS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 35},
|
||||
{"h": 6, "w": 12, "x": 12, "y": 11},
|
||||
unit="none",
|
||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||
instant=True,
|
||||
|
||||
@ -7,20 +7,20 @@
|
||||
{
|
||||
"id": 1,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Task Errors (24h)",
|
||||
"title": "Ariadne Task Errors (range)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[24h]))",
|
||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -81,26 +81,31 @@
|
||||
{
|
||||
"id": 2,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts vs Failures (1h)",
|
||||
"title": "Ariadne Attempts / Warnings / Failures",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 6,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "Attempts"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
||||
"refId": "B",
|
||||
"legendFormat": "Warnings"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||
"refId": "C",
|
||||
"legendFormat": "Failures"
|
||||
}
|
||||
],
|
||||
@ -108,7 +113,38 @@
|
||||
"defaults": {
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Warnings"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "yellow"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Failures"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "red"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
@ -130,8 +166,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
@ -167,7 +203,8 @@
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -216,7 +253,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)))",
|
||||
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -284,7 +321,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))",
|
||||
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -344,7 +381,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)",
|
||||
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -577,14 +614,14 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
"y": 17
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600",
|
||||
"expr": "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -615,7 +652,8 @@
|
||||
"value": 24
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -637,7 +675,7 @@
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -651,14 +689,14 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
"y": 17
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600",
|
||||
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -689,7 +727,8 @@
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -725,14 +764,14 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 19
|
||||
"y": 23
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
|
||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||
"instant": true
|
||||
@ -763,7 +802,8 @@
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -799,14 +839,14 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 19
|
||||
"y": 23
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - (kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
|
||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||
"instant": true
|
||||
@ -837,7 +877,8 @@
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -873,10 +914,10 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -947,10 +988,10 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 27
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1024,7 +1065,7 @@
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 35
|
||||
"y": 11
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1098,7 +1139,7 @@
|
||||
"h": 6,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 35
|
||||
"y": 11
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1161,7 +1202,7 @@
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 35
|
||||
"y": 11
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -46,7 +46,7 @@
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto",
|
||||
"valueSuffix": "/19"
|
||||
"valueSuffix": "/20"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
|
||||
@ -449,14 +449,14 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"min": 0,
|
||||
"max": 19,
|
||||
"max": 20,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
@ -466,15 +466,15 @@
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 17
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 18
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"color": "yellow",
|
||||
"value": 19
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 20
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -796,7 +796,7 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
@ -863,8 +863,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 10,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -968,8 +968,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 5,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1044,8 +1044,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 15,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1119,7 +1119,7 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
@ -1194,7 +1194,7 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 11
|
||||
@ -1269,7 +1269,7 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
@ -1336,7 +1336,7 @@
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 11
|
||||
@ -1404,9 +1404,9 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1441,7 +1441,8 @@
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -1477,26 +1478,31 @@
|
||||
{
|
||||
"id": 41,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts vs Failures (1h)",
|
||||
"title": "Ariadne Attempts / Warnings / Failures",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 4,
|
||||
"y": 16
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "Attempts"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
||||
"refId": "B",
|
||||
"legendFormat": "Warnings"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||
"refId": "C",
|
||||
"legendFormat": "Failures"
|
||||
}
|
||||
],
|
||||
@ -1504,7 +1510,38 @@
|
||||
"defaults": {
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Warnings"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "yellow"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Failures"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "red"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
@ -1526,9 +1563,9 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1562,9 +1599,9 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 16
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1638,7 +1675,7 @@
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 22
|
||||
"y": 20
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1707,7 +1744,7 @@
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 22
|
||||
"y": 20
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1776,7 +1813,7 @@
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 22
|
||||
"y": 20
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1845,11 +1882,11 @@
|
||||
"h": 12,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 38
|
||||
"y": 36
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
@ -1892,11 +1929,11 @@
|
||||
"h": 12,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 38
|
||||
"y": 36
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
@ -1939,7 +1976,7 @@
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 50
|
||||
"y": 48
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1976,7 +2013,7 @@
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 50
|
||||
"y": 48
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2013,7 +2050,7 @@
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 58
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2064,7 +2101,7 @@
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 58
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2145,7 +2182,7 @@
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 31
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2189,7 +2226,7 @@
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 31
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2233,7 +2270,7 @@
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 31
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2277,7 +2314,7 @@
|
||||
"h": 16,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 70
|
||||
"y": 68
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2325,7 +2362,7 @@
|
||||
"h": 16,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 70
|
||||
"y": 68
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
|
||||
@ -520,7 +520,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)))))",
|
||||
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))",
|
||||
"refId": "A",
|
||||
"instant": true,
|
||||
"format": "table"
|
||||
|
||||
@ -16,20 +16,20 @@ data:
|
||||
{
|
||||
"id": 1,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Task Errors (24h)",
|
||||
"title": "Ariadne Task Errors (range)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[24h]))",
|
||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -90,26 +90,31 @@ data:
|
||||
{
|
||||
"id": 2,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts vs Failures (1h)",
|
||||
"title": "Ariadne Attempts / Warnings / Failures",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 6,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "Attempts"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
||||
"refId": "B",
|
||||
"legendFormat": "Warnings"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||
"refId": "C",
|
||||
"legendFormat": "Failures"
|
||||
}
|
||||
],
|
||||
@ -117,7 +122,38 @@ data:
|
||||
"defaults": {
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Warnings"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "yellow"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Failures"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "red"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
@ -139,8 +175,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
@ -176,7 +212,8 @@ data:
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -225,7 +262,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)))",
|
||||
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -293,7 +330,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))",
|
||||
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -353,7 +390,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)",
|
||||
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -586,14 +623,14 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
"y": 17
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600",
|
||||
"expr": "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -624,7 +661,8 @@ data:
|
||||
"value": 24
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -646,7 +684,7 @@ data:
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -660,14 +698,14 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
"y": 17
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600",
|
||||
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -698,7 +736,8 @@ data:
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -734,14 +773,14 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 19
|
||||
"y": 23
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
|
||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||
"instant": true
|
||||
@ -772,7 +811,8 @@ data:
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -808,14 +848,14 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 19
|
||||
"y": 23
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - (kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600",
|
||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||
"instant": true
|
||||
@ -846,7 +886,8 @@ data:
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -882,10 +923,10 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 27
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -956,10 +997,10 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 27
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1033,7 +1074,7 @@ data:
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 35
|
||||
"y": 11
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1107,7 +1148,7 @@ data:
|
||||
"h": 6,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 35
|
||||
"y": 11
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1170,7 +1211,7 @@ data:
|
||||
"h": 6,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 35
|
||||
"y": 11
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
|
||||
@ -29,7 +29,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -55,7 +55,7 @@ data:
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto",
|
||||
"valueSuffix": "/19"
|
||||
"valueSuffix": "/20"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
|
||||
@ -458,14 +458,14 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"min": 0,
|
||||
"max": 19,
|
||||
"max": 20,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
@ -475,15 +475,15 @@ data:
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 17
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 18
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"color": "yellow",
|
||||
"value": 19
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 20
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -805,7 +805,7 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
@ -872,8 +872,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 10,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -977,8 +977,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 5,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1053,8 +1053,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 5,
|
||||
"x": 15,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1128,7 +1128,7 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
@ -1203,7 +1203,7 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 11
|
||||
@ -1278,7 +1278,7 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
@ -1345,7 +1345,7 @@ data:
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"h": 3,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 11
|
||||
@ -1413,9 +1413,9 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1450,7 +1450,8 @@ data:
|
||||
"value": 48
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -1486,26 +1487,31 @@ data:
|
||||
{
|
||||
"id": 41,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts vs Failures (1h)",
|
||||
"title": "Ariadne Attempts / Warnings / Failures",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 4,
|
||||
"y": 16
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "Attempts"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
||||
"refId": "B",
|
||||
"legendFormat": "Warnings"
|
||||
},
|
||||
{
|
||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||
"refId": "C",
|
||||
"legendFormat": "Failures"
|
||||
}
|
||||
],
|
||||
@ -1513,7 +1519,38 @@ data:
|
||||
"defaults": {
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Warnings"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "yellow"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Failures"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "red"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
@ -1535,9 +1572,9 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1571,9 +1608,9 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 16
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 14
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1647,7 +1684,7 @@ data:
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 22
|
||||
"y": 20
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1716,7 +1753,7 @@ data:
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 22
|
||||
"y": 20
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1785,7 +1822,7 @@ data:
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 22
|
||||
"y": 20
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1854,11 +1891,11 @@ data:
|
||||
"h": 12,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 38
|
||||
"y": 36
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
@ -1901,11 +1938,11 @@ data:
|
||||
"h": 12,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 38
|
||||
"y": 36
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
@ -1948,7 +1985,7 @@ data:
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 50
|
||||
"y": 48
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -1985,7 +2022,7 @@ data:
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 50
|
||||
"y": 48
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2022,7 +2059,7 @@ data:
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 60
|
||||
"y": 58
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2073,7 +2110,7 @@ data:
|
||||
"h": 10,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 60
|
||||
"y": 58
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2154,7 +2191,7 @@ data:
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 31
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2198,7 +2235,7 @@ data:
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 31
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2242,7 +2279,7 @@ data:
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 31
|
||||
"y": 29
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2286,7 +2323,7 @@ data:
|
||||
"h": 16,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 70
|
||||
"y": 68
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
@ -2334,7 +2371,7 @@ data:
|
||||
"h": 16,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 70
|
||||
"y": 68
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
|
||||
@ -529,7 +529,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.024)))))",
|
||||
"expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-20\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-21\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.023) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.024) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.025)))))",
|
||||
"refId": "A",
|
||||
"instant": true,
|
||||
"format": "table"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user