monitoring: enforce sorted job lists
This commit is contained in:
parent
db4c3b7c51
commit
4721d44a33
@ -748,6 +748,12 @@ def bargauge_panel(
|
|||||||
overrides=None,
|
overrides=None,
|
||||||
):
|
):
|
||||||
"""Return a bar gauge panel with label-aware reduction."""
|
"""Return a bar gauge panel with label-aware reduction."""
|
||||||
|
cleaned_expr = expr.strip()
|
||||||
|
if not cleaned_expr.startswith(("sort(", "sort_desc(")):
|
||||||
|
if sort_order == "desc":
|
||||||
|
expr = f"sort_desc({expr})"
|
||||||
|
elif sort_order == "asc":
|
||||||
|
expr = f"sort({expr})"
|
||||||
panel = {
|
panel = {
|
||||||
"id": panel_id,
|
"id": panel_id,
|
||||||
"type": "bargauge",
|
"type": "bargauge",
|
||||||
@ -1165,21 +1171,20 @@ def build_overview():
|
|||||||
{
|
{
|
||||||
"id": 41,
|
"id": 41,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Attempts / Warnings / Failures",
|
"title": "Ariadne Attempts / Failures",
|
||||||
"datasource": PROM_DS,
|
"datasource": PROM_DS,
|
||||||
"gridPos": {"h": 6, "w": 6, "x": 6, "y": 14},
|
"gridPos": {"h": 6, "w": 6, "x": 6, "y": 14},
|
||||||
"targets": [
|
"targets": [
|
||||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||||
{"expr": ARIADNE_TASK_WARNINGS_SERIES, "refId": "B", "legendFormat": "Warnings"},
|
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
|
||||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "C", "legendFormat": "Failures"},
|
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {"unit": "none"},
|
"defaults": {"unit": "none"},
|
||||||
"overrides": [
|
"overrides": [
|
||||||
{
|
{
|
||||||
"matcher": {"id": "byName", "options": "Warnings"},
|
"matcher": {"id": "byName", "options": "Attempts"},
|
||||||
"properties": [
|
"properties": [
|
||||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}
|
{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -2361,21 +2366,20 @@ def build_jobs_dashboard():
|
|||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Attempts / Warnings / Failures",
|
"title": "Ariadne Attempts / Failures",
|
||||||
"datasource": PROM_DS,
|
"datasource": PROM_DS,
|
||||||
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
|
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
|
||||||
"targets": [
|
"targets": [
|
||||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||||
{"expr": ARIADNE_TASK_WARNINGS_SERIES, "refId": "B", "legendFormat": "Warnings"},
|
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
|
||||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "C", "legendFormat": "Failures"},
|
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {"unit": "none"},
|
"defaults": {"unit": "none"},
|
||||||
"overrides": [
|
"overrides": [
|
||||||
{
|
{
|
||||||
"matcher": {"id": "byName", "options": "Warnings"},
|
"matcher": {"id": "byName", "options": "Attempts"},
|
||||||
"properties": [
|
"properties": [
|
||||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}
|
{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -20,7 +20,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range]))",
|
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -81,7 +81,7 @@
|
|||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Attempts / Warnings / Failures",
|
"title": "Ariadne Attempts / Failures",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -98,14 +98,9 @@
|
|||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Attempts"
|
"legendFormat": "Attempts"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
|
||||||
"refId": "B",
|
|
||||||
"legendFormat": "Warnings"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||||
"refId": "C",
|
"refId": "B",
|
||||||
"legendFormat": "Failures"
|
"legendFormat": "Failures"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -117,14 +112,14 @@
|
|||||||
{
|
{
|
||||||
"matcher": {
|
"matcher": {
|
||||||
"id": "byName",
|
"id": "byName",
|
||||||
"options": "Warnings"
|
"options": "Attempts"
|
||||||
},
|
},
|
||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "color",
|
"id": "color",
|
||||||
"value": {
|
"value": {
|
||||||
"mode": "fixed",
|
"mode": "fixed",
|
||||||
"fixedColor": "yellow"
|
"fixedColor": "green"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -172,7 +167,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})",
|
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{pod}}",
|
"legendFormat": "{{namespace}}/{{pod}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -621,7 +616,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -696,7 +691,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -771,7 +766,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -846,7 +841,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -921,7 +916,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
|
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -995,7 +990,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d]))",
|
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1069,7 +1064,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_access_requests_total",
|
"expr": "sort_desc(ariadne_access_requests_total)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{status}}",
|
"legendFormat": "{{status}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -1410,7 +1410,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})",
|
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{pod}}",
|
"legendFormat": "{{namespace}}/{{pod}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1478,7 +1478,7 @@
|
|||||||
{
|
{
|
||||||
"id": 41,
|
"id": 41,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Attempts / Warnings / Failures",
|
"title": "Ariadne Attempts / Failures",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1495,14 +1495,9 @@
|
|||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Attempts"
|
"legendFormat": "Attempts"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
|
||||||
"refId": "B",
|
|
||||||
"legendFormat": "Warnings"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||||
"refId": "C",
|
"refId": "B",
|
||||||
"legendFormat": "Failures"
|
"legendFormat": "Failures"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1514,14 +1509,14 @@
|
|||||||
{
|
{
|
||||||
"matcher": {
|
"matcher": {
|
||||||
"id": "byName",
|
"id": "byName",
|
||||||
"options": "Warnings"
|
"options": "Attempts"
|
||||||
},
|
},
|
||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "color",
|
"id": "color",
|
||||||
"value": {
|
"value": {
|
||||||
"mode": "fixed",
|
"mode": "fixed",
|
||||||
"fixedColor": "yellow"
|
"fixedColor": "green"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -1606,7 +1601,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h]))",
|
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{result}}",
|
"legendFormat": "{{result}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -2137,7 +2132,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))",
|
"expr": "sort_desc(topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node)))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -2398,7 +2393,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "sort_desc(topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -439,7 +439,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))",
|
"expr": "sort_desc(topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node)))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -29,7 +29,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range]))",
|
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[$__range])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -90,7 +90,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Attempts / Warnings / Failures",
|
"title": "Ariadne Attempts / Failures",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -107,14 +107,9 @@ data:
|
|||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Attempts"
|
"legendFormat": "Attempts"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
|
||||||
"refId": "B",
|
|
||||||
"legendFormat": "Warnings"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||||
"refId": "C",
|
"refId": "B",
|
||||||
"legendFormat": "Failures"
|
"legendFormat": "Failures"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -126,14 +121,14 @@ data:
|
|||||||
{
|
{
|
||||||
"matcher": {
|
"matcher": {
|
||||||
"id": "byName",
|
"id": "byName",
|
||||||
"options": "Warnings"
|
"options": "Attempts"
|
||||||
},
|
},
|
||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "color",
|
"id": "color",
|
||||||
"value": {
|
"value": {
|
||||||
"mode": "fixed",
|
"mode": "fixed",
|
||||||
"fixedColor": "yellow"
|
"fixedColor": "green"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -181,7 +176,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})",
|
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{pod}}",
|
"legendFormat": "{{namespace}}/{{pod}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -630,7 +625,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -705,7 +700,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -780,7 +775,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -855,7 +850,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600",
|
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{cronjob}}",
|
"legendFormat": "{{namespace}}/{{cronjob}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -930,7 +925,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h]))",
|
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[1h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1004,7 +999,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d]))",
|
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\"}[30d])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{task}}",
|
"legendFormat": "{{task}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1078,7 +1073,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_access_requests_total",
|
"expr": "sort_desc(ariadne_access_requests_total)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{status}}",
|
"legendFormat": "{{status}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -1419,7 +1419,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})",
|
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}/{{pod}}",
|
"legendFormat": "{{namespace}}/{{pod}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1487,7 +1487,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 41,
|
"id": 41,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Attempts / Warnings / Failures",
|
"title": "Ariadne Attempts / Failures",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1504,14 +1504,9 @@ data:
|
|||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Attempts"
|
"legendFormat": "Attempts"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)",
|
|
||||||
"refId": "B",
|
|
||||||
"legendFormat": "Warnings"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
||||||
"refId": "C",
|
"refId": "B",
|
||||||
"legendFormat": "Failures"
|
"legendFormat": "Failures"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1523,14 +1518,14 @@ data:
|
|||||||
{
|
{
|
||||||
"matcher": {
|
"matcher": {
|
||||||
"id": "byName",
|
"id": "byName",
|
||||||
"options": "Warnings"
|
"options": "Attempts"
|
||||||
},
|
},
|
||||||
"properties": [
|
"properties": [
|
||||||
{
|
{
|
||||||
"id": "color",
|
"id": "color",
|
||||||
"value": {
|
"value": {
|
||||||
"mode": "fixed",
|
"mode": "fixed",
|
||||||
"fixedColor": "yellow"
|
"fixedColor": "green"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -1615,7 +1610,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h]))",
|
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{result}}",
|
"legendFormat": "{{result}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -2146,7 +2141,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))",
|
"expr": "sort_desc(topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node)))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -2407,7 +2402,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "sort_desc(topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -448,7 +448,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))",
|
"expr": "sort_desc(topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node)))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user