monitoring: polish dashboards
This commit is contained in:
parent
8f5781d3cf
commit
349d9c56ac
@ -80,6 +80,7 @@ WORKER_TOTAL = len(WORKER_NODES)
|
||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
|
||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PromQL helpers
|
||||
@ -149,9 +150,10 @@ CRASHLOOP_EXPR = (
|
||||
'{reason=~"CrashLoopBackOff|ImagePullBackOff"}))'
|
||||
)
|
||||
STUCK_TERMINATING_EXPR = (
|
||||
'sum(max by (namespace,pod) (('
|
||||
'(time() - kube_pod_deletion_timestamp{pod!=""}) > 600'
|
||||
') and on(namespace,pod) kube_pod_deletion_timestamp{pod!=""} > 0))'
|
||||
'sum(max by (namespace,pod) ('
|
||||
'((time() - kube_pod_deletion_timestamp{pod!=""}) > bool 600)'
|
||||
' and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=""} > bool 0)'
|
||||
'))'
|
||||
)
|
||||
|
||||
PROBLEM_TABLE_EXPR = (
|
||||
@ -168,9 +170,11 @@ CRASHLOOP_TABLE_EXPR = (
|
||||
"(kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})"
|
||||
)
|
||||
STUCK_TABLE_EXPR = (
|
||||
"("
|
||||
"((time() - kube_pod_deletion_timestamp{pod!=\"\"}) "
|
||||
"* on(namespace,pod) group_left(node) kube_pod_info) "
|
||||
"and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0"
|
||||
"and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) "
|
||||
"* on(namespace,pod) group_left(node) kube_pod_info"
|
||||
")"
|
||||
)
|
||||
|
||||
NAMESPACE_CPU_EXPR = (
|
||||
@ -192,6 +196,7 @@ IO_SERIES_EXPR = (
|
||||
"+ rate(node_disk_written_bytes_total[5m]))"
|
||||
)
|
||||
IO_TOP_EXPR = f"topk(1, {IO_SERIES_EXPR})"
|
||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||
NET_INGRESS_EXPR = (
|
||||
'sum(rate(container_network_receive_bytes_total{namespace!="" ,pod!=""}[5m])) '
|
||||
"or on() vector(0)"
|
||||
@ -216,6 +221,7 @@ def stat_panel(
|
||||
thresholds=None,
|
||||
text_mode="value",
|
||||
legend=None,
|
||||
display_name=None,
|
||||
value_suffix=None,
|
||||
links=None,
|
||||
):
|
||||
@ -236,6 +242,8 @@ def stat_panel(
|
||||
}
|
||||
if value_suffix:
|
||||
defaults["custom"]["valueSuffix"] = value_suffix
|
||||
if display_name:
|
||||
defaults["displayName"] = display_name
|
||||
panel = {
|
||||
"id": panel_id,
|
||||
"type": "stat",
|
||||
@ -449,8 +457,8 @@ def build_overview():
|
||||
hottest = [
|
||||
(7, "Hottest node: CPU", f"topk(1, {node_cpu_expr()})", "percent"),
|
||||
(8, "Hottest node: RAM", f"topk(1, {node_mem_expr()})", "percent"),
|
||||
(9, "Hottest node: NET", NET_TOP_EXPR, "bytes/sec"),
|
||||
(10, "Hottest node: I/O", IO_TOP_EXPR, "bytes/sec"),
|
||||
(9, "Hottest node: NET", NET_TOP_EXPR, "Bps"),
|
||||
(10, "Hottest node: I/O", IO_TOP_EXPR, "Bps"),
|
||||
]
|
||||
for idx, (panel_id, title, expr, unit) in enumerate(hottest):
|
||||
panels.append(
|
||||
@ -462,7 +470,7 @@ def build_overview():
|
||||
unit=unit,
|
||||
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
||||
text_mode="value_and_name",
|
||||
legend="{{node}}",
|
||||
display_name="{{node}}",
|
||||
links=link_to("atlas-nodes"),
|
||||
)
|
||||
)
|
||||
@ -544,7 +552,7 @@ def build_overview():
|
||||
"Cluster ingress throughput",
|
||||
NET_INGRESS_EXPR,
|
||||
{"h": 7, "w": 12, "x": 0, "y": 34},
|
||||
unit="bytes/sec",
|
||||
unit="Bps",
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
links=link_to("atlas-network"),
|
||||
@ -556,7 +564,7 @@ def build_overview():
|
||||
"Cluster egress throughput",
|
||||
NET_EGRESS_EXPR,
|
||||
{"h": 7, "w": 12, "x": 12, "y": 34},
|
||||
unit="bytes/sec",
|
||||
unit="Bps",
|
||||
legend_display="list",
|
||||
legend_placement="bottom",
|
||||
links=link_to("atlas-network"),
|
||||
@ -616,8 +624,8 @@ def build_overview():
|
||||
storage_panels = [
|
||||
(21, "Astreae usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
||||
(22, "Asteria usage", astreae_usage_expr("/mnt/asteria"), "percent"),
|
||||
(23, "Astreae free", astreae_free_expr("/mnt/astreae"), "bytesSI"),
|
||||
(24, "Asteria free", astreae_free_expr("/mnt/asteria"), "bytesSI"),
|
||||
(23, "Astreae free", astreae_free_expr("/mnt/astreae"), "decbytes"),
|
||||
(24, "Asteria free", astreae_free_expr("/mnt/asteria"), "decbytes"),
|
||||
]
|
||||
for idx, (panel_id, title, expr, unit) in enumerate(storage_panels):
|
||||
panels.append(
|
||||
@ -911,7 +919,7 @@ def build_storage_dashboard():
|
||||
"Astreae free",
|
||||
astreae_free_expr("/mnt/astreae"),
|
||||
{"h": 5, "w": 6, "x": 12, "y": 0},
|
||||
unit="bytesSI",
|
||||
unit="decbytes",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -920,14 +928,14 @@ def build_storage_dashboard():
|
||||
"Asteria free",
|
||||
astreae_free_expr("/mnt/asteria"),
|
||||
{"h": 5, "w": 6, "x": 18, "y": 0},
|
||||
unit="bytesSI",
|
||||
unit="decbytes",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
5,
|
||||
"Astreae per-node usage",
|
||||
filesystem_usage_expr("/mnt/astreae"),
|
||||
filesystem_usage_expr("/mnt/astreae", LONGHORN_NODE_REGEX),
|
||||
{"h": 9, "w": 12, "x": 0, "y": 5},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
@ -940,7 +948,7 @@ def build_storage_dashboard():
|
||||
timeseries_panel(
|
||||
6,
|
||||
"Asteria per-node usage",
|
||||
filesystem_usage_expr("/mnt/asteria"),
|
||||
filesystem_usage_expr("/mnt/asteria", LONGHORN_NODE_REGEX),
|
||||
{"h": 9, "w": 12, "x": 12, "y": 5},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
@ -986,18 +994,19 @@ def build_storage_dashboard():
|
||||
def build_network_dashboard():
|
||||
panels = []
|
||||
panels.append(
|
||||
stat_panel(1, "Ingress bytes/s", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 0}, unit="bytes/sec")
|
||||
stat_panel(1, "Ingress traffic", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 0}, unit="Bps")
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(2, "Egress bytes/s", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 0}, unit="bytes/sec")
|
||||
stat_panel(2, "Egress traffic", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 0}, unit="Bps")
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
3,
|
||||
"Top router req/s",
|
||||
'max(topk(1, rate(traefik_router_requests_total[5m])))',
|
||||
f"topk(1, {TRAEFIK_ROUTER_EXPR})",
|
||||
{"h": 4, "w": 8, "x": 16, "y": 0},
|
||||
unit="req/s",
|
||||
display_name="{{router}}",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1006,7 +1015,7 @@ def build_network_dashboard():
|
||||
"Per-node throughput",
|
||||
NET_SERIES_EXPR,
|
||||
{"h": 8, "w": 24, "x": 0, "y": 4},
|
||||
unit="bytes/sec",
|
||||
unit="Bps",
|
||||
legend="{{node}}",
|
||||
legend_display="table",
|
||||
legend_placement="right",
|
||||
@ -1019,7 +1028,7 @@ def build_network_dashboard():
|
||||
'topk(10, sum(rate(container_network_transmit_bytes_total{namespace!=""}[5m]) '
|
||||
'+ rate(container_network_receive_bytes_total{namespace!=""}[5m])) by (namespace))',
|
||||
{"h": 9, "w": 12, "x": 0, "y": 12},
|
||||
unit="bytes/sec",
|
||||
unit="Bps",
|
||||
transformations=[{"id": "labelsToFields", "options": {}}],
|
||||
)
|
||||
)
|
||||
@ -1030,7 +1039,7 @@ def build_network_dashboard():
|
||||
'topk(10, sum(rate(container_network_transmit_bytes_total{pod!=""}[5m]) '
|
||||
'+ rate(container_network_receive_bytes_total{pod!=""}[5m])) by (namespace,pod))',
|
||||
{"h": 9, "w": 12, "x": 12, "y": 12},
|
||||
unit="bytes/sec",
|
||||
unit="Bps",
|
||||
transformations=[{"id": "labelsToFields", "options": {}}],
|
||||
)
|
||||
)
|
||||
@ -1038,7 +1047,7 @@ def build_network_dashboard():
|
||||
timeseries_panel(
|
||||
7,
|
||||
"Traefik routers (req/s)",
|
||||
'topk(10, rate(traefik_router_requests_total[5m]))',
|
||||
f"topk(10, {TRAEFIK_ROUTER_EXPR})",
|
||||
{"h": 9, "w": 12, "x": 0, "y": 21},
|
||||
unit="req/s",
|
||||
legend="{{router}}",
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Ingress bytes/s",
|
||||
"title": "Ingress traffic",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -43,7 +43,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -67,7 +67,7 @@
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Egress bytes/s",
|
||||
"title": "Egress traffic",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -103,7 +103,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -140,7 +140,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(topk(1, rate(traefik_router_requests_total[5m])))",
|
||||
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -166,7 +166,8 @@
|
||||
"unit": "req/s",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{router}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -207,7 +208,7 @@
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -243,7 +244,7 @@
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -279,7 +280,7 @@
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -309,7 +310,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, rate(traefik_router_requests_total[5m]))",
|
||||
"expr": "topk(10, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{router}}"
|
||||
}
|
||||
|
||||
@ -339,7 +339,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -407,8 +407,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -437,7 +436,8 @@
|
||||
"unit": "percent",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -479,8 +479,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -509,7 +508,8 @@
|
||||
"unit": "percent",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -551,8 +551,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -574,10 +573,11 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -619,8 +619,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -642,10 +641,11 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -944,7 +944,7 @@
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -987,7 +987,7 @@
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -1306,7 +1306,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -1373,7 +1373,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
|
||||
@ -140,7 +140,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -332,7 +332,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0",
|
||||
"expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -171,7 +171,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -231,7 +231,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -268,7 +268,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
||||
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
@ -306,7 +306,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
||||
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@ data:
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Ingress bytes/s",
|
||||
"title": "Ingress traffic",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -52,7 +52,7 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -76,7 +76,7 @@ data:
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Egress bytes/s",
|
||||
"title": "Egress traffic",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -112,7 +112,7 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -149,7 +149,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(topk(1, rate(traefik_router_requests_total[5m])))",
|
||||
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -175,7 +175,8 @@ data:
|
||||
"unit": "req/s",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{router}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -216,7 +217,7 @@ data:
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -252,7 +253,7 @@ data:
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -288,7 +289,7 @@ data:
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -318,7 +319,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, rate(traefik_router_requests_total[5m]))",
|
||||
"expr": "topk(10, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{router}}"
|
||||
}
|
||||
|
||||
@ -348,7 +348,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -416,8 +416,7 @@ data:
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -446,7 +445,8 @@ data:
|
||||
"unit": "percent",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -488,8 +488,7 @@ data:
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -518,7 +517,8 @@ data:
|
||||
"unit": "percent",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -560,8 +560,7 @@ data:
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -583,10 +582,11 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -628,8 +628,7 @@ data:
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -651,10 +650,11 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes/sec",
|
||||
"unit": "Bps",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"displayName": "{{node}}"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -953,7 +953,7 @@ data:
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -996,7 +996,7 @@ data:
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes/sec"
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -1315,7 +1315,7 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -1382,7 +1382,7 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
|
||||
@ -149,7 +149,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
|
||||
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -341,7 +341,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0",
|
||||
"expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -180,7 +180,7 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -240,7 +240,7 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytesSI",
|
||||
"unit": "decbytes",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
@ -277,7 +277,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
||||
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
@ -315,7 +315,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
||||
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user