monitoring: fix hottest node labels
This commit is contained in:
parent
bcaa0a3327
commit
4aece7e5cb
@ -144,6 +144,12 @@ def astreae_free_expr(mount):
|
|||||||
return f"sum(node_filesystem_avail_bytes{{mountpoint=\"{mount}\",fstype!~\"tmpfs|overlay\"}})"
|
return f"sum(node_filesystem_avail_bytes{{mountpoint=\"{mount}\",fstype!~\"tmpfs|overlay\"}})"
|
||||||
|
|
||||||
|
|
||||||
|
def hottest_stat_expr(inner_expr):
|
||||||
|
return (
|
||||||
|
f'label_replace(topk(1, {inner_expr}), "__name__", "$1", "node", "(.*)")'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))'
|
PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))'
|
||||||
CRASHLOOP_EXPR = (
|
CRASHLOOP_EXPR = (
|
||||||
'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason'
|
'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason'
|
||||||
@ -224,7 +230,6 @@ def stat_panel(
|
|||||||
instant=False,
|
instant=False,
|
||||||
value_suffix=None,
|
value_suffix=None,
|
||||||
links=None,
|
links=None,
|
||||||
display_name=None,
|
|
||||||
):
|
):
|
||||||
"""Return a Grafana stat panel definition."""
|
"""Return a Grafana stat panel definition."""
|
||||||
defaults = {
|
defaults = {
|
||||||
@ -243,8 +248,6 @@ def stat_panel(
|
|||||||
}
|
}
|
||||||
if value_suffix:
|
if value_suffix:
|
||||||
defaults["custom"]["valueSuffix"] = value_suffix
|
defaults["custom"]["valueSuffix"] = value_suffix
|
||||||
if display_name:
|
|
||||||
defaults["displayName"] = display_name
|
|
||||||
panel = {
|
panel = {
|
||||||
"id": panel_id,
|
"id": panel_id,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
@ -464,10 +467,10 @@ def build_overview():
|
|||||||
)
|
)
|
||||||
|
|
||||||
hottest = [
|
hottest = [
|
||||||
(7, "Hottest node: CPU", f"topk(1, {node_cpu_expr()})", "percent"),
|
(7, "Hottest node: CPU", hottest_stat_expr(node_cpu_expr()), "percent"),
|
||||||
(8, "Hottest node: RAM", f"topk(1, {node_mem_expr()})", "percent"),
|
(8, "Hottest node: RAM", hottest_stat_expr(node_mem_expr()), "percent"),
|
||||||
(9, "Hottest node: NET", NET_TOP_EXPR, "Bps"),
|
(9, "Hottest node: NET", hottest_stat_expr(NET_SERIES_EXPR), "Bps"),
|
||||||
(10, "Hottest node: I/O", IO_TOP_EXPR, "Bps"),
|
(10, "Hottest node: I/O", hottest_stat_expr(IO_SERIES_EXPR), "Bps"),
|
||||||
]
|
]
|
||||||
for idx, (panel_id, title, expr, unit) in enumerate(hottest):
|
for idx, (panel_id, title, expr, unit) in enumerate(hottest):
|
||||||
panels.append(
|
panels.append(
|
||||||
@ -479,9 +482,8 @@ def build_overview():
|
|||||||
unit=unit,
|
unit=unit,
|
||||||
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
||||||
text_mode="name_and_value",
|
text_mode="name_and_value",
|
||||||
legend="{{node}}",
|
legend=None,
|
||||||
instant=True,
|
instant=True,
|
||||||
display_name="{{__field.labels.node}}\\n",
|
|
||||||
links=link_to("atlas-nodes"),
|
links=link_to("atlas-nodes"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@ -438,9 +438,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -470,8 +469,7 @@
|
|||||||
"unit": "percent",
|
"unit": "percent",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -512,9 +510,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -544,8 +541,7 @@
|
|||||||
"unit": "percent",
|
"unit": "percent",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -586,9 +582,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
"expr": "label_replace(topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -614,8 +609,7 @@
|
|||||||
"unit": "Bps",
|
"unit": "Bps",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -656,9 +650,8 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
"expr": "label_replace(topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -684,8 +677,7 @@
|
|||||||
"unit": "Bps",
|
"unit": "Bps",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
|
|||||||
@ -447,9 +447,8 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -479,8 +478,7 @@ data:
|
|||||||
"unit": "percent",
|
"unit": "percent",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -521,9 +519,8 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -553,8 +550,7 @@ data:
|
|||||||
"unit": "percent",
|
"unit": "percent",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -595,9 +591,8 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
"expr": "label_replace(topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -623,8 +618,7 @@ data:
|
|||||||
"unit": "Bps",
|
"unit": "Bps",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -665,9 +659,8 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
"expr": "label_replace(topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}",
|
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -693,8 +686,7 @@ data:
|
|||||||
"unit": "Bps",
|
"unit": "Bps",
|
||||||
"custom": {
|
"custom": {
|
||||||
"displayMode": "auto"
|
"displayMode": "auto"
|
||||||
},
|
}
|
||||||
"displayName": "{{__field.labels.node}}\\n"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user