fix(cluster-state): normalize hottest node label expr
This commit is contained in:
parent
bdb7cc4fcd
commit
c9708a83ea
@ -18,6 +18,7 @@ logger = get_logger(__name__)
|
|||||||
_VALUE_PAIR_LEN = 2
|
_VALUE_PAIR_LEN = 2
|
||||||
_RATE_WINDOW = "5m"
|
_RATE_WINDOW = "5m"
|
||||||
_RESTARTS_WINDOW = "1h"
|
_RESTARTS_WINDOW = "1h"
|
||||||
|
_NODE_UNAME_LABEL = 'node_uname_info{nodename!=""}'
|
||||||
_WORKLOAD_LABEL_KEYS = (
|
_WORKLOAD_LABEL_KEYS = (
|
||||||
"app.kubernetes.io/name",
|
"app.kubernetes.io/name",
|
||||||
"app",
|
"app",
|
||||||
@ -1162,22 +1163,22 @@ def _hottest_nodes(errors: list[str]) -> dict[str, Any]:
|
|||||||
try:
|
try:
|
||||||
hottest["cpu"] = _vm_topk(
|
hottest["cpu"] = _vm_topk(
|
||||||
f'label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{{mode="idle"}}[{_RATE_WINDOW}]))) * 100) '
|
f'label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{{mode="idle"}}[{_RATE_WINDOW}]))) * 100) '
|
||||||
'* on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
f'* on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||||
"node",
|
"node",
|
||||||
)
|
)
|
||||||
hottest["ram"] = _vm_topk(
|
hottest["ram"] = _vm_topk(
|
||||||
'label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
|
f'label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
|
||||||
'/ node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
f'/ node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||||
"node",
|
"node",
|
||||||
)
|
)
|
||||||
hottest["net"] = _vm_topk(
|
hottest["net"] = _vm_topk(
|
||||||
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]) '
|
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]) '
|
||||||
f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) * on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) * on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||||
"node",
|
"node",
|
||||||
)
|
)
|
||||||
hottest["io"] = _vm_topk(
|
hottest["io"] = _vm_topk(
|
||||||
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[{_RATE_WINDOW}]) + rate(node_disk_written_bytes_total[{_RATE_WINDOW}]))) '
|
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[{_RATE_WINDOW}]) + rate(node_disk_written_bytes_total[{_RATE_WINDOW}]))) '
|
||||||
'* on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
f'* on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||||
"node",
|
"node",
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user