fix(cluster-state): normalize hottest node label expr
This commit is contained in:
parent
bdb7cc4fcd
commit
c9708a83ea
@ -18,6 +18,7 @@ logger = get_logger(__name__)
|
||||
# NOTE(review): presumably the expected length of a [timestamp, value] sample
# pair in a query result — the usage is outside this view; confirm.
_VALUE_PAIR_LEN = 2
|
||||
# Look-back range interpolated into the PromQL rate(...[window]) selectors
# built by the hottest-node queries (CPU, network and disk I/O).
_RATE_WINDOW = "5m"
|
||||
# NOTE(review): presumably the look-back range for restart-count queries —
# the usage is outside this view; confirm.
_RESTARTS_WINDOW = "1h"
|
||||
# PromQL selector for node_uname_info series that carry a non-empty nodename.
# The hottest-node queries wrap it in label_replace(...) to copy "nodename"
# into a "node" label before joining on instance.
# Keep the braces single: this is a plain string that gets interpolated into
# f-strings as a value, so it must not be pre-escaped as "{{...}}".
_NODE_UNAME_LABEL = 'node_uname_info{nodename!=""}'
|
||||
_WORKLOAD_LABEL_KEYS = (
|
||||
"app.kubernetes.io/name",
|
||||
"app",
|
||||
@ -1162,22 +1163,22 @@ def _hottest_nodes(errors: list[str]) -> dict[str, Any]:
|
||||
try:
|
||||
hottest["cpu"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{{mode="idle"}}[{_RATE_WINDOW}]))) * 100) '
|
||||
'* on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
f'* on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
hottest["ram"] = _vm_topk(
|
||||
'label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
|
||||
'/ node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
f'label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
|
||||
f'/ node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
hottest["net"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]) '
|
||||
f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) * on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) * on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
hottest["io"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[{_RATE_WINDOW}]) + rate(node_disk_written_bytes_total[{_RATE_WINDOW}]))) '
|
||||
'* on(instance) group_left(node) label_replace(node_uname_info{{nodename!=""}}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
f'* on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
except Exception as exc:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user