monitoring: refine network metrics and control-plane allowance
This commit is contained in:
parent
c7b7bc7a6d
commit
f06be37f44
@ -79,7 +79,7 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
|
||||
WORKER_TOTAL = len(WORKER_NODES)
|
||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
|
||||
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system"
|
||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||
GAUGE_WIDTHS = [5, 5, 5, 5, 4]
|
||||
|
||||
@ -271,10 +271,21 @@ NET_CLUSTER_TX = (
|
||||
'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
|
||||
" or on() vector(0)"
|
||||
)
|
||||
NET_TOTAL_EXPR = NET_CLUSTER_TX
|
||||
NET_INGRESS_EXPR = TRAEFIK_NET_INGRESS
|
||||
NET_EGRESS_EXPR = TRAEFIK_NET_EGRESS
|
||||
NET_INTERNAL_EXPR = f"clamp_min((({NET_CLUSTER_RX}) + ({NET_CLUSTER_TX})) - (({TRAEFIK_NET_INGRESS}) + ({TRAEFIK_NET_EGRESS})), 0)"
|
||||
PHYSICAL_NET_FILTER = 'device!~"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*"'
|
||||
NET_NODE_RX_PHYS = (
|
||||
f'sum(rate(node_network_receive_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
|
||||
)
|
||||
NET_NODE_TX_PHYS = (
|
||||
f'sum(rate(node_network_transmit_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
|
||||
)
|
||||
NET_TOTAL_EXPR = NET_NODE_TX_PHYS
|
||||
NET_INGRESS_EXPR = NET_NODE_RX_PHYS
|
||||
NET_EGRESS_EXPR = NET_NODE_TX_PHYS
|
||||
NET_INTERNAL_EXPR = (
|
||||
'sum(rate(container_network_receive_bytes_total{namespace!="traefik",pod!="",container!=""}[5m]) '
|
||||
'+ rate(container_network_transmit_bytes_total{namespace!="traefik",pod!="",container!=""}[5m]))'
|
||||
' or on() vector(0)'
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Panel factories
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -80,7 +80,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -140,7 +140,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -142,7 +142,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -137,7 +137,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -1228,7 +1228,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A",
|
||||
"legendFormat": "Ingress (Traefik)"
|
||||
}
|
||||
@ -1272,7 +1272,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A",
|
||||
"legendFormat": "Egress (Traefik)"
|
||||
}
|
||||
@ -1316,7 +1316,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||
"refId": "A",
|
||||
"legendFormat": "Internal traffic"
|
||||
}
|
||||
|
||||
@ -200,7 +200,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -29,7 +29,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -89,7 +89,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -149,7 +149,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -151,7 +151,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -146,7 +146,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -1237,7 +1237,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A",
|
||||
"legendFormat": "Ingress (Traefik)"
|
||||
}
|
||||
@ -1281,7 +1281,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
||||
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||
"refId": "A",
|
||||
"legendFormat": "Egress (Traefik)"
|
||||
}
|
||||
@ -1325,7 +1325,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||
"refId": "A",
|
||||
"legendFormat": "Internal traffic"
|
||||
}
|
||||
|
||||
@ -209,7 +209,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user