monitoring: refine network metrics and control-plane allowance
This commit is contained in:
parent
c7b7bc7a6d
commit
f06be37f44
@ -79,7 +79,7 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
|
|||||||
WORKER_TOTAL = len(WORKER_NODES)
|
WORKER_TOTAL = len(WORKER_NODES)
|
||||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||||
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
|
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system"
|
||||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||||
GAUGE_WIDTHS = [5, 5, 5, 5, 4]
|
GAUGE_WIDTHS = [5, 5, 5, 5, 4]
|
||||||
|
|
||||||
@ -271,10 +271,21 @@ NET_CLUSTER_TX = (
|
|||||||
'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
|
'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
|
||||||
" or on() vector(0)"
|
" or on() vector(0)"
|
||||||
)
|
)
|
||||||
NET_TOTAL_EXPR = NET_CLUSTER_TX
|
PHYSICAL_NET_FILTER = 'device!~"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*"'
|
||||||
NET_INGRESS_EXPR = TRAEFIK_NET_INGRESS
|
NET_NODE_RX_PHYS = (
|
||||||
NET_EGRESS_EXPR = TRAEFIK_NET_EGRESS
|
f'sum(rate(node_network_receive_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
|
||||||
NET_INTERNAL_EXPR = f"clamp_min((({NET_CLUSTER_RX}) + ({NET_CLUSTER_TX})) - (({TRAEFIK_NET_INGRESS}) + ({TRAEFIK_NET_EGRESS})), 0)"
|
)
|
||||||
|
NET_NODE_TX_PHYS = (
|
||||||
|
f'sum(rate(node_network_transmit_bytes_total{{{PHYSICAL_NET_FILTER}}}[5m])) or on() vector(0)'
|
||||||
|
)
|
||||||
|
NET_TOTAL_EXPR = NET_NODE_TX_PHYS
|
||||||
|
NET_INGRESS_EXPR = NET_NODE_RX_PHYS
|
||||||
|
NET_EGRESS_EXPR = NET_NODE_TX_PHYS
|
||||||
|
NET_INTERNAL_EXPR = (
|
||||||
|
'sum(rate(container_network_receive_bytes_total{namespace!="traefik",pod!="",container!=""}[5m]) '
|
||||||
|
'+ rate(container_network_transmit_bytes_total{namespace!="traefik",pod!="",container!=""}[5m]))'
|
||||||
|
' or on() vector(0)'
|
||||||
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Panel factories
|
# Panel factories
|
||||||
|
|||||||
@ -20,7 +20,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -80,7 +80,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -140,7 +140,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@ -142,7 +142,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@ -137,7 +137,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1228,7 +1228,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Ingress (Traefik)"
|
"legendFormat": "Ingress (Traefik)"
|
||||||
}
|
}
|
||||||
@ -1272,7 +1272,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Egress (Traefik)"
|
"legendFormat": "Egress (Traefik)"
|
||||||
}
|
}
|
||||||
@ -1316,7 +1316,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Internal traffic"
|
"legendFormat": "Internal traffic"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -200,7 +200,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@ -29,7 +29,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -89,7 +89,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -149,7 +149,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@ -151,7 +151,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@ -146,7 +146,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1237,7 +1237,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Ingress (Traefik)"
|
"legendFormat": "Ingress (Traefik)"
|
||||||
}
|
}
|
||||||
@ -1281,7 +1281,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)",
|
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Egress (Traefik)"
|
"legendFormat": "Egress (Traefik)"
|
||||||
}
|
}
|
||||||
@ -1325,7 +1325,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
|
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "Internal traffic"
|
"legendFormat": "Internal traffic"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -209,7 +209,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"})",
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user