monitoring: fix hottest stats and titan-db scrape
This commit is contained in:
parent
fe8deea9c7
commit
a1e731e929
@ -221,6 +221,7 @@ def stat_panel(
|
|||||||
thresholds=None,
|
thresholds=None,
|
||||||
text_mode="value",
|
text_mode="value",
|
||||||
legend=None,
|
legend=None,
|
||||||
|
instant=False,
|
||||||
value_suffix=None,
|
value_suffix=None,
|
||||||
links=None,
|
links=None,
|
||||||
):
|
):
|
||||||
@ -259,6 +260,8 @@ def stat_panel(
|
|||||||
}
|
}
|
||||||
if legend:
|
if legend:
|
||||||
panel["targets"][0]["legendFormat"] = legend
|
panel["targets"][0]["legendFormat"] = legend
|
||||||
|
if instant:
|
||||||
|
panel["targets"][0]["instant"] = True
|
||||||
if links:
|
if links:
|
||||||
panel["links"] = links
|
panel["links"] = links
|
||||||
return panel
|
return panel
|
||||||
@ -339,14 +342,8 @@ def pie_panel(panel_id, title, expr, grid):
|
|||||||
"title": title,
|
"title": title,
|
||||||
"datasource": PROM_DS,
|
"datasource": PROM_DS,
|
||||||
"gridPos": grid,
|
"gridPos": grid,
|
||||||
"targets": [{"expr": expr, "refId": "A"}],
|
"targets": [{"expr": expr, "refId": "A", "legendFormat": "{{namespace}}"}],
|
||||||
"fieldConfig": {
|
"fieldConfig": {"defaults": {"unit": "percent"}, "overrides": []},
|
||||||
"defaults": {
|
|
||||||
"unit": "percent",
|
|
||||||
"displayName": "{{namespace}}",
|
|
||||||
},
|
|
||||||
"overrides": [],
|
|
||||||
},
|
|
||||||
"options": {
|
"options": {
|
||||||
"legend": {"displayMode": "list", "placement": "right"},
|
"legend": {"displayMode": "list", "placement": "right"},
|
||||||
"pieType": "pie",
|
"pieType": "pie",
|
||||||
@ -382,7 +379,7 @@ def build_overview():
|
|||||||
(1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None),
|
(1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None),
|
||||||
(
|
(
|
||||||
2,
|
2,
|
||||||
"Ready workers",
|
"Workers ready",
|
||||||
f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})',
|
f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})',
|
||||||
WORKER_SUFFIX,
|
WORKER_SUFFIX,
|
||||||
WORKER_TOTAL,
|
WORKER_TOTAL,
|
||||||
@ -480,6 +477,7 @@ def build_overview():
|
|||||||
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
||||||
text_mode="value_and_name",
|
text_mode="value_and_name",
|
||||||
legend="{{node}}",
|
legend="{{node}}",
|
||||||
|
instant=True,
|
||||||
links=link_to("atlas-nodes"),
|
links=link_to("atlas-nodes"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -1016,6 +1014,7 @@ def build_network_dashboard():
|
|||||||
{"h": 4, "w": 8, "x": 16, "y": 0},
|
{"h": 4, "w": 8, "x": 16, "y": 0},
|
||||||
unit="req/s",
|
unit="req/s",
|
||||||
legend="{{router}}",
|
legend="{{router}}",
|
||||||
|
instant=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
|
|||||||
@ -142,7 +142,8 @@
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{router}}"
|
"legendFormat": "{{router}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
|
|||||||
@ -70,7 +70,7 @@
|
|||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"title": "Ready workers",
|
"title": "Workers ready",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -440,7 +440,8 @@
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -512,7 +513,8 @@
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -584,7 +586,8 @@
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -652,7 +655,8 @@
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -719,13 +723,13 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
|
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "percent",
|
"unit": "percent"
|
||||||
"displayName": "{{namespace}}"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -761,13 +765,13 @@
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
|
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "percent",
|
"unit": "percent"
|
||||||
"displayName": "{{namespace}}"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
|
|||||||
@ -151,7 +151,8 @@ data:
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{router}}"
|
"legendFormat": "{{router}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
|
|||||||
@ -79,7 +79,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"title": "Ready workers",
|
"title": "Workers ready",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -449,7 +449,8 @@ data:
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -521,7 +522,8 @@ data:
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -593,7 +595,8 @@ data:
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -661,7 +664,8 @@ data:
|
|||||||
{
|
{
|
||||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{node}}"
|
"legendFormat": "{{node}}",
|
||||||
|
"instant": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
@ -728,13 +732,13 @@ data:
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
|
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "percent",
|
"unit": "percent"
|
||||||
"displayName": "{{namespace}}"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@ -770,13 +774,13 @@ data:
|
|||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
|
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
|
||||||
"refId": "A"
|
"refId": "A",
|
||||||
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
"unit": "percent",
|
"unit": "percent"
|
||||||
"displayName": "{{namespace}}"
|
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
|
|||||||
@ -209,6 +209,16 @@ spec:
|
|||||||
- action: keep
|
- action: keep
|
||||||
source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_part_of]
|
source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_part_of]
|
||||||
regex: flux-system;flux
|
regex: flux-system;flux
|
||||||
|
- job_name: "titan-db"
|
||||||
|
static_configs:
|
||||||
|
- targets: ["titan-db:9100"]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__address__]
|
||||||
|
target_label: instance
|
||||||
|
metric_relabel_configs:
|
||||||
|
- source_labels: [instance]
|
||||||
|
target_label: node
|
||||||
|
replacement: titan-db
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user