monitoring: fix hottest stats and titan-db scrape
This commit is contained in:
parent
fe8deea9c7
commit
a1e731e929
@ -221,6 +221,7 @@ def stat_panel(
|
||||
thresholds=None,
|
||||
text_mode="value",
|
||||
legend=None,
|
||||
instant=False,
|
||||
value_suffix=None,
|
||||
links=None,
|
||||
):
|
||||
@ -259,6 +260,8 @@ def stat_panel(
|
||||
}
|
||||
if legend:
|
||||
panel["targets"][0]["legendFormat"] = legend
|
||||
if instant:
|
||||
panel["targets"][0]["instant"] = True
|
||||
if links:
|
||||
panel["links"] = links
|
||||
return panel
|
||||
@ -339,14 +342,8 @@ def pie_panel(panel_id, title, expr, grid):
|
||||
"title": title,
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": grid,
|
||||
"targets": [{"expr": expr, "refId": "A"}],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"displayName": "{{namespace}}",
|
||||
},
|
||||
"overrides": [],
|
||||
},
|
||||
"targets": [{"expr": expr, "refId": "A", "legendFormat": "{{namespace}}"}],
|
||||
"fieldConfig": {"defaults": {"unit": "percent"}, "overrides": []},
|
||||
"options": {
|
||||
"legend": {"displayMode": "list", "placement": "right"},
|
||||
"pieType": "pie",
|
||||
@ -382,7 +379,7 @@ def build_overview():
|
||||
(1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None),
|
||||
(
|
||||
2,
|
||||
"Ready workers",
|
||||
"Workers ready",
|
||||
f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})',
|
||||
WORKER_SUFFIX,
|
||||
WORKER_TOTAL,
|
||||
@ -480,6 +477,7 @@ def build_overview():
|
||||
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
||||
text_mode="value_and_name",
|
||||
legend="{{node}}",
|
||||
instant=True,
|
||||
links=link_to("atlas-nodes"),
|
||||
)
|
||||
)
|
||||
@ -1016,6 +1014,7 @@ def build_network_dashboard():
|
||||
{"h": 4, "w": 8, "x": 16, "y": 0},
|
||||
unit="req/s",
|
||||
legend="{{router}}",
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
|
||||
@ -142,7 +142,8 @@
|
||||
{
|
||||
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{router}}"
|
||||
"legendFormat": "{{router}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
|
||||
@ -70,7 +70,7 @@
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Ready workers",
|
||||
"title": "Workers ready",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -440,7 +440,8 @@
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -512,7 +513,8 @@
|
||||
{
|
||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -584,7 +586,8 @@
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -652,7 +655,8 @@
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -719,13 +723,13 @@
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"displayName": "{{namespace}}"
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -761,13 +765,13 @@
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"displayName": "{{namespace}}"
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
|
||||
@ -151,7 +151,8 @@ data:
|
||||
{
|
||||
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{router}}"
|
||||
"legendFormat": "{{router}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
|
||||
@ -79,7 +79,7 @@ data:
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Ready workers",
|
||||
"title": "Workers ready",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -449,7 +449,8 @@ data:
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -521,7 +522,8 @@ data:
|
||||
{
|
||||
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -593,7 +595,8 @@ data:
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -661,7 +664,8 @@ data:
|
||||
{
|
||||
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{node}}"
|
||||
"legendFormat": "{{node}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
@ -728,13 +732,13 @@ data:
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"displayName": "{{namespace}}"
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
@ -770,13 +774,13 @@ data:
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
|
||||
"refId": "A"
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"displayName": "{{namespace}}"
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
|
||||
@ -209,6 +209,16 @@ spec:
|
||||
- action: keep
|
||||
source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_pod_label_app_kubernetes_io_part_of]
|
||||
regex: flux-system;flux
|
||||
- job_name: "titan-db"
|
||||
static_configs:
|
||||
- targets: ["titan-db:9100"]
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: instance
|
||||
metric_relabel_configs:
|
||||
- source_labels: [instance]
|
||||
target_label: node
|
||||
replacement: titan-db
|
||||
|
||||
---
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user