From a14726350cff30dddc2727c5fa68bc35669df4cb Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 09:50:40 -0300 Subject: [PATCH] monitoring: add titan-jh control plane node --- scripts/dashboards_render_atlas.py | 2 +- services/monitoring/dashboards/atlas-nodes.json | 4 ++-- services/monitoring/dashboards/atlas-overview.json | 4 ++-- services/monitoring/dashboards/atlas-pods.json | 2 +- services/monitoring/grafana-dashboard-nodes.yaml | 4 ++-- services/monitoring/grafana-dashboard-overview.yaml | 4 ++-- services/monitoring/grafana-dashboard-pods.yaml | 2 +- services/monitoring/helmrelease.yaml | 9 +++++++++ 8 files changed, 20 insertions(+), 11 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 0dcd5dd..fa8f609 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -53,7 +53,7 @@ NAMESPACE_CPU_WINDOW = "1m" # --------------------------------------------------------------------------- CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"] -CONTROL_DEPENDENCIES = ["titan-db"] +CONTROL_DEPENDENCIES = ["titan-db", "titan-jh"] CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES WORKER_NODES = [ "titan-04", diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index ff69739..9be3adb 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -489,7 +489,7 @@ }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -526,7 +526,7 @@ }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 957a6ed..7a9c73e 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1711,7 +1711,7 @@ }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1748,7 +1748,7 @@ }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index b6d0be0..0d46f90 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -520,7 +520,7 @@ }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)))))", "refId": "A", "instant": true, "format": "table" diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 854f68a..1b87c60 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -498,7 +498,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -535,7 +535,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index cccde73..82ca78c 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1720,7 +1720,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } @@ -1757,7 +1757,7 @@ data: }, "targets": [ { - "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", + "expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")", "refId": "A", "legendFormat": "{{node}}" } diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 7d02e22..dda1a41 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -529,7 +529,7 @@ data: }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.022)))))", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 0 + 0.001) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 0 + 0.002) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 0 + 0.003) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 0 + 0.004) or (sum by (node) (kube_node_info{node=\"titan-jh\"}) * 0 + 0.005) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 0 + 0.006) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 0 + 0.007) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 0 + 0.008) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 0 + 0.009000000000000001) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 0 + 0.01) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 0 + 0.011) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 0 + 0.012) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 0 + 0.013000000000000001) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 0 + 0.014) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 0 + 0.015) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 0 + 0.016) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 0 + 0.017) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 0 + 0.018000000000000002) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 0 + 0.019) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 0 + 0.02) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 0 + 0.021) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 0 + 0.022) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 0 + 0.023)))))", "refId": "A", "instant": true, "format": "table" diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 40db9f4..ddd24e5 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -195,6 +195,15 @@ spec: target_label: instance replacement: titan-db + # --- titan-jh node_exporter (external control-plane host) --- + - job_name: "titan-jh" + static_configs: + - targets: ["192.168.22.8:9100"] + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: titan-jh + # --- cert-manager (pods expose on 9402) --- - job_name: "cert-manager" kubernetes_sd_configs: [{ role: pod }]