# Listing metadata from the file viewer: 212 lines, 7.9 KiB, YAML
# services/monitoring/kube-state-metrics-helmrelease.yaml
---
# kube-state-metrics, installed by Flux from the prometheus-community
# HelmRepository that lives in the flux-system namespace.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: kube-state-metrics
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: kube-state-metrics
      version: "~6.0.0"
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
        namespace: flux-system
  values:
    # Has the chart annotate the Service for annotation-based /metrics
    # auto-scrape. Because the chart already emits prometheus.io/scrape for
    # this flag, that key is deliberately NOT repeated under
    # service.annotations: listing it in both places can render the same
    # mapping key twice in the Service manifest, which strict YAML parsers
    # reject.
    prometheusScrape: true
    service:
      annotations:
        prometheus.io/port: "8080"  # ksm serves metrics on 8080 by default
        prometheus.io/path: "/metrics"

---
# Prometheus node-exporter, installed by Flux from the prometheus-community
# HelmRepository that lives in the flux-system namespace.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  namespace: monitoring
  name: node-exporter
spec:
  chart:
    spec:
      sourceRef:
        kind: HelmRepository
        namespace: flux-system
        name: prometheus-community
      chart: prometheus-node-exporter
      version: '~4.0.0'
  interval: 15m
  values:
    service:
      # Scrape hints placed on the Service; both values are strings, hence
      # the quotes.
      annotations:
        prometheus.io/port: '9100'
        prometheus.io/scrape: 'true'

---
# VictoriaMetrics single-node server, installed by Flux. Besides storage and
# retention, this release carries the scrape configuration for the cluster.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: victoria-metrics-single
  namespace: monitoring
spec:
  interval: 15m
  chart:
    spec:
      chart: victoria-metrics-single
      # Tilde range: stays on 0.15.x patch releases. Omitting `version`
      # makes Flux follow the latest chart release (not the appVersion).
      version: "~0.15.0"
      sourceRef:
        kind: HelmRepository
        name: victoria-metrics
        namespace: flux-system
  values:
    server:
      # Keep ~3 months; change as you like (supports "d", "y").
      # Set through the chart's dedicated retention value instead of
      # extraArgs, so -retentionPeriod is not passed a second time next to
      # the chart's own default.
      retentionPeriod: "90d"

      persistentVolume:
        enabled: true
        size: 100Gi  # adjust; uses default StorageClass (Longhorn)
        # storageClassName: ""  # set if you want a specific class

      # Enable built-in Kubernetes scraping
      scrape:
        enabled: true
        config:
          global:
            scrape_interval: 15s

          scrape_configs:
            # VM self-metrics
            - job_name: victoriametrics
              static_configs:
                - targets: ["localhost:8428"]

            # --- K8s control-plane & nodes (from VM docs guide) ---
            - job_name: kubernetes-apiservers
              kubernetes_sd_configs:
                - role: endpoints
              scheme: https
              tls_config:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                # NOTE(review): skipping verification makes ca_file moot;
                # confirm whether the cluster certificates really need this.
                insecure_skip_verify: true
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              relabel_configs:
                # Only the `https` port of the default/kubernetes Service.
                - action: keep
                  source_labels:
                    - __meta_kubernetes_namespace
                    - __meta_kubernetes_service_name
                    - __meta_kubernetes_endpoint_port_name
                  regex: default;kubernetes;https

            # Kubelet metrics, fetched through the API server node proxy.
            - job_name: kubernetes-nodes
              scheme: https
              tls_config:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                # NOTE(review): see kubernetes-apiservers; confirm necessity.
                insecure_skip_verify: true
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              kubernetes_sd_configs:
                - role: node
              relabel_configs:
                # Copy every node label onto the target.
                - action: labelmap
                  regex: __meta_kubernetes_node_label_(.+)
                # Send the request to the API server ...
                - target_label: __address__
                  replacement: kubernetes.default.svc:443
                # ... and let it proxy to the node's /metrics.
                - source_labels: [__meta_kubernetes_node_name]
                  regex: (.+)
                  target_label: __metrics_path__
                  replacement: /api/v1/nodes/$1/proxy/metrics

            # cAdvisor metrics, same API-server-proxy approach as above.
            - job_name: kubernetes-nodes-cadvisor
              scheme: https
              tls_config:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                # NOTE(review): see kubernetes-apiservers; confirm necessity.
                insecure_skip_verify: true
              bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
              kubernetes_sd_configs:
                - role: node
              relabel_configs:
                - action: labelmap
                  regex: __meta_kubernetes_node_label_(.+)
                - target_label: __address__
                  replacement: kubernetes.default.svc:443
                - source_labels: [__meta_kubernetes_node_name]
                  regex: (.+)
                  target_label: __metrics_path__
                  replacement: /api/v1/nodes/$1/proxy/metrics/cadvisor

            # --- Annotated Services (generic autodiscovery) ---
            - job_name: kubernetes-service-endpoints
              kubernetes_sd_configs:
                - role: endpoints
              relabel_configs:
                - action: keep
                  source_labels:
                    - __meta_kubernetes_service_annotation_prometheus_io_scrape
                  regex: "true"
                # kube-state-metrics has its own job further down; drop it
                # here so an annotated ksm Service is not ingested twice
                # under two different job labels.
                - action: drop
                  source_labels:
                    - __meta_kubernetes_service_label_app_kubernetes_io_name
                  regex: kube-state-metrics
                - action: replace
                  source_labels:
                    - __meta_kubernetes_service_annotation_prometheus_io_scheme
                  regex: (https?)
                  target_label: __scheme__
                # (.+) guard: leave the default path untouched when the
                # annotation is absent instead of overwriting it with "".
                - action: replace
                  source_labels:
                    - __meta_kubernetes_service_annotation_prometheus_io_path
                  regex: (.+)
                  target_label: __metrics_path__
                # Swap in the annotated port. The existing ":port" part is
                # optional so addresses without a port are rewritten too.
                - action: replace
                  source_labels:
                    - __address__
                    - __meta_kubernetes_service_annotation_prometheus_io_port
                  regex: '(.+?)(?::\d+)?;(\d+)'
                  replacement: '$1:$2'
                  target_label: __address__

            # --- Annotated Pods (generic autodiscovery) ---
            - job_name: kubernetes-pods
              kubernetes_sd_configs:
                - role: pod
              relabel_configs:
                - action: keep
                  source_labels:
                    - __meta_kubernetes_pod_annotation_prometheus_io_scrape
                  regex: "true"
                # Skip container ports whose name marks them as health ports.
                - action: drop
                  source_labels: [__meta_kubernetes_pod_container_port_name]
                  regex: ".*health.*"
                # (.+) guard: keep the default path when not annotated.
                - action: replace
                  source_labels:
                    - __meta_kubernetes_pod_annotation_prometheus_io_path
                  regex: (.+)
                  target_label: __metrics_path__
                # Pods that declare no container port are discovered with a
                # bare IP; the optional ":port" group makes the annotated
                # port apply to them as well.
                - action: replace
                  source_labels:
                    - __address__
                    - __meta_kubernetes_pod_annotation_prometheus_io_port
                  regex: '(.+?)(?::\d+)?;(\d+)'
                  replacement: '$1:$2'
                  target_label: __address__

            # --- kube-state-metrics (via its Service) ---
            - job_name: kube-state-metrics
              kubernetes_sd_configs:
                - role: endpoints
              relabel_configs:
                - action: keep
                  source_labels:
                    - __meta_kubernetes_service_label_app_kubernetes_io_name
                  regex: kube-state-metrics

            # --- Longhorn ---
            # Discover every endpoint behind longhorn-backend rather than
            # hitting the Service DNS name: a static Service target is
            # load-balanced, so each scrape would reach only one manager.
            # NOTE(review): assumes each longhorn-manager pod exports its own
            # node's metrics on :9500/metrics - confirm against Longhorn docs.
            - job_name: longhorn-backend
              metrics_path: /metrics
              kubernetes_sd_configs:
                - role: endpoints
              relabel_configs:
                - action: keep
                  source_labels:
                    - __meta_kubernetes_namespace
                    - __meta_kubernetes_service_name
                  regex: longhorn-system;longhorn-backend

            # --- cert-manager (pods expose on 9402) ---
            - job_name: cert-manager
              kubernetes_sd_configs:
                - role: pod
              relabel_configs:
                - action: keep
                  source_labels:
                    - __meta_kubernetes_namespace
                    - __meta_kubernetes_pod_label_app_kubernetes_io_name
                  regex: cert-manager;cert-manager
                - action: drop
                  source_labels: [__meta_kubernetes_pod_container_port_name]
                  regex: ".*health.*"
                # Force the metrics port regardless of the discovered one.
                - action: replace
                  source_labels: [__address__]
                  regex: '(.+):\d+'
                  replacement: '$1:9402'
                  target_label: __address__

            # --- Flux controllers (default :8080/metrics) ---
            - job_name: flux
              kubernetes_sd_configs:
                - role: pod
              relabel_configs:
                - action: keep
                  source_labels:
                    - __meta_kubernetes_namespace
                    - __meta_kubernetes_pod_label_app_kubernetes_io_part_of
                  regex: flux-system;flux
                # The pod role yields one target per declared container port;
                # keep only the metrics port so health/artifact ports are not
                # scraped. NOTE(review): `http-prom` is the port name used by
                # the upstream Flux manifests - confirm for this install.
                - action: keep
                  source_labels: [__meta_kubernetes_pod_container_port_name]
                  regex: http-prom