# services/monitoring/vmalert-atlas-availability.yaml
#
# Dedicated vmalert instance that precomputes "atlas availability" rollups as
# recording rules and writes them back to VictoriaMetrics.
#
# Documents in this file:
#   1. ConfigMap      - the vmalert rule file (mounted at /etc/vmalert/rules).
#   2. ServiceAccount - identity the vmalert pod runs as.
#   3. Deployment     - single-replica vmalert that evaluates the rules.
#   4. Service        - exposes vmalert's HTTP port (8880) inside the cluster.
---
# Rule file for the `atlas.availability` group.
#
# Both recording rules average the same instantaneous availability signal over
# a subquery window; only the window and step differ:
#   atlas:availability:ratio_1h   -> [1h:5m]
#   atlas:availability:ratio_365d -> [365d:6h]
#
# The instantaneous signal is min() of two ratios (MetricsQL multi-argument
# aggregate form):
#   a) Ready nodes among titan-0a/0b/0c, divided by the fixed node count 3.
#   b) Available traefik replicas divided by desired replicas; clamp_min(.., 1)
#      keeps the denominator at 1 or above so the division never hits zero.
#
# The group is evaluated every 15m with a 7m eval_offset.
apiVersion: v1
kind: ConfigMap
metadata:
  name: vmalert-atlas-availability-rules
  namespace: monitoring
data:
  atlas-availability.yaml: |
    groups:
      - name: atlas.availability
        interval: 15m
        eval_offset: 7m
        rules:
          - record: atlas:availability:ratio_1h
            expr: |
              avg_over_time((
                min(
                  (
                    sum(kube_node_status_condition{condition="Ready",status="true",node=~"titan-0a|titan-0b|titan-0c"})
                    / 3
                  ),
                  (
                    sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})
                    /
                    clamp_min(sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)
                  )
                )
              )[1h:5m])
            labels:
              scope: atlas
              rollup: hourly
          - record: atlas:availability:ratio_365d
            expr: |
              avg_over_time((
                min(
                  (
                    sum(kube_node_status_condition{condition="Ready",status="true",node=~"titan-0a|titan-0b|titan-0c"})
                    / 3
                  ),
                  (
                    sum(kube_deployment_status_replicas_available{namespace=~"traefik|kube-system",deployment="traefik"})
                    /
                    clamp_min(sum(kube_deployment_spec_replicas{namespace=~"traefik|kube-system",deployment="traefik"}), 1)
                  )
                )
              )[365d:6h])
            labels:
              scope: atlas
              rollup: yearly
---
# Service account referenced by the Deployment's serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: vmalert-atlas-availability
  namespace: monitoring
---
# vmalert Deployment: reads from and remote-writes to the same
# victoria-metrics-single-server endpoint, loading every *.yaml file from the
# mounted rules ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vmalert-atlas-availability
  namespace: monitoring
  labels:
    app: vmalert-atlas-availability
spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: vmalert-atlas-availability
  template:
    metadata:
      labels:
        app: vmalert-atlas-availability
      annotations:
        # NOTE(review): presumably bumped by hand to roll the pod when the
        # rules ConfigMap changes - confirm against the deploy workflow.
        bstein.dev/rules-revision: "2026-05-10-availability-rollup-v2"
    spec:
      serviceAccountName: vmalert-atlas-availability
      affinity:
        nodeAffinity:
          # Hard scheduling constraint: never place the pod on titan-22 or
          # titan-24.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-22
                      - titan-24
      containers:
        - name: vmalert
          image: victoriametrics/vmalert:v1.113.0
          args:
            - -datasource.url=http://victoria-metrics-single-server:8428
            - -remoteWrite.url=http://victoria-metrics-single-server:8428
            - -rule=/etc/vmalert/rules/*.yaml
            # Same 15m cadence as the group's explicit `interval`.
            - -evaluationInterval=15m
            - -httpListenAddr=:8880
          ports:
            - name: http
              containerPort: 8880
          readinessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: 20
            periodSeconds: 30
          resources:
            requests:
              cpu: 25m
              memory: 64Mi
            limits:
              cpu: 500m
              memory: 256Mi
          volumeMounts:
            - name: rules
              mountPath: /etc/vmalert/rules
              readOnly: true
      volumes:
        - name: rules
          configMap:
            name: vmalert-atlas-availability-rules
---
# ClusterIP Service in front of vmalert's HTTP listener; the prometheus.io/*
# annotations advertise port 8880 for annotation-based scrape discovery.
apiVersion: v1
kind: Service
metadata:
  name: vmalert-atlas-availability
  namespace: monitoring
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8880"
spec:
  selector:
    app: vmalert-atlas-availability
  ports:
    - name: http
      port: 8880
      targetPort: http