# services/monitoring/jetson-tegrastats-exporter.yaml
#
# Deploys a small Python exporter to every node labelled `jetson: "true"`.
# The exporter runs the host's `tegrastats` binary, parses each output line
# and serves the latest values as Prometheus gauges on :9100/metrics.
#
# Documents in this file:
#   1. DaemonSet  - runs the exporter pod on each Jetson node.
#   2. Service    - exposes the pods' metrics port inside the cluster.
#   3. ConfigMap  - carries the exporter script mounted into the pod.
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: jetson-tegrastats-exporter
  namespace: monitoring
  labels:
    app: jetson-tegrastats-exporter
spec:
  selector:
    matchLabels:
      app: jetson-tegrastats-exporter
  template:
    metadata:
      labels:
        app: jetson-tegrastats-exporter
      annotations:
        # Annotation values must be strings, hence the quotes.
        prometheus.io/scrape: "true"
        prometheus.io/port: "9100"
    spec:
      serviceAccountName: default
      hostPID: true
      # A bare `operator: Exists` tolerates every taint.
      tolerations:
        - operator: Exists
      nodeSelector:
        jetson: "true"
      containers:
        - name: exporter
          # Exposes tegrastats output as Prometheus metrics for Jetson devices.
          image: python:3.10-slim
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          ports:
            - name: metrics
              containerPort: 9100
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 256Mi
          env:
            # Read by exporter.py to choose the listen port; keep in sync
            # with containerPort and the prometheus.io/port annotation.
            - name: JETSON_EXPORTER_PORT
              value: "9100"
          volumeMounts:
            - name: script
              mountPath: /etc/tegrastats-exporter
              readOnly: true
            # The host's tegrastats binary, mounted as a single file.
            - name: tegrastats-bin
              mountPath: /host/usr/bin/tegrastats
              readOnly: true
          command:
            - python
            - /etc/tegrastats-exporter/exporter.py
      volumes:
        - name: script
          configMap:
            name: jetson-tegrastats-exporter-script
            # Octal r-xr-xr-x; Kubernetes' YAML loader reads the leading
            # zero as octal (a JSON manifest would need decimal 365).
            defaultMode: 0555
        - name: tegrastats-bin
          hostPath:
            path: /usr/bin/tegrastats
            type: File
---
apiVersion: v1
kind: Service
metadata:
  name: jetson-tegrastats-exporter
  namespace: monitoring
  labels:
    app: jetson-tegrastats-exporter
spec:
  selector:
    app: jetson-tegrastats-exporter
  ports:
    - name: metrics
      port: 9100
      # Resolves to the container port named "metrics" in the DaemonSet.
      targetPort: metrics
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: jetson-tegrastats-exporter-script
  namespace: monitoring
data:
  # Literal block scalar: YAML passes backslashes through unchanged, so the
  # Python below must use single backslashes in regexes and "\n" escapes.
  exporter.py: |
    """Serve the latest tegrastats readings as Prometheus gauges."""
    import http.server
    import os
    import re
    import socketserver
    import subprocess
    import threading
    from time import time

    PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))

    # Latest parsed values, keyed by metric name without the "jetson_" prefix.
    # Every key is exported, so values stay 0.0 until a line has been parsed.
    METRICS = {
        "gr3d_freq_percent": 0.0,
        "gpu_temp_c": 0.0,
        "cpu_temp_c": 0.0,
        "ram_used_mb": 0.0,
        "ram_total_mb": 0.0,
        "power_5v_in_mw": 0.0,
        "last_scrape_ts": 0.0,
    }
    # Guards METRICS: written by the tegrastats reader thread and read by
    # the HTTP request handlers.
    LOCK = threading.Lock()


    def parse_line(line: str) -> None:
        """Extract known fields from one tegrastats line into METRICS.

        Fields that are absent from the line keep their previous value.
        `last_scrape_ts` is set to the current time on every call, whether
        or not any field matched.
        """
        updates = {}
        m = re.search(r"GR3D_FREQ\s+(\d+)%", line)
        if m:
            updates["gr3d_freq_percent"] = float(m.group(1))
        m = re.search(r"GPU@(\d+(?:\.\d+)?)C", line)
        if m:
            updates["gpu_temp_c"] = float(m.group(1))
        m = re.search(r"CPU@(\d+(?:\.\d+)?)C", line)
        if m:
            updates["cpu_temp_c"] = float(m.group(1))
        m = re.search(r"RAM\s+(\d+)/(\d+)MB", line)
        if m:
            updates["ram_used_mb"] = float(m.group(1))
            updates["ram_total_mb"] = float(m.group(2))
        # Only the first number of the "a/b" pair is exported.
        m = re.search(r"POM_5V_IN\s+(\d+)/(\d+)", line)
        if m:
            updates["power_5v_in_mw"] = float(m.group(1))
        with LOCK:
            METRICS.update(updates)
            METRICS["last_scrape_ts"] = time()


    def run_tegrastats() -> None:
        """Run tegrastats and feed each output line to parse_line.

        NOTE(review): when tegrastats exits, this loop ends and the metrics
        stop updating while the HTTP server keeps serving the last values;
        `jetson_last_scrape_ts` is the only staleness signal.
        """
        proc = subprocess.Popen(
            ["/host/usr/bin/tegrastats", "--interval", "1000"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # stderr shares the parsed stream
            text=True,
            bufsize=1,  # line-buffered so readings arrive as they are printed
        )
        for line in proc.stdout:
            parse_line(line)


    class Handler(http.server.BaseHTTPRequestHandler):
        """Answers GET /metrics with the current METRICS snapshot."""

        def do_GET(self):
            """Serve /metrics in Prometheus text format; 404 anything else."""
            if self.path != "/metrics":
                self.send_response(404)
                self.end_headers()
                return
            # Copy under the lock so the response is a consistent snapshot.
            with LOCK:
                metrics = METRICS.copy()
            out = []
            for k, v in metrics.items():
                out.append(f"# TYPE jetson_{k} gauge")
                out.append(f"jetson_{k} {v}")
            # Encode once so Content-Length counts bytes, not characters.
            payload = ("\n".join(out) + "\n").encode("utf-8")
            self.send_response(200)
            self.send_header("Content-Type", "text/plain; version=0.0.4")
            self.send_header("Content-Length", str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def log_message(self, fmt, *args):
            """Suppress the default per-request access log."""
            return


    class MetricsServer(socketserver.ThreadingTCPServer):
        """TCP server that handles each scrape in its own thread."""

        # Allow re-binding the port right after a restart.
        allow_reuse_address = True
        # Do not let in-flight request threads block interpreter shutdown.
        daemon_threads = True


    if __name__ == "__main__":
        # Daemon thread: it must not keep the process alive on its own.
        t = threading.Thread(target=run_tegrastats, daemon=True)
        t.start()
        with MetricsServer(("", PORT), Handler) as httpd:
            httpd.serve_forever()