From 246ed6617e8b44caf1adee7423b790fa93d47861 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 27 Jan 2026 16:27:45 -0300 Subject: [PATCH] monitoring: read jetson stats on demand --- .../jetson-tegrastats-exporter.yaml | 2 +- .../scripts/jetson_tegrastats_exporter.py | 27 +++++++++---------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/services/monitoring/jetson-tegrastats-exporter.yaml b/services/monitoring/jetson-tegrastats-exporter.yaml index a6612c6..d80d83e 100644 --- a/services/monitoring/jetson-tegrastats-exporter.yaml +++ b/services/monitoring/jetson-tegrastats-exporter.yaml @@ -17,7 +17,7 @@ spec: annotations: prometheus.io/scrape: "true" prometheus.io/port: "9100" - monitoring.bstein.dev/restart-rev: "3" + monitoring.bstein.dev/restart-rev: "4" spec: serviceAccountName: default hostPID: true diff --git a/services/monitoring/scripts/jetson_tegrastats_exporter.py b/services/monitoring/scripts/jetson_tegrastats_exporter.py index 4cbf6ca..204e439 100644 --- a/services/monitoring/scripts/jetson_tegrastats_exporter.py +++ b/services/monitoring/scripts/jetson_tegrastats_exporter.py @@ -3,13 +3,12 @@ import os import re import socketserver import subprocess -import threading from time import time PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100")) NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename LOGFILE = "/tmp/tegrastats.log" -METRICS = { +BASE_METRICS = { "gr3d_freq_percent": 0.0, "gpu_temp_c": 0.0, "cpu_temp_c": 0.0, @@ -18,9 +17,8 @@ METRICS = { "power_5v_in_mw": 0.0, "last_scrape_ts": 0.0, } -LOCK = threading.Lock() -def parse_line(line: str): +def parse_line(line: str) -> dict: line = line.strip() updates = {} m = re.search(r"GR3D_FREQ\\s+(\\d+)%", line) @@ -39,9 +37,7 @@ def parse_line(line: str): m = re.search(r"(?:POM_5V_IN|VDD_IN)\\s+(\\d+)/(\\d+)", line) if m: updates["power_5v_in_mw"] = float(m.group(1)) - with LOCK: - METRICS.update(updates) - METRICS["last_scrape_ts"] = time() + return updates def start_tegrastats(): subprocess.Popen( @@ -52,19 +48,18 @@ def start_tegrastats(): ) -def refresh_from_log(): +def read_latest_line() -> str: if not os.path.exists(LOGFILE): - return + return "" try: with open(LOGFILE, "rb") as handle: handle.seek(0, os.SEEK_END) size = handle.tell() handle.seek(max(size - 4096, 0), os.SEEK_SET) tail = handle.read().decode("utf-8", errors="ignore").splitlines() - if tail: - parse_line(tail[-1]) + return tail[-1] if tail else "" except OSError: - return + return "" class Handler(http.server.BaseHTTPRequestHandler): def do_GET(self): @@ -72,9 +67,11 @@ class Handler(http.server.BaseHTTPRequestHandler): self.send_response(404) self.end_headers() return - refresh_from_log() - with LOCK: - metrics = METRICS.copy() + metrics = BASE_METRICS.copy() + line = read_latest_line() + if line: + metrics.update(parse_line(line)) + metrics["last_scrape_ts"] = time() out = [] label = f'{{node="{NODE_NAME}"}}' for k, v in metrics.items():