From 19512910904b75a36e627377a2abdb190aa08ece Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 27 Jan 2026 16:23:23 -0300 Subject: [PATCH] monitoring: refresh jetson stats on scrape --- .../jetson-tegrastats-exporter.yaml | 2 +- .../scripts/jetson_tegrastats_exporter.py | 37 +++++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/services/monitoring/jetson-tegrastats-exporter.yaml b/services/monitoring/jetson-tegrastats-exporter.yaml index 0074394..a6612c6 100644 --- a/services/monitoring/jetson-tegrastats-exporter.yaml +++ b/services/monitoring/jetson-tegrastats-exporter.yaml @@ -17,7 +17,7 @@ spec: annotations: prometheus.io/scrape: "true" prometheus.io/port: "9100" - monitoring.bstein.dev/restart-rev: "2" + monitoring.bstein.dev/restart-rev: "3" spec: serviceAccountName: default hostPID: true diff --git a/services/monitoring/scripts/jetson_tegrastats_exporter.py b/services/monitoring/scripts/jetson_tegrastats_exporter.py index 3858d96..4cbf6ca 100644 --- a/services/monitoring/scripts/jetson_tegrastats_exporter.py +++ b/services/monitoring/scripts/jetson_tegrastats_exporter.py @@ -4,10 +4,11 @@ import re import socketserver import subprocess import threading -from time import sleep, time +from time import time PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100")) NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename +LOGFILE = "/tmp/tegrastats.log" METRICS = { "gr3d_freq_percent": 0.0, "gpu_temp_c": 0.0, @@ -42,24 +43,28 @@ def parse_line(line: str): METRICS.update(updates) METRICS["last_scrape_ts"] = time() -def run_tegrastats(): - logfile = "/tmp/tegrastats.log" +def start_tegrastats(): subprocess.Popen( - ["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", logfile], + ["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", LOGFILE], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, text=True, ) - while not os.path.exists(logfile): - sleep(0.1) - with open(logfile, "r", encoding="utf-8", errors="ignore") as handle: - handle.seek(0, os.SEEK_END) - while True: - line = handle.readline() - if not line: - sleep(0.2) - continue - parse_line(line) + + +def refresh_from_log(): + if not os.path.exists(LOGFILE): + return + try: + with open(LOGFILE, "rb") as handle: + handle.seek(0, os.SEEK_END) + size = handle.tell() + handle.seek(max(size - 4096, 0), os.SEEK_SET) + tail = handle.read().decode("utf-8", errors="ignore").splitlines() + if tail: + parse_line(tail[-1]) + except OSError: + return class Handler(http.server.BaseHTTPRequestHandler): def do_GET(self): @@ -67,6 +72,7 @@ class Handler(http.server.BaseHTTPRequestHandler): self.send_response(404) self.end_headers() return + refresh_from_log() with LOCK: metrics = METRICS.copy() out = [] @@ -85,7 +91,6 @@ class Handler(http.server.BaseHTTPRequestHandler): return if __name__ == "__main__": - t = threading.Thread(target=run_tegrastats, daemon=True) - t.start() + start_tegrastats() with socketserver.TCPServer(("", PORT), Handler) as httpd: httpd.serve_forever()