monitoring: refresh jetson stats on scrape

This commit is contained in:
Brad Stein 2026-01-27 16:23:23 -03:00
parent 62a423f32c
commit 1951291090
2 changed files with 22 additions and 17 deletions

View File

@ -17,7 +17,7 @@ spec:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9100"
monitoring.bstein.dev/restart-rev: "2"
monitoring.bstein.dev/restart-rev: "3"
spec:
serviceAccountName: default
hostPID: true

View File

@ -4,10 +4,11 @@ import re
import socketserver
import subprocess
import threading
from time import sleep, time
from time import time
PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))
NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename
LOGFILE = "/tmp/tegrastats.log"
METRICS = {
"gr3d_freq_percent": 0.0,
"gpu_temp_c": 0.0,
@ -42,24 +43,28 @@ def parse_line(line: str):
METRICS.update(updates)
METRICS["last_scrape_ts"] = time()
def run_tegrastats():
logfile = "/tmp/tegrastats.log"
def start_tegrastats():
subprocess.Popen(
["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", logfile],
["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", LOGFILE],
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT,
text=True,
)
while not os.path.exists(logfile):
sleep(0.1)
with open(logfile, "r", encoding="utf-8", errors="ignore") as handle:
handle.seek(0, os.SEEK_END)
while True:
line = handle.readline()
if not line:
sleep(0.2)
continue
parse_line(line)
def refresh_from_log():
    """Parse the newest complete sample from the tail of LOGFILE.

    Reads at most the last 4096 bytes of LOGFILE and passes the most recent
    non-blank, newline-terminated line to ``parse_line``. Returns ``None``
    without calling ``parse_line`` when the file is missing or unreadable,
    or when the tail holds no complete line.
    """
    try:
        with open(LOGFILE, "rb") as handle:
            handle.seek(0, os.SEEK_END)
            size = handle.tell()
            start = max(size - 4096, 0)
            handle.seek(start, os.SEEK_SET)
            chunk = handle.read()
    except OSError:
        # Covers a log file that does not exist yet as well as read errors,
        # so no separate (race-prone) existence check is needed.
        return

    # Anything after the final newline may be a line that is still being
    # appended; parsing it could yield truncated values.
    last_newline = chunk.rfind(b"\n")
    if last_newline < 0:
        return
    lines = chunk[:last_newline].decode("utf-8", errors="ignore").splitlines()

    # When the window starts inside the file, its first fragment may be the
    # back half of a line cut at the 4096-byte boundary; discard it.
    if start > 0:
        lines = lines[1:]

    for line in reversed(lines):
        if line.strip():
            parse_line(line)
            return
class Handler(http.server.BaseHTTPRequestHandler):
def do_GET(self):
@ -67,6 +72,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_response(404)
self.end_headers()
return
refresh_from_log()
with LOCK:
metrics = METRICS.copy()
out = []
@ -85,7 +91,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
return
if __name__ == "__main__":
t = threading.Thread(target=run_tegrastats, daemon=True)
t.start()
start_tegrastats()
with socketserver.TCPServer(("", PORT), Handler) as httpd:
httpd.serve_forever()