monitoring: refresh jetson stats on scrape

This commit is contained in:
Brad Stein 2026-01-27 16:23:23 -03:00
parent 62a423f32c
commit 1951291090
2 changed files with 22 additions and 17 deletions

View File

@@ -17,7 +17,7 @@ spec:
annotations: annotations:
prometheus.io/scrape: "true" prometheus.io/scrape: "true"
prometheus.io/port: "9100" prometheus.io/port: "9100"
monitoring.bstein.dev/restart-rev: "2" monitoring.bstein.dev/restart-rev: "3"
spec: spec:
serviceAccountName: default serviceAccountName: default
hostPID: true hostPID: true

View File

@@ -4,10 +4,11 @@ import re
import socketserver import socketserver
import subprocess import subprocess
import threading import threading
from time import sleep, time from time import time
PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100")) PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))
NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename
LOGFILE = "/tmp/tegrastats.log"
METRICS = { METRICS = {
"gr3d_freq_percent": 0.0, "gr3d_freq_percent": 0.0,
"gpu_temp_c": 0.0, "gpu_temp_c": 0.0,
@@ -42,24 +43,28 @@ def parse_line(line: str):
METRICS.update(updates) METRICS.update(updates)
METRICS["last_scrape_ts"] = time() METRICS["last_scrape_ts"] = time()
def run_tegrastats(): def start_tegrastats():
logfile = "/tmp/tegrastats.log"
subprocess.Popen( subprocess.Popen(
["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", logfile], ["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", LOGFILE],
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
text=True, text=True,
) )
while not os.path.exists(logfile):
sleep(0.1)
with open(logfile, "r", encoding="utf-8", errors="ignore") as handle: def refresh_from_log():
handle.seek(0, os.SEEK_END) if not os.path.exists(LOGFILE):
while True: return
line = handle.readline() try:
if not line: with open(LOGFILE, "rb") as handle:
sleep(0.2) handle.seek(0, os.SEEK_END)
continue size = handle.tell()
parse_line(line) handle.seek(max(size - 4096, 0), os.SEEK_SET)
tail = handle.read().decode("utf-8", errors="ignore").splitlines()
if tail:
parse_line(tail[-1])
except OSError:
return
class Handler(http.server.BaseHTTPRequestHandler): class Handler(http.server.BaseHTTPRequestHandler):
def do_GET(self): def do_GET(self):
@@ -67,6 +72,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_response(404) self.send_response(404)
self.end_headers() self.end_headers()
return return
refresh_from_log()
with LOCK: with LOCK:
metrics = METRICS.copy() metrics = METRICS.copy()
out = [] out = []
@@ -85,7 +91,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
return return
if __name__ == "__main__": if __name__ == "__main__":
t = threading.Thread(target=run_tegrastats, daemon=True) start_tegrastats()
t.start()
with socketserver.TCPServer(("", PORT), Handler) as httpd: with socketserver.TCPServer(("", PORT), Handler) as httpd:
httpd.serve_forever() httpd.serve_forever()