monitoring: read jetson stats on demand

This commit is contained in:
Brad Stein 2026-01-27 16:27:45 -03:00
parent 1951291090
commit 246ed6617e
2 changed files with 13 additions and 16 deletions

View File

@@ -17,7 +17,7 @@ spec:
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9100"
monitoring.bstein.dev/restart-rev: "3"
monitoring.bstein.dev/restart-rev: "4"
spec:
serviceAccountName: default
hostPID: true

View File

@@ -3,13 +3,12 @@ import os
import re
import socketserver
import subprocess
import threading
from time import time
PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))
NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename
LOGFILE = "/tmp/tegrastats.log"
METRICS = {
BASE_METRICS = {
"gr3d_freq_percent": 0.0,
"gpu_temp_c": 0.0,
"cpu_temp_c": 0.0,
@@ -18,9 +17,8 @@ METRICS = {
"power_5v_in_mw": 0.0,
"last_scrape_ts": 0.0,
}
LOCK = threading.Lock()
def parse_line(line: str):
def parse_line(line: str) -> dict:
line = line.strip()
updates = {}
m = re.search(r"GR3D_FREQ\\s+(\\d+)%", line)
@@ -39,9 +37,7 @@ def parse_line(line: str):
m = re.search(r"(?:POM_5V_IN|VDD_IN)\\s+(\\d+)/(\\d+)", line)
if m:
updates["power_5v_in_mw"] = float(m.group(1))
with LOCK:
METRICS.update(updates)
METRICS["last_scrape_ts"] = time()
return updates
def start_tegrastats():
subprocess.Popen(
@@ -52,19 +48,18 @@ def start_tegrastats():
)
def refresh_from_log():
def read_latest_line() -> str:
if not os.path.exists(LOGFILE):
return
return ""
try:
with open(LOGFILE, "rb") as handle:
handle.seek(0, os.SEEK_END)
size = handle.tell()
handle.seek(max(size - 4096, 0), os.SEEK_SET)
tail = handle.read().decode("utf-8", errors="ignore").splitlines()
if tail:
parse_line(tail[-1])
return tail[-1] if tail else ""
except OSError:
return
return ""
class Handler(http.server.BaseHTTPRequestHandler):
def do_GET(self):
@@ -72,9 +67,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
self.send_response(404)
self.end_headers()
return
refresh_from_log()
with LOCK:
metrics = METRICS.copy()
metrics = BASE_METRICS.copy()
line = read_latest_line()
if line:
metrics.update(parse_line(line))
metrics["last_scrape_ts"] = time()
out = []
label = f'{{node="{NODE_NAME}"}}'
for k, v in metrics.items():