monitoring: read tegrastats per scrape

This commit is contained in:
Brad Stein 2026-01-27 16:34:31 -03:00
parent 246ed6617e
commit a7f3d49fea
2 changed files with 14 additions and 20 deletions

View File

@@ -17,7 +17,7 @@ spec:
       annotations:
         prometheus.io/scrape: "true"
         prometheus.io/port: "9100"
-        monitoring.bstein.dev/restart-rev: "4"
+        monitoring.bstein.dev/restart-rev: "5"
     spec:
       serviceAccountName: default
       hostPID: true

View File

@@ -7,7 +7,6 @@ from time import time
 PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))
 NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename
-LOGFILE = "/tmp/tegrastats.log"
 BASE_METRICS = {
     "gr3d_freq_percent": 0.0,
     "gpu_temp_c": 0.0,
@@ -39,25 +38,21 @@ def parse_line(line: str) -> dict:
         updates["power_5v_in_mw"] = float(m.group(1))
     return updates
-def start_tegrastats():
-    subprocess.Popen(
-        ["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", LOGFILE],
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.STDOUT,
-        text=True,
-    )
 def read_latest_line() -> str:
-    if not os.path.exists(LOGFILE):
-        return ""
     try:
-        with open(LOGFILE, "rb") as handle:
-            handle.seek(0, os.SEEK_END)
-            size = handle.tell()
-            handle.seek(max(size - 4096, 0), os.SEEK_SET)
-            tail = handle.read().decode("utf-8", errors="ignore").splitlines()
-        return tail[-1] if tail else ""
+        proc = subprocess.Popen(
+            ["/host/usr/bin/tegrastats", "--interval", "1000"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+        )
+        line = proc.stdout.readline()
+        proc.terminate()
+        try:
+            proc.wait(timeout=1)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+        return line
     except OSError:
         return ""
@@ -88,6 +83,5 @@ class Handler(http.server.BaseHTTPRequestHandler):
         return
 if __name__ == "__main__":
-    start_tegrastats()
     with socketserver.TCPServer(("", PORT), Handler) as httpd:
         httpd.serve_forever()