titan-iac/services/monitoring/scripts/jetson_tegrastats_exporter.py

92 lines
2.7 KiB
Python

import http.server
import os
import re
import socketserver
import subprocess
import threading
from time import sleep, time
# TCP port the exporter's HTTP server listens on; override with JETSON_EXPORTER_PORT.
PORT = int(os.getenv("JETSON_EXPORTER_PORT", "9100"))

# Value of the ``node`` label attached to every sample. An unset or empty
# NODE_NAME falls back to the kernel-reported hostname.
NODE_NAME = os.getenv("NODE_NAME") or os.uname().nodename

# Most recent values parsed from tegrastats, keyed by metric-name suffix.
# Every gauge starts at 0.0 until the first sample is parsed; "last_scrape_ts"
# holds the time() of the most recently parsed line.
METRICS = dict.fromkeys(
    (
        "gr3d_freq_percent",
        "gpu_temp_c",
        "cpu_temp_c",
        "ram_used_mb",
        "ram_total_mb",
        "power_5v_in_mw",
        "last_scrape_ts",
    ),
    0.0,
)

# Guards METRICS, which is written by the tegrastats reader thread and read by
# the HTTP handler.
LOCK = threading.Lock()
# Compiled once at import time because parse_line runs for every tegrastats sample.
# Each entry pairs a pattern with the METRICS keys filled from its capture
# groups, in group order; surplus groups are ignored.
_FIELD_PATTERNS = (
    (re.compile(r"GR3D_FREQ\s+(\d+)%"), ("gr3d_freq_percent",)),
    (re.compile(r"GPU@(\d+(?:\.\d+)?)C"), ("gpu_temp_c",)),
    (re.compile(r"CPU@(\d+(?:\.\d+)?)C"), ("cpu_temp_c",)),
    (re.compile(r"RAM\s+(\d+)/(\d+)MB"), ("ram_used_mb", "ram_total_mb")),
    # Only the first number of the power pair is exported. The optional "mW"
    # suffix accepts the "5071mW/5071mW" form as well as the bare "2532/2532".
    (
        re.compile(r"(?:POM_5V_IN|VDD_IN)\s+(\d+)(?:mW)?/(\d+)(?:mW)?"),
        ("power_5v_in_mw",),
    ),
)


def parse_line(line: str):
    """Extract known fields from one tegrastats line and publish them to METRICS.

    Fields absent from ``line`` keep their previous value. ``last_scrape_ts`` is
    refreshed on every call, whether or not any field matched.

    Args:
        line: One line of tegrastats output; surrounding whitespace is irrelevant.

    Returns:
        None. The result is written into the shared METRICS dict under LOCK.
    """
    updates = {}
    for pattern, keys in _FIELD_PATTERNS:
        match = pattern.search(line)
        if match:
            # zip() stops at the shorter sequence, so a pattern may capture
            # more groups than it exports.
            updates.update(
                (key, float(value)) for key, value in zip(keys, match.groups())
            )
    with LOCK:
        METRICS.update(updates)
        METRICS["last_scrape_ts"] = time()
def run_tegrastats():
    """Start tegrastats and feed every new line of its log file to ``parse_line``.

    tegrastats is launched from ``/host/usr/bin`` with a 1000 ms interval and
    told to write to ``/tmp/tegrastats.log``. This function waits for that file
    to appear, seeks to its end so content already there is skipped, and then
    polls for new lines indefinitely. It never returns normally.

    Raises:
        RuntimeError: if tegrastats exits with a non-zero status, because no
            further samples can arrive after that.
        OSError: if the tegrastats binary cannot be executed or the log file
            cannot be opened.
    """
    logfile = "/tmp/tegrastats.log"
    # Keep the handle so the child's exit status can be polled; without it a
    # failed tegrastats would leave the loops below spinning forever.
    proc = subprocess.Popen(
        ["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", logfile],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
    )

    def ensure_running():
        # poll() is None while the child runs. A zero status is tolerated so a
        # clean exit of the launcher process is not treated as a failure.
        status = proc.poll()
        if status not in (None, 0):
            raise RuntimeError(f"tegrastats exited with status {status}")

    while not os.path.exists(logfile):
        ensure_running()
        sleep(0.1)

    with open(logfile, "r", encoding="utf-8", errors="ignore") as handle:
        # Start at the current end of the file so only new samples are parsed.
        handle.seek(0, os.SEEK_END)
        while True:
            line = handle.readline()
            if line:
                parse_line(line)
                continue
            # Nothing new yet: make sure the producer is still alive, then
            # back off briefly before polling again.
            ensure_running()
            sleep(0.2)
class Handler(http.server.BaseHTTPRequestHandler):
    """Serve the current METRICS snapshot at ``/metrics`` in Prometheus text format."""

    @staticmethod
    def _escape_label_value(value):
        """Escape backslash, double quote and newline inside a label value."""
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    def do_GET(self):
        """Answer ``/metrics`` with one gauge per METRICS key; any other path gets 404."""
        if self.path != "/metrics":
            self.send_response(404)
            self.end_headers()
            return

        # Copy under the lock so rendering works on a consistent snapshot
        # without holding the lock while writing to the socket.
        with LOCK:
            metrics = METRICS.copy()

        label = f'{{node="{self._escape_label_value(NODE_NAME)}"}}'
        out = []
        for k, v in metrics.items():
            out.append(f"# TYPE jetson_{k} gauge")
            out.append(f"jetson_{k}{label} {v}")

        # Encode first: Content-Length counts bytes, which differs from the
        # character count as soon as the label contains non-ASCII text.
        payload = ("\n".join(out) + "\n").encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "text/plain; version=0.0.4")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, fmt, *args):
        """Discard the per-request log line the base class would otherwise emit."""
        return
if __name__ == "__main__":
    # Sample tegrastats in the background. daemon=True keeps this thread from
    # blocking interpreter shutdown when the server stops.
    threading.Thread(target=run_tegrastats, daemon=True).start()

    class ExporterServer(socketserver.ThreadingTCPServer):
        """TCP server with one thread per request and an immediately reusable port."""

        # Without this, restarting the exporter shortly after a stop can fail
        # with "Address already in use".
        allow_reuse_address = True
        # Request threads must not keep the process alive on shutdown.
        daemon_threads = True

    # An empty host binds all interfaces. Handling each request in its own
    # thread keeps one stalled client from blocking every other scrape.
    with ExporterServer(("", PORT), Handler) as httpd:
        httpd.serve_forever()