titan-iac/services/monitoring/scripts/jetson_tegrastats_exporter.py

88 lines
2.6 KiB
Python
Raw Normal View History

2026-01-13 09:59:39 -03:00
import http.server
import os
import re
import socketserver
import subprocess
from time import time
2026-01-13 09:59:39 -03:00
# TCP port the exporter listens on; override with JETSON_EXPORTER_PORT.
PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))

# Value of the `node` label on every metric. An unset or empty NODE_NAME
# environment variable falls back to the kernel-reported host name.
NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename

# Metric name suffix -> default value. Each scrape starts from a copy of this
# mapping, so every metric is always exported even when tegrastats yields
# nothing parseable.
BASE_METRICS = {
    "gr3d_freq_percent": 0.0,
    "gpu_temp_c": 0.0,
    "cpu_temp_c": 0.0,
    "ram_used_mb": 0.0,
    "ram_total_mb": 0.0,
    "power_5v_in_mw": 0.0,
    "last_scrape_ts": 0.0,
}
# Patterns are compiled once at import time. They are raw strings with SINGLE
# backslashes: a doubled backslash inside r"..." matches a literal "\" in the
# input, which tegrastats never prints.
_GR3D_RE = re.compile(r"GR3D_FREQ\s+(\d+)%")
# NOTE(review): some tegrastats builds appear to print sensor names in lower
# case ("cpu@46.5C"); matching case-insensitively accepts both spellings.
_GPU_TEMP_RE = re.compile(r"GPU@(\d+(?:\.\d+)?)C", re.IGNORECASE)
_CPU_TEMP_RE = re.compile(r"CPU@(\d+(?:\.\d+)?)C", re.IGNORECASE)
_RAM_RE = re.compile(r"RAM\s+(\d+)/(\d+)MB")
# Accepts both "POM_5V_IN 2159/2159" and "VDD_IN 4948mW/4948mW"; only the first
# number of the pair is exported.
_POWER_RE = re.compile(r"(?:POM_5V_IN|VDD_IN)\s+(\d+)(?:mW)?/\d+")


def parse_line(line: str) -> dict:
    """Extract the known metrics from one line of tegrastats output.

    Args:
        line: A single line as printed by tegrastats; surrounding whitespace
            is ignored.

    Returns:
        A dict keyed by BASE_METRICS-style names, containing only the fields
        that were found in ``line``. An empty or unrecognised line yields an
        empty dict.
    """
    line = line.strip()
    updates = {}

    m = _GR3D_RE.search(line)
    if m:
        updates["gr3d_freq_percent"] = float(m.group(1))

    m = _GPU_TEMP_RE.search(line)
    if m:
        updates["gpu_temp_c"] = float(m.group(1))

    m = _CPU_TEMP_RE.search(line)
    if m:
        updates["cpu_temp_c"] = float(m.group(1))

    m = _RAM_RE.search(line)
    if m:
        updates["ram_used_mb"] = float(m.group(1))
        updates["ram_total_mb"] = float(m.group(2))

    m = _POWER_RE.search(line)
    if m:
        updates["power_5v_in_mw"] = float(m.group(1))

    return updates
2026-01-13 09:59:39 -03:00
def read_latest_line() -> str:
    """Run tegrastats just long enough to capture one line of output.

    A fresh ``/host/usr/bin/tegrastats`` process is started, its first output
    line (stdout and stderr merged) is read, and the process is then stopped.

    Returns:
        The first line printed by tegrastats, including its trailing newline,
        or an empty string if the process could not be started (or produced
        no output before closing its pipe).
    """
    try:
        # Using Popen as a context manager closes the stdout pipe and reaps
        # the child on exit, so no file descriptor or zombie process is left
        # behind on any path, including the kill() fallback below.
        with subprocess.Popen(
            ["/host/usr/bin/tegrastats", "--interval", "1000"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        ) as proc:
            line = proc.stdout.readline()
            proc.terminate()
            try:
                proc.wait(timeout=1)
            except subprocess.TimeoutExpired:
                # SIGTERM was ignored; force the process down. The context
                # manager's exit performs the final wait().
                proc.kill()
        return line
    except OSError:
        # Binary missing or not executable: report "no data" instead of
        # failing the scrape.
        return ""
2026-01-13 09:59:39 -03:00
class Handler(http.server.BaseHTTPRequestHandler):
    """HTTP handler that serves Jetson metrics in Prometheus text format."""

    def do_GET(self):
        """Answer ``GET /metrics`` with the current gauges; 404 anything else.

        Each request starts from a copy of BASE_METRICS, overlays whatever
        ``parse_line`` extracts from a freshly read tegrastats line, and
        stamps ``last_scrape_ts`` with the current time (also when no line
        could be read).
        """
        if self.path != "/metrics":
            self.send_response(404)
            self.end_headers()
            return

        metrics = BASE_METRICS.copy()
        line = read_latest_line()
        if line:
            metrics.update(parse_line(line))
        metrics["last_scrape_ts"] = time()

        # Label values in the text exposition format must escape backslash,
        # double quote and line feed.
        node = (
            NODE_NAME.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )
        label = f'{{node="{node}"}}'

        out = []
        for k, v in metrics.items():
            out.append(f"# TYPE jetson_{k} gauge")
            out.append(f"jetson_{k}{label} {v}")

        # Encode first so Content-Length counts bytes, not characters; the two
        # differ as soon as the node name contains non-ASCII text.
        payload = ("\n".join(out) + "\n").encode("utf-8")

        self.send_response(200)
        self.send_header("Content-Type", "text/plain; version=0.0.4")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, fmt, *args):
        """Suppress the default per-request logging."""
        return
if __name__ == "__main__":
    # Permit rebinding the port immediately after a restart; without this a
    # socket lingering in TIME_WAIT can make bind() fail with
    # "Address already in use".
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", PORT), Handler) as httpd:
        httpd.serve_forever()