titan-iac/services/monitoring/scripts/jetson_tegrastats_exporter.py

90 lines
2.7 KiB
Python

import http.server
import os
import re
import socketserver
import subprocess
from time import time
# TCP port the exporter listens on; overridable through JETSON_EXPORTER_PORT.
PORT = int(os.getenv("JETSON_EXPORTER_PORT", "9100"))

# Value of the `node` label: NODE_NAME from the environment when it is set and
# non-empty, otherwise the hostname reported by uname().
NODE_NAME = os.getenv("NODE_NAME") or os.uname().nodename

# Every exported metric, initialised to 0.0 so a scrape always emits the full
# set even when a field could not be parsed. Insertion order is the order in
# which the metrics are written out.
BASE_METRICS = dict.fromkeys(
    (
        "gr3d_freq_percent",
        "gpu_temp_c",
        "cpu_temp_c",
        "ram_used_mb",
        "ram_total_mb",
        "power_5v_in_mw",
        "log_line_len",
        "last_scrape_ts",
    ),
    0.0,
)
def parse_line(line: str) -> dict:
    """Extract the known metrics from one line of tegrastats output.

    Args:
        line: A single raw output line; surrounding whitespace is ignored.

    Returns:
        A dict mapping metric name to float, containing only the fields that
        were found in *line*. An unrecognised or empty line yields ``{}``.
    """
    text = line.strip()
    # Each entry pairs a pattern with the metric names that receive its
    # capture groups, in group order. Groups beyond the listed names (the
    # second number of the power reading) are deliberately dropped.
    extractors = (
        (r"GR3D_FREQ\s+(\d+)%", ("gr3d_freq_percent",)),
        (r"GPU@(\d+(?:\.\d+)?)C", ("gpu_temp_c",)),
        (r"CPU@(\d+(?:\.\d+)?)C", ("cpu_temp_c",)),
        (r"RAM\s+(\d+)/(\d+)MB", ("ram_used_mb", "ram_total_mb")),
        (
            r"(?:POM_5V_IN|VDD_IN)\s+(\d+)(?:mW)?/(\d+)(?:mW)?",
            ("power_5v_in_mw",),
        ),
    )
    parsed = {}
    for pattern, names in extractors:
        found = re.search(pattern, text)
        if found is None:
            continue
        for name, raw in zip(names, found.groups()):
            parsed[name] = float(raw)
    return parsed
def read_latest_line(tegrastats_path: str = "/host/usr/bin/tegrastats") -> str:
    """Run tegrastats just long enough to capture its first output line.

    The process is started with ``--interval 1000``, one line is read from its
    combined stdout/stderr, and the process is then stopped. The read blocks
    until the child prints a line or closes its output; no timeout is applied.

    Args:
        tegrastats_path: Path of the tegrastats executable to launch.

    Returns:
        The first line the process printed (including its newline), or ``""``
        when the executable cannot be started or reading from it fails.
    """
    try:
        proc = subprocess.Popen(
            [tegrastats_path, "--interval", "1000"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
    except OSError:
        return ""

    # The context manager closes the stdout pipe on exit, so no file
    # descriptor is left open after each scrape.
    with proc:
        try:
            return proc.stdout.readline()
        except OSError:
            return ""
        finally:
            # Always stop the child, even when the read failed. Ask politely
            # first, then force it, and reap it in both cases so that no
            # zombie process is left behind.
            proc.terminate()
            try:
                proc.wait(timeout=1)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
class Handler(http.server.BaseHTTPRequestHandler):
    """Serve the current tegrastats sample at ``/metrics``.

    A GET for ``/metrics`` takes one fresh sample and returns it as gauges in
    the text exposition format (``text/plain; version=0.0.4``). Any other path
    receives an empty 404 response.
    """

    @staticmethod
    def _escape_label_value(value: str) -> str:
        """Escape backslash, double quote and newline for use in a label value."""
        return (
            value.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    @classmethod
    def _render(cls, metrics: dict, node: str) -> bytes:
        """Encode *metrics* as UTF-8 exposition text labelled with *node*.

        Each metric becomes a ``# TYPE jetson_<name> gauge`` line followed by
        its sample line, in the dict's iteration order.
        """
        label = f'{{node="{cls._escape_label_value(node)}"}}'
        out = []
        for k, v in metrics.items():
            out.append(f"# TYPE jetson_{k} gauge")
            out.append(f"jetson_{k}{label} {v}")
        return ("\n".join(out) + "\n").encode("utf-8")

    def do_GET(self):
        """Handle a GET request: metrics on ``/metrics``, 404 elsewhere."""
        if self.path != "/metrics":
            self.send_response(404)
            self.end_headers()
            return

        # Start from the zeroed defaults so every metric is always present,
        # then overlay whatever could be parsed from the fresh sample.
        metrics = BASE_METRICS.copy()
        line = read_latest_line()
        if line:
            metrics.update(parse_line(line))
            metrics["log_line_len"] = float(len(line))
        metrics["last_scrape_ts"] = time()

        # Encode before measuring: Content-Length counts bytes, not
        # characters, and the two differ once the node name is non-ASCII.
        body = self._render(metrics, NODE_NAME)
        self.send_response(200)
        self.send_header("Content-Type", "text/plain; version=0.0.4")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, fmt, *args):
        """Discard the per-request log line the base class would emit."""
        return
if __name__ == "__main__":

    class _ReusableTCPServer(socketserver.TCPServer):
        """TCPServer that sets SO_REUSEADDR on its listening socket."""

        # Without this a quick restart can fail with "Address already in
        # use" while connections from the previous run are in TIME_WAIT.
        allow_reuse_address = True

    # Listen on all interfaces and serve requests one at a time until the
    # process is stopped; the context manager closes the socket on exit.
    with _ReusableTCPServer(("", PORT), Handler) as httpd:
        httpd.serve_forever()