monitoring: refresh jetson stats on scrape
This commit is contained in:
parent
62a423f32c
commit
1951291090
@ -17,7 +17,7 @@ spec:
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9100"
|
||||
monitoring.bstein.dev/restart-rev: "2"
|
||||
monitoring.bstein.dev/restart-rev: "3"
|
||||
spec:
|
||||
serviceAccountName: default
|
||||
hostPID: true
|
||||
|
||||
@ -4,10 +4,11 @@ import re
|
||||
import socketserver
|
||||
import subprocess
|
||||
import threading
|
||||
from time import sleep, time
|
||||
from time import time
|
||||
|
||||
PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))
|
||||
NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename
|
||||
LOGFILE = "/tmp/tegrastats.log"
|
||||
METRICS = {
|
||||
"gr3d_freq_percent": 0.0,
|
||||
"gpu_temp_c": 0.0,
|
||||
@ -42,24 +43,28 @@ def parse_line(line: str):
|
||||
METRICS.update(updates)
|
||||
METRICS["last_scrape_ts"] = time()
|
||||
|
||||
def run_tegrastats():
|
||||
logfile = "/tmp/tegrastats.log"
|
||||
def start_tegrastats():
|
||||
subprocess.Popen(
|
||||
["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", logfile],
|
||||
["/host/usr/bin/tegrastats", "--interval", "1000", "--logfile", LOGFILE],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
)
|
||||
while not os.path.exists(logfile):
|
||||
sleep(0.1)
|
||||
with open(logfile, "r", encoding="utf-8", errors="ignore") as handle:
|
||||
handle.seek(0, os.SEEK_END)
|
||||
while True:
|
||||
line = handle.readline()
|
||||
if not line:
|
||||
sleep(0.2)
|
||||
continue
|
||||
parse_line(line)
|
||||
|
||||
|
||||
def refresh_from_log():
    """Refresh METRICS from the newest complete line of the tegrastats log.

    Reads at most the last 4096 bytes of LOGFILE and passes the most recent
    complete, non-blank line to parse_line(). Returns None.

    A missing or unreadable log file is ignored, so the previously parsed
    values stay in place. If the 4096-byte window holds no complete line,
    nothing is parsed.
    """
    try:
        # Open directly rather than testing os.path.exists() first: a missing
        # file raises FileNotFoundError (an OSError subclass), and a separate
        # check leaves a window in which the file can vanish before the open.
        with open(LOGFILE, "rb") as handle:
            handle.seek(0, os.SEEK_END)
            start = max(handle.tell() - 4096, 0)
            handle.seek(start, os.SEEK_SET)
            data = handle.read()
    except OSError:
        return

    chunks = data.split(b"\n")
    # Whatever follows the final newline is either empty or a line that is
    # still being written; never parse it.
    chunks.pop()
    # When the window starts mid-file, the first chunk may be cut at the front.
    if start > 0 and chunks:
        del chunks[0]

    for raw in reversed(chunks):
        line = raw.decode("utf-8", errors="ignore").rstrip("\r")
        if line.strip():
            parse_line(line)
            return
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
|
||||
def do_GET(self):
|
||||
@ -67,6 +72,7 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
return
|
||||
refresh_from_log()
|
||||
with LOCK:
|
||||
metrics = METRICS.copy()
|
||||
out = []
|
||||
@ -85,7 +91,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
return
|
||||
|
||||
if __name__ == "__main__":
|
||||
t = threading.Thread(target=run_tegrastats, daemon=True)
|
||||
t.start()
|
||||
start_tegrastats()
|
||||
with socketserver.TCPServer(("", PORT), Handler) as httpd:
|
||||
httpd.serve_forever()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user