monitoring: read jetson stats on demand
This commit is contained in:
parent 1951291090
commit 246ed6617e
@@ -17,7 +17,7 @@ spec:
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "9100"
|
||||
monitoring.bstein.dev/restart-rev: "3"
|
||||
monitoring.bstein.dev/restart-rev: "4"
|
||||
spec:
|
||||
serviceAccountName: default
|
||||
hostPID: true
|
||||
|
||||
@@ -3,13 +3,12 @@ import os
|
||||
import re
|
||||
import socketserver
|
||||
import subprocess
|
||||
import threading
|
||||
from time import time
|
||||
|
||||
PORT = int(os.environ.get("JETSON_EXPORTER_PORT", "9100"))
|
||||
NODE_NAME = os.environ.get("NODE_NAME") or os.uname().nodename
|
||||
LOGFILE = "/tmp/tegrastats.log"
|
||||
METRICS = {
|
||||
BASE_METRICS = {
|
||||
"gr3d_freq_percent": 0.0,
|
||||
"gpu_temp_c": 0.0,
|
||||
"cpu_temp_c": 0.0,
|
||||
@@ -18,9 +17,8 @@ METRICS = {
|
||||
"power_5v_in_mw": 0.0,
|
||||
"last_scrape_ts": 0.0,
|
||||
}
|
||||
LOCK = threading.Lock()
|
||||
|
||||
def parse_line(line: str):
|
||||
def parse_line(line: str) -> dict:
|
||||
line = line.strip()
|
||||
updates = {}
|
||||
m = re.search(r"GR3D_FREQ\\s+(\\d+)%", line)
|
||||
@@ -39,9 +37,7 @@ def parse_line(line: str):
|
||||
m = re.search(r"(?:POM_5V_IN|VDD_IN)\\s+(\\d+)/(\\d+)", line)
|
||||
if m:
|
||||
updates["power_5v_in_mw"] = float(m.group(1))
|
||||
with LOCK:
|
||||
METRICS.update(updates)
|
||||
METRICS["last_scrape_ts"] = time()
|
||||
return updates
|
||||
|
||||
def start_tegrastats():
|
||||
subprocess.Popen(
|
||||
@@ -52,19 +48,18 @@ def start_tegrastats():
|
||||
)
|
||||
|
||||
|
||||
def refresh_from_log():
|
||||
def read_latest_line() -> str:
|
||||
if not os.path.exists(LOGFILE):
|
||||
return
|
||||
return ""
|
||||
try:
|
||||
with open(LOGFILE, "rb") as handle:
|
||||
handle.seek(0, os.SEEK_END)
|
||||
size = handle.tell()
|
||||
handle.seek(max(size - 4096, 0), os.SEEK_SET)
|
||||
tail = handle.read().decode("utf-8", errors="ignore").splitlines()
|
||||
if tail:
|
||||
parse_line(tail[-1])
|
||||
return tail[-1] if tail else ""
|
||||
except OSError:
|
||||
return
|
||||
return ""
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
|
||||
def do_GET(self):
|
||||
@@ -72,9 +67,11 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
return
|
||||
refresh_from_log()
|
||||
with LOCK:
|
||||
metrics = METRICS.copy()
|
||||
metrics = BASE_METRICS.copy()
|
||||
line = read_latest_line()
|
||||
if line:
|
||||
metrics.update(parse_line(line))
|
||||
metrics["last_scrape_ts"] = time()
|
||||
out = []
|
||||
label = f'{{node="{NODE_NAME}"}}'
|
||||
for k, v in metrics.items():
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user