lesavka/scripts/manual/client_rct_upstream_sync_sampler.py

124 lines
3.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""Poll server upstream-sync state while a client-to-RCT probe is active."""
from __future__ import annotations
import json
import os
import pathlib
import subprocess
import sys
import time
def parse_relayctl_fields(text: str) -> dict:
    """Turn `key=value` lines of relayctl output into a mapping.

    Inputs: the text printed by `lesavka-relayctl upstream-sync`.
    Outputs: a dict of stripped field names to stripped string values; lines
    without an `=` are skipped, only the first `=` on a line separates key
    from value, and a repeated key keeps its last value.
    Why: samples keep the raw operator output, and this structured copy lets
    follow-up scripts pull timing fields out with `jq`.
    """
    # partition() yields an empty separator when the line has no "=", which
    # doubles as the filter for non key/value lines.
    split_lines = (entry.partition("=") for entry in text.splitlines())
    return {
        name.strip(): payload.strip()
        for name, separator, payload in split_lines
        if separator
    }
def process_alive(pid: int) -> bool:
    """Return whether a local probe process still exists.

    Inputs: process id to monitor.
    Outputs: true while the process exists, including when it exists but may
    not be signaled by this user; false once it is gone or when `pid` is not
    a positive process id.
    Why: the harness needs a passwordless background sampler that exits
    naturally when the active probe finishes.
    """
    # Zero and negative ids address process groups rather than one process,
    # so signaling them says nothing about a single probe.
    if pid <= 0:
        return False
    try:
        # Signal 0 performs the existence/permission check without delivering
        # anything to the target.
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user; it is still alive.
        return True
    except OSError:
        return False
    return True
def sample_until_probe_exits(
    relayctl: str,
    server: str,
    tls_domain: str,
    probe_pid: int,
    interval_s: float,
    jsonl_path: pathlib.Path,
    text_path: pathlib.Path,
) -> int:
    """Record upstream-sync snapshots for as long as the probe keeps running.

    Inputs: relayctl executable path, server address, TLS domain (exported to
    the child as `LESAVKA_TLS_DOMAIN`), pid of the probe to watch, pause
    between samples in seconds, and the JSONL and plain-text output paths.
    Outputs: always 0; the samples themselves are written to the two files,
    which are truncated on open and flushed after every sample.
    Why: when the RCT black-box result fails, these samples show whether the
    server planner saw stale client queues, late presentation, or healthy
    ingress.
    """
    for destination in (jsonl_path, text_path):
        destination.parent.mkdir(parents=True, exist_ok=True)

    command = [relayctl, "--server", server, "upstream-sync"]
    with jsonl_path.open("w", encoding="utf-8") as jsonl_out:
        with text_path.open("w", encoding="utf-8") as text_out:
            while process_alive(probe_pid):
                taken_ns = time.time_ns()
                completed = subprocess.run(
                    command,
                    env=dict(os.environ, LESAVKA_TLS_DOMAIN=tls_domain),
                    capture_output=True,
                    text=True,
                    check=False,
                )
                # stdout first, then stderr, newline-separated, leaving out
                # whichever stream was empty.
                streams = (completed.stdout.strip(), completed.stderr.strip())
                combined = "\n".join(part for part in streams if part)
                succeeded = completed.returncode == 0
                record = {
                    "schema": "lesavka.client-rct-upstream-sync-sample.v1",
                    "sample_unix_ns": taken_ns,
                    "ok": succeeded,
                    "returncode": completed.returncode,
                    "fields": parse_relayctl_fields(combined),
                    "raw": combined,
                }
                jsonl_out.write(json.dumps(record, sort_keys=True) + "\n")
                jsonl_out.flush()
                text_out.write(
                    f"--- sample_unix_ns={taken_ns} ok={succeeded} ---\n{combined}\n"
                )
                text_out.flush()
                time.sleep(interval_s)
    return 0
def main() -> int:
    """CLI entrypoint for the upstream-sync sampler.

    Inputs: seven positional arguments from `sys.argv`: RELAYCTL, SERVER,
    TLS_DOMAIN, PROBE_PID, INTERVAL_SECONDS, JSONL_OUT, and TXT_OUT.
    Outputs: 2 after printing a message to stderr when the argument count is
    wrong, PROBE_PID is not a positive integer, or INTERVAL_SECONDS is not a
    finite non-negative number; otherwise the exit code returned by
    `sample_until_probe_exits`.
    Why: bad numeric arguments should fail before any sample is taken, with a
    usage error rather than a traceback.
    """
    usage = "usage: client_rct_upstream_sync_sampler.py RELAYCTL SERVER TLS_DOMAIN PROBE_PID INTERVAL_SECONDS JSONL_OUT TXT_OUT"
    if len(sys.argv) != 8:
        print(usage, file=sys.stderr)
        return 2
    relayctl, server, tls_domain, pid_raw, interval_raw, jsonl_out, txt_out = sys.argv[1:]

    try:
        probe_pid = int(pid_raw)
    except ValueError:
        probe_pid = 0
    # Zero and negative ids address process groups, not a single probe.
    if probe_pid <= 0:
        print(
            f"error: PROBE_PID must be a positive integer, got {pid_raw!r}",
            file=sys.stderr,
        )
        print(usage, file=sys.stderr)
        return 2

    try:
        interval_s = float(interval_raw)
    except ValueError:
        interval_s = -1.0
    # time.sleep rejects negative values; the chained comparison is also false
    # for NaN and infinity, so those are refused here as well.
    if not 0 <= interval_s < float("inf"):
        print(
            f"error: INTERVAL_SECONDS must be a finite non-negative number, got {interval_raw!r}",
            file=sys.stderr,
        )
        print(usage, file=sys.stderr)
        return 2

    return sample_until_probe_exits(
        relayctl,
        server,
        tls_domain,
        probe_pid,
        interval_s,
        pathlib.Path(jsonl_out),
        pathlib.Path(txt_out),
    )
# Run the sampler only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())