# Source: lesavka/scripts/manual/client_rct_timing_trace_summary.py (343 lines, 14 KiB, Python)
#!/usr/bin/env python3
"""Build a T0-T5 timing trace from a client-to-RCT probe artifact."""
from __future__ import annotations
import argparse
import json
import math
import pathlib
import statistics
from typing import Any
def load_json(path: pathlib.Path) -> dict[str, Any]:
    """Parse the JSON object stored at `path`.

    Inputs: an artifact path produced by the client-to-RCT harness.
    Output: the parsed object. Why: the trace intentionally reuses the blind
    probe artifacts instead of adding another media generator.
    """
    with path.open() as handle:
        return json.load(handle)
def load_jsonl(path: pathlib.Path) -> list[dict[str, Any]]:
    """Collect every parseable JSON record from an optional JSONL artifact.

    Inputs: a JSONL path. Output: parsed objects with malformed lines ignored.
    Why: operational probe logs can be truncated when a run fails, but partial
    timing evidence is still useful for locating the layer that drifted.
    """
    if not path.exists():
        return []
    parsed: list[dict[str, Any]] = []
    for raw_line in path.read_text(errors="replace").splitlines():
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            # Truncated or garbled line: skip it, keep the usable evidence.
            continue
        parsed.append(record)
    return parsed
def percentile(values: list[float], q: float) -> float | None:
"""Return a nearest-rank percentile for finite values.
Inputs: numeric samples and a quantile. Output: the selected percentile or
`None`. Why: the trace needs the same conservative p95 language as the sync
and freshness gates.
"""
finite = sorted(value for value in values if math.isfinite(value))
if not finite:
return None
index = min(len(finite) - 1, max(0, math.ceil(len(finite) * q) - 1))
return finite[index]
def fmt_ms(value: float | None) -> str:
"""Format optional millisecond evidence for human reports."""
return f"{value:.1f}ms" if value is not None else "unavailable"
def capture_start_ns(capture_log: pathlib.Path) -> int | None:
"""Return the RCT recorder Unix start timestamp from the capture log.
Inputs: the recorder log. Output: Unix nanoseconds or `None`.
Why: converting capture-relative detections into client time lets us split
end-to-end freshness into pre-send and post-send portions.
"""
if not capture_log.exists():
return None
for line in capture_log.read_text(errors="replace").splitlines():
if line.startswith("capture_start_unix_ns="):
return int(line.split("=", 1)[1].strip())
return None
def as_float(value: Any) -> float | None:
"""Parse a finite float from relayctl fields."""
if value in (None, "pending"):
return None
try:
parsed = float(str(value).replace("+", ""))
except (TypeError, ValueError):
return None
return parsed if math.isfinite(parsed) else None
def summarize_upstream_samples(records: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Summarize sampled server receive and sink timing.

    Inputs: `upstream-sync-samples.jsonl` records. Output: a dict with the
    live `sample_count` plus one `<bucket>_p95` entry per timing bucket, or
    `None` when the records carry no live samples and no numeric fields.
    Why: the current server telemetry is sampled instead of per-event; this
    still tells us whether the failure is already visible at receive/sink time.
    """
    # relayctl field name -> local bucket name. Hoisted out of the record
    # loop (it was previously rebuilt per record) because it never changes.
    mapping = {
        "planner_camera_client_queue_age_ms": "camera_client_queue_age_ms",
        "planner_microphone_client_queue_age_ms": "microphone_client_queue_age_ms",
        "planner_camera_server_receive_age_ms": "camera_server_receive_age_ms",
        "planner_microphone_server_receive_age_ms": "microphone_server_receive_age_ms",
        "planner_camera_sink_late_ms": "camera_sink_late_ms",
        "planner_microphone_sink_late_ms": "microphone_sink_late_ms",
        "planner_server_receive_abs_skew_p95_ms": "server_receive_abs_skew_ms",
        "planner_sink_handoff_abs_skew_p95_ms": "sink_handoff_abs_skew_ms",
    }
    # Bucket insertion order mirrors the mapping values so the emitted
    # `*_p95` keys keep the original ordering.
    buckets: dict[str, list[float]] = {target: [] for target in mapping.values()}
    live_samples = 0
    for record in records:
        fields = record.get("fields") or {}
        if fields.get("planner_phase") == "live":
            live_samples += 1
        for source, target in mapping.items():
            parsed = as_float(fields.get(source))
            if parsed is not None:
                buckets[target].append(parsed)
    if live_samples == 0 and not any(buckets.values()):
        return None
    return {
        "sample_count": live_samples,
        **{f"{name}_p95": percentile(values, 0.95) for name, values in buckets.items()},
    }
def event_send_records(
    event: dict[str, Any],
    send_records: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return client send records whose video PTS falls inside an event window.

    Inputs: a synthetic event and client bundle send records. Output: matching
    bundles. Why: T1 should be the bundle send containing the coded video flash,
    not merely the nearest arbitrary frame.
    """
    window_start = int(event.get("planned_start_us") or -1)
    window_end = int(event.get("planned_end_us") or -1)
    # Reject missing or inverted windows outright.
    if window_start < 0 or window_end <= window_start:
        return []
    matches: list[dict[str, Any]] = []
    for record in send_records:
        pts_us = int(record.get("video_capture_pts_us") or -1)
        if window_start <= pts_us < window_end:
            matches.append(record)
    return matches
def build_trace(report_dir: pathlib.Path) -> dict[str, Any]:
    """Build the T0-T5 trace summary for one completed probe directory.

    Inputs: a client-to-RCT artifact directory. Output: structured trace data.
    Why: failed blind runs need enough layer evidence to decide whether to tune
    client generation, transport/server ingress, HEVC decode/UVC handoff, or
    final RCT capture before we touch production offsets.
    """
    # Required JSON artifacts: a missing file raises here, which is fine for a
    # completed probe directory. The JSONL inputs and the capture log below
    # are optional (their loaders return [] / None when absent).
    timeline = load_json(report_dir / "client-transport-timeline.json")
    report = load_json(report_dir / "report.json")
    transport = load_json(report_dir / "client-rct-transport-summary.json")
    clock = load_json(report_dir / "clock-alignment.json")
    send_records = load_jsonl(report_dir / "client-send-bundles.jsonl")
    upstream_records = load_jsonl(report_dir / "upstream-sync-samples.jsonl")
    capture_start = capture_start_ns(report_dir / "capture.log")
    # Offset added to a client Unix timestamp to express it on the capture
    # clock; defaults to 0 when the alignment artifact has no value.
    offset_ns = int(clock.get("capture_clock_offset_from_client_ns") or 0)
    # Paired detections keyed by server event id, falling back to event_id.
    pairs = {
        int(pair.get("server_event_id", pair.get("event_id", -1))): pair
        for pair in report.get("paired_events", [])
    }
    # Per-event freshness metrics from the transport summary, keyed the same way.
    summary_events = {
        int(event.get("event_id", -1)): event
        for event in transport.get("events", [])
    }
    rows: list[dict[str, Any]] = []
    t0_t1_values: list[float] = []
    t1_t5_video_values: list[float] = []
    t1_t5_audio_values: list[float] = []
    for event in timeline.get("events", []):
        event_id = int(event.get("event_id", -1))
        matches = event_send_records(event, send_records)
        # T1 is the earliest bundle send whose video PTS lies in this event's window.
        first_send = min(matches, key=lambda record: int(record["send_unix_ns"])) if matches else None
        pair = pairs.get(event_id)
        summary_event = summary_events.get(event_id)
        t0_ns = int(event.get("client_capture_unix_ns") or 0)
        t1_ns = int(first_send["send_unix_ns"]) if first_send else None
        # Truthiness check deliberately treats 0/None timestamps as "missing".
        t0_t1_ms = ((t1_ns - t0_ns) / 1_000_000.0) if t1_ns and t0_ns else None
        if t0_t1_ms is not None:
            t0_t1_values.append(t0_t1_ms)
        t1_t5_video_ms = None
        t1_t5_audio_ms = None
        # T1->T5 deltas need all three: a paired detection, a send time, and
        # the recorder start (to translate send time into capture seconds).
        if pair and t1_ns and capture_start is not None:
            send_capture_s = (t1_ns + offset_ns - capture_start) / 1_000_000_000.0
            t1_t5_video_ms = (float(pair["video_time_s"]) - send_capture_s) * 1000.0
            t1_t5_audio_ms = (float(pair["audio_time_s"]) - send_capture_s) * 1000.0
            t1_t5_video_values.append(t1_t5_video_ms)
            t1_t5_audio_values.append(t1_t5_audio_ms)
        rows.append(
            {
                "event_id": event_id,
                "code": event.get("code"),
                "t0_client_capture_unix_ns": t0_ns or None,  # 0 normalized to None
                "t1_client_send_unix_ns": t1_ns,
                "t0_to_t1_local_send_ms": t0_t1_ms,
                "event_video_bundle_count": len(matches),
                "t5_rct_video_s": pair.get("video_time_s") if pair else None,
                "t5_rct_audio_s": pair.get("audio_time_s") if pair else None,
                "t0_to_t5_video_ms": summary_event.get("video_age_ms") if summary_event else None,
                "t0_to_t5_audio_ms": summary_event.get("audio_age_ms") if summary_event else None,
                "t1_to_t5_video_ms": t1_t5_video_ms,
                "t1_to_t5_audio_ms": t1_t5_audio_ms,
                "sync_skew_ms": pair.get("skew_ms") if pair else None,
                "paired": pair is not None,
            }
        )
    # Optional T4 evidence: only present when the UVC metadata log was enabled.
    uvc_summary_path = report_dir / "uvc-frame-meta-summary.json"
    uvc_summary = load_json(uvc_summary_path) if uvc_summary_path.exists() else None
    return {
        "schema": "lesavka.client-rct-timing-trace.v1",
        "report_dir": str(report_dir),
        "verdict": "pass" if transport.get("passed") else "fail",
        "sync_status": transport.get("sync_status"),
        "sync_p95_abs_skew_ms": (report.get("verdict") or {}).get("p95_abs_skew_ms"),
        "sync_median_skew_ms": report.get("median_skew_ms"),
        "sync_drift_ms": report.get("drift_ms"),
        "paired_event_count": transport.get("paired_event_count"),
        "expected_event_count": transport.get("expected_event_count"),
        "freshness_budget_ms": transport.get("freshness_budget_ms"),
        "freshness_limit_ms": transport.get("freshness_limit_ms"),
        "t0_to_t1_local_send_p95_ms": percentile(t0_t1_values, 0.95),
        "t1_to_t5_video_p95_ms": percentile(t1_t5_video_values, 0.95),
        "t1_to_t5_audio_p95_ms": percentile(t1_t5_audio_values, 0.95),
        "t0_to_t5_video_p95_ms": transport.get("video_age_p95_ms"),
        "t0_to_t5_audio_p95_ms": transport.get("audio_age_p95_ms"),
        "upstream_sampled_layers": summarize_upstream_samples(upstream_records),
        "uvc_spool": uvc_summary,
        "smoothness": transport.get("smoothness"),
        "events": rows,
        "notes": [
            "T0 is synthetic capture time on the client.",
            "T1 is the first outgoing bundle whose video PTS lands inside the coded event window.",
            "T2/T3 are sampled server receive/sink telemetry, not per-event timestamps yet.",
            "T4 UVC spool evidence is present only when the server metadata log is enabled.",
            "T5 is final RCT capture detection from the flash/tone analyzer.",
        ],
    }
def text_report(trace: dict[str, Any]) -> str:
    """Render the timing trace summary for humans.

    Inputs: the trace dict produced by `build_trace`. Output: a newline-
    terminated multi-line report string.
    """
    # Optional layers default to {} so the .get() lookups below stay safe.
    upstream = trace.get("upstream_sampled_layers") or {}
    uvc = trace.get("uvc_spool") or {}
    smoothness = trace.get("smoothness") or {}
    lines = [
        f"Client-to-RCT T0-T5 timing trace for {trace['report_dir']}",
        f"- verdict: {trace['verdict']}",
        # Adjacent f-strings concatenate into one report line.
        f"- sync: {trace.get('sync_status')} p95={fmt_ms(trace.get('sync_p95_abs_skew_ms'))} "
        f"median={fmt_ms(trace.get('sync_median_skew_ms'))} drift={fmt_ms(trace.get('sync_drift_ms'))}",
        f"- evidence: paired={trace.get('paired_event_count')}/{trace.get('expected_event_count')}",
        f"- freshness: budget={fmt_ms(trace.get('freshness_budget_ms'))} "
        f"limit={fmt_ms(trace.get('freshness_limit_ms'))}",
        "- layer p95:",
        f"  T0->T1 local bundle send: {fmt_ms(trace.get('t0_to_t1_local_send_p95_ms'))}",
        f"  T1->T5 RCT video detect: {fmt_ms(trace.get('t1_to_t5_video_p95_ms'))}",
        f"  T1->T5 RCT audio detect: {fmt_ms(trace.get('t1_to_t5_audio_p95_ms'))}",
        f"  T0->T5 RCT video detect: {fmt_ms(trace.get('t0_to_t5_video_p95_ms'))}",
        f"  T0->T5 RCT audio detect: {fmt_ms(trace.get('t0_to_t5_audio_p95_ms'))}",
        "- sampled server layers:",
        f"  T2 server receive age p95: video={fmt_ms(upstream.get('camera_server_receive_age_ms_p95'))} "
        f"audio={fmt_ms(upstream.get('microphone_server_receive_age_ms_p95'))}",
        f"  T2 receive A/V skew p95: {fmt_ms(upstream.get('server_receive_abs_skew_ms_p95'))}",
        f"  T3 sink late p95: video={fmt_ms(upstream.get('camera_sink_late_ms_p95'))} "
        f"audio={fmt_ms(upstream.get('microphone_sink_late_ms_p95'))}",
        f"  T3 sink handoff skew p95: {fmt_ms(upstream.get('sink_handoff_abs_skew_ms_p95'))}",
        "- optional UVC spool layer:",
    ]
    # T4 is optional evidence; explain how to enable it when absent.
    if uvc:
        coverage = uvc.get("event_coverage") or {}
        lines.append(
            f"  T4 records={uvc.get('record_count')} profiles={uvc.get('profiles')} "
            f"covered_events={coverage.get('covered_events')}/{coverage.get('expected_events')}"
        )
    else:
        lines.append("  T4 unavailable; enable LESAVKA_UVC_FRAME_META_LOG_PATH for spool-boundary evidence")
    lines.extend(
        [
            "- smoothness:",
            f"  video_hiccups={smoothness.get('video_hiccups')} "
            f"audio_hiccups={smoothness.get('audio_hiccups')} "
            f"video_p95_jitter={fmt_ms(smoothness.get('video_p95_jitter_ms'))}",
        ]
    )
    # Name the event codes that never produced a paired RCT detection.
    missing = [event for event in trace["events"] if not event["paired"]]
    if missing:
        lines.append(
            "- missing paired event codes: "
            + ",".join(str(event.get("code")) for event in missing if event.get("code") is not None)
        )
    return "\n".join(lines) + "\n"
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the trace summarizer."""
    parser = argparse.ArgumentParser()
    # All three positionals are filesystem paths: input dir, JSON out, text out.
    for positional in ("report_dir", "json_out", "txt_out"):
        parser.add_argument(positional, type=pathlib.Path)
    return parser.parse_args()
def main() -> None:
    """Write the JSON and text T0-T5 trace artifacts."""
    options = parse_args()
    trace = build_trace(options.report_dir)
    serialized = json.dumps(trace, indent=2, sort_keys=True) + "\n"
    options.json_out.write_text(serialized)
    options.txt_out.write_text(text_report(trace))
    # Echo the freshly-written text report so run logs carry it inline.
    print(options.txt_out.read_text(), end="")


if __name__ == "__main__":
    main()