#!/usr/bin/env python3
|
|
"""Build a T0-T5 timing trace from a client-to-RCT probe artifact."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import math
|
|
import pathlib
|
|
import statistics
|
|
from typing import Any
|
|
|
|
|
|
def load_json(path: pathlib.Path) -> dict[str, Any]:
    """Parse a single JSON object stored at `path`.

    Inputs: an artifact path produced by the client-to-RCT harness.
    Output: the parsed object. Why: the trace intentionally reuses the blind
    probe artifacts instead of adding another media generator.
    """

    raw = path.read_text()
    return json.loads(raw)
|
|
|
|
|
|
def load_jsonl(path: pathlib.Path) -> list[dict[str, Any]]:
    """Collect every parseable JSON record from an optional JSONL artifact.

    Inputs: a JSONL path. Output: parsed objects with malformed lines ignored.
    Why: operational probe logs can be truncated when a run fails, but partial
    timing evidence is still useful for locating the layer that drifted.
    """

    if not path.exists():
        return []
    parsed: list[dict[str, Any]] = []
    for raw_line in path.read_text(errors="replace").splitlines():
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError:
            # Skip truncated/garbled lines; partial evidence is still useful.
            continue
        parsed.append(record)
    return parsed
|
|
|
|
|
|
def percentile(values: list[float], q: float) -> float | None:
    """Return a nearest-rank percentile over the finite samples only.

    Inputs: numeric samples and a quantile. Output: the selected percentile or
    `None`. Why: the trace needs the same conservative p95 language as the sync
    and freshness gates.
    """

    usable = [sample for sample in values if math.isfinite(sample)]
    if not usable:
        return None
    usable.sort()
    count = len(usable)
    # Nearest-rank: ceil(n*q) gives a 1-based rank; clamp to valid indices.
    rank = math.ceil(count * q) - 1
    clamped = max(0, min(rank, count - 1))
    return usable[clamped]
|
|
|
|
|
|
def fmt_ms(value: float | None) -> str:
    """Format optional millisecond evidence for human reports."""

    if value is None:
        return "unavailable"
    return f"{value:.1f}ms"
|
|
|
|
|
|
def capture_start_ns(capture_log: pathlib.Path) -> int | None:
    """Return the RCT recorder Unix start timestamp from the capture log.

    Inputs: the recorder log. Output: Unix nanoseconds or `None`.
    Why: converting capture-relative detections into client time lets us split
    end-to-end freshness into pre-send and post-send portions.
    """

    if not capture_log.exists():
        return None
    marker = "capture_start_unix_ns="
    for row in capture_log.read_text(errors="replace").splitlines():
        if row.startswith(marker):
            # Split only at the first '=' so the value side stays intact.
            _, _, tail = row.partition("=")
            return int(tail.strip())
    return None
|
|
|
|
|
|
def as_float(value: Any) -> float | None:
    """Parse a finite float from relayctl fields."""

    # relayctl emits the literal string "pending" for unfilled metrics.
    if value is None or value == "pending":
        return None
    try:
        # Strip '+' signs relayctl prefixes onto positive deltas.
        result = float(str(value).replace("+", ""))
    except (TypeError, ValueError):
        return None
    if not math.isfinite(result):
        return None
    return result
|
|
|
|
|
|
def summarize_upstream_samples(records: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Summarize sampled server receive and sink timing.

    Inputs: `upstream-sync-samples.jsonl` records. Output: p95 layer metrics,
    or `None` when there is neither a live sample nor any parsed metric.
    Why: the current server telemetry is sampled instead of per-event; this
    still tells us whether the failure is already visible at receive/sink time.
    """

    buckets: dict[str, list[float]] = {
        "camera_client_queue_age_ms": [],
        "microphone_client_queue_age_ms": [],
        "camera_server_receive_age_ms": [],
        "microphone_server_receive_age_ms": [],
        "camera_sink_late_ms": [],
        "microphone_sink_late_ms": [],
        "server_receive_abs_skew_ms": [],
        "sink_handoff_abs_skew_ms": [],
    }
    # relayctl field name -> bucket name. Loop-invariant, so build it once
    # instead of re-creating the dict literal for every record.
    mapping = {
        "planner_camera_client_queue_age_ms": "camera_client_queue_age_ms",
        "planner_microphone_client_queue_age_ms": "microphone_client_queue_age_ms",
        "planner_camera_server_receive_age_ms": "camera_server_receive_age_ms",
        "planner_microphone_server_receive_age_ms": "microphone_server_receive_age_ms",
        "planner_camera_sink_late_ms": "camera_sink_late_ms",
        "planner_microphone_sink_late_ms": "microphone_sink_late_ms",
        "planner_server_receive_abs_skew_p95_ms": "server_receive_abs_skew_ms",
        "planner_sink_handoff_abs_skew_p95_ms": "sink_handoff_abs_skew_ms",
    }
    live_samples = 0
    for record in records:
        fields = record.get("fields") or {}
        if fields.get("planner_phase") == "live":
            live_samples += 1
        for source, target in mapping.items():
            parsed = as_float(fields.get(source))
            if parsed is not None:
                buckets[target].append(parsed)

    if live_samples == 0 and not any(buckets.values()):
        return None
    return {
        "sample_count": live_samples,
        **{f"{name}_p95": percentile(values, 0.95) for name, values in buckets.items()},
    }
|
|
|
|
|
|
def event_send_records(
    event: dict[str, Any],
    send_records: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return client send records whose video PTS falls inside an event window.

    Inputs: a synthetic event and client bundle send records. Output: matching
    bundles. Why: T1 should be the bundle send containing the coded video flash,
    not merely the nearest arbitrary frame.
    """

    window_start = int(event.get("planned_start_us") or -1)
    window_end = int(event.get("planned_end_us") or -1)
    # Reject missing or degenerate windows outright.
    if window_start < 0 or window_end <= window_start:
        return []
    matched: list[dict[str, Any]] = []
    for record in send_records:
        pts_us = int(record.get("video_capture_pts_us") or -1)
        if window_start <= pts_us < window_end:
            matched.append(record)
    return matched
|
|
|
|
|
|
def build_trace(report_dir: pathlib.Path) -> dict[str, Any]:
    """Build the T0-T5 trace summary for one completed probe directory.

    Inputs: a client-to-RCT artifact directory. Output: structured trace data.
    Why: failed blind runs need enough layer evidence to decide whether to tune
    client generation, transport/server ingress, HEVC decode/UVC handoff, or
    final RCT capture before we touch production offsets.

    Raises whatever `load_json` raises (e.g. FileNotFoundError,
    json.JSONDecodeError) when a required artifact is absent or corrupt.
    """

    # Required artifacts -- a trace without these would be meaningless, so
    # failures propagate instead of being swallowed.
    timeline = load_json(report_dir / "client-transport-timeline.json")
    report = load_json(report_dir / "report.json")
    transport = load_json(report_dir / "client-rct-transport-summary.json")
    clock = load_json(report_dir / "clock-alignment.json")
    # Optional artifacts -- the JSONL loader returns [] for missing files and
    # skips malformed lines, so truncated runs still yield partial evidence.
    send_records = load_jsonl(report_dir / "client-send-bundles.jsonl")
    upstream_records = load_jsonl(report_dir / "upstream-sync-samples.jsonl")
    capture_start = capture_start_ns(report_dir / "capture.log")
    # Additive offset mapping client Unix time onto the RCT capture clock.
    offset_ns = int(clock.get("capture_clock_offset_from_client_ns") or 0)

    # Index paired events by server event id; falls back to "event_id" when
    # the server id field is absent so older reports still match.
    pairs = {
        int(pair.get("server_event_id", pair.get("event_id", -1))): pair
        for pair in report.get("paired_events", [])
    }
    summary_events = {
        int(event.get("event_id", -1)): event
        for event in transport.get("events", [])
    }

    rows: list[dict[str, Any]] = []
    t0_t1_values: list[float] = []
    t1_t5_video_values: list[float] = []
    t1_t5_audio_values: list[float] = []
    for event in timeline.get("events", []):
        event_id = int(event.get("event_id", -1))
        matches = event_send_records(event, send_records)
        # T1 is the EARLIEST bundle send whose video PTS lands in the event
        # window -- later bundles are retransmits/continuations.
        first_send = min(matches, key=lambda record: int(record["send_unix_ns"])) if matches else None
        pair = pairs.get(event_id)
        summary_event = summary_events.get(event_id)
        t0_ns = int(event.get("client_capture_unix_ns") or 0)
        t1_ns = int(first_send["send_unix_ns"]) if first_send else None
        # Truthiness check means a timestamp of 0 counts as missing; fine for
        # Unix-nanosecond values, which are never legitimately zero here.
        t0_t1_ms = ((t1_ns - t0_ns) / 1_000_000.0) if t1_ns and t0_ns else None
        if t0_t1_ms is not None:
            t0_t1_values.append(t0_t1_ms)

        t1_t5_video_ms = None
        t1_t5_audio_ms = None
        if pair and t1_ns and capture_start is not None:
            # Convert the client send instant into capture-relative seconds so
            # it is comparable with the analyzer's detection times (T5).
            send_capture_s = (t1_ns + offset_ns - capture_start) / 1_000_000_000.0
            t1_t5_video_ms = (float(pair["video_time_s"]) - send_capture_s) * 1000.0
            t1_t5_audio_ms = (float(pair["audio_time_s"]) - send_capture_s) * 1000.0
            t1_t5_video_values.append(t1_t5_video_ms)
            t1_t5_audio_values.append(t1_t5_audio_ms)

        rows.append(
            {
                "event_id": event_id,
                "code": event.get("code"),
                # `or None` normalizes the 0 sentinel back to null in output.
                "t0_client_capture_unix_ns": t0_ns or None,
                "t1_client_send_unix_ns": t1_ns,
                "t0_to_t1_local_send_ms": t0_t1_ms,
                "event_video_bundle_count": len(matches),
                "t5_rct_video_s": pair.get("video_time_s") if pair else None,
                "t5_rct_audio_s": pair.get("audio_time_s") if pair else None,
                "t0_to_t5_video_ms": summary_event.get("video_age_ms") if summary_event else None,
                "t0_to_t5_audio_ms": summary_event.get("audio_age_ms") if summary_event else None,
                "t1_to_t5_video_ms": t1_t5_video_ms,
                "t1_to_t5_audio_ms": t1_t5_audio_ms,
                "sync_skew_ms": pair.get("skew_ms") if pair else None,
                "paired": pair is not None,
            }
        )

    # T4 (UVC spool) evidence is opt-in; only load the summary when present.
    uvc_summary_path = report_dir / "uvc-frame-meta-summary.json"
    uvc_summary = load_json(uvc_summary_path) if uvc_summary_path.exists() else None
    return {
        "schema": "lesavka.client-rct-timing-trace.v1",
        "report_dir": str(report_dir),
        "verdict": "pass" if transport.get("passed") else "fail",
        "sync_status": transport.get("sync_status"),
        "sync_p95_abs_skew_ms": (report.get("verdict") or {}).get("p95_abs_skew_ms"),
        "sync_median_skew_ms": report.get("median_skew_ms"),
        "sync_drift_ms": report.get("drift_ms"),
        "paired_event_count": transport.get("paired_event_count"),
        "expected_event_count": transport.get("expected_event_count"),
        "freshness_budget_ms": transport.get("freshness_budget_ms"),
        "freshness_limit_ms": transport.get("freshness_limit_ms"),
        "t0_to_t1_local_send_p95_ms": percentile(t0_t1_values, 0.95),
        "t1_to_t5_video_p95_ms": percentile(t1_t5_video_values, 0.95),
        "t1_to_t5_audio_p95_ms": percentile(t1_t5_audio_values, 0.95),
        # T0->T5 p95s come from the transport summary rather than being
        # recomputed here, keeping this trace consistent with the gate.
        "t0_to_t5_video_p95_ms": transport.get("video_age_p95_ms"),
        "t0_to_t5_audio_p95_ms": transport.get("audio_age_p95_ms"),
        "upstream_sampled_layers": summarize_upstream_samples(upstream_records),
        "uvc_spool": uvc_summary,
        "smoothness": transport.get("smoothness"),
        "events": rows,
        "notes": [
            "T0 is synthetic capture time on the client.",
            "T1 is the first outgoing bundle whose video PTS lands inside the coded event window.",
            "T2/T3 are sampled server receive/sink telemetry, not per-event timestamps yet.",
            "T4 UVC spool evidence is present only when the server metadata log is enabled.",
            "T5 is final RCT capture detection from the flash/tone analyzer.",
        ],
    }
|
|
|
|
|
|
def text_report(trace: dict[str, Any]) -> str:
    """Render the timing trace summary for humans."""

    upstream = trace.get("upstream_sampled_layers") or {}
    uvc = trace.get("uvc_spool") or {}
    smoothness = trace.get("smoothness") or {}

    out: list[str] = [f"Client-to-RCT T0-T5 timing trace for {trace['report_dir']}"]
    out.append(f"- verdict: {trace['verdict']}")
    out.append(
        f"- sync: {trace.get('sync_status')} p95={fmt_ms(trace.get('sync_p95_abs_skew_ms'))} "
        f"median={fmt_ms(trace.get('sync_median_skew_ms'))} drift={fmt_ms(trace.get('sync_drift_ms'))}"
    )
    out.append(f"- evidence: paired={trace.get('paired_event_count')}/{trace.get('expected_event_count')}")
    out.append(
        f"- freshness: budget={fmt_ms(trace.get('freshness_budget_ms'))} "
        f"limit={fmt_ms(trace.get('freshness_limit_ms'))}"
    )
    out.append("- layer p95:")
    out.append(f"  T0->T1 local bundle send: {fmt_ms(trace.get('t0_to_t1_local_send_p95_ms'))}")
    out.append(f"  T1->T5 RCT video detect: {fmt_ms(trace.get('t1_to_t5_video_p95_ms'))}")
    out.append(f"  T1->T5 RCT audio detect: {fmt_ms(trace.get('t1_to_t5_audio_p95_ms'))}")
    out.append(f"  T0->T5 RCT video detect: {fmt_ms(trace.get('t0_to_t5_video_p95_ms'))}")
    out.append(f"  T0->T5 RCT audio detect: {fmt_ms(trace.get('t0_to_t5_audio_p95_ms'))}")
    out.append("- sampled server layers:")
    out.append(
        f"  T2 server receive age p95: video={fmt_ms(upstream.get('camera_server_receive_age_ms_p95'))} "
        f"audio={fmt_ms(upstream.get('microphone_server_receive_age_ms_p95'))}"
    )
    out.append(f"  T2 receive A/V skew p95: {fmt_ms(upstream.get('server_receive_abs_skew_ms_p95'))}")
    out.append(
        f"  T3 sink late p95: video={fmt_ms(upstream.get('camera_sink_late_ms_p95'))} "
        f"audio={fmt_ms(upstream.get('microphone_sink_late_ms_p95'))}"
    )
    out.append(f"  T3 sink handoff skew p95: {fmt_ms(upstream.get('sink_handoff_abs_skew_ms_p95'))}")
    out.append("- optional UVC spool layer:")
    if uvc:
        coverage = uvc.get("event_coverage") or {}
        out.append(
            f"  T4 records={uvc.get('record_count')} profiles={uvc.get('profiles')} "
            f"covered_events={coverage.get('covered_events')}/{coverage.get('expected_events')}"
        )
    else:
        out.append("  T4 unavailable; enable LESAVKA_UVC_FRAME_META_LOG_PATH for spool-boundary evidence")
    out.append("- smoothness:")
    out.append(
        f"  video_hiccups={smoothness.get('video_hiccups')} "
        f"audio_hiccups={smoothness.get('audio_hiccups')} "
        f"video_p95_jitter={fmt_ms(smoothness.get('video_p95_jitter_ms'))}"
    )
    unpaired = [row for row in trace["events"] if not row["paired"]]
    if unpaired:
        codes = ",".join(str(row.get("code")) for row in unpaired if row.get("code") is not None)
        out.append("- missing paired event codes: " + codes)
    return "\n".join(out) + "\n"
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the trace summarizer."""

    cli = argparse.ArgumentParser()
    # Three required positionals, all coerced to Path objects.
    for positional in ("report_dir", "json_out", "txt_out"):
        cli.add_argument(positional, type=pathlib.Path)
    return cli.parse_args()
|
|
|
|
|
|
def main() -> None:
    """Write the JSON and text T0-T5 trace artifacts."""

    args = parse_args()
    trace = build_trace(args.report_dir)
    serialized = json.dumps(trace, indent=2, sort_keys=True) + "\n"
    args.json_out.write_text(serialized)
    args.txt_out.write_text(text_report(trace))
    # Echo the text artifact exactly as written (re-read from disk so the
    # console mirrors the file's on-disk contents).
    print(args.txt_out.read_text(), end="")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|