"""Layer attribution helpers for client-to-RCT transport summaries."""

from __future__ import annotations

import json
import math
import pathlib

# Name of the optional client-side artifact expected next to the final report.
_SEND_LOG_NAME = "client-send-bundles.jsonl"
# Only rows carrying this schema tag are treated as bundle-send records.
_SEND_SCHEMA = "lesavka.sync-probe-send.v1"

# p95 limits (milliseconds) used by `freshness_bottleneck` to blame a stage.
_STAGE_AGE_LIMIT_MS = 500.0
_TRANSPORT_LAG_LIMIT_MS = 1_000.0


def percentile(values: list[float], q: float) -> float | None:
    """Return a nearest-rank percentile for finite values.

    Inputs: numeric samples and a quantile in `[0, 1]`.
    Outputs: the selected percentile, or `None` when no sample is finite.
    Why: stage attribution should use the same p95 style as the main
    transport summary without coupling this helper back to the CLI.
    """
    finite = sorted(value for value in values if math.isfinite(value))
    if not finite:
        return None
    # Nearest-rank: the 1-based rank ceil(n * q), clamped into the valid
    # index range so q == 0 selects the minimum and q == 1 the maximum.
    index = min(len(finite) - 1, max(0, math.ceil(len(finite) * q) - 1))
    return finite[index]


def _optional_float(value: object) -> float | None:
    """Convert `value` to float, returning `None` when absent or unparseable."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _read_send_rows(send_path: pathlib.Path) -> tuple[dict[int, float], list[float]]:
    """Parse the client send log into per-bundle local ages.

    Outputs: a map from `video_capture_pts_us` to `local_age_ms` (the last row
    wins for a repeated pts) and the list of every accepted `local_age_ms`.
    Malformed lines, non-object rows, rows with a different schema, and rows
    with missing or unparseable fields are skipped.
    """
    local_age_by_pts: dict[int, float] = {}
    local_ages: list[float] = []
    # Explicit UTF-8 keeps decoding independent of the host locale;
    # errors="replace" keeps a partially corrupt log readable.
    text = send_path.read_text(encoding="utf-8", errors="replace")
    for line in text.splitlines():
        try:
            row = json.loads(line)
        except json.JSONDecodeError:
            continue
        # A line can be valid JSON without being an object (e.g. `[1, 2]`).
        if not isinstance(row, dict) or row.get("schema") != _SEND_SCHEMA:
            continue
        try:
            video_pts = int(row["video_capture_pts_us"])
            local_age = float(row["local_age_ms"])
        except (KeyError, TypeError, ValueError, OverflowError):
            # OverflowError: json accepts `Infinity`, which int() rejects.
            continue
        local_age_by_pts[video_pts] = local_age
        local_ages.append(local_age)
    return local_age_by_pts, local_ages


def client_send_summary(report_path: pathlib.Path, joined: list[dict]) -> dict | None:
    """Summarize client-side bundle send timing from optional JSONL artifacts.

    Inputs: the final report path, used to find sibling
    `client-send-bundles.jsonl`, and joined RCT events.
    Outputs: local queue age plus post-send-to-RCT age percentiles, or `None`
    when the send log is missing or holds no usable rows.
    Why: if final freshness fails, the next question is whether delay existed
    before the client wrote the gRPC bundle or appeared after the bundle left
    the client.

    Events are matched to send rows by comparing `client_planned_start_us`
    with the row's `video_capture_pts_us`. Events whose key or age fields
    cannot be parsed are skipped rather than aborting the summary, matching
    the best-effort parsing of the send log itself.
    """
    send_path = report_path.parent / _SEND_LOG_NAME
    if not send_path.is_file():
        return None

    local_age_by_pts, local_ages = _read_send_rows(send_path)
    if not local_age_by_pts:
        return None

    post_send_video_ages: list[float] = []
    post_send_audio_ages: list[float] = []
    joined_with_send_rows = 0
    for event in joined:
        planned_start_us = event.get("client_planned_start_us")
        if planned_start_us is None:
            continue
        try:
            local_age = local_age_by_pts.get(int(planned_start_us))
        except (TypeError, ValueError, OverflowError):
            continue
        if local_age is None:
            continue
        joined_with_send_rows += 1
        for key, bucket in (
            ("video_age_ms", post_send_video_ages),
            ("audio_age_ms", post_send_audio_ages),
        ):
            age = _optional_float(event.get(key))
            if age is not None:
                # Subtract the time already spent on the client so the
                # remainder is the age accumulated after the bundle was sent.
                bucket.append(age - local_age)

    video_p95 = percentile(post_send_video_ages, 0.95)
    audio_p95 = percentile(post_send_audio_ages, 0.95)
    # Either percentile can be None even when its list is non-empty (all
    # samples non-finite), so guard on the percentiles, not on the lists.
    post_send_worst = max(
        (value for value in (video_p95, audio_p95) if value is not None),
        default=None,
    )
    # Restrict the maximum to finite samples, as `percentile` does; max() over
    # a list containing NaN is order-dependent.
    local_age_max = max(
        (age for age in local_ages if math.isfinite(age)),
        default=None,
    )

    return {
        "bundle_count": len(local_age_by_pts),
        "joined_event_count": joined_with_send_rows,
        "local_bundle_age_p95_ms": percentile(local_ages, 0.95),
        "local_bundle_age_max_ms": local_age_max,
        "post_client_send_video_age_p95_ms": video_p95,
        "post_client_send_audio_age_p95_ms": audio_p95,
        "post_client_send_worst_p95_ms": post_send_worst,
    }


def _exceeds(value: float | None, limit: float) -> bool:
    """Return True when `value` is present and strictly greater than `limit`."""
    return value is not None and value > limit


def freshness_bottleneck(summary: dict) -> str:
    """Classify the most likely freshness bottleneck from available artifacts.

    Inputs: the structured summary assembled from RCT capture, client send
    log, and optional upstream-sync samples.
    Outputs: a short machine-readable label.
    Why: first-pass client->server work should remain black-box, but failed
    runs should still point us toward client queueing, transport/server
    receive, or post-send output/RCT delay before we add invasive
    introspection.

    Checks run in pipeline order (client, server receive, transport, post-send
    path) and the first stage over its limit wins.
    """
    if summary.get("freshness_passed"):
        return "within_limit"

    # `or 0` also covers keys that are present but explicitly None.
    paired_events = summary.get("paired_event_count") or 0
    required_events = summary.get("min_paired_events") or 0
    if paired_events < required_events:
        return "evidence_incomplete"

    client_send = summary.get("client_send") or {}
    upstream = summary.get("upstream_sync") or {}

    if _exceeds(client_send.get("local_bundle_age_p95_ms"), _STAGE_AGE_LIMIT_MS):
        return "client_queue_or_bundle_generation"
    if _exceeds(upstream.get("server_receive_age_p95_ms"), _STAGE_AGE_LIMIT_MS):
        return "server_receive_or_ingress_queue"
    if _exceeds(upstream.get("media_transport_lag_p95_ms"), _TRANSPORT_LAG_LIMIT_MS):
        return "client_to_server_transport"
    if _exceeds(client_send.get("post_client_send_worst_p95_ms"), _STAGE_AGE_LIMIT_MS):
        return "post_client_send_to_rct_path"
    return "needs_deeper_introspection"