test: summarize adaptive sync probe targets

2026-05-02 13:52:31 -03:00 · 2026-05-02 13:52:31 -03:00 · 61c1c94f68
commit 61c1c94f68
parent c86addf944
7 changed files with 188 additions and 6 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -330,3 +330,17 @@ Context: 0.17.13 adds safe measured calibration apply/refuse plumbing, but it is
 - [x] Update manual probe contract tests for segmented live calibration mode.
 - [x] Run focused script/CLI checks and package checks.
 - [x] Push clean semver `0.17.14` for installed client/server testing.
+
+## 0.17.15 Adaptive Probe Metrics and Blind Target Checklist
+
+Context: 0.17.14 can keep one Lesavka session alive across multiple measured segments, but we still need the probe to teach Lesavka what "good" looks like from server-only telemetry. 0.17.15 turns segmented runs into an adaptive calibration dataset: every segment gets probe truth, planner state, and calibration state joined into artifacts that can drive blind calibration/healing targets when Tethys/browser probe access is not available.
+
+- [x] Keep 0.17.15 scoped to probe intelligence and metrics correlation; do not change media playout policy.
+- [x] Add adaptive calibration ergonomics for longer near-continuous runs without changing the default one-segment probe.
+- [x] Write per-run segment metrics as CSV and JSONL, joining analyzer verdicts with planner/calibration before/after snapshots.
+- [x] Emit a blind-target candidate JSON from segments whose probe verdict passes, including server-visible planner lag/skew ranges.
+- [x] Record when no segment is probe-good enough so blind-target generation refuses instead of inventing targets.
+- [x] Keep calibration mutation gated by the existing ready/refuse logic and `LESAVKA_SYNC_APPLY_CALIBRATION=1`.
+- [x] Update manual probe contract tests for the adaptive artifacts and controls.
+- [x] Run focused script checks and package checks.
+- [ ] Push clean semver `0.17.15` for installed client/server testing.
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"

 [[package]]
 name = "lesavka_client"
-version = "0.17.14"
+version = "0.17.15"
 dependencies = [
 "anyhow",
 "async-stream",
@ -1686,7 +1686,7 @@ dependencies = [

 [[package]]
 name = "lesavka_common"
-version = "0.17.14"
+version = "0.17.15"
 dependencies = [
 "anyhow",
 "base64",
@ -1698,7 +1698,7 @@ dependencies = [

 [[package]]
 name = "lesavka_server"
-version = "0.17.14"
+version = "0.17.15"
 dependencies = [
 "anyhow",
 "base64",
--- a/client/Cargo.toml
+++ b/client/Cargo.toml
@ -4,7 +4,7 @@ path                    = "src/main.rs"

 [package]
 name                    = "lesavka_client"
-version                 = "0.17.14"
+version                 = "0.17.15"
 edition                 = "2024"

 [dependencies]
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name                    = "lesavka_common"
-version                 = "0.17.14"
+version                 = "0.17.15"
 edition                 = "2024"
 build                   = "build.rs"

--- a/scripts/manual/run_upstream_mirrored_av_sync.sh
+++ b/scripts/manual/run_upstream_mirrored_av_sync.sh
@ -23,6 +23,8 @@ PROBE_PULSE_PERIOD_MS=${PROBE_PULSE_PERIOD_MS:-1000}
 PROBE_PULSE_WIDTH_MS=${PROBE_PULSE_WIDTH_MS:-120}
 PROBE_MARKER_TICK_PERIOD=${PROBE_MARKER_TICK_PERIOD:-5}
 PROBE_EVENT_WIDTH_CODES=${PROBE_EVENT_WIDTH_CODES:-1,2,1,3,2,4,1,1,3,1,4,2,1,2,3,4,1,3,2,2,4,1,2,4,3,1,1,4,2,3,1,2}
+LESAVKA_SYNC_CALIBRATION_SEGMENTS_SET=${LESAVKA_SYNC_CALIBRATION_SEGMENTS:+x} # empty counts as unset, like the :-1 default
+LESAVKA_SYNC_ADAPTIVE_CALIBRATION=${LESAVKA_SYNC_ADAPTIVE_CALIBRATION:-0}
 LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0}
 LESAVKA_SYNC_SAVE_CALIBRATION=${LESAVKA_SYNC_SAVE_CALIBRATION:-0}
 LESAVKA_SYNC_CALIBRATION_TARGET=${LESAVKA_SYNC_CALIBRATION_TARGET:-video}
@ -48,6 +50,10 @@ STIMULUS_PID=""
 STIMULUS_BROWSER_PID=""
 CLIENT_PID=""

+if [[ "${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}" == "1" && -z "${LESAVKA_SYNC_CALIBRATION_SEGMENTS_SET}" ]]; then
+  LESAVKA_SYNC_CALIBRATION_SEGMENTS=4
+fi
+
 if ! [[ "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" =~ ^[1-9][0-9]*$ ]]; then
  echo "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer" >&2
  exit 2
@ -474,6 +480,157 @@ run_mirrored_segments() {
  return "${run_status}"
 }

+summarize_adaptive_probe_metrics() {
+  # Joins each segment's analyzer report with its planner/calibration env
+  # snapshots, then writes segment-metrics.csv, segment-metrics.jsonl and
+  # blind-targets.json into ARTIFACT_DIR. Unreadable reports count as missing.
+  echo "==> summarizing segmented probe metrics"
+  python3 - "${ARTIFACT_DIR}" "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" <<'PY'
+import csv
+import json
+import math
+import sys
+from pathlib import Path
+
+root = Path(sys.argv[1])
+segment_count = int(sys.argv[2])
+
+
+def read_env(path):
+    """Parse KEY=VALUE lines from path; a missing file yields an empty dict."""
+    lines = path.read_text(encoding="utf-8").splitlines() if path.exists() else []
+    return dict(raw.split("=", 1) for raw in lines if "=" in raw)
+
+
+def latest_report(segment_dir):
+    """Return the most recently modified */report.json under segment_dir, or None."""
+    reports = segment_dir.glob("*/report.json")
+    return max(reports, key=lambda path: path.stat().st_mtime, default=None)
+
+
+def load_report(path):
+    """Load a report as a dict; unreadable, truncated, or non-object JSON yields {}."""
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        return {}
+    return data if isinstance(data, dict) else {}
+
+
+def as_float(value):
+    """Convert value to a finite float; anything else (None, '', 'pending') gives None."""
+    try:
+        number = float(value)
+    except (TypeError, ValueError):
+        return None
+    # json.dumps would write bare NaN/Infinity tokens, which are not valid JSON.
+    return number if math.isfinite(number) else None
+
+
+def range_for(rows, key):
+    """Summarize the numeric row[key] values as min/max/mean, or None if there are none."""
+    values = [row[key] for row in rows if isinstance(row.get(key), (int, float))]
+    if not values:
+        return None
+    return {
+        "min": round(min(values), 3),
+        "max": round(max(values), 3),
+        "mean": round(sum(values) / len(values), 3),
+    }
+
+
+rows = []
+for segment in range(1, segment_count + 1):
+    segment_dir = root / f"segment-{segment}"
+    report_path = latest_report(segment_dir)
+    report = load_report(report_path) if report_path is not None else {}
+    # "or {}" also covers sections that are present but null in the report.
+    verdict = report.get("verdict") or {}
+    calibration = report.get("calibration") or {}
+
+    planner_before = read_env(segment_dir / "planner-before.env")
+    planner_after = read_env(segment_dir / "planner-after.env")
+    calibration_before = read_env(segment_dir / "calibration-before.env")
+    calibration_after = read_env(segment_dir / "calibration-after.env")
+    decision = read_env(segment_dir / "calibration-decision.env")
+
+    row = {
+        "segment": segment,
+        "report_json": str(report_path) if report_path else "",
+        "probe_status": verdict.get("status", "missing"),
+        "probe_passed": bool(verdict.get("passed", False)),
+        "probe_p95_abs_skew_ms": as_float(str(verdict.get("p95_abs_skew_ms", ""))),
+        "probe_max_abs_skew_ms": as_float(str(verdict.get("max_abs_skew_ms", ""))),
+        "probe_median_skew_ms": as_float(str(report.get("median_skew_ms", ""))),
+        "probe_mean_skew_ms": as_float(str(report.get("mean_skew_ms", ""))),
+        "probe_drift_ms": as_float(str(report.get("drift_ms", ""))),
+        "probe_paired_pulses": report.get("paired_event_count", 0),
+        "probe_activity_start_delta_ms": as_float(str(report.get("activity_start_delta_ms", ""))),
+        "calibration_ready": bool(calibration.get("ready", False)),
+        "calibration_note": calibration.get("note", ""),
+        "decision_video_delta_us": as_float(decision.get("calibration_apply_video_delta_us")),
+        "decision_audio_delta_us": as_float(decision.get("calibration_apply_audio_delta_us")),
+        "planner_phase_before": planner_before.get("planner_phase", ""),
+        "planner_phase_after": planner_after.get("planner_phase", ""),
+        "planner_live_lag_ms_before": as_float(planner_before.get("planner_live_lag_ms")),
+        "planner_live_lag_ms_after": as_float(planner_after.get("planner_live_lag_ms")),
+        "planner_skew_ms_before": as_float(planner_before.get("planner_skew_ms")),
+        "planner_skew_ms_after": as_float(planner_after.get("planner_skew_ms")),
+        "planner_video_freezes_before": as_float(planner_before.get("planner_video_freezes")),
+        "planner_video_freezes_after": as_float(planner_after.get("planner_video_freezes")),
+        "planner_freshness_reanchors_before": as_float(planner_before.get("planner_freshness_reanchors")),
+        "planner_freshness_reanchors_after": as_float(planner_after.get("planner_freshness_reanchors")),
+        "active_audio_offset_us_before": as_float(calibration_before.get("calibration_active_audio_offset_us")),
+        "active_audio_offset_us_after": as_float(calibration_after.get("calibration_active_audio_offset_us")),
+        "active_video_offset_us_before": as_float(calibration_before.get("calibration_active_video_offset_us")),
+        "active_video_offset_us_after": as_float(calibration_after.get("calibration_active_video_offset_us")),
+    }
+    rows.append(row)
+
+csv_path = root / "segment-metrics.csv"
+jsonl_path = root / "segment-metrics.jsonl"
+fieldnames = list(rows[0].keys()) if rows else ["segment"]
+with csv_path.open("w", newline="", encoding="utf-8") as handle:
+    writer = csv.DictWriter(handle, fieldnames=fieldnames)
+    writer.writeheader()
+    writer.writerows(rows)
+with jsonl_path.open("w", encoding="utf-8") as handle:
+    handle.writelines(json.dumps(row, sort_keys=True) + "\n" for row in rows)
+
+good_rows = [row for row in rows if row.get("probe_passed")]
+target_path = root / "blind-targets.json"
+if good_rows:
+    target = {
+        "ready": True,
+        "source": "probe-passing segmented mirrored run",
+        "good_segments": [row["segment"] for row in good_rows],
+        "planner_live_lag_ms_after": range_for(good_rows, "planner_live_lag_ms_after"),
+        "planner_skew_ms_after": range_for(good_rows, "planner_skew_ms_after"),
+        "active_audio_offset_us_after": range_for(good_rows, "active_audio_offset_us_after"),
+        "active_video_offset_us_after": range_for(good_rows, "active_video_offset_us_after"),
+        "probe_p95_abs_skew_ms": range_for(good_rows, "probe_p95_abs_skew_ms"),
+        "probe_median_skew_ms": range_for(good_rows, "probe_median_skew_ms"),
+    }
+else:
+    sortable = [row for row in rows if row["probe_p95_abs_skew_ms"] is not None]
+    best = min(sortable, key=lambda row: row["probe_p95_abs_skew_ms"], default=None)
+    target = {
+        "ready": False,
+        "reason": "no segment produced a passing probe verdict; refusing to invent blind targets",
+        "segments_seen": len(rows),
+        "best_segment": best["segment"] if best else None,
+        "best_probe_status": best["probe_status"] if best else "missing",
+        "best_probe_p95_abs_skew_ms": best["probe_p95_abs_skew_ms"] if best else None,
+    }
+target_path.write_text(json.dumps(target, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+print(f"   ↪ segment_metrics_csv={csv_path}")
+print(f"   ↪ segment_metrics_jsonl={jsonl_path}")
+print(f"   ↪ blind_targets_json={target_path}")
+print(f"   ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}")
+PY
+}
+
 echo "==> prebuilding real client and analyzer"
 (
  cd "${REPO_ROOT}"
@ -490,6 +647,7 @@ run_status=0
 run_mirrored_segments || run_status=$?
 print_upstream_sync_state "after mirrored run" "${ARTIFACT_DIR}/planner-after.env"
 print_upstream_calibration_state "after mirrored run" "${ARTIFACT_DIR}/calibration-after.env"
+summarize_adaptive_probe_metrics

 if ((run_status != 0)); then
  echo "==> mirrored probe failed"
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@ -10,7 +10,7 @@ bench                   = false

 [package]
 name                    = "lesavka_server"
-version                 = "0.17.14"
+version                 = "0.17.15"
 edition                 = "2024"
 autobins                = false

--- a/testing/tests/client_manual_sync_script_contract.rs
+++ b/testing/tests/client_manual_sync_script_contract.rs
@ -114,15 +114,25 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
        "LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0}",
        "LESAVKA_SYNC_SAVE_CALIBRATION=${LESAVKA_SYNC_SAVE_CALIBRATION:-0}",
        "LESAVKA_SYNC_CALIBRATION_TARGET=${LESAVKA_SYNC_CALIBRATION_TARGET:-video}",
+        "LESAVKA_SYNC_ADAPTIVE_CALIBRATION=${LESAVKA_SYNC_ADAPTIVE_CALIBRATION:-0}",
        "LESAVKA_SYNC_CALIBRATION_SEGMENTS=${LESAVKA_SYNC_CALIBRATION_SEGMENTS:-1}",
        "LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS=${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS:-3}",
+        "LESAVKA_SYNC_ADAPTIVE_CALIBRATION",
+        "LESAVKA_SYNC_CALIBRATION_SEGMENTS=4",
        "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer",
        "run_mirrored_segments",
+        "summarize_adaptive_probe_metrics",
        "for segment in $(seq 1 \"${LESAVKA_SYNC_CALIBRATION_SEGMENTS}\")",
        "segment-${segment}",
        "calibration-before.env",
        "planner-before.env",
        "calibration-decision.env",
+        "segment-metrics.csv",
+        "segment-metrics.jsonl",
+        "blind-targets.json",
+        "no segment produced a passing probe verdict; refusing to invent blind targets",
+        "planner_live_lag_ms_after",
+        "probe_p95_abs_skew_ms",
        "settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment",
        "print_upstream_calibration_state \"before mirrored run\"",
        "maybe_apply_probe_calibration",