diff --git a/AGENTS.md b/AGENTS.md index 9302330..1253061 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -330,3 +330,17 @@ Context: 0.17.13 adds safe measured calibration apply/refuse plumbing, but it is - [x] Update manual probe contract tests for segmented live calibration mode. - [x] Run focused script/CLI checks and package checks. - [x] Push clean semver `0.17.14` for installed client/server testing. + +## 0.17.15 Adaptive Probe Metrics and Blind Target Checklist + +Context: 0.17.14 can keep one Lesavka session alive across multiple measured segments, but we still need the probe to teach Lesavka what "good" looks like from server-only telemetry. 0.17.15 turns segmented runs into an adaptive calibration dataset: every segment gets probe truth, planner state, and calibration state joined into artifacts that can drive blind calibration/healing targets when Tethys/browser probe access is not available. + +- [x] Keep 0.17.15 scoped to probe intelligence and metrics correlation; do not change media playout policy. +- [x] Add adaptive calibration ergonomics for longer near-continuous runs without changing the default one-segment probe. +- [x] Write per-run segment metrics as CSV and JSONL, joining analyzer verdicts with planner/calibration before/after snapshots. +- [x] Emit a blind-target candidate JSON from segments whose probe verdict passes, including server-visible planner lag/skew ranges. +- [x] Record when no segment is probe-good enough so blind-target generation refuses instead of inventing targets. +- [x] Keep calibration mutation gated by the existing ready/refuse logic and `LESAVKA_SYNC_APPLY_CALIBRATION=1`. +- [x] Update manual probe contract tests for the adaptive artifacts and controls. +- [x] Run focused script checks and package checks. +- [ ] Push clean semver `0.17.15` for installed client/server testing. 
diff --git a/Cargo.lock b/Cargo.lock index 17d9293..87ba547 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "lesavka_client" -version = "0.17.14" +version = "0.17.15" dependencies = [ "anyhow", "async-stream", @@ -1686,7 +1686,7 @@ dependencies = [ [[package]] name = "lesavka_common" -version = "0.17.14" +version = "0.17.15" dependencies = [ "anyhow", "base64", @@ -1698,7 +1698,7 @@ dependencies = [ [[package]] name = "lesavka_server" -version = "0.17.14" +version = "0.17.15" dependencies = [ "anyhow", "base64", diff --git a/client/Cargo.toml b/client/Cargo.toml index e27c741..284d175 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -4,7 +4,7 @@ path = "src/main.rs" [package] name = "lesavka_client" -version = "0.17.14" +version = "0.17.15" edition = "2024" [dependencies] diff --git a/common/Cargo.toml b/common/Cargo.toml index 2c9fa3f..bca8b0e 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lesavka_common" -version = "0.17.14" +version = "0.17.15" edition = "2024" build = "build.rs" diff --git a/scripts/manual/run_upstream_mirrored_av_sync.sh b/scripts/manual/run_upstream_mirrored_av_sync.sh index 772b8c0..dc2b365 100755 --- a/scripts/manual/run_upstream_mirrored_av_sync.sh +++ b/scripts/manual/run_upstream_mirrored_av_sync.sh @@ -23,6 +23,8 @@ PROBE_PULSE_PERIOD_MS=${PROBE_PULSE_PERIOD_MS:-1000} PROBE_PULSE_WIDTH_MS=${PROBE_PULSE_WIDTH_MS:-120} PROBE_MARKER_TICK_PERIOD=${PROBE_MARKER_TICK_PERIOD:-5} PROBE_EVENT_WIDTH_CODES=${PROBE_EVENT_WIDTH_CODES:-1,2,1,3,2,4,1,1,3,1,4,2,1,2,3,4,1,3,2,2,4,1,2,4,3,1,1,4,2,3,1,2} +LESAVKA_SYNC_CALIBRATION_SEGMENTS_SET=${LESAVKA_SYNC_CALIBRATION_SEGMENTS+x} +LESAVKA_SYNC_ADAPTIVE_CALIBRATION=${LESAVKA_SYNC_ADAPTIVE_CALIBRATION:-0} LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0} LESAVKA_SYNC_SAVE_CALIBRATION=${LESAVKA_SYNC_SAVE_CALIBRATION:-0} 
LESAVKA_SYNC_CALIBRATION_TARGET=${LESAVKA_SYNC_CALIBRATION_TARGET:-video}
@@ -48,6 +50,10 @@ STIMULUS_PID=""
 STIMULUS_BROWSER_PID=""
 CLIENT_PID=""
 
+if [[ "${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}" == "1" && -z "${LESAVKA_SYNC_CALIBRATION_SEGMENTS_SET}" ]]; then
+  LESAVKA_SYNC_CALIBRATION_SEGMENTS=4
+fi
+
 if ! [[ "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" =~ ^[1-9][0-9]*$ ]]; then
   echo "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer" >&2
   exit 2
@@ -474,6 +480,197 @@ run_mirrored_segments() {
   return "${run_status}"
 }
 
+# Join each segment's analyzer report with the planner/calibration snapshots
+# captured around it, then write segment-metrics.csv, segment-metrics.jsonl and
+# blind-targets.json under ARTIFACT_DIR.
+summarize_adaptive_probe_metrics() {
+  echo "==> summarizing segmented probe metrics"
+  python3 - "${ARTIFACT_DIR}" "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" <<'PY'
+"""Summarize segmented probe runs into metrics tables and blind targets.
+
+argv[1] is the artifact directory holding the segment-N/ subdirectories and
+argv[2] is the number of segments to summarize.
+"""
+import csv
+import json
+import math
+import sys
+from pathlib import Path
+
+root = Path(sys.argv[1])
+segment_count = int(sys.argv[2])
+
+
+def read_env(path):
+    """Parse KEY=VALUE lines from *path*; a missing file yields {}."""
+    values = {}
+    if not path.exists():
+        return values
+    for raw in path.read_text(encoding="utf-8").splitlines():
+        if not raw or "=" not in raw:
+            continue
+        key, value = raw.split("=", 1)
+        values[key] = value
+    return values
+
+
+def latest_report(segment_dir):
+    """Return the most recently modified */report.json, or None if absent."""
+    reports = list(segment_dir.glob("*/report.json"))
+    if not reports:
+        return None
+    return max(reports, key=lambda path: path.stat().st_mtime)
+
+
+def load_report(report_path):
+    """Load a report as a dict; unreadable or malformed reports yield {}.
+
+    A broken report must not abort the summary: the segment is then recorded
+    with probe_status "missing" and can never count as probe-good.
+    """
+    if report_path is None:
+        return {}
+    try:
+        report = json.loads(report_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as err:
+        print(f" ↪ ignoring unreadable report {report_path}: {err}", file=sys.stderr)
+        return {}
+    return report if isinstance(report, dict) else {}
+
+
+def section(report, key):
+    """Return report[key] when it is a dict, else {} (covers JSON null)."""
+    value = report.get(key)
+    return value if isinstance(value, dict) else {}
+
+
+def as_float(value):
+    """Convert *value* to a finite float, or None when absent/unparseable.
+
+    NaN and infinities are rejected: json.dumps would emit them as the
+    non-standard NaN/Infinity tokens and they would corrupt the target ranges.
+    """
+    if value is None or value in {"", "pending"}:
+        return None
+    try:
+        number = float(value)
+    except (TypeError, ValueError):
+        return None
+    return number if math.isfinite(number) else None
+
+
+def metric(source, key):
+    """Read source[key] through its string form as an optional float."""
+    return as_float(str(source.get(key, "")))
+
+
+def range_for(rows, key):
+    """Return min/max/mean (3 decimals) of numeric row[key] values, or None."""
+    values = [row[key] for row in rows if isinstance(row.get(key), (int, float))]
+    if not values:
+        return None
+    return {
+        "min": round(min(values), 3),
+        "max": round(max(values), 3),
+        "mean": round(sum(values) / len(values), 3),
+    }
+
+
+rows = []
+for segment in range(1, segment_count + 1):
+    segment_dir = root / f"segment-{segment}"
+    report_path = latest_report(segment_dir)
+    report = load_report(report_path)
+    verdict = section(report, "verdict")
+    calibration = section(report, "calibration")
+
+    planner_before = read_env(segment_dir / "planner-before.env")
+    planner_after = read_env(segment_dir / "planner-after.env")
+    calibration_before = read_env(segment_dir / "calibration-before.env")
+    calibration_after = read_env(segment_dir / "calibration-after.env")
+    decision = read_env(segment_dir / "calibration-decision.env")
+
+    row = {
+        "segment": segment,
+        "report_json": str(report_path) if report_path else "",
+        "probe_status": verdict.get("status", "missing"),
+        "probe_passed": bool(verdict.get("passed", False)),
+        "probe_p95_abs_skew_ms": metric(verdict, "p95_abs_skew_ms"),
+        "probe_max_abs_skew_ms": metric(verdict, "max_abs_skew_ms"),
+        "probe_median_skew_ms": metric(report, "median_skew_ms"),
+        "probe_mean_skew_ms": metric(report, "mean_skew_ms"),
+        "probe_drift_ms": metric(report, "drift_ms"),
+        "probe_paired_pulses": report.get("paired_event_count", 0),
+        "probe_activity_start_delta_ms": metric(report, "activity_start_delta_ms"),
+        "calibration_ready": bool(calibration.get("ready", False)),
+        "calibration_note": calibration.get("note", ""),
+        "decision_video_delta_us": as_float(decision.get("calibration_apply_video_delta_us")),
+        "decision_audio_delta_us": as_float(decision.get("calibration_apply_audio_delta_us")),
+        "planner_phase_before": planner_before.get("planner_phase", ""),
+        "planner_phase_after": planner_after.get("planner_phase", ""),
+        "planner_live_lag_ms_before": as_float(planner_before.get("planner_live_lag_ms")),
+        "planner_live_lag_ms_after": as_float(planner_after.get("planner_live_lag_ms")),
+        "planner_skew_ms_before": as_float(planner_before.get("planner_skew_ms")),
+        "planner_skew_ms_after": as_float(planner_after.get("planner_skew_ms")),
+        "planner_video_freezes_before": as_float(planner_before.get("planner_video_freezes")),
+        "planner_video_freezes_after": as_float(planner_after.get("planner_video_freezes")),
+        "planner_freshness_reanchors_before": as_float(planner_before.get("planner_freshness_reanchors")),
+        "planner_freshness_reanchors_after": as_float(planner_after.get("planner_freshness_reanchors")),
+        "active_audio_offset_us_before": as_float(calibration_before.get("calibration_active_audio_offset_us")),
+        "active_audio_offset_us_after": as_float(calibration_after.get("calibration_active_audio_offset_us")),
+        "active_video_offset_us_before": as_float(calibration_before.get("calibration_active_video_offset_us")),
+        "active_video_offset_us_after": as_float(calibration_after.get("calibration_active_video_offset_us")),
+    }
+    rows.append(row)
+
+csv_path = root / "segment-metrics.csv"
+jsonl_path = root / "segment-metrics.jsonl"
+fieldnames = list(rows[0].keys()) if rows else ["segment"]
+with csv_path.open("w", newline="", encoding="utf-8") as handle:
+    writer = csv.DictWriter(handle, fieldnames=fieldnames)
+    writer.writeheader()
+    writer.writerows(rows)
+with jsonl_path.open("w", encoding="utf-8") as handle:
+    for row in rows:
+        handle.write(json.dumps(row, sort_keys=True) + "\n")
+
+good_rows = [row for row in rows if row.get("probe_passed")]
+target_path = root / "blind-targets.json"
+if good_rows:
+    target = {
+        "ready": True,
+        "source": "probe-passing segmented mirrored run",
+        "good_segments": [row["segment"] for row in good_rows],
+        "planner_live_lag_ms_after": range_for(good_rows, "planner_live_lag_ms_after"),
+        "planner_skew_ms_after": range_for(good_rows, "planner_skew_ms_after"),
+        "active_audio_offset_us_after": range_for(good_rows, "active_audio_offset_us_after"),
+        "active_video_offset_us_after": range_for(good_rows, "active_video_offset_us_after"),
+        "probe_p95_abs_skew_ms": range_for(good_rows, "probe_p95_abs_skew_ms"),
+        "probe_median_skew_ms": range_for(good_rows, "probe_median_skew_ms"),
+    }
+else:
+    sortable = [
+        row for row in rows
+        if isinstance(row.get("probe_p95_abs_skew_ms"), (int, float))
+    ]
+    best = min(sortable, key=lambda row: row["probe_p95_abs_skew_ms"], default=None)
+    target = {
+        "ready": False,
+        "reason": "no segment produced a passing probe verdict; refusing to invent blind targets",
+        "segments_seen": len(rows),
+        "best_segment": best["segment"] if best else None,
+        "best_probe_status": best["probe_status"] if best else "missing",
+        "best_probe_p95_abs_skew_ms": best["probe_p95_abs_skew_ms"] if best else None,
+    }
+target_path.write_text(json.dumps(target, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+print(f" ↪ segment_metrics_csv={csv_path}")
+print(f" ↪ segment_metrics_jsonl={jsonl_path}")
+print(f" ↪ blind_targets_json={target_path}")
+print(f" ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}")
+PY
+}
+
 echo "==> prebuilding real client and analyzer"
 (
   cd "${REPO_ROOT}"
@@ -490,6 +687,7 @@ run_status=0
 run_mirrored_segments || run_status=$?
 print_upstream_sync_state "after mirrored run" "${ARTIFACT_DIR}/planner-after.env"
 print_upstream_calibration_state "after mirrored run" "${ARTIFACT_DIR}/calibration-after.env"
+summarize_adaptive_probe_metrics
 
 if ((run_status != 0)); then
   echo "==> mirrored probe failed"
diff --git a/server/Cargo.toml b/server/Cargo.toml
index 4757318..ce6dd62 100644
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -10,7 +10,7 @@ bench = false
 
 [package]
 name = "lesavka_server"
-version = "0.17.14"
+version = "0.17.15"
 edition = "2024"
 autobins = false
 
diff --git a/testing/tests/client_manual_sync_script_contract.rs b/testing/tests/client_manual_sync_script_contract.rs
index de19514..df1a4d4 100644
--- a/testing/tests/client_manual_sync_script_contract.rs
+++ b/testing/tests/client_manual_sync_script_contract.rs
@@ -114,15 +114,25 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
         "LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0}",
         "LESAVKA_SYNC_SAVE_CALIBRATION=${LESAVKA_SYNC_SAVE_CALIBRATION:-0}",
"LESAVKA_SYNC_CALIBRATION_TARGET=${LESAVKA_SYNC_CALIBRATION_TARGET:-video}", + "LESAVKA_SYNC_ADAPTIVE_CALIBRATION=${LESAVKA_SYNC_ADAPTIVE_CALIBRATION:-0}", "LESAVKA_SYNC_CALIBRATION_SEGMENTS=${LESAVKA_SYNC_CALIBRATION_SEGMENTS:-1}", "LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS=${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS:-3}", + "LESAVKA_SYNC_ADAPTIVE_CALIBRATION", + "LESAVKA_SYNC_CALIBRATION_SEGMENTS=4", "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer", "run_mirrored_segments", + "summarize_adaptive_probe_metrics", "for segment in $(seq 1 \"${LESAVKA_SYNC_CALIBRATION_SEGMENTS}\")", "segment-${segment}", "calibration-before.env", "planner-before.env", "calibration-decision.env", + "segment-metrics.csv", + "segment-metrics.jsonl", + "blind-targets.json", + "no segment produced a passing probe verdict; refusing to invent blind targets", + "planner_live_lag_ms_after", + "probe_p95_abs_skew_ms", "settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment", "print_upstream_calibration_state \"before mirrored run\"", "maybe_apply_probe_calibration",