diff --git a/AGENTS.md b/AGENTS.md index d081cb4..be4289c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -479,3 +479,19 @@ low paired-pulse counts as product failure. - [x] Update manual probe contract coverage for the audio-gain control. - [x] Run focused analyzer/manual-probe tests and package checks. - [x] Push clean semver `0.17.23` for installed client/server testing. + +## 0.17.24 Probe Truthfulness And Localization Checklist + +Context: the 0.17.23 run proved adaptive calibration is now live-editing the server, +but confirmation still failed. Segment 3 passed and triggered a provisional calibration +nudge, while the confirmation segment failed with a near-centered median but high p95/drift. +This means the fastest high-quality path is localization tooling, not another static offset +guess. + +- [x] Treat the latest failure as timing instability/outlier drift until the probe proves otherwise. +- [x] Fix analyzer-failure raw activity delta parsing so bounded raw-delta calibration can use the evidence it prints. +- [x] Stop marking `blind-targets.json` ready from calibration-only passes when confirmation segments exist and fail. +- [x] Emit combined `segment-events.csv` and `segment-events.jsonl` artifacts so each run exposes per-pulse skew and confidence across segments. +- [ ] Use the next run to decide whether bad p95 is caused by low-confidence analyzer pairings, camera/mic capture instability, or server planner/output jitter. +- [ ] Add stage-local timing evidence for stimulus schedule, client capture onsets, server output timing, and browser/device capture if the event table still cannot isolate the source. +- [ ] Only save calibration defaults after a confirmation segment passes. diff --git a/Cargo.lock b/Cargo.lock index 6d0076e..19a460d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "lesavka_client" -version = "0.17.23" +version = "0.17.24" dependencies = [ "anyhow", "async-stream", @@ -1686,7 +1686,7 @@ dependencies = [ [[package]] name = "lesavka_common" -version = "0.17.23" +version = "0.17.24" dependencies = [ "anyhow", "base64", @@ -1698,7 +1698,7 @@ dependencies = [ [[package]] name = "lesavka_server" -version = "0.17.23" +version = "0.17.24" dependencies = [ "anyhow", "base64", diff --git a/client/Cargo.toml b/client/Cargo.toml index dc4ca0d..c372c13 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -4,7 +4,7 @@ path = "src/main.rs" [package] name = "lesavka_client" -version = "0.17.23" +version = "0.17.24" edition = "2024" [dependencies] diff --git a/common/Cargo.toml b/common/Cargo.toml index 5ffba2b..51759f1 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lesavka_common" -version = "0.17.23" +version = "0.17.24" edition = "2024" build = "build.rs" diff --git a/scripts/manual/run_upstream_browser_av_sync.sh b/scripts/manual/run_upstream_browser_av_sync.sh index 6a2e395..818f6ff 100755 --- a/scripts/manual/run_upstream_browser_av_sync.sh +++ b/scripts/manual/run_upstream_browser_av_sync.sh @@ -260,8 +260,8 @@ if not reason: reason = lines[-1].strip() if lines else "analyzer failed" raw_match = re.search( - r"raw activity delta was ([+-]?[0-9]+(?:\\.[0-9]+)?) ms " - r"\\(video=([0-9]+(?:\\.[0-9]+)?)s audio=([0-9]+(?:\\.[0-9]+)?)s\\)", + r"raw activity delta was ([+-]?[0-9]+(?:\.[0-9]+)?) ms " + r"\(video=([0-9]+(?:\.[0-9]+)?)s audio=([0-9]+(?:\.[0-9]+)?)s\)", text, ) paired_match = re.search(r"saw ([0-9]+)", reason) diff --git a/scripts/manual/run_upstream_mirrored_av_sync.sh b/scripts/manual/run_upstream_mirrored_av_sync.sh index fe6092a..e2cfa9b 100755 --- a/scripts/manual/run_upstream_mirrored_av_sync.sh +++ b/scripts/manual/run_upstream_mirrored_av_sync.sh @@ -789,6 +789,7 @@ def range_for(rows, key): rows = [] +event_rows = [] for segment in range(1, segment_count + 1): segment_dir = root / f"segment-{segment}" report_path = latest_report(segment_dir) @@ -857,6 +858,21 @@ for segment in range(1, segment_count + 1): } rows.append(row) + for event in report.get("paired_events", []): + if not isinstance(event, dict): + continue + event_rows.append({ + "segment": segment, + "segment_phase": phase, + "probe_status": row["probe_status"], + "probe_passed": row["probe_passed"], + "event_id": event.get("event_id"), + "video_time_s": as_float(str(event.get("video_time_s", ""))), + "audio_time_s": as_float(str(event.get("audio_time_s", ""))), + "skew_ms": as_float(str(event.get("skew_ms", ""))), + "confidence": as_float(str(event.get("confidence", ""))), + }) + csv_path = root / "segment-metrics.csv" jsonl_path = root / "segment-metrics.jsonl" fieldnames = list(rows[0].keys()) if rows else ["segment"] @@ -868,21 +884,50 @@ with jsonl_path.open("w", encoding="utf-8") as handle: for row in rows: handle.write(json.dumps(row, sort_keys=True) + "\n") +events_csv_path = root / "segment-events.csv" +events_jsonl_path = root / "segment-events.jsonl" +event_fieldnames = list(event_rows[0].keys()) if event_rows else [ + "segment", + "segment_phase", + "probe_status", + "probe_passed", + "event_id", + "video_time_s", + "audio_time_s", + "skew_ms", + "confidence", +] +with events_csv_path.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=event_fieldnames) + writer.writeheader() + writer.writerows(event_rows) +with events_jsonl_path.open("w", encoding="utf-8") as handle: + for row in event_rows: + handle.write(json.dumps(row, sort_keys=True) + "\n") + good_rows = [row for row in rows if row.get("probe_passed")] confirmation_rows = [row for row in rows if row.get("segment_phase") == "confirmation"] passing_confirmation_rows = [row for row in confirmation_rows if row.get("probe_passed")] target_path = root / "blind-targets.json" -if good_rows: +target_source_rows = passing_confirmation_rows if confirmation_rows else good_rows +if target_source_rows: target = { "ready": True, - "source": "probe-passing segmented mirrored run", - "good_segments": [row["segment"] for row in good_rows], - "planner_live_lag_ms_after": range_for(good_rows, "planner_live_lag_ms_after"), - "planner_skew_ms_after": range_for(good_rows, "planner_skew_ms_after"), - "active_audio_offset_us_after": range_for(good_rows, "active_audio_offset_us_after"), - "active_video_offset_us_after": range_for(good_rows, "active_video_offset_us_after"), - "probe_p95_abs_skew_ms": range_for(good_rows, "probe_p95_abs_skew_ms"), - "probe_median_skew_ms": range_for(good_rows, "probe_median_skew_ms"), + "source": ( + "passing confirmation segment" + if confirmation_rows else + "probe-passing segmented mirrored run" + ), + "good_segments": [row["segment"] for row in target_source_rows], + "candidate_good_calibration_segments": [ + row["segment"] for row in good_rows if row.get("segment_phase") != "confirmation" + ], + "planner_live_lag_ms_after": range_for(target_source_rows, "planner_live_lag_ms_after"), + "planner_skew_ms_after": range_for(target_source_rows, "planner_skew_ms_after"), + "active_audio_offset_us_after": range_for(target_source_rows, "active_audio_offset_us_after"), + "active_video_offset_us_after": range_for(target_source_rows, "active_video_offset_us_after"), + "probe_p95_abs_skew_ms": range_for(target_source_rows, "probe_p95_abs_skew_ms"), + "probe_median_skew_ms": range_for(target_source_rows, "probe_median_skew_ms"), } else: sortable = [ @@ -892,8 +937,15 @@ else: best = min(sortable, key=lambda row: row["probe_p95_abs_skew_ms"], default=None) target = { "ready": False, - "reason": "no segment produced a passing probe verdict; refusing to invent blind targets", + "reason": ( + "confirmation did not pass; refusing to promote calibration-only segments to blind targets" + if confirmation_rows else + "no segment produced a passing probe verdict; refusing to invent blind targets" + ), "segments_seen": len(rows), + "candidate_good_calibration_segments": [ + row["segment"] for row in good_rows if row.get("segment_phase") != "confirmation" + ], "best_segment": best["segment"] if best else None, "best_probe_status": best["probe_status"] if best else "missing", "best_probe_p95_abs_skew_ms": best["probe_p95_abs_skew_ms"] if best else None, @@ -930,6 +982,8 @@ confirmation_path.write_text(json.dumps(confirmation, indent=2, sort_keys=True) print(f" ↪ segment_metrics_csv={csv_path}") print(f" ↪ segment_metrics_jsonl={jsonl_path}") +print(f" ↪ segment_events_csv={events_csv_path}") +print(f" ↪ segment_events_jsonl={events_jsonl_path}") print(f" ↪ blind_targets_json={target_path}") print(f" ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}") print(f" ↪ confirmation_summary_json={confirmation_path}") diff --git a/server/Cargo.toml b/server/Cargo.toml index aa83f33..b95762f 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -10,7 +10,7 @@ bench = false [package] name = "lesavka_server" -version = "0.17.23" +version = "0.17.24" edition = "2024" autobins = false diff --git a/testing/tests/client_manual_sync_script_contract.rs b/testing/tests/client_manual_sync_script_contract.rs index 6d9d699..2ebff14 100644 --- a/testing/tests/client_manual_sync_script_contract.rs +++ b/testing/tests/client_manual_sync_script_contract.rs @@ -88,12 +88,18 @@ fn browser_sync_script_can_delegate_to_a_real_path_driver() { "for attempt in 1 2 3 4 5", "capture fetch attempt ${attempt} failed; retrying", "failed to fetch browser capture from ${TETHYS_HOST}:${REMOTE_CAPTURE}", + r"raw activity delta was ([+-]?[0-9]+(?:\.[0-9]+)?) ms ", + r"\(video=([0-9]+(?:\.[0-9]+)?)s audio=([0-9]+(?:\.[0-9]+)?)s\)", ] { assert!( BROWSER_SYNC_SCRIPT.contains(expected), "browser sync script should contain {expected}" ); } + assert!( + !BROWSER_SYNC_SCRIPT.contains(r"(?:\\.[0-9]+)?"), + "browser sync raw-delta parser should not require a literal backslash before decimals" + ); } #[test] @@ -162,6 +168,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() { "calibration-decision.env", "segment-metrics.csv", "segment-metrics.jsonl", + "segment-events.csv", + "segment-events.jsonl", "confirmation-summary.json", "confirmation_passed", "check_confirmation_result", @@ -170,6 +178,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() { "probe_activity_start_delta_ms", "blind-targets.json", "no segment produced a passing probe verdict; refusing to invent blind targets", + "confirmation did not pass; refusing to promote calibration-only segments to blind targets", + "candidate_good_calibration_segments", "decision_mode", "decision_provisional_video_recommendation_us", "planner_live_lag_ms_after",