From e52fb312929e1902fc964e6e37b619d982e314d0 Mon Sep 17 00:00:00 2001
From: Brad Stein <Brad.Stein@gmail.com>
Date: Sat, 2 May 2026 13:48:36 -0300
Subject: [PATCH] test: segment mirrored sync calibration probe

---
 AGENTS.md                                     | 15 ++++
 Cargo.lock                                    |  6 +-
 client/Cargo.toml                             |  2 +-
 common/Cargo.toml                             |  2 +-
 .../manual/run_upstream_mirrored_av_sync.sh   | 71 +++++++++++++++----
 server/Cargo.toml                             |  2 +-
 .../client_manual_sync_script_contract.rs     | 12 +++-
 7 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index ffccb38..5f94cdf 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -315,3 +315,18 @@ Context: 0.17.12 installed cleanly on both ends (`2b26fde`) and moved the paired
 - [x] Push clean semver `0.17.13` for installed client/server testing.
 
 Follow-up candidate: after 0.17.13 proves safe measured apply/refuse behavior, add a segmented live-calibration probe. The current browser probe uploads one WebM after recording ends, so it can only do measure/apply/rerun. A true same-session loop should run a longer stimulus, capture/analyze separate Tethys browser windows, apply calibration only between windows, and use the next window as the confirmation segment so before/after evidence is not mixed.
+
+## 0.17.14 Segmented Live Calibration Probe Checklist
+
+Context: 0.17.13 adds safe measured calibration apply/refuse plumbing, but it is still a single-window measure-then-rerun workflow. The next probe should keep the same Lesavka client/server session alive across multiple browser-capture windows so we can measure, apply, and re-measure without reinstalling or restarting the media path. This is the bridge from probe truth to blind/server-side calibration targets.
+
+- [x] Keep 0.17.14 scoped to probe tooling and observability; do not change media planner policy.
+- [x] Add optional multi-segment mirrored probe mode via `LESAVKA_SYNC_CALIBRATION_SEGMENTS`.
+- [x] Keep one local stimulus browser and one headless Lesavka sender alive across all segments.
+- [x] Run a fresh Tethys browser recording/analyzer pass per segment so before/after calibration evidence is not mixed in one WebM.
+- [x] Allow calibration apply between segments using the 0.17.13 ready/refuse gate.
+- [x] Capture planner and calibration snapshots before and after each segment for metric correlation.
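+- [x] Preserve single-segment default behavior for normal manual probes (one capture pass; its artifacts now land under `segment-1/`, and the calibration decision is skipped when the capture itself fails).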
+- [x] Update manual probe contract tests for segmented live calibration mode.
+- [x] Run focused script/CLI checks and package checks.
+- [ ] Push clean semver `0.17.14` for installed client/server testing.
diff --git a/Cargo.lock b/Cargo.lock
index f9a4c94..17d9293 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
 
 [[package]]
 name = "lesavka_client"
-version = "0.17.13"
+version = "0.17.14"
 dependencies = [
  "anyhow",
  "async-stream",
@@ -1686,7 +1686,7 @@ dependencies = [
 
 [[package]]
 name = "lesavka_common"
-version = "0.17.13"
+version = "0.17.14"
 dependencies = [
  "anyhow",
  "base64",
@@ -1698,7 +1698,7 @@ dependencies = [
 
 [[package]]
 name = "lesavka_server"
-version = "0.17.13"
+version = "0.17.14"
 dependencies = [
  "anyhow",
  "base64",
diff --git a/client/Cargo.toml b/client/Cargo.toml
index d4195de..e27c741 100644
--- a/client/Cargo.toml
+++ b/client/Cargo.toml
@@ -4,7 +4,7 @@ path                    = "src/main.rs"
 
 [package]
 name                    = "lesavka_client"
-version                 = "0.17.13"
+version                 = "0.17.14"
 edition                 = "2024"
 
 [dependencies]
diff --git a/common/Cargo.toml b/common/Cargo.toml
index abd1edc..2c9fa3f 100644
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name                    = "lesavka_common"
-version                 = "0.17.13"
+version                 = "0.17.14"
 edition                 = "2024"
 build                   = "build.rs"
 
diff --git a/scripts/manual/run_upstream_mirrored_av_sync.sh b/scripts/manual/run_upstream_mirrored_av_sync.sh
index 930d6da..772b8c0 100755
--- a/scripts/manual/run_upstream_mirrored_av_sync.sh
+++ b/scripts/manual/run_upstream_mirrored_av_sync.sh
@@ -26,6 +26,8 @@ PROBE_EVENT_WIDTH_CODES=${PROBE_EVENT_WIDTH_CODES:-1,2,1,3,2,4,1,1,3,1,4,2,1,2,3
 LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0}
 LESAVKA_SYNC_SAVE_CALIBRATION=${LESAVKA_SYNC_SAVE_CALIBRATION:-0}
 LESAVKA_SYNC_CALIBRATION_TARGET=${LESAVKA_SYNC_CALIBRATION_TARGET:-video}
+LESAVKA_SYNC_CALIBRATION_SEGMENTS=${LESAVKA_SYNC_CALIBRATION_SEGMENTS:-1}
+LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS=${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS:-3}
 STIMULUS_PORT=${STIMULUS_PORT:-18444}
 STIMULUS_SETTLE_SECONDS=${STIMULUS_SETTLE_SECONDS:-10}
 LOCAL_OUTPUT_DIR=${LOCAL_OUTPUT_DIR:-"${REPO_ROOT}/tmp"}
@@ -46,6 +48,11 @@ STIMULUS_PID=""
 STIMULUS_BROWSER_PID=""
 CLIENT_PID=""
 
+if ! [[ "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" =~ ^[1-9][0-9]*$ ]]; then
+  echo "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer" >&2
+  exit 2
+fi
+
 cleanup() {
   set +e
   [[ -n "${CLIENT_PID}" ]] && kill "${CLIENT_PID}" >/dev/null 2>&1
@@ -205,6 +212,7 @@ print_lesavka_versions() {
 
 print_upstream_sync_state() {
   local label="$1"
+  local output_path="${2:-}"
   echo "==> upstream sync planner state (${label})"
   if [[ ! -x "${REPO_ROOT}/target/debug/lesavka-relayctl" ]]; then
     (cd "${REPO_ROOT}" && cargo build -p lesavka_client --bin lesavka-relayctl >/dev/null)
@@ -217,8 +225,10 @@ print_upstream_sync_state() {
       upstream-sync 2>&1
   )"; then
     echo "   ↪ planner query failed: ${sync_output}"
+    [[ -n "${output_path}" ]] && printf '%s\n' "${sync_output}" >"${output_path}"
     return 0
   fi
+  [[ -n "${output_path}" ]] && printf '%s\n' "${sync_output}" >"${output_path}"
   while IFS= read -r line; do
     [[ -n "${line}" ]] && echo "   ↪ ${line}"
   done <<<"${sync_output}"
@@ -226,6 +236,7 @@ print_upstream_sync_state() {
 
 print_upstream_calibration_state() {
   local label="$1"
+  local output_path="${2:-}"
   echo "==> upstream calibration state (${label})"
   if [[ ! -x "${REPO_ROOT}/target/debug/lesavka-relayctl" ]]; then
     (cd "${REPO_ROOT}" && cargo build -p lesavka_client --bin lesavka-relayctl >/dev/null)
@@ -238,24 +249,29 @@ print_upstream_calibration_state() {
       calibration 2>&1
   )"; then
     echo "   ↪ calibration query failed: ${calibration_output}"
+    [[ -n "${output_path}" ]] && printf '%s\n' "${calibration_output}" >"${output_path}"
     return 0
   fi
+  [[ -n "${output_path}" ]] && printf '%s\n' "${calibration_output}" >"${output_path}"
   while IFS= read -r line; do
     [[ -n "${line}" ]] && echo "   ↪ ${line}"
   done <<<"${calibration_output}"
 }
 
 latest_report_json() {
-  find "${ARTIFACT_DIR}" -mindepth 2 -maxdepth 2 -type f -name report.json -printf '%T@ %p\n' 2>/dev/null \
+  local report_root="${1:-${ARTIFACT_DIR}}"
+  find "${report_root}" -mindepth 2 -maxdepth 2 -type f -name report.json -printf '%T@ %p\n' 2>/dev/null \
     | sort -n \
     | tail -n 1 \
     | cut -d' ' -f2-
 }
 
 maybe_apply_probe_calibration() {
+  local report_root="${1:-${ARTIFACT_DIR}}"
+  local label="${2:-mirrored run}"
   local report_json
-  report_json="$(latest_report_json)"
-  echo "==> probe calibration decision"
+  report_json="$(latest_report_json "${report_root}")"
+  echo "==> probe calibration decision (${label})"
   if [[ -z "${report_json}" || ! -f "${report_json}" ]]; then
     echo "   ↪ report_json=missing"
     echo "   ↪ calibration apply skipped: analyzer report was not produced"
@@ -307,6 +323,7 @@ PY
   fi
 
   eval "${summary}"
+  printf '%s\n' "${summary}" >"${report_root}/calibration-decision.env"
   echo "   ↪ report_json=${report_json}"
   echo "   ↪ verdict_status=${verdict_status}"
   echo "   ↪ paired_pulses=${paired_pulses}"
@@ -332,7 +349,7 @@ PY
     return 0
   fi
 
-  local note="mirrored probe ${STAMP}: target=${calibration_target}, median=${median_skew_ms}ms, p95=${p95_abs_skew_ms}ms, pairs=${paired_pulses}"
+  local note="mirrored probe ${STAMP} ${label}: target=${calibration_target}, median=${median_skew_ms}ms, p95=${p95_abs_skew_ms}ms, pairs=${paired_pulses}"
   echo "   ↪ applying calibration: audio_delta_us=${calibration_apply_audio_delta_us}, video_delta_us=${calibration_apply_video_delta_us}"
   LESAVKA_TLS_DOMAIN="${LESAVKA_TLS_DOMAIN}" \
     "${REPO_ROOT}/target/debug/lesavka-relayctl" \
@@ -413,19 +430,50 @@ start_real_lesavka_client() {
 }
 
 run_browser_capture_with_real_driver() {
+  local segment_label="$1"
+  local segment_output_dir="$2"
   local record_seconds=$((PROBE_DURATION_SECONDS + 3))
   local wait_seconds=$((PROBE_DURATION_SECONDS + 2))
   local driver_command="curl -fsS -X POST http://127.0.0.1:${STIMULUS_PORT}/start >/dev/null; sleep ${wait_seconds}"
-  echo "==> starting Tethys browser consumer and mirrored driver"
+  mkdir -p "${segment_output_dir}"
+  echo "==> starting Tethys browser consumer and mirrored driver (${segment_label})"
   BROWSER_RECORD_SECONDS="${record_seconds}" \
   PROBE_DURATION_SECONDS="${PROBE_DURATION_SECONDS}" \
   BROWSER_SYNC_DRIVER_COMMAND="${driver_command}" \
   SYNC_ANALYZE_EVENT_WIDTH_CODES="${PROBE_EVENT_WIDTH_CODES}" \
-  LOCAL_OUTPUT_DIR="${ARTIFACT_DIR}" \
+  LOCAL_OUTPUT_DIR="${segment_output_dir}" \
   LESAVKA_SERVER_ADDR="${RESOLVED_LESAVKA_SERVER_ADDR}" \
   "${REPO_ROOT}/scripts/manual/run_upstream_browser_av_sync.sh"
 }
 
+run_mirrored_segments() {
+  local run_status=0
+  local segment
+  for segment in $(seq 1 "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"); do
+    local segment_label="segment ${segment}/${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"
+    local segment_dir="${ARTIFACT_DIR}/segment-${segment}"
+    mkdir -p "${segment_dir}"
+    echo "==> mirrored calibration ${segment_label}"
+    print_upstream_calibration_state "before ${segment_label}" "${segment_dir}/calibration-before.env"
+    print_upstream_sync_state "before ${segment_label}" "${segment_dir}/planner-before.env"
+    if run_browser_capture_with_real_driver "${segment_label}" "${segment_dir}"; then
+      maybe_apply_probe_calibration "${segment_dir}" "${segment_label}"
+      print_upstream_sync_state "after ${segment_label}" "${segment_dir}/planner-after.env"
+      print_upstream_calibration_state "after ${segment_label}" "${segment_dir}/calibration-after.env"
+    else
+      run_status=$?
+      print_upstream_sync_state "after failed ${segment_label}" "${segment_dir}/planner-after-failed.env"
+      print_upstream_calibration_state "after failed ${segment_label}" "${segment_dir}/calibration-after-failed.env"
+      break
+    fi
+    if (( segment < LESAVKA_SYNC_CALIBRATION_SEGMENTS )); then
+      echo "==> settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment"
+      sleep "${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}"
+    fi
+  done
+  return "${run_status}"
+}
+
 echo "==> prebuilding real client and analyzer"
 (
   cd "${REPO_ROOT}"
@@ -434,15 +482,14 @@ echo "==> prebuilding real client and analyzer"
 
 start_server_tunnel_if_needed
 print_lesavka_versions
-print_upstream_calibration_state "before mirrored run"
-print_upstream_sync_state "before mirrored run"
+print_upstream_calibration_state "before mirrored run" "${ARTIFACT_DIR}/calibration-before.env"
+print_upstream_sync_state "before mirrored run" "${ARTIFACT_DIR}/planner-before.env"
 start_local_stimulus
 start_real_lesavka_client
 run_status=0
-run_browser_capture_with_real_driver || run_status=$?
-maybe_apply_probe_calibration
-print_upstream_sync_state "after mirrored run"
-print_upstream_calibration_state "after mirrored run"
+run_mirrored_segments || run_status=$?
+print_upstream_sync_state "after mirrored run" "${ARTIFACT_DIR}/planner-after.env"
+print_upstream_calibration_state "after mirrored run" "${ARTIFACT_DIR}/calibration-after.env"
 
 if ((run_status != 0)); then
   echo "==> mirrored probe failed"
diff --git a/server/Cargo.toml b/server/Cargo.toml
index 55441c3..4757318 100644
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@@ -10,7 +10,7 @@ bench                   = false
 
 [package]
 name                    = "lesavka_server"
-version                 = "0.17.13"
+version                 = "0.17.14"
 edition                 = "2024"
 autobins                = false
 
diff --git a/testing/tests/client_manual_sync_script_contract.rs b/testing/tests/client_manual_sync_script_contract.rs
index 3bf6052..de19514 100644
--- a/testing/tests/client_manual_sync_script_contract.rs
+++ b/testing/tests/client_manual_sync_script_contract.rs
@@ -110,10 +110,20 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
         "server_revision=",
         "combined version+revision",
         "run_status=0",
-        "run_browser_capture_with_real_driver || run_status=$?",
+        "run_mirrored_segments || run_status=$?",
         "LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0}",
         "LESAVKA_SYNC_SAVE_CALIBRATION=${LESAVKA_SYNC_SAVE_CALIBRATION:-0}",
         "LESAVKA_SYNC_CALIBRATION_TARGET=${LESAVKA_SYNC_CALIBRATION_TARGET:-video}",
+        "LESAVKA_SYNC_CALIBRATION_SEGMENTS=${LESAVKA_SYNC_CALIBRATION_SEGMENTS:-1}",
+        "LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS=${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS:-3}",
+        "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer",
+        "run_mirrored_segments",
+        "for segment in $(seq 1 \"${LESAVKA_SYNC_CALIBRATION_SEGMENTS}\")",
+        "segment-${segment}",
+        "calibration-before.env",
+        "planner-before.env",
+        "calibration-decision.env",
+        "settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment",
         "print_upstream_calibration_state \"before mirrored run\"",
         "maybe_apply_probe_calibration",
         "calibration_ready=${calibration_ready}",