test: make adaptive sync probe confirm calibration

2026-05-02 15:59:53 -03:00 · 2026-05-02 15:59:53 -03:00 · 3b6c049a73
commit 3b6c049a73
parent 8b8fbec63f
7 changed files with 235 additions and 23 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -429,3 +429,22 @@ judge whether the correction helped.
 - [x] Update manual probe contract tests for provisional calibration controls and output.
 - [x] Run shell syntax checks, focused contract tests, and package checks.
 - [x] Push clean semver `0.17.20` for installed client/server testing.
+
+## 0.17.21 Calibrate-Then-Confirm Probe Checklist
+
+Context: 0.17.20 made adaptive runs capable of provisional calibration between measured
+segments, but that still did not strictly guarantee the user-requested flow: run the probe,
+calibrate the server while it is running, then run a post-calibration test segment. It also
+still ignored analyzer-failure captures that reported an in-bounds raw A/V delta. 0.17.21 makes
+the behavior explicit: calibration segments may mutate active server calibration, confirmation
+segments never do, and adaptive runs fail by default unless a confirmation segment passes.
+
+- [x] Treat `LESAVKA_SYNC_CALIBRATION_SEGMENTS` as calibration windows in adaptive confirm mode.
+- [x] Add post-calibration confirmation windows via `LESAVKA_SYNC_CONFIRMATION_SEGMENTS`.
+- [x] Disable calibration apply during confirmation windows so they are a clean test.
+- [x] Require confirmation pass by default in adaptive confirm mode.
+- [x] Add bounded raw-activity provisional calibration for analyzer failures that still report a raw A/V delta.
+- [x] Include confirmation summaries and segment phase in adaptive artifacts.
+- [x] Update manual probe contract tests for calibrate-then-confirm behavior.
+- [x] Run shell syntax checks, focused contract tests, and package checks.
+- [x] Push clean semver `0.17.21` for installed client/server testing.
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"

 [[package]]
 name = "lesavka_client"
-version = "0.17.20"
+version = "0.17.21"
 dependencies = [
 "anyhow",
 "async-stream",
@ -1686,7 +1686,7 @@ dependencies = [

 [[package]]
 name = "lesavka_common"
-version = "0.17.20"
+version = "0.17.21"
 dependencies = [
 "anyhow",
 "base64",
@ -1698,7 +1698,7 @@ dependencies = [

 [[package]]
 name = "lesavka_server"
-version = "0.17.20"
+version = "0.17.21"
 dependencies = [
 "anyhow",
 "base64",
--- a/client/Cargo.toml
+++ b/client/Cargo.toml
@ -4,7 +4,7 @@ path                    = "src/main.rs"

 [package]
 name                    = "lesavka_client"
-version                 = "0.17.20"
+version                 = "0.17.21"
 edition                 = "2024"

 [dependencies]
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name                    = "lesavka_common"
-version                 = "0.17.20"
+version                 = "0.17.21"
 edition                 = "2024"
 build                   = "build.rs"

--- a/scripts/manual/run_upstream_mirrored_av_sync.sh
+++ b/scripts/manual/run_upstream_mirrored_av_sync.sh
@ -38,6 +38,11 @@ LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS:-350}
 LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}
 LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}
 LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}
+LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}
+LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}
+LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}
+LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}
+LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS=${LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS:-${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION}}
 STIMULUS_PORT=${STIMULUS_PORT:-18444}
 STIMULUS_SETTLE_SECONDS=${STIMULUS_SETTLE_SECONDS:-10}
 LOCAL_OUTPUT_DIR=${LOCAL_OUTPUT_DIR:-"${REPO_ROOT}/tmp"}
@ -66,6 +71,14 @@ if ! [[ "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" =~ ^[1-9][0-9]*$ ]]; then
  echo "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer" >&2
  exit 2
 fi
+if [[ "${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION}" != "1" ]]; then
+  LESAVKA_SYNC_CONFIRMATION_SEGMENTS=0
+fi
+if ! [[ "${LESAVKA_SYNC_CONFIRMATION_SEGMENTS}" =~ ^[0-9]+$ ]]; then
+  echo "LESAVKA_SYNC_CONFIRMATION_SEGMENTS must be a non-negative integer" >&2
+  exit 2
+fi
+LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))

 cleanup() {
  set +e
@ -280,20 +293,34 @@ latest_report_json() {
    | cut -d' ' -f2-
 }

+latest_analysis_failure_json() {
+  local report_root="${1:-${ARTIFACT_DIR}}"
+  find "${report_root}" -mindepth 2 -maxdepth 2 -type f -name analysis-failure.json -printf '%T@ %p\n' 2>/dev/null \
+    | sort -n \
+    | tail -n 1 \
+    | cut -d' ' -f2-
+}
+
 maybe_apply_probe_calibration() {
  local report_root="${1:-${ARTIFACT_DIR}}"
  local label="${2:-mirrored run}"
+  local allow_apply="${3:-1}"
  local report_json
  report_json="$(latest_report_json "${report_root}")"
-  echo "==> probe calibration decision (${label})"
+  local analysis_failure_json=""
  if [[ -z "${report_json}" || ! -f "${report_json}" ]]; then
+    analysis_failure_json="$(latest_analysis_failure_json "${report_root}")"
+  fi
+  echo "==> probe calibration decision (${label})"
+  if [[ -z "${report_json}" || ! -f "${report_json}" ]] && [[ -z "${analysis_failure_json}" || ! -f "${analysis_failure_json}" ]]; then
    echo "   ↪ report_json=missing"
-    echo "   ↪ calibration apply skipped: analyzer report was not produced"
+    echo "   ↪ analysis_failure_json=missing"
+    echo "   ↪ calibration apply skipped: analyzer evidence was not produced"
    return 0
  fi

  local summary
-  if ! summary="$(python3 - "${report_json}" "${LESAVKA_SYNC_CALIBRATION_TARGET}" <<'PY'
+  if ! summary="$(python3 - "${report_json:-}" "${analysis_failure_json:-}" "${LESAVKA_SYNC_CALIBRATION_TARGET}" <<'PY'
 import json
 import math
 import os
@ -301,9 +328,17 @@ import shlex
 import sys

 report_path = sys.argv[1]
-target = sys.argv[2].strip().lower()
-with open(report_path, "r", encoding="utf-8") as handle:
+failure_path = sys.argv[2]
+target = sys.argv[3].strip().lower()
+
+report = {}
+failure = {}
+if report_path:
+    with open(report_path, "r", encoding="utf-8") as handle:
        report = json.load(handle)
+elif failure_path:
+    with open(failure_path, "r", encoding="utf-8") as handle:
+        failure = json.load(handle)

 cal = report.get("calibration", {})
 verdict = report.get("verdict", {})
@ -351,11 +386,28 @@ provisional_max_p95_ms = env_float("LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS", 350.0)
 provisional_max_drift_ms = env_float("LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS", 250.0)
 provisional_gain = env_float("LESAVKA_SYNC_PROVISIONAL_GAIN", 0.5)
 provisional_max_step_us = env_int("LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US", 150000)
+raw_failure_enabled = env_bool("LESAVKA_SYNC_RAW_FAILURE_CALIBRATION", False)
+raw_failure_max_abs_delta_ms = env_float("LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS", 350.0)

 ready_audio_recommendation = int(cal.get("recommended_audio_offset_adjust_us") or 0)
 ready_video_recommendation = int(cal.get("recommended_video_offset_adjust_us") or 0)
 provisional_audio_recommendation = int(round(-median_skew_ms * 1000.0))
 provisional_video_recommendation = int(round(median_skew_ms * 1000.0))
+decision_source = "report"
+raw_activity_delta_ms = None
+failure_reason = ""
+if not report:
+    decision_source = "analysis_failure"
+    failure_reason = str(failure.get("reason", "analyzer failed"))
+    paired_pulses = int(failure.get("paired_pulses", 0) or 0)
+    raw_value = failure.get("raw_activity_delta_ms")
+    if raw_value is not None:
+        raw_activity_delta_ms = as_float(raw_value)
+        median_skew_ms = raw_activity_delta_ms
+        p95_abs_skew_ms = abs(raw_activity_delta_ms)
+        drift_ms = 0.0
+        provisional_audio_recommendation = int(round(-median_skew_ms * 1000.0))
+        provisional_video_recommendation = int(round(median_skew_ms * 1000.0))

 audio_recommendation = ready_audio_recommendation
 video_recommendation = ready_video_recommendation
@ -364,7 +416,34 @@ video_delta = video_recommendation if target == "video" else 0
 decision_mode = "ready" if ready else "refused"
 decision_note = "analyzer marked this report calibration-ready" if ready else "analyzer did not mark this report calibration-ready"

-if not ready and provisional_enabled:
+if not report:
+    if not raw_failure_enabled:
+        decision_note = "raw analyzer-failure calibration disabled"
+    elif raw_activity_delta_ms is None:
+        decision_note = "raw analyzer-failure calibration refused: no raw activity delta was reported"
+    elif abs(raw_activity_delta_ms) > raw_failure_max_abs_delta_ms:
+        decision_note = (
+            "raw analyzer-failure calibration refused: "
+            f"abs(raw_activity_delta_ms) {abs(raw_activity_delta_ms):.1f} > {raw_failure_max_abs_delta_ms:.1f}"
+        )
+    else:
+        audio_recommendation = provisional_audio_recommendation
+        video_recommendation = provisional_video_recommendation
+        if target == "audio":
+            audio_delta = clamp(audio_recommendation * provisional_gain, provisional_max_step_us)
+            video_delta = 0
+        else:
+            audio_delta = 0
+            video_delta = clamp(video_recommendation * provisional_gain, provisional_max_step_us)
+        if audio_delta == 0 and video_delta == 0:
+            decision_note = "raw analyzer-failure calibration skipped: rounded correction was zero"
+        else:
+            decision_mode = "raw_provisional"
+            decision_note = (
+                "bounded provisional correction from analyzer-failure raw activity; "
+                "not safe to save until a confirming coded report"
+            )
+elif not ready and provisional_enabled:
    refusal_reasons = []
    if paired_pulses < provisional_min_pairs:
        refusal_reasons.append(f"paired_pulses {paired_pulses} < {provisional_min_pairs}")
@ -395,9 +474,11 @@ if not ready and provisional_enabled:

 fields = {
    "report_json": report_path,
+    "analysis_failure_json": failure_path,
    "calibration_ready": str(ready).lower(),
    "calibration_target": target,
    "calibration_decision_mode": decision_mode,
+    "calibration_decision_source": decision_source,
    "calibration_decision_note": decision_note,
    "calibration_audio_recommendation_us": audio_recommendation,
    "calibration_video_recommendation_us": video_recommendation,
@ -414,7 +495,11 @@ fields = {
    "provisional_max_drift_ms": f"{provisional_max_drift_ms:.1f}",
    "provisional_gain": f"{provisional_gain:.3f}",
    "provisional_max_step_us": provisional_max_step_us,
-    "verdict_status": verdict.get("status", ""),
+    "raw_failure_calibration_enabled": str(raw_failure_enabled).lower(),
+    "raw_failure_max_abs_delta_ms": f"{raw_failure_max_abs_delta_ms:.1f}",
+    "raw_activity_delta_ms": "" if raw_activity_delta_ms is None else f"{raw_activity_delta_ms:+.1f}",
+    "analysis_failure_reason": failure_reason,
+    "verdict_status": verdict.get("status", failure.get("status", "")),
    "paired_pulses": paired_pulses,
    "median_skew_ms": f"{median_skew_ms:+.1f}",
    "p95_abs_skew_ms": f"{p95_abs_skew_ms:.1f}",
@ -431,6 +516,7 @@ PY
  eval "${summary}"
  printf '%s\n' "${summary}" >"${report_root}/calibration-decision.env"
  echo "   ↪ report_json=${report_json}"
+  echo "   ↪ analysis_failure_json=${analysis_failure_json}"
  echo "   ↪ verdict_status=${verdict_status}"
  echo "   ↪ paired_pulses=${paired_pulses}"
  echo "   ↪ median_skew_ms=${median_skew_ms}"
@ -439,6 +525,7 @@ PY
  echo "   ↪ calibration_ready=${calibration_ready}"
  echo "   ↪ calibration_target=${calibration_target}"
  echo "   ↪ calibration_decision_mode=${calibration_decision_mode}"
+  echo "   ↪ calibration_decision_source=${calibration_decision_source}"
  echo "   ↪ recommended_audio_offset_adjust_us=${calibration_audio_recommendation_us}"
  echo "   ↪ recommended_video_offset_adjust_us=${calibration_video_recommendation_us}"
  echo "   ↪ ready_audio_offset_adjust_us=${calibration_ready_audio_recommendation_us}"
@ -451,9 +538,17 @@ PY
  echo "   ↪ provisional_max_drift_ms=${provisional_max_drift_ms}"
  echo "   ↪ provisional_gain=${provisional_gain}"
  echo "   ↪ provisional_max_step_us=${provisional_max_step_us}"
+  echo "   ↪ raw_failure_calibration_enabled=${raw_failure_calibration_enabled}"
+  echo "   ↪ raw_failure_max_abs_delta_ms=${raw_failure_max_abs_delta_ms}"
+  [[ -n "${raw_activity_delta_ms}" ]] && echo "   ↪ raw_activity_delta_ms=${raw_activity_delta_ms}"
+  [[ -n "${analysis_failure_reason}" ]] && echo "   ↪ analysis_failure_reason=${analysis_failure_reason}"
  echo "   ↪ calibration_note=${calibration_note}"
  echo "   ↪ calibration_decision_note=${calibration_decision_note}"

+  if [[ "${allow_apply}" != "1" ]]; then
+    echo "   ↪ confirmation segment: calibration apply disabled so this segment tests the active calibration"
+    return 0
+  fi
  if [[ "${LESAVKA_SYNC_APPLY_CALIBRATION}" != "1" ]]; then
    echo "   ↪ calibration apply disabled; set LESAVKA_SYNC_APPLY_CALIBRATION=1 to apply ready or provisional recommendations"
    return 0
@ -583,15 +678,26 @@ run_browser_capture_with_real_driver() {
 run_mirrored_segments() {
  local run_status=0
  local segment
-  for segment in $(seq 1 "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"); do
-    local segment_label="segment ${segment}/${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"
+  for segment in $(seq 1 "${LESAVKA_SYNC_TOTAL_SEGMENTS}"); do
+    local phase="calibration"
+    local phase_index="${segment}"
+    local phase_count="${LESAVKA_SYNC_CALIBRATION_SEGMENTS}"
+    local allow_calibration_apply=1
+    if (( segment > LESAVKA_SYNC_CALIBRATION_SEGMENTS )); then
+      phase="confirmation"
+      phase_index=$((segment - LESAVKA_SYNC_CALIBRATION_SEGMENTS))
+      phase_count="${LESAVKA_SYNC_CONFIRMATION_SEGMENTS}"
+      allow_calibration_apply=0
+    fi
+    local segment_label="${phase} segment ${phase_index}/${phase_count} (overall ${segment}/${LESAVKA_SYNC_TOTAL_SEGMENTS})"
    local segment_dir="${ARTIFACT_DIR}/segment-${segment}"
    mkdir -p "${segment_dir}"
-    echo "==> mirrored calibration ${segment_label}"
+    printf 'segment_phase=%s\n' "${phase}" >"${segment_dir}/segment-phase.env"
+    echo "==> mirrored ${segment_label}"
    print_upstream_calibration_state "before ${segment_label}" "${segment_dir}/calibration-before.env"
    print_upstream_sync_state "before ${segment_label}" "${segment_dir}/planner-before.env"
    if run_browser_capture_with_real_driver "${segment_label}" "${segment_dir}" "${segment}"; then
-      maybe_apply_probe_calibration "${segment_dir}" "${segment_label}"
+      maybe_apply_probe_calibration "${segment_dir}" "${segment_label}" "${allow_calibration_apply}"
      print_upstream_sync_state "after ${segment_label}" "${segment_dir}/planner-after.env"
      print_upstream_calibration_state "after ${segment_label}" "${segment_dir}/calibration-after.env"
    else
@ -600,8 +706,8 @@ run_mirrored_segments() {
      print_upstream_calibration_state "after failed ${segment_label}" "${segment_dir}/calibration-after-failed.env"
      break
    fi
-    if (( segment < LESAVKA_SYNC_CALIBRATION_SEGMENTS )); then
-      echo "==> settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment"
+    if (( segment < LESAVKA_SYNC_TOTAL_SEGMENTS )); then
+      echo "==> settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment"
      sleep "${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}"
    fi
  done
@ -610,7 +716,7 @@ run_mirrored_segments() {

 summarize_adaptive_probe_metrics() {
  echo "==> summarizing segmented probe metrics"
-  python3 - "${ARTIFACT_DIR}" "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" <<'PY'
+  python3 - "${ARTIFACT_DIR}" "${LESAVKA_SYNC_TOTAL_SEGMENTS}" "${LESAVKA_SYNC_CALIBRATION_SEGMENTS}" <<'PY'
 import csv
 import json
 import os
@ -619,6 +725,7 @@ from pathlib import Path

 root = Path(sys.argv[1])
 segment_count = int(sys.argv[2])
+calibration_segment_count = int(sys.argv[3])


 def read_env(path):
@ -688,9 +795,14 @@ for segment in range(1, segment_count + 1):
    calibration_before = read_env(segment_dir / "calibration-before.env")
    calibration_after = read_env(segment_dir / "calibration-after.env")
    decision = read_env(segment_dir / "calibration-decision.env")
+    phase = read_env(segment_dir / "segment-phase.env").get(
+        "segment_phase",
+        "calibration" if segment <= calibration_segment_count else "confirmation",
+    )

    row = {
        "segment": segment,
+        "segment_phase": phase,
        "report_json": str(report_path) if report_path else "",
        "analysis_failure_json": str(failure_path) if failure_path else "",
        "analysis_failure_reason": failure.get("reason", ""),
@ -708,6 +820,7 @@ for segment in range(1, segment_count + 1):
        "calibration_ready": bool(calibration.get("ready", False)),
        "calibration_note": calibration.get("note", ""),
        "decision_mode": decision.get("calibration_decision_mode", ""),
+        "decision_source": decision.get("calibration_decision_source", ""),
        "decision_note": decision.get("calibration_decision_note", ""),
        "decision_video_delta_us": as_float(decision.get("calibration_apply_video_delta_us")),
        "decision_audio_delta_us": as_float(decision.get("calibration_apply_audio_delta_us")),
@ -742,6 +855,8 @@ with jsonl_path.open("w", encoding="utf-8") as handle:
        handle.write(json.dumps(row, sort_keys=True) + "\n")

 good_rows = [row for row in rows if row.get("probe_passed")]
+confirmation_rows = [row for row in rows if row.get("segment_phase") == "confirmation"]
+passing_confirmation_rows = [row for row in confirmation_rows if row.get("probe_passed")]
 target_path = root / "blind-targets.json"
 if good_rows:
    target = {
@ -771,10 +886,70 @@ else:
    }
 target_path.write_text(json.dumps(target, indent=2, sort_keys=True) + "\n", encoding="utf-8")

+confirmation_path = root / "confirmation-summary.json"
+if confirmation_rows:
+    best_confirmation = min(
+        [
+            row for row in confirmation_rows
+            if isinstance(row.get("probe_p95_abs_skew_ms"), (int, float))
+        ],
+        key=lambda row: row["probe_p95_abs_skew_ms"],
+        default=None,
+    )
+    confirmation = {
+        "required": True,
+        "passed": bool(passing_confirmation_rows),
+        "confirmation_segments": [row["segment"] for row in confirmation_rows],
+        "passing_confirmation_segments": [row["segment"] for row in passing_confirmation_rows],
+        "best_confirmation_segment": best_confirmation["segment"] if best_confirmation else None,
+        "best_confirmation_status": best_confirmation["probe_status"] if best_confirmation else "missing",
+        "best_confirmation_p95_abs_skew_ms": best_confirmation["probe_p95_abs_skew_ms"] if best_confirmation else None,
+    }
+else:
+    confirmation = {
+        "required": False,
+        "passed": False,
+        "confirmation_segments": [],
+        "passing_confirmation_segments": [],
+    }
+confirmation_path.write_text(json.dumps(confirmation, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
 print(f"   ↪ segment_metrics_csv={csv_path}")
 print(f"   ↪ segment_metrics_jsonl={jsonl_path}")
 print(f"   ↪ blind_targets_json={target_path}")
 print(f"   ↪ blind_targets_ready={str(bool(target.get('ready'))).lower()}")
+print(f"   ↪ confirmation_summary_json={confirmation_path}")
+print(f"   ↪ confirmation_passed={str(bool(confirmation.get('passed'))).lower()}")
+PY
+}
+
+check_confirmation_result() {
+  if [[ "${LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS}" != "1" ]]; then
+    return 0
+  fi
+  local confirmation_json="${ARTIFACT_DIR}/confirmation-summary.json"
+  if [[ ! -f "${confirmation_json}" ]]; then
+    echo "==> confirmation check failed"
+    echo "   ↪ confirmation_summary_json=missing"
+    return 1
+  fi
+  python3 - "${confirmation_json}" <<'PY'
+import json
+import sys
+from pathlib import Path
+
+confirmation = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
+if confirmation.get("passed"):
+    print("==> confirmation check passed")
+    print(f"   ↪ passing_confirmation_segments={confirmation.get('passing_confirmation_segments', [])}")
+    sys.exit(0)
+
+print("==> confirmation check failed")
+print(f"   ↪ confirmation_segments={confirmation.get('confirmation_segments', [])}")
+print(f"   ↪ best_confirmation_segment={confirmation.get('best_confirmation_segment')}")
+print(f"   ↪ best_confirmation_status={confirmation.get('best_confirmation_status')}")
+print(f"   ↪ best_confirmation_p95_abs_skew_ms={confirmation.get('best_confirmation_p95_abs_skew_ms')}")
+sys.exit(1)
 PY
 }

@ -795,6 +970,9 @@ run_mirrored_segments || run_status=$?
 print_upstream_sync_state "after mirrored run" "${ARTIFACT_DIR}/planner-after.env"
 print_upstream_calibration_state "after mirrored run" "${ARTIFACT_DIR}/calibration-after.env"
 summarize_adaptive_probe_metrics
+if ! check_confirmation_result; then
+  run_status=1
+fi

 if ((run_status != 0)); then
  echo "==> mirrored probe failed"
--- a/server/Cargo.toml
+++ b/server/Cargo.toml
@ -10,7 +10,7 @@ bench                   = false

 [package]
 name                    = "lesavka_server"
-version                 = "0.17.20"
+version                 = "0.17.21"
 edition                 = "2024"
 autobins                = false

--- a/testing/tests/client_manual_sync_script_contract.rs
+++ b/testing/tests/client_manual_sync_script_contract.rs
@ -133,6 +133,13 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
        "LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}",
        "LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}",
        "LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}",
+        "LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}",
+        "LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}",
+        "LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}",
+        "LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}",
+        "LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS=${LESAVKA_SYNC_REQUIRE_CONFIRMATION_PASS:-${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION}}",
+        "LESAVKA_SYNC_CONFIRMATION_SEGMENTS must be a non-negative integer",
+        "LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))",
        "LESAVKA_SYNC_ADAPTIVE_CALIBRATION",
        "LESAVKA_SYNC_CALIBRATION_SEGMENTS=4",
        "browser_consumer_reuse_session=${reuse_browser_session}",
@ -142,13 +149,19 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
        "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer",
        "run_mirrored_segments",
        "summarize_adaptive_probe_metrics",
-        "for segment in $(seq 1 \"${LESAVKA_SYNC_CALIBRATION_SEGMENTS}\")",
+        "for segment in $(seq 1 \"${LESAVKA_SYNC_TOTAL_SEGMENTS}\")",
+        "segment_phase",
+        "confirmation segment: calibration apply disabled so this segment tests the active calibration",
        "segment-${segment}",
        "calibration-before.env",
        "planner-before.env",
        "calibration-decision.env",
        "segment-metrics.csv",
        "segment-metrics.jsonl",
+        "confirmation-summary.json",
+        "confirmation_passed",
+        "check_confirmation_result",
+        "confirmation check failed",
        "analysis_failure_reason",
        "probe_activity_start_delta_ms",
        "blind-targets.json",
@ -157,12 +170,14 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
        "decision_provisional_video_recommendation_us",
        "planner_live_lag_ms_after",
        "probe_p95_abs_skew_ms",
-        "settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next calibration segment",
+        "settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment",
        "print_upstream_calibration_state \"before mirrored run\"",
        "maybe_apply_probe_calibration",
        "calibration_ready=${calibration_ready}",
        "calibration_decision_mode=${calibration_decision_mode}",
        "bounded provisional correction from median skew",
+        "bounded provisional correction from analyzer-failure raw activity",
+        "raw_failure_calibration_enabled",
        "provisional calibration not saved",
        "calibration apply refused: ${calibration_decision_note}",
        "calibrate \"${calibration_apply_audio_delta_us}\" \"${calibration_apply_video_delta_us}\"",