fix: refuse raw-only probe calibration by default

This commit is contained in:
Brad Stein 2026-05-02 20:05:24 -03:00
parent 0188c8661b
commit 5634e7197d
7 changed files with 74 additions and 10 deletions

View File

@ -551,3 +551,33 @@ stayed empty, and client timing skew included a false cross-pipeline PTS offset.
- [x] Add tests proving sink handoff survives large offset-compensated local PTS gaps. - [x] Add tests proving sink handoff survives large offset-compensated local PTS gaps.
- [x] Add tests proving audio/video timing metadata no longer copies packet PTS domains into blind sidecar fields. - [x] Add tests proving audio/video timing metadata no longer copies packet PTS domains into blind sidecar fields.
- [ ] Next mirrored run should show non-zero `planner_sink_handoff_window_samples` and much smaller client send/capture p95 skew before trusting blind healing. - [ ] Next mirrored run should show non-zero `planner_sink_handoff_window_samples` and much smaller client send/capture p95 skew before trusting blind healing.
## 0.17.29 Enqueue-Bound Client Timing Checklist
Context: the first blind-healing runs showed huge client capture/send skew even though media packets
were latest-only. The sidecar timestamps were being written in async sender tasks after queueing, so
parallel scheduling delay leaked into the diagnostic clock and made blind healing distrust the wrong
layer.
- [x] Stamp client timing metadata at the capture/enqueue boundary instead of the async gRPC send boundary.
- [x] Keep async sender updates limited to queue depth and queue age so scheduling delay stays observable but does not rewrite capture/send time.
- [x] Pair server-side client timing samples by nearby enqueue/send time before reporting rolling skew windows.
- [x] Add regression tests proving queue delay no longer changes capture/send timestamps.
- [x] Push clean semver `0.17.29` for installed client/server testing.
- [x] Use the next mirrored run to confirm client capture/send p95 drops from seconds to single-digit milliseconds.
## 0.17.30 Raw-Failure Calibration Safety Checklist
Context: the 0.17.29 mirrored run confirmed the client-side scheduling leak is fixed, but the probe
then applied large opposite calibration nudges from analyzer failures with zero or one coded pair.
Raw activity deltas are useful diagnostic breadcrumbs; they are not safe steering evidence when coded
pairing collapses.
- [x] Treat the 0.17.29 run as proof that client sidecar timing is now trustworthy enough to move the investigation downstream.
- [x] Default raw analyzer-failure calibration (`LESAVKA_SYNC_RAW_FAILURE_CALIBRATION`) to off instead of inheriting the `LESAVKA_SYNC_PROVISIONAL_CALIBRATION` setting.
- [x] Add `LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS` (default 3) so even explicitly enabled raw-failure calibration is refused when the segment has fewer coded pairs than that floor.
- [x] Print the raw-failure pair floor in calibration decisions and segment artifacts.
- [x] Prefer server-side receive/sink blockers over probe-pairing blockers when root-cause evidence is available.
- [x] Update manual probe contract coverage for the safer defaults and refusal reason.
- [ ] Re-run the probe-calibrate-confirm flow; analyzer failures should diagnose but not mutate calibration unless raw fallback is explicitly enabled and the segment has at least `LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS` coded pairs.
- [ ] If client send/capture p95 stays low and server receive p95 stays high, localize the transport/server-receive timing layer next.

6
Cargo.lock generated
View File

@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]] [[package]]
name = "lesavka_client" name = "lesavka_client"
version = "0.17.29" version = "0.17.30"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-stream", "async-stream",
@ -1686,7 +1686,7 @@ dependencies = [
[[package]] [[package]]
name = "lesavka_common" name = "lesavka_common"
version = "0.17.29" version = "0.17.30"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"base64", "base64",
@ -1698,7 +1698,7 @@ dependencies = [
[[package]] [[package]]
name = "lesavka_server" name = "lesavka_server"
version = "0.17.29" version = "0.17.30"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"base64", "base64",

View File

@ -4,7 +4,7 @@ path = "src/main.rs"
[package] [package]
name = "lesavka_client" name = "lesavka_client"
version = "0.17.29" version = "0.17.30"
edition = "2024" edition = "2024"
[dependencies] [dependencies]

View File

@ -1,6 +1,6 @@
[package] [package]
name = "lesavka_common" name = "lesavka_common"
version = "0.17.29" version = "0.17.30"
edition = "2024" edition = "2024"
build = "build.rs" build = "build.rs"

View File

@ -39,7 +39,8 @@ LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS:-350}
LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250} LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}
LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5} LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}
LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000} LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}
LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}} LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-0}
LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS=${LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS:-3}
LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350} LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}
LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}} LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}
LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1} LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}
@ -88,6 +89,7 @@ export LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS
export LESAVKA_SYNC_PROVISIONAL_GAIN export LESAVKA_SYNC_PROVISIONAL_GAIN
export LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US export LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US
export LESAVKA_SYNC_RAW_FAILURE_CALIBRATION export LESAVKA_SYNC_RAW_FAILURE_CALIBRATION
export LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS
export LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS export LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS
export LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION export LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION
export LESAVKA_SYNC_CONFIRMATION_SEGMENTS export LESAVKA_SYNC_CONFIRMATION_SEGMENTS
@ -400,6 +402,7 @@ provisional_max_drift_ms = env_float("LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS", 25
provisional_gain = env_float("LESAVKA_SYNC_PROVISIONAL_GAIN", 0.5) provisional_gain = env_float("LESAVKA_SYNC_PROVISIONAL_GAIN", 0.5)
provisional_max_step_us = env_int("LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US", 150000) provisional_max_step_us = env_int("LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US", 150000)
raw_failure_enabled = env_bool("LESAVKA_SYNC_RAW_FAILURE_CALIBRATION", False) raw_failure_enabled = env_bool("LESAVKA_SYNC_RAW_FAILURE_CALIBRATION", False)
raw_failure_min_pairs = env_int("LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS", 3)
raw_failure_max_abs_delta_ms = env_float("LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS", 350.0) raw_failure_max_abs_delta_ms = env_float("LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS", 350.0)
ready_audio_recommendation = int(cal.get("recommended_audio_offset_adjust_us") or 0) ready_audio_recommendation = int(cal.get("recommended_audio_offset_adjust_us") or 0)
@ -432,6 +435,11 @@ decision_note = "analyzer marked this report calibration-ready" if ready else "a
if not report: if not report:
if not raw_failure_enabled: if not raw_failure_enabled:
decision_note = "raw analyzer-failure calibration disabled" decision_note = "raw analyzer-failure calibration disabled"
elif paired_pulses < raw_failure_min_pairs:
decision_note = (
"raw analyzer-failure calibration refused: "
f"paired_pulses {paired_pulses} < {raw_failure_min_pairs}"
)
elif raw_activity_delta_ms is None: elif raw_activity_delta_ms is None:
decision_note = "raw analyzer-failure calibration refused: no raw activity delta was reported" decision_note = "raw analyzer-failure calibration refused: no raw activity delta was reported"
elif abs(raw_activity_delta_ms) > raw_failure_max_abs_delta_ms: elif abs(raw_activity_delta_ms) > raw_failure_max_abs_delta_ms:
@ -509,6 +517,7 @@ fields = {
"provisional_gain": f"{provisional_gain:.3f}", "provisional_gain": f"{provisional_gain:.3f}",
"provisional_max_step_us": provisional_max_step_us, "provisional_max_step_us": provisional_max_step_us,
"raw_failure_calibration_enabled": str(raw_failure_enabled).lower(), "raw_failure_calibration_enabled": str(raw_failure_enabled).lower(),
"raw_failure_min_pairs": raw_failure_min_pairs,
"raw_failure_max_abs_delta_ms": f"{raw_failure_max_abs_delta_ms:.1f}", "raw_failure_max_abs_delta_ms": f"{raw_failure_max_abs_delta_ms:.1f}",
"raw_activity_delta_ms": "" if raw_activity_delta_ms is None else f"{raw_activity_delta_ms:+.1f}", "raw_activity_delta_ms": "" if raw_activity_delta_ms is None else f"{raw_activity_delta_ms:+.1f}",
"analysis_failure_reason": failure_reason, "analysis_failure_reason": failure_reason,
@ -552,6 +561,7 @@ PY
echo " ↪ provisional_gain=${provisional_gain}" echo " ↪ provisional_gain=${provisional_gain}"
echo " ↪ provisional_max_step_us=${provisional_max_step_us}" echo " ↪ provisional_max_step_us=${provisional_max_step_us}"
echo " ↪ raw_failure_calibration_enabled=${raw_failure_calibration_enabled}" echo " ↪ raw_failure_calibration_enabled=${raw_failure_calibration_enabled}"
echo " ↪ raw_failure_min_pairs=${raw_failure_min_pairs}"
echo " ↪ raw_failure_max_abs_delta_ms=${raw_failure_max_abs_delta_ms}" echo " ↪ raw_failure_max_abs_delta_ms=${raw_failure_max_abs_delta_ms}"
[[ -n "${raw_activity_delta_ms}" ]] && echo " ↪ raw_activity_delta_ms=${raw_activity_delta_ms}" [[ -n "${raw_activity_delta_ms}" ]] && echo " ↪ raw_activity_delta_ms=${raw_activity_delta_ms}"
[[ -n "${analysis_failure_reason}" ]] && echo " ↪ analysis_failure_reason=${analysis_failure_reason}" [[ -n "${analysis_failure_reason}" ]] && echo " ↪ analysis_failure_reason=${analysis_failure_reason}"
@ -905,7 +915,7 @@ def diagnose_segment(row):
"blocker", "blocker",
"server_receive_skew_p95_high", "server_receive_skew_p95_high",
"Audio/video timing becomes unstable between client send and server receive.", "Audio/video timing becomes unstable between client send and server receive.",
"Treat this as network/gRPC receive jitter; heal freshness with drop/reanchor policy, not static calibration.", "Treat this as transport/server receive jitter; heal freshness with drop/reanchor policy, not static calibration.",
) )
if over(row, "planner_camera_sink_late_p95_ms_after", 120) or over(row, "planner_microphone_sink_late_p95_ms_after", 120): if over(row, "planner_camera_sink_late_p95_ms_after", 120) or over(row, "planner_microphone_sink_late_p95_ms_after", 120):
add_finding( add_finding(
@ -981,7 +991,26 @@ def diagnose_segment(row):
def primary_finding(findings): def primary_finding(findings):
severity_rank = {"blocker": 0, "warning": 1, "info": 2} severity_rank = {"blocker": 0, "warning": 1, "info": 2}
return sorted(findings, key=lambda item: severity_rank.get(item.get("severity"), 9))[0] layer_rank = {
"client_uplink": 0,
"network_receive": 1,
"server_sink_scheduler": 2,
"server_sink_handoff": 3,
"server_evidence": 4,
"server_calibration": 5,
"external_boundary": 6,
"probe_video": 7,
"probe_pairing": 8,
"unknown": 9,
"none": 10,
}
return sorted(
findings,
key=lambda item: (
severity_rank.get(item.get("severity"), 9),
layer_rank.get(item.get("layer"), 9),
),
)[0]
rows = [] rows = []

View File

@ -10,7 +10,7 @@ bench = false
[package] [package]
name = "lesavka_server" name = "lesavka_server"
version = "0.17.29" version = "0.17.30"
edition = "2024" edition = "2024"
autobins = false autobins = false

View File

@ -140,7 +140,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
"LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}", "LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}",
"LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}", "LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}",
"LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}", "LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}",
"LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}", "LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-0}",
"LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS=${LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS:-3}",
"LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}", "LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}",
"LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}", "LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}",
"LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}", "LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}",
@ -149,6 +150,7 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
"LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))", "LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))",
"export LESAVKA_SYNC_PROVISIONAL_CALIBRATION", "export LESAVKA_SYNC_PROVISIONAL_CALIBRATION",
"export LESAVKA_SYNC_RAW_FAILURE_CALIBRATION", "export LESAVKA_SYNC_RAW_FAILURE_CALIBRATION",
"export LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS",
"LESAVKA_SYNC_ADAPTIVE_CALIBRATION", "LESAVKA_SYNC_ADAPTIVE_CALIBRATION",
"LESAVKA_SYNC_CALIBRATION_SEGMENTS=4", "LESAVKA_SYNC_CALIBRATION_SEGMENTS=4",
"browser_consumer_reuse_session=${reuse_browser_session}", "browser_consumer_reuse_session=${reuse_browser_session}",
@ -184,6 +186,7 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
"decision_provisional_video_recommendation_us", "decision_provisional_video_recommendation_us",
"planner_live_lag_ms_after", "planner_live_lag_ms_after",
"probe_p95_abs_skew_ms", "probe_p95_abs_skew_ms",
"transport/server receive jitter",
"settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment", "settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment",
"print_upstream_calibration_state \"before mirrored run\"", "print_upstream_calibration_state \"before mirrored run\"",
"maybe_apply_probe_calibration", "maybe_apply_probe_calibration",
@ -192,6 +195,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
"bounded provisional correction from median skew", "bounded provisional correction from median skew",
"bounded provisional correction from analyzer-failure raw activity", "bounded provisional correction from analyzer-failure raw activity",
"raw_failure_calibration_enabled", "raw_failure_calibration_enabled",
"raw analyzer-failure calibration refused: ",
"raw_failure_min_pairs",
"provisional calibration not saved", "provisional calibration not saved",
"calibration apply refused: ${calibration_decision_note}", "calibration apply refused: ${calibration_decision_note}",
"calibrate \"${calibration_apply_audio_delta_us}\" \"${calibration_apply_video_delta_us}\"", "calibrate \"${calibration_apply_audio_delta_us}\" \"${calibration_apply_video_delta_us}\"",