fix: refuse raw-only probe calibration by default
This commit is contained in:
parent
0188c8661b
commit
5634e7197d
30
AGENTS.md
30
AGENTS.md
@ -551,3 +551,33 @@ stayed empty, and client timing skew included a false cross-pipeline PTS offset.
|
||||
- [x] Add tests proving sink handoff survives large offset-compensated local PTS gaps.
|
||||
- [x] Add tests proving audio/video timing metadata no longer copies packet PTS domains into blind sidecar fields.
|
||||
- [ ] Next mirrored run should show non-zero `planner_sink_handoff_window_samples` and much smaller client send/capture p95 skew before trusting blind healing.
|
||||
|
||||
## 0.17.29 Enqueue-Bound Client Timing Checklist
|
||||
|
||||
Context: the first blind-healing runs showed huge client capture/send skew even though media packets
|
||||
were latest-only. The sidecar timestamps were being written in async sender tasks after queueing, so
|
||||
parallel scheduling delay leaked into the diagnostic clock and made blind healing distrust the wrong
|
||||
layer.
|
||||
|
||||
- [x] Stamp client timing metadata at the capture/enqueue boundary instead of the async gRPC send boundary.
|
||||
- [x] Keep async sender updates limited to queue depth and queue age so scheduling delay stays observable but does not rewrite capture/send time.
|
||||
- [x] Pair server-side client timing samples by nearby enqueue/send time before reporting rolling skew windows.
|
||||
- [x] Add regression tests proving queue delay no longer changes capture/send timestamps.
|
||||
- [x] Push clean semver `0.17.29` for installed client/server testing.
|
||||
- [x] Use the next mirrored run to confirm client capture/send p95 drops from seconds to single-digit milliseconds.
|
||||
|
||||
## 0.17.30 Raw-Failure Calibration Safety Checklist
|
||||
|
||||
Context: the 0.17.29 mirrored run confirmed the client-side scheduling leak is fixed, but the probe
|
||||
then applied large opposite calibration nudges from analyzer failures with zero or one coded pair.
|
||||
Raw activity deltas are useful diagnostic breadcrumbs; they are not safe steering evidence when coded
|
||||
pairing collapses.
|
||||
|
||||
- [x] Treat the 0.17.29 run as proof that client sidecar timing is now trustworthy enough to move the investigation downstream.
|
||||
- [x] Default raw analyzer-failure calibration to off instead of inheriting provisional calibration.
|
||||
- [x] Add `LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS` so even explicit raw-failure calibration refuses weak coded evidence.
|
||||
- [x] Print the raw-failure pair floor in calibration decisions and segment artifacts.
|
||||
- [x] Prefer server-side receive/sink blockers over probe-pairing blockers when root-cause evidence is available.
|
||||
- [x] Update manual probe contract coverage for the safer defaults and refusal reason.
|
||||
- [ ] Re-run the probe-calibrate-confirm flow; analyzer failures should diagnose but not mutate calibration unless raw fallback is explicitly enabled and has enough coded support.
|
||||
- [ ] If client send/capture p95 stays low and server receive p95 stays high, localize the transport/server-receive timing layer next.
|
||||
|
||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
|
||||
|
||||
[[package]]
|
||||
name = "lesavka_client"
|
||||
version = "0.17.29"
|
||||
version = "0.17.30"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
@ -1686,7 +1686,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lesavka_common"
|
||||
version = "0.17.29"
|
||||
version = "0.17.30"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
@ -1698,7 +1698,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lesavka_server"
|
||||
version = "0.17.29"
|
||||
version = "0.17.30"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
|
||||
@ -4,7 +4,7 @@ path = "src/main.rs"
|
||||
|
||||
[package]
|
||||
name = "lesavka_client"
|
||||
version = "0.17.29"
|
||||
version = "0.17.30"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lesavka_common"
|
||||
version = "0.17.29"
|
||||
version = "0.17.30"
|
||||
edition = "2024"
|
||||
build = "build.rs"
|
||||
|
||||
|
||||
@ -39,7 +39,8 @@ LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS:-350}
|
||||
LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}
|
||||
LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}
|
||||
LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}
|
||||
LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}
|
||||
LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-0}
|
||||
LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS=${LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS:-3}
|
||||
LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}
|
||||
LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}
|
||||
LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}
|
||||
@ -88,6 +89,7 @@ export LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS
|
||||
export LESAVKA_SYNC_PROVISIONAL_GAIN
|
||||
export LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US
|
||||
export LESAVKA_SYNC_RAW_FAILURE_CALIBRATION
|
||||
export LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS
|
||||
export LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS
|
||||
export LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION
|
||||
export LESAVKA_SYNC_CONFIRMATION_SEGMENTS
|
||||
@ -400,6 +402,7 @@ provisional_max_drift_ms = env_float("LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS", 25
|
||||
provisional_gain = env_float("LESAVKA_SYNC_PROVISIONAL_GAIN", 0.5)
|
||||
provisional_max_step_us = env_int("LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US", 150000)
|
||||
raw_failure_enabled = env_bool("LESAVKA_SYNC_RAW_FAILURE_CALIBRATION", False)
|
||||
raw_failure_min_pairs = env_int("LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS", 3)
|
||||
raw_failure_max_abs_delta_ms = env_float("LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS", 350.0)
|
||||
|
||||
ready_audio_recommendation = int(cal.get("recommended_audio_offset_adjust_us") or 0)
|
||||
@ -432,6 +435,11 @@ decision_note = "analyzer marked this report calibration-ready" if ready else "a
|
||||
if not report:
|
||||
if not raw_failure_enabled:
|
||||
decision_note = "raw analyzer-failure calibration disabled"
|
||||
elif paired_pulses < raw_failure_min_pairs:
|
||||
decision_note = (
|
||||
"raw analyzer-failure calibration refused: "
|
||||
f"paired_pulses {paired_pulses} < {raw_failure_min_pairs}"
|
||||
)
|
||||
elif raw_activity_delta_ms is None:
|
||||
decision_note = "raw analyzer-failure calibration refused: no raw activity delta was reported"
|
||||
elif abs(raw_activity_delta_ms) > raw_failure_max_abs_delta_ms:
|
||||
@ -509,6 +517,7 @@ fields = {
|
||||
"provisional_gain": f"{provisional_gain:.3f}",
|
||||
"provisional_max_step_us": provisional_max_step_us,
|
||||
"raw_failure_calibration_enabled": str(raw_failure_enabled).lower(),
|
||||
"raw_failure_min_pairs": raw_failure_min_pairs,
|
||||
"raw_failure_max_abs_delta_ms": f"{raw_failure_max_abs_delta_ms:.1f}",
|
||||
"raw_activity_delta_ms": "" if raw_activity_delta_ms is None else f"{raw_activity_delta_ms:+.1f}",
|
||||
"analysis_failure_reason": failure_reason,
|
||||
@ -552,6 +561,7 @@ PY
|
||||
echo " ↪ provisional_gain=${provisional_gain}"
|
||||
echo " ↪ provisional_max_step_us=${provisional_max_step_us}"
|
||||
echo " ↪ raw_failure_calibration_enabled=${raw_failure_calibration_enabled}"
|
||||
echo " ↪ raw_failure_min_pairs=${raw_failure_min_pairs}"
|
||||
echo " ↪ raw_failure_max_abs_delta_ms=${raw_failure_max_abs_delta_ms}"
|
||||
[[ -n "${raw_activity_delta_ms}" ]] && echo " ↪ raw_activity_delta_ms=${raw_activity_delta_ms}"
|
||||
[[ -n "${analysis_failure_reason}" ]] && echo " ↪ analysis_failure_reason=${analysis_failure_reason}"
|
||||
@ -905,7 +915,7 @@ def diagnose_segment(row):
|
||||
"blocker",
|
||||
"server_receive_skew_p95_high",
|
||||
"Audio/video timing becomes unstable between client send and server receive.",
|
||||
"Treat this as network/gRPC receive jitter; heal freshness with drop/reanchor policy, not static calibration.",
|
||||
"Treat this as transport/server receive jitter; heal freshness with drop/reanchor policy, not static calibration.",
|
||||
)
|
||||
if over(row, "planner_camera_sink_late_p95_ms_after", 120) or over(row, "planner_microphone_sink_late_p95_ms_after", 120):
|
||||
add_finding(
|
||||
@ -981,7 +991,26 @@ def diagnose_segment(row):
|
||||
|
||||
def primary_finding(findings):
    """Pick the single finding that should headline a segment diagnosis.

    Findings are ordered by severity first ("blocker" before "warning"
    before "info") and, within the same severity, by pipeline layer so that
    a server-side receive/sink blocker wins over a probe-pairing blocker.

    Args:
        findings: non-empty list of dicts; each is read via
            ``item.get("severity")`` and ``item.get("layer")``.

    Returns:
        The highest-priority finding dict. Ties keep their original list
        order because ``sorted`` is stable.

    Raises:
        IndexError: if ``findings`` is empty.
    """
    severity_rank = {"blocker": 0, "warning": 1, "info": 2}
    # Lower rank is preferred when severities tie.
    layer_rank = {
        "client_uplink": 0,
        "network_receive": 1,
        "server_sink_scheduler": 2,
        "server_sink_handoff": 3,
        "server_evidence": 4,
        "server_calibration": 5,
        "external_boundary": 6,
        "probe_video": 7,
        "probe_pairing": 8,
        "unknown": 9,
        "none": 10,
    }
    # The severity-only early return that used to sit here made the layer
    # tie-break unreachable; severity and layer are now ranked together.
    return sorted(
        findings,
        key=lambda item: (
            # Unrecognized or missing severities sort after every known one.
            severity_rank.get(item.get("severity"), 9),
            # Unrecognized or missing layers share the "unknown" rank.
            layer_rank.get(item.get("layer"), 9),
        ),
    )[0]
|
||||
|
||||
|
||||
rows = []
|
||||
|
||||
@ -10,7 +10,7 @@ bench = false
|
||||
|
||||
[package]
|
||||
name = "lesavka_server"
|
||||
version = "0.17.29"
|
||||
version = "0.17.30"
|
||||
edition = "2024"
|
||||
autobins = false
|
||||
|
||||
|
||||
@ -140,7 +140,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_DRIFT_MS:-250}",
|
||||
"LESAVKA_SYNC_PROVISIONAL_GAIN=${LESAVKA_SYNC_PROVISIONAL_GAIN:-0.5}",
|
||||
"LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US=${LESAVKA_SYNC_PROVISIONAL_MAX_STEP_US:-150000}",
|
||||
"LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-${LESAVKA_SYNC_PROVISIONAL_CALIBRATION}}",
|
||||
"LESAVKA_SYNC_RAW_FAILURE_CALIBRATION=${LESAVKA_SYNC_RAW_FAILURE_CALIBRATION:-0}",
|
||||
"LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS=${LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS:-3}",
|
||||
"LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS=${LESAVKA_SYNC_RAW_FAILURE_MAX_ABS_DELTA_MS:-350}",
|
||||
"LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION=${LESAVKA_SYNC_CONFIRM_AFTER_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}",
|
||||
"LESAVKA_SYNC_CONFIRMATION_SEGMENTS=${LESAVKA_SYNC_CONFIRMATION_SEGMENTS:-1}",
|
||||
@ -149,6 +150,7 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"LESAVKA_SYNC_TOTAL_SEGMENTS=$((LESAVKA_SYNC_CALIBRATION_SEGMENTS + LESAVKA_SYNC_CONFIRMATION_SEGMENTS))",
|
||||
"export LESAVKA_SYNC_PROVISIONAL_CALIBRATION",
|
||||
"export LESAVKA_SYNC_RAW_FAILURE_CALIBRATION",
|
||||
"export LESAVKA_SYNC_RAW_FAILURE_MIN_PAIRS",
|
||||
"LESAVKA_SYNC_ADAPTIVE_CALIBRATION",
|
||||
"LESAVKA_SYNC_CALIBRATION_SEGMENTS=4",
|
||||
"browser_consumer_reuse_session=${reuse_browser_session}",
|
||||
@ -184,6 +186,7 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"decision_provisional_video_recommendation_us",
|
||||
"planner_live_lag_ms_after",
|
||||
"probe_p95_abs_skew_ms",
|
||||
"transport/server receive jitter",
|
||||
"settling ${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS}s before next segment",
|
||||
"print_upstream_calibration_state \"before mirrored run\"",
|
||||
"maybe_apply_probe_calibration",
|
||||
@ -192,6 +195,8 @@ fn mirrored_sync_script_uses_real_client_capture_path() {
|
||||
"bounded provisional correction from median skew",
|
||||
"bounded provisional correction from analyzer-failure raw activity",
|
||||
"raw_failure_calibration_enabled",
|
||||
"raw analyzer-failure calibration refused: ",
|
||||
"raw_failure_min_pairs",
|
||||
"provisional calibration not saved",
|
||||
"calibration apply refused: ${calibration_decision_note}",
|
||||
"calibrate \"${calibration_apply_audio_delta_us}\" \"${calibration_apply_video_delta_us}\"",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user