From c82c61c652bbf1e107d4ec39f7949c086187d20b Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 2 May 2026 16:32:03 -0300 Subject: [PATCH] test: harden mirrored probe audio detection --- AGENTS.md | 17 +++++++++++ Cargo.lock | 6 ++-- client/Cargo.toml | 2 +- .../src/sync_probe/analyze/onset_detection.rs | 29 ++++++++++++++---- .../analyze/onset_detection/tests.rs | 30 +++++++++++++++++++ common/Cargo.toml | 2 +- scripts/manual/local_av_stimulus.py | 6 +++- .../manual/run_upstream_mirrored_av_sync.sh | 2 ++ server/Cargo.toml | 2 +- .../client_manual_sync_script_contract.rs | 4 +++ 10 files changed, 88 insertions(+), 12 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 7f9e7ae..d081cb4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -462,3 +462,20 @@ variables via `os.environ`. - [x] Add contract coverage so provisional calibration defaults cannot silently stop reaching the Python decision helper. - [x] Run shell syntax checks, focused contract tests, and package checks. - [x] Push clean semver `0.17.22` for installed client/server testing. + +## 0.17.23 Audio Probe Robustness Checklist + +Context: recent mirrored runs consistently detected more video events than audio events. That can +represent a real audio path problem, but the probe should not under-count audio just because the +room/speaker/mic path is quieter or mildly chopped. Harden the test tooling before interpreting +low paired-pulse counts as product failure. + +- [x] Raise the default local stimulus tone level and expose it as `PROBE_AUDIO_GAIN`. +- [x] Pass the configured audio gain into the local stimulus browser page. +- [x] Lower the analyzer audio peak floor so faint but valid probe tones are accepted. +- [x] Smooth the audio envelope before thresholding so single-window dips do not erase pulses. +- [x] Merge longer internal tone dropouts inside one pulse without merging adjacent 1s pulses. +- [x] Add analyzer tests for faint tones and longer within-pulse audio dropouts. 
+- [x] Update manual probe contract coverage for the audio-gain control. +- [x] Run focused analyzer/manual-probe tests and package checks. +- [x] Push clean semver `0.17.23` for installed client/server testing. diff --git a/Cargo.lock b/Cargo.lock index 5195c52..6d0076e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "lesavka_client" -version = "0.17.22" +version = "0.17.23" dependencies = [ "anyhow", "async-stream", @@ -1686,7 +1686,7 @@ dependencies = [ [[package]] name = "lesavka_common" -version = "0.17.22" +version = "0.17.23" dependencies = [ "anyhow", "base64", @@ -1698,7 +1698,7 @@ dependencies = [ [[package]] name = "lesavka_server" -version = "0.17.22" +version = "0.17.23" dependencies = [ "anyhow", "base64", diff --git a/client/Cargo.toml b/client/Cargo.toml index 18d2271..dc4ca0d 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -4,7 +4,7 @@ path = "src/main.rs" [package] name = "lesavka_client" -version = "0.17.22" +version = "0.17.23" edition = "2024" [dependencies] diff --git a/client/src/sync_probe/analyze/onset_detection.rs b/client/src/sync_probe/analyze/onset_detection.rs index 2daa81d..5a08b4f 100644 --- a/client/src/sync_probe/analyze/onset_detection.rs +++ b/client/src/sync_probe/analyze/onset_detection.rs @@ -16,7 +16,10 @@ const MAX_VIDEO_FLICKER_SEGMENT_FRAME_MULTIPLIER: f64 = 1.5; const MIN_COLOR_PULSE_SATURATION: u8 = 36; const MIN_COLOR_PULSE_VALUE: u8 = 70; const MAX_COLOR_DISTANCE_SQUARED: u32 = 24_000; -const MAX_AUDIO_PULSE_INTERNAL_GAP_S: f64 = 0.09; +const MAX_AUDIO_PULSE_INTERNAL_GAP_S: f64 = 0.16; +const MIN_AUDIO_PROBE_PEAK: f64 = 25.0; +const AUDIO_ENVELOPE_THRESHOLD_FRACTION: f64 = 0.30; +const AUDIO_SAMPLE_THRESHOLD_FRACTION: f64 = 0.22; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(super) struct VideoColorFrame { @@ -331,7 +334,7 @@ pub(crate) fn detect_audio_segments( } let window_samples = 
((sample_rate_hz as usize * window_ms as usize) / 1000).max(1); - let envelope = samples + let raw_envelope = samples .chunks(window_samples) .map(|chunk| { let total: u64 = chunk @@ -341,19 +344,21 @@ pub(crate) fn detect_audio_segments( total as f64 / chunk.len() as f64 }) .collect::<Vec<_>>(); + let envelope = smooth_envelope(&raw_envelope); let peak = envelope.iter().copied().fold(0.0_f64, f64::max); - if peak < 50.0 { + if peak < MIN_AUDIO_PROBE_PEAK { bail!("audio probe peaks are too quiet to detect sync pulses"); } let baseline = median(envelope.clone()); - let threshold = baseline + ((peak - baseline) * 0.45); + let threshold = baseline + ((peak - baseline) * AUDIO_ENVELOPE_THRESHOLD_FRACTION); let sample_abs = samples .iter() .map(|sample| i32::from(*sample).unsigned_abs() as f64) .collect::<Vec<_>>(); let sample_peak = sample_abs.iter().copied().fold(0.0_f64, f64::max); let sample_baseline = median(sample_abs.clone()); - let sample_threshold = sample_baseline + ((sample_peak - sample_baseline) * 0.35); + let sample_threshold = + sample_baseline + ((sample_peak - sample_baseline) * AUDIO_SAMPLE_THRESHOLD_FRACTION); let mut segments = Vec::new(); let mut previous_active = false; let mut segment_start = 0usize; @@ -388,6 +393,20 @@ pub(crate) fn detect_audio_segments( } Ok(merge_nearby_audio_segments(segments)) } +fn smooth_envelope(envelope: &[f64]) -> Vec<f64> { + if envelope.len() < 3 { + return envelope.to_vec(); + } + + (0..envelope.len()) + .map(|index| { + let start = index.saturating_sub(1); + let end = (index + 2).min(envelope.len()); + envelope[start..end].iter().sum::<f64>() / (end - start) as f64 + }) + .collect() +} + pub(super) fn edge_midpoint(previous_s: f64, current_s: f64) -> f64 { previous_s + ((current_s - previous_s) / 2.0) } diff --git a/client/src/sync_probe/analyze/onset_detection/tests.rs b/client/src/sync_probe/analyze/onset_detection/tests.rs index 5c1e0ae..55b108b 100644 --- a/client/src/sync_probe/analyze/onset_detection/tests.rs +++ 
b/client/src/sync_probe/analyze/onset_detection/tests.rs @@ -147,6 +147,36 @@ fn detect_audio_segments_merges_short_internal_dropouts_inside_one_pulse() { assert!(segments[0].duration_s > 0.11); } +#[test] +fn detect_audio_segments_accepts_faint_probe_tones() { + let mut samples = vec![0i16; 48_000]; + for start in [4_800usize, 24_000] { + for sample in samples.iter_mut().skip(start).take(5_760) { + *sample = 40; + } + } + + let segments = detect_audio_segments(&samples, 48_000, 5).expect("faint audio segments"); + assert_eq!(segments.len(), 2); + assert!((segments[0].start_s - 0.1).abs() < 0.01); + assert!((segments[1].start_s - 0.5).abs() < 0.01); +} + +#[test] +fn detect_audio_segments_merges_longer_probe_dropouts_inside_one_pulse() { + let mut samples = vec![0i16; 48_000]; + for sample in samples.iter_mut().skip(4_800).take(12_000) { + *sample = 1_200; + } + for sample in samples.iter_mut().skip(7_200).take(5_760) { + *sample = 0; + } + + let segments = detect_audio_segments(&samples, 48_000, 5).expect("dropout audio segment"); + assert_eq!(segments.len(), 1); + assert!(segments[0].duration_s > 0.24); +} + #[test] fn detect_video_segments_closes_a_pulse_that_stays_active_until_the_last_frame() { let timestamps = [0.0, 0.1, 0.2, 0.3]; diff --git a/common/Cargo.toml b/common/Cargo.toml index 1411c45..5ffba2b 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lesavka_common" -version = "0.17.22" +version = "0.17.23" edition = "2024" build = "build.rs" diff --git a/scripts/manual/local_av_stimulus.py b/scripts/manual/local_av_stimulus.py index c255d24..ec62a00 100755 --- a/scripts/manual/local_av_stimulus.py +++ b/scripts/manual/local_av_stimulus.py @@ -24,9 +24,11 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--pulse-period-ms", type=int, default=1000) parser.add_argument("--pulse-width-ms", type=int, default=120) parser.add_argument("--marker-tick-period", type=int, default=5) + 
parser.add_argument("--audio-gain", type=float, default=0.55) parser.add_argument("--event-width-codes", default=DEFAULT_EVENT_WIDTH_CODES) args = parser.parse_args() args.event_width_codes = parse_event_width_codes(args.event_width_codes) + args.audio_gain = max(0.0, min(1.0, args.audio_gain)) return args @@ -78,6 +80,7 @@ class StimulusState: "pulse_period_ms": self.args.pulse_period_ms, "pulse_width_ms": self.args.pulse_width_ms, "marker_tick_period": self.args.marker_tick_period, + "audio_gain": self.args.audio_gain, "event_width_codes": self.args.event_width_codes, }) return snap @@ -180,7 +183,8 @@ async function runStimulus(command) { stage.style.setProperty('--pulse-color', pulseColors[widthCode] || pulseColors[1]); stage.classList.toggle('active', active); oscillator.frequency.setTargetAtTime(pulseFrequencies[widthCode] || pulseFrequencies[1], audioCtx.currentTime, 0.003); - gain.gain.setTargetAtTime(active ? 0.28 : 0.0, audioCtx.currentTime, 0.005); + const audioGain = Math.max(0, Math.min(1, Number(command.audio_gain ?? 0.55))); + gain.gain.setTargetAtTime(active ? 
audioGain : 0.0, audioCtx.currentTime, 0.005); setStatus(`running\nelapsed=${(elapsed / 1000).toFixed(2)}s\nactive=${active}\nevent=${pulseIndex}\nwidth_code=${widthCode}\nPoint the real webcam at this window and keep the real microphone hearing the tone.`); if (elapsed <= command.duration_seconds * 1000 + 500) { requestAnimationFrame(tick); diff --git a/scripts/manual/run_upstream_mirrored_av_sync.sh b/scripts/manual/run_upstream_mirrored_av_sync.sh index e903596..fe6092a 100755 --- a/scripts/manual/run_upstream_mirrored_av_sync.sh +++ b/scripts/manual/run_upstream_mirrored_av_sync.sh @@ -23,6 +23,7 @@ PROBE_PULSE_PERIOD_MS=${PROBE_PULSE_PERIOD_MS:-1000} PROBE_PULSE_WIDTH_MS=${PROBE_PULSE_WIDTH_MS:-120} PROBE_MARKER_TICK_PERIOD=${PROBE_MARKER_TICK_PERIOD:-5} PROBE_EVENT_WIDTH_CODES=${PROBE_EVENT_WIDTH_CODES:-1,2,1,3,2,4,1,1,3,1,4,2,1,2,3,4,1,3,2,2,4,1,2,4,3,1,1,4,2,3,1,2} +PROBE_AUDIO_GAIN=${PROBE_AUDIO_GAIN:-0.55} LESAVKA_SYNC_CALIBRATION_SEGMENTS_SET=${LESAVKA_SYNC_CALIBRATION_SEGMENTS+x} LESAVKA_SYNC_ADAPTIVE_CALIBRATION=${LESAVKA_SYNC_ADAPTIVE_CALIBRATION:-0} LESAVKA_SYNC_APPLY_CALIBRATION=${LESAVKA_SYNC_APPLY_CALIBRATION:-0} @@ -606,6 +607,7 @@ start_local_stimulus() { --pulse-period-ms "${PROBE_PULSE_PERIOD_MS}" \ --pulse-width-ms "${PROBE_PULSE_WIDTH_MS}" \ --marker-tick-period "${PROBE_MARKER_TICK_PERIOD}" \ + --audio-gain "${PROBE_AUDIO_GAIN}" \ --event-width-codes "${PROBE_EVENT_WIDTH_CODES}" \ >"${ARTIFACT_DIR}/stimulus-server.log" 2>&1 & STIMULUS_PID=$! 
diff --git a/server/Cargo.toml b/server/Cargo.toml index d640d79..aa83f33 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -10,7 +10,7 @@ bench = false [package] name = "lesavka_server" -version = "0.17.22" +version = "0.17.23" edition = "2024" autobins = false diff --git a/testing/tests/client_manual_sync_script_contract.rs b/testing/tests/client_manual_sync_script_contract.rs index 63fe547..6d9d699 100644 --- a/testing/tests/client_manual_sync_script_contract.rs +++ b/testing/tests/client_manual_sync_script_contract.rs @@ -127,6 +127,7 @@ fn mirrored_sync_script_uses_real_client_capture_path() { "LESAVKA_SYNC_CONTINUOUS_BROWSER=${LESAVKA_SYNC_CONTINUOUS_BROWSER:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}", "LESAVKA_SYNC_CONTINUE_ON_ANALYSIS_FAILURE=${LESAVKA_SYNC_CONTINUE_ON_ANALYSIS_FAILURE:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}", "LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS=${LESAVKA_SYNC_SEGMENT_SETTLE_SECONDS:-3}", + "PROBE_AUDIO_GAIN=${PROBE_AUDIO_GAIN:-0.55}", "LESAVKA_SYNC_PROVISIONAL_CALIBRATION=${LESAVKA_SYNC_PROVISIONAL_CALIBRATION:-${LESAVKA_SYNC_ADAPTIVE_CALIBRATION}}", "LESAVKA_SYNC_PROVISIONAL_MIN_PAIRS=${LESAVKA_SYNC_PROVISIONAL_MIN_PAIRS:-3}", "LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS=${LESAVKA_SYNC_PROVISIONAL_MAX_P95_MS:-350}", @@ -148,6 +149,7 @@ fn mirrored_sync_script_uses_real_client_capture_path() { "browser_analysis_required=${analysis_required}", "BROWSER_CONSUMER_REUSE_SESSION=\"${reuse_browser_session}\"", "BROWSER_ANALYSIS_REQUIRED=\"${analysis_required}\"", + "--audio-gain \"${PROBE_AUDIO_GAIN}\"", "LESAVKA_SYNC_CALIBRATION_SEGMENTS must be a positive integer", "run_mirrored_segments", "summarize_adaptive_probe_metrics", @@ -206,8 +208,10 @@ fn local_stimulus_matches_sync_analyzer_pulse_contract() { "--pulse-period-ms", "--pulse-width-ms", "--marker-tick-period", + "--audio-gain", "--event-width-codes", "event_width_codes", + "audio_gain", "widthCode", "oscillator.frequency.value = 880", "setStatus(`ready",