fix: compensate bundled media egress sync

This commit is contained in:
Brad Stein 2026-05-03 11:03:15 -03:00
parent 7637f7005a
commit c1a3205a7c
11 changed files with 291 additions and 43 deletions

View File

@ -1,6 +1,6 @@
# Lesavka Agent Notes
## 0.18.3 Bundled Webcam A/V Migration Checklist
## 0.18.4 Bundled Webcam A/V Migration Checklist
Context: manual Google Meet and mirrored-probe testing showed the split webcam
and microphone uplink design is too fragile under real browser/device pressure.
@ -34,9 +34,9 @@ explicit no-camera path.
- [x] Bundled webcam sessions enforce a hard one-second freshness ceiling:
server-side reanchors may improve smoothness, but stale/future waits over
the live budget are dropped instead of preserving lag.
- [x] Bundled webcam sessions do not inherit legacy split-path static A/V
calibration offsets by default; the client-owned capture timeline is the
sync source, with only explicit bundled-offset env overrides allowed.
- [x] Bundled webcam sessions use the shared client capture timeline for
transit sync, then apply runtime output-path calibration when splitting
into UVC/UAC so Meet sees synchronized presentation.
### Wire Protocol
- [x] Add `UpstreamMediaBundle` containing one optional video frame plus zero or
@ -78,6 +78,10 @@ explicit no-camera path.
- [x] Drop mixed A/V bundles coherently when one side fails freshness/sync planning.
- [x] Reanchor bundled playout when due times drift too far into the future, and
drop packets whose predicted playout age would exceed the one-second budget.
- [x] Keep bundled audio scheduled early enough that UVC/UAC output-path
compensation can fit inside the one-second freshness budget when possible.
- [x] Trim bundled output-path offset spans only when the active calibration
would otherwise violate the one-second freshness ceiling.
- [x] Activate the camera relay before opening the microphone sink so UVC can
become ready even if UAC setup is slow.
- [x] Log the first bundled video frame handed to the camera sink.

6
Cargo.lock generated
View File

@ -1652,7 +1652,7 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "lesavka_client"
version = "0.18.3"
version = "0.18.4"
dependencies = [
"anyhow",
"async-stream",
@ -1686,7 +1686,7 @@ dependencies = [
[[package]]
name = "lesavka_common"
version = "0.18.3"
version = "0.18.4"
dependencies = [
"anyhow",
"base64",
@ -1698,7 +1698,7 @@ dependencies = [
[[package]]
name = "lesavka_server"
version = "0.18.3"
version = "0.18.4"
dependencies = [
"anyhow",
"base64",

View File

@ -4,7 +4,7 @@ path = "src/main.rs"
[package]
name = "lesavka_client"
version = "0.18.3"
version = "0.18.4"
edition = "2024"
[dependencies]

View File

@ -1,6 +1,6 @@
[package]
name = "lesavka_common"
version = "0.18.3"
version = "0.18.4"
edition = "2024"
build = "build.rs"

View File

@ -249,8 +249,9 @@ from `LESAVKA_CLIENT_PKI_SSH_SOURCE` over SSH. Runtime clients require the insta
| `LESAVKA_UAC_SESSION_CLOCK_ALIGN` | server audio sink clock-alignment override; `0` is the host-validated default |
| `LESAVKA_UPSTREAM_AUDIO_PLAYOUT_OFFSET_US` | legacy/split server upstream playout override; shifts gadget-audio presentation relative to the shared playout epoch |
| `LESAVKA_UPSTREAM_AUDIO_MASTER_WAIT_GRACE_MS` | server upstream sync override; how long video may wait past its nominal due time for UAC audio to reach the matching timestamp, defaults to `350` |
| `LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US` | bundled webcam server playout override; defaults to `0` because bundled client capture owns A/V sync |
| `LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US` | bundled webcam server playout override; defaults to `0` because bundled client capture owns A/V sync |
| `LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US` | bundled webcam server playout override; defaults to the active runtime audio output-path calibration when unset, with the final A/V offset span trimmed to fit the live budget |
| `LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS` | bundled webcam jitter buffer before output-path compensation; falls back to `LESAVKA_UPSTREAM_PLAYOUT_DELAY_MS` when unset, otherwise defaults to `20` so compensated video can stay under the live budget |
| `LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US` | bundled webcam server playout override; defaults to the active runtime video output-path calibration when unset, with the final A/V offset span trimmed to fit the live budget |
| `LESAVKA_UPSTREAM_MAX_LIVE_LAG_MS` | server upstream planner freshness ceiling; planner-approved audio/video should not exceed this live lag budget, defaults to `1000` and is capped at `1000` |
| `LESAVKA_UPSTREAM_PAIR_SLACK_US` | server upstream pairing override; how far video may diverge from the planned audio-master capture moment before the frame is held or dropped, defaults to `80000` |
| `LESAVKA_UPSTREAM_PLAYOUT_DELAY_MS` | server upstream pairing/synchronization target buffer; the server uses this shared buffer to pair webcam frames with matching gadget-mic audio before remote presentation, defaults to `350` |

View File

@ -10,7 +10,7 @@ bench = false
[package]
name = "lesavka_server"
version = "0.18.3"
version = "0.18.4"
edition = "2024"
autobins = false

View File

@ -24,6 +24,13 @@ impl BundledUpstreamEvent {
Self::Video(_) => UpstreamMediaKind::Camera,
}
}
/// Rank used as the secondary sort key for events that share a remote PTS.
///
/// Audio ranks `0` and video ranks `1`, so when both carry the same
/// timestamp the audio event is ordered ahead of the video event.
fn playout_order(&self) -> u8 {
    match self {
        Self::Video(_) => 1,
        Self::Audio(_) => 0,
    }
}
}
#[cfg(not(coverage))]
@ -120,7 +127,7 @@ fn bundled_upstream_playout_delay() -> Duration {
.ok()
.and_then(|value| value.trim().parse::<u64>().ok())
.map(Duration::from_millis)
.unwrap_or_else(|| Duration::from_millis(350))
.unwrap_or_else(|| Duration::from_millis(20))
}
#[cfg(not(coverage))]
@ -336,7 +343,7 @@ impl Relay for Handler {
if events.is_empty() {
continue;
}
events.sort_by_key(BundledUpstreamEvent::remote_pts_us);
events.sort_by_key(|event| (event.remote_pts_us(), event.playout_order()));
if last_bundle_session_id.is_some_and(|session_id| session_id != bundle.session_id) {
warn!(
rpc_id,

View File

@ -12,10 +12,11 @@ mod state;
mod types;
use config::{
apply_playout_offset, upstream_audio_master_wait_grace, upstream_bundled_playout_offset_us,
upstream_camera_startup_grace_us, upstream_max_live_lag, upstream_pairing_master_slack,
upstream_playout_delay, upstream_playout_offset_us, upstream_reanchor_late_threshold,
upstream_require_paired_startup, upstream_startup_timeout, upstream_timing_trace_enabled,
apply_playout_offset, upstream_audio_master_wait_grace, upstream_bundled_playout_delay,
upstream_bundled_playout_offset_override_us, upstream_camera_startup_grace_us,
upstream_max_live_lag, upstream_pairing_master_slack, upstream_playout_delay,
upstream_playout_offset_us, upstream_reanchor_late_threshold, upstream_require_paired_startup,
upstream_startup_timeout, upstream_timing_trace_enabled,
};
use state::{UpstreamClockState, UpstreamSyncPhase};
pub use types::{
@ -109,6 +110,46 @@ impl UpstreamMediaRuntime {
}
}
/// Untrimmed bundled playout offset for `kind`, in microseconds.
///
/// An explicit bundled-offset environment override wins when one is
/// present and parses; otherwise the runtime's own `playout_offset_us`
/// value for the same kind is used.
fn raw_bundled_playout_offset_us(&self, kind: UpstreamMediaKind) -> i64 {
    match upstream_bundled_playout_offset_override_us(kind) {
        Some(override_us) => override_us,
        None => self.playout_offset_us(kind),
    }
}
/// Returns the `(camera, microphone)` bundled playout offsets in microseconds.
///
/// The raw offsets are kept as-is unless the camera-minus-microphone span
/// exceeds the room left in the live-lag budget after the bundled playout
/// delay. In that case only the later side is pulled back so the span equals
/// that remaining budget; the earlier side is never moved.
fn bundled_playout_offsets_us(&self) -> (i64, i64) {
    let raw_camera_us = self.raw_bundled_playout_offset_us(UpstreamMediaKind::Camera);
    let raw_microphone_us = self.raw_bundled_playout_offset_us(UpstreamMediaKind::Microphone);

    // Largest A/V offset span that still fits inside the freshness ceiling.
    let span_budget = upstream_max_live_lag().saturating_sub(upstream_bundled_playout_delay());
    let max_span_us = span_budget.as_micros().min(i64::MAX as u128) as i64;

    let span_us = raw_camera_us.saturating_sub(raw_microphone_us);
    if span_us > max_span_us {
        // Camera trails the microphone by too much: trim the camera offset.
        return (
            raw_microphone_us.saturating_add(max_span_us),
            raw_microphone_us,
        );
    }
    if span_us < -max_span_us {
        // Microphone trails the camera by too much: trim the microphone offset.
        return (raw_camera_us, raw_camera_us.saturating_add(max_span_us));
    }
    (raw_camera_us, raw_microphone_us)
}
/// Trimmed bundled playout offset for a single media `kind`, in microseconds.
///
/// Selects the matching half of the pair produced by
/// `bundled_playout_offsets_us`.
fn bundled_playout_offset_us(&self, kind: UpstreamMediaKind) -> i64 {
    match (kind, self.bundled_playout_offsets_us()) {
        (UpstreamMediaKind::Camera, (camera_us, _)) => camera_us,
        (UpstreamMediaKind::Microphone, (_, microphone_us)) => microphone_us,
    }
}
/// How many microseconds the larger of the two trimmed offsets exceeds the
/// offset of `kind`.
///
/// The result is zero for whichever kind already has the larger offset.
fn bundled_later_offset_reserve_us(&self, kind: UpstreamMediaKind) -> u64 {
    let (camera_us, microphone_us) = self.bundled_playout_offsets_us();
    let own_offset_us = match kind {
        UpstreamMediaKind::Camera => camera_us,
        UpstreamMediaKind::Microphone => microphone_us,
    };
    let largest_offset_us = camera_us.max(microphone_us);
    let reserve_us = largest_offset_us.saturating_sub(own_offset_us);
    if reserve_us > 0 { reserve_us as u64 } else { 0 }
}
/// Mark one audio chunk as actually handed to the UAC sink.
pub fn mark_audio_presented(&self, local_pts_us: u64, due_at: Instant) {
let mut state = self
@ -382,18 +423,21 @@ impl UpstreamMediaRuntime {
}
*last_slot = Some(local_pts_us);
let sink_offset_us = upstream_bundled_playout_offset_us(kind);
let sink_offset_us = self.bundled_playout_offset_us(kind);
let epoch = state.playout_epoch.unwrap_or(bundle_epoch);
let mut due_at =
apply_playout_offset(epoch + Duration::from_micros(local_pts_us), sink_offset_us);
let now = Instant::now();
let mut late_by = now.checked_duration_since(due_at).unwrap_or_default();
let playout_delay = upstream_playout_delay().min(max_live_lag);
let playout_delay = upstream_bundled_playout_delay().min(max_live_lag);
let reanchor_threshold = upstream_reanchor_late_threshold(playout_delay);
let max_future_wait = max_live_lag.saturating_sub(source_lag);
let later_offset_reserve =
Duration::from_micros(self.bundled_later_offset_reserve_us(kind));
let max_kind_future_wait = max_future_wait.saturating_sub(later_offset_reserve);
let due_future_wait = due_at.saturating_duration_since(now);
if late_by > reanchor_threshold || due_future_wait > max_future_wait {
let desired_delay = playout_delay.min(max_future_wait);
if late_by > reanchor_threshold || due_future_wait > max_kind_future_wait {
let desired_delay = playout_delay.min(max_kind_future_wait);
let desired_due_at = now + desired_delay;
let unoffset_due_at = apply_playout_offset(desired_due_at, -sink_offset_us);
let recovered_epoch = unoffset_due_at
@ -418,6 +462,7 @@ impl UpstreamMediaRuntime {
recovery_buffer_ms = desired_delay.as_millis(),
max_live_lag_ms = max_live_lag.as_millis(),
source_lag_ms = source_lag.as_millis(),
later_offset_reserve_ms = later_offset_reserve.as_millis(),
"bundled upstream media playhead reanchored to preserve freshness"
);
}

View File

@ -25,6 +25,15 @@ pub(super) fn upstream_playout_delay() -> Duration {
Duration::from_millis(delay_ms)
}
/// Resolves the bundled webcam playout (jitter-buffer) delay.
///
/// Candidates are consulted in priority order:
/// 1. `LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS`
/// 2. `LESAVKA_UPSTREAM_PLAYOUT_DELAY_MS`
///
/// The first variable whose trimmed value parses as a `u64` millisecond
/// count wins. A variable that is set but empty or malformed is skipped
/// rather than masking a valid lower-priority value. When neither yields a
/// usable number the delay is 20 ms.
pub(super) fn upstream_bundled_playout_delay() -> Duration {
    const DEFAULT_DELAY_MS: u64 = 20;
    const CANDIDATE_VARS: [&str; 2] = [
        "LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS",
        "LESAVKA_UPSTREAM_PLAYOUT_DELAY_MS",
    ];

    let delay_ms = CANDIDATE_VARS
        .iter()
        .find_map(|name| {
            std::env::var(name)
                .ok()
                .and_then(|value| value.trim().parse::<u64>().ok())
        })
        .unwrap_or(DEFAULT_DELAY_MS);
    Duration::from_millis(delay_ms)
}
pub(super) fn upstream_max_live_lag() -> Duration {
let lag_ms = std::env::var("LESAVKA_UPSTREAM_MAX_LIVE_LAG_MS")
.ok()
@ -71,7 +80,7 @@ pub(super) fn upstream_playout_offset_us(kind: UpstreamMediaKind) -> i64 {
.unwrap_or(default_offset_us)
}
pub(super) fn upstream_bundled_playout_offset_us(kind: UpstreamMediaKind) -> i64 {
pub(super) fn upstream_bundled_playout_offset_override_us(kind: UpstreamMediaKind) -> Option<i64> {
let name = match kind {
UpstreamMediaKind::Camera => "LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US",
UpstreamMediaKind::Microphone => "LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US",
@ -79,7 +88,6 @@ pub(super) fn upstream_bundled_playout_offset_us(kind: UpstreamMediaKind) -> i64
std::env::var(name)
.ok()
.and_then(|value| value.trim().parse::<i64>().ok())
.unwrap_or(0)
}
pub(super) fn upstream_pairing_master_slack() -> Duration {

View File

@ -15,6 +15,39 @@ fn upstream_playout_delay_defaults_to_freshness_budget_and_accepts_overrides() {
});
}
/// The bundled playout delay is 20 ms with no overrides, inherits the shared
/// playout-delay variable when only that one is set, and honours the
/// bundled-specific variable when it is set.
#[test]
#[serial(upstream_media_runtime)]
fn upstream_bundled_playout_delay_defaults_low_and_accepts_overrides() {
    const BUNDLED_VAR: &str = "LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS";
    const SHARED_VAR: &str = "LESAVKA_UPSTREAM_PLAYOUT_DELAY_MS";

    let expect_delay_ms = |expected_ms: u64| {
        assert_eq!(
            super::upstream_bundled_playout_delay(),
            Duration::from_millis(expected_ms)
        );
    };

    // Neither variable present: built-in default.
    temp_env::with_var_unset(BUNDLED_VAR, || {
        temp_env::with_var_unset(SHARED_VAR, || expect_delay_ms(20));
    });

    // Only the shared variable present: it is inherited.
    temp_env::with_var(SHARED_VAR, Some("250"), || {
        temp_env::with_var_unset(BUNDLED_VAR, || expect_delay_ms(250));
    });

    // Bundled-specific variable present: it is used.
    temp_env::with_var(BUNDLED_VAR, Some("40"), || expect_delay_ms(40));
}
#[test]
#[serial(upstream_media_runtime)]
fn upstream_max_live_lag_defaults_to_one_second_and_accepts_overrides() {
@ -108,16 +141,16 @@ fn upstream_playout_offsets_default_to_mjpeg_calibration_and_accept_overrides()
#[test]
#[serial(upstream_media_runtime)]
fn bundled_playout_offsets_default_to_zero_and_accept_explicit_overrides() {
fn bundled_playout_offsets_default_to_runtime_calibration_and_accept_explicit_overrides() {
temp_env::with_var_unset("LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US", || {
temp_env::with_var_unset("LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US", || {
assert_eq!(
super::upstream_bundled_playout_offset_us(UpstreamMediaKind::Microphone),
0
super::upstream_bundled_playout_offset_override_us(UpstreamMediaKind::Microphone),
None
);
assert_eq!(
super::upstream_bundled_playout_offset_us(UpstreamMediaKind::Camera),
0
super::upstream_bundled_playout_offset_override_us(UpstreamMediaKind::Camera),
None
);
});
});
@ -131,12 +164,16 @@ fn bundled_playout_offsets_default_to_zero_and_accept_explicit_overrides() {
Some("-3000"),
|| {
assert_eq!(
super::upstream_bundled_playout_offset_us(UpstreamMediaKind::Microphone),
12_000
super::upstream_bundled_playout_offset_override_us(
UpstreamMediaKind::Microphone
),
Some(12_000)
);
assert_eq!(
super::upstream_bundled_playout_offset_us(UpstreamMediaKind::Camera),
-3_000
super::upstream_bundled_playout_offset_override_us(
UpstreamMediaKind::Camera
),
Some(-3_000)
);
},
);

View File

@ -78,15 +78,107 @@ fn bundled_media_uses_client_epoch_without_pairing_wait() {
#[test]
#[serial(upstream_media_runtime)]
fn bundled_media_ignores_legacy_static_calibration_offsets_by_default() {
temp_env::with_var("LESAVKA_UPSTREAM_PLAYOUT_DELAY_MS", Some("0"), || {
temp_env::with_var_unset("LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US", || {
temp_env::with_var_unset("LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US", || {
fn bundled_media_uses_runtime_output_calibration_offsets_by_default() {
temp_env::with_var(
"LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS",
Some("0"),
|| {
temp_env::with_var_unset("LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US", || {
temp_env::with_var_unset(
"LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US",
|| {
let runtime = UpstreamMediaRuntime::new();
runtime.set_playout_offsets(120_000, 0);
let _camera = runtime.activate_camera();
let _microphone = runtime.activate_microphone();
let epoch = tokio::time::Instant::now();
let audio = play(runtime.plan_bundled_pts(
super::UpstreamMediaKind::Microphone,
1_000_000,
1,
1_000_000,
epoch,
));
let video = play(runtime.plan_bundled_pts(
super::UpstreamMediaKind::Camera,
1_000_000,
16_666,
1_000_000,
epoch,
));
assert_eq!(audio.local_pts_us, video.local_pts_us);
assert_eq!(
video.due_at.saturating_duration_since(audio.due_at),
Duration::from_millis(120)
);
},
);
});
},
);
}
/// With both bundled offset overrides pinned to `0`, a non-zero runtime
/// calibration must not separate audio and video due times.
#[test]
#[serial(upstream_media_runtime)]
fn bundled_media_explicit_offsets_can_disable_runtime_output_calibration() {
    // Body of the scenario, run once all three overrides are in place.
    let scenario = || {
        let runtime = UpstreamMediaRuntime::new();
        runtime.set_playout_offsets(120_000, 0);
        let _camera = runtime.activate_camera();
        let _microphone = runtime.activate_microphone();
        let epoch = tokio::time::Instant::now();

        let audio_plan = runtime.plan_bundled_pts(
            super::UpstreamMediaKind::Microphone,
            1_000_000,
            1,
            1_000_000,
            epoch,
        );
        let audio = play(audio_plan);
        let video_plan = runtime.plan_bundled_pts(
            super::UpstreamMediaKind::Camera,
            1_000_000,
            16_666,
            1_000_000,
            epoch,
        );
        let video = play(video_plan);

        assert_eq!(audio.due_at, video.due_at);
    };

    temp_env::with_var(
        "LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS",
        Some("0"),
        || {
            temp_env::with_var(
                "LESAVKA_UPSTREAM_BUNDLED_AUDIO_PLAYOUT_OFFSET_US",
                Some("0"),
                || {
                    temp_env::with_var(
                        "LESAVKA_UPSTREAM_BUNDLED_VIDEO_PLAYOUT_OFFSET_US",
                        Some("0"),
                        || scenario(),
                    );
                },
            );
        },
    );
}
#[test]
#[serial(upstream_media_runtime)]
fn bundled_media_schedules_audio_early_so_compensated_video_stays_fresh() {
temp_env::with_var(
"LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS",
Some("20"),
|| {
temp_env::with_var("LESAVKA_UPSTREAM_MAX_LIVE_LAG_MS", Some("1000"), || {
let runtime = UpstreamMediaRuntime::new();
runtime.set_playout_offsets(1_090_000, 0);
runtime.set_playout_offsets(950_000, 0);
let _camera = runtime.activate_camera();
let _microphone = runtime.activate_microphone();
let epoch = tokio::time::Instant::now();
let now = tokio::time::Instant::now();
let epoch = now + Duration::from_millis(20);
let audio = play(runtime.plan_bundled_pts(
super::UpstreamMediaKind::Microphone,
@ -103,11 +195,65 @@ fn bundled_media_ignores_legacy_static_calibration_offsets_by_default() {
epoch,
));
assert_eq!(audio.due_at, video.due_at);
assert_eq!(audio.local_pts_us, video.local_pts_us);
assert!(
audio.due_at.saturating_duration_since(now) <= Duration::from_millis(50),
"audio should be released promptly when video needs nearly the full freshness budget"
);
assert_eq!(
video.due_at.saturating_duration_since(audio.due_at),
Duration::from_millis(950)
);
assert!(
video.due_at.saturating_duration_since(now) <= Duration::from_secs(1),
"compensated video should still land within the one-second live budget"
);
});
});
});
},
);
}
/// A calibration larger than the live budget (1090 ms against a 1000 ms
/// ceiling and a 20 ms bundled delay) must be trimmed to a 980 ms A/V gap
/// while video still lands within one second of "now".
#[test]
#[serial(upstream_media_runtime)]
fn bundled_media_clamps_output_compensation_to_freshness_budget() {
    let scenario = || {
        let runtime = UpstreamMediaRuntime::new();
        runtime.set_playout_offsets(1_090_000, 0);
        let _camera = runtime.activate_camera();
        let _microphone = runtime.activate_microphone();
        let now = tokio::time::Instant::now();
        let epoch = now + Duration::from_millis(20);

        let audio_plan = runtime.plan_bundled_pts(
            super::UpstreamMediaKind::Microphone,
            1_000_000,
            1,
            1_000_000,
            epoch,
        );
        let audio = play(audio_plan);
        let video_plan = runtime.plan_bundled_pts(
            super::UpstreamMediaKind::Camera,
            1_000_000,
            16_666,
            1_000_000,
            epoch,
        );
        let video = play(video_plan);

        let av_gap = video.due_at.saturating_duration_since(audio.due_at);
        let video_wait = video.due_at.saturating_duration_since(now);

        assert_eq!(
            av_gap,
            Duration::from_millis(980),
            "factory output compensation should be trimmed only enough to respect freshness"
        );
        assert!(
            video_wait <= Duration::from_secs(1),
            "clamped output compensation should preserve the live ceiling"
        );
    };

    temp_env::with_var(
        "LESAVKA_UPSTREAM_BUNDLED_PLAYOUT_DELAY_MS",
        Some("20"),
        || {
            temp_env::with_var("LESAVKA_UPSTREAM_MAX_LIVE_LAG_MS", Some("1000"), || {
                scenario()
            });
        },
    );
}
#[test]