295 lines
11 KiB
Rust
295 lines
11 KiB
Rust
//! Shared audio-transport metadata helpers.
//!
//! Why: upstream audio is currently raw PCM, but Opus experiments need an
//! explicit schema and sizing model so client, server, and tests agree before
//! any production switch is made.
|
|
|
|
use crate::lesavka::{AudioEncoding, AudioPacket, UpstreamMediaBundle};
|
|
|
|
/// Sample rate, in hertz, used as the default for the raw PCM transport.
pub const PCM_SAMPLE_RATE: u32 = 48_000;
/// Channel count used as the default for the raw PCM transport.
pub const PCM_CHANNELS: u32 = 2;
/// Default duration of one raw PCM frame, in microseconds (20 ms).
pub const PCM_FRAME_DURATION_US: u32 = 20_000;

/// Sample rate, in hertz, used as the default for the Opus transport.
pub const OPUS_SAMPLE_RATE: u32 = 48_000;
/// Channel count used as the default for the Opus transport.
pub const OPUS_CHANNELS: u32 = 2;
/// Default duration of one Opus frame, in microseconds (20 ms).
pub const OPUS_FRAME_DURATION_US: u32 = 20_000;
/// Default Opus target bitrate, in bits per second (96 kbps).
pub const OPUS_DEFAULT_BITRATE_BPS: u32 = 96_000;
|
|
|
|
/// Upstream audio transport selectable by the operator.
///
/// Inputs: a token coming from the UI, the environment, or a control file.
/// Output: one of two routes, stable raw PCM or compressed Opus. Why: client
/// and server must share one vocabulary so Opus can be trialled while the
/// known-good PCM fallback stays intact.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpstreamAudioCodec {
    /// The stable raw PCM route.
    PcmS16le,
    /// The compressed Opus route.
    Opus,
}
|
|
|
|
impl UpstreamAudioCodec {
|
|
/// Return the compact id stored in control files and environment values.
|
|
#[must_use]
|
|
pub const fn as_id(self) -> &'static str {
|
|
match self {
|
|
Self::PcmS16le => "pcm",
|
|
Self::Opus => "opus",
|
|
}
|
|
}
|
|
|
|
/// Return a short label suitable for the launcher UI and diagnostics.
|
|
#[must_use]
|
|
pub const fn label(self) -> &'static str {
|
|
match self {
|
|
Self::PcmS16le => "PCM",
|
|
Self::Opus => "Opus",
|
|
}
|
|
}
|
|
|
|
/// Return the packet profile this transport should emit.
|
|
#[must_use]
|
|
pub const fn profile(self) -> AudioTransportProfile {
|
|
match self {
|
|
Self::PcmS16le => AudioTransportProfile::pcm_s16le(),
|
|
Self::Opus => AudioTransportProfile::opus_voice(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse a launcher/env codec token into an upstream audio transport.
|
|
#[must_use]
|
|
pub fn parse_upstream_audio_codec(raw: &str) -> Option<UpstreamAudioCodec> {
|
|
match raw.trim().to_ascii_lowercase().as_str() {
|
|
"pcm" | "pcm_s16le" | "s16le" | "raw" | "uncompressed" => {
|
|
Some(UpstreamAudioCodec::PcmS16le)
|
|
}
|
|
"opus" | "compressed" | "voice" => Some(UpstreamAudioCodec::Opus),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// A normalized description of the payload carried by `AudioPacket::data`.
|
|
///
|
|
/// Inputs are codec metadata resolved from a packet or bundle. Outputs are the
|
|
/// exact framing values the transport should preserve. Why: the Opus path must
|
|
/// compare bandwidth and timing without weakening the known-good PCM fallback.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub struct AudioTransportProfile {
|
|
pub encoding: AudioEncoding,
|
|
pub sample_rate: u32,
|
|
pub channels: u32,
|
|
pub frame_duration_us: u32,
|
|
pub target_bitrate_bps: Option<u32>,
|
|
}
|
|
|
|
impl AudioTransportProfile {
|
|
/// Return the stable raw PCM profile used by today's upstream microphone path.
|
|
#[must_use]
|
|
pub const fn pcm_s16le() -> Self {
|
|
Self {
|
|
encoding: AudioEncoding::PcmS16le,
|
|
sample_rate: PCM_SAMPLE_RATE,
|
|
channels: PCM_CHANNELS,
|
|
frame_duration_us: PCM_FRAME_DURATION_US,
|
|
target_bitrate_bps: None,
|
|
}
|
|
}
|
|
|
|
/// Return the first Opus profile Lesavka should test for upstream audio.
|
|
///
|
|
/// Inputs: none. Outputs: a 48 kHz stereo, 20 ms, 96 kbps low-delay
|
|
/// profile. Why: Opus always runs internally at 48 kHz, and 20 ms frames
|
|
/// keep latency bounded while preserving enough quality for live speech.
|
|
#[must_use]
|
|
pub const fn opus_voice() -> Self {
|
|
Self {
|
|
encoding: AudioEncoding::Opus,
|
|
sample_rate: OPUS_SAMPLE_RATE,
|
|
channels: OPUS_CHANNELS,
|
|
frame_duration_us: OPUS_FRAME_DURATION_US,
|
|
target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
|
|
}
|
|
}
|
|
|
|
/// Estimate the payload budget for one frame in this transport profile.
|
|
///
|
|
/// Inputs: profile fields. Output: bytes per frame. Why: performance tests
|
|
/// need a stable, codec-independent way to compare PCM debt against Opus.
|
|
#[must_use]
|
|
pub fn expected_payload_bytes(self) -> u32 {
|
|
match self.encoding {
|
|
AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => {
|
|
let samples = u64::from(self.sample_rate)
|
|
.saturating_mul(u64::from(self.frame_duration_us))
|
|
/ 1_000_000;
|
|
samples
|
|
.saturating_mul(u64::from(self.channels))
|
|
.saturating_mul(2)
|
|
.min(u64::from(u32::MAX)) as u32
|
|
}
|
|
AudioEncoding::Opus => {
|
|
let bits = u64::from(self.target_bitrate_bps.unwrap_or(OPUS_DEFAULT_BITRATE_BPS))
|
|
.saturating_mul(u64::from(self.frame_duration_us))
|
|
/ 1_000_000;
|
|
bits.div_ceil(8).min(u64::from(u32::MAX)) as u32
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Interpret unset/legacy audio encoding metadata as the stable PCM transport.
|
|
///
|
|
/// Inputs: raw proto enum integer. Output: normalized encoding. Why:
|
|
/// `AUDIO_UNSPECIFIED` exists for wire compatibility but should not leave
|
|
/// callers guessing whether a payload is safe for the raw UAC sink.
|
|
#[must_use]
|
|
pub fn normalize_audio_encoding(raw: i32) -> AudioEncoding {
|
|
match AudioEncoding::try_from(raw).unwrap_or(AudioEncoding::AudioUnspecified) {
|
|
AudioEncoding::AudioUnspecified | AudioEncoding::PcmS16le => AudioEncoding::PcmS16le,
|
|
AudioEncoding::Opus => AudioEncoding::Opus,
|
|
}
|
|
}
|
|
|
|
/// Resolve the packet-level transport profile.
|
|
///
|
|
/// Inputs: a protobuf audio packet. Output: a normalized profile. Why: callers
|
|
/// should not need to duplicate the legacy-PCM fallback or Opus defaults.
|
|
#[must_use]
|
|
pub fn packet_audio_profile(packet: &AudioPacket) -> AudioTransportProfile {
|
|
match normalize_audio_encoding(packet.encoding) {
|
|
AudioEncoding::Opus => AudioTransportProfile {
|
|
encoding: AudioEncoding::Opus,
|
|
sample_rate: nonzero_or(packet.sample_rate, OPUS_SAMPLE_RATE),
|
|
channels: nonzero_or(packet.channels, OPUS_CHANNELS),
|
|
frame_duration_us: nonzero_or(packet.frame_duration_us, OPUS_FRAME_DURATION_US),
|
|
target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
|
|
},
|
|
AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => AudioTransportProfile {
|
|
encoding: AudioEncoding::PcmS16le,
|
|
sample_rate: nonzero_or(packet.sample_rate, PCM_SAMPLE_RATE),
|
|
channels: nonzero_or(packet.channels, PCM_CHANNELS),
|
|
frame_duration_us: nonzero_or(packet.frame_duration_us, PCM_FRAME_DURATION_US),
|
|
target_bitrate_bps: None,
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Resolve the bundle-level transport profile from the bundle marker or packets.
|
|
///
|
|
/// Inputs: an upstream media bundle. Output: the audio profile receivers should
|
|
/// expect. Why: the server needs to route the whole A/V bundle without splitting
|
|
/// audio away from video just to discover its codec.
|
|
#[must_use]
|
|
pub fn bundle_audio_profile(bundle: &UpstreamMediaBundle) -> AudioTransportProfile {
|
|
let raw = if bundle.audio_encoding != 0 {
|
|
bundle.audio_encoding
|
|
} else {
|
|
bundle
|
|
.audio
|
|
.first()
|
|
.map(|packet| packet.encoding)
|
|
.unwrap_or_default()
|
|
};
|
|
match normalize_audio_encoding(raw) {
|
|
AudioEncoding::Opus => AudioTransportProfile {
|
|
encoding: AudioEncoding::Opus,
|
|
sample_rate: nonzero_or(bundle.audio_sample_rate, OPUS_SAMPLE_RATE),
|
|
channels: nonzero_or(bundle.audio_channels, OPUS_CHANNELS),
|
|
frame_duration_us: bundle
|
|
.audio
|
|
.first()
|
|
.map(|packet| nonzero_or(packet.frame_duration_us, OPUS_FRAME_DURATION_US))
|
|
.unwrap_or(OPUS_FRAME_DURATION_US),
|
|
target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
|
|
},
|
|
AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => AudioTransportProfile {
|
|
encoding: AudioEncoding::PcmS16le,
|
|
sample_rate: nonzero_or(bundle.audio_sample_rate, PCM_SAMPLE_RATE),
|
|
channels: nonzero_or(bundle.audio_channels, PCM_CHANNELS),
|
|
frame_duration_us: bundle
|
|
.audio
|
|
.first()
|
|
.map(|packet| nonzero_or(packet.frame_duration_us, PCM_FRAME_DURATION_US))
|
|
.unwrap_or(PCM_FRAME_DURATION_US),
|
|
target_bitrate_bps: None,
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Stamp a packet as raw PCM with the current stable microphone framing.
|
|
///
|
|
/// Inputs: mutable packet. Output: side-effect only. Why: explicit metadata lets
|
|
/// future Opus tests distinguish today's known-good path from experimental
|
|
/// compressed audio without relying on payload-size guesses.
|
|
pub fn mark_packet_pcm_s16le(packet: &mut AudioPacket) {
|
|
packet.encoding = AudioEncoding::PcmS16le as i32;
|
|
packet.sample_rate = PCM_SAMPLE_RATE;
|
|
packet.channels = PCM_CHANNELS;
|
|
if packet.frame_duration_us == 0 {
|
|
packet.frame_duration_us = PCM_FRAME_DURATION_US;
|
|
}
|
|
}
|
|
|
|
/// Stamp a packet as Opus with the current low-latency voice framing.
|
|
///
|
|
/// Inputs: mutable packet. Output: side-effect only. Why: receivers must never
|
|
/// infer compressed audio from payload size, especially when PCM fallback can
|
|
/// intentionally coexist with Opus during startup.
|
|
pub fn mark_packet_opus(packet: &mut AudioPacket) {
|
|
packet.encoding = AudioEncoding::Opus as i32;
|
|
packet.sample_rate = OPUS_SAMPLE_RATE;
|
|
packet.channels = OPUS_CHANNELS;
|
|
if packet.frame_duration_us == 0 {
|
|
packet.frame_duration_us = OPUS_FRAME_DURATION_US;
|
|
}
|
|
}
|
|
|
|
/// Stamp a bundle with normalized audio metadata.
|
|
///
|
|
/// Inputs: mutable bundle. Output: side-effect only. Why: the server should be
|
|
/// able to choose the PCM or future Opus handoff path from the bundle envelope.
|
|
pub fn mark_bundle_audio_profile(bundle: &mut UpstreamMediaBundle, profile: AudioTransportProfile) {
|
|
bundle.audio_encoding = profile.encoding as i32;
|
|
bundle.audio_sample_rate = profile.sample_rate;
|
|
bundle.audio_channels = profile.channels;
|
|
}
|
|
|
|
/// True when a packet can be handed directly to the raw UAC appsrc.
|
|
#[must_use]
|
|
pub fn packet_is_raw_pcm_s16le(packet: &AudioPacket) -> bool {
|
|
packet_audio_profile(packet).encoding == AudioEncoding::PcmS16le
|
|
}
|
|
|
|
/// Return `value`, or `fallback` when `value` is zero (the "unset" marker).
const fn nonzero_or(value: u32, fallback: u32) -> u32 {
    match value {
        0 => fallback,
        set => set,
    }
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the per-frame payload budgets of the two canonical profiles.
    #[test]
    fn pcm_and_opus_profiles_keep_expected_frame_budgets() {
        // 48 kHz * 20 ms = 960 samples per channel; * 2 channels * 2 bytes = 3840.
        assert_eq!(
            AudioTransportProfile::pcm_s16le().expected_payload_bytes(),
            3840
        );
        // 96 kbps * 20 ms = 1920 bits = 240 bytes. The previous expectation of
        // 160 bytes corresponds to 64 kbps and did not match the default bitrate.
        assert_eq!(
            AudioTransportProfile::opus_voice().expected_payload_bytes(),
            240
        );
        // At 96 kbps the Opus frame is 16x smaller than PCM; require at least 10x.
        assert!(
            AudioTransportProfile::opus_voice().expected_payload_bytes() * 10
                <= AudioTransportProfile::pcm_s16le().expected_payload_bytes(),
            "Opus profile should be dramatically smaller than raw PCM"
        );
    }

    /// A default (unstamped) packet must resolve to the stable PCM profile.
    #[test]
    fn legacy_packets_normalize_to_pcm() {
        let packet = AudioPacket::default();
        let profile = packet_audio_profile(&packet);
        assert_eq!(profile.encoding, AudioEncoding::PcmS16le);
        assert_eq!(profile.sample_rate, 48_000);
        assert_eq!(profile.channels, 2);
        assert_eq!(profile.frame_duration_us, 20_000);
        assert_eq!(profile.target_bitrate_bps, None);
    }
}
|