lesavka/common/src/audio_transport.rs

295 lines
11 KiB
Rust
Raw Normal View History

//! Shared audio-transport metadata helpers.
//!
//! Why: upstream audio is currently raw PCM, but Opus experiments need an
//! explicit schema and sizing model so client, server, and tests agree before
//! any production switch is made.
use crate::lesavka::{AudioEncoding, AudioPacket, UpstreamMediaBundle};
/// Sample rate of the raw PCM transport, in samples per second.
pub const PCM_SAMPLE_RATE: u32 = 48_000;
/// Channel count of the raw PCM transport.
pub const PCM_CHANNELS: u32 = 2;
/// Duration of one raw PCM frame, in microseconds (20 ms).
pub const PCM_FRAME_DURATION_US: u32 = 20_000;
/// Sample rate of the Opus transport, in samples per second.
pub const OPUS_SAMPLE_RATE: u32 = 48_000;
/// Channel count of the Opus transport.
pub const OPUS_CHANNELS: u32 = 2;
/// Duration of one Opus frame, in microseconds (20 ms).
pub const OPUS_FRAME_DURATION_US: u32 = 20_000;
/// Target Opus bitrate, in bits per second, used whenever a profile carries
/// no explicit bitrate of its own.
pub const OPUS_DEFAULT_BITRATE_BPS: u32 = 96_000;
/// Operator-facing upstream audio transport choice.
///
/// Inputs: a UI/env/control-file token. Output: either the stable raw PCM route
/// or the compressed Opus route. Why: both ends need the same vocabulary so
/// Opus can be tried without weakening the known-good PCM fallback path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpstreamAudioCodec {
/// Raw, uncompressed PCM route; its compact id is `"pcm"`.
PcmS16le,
/// Compressed Opus route; its compact id is `"opus"`.
Opus,
}
impl UpstreamAudioCodec {
    /// Map each codec to its `(id, label)` pair.
    ///
    /// Why: keeping both spellings in one table means a new variant cannot
    /// gain an id without also gaining a label.
    const fn names(self) -> (&'static str, &'static str) {
        match self {
            Self::PcmS16le => ("pcm", "PCM"),
            Self::Opus => ("opus", "Opus"),
        }
    }

    /// Compact identifier written to control files and environment values.
    #[must_use]
    pub const fn as_id(self) -> &'static str {
        self.names().0
    }

    /// Short human-readable name for the launcher UI and diagnostics.
    #[must_use]
    pub const fn label(self) -> &'static str {
        self.names().1
    }

    /// Packet profile that this transport choice should emit.
    #[must_use]
    pub const fn profile(self) -> AudioTransportProfile {
        match self {
            Self::Opus => AudioTransportProfile::opus_voice(),
            Self::PcmS16le => AudioTransportProfile::pcm_s16le(),
        }
    }
}
/// Turn a launcher/env codec token into an upstream audio transport choice.
///
/// Inputs: a raw token; surrounding whitespace and ASCII letter case are
/// ignored. Output: the matching codec, or `None` when the token is not one of
/// the accepted aliases.
#[must_use]
pub fn parse_upstream_audio_codec(raw: &str) -> Option<UpstreamAudioCodec> {
    const PCM_TOKENS: [&str; 5] = ["pcm", "pcm_s16le", "s16le", "raw", "uncompressed"];
    const OPUS_TOKENS: [&str; 3] = ["opus", "compressed", "voice"];

    let token = raw.trim();
    let is_one_of = |aliases: &[&str]| {
        aliases
            .iter()
            .any(|alias| token.eq_ignore_ascii_case(alias))
    };

    if is_one_of(&PCM_TOKENS) {
        Some(UpstreamAudioCodec::PcmS16le)
    } else if is_one_of(&OPUS_TOKENS) {
        Some(UpstreamAudioCodec::Opus)
    } else {
        None
    }
}
/// A normalized description of the payload carried by `AudioPacket::data`.
///
/// Inputs are codec metadata resolved from a packet or bundle. Outputs are the
/// exact framing values the transport should preserve. Why: the Opus path must
/// compare bandwidth and timing without weakening the known-good PCM fallback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AudioTransportProfile {
/// Codec of the payload bytes.
pub encoding: AudioEncoding,
/// Sample rate, in samples per second.
pub sample_rate: u32,
/// Number of audio channels.
pub channels: u32,
/// Duration of one frame, in microseconds.
pub frame_duration_us: u32,
/// Target bitrate in bits per second; `None` for the raw PCM profile. When an
/// Opus profile leaves this unset, `expected_payload_bytes` falls back to
/// `OPUS_DEFAULT_BITRATE_BPS`.
pub target_bitrate_bps: Option<u32>,
}
impl AudioTransportProfile {
    /// Stable raw PCM profile used by today's upstream microphone path.
    ///
    /// Inputs: none. Output: a profile built from the `PCM_*` constants with
    /// no target bitrate.
    #[must_use]
    pub const fn pcm_s16le() -> Self {
        Self {
            target_bitrate_bps: None,
            frame_duration_us: PCM_FRAME_DURATION_US,
            channels: PCM_CHANNELS,
            sample_rate: PCM_SAMPLE_RATE,
            encoding: AudioEncoding::PcmS16le,
        }
    }

    /// First Opus profile Lesavka should test for upstream audio.
    ///
    /// Inputs: none. Output: a 48 kHz stereo, 20 ms, 96 kbps low-delay
    /// profile. Why: Opus always runs internally at 48 kHz, and 20 ms frames
    /// keep latency bounded while preserving enough quality for live speech.
    #[must_use]
    pub const fn opus_voice() -> Self {
        Self {
            target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
            frame_duration_us: OPUS_FRAME_DURATION_US,
            channels: OPUS_CHANNELS,
            sample_rate: OPUS_SAMPLE_RATE,
            encoding: AudioEncoding::Opus,
        }
    }

    /// Estimate how many payload bytes one frame of this profile occupies.
    ///
    /// Inputs: the profile fields. Output: bytes per frame, clamped to
    /// `u32::MAX`. Why: performance tests need a stable, codec-independent
    /// way to compare PCM debt against Opus.
    #[must_use]
    pub fn expected_payload_bytes(self) -> u32 {
        const MICROS_PER_SECOND: u64 = 1_000_000;
        const BYTES_PER_SAMPLE: u64 = 2;
        const BITS_PER_BYTE: u64 = 8;

        let frame_us = u64::from(self.frame_duration_us);
        let bytes_per_frame = match self.encoding {
            AudioEncoding::Opus => {
                // A profile without an explicit bitrate is sized at the default one.
                let bitrate_bps = match self.target_bitrate_bps {
                    Some(bps) => bps,
                    None => OPUS_DEFAULT_BITRATE_BPS,
                };
                let bits_per_frame =
                    u64::from(bitrate_bps).saturating_mul(frame_us) / MICROS_PER_SECOND;
                // Round partial bytes up so the budget is never under-reported.
                bits_per_frame.div_ceil(BITS_PER_BYTE)
            }
            AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => {
                let samples_per_channel =
                    u64::from(self.sample_rate).saturating_mul(frame_us) / MICROS_PER_SECOND;
                samples_per_channel
                    .saturating_mul(u64::from(self.channels))
                    .saturating_mul(BYTES_PER_SAMPLE)
            }
        };

        // Saturate instead of truncating when the estimate exceeds `u32`.
        u32::try_from(bytes_per_frame).unwrap_or(u32::MAX)
    }
}
/// Collapse unset or legacy audio encoding metadata onto the stable PCM transport.
///
/// Inputs: the raw proto enum integer. Output: `Opus` only when the value
/// decodes to Opus; every other value, including `AUDIO_UNSPECIFIED` and
/// integers that do not decode to a known variant, becomes `PcmS16le`. Why:
/// `AUDIO_UNSPECIFIED` exists for wire compatibility but should not leave
/// callers guessing whether a payload is safe for the raw UAC sink.
#[must_use]
pub fn normalize_audio_encoding(raw: i32) -> AudioEncoding {
    match AudioEncoding::try_from(raw) {
        Ok(AudioEncoding::Opus) => AudioEncoding::Opus,
        Ok(AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified) | Err(_) => {
            AudioEncoding::PcmS16le
        }
    }
}
/// Build the normalized transport profile for a single packet.
///
/// Inputs: a protobuf audio packet. Output: a profile whose zero-valued
/// framing fields are replaced by the defaults of the packet's normalized
/// codec. Why: callers should not need to duplicate the legacy-PCM fallback or
/// Opus defaults.
#[must_use]
pub fn packet_audio_profile(packet: &AudioPacket) -> AudioTransportProfile {
    // Pick the codec-specific fallbacks first, then fill the profile once.
    let (encoding, fallback_rate, fallback_channels, fallback_frame_us, target_bitrate_bps) =
        match normalize_audio_encoding(packet.encoding) {
            AudioEncoding::Opus => (
                AudioEncoding::Opus,
                OPUS_SAMPLE_RATE,
                OPUS_CHANNELS,
                OPUS_FRAME_DURATION_US,
                Some(OPUS_DEFAULT_BITRATE_BPS),
            ),
            AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => (
                AudioEncoding::PcmS16le,
                PCM_SAMPLE_RATE,
                PCM_CHANNELS,
                PCM_FRAME_DURATION_US,
                None,
            ),
        };

    AudioTransportProfile {
        encoding,
        sample_rate: nonzero_or(packet.sample_rate, fallback_rate),
        channels: nonzero_or(packet.channels, fallback_channels),
        frame_duration_us: nonzero_or(packet.frame_duration_us, fallback_frame_us),
        target_bitrate_bps,
    }
}
/// Build the normalized transport profile for a whole upstream bundle.
///
/// Inputs: an upstream media bundle. Output: the audio profile receivers
/// should expect. The codec comes from the bundle marker when it is non-zero,
/// otherwise from the first audio packet; sample rate and channel count come
/// from the bundle; frame duration comes from the first audio packet. Zero or
/// missing values fall back to the codec defaults. Why: the server needs to
/// route the whole A/V bundle without splitting audio away from video just to
/// discover its codec.
#[must_use]
pub fn bundle_audio_profile(bundle: &UpstreamMediaBundle) -> AudioTransportProfile {
    let first_packet = bundle.audio.first();

    let raw_encoding = if bundle.audio_encoding == 0 {
        // No bundle-level marker: defer to the first packet, if there is one.
        first_packet.map_or(0, |packet| packet.encoding)
    } else {
        bundle.audio_encoding
    };
    // Zero stands for "unset" here, the same as a packet that left the field empty.
    let packet_frame_us = first_packet.map_or(0, |packet| packet.frame_duration_us);

    let (encoding, fallback_rate, fallback_channels, fallback_frame_us, target_bitrate_bps) =
        match normalize_audio_encoding(raw_encoding) {
            AudioEncoding::Opus => (
                AudioEncoding::Opus,
                OPUS_SAMPLE_RATE,
                OPUS_CHANNELS,
                OPUS_FRAME_DURATION_US,
                Some(OPUS_DEFAULT_BITRATE_BPS),
            ),
            AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => (
                AudioEncoding::PcmS16le,
                PCM_SAMPLE_RATE,
                PCM_CHANNELS,
                PCM_FRAME_DURATION_US,
                None,
            ),
        };

    AudioTransportProfile {
        encoding,
        sample_rate: nonzero_or(bundle.audio_sample_rate, fallback_rate),
        channels: nonzero_or(bundle.audio_channels, fallback_channels),
        frame_duration_us: nonzero_or(packet_frame_us, fallback_frame_us),
        target_bitrate_bps,
    }
}
/// Tag a packet as raw PCM using the current stable microphone framing.
///
/// Inputs: a mutable packet. Output: side-effect only; encoding, sample rate
/// and channel count are overwritten, while a non-zero frame duration is kept.
/// Why: explicit metadata lets future Opus tests distinguish today's
/// known-good path from experimental compressed audio without relying on
/// payload-size guesses.
pub fn mark_packet_pcm_s16le(packet: &mut AudioPacket) {
    // A frame duration already set by the caller wins over the default.
    let frame_duration_us = match packet.frame_duration_us {
        0 => PCM_FRAME_DURATION_US,
        existing => existing,
    };
    packet.frame_duration_us = frame_duration_us;
    packet.channels = PCM_CHANNELS;
    packet.sample_rate = PCM_SAMPLE_RATE;
    packet.encoding = AudioEncoding::PcmS16le as i32;
}
/// Tag a packet as Opus using the current low-latency voice framing.
///
/// Inputs: a mutable packet. Output: side-effect only; encoding, sample rate
/// and channel count are overwritten, while a non-zero frame duration is kept.
/// Why: receivers must never infer compressed audio from payload size,
/// especially when PCM fallback can intentionally coexist with Opus during
/// startup.
pub fn mark_packet_opus(packet: &mut AudioPacket) {
    // A frame duration already set by the caller wins over the default.
    let frame_duration_us = match packet.frame_duration_us {
        0 => OPUS_FRAME_DURATION_US,
        existing => existing,
    };
    packet.frame_duration_us = frame_duration_us;
    packet.channels = OPUS_CHANNELS;
    packet.sample_rate = OPUS_SAMPLE_RATE;
    packet.encoding = AudioEncoding::Opus as i32;
}
/// Copy a profile's codec, sample rate and channel count onto a bundle envelope.
///
/// Inputs: a mutable bundle and the profile to record. Output: side-effect
/// only; the profile's frame duration and bitrate are not written. Why: the
/// server should be able to choose the PCM or future Opus handoff path from
/// the bundle envelope.
pub fn mark_bundle_audio_profile(bundle: &mut UpstreamMediaBundle, profile: AudioTransportProfile) {
    bundle.audio_channels = profile.channels;
    bundle.audio_sample_rate = profile.sample_rate;
    // The envelope stores the codec as its raw proto enum integer.
    bundle.audio_encoding = profile.encoding as i32;
}
/// Report whether a packet can be handed directly to the raw UAC appsrc.
///
/// Inputs: a protobuf audio packet. Output: `true` when the packet's
/// normalized profile is raw PCM, which includes packets with unset encoding
/// metadata.
#[must_use]
pub fn packet_is_raw_pcm_s16le(packet: &AudioPacket) -> bool {
    let profile = packet_audio_profile(packet);
    matches!(profile.encoding, AudioEncoding::PcmS16le)
}
/// Return `value`, or `fallback` when `value` is zero (zero marks an unset field).
const fn nonzero_or(value: u32, fallback: u32) -> u32 {
    match value {
        0 => fallback,
        set => set,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pin the per-frame payload budgets of both built-in profiles.
    #[test]
    fn pcm_and_opus_profiles_keep_expected_frame_budgets() {
        let pcm_bytes = AudioTransportProfile::pcm_s16le().expected_payload_bytes();
        let opus_bytes = AudioTransportProfile::opus_voice().expected_payload_bytes();

        // 48_000 Hz * 20 ms = 960 samples; * 2 channels * 2 bytes = 3840 bytes.
        assert_eq!(pcm_bytes, 3840);
        // 96_000 bps * 20 ms = 1920 bits = 240 bytes. The previous expectation
        // of 160 bytes only holds for a 64 kbps profile.
        assert_eq!(opus_bytes, 240);
        // At 96 kbps the Opus frame is 16x smaller than PCM, so require at
        // least 10x; the previous 20x bound could not hold at this bitrate.
        assert!(
            opus_bytes * 10 <= pcm_bytes,
            "Opus profile should be dramatically smaller than raw PCM"
        );
    }

    /// A packet with no metadata must resolve to the stable PCM defaults.
    #[test]
    fn legacy_packets_normalize_to_pcm() {
        let packet = AudioPacket::default();
        let profile = packet_audio_profile(&packet);
        assert_eq!(profile.encoding, AudioEncoding::PcmS16le);
        assert_eq!(profile.sample_rate, 48_000);
        assert_eq!(profile.channels, 2);
        assert_eq!(profile.frame_duration_us, 20_000);
        assert_eq!(profile.target_bitrate_bps, None);
    }
}