lesavka/common/src/audio_transport.rs

//! Shared audio-transport metadata helpers.
//!
//! Why: upstream audio is currently raw PCM, but Opus experiments need an
//! explicit schema and sizing model so client, server, and tests agree before
//! any production switch is made.

use crate::lesavka::{AudioEncoding, AudioPacket, UpstreamMediaBundle};

/// Sample rate of the raw PCM profile, in samples per second.
pub const PCM_SAMPLE_RATE: u32 = 48_000;
/// Channel count of the raw PCM profile.
pub const PCM_CHANNELS: u32 = 2;
/// Duration of one raw PCM frame, in microseconds (20 ms).
pub const PCM_FRAME_DURATION_US: u32 = 20_000;
/// Sample rate of the Opus profile, in samples per second.
pub const OPUS_SAMPLE_RATE: u32 = 48_000;
/// Channel count of the Opus profile.
pub const OPUS_CHANNELS: u32 = 2;
/// Duration of one Opus frame, in microseconds (20 ms).
pub const OPUS_FRAME_DURATION_US: u32 = 20_000;
/// Default Opus target bitrate, in bits per second. Also used for payload
/// sizing when a profile carries no explicit `target_bitrate_bps`.
pub const OPUS_DEFAULT_BITRATE_BPS: u32 = 96_000;

/// Operator-facing upstream audio transport choice.
///
/// Inputs: a UI/env/control-file token. Output: either the stable raw PCM route
/// or the compressed Opus route. Why: both ends need the same vocabulary so
/// Opus can be tried without weakening the known-good PCM fallback path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpstreamAudioCodec {
    /// Raw, uncompressed PCM route; its control-file id is `"pcm"`.
    PcmS16le,
    /// Compressed Opus route; its control-file id is `"opus"`.
    Opus,
}

impl UpstreamAudioCodec {
    /// Pair each codec with its `(id, label)` strings.
    ///
    /// Keeping both strings in one exhaustive match means a new variant cannot
    /// gain an id without also gaining a label.
    const fn id_and_label(self) -> (&'static str, &'static str) {
        match self {
            Self::Opus => ("opus", "Opus"),
            Self::PcmS16le => ("pcm", "PCM"),
        }
    }

    /// Compact identifier written to control files and environment values.
    ///
    /// Output: `"pcm"` for raw PCM, `"opus"` for Opus.
    #[must_use]
    pub const fn as_id(self) -> &'static str {
        self.id_and_label().0
    }

    /// Short human-readable name for the launcher UI and diagnostics.
    ///
    /// Output: `"PCM"` for raw PCM, `"Opus"` for Opus.
    #[must_use]
    pub const fn label(self) -> &'static str {
        self.id_and_label().1
    }

    /// Packet profile that this transport choice should emit.
    ///
    /// Output: the default PCM profile for `PcmS16le`, the default Opus voice
    /// profile for `Opus`.
    #[must_use]
    pub const fn profile(self) -> AudioTransportProfile {
        match self {
            Self::Opus => AudioTransportProfile::opus_voice(),
            Self::PcmS16le => AudioTransportProfile::pcm_s16le(),
        }
    }
}

/// Parse a launcher/env codec token into an upstream audio transport.
///
/// Inputs: a token; surrounding whitespace and ASCII letter case are ignored.
/// Output: the matching codec, or `None` when the token is not a known alias.
#[must_use]
pub fn parse_upstream_audio_codec(raw: &str) -> Option<UpstreamAudioCodec> {
    const PCM_ALIASES: [&str; 5] = ["pcm", "pcm_s16le", "s16le", "raw", "uncompressed"];
    const OPUS_ALIASES: [&str; 3] = ["opus", "compressed", "voice"];

    let token = raw.trim();
    // Compare in place instead of building a lowercased copy of the token.
    let names = |aliases: &[&str]| aliases.iter().any(|alias| token.eq_ignore_ascii_case(alias));

    if names(&PCM_ALIASES) {
        Some(UpstreamAudioCodec::PcmS16le)
    } else if names(&OPUS_ALIASES) {
        Some(UpstreamAudioCodec::Opus)
    } else {
        None
    }
}

/// A normalized description of the payload carried by `AudioPacket::data`.
///
/// Inputs are codec metadata resolved from a packet or bundle. Outputs are the
/// exact framing values the transport should preserve. Why: the Opus path must
/// compare bandwidth and timing without weakening the known-good PCM fallback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AudioTransportProfile {
    /// Payload encoding; decides how `expected_payload_bytes` sizes a frame.
    pub encoding: AudioEncoding,
    /// Samples per second.
    pub sample_rate: u32,
    /// Number of audio channels.
    pub channels: u32,
    /// Duration of one frame, in microseconds.
    pub frame_duration_us: u32,
    /// Target bitrate in bits per second. `None` in the PCM profile; when an
    /// Opus profile leaves it unset, payload sizing falls back to
    /// `OPUS_DEFAULT_BITRATE_BPS`.
    pub target_bitrate_bps: Option<u32>,
}

impl AudioTransportProfile {
    /// Return the stable raw PCM profile used by today's upstream microphone path.
    #[must_use]
    pub const fn pcm_s16le() -> Self {
        Self {
            encoding: AudioEncoding::PcmS16le,
            sample_rate: PCM_SAMPLE_RATE,
            channels: PCM_CHANNELS,
            frame_duration_us: PCM_FRAME_DURATION_US,
            target_bitrate_bps: None,
        }
    }

    /// Return the first Opus profile Lesavka should test for upstream audio.
    ///
    /// Inputs: none. Outputs: a 48 kHz stereo, 20 ms, 96 kbps low-delay
    /// profile. Why: Opus always runs internally at 48 kHz, and 20 ms frames
    /// keep latency bounded while preserving enough quality for live speech.
    #[must_use]
    pub const fn opus_voice() -> Self {
        Self {
            encoding: AudioEncoding::Opus,
            sample_rate: OPUS_SAMPLE_RATE,
            channels: OPUS_CHANNELS,
            frame_duration_us: OPUS_FRAME_DURATION_US,
            target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
        }
    }

    /// Estimate the payload budget for one frame in this transport profile.
    ///
    /// Inputs: profile fields. Output: bytes per frame. Why: performance tests
    /// need a stable, codec-independent way to compare PCM debt against Opus.
    #[must_use]
    pub fn expected_payload_bytes(self) -> u32 {
        match self.encoding {
            AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => {
                let samples = u64::from(self.sample_rate)
                    .saturating_mul(u64::from(self.frame_duration_us))
                    / 1_000_000;
                samples
                    .saturating_mul(u64::from(self.channels))
                    .saturating_mul(2)
                    .min(u64::from(u32::MAX)) as u32
            }
            AudioEncoding::Opus => {
                let bits = u64::from(self.target_bitrate_bps.unwrap_or(OPUS_DEFAULT_BITRATE_BPS))
                    .saturating_mul(u64::from(self.frame_duration_us))
                    / 1_000_000;
                bits.div_ceil(8).min(u64::from(u32::MAX)) as u32
            }
        }
    }
}

/// Interpret unset/legacy audio encoding metadata as the stable PCM transport.
///
/// Inputs: raw proto enum integer. Output: normalized encoding, which is only
/// ever `PcmS16le` or `Opus`. Why: `AUDIO_UNSPECIFIED` exists for wire
/// compatibility but should not leave callers guessing whether a payload is
/// safe for the raw UAC sink.
///
/// Integers that do not convert to any `AudioEncoding` variant are also
/// reported as `PcmS16le`.
#[must_use]
pub fn normalize_audio_encoding(raw: i32) -> AudioEncoding {
    match AudioEncoding::try_from(raw) {
        Ok(AudioEncoding::Opus) => AudioEncoding::Opus,
        Ok(AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified) | Err(_) => {
            AudioEncoding::PcmS16le
        }
    }
}

/// Resolve the packet-level transport profile.
///
/// Inputs: a protobuf audio packet. Output: a normalized profile. Why: callers
/// should not need to duplicate the legacy-PCM fallback or Opus defaults.
#[must_use]
pub fn packet_audio_profile(packet: &AudioPacket) -> AudioTransportProfile {
    match normalize_audio_encoding(packet.encoding) {
        AudioEncoding::Opus => AudioTransportProfile {
            encoding: AudioEncoding::Opus,
            sample_rate: nonzero_or(packet.sample_rate, OPUS_SAMPLE_RATE),
            channels: nonzero_or(packet.channels, OPUS_CHANNELS),
            frame_duration_us: nonzero_or(packet.frame_duration_us, OPUS_FRAME_DURATION_US),
            target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
        },
        AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => AudioTransportProfile {
            encoding: AudioEncoding::PcmS16le,
            sample_rate: nonzero_or(packet.sample_rate, PCM_SAMPLE_RATE),
            channels: nonzero_or(packet.channels, PCM_CHANNELS),
            frame_duration_us: nonzero_or(packet.frame_duration_us, PCM_FRAME_DURATION_US),
            target_bitrate_bps: None,
        },
    }
}

/// Resolve the bundle-level transport profile from the bundle marker or packets.
///
/// Inputs: an upstream media bundle. Output: the audio profile receivers should
/// expect. Why: the server needs to route the whole A/V bundle without splitting
/// audio away from video just to discover its codec.
#[must_use]
pub fn bundle_audio_profile(bundle: &UpstreamMediaBundle) -> AudioTransportProfile {
    let raw = if bundle.audio_encoding != 0 {
        bundle.audio_encoding
    } else {
        bundle
            .audio
            .first()
            .map(|packet| packet.encoding)
            .unwrap_or_default()
    };
    match normalize_audio_encoding(raw) {
        AudioEncoding::Opus => AudioTransportProfile {
            encoding: AudioEncoding::Opus,
            sample_rate: nonzero_or(bundle.audio_sample_rate, OPUS_SAMPLE_RATE),
            channels: nonzero_or(bundle.audio_channels, OPUS_CHANNELS),
            frame_duration_us: bundle
                .audio
                .first()
                .map(|packet| nonzero_or(packet.frame_duration_us, OPUS_FRAME_DURATION_US))
                .unwrap_or(OPUS_FRAME_DURATION_US),
            target_bitrate_bps: Some(OPUS_DEFAULT_BITRATE_BPS),
        },
        AudioEncoding::PcmS16le | AudioEncoding::AudioUnspecified => AudioTransportProfile {
            encoding: AudioEncoding::PcmS16le,
            sample_rate: nonzero_or(bundle.audio_sample_rate, PCM_SAMPLE_RATE),
            channels: nonzero_or(bundle.audio_channels, PCM_CHANNELS),
            frame_duration_us: bundle
                .audio
                .first()
                .map(|packet| nonzero_or(packet.frame_duration_us, PCM_FRAME_DURATION_US))
                .unwrap_or(PCM_FRAME_DURATION_US),
            target_bitrate_bps: None,
        },
    }
}

/// Stamp a packet as raw PCM with the current stable microphone framing.
///
/// Inputs: mutable packet. Output: side-effect only. Why: explicit metadata lets
/// future Opus tests distinguish today's known-good path from experimental
/// compressed audio without relying on payload-size guesses.
pub fn mark_packet_pcm_s16le(packet: &mut AudioPacket) {
    packet.encoding = AudioEncoding::PcmS16le as i32;
    packet.sample_rate = PCM_SAMPLE_RATE;
    packet.channels = PCM_CHANNELS;
    if packet.frame_duration_us == 0 {
        packet.frame_duration_us = PCM_FRAME_DURATION_US;
    }
}

/// Stamp a packet as Opus with the current low-latency voice framing.
///
/// Inputs: mutable packet. Output: side-effect only. Why: receivers must never
/// infer compressed audio from payload size, especially when PCM fallback can
/// intentionally coexist with Opus during startup.
pub fn mark_packet_opus(packet: &mut AudioPacket) {
    packet.encoding = AudioEncoding::Opus as i32;
    packet.sample_rate = OPUS_SAMPLE_RATE;
    packet.channels = OPUS_CHANNELS;
    if packet.frame_duration_us == 0 {
        packet.frame_duration_us = OPUS_FRAME_DURATION_US;
    }
}

/// Stamp a bundle with normalized audio metadata.
///
/// Inputs: mutable bundle. Output: side-effect only. Why: the server should be
/// able to choose the PCM or future Opus handoff path from the bundle envelope.
pub fn mark_bundle_audio_profile(bundle: &mut UpstreamMediaBundle, profile: AudioTransportProfile) {
    bundle.audio_encoding = profile.encoding as i32;
    bundle.audio_sample_rate = profile.sample_rate;
    bundle.audio_channels = profile.channels;
}

/// True when a packet can be handed directly to the raw UAC appsrc.
///
/// Inputs: a protobuf audio packet. Output: `true` when the packet's resolved
/// profile is raw PCM, which includes packets with unset encoding metadata.
#[must_use]
pub fn packet_is_raw_pcm_s16le(packet: &AudioPacket) -> bool {
    let resolved = packet_audio_profile(packet);
    matches!(resolved.encoding, AudioEncoding::PcmS16le)
}

/// Return `value`, or `fallback` when `value` is zero.
///
/// Zero is how the metadata fields in this module represent "unset".
const fn nonzero_or(value: u32, fallback: u32) -> u32 {
    match value {
        0 => fallback,
        provided => provided,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the per-frame byte budget of both default profiles.
    #[test]
    fn pcm_and_opus_profiles_keep_expected_frame_budgets() {
        let pcm_bytes = AudioTransportProfile::pcm_s16le().expected_payload_bytes();
        let opus_bytes = AudioTransportProfile::opus_voice().expected_payload_bytes();

        // 48 kHz * 20 ms = 960 samples, * 2 channels * 2 bytes per sample.
        assert_eq!(pcm_bytes, 3840);
        // 96 kbps * 20 ms = 1920 bits = 240 bytes.
        assert_eq!(opus_bytes, 240);
        assert!(
            opus_bytes * 10 <= pcm_bytes,
            "Opus profile should be dramatically smaller than raw PCM"
        );
    }

    /// Packets without any metadata must resolve to the default PCM profile.
    #[test]
    fn legacy_packets_normalize_to_pcm() {
        let packet = AudioPacket::default();
        let profile = packet_audio_profile(&packet);
        assert_eq!(profile.encoding, AudioEncoding::PcmS16le);
        assert_eq!(profile.sample_rate, 48_000);
        assert_eq!(profile.channels, 2);
        assert!(packet_is_raw_pcm_s16le(&packet));
    }

    /// A packet stamped as Opus resolves to the default Opus voice profile.
    #[test]
    fn opus_packets_resolve_to_the_opus_profile() {
        let mut packet = AudioPacket::default();
        mark_packet_opus(&mut packet);
        assert!(!packet_is_raw_pcm_s16le(&packet));
        assert_eq!(
            packet_audio_profile(&packet),
            AudioTransportProfile::opus_voice()
        );
    }

    /// Integers that match no encoding variant fall back to PCM.
    #[test]
    fn unknown_encodings_normalize_to_pcm() {
        assert_eq!(normalize_audio_encoding(i32::MAX), AudioEncoding::PcmS16le);
        assert_eq!(normalize_audio_encoding(-1), AudioEncoding::PcmS16le);
    }

    /// Every codec's id and label must be accepted back by the parser.
    #[test]
    fn codec_ids_and_labels_round_trip_through_the_parser() {
        for codec in [UpstreamAudioCodec::PcmS16le, UpstreamAudioCodec::Opus] {
            assert_eq!(parse_upstream_audio_codec(codec.as_id()), Some(codec));
            assert_eq!(parse_upstream_audio_codec(codec.label()), Some(codec));
        }
        assert_eq!(parse_upstream_audio_codec("flac"), None);
    }
}