// lesavka/client/src/input/microphone.rs — (scrape banner preserved: 420 lines, 15 KiB, Rust)
// client/src/input/microphone.rs
use anyhow::{Context, Result};
use gst::prelude::*;
use gstreamer as gst;
use gstreamer_app as gst_app;
use lesavka_common::lesavka::AudioPacket;
use shell_escape::unix::escape;
#[cfg(not(coverage))]
use std::sync::atomic::{AtomicU64, Ordering};
use std::{
path::{Path as StdPath, PathBuf},
sync::{
Arc,
atomic::{AtomicBool, Ordering as AtomicOrdering},
},
thread,
time::Duration,
};
use tracing::{debug, warn};
#[cfg(not(coverage))]
use tracing::{error, info, trace};
/// Env var naming the startup mic gain multiplier (parsed by `mic_gain_from_env`).
const MIC_GAIN_ENV: &str = "LESAVKA_MIC_GAIN";
/// Env var naming a file path polled for live gain updates (see `maybe_spawn_mic_gain_control`).
const MIC_GAIN_CONTROL_ENV: &str = "LESAVKA_MIC_GAIN_CONTROL";
/// Env var naming the file where the uplink level tap publishes the current peak level.
const MIC_LEVEL_TAP_ENV: &str = "LESAVKA_UPLINK_MIC_LEVEL";
/// Owns a running GStreamer capture pipeline and the encoded-audio appsink.
///
/// Construct with [`MicrophoneCapture::new`]; pull encoded packets with
/// [`MicrophoneCapture::pull`]. Dropping the value stops the optional level-tap
/// worker and sets the pipeline to NULL.
pub struct MicrophoneCapture {
    #[allow(dead_code)] // kept alive to hold PLAYING state
    pipeline: gst::Pipeline,
    // Encoded-audio appsink (`name=asink` in the pipeline description).
    sink: gst_app::AppSink,
    // Stop flag for the level-tap thread; `Some` only when the tap is enabled.
    level_tap_running: Option<Arc<AtomicBool>>,
    // Rebases source PTS onto the shared capture clock (see `pull`).
    pts_rebaser: crate::live_capture_clock::SourcePtsRebaser,
}
impl MicrophoneCapture {
    /// Build, configure, and start the microphone capture pipeline.
    ///
    /// Source selection honours `LESAVKA_MIC_SOURCE` (resolved against the
    /// PipeWire and Pulse catalogs); otherwise a default source is used. The
    /// encoder is the first of `avenc_aac`/`fdkaacenc`/`faac`/`opusenc` found
    /// in the GStreamer registry. Optional features (live gain control file,
    /// level tap file) are enabled via env vars.
    ///
    /// # Errors
    /// Fails when the pipeline description cannot be parsed, the
    /// `mic_input_gain` volume element is missing, the pipeline refuses to
    /// reach PLAYING, or the level-tap appsink is missing while the tap is
    /// requested.
    pub fn new() -> Result<Self> {
        gst::init().ok(); // idempotent
        /* preferred path: pipewiresrc; fallback: pulsesrc ----------------*/
        // Honour LESAVKA_MIC_SOURCE when set and non-empty; fall back to the
        // default source when the requested name resolves in neither catalog.
        let source_desc = match std::env::var("LESAVKA_MIC_SOURCE") {
            Ok(s) if !s.is_empty() => match Self::resolve_source_desc(&s) {
                Some(desc) => desc,
                None => {
                    warn!("🎤 requested mic '{s}' not found; using default");
                    Self::default_source_desc()
                }
            },
            _ => Self::default_source_desc(),
        };
        debug!("🎤 source: {source_desc}");
        // First encoder present in the registry wins; opusenc is the
        // unconditional fallback even when the registry lookup finds nothing.
        let aac = ["avenc_aac", "fdkaacenc", "faac", "opusenc"]
            .into_iter()
            .find(|e| gst::ElementFactory::find(e).is_some())
            .unwrap_or("opusenc");
        let parser = parser_for_encoder(aac);
        let gain = mic_gain_from_env();
        let level_tap_path = mic_level_tap_path();
        let desc =
            microphone_pipeline_desc(&source_desc, aac, parser, gain, level_tap_path.is_some());
        let pipeline: gst::Pipeline = gst::parse::launch(&desc)?.downcast().expect("pipeline");
        // `asink` is always present in the description we just built, so the
        // unwraps here cannot fire unless the description itself changes.
        let sink: gst_app::AppSink = pipeline.by_name("asink").unwrap().downcast().unwrap();
        let volume = pipeline
            .by_name("mic_input_gain")
            .context("missing mic_input_gain volume")?;
        #[cfg(not(coverage))]
        {
            /* ─── bus for diagnostics ───────────────────────────────────────*/
            // Detached logger thread: drains bus messages until the bus is
            // torn down together with the pipeline.
            let bus = pipeline.bus().unwrap();
            std::thread::spawn(move || {
                use gst::MessageView::*;
                for msg in bus.iter_timed(gst::ClockTime::NONE) {
                    match msg.view() {
                        // Only report the top-level pipeline reaching PLAYING,
                        // not per-element state changes.
                        StateChanged(s)
                            if s.current() == gst::State::Playing
                                && msg.src().map(|s| s.is::<gst::Pipeline>()).unwrap_or(false) =>
                        {
                            info!("🎤 mic pipeline ▶️")
                        }
                        Error(e) => error!(
                            "🎤💥 mic: {} ({})",
                            e.error(),
                            e.debug().unwrap_or_default()
                        ),
                        Warning(w) => warn!(
                            "🎤⚠️ mic: {} ({})",
                            w.error(),
                            w.debug().unwrap_or_default()
                        ),
                        _ => {}
                    }
                }
            });
        }
        // Roll the pipeline back to NULL if it refuses to start, so no
        // half-started capture lingers.
        if let Err(err) = pipeline.set_state(gst::State::Playing) {
            let _ = pipeline.set_state(gst::State::Null);
            return Err(err).context("start mic pipeline");
        }
        maybe_spawn_mic_gain_control(volume);
        // Wire the optional level tap only after the pipeline is live; the
        // `level_sink` element exists iff the tap was requested in `desc`.
        let level_tap_running = if let Some(path) = level_tap_path {
            let level_sink = pipeline
                .by_name("level_sink")
                .context("missing microphone level tap appsink")?
                .downcast::<gst_app::AppSink>()
                .expect("microphone level tap appsink");
            Some(spawn_mic_level_tap(level_sink, path))
        } else {
            None
        };
        Ok(Self {
            pipeline,
            sink,
            level_tap_running,
            pts_rebaser: crate::live_capture_clock::SourcePtsRebaser::default(),
        })
    }

    /// Blocking pull; call from an async wrapper
    ///
    /// Returns the next encoded packet with its PTS rebased onto the shared
    /// capture clock, or `None` once `pull_sample` fails (EOS/flush/teardown).
    pub fn pull(&self) -> Option<AudioPacket> {
        match self.sink.pull_sample() {
            Ok(sample) => {
                // NOTE(review): assumes every pulled sample carries a mappable
                // buffer; a sample without one would panic here — confirm.
                let buf = sample.buffer().unwrap();
                let map = buf.map_readable().unwrap();
                // GStreamer PTS is in nanoseconds; the rebaser works in µs.
                let source_pts_us = buf.pts().map(|ts| ts.nseconds() / 1_000);
                let timing = self.pts_rebaser.rebase_or_now(source_pts_us, 1);
                let pts = timing.packet_pts_us;
                #[cfg(not(coverage))]
                {
                    // Log timing for the first 10 packets, then every 300th.
                    static CNT: AtomicU64 = AtomicU64::new(0);
                    let n = CNT.fetch_add(1, Ordering::Relaxed);
                    if crate::live_capture_clock::upstream_timing_trace_enabled()
                        && (n < 10 || n.is_multiple_of(300))
                    {
                        info!(
                            packet_index = n,
                            source_pts_us = timing.source_pts_us.unwrap_or_default(),
                            source_base_us = timing.source_base_us.unwrap_or_default(),
                            capture_base_us = timing.capture_base_us.unwrap_or_default(),
                            capture_now_us = timing.capture_now_us,
                            packet_pts_us = timing.packet_pts_us,
                            pull_path_delay_us =
                                timing.capture_now_us as i128 - timing.packet_pts_us as i128,
                            used_source_pts = timing.used_source_pts,
                            bytes = map.len(),
                            "🎤 upstream microphone timing sample"
                        );
                    }
                    if n < 10 || n.is_multiple_of(300) {
                        trace!("🎤⇧ cli pkt#{n} {} bytes", map.len());
                    }
                }
                Some(AudioPacket {
                    id: 0,
                    pts,
                    data: map.as_slice().to_vec(),
                })
            }
            Err(_) => None,
        }
    }

    /// Resolve launcher-selected mic names while preserving Pulse catalog routing.
    ///
    /// Order: Pulse-looking names go through `pactl` first; otherwise try the
    /// PipeWire catalog when `pipewiresrc` exists; finally fall back to a
    /// Pulse substring match. Returns `None` when nothing matches.
    fn resolve_source_desc(fragment: &str) -> Option<String> {
        if looks_like_pulse_source_name(fragment)
            && let Some(full) = Self::pulse_source_by_substr(fragment)
        {
            return Some(Self::pulse_source_desc(Some(&full)));
        }
        if Self::pipewire_source_available()
            && let Some(full) = Self::pipewire_source_by_substr(fragment)
        {
            return Some(Self::pipewire_source_desc(Some(&full)));
        }
        Self::pulse_source_by_substr(fragment).map(|full| Self::pulse_source_desc(Some(&full)))
    }

    /// True when the `pipewiresrc` element is available in the registry.
    /// Coverage builds can force this off via `LESAVKA_MIC_DISABLE_PIPEWIRE`.
    fn pipewire_source_available() -> bool {
        #[cfg(coverage)]
        if std::env::var("LESAVKA_MIC_DISABLE_PIPEWIRE").is_ok() {
            return false;
        }
        gst::ElementFactory::find("pipewiresrc").is_some()
    }

    /// Build a `pipewiresrc` fragment, shell-escaping the target object name;
    /// a missing/blank name yields the bare default-source fragment.
    fn pipewire_source_desc(source: Option<&str>) -> String {
        match source {
            Some(source) if !source.trim().is_empty() => {
                format!(
                    "pipewiresrc target-object={} do-timestamp=true",
                    escape(source.to_string().into())
                )
            }
            _ => "pipewiresrc do-timestamp=true".to_string(),
        }
    }

    /// Build a `pulsesrc` fragment, shell-escaping the device name;
    /// a missing/blank name yields the bare default-source fragment.
    fn pulse_source_desc(source: Option<&str>) -> String {
        match source {
            Some(source) if !source.trim().is_empty() => {
                format!(
                    "pulsesrc device={} do-timestamp=true",
                    escape(source.to_string().into())
                )
            }
            _ => "pulsesrc do-timestamp=true".to_string(),
        }
    }

    /// Find the first PipeWire `Audio/Source` node whose name (or nick)
    /// contains `fragment`, skipping `.monitor` loopbacks. Uses `pw-dump`
    /// JSON output; any spawn/parse failure yields `None`.
    fn pipewire_source_by_substr(fragment: &str) -> Option<String> {
        let out = std::process::Command::new("pw-dump").output().ok()?;
        let list = serde_json::from_slice::<serde_json::Value>(&out.stdout).ok()?;
        let objects = list.as_array()?;
        objects.iter().find_map(|object| {
            let props = object.get("info")?.get("props")?.as_object()?;
            if props.get("media.class")?.as_str()? != "Audio/Source" {
                return None;
            }
            let name = props
                .get("node.name")
                .or_else(|| props.get("node.nick"))?
                .as_str()?;
            if name.contains(fragment) && !name.ends_with(".monitor") {
                Some(name.to_owned())
            } else {
                None
            }
        })
    }

    /// Find the first Pulse source whose name column contains `fragment`,
    /// via `pactl list short sources`. Spawn failure yields `None`.
    fn pulse_source_by_substr(fragment: &str) -> Option<String> {
        use std::process::Command;
        let out = Command::new("pactl")
            .args(["list", "short", "sources"])
            .output()
            .ok()?;
        let list = String::from_utf8_lossy(&out.stdout);
        list.lines().find_map(|ln| {
            let mut cols = ln.split_whitespace();
            let _id = cols.next()?;
            let name = cols.next()?; // column #1
            if name.contains(fragment) {
                Some(name.to_owned())
            } else {
                None
            }
        })
    }

    /// Default source fragment: coverage builds may inject one via env;
    /// otherwise prefer PipeWire when available, else Pulse.
    fn default_source_desc() -> String {
        #[cfg(coverage)]
        if let Ok(source) = std::env::var("LESAVKA_MIC_TEST_SOURCE_DESC")
            && !source.trim().is_empty()
        {
            return source;
        }
        if Self::pipewire_source_available() {
            return Self::pipewire_source_desc(None);
        }
        Self::pulse_source_desc(None)
    }
}
/// Path of the uplink level-tap file, taken from `LESAVKA_UPLINK_MIC_LEVEL`.
/// Returns `None` when the variable is unset or blank after trimming.
fn mic_level_tap_path() -> Option<PathBuf> {
    let raw = std::env::var(MIC_LEVEL_TAP_ENV).ok()?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(PathBuf::from(trimmed))
}
/// Pipeline fragment that follows the chosen encoder.
///
/// Opus needs only a capsfilter; every AAC encoder is followed by `aacparse`
/// plus an ADTS capsfilter.
fn parser_for_encoder(aac: &str) -> &'static str {
    match aac.contains("opus") {
        true => "capsfilter caps=audio/x-opus,rate=48000,channels=2",
        false => {
            "aacparse ! capsfilter caps=audio/mpeg,stream-format=adts,rate=48000,channels=2"
        }
    }
}
/// Render the gst-launch description for the microphone pipeline.
///
/// With the level tap enabled, raw S16LE audio is teed: one branch feeds the
/// encoder and the `asink` appsink, the other feeds the `level_sink` appsink
/// that the tap thread drains. Without the tap, a single linear chain is
/// produced. `gain` is clamped and formatted by `format_mic_gain_for_gst`.
fn microphone_pipeline_desc(
    source_desc: &str,
    encoder: &str,
    parser: &str,
    gain: f64,
    level_tap_enabled: bool,
) -> String {
    let gain = format_mic_gain_for_gst(gain);
    match level_tap_enabled {
        true => format!(
            "{source_desc} ! \
             audioconvert ! audioresample ! \
             audio/x-raw,format=S16LE,channels=2,rate=48000 ! \
             volume name=mic_input_gain volume={gain} ! \
             tee name=t \
             t. ! queue max-size-buffers=100 leaky=downstream ! \
             audioconvert ! audioresample ! \
             audio/x-raw,channels=2,rate=48000 ! \
             {encoder} bitrate=128000 ! \
             {parser} ! \
             appsink name=asink emit-signals=true max-buffers=50 drop=true \
             t. ! queue max-size-buffers=8 leaky=downstream ! \
             audio/x-raw,format=S16LE,channels=2,rate=48000 ! \
             appsink name=level_sink emit-signals=false sync=false max-buffers=8 drop=true"
        ),
        false => format!(
            "{source_desc} ! \
             audioconvert ! audioresample ! \
             audio/x-raw,channels=2,rate=48000 ! \
             volume name=mic_input_gain volume={gain} ! \
             audioconvert ! audioresample ! \
             audio/x-raw,channels=2,rate=48000 ! \
             {encoder} bitrate=128000 ! \
             {parser} ! \
             queue max-size-buffers=100 leaky=downstream ! \
             appsink name=asink emit-signals=true max-buffers=50 drop=true"
        ),
    }
}
/// Detect launcher catalog names that should be opened through Pulse directly.
///
/// Matches the Pulse device-name prefixes `alsa_input.`, `bluez_input.`,
/// and `input.` after trimming surrounding whitespace.
fn looks_like_pulse_source_name(source: &str) -> bool {
    let trimmed = source.trim();
    ["alsa_input.", "bluez_input.", "input."]
        .iter()
        .any(|prefix| trimmed.starts_with(*prefix))
}
/// Startup gain from `LESAVKA_MIC_GAIN`; unity (1.0) when the variable is
/// unset or unparseable.
fn mic_gain_from_env() -> f64 {
    match std::env::var(MIC_GAIN_ENV) {
        Ok(raw) => parse_mic_gain(&raw).unwrap_or(1.0),
        Err(_) => 1.0,
    }
}
/// Parse the first whitespace-separated token of `raw` as a gain value.
///
/// Returns `None` for empty input, parse failures, or non-finite values;
/// finite values are clamped into the supported gain range.
fn parse_mic_gain(raw: &str) -> Option<f64> {
    let token = raw.split_ascii_whitespace().next()?;
    let value: f64 = token.parse().ok()?;
    if value.is_finite() {
        Some(clamp_mic_gain(value))
    } else {
        None
    }
}
/// Constrain a gain multiplier to the supported range (mute .. 4x boost).
fn clamp_mic_gain(value: f64) -> f64 {
    const MIN_GAIN: f64 = 0.0;
    const MAX_GAIN: f64 = 4.0;
    value.clamp(MIN_GAIN, MAX_GAIN)
}
/// Format a clamped gain as the fixed 3-decimal string the `volume` element
/// property expects in the pipeline description.
fn format_mic_gain_for_gst(gain: f64) -> String {
    let clamped = clamp_mic_gain(gain);
    format!("{clamped:.3}")
}
/// If `LESAVKA_MIC_GAIN_CONTROL` names a file, spawn a detached thread that
/// polls it every 100 ms and pushes changed gain values onto the pipeline's
/// `volume` element. No-op when the env var is unset.
///
/// NOTE(review): the thread loops forever and is never joined; it lives for
/// the remainder of the process.
fn maybe_spawn_mic_gain_control(volume: gst::Element) {
    let raw = match std::env::var(MIC_GAIN_CONTROL_ENV) {
        Ok(value) => value,
        Err(_) => return,
    };
    let path = std::path::PathBuf::from(raw);
    thread::spawn(move || {
        let mut previous: Option<f64> = None;
        loop {
            // Only touch the element when the published gain actually changed.
            if let Some(gain) = read_mic_gain_control(&path) {
                if previous != Some(gain) {
                    volume.set_property("volume", gain);
                    previous = Some(gain);
                    tracing::info!("🎤 mic gain set to {gain:.2}x");
                }
            }
            thread::sleep(Duration::from_millis(100));
        }
    });
}
/// Spawn the level-tap worker: while the returned flag stays `true`, pull raw
/// samples from `sink` (250 ms timeout per attempt), compute the PCM peak,
/// and publish it atomically to `path`. Clearing the flag stops the loop.
fn spawn_mic_level_tap(sink: gst_app::AppSink, path: PathBuf) -> Arc<AtomicBool> {
    let running = Arc::new(AtomicBool::new(true));
    let worker_flag = Arc::clone(&running);
    thread::spawn(move || {
        while worker_flag.load(AtomicOrdering::Acquire) {
            // Timed pull so the stop flag is re-checked at least every 250 ms.
            let sample = match sink.try_pull_sample(gst::ClockTime::from_mseconds(250)) {
                Some(sample) => sample,
                None => continue,
            };
            let Some(buffer) = sample.buffer() else { continue };
            let Ok(map) = buffer.map_readable() else { continue };
            let level = pcm_peak_fraction(map.as_slice());
            if let Err(err) = write_mic_level_tap(&path, level) {
                tracing::debug!("🎤 local uplink level tap write failed: {err:#}");
            }
        }
    });
    running
}
/// Peak amplitude of interleaved S16LE PCM bytes as a fraction in `[0, 1]`.
///
/// An odd trailing byte is ignored; `i16::MIN` (magnitude 32768) would exceed
/// full scale, so the result is clamped to 1.0. Empty input yields 0.0.
fn pcm_peak_fraction(bytes: &[u8]) -> f64 {
    let mut peak = 0.0_f64;
    for frame in bytes.chunks_exact(2) {
        let sample = i16::from_le_bytes([frame[0], frame[1]]);
        let magnitude = sample.unsigned_abs() as f64;
        if magnitude > peak {
            peak = magnitude;
        }
    }
    (peak / i16::MAX as f64).clamp(0.0, 1.0)
}
/// Publish `level` to `path` atomically: write a `.tmp` sibling first, then
/// rename it over the target so readers never observe a partial write.
///
/// # Errors
/// Propagates filesystem failures from the write or the rename, with the
/// offending path attached as context.
fn write_mic_level_tap(path: &StdPath, level: f64) -> Result<()> {
    let staging = path.with_extension("tmp");
    let payload = format!("{level:.6}\n");
    std::fs::write(&staging, payload)
        .with_context(|| format!("writing {}", staging.display()))?;
    std::fs::rename(&staging, path)
        .with_context(|| format!("publishing {}", path.display()))
}
/// Read and parse the gain-control file; `None` when the file is unreadable
/// or its contents do not parse as a finite gain.
fn read_mic_gain_control(path: &StdPath) -> Option<f64> {
    let contents = std::fs::read_to_string(path).ok()?;
    parse_mic_gain(&contents)
}
impl Drop for MicrophoneCapture {
    /// Signal the level-tap worker (if running) to stop, then drive the
    /// pipeline to NULL; the state-change result is deliberately ignored.
    fn drop(&mut self) {
        if let Some(flag) = self.level_tap_running.as_ref() {
            flag.store(false, AtomicOrdering::Release);
        }
        let _ = self.pipeline.set_state(gst::State::Null);
    }
}