// lesavka/client/src/input/microphone.rs — (scrape banner preserved: 420 lines, 15 KiB, Rust)
// client/src/input/microphone.rs
use anyhow::{Context, Result};
use gst::prelude::*;
use gstreamer as gst;
use gstreamer_app as gst_app;
use lesavka_common::lesavka::AudioPacket;
use shell_escape::unix::escape;
#[cfg(not(coverage))]
use std::sync::atomic::{AtomicU64, Ordering};
use std::{
path::{Path as StdPath, PathBuf},
sync::{
Arc,
atomic::{AtomicBool, Ordering as AtomicOrdering},
},
thread,
time::Duration,
};
use tracing::{debug, warn};
#[cfg(not(coverage))]
use tracing::{error, info, trace};
/// Env var naming the startup mic gain multiplier (parsed by `mic_gain_from_env`).
const MIC_GAIN_ENV: &str = "LESAVKA_MIC_GAIN";
/// Env var naming a file path polled for live gain updates (see `maybe_spawn_mic_gain_control`).
const MIC_GAIN_CONTROL_ENV: &str = "LESAVKA_MIC_GAIN_CONTROL";
/// Env var naming the file where the uplink level tap publishes the current peak level.
const MIC_LEVEL_TAP_ENV: &str = "LESAVKA_UPLINK_MIC_LEVEL";
/// Owns a running GStreamer capture pipeline and the encoded-audio appsink.
///
/// Construct with [`MicrophoneCapture::new`]; pull encoded packets with
/// [`MicrophoneCapture::pull`]. Dropping the value stops the optional level-tap
/// worker and sets the pipeline to NULL.
pub struct MicrophoneCapture {
    #[allow(dead_code)] // kept alive to hold PLAYING state
    pipeline: gst::Pipeline,
    // Encoded-audio appsink (`name=asink` in the pipeline description).
    sink: gst_app::AppSink,
    // Stop flag for the level-tap thread; `Some` only when the tap is enabled.
    level_tap_running: Option<Arc<AtomicBool>>,
    // Rebases source PTS onto the shared capture clock (see `pull`).
    pts_rebaser: crate::live_capture_clock::SourcePtsRebaser,
}
impl MicrophoneCapture {
    /// Build, configure, and start the microphone capture pipeline.
    ///
    /// Source selection honours `LESAVKA_MIC_SOURCE` (resolved against the
    /// PipeWire and Pulse catalogs); otherwise a default source is used. The
    /// encoder is the first of `avenc_aac`/`fdkaacenc`/`faac`/`opusenc` found
    /// in the GStreamer registry. Optional features (live gain control file,
    /// level tap file) are enabled via env vars.
    ///
    /// # Errors
    /// Fails when the pipeline description cannot be parsed, the
    /// `mic_input_gain` volume element is missing, the pipeline refuses to
    /// reach PLAYING, or the level-tap appsink is missing while the tap is
    /// requested.
    pub fn new() -> Result<Self> {
        gst::init().ok(); // idempotent
        /* preferred path: pipewiresrc; fallback: pulsesrc ----------------*/
        // Honour LESAVKA_MIC_SOURCE when set and non-empty; fall back to the
        // default source when the requested name resolves in neither catalog.
        let source_desc = match std::env::var("LESAVKA_MIC_SOURCE") {
            Ok(s) if !s.is_empty() => match Self::resolve_source_desc(&s) {
                Some(desc) => desc,
                None => {
                    warn!("🎤 requested mic '{s}' not found; using default");
                    Self::default_source_desc()
                }
            },
            _ => Self::default_source_desc(),
        };
        debug!("🎤 source: {source_desc}");
        // First encoder present in the registry wins; opusenc is the
        // unconditional fallback even when the registry lookup finds nothing.
        let aac = ["avenc_aac", "fdkaacenc", "faac", "opusenc"]
            .into_iter()
            .find(|e| gst::ElementFactory::find(e).is_some())
            .unwrap_or("opusenc");
        let parser = parser_for_encoder(aac);
        let gain = mic_gain_from_env();
        let level_tap_path = mic_level_tap_path();
        let desc =
            microphone_pipeline_desc(&source_desc, aac, parser, gain, level_tap_path.is_some());
        let pipeline: gst::Pipeline = gst::parse::launch(&desc)?.downcast().expect("pipeline");
        // `asink` is always present in the description we just built, so the
        // unwraps here cannot fire unless the description itself changes.
        let sink: gst_app::AppSink = pipeline.by_name("asink").unwrap().downcast().unwrap();
        let volume = pipeline
            .by_name("mic_input_gain")
            .context("missing mic_input_gain volume")?;
        #[cfg(not(coverage))]
        {
            /* ─── bus for diagnostics ───────────────────────────────────────*/
            // Detached logger thread: drains bus messages until the bus is
            // torn down together with the pipeline.
            let bus = pipeline.bus().unwrap();
            std::thread::spawn(move || {
                use gst::MessageView::*;
                for msg in bus.iter_timed(gst::ClockTime::NONE) {
                    match msg.view() {
                        // Only report the top-level pipeline reaching PLAYING,
                        // not per-element state changes.
                        StateChanged(s)
                            if s.current() == gst::State::Playing
                                && msg.src().map(|s| s.is::<gst::Pipeline>()).unwrap_or(false) =>
                        {
                            info!("🎤 mic pipeline ▶️")
                        }
                        Error(e) => error!(
                            "🎤💥 mic: {} ({})",
                            e.error(),
                            e.debug().unwrap_or_default()
                        ),
                        Warning(w) => warn!(
                            "🎤⚠️ mic: {} ({})",
                            w.error(),
                            w.debug().unwrap_or_default()
                        ),
                        _ => {}
                    }
                }
            });
        }
        // Roll the pipeline back to NULL if it refuses to start, so no
        // half-started capture lingers.
        if let Err(err) = pipeline.set_state(gst::State::Playing) {
            let _ = pipeline.set_state(gst::State::Null);
            return Err(err).context("start mic pipeline");
        }
        maybe_spawn_mic_gain_control(volume);
        // Wire the optional level tap only after the pipeline is live; the
        // `level_sink` element exists iff the tap was requested in `desc`.
        let level_tap_running = if let Some(path) = level_tap_path {
            let level_sink = pipeline
                .by_name("level_sink")
                .context("missing microphone level tap appsink")?
                .downcast::<gst_app::AppSink>()
                .expect("microphone level tap appsink");
            Some(spawn_mic_level_tap(level_sink, path))
        } else {
            None
        };
        Ok(Self {
            pipeline,
            sink,
            level_tap_running,
            pts_rebaser: crate::live_capture_clock::SourcePtsRebaser::default(),
        })
    }

    /// Blocking pull; call from an async wrapper
    ///
    /// Returns the next encoded packet with its PTS rebased onto the shared
    /// capture clock, or `None` once `pull_sample` fails (EOS/flush/teardown).
    pub fn pull(&self) -> Option<AudioPacket> {
        match self.sink.pull_sample() {
            Ok(sample) => {
                // NOTE(review): assumes every pulled sample carries a mappable
                // buffer; a sample without one would panic here — confirm.
                let buf = sample.buffer().unwrap();
                let map = buf.map_readable().unwrap();
                // GStreamer PTS is in nanoseconds; the rebaser works in µs.
                let source_pts_us = buf.pts().map(|ts| ts.nseconds() / 1_000);
                let timing = self.pts_rebaser.rebase_or_now(source_pts_us, 1);
                let pts = timing.packet_pts_us;
                #[cfg(not(coverage))]
                {
                    // Log timing for the first 10 packets, then every 300th.
                    static CNT: AtomicU64 = AtomicU64::new(0);
                    let n = CNT.fetch_add(1, Ordering::Relaxed);
                    if crate::live_capture_clock::upstream_timing_trace_enabled()
                        && (n < 10 || n.is_multiple_of(300))
                    {
                        info!(
                            packet_index = n,
                            source_pts_us = timing.source_pts_us.unwrap_or_default(),
                            source_base_us = timing.source_base_us.unwrap_or_default(),
                            capture_base_us = timing.capture_base_us.unwrap_or_default(),
                            capture_now_us = timing.capture_now_us,
                            packet_pts_us = timing.packet_pts_us,
                            pull_path_delay_us =
                                timing.capture_now_us as i128 - timing.packet_pts_us as i128,
                            used_source_pts = timing.used_source_pts,
                            bytes = map.len(),
                            "🎤 upstream microphone timing sample"
                        );
                    }
                    if n < 10 || n.is_multiple_of(300) {
                        trace!("🎤⇧ cli pkt#{n} {} bytes", map.len());
                    }
                }
                Some(AudioPacket {
                    id: 0,
                    pts,
                    data: map.as_slice().to_vec(),
                })
            }
            Err(_) => None,
        }
    }

    /// Resolve launcher-selected mic names while preserving Pulse catalog routing.
    ///
    /// Order: Pulse-looking names go through `pactl` first; otherwise try the
    /// PipeWire catalog when `pipewiresrc` exists; finally fall back to a
    /// Pulse substring match. Returns `None` when nothing matches.
    fn resolve_source_desc(fragment: &str) -> Option<String> {
        if looks_like_pulse_source_name(fragment)
            && let Some(full) = Self::pulse_source_by_substr(fragment)
        {
            return Some(Self::pulse_source_desc(Some(&full)));
        }
        if Self::pipewire_source_available()
            && let Some(full) = Self::pipewire_source_by_substr(fragment)
        {
            return Some(Self::pipewire_source_desc(Some(&full)));
        }
        Self::pulse_source_by_substr(fragment).map(|full| Self::pulse_source_desc(Some(&full)))
    }

    /// True when the `pipewiresrc` element is available in the registry.
    /// Coverage builds can force this off via `LESAVKA_MIC_DISABLE_PIPEWIRE`.
    fn pipewire_source_available() -> bool {
        #[cfg(coverage)]
        if std::env::var("LESAVKA_MIC_DISABLE_PIPEWIRE").is_ok() {
            return false;
        }
        gst::ElementFactory::find("pipewiresrc").is_some()
    }

    /// Build a `pipewiresrc` fragment, shell-escaping the target object name;
    /// a missing/blank name yields the bare default-source fragment.
    fn pipewire_source_desc(source: Option<&str>) -> String {
        match source {
            Some(source) if !source.trim().is_empty() => {
                format!(
                    "pipewiresrc target-object={} do-timestamp=true",
                    escape(source.to_string().into())
                )
            }
            _ => "pipewiresrc do-timestamp=true".to_string(),
        }
    }

    /// Build a `pulsesrc` fragment, shell-escaping the device name;
    /// a missing/blank name yields the bare default-source fragment.
    fn pulse_source_desc(source: Option<&str>) -> String {
        match source {
            Some(source) if !source.trim().is_empty() => {
                format!(
                    "pulsesrc device={} do-timestamp=true",
                    escape(source.to_string().into())
                )
            }
            _ => "pulsesrc do-timestamp=true".to_string(),
        }
    }

    /// Find the first PipeWire `Audio/Source` node whose name (or nick)
    /// contains `fragment`, skipping `.monitor` loopbacks. Uses `pw-dump`
    /// JSON output; any spawn/parse failure yields `None`.
    fn pipewire_source_by_substr(fragment: &str) -> Option<String> {
        let out = std::process::Command::new("pw-dump").output().ok()?;
        let list = serde_json::from_slice::<serde_json::Value>(&out.stdout).ok()?;
        let objects = list.as_array()?;
        objects.iter().find_map(|object| {
            let props = object.get("info")?.get("props")?.as_object()?;
            if props.get("media.class")?.as_str()? != "Audio/Source" {
                return None;
            }
            let name = props
                .get("node.name")
                .or_else(|| props.get("node.nick"))?
                .as_str()?;
            if name.contains(fragment) && !name.ends_with(".monitor") {
                Some(name.to_owned())
            } else {
                None
            }
        })
    }

    /// Find the first Pulse source whose name column contains `fragment`,
    /// via `pactl list short sources`. Spawn failure yields `None`.
    fn pulse_source_by_substr(fragment: &str) -> Option<String> {
        use std::process::Command;
        let out = Command::new("pactl")
            .args(["list", "short", "sources"])
            .output()
            .ok()?;
        let list = String::from_utf8_lossy(&out.stdout);
        list.lines().find_map(|ln| {
            let mut cols = ln.split_whitespace();
            let _id = cols.next()?;
            let name = cols.next()?; // column #1
            if name.contains(fragment) {
                Some(name.to_owned())
            } else {
                None
            }
        })
    }

    /// Default source fragment: coverage builds may inject one via env;
    /// otherwise prefer PipeWire when available, else Pulse.
    fn default_source_desc() -> String {
        #[cfg(coverage)]
        if let Ok(source) = std::env::var("LESAVKA_MIC_TEST_SOURCE_DESC")
            && !source.trim().is_empty()
        {
            return source;
        }
        if Self::pipewire_source_available() {
            return Self::pipewire_source_desc(None);
        }
        Self::pulse_source_desc(None)
    }
}
/// Path of the uplink level-tap file, taken from `LESAVKA_UPLINK_MIC_LEVEL`.
/// Returns `None` when the variable is unset or blank after trimming.
fn mic_level_tap_path() -> Option<PathBuf> {
    let raw = std::env::var(MIC_LEVEL_TAP_ENV).ok()?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return None;
    }
    Some(PathBuf::from(trimmed))
}
/// Pipeline fragment that follows the chosen encoder.
///
/// Opus needs only a capsfilter; every AAC encoder is followed by `aacparse`
/// plus an ADTS capsfilter.
fn parser_for_encoder(aac: &str) -> &'static str {
    match aac.contains("opus") {
        true => "capsfilter caps=audio/x-opus,rate=48000,channels=2",
        false => {
            "aacparse ! capsfilter caps=audio/mpeg,stream-format=adts,rate=48000,channels=2"
        }
    }
}
/// Render the gst-launch description for the microphone pipeline.
///
/// With the level tap enabled, raw S16LE audio is teed: one branch feeds the
/// encoder and the `asink` appsink, the other feeds the `level_sink` appsink
/// that the tap thread drains. Without the tap, a single linear chain is
/// produced. `gain` is clamped and formatted by `format_mic_gain_for_gst`.
fn microphone_pipeline_desc(
    source_desc: &str,
    encoder: &str,
    parser: &str,
    gain: f64,
    level_tap_enabled: bool,
) -> String {
    let gain = format_mic_gain_for_gst(gain);
    match level_tap_enabled {
        true => format!(
            "{source_desc} ! \
             audioconvert ! audioresample ! \
             audio/x-raw,format=S16LE,channels=2,rate=48000 ! \
             volume name=mic_input_gain volume={gain} ! \
             tee name=t \
             t. ! queue max-size-buffers=100 leaky=downstream ! \
             audioconvert ! audioresample ! \
             audio/x-raw,channels=2,rate=48000 ! \
             {encoder} bitrate=128000 ! \
             {parser} ! \
             appsink name=asink emit-signals=true max-buffers=50 drop=true \
             t. ! queue max-size-buffers=8 leaky=downstream ! \
             audio/x-raw,format=S16LE,channels=2,rate=48000 ! \
             appsink name=level_sink emit-signals=false sync=false max-buffers=8 drop=true"
        ),
        false => format!(
            "{source_desc} ! \
             audioconvert ! audioresample ! \
             audio/x-raw,channels=2,rate=48000 ! \
             volume name=mic_input_gain volume={gain} ! \
             audioconvert ! audioresample ! \
             audio/x-raw,channels=2,rate=48000 ! \
             {encoder} bitrate=128000 ! \
             {parser} ! \
             queue max-size-buffers=100 leaky=downstream ! \
             appsink name=asink emit-signals=true max-buffers=50 drop=true"
        ),
    }
}
/// Detect launcher catalog names that should be opened through Pulse directly.
///
/// Matches the Pulse device-name prefixes `alsa_input.`, `bluez_input.`,
/// and `input.` after trimming surrounding whitespace.
fn looks_like_pulse_source_name(source: &str) -> bool {
    let trimmed = source.trim();
    ["alsa_input.", "bluez_input.", "input."]
        .iter()
        .any(|prefix| trimmed.starts_with(*prefix))
}
/// Startup gain from `LESAVKA_MIC_GAIN`; unity (1.0) when the variable is
/// unset or unparseable.
fn mic_gain_from_env() -> f64 {
    match std::env::var(MIC_GAIN_ENV) {
        Ok(raw) => parse_mic_gain(&raw).unwrap_or(1.0),
        Err(_) => 1.0,
    }
}
/// Parse the first whitespace-separated token of `raw` as a gain value.
///
/// Returns `None` for empty input, parse failures, or non-finite values;
/// finite values are clamped into the supported gain range.
fn parse_mic_gain(raw: &str) -> Option<f64> {
    let token = raw.split_ascii_whitespace().next()?;
    let value: f64 = token.parse().ok()?;
    if value.is_finite() {
        Some(clamp_mic_gain(value))
    } else {
        None
    }
}
/// Constrain a gain multiplier to the supported range (mute .. 4x boost).
fn clamp_mic_gain(value: f64) -> f64 {
    const MIN_GAIN: f64 = 0.0;
    const MAX_GAIN: f64 = 4.0;
    value.clamp(MIN_GAIN, MAX_GAIN)
}
/// Format a clamped gain as the fixed 3-decimal string the `volume` element
/// property expects in the pipeline description.
fn format_mic_gain_for_gst(gain: f64) -> String {
    let clamped = clamp_mic_gain(gain);
    format!("{clamped:.3}")
}
/// If `LESAVKA_MIC_GAIN_CONTROL` names a file, spawn a detached thread that
/// polls it every 100 ms and pushes changed gain values onto the pipeline's
/// `volume` element. No-op when the env var is unset.
///
/// NOTE(review): the thread loops forever and is never joined; it lives for
/// the remainder of the process.
fn maybe_spawn_mic_gain_control(volume: gst::Element) {
    let raw = match std::env::var(MIC_GAIN_CONTROL_ENV) {
        Ok(value) => value,
        Err(_) => return,
    };
    let path = std::path::PathBuf::from(raw);
    thread::spawn(move || {
        let mut previous: Option<f64> = None;
        loop {
            // Only touch the element when the published gain actually changed.
            if let Some(gain) = read_mic_gain_control(&path) {
                if previous != Some(gain) {
                    volume.set_property("volume", gain);
                    previous = Some(gain);
                    tracing::info!("🎤 mic gain set to {gain:.2}x");
                }
            }
            thread::sleep(Duration::from_millis(100));
        }
    });
}
/// Spawn the level-tap worker: while the returned flag stays `true`, pull raw
/// samples from `sink` (250 ms timeout per attempt), compute the PCM peak,
/// and publish it atomically to `path`. Clearing the flag stops the loop.
fn spawn_mic_level_tap(sink: gst_app::AppSink, path: PathBuf) -> Arc<AtomicBool> {
    let running = Arc::new(AtomicBool::new(true));
    let worker_flag = Arc::clone(&running);
    thread::spawn(move || {
        while worker_flag.load(AtomicOrdering::Acquire) {
            // Timed pull so the stop flag is re-checked at least every 250 ms.
            let sample = match sink.try_pull_sample(gst::ClockTime::from_mseconds(250)) {
                Some(sample) => sample,
                None => continue,
            };
            let Some(buffer) = sample.buffer() else { continue };
            let Ok(map) = buffer.map_readable() else { continue };
            let level = pcm_peak_fraction(map.as_slice());
            if let Err(err) = write_mic_level_tap(&path, level) {
                tracing::debug!("🎤 local uplink level tap write failed: {err:#}");
            }
        }
    });
    running
}
/// Peak amplitude of interleaved S16LE PCM bytes as a fraction in `[0, 1]`.
///
/// An odd trailing byte is ignored; `i16::MIN` (magnitude 32768) would exceed
/// full scale, so the result is clamped to 1.0. Empty input yields 0.0.
fn pcm_peak_fraction(bytes: &[u8]) -> f64 {
    let mut peak = 0.0_f64;
    for frame in bytes.chunks_exact(2) {
        let sample = i16::from_le_bytes([frame[0], frame[1]]);
        let magnitude = sample.unsigned_abs() as f64;
        if magnitude > peak {
            peak = magnitude;
        }
    }
    (peak / i16::MAX as f64).clamp(0.0, 1.0)
}
/// Publish `level` to `path` atomically: write a `.tmp` sibling first, then
/// rename it over the target so readers never observe a partial write.
///
/// # Errors
/// Propagates filesystem failures from the write or the rename, with the
/// offending path attached as context.
fn write_mic_level_tap(path: &StdPath, level: f64) -> Result<()> {
    let staging = path.with_extension("tmp");
    let payload = format!("{level:.6}\n");
    std::fs::write(&staging, payload)
        .with_context(|| format!("writing {}", staging.display()))?;
    std::fs::rename(&staging, path)
        .with_context(|| format!("publishing {}", path.display()))
}
/// Read and parse the gain-control file; `None` when the file is unreadable
/// or its contents do not parse as a finite gain.
fn read_mic_gain_control(path: &StdPath) -> Option<f64> {
    let contents = std::fs::read_to_string(path).ok()?;
    parse_mic_gain(&contents)
}
impl Drop for MicrophoneCapture {
    /// Signal the level-tap worker (if running) to stop, then drive the
    /// pipeline to NULL; the state-change result is deliberately ignored.
    fn drop(&mut self) {
        if let Some(flag) = self.level_tap_running.as_ref() {
            flag.store(false, AtomicOrdering::Release);
        }
        let _ = self.pipeline.set_state(gst::State::Null);
    }
}