385 lines
14 KiB
Rust
385 lines
14 KiB
Rust
// client/src/input/microphone.rs
|
|
use anyhow::{Context, Result, bail};
|
|
use gst::prelude::*;
|
|
use gstreamer as gst;
|
|
use gstreamer_app as gst_app;
|
|
use lesavka_common::{
|
|
audio_transport::{self, UpstreamAudioCodec},
|
|
lesavka::AudioPacket,
|
|
};
|
|
use shell_escape::unix::escape;
|
|
#[cfg(not(coverage))]
|
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
use std::{
|
|
collections::VecDeque,
|
|
path::{Path as StdPath, PathBuf},
|
|
sync::{
|
|
Arc, Mutex,
|
|
atomic::{AtomicBool, Ordering as AtomicOrdering},
|
|
},
|
|
thread,
|
|
time::Duration,
|
|
};
|
|
use tracing::{debug, info, warn};
|
|
#[cfg(not(coverage))]
|
|
use tracing::{error, trace};
|
|
|
|
const MIC_GAIN_ENV: &str = "LESAVKA_MIC_GAIN";
|
|
const MIC_GAIN_CONTROL_ENV: &str = "LESAVKA_MIC_GAIN_CONTROL";
|
|
const MIC_LEVEL_TAP_ENV: &str = "LESAVKA_UPLINK_MIC_LEVEL";
|
|
const MIC_PULSE_BUFFER_TIME_ENV: &str = "LESAVKA_MIC_PULSE_BUFFER_TIME_US";
|
|
const MIC_PULSE_LATENCY_TIME_ENV: &str = "LESAVKA_MIC_PULSE_LATENCY_TIME_US";
|
|
const MIC_PACKET_TARGET_DURATION_ENV: &str = "LESAVKA_MIC_PACKET_TARGET_US";
|
|
const REQUIRE_EXPLICIT_MEDIA_SOURCES_ENV: &str = "LESAVKA_REQUIRE_EXPLICIT_MEDIA_SOURCES";
|
|
const MIC_NOISE_SUPPRESSION_ENV: &str = "LESAVKA_MIC_NOISE_SUPPRESSION";
|
|
const MIC_NOISE_SUPPRESSION_LEVEL_ENV: &str = "LESAVKA_MIC_NOISE_SUPPRESSION_LEVEL";
|
|
const MIC_SAMPLE_RATE: u64 = 48_000;
|
|
const MIC_CHANNELS: usize = 2;
|
|
const MIC_SAMPLE_BYTES: usize = std::mem::size_of::<i16>();
|
|
const DEFAULT_MIC_PULSE_BUFFER_TIME_US: u64 = 40_000;
|
|
const DEFAULT_MIC_PULSE_LATENCY_TIME_US: u64 = 10_000;
|
|
const DEFAULT_MIC_PACKET_TARGET_DURATION_US: u64 = 20_000;
|
|
const MIC_MAIN_QUEUE_MAX_BUFFERS: u32 = 8;
|
|
const MIC_MAIN_QUEUE_MAX_TIME_NS: u64 = 80_000_000;
|
|
const MIC_APPSINK_MAX_BUFFERS: u32 = 8;
|
|
|
|
pub struct MicrophoneCapture {
|
|
#[allow(dead_code)] // kept alive to hold PLAYING state
|
|
pipeline: gst::Pipeline,
|
|
sink: gst_app::AppSink,
|
|
level_tap_running: Option<Arc<AtomicBool>>,
|
|
pts_rebaser: crate::live_capture_clock::DurationPacedSourcePtsRebaser,
|
|
pending_packets: Mutex<VecDeque<AudioPacket>>,
|
|
audio_encoder: Mutex<Option<crate::input::audio_codec::OpusPacketEncoder>>,
|
|
}
|
|
|
|
include!("microphone/capture_runtime.rs");
|
|
fn mic_level_tap_path() -> Option<PathBuf> {
|
|
std::env::var(MIC_LEVEL_TAP_ENV)
|
|
.ok()
|
|
.map(|value| value.trim().to_string())
|
|
.filter(|value| !value.is_empty())
|
|
.map(PathBuf::from)
|
|
}
|
|
|
|
/// Keeps `microphone_pipeline_desc` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn microphone_pipeline_desc(
|
|
source_desc: &str,
|
|
gain: f64,
|
|
level_tap_enabled: bool,
|
|
noise_suppression: bool,
|
|
) -> String {
|
|
let gain = format_mic_gain_for_gst(gain);
|
|
let noise_stage = microphone_noise_suppression_stage(noise_suppression);
|
|
if level_tap_enabled {
|
|
format!(
|
|
"{source_desc} ! \
|
|
audioconvert ! audioresample ! \
|
|
audio/x-raw,format=S16LE,channels={MIC_CHANNELS},rate={MIC_SAMPLE_RATE} ! \
|
|
{noise_stage}\
|
|
volume name=mic_input_gain volume={gain} ! \
|
|
tee name=t \
|
|
t. ! queue max-size-buffers={MIC_MAIN_QUEUE_MAX_BUFFERS} max-size-time={MIC_MAIN_QUEUE_MAX_TIME_NS} leaky=downstream ! \
|
|
audio/x-raw,format=S16LE,channels={MIC_CHANNELS},rate={MIC_SAMPLE_RATE} ! \
|
|
appsink name=asink emit-signals=true max-buffers={MIC_APPSINK_MAX_BUFFERS} drop=true \
|
|
t. ! queue max-size-buffers=8 leaky=downstream ! \
|
|
audio/x-raw,format=S16LE,channels={MIC_CHANNELS},rate={MIC_SAMPLE_RATE} ! \
|
|
appsink name=level_sink emit-signals=false sync=false max-buffers=8 drop=true"
|
|
)
|
|
} else {
|
|
format!(
|
|
"{source_desc} ! \
|
|
audioconvert ! audioresample ! \
|
|
audio/x-raw,format=S16LE,channels={MIC_CHANNELS},rate={MIC_SAMPLE_RATE} ! \
|
|
{noise_stage}\
|
|
volume name=mic_input_gain volume={gain} ! \
|
|
queue max-size-buffers={MIC_MAIN_QUEUE_MAX_BUFFERS} max-size-time={MIC_MAIN_QUEUE_MAX_TIME_NS} leaky=downstream ! \
|
|
appsink name=asink emit-signals=true max-buffers={MIC_APPSINK_MAX_BUFFERS} drop=true"
|
|
)
|
|
}
|
|
}
|
|
|
|
fn microphone_noise_suppression_stage(enabled: bool) -> String {
|
|
if enabled && gst::ElementFactory::find("webrtcdsp").is_some() {
|
|
format!(
|
|
"webrtcdsp echo-cancel=false noise-suppression=true noise-suppression-level={} high-pass-filter=true gain-control=false limiter=true ! ",
|
|
mic_noise_suppression_level()
|
|
)
|
|
} else {
|
|
String::new()
|
|
}
|
|
}
|
|
|
|
fn mic_noise_suppression_level() -> &'static str {
|
|
std::env::var(MIC_NOISE_SUPPRESSION_LEVEL_ENV)
|
|
.ok()
|
|
.and_then(|raw| match raw.trim().to_ascii_lowercase().as_str() {
|
|
"low" => Some("low"),
|
|
"moderate" | "medium" => Some("moderate"),
|
|
"high" => Some("high"),
|
|
"very-high" | "very_high" | "veryhigh" | "aggressive" => Some("very-high"),
|
|
_ => None,
|
|
})
|
|
.unwrap_or("very-high")
|
|
}
|
|
|
|
fn buffer_duration_us(buf: &gst::BufferRef, bytes: usize) -> u64 {
|
|
let payload_duration_us = pcm_payload_duration_us(bytes);
|
|
buf.duration()
|
|
.map(|ts| ts.nseconds() / 1_000)
|
|
.filter(|duration_us| duration_matches_pcm_payload(*duration_us, payload_duration_us))
|
|
.unwrap_or(payload_duration_us)
|
|
.max(1)
|
|
}
|
|
|
|
fn pcm_payload_duration_us(bytes: usize) -> u64 {
|
|
let bytes_per_frame = MIC_CHANNELS * MIC_SAMPLE_BYTES;
|
|
let frames = bytes / bytes_per_frame.max(1);
|
|
((frames as u128 * 1_000_000u128) / MIC_SAMPLE_RATE as u128).min(u64::MAX as u128) as u64
|
|
}
|
|
|
|
#[cfg(not(coverage))]
|
|
/// Keeps `log_microphone_lag_clamped_source` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn log_microphone_lag_clamped_source(
|
|
timing: crate::live_capture_clock::RebasedSourcePts,
|
|
bytes: usize,
|
|
) {
|
|
static MIC_LAG_CLAMPED_PACKETS: AtomicU64 = AtomicU64::new(0);
|
|
let packet_index = MIC_LAG_CLAMPED_PACKETS.fetch_add(1, Ordering::Relaxed);
|
|
if packet_index < 10 || packet_index.is_multiple_of(300) {
|
|
warn!(
|
|
packet_index,
|
|
bytes,
|
|
source_pts_us = timing.source_pts_us.unwrap_or_default(),
|
|
capture_now_us = timing.capture_now_us,
|
|
packet_pts_us = timing.packet_pts_us,
|
|
"🎤 clamped laggy microphone source timestamp before bundled uplink"
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Coverage-build stand-in for the lag-clamp logger.
///
/// Accepts the same arguments as the regular build's function and does nothing.
#[cfg(coverage)]
fn log_microphone_lag_clamped_source(
    _timing: crate::live_capture_clock::RebasedSourcePts,
    _bytes: usize,
) {
}
|
|
|
|
/// Keeps `split_audio_sample` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn split_audio_sample(base_pts_us: u64, data: &[u8], target_bytes: usize) -> VecDeque<AudioPacket> {
|
|
let frame_bytes = (MIC_CHANNELS * MIC_SAMPLE_BYTES).max(1);
|
|
let target_bytes = frame_aligned_packet_bytes(target_bytes.max(frame_bytes));
|
|
let mut packets = VecDeque::new();
|
|
let mut offset = 0usize;
|
|
while offset < data.len() {
|
|
let remaining = data.len() - offset;
|
|
let mut take = remaining.min(target_bytes);
|
|
if remaining > take {
|
|
take -= take % frame_bytes;
|
|
if take == 0 {
|
|
take = frame_bytes.min(remaining);
|
|
}
|
|
}
|
|
let end = offset.saturating_add(take).min(data.len());
|
|
if end == offset {
|
|
break;
|
|
}
|
|
let duration_us = pcm_payload_duration_us(take);
|
|
let mut packet = AudioPacket {
|
|
id: 0,
|
|
pts: base_pts_us.saturating_add(pcm_payload_duration_us(offset)),
|
|
data: data[offset..end].to_vec(),
|
|
frame_duration_us: duration_us.min(u64::from(u32::MAX)) as u32,
|
|
..Default::default()
|
|
};
|
|
audio_transport::mark_packet_pcm_s16le(&mut packet);
|
|
packets.push_back(packet);
|
|
offset = end;
|
|
}
|
|
packets
|
|
}
|
|
|
|
fn mic_packet_target_bytes() -> usize {
|
|
let frame_bytes = MIC_CHANNELS * MIC_SAMPLE_BYTES;
|
|
let target_us = mic_packet_target_duration_us().clamp(1_000, 100_000);
|
|
let frames = ((MIC_SAMPLE_RATE as u128 * target_us as u128) / 1_000_000u128)
|
|
.max(1)
|
|
.min(usize::MAX as u128) as usize;
|
|
frame_aligned_packet_bytes(frames.saturating_mul(frame_bytes))
|
|
}
|
|
|
|
fn frame_aligned_packet_bytes(bytes: usize) -> usize {
|
|
let frame_bytes = (MIC_CHANNELS * MIC_SAMPLE_BYTES).max(1);
|
|
((bytes / frame_bytes).max(1)).saturating_mul(frame_bytes)
|
|
}
|
|
|
|
/// Decides whether a reported buffer duration is plausible for its PCM payload.
///
/// * A zero `reported_us` is never accepted.
/// * When `payload_us` is 0 or 1 there is nothing meaningful to compare
///   against, so any non-zero report is accepted.
/// * Otherwise the report must lie within a factor of eight of `payload_us`
///   (lower bound at least 1, upper bound saturating).
fn duration_matches_pcm_payload(reported_us: u64, payload_us: u64) -> bool {
    match (reported_us, payload_us) {
        (0, _) => false,
        (_, 0 | 1) => true,
        (reported, payload) => {
            let plausible = (payload / 8).max(1)..=payload.saturating_mul(8);
            plausible.contains(&reported)
        }
    }
}
|
|
|
|
fn mic_pulse_buffer_time_us() -> u64 {
|
|
positive_u64_env(MIC_PULSE_BUFFER_TIME_ENV, DEFAULT_MIC_PULSE_BUFFER_TIME_US)
|
|
}
|
|
|
|
fn mic_pulse_latency_time_us() -> u64 {
|
|
positive_u64_env(
|
|
MIC_PULSE_LATENCY_TIME_ENV,
|
|
DEFAULT_MIC_PULSE_LATENCY_TIME_US,
|
|
)
|
|
}
|
|
|
|
fn mic_packet_target_duration_us() -> u64 {
|
|
positive_u64_env(
|
|
MIC_PACKET_TARGET_DURATION_ENV,
|
|
DEFAULT_MIC_PACKET_TARGET_DURATION_US,
|
|
)
|
|
}
|
|
|
|
/// Reads a strictly positive integer from the environment variable `name`.
///
/// Surrounding whitespace is ignored. Returns `default_value` when the
/// variable is unset, not valid Unicode, not a `u64`, or zero.
fn positive_u64_env(name: &str, default_value: u64) -> u64 {
    let Ok(raw) = std::env::var(name) else {
        return default_value;
    };
    match raw.trim().parse::<u64>() {
        Ok(parsed) if parsed > 0 => parsed,
        _ => default_value,
    }
}
|
|
|
|
fn explicit_media_sources_required() -> bool {
|
|
bool_env_enabled(REQUIRE_EXPLICIT_MEDIA_SOURCES_ENV)
|
|
}
|
|
|
|
fn mic_noise_suppression_from_env() -> bool {
|
|
bool_env_enabled(MIC_NOISE_SUPPRESSION_ENV)
|
|
}
|
|
|
|
/// Reports whether the environment variable `name` holds a truthy value.
///
/// After trimming whitespace, the accepted values are `1` and the
/// case-insensitive words `true`, `yes`, and `on`. Anything else, including an
/// unset or non-Unicode variable, is `false`.
fn bool_env_enabled(name: &str) -> bool {
    const TRUTHY_WORDS: [&str; 3] = ["true", "yes", "on"];

    let Ok(raw) = std::env::var(name) else {
        return false;
    };
    let flag = raw.trim();
    flag == "1" || TRUTHY_WORDS.iter().any(|word| flag.eq_ignore_ascii_case(word))
}
|
|
|
|
/// Detects launcher catalog names that should be opened through Pulse directly.
///
/// After trimming whitespace, a name qualifies when it starts with
/// `alsa_input.`, `bluez_input.`, or `input.`.
fn looks_like_pulse_source_name(source: &str) -> bool {
    const PULSE_PREFIXES: [&str; 3] = ["alsa_input.", "bluez_input.", "input."];

    let name = source.trim();
    PULSE_PREFIXES
        .iter()
        .any(|prefix| name.starts_with(*prefix))
}
|
|
|
|
fn mic_gain_from_env() -> f64 {
|
|
std::env::var(MIC_GAIN_ENV)
|
|
.ok()
|
|
.and_then(|raw| parse_mic_gain(&raw))
|
|
.unwrap_or(1.0)
|
|
}
|
|
|
|
/// Keeps `parse_mic_gain` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn parse_mic_gain(raw: &str) -> Option<f64> {
|
|
let value = raw.split_ascii_whitespace().next()?.parse::<f64>().ok()?;
|
|
value.is_finite().then_some(clamp_mic_gain(value))
|
|
}
|
|
|
|
/// Restricts a linear gain to the supported `0.0..=4.0` range.
///
/// `NaN` is passed through unchanged, matching `f64::clamp`.
fn clamp_mic_gain(value: f64) -> f64 {
    const MIN_GAIN: f64 = 0.0;
    const MAX_GAIN: f64 = 4.0;
    value.clamp(MIN_GAIN, MAX_GAIN)
}
|
|
|
|
fn format_mic_gain_for_gst(gain: f64) -> String {
|
|
format!("{:.3}", clamp_mic_gain(gain))
|
|
}
|
|
|
|
/// Keeps `maybe_spawn_mic_gain_control` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn maybe_spawn_mic_gain_control(volume: gst::Element) {
|
|
let Ok(path) = std::env::var(MIC_GAIN_CONTROL_ENV) else {
|
|
return;
|
|
};
|
|
let path = std::path::PathBuf::from(path);
|
|
thread::spawn(move || {
|
|
let mut last_gain = None;
|
|
loop {
|
|
if let Some(gain) = read_mic_gain_control(&path)
|
|
&& last_gain != Some(gain)
|
|
{
|
|
volume.set_property("volume", gain);
|
|
last_gain = Some(gain);
|
|
tracing::info!("🎤 mic gain set to {gain:.2}x");
|
|
}
|
|
thread::sleep(Duration::from_millis(100));
|
|
}
|
|
});
|
|
}
|
|
|
|
/// Keeps `spawn_mic_level_tap` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn spawn_mic_level_tap(sink: gst_app::AppSink, path: PathBuf) -> Arc<AtomicBool> {
|
|
let running = Arc::new(AtomicBool::new(true));
|
|
let thread_running = Arc::clone(&running);
|
|
thread::spawn(move || {
|
|
while thread_running.load(AtomicOrdering::Acquire) {
|
|
if let Some(sample) = sink.try_pull_sample(gst::ClockTime::from_mseconds(250))
|
|
&& let Some(buffer) = sample.buffer()
|
|
&& let Ok(map) = buffer.map_readable()
|
|
{
|
|
let level = pcm_peak_fraction(map.as_slice());
|
|
if let Err(err) = write_mic_level_tap(&path, level) {
|
|
tracing::debug!("🎤 local uplink level tap write failed: {err:#}");
|
|
}
|
|
}
|
|
}
|
|
});
|
|
running
|
|
}
|
|
|
|
/// Computes the peak amplitude of little-endian 16-bit PCM as a fraction of
/// full scale.
///
/// `bytes` is read two bytes per sample; a trailing odd byte is ignored. The
/// result lies in `0.0..=1.0`, and empty input yields `0.0`.
fn pcm_peak_fraction(bytes: &[u8]) -> f64 {
    let mut peak: u16 = 0;
    for pair in bytes.chunks_exact(2) {
        let magnitude = i16::from_le_bytes([pair[0], pair[1]]).unsigned_abs();
        peak = peak.max(magnitude);
    }
    // `i16::MIN` has magnitude 32768, one above `i16::MAX`; the clamp caps it at 1.0.
    (f64::from(peak) / f64::from(i16::MAX)).clamp(0.0, 1.0)
}
|
|
|
|
fn write_mic_level_tap(path: &StdPath, level: f64) -> Result<()> {
|
|
let tmp_path = path.with_extension("tmp");
|
|
std::fs::write(&tmp_path, format!("{level:.6}\n"))
|
|
.with_context(|| format!("writing {}", tmp_path.display()))?;
|
|
std::fs::rename(&tmp_path, path).with_context(|| format!("publishing {}", path.display()))?;
|
|
Ok(())
|
|
}
|
|
|
|
fn read_mic_gain_control(path: &StdPath) -> Option<f64> {
|
|
std::fs::read_to_string(path)
|
|
.ok()
|
|
.and_then(|raw| parse_mic_gain(&raw))
|
|
}
|
|
|
|
impl Drop for MicrophoneCapture {
|
|
/// Keeps `drop` explicit because it sits on microphone capture setup, where host audio stacks expose different source names and latency controls.
|
|
/// Inputs are the typed parameters; output is the return value or side effect.
|
|
fn drop(&mut self) {
|
|
if let Some(running) = &self.level_tap_running {
|
|
running.store(false, AtomicOrdering::Release);
|
|
}
|
|
let _ = self.pipeline.set_state(gst::State::Null);
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
#[path = "microphone/tests/mod.rs"]
|
|
mod tests;
|