// Source path: lesavka/server/src/bin/lesavka-synthetic-uplink.rs
// (Repository web-view listing: 502 lines, 16 KiB, Rust.)

#![forbid(unsafe_code)]
use std::{path::PathBuf, time::Duration};
use anyhow::{Context, Result, bail};
use gstreamer as gst;
use gstreamer::prelude::*;
use gstreamer_app as gst_app;
use lesavka_common::lesavka::{
AudioEncoding, AudioPacket, UpstreamMediaBundle, VideoPacket, relay_client::RelayClient,
};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tonic::Request;
/// gRPC endpoint used when `--server` is not given.
const DEFAULT_SERVER: &str = "http://127.0.0.1:50051";
/// Sample rate (Hz) advertised for, and used to size, the silent PCM packets.
const DEFAULT_SAMPLE_RATE: u32 = 48_000;
/// Channel count advertised for, and used to size, the silent PCM packets.
const DEFAULT_CHANNELS: u32 = 2;
/// `jpegenc` quality used when `--jpeg-quality` is not given.
const DEFAULT_JPEG_QUALITY: i32 = 90;
/// Number of low-order sequence bits drawn into each frame's marker.
const MARKER_BITS: usize = 32;
/// Number of marker cells per row; `MARKER_BITS` wraps onto further rows.
const MARKER_COLUMNS: usize = 16;
/// Run configuration assembled from the command line by `Args::parse`.
#[derive(Clone, Debug)]
struct Args {
/// gRPC endpoint URL the bundles are streamed to.
server: String,
/// Synthetic frame width in pixels.
width: usize,
/// Synthetic frame height in pixels.
height: usize,
/// Frames per second; also determines the per-frame timestamp step.
fps: u32,
/// Total length of the run; converted to a frame count by `total_frames`.
duration: Duration,
/// Quality handed to `jpegenc`; `Args::parse` clamps it to 1..=100.
jpeg_quality: i32,
/// Session id stamped on every bundle; defaults to the current Unix time in ms.
session_id: u64,
/// When set, `command.txt` and `summary.json` are written into this directory.
artifact_dir: Option<PathBuf>,
/// Progress is logged every this many frames; 0 disables progress output.
print_every: u64,
}
impl Args {
fn parse() -> Result<Self> {
let mut args = Self {
server: DEFAULT_SERVER.to_string(),
width: 1280,
height: 720,
fps: 30,
duration: Duration::from_secs(300),
jpeg_quality: DEFAULT_JPEG_QUALITY,
session_id: unix_millis(),
artifact_dir: None,
print_every: 150,
};
let mut it = std::env::args().skip(1);
while let Some(flag) = it.next() {
match flag.as_str() {
"--server" => args.server = next_value(&mut it, &flag)?,
"--width" => args.width = parse_next(&mut it, &flag)?,
"--height" => args.height = parse_next(&mut it, &flag)?,
"--fps" => args.fps = parse_next(&mut it, &flag)?,
"--duration" => {
let seconds: f64 = parse_next(&mut it, &flag)?;
args.duration = Duration::from_secs_f64(seconds.max(0.0));
}
"--jpeg-quality" => args.jpeg_quality = parse_next(&mut it, &flag)?,
"--session-id" => args.session_id = parse_next(&mut it, &flag)?,
"--artifact-dir" => {
args.artifact_dir = Some(PathBuf::from(next_value(&mut it, &flag)?))
}
"--print-every" => args.print_every = parse_next(&mut it, &flag)?,
"--mode" => {
let value = next_value(&mut it, &flag)?;
let (width, height, fps) = parse_mode(&value)?;
args.width = width;
args.height = height;
args.fps = fps;
}
"--help" | "-h" => {
print_help();
std::process::exit(0);
}
other => bail!("unknown argument {other:?}; pass --help for usage"),
}
}
if args.width == 0 || args.height == 0 || args.fps == 0 {
bail!("width, height, and fps must be positive");
}
args.jpeg_quality = args.jpeg_quality.clamp(1, 100);
Ok(args)
}
fn frame_step_us(&self) -> u64 {
(1_000_000_u64 / u64::from(self.fps)).max(1)
}
fn total_frames(&self) -> u64 {
let frames = self.duration.as_secs_f64() * f64::from(self.fps);
frames.ceil().max(1.0) as u64
}
}
/// GStreamer pipeline wrapper that turns synthetic RGB frames into JPEG buffers.
struct MjpegEncoder {
/// Pipeline input; raw RGB frames are pushed here.
src: gst_app::AppSrc,
/// Pipeline output; encoded JPEG samples are pulled from here.
sink: gst_app::AppSink,
/// Owning pipeline, kept so `Drop` can set it back to `Null`.
pipeline: gst::Pipeline,
/// Frame width in pixels, used to generate each synthetic frame.
width: usize,
/// Frame height in pixels, used to generate each synthetic frame.
height: usize,
/// Timestamp step per frame in microseconds (PTS = sequence * step).
frame_step_us: u64,
}
impl MjpegEncoder {
    /// Builds and starts the pipeline
    /// `appsrc -> videoconvert -> jpegenc -> capsfilter -> appsink`.
    ///
    /// The appsrc is configured for RGB frames of `args.width` x `args.height`
    /// at `args.fps`, with timestamps supplied by the caller (`do-timestamp`
    /// off). The appsink is unsynchronised and holds at most 8 buffers.
    ///
    /// # Errors
    ///
    /// Fails if GStreamer cannot be initialised, if a dimension or the frame
    /// rate does not fit the `i32` used by the caps, if an element cannot be
    /// created or linked, or if the pipeline refuses to enter `Playing`.
    fn new(args: &Args) -> Result<Self> {
        gst::init().context("gst init")?;
        // Caps fields are `i32`; a plain `as` cast would silently truncate or
        // go negative for oversized inputs and produce caps that do not match
        // the frames actually pushed.
        let width = i32::try_from(args.width).context("width does not fit in GStreamer caps")?;
        let height =
            i32::try_from(args.height).context("height does not fit in GStreamer caps")?;
        let fps = i32::try_from(args.fps).context("fps does not fit in GStreamer caps")?;
        let raw_caps = gst::Caps::builder("video/x-raw")
            .field("format", "RGB")
            .field("width", width)
            .field("height", height)
            .field("framerate", gst::Fraction::new(fps, 1))
            .build();
        let jpeg_caps = gst::Caps::builder("image/jpeg")
            .field("parsed", true)
            .field("width", width)
            .field("height", height)
            .field("framerate", gst::Fraction::new(fps, 1))
            .build();
        let pipeline = gst::Pipeline::new();
        let src = gst::ElementFactory::make("appsrc")
            .name("lesavka_synthetic_uplink_src")
            .build()?
            .downcast::<gst_app::AppSrc>()
            .expect("appsrc");
        src.set_is_live(false);
        src.set_format(gst::Format::Time);
        src.set_property("do-timestamp", false);
        src.set_caps(Some(&raw_caps));
        let convert = gst::ElementFactory::make("videoconvert").build()?;
        let encoder = gst::ElementFactory::make("jpegenc")
            .property("quality", args.jpeg_quality)
            .build()?;
        let capsfilter = gst::ElementFactory::make("capsfilter")
            .property("caps", &jpeg_caps)
            .build()?;
        let sink = gst::ElementFactory::make("appsink")
            .name("lesavka_synthetic_uplink_sink")
            .property("sync", false)
            .property("emit-signals", false)
            .property("max-buffers", 8u32)
            .build()?
            .downcast::<gst_app::AppSink>()
            .expect("appsink");
        pipeline.add_many([
            src.upcast_ref(),
            &convert,
            &encoder,
            &capsfilter,
            sink.upcast_ref(),
        ])?;
        gst::Element::link_many([
            src.upcast_ref(),
            &convert,
            &encoder,
            &capsfilter,
            sink.upcast_ref(),
        ])?;
        pipeline
            .set_state(gst::State::Playing)
            .context("starting synthetic MJPEG encoder")?;
        Ok(Self {
            src,
            sink,
            pipeline,
            width: args.width,
            height: args.height,
            frame_step_us: args.frame_step_us(),
        })
    }

    /// Generates the synthetic frame for `sequence`, pushes it through the
    /// pipeline, and returns the encoded JPEG bytes.
    ///
    /// The buffer is stamped with PTS/DTS `sequence * frame_step_us` and a
    /// duration of one frame step. The call pulls exactly one sample from the
    /// appsink after each push.
    ///
    /// # Errors
    ///
    /// Fails if the buffer cannot be pushed, no sample can be pulled, the
    /// sample carries no buffer, or the buffer cannot be mapped for reading.
    fn encode(&mut self, sequence: u64) -> Result<Vec<u8>> {
        let pts_us = sequence.saturating_mul(self.frame_step_us);
        let mut buffer =
            gst::Buffer::from_slice(synthetic_rgb_frame(self.width, self.height, sequence));
        if let Some(meta) = buffer.get_mut() {
            let pts = gst::ClockTime::from_useconds(pts_us);
            meta.set_pts(Some(pts));
            meta.set_dts(Some(pts));
            meta.set_duration(Some(gst::ClockTime::from_useconds(self.frame_step_us)));
        }
        self.src
            .push_buffer(buffer)
            .context("encoding synthetic frame")?;
        let sample = self
            .sink
            .pull_sample()
            .context("pulling encoded synthetic frame")?;
        let buffer = sample
            .buffer()
            .context("encoded synthetic frame had no buffer")?;
        let map = buffer
            .map_readable()
            .context("mapping encoded synthetic frame")?;
        Ok(map.as_slice().to_vec())
    }
}
impl Drop for MjpegEncoder {
    /// Best-effort teardown: signal end-of-stream on the appsrc, then set the
    /// pipeline to `Null`. Failures are ignored because nothing can be done
    /// about them while dropping.
    fn drop(&mut self) {
        let Self { src, pipeline, .. } = self;
        let _ = src.end_of_stream();
        let _ = pipeline.set_state(gst::State::Null);
    }
}
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<()> {
let args = Args::parse()?;
if let Some(dir) = &args.artifact_dir {
std::fs::create_dir_all(dir).with_context(|| format!("creating {}", dir.display()))?;
std::fs::write(
dir.join("command.txt"),
std::env::args().collect::<Vec<_>>().join(" ") + "\n",
)?;
std::fs::write(dir.join("summary.json"), args_summary_json(&args) + "\n")?;
}
let channel = tonic::transport::Channel::from_shared(args.server.clone())?
.tcp_nodelay(true)
.connect()
.await
.with_context(|| format!("connecting to {}", args.server))?;
let mut client = RelayClient::new(channel);
let (tx, rx) = mpsc::channel::<UpstreamMediaBundle>(8);
let response_task = tokio::spawn(async move {
let response = client
.stream_webcam_media(Request::new(ReceiverStream::new(rx)))
.await
.context("opening StreamWebcamMedia")?;
let mut inbound = response.into_inner();
while inbound
.message()
.await
.context("reading StreamWebcamMedia response")?
.is_some()
{}
Ok::<(), anyhow::Error>(())
});
let mut encoder = MjpegEncoder::new(&args)?;
let frame_step = Duration::from_micros(args.frame_step_us());
let started = tokio::time::Instant::now() + Duration::from_millis(250);
let total_frames = args.total_frames();
eprintln!(
"lesavka synthetic uplink: mode={}x{}@{} frames={} server={} session={}",
args.width, args.height, args.fps, total_frames, args.server, args.session_id
);
for sequence in 0..total_frames {
tokio::time::sleep_until(started + duration_mul(frame_step, sequence)).await;
let pts_us = sequence.saturating_mul(args.frame_step_us());
let encoded = encoder.encode(sequence)?;
let bundle = synthetic_bundle(&args, sequence, pts_us, encoded);
tx.send(bundle).await.context("sending synthetic bundle")?;
if args.print_every > 0 && sequence > 0 && sequence % args.print_every == 0 {
eprintln!("sent synthetic frame {sequence}/{total_frames}");
}
}
drop(tx);
response_task
.await
.context("joining StreamWebcamMedia task")??;
eprintln!("lesavka synthetic uplink complete: frames={total_frames}");
Ok(())
}
/// Assembles the bundle for one frame slot: the encoded video frame plus one
/// packet of silent PCM covering a single frame step.
///
/// `pts_us` is used for every timestamp field (packet PTS, capture/send
/// timestamps, and both bundle capture bounds), and `sequence` for every
/// sequence field.
fn synthetic_bundle(args: &Args, sequence: u64, pts_us: u64, data: Vec<u8>) -> UpstreamMediaBundle {
    let step_us = args.frame_step_us();
    let pcm_encoding = AudioEncoding::PcmS16le as i32;

    let frame_packet = VideoPacket {
        id: 0,
        pts: pts_us,
        data,
        seq: sequence,
        effective_fps: args.fps,
        client_capture_pts_us: pts_us,
        client_send_pts_us: pts_us,
        ..Default::default()
    };

    let silence_packet = AudioPacket {
        id: 0,
        pts: pts_us,
        data: silence_pcm(step_us),
        seq: sequence,
        client_capture_pts_us: pts_us,
        client_send_pts_us: pts_us,
        encoding: pcm_encoding,
        sample_rate: DEFAULT_SAMPLE_RATE,
        channels: DEFAULT_CHANNELS,
        // Saturate rather than wrap if the step ever exceeds `u32::MAX`.
        frame_duration_us: u32::try_from(step_us).unwrap_or(u32::MAX),
        ..Default::default()
    };

    UpstreamMediaBundle {
        session_id: args.session_id,
        seq: sequence,
        capture_start_us: pts_us,
        capture_end_us: pts_us,
        video: Some(frame_packet),
        audio: vec![silence_packet],
        audio_sample_rate: DEFAULT_SAMPLE_RATE,
        audio_channels: DEFAULT_CHANNELS,
        video_width: args.width as u32,
        video_height: args.height as u32,
        video_fps: args.fps,
        audio_encoding: pcm_encoding,
    }
}
/// Returns zeroed 16-bit PCM covering `duration_us` at `DEFAULT_SAMPLE_RATE`
/// and `DEFAULT_CHANNELS`.
///
/// The per-channel sample count is rounded down but is never less than one,
/// so the result is never empty. All arithmetic saturates.
fn silence_pcm(duration_us: u64) -> Vec<u8> {
    const BYTES_PER_SAMPLE: u64 = std::mem::size_of::<i16>() as u64;

    let rate = u64::from(DEFAULT_SAMPLE_RATE);
    let samples_per_channel = std::cmp::max(rate.saturating_mul(duration_us) / 1_000_000, 1);
    let byte_len = samples_per_channel
        .saturating_mul(u64::from(DEFAULT_CHANNELS))
        .saturating_mul(BYTES_PER_SAMPLE);
    let byte_len = usize::try_from(byte_len).unwrap_or(usize::MAX);
    vec![0; byte_len]
}
/// Renders the grayscale test pattern for `sequence` as packed RGB
/// (3 bytes per pixel, rows top to bottom), then stamps the sequence marker.
///
/// Each pixel takes its value from `synthetic_luma`. In the bottom half of the
/// frame a 32x24 checkerboard, whose phase flips every frame, divides the
/// value by three on alternating cells.
fn synthetic_rgb_frame(width: usize, height: usize, sequence: u64) -> Vec<u8> {
    let mut frame = vec![0u8; width.saturating_mul(height).saturating_mul(3)];
    let moving_period = (width / 3).max(64);
    let moving_width = (width / 18).max(12);
    let moving_offset = (sequence as usize).wrapping_mul(17) % moving_period;
    // `max(1)` keeps the chunk size valid when width is 0 (the frame is empty then).
    let row_bytes = width.saturating_mul(3).max(1);
    for (y, row) in frame.chunks_exact_mut(row_bytes).enumerate() {
        for (x, pixel) in row.chunks_exact_mut(3).enumerate() {
            let mut value = synthetic_luma(
                width,
                height,
                x,
                y,
                sequence,
                moving_period,
                moving_width,
                moving_offset,
            );
            // Only the lowest bit matters, so wrapping addition is exact and
            // cannot overflow-panic for very large sequence numbers.
            let checker_phase = (x / 32)
                .wrapping_add(y / 24)
                .wrapping_add(sequence as usize);
            if y >= height / 2 && checker_phase & 1 == 0 {
                value /= 3;
            }
            pixel.fill(value);
        }
    }
    draw_sequence_marker(&mut frame, width, height, sequence);
    frame
}
/// Computes the gray level of pixel (`x`, `y`) for frame `sequence`.
///
/// Three layers are applied in order:
/// 1. a diagonal gradient, `(3x + 5y + 11 * sequence) mod 256`;
/// 2. a vertical bar `moving_width` pixels wide, repeating every
///    `moving_period` pixels and shifted by `moving_offset`, which brightens
///    the gradient by 70 (saturating at 255);
/// 3. a box centred in the frame (`width / 9` by `height / 12` half-extents)
///    where the value becomes `255 - value / 2`.
///
/// `moving_period` must be non-zero.
fn synthetic_luma(
    width: usize,
    height: usize,
    x: usize,
    y: usize,
    sequence: u64,
    moving_period: usize,
    moving_width: usize,
    moving_offset: usize,
) -> u8 {
    // The sum is reduced modulo 256, so wrapping arithmetic gives the exact
    // result for every input. A saturating multiply followed by a plain add
    // would overflow once `11 * sequence` no longer fits in a u64.
    let gradient = (x as u64)
        .wrapping_mul(3)
        .wrapping_add((y as u64).wrapping_mul(5))
        .wrapping_add(sequence.wrapping_mul(11));
    let mut value = (gradient & 0xff) as u8;
    let moving = (x + moving_offset) % moving_period;
    if moving < moving_width {
        value = value.saturating_add(70);
    }
    let center_x = width / 2;
    let center_y = height / 2;
    if x.abs_diff(center_x) < width / 9 && y.abs_diff(center_y) < height / 12 {
        value = 255u8.saturating_sub(value / 2);
    }
    value
}
/// Side length in pixels of one marker cell: 1/80 of the shorter frame side,
/// limited to the range 6..=16.
fn marker_cell(width: usize, height: usize) -> usize {
    let shorter_side = if height < width { height } else { width };
    match shorter_side / 80 {
        small if small < 6 => 6,
        large if large > 16 => 16,
        cell => cell,
    }
}
/// Stamps a machine-readable copy of `sequence` into the top-left corner of
/// an RGB frame.
///
/// Layout, in units of one marker cell:
/// - a dark (32) backdrop starting at cell (1, 1), spanning
///   `MARKER_COLUMNS + 2` by `rows + 2` cells;
/// - a white (255) cell at the backdrop's top-left corner and a black (0)
///   cell in the same row, just past the last bit column;
/// - the low `MARKER_BITS` bits of `sequence`, least significant first, in
///   row-major order starting at cell (2, 2); white for 1, black for 0.
///
/// Nothing is drawn when the frame is too small to hold the marker.
fn draw_sequence_marker(frame: &mut [u8], width: usize, height: usize, sequence: u64) {
    let cell = marker_cell(width, height);
    let rows = MARKER_BITS.div_ceil(MARKER_COLUMNS);

    let wide_enough = width >= (MARKER_COLUMNS + 4) * cell;
    let tall_enough = height >= (rows + 4) * cell;
    if !(wide_enough && tall_enough) {
        return;
    }

    // The bit grid starts two cells in from the frame's top-left corner.
    let grid_x = 2 * cell;
    let grid_y = 2 * cell;
    let backdrop_w = (MARKER_COLUMNS + 2) * cell;
    let backdrop_h = (rows + 2) * cell;

    fill_rect(frame, width, cell, cell, backdrop_w, backdrop_h, 32);
    fill_rect(frame, width, grid_x - cell, grid_y - cell, cell, cell, 255);
    fill_rect(
        frame,
        width,
        grid_x + MARKER_COLUMNS * cell,
        grid_y - cell,
        cell,
        cell,
        0,
    );

    for row in 0..rows {
        for col in 0..MARKER_COLUMNS {
            let bit = row * MARKER_COLUMNS + col;
            if bit >= MARKER_BITS {
                break;
            }
            let is_set = (sequence >> bit) & 1 != 0;
            let shade = if is_set { 255 } else { 0 };
            fill_rect(
                frame,
                width,
                grid_x + col * cell,
                grid_y + row * cell,
                cell,
                cell,
                shade,
            );
        }
    }
}
/// Fills the `w` x `h` rectangle whose top-left pixel is (`x0`, `y0`) with a
/// uniform gray `value` in a packed RGB frame that is `width` pixels wide.
///
/// The frame height is derived from the slice length. The rectangle is
/// clipped to the frame, so out-of-range requests draw only the visible part
/// or nothing at all.
fn fill_rect(frame: &mut [u8], width: usize, x0: usize, y0: usize, w: usize, h: usize, value: u8) {
    let rows_in_frame = (frame.len() / 3) / width.max(1);
    let x_end = (x0 + w).min(width);
    let y_end = (y0 + h).min(rows_in_frame);
    if x0 >= x_end {
        return;
    }
    for y in y0..y_end {
        let row_start = y * width;
        // One contiguous byte run per row; clipping above keeps it in bounds.
        let span = (row_start + x0) * 3..(row_start + x_end) * 3;
        if let Some(run) = frame.get_mut(span) {
            run.fill(value);
        }
    }
}
/// Parses a `WIDTHxHEIGHT@FPS` mode string such as `1280x720@30`.
///
/// Returns `(width, height, fps)`. Zero values are not rejected here.
///
/// # Errors
///
/// Fails when the `@` or `x` separator is missing, or when any of the three
/// parts is not a valid unsigned integer. The error names the offending part
/// and quotes the full mode string.
fn parse_mode(value: &str) -> Result<(usize, usize, u32)> {
    let malformed = || format!("mode must look like WIDTHxHEIGHT@FPS, got {value:?}");
    let (size, fps_text) = value.split_once('@').with_context(malformed)?;
    let (width_text, height_text) = size.split_once('x').with_context(malformed)?;
    let width = width_text
        .parse()
        .with_context(|| format!("invalid width {width_text:?} in mode {value:?}"))?;
    let height = height_text
        .parse()
        .with_context(|| format!("invalid height {height_text:?} in mode {value:?}"))?;
    let fps = fps_text
        .parse()
        .with_context(|| format!("invalid fps {fps_text:?} in mode {value:?}"))?;
    Ok((width, height, fps))
}
fn next_value(it: &mut impl Iterator<Item = String>, flag: &str) -> Result<String> {
it.next()
.with_context(|| format!("{flag} requires a value"))
}
/// Takes the next command-line token as the value of `flag` and parses it as `T`.
///
/// # Errors
///
/// Fails when no value follows the flag, or when the value does not parse.
/// The parse error names the flag and quotes the rejected text.
fn parse_next<T>(it: &mut impl Iterator<Item = String>, flag: &str) -> Result<T>
where
    T: std::str::FromStr,
    T::Err: std::error::Error + Send + Sync + 'static,
{
    let raw = next_value(it, flag)?;
    raw.parse()
        .with_context(|| format!("invalid value {raw:?} for {flag}"))
}
/// Multiplies `duration` by `count`, saturating at `u64::MAX` nanoseconds
/// instead of overflowing.
fn duration_mul(duration: Duration, count: u64) -> Duration {
    let total_nanos = duration.as_nanos().saturating_mul(u128::from(count));
    let clamped_nanos = u64::try_from(total_nanos).unwrap_or(u64::MAX);
    Duration::from_nanos(clamped_nanos)
}
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns 0 if the system clock reads earlier than the epoch, and saturates
/// at `u64::MAX`.
fn unix_millis() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| {
            u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX)
        })
}
/// Serialises the run configuration as a single-line JSON object
/// (schema `lesavka.synthetic-uplink.v1`) for `summary.json`.
///
/// Contains the server URL, width, height, fps, duration in seconds with
/// millisecond precision, and session id.
fn args_summary_json(args: &Args) -> String {
    /// Quotes `raw` as a JSON string literal. Rust's `{:?}` formatting is not
    /// used because it emits escapes such as `\u{1}` and `\0`, which are not
    /// valid JSON.
    fn json_string(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len() + 2);
        out.push('"');
        for ch in raw.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                control if u32::from(control) < 0x20 => {
                    out.push_str(&format!("\\u{:04x}", u32::from(control)));
                }
                other => out.push(other),
            }
        }
        out.push('"');
        out
    }

    format!(
        "{{\"schema\":\"lesavka.synthetic-uplink.v1\",\"server\":{server},\"width\":{width},\"height\":{height},\"fps\":{fps},\"duration_s\":{duration:.3},\"session_id\":{session}}}",
        server = json_string(&args.server),
        width = args.width,
        height = args.height,
        fps = args.fps,
        duration = args.duration.as_secs_f64(),
        session = args.session_id,
    )
}
/// Prints command-line usage to stdout.
///
/// Lists every option accepted by `Args::parse`, including `--session-id`
/// and `--help`.
fn print_help() {
    println!(
        "lesavka-synthetic-uplink\n\n\
         Sends sequence-coded synthetic MJPEG plus silent PCM through StreamWebcamMedia.\n\n\
         Options:\n\
         --server URL gRPC endpoint, default {DEFAULT_SERVER}\n\
         --mode WIDTHxHEIGHT@FPS shorthand for width/height/fps\n\
         --width N --height N --fps N\n\
         --duration SECONDS default 300\n\
         --jpeg-quality N default {DEFAULT_JPEG_QUALITY}\n\
         --session-id N bundle session id, default current Unix time in ms\n\
         --artifact-dir PATH write command/summary metadata\n\
         --print-every N progress interval in frames\n\
         --help, -h print this help"
    );
}