lesavka/client/src/sync_probe/analyze/media_extract.rs

679 lines
21 KiB
Rust

use anyhow::{Context, Result, bail};
use serde::Deserialize;
use std::path::Path;
use std::process::Command;
use super::onset_detection::VideoColorFrame;
// Side length (pixels) of the square thumbnail each video frame is scaled to
// before analysis; all ROI masks are sized against this grid.
const VIDEO_ANALYSIS_SIDE_PX: usize = 64;
// Frame rate the video is resampled to for analysis.
const VIDEO_ANALYSIS_FPS: usize = 60;
// An adaptive ROI mask must retain at least this many pixels to be used.
const MIN_ADAPTIVE_ROI_PIXELS: usize = 16;
// Upper bound on the fraction of thumbnail pixels an adaptive ROI may cover.
const MAX_ADAPTIVE_ROI_FRACTION: f64 = 0.35;
// Once the minimum ROI size is reached, pixels scoring below this fraction of
// the peak score are no longer added to the mask.
const ADAPTIVE_ROI_SCORE_FRACTION: f64 = 0.30;
// Minimum peak pixel score required before an RGB ROI mask is produced.
const MIN_RGB_ROI_SCORE: f64 = 24.0;
// Minimum peak pixel score required before a grayscale ROI mask is produced.
const MIN_GRAY_ROI_SCORE: f64 = 8.0;
/// Top-level shape of `ffprobe -show_frames -of json` output.
#[derive(Debug, Deserialize)]
struct ProbeFrameResponse {
    // Defaults to empty so output with no "frames" key still deserializes.
    #[serde(default)]
    frames: Vec<ProbeFrameEntry>,
}
/// A single frame record from ffprobe; the timestamp arrives as a decimal
/// string and may be absent for some frames.
#[derive(Debug, Deserialize)]
struct ProbeFrameEntry {
    best_effort_timestamp_time: Option<String>,
}
/// Reads per-frame best-effort timestamps (seconds) from the first video
/// stream of `capture_path` via `ffprobe` JSON output.
///
/// # Errors
/// Fails when ffprobe itself fails, its JSON cannot be parsed, a timestamp
/// string is not a valid float, or no timestamps were reported at all.
pub(super) fn extract_video_timestamps(capture_path: &Path) -> Result<Vec<f64>> {
    let mut probe = Command::new("ffprobe");
    probe
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-show_frames")
        .arg("-show_entries")
        .arg("frame=best_effort_timestamp_time")
        .arg("-of")
        .arg("json")
        .arg(capture_path);
    let raw = run_command(&mut probe, "ffprobe video timestamps")?;
    let parsed: ProbeFrameResponse =
        serde_json::from_slice(&raw).context("parsing ffprobe frame JSON")?;
    // Frames without a timestamp are skipped; malformed values are errors.
    let mut timestamps = Vec::with_capacity(parsed.frames.len());
    for entry in parsed.frames {
        if let Some(text) = entry.best_effort_timestamp_time {
            timestamps.push(text.parse::<f64>().context("parsing frame timestamp")?);
        }
    }
    if timestamps.is_empty() {
        bail!("ffprobe did not return any video frame timestamps");
    }
    Ok(timestamps)
}
/// Decodes the first video stream into grayscale analysis thumbnails and
/// reduces each one to a single brightness byte, using an adaptively chosen
/// region of interest when one can be found.
///
/// # Errors
/// Fails when ffmpeg fails, emits nothing, or emits a byte count that is not
/// a whole number of analysis frames.
pub(super) fn extract_video_brightness(capture_path: &Path) -> Result<Vec<u8>> {
    let filter = format!(
        "fps={fps},scale={side}:{side}:flags=area,format=gray",
        fps = VIDEO_ANALYSIS_FPS,
        side = VIDEO_ANALYSIS_SIDE_PX
    );
    let mut decode = Command::new("ffmpeg");
    decode
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        .arg("-i")
        .arg(capture_path)
        .arg("-map")
        .arg("0:v:0")
        .arg("-vf")
        .arg(filter)
        .arg("-f")
        .arg("rawvideo")
        .arg("-pix_fmt")
        .arg("gray")
        .arg("-");
    let raw = run_command(&mut decode, "ffmpeg video brightness extraction")?;
    if raw.is_empty() {
        bail!("ffmpeg did not emit any video brightness data");
    }
    let frame_pixels = VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX;
    if raw.len() % frame_pixels != 0 {
        bail!(
            "ffmpeg emitted {} bytes of video brightness data, which is not divisible by the {}-pixel analysis frame size",
            raw.len(),
            frame_pixels
        );
    }
    // After the divisibility check, chunks_exact yields every frame exactly.
    Ok(summarize_gray_frames_with_adaptive_roi(
        raw.chunks_exact(frame_pixels),
        frame_pixels,
    ))
}
/// Decodes the first video stream into RGB analysis thumbnails and reduces
/// each one to an average color, using an adaptively chosen region of
/// interest when one can be found.
///
/// # Errors
/// Fails when ffmpeg fails, emits nothing, or emits a byte count that is not
/// a whole number of RGB analysis frames.
pub(super) fn extract_video_colors(capture_path: &Path) -> Result<Vec<VideoColorFrame>> {
    let filter = format!(
        "fps={fps},scale={side}:{side}:flags=area,format=rgb24",
        fps = VIDEO_ANALYSIS_FPS,
        side = VIDEO_ANALYSIS_SIDE_PX
    );
    let mut decode = Command::new("ffmpeg");
    decode
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        .arg("-i")
        .arg(capture_path)
        .arg("-map")
        .arg("0:v:0")
        .arg("-vf")
        .arg(filter)
        .arg("-f")
        .arg("rawvideo")
        .arg("-pix_fmt")
        .arg("rgb24")
        .arg("-");
    let raw = run_command(&mut decode, "ffmpeg video color extraction")?;
    if raw.is_empty() {
        bail!("ffmpeg did not emit any video color data");
    }
    // Three bytes per pixel in rgb24 output.
    let frame_bytes = VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX * 3;
    if raw.len() % frame_bytes != 0 {
        bail!(
            "ffmpeg emitted {} bytes of video color data, which is not divisible by the {}-byte analysis frame size",
            raw.len(),
            frame_bytes
        );
    }
    // After the divisibility check, chunks_exact yields every frame exactly.
    Ok(summarize_rgb_frames_with_adaptive_roi(
        raw.chunks_exact(frame_bytes),
        VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX,
    ))
}
/// Decodes the first audio stream to mono signed 16-bit little-endian PCM at
/// the analysis sample rate and returns the samples.
///
/// # Errors
/// Fails when ffmpeg fails, emits fewer than one full sample, or emits an
/// odd byte count (a stream truncated mid-sample). Previously a trailing odd
/// byte was silently dropped by `chunks_exact(2)`; now it is reported, which
/// matches the strict size checks used by the video extractors.
pub(super) fn extract_audio_samples(capture_path: &Path) -> Result<Vec<i16>> {
    let output = run_command(
        Command::new("ffmpeg")
            .arg("-hide_banner")
            .arg("-loglevel")
            .arg("error")
            .arg("-i")
            .arg(capture_path)
            .arg("-map")
            .arg("0:a:0")
            .arg("-ac")
            .arg("1")
            .arg("-ar")
            .arg(super::onset_detection::DEFAULT_AUDIO_SAMPLE_RATE_HZ.to_string())
            .arg("-f")
            .arg("s16le")
            .arg("-acodec")
            .arg("pcm_s16le")
            .arg("-"),
        "ffmpeg audio extraction",
    )?;
    if output.len() < 2 {
        bail!("ffmpeg did not emit enough audio data to analyze");
    }
    // s16le samples are two bytes each; an odd total means the stream was
    // truncated mid-sample and should not be analyzed.
    if output.len() % 2 != 0 {
        bail!(
            "ffmpeg emitted a truncated s16le audio stream ({} bytes)",
            output.len()
        );
    }
    Ok(output
        .chunks_exact(2)
        .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]]))
        .collect())
}
/// Runs `command` to completion and returns its captured stdout on success.
///
/// # Errors
/// A spawn/IO failure is wrapped with `description` for context; a non-zero
/// exit status becomes an error carrying the trimmed stderr text.
pub(super) fn run_command(command: &mut Command, description: &str) -> Result<Vec<u8>> {
    let result = command
        .output()
        .with_context(|| format!("running {description}"))?;
    if result.status.success() {
        return Ok(result.stdout);
    }
    let stderr = String::from_utf8_lossy(&result.stderr);
    bail!("{description} failed: {}", stderr.trim());
}
/// Collects all grayscale frames, derives one shared adaptive ROI mask from
/// their variation, then summarizes every frame's brightness through it.
fn summarize_gray_frames_with_adaptive_roi<'a>(
    frames: impl Iterator<Item = &'a [u8]>,
    pixel_count: usize,
) -> Vec<u8> {
    let collected: Vec<&[u8]> = frames.collect();
    // The mask is built once from all frames so every frame is summarized
    // over the same region.
    let roi = adaptive_gray_roi_mask(&collected, pixel_count);
    collected
        .into_iter()
        .map(|frame| summarize_frame_brightness(frame, roi.as_deref()))
        .collect()
}
/// Collects all RGB frames, derives one shared adaptive ROI mask from their
/// variation, then summarizes every frame's color through it.
fn summarize_rgb_frames_with_adaptive_roi<'a>(
    frames: impl Iterator<Item = &'a [u8]>,
    pixel_count: usize,
) -> Vec<VideoColorFrame> {
    let collected: Vec<&[u8]> = frames.collect();
    // One mask for the whole clip keeps per-frame summaries comparable.
    let roi = adaptive_rgb_roi_mask(&collected, pixel_count);
    collected
        .into_iter()
        .map(|frame| summarize_frame_color(frame, roi.as_deref()))
        .collect()
}
/// Averages the brightness of the pixels selected by `mask` (all pixels when
/// no mask is given). Falls back to a whole-frame average when the mask
/// selects nothing; an empty frame yields 0.
fn summarize_frame_brightness(frame: &[u8], mask: Option<&[bool]>) -> u8 {
    let included =
        |index: usize| mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false));
    let mut total = 0u64;
    let mut count = 0u64;
    for (index, &value) in frame.iter().enumerate() {
        if included(index) {
            total += u64::from(value);
            count += 1;
        }
    }
    if count == 0 {
        // Mask selected nothing: average the entire frame instead.
        total = frame.iter().copied().map(u64::from).sum();
        count = frame.len().max(1) as u64;
    }
    ((total / count).min(u64::from(u8::MAX))) as u8
}
/// Averages RGB over masked pixels that look saturated (bright with a wide
/// channel spread). If none qualify, falls back to all masked pixels; if the
/// mask selects nothing, falls back to the whole frame, so a color is always
/// produced.
fn summarize_frame_color(frame: &[u8], mask: Option<&[bool]>) -> VideoColorFrame {
    let in_roi =
        |index: usize| mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false));
    let mut totals = [0u64; 3];
    let mut count = 0u64;
    // Pass 1: masked pixels that are both bright enough and colorful enough.
    for (index, pixel) in frame.chunks_exact(3).enumerate() {
        if !in_roi(index) {
            continue;
        }
        let brightest = pixel[0].max(pixel[1]).max(pixel[2]);
        let darkest = pixel[0].min(pixel[1]).min(pixel[2]);
        if brightest >= 60 && brightest.saturating_sub(darkest) >= 24 {
            totals[0] += u64::from(pixel[0]);
            totals[1] += u64::from(pixel[1]);
            totals[2] += u64::from(pixel[2]);
            count += 1;
        }
    }
    // Pass 2: no saturated pixels — average every masked pixel instead.
    if count == 0 {
        for (index, pixel) in frame.chunks_exact(3).enumerate() {
            if in_roi(index) {
                totals[0] += u64::from(pixel[0]);
                totals[1] += u64::from(pixel[1]);
                totals[2] += u64::from(pixel[2]);
                count += 1;
            }
        }
    }
    // Pass 3: mask selected nothing — average the whole frame.
    if count == 0 {
        for pixel in frame.chunks_exact(3) {
            totals[0] += u64::from(pixel[0]);
            totals[1] += u64::from(pixel[1]);
            totals[2] += u64::from(pixel[2]);
            count += 1;
        }
    }
    let count = count.max(1);
    VideoColorFrame {
        r: (totals[0] / count).min(u64::from(u8::MAX)) as u8,
        g: (totals[1] / count).min(u64::from(u8::MAX)) as u8,
        b: (totals[2] / count).min(u64::from(u8::MAX)) as u8,
    }
}
/// Scores each pixel by its brightness span across `frames`, discounted for
/// pixels that never get dark (likely static bright UI), then turns the
/// scores into an ROI mask. Returns `None` with fewer than two frames, zero
/// pixels, or when no pixel varies enough.
fn adaptive_gray_roi_mask(frames: &[&[u8]], pixel_count: usize) -> Option<Vec<bool>> {
    if pixel_count == 0 || frames.len() < 2 {
        return None;
    }
    let scores: Vec<f64> = (0..pixel_count)
        .map(|pixel_index| {
            let mut low = u8::MAX;
            let mut high = u8::MIN;
            for frame in frames {
                let value = frame[pixel_index];
                low = low.min(value);
                high = high.max(value);
            }
            // Span across time, penalized when the pixel stays bright.
            f64::from(high.saturating_sub(low)) * dark_roi_factor(low)
        })
        .collect();
    adaptive_roi_mask_from_scores(&scores, MIN_GRAY_ROI_SCORE)
}
/// Scores each RGB pixel by its per-channel and luma variation across
/// `frames`, boosted when any observed color resembles the probe palette and
/// discounted for pixels that never get dark, then turns the scores into an
/// ROI mask. Returns `None` with fewer than two frames, zero pixels, or when
/// no pixel varies enough.
fn adaptive_rgb_roi_mask(frames: &[&[u8]], pixel_count: usize) -> Option<Vec<bool>> {
    if pixel_count == 0 || frames.len() < 2 {
        return None;
    }
    let mut scores = Vec::with_capacity(pixel_count);
    for pixel_index in 0..pixel_count {
        let offset = pixel_index * 3;
        let mut channel_min = [u8::MAX; 3];
        let mut channel_max = [u8::MIN; 3];
        let mut luma_min = u8::MAX;
        let mut luma_max = u8::MIN;
        let mut palette_best = 0.0_f64;
        for frame in frames {
            let (r, g, b) = (frame[offset], frame[offset + 1], frame[offset + 2]);
            for (slot, value) in [r, g, b].into_iter().enumerate() {
                channel_min[slot] = channel_min[slot].min(value);
                channel_max[slot] = channel_max[slot].max(value);
            }
            let luma = luma_u8(r, g, b);
            luma_min = luma_min.min(luma);
            luma_max = luma_max.max(luma);
            palette_best = palette_best.max(palette_match_score(r, g, b));
        }
        let rgb_span: f64 = (0..3)
            .map(|slot| f64::from(channel_max[slot].saturating_sub(channel_min[slot])))
            .sum();
        let luma_span = f64::from(luma_max.saturating_sub(luma_min));
        // Luma variation is weighted double; palette hits scale the score up.
        scores.push(
            (rgb_span + 2.0 * luma_span) * (1.0 + palette_best) * dark_roi_factor(luma_min),
        );
    }
    adaptive_roi_mask_from_scores(&scores, MIN_RGB_ROI_SCORE)
}
/// Converts per-pixel scores into a boolean ROI mask: the highest-scoring
/// pixels are kept (at least `MIN_ADAPTIVE_ROI_PIXELS`, at most
/// `MAX_ADAPTIVE_ROI_FRACTION` of the frame, and nothing far below the peak
/// once the minimum is met), then reduced to the largest connected region.
/// Returns `None` when the peak score is below `min_score` or too few pixels
/// survive.
fn adaptive_roi_mask_from_scores(scores: &[f64], min_score: f64) -> Option<Vec<bool>> {
    let peak = scores.iter().copied().fold(0.0_f64, f64::max);
    if peak < min_score {
        return None;
    }
    // Rank only finite, positive scores, best first (stable sort keeps index
    // order for ties).
    let mut candidates: Vec<(usize, f64)> = scores
        .iter()
        .enumerate()
        .filter_map(|(index, &score)| (score.is_finite() && score > 0.0).then_some((index, score)))
        .collect();
    candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
    let cap = ((scores.len() as f64 * MAX_ADAPTIVE_ROI_FRACTION).round() as usize)
        .max(MIN_ADAPTIVE_ROI_PIXELS)
        .min(scores.len());
    let floor = (peak * ADAPTIVE_ROI_SCORE_FRACTION).max(min_score);
    let mut mask = vec![false; scores.len()];
    let mut picked = 0usize;
    for (index, score) in candidates.into_iter().take(cap) {
        // Stop adding weak pixels once the minimum ROI size is reached.
        if score < floor && picked >= MIN_ADAPTIVE_ROI_PIXELS {
            break;
        }
        mask[index] = true;
        picked += 1;
    }
    let mask = retain_largest_connected_roi(mask);
    let picked = mask.iter().filter(|&&kept| kept).count();
    (picked >= MIN_ADAPTIVE_ROI_PIXELS).then_some(mask)
}
/// Reduces `mask` (interpreted as a square grid) to its largest 4-connected
/// component. The input is returned unchanged when it is not a perfect
/// square or when the largest component would fall below
/// `MIN_ADAPTIVE_ROI_PIXELS`.
fn retain_largest_connected_roi(mask: Vec<bool>) -> Vec<bool> {
    let side = (mask.len() as f64).sqrt().round() as usize;
    if side == 0 || side * side != mask.len() {
        // Not a square grid: connectivity is undefined, keep the mask as-is.
        return mask;
    }
    let mut seen = vec![false; mask.len()];
    let mut largest: Vec<usize> = Vec::new();
    for seed in 0..mask.len() {
        if !mask[seed] || seen[seed] {
            continue;
        }
        // Depth-first flood fill from this seed.
        seen[seed] = true;
        let mut frontier = vec![seed];
        let mut component = Vec::new();
        while let Some(index) = frontier.pop() {
            component.push(index);
            let (x, y) = (index % side, index / side);
            // Lazy closures avoid underflow at the left/top edges.
            let neighbors = [
                (x > 0).then(|| index - 1),
                (x + 1 < side).then(|| index + 1),
                (y > 0).then(|| index - side),
                (y + 1 < side).then(|| index + side),
            ];
            for neighbor in neighbors.into_iter().flatten() {
                if mask[neighbor] && !seen[neighbor] {
                    seen[neighbor] = true;
                    frontier.push(neighbor);
                }
            }
        }
        if component.len() > largest.len() {
            largest = component;
        }
    }
    if largest.len() < MIN_ADAPTIVE_ROI_PIXELS {
        // A too-small component would be rejected downstream; keep the
        // original mask instead.
        return mask;
    }
    let mut retained = vec![false; mask.len()];
    for index in largest {
        retained[index] = true;
    }
    retained
}
/// Integer Rec.601-style luma approximation: (77·r + 150·g + 29·b) / 256.
/// The weights sum to 256, so gray inputs map back to themselves exactly.
fn luma_u8(r: u8, g: u8, b: u8) -> u8 {
    let weighted = 77 * u16::from(r) + 150 * u16::from(g) + 29 * u16::from(b);
    // Maximum is 255 * 256 = 65280, which fits in u16; >> 8 == / 256.
    (weighted >> 8) as u8
}
/// Weight applied to a pixel's ROI score based on the darkest luma it ever
/// reaches across the clip: pixels that never get dark are discounted, as
/// they are likely static bright content rather than the probe signal.
fn dark_roi_factor(min_luma: u8) -> f64 {
    if min_luma <= 80 {
        1.0
    } else if min_luma <= 120 {
        0.55
    } else if min_luma <= 160 {
        0.25
    } else {
        0.10
    }
}
/// Scores how closely a color matches the probe's 16-color palette, in
/// [0, 1]. Dim (max channel < 50) or near-gray (channel spread < 20) colors
/// score 0; otherwise the score falls off with squared RGB distance to the
/// nearest palette entry, normalized by 255² per the original scaling.
fn palette_match_score(r: u8, g: u8, b: u8) -> f64 {
    let brightest = r.max(g).max(b);
    let darkest = r.min(g).min(b);
    if brightest < 50 || brightest.saturating_sub(darkest) < 20 {
        return 0.0;
    }
    const PALETTE: [(u8, u8, u8); 16] = [
        (255, 45, 45),
        (0, 230, 118),
        (41, 121, 255),
        (255, 179, 0),
        (216, 27, 96),
        (0, 188, 212),
        (205, 220, 57),
        (126, 87, 194),
        (255, 112, 67),
        (38, 166, 154),
        (255, 64, 129),
        (92, 107, 192),
        (255, 235, 59),
        (105, 240, 174),
        (171, 71, 188),
        (3, 169, 244),
    ];
    let mut nearest = f64::INFINITY;
    for (pr, pg, pb) in PALETTE {
        let dr = f64::from(r) - f64::from(pr);
        let dg = f64::from(g) - f64::from(pg);
        let db = f64::from(b) - f64::from(pb);
        nearest = nearest.min(dr * dr + dg * dg + db * db);
    }
    (1.0 - nearest / 65_025.0).clamp(0.0, 1.0)
}
#[cfg(test)]
mod tests {
    // These tests rely on project-local fakes: `with_fake_media_tools` puts
    // stub ffprobe/ffmpeg binaries on PATH that emit the supplied bytes, so
    // the extractors can be exercised without real media files.
    use super::{
        extract_audio_samples, extract_video_brightness, extract_video_colors,
        extract_video_timestamps, run_command,
    };
    use crate::sync_probe::analyze::test_support::{
        audio_samples_to_bytes, frame_json, thumbnail_rgb_video_bytes, thumbnail_video_bytes,
        with_fake_media_tools,
    };
    use std::process::Command;

    // Happy path: ffprobe JSON timestamps round-trip through the parser.
    #[test]
    fn extract_video_timestamps_reads_fake_ffprobe_output() {
        let timestamps = vec![0.0, 0.5, 1.0];
        with_fake_media_tools(
            &frame_json(&timestamps),
            &[1, 2, 3],
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_timestamps(capture_path).expect("video timestamps");
                assert_eq!(parsed, timestamps);
            },
        );
    }

    // Both an empty frame list and an unparseable timestamp must error.
    #[test]
    fn extract_video_timestamps_rejects_empty_and_invalid_outputs() {
        with_fake_media_tools(br#"{"frames":[]}"#, &[1], &[1, 0], |capture_path| {
            let error = extract_video_timestamps(capture_path).expect_err("empty frames fail");
            assert!(
                error
                    .to_string()
                    .contains("did not return any video frame timestamps")
            );
        });
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"bad"}]}"#,
            &[1],
            &[1, 0],
            |capture_path| {
                let error =
                    extract_video_timestamps(capture_path).expect_err("invalid timestamp fails");
                assert!(error.to_string().contains("parsing frame timestamp"));
            },
        );
    }

    // Uniform fake thumbnails should summarize to their own brightness values.
    #[test]
    fn extract_video_brightness_reads_fake_ffmpeg_output() {
        let brightness = vec![5u8, 100, 250];
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &thumbnail_video_bytes(&brightness),
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_brightness(capture_path).expect("video brightness");
                assert_eq!(parsed, brightness);
            },
        );
    }

    // Zero bytes from ffmpeg is an explicit error, not an empty result.
    #[test]
    fn extract_video_brightness_rejects_empty_output() {
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &[],
            &[1, 0],
            |capture_path| {
                let error = extract_video_brightness(capture_path).expect_err("empty brightness");
                assert!(
                    error
                        .to_string()
                        .contains("did not emit any video brightness data")
                );
            },
        );
    }

    // When no adaptive ROI emerges, the whole-thumbnail average is used.
    #[test]
    fn extract_video_brightness_uses_full_frame_thumbnail_average() {
        let brightness = vec![20u8, 45, 20];
        with_fake_media_tools(
            &frame_json(&[0.0, 0.1, 0.2]),
            &thumbnail_video_bytes(&brightness),
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_brightness(capture_path).expect("video brightness");
                assert_eq!(parsed, brightness);
            },
        );
    }

    // Byte counts that are not a whole number of frames must be rejected.
    #[test]
    fn extract_video_brightness_rejects_truncated_frame_data() {
        with_fake_media_tools(&frame_json(&[0.0]), &[1, 2, 3], &[1, 0], |capture_path| {
            let error = extract_video_brightness(capture_path).expect_err("truncated frame bytes");
            assert!(error.to_string().contains("not divisible"));
        });
    }

    // Uniform saturated color frames should summarize to those exact colors.
    #[test]
    fn extract_video_colors_reads_fake_ffmpeg_output() {
        let colors = vec![(255, 45, 45), (0, 230, 118), (41, 121, 255)];
        with_fake_media_tools(
            &frame_json(&[0.0, 0.1, 0.2]),
            &thumbnail_rgb_video_bytes(&colors),
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_colors(capture_path).expect("video colors");
                assert_eq!(parsed[0].r, 255);
                assert_eq!(parsed[1].g, 230);
                assert_eq!(parsed[2].b, 255);
            },
        );
    }

    // The adaptive ROI must lock onto a small flashing rectangle inside an
    // otherwise static dark frame, so the pulse colors dominate the summary.
    #[test]
    fn extract_video_colors_tracks_small_flashing_screen_region() {
        const SIDE: usize = 64;
        let mut bytes = Vec::new();
        for color in [(24, 28, 32), (255, 45, 45), (24, 28, 32), (0, 230, 118)] {
            let mut frame = vec![34u8; SIDE * SIDE * 3];
            for y in 6..18 {
                for x in 40..54 {
                    let offset = (y * SIDE + x) * 3;
                    frame[offset] = color.0;
                    frame[offset + 1] = color.1;
                    frame[offset + 2] = color.2;
                }
            }
            bytes.extend_from_slice(&frame);
        }
        with_fake_media_tools(
            &frame_json(&[0.0, 0.1, 0.2, 0.3]),
            &bytes,
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_colors(capture_path).expect("video colors");
                assert!(
                    parsed[1].r > 220 && parsed[1].g < 80,
                    "red pulse should dominate selected ROI: {:?}",
                    parsed[1]
                );
                assert!(
                    parsed[3].g > 190 && parsed[3].r < 60,
                    "green pulse should dominate selected ROI: {:?}",
                    parsed[3]
                );
            },
        );
    }

    // s16le bytes from the fake ffmpeg round-trip into i16 samples.
    #[test]
    fn extract_audio_samples_reads_fake_ffmpeg_output() {
        let samples = vec![1i16, -2, 32_000];
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &[1],
            &audio_samples_to_bytes(&samples),
            |capture_path| {
                let parsed = extract_audio_samples(capture_path).expect("audio samples");
                assert_eq!(parsed, samples);
            },
        );
    }

    // Fewer than two bytes cannot form a single s16 sample.
    #[test]
    fn extract_audio_samples_rejects_too_short_output() {
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &[1],
            &[7],
            |capture_path| {
                let error = extract_audio_samples(capture_path).expect_err("short audio");
                assert!(
                    error
                        .to_string()
                        .contains("did not emit enough audio data to analyze")
                );
            },
        );
    }

    // run_command returns stdout on success and folds stderr into the error
    // message on a non-zero exit status.
    #[test]
    fn run_command_reports_success_and_failure() {
        let output = run_command(
            Command::new("sh").arg("-c").arg("printf 'ok'"),
            "success command",
        )
        .expect("success output");
        assert_eq!(output, b"ok");
        let error = run_command(
            Command::new("sh")
                .arg("-c")
                .arg("printf 'boom' >&2; exit 7"),
            "failing command",
        )
        .expect_err("failing command should error");
        assert!(error.to_string().contains("failing command failed: boom"));
    }
}