use anyhow::{Context, Result, bail}; use serde::Deserialize; use std::path::Path; use std::process::Command; use super::onset_detection::VideoColorFrame; const VIDEO_ANALYSIS_SIDE_PX: usize = 64; const VIDEO_ANALYSIS_FPS: usize = 60; const MIN_ADAPTIVE_ROI_PIXELS: usize = 16; const MAX_ADAPTIVE_ROI_FRACTION: f64 = 0.35; const ADAPTIVE_ROI_SCORE_FRACTION: f64 = 0.30; const MIN_RGB_ROI_SCORE: f64 = 24.0; const MIN_GRAY_ROI_SCORE: f64 = 8.0; #[derive(Debug, Deserialize)] struct ProbeFrameResponse { #[serde(default)] frames: Vec, } #[derive(Debug, Deserialize)] struct ProbeFrameEntry { best_effort_timestamp_time: Option, } pub(super) fn extract_video_timestamps(capture_path: &Path) -> Result> { let output = run_command( Command::new("ffprobe") .arg("-hide_banner") .arg("-loglevel") .arg("error") .arg("-select_streams") .arg("v:0") .arg("-show_frames") .arg("-show_entries") .arg("frame=best_effort_timestamp_time") .arg("-of") .arg("json") .arg(capture_path), "ffprobe video timestamps", )?; let response: ProbeFrameResponse = serde_json::from_slice(&output).context("parsing ffprobe frame JSON")?; let timestamps = response .frames .into_iter() .filter_map(|entry| entry.best_effort_timestamp_time) .map(|value| value.parse::().context("parsing frame timestamp")) .collect::>>()?; if timestamps.is_empty() { bail!("ffprobe did not return any video frame timestamps"); } Ok(timestamps) } pub(super) fn extract_video_brightness(capture_path: &Path) -> Result> { let output = run_command( Command::new("ffmpeg") .arg("-hide_banner") .arg("-loglevel") .arg("error") .arg("-i") .arg(capture_path) .arg("-map") .arg("0:v:0") .arg("-vf") .arg(format!( "fps={fps},scale={side}:{side}:flags=area,format=gray", fps = VIDEO_ANALYSIS_FPS, side = VIDEO_ANALYSIS_SIDE_PX )) .arg("-f") .arg("rawvideo") .arg("-pix_fmt") .arg("gray") .arg("-"), "ffmpeg video brightness extraction", )?; if output.is_empty() { bail!("ffmpeg did not emit any video brightness data"); } let frame_pixels = VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX; if output.len() % frame_pixels != 0 { bail!( "ffmpeg emitted {} bytes of video brightness data, which is not divisible by the {}-pixel analysis frame size", output.len(), frame_pixels ); } let extracted_frames = output.len() / frame_pixels; Ok(summarize_gray_frames_with_adaptive_roi( output.chunks_exact(frame_pixels).take(extracted_frames), frame_pixels, )) } pub(super) fn extract_video_colors(capture_path: &Path) -> Result> { let output = run_command( Command::new("ffmpeg") .arg("-hide_banner") .arg("-loglevel") .arg("error") .arg("-i") .arg(capture_path) .arg("-map") .arg("0:v:0") .arg("-vf") .arg(format!( "fps={fps},scale={side}:{side}:flags=area,format=rgb24", fps = VIDEO_ANALYSIS_FPS, side = VIDEO_ANALYSIS_SIDE_PX )) .arg("-f") .arg("rawvideo") .arg("-pix_fmt") .arg("rgb24") .arg("-"), "ffmpeg video color extraction", )?; if output.is_empty() { bail!("ffmpeg did not emit any video color data"); } let frame_bytes = VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX * 3; if output.len() % frame_bytes != 0 { bail!( "ffmpeg emitted {} bytes of video color data, which is not divisible by the {}-byte analysis frame size", output.len(), frame_bytes ); } let extracted_frames = output.len() / frame_bytes; Ok(summarize_rgb_frames_with_adaptive_roi( output.chunks_exact(frame_bytes).take(extracted_frames), VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX, )) } pub(super) fn extract_audio_samples(capture_path: &Path) -> Result> { let output = run_command( Command::new("ffmpeg") .arg("-hide_banner") .arg("-loglevel") .arg("error") .arg("-i") .arg(capture_path) .arg("-map") .arg("0:a:0") .arg("-ac") .arg("1") .arg("-ar") .arg(super::onset_detection::DEFAULT_AUDIO_SAMPLE_RATE_HZ.to_string()) .arg("-f") .arg("s16le") .arg("-acodec") .arg("pcm_s16le") .arg("-"), "ffmpeg audio extraction", )?; if output.len() < 2 { bail!("ffmpeg did not emit enough audio data to analyze"); } Ok(output .chunks_exact(2) .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) .collect()) } pub(super) fn run_command(command: &mut Command, description: &str) -> Result> { let output = command .output() .with_context(|| format!("running {description}"))?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); bail!("{description} failed: {}", stderr.trim()); } Ok(output.stdout) } fn summarize_gray_frames_with_adaptive_roi<'a>( frames: impl Iterator, pixel_count: usize, ) -> Vec { let frames = frames.collect::>(); let mask = adaptive_gray_roi_mask(&frames, pixel_count); frames .iter() .map(|frame| summarize_frame_brightness(frame, mask.as_deref())) .collect() } fn summarize_rgb_frames_with_adaptive_roi<'a>( frames: impl Iterator, pixel_count: usize, ) -> Vec { let frames = frames.collect::>(); let mask = adaptive_rgb_roi_mask(&frames, pixel_count); frames .iter() .map(|frame| summarize_frame_color(frame, mask.as_deref())) .collect() } fn summarize_frame_brightness(frame: &[u8], mask: Option<&[bool]>) -> u8 { let mut sum = 0u64; let mut selected = 0u64; for (index, value) in frame.iter().copied().enumerate() { if mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false)) { sum += u64::from(value); selected += 1; } } if selected == 0 { sum = frame.iter().map(|value| u64::from(*value)).sum(); selected = frame.len().max(1) as u64; } let mean = sum / selected; mean.min(u64::from(u8::MAX)) as u8 } fn summarize_frame_color(frame: &[u8], mask: Option<&[bool]>) -> VideoColorFrame { let mut r_sum = 0u64; let mut g_sum = 0u64; let mut b_sum = 0u64; let mut selected = 0u64; for (index, pixel) in frame.chunks_exact(3).enumerate() { if !mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false)) { continue; } let r = pixel[0]; let g = pixel[1]; let b = pixel[2]; let max = r.max(g).max(b); let min = r.min(g).min(b); if max >= 60 && max.saturating_sub(min) >= 24 { r_sum += u64::from(r); g_sum += u64::from(g); b_sum += u64::from(b); selected += 1; } } if selected == 0 { for (index, pixel) in frame.chunks_exact(3).enumerate() { if !mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false)) { continue; } r_sum += u64::from(pixel[0]); g_sum += u64::from(pixel[1]); b_sum += u64::from(pixel[2]); selected += 1; } } if selected == 0 { for pixel in frame.chunks_exact(3) { r_sum += u64::from(pixel[0]); g_sum += u64::from(pixel[1]); b_sum += u64::from(pixel[2]); selected += 1; } } selected = selected.max(1); VideoColorFrame { r: (r_sum / selected).min(u64::from(u8::MAX)) as u8, g: (g_sum / selected).min(u64::from(u8::MAX)) as u8, b: (b_sum / selected).min(u64::from(u8::MAX)) as u8, } } fn adaptive_gray_roi_mask(frames: &[&[u8]], pixel_count: usize) -> Option> { if frames.len() < 2 || pixel_count == 0 { return None; } let mut scores = vec![0.0; pixel_count]; for pixel_index in 0..pixel_count { let mut min = u8::MAX; let mut max = u8::MIN; for frame in frames { let value = frame[pixel_index]; min = min.min(value); max = max.max(value); } scores[pixel_index] = f64::from(max.saturating_sub(min)) * dark_roi_factor(min); } adaptive_roi_mask_from_scores(&scores, MIN_GRAY_ROI_SCORE) } fn adaptive_rgb_roi_mask(frames: &[&[u8]], pixel_count: usize) -> Option> { if frames.len() < 2 || pixel_count == 0 { return None; } let mut scores = vec![0.0; pixel_count]; for pixel_index in 0..pixel_count { let mut min_r = u8::MAX; let mut min_g = u8::MAX; let mut min_b = u8::MAX; let mut max_r = u8::MIN; let mut max_g = u8::MIN; let mut max_b = u8::MIN; let mut min_luma = u8::MAX; let mut max_luma = u8::MIN; let mut best_palette_score = 0.0_f64; for frame in frames { let offset = pixel_index * 3; let r = frame[offset]; let g = frame[offset + 1]; let b = frame[offset + 2]; min_r = min_r.min(r); min_g = min_g.min(g); min_b = min_b.min(b); max_r = max_r.max(r); max_g = max_g.max(g); max_b = max_b.max(b); let luma = luma_u8(r, g, b); min_luma = min_luma.min(luma); max_luma = max_luma.max(luma); best_palette_score = best_palette_score.max(palette_match_score(r, g, b)); } let rgb_span = f64::from(max_r.saturating_sub(min_r)) + f64::from(max_g.saturating_sub(min_g)) + f64::from(max_b.saturating_sub(min_b)); let luma_span = f64::from(max_luma.saturating_sub(min_luma)); scores[pixel_index] = (rgb_span + (2.0 * luma_span)) * (1.0 + best_palette_score) * dark_roi_factor(min_luma); } adaptive_roi_mask_from_scores(&scores, MIN_RGB_ROI_SCORE) } fn adaptive_roi_mask_from_scores(scores: &[f64], min_score: f64) -> Option> { let max_score = scores.iter().copied().fold(0.0_f64, f64::max); if max_score < min_score { return None; } let mut ranked = scores .iter() .copied() .enumerate() .filter(|(_, score)| score.is_finite() && *score > 0.0) .collect::>(); ranked.sort_by(|left, right| right.1.total_cmp(&left.1)); let max_selected = ((scores.len() as f64 * MAX_ADAPTIVE_ROI_FRACTION).round() as usize) .max(MIN_ADAPTIVE_ROI_PIXELS) .min(scores.len()); let score_floor = (max_score * ADAPTIVE_ROI_SCORE_FRACTION).max(min_score); let mut mask = vec![false; scores.len()]; let mut selected = 0usize; for (index, score) in ranked.into_iter().take(max_selected) { if score < score_floor && selected >= MIN_ADAPTIVE_ROI_PIXELS { break; } mask[index] = true; selected += 1; } let mask = retain_largest_connected_roi(mask); let selected = mask.iter().filter(|selected| **selected).count(); (selected >= MIN_ADAPTIVE_ROI_PIXELS).then_some(mask) } fn retain_largest_connected_roi(mask: Vec) -> Vec { let side = (mask.len() as f64).sqrt().round() as usize; if side == 0 || side * side != mask.len() { return mask; } let mut visited = vec![false; mask.len()]; let mut best_component = Vec::::new(); for start in 0..mask.len() { if !mask[start] || visited[start] { continue; } let mut stack = vec![start]; let mut component = Vec::new(); visited[start] = true; while let Some(index) = stack.pop() { component.push(index); let x = index % side; let y = index / side; let mut push_neighbor = |neighbor: usize| { if mask[neighbor] && !visited[neighbor] { visited[neighbor] = true; stack.push(neighbor); } }; if x > 0 { push_neighbor(index - 1); } if x + 1 < side { push_neighbor(index + 1); } if y > 0 { push_neighbor(index - side); } if y + 1 < side { push_neighbor(index + side); } } if component.len() > best_component.len() { best_component = component; } } if best_component.len() < MIN_ADAPTIVE_ROI_PIXELS { return mask; } let mut retained = vec![false; mask.len()]; for index in best_component { retained[index] = true; } retained } fn luma_u8(r: u8, g: u8, b: u8) -> u8 { ((u16::from(r) * 77 + u16::from(g) * 150 + u16::from(b) * 29) / 256) as u8 } fn dark_roi_factor(min_luma: u8) -> f64 { match min_luma { 0..=80 => 1.0, 81..=120 => 0.55, 121..=160 => 0.25, _ => 0.10, } } fn palette_match_score(r: u8, g: u8, b: u8) -> f64 { let max = r.max(g).max(b); let min = r.min(g).min(b); if max < 50 || max.saturating_sub(min) < 20 { return 0.0; } const PALETTE: [(u8, u8, u8); 16] = [ (255, 45, 45), (0, 230, 118), (41, 121, 255), (255, 179, 0), (216, 27, 96), (0, 188, 212), (205, 220, 57), (126, 87, 194), (255, 112, 67), (38, 166, 154), (255, 64, 129), (92, 107, 192), (255, 235, 59), (105, 240, 174), (171, 71, 188), (3, 169, 244), ]; let best_distance = PALETTE .into_iter() .map(|(pr, pg, pb)| { let dr = f64::from(r) - f64::from(pr); let dg = f64::from(g) - f64::from(pg); let db = f64::from(b) - f64::from(pb); dr * dr + dg * dg + db * db }) .fold(f64::INFINITY, f64::min); (1.0 - (best_distance / 65_025.0)).clamp(0.0, 1.0) } #[cfg(test)] mod tests { use super::{ extract_audio_samples, extract_video_brightness, extract_video_colors, extract_video_timestamps, run_command, }; use crate::sync_probe::analyze::test_support::{ audio_samples_to_bytes, frame_json, thumbnail_rgb_video_bytes, thumbnail_video_bytes, with_fake_media_tools, }; use std::process::Command; #[test] fn extract_video_timestamps_reads_fake_ffprobe_output() { let timestamps = vec![0.0, 0.5, 1.0]; with_fake_media_tools( &frame_json(×tamps), &[1, 2, 3], &[1, 0], |capture_path| { let parsed = extract_video_timestamps(capture_path).expect("video timestamps"); assert_eq!(parsed, timestamps); }, ); } #[test] fn extract_video_timestamps_rejects_empty_and_invalid_outputs() { with_fake_media_tools(br#"{"frames":[]}"#, &[1], &[1, 0], |capture_path| { let error = extract_video_timestamps(capture_path).expect_err("empty frames fail"); assert!( error .to_string() .contains("did not return any video frame timestamps") ); }); with_fake_media_tools( br#"{"frames":[{"best_effort_timestamp_time":"bad"}]}"#, &[1], &[1, 0], |capture_path| { let error = extract_video_timestamps(capture_path).expect_err("invalid timestamp fails"); assert!(error.to_string().contains("parsing frame timestamp")); }, ); } #[test] fn extract_video_brightness_reads_fake_ffmpeg_output() { let brightness = vec![5u8, 100, 250]; with_fake_media_tools( br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#, &thumbnail_video_bytes(&brightness), &[1, 0], |capture_path| { let parsed = extract_video_brightness(capture_path).expect("video brightness"); assert_eq!(parsed, brightness); }, ); } #[test] fn extract_video_brightness_rejects_empty_output() { with_fake_media_tools( br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#, &[], &[1, 0], |capture_path| { let error = extract_video_brightness(capture_path).expect_err("empty brightness"); assert!( error .to_string() .contains("did not emit any video brightness data") ); }, ); } #[test] fn extract_video_brightness_uses_full_frame_thumbnail_average() { let brightness = vec![20u8, 45, 20]; with_fake_media_tools( &frame_json(&[0.0, 0.1, 0.2]), &thumbnail_video_bytes(&brightness), &[1, 0], |capture_path| { let parsed = extract_video_brightness(capture_path).expect("video brightness"); assert_eq!(parsed, brightness); }, ); } #[test] fn extract_video_brightness_rejects_truncated_frame_data() { with_fake_media_tools(&frame_json(&[0.0]), &[1, 2, 3], &[1, 0], |capture_path| { let error = extract_video_brightness(capture_path).expect_err("truncated frame bytes"); assert!(error.to_string().contains("not divisible")); }); } #[test] fn extract_video_colors_reads_fake_ffmpeg_output() { let colors = vec![(255, 45, 45), (0, 230, 118), (41, 121, 255)]; with_fake_media_tools( &frame_json(&[0.0, 0.1, 0.2]), &thumbnail_rgb_video_bytes(&colors), &[1, 0], |capture_path| { let parsed = extract_video_colors(capture_path).expect("video colors"); assert_eq!(parsed[0].r, 255); assert_eq!(parsed[1].g, 230); assert_eq!(parsed[2].b, 255); }, ); } #[test] fn extract_video_colors_tracks_small_flashing_screen_region() { const SIDE: usize = 64; let mut bytes = Vec::new(); for color in [(24, 28, 32), (255, 45, 45), (24, 28, 32), (0, 230, 118)] { let mut frame = vec![34u8; SIDE * SIDE * 3]; for y in 6..18 { for x in 40..54 { let offset = (y * SIDE + x) * 3; frame[offset] = color.0; frame[offset + 1] = color.1; frame[offset + 2] = color.2; } } bytes.extend_from_slice(&frame); } with_fake_media_tools( &frame_json(&[0.0, 0.1, 0.2, 0.3]), &bytes, &[1, 0], |capture_path| { let parsed = extract_video_colors(capture_path).expect("video colors"); assert!( parsed[1].r > 220 && parsed[1].g < 80, "red pulse should dominate selected ROI: {:?}", parsed[1] ); assert!( parsed[3].g > 190 && parsed[3].r < 60, "green pulse should dominate selected ROI: {:?}", parsed[3] ); }, ); } #[test] fn extract_audio_samples_reads_fake_ffmpeg_output() { let samples = vec![1i16, -2, 32_000]; with_fake_media_tools( br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#, &[1], &audio_samples_to_bytes(&samples), |capture_path| { let parsed = extract_audio_samples(capture_path).expect("audio samples"); assert_eq!(parsed, samples); }, ); } #[test] fn extract_audio_samples_rejects_too_short_output() { with_fake_media_tools( br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#, &[1], &[7], |capture_path| { let error = extract_audio_samples(capture_path).expect_err("short audio"); assert!( error .to_string() .contains("did not emit enough audio data to analyze") ); }, ); } #[test] fn run_command_reports_success_and_failure() { let output = run_command( Command::new("sh").arg("-c").arg("printf 'ok'"), "success command", ) .expect("success output"); assert_eq!(output, b"ok"); let error = run_command( Command::new("sh") .arg("-c") .arg("printf 'boom' >&2; exit 7"), "failing command", ) .expect_err("failing command should error"); assert!(error.to_string().contains("failing command failed: boom")); } }