lesavka/client/src/sync_probe/analyze/media_extract.rs

679 lines
21 KiB
Rust

use anyhow::{Context, Result, bail};
use serde::Deserialize;
use std::path::Path;
use std::process::Command;
use super::onset_detection::VideoColorFrame;
// Side length (pixels) of the square thumbnail each video frame is scaled to
// before analysis; all ROI masks are sized against this grid.
const VIDEO_ANALYSIS_SIDE_PX: usize = 64;
// Frame rate the video is resampled to for analysis.
const VIDEO_ANALYSIS_FPS: usize = 60;
// An adaptive ROI mask must retain at least this many pixels to be used.
const MIN_ADAPTIVE_ROI_PIXELS: usize = 16;
// Upper bound on the fraction of thumbnail pixels an adaptive ROI may cover.
const MAX_ADAPTIVE_ROI_FRACTION: f64 = 0.35;
// Once the minimum ROI size is reached, pixels scoring below this fraction of
// the peak score are no longer added to the mask.
const ADAPTIVE_ROI_SCORE_FRACTION: f64 = 0.30;
// Minimum peak pixel score required before an RGB ROI mask is produced.
const MIN_RGB_ROI_SCORE: f64 = 24.0;
// Minimum peak pixel score required before a grayscale ROI mask is produced.
const MIN_GRAY_ROI_SCORE: f64 = 8.0;
/// Top-level shape of `ffprobe -show_frames -of json` output.
#[derive(Debug, Deserialize)]
struct ProbeFrameResponse {
    // Defaults to empty so output with no "frames" key still deserializes.
    #[serde(default)]
    frames: Vec<ProbeFrameEntry>,
}
/// A single frame record from ffprobe; the timestamp arrives as a decimal
/// string and may be absent for some frames.
#[derive(Debug, Deserialize)]
struct ProbeFrameEntry {
    best_effort_timestamp_time: Option<String>,
}
/// Reads per-frame best-effort timestamps (seconds) from the first video
/// stream of `capture_path` via `ffprobe` JSON output.
///
/// # Errors
/// Fails when ffprobe itself fails, its JSON cannot be parsed, a timestamp
/// string is not a valid float, or no timestamps were reported at all.
pub(super) fn extract_video_timestamps(capture_path: &Path) -> Result<Vec<f64>> {
    let mut probe = Command::new("ffprobe");
    probe
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        .arg("-select_streams")
        .arg("v:0")
        .arg("-show_frames")
        .arg("-show_entries")
        .arg("frame=best_effort_timestamp_time")
        .arg("-of")
        .arg("json")
        .arg(capture_path);
    let raw = run_command(&mut probe, "ffprobe video timestamps")?;
    let parsed: ProbeFrameResponse =
        serde_json::from_slice(&raw).context("parsing ffprobe frame JSON")?;
    // Frames without a timestamp are skipped; malformed values are errors.
    let mut timestamps = Vec::with_capacity(parsed.frames.len());
    for entry in parsed.frames {
        if let Some(text) = entry.best_effort_timestamp_time {
            timestamps.push(text.parse::<f64>().context("parsing frame timestamp")?);
        }
    }
    if timestamps.is_empty() {
        bail!("ffprobe did not return any video frame timestamps");
    }
    Ok(timestamps)
}
/// Decodes the first video stream into grayscale analysis thumbnails and
/// reduces each one to a single brightness byte, using an adaptively chosen
/// region of interest when one can be found.
///
/// # Errors
/// Fails when ffmpeg fails, emits nothing, or emits a byte count that is not
/// a whole number of analysis frames.
pub(super) fn extract_video_brightness(capture_path: &Path) -> Result<Vec<u8>> {
    let filter = format!(
        "fps={fps},scale={side}:{side}:flags=area,format=gray",
        fps = VIDEO_ANALYSIS_FPS,
        side = VIDEO_ANALYSIS_SIDE_PX
    );
    let mut decode = Command::new("ffmpeg");
    decode
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        .arg("-i")
        .arg(capture_path)
        .arg("-map")
        .arg("0:v:0")
        .arg("-vf")
        .arg(filter)
        .arg("-f")
        .arg("rawvideo")
        .arg("-pix_fmt")
        .arg("gray")
        .arg("-");
    let raw = run_command(&mut decode, "ffmpeg video brightness extraction")?;
    if raw.is_empty() {
        bail!("ffmpeg did not emit any video brightness data");
    }
    let frame_pixels = VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX;
    if raw.len() % frame_pixels != 0 {
        bail!(
            "ffmpeg emitted {} bytes of video brightness data, which is not divisible by the {}-pixel analysis frame size",
            raw.len(),
            frame_pixels
        );
    }
    // After the divisibility check, chunks_exact yields every frame exactly.
    Ok(summarize_gray_frames_with_adaptive_roi(
        raw.chunks_exact(frame_pixels),
        frame_pixels,
    ))
}
/// Decodes the first video stream into RGB analysis thumbnails and reduces
/// each one to an average color, using an adaptively chosen region of
/// interest when one can be found.
///
/// # Errors
/// Fails when ffmpeg fails, emits nothing, or emits a byte count that is not
/// a whole number of RGB analysis frames.
pub(super) fn extract_video_colors(capture_path: &Path) -> Result<Vec<VideoColorFrame>> {
    let filter = format!(
        "fps={fps},scale={side}:{side}:flags=area,format=rgb24",
        fps = VIDEO_ANALYSIS_FPS,
        side = VIDEO_ANALYSIS_SIDE_PX
    );
    let mut decode = Command::new("ffmpeg");
    decode
        .arg("-hide_banner")
        .arg("-loglevel")
        .arg("error")
        .arg("-i")
        .arg(capture_path)
        .arg("-map")
        .arg("0:v:0")
        .arg("-vf")
        .arg(filter)
        .arg("-f")
        .arg("rawvideo")
        .arg("-pix_fmt")
        .arg("rgb24")
        .arg("-");
    let raw = run_command(&mut decode, "ffmpeg video color extraction")?;
    if raw.is_empty() {
        bail!("ffmpeg did not emit any video color data");
    }
    // Three bytes per pixel in rgb24 output.
    let frame_bytes = VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX * 3;
    if raw.len() % frame_bytes != 0 {
        bail!(
            "ffmpeg emitted {} bytes of video color data, which is not divisible by the {}-byte analysis frame size",
            raw.len(),
            frame_bytes
        );
    }
    // After the divisibility check, chunks_exact yields every frame exactly.
    Ok(summarize_rgb_frames_with_adaptive_roi(
        raw.chunks_exact(frame_bytes),
        VIDEO_ANALYSIS_SIDE_PX * VIDEO_ANALYSIS_SIDE_PX,
    ))
}
/// Decodes the first audio stream to mono signed 16-bit little-endian PCM at
/// the analysis sample rate and returns the samples.
///
/// # Errors
/// Fails when ffmpeg fails, emits fewer than one full sample, or emits an
/// odd byte count (a stream truncated mid-sample). Previously a trailing odd
/// byte was silently dropped by `chunks_exact(2)`; now it is reported, which
/// matches the strict size checks used by the video extractors.
pub(super) fn extract_audio_samples(capture_path: &Path) -> Result<Vec<i16>> {
    let output = run_command(
        Command::new("ffmpeg")
            .arg("-hide_banner")
            .arg("-loglevel")
            .arg("error")
            .arg("-i")
            .arg(capture_path)
            .arg("-map")
            .arg("0:a:0")
            .arg("-ac")
            .arg("1")
            .arg("-ar")
            .arg(super::onset_detection::DEFAULT_AUDIO_SAMPLE_RATE_HZ.to_string())
            .arg("-f")
            .arg("s16le")
            .arg("-acodec")
            .arg("pcm_s16le")
            .arg("-"),
        "ffmpeg audio extraction",
    )?;
    if output.len() < 2 {
        bail!("ffmpeg did not emit enough audio data to analyze");
    }
    // s16le samples are two bytes each; an odd total means the stream was
    // truncated mid-sample and should not be analyzed.
    if output.len() % 2 != 0 {
        bail!(
            "ffmpeg emitted a truncated s16le audio stream ({} bytes)",
            output.len()
        );
    }
    Ok(output
        .chunks_exact(2)
        .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]]))
        .collect())
}
/// Runs `command` to completion and returns its captured stdout on success.
///
/// # Errors
/// A spawn/IO failure is wrapped with `description` for context; a non-zero
/// exit status becomes an error carrying the trimmed stderr text.
pub(super) fn run_command(command: &mut Command, description: &str) -> Result<Vec<u8>> {
    let result = command
        .output()
        .with_context(|| format!("running {description}"))?;
    if result.status.success() {
        return Ok(result.stdout);
    }
    let stderr = String::from_utf8_lossy(&result.stderr);
    bail!("{description} failed: {}", stderr.trim());
}
/// Collects all grayscale frames, derives one shared adaptive ROI mask from
/// their variation, then summarizes every frame's brightness through it.
fn summarize_gray_frames_with_adaptive_roi<'a>(
    frames: impl Iterator<Item = &'a [u8]>,
    pixel_count: usize,
) -> Vec<u8> {
    let collected: Vec<&[u8]> = frames.collect();
    // The mask is built once from all frames so every frame is summarized
    // over the same region.
    let roi = adaptive_gray_roi_mask(&collected, pixel_count);
    collected
        .into_iter()
        .map(|frame| summarize_frame_brightness(frame, roi.as_deref()))
        .collect()
}
/// Collects all RGB frames, derives one shared adaptive ROI mask from their
/// variation, then summarizes every frame's color through it.
fn summarize_rgb_frames_with_adaptive_roi<'a>(
    frames: impl Iterator<Item = &'a [u8]>,
    pixel_count: usize,
) -> Vec<VideoColorFrame> {
    let collected: Vec<&[u8]> = frames.collect();
    // One mask for the whole clip keeps per-frame summaries comparable.
    let roi = adaptive_rgb_roi_mask(&collected, pixel_count);
    collected
        .into_iter()
        .map(|frame| summarize_frame_color(frame, roi.as_deref()))
        .collect()
}
/// Averages the brightness of the pixels selected by `mask` (all pixels when
/// no mask is given). Falls back to a whole-frame average when the mask
/// selects nothing; an empty frame yields 0.
fn summarize_frame_brightness(frame: &[u8], mask: Option<&[bool]>) -> u8 {
    let included =
        |index: usize| mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false));
    let mut total = 0u64;
    let mut count = 0u64;
    for (index, &value) in frame.iter().enumerate() {
        if included(index) {
            total += u64::from(value);
            count += 1;
        }
    }
    if count == 0 {
        // Mask selected nothing: average the entire frame instead.
        total = frame.iter().copied().map(u64::from).sum();
        count = frame.len().max(1) as u64;
    }
    ((total / count).min(u64::from(u8::MAX))) as u8
}
/// Averages RGB over masked pixels that look saturated (bright with a wide
/// channel spread). If none qualify, falls back to all masked pixels; if the
/// mask selects nothing, falls back to the whole frame, so a color is always
/// produced.
fn summarize_frame_color(frame: &[u8], mask: Option<&[bool]>) -> VideoColorFrame {
    let in_roi =
        |index: usize| mask.is_none_or(|mask| mask.get(index).copied().unwrap_or(false));
    let mut totals = [0u64; 3];
    let mut count = 0u64;
    // Pass 1: masked pixels that are both bright enough and colorful enough.
    for (index, pixel) in frame.chunks_exact(3).enumerate() {
        if !in_roi(index) {
            continue;
        }
        let brightest = pixel[0].max(pixel[1]).max(pixel[2]);
        let darkest = pixel[0].min(pixel[1]).min(pixel[2]);
        if brightest >= 60 && brightest.saturating_sub(darkest) >= 24 {
            totals[0] += u64::from(pixel[0]);
            totals[1] += u64::from(pixel[1]);
            totals[2] += u64::from(pixel[2]);
            count += 1;
        }
    }
    // Pass 2: no saturated pixels — average every masked pixel instead.
    if count == 0 {
        for (index, pixel) in frame.chunks_exact(3).enumerate() {
            if in_roi(index) {
                totals[0] += u64::from(pixel[0]);
                totals[1] += u64::from(pixel[1]);
                totals[2] += u64::from(pixel[2]);
                count += 1;
            }
        }
    }
    // Pass 3: mask selected nothing — average the whole frame.
    if count == 0 {
        for pixel in frame.chunks_exact(3) {
            totals[0] += u64::from(pixel[0]);
            totals[1] += u64::from(pixel[1]);
            totals[2] += u64::from(pixel[2]);
            count += 1;
        }
    }
    let count = count.max(1);
    VideoColorFrame {
        r: (totals[0] / count).min(u64::from(u8::MAX)) as u8,
        g: (totals[1] / count).min(u64::from(u8::MAX)) as u8,
        b: (totals[2] / count).min(u64::from(u8::MAX)) as u8,
    }
}
/// Scores each pixel by its brightness span across `frames`, discounted for
/// pixels that never get dark (likely static bright UI), then turns the
/// scores into an ROI mask. Returns `None` with fewer than two frames, zero
/// pixels, or when no pixel varies enough.
fn adaptive_gray_roi_mask(frames: &[&[u8]], pixel_count: usize) -> Option<Vec<bool>> {
    if pixel_count == 0 || frames.len() < 2 {
        return None;
    }
    let scores: Vec<f64> = (0..pixel_count)
        .map(|pixel_index| {
            let mut low = u8::MAX;
            let mut high = u8::MIN;
            for frame in frames {
                let value = frame[pixel_index];
                low = low.min(value);
                high = high.max(value);
            }
            // Span across time, penalized when the pixel stays bright.
            f64::from(high.saturating_sub(low)) * dark_roi_factor(low)
        })
        .collect();
    adaptive_roi_mask_from_scores(&scores, MIN_GRAY_ROI_SCORE)
}
/// Scores each RGB pixel by its per-channel and luma variation across
/// `frames`, boosted when any observed color resembles the probe palette and
/// discounted for pixels that never get dark, then turns the scores into an
/// ROI mask. Returns `None` with fewer than two frames, zero pixels, or when
/// no pixel varies enough.
fn adaptive_rgb_roi_mask(frames: &[&[u8]], pixel_count: usize) -> Option<Vec<bool>> {
    if pixel_count == 0 || frames.len() < 2 {
        return None;
    }
    let mut scores = Vec::with_capacity(pixel_count);
    for pixel_index in 0..pixel_count {
        let offset = pixel_index * 3;
        let mut channel_min = [u8::MAX; 3];
        let mut channel_max = [u8::MIN; 3];
        let mut luma_min = u8::MAX;
        let mut luma_max = u8::MIN;
        let mut palette_best = 0.0_f64;
        for frame in frames {
            let (r, g, b) = (frame[offset], frame[offset + 1], frame[offset + 2]);
            for (slot, value) in [r, g, b].into_iter().enumerate() {
                channel_min[slot] = channel_min[slot].min(value);
                channel_max[slot] = channel_max[slot].max(value);
            }
            let luma = luma_u8(r, g, b);
            luma_min = luma_min.min(luma);
            luma_max = luma_max.max(luma);
            palette_best = palette_best.max(palette_match_score(r, g, b));
        }
        let rgb_span: f64 = (0..3)
            .map(|slot| f64::from(channel_max[slot].saturating_sub(channel_min[slot])))
            .sum();
        let luma_span = f64::from(luma_max.saturating_sub(luma_min));
        // Luma variation is weighted double; palette hits scale the score up.
        scores.push(
            (rgb_span + 2.0 * luma_span) * (1.0 + palette_best) * dark_roi_factor(luma_min),
        );
    }
    adaptive_roi_mask_from_scores(&scores, MIN_RGB_ROI_SCORE)
}
/// Converts per-pixel scores into a boolean ROI mask: the highest-scoring
/// pixels are kept (at least `MIN_ADAPTIVE_ROI_PIXELS`, at most
/// `MAX_ADAPTIVE_ROI_FRACTION` of the frame, and nothing far below the peak
/// once the minimum is met), then reduced to the largest connected region.
/// Returns `None` when the peak score is below `min_score` or too few pixels
/// survive.
fn adaptive_roi_mask_from_scores(scores: &[f64], min_score: f64) -> Option<Vec<bool>> {
    let peak = scores.iter().copied().fold(0.0_f64, f64::max);
    if peak < min_score {
        return None;
    }
    // Rank only finite, positive scores, best first (stable sort keeps index
    // order for ties).
    let mut candidates: Vec<(usize, f64)> = scores
        .iter()
        .enumerate()
        .filter_map(|(index, &score)| (score.is_finite() && score > 0.0).then_some((index, score)))
        .collect();
    candidates.sort_by(|a, b| b.1.total_cmp(&a.1));
    let cap = ((scores.len() as f64 * MAX_ADAPTIVE_ROI_FRACTION).round() as usize)
        .max(MIN_ADAPTIVE_ROI_PIXELS)
        .min(scores.len());
    let floor = (peak * ADAPTIVE_ROI_SCORE_FRACTION).max(min_score);
    let mut mask = vec![false; scores.len()];
    let mut picked = 0usize;
    for (index, score) in candidates.into_iter().take(cap) {
        // Stop adding weak pixels once the minimum ROI size is reached.
        if score < floor && picked >= MIN_ADAPTIVE_ROI_PIXELS {
            break;
        }
        mask[index] = true;
        picked += 1;
    }
    let mask = retain_largest_connected_roi(mask);
    let picked = mask.iter().filter(|&&kept| kept).count();
    (picked >= MIN_ADAPTIVE_ROI_PIXELS).then_some(mask)
}
/// Reduces `mask` (interpreted as a square grid) to its largest 4-connected
/// component. The input is returned unchanged when it is not a perfect
/// square or when the largest component would fall below
/// `MIN_ADAPTIVE_ROI_PIXELS`.
fn retain_largest_connected_roi(mask: Vec<bool>) -> Vec<bool> {
    let side = (mask.len() as f64).sqrt().round() as usize;
    if side == 0 || side * side != mask.len() {
        // Not a square grid: connectivity is undefined, keep the mask as-is.
        return mask;
    }
    let mut seen = vec![false; mask.len()];
    let mut largest: Vec<usize> = Vec::new();
    for seed in 0..mask.len() {
        if !mask[seed] || seen[seed] {
            continue;
        }
        // Depth-first flood fill from this seed.
        seen[seed] = true;
        let mut frontier = vec![seed];
        let mut component = Vec::new();
        while let Some(index) = frontier.pop() {
            component.push(index);
            let (x, y) = (index % side, index / side);
            // Lazy closures avoid underflow at the left/top edges.
            let neighbors = [
                (x > 0).then(|| index - 1),
                (x + 1 < side).then(|| index + 1),
                (y > 0).then(|| index - side),
                (y + 1 < side).then(|| index + side),
            ];
            for neighbor in neighbors.into_iter().flatten() {
                if mask[neighbor] && !seen[neighbor] {
                    seen[neighbor] = true;
                    frontier.push(neighbor);
                }
            }
        }
        if component.len() > largest.len() {
            largest = component;
        }
    }
    if largest.len() < MIN_ADAPTIVE_ROI_PIXELS {
        // A too-small component would be rejected downstream; keep the
        // original mask instead.
        return mask;
    }
    let mut retained = vec![false; mask.len()];
    for index in largest {
        retained[index] = true;
    }
    retained
}
/// Integer Rec.601-style luma approximation: (77·r + 150·g + 29·b) / 256.
/// The weights sum to 256, so gray inputs map back to themselves exactly.
fn luma_u8(r: u8, g: u8, b: u8) -> u8 {
    let weighted = 77 * u16::from(r) + 150 * u16::from(g) + 29 * u16::from(b);
    // Maximum is 255 * 256 = 65280, which fits in u16; >> 8 == / 256.
    (weighted >> 8) as u8
}
/// Weight applied to a pixel's ROI score based on the darkest luma it ever
/// reaches across the clip: pixels that never get dark are discounted, as
/// they are likely static bright content rather than the probe signal.
fn dark_roi_factor(min_luma: u8) -> f64 {
    if min_luma <= 80 {
        1.0
    } else if min_luma <= 120 {
        0.55
    } else if min_luma <= 160 {
        0.25
    } else {
        0.10
    }
}
/// Scores how closely a color matches the probe's 16-color palette, in
/// [0, 1]. Dim (max channel < 50) or near-gray (channel spread < 20) colors
/// score 0; otherwise the score falls off with squared RGB distance to the
/// nearest palette entry, normalized by 255² per the original scaling.
fn palette_match_score(r: u8, g: u8, b: u8) -> f64 {
    let brightest = r.max(g).max(b);
    let darkest = r.min(g).min(b);
    if brightest < 50 || brightest.saturating_sub(darkest) < 20 {
        return 0.0;
    }
    const PALETTE: [(u8, u8, u8); 16] = [
        (255, 45, 45),
        (0, 230, 118),
        (41, 121, 255),
        (255, 179, 0),
        (216, 27, 96),
        (0, 188, 212),
        (205, 220, 57),
        (126, 87, 194),
        (255, 112, 67),
        (38, 166, 154),
        (255, 64, 129),
        (92, 107, 192),
        (255, 235, 59),
        (105, 240, 174),
        (171, 71, 188),
        (3, 169, 244),
    ];
    let mut nearest = f64::INFINITY;
    for (pr, pg, pb) in PALETTE {
        let dr = f64::from(r) - f64::from(pr);
        let dg = f64::from(g) - f64::from(pg);
        let db = f64::from(b) - f64::from(pb);
        nearest = nearest.min(dr * dr + dg * dg + db * db);
    }
    (1.0 - nearest / 65_025.0).clamp(0.0, 1.0)
}
#[cfg(test)]
mod tests {
    // These tests rely on project-local fakes: `with_fake_media_tools` puts
    // stub ffprobe/ffmpeg binaries on PATH that emit the supplied bytes, so
    // the extractors can be exercised without real media files.
    use super::{
        extract_audio_samples, extract_video_brightness, extract_video_colors,
        extract_video_timestamps, run_command,
    };
    use crate::sync_probe::analyze::test_support::{
        audio_samples_to_bytes, frame_json, thumbnail_rgb_video_bytes, thumbnail_video_bytes,
        with_fake_media_tools,
    };
    use std::process::Command;

    // Happy path: ffprobe JSON timestamps round-trip through the parser.
    #[test]
    fn extract_video_timestamps_reads_fake_ffprobe_output() {
        let timestamps = vec![0.0, 0.5, 1.0];
        with_fake_media_tools(
            &frame_json(&timestamps),
            &[1, 2, 3],
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_timestamps(capture_path).expect("video timestamps");
                assert_eq!(parsed, timestamps);
            },
        );
    }

    // Both an empty frame list and an unparseable timestamp must error.
    #[test]
    fn extract_video_timestamps_rejects_empty_and_invalid_outputs() {
        with_fake_media_tools(br#"{"frames":[]}"#, &[1], &[1, 0], |capture_path| {
            let error = extract_video_timestamps(capture_path).expect_err("empty frames fail");
            assert!(
                error
                    .to_string()
                    .contains("did not return any video frame timestamps")
            );
        });
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"bad"}]}"#,
            &[1],
            &[1, 0],
            |capture_path| {
                let error =
                    extract_video_timestamps(capture_path).expect_err("invalid timestamp fails");
                assert!(error.to_string().contains("parsing frame timestamp"));
            },
        );
    }

    // Uniform fake thumbnails should summarize to their own brightness values.
    #[test]
    fn extract_video_brightness_reads_fake_ffmpeg_output() {
        let brightness = vec![5u8, 100, 250];
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &thumbnail_video_bytes(&brightness),
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_brightness(capture_path).expect("video brightness");
                assert_eq!(parsed, brightness);
            },
        );
    }

    // Zero bytes from ffmpeg is an explicit error, not an empty result.
    #[test]
    fn extract_video_brightness_rejects_empty_output() {
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &[],
            &[1, 0],
            |capture_path| {
                let error = extract_video_brightness(capture_path).expect_err("empty brightness");
                assert!(
                    error
                        .to_string()
                        .contains("did not emit any video brightness data")
                );
            },
        );
    }

    // When no adaptive ROI emerges, the whole-thumbnail average is used.
    #[test]
    fn extract_video_brightness_uses_full_frame_thumbnail_average() {
        let brightness = vec![20u8, 45, 20];
        with_fake_media_tools(
            &frame_json(&[0.0, 0.1, 0.2]),
            &thumbnail_video_bytes(&brightness),
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_brightness(capture_path).expect("video brightness");
                assert_eq!(parsed, brightness);
            },
        );
    }

    // Byte counts that are not a whole number of frames must be rejected.
    #[test]
    fn extract_video_brightness_rejects_truncated_frame_data() {
        with_fake_media_tools(&frame_json(&[0.0]), &[1, 2, 3], &[1, 0], |capture_path| {
            let error = extract_video_brightness(capture_path).expect_err("truncated frame bytes");
            assert!(error.to_string().contains("not divisible"));
        });
    }

    // Uniform saturated color frames should summarize to those exact colors.
    #[test]
    fn extract_video_colors_reads_fake_ffmpeg_output() {
        let colors = vec![(255, 45, 45), (0, 230, 118), (41, 121, 255)];
        with_fake_media_tools(
            &frame_json(&[0.0, 0.1, 0.2]),
            &thumbnail_rgb_video_bytes(&colors),
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_colors(capture_path).expect("video colors");
                assert_eq!(parsed[0].r, 255);
                assert_eq!(parsed[1].g, 230);
                assert_eq!(parsed[2].b, 255);
            },
        );
    }

    // The adaptive ROI must lock onto a small flashing rectangle inside an
    // otherwise static dark frame, so the pulse colors dominate the summary.
    #[test]
    fn extract_video_colors_tracks_small_flashing_screen_region() {
        const SIDE: usize = 64;
        let mut bytes = Vec::new();
        for color in [(24, 28, 32), (255, 45, 45), (24, 28, 32), (0, 230, 118)] {
            let mut frame = vec![34u8; SIDE * SIDE * 3];
            for y in 6..18 {
                for x in 40..54 {
                    let offset = (y * SIDE + x) * 3;
                    frame[offset] = color.0;
                    frame[offset + 1] = color.1;
                    frame[offset + 2] = color.2;
                }
            }
            bytes.extend_from_slice(&frame);
        }
        with_fake_media_tools(
            &frame_json(&[0.0, 0.1, 0.2, 0.3]),
            &bytes,
            &[1, 0],
            |capture_path| {
                let parsed = extract_video_colors(capture_path).expect("video colors");
                assert!(
                    parsed[1].r > 220 && parsed[1].g < 80,
                    "red pulse should dominate selected ROI: {:?}",
                    parsed[1]
                );
                assert!(
                    parsed[3].g > 190 && parsed[3].r < 60,
                    "green pulse should dominate selected ROI: {:?}",
                    parsed[3]
                );
            },
        );
    }

    // s16le bytes from the fake ffmpeg round-trip into i16 samples.
    #[test]
    fn extract_audio_samples_reads_fake_ffmpeg_output() {
        let samples = vec![1i16, -2, 32_000];
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &[1],
            &audio_samples_to_bytes(&samples),
            |capture_path| {
                let parsed = extract_audio_samples(capture_path).expect("audio samples");
                assert_eq!(parsed, samples);
            },
        );
    }

    // Fewer than two bytes cannot form a single s16 sample.
    #[test]
    fn extract_audio_samples_rejects_too_short_output() {
        with_fake_media_tools(
            br#"{"frames":[{"best_effort_timestamp_time":"0.0"}]}"#,
            &[1],
            &[7],
            |capture_path| {
                let error = extract_audio_samples(capture_path).expect_err("short audio");
                assert!(
                    error
                        .to_string()
                        .contains("did not emit enough audio data to analyze")
                );
            },
        );
    }

    // run_command returns stdout on success and folds stderr into the error
    // message on a non-zero exit status.
    #[test]
    fn run_command_reports_success_and_failure() {
        let output = run_command(
            Command::new("sh").arg("-c").arg("printf 'ok'"),
            "success command",
        )
        .expect("success output");
        assert_eq!(output, b"ok");
        let error = run_command(
            Command::new("sh")
                .arg("-c")
                .arg("printf 'boom' >&2; exit 7"),
            "failing command",
        )
        .expect_err("failing command should error");
        assert!(error.to_string().contains("failing command failed: boom"));
    }
}