//! Hardware H.264 encoder via Windows Media Foundation (Task 7).
//!
//! FIRST-CUT / COMPILE-VERIFIED ONLY. This encoder is wired end-to-end (init ->
//! feed -> drain -> emit `EncodedFrame{h264}`) and is selected only when the
//! agent advertised hardware support AND the server negotiated H.264. It has NOT
//! been validated on real hardware with live frames — that is plan Task 8. On
//! ANY initialization or per-frame failure it surfaces an error; the encoder
//! factory (`create_encoder_for`) downgrades to the raw+Zstd encoder so a
//! session never breaks because of H.264.
//!
//! Pipeline:
//!   BGRA capture --(color::bgra_to_nv12)--> NV12 sample --> MFT(H.264) --> H.264
//!   Annex-B/length-prefixed elementary stream --> proto EncodedFrame.
//!
//! Design notes:
//! - The MFT is enumerated with `MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER,
//!   MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER |
//!   MFT_ENUM_FLAG_TRANSCODE_ONLY, …, MFVideoFormat_H264)` (same probe as
//!   `capability`). We `ActivateObject` the first entry that activates.
//! - Input is configured as NV12, output as H.264, with frame size, frame rate
//!   and an average bitrate derived from `quality`.
//! - Both the SYNCHRONOUS MFT model (ProcessInput/ProcessOutput) and the
//!   ASYNCHRONOUS hardware-MFT model (METransformNeedInput / METransformHaveOutput
//!   events) exist. To keep this first cut bounded and predictable we DRAIN the
//!   MFT synchronously after each input and treat `MF_E_TRANSFORM_NEED_MORE_INPUT`
//!   as "no output this tick". A fully async event-driven loop is a Task-8
//!   refinement (documented below).
//! - `MFT_MESSAGE_SET_D3D_MANAGER` is intentionally NOT set — we feed CPU NV12
//!   buffers (software input samples), which every HW H.264 MFT accepts. D3D11
//!   zero-copy is a later optimization.

#![cfg(windows)]

use super::{EncodedFrame, Encoder};
use crate::capture::CapturedFrame;
use crate::encoder::color;
use crate::proto::{video_frame, EncodedFrame as ProtoEncodedFrame, VideoFrame};
use anyhow::{anyhow, Context, Result};
use windows::Win32::Media::MediaFoundation::{
    IMFActivate, IMFMediaType, IMFSample, IMFTransform, MFCreateMediaType, MFCreateMemoryBuffer,
    MFCreateSample, MFMediaType_Video, MFShutdown, MFStartup, MFTEnumEx, MFVideoFormat_H264,
    MFVideoFormat_NV12, MFVideoInterlace_Progressive, MFSTARTUP_LITE, MFT_CATEGORY_VIDEO_ENCODER,
    MFT_ENUM_FLAG_HARDWARE, MFT_ENUM_FLAG_SORTANDFILTER, MFT_ENUM_FLAG_TRANSCODE_ONLY,
    MFT_MESSAGE_COMMAND_FLUSH, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING,
    MFT_MESSAGE_NOTIFY_END_OF_STREAM, MFT_MESSAGE_NOTIFY_END_STREAMING,
    MFT_MESSAGE_NOTIFY_START_OF_STREAM, MFT_OUTPUT_DATA_BUFFER, MFT_OUTPUT_STREAM_INFO,
    MFT_REGISTER_TYPE_INFO, MF_E_TRANSFORM_NEED_MORE_INPUT, MF_MT_AVG_BITRATE, MF_MT_FRAME_RATE,
    MF_MT_FRAME_SIZE, MF_MT_INTERLACE_MODE, MF_MT_MAJOR_TYPE, MF_MT_PIXEL_ASPECT_RATIO,
    MF_MT_SUBTYPE,
};

/// Hardware H.264 encoder state, created once and reused for every frame.
///
/// Owns the activated Media Foundation transform plus the bookkeeping needed
/// to re-negotiate media types when the capture size changes.
pub struct H264Encoder {
    /// The activated encoder transform (obtained via `ActivateObject`).
    transform: IMFTransform,
    /// Frame width the media types were configured with; a capture-size
    /// change forces re-init.
    width: u32,
    /// Frame height the media types were configured with.
    height: u32,
    /// Quality (1-100) used to derive the bitrate; kept for re-init on resize.
    quality: u32,
    /// Frame sequence counter, incremented (wrapping) once per `encode` call
    /// (mirrors RawEncoder).
    sequence: u32,
    /// When set, the next emitted frame is reported as a keyframe.
    force_keyframe: bool,
    /// Whether `MFT_MESSAGE_NOTIFY_BEGIN_STREAMING` was sent (and so whether
    /// the end-of-stream messages must be sent on re-init / drop).
    streaming: bool,
    /// Reusable NV12 staging buffer (resized on dimension change).
    nv12: Vec<u8>,
    /// Input stream identifier (defaults to 0; overwritten if the transform
    /// reports a nonzero id).
    input_stream_id: u32,
    /// Output stream identifier (defaults to 0; overwritten if the transform
    /// reports a nonzero id).
    output_stream_id: u32,
    /// True if MF was started by THIS encoder and must be shut down on drop.
    mf_started: bool,
}

// SAFETY: `IMFTransform` is a COM interface and therefore not auto-`Send`.
// This impl only allows MOVING the encoder to another thread; it adds no
// `Sync`, so the value can never be shared between threads. Soundness relies
// on the owner (the session, which holds the encoder behind a `&mut`) driving
// it from one capture/encode thread at a time.
// NOTE(review): that single-thread usage is a caller-side convention that
// cannot be verified from this file — confirm before sharing the encoder
// more widely.
unsafe impl Send for H264Encoder {}

impl H264Encoder {
    /// Build a ready-to-use hardware H.264 encoder for the given `quality`
    /// (1-100).
    ///
    /// The encoder is configured for a provisional 1920x1080 frame; `encode`
    /// re-negotiates the media types when the first captured frame has a
    /// different size.
    ///
    /// # Errors
    /// Fails (so the factory can fall back to the raw encoder) when Media
    /// Foundation cannot be started, no hardware H.264 MFT can be activated,
    /// or media-type negotiation is rejected.
    pub fn new(quality: u32) -> Result<Self> {
        // Provisional size only; the real capture size wins on the first frame.
        const DEFAULT_WIDTH: u32 = 1920;
        const DEFAULT_HEIGHT: u32 = 1080;

        Self::with_dimensions(quality, DEFAULT_WIDTH, DEFAULT_HEIGHT)
    }

    /// Start Media Foundation, activate a hardware H.264 MFT and negotiate its
    /// media types for a `width` x `height` frame at the given `quality`.
    ///
    /// # Errors
    /// Returns the first failure from `MFStartup`, encoder activation or
    /// media-type configuration. Every error path balances the `MFStartup`
    /// call made here.
    fn with_dimensions(quality: u32, width: u32, height: u32) -> Result<Self> {
        unsafe {
            // Bring the MF platform up; MFSTARTUP_LITE avoids the
            // sockets/network stack we don't need.
            MFStartup(mf_version(), MFSTARTUP_LITE).context("MFStartup failed")?;

            // No `Self` exists yet, so nothing would run `Drop` for us: balance
            // the MFStartup by hand if activation fails.
            let transform = Self::activate_hw_encoder().map_err(|err| {
                let _ = MFShutdown();
                err
            })?;

            let mut encoder = Self {
                transform,
                width,
                height,
                quality,
                sequence: 0,
                // The very first emitted frame is always reported as a keyframe.
                force_keyframe: true,
                streaming: false,
                nv12: Vec::new(),
                input_stream_id: 0,
                output_stream_id: 0,
                mf_started: true,
            };

            // From here on `Drop` owns cleanup: if negotiation fails, dropping
            // `encoder` shuts MF down and releases the transform.
            encoder.configure_media_types()?;

            Ok(encoder)
        }
    }

    /// Enumerate hardware H.264 encoder MFTs and activate the first one.
    unsafe fn activate_hw_encoder() -> Result<IMFTransform> {
        let output_type = MFT_REGISTER_TYPE_INFO {
            guidMajorType: MFMediaType_Video,
            guidSubtype: MFVideoFormat_H264,
        };

        let mut activate_ptr: *mut Option<IMFActivate> = std::ptr::null_mut();
        let mut count: u32 = 0;

        MFTEnumEx(
            MFT_CATEGORY_VIDEO_ENCODER,
            MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER | MFT_ENUM_FLAG_TRANSCODE_ONLY,
            None,
            Some(&output_type as *const _),
            &mut activate_ptr,
            &mut count,
        )
        .context("MFTEnumEx (hardware H.264) failed")?;

        if count == 0 || activate_ptr.is_null() {
            if !activate_ptr.is_null() {
                windows::Win32::System::Com::CoTaskMemFree(Some(activate_ptr as *const _));
            }
            return Err(anyhow!("no hardware H.264 encoder MFT available"));
        }

        let slice = std::slice::from_raw_parts_mut(activate_ptr, count as usize);

        // Activate the first usable encoder; release every IMFActivate.
        let mut chosen: Option<IMFTransform> = None;
        for entry in slice.iter_mut() {
            if chosen.is_none() {
                if let Some(activate) = entry.as_ref() {
                    if let Ok(transform) = activate.ActivateObject::<IMFTransform>() {
                        chosen = Some(transform);
                    }
                }
            }
            // Release this IMFActivate reference.
            entry.take();
        }
        windows::Win32::System::Com::CoTaskMemFree(Some(activate_ptr as *const _));

        chosen.ok_or_else(|| anyhow!("failed to activate any hardware H.264 encoder MFT"))
    }

    /// Set the H.264 output type and NV12 input type, in the order MF requires
    /// (output type FIRST for encoders, then the matching input type).
    unsafe fn configure_media_types(&mut self) -> Result<()> {
        // Discover the real stream identifiers (most encoders report 0/0).
        let mut input_ids = [0u32; 1];
        let mut output_ids = [0u32; 1];
        // GetStreamIDs may return E_NOTIMPL meaning "ids are 0..n-1"; ignore err.
        let _ = self.transform.GetStreamIDs(&mut input_ids, &mut output_ids);
        // If GetStreamIDs populated nonzero ids use them, else default 0/0.
        if input_ids[0] != 0 {
            self.input_stream_id = input_ids[0];
        }
        if output_ids[0] != 0 {
            self.output_stream_id = output_ids[0];
        }

        let fps_num = 30u32;
        let fps_den = 1u32;
        let bitrate = quality_to_bitrate(self.quality, self.width, self.height);

        // ---- OUTPUT (H.264) ----
        let out_type: IMFMediaType = MFCreateMediaType().context("MFCreateMediaType(out)")?;
        out_type.SetGUID(&MF_MT_MAJOR_TYPE, &MFMediaType_Video)?;
        out_type.SetGUID(&MF_MT_SUBTYPE, &MFVideoFormat_H264)?;
        out_type.SetUINT32(&MF_MT_AVG_BITRATE, bitrate)?;
        set_attr_size(&out_type, &MF_MT_FRAME_SIZE, self.width, self.height)?;
        set_attr_ratio(&out_type, &MF_MT_FRAME_RATE, fps_num, fps_den)?;
        set_attr_ratio(&out_type, &MF_MT_PIXEL_ASPECT_RATIO, 1, 1)?;
        out_type.SetUINT32(&MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive.0 as u32)?;
        self.transform
            .SetOutputType(self.output_stream_id, &out_type, 0)
            .context("SetOutputType(H264)")?;

        // ---- INPUT (NV12) ----
        let in_type: IMFMediaType = MFCreateMediaType().context("MFCreateMediaType(in)")?;
        in_type.SetGUID(&MF_MT_MAJOR_TYPE, &MFMediaType_Video)?;
        in_type.SetGUID(&MF_MT_SUBTYPE, &MFVideoFormat_NV12)?;
        set_attr_size(&in_type, &MF_MT_FRAME_SIZE, self.width, self.height)?;
        set_attr_ratio(&in_type, &MF_MT_FRAME_RATE, fps_num, fps_den)?;
        set_attr_ratio(&in_type, &MF_MT_PIXEL_ASPECT_RATIO, 1, 1)?;
        in_type.SetUINT32(&MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive.0 as u32)?;
        self.transform
            .SetInputType(self.input_stream_id, &in_type, 0)
            .context("SetInputType(NV12)")?;

        Ok(())
    }

    /// Put the transform into the streaming state, once.
    ///
    /// Sends `NOTIFY_BEGIN_STREAMING` followed by `NOTIFY_START_OF_STREAM` and
    /// records that in `self.streaming`; later calls are no-ops. If either
    /// message fails the flag stays clear, so the next call tries again.
    ///
    /// # Safety
    /// Calls raw Media Foundation APIs on `self.transform`.
    unsafe fn ensure_streaming(&mut self) -> Result<()> {
        if self.streaming {
            return Ok(());
        }

        let mft = &self.transform;
        mft.ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0)
            .context("NOTIFY_BEGIN_STREAMING")?;
        mft.ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0)
            .context("NOTIFY_START_OF_STREAM")?;

        self.streaming = true;
        Ok(())
    }

    /// Re-initialize the encoder for a new frame size (capture resolution
    /// change).
    ///
    /// If the transform is streaming it is flushed and told the stream has
    /// ended (best effort — failures of those messages are ignored), then the
    /// media types are re-negotiated for `width` x `height`. The next emitted
    /// frame is flagged as a keyframe.
    ///
    /// # Errors
    /// Returns the media-type negotiation error. In that case the recorded
    /// dimensions are rolled back to their previous values, so the next frame
    /// of the new size triggers another re-init attempt instead of being fed
    /// to a transform that was never configured for it.
    ///
    /// # Safety
    /// Calls raw Media Foundation APIs on `self.transform`.
    unsafe fn reinit_for_size(&mut self, width: u32, height: u32) -> Result<()> {
        if self.streaming {
            let _ = self.transform.ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, 0);
            let _ = self
                .transform
                .ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0);
            let _ = self
                .transform
                .ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, 0);
            self.streaming = false;
        }

        // `configure_media_types` reads the size from `self`, so it has to be
        // stored first; remember the old size so a failure can be undone.
        let (prev_width, prev_height) = (self.width, self.height);
        self.width = width;
        self.height = height;
        self.force_keyframe = true;

        if let Err(err) = self.configure_media_types() {
            // Do not claim a size the transform did not accept.
            self.width = prev_width;
            self.height = prev_height;
            return Err(err);
        }
        Ok(())
    }

    /// Wrap an NV12 byte buffer into an `IMFSample` with the given timestamp.
    ///
    /// A free associated fn (does not borrow `self`) so the caller can pass
    /// `&self.nv12` without a clone while `self` is mutably borrowed elsewhere.
    /// The bytes are copied into a freshly allocated MF memory buffer; the
    /// sample is stamped with `pts_100ns` and a fixed duration of 333_667
    /// (100ns units).
    ///
    /// # Errors
    /// Fails if the frame does not fit a 32-bit MF buffer length, if any MF
    /// allocation / lock call fails, or if the locked buffer is smaller than
    /// the frame.
    ///
    /// # Safety
    /// Calls raw Media Foundation APIs and copies through the raw pointer
    /// returned by `IMFMediaBuffer::Lock`.
    unsafe fn make_input_sample(nv12: &[u8], pts_100ns: i64) -> Result<IMFSample> {
        // MF buffer lengths are 32-bit; refuse rather than silently truncate.
        if nv12.len() > u32::MAX as usize {
            return Err(anyhow!(
                "NV12 frame of {} bytes exceeds the MF buffer size limit",
                nv12.len()
            ));
        }
        let byte_len = nv12.len() as u32;

        let sample: IMFSample = MFCreateSample().context("MFCreateSample")?;
        let buffer = MFCreateMemoryBuffer(byte_len).context("MFCreateMemoryBuffer")?;

        // Lock, copy NV12 in, set current length, unlock.
        let mut data_ptr: *mut u8 = std::ptr::null_mut();
        let mut max_len: u32 = 0;
        buffer
            .Lock(&mut data_ptr, Some(&mut max_len), None)
            .context("IMFMediaBuffer::Lock")?;
        if (max_len as usize) < nv12.len() || data_ptr.is_null() {
            let _ = buffer.Unlock();
            return Err(anyhow!("MF buffer too small for NV12 frame"));
        }
        std::ptr::copy_nonoverlapping(nv12.as_ptr(), data_ptr, nv12.len());

        // Always pair the Lock with an Unlock: run both calls first and only
        // then propagate errors, so a SetCurrentLength failure cannot return
        // with the buffer still locked.
        let set_len = buffer.SetCurrentLength(byte_len);
        let unlocked = buffer.Unlock();
        set_len.context("IMFMediaBuffer::SetCurrentLength")?;
        unlocked.context("IMFMediaBuffer::Unlock")?;

        sample.AddBuffer(&buffer)?;
        sample.SetSampleTime(pts_100ns)?;
        // 33.367ms per frame at ~30fps, in 100ns units.
        sample.SetSampleDuration(333_667)?;
        Ok(sample)
    }

    /// Drain one available output sample, if any.
    ///
    /// Returns the encoded bytes and whether the MFT flagged the sample as a
    /// keyframe (clean point). `Ok(None)` means the MFT needs more input
    /// before it can produce output this tick (or reported success without
    /// handing back a sample).
    ///
    /// # Errors
    /// Fails if the output stream info cannot be queried, an output sample
    /// cannot be allocated, `ProcessOutput` fails with anything other than
    /// `MF_E_TRANSFORM_NEED_MORE_INPUT`, or the produced sample cannot be read.
    ///
    /// # Safety
    /// Calls raw Media Foundation APIs on `self.transform`.
    unsafe fn drain_one_output(&mut self) -> Result<Option<(Vec<u8>, bool)>> {
        let stream_info: MFT_OUTPUT_STREAM_INFO = self
            .transform
            .GetOutputStreamInfo(self.output_stream_id)
            .context("GetOutputStreamInfo")?;

        // If the MFT does not allocate its own output samples we must provide one.
        const MFT_OUTPUT_STREAM_PROVIDES_SAMPLES: u32 = 0x100;
        let mft_provides = stream_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES != 0;

        let mut out_buffer = MFT_OUTPUT_DATA_BUFFER {
            dwStreamID: self.output_stream_id,
            ..Default::default()
        };

        if !mft_provides {
            // Size the buffer from what the MFT asks for (never zero bytes).
            let alloc_size = stream_info.cbSize.max(1);
            let sample: IMFSample = MFCreateSample().context("MFCreateSample(out)")?;
            let buffer = MFCreateMemoryBuffer(alloc_size).context("MFCreateMemoryBuffer(out)")?;
            sample.AddBuffer(&buffer)?;
            out_buffer.pSample = std::mem::ManuallyDrop::new(Some(sample));
        }

        let mut status: u32 = 0;
        let mut bufs = [out_buffer];
        let hr = self.transform.ProcessOutput(0, &mut bufs, &mut status);

        // Both interface members are `ManuallyDrop`, so nothing is released
        // unless we take ownership back. Do so on EVERY path, success or not.
        // Take ownership of whatever sample is now in the buffer (ours or MFT's).
        let produced = std::mem::ManuallyDrop::take(&mut bufs[0].pSample);
        // The MFT may also hand back an event collection that the caller must
        // release; we have no use for the events, so dropping it is enough.
        drop(std::mem::ManuallyDrop::take(&mut bufs[0].pEvents));

        match hr {
            Ok(()) => {
                let Some(sample) = produced else {
                    return Ok(None);
                };
                let bytes = sample_to_vec(&sample)?;
                let keyframe = sample_is_keyframe(&sample);
                Ok(Some((bytes, keyframe)))
            }
            Err(e) if e.code() == MF_E_TRANSFORM_NEED_MORE_INPUT => Ok(None),
            Err(e) => Err(anyhow!("ProcessOutput failed: {e:#}")),
        }
    }
}

impl Encoder for H264Encoder {
    fn encode(&mut self, frame: &CapturedFrame) -> Result<EncodedFrame> {
        self.sequence = self.sequence.wrapping_add(1);

        // H.264 4:2:0 needs even dimensions. Reject odd captures up front so we
        // surface a clean error (the factory already fell back to raw if HW was
        // missing; a per-frame error here lets the session log + continue).
        if !frame.width.is_multiple_of(2) || !frame.height.is_multiple_of(2) {
            return Err(anyhow!(
                "H.264 requires even dimensions, got {}x{}",
                frame.width,
                frame.height
            ));
        }

        unsafe {
            // Re-init on a resolution change.
            if frame.width != self.width || frame.height != self.height {
                self.reinit_for_size(frame.width, frame.height)
                    .context("H.264 re-init for new frame size")?;
            }

            self.ensure_streaming()?;

            // BGRA -> NV12 into the reusable staging buffer.
            let need = color::nv12_size(frame.width, frame.height);
            if self.nv12.len() != need {
                self.nv12.resize(need, 0);
            }
            color::bgra_to_nv12(&frame.data, frame.width, frame.height, &mut self.nv12)
                .map_err(|e| anyhow!("BGRA->NV12 failed: {e}"))?;

            // PTS in 100ns units derived from the frame's capture instant.
            let pts_100ns = (frame.timestamp.elapsed().as_nanos() / 100) as i64;
            let sample = Self::make_input_sample(&self.nv12, pts_100ns)?;

            // Feed the encoder. NEED_MORE_INPUT is normal back-pressure handling;
            // for the synchronous first cut we only push one frame per tick.
            match self
                .transform
                .ProcessInput(self.input_stream_id, &sample, 0)
            {
                Ok(()) => {}
                Err(e) if e.code() == MF_E_TRANSFORM_NEED_MORE_INPUT => {}
                Err(e) => return Err(anyhow!("ProcessInput failed: {e:#}")),
            }

            // Drain whatever output is ready.
            let Some((data, mft_keyframe)) = self.drain_one_output()? else {
                // No compressed output yet (encoder latency / GOP buffering).
                // Emit an empty frame so the session skips sending this tick.
                return Ok(EncodedFrame {
                    frame: VideoFrame::default(),
                    size: 0,
                    is_keyframe: false,
                });
            };

            let is_keyframe = mft_keyframe || self.force_keyframe;
            self.force_keyframe = false;

            let size = data.len();
            let encoded = ProtoEncodedFrame {
                data,
                keyframe: is_keyframe,
                pts: pts_100ns,
                dts: pts_100ns,
            };

            Ok(EncodedFrame {
                frame: VideoFrame {
                    timestamp: frame.timestamp.elapsed().as_millis() as i64,
                    display_id: frame.display_id as i32,
                    sequence: self.sequence as i32,
                    encoding: Some(video_frame::Encoding::H264(encoded)),
                },
                size,
                is_keyframe,
            })
        }
    }

    /// Ask for the next emitted frame to be reported as a keyframe.
    ///
    /// This only sets `force_keyframe`; it does NOT instruct the MFT to
    /// produce an IDR frame, so the flagged frame is a keyframe in name only
    /// unless the encoder happened to emit one.
    fn request_keyframe(&mut self) {
        // A precise force-IDR uses the MFT codec API
        // (CODECAPI_AVEncVideoForceKeyFrame); for the first cut we flag the next
        // emitted frame as a keyframe so the viewer treats it as a clean point.
        self.force_keyframe = true;
    }

    /// Stable identifier of this encoder implementation.
    fn name(&self) -> &str {
        "h264-mediafoundation"
    }
}

// Teardown: end the stream if one was started, then balance the `MFStartup`
// performed at construction.
impl Drop for H264Encoder {
    fn drop(&mut self) {
        unsafe {
            // Only send the end-of-stream messages if streaming was actually
            // begun. Errors are ignored: there is nothing useful to do with
            // them during drop.
            if self.streaming {
                let _ = self
                    .transform
                    .ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0);
                let _ = self
                    .transform
                    .ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, 0);
            }
            // NOTE(review): Rust drops fields only AFTER this body returns, so
            // the transform is released after the `MFShutdown` below, not
            // before it. If the transform must be released while MF is still
            // running, the field needs to become explicitly droppable (e.g.
            // `ManuallyDrop`/`Option`) — confirm on real hardware.
            // The IMFTransform releases when `self.transform` drops.
            if self.mf_started {
                let _ = MFShutdown();
            }
        }
    }
}

/// MF version word expected by `MFStartup`:
/// `MF_VERSION = (MF_SDK_VERSION << 16) | MF_API_VERSION`.
///
/// The SDK version occupies the high 16 bits and the API version the low 16
/// bits, giving `0x0002_0070`.
fn mf_version() -> u32 {
    const MF_SDK_VERSION: u32 = 0x0002;
    const MF_API_VERSION: u32 = 0x0070;
    (MF_SDK_VERSION << 16) | MF_API_VERSION
}

/// Map the 1-100 quality knob and the frame area to a target average bitrate
/// in bits per second.
///
/// `quality` is clamped to 1..=100 and converted to a bits-per-pixel budget
/// that grows linearly from 0.010 (q=1) to 0.100 (q=100). The budget is
/// applied to `width * height` pixels at 30 frames per second and the result
/// is clamped to 500 kbps ..= 50 Mbps.
fn quality_to_bitrate(quality: u32, width: u32, height: u32) -> u32 {
    const FRAMES_PER_SECOND: u64 = 30;
    const MIN_BPS: u64 = 500_000;
    const MAX_BPS: u64 = 50_000_000;

    let quality = u64::from(quality.clamp(1, 100));
    // Thousandths of a bit per pixel per frame: 10 ..= 100.
    let millibits_per_pixel = 10 + quality * 90 / 100;

    // u32 * u32 always fits in u64; the later products saturate instead of
    // wrapping, and the final clamp keeps the value inside u32 range.
    let pixels_per_second =
        (u64::from(width) * u64::from(height)).saturating_mul(FRAMES_PER_SECOND);
    let bits_per_second = pixels_per_second.saturating_mul(millibits_per_pixel) / 1000;

    bits_per_second.clamp(MIN_BPS, MAX_BPS) as u32
}

/// Store a frame size on `media_type` under `key` as one 64-bit attribute:
/// `width` in the high 32 bits, `height` in the low 32 bits.
///
/// # Errors
/// Propagates the `SetUINT64` failure.
///
/// # Safety
/// Calls a raw Media Foundation API on `media_type`.
#[cfg(windows)]
unsafe fn set_attr_size(
    media_type: &IMFMediaType,
    key: &windows::core::GUID,
    width: u32,
    height: u32,
) -> Result<()> {
    let high = u64::from(width) << 32;
    let low = u64::from(height);
    media_type.SetUINT64(key, high | low)?;
    Ok(())
}

/// Store a ratio on `media_type` under `key` as one 64-bit attribute:
/// numerator in the high 32 bits, denominator in the low 32 bits.
///
/// # Errors
/// Propagates the `SetUINT64` failure.
///
/// # Safety
/// Calls a raw Media Foundation API on `media_type`.
#[cfg(windows)]
unsafe fn set_attr_ratio(
    media_type: &IMFMediaType,
    key: &windows::core::GUID,
    num: u32,
    den: u32,
) -> Result<()> {
    let high = u64::from(num) << 32;
    let low = u64::from(den);
    media_type.SetUINT64(key, high | low)?;
    Ok(())
}

/// Copy the payload of an `IMFSample` into an owned `Vec<u8>`.
///
/// The sample's buffers are first collapsed into one contiguous buffer, which
/// is locked, copied up to its CURRENT length, and unlocked again (unlock
/// failures are ignored). A null pointer or zero length yields an empty Vec.
///
/// # Errors
/// Fails if the contiguous conversion or the lock fails.
///
/// # Safety
/// Calls raw Media Foundation APIs and reads through the raw pointer returned
/// by `IMFMediaBuffer::Lock`.
#[cfg(windows)]
unsafe fn sample_to_vec(sample: &IMFSample) -> Result<Vec<u8>> {
    let contiguous = sample
        .ConvertToContiguousBuffer()
        .context("ConvertToContiguousBuffer")?;

    let mut data: *mut u8 = std::ptr::null_mut();
    let mut current_len: u32 = 0;
    contiguous
        .Lock(&mut data, None, Some(&mut current_len))
        .context("output buffer Lock")?;

    let mut bytes = Vec::new();
    if !data.is_null() && current_len != 0 {
        bytes.extend_from_slice(std::slice::from_raw_parts(data, current_len as usize));
    }

    let _ = contiguous.Unlock();
    Ok(bytes)
}

/// Report whether `sample` carries a nonzero "clean point" (keyframe)
/// attribute. A missing or unreadable attribute counts as "not a keyframe".
///
/// # Safety
/// Calls a raw Media Foundation API on `sample`.
#[cfg(windows)]
unsafe fn sample_is_keyframe(sample: &IMFSample) -> bool {
    use windows::Win32::Media::MediaFoundation::MFSampleExtension_CleanPoint;
    matches!(
        sample.GetUINT32(&MFSampleExtension_CleanPoint),
        Ok(flag) if flag != 0
    )
}