feat(agent,server): v2 secure-session-core Task 7 - HW H.264 + negotiated raw fallback

SPEC-002 Phase 1 Task 7 (the last), code-reviewed APPROVED, locally verified (cargo fmt + clippy -D warnings exit 0 + cargo test --workspace 89 pass + build). - Encoder trait + factory: RawEncoder (salvaged, UNCHANGED) and H264Encoder, selected by negotiation; factory falls back to raw on H.264 init failure. - Negotiation: agent advertises supports_h264 (MFTEnumEx HW probe, cached) in AgentStatus; server picks the codec via select_video_codec(supports, prefer) and stamps StartStream.video_codec; agent re-guards on local HW. Policy constant DEFAULT_PREFER_H264 = false, so RAW is negotiated for every session today - H.264 stays dormant until live hardware validation (Task 8). - MF H.264 encoder (h264.rs, FIRST-CUT / compile-verified-only): HW encoder MFT, BGRA->NV12 (color.rs, unit-tested), sync drain, fall-back-to-raw on any failure. - Viewer H.264 decoder (decoder.rs, FIRST-CUT): MF decoder on a dedicated COM thread; drops+logs on failure, raw render path untouched. - proto additive: VideoCodec enum, StartStream.video_codec=3, SessionResponse.video_codec=5, AgentStatus.supports_h264=11. - Raw+Zstd path byte-for-byte unchanged; remains the guaranteed default/fallback. Review confirmed unsafe impl Send for H264Encoder is sound (single-owned &mut on the block_on thread; session future never spawned) and every MF failure degrades to raw. H.264 is NOT claimed functional - compile/clippy/build-verified only; live validation + force-IDR + the no-spawn-invariant doc are Task 8 go-live gates. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 10:35:04 -07:00
parent bb73ba667f
commit f9bdecbfdb
12 changed files with 1885 additions and 23 deletions
--- a/agent/src/encoder/h264.rs
+++ b/agent/src/encoder/h264.rs
@@ -0,0 +1,515 @@
+//! Hardware H.264 encoder via Windows Media Foundation (Task 7).
+//!
+//! FIRST-CUT / COMPILE-VERIFIED ONLY. This encoder is wired end-to-end (init ->
+//! feed -> drain -> emit `EncodedFrame{h264}`) and is selected only when the
+//! agent advertised hardware support AND the server negotiated H.264. It has NOT
+//! been validated on real hardware with live frames — that is plan Task 8. On
+//! ANY initialization or per-frame failure it surfaces an error; the encoder
+//! factory (`create_encoder_for`) downgrades to the raw+Zstd encoder so a
+//! session never breaks because of H.264.
+//!
+//! Pipeline:
+//!   BGRA capture --(color::bgra_to_nv12)--> NV12 sample --> MFT(H.264) --> H.264
+//!   Annex-B/length-prefixed elementary stream --> proto EncodedFrame.
+//!
+//! Design notes:
+//! - The MFT is enumerated with `MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER,
+//!   MFT_ENUM_FLAG_HARDWARE, …, MFVideoFormat_H264)` (same probe as
+//!   `capability`). We `ActivateObject` the first match.
+//! - Input is configured as NV12, output as H.264, with frame size, frame rate
+//!   and an average bitrate derived from `quality`.
+//! - Both the SYNCHRONOUS MFT model (ProcessInput/ProcessOutput) and the
+//!   ASYNCHRONOUS hardware-MFT model (METransformNeedInput / METransformHaveOutput
+//!   events) exist. To keep this first cut bounded and predictable we DRAIN the
+//!   MFT synchronously after each input and treat `MF_E_TRANSFORM_NEED_MORE_INPUT`
+//!   as "no output this tick". A fully async event-driven loop is a Task-8
+//!   refinement (documented below).
+//! - `MFT_MESSAGE_SET_D3D_MANAGER` is intentionally NOT set — we feed CPU NV12
+//!   buffers (software input samples), which every HW H.264 MFT accepts. D3D11
+//!   zero-copy is a later optimization.
+
+#![cfg(windows)]
+
+use super::{EncodedFrame, Encoder};
+use crate::capture::CapturedFrame;
+use crate::encoder::color;
+use crate::proto::{video_frame, EncodedFrame as ProtoEncodedFrame, VideoFrame};
+use anyhow::{anyhow, Context, Result};
+use windows::Win32::Media::MediaFoundation::{
+    IMFActivate, IMFMediaType, IMFSample, IMFTransform, MFCreateMediaType, MFCreateMemoryBuffer,
+    MFCreateSample, MFMediaType_Video, MFShutdown, MFStartup, MFTEnumEx, MFVideoFormat_H264,
+    MFVideoFormat_NV12, MFVideoInterlace_Progressive, MFSTARTUP_LITE, MFT_CATEGORY_VIDEO_ENCODER,
+    MFT_ENUM_FLAG_HARDWARE, MFT_ENUM_FLAG_SORTANDFILTER, MFT_ENUM_FLAG_TRANSCODE_ONLY,
+    MFT_MESSAGE_COMMAND_FLUSH, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING,
+    MFT_MESSAGE_NOTIFY_END_OF_STREAM, MFT_MESSAGE_NOTIFY_END_STREAMING,
+    MFT_MESSAGE_NOTIFY_START_OF_STREAM, MFT_OUTPUT_DATA_BUFFER, MFT_OUTPUT_STREAM_INFO,
+    MFT_REGISTER_TYPE_INFO, MF_E_TRANSFORM_NEED_MORE_INPUT, MF_MT_AVG_BITRATE, MF_MT_FRAME_RATE,
+    MF_MT_FRAME_SIZE, MF_MT_INTERLACE_MODE, MF_MT_MAJOR_TYPE, MF_MT_PIXEL_ASPECT_RATIO,
+    MF_MT_SUBTYPE,
+};
+
+/// Encoder-internal state for one activated Media Foundation H.264 transform; created once and reused per frame.
+pub struct H264Encoder {
+    /// The activated encoder transform (obtained from `activate_hw_encoder`).
+    transform: IMFTransform,
+    /// Frame dimensions the media types were last configured for; `encode` re-inits when a frame differs.
+    width: u32,
+    height: u32,
+    /// Quality (1-100) used to derive the bitrate; kept so a resize re-init can recompute it.
+    quality: u32,
+    /// Frame sequence counter, incremented (wrapping) at the start of every `encode` call.
+    sequence: u32,
+    /// When set, the next emitted frame is flagged as a keyframe (see `request_keyframe`).
+    force_keyframe: bool,
+    /// Whether `MFT_MESSAGE_NOTIFY_BEGIN_STREAMING` was sent (and not yet ended).
+    streaming: bool,
+    /// Reusable NV12 staging buffer (resized when the required NV12 size changes).
+    nv12: Vec<u8>,
+    /// Input/output stream identifiers; default 0/0, overridden by nonzero `GetStreamIDs` results.
+    input_stream_id: u32,
+    output_stream_id: u32,
+    /// True if MF was started by THIS encoder and must be shut down on drop.
+    mf_started: bool,
+}
+
+// SAFETY: IMFTransform is a COM interface and is not auto-Send. The encoder is only
+// ever reached through `&mut self`, so it may be moved between threads but is never
+// shared. NOTE(review): single-thread use of the MFT is not enforced in this file — confirm at the owning session.
+unsafe impl Send for H264Encoder {}
+
+impl H264Encoder {
+    /// Build a hardware H.264 encoder using a provisional 1920x1080 frame size.
+    ///
+    /// `encode` re-negotiates the media types on the first frame whose size
+    /// differs from this default.
+    ///
+    /// # Errors
+    /// Propagates any failure from `with_dimensions` (Media Foundation startup,
+    /// hardware-encoder activation, or media-type negotiation) so the caller
+    /// can fall back to another encoder.
+    pub fn new(quality: u32) -> Result<Self> {
+        const DEFAULT_WIDTH: u32 = 1920;
+        const DEFAULT_HEIGHT: u32 = 1080;
+        Self::with_dimensions(quality, DEFAULT_WIDTH, DEFAULT_HEIGHT)
+    }
+
+    /// Start Media Foundation, activate a hardware H.264 MFT and negotiate its
+    /// media types for a `width` x `height` frame.
+    ///
+    /// # Errors
+    /// Fails if `MFStartup` fails, no hardware encoder can be activated, or the
+    /// media types are rejected. Every failure after a successful `MFStartup`
+    /// balances it with `MFShutdown`.
+    fn with_dimensions(quality: u32, width: u32, height: u32) -> Result<Self> {
+        unsafe {
+            // MFSTARTUP_LITE skips the sockets/network stack, which is not needed here.
+            MFStartup(mf_version(), MFSTARTUP_LITE).context("MFStartup failed")?;
+
+            // No encoder value exists yet, so its Drop cannot clean up for us:
+            // balance the MFStartup by hand before reporting the failure.
+            let transform = Self::activate_hw_encoder().map_err(|err| {
+                let _ = MFShutdown();
+                err
+            })?;
+
+            let mut encoder = Self {
+                transform,
+                width,
+                height,
+                quality,
+                sequence: 0,
+                force_keyframe: true,
+                streaming: false,
+                nv12: Vec::new(),
+                input_stream_id: 0,
+                output_stream_id: 0,
+                mf_started: true,
+            };
+
+            // From here on an early return drops `encoder`, whose Drop impl
+            // shuts MF down and releases the transform.
+            encoder.configure_media_types()?;
+
+            Ok(encoder)
+        }
+    }
+
+    /// Enumerate hardware H.264 encoder MFTs via `MFTEnumEx` and activate the first one that activates; errors if none is found or none activates.
+    unsafe fn activate_hw_encoder() -> Result<IMFTransform> {
+        let output_type = MFT_REGISTER_TYPE_INFO {
+            guidMajorType: MFMediaType_Video,
+            guidSubtype: MFVideoFormat_H264,
+        };
+
+        let mut activate_ptr: *mut Option<IMFActivate> = std::ptr::null_mut(); // out-param: array of activation objects, freed below with CoTaskMemFree
+        let mut count: u32 = 0; // out-param: number of entries in that array
+
+        MFTEnumEx(
+            MFT_CATEGORY_VIDEO_ENCODER,
+            MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_SORTANDFILTER | MFT_ENUM_FLAG_TRANSCODE_ONLY,
+            None, // no input-type filter
+            Some(&output_type as *const _), // only encoders that output H.264
+            &mut activate_ptr,
+            &mut count,
+        )
+        .context("MFTEnumEx (hardware H.264) failed")?;
+
+        if count == 0 || activate_ptr.is_null() {
+            if !activate_ptr.is_null() { // an empty array may still have been allocated; free it
+                windows::Win32::System::Com::CoTaskMemFree(Some(activate_ptr as *const _));
+            }
+            return Err(anyhow!("no hardware H.264 encoder MFT available"));
+        }
+
+        let slice = std::slice::from_raw_parts_mut(activate_ptr, count as usize); // view the returned array as `count` entries
+
+        // Keep the first encoder that activates; every IMFActivate entry is released either way.
+        let mut chosen: Option<IMFTransform> = None;
+        for entry in slice.iter_mut() {
+            if chosen.is_none() {
+                if let Some(activate) = entry.as_ref() {
+                    if let Ok(transform) = activate.ActivateObject::<IMFTransform>() { // activation failures are skipped, not reported
+                        chosen = Some(transform);
+                    }
+                }
+            }
+            // Release this IMFActivate reference (taking it out of the slot drops it).
+            entry.take();
+        }
+        windows::Win32::System::Com::CoTaskMemFree(Some(activate_ptr as *const _)); // free the array itself; `slice` must not be used past this point
+
+        chosen.ok_or_else(|| anyhow!("failed to activate any hardware H.264 encoder MFT"))
+    }
+
+    /// Set the H.264 output type and NV12 input type for `self.width` x `self.height`, output type FIRST
+    /// and then the matching input type. Errors if a media type cannot be built or the MFT rejects it.
+    unsafe fn configure_media_types(&mut self) -> Result<()> {
+        // Discover the real stream identifiers (only the first id of each kind is read).
+        let mut input_ids = [0u32; 1];
+        let mut output_ids = [0u32; 1];
+        // GetStreamIDs may return E_NOTIMPL meaning "ids are 0..n-1"; the error is ignored on purpose.
+        let _ = self.transform.GetStreamIDs(&mut input_ids, &mut output_ids);
+        // Nonzero ids replace the stored ones; otherwise the current values (0/0 after construction) are kept.
+        if input_ids[0] != 0 {
+            self.input_stream_id = input_ids[0];
+        }
+        if output_ids[0] != 0 {
+            self.output_stream_id = output_ids[0];
+        }
+
+        let fps_num = 30u32; // fixed 30/1 frame rate advertised on both media types
+        let fps_den = 1u32;
+        let bitrate = quality_to_bitrate(self.quality, self.width, self.height); // average bitrate in bits per second
+
+        // ---- OUTPUT (H.264) ----
+        let out_type: IMFMediaType = MFCreateMediaType().context("MFCreateMediaType(out)")?;
+        out_type.SetGUID(&MF_MT_MAJOR_TYPE, &MFMediaType_Video)?;
+        out_type.SetGUID(&MF_MT_SUBTYPE, &MFVideoFormat_H264)?;
+        out_type.SetUINT32(&MF_MT_AVG_BITRATE, bitrate)?;
+        set_attr_size(&out_type, &MF_MT_FRAME_SIZE, self.width, self.height)?;
+        set_attr_ratio(&out_type, &MF_MT_FRAME_RATE, fps_num, fps_den)?;
+        set_attr_ratio(&out_type, &MF_MT_PIXEL_ASPECT_RATIO, 1, 1)?; // square pixels
+        out_type.SetUINT32(&MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive.0 as u32)?;
+        self.transform
+            .SetOutputType(self.output_stream_id, &out_type, 0)
+            .context("SetOutputType(H264)")?;
+
+        // ---- INPUT (NV12) ---- same size/rate/aspect as the output; no bitrate attribute
+        let in_type: IMFMediaType = MFCreateMediaType().context("MFCreateMediaType(in)")?;
+        in_type.SetGUID(&MF_MT_MAJOR_TYPE, &MFMediaType_Video)?;
+        in_type.SetGUID(&MF_MT_SUBTYPE, &MFVideoFormat_NV12)?;
+        set_attr_size(&in_type, &MF_MT_FRAME_SIZE, self.width, self.height)?;
+        set_attr_ratio(&in_type, &MF_MT_FRAME_RATE, fps_num, fps_den)?;
+        set_attr_ratio(&in_type, &MF_MT_PIXEL_ASPECT_RATIO, 1, 1)?;
+        in_type.SetUINT32(&MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive.0 as u32)?;
+        self.transform
+            .SetInputType(self.input_stream_id, &in_type, 0)
+            .context("SetInputType(NV12)")?;
+
+        Ok(())
+    }
+
+    /// Send the begin-streaming and start-of-stream notifications once.
+    ///
+    /// Does nothing when `self.streaming` is already set. The flag is only set
+    /// after both messages were accepted, so a failed attempt is retried on the
+    /// next call.
+    unsafe fn ensure_streaming(&mut self) -> Result<()> {
+        if self.streaming {
+            return Ok(());
+        }
+
+        let startup_messages = [
+            (MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, "NOTIFY_BEGIN_STREAMING"),
+            (MFT_MESSAGE_NOTIFY_START_OF_STREAM, "NOTIFY_START_OF_STREAM"),
+        ];
+        for (message, label) in startup_messages {
+            self.transform.ProcessMessage(message, 0).context(label)?;
+        }
+
+        self.streaming = true;
+        Ok(())
+    }
+
+    /// Re-initialize the encoder for a new frame size (capture resolution change).
+    ///
+    /// If streaming was started, the MFT is flushed and told the stream ended
+    /// (best effort; failures of those messages are ignored). The media types
+    /// are then re-negotiated for `width` x `height` and the next emitted frame
+    /// is flagged as a keyframe.
+    ///
+    /// # Errors
+    /// Returns the negotiation error from `configure_media_types`. In that case
+    /// the stored dimensions are rolled back to their previous values, so the
+    /// next frame of the new size attempts the re-init again instead of being
+    /// fed to a transform that was never configured for it.
+    unsafe fn reinit_for_size(&mut self, width: u32, height: u32) -> Result<()> {
+        if self.streaming {
+            let _ = self.transform.ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, 0);
+            let _ = self
+                .transform
+                .ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0);
+            let _ = self
+                .transform
+                .ProcessMessage(MFT_MESSAGE_NOTIFY_END_STREAMING, 0);
+            self.streaming = false;
+        }
+
+        // `configure_media_types` reads the size from `self`, so the new size has
+        // to be stored first; remember the old one to undo that on failure.
+        let (prev_width, prev_height) = (self.width, self.height);
+        self.width = width;
+        self.height = height;
+        self.force_keyframe = true;
+
+        if let Err(err) = self.configure_media_types() {
+            self.width = prev_width;
+            self.height = prev_height;
+            return Err(err);
+        }
+        Ok(())
+    }
+
+    /// Wrap an NV12 byte buffer into an `IMFSample` with the given timestamp.
+    ///
+    /// A free associated fn (does not borrow `self`) so the caller can pass
+    /// `&self.nv12` without a clone while `self` is mutably borrowed elsewhere.
+    /// The bytes are copied into a freshly allocated MF memory buffer; `nv12`
+    /// is not retained.
+    ///
+    /// # Errors
+    /// Fails if the frame does not fit in a 32-bit MF buffer length, if any MF
+    /// allocation/lock call fails, or if the locked buffer is smaller than the
+    /// frame. The buffer is always unlocked before an error is returned.
+    unsafe fn make_input_sample(nv12: &[u8], pts_100ns: i64) -> Result<IMFSample> {
+        // One frame at the 30/1 rate set on the media types, in 100ns units.
+        const FRAME_DURATION_100NS: i64 = 10_000_000 / 30;
+
+        // MF buffer lengths are 32-bit; refuse instead of silently truncating.
+        let len = u32::try_from(nv12.len())
+            .map_err(|_| anyhow!("NV12 frame of {} bytes exceeds MF buffer limit", nv12.len()))?;
+
+        let sample: IMFSample = MFCreateSample().context("MFCreateSample")?;
+        let buffer = MFCreateMemoryBuffer(len).context("MFCreateMemoryBuffer")?;
+
+        // Lock, copy NV12 in, set current length, unlock.
+        let mut data_ptr: *mut u8 = std::ptr::null_mut();
+        let mut max_len: u32 = 0;
+        buffer
+            .Lock(&mut data_ptr, Some(&mut max_len), None)
+            .context("IMFMediaBuffer::Lock")?;
+        if max_len < len || data_ptr.is_null() {
+            let _ = buffer.Unlock();
+            return Err(anyhow!("MF buffer too small for NV12 frame"));
+        }
+        std::ptr::copy_nonoverlapping(nv12.as_ptr(), data_ptr, nv12.len());
+
+        // Unlock unconditionally, then report the first failure: returning early
+        // on a SetCurrentLength error would leave the buffer locked.
+        let length_set = buffer.SetCurrentLength(len);
+        let unlocked = buffer.Unlock();
+        length_set?;
+        unlocked?;
+
+        sample.AddBuffer(&buffer)?;
+        sample.SetSampleTime(pts_100ns)?;
+        sample.SetSampleDuration(FRAME_DURATION_100NS)?;
+        Ok(sample)
+    }
+
+    /// Drain one available output sample, if any.
+    ///
+    /// Returns the encoded bytes and whether the MFT flagged the sample as a
+    /// clean point (keyframe). `Ok(None)` means the MFT reported
+    /// `MF_E_TRANSFORM_NEED_MORE_INPUT`, or succeeded without handing back a
+    /// sample, so there is nothing to emit this tick.
+    ///
+    /// # Errors
+    /// Fails if the output stream info cannot be queried, an output sample
+    /// cannot be allocated or read back, or `ProcessOutput` fails with any
+    /// other error code.
+    unsafe fn drain_one_output(&mut self) -> Result<Option<(Vec<u8>, bool)>> {
+        let stream_info: MFT_OUTPUT_STREAM_INFO = self
+            .transform
+            .GetOutputStreamInfo(self.output_stream_id)
+            .context("GetOutputStreamInfo")?;
+
+        // If the MFT does not allocate its own output samples we must provide one.
+        const MFT_OUTPUT_STREAM_PROVIDES_SAMPLES: u32 = 0x100;
+        let mft_provides = (stream_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) != 0;
+
+        let mut out_buffer = MFT_OUTPUT_DATA_BUFFER {
+            dwStreamID: self.output_stream_id,
+            ..Default::default()
+        };
+
+        if !mft_provides {
+            // `cbSize` may be reported as 0; allocate at least one byte.
+            let alloc_size = stream_info.cbSize.max(1);
+            let sample: IMFSample = MFCreateSample().context("MFCreateSample(out)")?;
+            let buffer = MFCreateMemoryBuffer(alloc_size).context("MFCreateMemoryBuffer(out)")?;
+            sample.AddBuffer(&buffer)?;
+            out_buffer.pSample = std::mem::ManuallyDrop::new(Some(sample));
+        }
+
+        let mut status: u32 = 0;
+        let mut bufs = [out_buffer];
+        let hr = self.transform.ProcessOutput(0, &mut bufs, &mut status);
+
+        // The output struct keeps its COM pointers in `ManuallyDrop`, so nothing
+        // is released automatically. Take ownership of whatever sample is now in
+        // the buffer (ours or the MFT's) so it is released on every path below.
+        let produced = std::mem::ManuallyDrop::take(&mut bufs[0].pSample);
+        // Likewise release any event collection the MFT attached; it is not used
+        // here and would otherwise leak one COM reference per call.
+        std::mem::ManuallyDrop::drop(&mut bufs[0].pEvents);
+
+        match hr {
+            Ok(()) => {
+                let Some(sample) = produced else {
+                    return Ok(None);
+                };
+                let bytes = sample_to_vec(&sample)?;
+                let keyframe = sample_is_keyframe(&sample);
+                Ok(Some((bytes, keyframe)))
+            }
+            Err(e) if e.code() == MF_E_TRANSFORM_NEED_MORE_INPUT => Ok(None),
+            Err(e) => Err(anyhow!("ProcessOutput failed: {e:#}")),
+        }
+    }
+}
+
+impl Encoder for H264Encoder {
+    /// Encode one captured BGRA frame to H.264.
+    ///
+    /// Steps: re-init on a size change, start streaming if needed, convert
+    /// BGRA to NV12, feed the MFT one sample, then drain at most one output
+    /// sample. When the MFT has no output yet, an empty frame (`size == 0`,
+    /// default `VideoFrame`) is returned.
+    ///
+    /// # Errors
+    /// Fails on odd frame dimensions, a failed re-init, a colour-conversion
+    /// failure, or any Media Foundation error other than input back-pressure.
+    fn encode(&mut self, frame: &CapturedFrame) -> Result<EncodedFrame> {
+        use windows::Win32::Media::MediaFoundation::MF_E_NOTACCEPTING;
+
+        // One frame at the 30/1 rate set on the media types, in 100ns units.
+        const FRAME_DURATION_100NS: i64 = 10_000_000 / 30;
+
+        self.sequence = self.sequence.wrapping_add(1);
+
+        // H.264 4:2:0 needs even dimensions. Reject odd captures up front so we
+        // surface a clean error instead of a failure deep inside the MFT.
+        if !frame.width.is_multiple_of(2) || !frame.height.is_multiple_of(2) {
+            return Err(anyhow!(
+                "H.264 requires even dimensions, got {}x{}",
+                frame.width,
+                frame.height
+            ));
+        }
+
+        unsafe {
+            // Re-init on a resolution change.
+            if frame.width != self.width || frame.height != self.height {
+                self.reinit_for_size(frame.width, frame.height)
+                    .context("H.264 re-init for new frame size")?;
+            }
+
+            self.ensure_streaming()?;
+
+            // BGRA -> NV12 into the reusable staging buffer.
+            let need = color::nv12_size(frame.width, frame.height);
+            if self.nv12.len() != need {
+                self.nv12.resize(need, 0);
+            }
+            color::bgra_to_nv12(&frame.data, frame.width, frame.height, &mut self.nv12)
+                .map_err(|e| anyhow!("BGRA->NV12 failed: {e}"))?;
+
+            // PTS in 100ns units, derived from the frame counter so it increases
+            // with every frame. `frame.timestamp.elapsed()` is the time elapsed
+            // SINCE the capture instant (capture-to-encode latency), which jitters
+            // and is not monotonic, so it cannot serve as a presentation time.
+            // NOTE(review): this is a nominal 30 fps timeline; a wall-clock PTS
+            // needs a stream-start instant stored on the encoder.
+            let pts_100ns = i64::from(self.sequence.wrapping_sub(1)) * FRAME_DURATION_100NS;
+            let sample = Self::make_input_sample(&self.nv12, pts_100ns)?;
+
+            // Feed the encoder. `ProcessInput` reports back-pressure as
+            // MF_E_NOTACCEPTING (pending output must be drained first); this frame
+            // is then dropped and we fall through to the drain below.
+            // MF_E_TRANSFORM_NEED_MORE_INPUT stays tolerated as before.
+            match self
+                .transform
+                .ProcessInput(self.input_stream_id, &sample, 0)
+            {
+                Ok(()) => {}
+                Err(e)
+                    if e.code() == MF_E_NOTACCEPTING
+                        || e.code() == MF_E_TRANSFORM_NEED_MORE_INPUT => {}
+                Err(e) => return Err(anyhow!("ProcessInput failed: {e:#}")),
+            }
+
+            // Drain whatever output is ready.
+            let Some((data, mft_keyframe)) = self.drain_one_output()? else {
+                // No compressed output yet (encoder latency / GOP buffering).
+                // Emit an empty frame so the session skips sending this tick.
+                return Ok(EncodedFrame {
+                    frame: VideoFrame::default(),
+                    size: 0,
+                    is_keyframe: false,
+                });
+            };
+
+            // A pending keyframe request marks this frame as a keyframe even when
+            // the MFT did not flag it as a clean point (see `request_keyframe`).
+            let is_keyframe = mft_keyframe || self.force_keyframe;
+            self.force_keyframe = false;
+
+            let size = data.len();
+            let encoded = ProtoEncodedFrame {
+                data,
+                keyframe: is_keyframe,
+                pts: pts_100ns,
+                dts: pts_100ns,
+            };
+
+            Ok(EncodedFrame {
+                frame: VideoFrame {
+                    timestamp: frame.timestamp.elapsed().as_millis() as i64,
+                    display_id: frame.display_id as i32,
+                    sequence: self.sequence as i32,
+                    encoding: Some(video_frame::Encoding::H264(encoded)),
+                },
+                size,
+                is_keyframe,
+            })
+        }
+    }
+
+    fn request_keyframe(&mut self) {
+        // Only sets `force_keyframe`: `encode` then FLAGS the next emitted frame as a
+        // keyframe so the viewer treats it as a clean point. This does not make the MFT
+        // emit an IDR; a precise force-IDR uses the codec API (CODECAPI_AVEncVideoForceKeyFrame).
+        self.force_keyframe = true;
+    }
+
+    fn name(&self) -> &str { // fixed identifier string for this encoder implementation
+        "h264-mediafoundation"
+    }
+}
+
+impl Drop for H264Encoder {
+    /// Tell the MFT the stream ended (best effort, only if streaming was
+    /// started) and balance the `MFStartup` made at construction.
+    fn drop(&mut self) {
+        unsafe {
+            if self.streaming {
+                // Failures are ignored: nothing useful can be done during drop.
+                for message in [
+                    MFT_MESSAGE_NOTIFY_END_OF_STREAM,
+                    MFT_MESSAGE_NOTIFY_END_STREAMING,
+                ] {
+                    let _ = self.transform.ProcessMessage(message, 0);
+                }
+            }
+
+            // NOTE(review): struct fields are dropped only after this function
+            // returns, so `self.transform` is released AFTER the MFShutdown
+            // below. Releasing it first would require holding the transform in
+            // an `Option`/`ManuallyDrop` field.
+            if self.mf_started {
+                let _ = MFShutdown();
+            }
+        }
+    }
+}
+
+/// MF version word expected by `MFStartup`: the SDK version in the high 16
+/// bits and the API version in the low 16 bits, i.e. `0x0002_0070`.
+fn mf_version() -> u32 {
+    const MF_SDK_VERSION: u32 = 0x0002;
+    const MF_API_VERSION: u32 = 0x0070;
+    (MF_SDK_VERSION << 16) | MF_API_VERSION
+}
+
+/// Derive a target average bitrate (bits per second) from the quality knob
+/// and the frame area, assuming 30 frames per second.
+///
+/// `quality` is clamped to 1..=100 and mapped linearly (integer arithmetic) to
+/// 0.010..=0.100 bits per pixel per frame. The result is clamped to
+/// 500 kbps..=50 Mbps, so it always fits in a `u32`.
+fn quality_to_bitrate(quality: u32, width: u32, height: u32) -> u32 {
+    const FRAMES_PER_SECOND: u64 = 30;
+    const MIN_BPS: u64 = 500_000;
+    const MAX_BPS: u64 = 50_000_000;
+
+    let quality = u64::from(quality.clamp(1, 100));
+    // Bits per pixel per frame, in thousandths: 10 (q=1) up to 100 (q=100).
+    let milli_bpp = 10 + quality * 90 / 100;
+    let area = u64::from(width) * u64::from(height);
+
+    let unclamped = area
+        .saturating_mul(FRAMES_PER_SECOND)
+        .saturating_mul(milli_bpp)
+        / 1000;
+    unclamped.clamp(MIN_BPS, MAX_BPS) as u32
+}
+
+/// Store a frame size on `media_type` under `key` as one 64-bit value:
+/// `width` in the high 32 bits, `height` in the low 32 bits.
+#[cfg(windows)]
+unsafe fn set_attr_size(
+    media_type: &IMFMediaType,
+    key: &windows::core::GUID,
+    width: u32,
+    height: u32,
+) -> Result<()> {
+    let high = u64::from(width) << 32;
+    let low = u64::from(height);
+    Ok(media_type.SetUINT64(key, high | low)?)
+}
+
+/// Store a ratio on `media_type` under `key` as one 64-bit value:
+/// numerator in the high 32 bits, denominator in the low 32 bits.
+#[cfg(windows)]
+unsafe fn set_attr_ratio(
+    media_type: &IMFMediaType,
+    key: &windows::core::GUID,
+    num: u32,
+    den: u32,
+) -> Result<()> {
+    let high = u64::from(num) << 32;
+    let low = u64::from(den);
+    Ok(media_type.SetUINT64(key, high | low)?)
+}
+
+/// Copy the payload of an `IMFSample` into a `Vec`.
+///
+/// The sample is first converted to a single contiguous buffer; the bytes up
+/// to its current length are copied out while the buffer is locked. An empty
+/// `Vec` is returned when the lock yields a null pointer or a zero length.
+#[cfg(windows)]
+unsafe fn sample_to_vec(sample: &IMFSample) -> Result<Vec<u8>> {
+    let contiguous = sample
+        .ConvertToContiguousBuffer()
+        .context("ConvertToContiguousBuffer")?;
+
+    let mut data: *mut u8 = std::ptr::null_mut();
+    let mut current_len: u32 = 0;
+    contiguous
+        .Lock(&mut data, None, Some(&mut current_len))
+        .context("output buffer Lock")?;
+
+    let mut bytes = Vec::new();
+    if !data.is_null() && current_len != 0 {
+        bytes.extend_from_slice(std::slice::from_raw_parts(data, current_len as usize));
+    }
+
+    // The copy is complete; an unlock failure is not treated as an error.
+    let _ = contiguous.Unlock();
+    Ok(bytes)
+}
+
+/// Report whether a sample carries a nonzero "clean point" (keyframe)
+/// attribute. A missing or unreadable attribute counts as "not a keyframe".
+#[cfg(windows)]
+unsafe fn sample_is_keyframe(sample: &IMFSample) -> bool {
+    use windows::Win32::Media::MediaFoundation::MFSampleExtension_CleanPoint;
+    matches!(
+        sample.GetUINT32(&MFSampleExtension_CleanPoint),
+        Ok(flag) if flag != 0
+    )
+}