#!/usr/bin/env python3
"""
Quick test script to verify faster-whisper works on Mac M4.
Transcribes first 60 seconds of an episode.
"""

import tempfile
import time
from pathlib import Path

from faster_whisper import WhisperModel
from pydub import AudioSegment

# Config
EPISODE = Path("training-data/episodes/2011-06-04-hr1.mp3")
TEST_DURATION_MS = 60_000  # 60 seconds
MODEL_SIZE = "base"  # Start small for testing, switch to large-v3 for production


def main() -> None:
    """Run a smoke test: load the model, transcribe a short clip, print results.

    Steps:
      1. Load the ``MODEL_SIZE`` Whisper model on CPU with int8 compute.
      2. Cut the first ``TEST_DURATION_MS`` milliseconds from ``EPISODE``.
      3. Export the clip as WAV into a private temporary directory.
      4. Transcribe it, then print timing, segment count and the transcript.

    Raises:
        SystemExit: if ``EPISODE`` does not exist, so the failure is reported
            before the (comparatively slow) model load.
    """
    # Fail fast with a readable message instead of a decoder traceback later.
    if not EPISODE.is_file():
        raise SystemExit(f"[ERROR] Episode not found: {EPISODE}")

    print(f"[INFO] Loading {MODEL_SIZE} model on CPU...")
    # perf_counter is monotonic, so interval measurements cannot go negative
    # if the wall clock is adjusted mid-run.
    start = time.perf_counter()

    # Use CPU - faster-whisper/ctranslate2 doesn't support MPS
    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
    print(f"[OK] Model loaded in {time.perf_counter() - start:.1f}s")

    # Extract first 60 seconds
    print(f"[INFO] Extracting first {TEST_DURATION_MS // 1000}s from {EPISODE.name}...")
    audio = AudioSegment.from_mp3(str(EPISODE))
    test_clip = audio[:TEST_DURATION_MS]
    # len() of a pydub segment is its duration in milliseconds; the clip is
    # shorter than TEST_DURATION_MS when the episode itself is shorter.
    clip_seconds = len(test_clip) / 1000

    # A private temporary directory avoids clashing with other runs and is
    # removed even when export or transcription raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_file = Path(tmp_dir) / "test_clip.wav"
        # export() hands back the open output file handle; close it so the
        # WAV is fully released before it is read again.
        test_clip.export(str(temp_file), format="wav").close()
        print(f"[OK] Test clip exported ({temp_file.stat().st_size // 1024}KB)")

        # Transcribe
        print("[INFO] Transcribing...")
        start = time.perf_counter()

        segments, _info = model.transcribe(
            str(temp_file),
            language="en",
            beam_size=5,
            vad_filter=True,
        )

        # `segments` is produced lazily, so the decoding work happens while
        # it is consumed here; keep this inside the timed region and inside
        # the `with` block so the clip still exists.
        results = [
            {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
            for seg in segments
        ]
        elapsed = time.perf_counter() - start

    # Guard against a zero-length interval on very coarse clocks.
    speed = clip_seconds / elapsed if elapsed > 0 else float("inf")

    print(f"[OK] Transcription complete in {elapsed:.1f}s")
    print(f"[INFO] Speed: {speed:.2f}x realtime")
    print(f"[INFO] Segments: {len(results)}")
    print()
    print("=" * 60)
    print("TRANSCRIPT:")
    print("=" * 60)

    for seg in results:
        print(f"[{seg['start']:.1f}s - {seg['end']:.1f}s] {seg['text']}")

    print()
    print("[SUCCESS] Test complete!")


if __name__ == "__main__":
    main()