Files
claudetools/projects/radio-show/audio-processor/src/clip_extractor.py
Mike Swanson 79abef9dc9 radio: diarization pipeline fixes, benchmark setup, test episode set
- Fix voice_profiler threshold bug (HOST label overwrote Unknown unconditionally)
- Audio preload optimization: single ffmpeg per episode, 149.5x realtime on 5070 Ti
- WavLM threshold raised to 0.85 (Mike 0.90-0.99, callers 0.46-0.83)
- Promo/bumper filter: weighted signature scoring, 42->27 clean Q&A pairs
- Text-only Q&A fallback for episodes with no CALLER diarization labels
- TRANSFORMERS_OFFLINE=1 to skip HuggingFace freshness checks
- Add diarize_2018.py for targeted re-run + FTS5 rebuild
- Add benchmark.py + BENCH_SETUP.md for GURU-BEAST-ROG (RTX 4090) comparison
- Commit 9-episode training diarization.json outputs
- Session log: 2026-04-27-diarization-pipeline.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 13:20:40 -07:00

106 lines
2.9 KiB
Python

"""
Audio clip extraction using ffmpeg.
Cuts clips from original broadcast MP3s for use in Audition/Audacity.
"""
import subprocess
from pathlib import Path
from rich.console import Console
# Module-wide Rich console; extract_clips_for_results uses it to report
# per-clip progress, missing audio files, and extraction failures.
console = Console()
def extract_clip(
    source_path: Path,
    start: float,
    end: float,
    output_path: Path,
    padding: float = 1.5,
    fade_ms: int = 200,
) -> Path:
    """
    Extract a clip from source_path between start and end seconds.

    The cut is widened by ``padding`` seconds on both sides (the padded start
    is clamped at 0) and a fade in/out of ``fade_ms`` milliseconds is applied.
    The fade length is capped at half the clip duration so the fade-in and
    fade-out never overlap and the fade-out never starts before the clip
    does. When the effective fade is shorter than 1 ms no audio filter is
    passed to ffmpeg at all.

    The parent directory of output_path is created if missing, and an
    existing output file is overwritten (ffmpeg ``-y``).

    Returns the output path.
    Raises RuntimeError (carrying the tail of ffmpeg's stderr) when ffmpeg
    exits with a non-zero status.
    """
    source_path = Path(source_path)
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    clip_start = max(0.0, start - padding)
    clip_end = end + padding
    duration = clip_end - clip_start

    # Cap the fade at half the clip so short clips do not get a negative
    # fade-out start time or overlapping fades.
    fade_s = max(0.0, min(fade_ms / 1000.0, duration / 2.0))

    cmd = [
        "ffmpeg", "-y",
        # -ss before -i seeks in the input; -t then limits the output length.
        "-ss", f"{clip_start:.3f}",
        "-i", str(source_path),
        "-t", f"{duration:.3f}",
    ]
    # Skip the filter when the fade would format as d=0.000: per the ffmpeg
    # afade documentation a zero duration falls back to the default
    # nb_samples length instead of disabling the fade.
    if fade_s >= 0.001:
        fade_out_start = duration - fade_s
        cmd += [
            "-af",
            f"afade=t=in:st=0:d={fade_s:.3f},"
            f"afade=t=out:st={fade_out_start:.3f}:d={fade_s:.3f}",
        ]
    cmd += ["-q:a", "2", str(output_path)]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Only the tail of stderr is kept: ffmpeg prints a long banner first
        # and the actual error is at the end.
        raise RuntimeError(f"ffmpeg failed: {result.stderr[-500:]}")
    return output_path
def _clip_time_tag(seconds) -> str:
    """Render a whole-second offset as a filename tag: 1h02m05s or 2m05s."""
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{hours}h{minutes:02d}m{secs:02d}s"
    return f"{minutes}m{secs:02d}s"


def extract_clips_for_results(results, output_dir: Path, padding: float = 1.5) -> dict[int, Path]:
    """
    Cut one clip per entry of results into output_dir.

    Entries exposing ``question_start`` are treated as QAResult objects and
    span question_start..answer_end; all others are treated as SearchResult
    objects and span start..end. Each clip is named
    ``<episode_id>_<start tag>.mp3``.

    Entries whose audio file does not exist are skipped with a warning, and
    an extraction failure is reported on the console without aborting the
    remaining entries.

    Returns {index: clip_path} for the clips that were written, keyed by the
    entry's position in results.
    """
    target_dir = Path(output_dir)
    extracted = {}

    for idx, item in enumerate(results):
        episode_id = item.episode_id
        source = Path(item.audio_path)
        if not source.exists():
            console.print(f"[yellow]Audio not found: {source}[/yellow]")
            continue

        # QAResult carries question/answer bounds; SearchResult a plain span.
        if hasattr(item, "question_start"):
            begin, finish = item.question_start, item.answer_end
        else:
            begin, finish = item.start, item.end

        filename = f"{episode_id}_{_clip_time_tag(begin)}.mp3"
        destination = target_dir / filename

        try:
            extract_clip(source, begin, finish, destination, padding=padding)
            extracted[idx] = destination
            console.print(f"[green]Clip {idx+1}:[/green] {filename}")
        except Exception as err:
            # Best effort: one bad clip must not stop the rest of the batch.
            console.print(f"[red]Clip {idx+1} failed:[/red] {err}")

    return extracted
def format_timestamp(seconds: float) -> str:
    """
    Format seconds as H:MM:SS, or M:SS when under one hour.

    Fractional seconds are truncated, not rounded. Negative values are
    rendered as the magnitude with a leading "-" (for example -5 gives
    "-0:05"); a negative value that truncates to zero gives "0:00".
    """
    # Work on the magnitude: floor division and modulo on a negative number
    # would otherwise borrow from the next unit (-5 -> "-1:59:55").
    whole = int(abs(seconds))
    sign = "-" if seconds < 0 and whole else ""

    minutes, secs = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{sign}{hours}:{minutes:02d}:{secs:02d}"
    return f"{sign}{minutes}:{secs:02d}"