- Fix voice_profiler threshold bug (HOST label overwrote Unknown unconditionally) - Audio preload optimization: single ffmpeg per episode, 149.5x realtime on 5070 Ti - WavLM threshold raised to 0.85 (Mike 0.90-0.99, callers 0.46-0.83) - Promo/bumper filter: weighted signature scoring, 42->27 clean Q&A pairs - Text-only Q&A fallback for episodes with no CALLER diarization labels - TRANSFORMERS_OFFLINE=1 to skip HuggingFace freshness checks - Add diarize_2018.py for targeted re-run + FTS5 rebuild - Add benchmark.py + BENCH_SETUP.md for GURU-BEAST-ROG (RTX 4090) comparison - Commit 9-episode training diarization.json outputs - Session log: 2026-04-27-diarization-pipeline.md Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
106 lines
2.9 KiB
Python
106 lines
2.9 KiB
Python
"""
|
|
Audio clip extraction using ffmpeg.
|
|
Cuts clips from original broadcast MP3s for use in Audition/Audacity.
|
|
"""
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
from rich.console import Console
|
|
|
|
# Module-level rich console used for the status messages printed by this module.
console = Console()
|
|
|
|
|
|
def extract_clip(
    source_path: Path,
    start: float,
    end: float,
    output_path: Path,
    padding: float = 1.5,
    fade_ms: int = 200,
) -> Path:
    """
    Extract a clip from source_path between start and end seconds.

    The requested range is widened by ``padding`` seconds on both sides (the
    start is clamped at 0.0) and a fade in/out of ``fade_ms`` milliseconds is
    applied. The fade is shortened when necessary so that the fade-in and
    fade-out never overlap, and is skipped entirely when it rounds to zero.

    Args:
        source_path: Audio file to cut from.
        start: Start of the region of interest, in seconds.
        end: End of the region of interest, in seconds.
        output_path: Destination file; parent directories are created.
        padding: Seconds added before ``start`` and after ``end``.
        fade_ms: Fade in/out length in milliseconds.

    Returns:
        The output path.

    Raises:
        ValueError: If the padded range is empty or inverted.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    source_path = Path(source_path)
    output_path = Path(output_path)

    clip_start = max(0.0, start - padding)
    clip_end = end + padding
    duration = clip_end - clip_start

    # Reject bad ranges before touching the filesystem or spawning ffmpeg.
    # "not > 0" (rather than "<= 0") also rejects NaN.
    if not duration > 0:
        raise ValueError(
            f"clip range is empty: start={start}, end={end}, padding={padding}"
        )

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Each fade may use at most half of the clip, otherwise the fade-out would
    # start before the fade-in has finished (or before t=0).
    fade_s = min(max(fade_ms, 0) / 1000.0, duration / 2.0)

    cmd = [
        "ffmpeg", "-y",
        "-ss", f"{clip_start:.3f}",
        "-i", str(source_path),
        "-t", f"{duration:.3f}",
    ]
    # Only add the filter when the fade is at least 1 ms, i.e. when it does
    # not format to "0.000".
    if fade_s >= 0.001:
        fade_out_start = duration - fade_s
        cmd += [
            "-af",
            f"afade=t=in:st=0:d={fade_s:.3f},"
            f"afade=t=out:st={fade_out_start:.3f}:d={fade_s:.3f}",
        ]
    cmd += ["-q:a", "2", str(output_path)]

    result = subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the caller's stdin
        capture_output=True,
        text=True,
        errors="replace",  # stderr may contain bytes that are not valid text
    )
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {result.stderr[-500:]}")

    return output_path
|
|
|
|
|
|
def _clip_time_tag(seconds: float) -> str:
    """Render an offset in seconds as a filename tag like ``1h02m03s`` or ``2m03s``."""
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return f"{hours}h{minutes:02d}m{secs:02d}s"
    return f"{minutes}m{secs:02d}s"


def extract_clips_for_results(results, output_dir: Path, padding: float = 1.5) -> dict[int, Path]:
    """
    Extract clips for a list of QAResult or SearchResult objects.

    Each result must expose ``episode_id`` and ``audio_path``. Results with a
    ``question_start`` attribute are cut from ``question_start`` to
    ``answer_end``; all others are cut from ``start`` to ``end``.

    Clips are written to ``output_dir`` as ``<episode>_<time tag>.mp3``. The
    time tag has whole-second resolution, so two results from the same
    episode that start within the same second share one filename.

    Results whose audio file is missing, or whose extraction fails, are
    reported on the console and skipped; they do not abort the batch.

    Args:
        results: Iterable of result objects as described above.
        output_dir: Directory that receives the extracted clips.
        padding: Seconds of padding passed through to ``extract_clip``.

    Returns:
        {index: clip_path} for every result that was extracted successfully,
        where index is the position of the result in ``results``.
    """
    output_dir = Path(output_dir)
    clip_paths: dict[int, Path] = {}

    for i, result in enumerate(results):
        episode = result.episode_id
        audio_path = Path(result.audio_path)

        if not audio_path.exists():
            console.print(f"[yellow]Audio not found: {audio_path}[/yellow]")
            continue

        # Determine time range
        if hasattr(result, "question_start"):
            # QAResult
            start, end = result.question_start, result.answer_end
        else:
            # SearchResult
            start, end = result.start, result.end

        clip_name = f"{episode}_{_clip_time_tag(start)}.mp3"
        clip_path = output_dir / clip_name

        try:
            extract_clip(audio_path, start, end, clip_path, padding=padding)
        except Exception as e:
            # Best-effort batch: report the failure and carry on with the rest.
            console.print(f"[red]Clip {i+1} failed:[/red] {e}")
        else:
            clip_paths[i] = clip_path
            console.print(f"[green]Clip {i+1}:[/green] {clip_name}")

    return clip_paths
|
|
|
|
|
|
def format_timestamp(seconds: float) -> str:
    """Format seconds as H:MM:SS or M:SS.

    Fractional seconds are truncated. Negative values are rendered as the
    magnitude with a leading ``-`` (for example ``-5`` becomes ``-0:05``).
    """
    total = int(abs(seconds))
    # No sign for values that truncate to zero, so "-0:00" is never produced.
    sign = "-" if seconds < 0 and total else ""

    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)

    if hours:
        return f"{sign}{hours}:{minutes:02d}:{secs:02d}"
    return f"{sign}{minutes}:{secs:02d}"
|