radio: diarization pipeline fixes, benchmark setup, test episode set
- Fix voice_profiler threshold bug (HOST label overwrote Unknown unconditionally)
- Audio preload optimization: single ffmpeg per episode, 149.5x realtime on 5070 Ti
- WavLM threshold raised to 0.85 (Mike 0.90-0.99, callers 0.46-0.83)
- Promo/bumper filter: weighted signature scoring, 42->27 clean Q&A pairs
- Text-only Q&A fallback for episodes with no CALLER diarization labels
- TRANSFORMERS_OFFLINE=1 to skip HuggingFace freshness checks
- Add diarize_2018.py for targeted re-run + FTS5 rebuild
- Add benchmark.py + BENCH_SETUP.md for GURU-BEAST-ROG (RTX 4090) comparison
- Commit 9-episode training diarization.json outputs
- Session log: 2026-04-27-diarization-pipeline.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
84
projects/radio-show/audio-processor/check_scores.py
Normal file
84
projects/radio-show/audio-processor/check_scores.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Quick diagnostic: print per-window WavLM similarity scores for one episode.
|
||||
Run before diarize_training.py to understand score distribution.
|
||||
"""
|
||||
import sys
import os

# Environment is configured before the project/model imports below.
# TRANSFORMERS_OFFLINE=1 skips the HuggingFace freshness checks.
os.environ["PYTHONIOENCODING"] = "utf-8"
os.environ["TRANSFORMERS_OFFLINE"] = "1"
if hasattr(sys.stdout, "reconfigure"):
    # Re-encode the already-open stdout stream as UTF-8 when it supports it.
    sys.stdout.reconfigure(encoding="utf-8")

from pathlib import Path
import numpy as np
from src.gpu import ensure_cuda_libs

# NOTE(review): presumably makes the CUDA libraries loadable before the
# torch-backed modules are imported — confirm in src.gpu.
ensure_cuda_libs()

from src.voice_profiler import VoiceProfiler
from src.config import load_config
from rich.console import Console

console = Console()

# Directory containing this script; episode paths are resolved against it.
BASE = Path(__file__).parent
config = load_config()
profiles_dir = config.resolve_path(config.diarization.voice_profiles_dir)

import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
console.print(f"Device: {device}")

profiler = VoiceProfiler(profiles_dir, device=device)

# Nothing to compare against without at least one loaded profile.
if not profiler.profiles:
    console.print("[red]No voice profiles loaded[/red]")
    sys.exit(1)
|
||||
|
||||
# Use the first available episode (alphabetical order of the .mp3 files).
episodes = sorted((BASE / "training-data" / "episodes").glob("*.mp3"))
if not episodes:
    console.print("[red]No episodes found[/red]")
    sys.exit(1)

ep = episodes[0]
duration = profiler._get_duration(ep)

# Scan window: start ~10 min in (or at 15% of the episode when that is
# earlier) and stop at 40 min or at the end of the episode. The intro
# monologue usually ends before 10 min and callers appear after.
scan_start = min(600.0, duration * 0.15)
scan_end = min(duration, 2400.0)

window_s = 10.0  # length of each scored window, in seconds
hop_s = 30.0  # coarse pass — one window per 30s for speed

# The banner reports the range that is actually scanned. markup=False keeps
# Rich from parsing bracketed text (the literal "[time]", or brackets in the
# episode file name) as style tags instead of printing it verbatim.
console.print(
    f"\nAnalyzing {scan_start:.0f}s-{scan_end:.0f}s of: {ep.name}",
    markup=False,
)
console.print("Format: [time] similarity_score label\n", markup=False)
|
||||
|
||||
# Similarity cut-offs used only for the printed label:
# HOST at or above 0.85, CALLER from 0.70 up to 0.85, UNKNOWN below 0.70.
HOST_THRESHOLD = 0.85
CALLER_THRESHOLD = 0.70

# Best similarity score of every window that was scored successfully.
scores = []
for start in np.arange(scan_start, scan_end - window_s, hop_s):
    end = start + window_s
    try:
        emb = profiler.extract_embedding(ep, start, end)
        # Best-matching profile. The score is floored at 0.0, so a window
        # whose similarities are all <= 0 reports 0.0 with an empty name.
        best_score = 0.0
        best_name = ""
        for name, profile in profiler.profiles.items():
            similarity = profile.similarity(emb)
            if similarity > best_score:
                best_score = similarity
                best_name = name
    except Exception as e:
        # Best-effort diagnostic: report the failed window and keep scanning.
        # markup=False so brackets inside the exception text are printed
        # verbatim rather than parsed as Rich markup, which may itself fail
        # on an unmatched tag while we are already handling an error.
        console.print(f" [{start:6.0f}s] ERROR: {e}", markup=False)
        continue

    if best_score >= HOST_THRESHOLD:
        label = f"HOST ({best_name})"
    elif best_score >= CALLER_THRESHOLD:
        label = f"CALLER (below {HOST_THRESHOLD:.2f})"
    else:
        label = "UNKNOWN"

    # markup=False: profile names are printed as-is even if they contain
    # bracketed text.
    console.print(
        f" [{start:6.0f}s-{end:.0f}s] {best_score:.4f} {label}",
        markup=False,
    )
    scores.append(best_score)
|
||||
|
||||
if scores:
    # The scan range is derived from the episode duration, so the header
    # describes the scanned windows rather than a fixed time span.
    console.print("\nScore distribution over scanned windows:")
    console.print(
        f" min={min(scores):.4f} max={max(scores):.4f} "
        f"mean={np.mean(scores):.4f} median={np.median(scores):.4f}"
    )
    # Half-open histogram buckets [lo, hi). The final edge is 1.01 so that a
    # score of exactly 1.0 still lands in the last bucket.
    buckets = [0.0, 0.6, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95, 1.01]
    for lo, hi in zip(buckets, buckets[1:]):
        count = sum(1 for s in scores if lo <= s < hi)
        bar = "#" * count  # one '#' per window in this bucket
        console.print(f" [{lo:.2f}-{hi:.2f}): {count:3d} {bar}")
|
||||
Reference in New Issue
Block a user