Files
claudetools/projects/radio-show/audio-processor/check_scores.py
Mike Swanson 79abef9dc9 radio: diarization pipeline fixes, benchmark setup, test episode set
- Fix voice_profiler threshold bug (HOST label overwrote Unknown unconditionally)
- Audio preload optimization: single ffmpeg per episode, 149.5x realtime on 5070 Ti
- WavLM threshold raised to 0.85 (Mike 0.90-0.99, callers 0.46-0.83)
- Promo/bumper filter: weighted signature scoring, 42->27 clean Q&A pairs
- Text-only Q&A fallback for episodes with no CALLER diarization labels
- TRANSFORMERS_OFFLINE=1 to skip HuggingFace freshness checks
- Add diarize_2018.py for targeted re-run + FTS5 rebuild
- Add benchmark.py + BENCH_SETUP.md for GURU-BEAST-ROG (RTX 4090) comparison
- Commit 9-episode training diarization.json outputs
- Session log: 2026-04-27-diarization-pipeline.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 13:20:40 -07:00

85 lines
2.8 KiB
Python

"""
Quick diagnostic: print per-window WavLM similarity scores for one episode.
Run before diarize_training.py to understand score distribution.
"""
import sys
import os

# PYTHONIOENCODING is only read at interpreter start-up, so this assignment is
# for child processes; our own stdout is switched by reconfigure() below.
os.environ["PYTHONIOENCODING"] = "utf-8"
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
# Must be in the environment before the project imports below are loaded.
os.environ["TRANSFORMERS_OFFLINE"] = "1"

from pathlib import Path

import numpy as np

from src.gpu import ensure_cuda_libs

# Kept ahead of the voice_profiler import, matching the original statement order.
ensure_cuda_libs()

from src.voice_profiler import VoiceProfiler
from src.config import load_config
from rich.console import Console

console = Console()

BASE = Path(__file__).parent

# Score cut-offs used only for the labels printed by this diagnostic.
HOST_THRESHOLD = 0.85
CALLER_THRESHOLD = 0.70

# Coarse scan parameters (seconds).
SCAN_START_S = 600.0      # ~10 min in ...
SCAN_START_FRACTION = 0.15  # ... or 15% of the episode, whichever is earlier
SCAN_END_S = 2400.0       # up to 40 min
WINDOW_S = 10.0
HOP_S = 30.0              # one window per 30s for speed

# Histogram bucket edges; the last edge is 1.01 so a score of exactly 1.0 is counted.
BUCKET_EDGES = [0.0, 0.6, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95, 1.01]

config = load_config()
profiles_dir = config.resolve_path(config.diarization.voice_profiles_dir)

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
console.print(f"Device: {device}")

profiler = VoiceProfiler(profiles_dir, device=device)
if not profiler.profiles:
    console.print("[red]No voice profiles loaded[/red]")
    sys.exit(1)

# Use the first available episode
episodes = sorted((BASE / "training-data" / "episodes").glob("*.mp3"))
if not episodes:
    console.print("[red]No episodes found[/red]")
    sys.exit(1)

ep = episodes[0]
duration = profiler._get_duration(ep)

# Scan 10-40 minutes — intro monologue usually ends before 10 min, callers appear after
scan_start = min(SCAN_START_S, duration * SCAN_START_FRACTION)
scan_end = min(duration, SCAN_END_S)
scan_range = f"{scan_start / 60:.1f}-{scan_end / 60:.1f} min"

# markup=False: file names and the literal "[time]" must not be parsed as Rich
# style tags (an unknown tag such as [time] would be dropped from the output).
console.print(f"\nAnalyzing {scan_range} of: {ep.name}", markup=False)
console.print("Format: [time]  similarity_score  label\n", markup=False)

scores = []
for start in np.arange(scan_start, scan_end - WINDOW_S, HOP_S):
    end = start + WINDOW_S
    # Best-effort per window: a failure on one window is reported and the scan continues.
    try:
        emb = profiler.extract_embedding(ep, start, end)

        # Highest-scoring profile; scores <= 0 leave best_name empty and report 0.0.
        best_score = 0.0
        best_name = ""
        for name, profile in profiler.profiles.items():
            s = profile.similarity(emb)
            if s > best_score:
                best_score = s
                best_name = name

        if best_score >= HOST_THRESHOLD:
            label = f"HOST ({best_name})"
        elif best_score >= CALLER_THRESHOLD:
            label = f"CALLER (below {HOST_THRESHOLD:.2f})"
        else:
            label = "UNKNOWN"

        console.print(
            f"  [{start:6.0f}s-{end:.0f}s]  {best_score:.4f}  {label}",
            markup=False,
        )
        scores.append(best_score)
    except Exception as e:
        # Exception text is arbitrary and may contain brackets; print it verbatim.
        console.print(f"  [{start:6.0f}s] ERROR: {e}", markup=False)

if scores:
    console.print(f"\nScore distribution over {scan_range}:", markup=False)
    console.print(
        f"  min={min(scores):.4f}  max={max(scores):.4f}  "
        f"mean={np.mean(scores):.4f}  median={np.median(scores):.4f}"
    )
    for lo, hi in zip(BUCKET_EDGES, BUCKET_EDGES[1:]):
        count = sum(1 for s in scores if lo <= s < hi)
        bar = "#" * count
        console.print(f"  [{lo:.2f}-{hi:.2f}): {count:3d}  {bar}", markup=False)
else:
    # Episode too short for a single window, or every window failed above.
    console.print("[yellow]No windows were scored[/yellow]")