""" Quick diagnostic: print per-window WavLM similarity scores for one episode. Run before diarize_training.py to understand score distribution. """ import sys import os os.environ["PYTHONIOENCODING"] = "utf-8" if hasattr(sys.stdout, "reconfigure"): sys.stdout.reconfigure(encoding="utf-8") os.environ["TRANSFORMERS_OFFLINE"] = "1" from pathlib import Path import numpy as np from src.gpu import ensure_cuda_libs ensure_cuda_libs() from src.voice_profiler import VoiceProfiler from src.config import load_config from rich.console import Console console = Console() BASE = Path(__file__).parent config = load_config() profiles_dir = config.resolve_path(config.diarization.voice_profiles_dir) import torch device = "cuda" if torch.cuda.is_available() else "cpu" console.print(f"Device: {device}") profiler = VoiceProfiler(profiles_dir, device=device) if not profiler.profiles: console.print("[red]No voice profiles loaded[/red]") sys.exit(1) # Use the first available episode episodes = sorted((BASE / "training-data" / "episodes").glob("*.mp3")) if not episodes: console.print("[red]No episodes found[/red]") sys.exit(1) ep = episodes[0] console.print(f"\nAnalyzing first 20 minutes of: {ep.name}") console.print("Format: [time] similarity_score label\n") duration = profiler._get_duration(ep) # Scan 10-40 minutes — intro monologue usually ends before 10 min, callers appear after scan_start = min(600.0, duration * 0.15) # ~10 min in or 15% scan_end = min(duration, 2400.0) # up to 40 min window_s = 10.0 hop_s = 30.0 # coarse pass — one window per 30s for speed scores = [] for start in np.arange(scan_start, scan_end - window_s, hop_s): end = start + window_s try: emb = profiler.extract_embedding(ep, start, end) best_score = 0.0 best_name = "" for name, profile in profiler.profiles.items(): s = profile.similarity(emb) if s > best_score: best_score = s best_name = name label = f"HOST ({best_name})" if best_score >= 0.85 else ( f"CALLER (below 0.85)" if best_score >= 0.70 else "UNKNOWN" ) console.print(f" [{start:6.0f}s-{end:.0f}s] {best_score:.4f} {label}") scores.append(best_score) except Exception as e: console.print(f" [{start:6.0f}s] ERROR: {e}") if scores: console.print(f"\nScore distribution over first 20 min:") console.print(f" min={min(scores):.4f} max={max(scores):.4f} mean={np.mean(scores):.4f} median={np.median(scores):.4f}") buckets = [0.0, 0.6, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95, 1.01] for lo, hi in zip(buckets, buckets[1:]): count = sum(1 for s in scores if lo <= s < hi) bar = "#" * count console.print(f" [{lo:.2f}-{hi:.2f}): {count:3d} {bar}")