diff --git a/.claude/memory/radio_show_no_cohost_named_tom.md b/.claude/memory/radio_show_no_cohost_named_tom.md
index 0e2b2d1..00fa4ef 100644
--- a/.claude/memory/radio_show_no_cohost_named_tom.md
+++ b/.claude/memory/radio_show_no_cohost_named_tom.md
@@ -38,7 +38,7 @@ In **2015-s7e19** (Jan 2015 New Year episode):
 ### Recurring guests / fill-ins
 | Person | Affiliation | Confirmed in audio | Profile built |
 |---|---|---|---|
-| **Clay** | "Nerd Junkies" — fills in for Tara when she's out (Mike: rarely appears in other episodes) | 2015-s7e19 (throughout — Tara was out, Clay covered) | pending |
+| **Clay** | "Nerd Junkies" — fills in for Tara when she's out (Mike: rarely appears in other episodes) | 2015-s7e19 (throughout — Tara was out, Clay covered) | **skipped** — first attempt failed (Clay vs Mike sim = 0.994); Mike chose to accept 2015-s7e19's Q&A as noisy rather than build cleanly. Mike's rationale: Clay is rare in other episodes, so the cost of not having his profile is bounded |
 
 Tara's role is explicit per transcript at 2015-s7e19 @ 00:51: "in Tara's place, we have Clay. Clay from the Nerd Junkies." — Tara is the regular co-host for that era; Clay is a fill-in.
 
diff --git a/projects/radio-show/audio-processor/build_clay_profile.py b/projects/radio-show/audio-processor/build_clay_profile.py
new file mode 100644
index 0000000..9aaa4fb
--- /dev/null
+++ b/projects/radio-show/audio-processor/build_clay_profile.py
@@ -0,0 +1,157 @@
+"""
+Build voice profile for Clay (Nerd Junkies — fill-in for Tara) from
+hand-picked windows in 2015-s7e19.
+
+Each window is cut into 10-second chunks; every chunk is embedded and
+appended to Clay's profile. A Mike-similarity filter (skip any chunk
+whose cosine vs Mike's composite is >= MIKE_FILTER_THRESHOLD) is wired
+in but currently disabled — see the note on MIKE_FILTER_THRESHOLD.
+"""
+import os
+import sys
+
+os.environ["PYTHONIOENCODING"] = "utf-8"
+os.environ["TRANSFORMERS_OFFLINE"] = "1"
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding="utf-8")
+
+from pathlib import Path
+import numpy as np
+from src.gpu import ensure_cuda_libs
+# Called before the torch import below; presumably it must run first so
+# the CUDA libraries are discoverable — keep this ordering.
+ensure_cuda_libs()
+
+import torch
+from src.voice_profiler import VoiceProfiler, SpeakerProfile
+from rich.console import Console
+
+console = Console()
+
+BASE = Path(__file__).parent
+PROFILES_DIR = BASE / "voice-profiles"
+EPISODES_DIR = BASE / "test-data" / "episodes"
+
+# Clay windows in 2015-s7e19 (transcript-vetted: Mike+Clay banter,
+# no callers in these ranges), as (start_s, end_s) pairs. The Mike
+# filter is disabled (see MIKE_FILTER_THRESHOLD), so every full chunk
+# inside these windows goes into Clay's profile.
+CLAY_WINDOWS = {
+    "2015-s7e19.mp3": [
+        (90, 150),     # 01:30-02:30 — Clay introducing Nerd Junkies team
+        (2520, 2640),  # 42:00-44:00 — Clay's 2014 gaming year-in-review
+        (2730, 2820),  # 45:30-47:00 — Clay on VR/Oculus
+    ],
+}
+
+COHOST_NAME = "Clay"
+# Mike-filter would drop everything (Mike's profile matches at 0.92+ on
+# any chunk in these windows because Mike is interjecting and his profile
+# is broad). Disabled — relying on cosine comparison at diarization time
+# to put Mike chunks in Mike's bucket and Clay chunks in Clay's.
+MIKE_FILTER_THRESHOLD = 1.01  # effectively disabled
+
+# Chunking parameters. SAMPLE_RATE is assumed to match what
+# profiler._load_full_audio() returns — TODO confirm against VoiceProfiler.
+SAMPLE_RATE = 16000
+CHUNK_SECONDS = 10
+CHUNK_SAMPLES = CHUNK_SECONDS * SAMPLE_RATE
+
+
+def _cosine(a, b):
+    """Return the cosine similarity of *a* and *b* as a float.
+
+    The 1e-8 in the denominator guards against division by zero for
+    all-zero vectors.
+    """
+    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+console.print(f"Device: {device}")
+
+profiler = VoiceProfiler(PROFILES_DIR, device=device)
+
+# Mike's composite is needed for the filter and the diagnostics below.
+mike = profiler.profiles.get("Mike Swanson")
+if mike is None or mike.composite_embedding is None:
+    console.print("[red]Mike's profile not loaded — abort.[/red]")
+    sys.exit(1)
+
+# NOTE(review): an existing Clay profile is reused, so re-running this
+# script appends the same chunks again — confirm that is intended.
+if COHOST_NAME not in profiler.profiles:
+    profiler.profiles[COHOST_NAME] = SpeakerProfile(
+        name=COHOST_NAME,
+        role="cohost",
+        embeddings=[],
+        source_episodes=[],
+    )
+
+profile = profiler.profiles[COHOST_NAME]
+console.print(f"\n[bold]Building voice profile: {COHOST_NAME}[/bold]")
+console.print(f" Mike-similarity filter @ >= {MIKE_FILTER_THRESHOLD}")
+
+kept = 0
+skipped_mike = 0
+failed = 0
+
+for ep_name, windows in CLAY_WINDOWS.items():
+    ep_path = EPISODES_DIR / ep_name
+    if not ep_path.exists():
+        console.print(f"[yellow] Skipping {ep_name} — not found[/yellow]")
+        continue
+
+    console.print(f"\n Loading {ep_name}...")
+    audio = profiler._load_full_audio(ep_path)
+    profiler._get_model()
+
+    for win_start, win_end in windows:
+        # The "+ 1" makes the stop inclusive of the last full chunk, e.g.
+        # 140-150 s of the 90-150 s window; without it that chunk is lost.
+        last_start = win_end - CHUNK_SECONDS
+        for chunk_start in range(win_start, last_start + 1, CHUNK_SECONDS):
+            s = chunk_start * SAMPLE_RATE
+            e = s + CHUNK_SAMPLES
+            if e > len(audio):
+                break  # window runs past the end of the loaded audio
+            try:
+                emb = profiler._embed_audio_np(audio[s:e])
+                # Skip chunks that match Mike strongly (Mike interjections)
+                mike_sim = _cosine(mike.composite_embedding, emb)
+                if mike_sim >= MIKE_FILTER_THRESHOLD:
+                    skipped_mike += 1
+                    console.print(f" [dim yellow]skip Mike @ {chunk_start}s "
+                                  f"(sim={mike_sim:.2f})[/dim yellow]")
+                    continue
+                profile.embeddings.append(emb)
+                kept += 1
+                console.print(f" [dim]+1 @ {chunk_start}s (mike={mike_sim:.2f})[/dim]")
+            except Exception as ex:
+                # Best effort: one bad chunk should not abort the build.
+                failed += 1
+                console.print(f" [red]Failed @ {chunk_start}s: {ex}[/red]")
+
+    # Record the episode once, even if the script is run repeatedly.
+    if ep_name not in profile.source_episodes:
+        profile.source_episodes.append(ep_name)
+
+if not profile.embeddings:
+    console.print("[red]No embeddings collected — check windows / Mike threshold[/red]")
+    sys.exit(1)
+
+profile.compute_composite()
+console.print(f"\n[green]{COHOST_NAME} profile built: {profile.num_samples} embeddings, "
+              f"skipped {skipped_mike} as Mike, {failed} failed[/green]")
+
+# Diagnostics
+mike_sim = _cosine(mike.composite_embedding, profile.composite_embedding)
+console.print(f"[bold]Clay vs Mike similarity:[/bold] {mike_sim:.3f} (lower is better separation)")
+
+tara = profiler.profiles.get("Tara")
+if tara is not None and tara.composite_embedding is not None:
+    tara_sim = _cosine(tara.composite_embedding, profile.composite_embedding)
+    console.print(f"[bold]Clay vs Tara similarity:[/bold] {tara_sim:.3f}")
+
+profiler.save_profiles()
+console.print("[bold green]Profile saved.[/bold green]")