radio: skip Clay profile build (failed) — accept 2015-s7e19 Q&A as noisy
First attempt at Clay's voice profile from 2015-s7e19 produced Clay-vs-Mike cosine similarity of 0.994 — essentially a Mike clone. Root cause: 10s WavLM x-vector chunks averaged Mike's frequent interjections together with Clay's dialogue, and Mike's well-trained profile dominated the resulting embedding signal. Mike's call: skip Clay, accept the 2015-s7e19 Q&A as noisy. Clay rarely appears in other episodes, so the cost of not having his profile is bounded to this one episode plus any rare future appearances. Cleanup: - voice-profiles/clay/ removed - voice-profiles/profiles.json: Clay entry removed - Memory updated to record the decision and the failure mode Kept build_clay_profile.py in-repo as documentation of the attempt and the Mike-similarity-filter pattern. Useful starting point if a future attempt provides cleaner pure-Clay timestamps. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,7 +38,7 @@ In **2015-s7e19** (Jan 2015 New Year episode):
|
||||
### Recurring guests / fill-ins
|
||||
| Person | Affiliation | Confirmed in audio | Profile built |
|
||||
|---|---|---|---|
|
||||
| **Clay** | "Nerd Junkies" — fills in for Tara when she's out (Mike: rarely appears in other episodes) | 2015-s7e19 (throughout — Tara was out, Clay covered) | pending |
|
||||
| **Clay** | "Nerd Junkies" — fills in for Tara when she's out (Mike: rarely appears in other episodes) | 2015-s7e19 (throughout — Tara was out, Clay covered) | **skipped** — first attempt failed (Clay vs Mike sim = 0.994); Mike chose to accept 2015-s7e19's Q&A as noisy rather than build cleanly. Mike's rationale: Clay is rare in other episodes, so the cost of not having his profile is bounded |
|
||||
|
||||
Tara's role is explicit per transcript at 2015-s7e19 @ 00:51: "in Tara's place, we have Clay. Clay from the Nerd Junkies." — Tara is the regular co-host for that era; Clay is a fill-in.
|
||||
|
||||
|
||||
136
projects/radio-show/audio-processor/build_clay_profile.py
Normal file
136
projects/radio-show/audio-processor/build_clay_profile.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Build voice profile for Clay (Nerd Junkies — fill-in for Tara) from
|
||||
hand-picked windows in 2015-s7e19.
|
||||
|
||||
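Includes a Mike-similarity filter (skip any chunk whose cosine vs Mike's
composite is >= MIKE_FILTER_THRESHOLD; originally intended as 0.85) meant to
keep Mike's interjections during Clay's monologues from contaminating Clay's
profile. MIKE_FILTER_THRESHOLD is currently set to 1.01, which effectively
disables the filter.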
|
||||
"""
|
||||
import os, sys
|
||||
os.environ["PYTHONIOENCODING"] = "utf-8"
|
||||
os.environ["TRANSFORMERS_OFFLINE"] = "1"
|
||||
if hasattr(sys.stdout, "reconfigure"):
|
||||
sys.stdout.reconfigure(encoding="utf-8")
|
||||
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
from src.gpu import ensure_cuda_libs
|
||||
ensure_cuda_libs()
|
||||
|
||||
import torch
|
||||
from src.voice_profiler import VoiceProfiler, SpeakerProfile
|
||||
from rich.console import Console
|
||||
|
||||
console = Console()
|
||||
|
||||
# Filesystem layout, resolved relative to the directory holding this script.
BASE = Path(__file__).parent
PROFILES_DIR = BASE.joinpath("voice-profiles")
EPISODES_DIR = BASE.joinpath("test-data", "episodes")

# Hand-picked (start_seconds, end_seconds) ranges, keyed by episode filename.
# Transcript-vetted as Mike+Clay banter with no callers. Chunks that match
# Mike's profile are dropped at build time only when their similarity
# reaches MIKE_FILTER_THRESHOLD (see below).
CLAY_WINDOWS = {
    "2015-s7e19.mp3": [
        (90, 150),     # 01:30-02:30 — Clay introducing Nerd Junkies team
        (2520, 2640),  # 42:00-44:00 — Clay's 2014 gaming year-in-review
        (2730, 2820),  # 45:30-47:00 — Clay on VR/Oculus
    ],
}

# Key under which the profile is stored in the profiler's profile table.
COHOST_NAME = "Clay"

# A chunk is rejected when its cosine similarity to Mike's composite is
# >= this value. A real filter would drop everything: Mike's profile matches
# at 0.92+ on any chunk in these windows because Mike is interjecting and his
# profile is broad. So the threshold is set above the maximum cosine value,
# leaving it to the cosine comparison at diarization time to put Mike chunks
# in Mike's bucket and Clay chunks in Clay's.
MIKE_FILTER_THRESHOLD = 1.01  # effectively disabled
|
||||
|
||||
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
console.print(f"Device: {device}")

profiler = VoiceProfiler(PROFILES_DIR, device=device)

# Mike's composite embedding is needed for both the per-chunk similarity
# check and the final diagnostics, so bail out early if it is unavailable.
mike = profiler.profiles.get("Mike Swanson")
if not (mike is not None and mike.composite_embedding is not None):
    console.print("[red]Mike's profile not loaded — abort.[/red]")
    sys.exit(1)

# Reuse an existing profile entry if present; otherwise start an empty one.
if COHOST_NAME not in profiler.profiles:
    profiler.profiles[COHOST_NAME] = SpeakerProfile(
        name=COHOST_NAME,
        role="cohost",
        embeddings=[],
        source_episodes=[],
    )
profile = profiler.profiles[COHOST_NAME]

console.print(f"\n[bold]Building voice profile: {COHOST_NAME}[/bold]")
console.print(f" Mike-similarity filter @ >= {MIKE_FILTER_THRESHOLD}")

# Norm of Mike's composite, computed once and reused for every cosine below.
mike_norm = np.linalg.norm(mike.composite_embedding)

# Running tallies: chunks kept, chunks rejected as Mike, chunks that errored.
kept = skipped_mike = failed = 0
|
||||
|
||||
# Chunk geometry for embedding extraction. These never change between
# episodes, so they are defined once, outside the loops.
SAMPLE_RATE = 16000  # NOTE(review): assumes _load_full_audio yields 16 kHz samples — confirm
chunk_s = 10.0
chunk_step = int(chunk_s)
chunk_samples = int(chunk_s * SAMPLE_RATE)

for ep_name, windows in CLAY_WINDOWS.items():
    ep_path = EPISODES_DIR / ep_name
    if not ep_path.exists():
        console.print(f"[yellow] Skipping {ep_name} — not found[/yellow]")
        continue

    console.print(f"\n Loading {ep_name}...")
    audio = profiler._load_full_audio(ep_path)
    # Presumably makes sure the embedding model is loaded before the
    # per-chunk calls below — confirm against VoiceProfiler.
    profiler._get_model()

    for win_start, win_end in windows:
        # The stop bound is win_end - chunk_step + 1 so that the last full
        # chunk (the one ending exactly at win_end) is included. Without the
        # +1 the final 10 s of every window was silently dropped.
        for chunk_start in range(win_start, win_end - chunk_step + 1, chunk_step):
            s = int(chunk_start * SAMPLE_RATE)
            e = s + chunk_samples
            if e > len(audio):
                # Window runs past the end of the audio; later chunks in this
                # window would too, so stop processing this window.
                break

            try:
                emb = profiler._embed_audio_np(audio[s:e])
                # Cosine similarity of this chunk against Mike's composite;
                # the epsilon guards against a zero-norm embedding.
                mike_sim = float(np.dot(mike.composite_embedding, emb) /
                                 (mike_norm * np.linalg.norm(emb) + 1e-8))
            except Exception as ex:
                # Best-effort per chunk: record the failure and move on
                # rather than aborting the whole build.
                failed += 1
                console.print(f" [red]Failed @ {chunk_start}s: {ex}[/red]")
            else:
                # Skip chunks that match Mike strongly (Mike interjections)
                if mike_sim >= MIKE_FILTER_THRESHOLD:
                    skipped_mike += 1
                    console.print(f" [dim yellow]skip Mike @ {chunk_start}s "
                                  f"(sim={mike_sim:.2f})[/dim yellow]")
                    continue

                profile.embeddings.append(emb)
                kept += 1
                console.print(f" [dim]+1 @ {chunk_start}s (mike={mike_sim:.2f})[/dim]")

    # Record the episode once; re-running against an existing profile must
    # not accumulate duplicate entries.
    if ep_name not in profile.source_episodes:
        profile.source_episodes.append(ep_name)
|
||||
|
||||
# Nothing usable was embedded: report and exit instead of saving an empty
# profile.
if not profile.embeddings:
    console.print("[red]No embeddings collected — check windows / Mike threshold[/red]")
    sys.exit(1)

profile.compute_composite()

summary = (f"\n[green]{COHOST_NAME} profile built: {profile.num_samples} embeddings, "
           f"skipped {skipped_mike} as Mike, {failed} failed[/green]")
console.print(summary)

# Diagnostics: cosine similarity of the new composite against the existing
# profiles. Clay's vector and norm are shared by both comparisons.
clay_vec = profile.composite_embedding
clay_norm = np.linalg.norm(clay_vec)

mike_sim = float(np.dot(mike.composite_embedding, clay_vec) /
                 (mike_norm * clay_norm + 1e-8))
console.print(f"[bold]Clay vs Mike similarity:[/bold] {mike_sim:.3f} (lower is better separation)")

# Tara's profile is optional; compare only when it exists with a composite.
tara = profiler.profiles.get("Tara")
if tara and tara.composite_embedding is not None:
    tara_norm = np.linalg.norm(tara.composite_embedding)
    tara_sim = float(np.dot(tara.composite_embedding, clay_vec) /
                     (tara_norm * clay_norm + 1e-8))
    console.print(f"[bold]Clay vs Tara similarity:[/bold] {tara_sim:.3f}")

# Persist all profiles held by the profiler, including the one just built.
profiler.save_profiles()
console.print("[bold green]Profile saved.[/bold green]")
|
||||
Reference in New Issue
Block a user