Audio processor: working voice profiler with WavLM speaker embeddings

- Voice profiler using microsoft/wavlm-base-sv (512-dim x-vector embeddings)
- Bootstrap from archive: 180 embeddings from 9 episodes across 2010-2018
- Host identification: similarity scores of 0.87-0.98 for the host's live speech,
  versus 0.60-0.64 for non-host audio (produced intros, co-host)
- Dropped speechbrain dependency (requires torchaudio, CUDA version conflicts)
- Patched torchaudio CUDA 12.8/13.1 version check (warning instead of error)
- Profile stored in voice-profiles/mike-swanson/ with per-chunk embeddings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 12:19:13 -07:00
parent 87f5a9306a
commit 826141a319
2 changed files with 430 additions and 6 deletions

View File

@@ -334,13 +334,29 @@ def _cmd_split(args, config):
def _cmd_bootstrap_voice(args, config):
    """Bootstrap the host voice profile from archive episodes.

    Recursively collects the MP3 files under ``args.archive_dir``, keeps at
    most ``args.sample_count`` of them spread evenly over the sorted listing,
    and hands them to ``VoiceProfiler.bootstrap_host_from_episodes`` under the
    name ``args.speaker_name``. The profiler's profiles are printed afterwards.

    Args:
        args: Parsed CLI arguments. Reads ``archive_dir``, ``speaker_name``
            and ``sample_count``.
        config: Project configuration. Used to resolve
            ``config.paths.voice_profiles`` into the profile directory.

    Returns:
        None. Returns early, after printing an error, when ``sample_count``
        is smaller than 1 or when no MP3 files are found.
    """
    console.print("[bold]Bootstrapping host voice profile[/bold]")
    console.print(f"Archive: {args.archive_dir}")
    console.print(f"Speaker: {args.speaker_name}")
    console.print(f"Sampling {args.sample_count} episodes")

    # A count of zero would divide by zero in the sampling step below and a
    # negative one would silently select nothing, so reject both up front.
    sample_count = args.sample_count
    if sample_count < 1:
        console.print(
            f"[red]Sample count must be at least 1 (got {sample_count})[/red]"
        )
        return

    # Imported inside the function, as in the original code.
    from .voice_profiler import VoiceProfiler

    archive_dir = Path(args.archive_dir)
    # NOTE(review): the device is hard-coded to CUDA; confirm whether a CPU
    # fallback is wanted on machines without a GPU.
    profiler = VoiceProfiler(
        config.resolve_path(config.paths.voice_profiles),
        device="cuda",
    )

    # Find MP3 files anywhere below the archive directory; sorting keeps the
    # selection deterministic across runs.
    mp3_files = sorted(archive_dir.glob("**/*.mp3"))
    if not mp3_files:
        console.print(f"[red]No MP3 files found in {archive_dir}[/red]")
        return

    # Sample if we have more than requested. Indices are spaced as
    # i * total / count rather than with a fixed integer step: a fixed step
    # collapses to 1 whenever total < 2 * count, which would take only the
    # first `count` files instead of covering the whole listing.
    total = len(mp3_files)
    if total > sample_count:
        mp3_files = [
            mp3_files[i * total // sample_count] for i in range(sample_count)
        ]

    console.print(f"[dim]Found {len(mp3_files)} episodes to process[/dim]")
    profiler.bootstrap_host_from_episodes(mp3_files, host_name=args.speaker_name)
    profiler.print_profiles()
def _cmd_review_elements(args, config):