Audio processor: working voice profiler with WavLM speaker embeddings
- Voice profiler using microsoft/wavlm-base-sv (512-dim x-vector embeddings)
- Bootstrap from archive: 180 embeddings from 9 episodes across 2010-2018
- Host identification accuracy: 0.87-0.98 similarity for live speech, 0.60-0.64 for non-host audio (produced intros, co-host)
- Dropped speechbrain dependency (requires torchaudio, CUDA version conflicts)
- Patched torchaudio CUDA 12.8/13.1 version check (warning instead of error)
- Profile stored in voice-profiles/mike-swanson/ with per-chunk embeddings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -334,13 +334,29 @@ def _cmd_split(args, config):
|
||||
|
||||
def _cmd_bootstrap_voice(args, config):
    """Bootstrap host voice profile from archive episodes.

    Recursively collects the MP3 files under ``args.archive_dir``, thins them
    to at most ``args.sample_count`` evenly spaced files (in sorted path
    order), and passes them to ``VoiceProfiler.bootstrap_host_from_episodes``
    under the name ``args.speaker_name``. The stored profiles are printed
    afterwards.

    Args:
        args: Parsed command arguments; ``archive_dir``, ``speaker_name`` and
            ``sample_count`` are read.
        config: Project configuration; ``config.paths.voice_profiles`` is
            resolved through ``config.resolve_path`` and handed to the
            profiler.

    Returns:
        None. A non-positive sample count or an archive without MP3 files is
        reported on the console and the command returns early.
    """
    console.print("[bold]Bootstrapping host voice profile[/bold]")
    console.print(f"Archive: {args.archive_dir}")
    console.print(f"Speaker: {args.speaker_name}")
    console.print(f"Sampling {args.sample_count} episodes")

    # Reject a non-positive count up front: zero would divide by zero when the
    # sampling stride is computed, and a negative value would silently select
    # no episodes at all.
    if args.sample_count < 1:
        console.print(
            f"[red]Sample count must be at least 1 (got {args.sample_count})[/red]"
        )
        return

    # Imported inside the command rather than at module level.
    # NOTE(review): presumably to keep the model stack from loading for
    # unrelated subcommands - confirm.
    from .voice_profiler import VoiceProfiler

    archive_dir = Path(args.archive_dir)
    profiler = VoiceProfiler(
        config.resolve_path(config.paths.voice_profiles),
        device="cuda",
    )

    # Find MP3 files in archive directory (recursive; sorted so the sampling
    # below is deterministic).
    mp3_files = sorted(archive_dir.glob("**/*.mp3"))
    if not mp3_files:
        console.print(f"[red]No MP3 files found in {archive_dir}[/red]")
        return

    # Sample if we have more than requested: take every `step`-th file,
    # capped at the requested count, so the picks are spread across the
    # sorted listing instead of clustered at its start.
    if len(mp3_files) > args.sample_count:
        step = len(mp3_files) // args.sample_count
        mp3_files = mp3_files[::step][: args.sample_count]

    console.print(f"[dim]Found {len(mp3_files)} episodes to process[/dim]")

    profiler.bootstrap_host_from_episodes(mp3_files, host_name=args.speaker_name)
    profiler.print_profiles()
|
||||
|
||||
|
||||
def _cmd_review_elements(args, config):
|
||||
|
||||
Reference in New Issue
Block a user