Add radio show audio processor and post-show workflow

- Audio processor CLI tool with 6-stage pipeline: transcribe (faster-whisper GPU), diarize (pyannote), detect segments (multi-signal classifier), remove commercials, split segments, analyze content (Ollama)
- Post-show workflow doc for episode posts, forum threads, deep-dive blog posts
- Training plan for using 579-episode archive for voice profiles and commercial detection
- Successful test: 45min episode transcribed in 2:37 on RTX 5070 Ti
- Sample transcript output from S7E30 (March 2015)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 11:51:59 -07:00
parent a8c8c6b7b6
commit a1e0442d8b
17 changed files with 58344 additions and 0 deletions
--- a/projects/radio-show/audio-processor/src/cli.py
+++ b/projects/radio-show/audio-processor/src/cli.py
@@ -0,0 +1,356 @@
+"""CLI entry point for the radio show audio processor."""
+
+import argparse
+import sys
+from pathlib import Path
+
+from rich.console import Console
+from rich.panel import Panel
+
+from .config import load_config
+
+# Shared rich console; main() and the _cmd_* handlers in this module print
+# their banner and status messages through it.
+console = Console()
+
+
+def main():
+    """Parse the command line and dispatch to the chosen subcommand.
+
+    Defines a global ``--config`` option and one subparser per command,
+    loads the configuration via ``load_config``, prints the banner, then
+    calls the matching ``_cmd_*`` handler with ``(args, config)``.
+    """
+    usage_examples = """
+Examples:
+  %(prog)s process episode.mp3
+  %(prog)s process episode.mp3 --show-prep show-prep.md
+  %(prog)s process hr1.mp3 hr2.mp3 --archive-mode --date 2016-03-15
+  %(prog)s transcribe episode.mp3
+  %(prog)s bootstrap-voice archive/
+  %(prog)s review-elements
+  %(prog)s review-speakers
+        """
+    root = argparse.ArgumentParser(
+        description="Radio Show Audio Processor — The Computer Guru Show",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_examples,
+    )
+    root.add_argument(
+        "--config", type=str, default=None, help="Path to config.yaml",
+    )
+
+    commands = root.add_subparsers(dest="command", required=True)
+
+    # --- process: the full pipeline ---
+    full = commands.add_parser("process", help="Full pipeline")
+    full.add_argument(
+        "audio", nargs="+", type=str, help="Audio file(s) to process",
+    )
+    full.add_argument(
+        "--show-prep", type=str, default=None,
+        help="Path to show prep markdown file",
+    )
+    full.add_argument(
+        "--output", type=str, default=None, help="Output directory",
+    )
+    full.add_argument(
+        "--archive-mode", action="store_true",
+        help="Archive mode: learn elements and voices",
+    )
+    full.add_argument(
+        "--date", type=str, default=None,
+        help="Episode date (for archive mode)",
+    )
+    full.add_argument(
+        "--skip-transcribe", action="store_true",
+        help="Skip transcription (use existing transcript)",
+    )
+    full.add_argument(
+        "--skip-diarize", action="store_true", help="Skip diarization",
+    )
+    full.add_argument(
+        "--skip-analysis", action="store_true", help="Skip LLM analysis",
+    )
+
+    # --- transcribe ---
+    transcribe_only = commands.add_parser("transcribe", help="Transcribe only")
+    transcribe_only.add_argument("audio", type=str, help="Audio file")
+    transcribe_only.add_argument("--output", type=str, default=None)
+    transcribe_only.add_argument(
+        "--model", type=str, default=None, help="Whisper model size",
+    )
+
+    # --- diarize ---
+    diarize_only = commands.add_parser("diarize", help="Diarize only")
+    diarize_only.add_argument("audio", type=str, help="Audio file")
+    diarize_only.add_argument("--output", type=str, default=None)
+
+    # --- detect ---
+    detect_only = commands.add_parser("detect", help="Detect segments only")
+    detect_only.add_argument("audio", type=str, help="Audio file")
+    detect_only.add_argument("--output", type=str, default=None)
+    detect_only.add_argument("--show-prep", type=str, default=None)
+
+    # --- split ---
+    split_only = commands.add_parser("split", help="Split into segments")
+    split_only.add_argument("audio", type=str, help="Audio file")
+    split_only.add_argument(
+        "--detection-report", type=str, required=True,
+        help="Path to detection-report.json",
+    )
+    split_only.add_argument("--output", type=str, default=None)
+
+    # --- bootstrap-voice ---
+    voice = commands.add_parser(
+        "bootstrap-voice",
+        help="Bootstrap host voice profile from archive",
+    )
+    voice.add_argument(
+        "archive_dir", type=str, help="Directory containing archive MP3s",
+    )
+    voice.add_argument("--speaker-name", type=str, default="Mike Swanson")
+    voice.add_argument(
+        "--sample-count", type=int, default=10,
+        help="Number of episodes to sample",
+    )
+
+    # --- review commands take no arguments of their own ---
+    commands.add_parser(
+        "review-elements", help="Review discovered audio elements",
+    )
+    commands.add_parser(
+        "review-speakers", help="Review unknown speaker clusters",
+    )
+
+    args = root.parse_args()
+    config = load_config(args.config)
+
+    banner = (
+        "[bold]Radio Show Audio Processor[/bold]\n"
+        "[dim]The Computer Guru Show[/dim]"
+    )
+    console.print(Panel.fit(banner, border_style="blue"))
+
+    # Subcommand name -> handler; every handler takes (args, config).
+    handlers = {
+        "process": _cmd_process,
+        "transcribe": _cmd_transcribe,
+        "diarize": _cmd_diarize,
+        "detect": _cmd_detect,
+        "split": _cmd_split,
+        "bootstrap-voice": _cmd_bootstrap_voice,
+        "review-elements": _cmd_review_elements,
+        "review-speakers": _cmd_review_speakers,
+    }
+    handler = handlers.get(args.command)
+    if handler is not None:
+        handler(args, config)
+
+
+def _load_saved_transcript(transcript_file):
+    """Rebuild a ``Transcript`` from a previously saved ``transcript.json``.
+
+    Reads the keys ``segments`` (each with ``id``, ``text``, ``start``,
+    ``end`` and optional ``words``), ``language``, ``language_probability``
+    and ``duration`` from the JSON document.
+    """
+    import json
+
+    from .transcriber import Transcript, TranscriptSegment, TranscriptWord
+
+    # NOTE(review): opened with the platform default encoding because the
+    # writer (Transcript.save) is not visible here — confirm and pin both
+    # sides to UTF-8.
+    with open(transcript_file) as f:
+        data = json.load(f)
+
+    return Transcript(
+        segments=[
+            TranscriptSegment(
+                id=s["id"], text=s["text"],
+                start=s["start"], end=s["end"],
+                words=[TranscriptWord(**w) for w in s.get("words", [])],
+            )
+            for s in data["segments"]
+        ],
+        language=data["language"],
+        language_probability=data["language_probability"],
+        duration=data["duration"],
+    )
+
+
+def _cmd_process(args, config):
+    """Run the full processing pipeline on one episode.
+
+    Stages: transcribe, diarize, detect segments, remove commercials,
+    split segments (plus chapter generation), and LLM analysis. Several
+    input files are concatenated into one before processing.
+
+    Reads ``args.audio``, ``args.output``, ``args.show_prep``,
+    ``args.skip_transcribe``, ``args.skip_diarize`` and
+    ``args.skip_analysis``.
+
+    NOTE(review): ``args.archive_mode`` and ``args.date`` are accepted by
+    the parser but are not used anywhere in this function.
+    """
+    from .transcriber import transcribe
+    from .diarizer import diarize, VoiceProfileStore
+    from .segment_detector import SegmentDetector
+    from .audio_editor import remove_commercials, split_segments, generate_chapters
+    from .analyzer import analyze_episode
+
+    audio_files = [Path(f) for f in args.audio]
+    audio_path = audio_files[0]  # Primary file
+
+    # If multiple files (HR1 + HR2), concatenate first
+    if len(audio_files) > 1:
+        audio_path = _concatenate_audio(audio_files, config)
+
+    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Load show prep if provided. Read explicitly as UTF-8 so the result
+    # does not depend on the platform's locale encoding.
+    show_prep = None
+    if args.show_prep:
+        show_prep = Path(args.show_prep).read_text(encoding="utf-8")
+
+    # Stage 1: Transcribe
+    transcript = None
+    if not args.skip_transcribe:
+        transcript = transcribe(
+            audio_path,
+            model_size=config.audio.whisper_model,
+            language=config.audio.whisper_language,
+        )
+        transcript.save(output_dir)
+    else:
+        console.print("[dim]Skipping transcription[/dim]")
+        # Try to load existing transcript
+        transcript_file = output_dir / "transcript.json"
+        if transcript_file.exists():
+            transcript = _load_saved_transcript(transcript_file)
+        else:
+            # Without this message the run would quietly continue with no
+            # transcript and later skip analysis without explanation.
+            console.print(
+                f"[yellow]No existing transcript found at {transcript_file} "
+                "— continuing without one[/yellow]"
+            )
+
+    # Stage 2: Diarize
+    diarization = None
+    if not args.skip_diarize:
+        voice_profiles = VoiceProfileStore(
+            config.resolve_path(config.diarization.voice_profiles_dir)
+        )
+        diarization = diarize(
+            audio_path,
+            voice_profiles=voice_profiles,
+            min_speakers=config.diarization.min_speakers,
+            max_speakers=config.diarization.max_speakers,
+        )
+        diarization.save(output_dir)
+    else:
+        console.print("[dim]Skipping diarization[/dim]")
+
+    # Stage 3: Detect segments (transcript / diarization may be None)
+    detector = SegmentDetector(config)
+    detection = detector.detect(
+        audio_path,
+        transcript=transcript,
+        diarization=diarization,
+        show_prep=show_prep,
+    )
+    detection.save(output_dir)
+
+    # Stage 4: Remove commercials
+    clean_path = output_dir / f"podcast-episode.{config.audio.output_format}"
+    remove_commercials(
+        audio_path, detection.segments, clean_path,
+        crossfade_ms=config.audio.crossfade_ms,
+        bitrate=config.audio.output_bitrate,
+        normalize=config.audio.normalize,
+    )
+
+    # Stage 5: Split segments
+    segments_dir = output_dir / "segments"
+    split_segments(
+        audio_path, detection.segments, segments_dir,
+        bitrate=config.audio.output_bitrate,
+    )
+
+    # Generate chapters
+    generate_chapters(detection.segments, output_dir / "chapters.json")
+
+    # Stage 6: Analyze (needs a transcript)
+    if not args.skip_analysis and transcript:
+        analysis = analyze_episode(
+            transcript_text=transcript.full_text,
+            diarization_data=diarization.to_dict() if diarization else None,
+            show_prep=show_prep,
+            segments=detection.segments,
+            model=config.llm.model,
+            ollama_host=config.llm.ollama_host,
+        )
+        # Analysis output goes to a "generated" directory that is a
+        # sibling of output_dir, not inside it.
+        generated_dir = output_dir.parent / "generated"
+        analysis.save(generated_dir)
+    elif args.skip_analysis:
+        console.print("[dim]Skipping analysis[/dim]")
+    else:
+        console.print("[yellow]Skipping analysis — no transcript available[/yellow]")
+
+    console.print("\n[bold green]Processing complete![/bold green]")
+    console.print(f"Output: {output_dir}")
+
+
+def _cmd_transcribe(args, config):
+    """Transcribe a single audio file and save the transcript.
+
+    The model size comes from ``--model`` when given, otherwise from
+    ``config.audio.whisper_model``. The language comes from
+    ``config.audio.whisper_language``, matching the full ``process``
+    pipeline. Output goes to ``--output`` or a ``processed`` directory
+    next to the audio file.
+    """
+    from .transcriber import transcribe
+
+    audio_path = Path(args.audio)
+    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
+    # Create the target directory up front, as the full pipeline does.
+    output_dir.mkdir(parents=True, exist_ok=True)
+    model = args.model or config.audio.whisper_model
+
+    transcript = transcribe(
+        audio_path,
+        model_size=model,
+        # Honour the configured language so standalone runs match `process`.
+        language=config.audio.whisper_language,
+    )
+    transcript.save(output_dir)
+
+
+def _cmd_diarize(args, config):
+    """Diarize a single audio file and save the result.
+
+    Uses the voice profile store at ``config.diarization.voice_profiles_dir``
+    and the configured ``min_speakers`` / ``max_speakers`` bounds, matching
+    the full ``process`` pipeline. Output goes to ``--output`` or a
+    ``processed`` directory next to the audio file.
+    """
+    from .diarizer import diarize, VoiceProfileStore
+
+    audio_path = Path(args.audio)
+    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
+    # Create the target directory up front, as the full pipeline does.
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    voice_profiles = VoiceProfileStore(
+        config.resolve_path(config.diarization.voice_profiles_dir)
+    )
+    result = diarize(
+        audio_path,
+        voice_profiles=voice_profiles,
+        # Pass the configured speaker bounds so standalone runs match `process`.
+        min_speakers=config.diarization.min_speakers,
+        max_speakers=config.diarization.max_speakers,
+    )
+    result.save(output_dir)
+
+
+def _cmd_detect(args, config):
+    """Run segment detection on a single audio file and save the report.
+
+    Only the audio and the optional show-prep text are given to the
+    detector; no transcript or diarization is passed in this mode. Output
+    goes to ``--output`` or a ``processed`` directory next to the audio
+    file.
+    """
+    from .segment_detector import SegmentDetector
+
+    audio_path = Path(args.audio)
+    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
+    # Create the target directory up front, as the full pipeline does.
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Read explicitly as UTF-8 so the result does not depend on the
+    # platform's locale encoding.
+    show_prep = None
+    if args.show_prep:
+        show_prep = Path(args.show_prep).read_text(encoding="utf-8")
+
+    detector = SegmentDetector(config)
+    result = detector.detect(audio_path, show_prep=show_prep)
+    result.save(output_dir)
+
+
+def _cmd_split(args, config):
+    """Split an audio file using an existing detection report.
+
+    Loads the JSON report at ``--detection-report``, rebuilds a
+    ``DetectedSegment`` for every entry under its ``segments`` key (reading
+    ``start``, ``end``, ``type``, ``confidence`` and optional ``label``),
+    then writes the split audio to ``--output`` (default: a ``segments``
+    directory next to the audio file). ``chapters.json`` is written to the
+    parent of that output directory.
+    """
+    from .audio_editor import split_segments, generate_chapters
+    from .segment_detector import DetectedSegment, SegmentType
+    import json
+
+    audio_path = Path(args.audio)
+    output_dir = Path(args.output) if args.output else audio_path.parent / "segments"
+
+    with open(args.detection_report) as f:
+        report = json.load(f)
+
+    segments = [
+        DetectedSegment(
+            start=s["start"], end=s["end"],
+            segment_type=SegmentType(s["type"]),
+            confidence=s["confidence"],
+            label=s.get("label", ""),
+        )
+        for s in report["segments"]
+    ]
+
+    # Pass the bitrate by keyword, as the full pipeline does, so it cannot
+    # bind to the wrong positional parameter.
+    split_segments(
+        audio_path, segments, output_dir,
+        bitrate=config.audio.output_bitrate,
+    )
+    generate_chapters(segments, output_dir.parent / "chapters.json")
+
+
+def _cmd_bootstrap_voice(args, config):
+    """Placeholder for building the host voice profile from archive episodes.
+
+    Currently only echoes the requested archive directory, speaker name
+    and sample count, then reports that the feature is not implemented.
+    """
+    summary_lines = (
+        "[bold]Bootstrapping host voice profile[/bold]",
+        f"Archive: {args.archive_dir}",
+        f"Speaker: {args.speaker_name}",
+        f"Sampling {args.sample_count} episodes",
+    )
+    for line in summary_lines:
+        console.print(line)
+
+    # TODO: Implement archive sampling + diarization + embedding extraction
+    console.print("[yellow]Not yet implemented — run individual diarizations first[/yellow]")
+
+
+def _cmd_review_elements(args, config):
+    """Placeholder for the audio element review; prints a notice only."""
+    # TODO: Implement element review UI
+    for message in (
+        "[bold]Reviewing discovered elements[/bold]",
+        "[yellow]Not yet implemented[/yellow]",
+    ):
+        console.print(message)
+
+
+def _cmd_review_speakers(args, config):
+    """Placeholder for the unknown-speaker review; prints a notice only."""
+    # TODO: Implement speaker review UI
+    for message in (
+        "[bold]Reviewing unknown speakers[/bold]",
+        "[yellow]Not yet implemented[/yellow]",
+    ):
+        console.print(message)
+
+
+def _concatenate_audio(files: list[Path], config) -> Path:
+    """Concatenate multiple audio files (e.g., HR1 + HR2) with ffmpeg.
+
+    Writes a temporary concat list next to the first input, runs ffmpeg's
+    concat demuxer with stream copy, and returns the path of the combined
+    file (``combined_<first stem>.mp3`` in the first input's directory).
+
+    Args:
+        files: Input audio files, in playback order.
+        config: Unused here; kept for a uniform helper signature.
+
+    Returns:
+        Path to the concatenated MP3.
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+    """
+    import subprocess
+
+    output = files[0].parent / f"combined_{files[0].stem}.mp3"
+    concat_file = files[0].parent / ".concat_list.txt"
+
+    try:
+        # ffmpeg reads the list as UTF-8, so do not rely on the locale encoding.
+        with open(concat_file, "w", encoding="utf-8") as f:
+            for audio_file in files:
+                # Relative entries are resolved against the list file's own
+                # directory, not the current working directory, so write
+                # absolute paths. A single quote inside a quoted entry is
+                # written as '\'' (close quote, escaped quote, reopen).
+                entry = str(audio_file.resolve()).replace("'", "'\\''")
+                f.write(f"file '{entry}'\n")
+
+        subprocess.run(
+            ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
+             "-i", str(concat_file), "-c", "copy", str(output)],
+            capture_output=True, check=True,
+        )
+    except subprocess.CalledProcessError as err:
+        # Output was captured, so show ffmpeg's own diagnostics before
+        # re-raising; otherwise the failure reason is invisible.
+        if err.stderr:
+            console.print(
+                err.stderr.decode(errors="replace"),
+                markup=False, highlight=False,
+            )
+        raise
+    finally:
+        # Always remove the temporary list, including on failure.
+        concat_file.unlink(missing_ok=True)
+
+    console.print(f"[dim]Concatenated {len(files)} files -> {output.name}[/dim]")
+    return output
+
+
+# Script entry point. This module uses a relative import (``from .config``),
+# so it has to be run as part of its package (e.g. with ``python -m``)
+# rather than as a bare file path.
+if __name__ == "__main__":
+    main()