"""CLI entry point for the radio show audio processor.""" # Must set CUDA paths before any torch/ctranslate2 imports from .gpu import ensure_cuda_libs ensure_cuda_libs() import argparse import sys from pathlib import Path from rich.console import Console from rich.panel import Panel from .config import load_config console = Console() def main(): parser = argparse.ArgumentParser( description="Radio Show Audio Processor — The Computer Guru Show", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: %(prog)s process episode.mp3 %(prog)s process episode.mp3 --show-prep show-prep.md %(prog)s process hr1.mp3 hr2.mp3 --archive-mode --date 2016-03-15 %(prog)s transcribe episode.mp3 %(prog)s bootstrap-voice archive/ %(prog)s review-elements %(prog)s review-speakers """, ) parser.add_argument("--config", type=str, default=None, help="Path to config.yaml") subparsers = parser.add_subparsers(dest="command", required=True) # === process === p_process = subparsers.add_parser("process", help="Full pipeline") p_process.add_argument("audio", nargs="+", type=str, help="Audio file(s) to process") p_process.add_argument("--show-prep", type=str, default=None, help="Path to show prep markdown file") p_process.add_argument("--output", type=str, default=None, help="Output directory") p_process.add_argument("--archive-mode", action="store_true", help="Archive mode: learn elements and voices") p_process.add_argument("--date", type=str, default=None, help="Episode date (for archive mode)") p_process.add_argument("--skip-transcribe", action="store_true", help="Skip transcription (use existing transcript)") p_process.add_argument("--skip-diarize", action="store_true", help="Skip diarization") p_process.add_argument("--skip-analysis", action="store_true", help="Skip LLM analysis") # === transcribe === p_transcribe = subparsers.add_parser("transcribe", help="Transcribe only") p_transcribe.add_argument("audio", type=str, help="Audio file") p_transcribe.add_argument("--output", type=str, default=None) p_transcribe.add_argument("--model", type=str, default=None, help="Whisper model size") # === diarize === p_diarize = subparsers.add_parser("diarize", help="Diarize only") p_diarize.add_argument("audio", type=str, help="Audio file") p_diarize.add_argument("--output", type=str, default=None) # === detect === p_detect = subparsers.add_parser("detect", help="Detect segments only") p_detect.add_argument("audio", type=str, help="Audio file") p_detect.add_argument("--output", type=str, default=None) p_detect.add_argument("--show-prep", type=str, default=None) # === split === p_split = subparsers.add_parser("split", help="Split into segments") p_split.add_argument("audio", type=str, help="Audio file") p_split.add_argument("--detection-report", type=str, required=True, help="Path to detection-report.json") p_split.add_argument("--output", type=str, default=None) # === bootstrap-voice === p_voice = subparsers.add_parser("bootstrap-voice", help="Bootstrap host voice profile from archive") p_voice.add_argument("archive_dir", type=str, help="Directory containing archive MP3s") p_voice.add_argument("--speaker-name", type=str, default="Mike Swanson") p_voice.add_argument("--sample-count", type=int, default=10, help="Number of episodes to sample") # === review-elements === subparsers.add_parser("review-elements", help="Review discovered audio elements") # === review-speakers === subparsers.add_parser("review-speakers", help="Review unknown speaker clusters") args = parser.parse_args() config = load_config(args.config) console.print(Panel.fit( "[bold]Radio Show Audio Processor[/bold]\n" f"[dim]The Computer Guru Show[/dim]", border_style="blue", )) if args.command == "process": _cmd_process(args, config) elif args.command == "transcribe": _cmd_transcribe(args, config) elif args.command == "diarize": _cmd_diarize(args, config) elif args.command == "detect": _cmd_detect(args, config) elif args.command == "split": _cmd_split(args, config) elif args.command == "bootstrap-voice": _cmd_bootstrap_voice(args, config) elif args.command == "review-elements": _cmd_review_elements(args, config) elif args.command == "review-speakers": _cmd_review_speakers(args, config) def _cmd_process(args, config): """Full processing pipeline.""" from .transcriber import transcribe from .diarizer import diarize, VoiceProfileStore from .segment_detector import SegmentDetector from .audio_editor import remove_commercials, split_segments, generate_chapters from .analyzer import analyze_episode audio_files = [Path(f) for f in args.audio] audio_path = audio_files[0] # Primary file # If multiple files (HR1 + HR2), concatenate first if len(audio_files) > 1: audio_path = _concatenate_audio(audio_files, config) output_dir = Path(args.output) if args.output else audio_path.parent / "processed" output_dir.mkdir(parents=True, exist_ok=True) # Load show prep if provided show_prep = None if args.show_prep: show_prep = Path(args.show_prep).read_text() # Stage 1: Transcribe transcript = None if not args.skip_transcribe: transcript = transcribe( audio_path, model_size=config.audio.whisper_model, language=config.audio.whisper_language, ) transcript.save(output_dir) else: console.print("[dim]Skipping transcription[/dim]") # Try to load existing transcript transcript_file = output_dir / "transcript.json" if transcript_file.exists(): from .transcriber import Transcript, TranscriptSegment, TranscriptWord import json with open(transcript_file) as f: data = json.load(f) transcript = Transcript( segments=[ TranscriptSegment( id=s["id"], text=s["text"], start=s["start"], end=s["end"], words=[TranscriptWord(**w) for w in s.get("words", [])], ) for s in data["segments"] ], language=data["language"], language_probability=data["language_probability"], duration=data["duration"], ) # Stage 2: Diarize diarization = None if not args.skip_diarize: voice_profiles = VoiceProfileStore( config.resolve_path(config.diarization.voice_profiles_dir) ) diarization = diarize( audio_path, voice_profiles=voice_profiles, min_speakers=config.diarization.min_speakers, max_speakers=config.diarization.max_speakers, ) diarization.save(output_dir) else: console.print("[dim]Skipping diarization[/dim]") # Stage 3: Detect segments detector = SegmentDetector(config) detection = detector.detect( audio_path, transcript=transcript, diarization=diarization, show_prep=show_prep, ) detection.save(output_dir) # Stage 4: Remove commercials clean_path = output_dir / f"podcast-episode.{config.audio.output_format}" remove_commercials( audio_path, detection.segments, clean_path, crossfade_ms=config.audio.crossfade_ms, bitrate=config.audio.output_bitrate, normalize=config.audio.normalize, ) # Stage 5: Split segments segments_dir = output_dir / "segments" split_segments( audio_path, detection.segments, segments_dir, bitrate=config.audio.output_bitrate, ) # Generate chapters generate_chapters(detection.segments, output_dir / "chapters.json") # Stage 6: Analyze if not args.skip_analysis and transcript: analysis = analyze_episode( transcript_text=transcript.full_text, diarization_data=diarization.to_dict() if diarization else None, show_prep=show_prep, segments=detection.segments, model=config.llm.model, ollama_host=config.llm.ollama_host, ) generated_dir = output_dir.parent / "generated" analysis.save(generated_dir) console.print("\n[bold green]Processing complete![/bold green]") console.print(f"Output: {output_dir}") def _cmd_transcribe(args, config): """Transcribe only.""" from .transcriber import transcribe audio_path = Path(args.audio) output_dir = Path(args.output) if args.output else audio_path.parent / "processed" model = args.model or config.audio.whisper_model transcript = transcribe(audio_path, model_size=model) transcript.save(output_dir) def _cmd_diarize(args, config): """Diarize only.""" from .diarizer import diarize, VoiceProfileStore audio_path = Path(args.audio) output_dir = Path(args.output) if args.output else audio_path.parent / "processed" voice_profiles = VoiceProfileStore( config.resolve_path(config.diarization.voice_profiles_dir) ) result = diarize(audio_path, voice_profiles=voice_profiles) result.save(output_dir) def _cmd_detect(args, config): """Segment detection only.""" from .segment_detector import SegmentDetector audio_path = Path(args.audio) output_dir = Path(args.output) if args.output else audio_path.parent / "processed" show_prep = None if args.show_prep: show_prep = Path(args.show_prep).read_text() # Load existing transcript if available transcript = None transcript_file = output_dir / "transcript.json" if transcript_file.exists(): from .transcriber import Transcript, TranscriptSegment, TranscriptWord import json console.print(f"[dim]Loading transcript from {transcript_file}[/dim]") with open(transcript_file) as f: data = json.load(f) transcript = Transcript( segments=[ TranscriptSegment( id=s["id"], text=s["text"], start=s["start"], end=s["end"], words=[TranscriptWord(**w) for w in s.get("words", [])], ) for s in data["segments"] ], language=data["language"], language_probability=data["language_probability"], duration=data["duration"], ) detector = SegmentDetector(config) result = detector.detect(audio_path, transcript=transcript, show_prep=show_prep) result.save(output_dir) def _cmd_split(args, config): """Split using existing detection report.""" from .audio_editor import split_segments, generate_chapters from .segment_detector import DetectedSegment, SegmentType import json audio_path = Path(args.audio) output_dir = Path(args.output) if args.output else audio_path.parent / "segments" with open(args.detection_report) as f: report = json.load(f) segments = [ DetectedSegment( start=s["start"], end=s["end"], segment_type=SegmentType(s["type"]), confidence=s["confidence"], label=s.get("label", ""), ) for s in report["segments"] ] split_segments(audio_path, segments, output_dir, config.audio.output_bitrate) generate_chapters(segments, output_dir.parent / "chapters.json") def _cmd_bootstrap_voice(args, config): """Bootstrap host voice profile from archive episodes.""" from .voice_profiler import VoiceProfiler archive_dir = Path(args.archive_dir) profiler = VoiceProfiler( config.resolve_path(config.paths.voice_profiles), device="cuda", ) # Find MP3 files in archive directory mp3_files = sorted(archive_dir.glob("**/*.mp3")) if not mp3_files: console.print(f"[red]No MP3 files found in {archive_dir}[/red]") return # Sample if we have more than requested if len(mp3_files) > args.sample_count: step = len(mp3_files) // args.sample_count mp3_files = [mp3_files[i * step] for i in range(args.sample_count)] console.print(f"[dim]Found {len(mp3_files)} episodes to process[/dim]") profiler.bootstrap_host_from_episodes(mp3_files, host_name=args.speaker_name) profiler.print_profiles() def _cmd_review_elements(args, config): """Review discovered audio elements.""" console.print("[bold]Reviewing discovered elements[/bold]") # TODO: Implement element review UI console.print("[yellow]Not yet implemented[/yellow]") def _cmd_review_speakers(args, config): """Review unknown speaker clusters.""" console.print("[bold]Reviewing unknown speakers[/bold]") # TODO: Implement speaker review UI console.print("[yellow]Not yet implemented[/yellow]") def _concatenate_audio(files: list[Path], config) -> Path: """Concatenate multiple audio files (e.g., HR1 + HR2).""" import subprocess output = files[0].parent / f"combined_{files[0].stem}.mp3" concat_file = files[0].parent / ".concat_list.txt" with open(concat_file, "w") as f: for audio_file in files: f.write(f"file '{audio_file}'\n") subprocess.run( ["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", str(concat_file), "-c", "copy", str(output)], capture_output=True, check=True, ) concat_file.unlink() console.print(f"[dim]Concatenated {len(files)} files -> {output.name}[/dim]") return output if __name__ == "__main__": main()