Add radio show audio processor and post-show workflow
- Audio processor CLI tool with 6-stage pipeline: transcribe (faster-whisper GPU), diarize (pyannote), detect segments (multi-signal classifier), remove commercials, split segments, analyze content (Ollama)
- Post-show workflow doc for episode posts, forum threads, deep-dive blog posts
- Training plan for using 579-episode archive for voice profiles and commercial detection
- Successful test: 45min episode transcribed in 2:37 on RTX 5070 Ti
- Sample transcript output from S7E30 (March 2015)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
356
projects/radio-show/audio-processor/src/cli.py
Normal file
356
projects/radio-show/audio-processor/src/cli.py
Normal file
@@ -0,0 +1,356 @@
|
||||
"""CLI entry point for the radio show audio processor."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
|
||||
from .config import load_config
|
||||
|
||||
# Shared Rich console; every sub-command in this module prints through it.
console = Console()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, print the banner and run the chosen sub-command.

    The value of ``--config`` (``None`` when the flag is omitted) is handed to
    ``load_config`` and the resulting config object is passed, together with
    the parsed arguments, to the handler for the selected sub-command.
    """
    args = _build_parser().parse_args()
    config = load_config(args.config)

    banner = (
        "[bold]Radio Show Audio Processor[/bold]\n"
        "[dim]The Computer Guru Show[/dim]"
    )
    console.print(Panel.fit(banner, border_style="blue"))

    # Sub-command name -> handler; every handler takes (args, config).
    handlers = {
        "process": _cmd_process,
        "transcribe": _cmd_transcribe,
        "diarize": _cmd_diarize,
        "detect": _cmd_detect,
        "split": _cmd_split,
        "bootstrap-voice": _cmd_bootstrap_voice,
        "review-elements": _cmd_review_elements,
        "review-speakers": _cmd_review_speakers,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args, config)


def _build_parser():
    """Create the top-level argument parser and register every sub-command."""
    usage_examples = """
Examples:
  %(prog)s process episode.mp3
  %(prog)s process episode.mp3 --show-prep show-prep.md
  %(prog)s process hr1.mp3 hr2.mp3 --archive-mode --date 2016-03-15
  %(prog)s transcribe episode.mp3
  %(prog)s bootstrap-voice archive/
  %(prog)s review-elements
  %(prog)s review-speakers
"""
    root = argparse.ArgumentParser(
        description="Radio Show Audio Processor — The Computer Guru Show",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    root.add_argument("--config", help="Path to config.yaml")

    # A sub-command is mandatory; its name is stored in ``args.command``.
    commands = root.add_subparsers(dest="command", required=True)

    # process: the full pipeline
    process = commands.add_parser("process", help="Full pipeline")
    process.add_argument("audio", nargs="+", help="Audio file(s) to process")
    process.add_argument("--show-prep", help="Path to show prep markdown file")
    process.add_argument("--output", help="Output directory")
    process.add_argument(
        "--archive-mode",
        action="store_true",
        help="Archive mode: learn elements and voices",
    )
    process.add_argument("--date", help="Episode date (for archive mode)")
    process.add_argument(
        "--skip-transcribe",
        action="store_true",
        help="Skip transcription (use existing transcript)",
    )
    process.add_argument(
        "--skip-diarize", action="store_true", help="Skip diarization"
    )
    process.add_argument(
        "--skip-analysis", action="store_true", help="Skip LLM analysis"
    )

    # transcribe
    transcribe = commands.add_parser("transcribe", help="Transcribe only")
    transcribe.add_argument("audio", help="Audio file")
    transcribe.add_argument("--output")
    transcribe.add_argument("--model", help="Whisper model size")

    # diarize
    diarize = commands.add_parser("diarize", help="Diarize only")
    diarize.add_argument("audio", help="Audio file")
    diarize.add_argument("--output")

    # detect
    detect = commands.add_parser("detect", help="Detect segments only")
    detect.add_argument("audio", help="Audio file")
    detect.add_argument("--output")
    detect.add_argument("--show-prep")

    # split
    split = commands.add_parser("split", help="Split into segments")
    split.add_argument("audio", help="Audio file")
    split.add_argument(
        "--detection-report",
        required=True,
        help="Path to detection-report.json",
    )
    split.add_argument("--output")

    # bootstrap-voice
    voice = commands.add_parser(
        "bootstrap-voice", help="Bootstrap host voice profile from archive"
    )
    voice.add_argument("archive_dir", help="Directory containing archive MP3s")
    voice.add_argument("--speaker-name", default="Mike Swanson")
    voice.add_argument(
        "--sample-count",
        type=int,
        default=10,
        help="Number of episodes to sample",
    )

    # review-elements / review-speakers take no arguments of their own
    commands.add_parser(
        "review-elements", help="Review discovered audio elements"
    )
    commands.add_parser(
        "review-speakers", help="Review unknown speaker clusters"
    )

    return root
|
||||
|
||||
|
||||
def _cmd_process(args, config):
    """Run the full pipeline on one episode.

    Stages, in order: transcribe, diarize, detect segments, remove
    commercials, split segments (plus chapter file), and LLM analysis.
    Transcription, diarization and analysis can each be skipped via the
    corresponding ``--skip-*`` flag; analysis is also skipped when no
    transcript is available.

    Args:
        args: Parsed ``process`` sub-command arguments.
        config: Loaded configuration object (``config.audio``,
            ``config.diarization`` and ``config.llm`` are read here).
    """
    from .transcriber import transcribe
    from .diarizer import diarize, VoiceProfileStore
    from .segment_detector import SegmentDetector
    from .audio_editor import remove_commercials, split_segments, generate_chapters
    from .analyzer import analyze_episode

    # NOTE(review): ``args.archive_mode`` and ``args.date`` are accepted by the
    # CLI but nothing in this function reads them yet.

    audio_files = [Path(f) for f in args.audio]
    audio_path = audio_files[0]  # Primary file

    # If multiple files (HR1 + HR2), concatenate first
    if len(audio_files) > 1:
        audio_path = _concatenate_audio(audio_files, config)

    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load show prep if provided. Read as UTF-8 explicitly so the result does
    # not depend on the platform's locale encoding.
    show_prep = None
    if args.show_prep:
        show_prep = Path(args.show_prep).read_text(encoding="utf-8")

    # Stage 1: Transcribe
    if not args.skip_transcribe:
        transcript = transcribe(
            audio_path,
            model_size=config.audio.whisper_model,
            language=config.audio.whisper_language,
        )
        transcript.save(output_dir)
    else:
        console.print("[dim]Skipping transcription[/dim]")
        # Try to load existing transcript
        transcript = _load_saved_transcript(output_dir)
        if transcript is None:
            # Previously this case was silent; later stages then ran without
            # a transcript and analysis was dropped with no explanation.
            console.print(
                f"[yellow]No existing transcript at "
                f"{output_dir / 'transcript.json'}; continuing without one[/yellow]"
            )

    # Stage 2: Diarize
    diarization = None
    if not args.skip_diarize:
        voice_profiles = VoiceProfileStore(
            config.resolve_path(config.diarization.voice_profiles_dir)
        )
        diarization = diarize(
            audio_path,
            voice_profiles=voice_profiles,
            min_speakers=config.diarization.min_speakers,
            max_speakers=config.diarization.max_speakers,
        )
        diarization.save(output_dir)
    else:
        console.print("[dim]Skipping diarization[/dim]")

    # Stage 3: Detect segments
    detector = SegmentDetector(config)
    detection = detector.detect(
        audio_path,
        transcript=transcript,
        diarization=diarization,
        show_prep=show_prep,
    )
    detection.save(output_dir)

    # Stage 4: Remove commercials
    clean_path = output_dir / f"podcast-episode.{config.audio.output_format}"
    remove_commercials(
        audio_path, detection.segments, clean_path,
        crossfade_ms=config.audio.crossfade_ms,
        bitrate=config.audio.output_bitrate,
        normalize=config.audio.normalize,
    )

    # Stage 5: Split segments (from the original audio, not the cleaned file)
    segments_dir = output_dir / "segments"
    split_segments(
        audio_path, detection.segments, segments_dir,
        bitrate=config.audio.output_bitrate,
    )

    # Generate chapters
    generate_chapters(detection.segments, output_dir / "chapters.json")

    # Stage 6: Analyze
    if not args.skip_analysis and transcript:
        analysis = analyze_episode(
            transcript_text=transcript.full_text,
            diarization_data=diarization.to_dict() if diarization else None,
            show_prep=show_prep,
            segments=detection.segments,
            model=config.llm.model,
            ollama_host=config.llm.ollama_host,
        )
        # Analysis output goes to a sibling "generated" directory next to
        # the processed output directory.
        generated_dir = output_dir.parent / "generated"
        analysis.save(generated_dir)
    elif not args.skip_analysis:
        console.print("[yellow]Skipping analysis: no transcript available[/yellow]")

    console.print("\n[bold green]Processing complete![/bold green]")
    console.print(f"Output: {output_dir}")


def _load_saved_transcript(output_dir):
    """Rebuild a Transcript from ``transcript.json`` in *output_dir*.

    Returns ``None`` when the file does not exist.
    """
    import json

    from .transcriber import Transcript, TranscriptSegment, TranscriptWord

    transcript_file = output_dir / "transcript.json"
    if not transcript_file.exists():
        return None

    with open(transcript_file) as f:
        data = json.load(f)

    return Transcript(
        segments=[
            TranscriptSegment(
                id=s["id"], text=s["text"],
                start=s["start"], end=s["end"],
                # "words" is optional per segment in the saved file.
                words=[TranscriptWord(**w) for w in s.get("words", [])],
            )
            for s in data["segments"]
        ],
        language=data["language"],
        language_probability=data["language_probability"],
        duration=data["duration"],
    )
|
||||
|
||||
|
||||
def _cmd_transcribe(args, config):
    """Transcribe a single audio file and save the transcript.

    The Whisper model size comes from ``--model`` when given, otherwise from
    ``config.audio.whisper_model``. The configured language is passed through
    so this command transcribes the same way as the full ``process`` pipeline.

    Args:
        args: Parsed ``transcribe`` sub-command arguments.
        config: Loaded configuration object.
    """
    from .transcriber import transcribe

    audio_path = Path(args.audio)
    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
    # The full pipeline creates the output directory before saving; do the
    # same here so a fresh --output path works.
    output_dir.mkdir(parents=True, exist_ok=True)

    model = args.model or config.audio.whisper_model

    transcript = transcribe(
        audio_path,
        model_size=model,
        language=config.audio.whisper_language,
    )
    transcript.save(output_dir)
|
||||
|
||||
|
||||
def _cmd_diarize(args, config):
    """Diarize a single audio file and save the result.

    Voice profiles are loaded from the directory named by
    ``config.diarization.voice_profiles_dir``. The configured speaker-count
    bounds are passed through so this command behaves like the diarization
    stage of the full ``process`` pipeline.

    Args:
        args: Parsed ``diarize`` sub-command arguments.
        config: Loaded configuration object.
    """
    from .diarizer import diarize, VoiceProfileStore

    audio_path = Path(args.audio)
    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
    # The full pipeline creates the output directory before saving; do the
    # same here so a fresh --output path works.
    output_dir.mkdir(parents=True, exist_ok=True)

    voice_profiles = VoiceProfileStore(
        config.resolve_path(config.diarization.voice_profiles_dir)
    )
    result = diarize(
        audio_path,
        voice_profiles=voice_profiles,
        min_speakers=config.diarization.min_speakers,
        max_speakers=config.diarization.max_speakers,
    )
    result.save(output_dir)
|
||||
|
||||
|
||||
def _cmd_detect(args, config):
    """Run segment detection on a single audio file and save the report.

    Detection here runs on the audio plus the optional show prep text only;
    no transcript or diarization is supplied to the detector.

    Args:
        args: Parsed ``detect`` sub-command arguments.
        config: Loaded configuration object, handed to ``SegmentDetector``.
    """
    from .segment_detector import SegmentDetector

    audio_path = Path(args.audio)
    output_dir = Path(args.output) if args.output else audio_path.parent / "processed"
    # The full pipeline creates the output directory before saving; do the
    # same here so a fresh --output path works.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read show prep as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    show_prep = None
    if args.show_prep:
        show_prep = Path(args.show_prep).read_text(encoding="utf-8")

    detector = SegmentDetector(config)
    result = detector.detect(audio_path, show_prep=show_prep)
    result.save(output_dir)
|
||||
|
||||
|
||||
def _cmd_split(args, config):
    """Cut an audio file into segment files using a saved detection report.

    Segments are rebuilt from the ``"segments"`` list of the JSON report given
    by ``--detection-report``. The chapter file is written as
    ``chapters.json`` in the parent of the segment output directory.
    """
    from .audio_editor import split_segments, generate_chapters
    from .segment_detector import DetectedSegment, SegmentType
    import json

    source = Path(args.audio)
    if args.output:
        target_dir = Path(args.output)
    else:
        target_dir = source.parent / "segments"

    with open(args.detection_report) as handle:
        report = json.load(handle)

    segments = []
    for entry in report["segments"]:
        segments.append(
            DetectedSegment(
                start=entry["start"],
                end=entry["end"],
                segment_type=SegmentType(entry["type"]),
                confidence=entry["confidence"],
                # "label" is the only optional key in a report entry.
                label=entry.get("label", ""),
            )
        )

    split_segments(source, segments, target_dir, config.audio.output_bitrate)
    generate_chapters(segments, target_dir.parent / "chapters.json")
|
||||
|
||||
|
||||
def _cmd_bootstrap_voice(args, config):
    """Placeholder for building the host voice profile from archive episodes.

    Currently only echoes the requested archive directory, speaker name and
    sample count, then reports that the feature is not implemented.
    """
    status_lines = (
        "[bold]Bootstrapping host voice profile[/bold]",
        f"Archive: {args.archive_dir}",
        f"Speaker: {args.speaker_name}",
        f"Sampling {args.sample_count} episodes",
        # TODO: Implement archive sampling + diarization + embedding extraction
        "[yellow]Not yet implemented — run individual diarizations first[/yellow]",
    )
    for line in status_lines:
        console.print(line)
|
||||
|
||||
|
||||
def _cmd_review_elements(args, config):
    """Placeholder for the audio-element review command.

    Prints a heading followed by a not-implemented notice; neither argument
    is read yet.
    """
    # TODO: Implement element review UI
    for message in (
        "[bold]Reviewing discovered elements[/bold]",
        "[yellow]Not yet implemented[/yellow]",
    ):
        console.print(message)
|
||||
|
||||
|
||||
def _cmd_review_speakers(args, config):
    """Placeholder for the unknown-speaker review command.

    Prints a heading followed by a not-implemented notice; neither argument
    is read yet.
    """
    # TODO: Implement speaker review UI
    for message in (
        "[bold]Reviewing unknown speakers[/bold]",
        "[yellow]Not yet implemented[/yellow]",
    ):
        console.print(message)
|
||||
|
||||
|
||||
def _concatenate_audio(files: list[Path], config) -> Path:
    """Concatenate multiple audio files (e.g., HR1 + HR2) with ffmpeg.

    The streams are copied without re-encoding (``-c copy``) into
    ``combined_<first stem>.mp3`` next to the first input file.

    Args:
        files: Input audio files, in playback order.
        config: Loaded configuration object (currently unused).

    Returns:
        Path of the combined output file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    import subprocess

    output = files[0].parent / f"combined_{files[0].stem}.mp3"
    concat_file = files[0].parent / ".concat_list.txt"

    try:
        with open(concat_file, "w", encoding="utf-8") as f:
            for audio_file in files:
                # ffmpeg resolves relative entries against the list file's
                # directory, not the working directory, so write absolute
                # paths. A single quote inside the quoted path must be
                # written as '\'' or the list fails to parse.
                entry = str(audio_file.resolve()).replace("'", "'\\''")
                f.write(f"file '{entry}'\n")

        subprocess.run(
            ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
             "-i", str(concat_file), "-c", "copy", str(output)],
            capture_output=True, check=True,
        )
    finally:
        # Remove the temporary list even when ffmpeg fails or is missing.
        concat_file.unlink(missing_ok=True)

    console.print(f"[dim]Concatenated {len(files)} files -> {output.name}[/dim]")
    return output
|
||||
|
||||
|
||||
# Run the CLI when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user