Add radio show audio processor and post-show workflow

- Audio processor CLI tool with 6-stage pipeline: transcribe (faster-whisper GPU), diarize (pyannote), detect segments (multi-signal classifier), remove commercials, split segments, analyze content (Ollama)
- Post-show workflow doc for episode posts, forum threads, deep-dive blog posts
- Training plan for using 579-episode archive for voice profiles and commercial detection
- Successful test: 45min episode transcribed in 2:37 on RTX 5070 Ti
- Sample transcript output from S7E30 (March 2015)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 11:51:59 -07:00
parent 98c3ee4225
commit 3cbb1b8aab
14 changed files with 2723 additions and 0 deletions
--- a/projects/radio-show/audio-processor/src/config.py
+++ b/projects/radio-show/audio-processor/src/config.py
@@ -0,0 +1,126 @@
+"""Configuration loader for the radio show audio processor."""
+
+from pathlib import Path
+from dataclasses import dataclass, field
+import yaml
+
+
+@dataclass
+class ShowConfig:
+    """Show-level settings (the ``show`` section of the YAML config).
+
+    Every field has a default, so the section may be omitted or given
+    only in part.
+    """
+
+    name: str = "The Computer Guru Show"  # show title
+    host: str = "Mike Swanson"  # host name
+    typical_duration_minutes: int = 120  # minutes, per the field name
+    segment_count: int = 6  # presumably segments per episode -- confirm with the consumer
+    has_commercials: bool = True  # consumer of this flag is not visible in this file
+
+
+@dataclass
+class AudioConfig:
+    """Transcription and audio-output settings (the ``audio`` section).
+
+    This file only stores the values; how they are passed to the
+    transcriber/encoder is decided elsewhere.
+    """
+
+    whisper_model: str = "large-v3"  # Whisper model identifier
+    whisper_language: str = "en"  # language code handed to the transcriber (presumably ISO 639-1)
+    output_format: str = "mp3"  # output audio format name
+    output_bitrate: str = "192k"  # kept as a string; presumably an ffmpeg-style bitrate -- confirm
+    normalize: bool = True  # presumably toggles loudness normalization -- confirm
+    crossfade_ms: int = 500  # milliseconds, per the ``_ms`` suffix
+
+
+@dataclass
+class DetectionWeights:
+    """Per-signal weights for segment detection (``segment_detection.weights``).
+
+    The defaults below sum to 1.0. Nothing in this class validates that
+    overridden values still do.
+    """
+
+    fingerprint_match: float = 0.30
+    speaker_identity: float = 0.25
+    audio_characteristics: float = 0.20
+    break_pattern: float = 0.15
+    structural_heuristic: float = 0.10
+
+
+@dataclass
+class SegmentDetectionConfig:
+    """Segment/commercial detection settings (the ``segment_detection`` section).
+
+    Units noted below are taken from the field-name suffixes; the code that
+    consumes these values is not part of this file.
+    """
+
+    fingerprint_db: str = "element-library/fingerprints.db"  # relative path string
+    fingerprint_match_threshold: float = 0.85
+    discover_unknown_elements: bool = True
+    min_element_duration_s: float = 1.0  # seconds
+    max_element_duration_s: float = 30.0  # seconds
+    cluster_similarity_threshold: float = 0.90
+    min_cluster_occurrences: int = 3
+    min_break_duration_s: int = 30  # seconds (int, unlike the element durations above)
+    max_break_duration_s: int = 300  # seconds
+    silence_threshold_db: int = -40  # decibels, per the ``_db`` suffix
+    confidence_threshold: float = 0.70
+    # default_factory gives each instance its own DetectionWeights object.
+    weights: DetectionWeights = field(default_factory=DetectionWeights)
+
+
+@dataclass
+class DiarizationConfig:
+    """Speaker-diarization settings (the ``diarization`` section)."""
+
+    min_speakers: int = 1
+    max_speakers: int = 6
+    # NOTE(review): same default as PathsConfig.voice_profiles, but the two
+    # are independent settings; changing one does not change the other.
+    voice_profiles_dir: str = "voice-profiles/"
+    host_match_threshold: float = 0.75  # presumably a similarity cutoff for matching the host -- confirm
+
+
+@dataclass
+class LLMConfig:
+    """LLM backend settings (the ``llm`` section)."""
+
+    model: str = "qwen3:14b"  # model tag string
+    ollama_host: str = "http://localhost:11434"  # base URL of the Ollama server
+
+
+@dataclass
+class PathsConfig:
+    """Directory locations (the ``paths`` section).
+
+    Defaults are relative path strings; presumably callers resolve them
+    with ``Config.resolve_path`` -- confirm at the call sites.
+    """
+
+    episodes_dir: str = "episodes/"
+    voice_profiles: str = "voice-profiles/"
+    element_library: str = "element-library/"
+    output_dir: str = "processed/"
+
+
+@dataclass
+class ArchiveConfig:
+    """Location of the episode archive (the ``archive`` section)."""
+
+    # NOTE(review): the default is a hard-coded private (RFC 1918) address;
+    # override it in config.yaml for any other network.
+    server: str = "172.16.3.10"
+    path: str = "/home/gurushow/public_html/archive/"  # absolute path string
+    elements_path: str = "/home/gurushow/public_html/archive/Radio/Elements/"  # absolute path string
+
+
+@dataclass
+class Config:
+    """Root configuration object that aggregates every section.
+
+    Each section falls back to its own dataclass defaults. ``base_dir``
+    defaults to the current working directory at the moment the instance
+    is created.
+    """
+
+    show: ShowConfig = field(default_factory=ShowConfig)  # ``show`` section
+    audio: AudioConfig = field(default_factory=AudioConfig)  # ``audio`` section
+    segment_detection: SegmentDetectionConfig = field(  # ``segment_detection`` section
+        default_factory=SegmentDetectionConfig
+    )
+    diarization: DiarizationConfig = field(default_factory=DiarizationConfig)  # ``diarization`` section
+    llm: LLMConfig = field(default_factory=LLMConfig)  # ``llm`` section
+    paths: PathsConfig = field(default_factory=PathsConfig)  # ``paths`` section
+    archive: ArchiveConfig = field(default_factory=ArchiveConfig)  # ``archive`` section
+    base_dir: Path = field(default_factory=Path.cwd)  # anchor used by resolve_path
+
+    def resolve_path(self, relative: str) -> Path:
+        """Return ``relative`` joined onto ``base_dir``.
+
+        Standard ``pathlib`` join semantics apply, so an absolute
+        ``relative`` is returned as-is instead of being nested under
+        ``base_dir``.
+        """
+        resolved = self.base_dir / relative
+        return resolved
+
+
+def load_config(config_path: str | Path | None = None) -> Config:
+    """Load the processor configuration from a YAML file.
+
+    Args:
+        config_path: Path to the YAML file. When ``None``, ``config.yaml``
+            in the parent of this module's directory is used.
+
+    Returns:
+        A ``Config`` whose ``base_dir`` is the directory containing
+        ``config_path``. Sections that are absent from the file, or present
+        but null (e.g. a bare ``show:`` line), keep their dataclass
+        defaults. If the file does not exist, an all-defaults ``Config``
+        is returned.
+
+    Raises:
+        TypeError: If the top-level YAML value or a section is not a
+            mapping, or a section contains a key that is not a field of
+            its dataclass.
+
+    Malformed YAML propagates whatever ``yaml.safe_load`` raises.
+    """
+    if config_path is None:
+        config_path = Path(__file__).parent.parent / "config.yaml"
+    config_path = Path(config_path)
+
+    config = Config(base_dir=config_path.parent)
+
+    # EAFP: open directly instead of exists()-then-open, and read as UTF-8
+    # explicitly so parsing does not depend on the platform default encoding.
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            raw = yaml.safe_load(f) or {}
+    except FileNotFoundError:
+        return config
+
+    if not isinstance(raw, dict):
+        raise TypeError(
+            f"{config_path}: top-level YAML value must be a mapping, "
+            f"got {type(raw).__name__}"
+        )
+
+    # Sections whose keys map one-to-one onto dataclass fields.
+    flat_sections = {
+        "show": ShowConfig,
+        "audio": AudioConfig,
+        "diarization": DiarizationConfig,
+        "llm": LLMConfig,
+        "paths": PathsConfig,
+        "archive": ArchiveConfig,
+    }
+    for key, section_cls in flat_sections.items():
+        values = raw.get(key)
+        # A key with no value parses as None; treat it like a missing section.
+        if values is not None:
+            setattr(config, key, section_cls(**values))
+
+    # segment_detection nests a ``weights`` mapping that needs its own dataclass.
+    detection = raw.get("segment_detection")
+    if detection is not None:
+        detection = dict(detection)  # copy so the parsed document is not mutated
+        weights = DetectionWeights(**(detection.pop("weights", None) or {}))
+        config.segment_detection = SegmentDetectionConfig(weights=weights, **detection)
+
+    return config