Add radio show audio processor and post-show workflow
- Audio processor CLI tool with 6-stage pipeline: transcribe (faster-whisper GPU), diarize (pyannote), detect segments (multi-signal classifier), remove commercials, split segments, analyze content (Ollama)
- Post-show workflow doc for episode posts, forum threads, deep-dive blog posts
- Training plan for using 579-episode archive for voice profiles and commercial detection
- Successful test: 45-minute episode transcribed in 2:37 on RTX 5070 Ti
- Sample transcript output from S7E30 (March 2015)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
126
projects/radio-show/audio-processor/src/config.py
Normal file
126
projects/radio-show/audio-processor/src/config.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Configuration loader for the radio show audio processor."""
|
||||
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
class ShowConfig:
    """Describe the show itself: who hosts it and how an episode is laid out.

    Every field has a default, so ``ShowConfig()`` is a complete configuration.
    """

    # Identity of the programme.
    name: str = "The Computer Guru Show"
    host: str = "Mike Swanson"

    # Expected episode layout.
    typical_duration_minutes: int = 120
    segment_count: int = 6
    has_commercials: bool = True
|
||||
|
||||
|
||||
@dataclass
class AudioConfig:
    """Hold transcription-model and audio-output settings.

    Every field has a default, so ``AudioConfig()`` is a complete configuration.
    """

    # Whisper model name and language code.
    whisper_model: str = "large-v3"
    whisper_language: str = "en"

    # Format and bitrate of the audio that gets written out.
    output_format: str = "mp3"
    output_bitrate: str = "192k"

    # Post-processing switches.
    normalize: bool = True
    # Crossfade length in milliseconds (per the `_ms` suffix).
    # NOTE(review): presumably applied where cut audio is rejoined -- confirm.
    crossfade_ms: int = 500
|
||||
|
||||
|
||||
@dataclass
class DetectionWeights:
    """Hold the relative weight of each signal used by segment detection.

    The default weights add up to 1.0. Nothing in this class normalises or
    validates overridden values.
    """

    fingerprint_match: float = 0.30
    speaker_identity: float = 0.25
    audio_characteristics: float = 0.20
    break_pattern: float = 0.15
    structural_heuristic: float = 0.10
|
||||
|
||||
|
||||
@dataclass
class SegmentDetectionConfig:
    """Hold thresholds and limits for segment detection.

    Every field has a default. ``weights`` gets a fresh ``DetectionWeights``
    per instance via ``default_factory``, so instances never share one
    weights object.
    """

    # --- Fingerprint matching ---------------------------------------------
    fingerprint_db: str = "element-library/fingerprints.db"
    fingerprint_match_threshold: float = 0.85

    # --- Discovery of elements not yet in the fingerprint database ---------
    discover_unknown_elements: bool = True
    # Durations are in seconds (per the `_s` suffix).
    min_element_duration_s: float = 1.0
    max_element_duration_s: float = 30.0
    cluster_similarity_threshold: float = 0.90
    min_cluster_occurrences: int = 3

    # --- Break detection ----------------------------------------------------
    min_break_duration_s: int = 30
    max_break_duration_s: int = 300
    # Level in dB (per the `_db` suffix).
    # NOTE(review): presumably audio below this counts as silence -- confirm.
    silence_threshold_db: int = -40

    # --- Final decision -----------------------------------------------------
    confidence_threshold: float = 0.70
    weights: DetectionWeights = field(default_factory=DetectionWeights)
|
||||
|
||||
|
||||
@dataclass
class DiarizationConfig:
    """Hold speaker-diarization settings.

    Every field has a default, so ``DiarizationConfig()`` is complete.
    """

    # Bounds on the number of speakers.
    min_speakers: int = 1
    max_speakers: int = 6

    # Location of the voice profiles and the host-match threshold.
    # NOTE(review): presumably a similarity score at or above the threshold
    # labels a speaker as the host -- confirm against the diarizer.
    voice_profiles_dir: str = "voice-profiles/"
    host_match_threshold: float = 0.75
|
||||
|
||||
|
||||
@dataclass
class LLMConfig:
    """Hold the LLM model name and the Ollama server URL.

    Every field has a default, so ``LLMConfig()`` is complete.
    """

    # Model identifier.
    model: str = "qwen3:14b"
    # Base URL of the Ollama server.
    ollama_host: str = "http://localhost:11434"
|
||||
|
||||
|
||||
@dataclass
class PathsConfig:
    """Hold the working directories used by the processor.

    Values are stored as plain strings and the defaults are relative paths.
    This class does not resolve them itself.
    """

    # Inputs.
    episodes_dir: str = "episodes/"
    voice_profiles: str = "voice-profiles/"
    element_library: str = "element-library/"

    # Outputs.
    output_dir: str = "processed/"
|
||||
|
||||
|
||||
@dataclass
class ArchiveConfig:
    """Hold the location of the episode archive: a server and two paths.

    Every field has a default, so ``ArchiveConfig()`` is complete.
    """

    # Address of the archive server.
    server: str = "172.16.3.10"

    # Archive root and the elements directory beneath it.
    path: str = "/home/gurushow/public_html/archive/"
    elements_path: str = "/home/gurushow/public_html/archive/Radio/Elements/"
|
||||
|
||||
|
||||
@dataclass
class Config:
    """Aggregate every configuration section into one object.

    Each section attribute defaults to a freshly constructed instance of its
    section class, so ``Config()`` is usable without any config file.
    """

    # One attribute per configuration section.
    show: ShowConfig = field(default_factory=ShowConfig)
    audio: AudioConfig = field(default_factory=AudioConfig)
    segment_detection: SegmentDetectionConfig = field(
        default_factory=SegmentDetectionConfig
    )
    diarization: DiarizationConfig = field(default_factory=DiarizationConfig)
    llm: LLMConfig = field(default_factory=LLMConfig)
    paths: PathsConfig = field(default_factory=PathsConfig)
    archive: ArchiveConfig = field(default_factory=ArchiveConfig)

    # Anchor directory for resolve_path(); defaults to the working directory
    # at the moment the instance is created.
    base_dir: Path = field(default_factory=Path.cwd)

    def resolve_path(self, relative: str) -> Path:
        """Return *relative* joined onto ``base_dir``.

        Standard ``pathlib`` joining applies, so an absolute *relative*
        replaces ``base_dir`` instead of being appended to it.
        """
        return self.base_dir.joinpath(relative)
|
||||
|
||||
|
||||
def load_config(config_path: str | Path | None = None) -> Config:
    """Build a ``Config`` from a YAML file, falling back to defaults.

    Args:
        config_path: Location of the YAML file. When ``None``, the file
            ``config.yaml`` one directory above this module's directory
            is used.

    Returns:
        A ``Config`` whose ``base_dir`` is the directory containing
        ``config_path``. If the file does not exist, all sections keep their
        defaults. Otherwise each top-level section present in the file
        replaces the matching default section. A section that is present
        but empty (parsed as ``None``) is treated as an empty mapping, so it
        yields that section's defaults.

    Raises:
        TypeError: If a section contains a key that is not a field of the
            corresponding section dataclass.
        yaml.YAMLError: If the file is not valid YAML.
    """
    if config_path is None:
        config_path = Path(__file__).parent.parent / "config.yaml"

    config_path = Path(config_path)
    config = Config(base_dir=config_path.parent)
    if not config_path.exists():
        return config

    # Decode as UTF-8 explicitly so parsing does not depend on the locale.
    with open(config_path, encoding="utf-8") as f:
        raw = yaml.safe_load(f) or {}

    # Sections that map straight onto a flat dataclass.
    flat_sections = {
        "show": ShowConfig,
        "audio": AudioConfig,
        "diarization": DiarizationConfig,
        "llm": LLMConfig,
        "paths": PathsConfig,
        "archive": ArchiveConfig,
    }
    for section_name, section_cls in flat_sections.items():
        if section_name in raw:
            # `or {}` covers a bare "section:" line, which YAML parses as None.
            setattr(config, section_name, section_cls(**(raw[section_name] or {})))

    if "segment_detection" in raw:
        # Copy before popping so the parsed YAML mapping is left untouched.
        sd = dict(raw["segment_detection"] or {})
        weights = DetectionWeights(**(sd.pop("weights", None) or {}))
        config.segment_detection = SegmentDetectionConfig(weights=weights, **sd)

    return config
|
||||
Reference in New Issue
Block a user