Audio processor: fix segment detection by using transcript-driven breaks

- Add transcript break phrase detection (going_to_break/coming_back cues)
- Create segments from transcript breaks with silence boundary snapping
- Fix segment dedup in merge_adjacent (handle overlapping segments)
- Add CUDA 12 library path fix (gpu.py + venv activate hook)
- Auto-load an existing transcript.json from the output directory in the detect command
- Tested on 2011-03-05 HR1: correctly identifies commercial break at 34:38

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 11:59:54 -07:00
parent a1e0442d8b
commit 87f5a9306a
3 changed files with 215 additions and 21 deletions

View File

@@ -1,5 +1,9 @@
"""CLI entry point for the radio show audio processor."""
# Must set CUDA paths before any torch/ctranslate2 imports.
# This call runs at module import time and is deliberately placed ahead of
# the remaining imports, so keep these two lines first and do not let an
# import sorter move them below the other imports.
# NOTE(review): presumably ensure_cuda_libs() adjusts the CUDA library search
# path (see .gpu) — confirm it is safe to call more than once.
from .gpu import ensure_cuda_libs
ensure_cuda_libs()
import argparse
import sys
from pathlib import Path
@@ -274,8 +278,31 @@ def _cmd_detect(args, config):
if args.show_prep:
show_prep = Path(args.show_prep).read_text()
# Load existing transcript if available
transcript = None
transcript_file = output_dir / "transcript.json"
if transcript_file.exists():
from .transcriber import Transcript, TranscriptSegment, TranscriptWord
import json
console.print(f"[dim]Loading transcript from {transcript_file}[/dim]")
with open(transcript_file) as f:
data = json.load(f)
transcript = Transcript(
segments=[
TranscriptSegment(
id=s["id"], text=s["text"],
start=s["start"], end=s["end"],
words=[TranscriptWord(**w) for w in s.get("words", [])],
)
for s in data["segments"]
],
language=data["language"],
language_probability=data["language_probability"],
duration=data["duration"],
)
detector = SegmentDetector(config)
result = detector.detect(audio_path, show_prep=show_prep)
result = detector.detect(audio_path, transcript=transcript, show_prep=show_prep)
result.save(output_dir)