Audio processor: fix segment detection with transcript-driven breaks

- Add transcript break phrase detection (going_to_break/coming_back cues)
- Create segments from transcript breaks with silence boundary snapping
- Fix segment dedup in merge_adjacent (handle overlapping segments)
- Add CUDA 12 library path fix (gpu.py + venv activate hook)
- Auto-load existing transcript in detect command
- Tested on 2011-03-05 HR1: correctly identifies commercial break at 34:38

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-21 11:59:54 -07:00
parent a1e0442d8b
commit 87f5a9306a
3 changed files with 215 additions and 21 deletions
--- a/projects/radio-show/audio-processor/src/cli.py
+++ b/projects/radio-show/audio-processor/src/cli.py
@@ -1,5 +1,9 @@
 """CLI entry point for the radio show audio processor."""

+# Must set CUDA paths before any torch/ctranslate2 imports
+from .gpu import ensure_cuda_libs
+ensure_cuda_libs()
+
 import argparse
 import sys
 from pathlib import Path
@@ -274,8 +278,31 @@ def _cmd_detect(args, config):
    if args.show_prep:
        show_prep = Path(args.show_prep).read_text()

+    # Load existing transcript if available
+    transcript = None
+    transcript_file = output_dir / "transcript.json"
+    if transcript_file.exists():
+        from .transcriber import Transcript, TranscriptSegment, TranscriptWord
+        import json
+        console.print(f"[dim]Loading transcript from {transcript_file}[/dim]")
+        with open(transcript_file) as f:
+            data = json.load(f)
+        transcript = Transcript(
+            segments=[
+                TranscriptSegment(
+                    id=s["id"], text=s["text"],
+                    start=s["start"], end=s["end"],
+                    words=[TranscriptWord(**w) for w in s.get("words", [])],
+                )
+                for s in data["segments"]
+            ],
+            language=data["language"],
+            language_probability=data["language_probability"],
+            duration=data["duration"],
+        )
+
    detector = SegmentDetector(config)
-    result = detector.detect(audio_path, show_prep=show_prep)
+    result = detector.detect(audio_path, transcript=transcript, show_prep=show_prep)
    result.save(output_dir)