Audio processor: fix segment detection with transcript-driven breaks
- Add transcript break phrase detection (going_to_break/coming_back cues)
- Create segments from transcript breaks with silence boundary snapping
- Fix segment dedup in merge_adjacent (handle overlapping segments)
- Add CUDA 12 library path fix (gpu.py + venv activate hook)
- Auto-load existing transcript in detect command
- Tested on 2011-03-05 HR1: correctly identifies commercial break at 34:38

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
"""CLI entry point for the radio show audio processor."""
|
||||
|
||||
# Must set CUDA paths before any torch/ctranslate2 imports
|
||||
from .gpu import ensure_cuda_libs
|
||||
ensure_cuda_libs()
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@@ -274,8 +278,31 @@ def _cmd_detect(args, config):
|
||||
if args.show_prep:
|
||||
show_prep = Path(args.show_prep).read_text()
|
||||
|
||||
# Load existing transcript if available
|
||||
transcript = None
|
||||
transcript_file = output_dir / "transcript.json"
|
||||
if transcript_file.exists():
|
||||
from .transcriber import Transcript, TranscriptSegment, TranscriptWord
|
||||
import json
|
||||
console.print(f"[dim]Loading transcript from {transcript_file}[/dim]")
|
||||
with open(transcript_file) as f:
|
||||
data = json.load(f)
|
||||
transcript = Transcript(
|
||||
segments=[
|
||||
TranscriptSegment(
|
||||
id=s["id"], text=s["text"],
|
||||
start=s["start"], end=s["end"],
|
||||
words=[TranscriptWord(**w) for w in s.get("words", [])],
|
||||
)
|
||||
for s in data["segments"]
|
||||
],
|
||||
language=data["language"],
|
||||
language_probability=data["language_probability"],
|
||||
duration=data["duration"],
|
||||
)
|
||||
|
||||
detector = SegmentDetector(config)
|
||||
- result = detector.detect(audio_path, show_prep=show_prep)
|
||||
+ result = detector.detect(audio_path, transcript=transcript, show_prep=show_prep)
|
||||
result.save(output_dir)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user