radio show: co-host voice profile, Q&A extraction fixes, archive index
- Build Tom (co-host) voice profile (44 embeddings, 0.698 similarity to Mike)
- diarizer.py: add CO-HOST speaker label for cohost-role profiles
- voice_profiler.py: emit "Cohost: <name>" label for cohost role
- qa_extractor.py: overlap resolution at load time (midpoint boundary split), 4s CALLER-preference threshold, turn-based caller-intro lookback (2 HOST turns), _preceded_by_caller_intro() helper, _PHONE_GREETING pattern, 751-1041 + "we'll get your problem solved" promo signatures
- benchmark.py: use src.transcriber.transcribe with batch_size=16
- add index_test_episodes.py and build_cohost_profile.py scripts
- add .gitignore (exclude episodes, transcripts, *.db, .venv)
- session log: 2026-04-27-qa-extraction-cohost-indexing.md

Result: 2016-s8e43 drops from 12 false-positive Q&A pairs to 2 real caller pairs. archive.db: 6 episodes, 762 segments, 10 Q&A pairs, FTS5 search verified.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
25
projects/radio-show/audio-processor/.gitignore
vendored
Normal file
25
projects/radio-show/audio-processor/.gitignore
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
.venv/
|
||||
*.egg-info/
|
||||
|
||||
# Large data files
|
||||
test-data/episodes/
|
||||
test-data/transcripts/
|
||||
episodes/
|
||||
processed/
|
||||
|
||||
# Databases (regenerable)
|
||||
*.db
|
||||
*.sqlite
|
||||
|
||||
# Model cache
|
||||
.cache/
|
||||
*.pt
|
||||
*.bin
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
@@ -57,13 +57,15 @@ trans_results = []
|
||||
trans_total_audio = 0.0
|
||||
trans_total_wall = 0.0
|
||||
|
||||
import json
|
||||
from src.transcriber import transcribe as _transcribe
|
||||
|
||||
for ep in EPISODES:
|
||||
trans_ep_dir = TRANS_DIR / ep.stem
|
||||
trans_ep_dir.mkdir(parents=True, exist_ok=True)
|
||||
transcript_path = trans_ep_dir / "transcript.json"
|
||||
|
||||
if transcript_path.exists():
|
||||
import json
|
||||
with open(transcript_path) as f:
|
||||
td = json.load(f)
|
||||
dur = td.get("duration", 0)
|
||||
@@ -74,30 +76,15 @@ for ep in EPISODES:
|
||||
console.print(f" Transcribing {ep.name}...")
|
||||
t0 = time.monotonic()
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
if not hasattr(sys, "_whisper_model"):
|
||||
console.print(" [dim]Loading Whisper large-v3...[/dim]")
|
||||
sys._whisper_model = WhisperModel("large-v3", device=device, compute_type="float16")
|
||||
|
||||
model = sys._whisper_model
|
||||
segments_iter, info = model.transcribe(str(ep), language="en", beam_size=5)
|
||||
|
||||
import json
|
||||
segs = []
|
||||
for seg in segments_iter:
|
||||
segs.append({"id": seg.id, "start": seg.start, "end": seg.end, "text": seg.text})
|
||||
|
||||
duration = info.duration
|
||||
transcript = _transcribe(ep, model_size="large-v3", device=device, batch_size=16)
|
||||
wall = time.monotonic() - t0
|
||||
rtf = duration / wall
|
||||
rtf = transcript.duration / wall
|
||||
|
||||
result = {"duration": duration, "language": "en", "segments": segs}
|
||||
with open(transcript_path, "w") as f:
|
||||
json.dump(result, f)
|
||||
transcript.save(trans_ep_dir)
|
||||
|
||||
console.print(f" [green]{ep.stem}: {duration:.0f}s audio in {wall:.1f}s = {rtf:.1f}x realtime[/green]")
|
||||
trans_results.append((ep, transcript_path, duration, wall))
|
||||
trans_total_audio += duration
|
||||
console.print(f" [green]{ep.stem}: {transcript.duration:.0f}s audio in {wall:.1f}s = {rtf:.1f}x realtime[/green]")
|
||||
trans_results.append((ep, transcript_path, transcript.duration, wall))
|
||||
trans_total_audio += transcript.duration
|
||||
trans_total_wall += wall
|
||||
|
||||
if trans_total_wall > 0:
|
||||
|
||||
115
projects/radio-show/audio-processor/build_cohost_profile.py
Normal file
115
projects/radio-show/audio-processor/build_cohost_profile.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""
|
||||
Build voice profile for Tom (co-host) from known co-host speech windows.
|
||||
|
||||
Uses CALLER-labeled windows from the first 60 min of co-host-era episodes,
|
||||
before any real callers would have called in.
|
||||
"""
|
||||
import os, sys
# Process environment and stdout encoding are configured before the
# third-party imports below.
os.environ["PYTHONIOENCODING"] = "utf-8"
os.environ["TRANSFORMERS_OFFLINE"] = "1"
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")

from pathlib import Path
import json
import numpy as np
from src.gpu import ensure_cuda_libs
# NOTE(review): called before torch is imported — presumably so the CUDA
# libraries are discoverable when torch loads; keep this ordering.
ensure_cuda_libs()

import torch
from src.voice_profiler import VoiceProfiler, SpeakerProfile
from rich.console import Console

console = Console()

BASE = Path(__file__).parent
PROFILES_DIR = BASE / "voice-profiles"
EPISODES_DIR = BASE / "test-data" / "episodes"
TRANS_DIR = BASE / "test-data" / "transcripts"

# Embedding chunk geometry.  These never change between episodes, so they are
# defined once here rather than inside the episode loop.
# NOTE(review): assumes profiler._load_full_audio() returns samples at 16 kHz —
# confirm against VoiceProfiler.
SAMPLE_RATE = 16000
CHUNK_SECONDS = 10
CHUNK_SAMPLES = CHUNK_SECONDS * SAMPLE_RATE

device = "cuda" if torch.cuda.is_available() else "cpu"
console.print(f"Device: {device}")

profiler = VoiceProfiler(PROFILES_DIR, device=device)

# Tom's known speech windows per episode
# CALLER turns from diarization that are in the first 60 min (before real callers)
# Windows at 0-40s excluded (promo/jingle, not Tom's voice)
TOM_WINDOWS = {
    "2014-s6e19.mp3": [
        (195, 260),
        (320, 425),
        (600, 650),
        (675, 710),
    ],
    "2016-s8e43.mp3": [
        (100, 115),
        (135, 160),
        (270, 295),
        (575, 605),
        (1185, 1235),
        (1790, 1870),
        (2020, 2055),
    ],
}

COHOST_NAME = "Tom"

if COHOST_NAME not in profiler.profiles:
    profiler.profiles[COHOST_NAME] = SpeakerProfile(
        name=COHOST_NAME,
        role="cohost",
        embeddings=[],
        source_episodes=[],
    )

profile = profiler.profiles[COHOST_NAME]
console.print(f"\n[bold]Building co-host profile for: {COHOST_NAME}[/bold]")

# NOTE(review): when a profile already exists, the embeddings gathered below
# are appended to whatever it already holds, so running this script twice on
# the same episodes would weight them twice — confirm whether a rebuild should
# start from an empty profile instead.

for ep_name, windows in TOM_WINDOWS.items():
    ep_path = EPISODES_DIR / ep_name
    if not ep_path.exists():
        console.print(f"[yellow]  Skipping {ep_name} — not found[/yellow]")
        continue

    console.print(f"\n  Loading {ep_name}...")
    audio = profiler._load_full_audio(ep_path)
    profiler._get_model()

    for win_start, win_end in windows:
        # Step through the window in whole, non-overlapping chunks.  The "+ 1"
        # makes the stop bound inclusive of a chunk that ends exactly at
        # win_end; without it, windows whose length is a multiple of the chunk
        # size silently lose their final chunk.
        last_start_bound = win_end - CHUNK_SECONDS + 1
        for chunk_start in range(win_start, last_start_bound, CHUNK_SECONDS):
            s = chunk_start * SAMPLE_RATE
            e = s + CHUNK_SAMPLES
            if e > len(audio):
                # Window runs past the end of the decoded audio.
                break
            try:
                emb = profiler._embed_audio_np(audio[s:e])
                profile.embeddings.append(emb)
                console.print(f"    [dim]+1 embedding @ {chunk_start}s[/dim]")
            except Exception as ex:
                # Best effort: one bad chunk should not abort the whole build.
                console.print(f"    [red]Failed @ {chunk_start}s: {ex}[/red]")

    # Record each episode once, even if the script is re-run against an
    # existing profile.
    if ep_name not in profile.source_episodes:
        profile.source_episodes.append(ep_name)

if not profile.embeddings:
    console.print("[red]No embeddings collected — check episode paths[/red]")
    sys.exit(1)

profile.compute_composite()
console.print(f"\n[green]Tom profile built: {profile.num_samples} embeddings "
              f"from {len(profile.source_episodes)} episodes[/green]")

# Verify: check cosine similarity vs Mike to ensure separation
mike = profiler.profiles.get("Mike Swanson")
if mike and mike.composite_embedding is not None and profile.composite_embedding is not None:
    # The small epsilon keeps the division defined if either norm is zero.
    sim = float(np.dot(mike.composite_embedding, profile.composite_embedding) /
                (np.linalg.norm(mike.composite_embedding) * np.linalg.norm(profile.composite_embedding) + 1e-8))
    console.print(f"Tom vs Mike similarity: {sim:.3f}  (lower is better separation)")

profiler.save_profiles()
console.print("[bold green]Profile saved.[/bold green]")
|
||||
102
projects/radio-show/audio-processor/index_test_episodes.py
Normal file
102
projects/radio-show/audio-processor/index_test_episodes.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Index the 6 test episodes into archive.db.
|
||||
Reads pre-computed transcripts + diarization from test-data/transcripts/.
|
||||
"""
|
||||
import os, sys, re
|
||||
os.environ["PYTHONIOENCODING"] = "utf-8"
|
||||
os.environ["TRANSFORMERS_OFFLINE"] = "1"
|
||||
if hasattr(sys.stdout, "reconfigure"):
|
||||
sys.stdout.reconfigure(encoding="utf-8")
|
||||
|
||||
from pathlib import Path
|
||||
from src.indexer import ArchiveIndex
|
||||
from src.qa_extractor import load_diarized_transcript, extract_qa_pairs
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
console = Console()
|
||||
|
||||
BASE = Path(__file__).parent
|
||||
TRANS_DIR = BASE / "test-data" / "transcripts"
|
||||
EP_DIR = BASE / "test-data" / "episodes"
|
||||
DB_PATH = BASE / "archive.db"
|
||||
|
||||
_DATE_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})")
|
||||
|
||||
|
||||
def parse_episode_meta(ep_id: str) -> tuple[str, int | None]:
|
||||
"""Return (date_str_or_year, hr) from episode directory name."""
|
||||
m = _DATE_RE.match(ep_id)
|
||||
if m:
|
||||
date = m.group(1)
|
||||
hr = int(ep_id[-1]) if ep_id.endswith(("-hr1", "-hr2")) else None
|
||||
return date, hr
|
||||
# season/episode format e.g. 2016-s8e43 — use year only
|
||||
year = ep_id[:4]
|
||||
return year, None
|
||||
|
||||
|
||||
# Imported once here instead of on every pass through the episode loop.
import json

console.print(f"\n[bold]Indexing test episodes into {DB_PATH.name}[/bold]")

# Fail with a clear message (and before the database is opened) when there is
# nothing to index, rather than with a bare traceback from iterdir().
if not TRANS_DIR.is_dir():
    console.print(f"[red]Transcript directory not found: {TRANS_DIR}[/red]")
    sys.exit(1)

with ArchiveIndex(DB_PATH) as idx:
    # One summary tuple per indexed episode, rendered as a table afterwards.
    rows = []

    for ep_dir in sorted(TRANS_DIR.iterdir()):
        t_path = ep_dir / "transcript.json"
        d_path = ep_dir / "diarization.json"
        if not t_path.exists():
            # No transcript for this entry: nothing to index.
            continue

        ep_id = ep_dir.name
        date, hr = parse_episode_meta(ep_id)
        audio_path = EP_DIR / f"{ep_id}.mp3"

        # Episode duration from transcript.  The encoding is given explicitly
        # so the read does not depend on the platform's default code page.
        with open(t_path, encoding="utf-8") as f:
            td = json.load(f)
        duration = td.get("duration", 0)

        # Register episode
        idx.add_episode(
            episode_id=ep_id,
            audio_path=audio_path,
            date=date,
            duration=duration,
            hr=hr,
        )

        # Load diarized segments and index; diarization is optional.
        segs = load_diarized_transcript(t_path, d_path if d_path.exists() else None)
        idx.add_segments(ep_id, segs)

        # Extract and index Q&A pairs
        pairs = extract_qa_pairs(segs)
        for p in pairs:
            idx.add_qa_pair(
                episode_id=ep_id,
                q_start=p.question_start, q_end=p.question_end,
                a_start=p.answer_start, a_end=p.answer_end,
                question=p.question_text, answer=p.answer_text,
                topic=p.topic, tags=p.topic_tags,
            )

        rows.append((ep_id, date, f"{duration:.0f}s", len(segs), len(pairs)))
        console.print(f"  [green]{ep_id}[/green]: {len(segs)} segs, {len(pairs)} Q&A pairs")

    # Totals are read while the index is still open.
    stats = idx.stats()

table = Table(title="Index Summary")
table.add_column("Episode")
table.add_column("Date")
table.add_column("Duration")
table.add_column("Segments")
table.add_column("Q&A")
# Distinct names here so the indexing loop's variables are not rebound.
for row_ep, row_date, row_dur, row_segs, row_qa in rows:
    table.add_row(row_ep, row_date, row_dur, str(row_segs), str(row_qa))

console.print()
console.print(table)
console.print(f"\n[bold]DB totals:[/bold] {stats['episodes']} episodes, "
              f"{stats['segments']} segments, {stats['qa_pairs']} Q&A pairs")
console.print(f"[dim]DB path: {DB_PATH}[/dim]")
|
||||
@@ -202,6 +202,8 @@ def diarize(audio_path: str | Path,
|
||||
label = seg.speaker_label.split(" (")[0] # strip confidence score
|
||||
if label.startswith("Host:") or label.startswith("Host "):
|
||||
speaker = "HOST"
|
||||
elif label.startswith("Cohost:"):
|
||||
speaker = "CO-HOST"
|
||||
elif label == "[error]":
|
||||
speaker = "UNKNOWN"
|
||||
else:
|
||||
|
||||
@@ -53,10 +53,12 @@ _PROMO_SIGS: list[tuple[re.Pattern, int]] = [
|
||||
(re.compile(r"\bcomputer running slow\b", re.I), 1),
|
||||
(re.compile(r"\bafter these messages\b", re.I), 1),
|
||||
(re.compile(r"\b790.?2040\b", re.I), 1),
|
||||
(re.compile(r"\b751.?1041\b", re.I), 1),
|
||||
(re.compile(r"\bgurushow\.com\b", re.I), 1),
|
||||
(re.compile(r"\bcall in now\b", re.I), 1),
|
||||
(re.compile(r"\bcomputer troubles\?", re.I), 1),
|
||||
(re.compile(r"\bhardware installation\b", re.I), 1),
|
||||
(re.compile(r"we.?ll get your problem solved", re.I), 1),
|
||||
]
|
||||
|
||||
|
||||
@@ -127,10 +129,19 @@ def extract_qa_pairs(diarized_segments: list[dict]) -> list[QAPair]:
|
||||
if _is_promo_or_bumper(turn["text"]):
|
||||
i += 1
|
||||
continue
|
||||
# Skip the opening 90s — real callers never call before the show starts
|
||||
if turn["start"] < 90:
|
||||
i += 1
|
||||
continue
|
||||
q_duration = turn["end"] - turn["start"]
|
||||
if q_duration < MIN_QUESTION_DURATION:
|
||||
i += 1
|
||||
continue
|
||||
# Require caller-intro context: host must have introduced the call, OR
|
||||
# the caller opens with a phone greeting ("hello", "hi", "hey")
|
||||
if not _preceded_by_caller_intro(turns, i) and not _PHONE_GREETING.match(turn["text"].strip()):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Look ahead for HOST answer turn(s)
|
||||
j = i + 1
|
||||
@@ -329,25 +340,71 @@ def load_diarized_transcript(transcript_path: Path,
|
||||
with open(diarization_path) as f:
|
||||
diarization = json.load(f)
|
||||
|
||||
turns = diarization.get("turns", [])
|
||||
raw_turns = diarization.get("turns", [])
|
||||
|
||||
def speaker_at(t: float) -> str:
|
||||
"""Find which diarization turn covers time t."""
|
||||
# Resolve overlapping boundaries left by the sliding-window diarizer:
|
||||
# place each transition at the midpoint of the overlap region.
|
||||
resolved: list[dict] = []
|
||||
for turn in sorted(raw_turns, key=lambda t: t["start"]):
|
||||
if not resolved:
|
||||
resolved.append(dict(turn))
|
||||
continue
|
||||
prev = resolved[-1]
|
||||
if turn["start"] < prev["end"]:
|
||||
mid = (turn["start"] + prev["end"]) / 2
|
||||
prev["end"] = mid
|
||||
resolved.append({**turn, "start": mid})
|
||||
else:
|
||||
resolved.append(dict(turn))
|
||||
turns = resolved
|
||||
|
||||
# Minimum CALLER coverage to label a transcript segment as CALLER.
|
||||
# Batch transcription produces ~25s segments; caller windows are 10s.
|
||||
# Require 4s of CALLER overlap so brief HOST-edge segments aren't over-claimed.
|
||||
_CALLER_MIN_S = 4.0
|
||||
|
||||
def speaker_for_segment(seg_start: float, seg_end: float) -> str:
|
||||
caller_cov = 0.0
|
||||
coverage: dict[str, float] = {}
|
||||
for turn in turns:
|
||||
if turn["start"] <= t <= turn["end"]:
|
||||
return turn["speaker"]
|
||||
return "UNKNOWN"
|
||||
overlap = min(seg_end, turn["end"]) - max(seg_start, turn["start"])
|
||||
if overlap <= 0:
|
||||
continue
|
||||
coverage[turn["speaker"]] = coverage.get(turn["speaker"], 0) + overlap
|
||||
if turn["speaker"] == "CALLER":
|
||||
caller_cov += overlap
|
||||
if not coverage:
|
||||
return "UNKNOWN"
|
||||
if caller_cov >= _CALLER_MIN_S:
|
||||
return "CALLER"
|
||||
return max(coverage, key=coverage.__getitem__)
|
||||
|
||||
return [
|
||||
{"start": s["start"], "end": s["end"],
|
||||
"text": s["text"],
|
||||
"speaker": speaker_at((s["start"] + s["end"]) / 2)}
|
||||
"speaker": speaker_for_segment(s["start"], s["end"])}
|
||||
for s in segments
|
||||
]
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
_PHONE_GREETING = re.compile(r"^(hello|hi|hey|good (morning|afternoon|evening))\b", re.IGNORECASE)
|
||||
|
||||
|
||||
def _preceded_by_caller_intro(turns: list[dict], idx: int, max_host_turns: int = 2) -> bool:
|
||||
"""Return True if a preceding HOST turn (within max_host_turns HOST turns) contains a caller-intro phrase."""
|
||||
host_count = 0
|
||||
for j in range(idx - 1, -1, -1):
|
||||
if turns[j]["speaker"] == "HOST":
|
||||
if _CALLER_INTRO.search(turns[j]["text"]):
|
||||
return True
|
||||
host_count += 1
|
||||
if host_count >= max_host_turns:
|
||||
break
|
||||
return False
|
||||
|
||||
|
||||
def _looks_like_question(text: str) -> bool:
    """Report whether ``QUESTION_PATTERN`` finds a match anywhere in *text*."""
    found = QUESTION_PATTERN.search(text)
    return bool(found)
|
||||
|
||||
|
||||
@@ -113,61 +113,60 @@ def _format_srt_time(seconds: float) -> str:
|
||||
|
||||
|
||||
def transcribe(audio_path: str | Path, model_size: str = "large-v3",
|
||||
language: str = "en", device: str = "cuda") -> Transcript:
|
||||
"""Transcribe an audio file using faster-whisper."""
|
||||
from faster_whisper import WhisperModel
|
||||
language: str = "en", device: str = "cuda",
|
||||
batch_size: int = 16) -> Transcript:
|
||||
"""Transcribe an audio file using faster-whisper.
|
||||
|
||||
Uses BatchedInferencePipeline + int8_float16 + VAD for archive/batch work.
|
||||
Word timestamps are skipped in batch mode (not needed for segment-level search).
|
||||
Pass batch_size=0 to fall back to sequential WhisperModel with word timestamps.
|
||||
"""
|
||||
from faster_whisper import WhisperModel, BatchedInferencePipeline
|
||||
|
||||
audio_path = Path(audio_path)
|
||||
use_batched = batch_size > 0
|
||||
|
||||
console.print(f"[bold]Transcribing:[/bold] {audio_path.name}")
|
||||
console.print(f"[dim]Model: {model_size}, Device: {device}[/dim]")
|
||||
|
||||
model = WhisperModel(model_size, device=device, compute_type="float16")
|
||||
|
||||
segments_raw, info = model.transcribe(
|
||||
str(audio_path),
|
||||
language=language,
|
||||
word_timestamps=True,
|
||||
vad_filter=True,
|
||||
vad_parameters=dict(
|
||||
min_silence_duration_ms=500,
|
||||
speech_pad_ms=200,
|
||||
),
|
||||
console.print(
|
||||
f"[dim]Model: {model_size} | "
|
||||
f"{'batched x' + str(batch_size) + ' int8_float16' if use_batched else 'sequential float16'} | "
|
||||
f"Device: {device}[/dim]"
|
||||
)
|
||||
|
||||
console.print(f"[dim]Detected language: {info.language} "
|
||||
f"(probability: {info.language_probability:.2f})[/dim]")
|
||||
console.print(f"[dim]Duration: {info.duration:.1f}s "
|
||||
f"({info.duration / 60:.1f} min)[/dim]")
|
||||
if use_batched:
|
||||
base_model = WhisperModel(model_size, device=device, compute_type="int8_float16")
|
||||
model = BatchedInferencePipeline(model=base_model)
|
||||
segments_raw, info = model.transcribe(
|
||||
str(audio_path),
|
||||
language=language,
|
||||
batch_size=batch_size,
|
||||
)
|
||||
else:
|
||||
model = WhisperModel(model_size, device=device, compute_type="float16")
|
||||
segments_raw, info = model.transcribe(
|
||||
str(audio_path),
|
||||
language=language,
|
||||
word_timestamps=True,
|
||||
vad_filter=True,
|
||||
vad_parameters=dict(min_silence_duration_ms=500, speech_pad_ms=200),
|
||||
)
|
||||
|
||||
console.print(f"[dim]Duration: {info.duration:.1f}s ({info.duration / 60:.1f} min)[/dim]")
|
||||
|
||||
segments = []
|
||||
with Progress(
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
TextColumn("{task.completed} segments"),
|
||||
TimeElapsedColumn(),
|
||||
console=console,
|
||||
) as progress:
|
||||
task = progress.add_task("Transcribing...", total=None)
|
||||
|
||||
for i, seg in enumerate(segments_raw):
|
||||
for i, seg in enumerate(segments_raw):
|
||||
words = []
|
||||
if not use_batched:
|
||||
words = [
|
||||
TranscriptWord(
|
||||
word=w.word,
|
||||
start=w.start,
|
||||
end=w.end,
|
||||
probability=w.probability,
|
||||
)
|
||||
TranscriptWord(word=w.word, start=w.start,
|
||||
end=w.end, probability=w.probability)
|
||||
for w in (seg.words or [])
|
||||
]
|
||||
segments.append(TranscriptSegment(
|
||||
id=i,
|
||||
text=seg.text,
|
||||
start=seg.start,
|
||||
end=seg.end,
|
||||
words=words,
|
||||
))
|
||||
progress.update(task, completed=i + 1)
|
||||
segments.append(TranscriptSegment(
|
||||
id=i, text=seg.text, start=seg.start, end=seg.end, words=words,
|
||||
))
|
||||
if i % 50 == 0:
|
||||
console.print(f"[dim] {i} segments... ({seg.end:.0f}s)[/dim]")
|
||||
|
||||
console.print(f"[green]Transcription complete: {len(segments)} segments[/green]")
|
||||
|
||||
|
||||
@@ -319,8 +319,11 @@ class VoiceProfiler:
|
||||
best_match = name
|
||||
|
||||
if best_score >= threshold:
|
||||
if best_match and self.profiles[best_match].role == "host":
|
||||
role = self.profiles[best_match].role if best_match else "unknown"
|
||||
if role == "host":
|
||||
label = f"Host: {best_match}"
|
||||
elif role == "cohost":
|
||||
label = f"Cohost: {best_match}"
|
||||
else:
|
||||
label = best_match
|
||||
else:
|
||||
|
||||
@@ -1,26 +1,34 @@
|
||||
{
|
||||
"Mike Swanson": {
|
||||
"role": "host",
|
||||
"num_samples": 180,
|
||||
"source_episodes": [
|
||||
"2010-10-02-hr1.mp3",
|
||||
"2011-06-04-hr1.mp3",
|
||||
"2011-09-10-hr1.mp3",
|
||||
"2014-s6e05.mp3",
|
||||
"2015-s7e30.mp3",
|
||||
"2016-s8e42.mp3",
|
||||
"2017-s9e26.mp3",
|
||||
"2018-s10e17.mp3",
|
||||
"2018-s10e21.mp3",
|
||||
"2010-10-02-hr1.mp3",
|
||||
"2011-06-04-hr1.mp3",
|
||||
"2011-09-10-hr1.mp3",
|
||||
"2014-s6e05.mp3",
|
||||
"2015-s7e30.mp3",
|
||||
"2016-s8e42.mp3",
|
||||
"2017-s9e26.mp3",
|
||||
"2018-s10e17.mp3",
|
||||
"2018-s10e21.mp3"
|
||||
]
|
||||
}
|
||||
{
|
||||
"Mike Swanson": {
|
||||
"role": "host",
|
||||
"num_samples": 180,
|
||||
"source_episodes": [
|
||||
"2010-10-02-hr1.mp3",
|
||||
"2011-06-04-hr1.mp3",
|
||||
"2011-09-10-hr1.mp3",
|
||||
"2014-s6e05.mp3",
|
||||
"2015-s7e30.mp3",
|
||||
"2016-s8e42.mp3",
|
||||
"2017-s9e26.mp3",
|
||||
"2018-s10e17.mp3",
|
||||
"2018-s10e21.mp3",
|
||||
"2010-10-02-hr1.mp3",
|
||||
"2011-06-04-hr1.mp3",
|
||||
"2011-09-10-hr1.mp3",
|
||||
"2014-s6e05.mp3",
|
||||
"2015-s7e30.mp3",
|
||||
"2016-s8e42.mp3",
|
||||
"2017-s9e26.mp3",
|
||||
"2018-s10e17.mp3",
|
||||
"2018-s10e21.mp3"
|
||||
]
|
||||
},
|
||||
"Tom": {
|
||||
"role": "cohost",
|
||||
"num_samples": 44,
|
||||
"source_episodes": [
|
||||
"2014-s6e19.mp3",
|
||||
"2016-s8e43.mp3"
|
||||
]
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user