radio: bumper detection in diarizer + full archive download script
Adds a transcript-driven bumper filter to the diarization pipeline. When
a transcript segment matches qa_extractor's promo/bumper signatures, the
overlapping audio windows are labeled BUMPER and the WavLM cosine match
is skipped. Prevents music/promo from being matched against speaker
profiles (the failure mode Mike caught in 2018-s10e18 @ 09:20-10:05).
Code changes:
- src/voice_profiler.py: identify_speakers() takes optional skip_ranges
parameter; windows whose midpoint falls in a skip range get labeled
"[bumper]" and skip cosine match
- src/diarizer.py: diarize() takes optional transcript_path; pre-computes
bumper time ranges via qa_extractor._is_promo_or_bumper, passes to
identify_speakers; adds BUMPER speaker label
- benchmark.py: passes transcript_path to diarize()
Aggregate impact across 9-episode test set:
Tara attribution: 4880s -> 3680s (-1200s / -25%)
Q&A pairs: 17 -> 19 (+2)
(bumper-flagged segments had been disrupting conversation detection
in 2017-s9e30 and 2018-s10e18)
CALLER total: 1320s -> 1190s (bumpers previously labeled CALLER moved)
Per-episode bumpers caught: 1-8, total ~165 bumper segments across set
Remaining Tara false positives are real callers acoustically similar to
Tara (Christopher in 2018, Kay in 2012, William and Charles in 2015) and
guest Clay in 2015-s7e19 — those need profile rebuild + Clay profile,
not bumper filtering.
Adds download_full_archive.py — resumable mirror-style downloader that
walks IX server's /home/gurushow/public_html/archive/{year}/ and copies
all MP3s to archive-data/episodes/. Run is in progress (~589 files,
~10-15GB). Used to source clean profile windows for the remaining
co-hosts (Tara rebuild, Clay, Tony, Rob, Randall, producers).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -108,7 +108,8 @@ for ep, transcript_path, audio_dur, _ in trans_results:
|
||||
audio_dur = json.load(f).get("duration", 0)
|
||||
|
||||
t0 = time.monotonic()
|
||||
result = diarize(ep, voice_profiles=voice_profiles, host_match_threshold=0.85)
|
||||
result = diarize(ep, voice_profiles=voice_profiles, host_match_threshold=0.85,
|
||||
transcript_path=transcript_path)
|
||||
wall = time.monotonic() - t0
|
||||
rtf = audio_dur / wall if wall > 0 else 0
|
||||
|
||||
|
||||
103
projects/radio-show/audio-processor/download_full_archive.py
Normal file
103
projects/radio-show/audio-processor/download_full_archive.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Download the full Computer Guru Show archive from IX server (172.16.3.10).
|
||||
|
||||
Mirrors the year-based directory structure as-is to archive-data/episodes/.
|
||||
Resumable: skips files already present with matching size.
|
||||
Requires Tailscale.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import paramiko
|
||||
from pathlib import Path
|
||||
|
||||
# --- Credentials -----------------------------------------------------------
# The SSH password is taken from the environment so it never lands in the
# repository; without it there is nothing useful the script can do.
password = os.environ.get("IX_PASSWORD")
if not password:
    print("IX_PASSWORD env var not set", file=sys.stderr)
    sys.exit(1)

# --- Local / remote layout -------------------------------------------------
# Episodes are mirrored next to this script, keeping the remote year-based
# sub-directories as they are.
LOCAL_ROOT = Path(__file__).parent / "archive-data" / "episodes"
LOCAL_ROOT.mkdir(parents=True, exist_ok=True)

REMOTE_ROOT = "/home/gurushow/public_html/archive"

# Year directories to walk under REMOTE_ROOT.
# NOTE(review): 2013 is not listed — confirm the remote archive really has no
# 2013 directory rather than this being an omission.
YEARS = [
    "2010",
    "2011",
    "2012",
    "2014",
    "2015",
    "2016",
    "2017",
    "2018",
]

# --- SSH / SFTP session ----------------------------------------------------
print("Connecting to 172.16.3.10...", flush=True)
client = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts any host key without verification.
# Kept as-is to preserve behaviour; consider load_system_host_keys() plus a
# stricter policy if the host key can be pinned.
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(
    "172.16.3.10",
    username="root",
    password=password,
    # Password-only authentication: do not try local key files or an agent.
    look_for_keys=False,
    allow_agent=False,
    timeout=30,
)
sftp = client.open_sftp()
print("Connected.", flush=True)
|
||||
|
||||
|
||||
def list_remote_mp3s(year: str) -> list[str]:
    """Return the remote paths of all MP3 files under one year's directory.

    Runs ``find`` on the remote host through the module-level SSH ``client``,
    matching ``*.mp3`` case-insensitively beneath ``REMOTE_ROOT/<year>``.
    The command redirects find's stderr to /dev/null and the exit status is
    not inspected, so remote-side errors are not surfaced to the caller.

    Args:
        year: Name of the year sub-directory to search (e.g. ``"2015"``).

    Returns:
        The non-blank output lines of ``find``, whitespace-stripped, in the
        order the remote command printed them.
    """
    command = f"find '{REMOTE_ROOT}/{year}' -iname '*.mp3' 2>/dev/null"
    _stdin, remote_out, _remote_err = client.exec_command(command)

    found = []
    for raw_line in remote_out.read().decode().splitlines():
        path = raw_line.strip()
        if path:
            found.append(path)
    return found
|
||||
|
||||
|
||||
# Running tallies for the end-of-run summary.
total_files = 0
total_bytes = 0
skipped_files = 0
skipped_bytes = 0
downloaded_files = 0
downloaded_bytes = 0
errors = []

_GIB = 1024 ** 3
_MAX_ERRORS_SHOWN = 20

t_start = time.monotonic()

try:
    for year in YEARS:
        print(f"\n=== {year} ===", flush=True)
        remote_paths = list_remote_mp3s(year)
        print(f" {len(remote_paths)} MP3 files found on remote", flush=True)

        for remote in remote_paths:
            # Path relative to REMOTE_ROOT, so the year-based layout is
            # reproduced unchanged under LOCAL_ROOT.
            rel = remote[len(REMOTE_ROOT) + 1:]
            local = LOCAL_ROOT / rel
            local.parent.mkdir(parents=True, exist_ok=True)

            # Per-file failures are recorded and skipped (best effort) so one
            # bad file does not abort a multi-gigabyte mirror run.
            try:
                remote_size = sftp.stat(remote).st_size
            except Exception as e:
                errors.append(f"stat {remote}: {e}")
                continue

            total_files += 1
            total_bytes += remote_size

            # Resume support: a local file of identical size counts as done.
            if local.exists() and local.stat().st_size == remote_size:
                skipped_files += 1
                skipped_bytes += remote_size
                continue

            size_mb = remote_size / 1024 / 1024
            print(f" [{downloaded_files + 1:3d}] {rel} ({size_mb:.1f} MB)...", end="", flush=True)

            # Download to a ".part" sibling and rename only on success, so an
            # interrupted or failed transfer never leaves a truncated MP3 at
            # the final path where other tools could pick it up.
            partial = local.with_name(local.name + ".part")
            t0 = time.monotonic()
            try:
                sftp.get(remote, str(partial))
                partial.replace(local)
            except Exception as e:
                try:
                    partial.unlink(missing_ok=True)
                except OSError:
                    pass  # cleanup is best effort; the next run overwrites it
                print(f" FAILED: {e}", flush=True)
                errors.append(f"get {remote}: {e}")
                continue

            elapsed = time.monotonic() - t0
            mb_per_s = size_mb / elapsed if elapsed > 0 else 0
            print(f" done ({elapsed:.1f}s, {mb_per_s:.1f} MB/s)", flush=True)
            downloaded_files += 1
            downloaded_bytes += remote_size
finally:
    # Always release the SFTP channel and SSH connection, even if the loop
    # is interrupted or raises unexpectedly.
    sftp.close()
    client.close()

elapsed_total = time.monotonic() - t_start
print("\n=== Summary ===", flush=True)
print(f" Total remote files : {total_files}", flush=True)
print(f" Total remote bytes : {total_bytes / _GIB:.2f} GB", flush=True)
print(f" Already present : {skipped_files} files / {skipped_bytes / _GIB:.2f} GB", flush=True)
print(f" Newly downloaded : {downloaded_files} files / {downloaded_bytes / _GIB:.2f} GB", flush=True)
print(f" Errors : {len(errors)}", flush=True)
print(f" Wall time : {elapsed_total:.1f}s", flush=True)

if errors:
    print("\n=== Errors ===", flush=True)
    for e in errors[:_MAX_ERRORS_SHOWN]:
        print(f" {e}", flush=True)
    # Make truncation explicit instead of silently dropping the rest.
    if len(errors) > _MAX_ERRORS_SHOWN:
        print(f" ... and {len(errors) - _MAX_ERRORS_SHOWN} more", flush=True)
|
||||
@@ -158,12 +158,17 @@ def diarize(audio_path: str | Path,
|
||||
voice_profiles: VoiceProfileStore | None = None,
|
||||
min_speakers: int = 1,
|
||||
max_speakers: int = 6,
|
||||
host_match_threshold: float = 0.85) -> DiarizationResult:
|
||||
host_match_threshold: float = 0.85,
|
||||
transcript_path: str | Path | None = None) -> DiarizationResult:
|
||||
"""Run speaker diarization using WavLM sliding-window speaker identification.
|
||||
|
||||
Uses the built-in VoiceProfiler (WavLM x-vectors) — no HuggingFace token
|
||||
or gated model required. Identifies HOST vs non-HOST speakers using the
|
||||
stored voice profile for Mike Swanson.
|
||||
|
||||
If transcript_path is provided, time ranges containing show promo/bumper
|
||||
text are pre-marked and skipped at speaker-identification time so vocal
|
||||
music doesn't match cohost profiles.
|
||||
"""
|
||||
import torch
|
||||
from .voice_profiler import VoiceProfiler
|
||||
@@ -190,10 +195,28 @@ def diarize(audio_path: str | Path,
|
||||
speaker_map={"HOST": "HOST"},
|
||||
)
|
||||
|
||||
# Pre-compute bumper / promo time ranges from transcript if available
|
||||
bumper_ranges: list[tuple[float, float]] = []
|
||||
if transcript_path is not None:
|
||||
transcript_path = Path(transcript_path)
|
||||
if transcript_path.exists():
|
||||
from .qa_extractor import _is_promo_or_bumper
|
||||
with open(transcript_path) as f:
|
||||
tdata = json.load(f)
|
||||
for seg in tdata.get("segments", []):
|
||||
if _is_promo_or_bumper(seg.get("text", "")):
|
||||
bumper_ranges.append((seg["start"], seg["end"]))
|
||||
if bumper_ranges:
|
||||
console.print(
|
||||
f"[dim]Bumper filter: {len(bumper_ranges)} promo/bumper "
|
||||
f"transcript segments will be skipped during speaker match[/dim]"
|
||||
)
|
||||
|
||||
# Sliding-window identification: 10s windows, 5s hop
|
||||
voice_segs = profiler.identify_speakers(
|
||||
audio_path, window_s=10.0, hop_s=5.0,
|
||||
threshold=host_match_threshold,
|
||||
skip_ranges=bumper_ranges,
|
||||
)
|
||||
|
||||
# Convert VoiceSegment labels to HOST / CALLER
|
||||
@@ -204,6 +227,8 @@ def diarize(audio_path: str | Path,
|
||||
speaker = "HOST"
|
||||
elif label.startswith("Cohost:"):
|
||||
speaker = "CO-HOST"
|
||||
elif label == "[bumper]":
|
||||
speaker = "BUMPER"
|
||||
elif label == "[error]":
|
||||
speaker = "UNKNOWN"
|
||||
else:
|
||||
|
||||
@@ -279,12 +279,19 @@ class VoiceProfiler:
|
||||
def identify_speakers(self, audio_path: Path,
|
||||
window_s: float = 10.0,
|
||||
hop_s: float = 5.0,
|
||||
threshold: float = 0.70) -> list[VoiceSegment]:
|
||||
threshold: float = 0.70,
|
||||
skip_ranges: list[tuple[float, float]] | None = None
|
||||
) -> list[VoiceSegment]:
|
||||
"""Identify speakers throughout an audio file using sliding window.
|
||||
|
||||
Loads the full audio once then slices in memory — avoids spawning
|
||||
hundreds of ffmpeg subprocesses.
|
||||
Returns timestamped segments with speaker labels and embeddings.
|
||||
|
||||
skip_ranges: list of (start, end) seconds. Windows whose midpoint
|
||||
falls inside any of these ranges are labeled "[bumper]" and the
|
||||
speaker cosine match is skipped — used to suppress music/promo
|
||||
from being matched against speaker profiles.
|
||||
"""
|
||||
console.print(f"[bold]Identifying speakers:[/bold] {audio_path.name}")
|
||||
|
||||
@@ -293,6 +300,8 @@ class VoiceProfiler:
|
||||
audio = self._load_full_audio(audio_path) # float32 mono array
|
||||
self._get_model() # ensure model is warm before the loop
|
||||
|
||||
skip_ranges = skip_ranges or []
|
||||
|
||||
segments = []
|
||||
window_samples = int(window_s * SAMPLE_RATE)
|
||||
hop_samples = int(hop_s * SAMPLE_RATE)
|
||||
@@ -306,6 +315,16 @@ class VoiceProfiler:
|
||||
s = int(start * SAMPLE_RATE)
|
||||
e = min(s + window_samples, total_samples)
|
||||
|
||||
mid = (start + end) / 2
|
||||
in_bumper = any(rs <= mid <= re for rs, re in skip_ranges)
|
||||
|
||||
if in_bumper:
|
||||
segments.append(VoiceSegment(
|
||||
start=start, end=end,
|
||||
speaker_label="[bumper] (1.00)",
|
||||
))
|
||||
continue
|
||||
|
||||
try:
|
||||
emb = self._embed_audio_np(audio[s:e])
|
||||
|
||||
|
||||
@@ -1,26 +1,21 @@
|
||||
{
|
||||
"num_speakers": 3,
|
||||
"num_speakers": 4,
|
||||
"speaker_map": {
|
||||
"CALLER": "CALLER",
|
||||
"HOST": "HOST",
|
||||
"CO-HOST": "CO-HOST"
|
||||
"CO-HOST": "CO-HOST",
|
||||
"BUMPER": "BUMPER",
|
||||
"CALLER": "CALLER"
|
||||
},
|
||||
"turns": [
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 0.0,
|
||||
"end": 20.0,
|
||||
"confidence": 0.89
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 15.0,
|
||||
"end": 25.0,
|
||||
"confidence": 0.87
|
||||
"end": 35.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 20.0,
|
||||
"start": 30.0,
|
||||
"end": 40.0,
|
||||
"confidence": 0.88
|
||||
},
|
||||
|
||||
@@ -1,34 +1,23 @@
|
||||
{
|
||||
"num_speakers": 3,
|
||||
"num_speakers": 4,
|
||||
"speaker_map": {
|
||||
"CALLER": "CALLER",
|
||||
"HOST": "HOST",
|
||||
"CO-HOST": "CO-HOST"
|
||||
"CO-HOST": "CO-HOST",
|
||||
"BUMPER": "BUMPER",
|
||||
"CALLER": "CALLER"
|
||||
},
|
||||
"turns": [
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 0.0,
|
||||
"end": 20.0,
|
||||
"confidence": 0.88
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 15.0,
|
||||
"end": 25.0,
|
||||
"confidence": 0.87
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
"start": 20.0,
|
||||
"end": 30.0,
|
||||
"confidence": 0.84
|
||||
"end": 35.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 25.0,
|
||||
"start": 30.0,
|
||||
"end": 430.0,
|
||||
"confidence": 0.86
|
||||
"confidence": 0.96
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
|
||||
@@ -1,28 +1,23 @@
|
||||
{
|
||||
"num_speakers": 3,
|
||||
"num_speakers": 4,
|
||||
"speaker_map": {
|
||||
"CALLER": "CALLER",
|
||||
"HOST": "HOST",
|
||||
"CO-HOST": "CO-HOST"
|
||||
"CO-HOST": "CO-HOST",
|
||||
"BUMPER": "BUMPER",
|
||||
"CALLER": "CALLER"
|
||||
},
|
||||
"turns": [
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 0.0,
|
||||
"end": 20.0,
|
||||
"confidence": 0.9
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 15.0,
|
||||
"end": 25.0,
|
||||
"confidence": 0.87
|
||||
"end": 35.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 20.0,
|
||||
"start": 30.0,
|
||||
"end": 690.0,
|
||||
"confidence": 0.86
|
||||
"confidence": 0.97
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
@@ -33,14 +28,20 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 690.0,
|
||||
"end": 1350.0,
|
||||
"end": 1330.0,
|
||||
"confidence": 0.92
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 1325.0,
|
||||
"end": 1355.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 1345.0,
|
||||
"start": 1350.0,
|
||||
"end": 1470.0,
|
||||
"confidence": 0.92
|
||||
"confidence": 0.93
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
|
||||
@@ -1,16 +1,23 @@
|
||||
{
|
||||
"num_speakers": 3,
|
||||
"num_speakers": 4,
|
||||
"speaker_map": {
|
||||
"CALLER": "CALLER",
|
||||
"HOST": "HOST",
|
||||
"CALLER": "CALLER",
|
||||
"BUMPER": "BUMPER",
|
||||
"CO-HOST": "CO-HOST"
|
||||
},
|
||||
"turns": [
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 0.0,
|
||||
"end": 35.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 30.0,
|
||||
"end": 40.0,
|
||||
"confidence": 0.96
|
||||
"confidence": 0.93
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
@@ -61,22 +68,28 @@
|
||||
"confidence": 0.96
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 660.0,
|
||||
"end": 680.0,
|
||||
"confidence": 0.98
|
||||
"end": 695.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 675.0,
|
||||
"end": 710.0,
|
||||
"confidence": 0.94
|
||||
"start": 690.0,
|
||||
"end": 700.0,
|
||||
"confidence": 0.95
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 695.0,
|
||||
"end": 740.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 705.0,
|
||||
"start": 735.0,
|
||||
"end": 985.0,
|
||||
"confidence": 0.9
|
||||
"confidence": 0.87
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
@@ -159,18 +172,18 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 2055.0,
|
||||
"end": 2155.0,
|
||||
"end": 2120.0,
|
||||
"confidence": 0.94
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
"start": 2150.0,
|
||||
"end": 2160.0,
|
||||
"confidence": 0.83
|
||||
"speaker": "BUMPER",
|
||||
"start": 2115.0,
|
||||
"end": 2165.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 2155.0,
|
||||
"start": 2160.0,
|
||||
"end": 2170.0,
|
||||
"confidence": 0.97
|
||||
},
|
||||
|
||||
@@ -1,14 +1,21 @@
|
||||
{
|
||||
"num_speakers": 3,
|
||||
"num_speakers": 4,
|
||||
"speaker_map": {
|
||||
"CALLER": "CALLER",
|
||||
"HOST": "HOST",
|
||||
"CALLER": "CALLER",
|
||||
"BUMPER": "BUMPER",
|
||||
"CO-HOST": "CO-HOST"
|
||||
},
|
||||
"turns": [
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 0.0,
|
||||
"end": 35.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 30.0,
|
||||
"end": 40.0,
|
||||
"confidence": 0.96
|
||||
},
|
||||
@@ -123,14 +130,20 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 550.0,
|
||||
"end": 580.0,
|
||||
"end": 565.0,
|
||||
"confidence": 0.98
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 560.0,
|
||||
"end": 595.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 575.0,
|
||||
"start": 590.0,
|
||||
"end": 600.0,
|
||||
"confidence": 0.96
|
||||
"confidence": 0.93
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
@@ -153,12 +166,18 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 1055.0,
|
||||
"end": 1190.0,
|
||||
"end": 1160.0,
|
||||
"confidence": 0.99
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 1155.0,
|
||||
"end": 1205.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 1185.0,
|
||||
"start": 1200.0,
|
||||
"end": 1215.0,
|
||||
"confidence": 0.98
|
||||
},
|
||||
@@ -255,12 +274,18 @@
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 2020.0,
|
||||
"end": 2055.0,
|
||||
"end": 2030.0,
|
||||
"confidence": 0.92
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 2025.0,
|
||||
"end": 2060.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 2050.0,
|
||||
"start": 2055.0,
|
||||
"end": 2105.0,
|
||||
"confidence": 0.98
|
||||
},
|
||||
@@ -549,14 +574,14 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 3370.0,
|
||||
"end": 3395.0,
|
||||
"end": 3390.0,
|
||||
"confidence": 0.94
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 3390.0,
|
||||
"speaker": "BUMPER",
|
||||
"start": 3385.0,
|
||||
"end": 3435.0,
|
||||
"confidence": 0.85
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
@@ -565,22 +590,16 @@
|
||||
"confidence": 0.98
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 3965.0,
|
||||
"end": 3980.0,
|
||||
"confidence": 0.96
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 3975.0,
|
||||
"end": 3990.0,
|
||||
"confidence": 0.97
|
||||
"end": 4020.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 3985.0,
|
||||
"start": 4015.0,
|
||||
"end": 4025.0,
|
||||
"confidence": 0.86
|
||||
"confidence": 0.95
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
@@ -723,14 +742,20 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 4575.0,
|
||||
"end": 4680.0,
|
||||
"end": 4655.0,
|
||||
"confidence": 0.97
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 4650.0,
|
||||
"end": 4695.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 4675.0,
|
||||
"start": 4690.0,
|
||||
"end": 4715.0,
|
||||
"confidence": 0.92
|
||||
"confidence": 0.94
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
|
||||
@@ -1,22 +1,17 @@
|
||||
{
|
||||
"num_speakers": 3,
|
||||
"num_speakers": 4,
|
||||
"speaker_map": {
|
||||
"CALLER": "CALLER",
|
||||
"HOST": "HOST",
|
||||
"CO-HOST": "CO-HOST"
|
||||
"CO-HOST": "CO-HOST",
|
||||
"BUMPER": "BUMPER",
|
||||
"CALLER": "CALLER"
|
||||
},
|
||||
"turns": [
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"speaker": "BUMPER",
|
||||
"start": 0.0,
|
||||
"end": 20.0,
|
||||
"confidence": 0.88
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 15.0,
|
||||
"end": 25.0,
|
||||
"confidence": 0.92
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
@@ -69,14 +64,20 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 615.0,
|
||||
"end": 730.0,
|
||||
"end": 710.0,
|
||||
"confidence": 0.89
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 705.0,
|
||||
"end": 750.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 725.0,
|
||||
"start": 745.0,
|
||||
"end": 770.0,
|
||||
"confidence": 0.91
|
||||
"confidence": 0.96
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
@@ -117,9 +118,21 @@
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 1310.0,
|
||||
"end": 1355.0,
|
||||
"end": 1320.0,
|
||||
"confidence": 0.98
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 1315.0,
|
||||
"end": 1350.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 1345.0,
|
||||
"end": 1355.0,
|
||||
"confidence": 0.97
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 1350.0,
|
||||
@@ -189,20 +202,20 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 1460.0,
|
||||
"end": 2130.0,
|
||||
"end": 2110.0,
|
||||
"confidence": 0.88
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
"start": 2125.0,
|
||||
"end": 2135.0,
|
||||
"confidence": 0.78
|
||||
"speaker": "BUMPER",
|
||||
"start": 2105.0,
|
||||
"end": 2155.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 2130.0,
|
||||
"start": 2150.0,
|
||||
"end": 2175.0,
|
||||
"confidence": 0.86
|
||||
"confidence": 0.89
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
@@ -219,20 +232,20 @@
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 2650.0,
|
||||
"end": 2725.0,
|
||||
"end": 2715.0,
|
||||
"confidence": 0.97
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 2720.0,
|
||||
"end": 2730.0,
|
||||
"confidence": 0.89
|
||||
"speaker": "BUMPER",
|
||||
"start": 2710.0,
|
||||
"end": 2745.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 2725.0,
|
||||
"start": 2740.0,
|
||||
"end": 2995.0,
|
||||
"confidence": 0.91
|
||||
"confidence": 0.99
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
@@ -273,20 +286,20 @@
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 3375.0,
|
||||
"end": 3410.0,
|
||||
"end": 3390.0,
|
||||
"confidence": 0.91
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
"start": 3405.0,
|
||||
"end": 3415.0,
|
||||
"confidence": 0.84
|
||||
"speaker": "BUMPER",
|
||||
"start": 3385.0,
|
||||
"end": 3425.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 3410.0,
|
||||
"start": 3420.0,
|
||||
"end": 4185.0,
|
||||
"confidence": 0.96
|
||||
"confidence": 0.98
|
||||
},
|
||||
{
|
||||
"speaker": "CALLER",
|
||||
@@ -387,14 +400,20 @@
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
"start": 4550.0,
|
||||
"end": 4595.0,
|
||||
"end": 4565.0,
|
||||
"confidence": 0.89
|
||||
},
|
||||
{
|
||||
"speaker": "BUMPER",
|
||||
"start": 4560.0,
|
||||
"end": 4605.0,
|
||||
"confidence": 1.0
|
||||
},
|
||||
{
|
||||
"speaker": "HOST",
|
||||
"start": 4590.0,
|
||||
"start": 4600.0,
|
||||
"end": 5285.0,
|
||||
"confidence": 0.95
|
||||
"confidence": 0.94
|
||||
},
|
||||
{
|
||||
"speaker": "CO-HOST",
|
||||
|
||||
Reference in New Issue
Block a user