diff --git a/.claude/memory/radio_show_no_cohost_named_tom.md b/.claude/memory/radio_show_no_cohost_named_tom.md index b71217b..0e2b2d1 100644 --- a/.claude/memory/radio_show_no_cohost_named_tom.md +++ b/.claude/memory/radio_show_no_cohost_named_tom.md @@ -15,7 +15,38 @@ The show has had multiple **co-hosts** rotating through, plus **producers / boar |---|---|---|---| | **Randall** | early years | not yet | no | | **Rob** | early years + appearances in 2018/2019 (Mike unsure of exact dates) | not yet | no | -| **Tara** | confirmed 2014-s6e19, 2016-s8e43; diarizer also found her in 2017-s9e30 (610s/11.4%) — pending Mike spot-check | yes | yes — `voice-profiles/tara/` (44 embeddings) | +| **Tony** | 2012-era co-host (Mike unsure whether on-air in 2012-06-09-hr1) | not yet | no | +| **Tara** | confirmed 2014-s6e19, 2016-s8e43, **2018-s10e18 @ 50:50** (verified by Mike 2026-04-27 listen). Plausible in 2015 and 2017 (pending verify). | yes | yes — `voice-profiles/tara/` (44 embeddings, **possibly contaminated**, see below) | + +### Tara profile contamination flag + +Mike spot-checked CO-HOST-flagged windows on 2026-04-27 and found the diarizer matching: + +In **2018-s10e18**: +- **A bumper** (09:20-10:05, music/promo — not a voice) +- **Tara** (50:50 — true positive) +- **A caller, "Christopher"** (~82:10 — false positive, real caller misattributed as Tara) + +In **2012-06-09-hr1**: +- **A caller, "Kay"** (22:10-26:00 — real caller misattributed as Tara). Spans the 22:25-24:30 (125s) and 25:15-25:55 (40s) CO-HOST turns. Mike unsure whether co-host Tony was on-air this episode. + +In **2015-s7e19** (Jan 2015 New Year episode): +- **A caller, "William"** (~35:30 — confirmed in transcript: "let's talk to William. Hello, William. How are you?", asks about Excel→Word mail merge) +- **A caller, "Charles"** (~16:30 — Mike-identified, transcript not yet verified) +- **A recurring special guest, "Clay" from "Nerd Junkies"** — appears multiple times: transcript at 33:13 "More Clay from the Nerd Junkies", at 37:33 "I'm just curious, Clay, do you have any feedback". Clay is a recurring guest, not a co-host. The 4:40 of "Tara"-attributed audio in this episode is likely **all** Clay + callers, with no actual Tara presence. + +### Recurring guests / fill-ins +| Person | Affiliation | Confirmed in audio | Profile built | +|---|---|---|---| +| **Clay** | "Nerd Junkies" — fills in for Tara when she's out (Mike: rarely appears in other episodes) | 2015-s7e19 (throughout — Tara was out, Clay covered) | pending | + +Tara's role is explicit per transcript at 2015-s7e19 @ 00:51: "in Tara's place, we have Clay. Clay from the Nerd Junkies." — Tara is the regular co-host for that era; Clay is a fill-in. + +Root cause is likely contamination in `build_cohost_profile.py`: the TARA_WINDOWS were sourced from "first 60 min CALLER turns" under the assumption "real callers don't call in during the first hour of a 2-hour show." That assumption appears to leak — at least one real caller ended up in Tara's training data, and the resulting profile now matches a too-broad acoustic space. + +Two distinct fixes needed: +1. **Bumper handling in diarizer** — the qa_extractor has bumper signature detection but the diarizer doesn't filter music/promo segments before speaker matching. Bumpers with vocal content can trigger speaker matches. +2. **Tara profile rebuild from vetted windows** — Mike-confirmed windows only, not the heuristic-selected first-60-min approach. The 2026-04-27 listen confirmed 50:50 in 2018-s10e18 as a clean Tara window; more would be needed. ### Producers / board ops (sometimes on-air) | Person | Profile built | @@ -23,6 +54,7 @@ The show has had multiple **co-hosts** rotating through, plus **producers / boar | **Andrew** | no | | **Shannon** | no | | **Ken** | no | +| **Unknown board op (2015-s7e19 opening)** | no — Mike heard him at the very start of 2015-s7e19, name forgotten | | (Mike: "a couple more" he doesn't recall off-hand) | no | Mike: "The 'producer' (board op) would also be on-air sometimes." Anywhere a producer's voice appears, they're currently being labeled CALLER, which inflates Q&A false positives. Same problem as unprofiled co-hosts. diff --git a/projects/radio-show/audio-processor/benchmark.py b/projects/radio-show/audio-processor/benchmark.py index e44bf94..4ad30c3 100644 --- a/projects/radio-show/audio-processor/benchmark.py +++ b/projects/radio-show/audio-processor/benchmark.py @@ -108,7 +108,8 @@ for ep, transcript_path, audio_dur, _ in trans_results: audio_dur = json.load(f).get("duration", 0) t0 = time.monotonic() - result = diarize(ep, voice_profiles=voice_profiles, host_match_threshold=0.85) + result = diarize(ep, voice_profiles=voice_profiles, host_match_threshold=0.85, + transcript_path=transcript_path) wall = time.monotonic() - t0 rtf = audio_dur / wall if wall > 0 else 0 diff --git a/projects/radio-show/audio-processor/download_full_archive.py b/projects/radio-show/audio-processor/download_full_archive.py new file mode 100644 index 0000000..42fa53e --- /dev/null +++ b/projects/radio-show/audio-processor/download_full_archive.py @@ -0,0 +1,103 @@ +""" +Download the full Computer Guru Show archive from IX server (172.16.3.10). + +Mirrors the year-based directory structure as-is to archive-data/episodes/. +Resumable: skips files already present with matching size. +Requires Tailscale. +""" +import os +import sys +import time +import paramiko +from pathlib import Path + +password = os.environ.get("IX_PASSWORD") +if not password: + print("IX_PASSWORD env var not set", file=sys.stderr) + sys.exit(1) + +LOCAL_ROOT = Path(__file__).parent / "archive-data" / "episodes" +LOCAL_ROOT.mkdir(parents=True, exist_ok=True) + +REMOTE_ROOT = "/home/gurushow/public_html/archive" +YEARS = ["2010", "2011", "2012", "2014", "2015", "2016", "2017", "2018"] + +print(f"Connecting to 172.16.3.10...", flush=True) +client = paramiko.SSHClient() +client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) +client.connect("172.16.3.10", username="root", password=password, + look_for_keys=False, allow_agent=False, timeout=30) +sftp = client.open_sftp() +print("Connected.", flush=True) + + +def list_remote_mp3s(year: str) -> list[str]: + cmd = f"find '{REMOTE_ROOT}/{year}' -iname '*.mp3' 2>/dev/null" + stdin, stdout, stderr = client.exec_command(cmd) + return [line.strip() for line in stdout.read().decode().splitlines() if line.strip()] + + +total_files = 0 +total_bytes = 0 +skipped_files = 0 +skipped_bytes = 0 +downloaded_files = 0 +downloaded_bytes = 0 +errors = [] + +t_start = time.monotonic() + +for year in YEARS: + print(f"\n=== {year} ===", flush=True) + remote_paths = list_remote_mp3s(year) + print(f" {len(remote_paths)} MP3 files found on remote", flush=True) + + for remote in remote_paths: + rel = remote[len(REMOTE_ROOT) + 1:] + local = LOCAL_ROOT / rel + local.parent.mkdir(parents=True, exist_ok=True) + + try: + remote_stat = sftp.stat(remote) + remote_size = remote_stat.st_size + except Exception as e: + errors.append(f"stat {remote}: {e}") + continue + + total_files += 1 + total_bytes += remote_size + + if local.exists() and local.stat().st_size == remote_size: + skipped_files += 1 + skipped_bytes += remote_size + continue + + size_mb = remote_size / 1024 / 1024 + print(f" [{downloaded_files + 1:3d}] {rel} ({size_mb:.1f} MB)...", end="", flush=True) + t0 = time.monotonic() + try: + sftp.get(remote, str(local)) + elapsed = time.monotonic() - t0 + mbps = size_mb / elapsed if elapsed > 0 else 0 + print(f" done ({elapsed:.1f}s, {mbps:.1f} MB/s)", flush=True) + downloaded_files += 1 + downloaded_bytes += remote_size + except Exception as e: + print(f" FAILED: {e}", flush=True) + errors.append(f"get {remote}: {e}") + +elapsed_total = time.monotonic() - t_start +print(f"\n=== Summary ===", flush=True) +print(f" Total remote files : {total_files}", flush=True) +print(f" Total remote bytes : {total_bytes / 1024 / 1024 / 1024:.2f} GB", flush=True) +print(f" Already present : {skipped_files} files / {skipped_bytes / 1024 / 1024 / 1024:.2f} GB", flush=True) +print(f" Newly downloaded : {downloaded_files} files / {downloaded_bytes / 1024 / 1024 / 1024:.2f} GB", flush=True) +print(f" Errors : {len(errors)}", flush=True) +print(f" Wall time : {elapsed_total:.1f}s", flush=True) +if errors: + print(f"\n=== Errors ===", flush=True) + for e in errors[:20]: + print(f" {e}", flush=True) + +sftp.close() +client.close() diff --git a/projects/radio-show/audio-processor/src/diarizer.py b/projects/radio-show/audio-processor/src/diarizer.py index 340ea4d..f393fd1 100644 --- a/projects/radio-show/audio-processor/src/diarizer.py +++ b/projects/radio-show/audio-processor/src/diarizer.py @@ -158,12 +158,17 @@ def diarize(audio_path: str | Path, voice_profiles: VoiceProfileStore | None = None, min_speakers: int = 1, max_speakers: int = 6, - host_match_threshold: float = 0.85) -> DiarizationResult: + host_match_threshold: float = 0.85, + transcript_path: str | Path | None = None) -> DiarizationResult: """Run speaker diarization using WavLM sliding-window speaker identification. Uses the built-in VoiceProfiler (WavLM x-vectors) — no HuggingFace token or gated model required. Identifies HOST vs non-HOST speakers using the stored voice profile for Mike Swanson. + + If transcript_path is provided, time ranges containing show promo/bumper + text are pre-marked and skipped at speaker-identification time so vocal + music doesn't match cohost profiles. """ import torch from .voice_profiler import VoiceProfiler @@ -190,10 +195,28 @@ def diarize(audio_path: str | Path, speaker_map={"HOST": "HOST"}, ) + # Pre-compute bumper / promo time ranges from transcript if available + bumper_ranges: list[tuple[float, float]] = [] + if transcript_path is not None: + transcript_path = Path(transcript_path) + if transcript_path.exists(): + from .qa_extractor import _is_promo_or_bumper + with open(transcript_path) as f: + tdata = json.load(f) + for seg in tdata.get("segments", []): + if _is_promo_or_bumper(seg.get("text", "")): + bumper_ranges.append((seg["start"], seg["end"])) + if bumper_ranges: + console.print( + f"[dim]Bumper filter: {len(bumper_ranges)} promo/bumper " + f"transcript segments will be skipped during speaker match[/dim]" + ) + # Sliding-window identification: 10s windows, 5s hop voice_segs = profiler.identify_speakers( audio_path, window_s=10.0, hop_s=5.0, threshold=host_match_threshold, + skip_ranges=bumper_ranges, ) # Convert VoiceSegment labels to HOST / CALLER @@ -204,6 +227,8 @@ def diarize(audio_path: str | Path, speaker = "HOST" elif label.startswith("Cohost:"): speaker = "CO-HOST" + elif label == "[bumper]": + speaker = "BUMPER" elif label == "[error]": speaker = "UNKNOWN" else: diff --git a/projects/radio-show/audio-processor/src/voice_profiler.py b/projects/radio-show/audio-processor/src/voice_profiler.py index 7fe327a..6cecc4d 100644 --- a/projects/radio-show/audio-processor/src/voice_profiler.py +++ b/projects/radio-show/audio-processor/src/voice_profiler.py @@ -279,12 +279,19 @@ class VoiceProfiler: def identify_speakers(self, audio_path: Path, window_s: float = 10.0, hop_s: float = 5.0, - threshold: float = 0.70) -> list[VoiceSegment]: + threshold: float = 0.70, + skip_ranges: list[tuple[float, float]] | None = None + ) -> list[VoiceSegment]: """Identify speakers throughout an audio file using sliding window. Loads the full audio once then slices in memory — avoids spawning hundreds of ffmpeg subprocesses. Returns timestamped segments with speaker labels and embeddings. + + skip_ranges: list of (start, end) seconds. Windows whose midpoint + falls inside any of these ranges are labeled "[bumper]" and the + speaker cosine match is skipped — used to suppress music/promo + from being matched against speaker profiles. """ console.print(f"[bold]Identifying speakers:[/bold] {audio_path.name}") @@ -293,6 +300,8 @@ class VoiceProfiler: audio = self._load_full_audio(audio_path) # float32 mono array self._get_model() # ensure model is warm before the loop + skip_ranges = skip_ranges or [] + segments = [] window_samples = int(window_s * SAMPLE_RATE) hop_samples = int(hop_s * SAMPLE_RATE) @@ -306,6 +315,16 @@ class VoiceProfiler: s = int(start * SAMPLE_RATE) e = min(s + window_samples, total_samples) + mid = (start + end) / 2 + in_bumper = any(rs <= mid <= re for rs, re in skip_ranges) + + if in_bumper: + segments.append(VoiceSegment( + start=start, end=end, + speaker_label="[bumper] (1.00)", + )) + continue + try: emb = self._embed_audio_np(audio[s:e]) diff --git a/projects/radio-show/audio-processor/test-data/transcripts/2011-03-12-hr1/diarization.json b/projects/radio-show/audio-processor/test-data/transcripts/2011-03-12-hr1/diarization.json index 63cb04b..7d90060 100644 --- a/projects/radio-show/audio-processor/test-data/transcripts/2011-03-12-hr1/diarization.json +++ b/projects/radio-show/audio-processor/test-data/transcripts/2011-03-12-hr1/diarization.json @@ -1,26 +1,21 @@ { - "num_speakers": 3, + "num_speakers": 4, "speaker_map": { - "CALLER": "CALLER", "HOST": "HOST", - "CO-HOST": "CO-HOST" + "CO-HOST": "CO-HOST", + "BUMPER": "BUMPER", + "CALLER": "CALLER" }, "turns": [ { - "speaker": "HOST", + "speaker": "BUMPER", "start": 0.0, - "end": 20.0, - "confidence": 0.89 - }, - { - "speaker": "CO-HOST", - "start": 15.0, - "end": 25.0, - "confidence": 0.87 + "end": 35.0, + "confidence": 1.0 }, { "speaker": "HOST", - "start": 20.0, + "start": 30.0, "end": 40.0, "confidence": 0.88 }, diff --git a/projects/radio-show/audio-processor/test-data/transcripts/2012-03-10-hr1/diarization.json b/projects/radio-show/audio-processor/test-data/transcripts/2012-03-10-hr1/diarization.json index b69e922..8e28edb 100644 --- a/projects/radio-show/audio-processor/test-data/transcripts/2012-03-10-hr1/diarization.json +++ b/projects/radio-show/audio-processor/test-data/transcripts/2012-03-10-hr1/diarization.json @@ -1,34 +1,23 @@ { - "num_speakers": 3, + "num_speakers": 4, "speaker_map": { - "CALLER": "CALLER", "HOST": "HOST", - "CO-HOST": "CO-HOST" + "CO-HOST": "CO-HOST", + "BUMPER": "BUMPER", + "CALLER": "CALLER" }, "turns": [ { - "speaker": "HOST", + "speaker": "BUMPER", "start": 0.0, - "end": 20.0, - "confidence": 0.88 - }, - { - "speaker": "CO-HOST", - "start": 15.0, - "end": 25.0, - "confidence": 0.87 - }, - { - "speaker": "CALLER", - "start": 20.0, - "end": 30.0, - "confidence": 0.84 + "end": 35.0, + "confidence": 1.0 }, { "speaker": "HOST", - "start": 25.0, + "start": 30.0, "end": 430.0, - "confidence": 0.86 + "confidence": 0.96 }, { "speaker": "CALLER", diff --git a/projects/radio-show/audio-processor/test-data/transcripts/2012-06-09-hr1/diarization.json b/projects/radio-show/audio-processor/test-data/transcripts/2012-06-09-hr1/diarization.json index f5ea9df..88b2fcd 100644 --- a/projects/radio-show/audio-processor/test-data/transcripts/2012-06-09-hr1/diarization.json +++ b/projects/radio-show/audio-processor/test-data/transcripts/2012-06-09-hr1/diarization.json @@ -1,28 +1,23 @@ { - "num_speakers": 3, + "num_speakers": 4, "speaker_map": { - "CALLER": "CALLER", "HOST": "HOST", - "CO-HOST": "CO-HOST" + "CO-HOST": "CO-HOST", + "BUMPER": "BUMPER", + "CALLER": "CALLER" }, "turns": [ { - "speaker": "HOST", + "speaker": "BUMPER", "start": 0.0, - "end": 20.0, - "confidence": 0.9 - }, - { - "speaker": "CO-HOST", - "start": 15.0, - "end": 25.0, - "confidence": 0.87 + "end": 35.0, + "confidence": 1.0 }, { "speaker": "HOST", - "start": 20.0, + "start": 30.0, "end": 690.0, - "confidence": 0.86 + "confidence": 0.97 }, { "speaker": "CALLER", @@ -33,14 +28,20 @@ { "speaker": "HOST", "start": 690.0, - "end": 1350.0, + "end": 1330.0, "confidence": 0.92 }, + { + "speaker": "BUMPER", + "start": 1325.0, + "end": 1355.0, + "confidence": 1.0 + }, { "speaker": "CO-HOST", - "start": 1345.0, + "start": 1350.0, "end": 1470.0, - "confidence": 0.92 + "confidence": 0.93 }, { "speaker": "HOST", diff --git a/projects/radio-show/audio-processor/test-data/transcripts/2014-s6e19/diarization.json b/projects/radio-show/audio-processor/test-data/transcripts/2014-s6e19/diarization.json index 023b81b..4458c75 100644 --- a/projects/radio-show/audio-processor/test-data/transcripts/2014-s6e19/diarization.json +++ b/projects/radio-show/audio-processor/test-data/transcripts/2014-s6e19/diarization.json @@ -1,16 +1,23 @@ { - "num_speakers": 3, + "num_speakers": 4, "speaker_map": { - "CALLER": "CALLER", "HOST": "HOST", + "CALLER": "CALLER", + "BUMPER": "BUMPER", "CO-HOST": "CO-HOST" }, "turns": [ { - "speaker": "CO-HOST", + "speaker": "BUMPER", "start": 0.0, + "end": 35.0, + "confidence": 1.0 + }, + { + "speaker": "CO-HOST", + "start": 30.0, "end": 40.0, - "confidence": 0.96 + "confidence": 0.93 }, { "speaker": "HOST", @@ -61,22 +68,28 @@ "confidence": 0.96 }, { - "speaker": "HOST", + "speaker": "BUMPER", "start": 660.0, - "end": 680.0, - "confidence": 0.98 + "end": 695.0, + "confidence": 1.0 }, { "speaker": "CO-HOST", - "start": 675.0, - "end": 710.0, - "confidence": 0.94 + "start": 690.0, + "end": 700.0, + "confidence": 0.95 + }, + { + "speaker": "BUMPER", + "start": 695.0, + "end": 740.0, + "confidence": 1.0 }, { "speaker": "HOST", - "start": 705.0, + "start": 735.0, "end": 985.0, - "confidence": 0.9 + "confidence": 0.87 }, { "speaker": "CO-HOST", @@ -159,18 +172,18 @@ { "speaker": "HOST", "start": 2055.0, - "end": 2155.0, + "end": 2120.0, "confidence": 0.94 }, { - "speaker": "CALLER", - "start": 2150.0, - "end": 2160.0, - "confidence": 0.83 + "speaker": "BUMPER", + "start": 2115.0, + "end": 2165.0, + "confidence": 1.0 }, { "speaker": "CO-HOST", - "start": 2155.0, + "start": 2160.0, "end": 2170.0, "confidence": 0.97 }, diff --git a/projects/radio-show/audio-processor/test-data/transcripts/2016-s8e43/diarization.json b/projects/radio-show/audio-processor/test-data/transcripts/2016-s8e43/diarization.json index 4fdcac2..d1d2bc4 100644 --- a/projects/radio-show/audio-processor/test-data/transcripts/2016-s8e43/diarization.json +++ b/projects/radio-show/audio-processor/test-data/transcripts/2016-s8e43/diarization.json @@ -1,14 +1,21 @@ { - "num_speakers": 3, + "num_speakers": 4, "speaker_map": { - "CALLER": "CALLER", "HOST": "HOST", + "CALLER": "CALLER", + "BUMPER": "BUMPER", "CO-HOST": "CO-HOST" }, "turns": [ { - "speaker": "CO-HOST", + "speaker": "BUMPER", "start": 0.0, + "end": 35.0, + "confidence": 1.0 + }, + { + "speaker": "CO-HOST", + "start": 30.0, "end": 40.0, "confidence": 0.96 }, @@ -123,14 +130,20 @@ { "speaker": "HOST", "start": 550.0, - "end": 580.0, + "end": 565.0, "confidence": 0.98 }, + { + "speaker": "BUMPER", + "start": 560.0, + "end": 595.0, + "confidence": 1.0 + }, { "speaker": "CO-HOST", - "start": 575.0, + "start": 590.0, "end": 600.0, - "confidence": 0.96 + "confidence": 0.93 }, { "speaker": "CALLER", @@ -153,12 +166,18 @@ { "speaker": "HOST", "start": 1055.0, - "end": 1190.0, + "end": 1160.0, "confidence": 0.99 }, + { + "speaker": "BUMPER", + "start": 1155.0, + "end": 1205.0, + "confidence": 1.0 + }, { "speaker": "CO-HOST", - "start": 1185.0, + "start": 1200.0, "end": 1215.0, "confidence": 0.98 }, @@ -255,12 +274,18 @@ { "speaker": "CO-HOST", "start": 2020.0, - "end": 2055.0, + "end": 2030.0, "confidence": 0.92 }, + { + "speaker": "BUMPER", + "start": 2025.0, + "end": 2060.0, + "confidence": 1.0 + }, { "speaker": "HOST", - "start": 2050.0, + "start": 2055.0, "end": 2105.0, "confidence": 0.98 }, @@ -549,14 +574,14 @@ { "speaker": "HOST", "start": 3370.0, - "end": 3395.0, + "end": 3390.0, "confidence": 0.94 }, { - "speaker": "CO-HOST", - "start": 3390.0, + "speaker": "BUMPER", + "start": 3385.0, "end": 3435.0, - "confidence": 0.85 + "confidence": 1.0 }, { "speaker": "HOST", @@ -565,22 +590,16 @@ "confidence": 0.98 }, { - "speaker": "CO-HOST", + "speaker": "BUMPER", "start": 3965.0, - "end": 3980.0, - "confidence": 0.96 - }, - { - "speaker": "HOST", - "start": 3975.0, - "end": 3990.0, - "confidence": 0.97 + "end": 4020.0, + "confidence": 1.0 }, { "speaker": "CO-HOST", - "start": 3985.0, + "start": 4015.0, "end": 4025.0, - "confidence": 0.86 + "confidence": 0.95 }, { "speaker": "CALLER", @@ -723,14 +742,20 @@ { "speaker": "HOST", "start": 4575.0, - "end": 4680.0, + "end": 4655.0, "confidence": 0.97 }, + { + "speaker": "BUMPER", + "start": 4650.0, + "end": 4695.0, + "confidence": 1.0 + }, { "speaker": "CO-HOST", - "start": 4675.0, + "start": 4690.0, "end": 4715.0, - "confidence": 0.92 + "confidence": 0.94 }, { "speaker": "HOST", diff --git a/projects/radio-show/audio-processor/test-data/transcripts/2017-s9e30/diarization.json b/projects/radio-show/audio-processor/test-data/transcripts/2017-s9e30/diarization.json index b655ed2..d9e0bce 100644 --- a/projects/radio-show/audio-processor/test-data/transcripts/2017-s9e30/diarization.json +++ b/projects/radio-show/audio-processor/test-data/transcripts/2017-s9e30/diarization.json @@ -1,22 +1,17 @@ { - "num_speakers": 3, + "num_speakers": 4, "speaker_map": { - "CALLER": "CALLER", "HOST": "HOST", - "CO-HOST": "CO-HOST" + "CO-HOST": "CO-HOST", + "BUMPER": "BUMPER", + "CALLER": "CALLER" }, "turns": [ { - "speaker": "HOST", + "speaker": "BUMPER", "start": 0.0, - "end": 20.0, - "confidence": 0.88 - }, - { - "speaker": "CO-HOST", - "start": 15.0, "end": 25.0, - "confidence": 0.92 + "confidence": 1.0 }, { "speaker": "HOST", @@ -69,14 +64,20 @@ { "speaker": "HOST", "start": 615.0, - "end": 730.0, + "end": 710.0, "confidence": 0.89 }, + { + "speaker": "BUMPER", + "start": 705.0, + "end": 750.0, + "confidence": 1.0 + }, { "speaker": "CO-HOST", - "start": 725.0, + "start": 745.0, "end": 770.0, - "confidence": 0.91 + "confidence": 0.96 }, { "speaker": "HOST", @@ -117,9 +118,21 @@ { "speaker": "CO-HOST", "start": 1310.0, - "end": 1355.0, + "end": 1320.0, "confidence": 0.98 }, + { + "speaker": "BUMPER", + "start": 1315.0, + "end": 1350.0, + "confidence": 1.0 + }, + { + "speaker": "CO-HOST", + "start": 1345.0, + "end": 1355.0, + "confidence": 0.97 + }, { "speaker": "HOST", "start": 1350.0, @@ -189,20 +202,20 @@ { "speaker": "HOST", "start": 1460.0, - "end": 2130.0, + "end": 2110.0, "confidence": 0.88 }, { - "speaker": "CALLER", - "start": 2125.0, - "end": 2135.0, - "confidence": 0.78 + "speaker": "BUMPER", + "start": 2105.0, + "end": 2155.0, + "confidence": 1.0 }, { "speaker": "CO-HOST", - "start": 2130.0, + "start": 2150.0, "end": 2175.0, - "confidence": 0.86 + "confidence": 0.89 }, { "speaker": "HOST", @@ -219,20 +232,20 @@ { "speaker": "HOST", "start": 2650.0, - "end": 2725.0, + "end": 2715.0, "confidence": 0.97 }, { - "speaker": "CO-HOST", - "start": 2720.0, - "end": 2730.0, - "confidence": 0.89 + "speaker": "BUMPER", + "start": 2710.0, + "end": 2745.0, + "confidence": 1.0 }, { "speaker": "HOST", - "start": 2725.0, + "start": 2740.0, "end": 2995.0, - "confidence": 0.91 + "confidence": 0.99 }, { "speaker": "CO-HOST", @@ -273,20 +286,20 @@ { "speaker": "CO-HOST", "start": 3375.0, - "end": 3410.0, + "end": 3390.0, "confidence": 0.91 }, { - "speaker": "CALLER", - "start": 3405.0, - "end": 3415.0, - "confidence": 0.84 + "speaker": "BUMPER", + "start": 3385.0, + "end": 3425.0, + "confidence": 1.0 }, { "speaker": "HOST", - "start": 3410.0, + "start": 3420.0, "end": 4185.0, - "confidence": 0.96 + "confidence": 0.98 }, { "speaker": "CALLER", @@ -387,14 +400,20 @@ { "speaker": "CO-HOST", "start": 4550.0, - "end": 4595.0, + "end": 4565.0, "confidence": 0.89 }, + { + "speaker": "BUMPER", + "start": 4560.0, + "end": 4605.0, + "confidence": 1.0 + }, { "speaker": "HOST", - "start": 4590.0, + "start": 4600.0, "end": 5285.0, - "confidence": 0.95 + "confidence": 0.94 }, { "speaker": "CO-HOST",