- Fix voice_profiler threshold bug (HOST label overwrote Unknown unconditionally) - Audio preload optimization: single ffmpeg per episode, 149.5x realtime on 5070 Ti - WavLM threshold raised to 0.85 (Mike 0.90-0.99, callers 0.46-0.83) - Promo/bumper filter: weighted signature scoring, 42->27 clean Q&A pairs - Text-only Q&A fallback for episodes with no CALLER diarization labels - TRANSFORMERS_OFFLINE=1 to skip HuggingFace freshness checks - Add diarize_2018.py for targeted re-run + FTS5 rebuild - Add benchmark.py + BENCH_SETUP.md for GURU-BEAST-ROG (RTX 4090) comparison - Commit 9-episode training diarization.json outputs - Session log: 2026-04-27-diarization-pipeline.md Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
177 lines
3.2 KiB
JSON
177 lines
3.2 KiB
JSON
{
|
|
"num_speakers": 2,
|
|
"speaker_map": {
|
|
"CALLER": "CALLER",
|
|
"HOST": "HOST"
|
|
},
|
|
"turns": [
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 0.0,
|
|
"end": 45.0,
|
|
"confidence": 0.66
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 40.0,
|
|
"end": 655.0,
|
|
"confidence": 0.92
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 650.0,
|
|
"end": 690.0,
|
|
"confidence": 0.64
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 685.0,
|
|
"end": 1350.0,
|
|
"confidence": 0.99
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1345.0,
|
|
"end": 1380.0,
|
|
"confidence": 0.54
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1375.0,
|
|
"end": 1395.0,
|
|
"confidence": 0.99
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1390.0,
|
|
"end": 1440.0,
|
|
"confidence": 0.84
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1435.0,
|
|
"end": 1485.0,
|
|
"confidence": 0.87
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1480.0,
|
|
"end": 1495.0,
|
|
"confidence": 0.83
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1490.0,
|
|
"end": 1515.0,
|
|
"confidence": 0.97
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1510.0,
|
|
"end": 1540.0,
|
|
"confidence": 0.81
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1535.0,
|
|
"end": 1625.0,
|
|
"confidence": 0.85
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1620.0,
|
|
"end": 1635.0,
|
|
"confidence": 0.81
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1630.0,
|
|
"end": 1720.0,
|
|
"confidence": 0.96
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1715.0,
|
|
"end": 1725.0,
|
|
"confidence": 0.82
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1720.0,
|
|
"end": 1860.0,
|
|
"confidence": 0.98
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 1855.0,
|
|
"end": 1870.0,
|
|
"confidence": 0.78
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 1865.0,
|
|
"end": 2015.0,
|
|
"confidence": 0.86
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 2010.0,
|
|
"end": 2035.0,
|
|
"confidence": 0.82
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 2030.0,
|
|
"end": 2055.0,
|
|
"confidence": 0.97
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 2050.0,
|
|
"end": 2070.0,
|
|
"confidence": 0.84
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 2065.0,
|
|
"end": 2075.0,
|
|
"confidence": 0.98
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 2070.0,
|
|
"end": 2085.0,
|
|
"confidence": 0.84
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 2080.0,
|
|
"end": 2105.0,
|
|
"confidence": 0.85
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 2100.0,
|
|
"end": 2110.0,
|
|
"confidence": 0.77
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 2105.0,
|
|
"end": 2345.0,
|
|
"confidence": 0.96
|
|
},
|
|
{
|
|
"speaker": "CALLER",
|
|
"start": 2340.0,
|
|
"end": 2390.0,
|
|
"confidence": 0.68
|
|
},
|
|
{
|
|
"speaker": "HOST",
|
|
"start": 2385.0,
|
|
"end": 2720.0,
|
|
"confidence": 0.92
|
|
}
|
|
]
|
|
} |