radio: browseable Q&A — /api/qa, /api/audio range streaming, /episode HTML view
Make the radio archive Q&A pairs actually browseable end to end:
- /api/qa list endpoint (year, min_score, exclude_banter, topic_class,
pagination, sort by air_date or score). Returns the same column shape as
/api/search Q&A hits.
- /api/audio/{episode_id} streams the MP3 with HTTP Range support so the
browser <audio> can seek. 206 + Content-Range when ranged, 200 when
full-file. Returns 404 cleanly when episodes/ tree is absent (Jupiter).
- /episode/{id} HTML transcript view: chronological segments with clickable
timestamps, Q&A blocks spliced inline (anchor #qa-<id>), intros marked
inline, right-rail summary. Hash-anchor on load auto-seeks the audio.
- New question_excerpt / answer_excerpt fields on /api/search Q&A hits and
on /api/qa items: trim leading run-on chatter, take ~300 chars, end on a
sentence boundary or word boundary with ellipsis.
- Index UI: each Q&A hit now links to /episode/{id}#qa-{qa_id}; new
"Browse all Q&A" toggle (year selector, sort, append-load 50 per page,
defaults to min_score=3); FTS snippet replaced with the plain excerpt
when available.
No new dependencies, no schema changes, no LLM calls. Uses
EPISODES_DIR env (default /data/episodes) — Jupiter compose still only
mounts /data so audio degrades gracefully to 404 there until episodes
are uploaded.
This commit is contained in:
@@ -7,22 +7,30 @@ Endpoints:
|
||||
GET /api/episodes/{id} Episode detail: intros + qa_pairs
|
||||
GET /api/episodes/{id}/transcript Chronologically merged segments + turns
|
||||
GET /api/search?q=...&kind=... FTS over segments and/or qa_pairs
|
||||
GET /api/qa List Q&A pairs (no search query, filterable)
|
||||
GET /api/audio/{id} Stream the episode MP3 (HTTP Range supported)
|
||||
GET /api/callers Top recurring caller_names
|
||||
GET /episode/{id} HTML transcript view with audio player
|
||||
|
||||
Config via env:
|
||||
ARCHIVE_DB path to archive.db (default /data/archive.db)
|
||||
EPISODES_DIR path to mp3 tree (default /data/episodes)
|
||||
PORT listen port (default 8765)
|
||||
"""
|
||||
import html as _html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
from fastapi.responses import FileResponse, HTMLResponse
|
||||
from fastapi import FastAPI, HTTPException, Query, Request
|
||||
from fastapi.responses import FileResponse, HTMLResponse, Response, StreamingResponse
|
||||
|
||||
# Runtime configuration, read once from the environment at import time.
DB_PATH = os.getenv("ARCHIVE_DB", "/data/archive.db")  # path to archive.db
EPISODES_DIR = os.getenv("EPISODES_DIR", "/data/episodes")  # path to mp3 tree
PORT = int(os.getenv("PORT", "8765"))  # listen port
|
||||
|
||||
|
||||
@@ -49,6 +57,72 @@ def fts_escape(q: str) -> str:
|
||||
return " ".join(f'"{tok}"' for tok in q.split() if tok)
|
||||
|
||||
|
||||
# Excerpt extraction for Q&A texts.
|
||||
#
|
||||
# Whisper transcripts often start with disfluent run-on chatter that's a
|
||||
# leftover from the previous turn. We trim that prefix, take the first 300
|
||||
# chars, and try to end on a sentence boundary so the excerpt reads cleanly.
|
||||
_EXCERPT_BODY = 300 # primary character budget
|
||||
_EXCERPT_LOOKAHEAD = 80 # extra chars allowed to find a sentence end
|
||||
_EXCERPT_LEAD_SCAN = 30 # window to look for a leading capital letter
|
||||
|
||||
|
||||
def _excerpt(text: str | None) -> str:
|
||||
"""Return a short, readable excerpt suitable for browsing.
|
||||
|
||||
Rules (intentionally simple — see spec):
|
||||
1. Walk the leading prefix and skip to the first capital letter, but
|
||||
only within the first ~30 chars; otherwise keep the original start.
|
||||
2. Take the first 300 chars.
|
||||
3. If that cut lands mid-sentence, look up to 80 more chars ahead for
|
||||
the next .!? and end there.
|
||||
4. Otherwise back up to the last word boundary and append "..." so we
|
||||
never display half a word.
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
s = text.strip()
|
||||
if not s:
|
||||
return ""
|
||||
|
||||
# 1. trim disfluent leading run-on to the first capital letter
|
||||
lead_window = s[:_EXCERPT_LEAD_SCAN]
|
||||
cap_match = re.search(r"[A-Z]", lead_window)
|
||||
if cap_match and cap_match.start() > 0:
|
||||
s = s[cap_match.start():]
|
||||
|
||||
if len(s) <= _EXCERPT_BODY:
|
||||
return s
|
||||
|
||||
body = s[:_EXCERPT_BODY]
|
||||
# 3. if the body ends mid-sentence, look ahead for a terminator
|
||||
if body and body[-1] not in ".!?":
|
||||
ahead = s[_EXCERPT_BODY:_EXCERPT_BODY + _EXCERPT_LOOKAHEAD]
|
||||
m = re.search(r"[.!?]", ahead)
|
||||
if m:
|
||||
return body + ahead[: m.end()]
|
||||
# 4. back up to last whitespace and ellipsize
|
||||
cut = body.rfind(" ")
|
||||
if cut > 0:
|
||||
return body[:cut].rstrip(",;:- ") + "..."
|
||||
return body + "..."
|
||||
|
||||
return body
|
||||
|
||||
|
||||
def _qa_search_excerpts(row: dict) -> dict:
    """Swap the private full-text columns of a Q&A row for plain excerpts.

    The ``_question_text`` / ``_answer_text`` keys (when present) are
    removed from ``row`` and replaced by ``question_excerpt`` /
    ``answer_excerpt`` computed with ``_excerpt``. Any other keys, such as
    the ``<mark>``-highlighted ``q_snippet`` / ``a_snippet``, are left
    untouched so existing consumers keep working.

    The dict is modified in place and also returned for convenience.
    """
    for field in ("question", "answer"):
        full_text = row.pop(f"_{field}_text", None)
        row[f"{field}_excerpt"] = _excerpt(full_text)
    return row
|
||||
|
||||
|
||||
@app.get("/api/episodes")
|
||||
def list_episodes(year: int | None = None, limit: int = 1000):
|
||||
db: sqlite3.Connection = app.state.db
|
||||
@@ -175,6 +249,8 @@ def search(
|
||||
p.id AS qa_id, p.caller_name,
|
||||
p.question_start_sec, p.answer_start_sec,
|
||||
p.usefulness_score, p.topic_class, p.is_banter,
|
||||
p.question_text AS _question_text,
|
||||
p.answer_text AS _answer_text,
|
||||
snippet(qa_fts, 0, '<mark>', '</mark>', '...', 16) AS q_snippet,
|
||||
snippet(qa_fts, 1, '<mark>', '</mark>', '...', 16) AS a_snippet,
|
||||
bm25(qa_fts) AS rank
|
||||
@@ -184,11 +260,200 @@ def search(
|
||||
WHERE {' AND '.join(qa_clauses)}
|
||||
ORDER BY rank LIMIT :limit
|
||||
"""
|
||||
qa_results = [dict(r) for r in db.execute(qa_sql, qa_params).fetchall()]
|
||||
qa_results = [
|
||||
_qa_search_excerpts(dict(r))
|
||||
for r in db.execute(qa_sql, qa_params).fetchall()
|
||||
]
|
||||
|
||||
return {"q": q, "segments": seg_results, "qa": qa_results}
|
||||
|
||||
|
||||
# Whitelist of sort keys. The user-supplied `order` value is only used to
# look up one of these fixed fragments; the fragment (never the raw input)
# is what ends up in ORDER BY.
_QA_SORT_ORDERS: dict[str, str] = dict(
    air_date_desc="COALESCE(e.air_date, '0000') DESC, p.question_start_sec ASC",
    air_date_asc="COALESCE(e.air_date, '9999') ASC, p.question_start_sec ASC",
    score_desc=(
        "COALESCE(p.usefulness_score, 0) DESC, COALESCE(e.air_date, '0000') DESC, "
        "p.question_start_sec ASC"
    ),
)
|
||||
|
||||
|
||||
@app.get("/api/qa")
def list_qa(
    year: int | None = None,
    min_score: int = Query(0, ge=0, le=5),
    exclude_banter: bool = Query(False),
    topic_class: str | None = None,
    limit: int = Query(50, ge=1, le=200),
    offset: int = Query(0, ge=0),
    order: str = Query("air_date_desc"),
):
    """List Q&A pairs without a search query, filtered, sorted and paginated.

    Each item has the same column shape as a Q&A hit from /api/search,
    with ``question_excerpt`` / ``answer_excerpt`` added by
    ``_qa_search_excerpts``.

    Filters (all optional, combined with AND):
      * ``year``: exact match on the episode year.
      * ``min_score``: keeps rows whose usefulness score is NULL or at
        least this value; 0 disables the filter.
      * ``exclude_banter``: keeps rows whose ``is_banter`` is NULL or 0.
      * ``topic_class``: exact match; an empty string disables the filter.

    Returns:
        ``{"total": <rows matching the filters>, "items": [<page of rows>]}``.

    Raises:
        HTTPException: 400 when ``order`` is not a key of ``_QA_SORT_ORDERS``.
    """
    conn: sqlite3.Connection = app.state.db
    order_sql = _QA_SORT_ORDERS.get(order)
    if order_sql is None:
        raise HTTPException(400, f"unknown order: {order}")

    # "1=1" keeps the WHERE clause valid when no filter is active.
    clauses = ["1=1"]
    bind: dict[str, object] = {}
    if year is not None:
        clauses.append("e.year = :year")
        bind["year"] = year
    if min_score > 0:
        clauses.append("(p.usefulness_score IS NULL OR p.usefulness_score >= :min_score)")
        bind["min_score"] = min_score
    if exclude_banter:
        clauses.append("(p.is_banter IS NULL OR p.is_banter = 0)")
    if topic_class:
        clauses.append("p.topic_class = :topic_class")
        bind["topic_class"] = topic_class
    where_sql = " AND ".join(clauses)

    # Count first (without LIMIT/OFFSET) so the client can paginate.
    count_row = conn.execute(
        f"""SELECT COUNT(*) FROM qa_pairs p
            JOIN episodes e ON e.id = p.episode_id
            WHERE {where_sql}""",
        bind,
    ).fetchone()

    page = conn.execute(
        f"""SELECT e.id AS episode_id, e.year, e.title, e.air_date,
                   p.id AS qa_id, p.caller_name,
                   p.question_start_sec, p.answer_start_sec,
                   p.usefulness_score, p.topic_class, p.is_banter,
                   p.question_text AS _question_text,
                   p.answer_text AS _answer_text
            FROM qa_pairs p
            JOIN episodes e ON e.id = p.episode_id
            WHERE {where_sql}
            ORDER BY {order_sql}
            LIMIT :limit OFFSET :offset""",
        {**bind, "limit": limit, "offset": offset},
    ).fetchall()

    return {
        "total": count_row[0],
        "items": [_qa_search_excerpts(dict(record)) for record in page],
    }
|
||||
|
||||
|
||||
# --- Audio streaming with HTTP Range support ----------------------------
|
||||
|
||||
# Default read size in bytes (64 KiB) for each chunk yielded by _file_iter.
_AUDIO_CHUNK = 64 * 1024
|
||||
|
||||
|
||||
def _resolve_audio_path(rel_path: str) -> Path | None:
|
||||
"""Return the absolute Path to the MP3 if it exists, else None.
|
||||
|
||||
rel_path is the value stored in episodes.rel_path (e.g.
|
||||
"2010/10 - October/10-02-10 HR 1.mp3"). We refuse anything that escapes
|
||||
the episodes root via .. so a malicious DB row cannot read arbitrary
|
||||
files.
|
||||
"""
|
||||
if not rel_path:
|
||||
return None
|
||||
base = Path(EPISODES_DIR).resolve()
|
||||
candidate = (base / rel_path).resolve()
|
||||
try:
|
||||
candidate.relative_to(base)
|
||||
except ValueError:
|
||||
return None
|
||||
if not candidate.is_file():
|
||||
return None
|
||||
return candidate
|
||||
|
||||
|
||||
def _parse_range(header: str, file_size: int) -> tuple[int, int] | None:
|
||||
"""Parse a single-range "bytes=START-END" header. Returns None if invalid."""
|
||||
if not header or not header.startswith("bytes="):
|
||||
return None
|
||||
spec = header[len("bytes="):].strip()
|
||||
if "," in spec:
|
||||
# Multi-range — fall back to no-range (full file) for simplicity
|
||||
return None
|
||||
if "-" not in spec:
|
||||
return None
|
||||
start_s, end_s = spec.split("-", 1)
|
||||
try:
|
||||
if start_s == "":
|
||||
# suffix range: "-N" -> last N bytes
|
||||
length = int(end_s)
|
||||
if length <= 0:
|
||||
return None
|
||||
start = max(0, file_size - length)
|
||||
end = file_size - 1
|
||||
else:
|
||||
start = int(start_s)
|
||||
end = int(end_s) if end_s else file_size - 1
|
||||
except ValueError:
|
||||
return None
|
||||
if start < 0 or end < start or start >= file_size:
|
||||
return None
|
||||
end = min(end, file_size - 1)
|
||||
return start, end
|
||||
|
||||
|
||||
def _file_iter(path: Path, start: int, length: int,
               chunk: int = _AUDIO_CHUNK) -> Iterator[bytes]:
    """Yield up to ``length`` bytes of ``path`` starting at offset ``start``.

    The file is opened lazily on first iteration and read in pieces of at
    most ``chunk`` bytes. Iteration stops early if the file ends before
    ``length`` bytes have been produced.
    """
    with open(path, "rb") as fh:
        fh.seek(start)
        left = length
        # The walrus stops the loop on EOF (empty read) as well as on budget.
        while left > 0 and (piece := fh.read(min(chunk, left))):
            left -= len(piece)
            yield piece
|
||||
|
||||
|
||||
@app.get("/api/audio/{episode_id}")
def stream_audio(episode_id: int, request: Request):
    """Stream an episode's MP3, honouring a single-range Range header.

    A usable Range header produces a 206 response with Content-Range;
    a missing or unusable one (see ``_parse_range``) produces a 200 with
    the whole file.

    Raises:
        HTTPException: 404 when the episode row does not exist or its
            audio file cannot be resolved on disk (e.g. a deployment
            without an episodes/ tree).
    """
    conn: sqlite3.Connection = app.state.db
    row = conn.execute(
        "SELECT rel_path FROM episodes WHERE id = ?", (episode_id,)
    ).fetchone()
    if not row:
        raise HTTPException(404, "episode not found")
    mp3 = _resolve_audio_path(row["rel_path"])
    if mp3 is None:
        raise HTTPException(404, "audio file missing")

    total = mp3.stat().st_size
    raw_range = request.headers.get("range") or request.headers.get("Range")
    byte_range = _parse_range(raw_range, total) if raw_range else None

    if byte_range is None:
        # Full content
        status, first, count = 200, 0, total
    else:
        first, last = byte_range
        status, count = 206, last - first + 1

    headers = {
        "Accept-Ranges": "bytes",
        "Cache-Control": "public, max-age=86400",
        "Content-Type": "audio/mpeg",
        "Content-Length": str(count),
    }
    if byte_range is not None:
        headers["Content-Range"] = f"bytes {first}-{last}/{total}"

    return StreamingResponse(
        _file_iter(mp3, first, count),
        status_code=status,
        headers=headers,
        media_type="audio/mpeg",
    )
|
||||
|
||||
|
||||
@app.get("/api/callers")
|
||||
def top_callers(limit: int = 50):
|
||||
db: sqlite3.Connection = app.state.db
|
||||
@@ -240,6 +505,186 @@ def index():
|
||||
return INDEX_HTML
|
||||
|
||||
|
||||
# --- Single-episode HTML transcript view --------------------------------
|
||||
|
||||
|
||||
def _fmt_time(sec: float | None) -> str:
|
||||
if sec is None:
|
||||
return ""
|
||||
s = int(sec)
|
||||
return f"{s // 60}:{s % 60:02d}"
|
||||
|
||||
|
||||
def _episode_html(episode_id: int) -> str:
    """Render the single-episode transcript page as an HTML string.

    The page body is the episode's segments in ``seg_idx`` order, with an
    intro marker (anchor ``#intro-<id>``) or a Q&A block (anchor
    ``#qa-<id>``) emitted just before the first segment whose start time
    is at or after the marker's time. Markers later than every segment are
    appended at the end. A summary of intros and Q&A pairs is rendered for
    the side rail. All database text is HTML-escaped before insertion.

    Args:
        episode_id: Primary key of the episode to render.

    Returns:
        ``EPISODE_HTML`` filled in with the episode's data.

    Raises:
        HTTPException: 404 when no episode has this id.
    """
    db: sqlite3.Connection = app.state.db
    ep = db.execute("SELECT * FROM episodes WHERE id = ?", (episode_id,)).fetchone()
    if not ep:
        raise HTTPException(404, "episode not found")
    intros = db.execute(
        "SELECT id, name, role_hint, intro_time_sec FROM intros "
        "WHERE episode_id = ? ORDER BY intro_time_sec",
        (episode_id,),
    ).fetchall()
    qa = db.execute(
        "SELECT id, question_start_sec, question_end_sec, answer_start_sec, "
        " answer_end_sec, question_text, answer_text, caller_name, "
        " caller_role, usefulness_score, topic_class, is_banter "
        "FROM qa_pairs WHERE episode_id = ? ORDER BY question_start_sec",
        (episode_id,),
    ).fetchall()
    segments = db.execute(
        "SELECT seg_idx, start_sec, end_sec, text FROM segments "
        "WHERE episode_id = ? ORDER BY seg_idx",
        (episode_id,),
    ).fetchall()

    esc = _html.escape
    title = esc(ep["title"] or f"Episode {episode_id}")
    air = esc(ep["air_date"] or "")
    year = ep["year"]
    duration_min = round((ep["duration_sec"] or 0) / 60.0, 1)
    rel_path = esc(ep["rel_path"] or "")

    def _by_time(pair):
        # Sort on the timestamp only. Without a key, two equal timestamps
        # (including two NULLs coerced to 0.0) make sorted() compare the
        # row objects themselves, which raises TypeError.
        return pair[0]

    def _badge(score) -> str:
        # Coloured usefulness badge; rows without a score get none.
        if score is None:
            return ""
        return f'<span class="badge s{score}" title="usefulness {score}/5">{score}</span>'

    def _role_suffix(role_hint) -> str:
        # Optional " (role)" suffix shown after an intro name.
        role = esc(role_hint or "")
        return f' <span class="muted">({role})</span>' if role else ""

    # (start time, row) pairs so Q&A blocks can be spliced into the segment
    # stream chronologically. sorted() is stable, so ties keep query order.
    qa_rows = [dict(r) for r in qa]
    qa_starts = sorted(
        (((r["question_start_sec"] or 0.0), r) for r in qa_rows),
        key=_by_time,
    )

    # Right rail summary lists
    intro_items = []
    for r in intros:
        t = _fmt_time(r["intro_time_sec"])
        name = esc(r["name"] or "?")
        role_html = _role_suffix(r["role_hint"])
        intro_items.append(
            f'<li><a href="#intro-{r["id"]}" data-seek="{r["intro_time_sec"] or 0}">'
            f'{t}</a> · {name}{role_html}</li>'
        )
    intros_html = "\n".join(intro_items) or '<li class="muted">none</li>'

    qa_items = []
    for r in qa_rows:
        t = _fmt_time(r["question_start_sec"])
        badge = _badge(r["usefulness_score"])
        topic = esc(r["topic_class"] or "")
        topic_html = f'<span class="topic">{topic}</span> ' if topic else ""
        caller = esc(r["caller_name"] or "")
        caller_html = f' · {caller}' if caller else ""
        # Short teaser: the excerpt, hard-capped at 80 characters.
        teaser = esc(_excerpt(r["question_text"] or "")[:80])
        qa_items.append(
            f'<li><a href="#qa-{r["id"]}" data-seek="{r["question_start_sec"] or 0}">'
            f'{t}</a> {badge}{topic_html}<span class="muted">{teaser}</span>{caller_html}</li>'
        )
    qa_summary_html = "\n".join(qa_items) or '<li class="muted">none</li>'

    # Build the chronological transcript body. We walk segments and, before
    # any segment whose start_sec >= a Q&A's question_start, we emit the
    # Q&A block. (Q&A blocks contain the full question/answer text already,
    # so segment text becomes context around them.)
    body_parts: list[str] = []
    qa_iter = iter(qa_starts)
    next_qa: tuple[float, dict] | None = next(qa_iter, None)

    # Intros also get inline anchors so the right-rail jump links work
    intro_by_time = sorted(
        (((r["intro_time_sec"] or 0.0), r) for r in intros),
        key=_by_time,
    )
    intro_iter = iter(intro_by_time)
    next_intro = next(intro_iter, None)

    def _flush_inline_at(t_seg: float) -> None:
        # Emit every pending intro marker and Q&A block timed at or before
        # t_seg, advancing the two shared cursors.
        nonlocal next_intro, next_qa
        while next_intro and next_intro[0] <= t_seg:
            ir = next_intro[1]
            tlbl = _fmt_time(ir["intro_time_sec"])
            name = esc(ir["name"] or "?")
            role_html = _role_suffix(ir["role_hint"])
            body_parts.append(
                f'<div class="intro-marker" id="intro-{ir["id"]}">'
                f'<a class="ts" href="#" data-seek="{ir["intro_time_sec"] or 0}">'
                f'{tlbl}</a> intro: <b>{name}</b>{role_html}'
                f'</div>'
            )
            next_intro = next(intro_iter, None)
        while next_qa and next_qa[0] <= t_seg:
            qr = next_qa[1]
            qstart = qr["question_start_sec"] or 0.0
            # Fall back to the question start when the answer has no time.
            astart = qr["answer_start_sec"] or qstart
            score = qr["usefulness_score"]
            badge = _badge(score)
            topic = esc(qr["topic_class"] or "")
            topic_html = f'<span class="topic">{topic}</span> ' if topic else ""
            caller = esc(qr["caller_name"] or "")
            caller_html = f' · <i>{caller}</i>' if caller else ""
            qbody = esc(qr["question_text"] or "")
            abody = esc(qr["answer_text"] or "")
            # Low-scored or banter pairs are rendered dimmed.
            dim = " dim" if (score is not None and score <= 2) or qr["is_banter"] == 1 else ""
            body_parts.append(
                f'<div class="qa{dim}" id="qa-{qr["id"]}">'
                f'<div class="qa-head">{badge}{topic_html}'
                f'<a class="ts" href="#" data-seek="{qstart}">{_fmt_time(qstart)}</a>'
                f' Q&A{caller_html}'
                f'<button class="play" data-seek="{qstart}">play from here</button>'
                f'</div>'
                f'<div class="qa-q"><b>Q:</b> {qbody}</div>'
                f'<div class="qa-a">'
                f'<a class="ts inline" href="#" data-seek="{astart}">{_fmt_time(astart)}</a>'
                f' <b>A:</b> {abody}</div>'
                f'</div>'
            )
            next_qa = next(qa_iter, None)

    for s in segments:
        t_seg = s["start_sec"] or 0.0
        _flush_inline_at(t_seg)
        seg_text = esc(s["text"] or "").strip()
        if not seg_text:
            continue
        body_parts.append(
            f'<p class="seg">'
            f'<a class="ts" href="#" data-seek="{t_seg}">{_fmt_time(t_seg)}</a> '
            f'{seg_text}</p>'
        )
    # Flush any tail intros / Q&As after final segment
    _flush_inline_at(float("inf"))

    body_html = "\n".join(body_parts) or '<p class="muted">no transcript</p>'

    return EPISODE_HTML.format(
        title=title,
        episode_id=episode_id,
        year=year,
        air=air,
        duration_min=duration_min,
        rel_path=rel_path,
        qa_count=len(qa_rows),
        intro_count=len(intros),
        intros_summary=intros_html,
        qa_summary=qa_summary_html,
        body=body_html,
    )
|
||||
|
||||
|
||||
@app.get("/episode/{episode_id}", response_class=HTMLResponse)
def episode_page(episode_id: int):
    """Serve the HTML transcript view for one episode.

    Rendering, including the 404 for an unknown id, is delegated to
    ``_episode_html``.
    """
    page = _episode_html(episode_id)
    return page
|
||||
|
||||
|
||||
INDEX_HTML = """<!doctype html>
|
||||
<html lang=en>
|
||||
<meta charset=utf-8>
|
||||
@@ -249,6 +694,7 @@ INDEX_HTML = """<!doctype html>
|
||||
h1 { margin: 0 0 .25em; }
|
||||
.sub { color:#666; margin-bottom: 1.5em; }
|
||||
input[type=search] { width: 100%; padding: .6em .8em; font-size: 16px; box-sizing: border-box; }
|
||||
input[type=search]:disabled { background:#f4f4f4; color:#999; }
|
||||
.controls { display:flex; gap:.5em; align-items:center; margin: .5em 0 1em; flex-wrap: wrap; }
|
||||
.controls label { font-size: 13px; color:#555; }
|
||||
.group { border-bottom: 1px solid #eee; padding: 1em 0; }
|
||||
@@ -257,6 +703,9 @@ INDEX_HTML = """<!doctype html>
|
||||
.hit .meta { font-size: 12px; color: #888; }
|
||||
.hit a { color: #06c; text-decoration: none; }
|
||||
.hit a:hover { text-decoration: underline; }
|
||||
a.hit-link { display:block; color:inherit; text-decoration:none; padding: .5em .35em; margin: 0 -.35em; border-radius: 4px; }
|
||||
a.hit-link:hover { background: #f6f8fa; }
|
||||
a.hit-link .meta a { color:#06c; }
|
||||
mark { background: #ffec99; padding: 0 .15em; }
|
||||
.stats { font-size: 12px; color:#666; margin-top: 2em; }
|
||||
.empty { color:#999; padding: 1em 0; }
|
||||
@@ -271,7 +720,13 @@ INDEX_HTML = """<!doctype html>
|
||||
.badge.s1 { background: #b85a4a; }
|
||||
.topic { font-size: 11px; color: #888; padding: 0 .35em; border-radius: 3px;
|
||||
background: #f0f0f0; }
|
||||
.hit.dim { opacity: .55; }
|
||||
.hit.dim, a.hit-link.dim { opacity: .55; }
|
||||
button.more { margin: 1em 0; padding: .5em 1em; font-size: 13px; cursor: pointer;
|
||||
background: #f4f4f4; border: 1px solid #ccc; border-radius: 3px; }
|
||||
button.more:hover { background: #ececec; }
|
||||
button.more:disabled { color: #999; cursor: default; }
|
||||
.browse-bar { background:#f8f8f4; border:1px solid #ece8d8; padding:.4em .6em;
|
||||
border-radius: 4px; margin: 0 0 1em; font-size: 13px; color:#665; }
|
||||
</style>
|
||||
<h1>Computer Guru Radio Archive</h1>
|
||||
<div class=sub id=sub>...</div>
|
||||
@@ -292,6 +747,21 @@ INDEX_HTML = """<!doctype html>
|
||||
</label>
|
||||
<label><input type=checkbox id=exclude_banter> hide banter</label>
|
||||
</span>
|
||||
<span style="border-left:1px solid #ddd; padding-left:.6em">
|
||||
<label><input type=checkbox id=browse_all> Browse all Q&A</label>
|
||||
</span>
|
||||
</div>
|
||||
<div class=controls id=browse_controls style="display:none">
|
||||
<label>year
|
||||
<select id=browse_year><option value="">any</option></select>
|
||||
</label>
|
||||
<label>sort
|
||||
<select id=browse_order>
|
||||
<option value=air_date_desc>air date (newest first)</option>
|
||||
<option value=air_date_asc>air date (oldest first)</option>
|
||||
<option value=score_desc>usefulness score</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
<div id=results></div>
|
||||
<div class=stats id=stats></div>
|
||||
@@ -299,21 +769,56 @@ INDEX_HTML = """<!doctype html>
|
||||
const q = document.getElementById('q');
|
||||
const results = document.getElementById('results');
|
||||
const sub = document.getElementById('sub');
|
||||
const stats = document.getElementById('stats');
|
||||
const browseToggle = document.getElementById('browse_all');
|
||||
const browseControls = document.getElementById('browse_controls');
|
||||
const browseYear = document.getElementById('browse_year');
|
||||
const browseOrder = document.getElementById('browse_order');
|
||||
const minScoreEl = document.getElementById('min_score');
|
||||
const excludeBanterEl = document.getElementById('exclude_banter');
|
||||
|
||||
let browseOffset = 0;
|
||||
const BROWSE_LIMIT = 50;
|
||||
let browseTotal = 0;
|
||||
|
||||
fetch('/api/stats').then(r => r.json()).then(s => {
|
||||
const c = s.counts;
|
||||
sub.textContent = `${c.episodes} episodes / ${c.qa_pairs} Q&A pairs / ${c.intros} intros / ${c.segments.toLocaleString()} segments`;
|
||||
const yrs = (s.by_year || []).map(x => x.year).sort((a,b)=>b-a);
|
||||
for (const y of yrs) {
|
||||
const o = document.createElement('option');
|
||||
o.value = y; o.textContent = y;
|
||||
browseYear.appendChild(o);
|
||||
}
|
||||
});
|
||||
|
||||
let timer;
|
||||
q.addEventListener('input', () => {
|
||||
if (browseToggle.checked) return;
|
||||
clearTimeout(timer);
|
||||
timer = setTimeout(runSearch, 250);
|
||||
});
|
||||
document.querySelectorAll('input[name=kind]').forEach(el => el.addEventListener('change', runSearch));
|
||||
document.getElementById('min_score').addEventListener('change', runSearch);
|
||||
document.getElementById('exclude_banter').addEventListener('change', runSearch);
|
||||
document.querySelectorAll('input[name=kind]').forEach(el => el.addEventListener('change', () => { if (!browseToggle.checked) runSearch(); }));
|
||||
minScoreEl.addEventListener('change', refresh);
|
||||
excludeBanterEl.addEventListener('change', refresh);
|
||||
browseToggle.addEventListener('change', () => {
|
||||
const on = browseToggle.checked;
|
||||
browseControls.style.display = on ? 'flex' : 'none';
|
||||
q.disabled = on;
|
||||
if (on) {
|
||||
if (minScoreEl.value === '0') minScoreEl.value = '3';
|
||||
runBrowse(true);
|
||||
} else {
|
||||
results.innerHTML = '';
|
||||
if (q.value.trim().length >= 2) runSearch();
|
||||
}
|
||||
});
|
||||
browseYear.addEventListener('change', () => runBrowse(true));
|
||||
browseOrder.addEventListener('change', () => runBrowse(true));
|
||||
|
||||
// Re-run whichever view is active after a shared filter changed:
// the browse list when browsing, otherwise the search (2+ chars only).
function refresh() {
  if (browseToggle.checked) {
    runBrowse(true);
    return;
  }
  const term = q.value.trim();
  if (term.length >= 2) runSearch();
}
|
||||
|
||||
function fmtTime(s) {
|
||||
if (s == null) return '';
|
||||
@@ -327,12 +832,40 @@ function escapeHtml(s) {
|
||||
}[c]));
|
||||
}
|
||||
|
||||
// Render one Q&A hit (from /api/search or /api/qa) as a link to its
// anchor on the episode page. `opts` is accepted for call compatibility
// and is currently unused.
function qaHitHtml(h, opts) {
  // air_date comes from the database like title/caller_name, so it is
  // escaped the same way before going into innerHTML.
  const ad = h.air_date ? ` (${escapeHtml(String(h.air_date))})` : '';
  const cn = h.caller_name ? ` — ${escapeHtml(h.caller_name)}` : '';
  const score = h.usefulness_score;
  const topic = h.topic_class;
  const banter = h.is_banter === 1;
  const badge = score != null
    ? `<span class="badge s${score}" title="usefulness ${score}/5">${score}</span>`
    : '';
  const topicTag = topic
    ? `<span class=topic>${escapeHtml(topic)}</span> `
    : '';
  // Low-scored or banter pairs are rendered dimmed.
  const dim = (score != null && score <= 2) || banter ? ' dim' : '';
  // Prefer plain-text excerpt, fall back to FTS snippet (which may include <mark>).
  const qBody = h.question_excerpt
    ? escapeHtml(h.question_excerpt)
    : (h.q_snippet || '');
  const aBody = h.answer_excerpt
    ? escapeHtml(h.answer_excerpt)
    : (h.a_snippet || '');
  const href = `/episode/${h.episode_id}#qa-${h.qa_id}`;
  return `<a class="hit-link${dim}" href="${href}">
    <div class=meta>${badge}${topicTag}${h.year} · ${escapeHtml(h.title)}${ad}${cn} · @ ${fmtTime(h.question_start_sec)}</div>
    <div><b>Q:</b> ${qBody}</div>
    <div><b>A:</b> ${aBody}</div>
  </a>`;
}
|
||||
|
||||
async function runSearch() {
|
||||
const term = q.value.trim();
|
||||
if (term.length < 2) { results.innerHTML = ''; return; }
|
||||
const kind = document.querySelector('input[name=kind]:checked').value;
|
||||
const minScore = document.getElementById('min_score').value;
|
||||
const excludeBanter = document.getElementById('exclude_banter').checked;
|
||||
const minScore = minScoreEl.value;
|
||||
const excludeBanter = excludeBanterEl.checked;
|
||||
const params = new URLSearchParams({ q: term, kind, limit: '40' });
|
||||
if (minScore !== '0') params.set('min_score', minScore);
|
||||
if (excludeBanter) params.set('exclude_banter', 'true');
|
||||
@@ -341,25 +874,7 @@ async function runSearch() {
|
||||
let html = '';
|
||||
if (j.qa.length) {
|
||||
html += '<div class=group><h3>Q&A Pairs</h3>';
|
||||
for (const h of j.qa) {
|
||||
const ad = h.air_date ? ` (${h.air_date})` : '';
|
||||
const cn = h.caller_name ? ` — ${escapeHtml(h.caller_name)}` : '';
|
||||
const score = h.usefulness_score;
|
||||
const topic = h.topic_class;
|
||||
const banter = h.is_banter === 1;
|
||||
const badge = score != null
|
||||
? `<span class="badge s${score}" title="usefulness ${score}/5">${score}</span>`
|
||||
: '';
|
||||
const topicTag = topic
|
||||
? `<span class=topic>${escapeHtml(topic)}</span> `
|
||||
: '';
|
||||
const dim = (score != null && score <= 2) || banter ? ' dim' : '';
|
||||
html += `<div class="hit${dim}">
|
||||
<div class=meta>${badge}${topicTag}${h.year} · ${escapeHtml(h.title)}${ad}${cn} · @ ${fmtTime(h.question_start_sec)}</div>
|
||||
<div><b>Q:</b> ${h.q_snippet}</div>
|
||||
<div><b>A:</b> ${h.a_snippet}</div>
|
||||
</div>`;
|
||||
}
|
||||
for (const h of j.qa) html += qaHitHtml(h);
|
||||
html += '</div>';
|
||||
}
|
||||
if (j.segments.length) {
|
||||
@@ -367,7 +882,7 @@ async function runSearch() {
|
||||
for (const h of j.segments) {
|
||||
const ad = h.air_date ? ` (${h.air_date})` : '';
|
||||
html += `<div class=hit>
|
||||
<div class=meta>${h.year} · ${h.title}${ad} · @ ${fmtTime(h.start_sec)}</div>
|
||||
<div class=meta>${h.year} · ${escapeHtml(h.title)}${ad} · @ ${fmtTime(h.start_sec)}</div>
|
||||
<div>${h.snippet}</div>
|
||||
</div>`;
|
||||
}
|
||||
@@ -378,6 +893,185 @@ async function runSearch() {
|
||||
}
|
||||
results.innerHTML = html;
|
||||
}
|
||||
|
||||
// Load one page of /api/qa using the current filters.
//   reset = true  -> start again from offset 0 and replace the result list
//   reset = false -> append the next page below the rows already shown
async function runBrowse(reset) {
  if (reset) browseOffset = 0;
  const params = new URLSearchParams({
    limit: String(BROWSE_LIMIT),
    offset: String(browseOffset),
    order: browseOrder.value,
  });
  if (browseYear.value) params.set('year', browseYear.value);
  const ms = minScoreEl.value;
  if (ms !== '0') params.set('min_score', ms);
  if (excludeBanterEl.checked) params.set('exclude_banter', 'true');

  const r = await fetch(`/api/qa?${params}`);
  if (!r.ok) {
    // An error payload has no items/total; do not present it as
    // "no matches", and leave rows that are already shown untouched.
    if (reset) results.innerHTML = '<div class=empty>could not load Q&A pairs</div>';
    return;
  }
  const j = await r.json();
  const items = j.items || [];
  browseTotal = j.total;

  if (reset && items.length === 0) {
    results.innerHTML = '<div class=empty>no Q&A pairs match these filters</div>';
    return;
  }

  const shown = browseOffset + items.length;
  const newRows = items.map(h => qaHitHtml(h)).join('');
  const header = `<div class=browse-bar>Showing ${Math.min(shown, browseTotal)} of ${browseTotal} Q&A pairs</div>`;
  const moreEnabled = shown < browseTotal;

  if (reset) {
    results.innerHTML = `${header}<div class=group id=qa_browse><h3>Q&A Pairs</h3>${newRows}</div>` +
      (moreEnabled ? '<button class=more id=load_more>load more</button>' : '');
  } else {
    const list = document.getElementById('qa_browse');
    // The list can be gone if the view was replaced while the request
    // was in flight; there is then nothing to append to.
    if (!list) return;
    list.insertAdjacentHTML('beforeend', newRows);
    // refresh the count bar
    const bar = results.querySelector('.browse-bar');
    if (bar) bar.outerHTML = header;
    const stale = document.getElementById('load_more');
    if (stale && !moreEnabled) stale.remove();
  }
  browseOffset = shown;
  const btn = document.getElementById('load_more');
  if (btn) btn.onclick = () => runBrowse(false);
}
|
||||
</script>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
# Single-episode transcript view.
#
# str.format()-style template: literal CSS/JS braces are doubled and the named
# fields below are substituted verbatim.
#   title                      page <title> and <h1>
#   year, air, duration_min    header meta line
#   rel_path                   shown in a <code> element under the meta line
#   qa_count, intro_count      header meta line and sidebar headings
#   episode_id                 builds the player URL /api/audio/<episode_id>
#   body                       main transcript markup
#   qa_summary, intros_summary contents of the two sidebar <ul> lists
# NOTE(review): nothing is escaped in this template, so callers are presumably
# expected to HTML-escape text fields before formatting -- confirm at the
# call site.
#
# Inline script behaviour:
#   * if the <audio> element reports an error, it is hidden and the
#     "not available" notice is shown; seeking then becomes a no-op;
#   * clicking any [data-seek] element (buttons, or links whose href starts
#     with "#") seeks the player to that many seconds and starts playback;
#   * on load and on every hashchange, the element named by the URL hash is
#     scrolled into view and the player is seeked to the first [data-seek]
#     descendant. Because the player uses preload=none, metadata is usually
#     not loaded yet, so the target time is parked in `pendingSeek` and applied
#     by a single loadedmetadata handler. Any valid explicit seek clears the
#     parked value, so a later click is never overridden by a stale hash seek.
EPISODE_HTML = """<!doctype html>
<html lang=en>
<meta charset=utf-8>
<title>{title} · Computer Guru Radio Archive</title>
<style>
  body {{ font: 14px/1.5 ui-sans-serif, system-ui; max-width: 1080px; margin: 1.5em auto;
         padding: 0 1em; color: #222; }}
  h1 {{ margin: 0; font-size: 20px; }}
  .topbar {{ display: flex; gap: 1em; align-items: baseline; flex-wrap: wrap;
            border-bottom: 1px solid #eee; padding-bottom: .75em; margin-bottom: 1em; }}
  .topbar .meta {{ color: #666; font-size: 12px; }}
  .topbar a {{ color: #06c; text-decoration: none; }}
  .topbar a:hover {{ text-decoration: underline; }}
  audio {{ width: 100%; margin: .5em 0 1em; }}
  .layout {{ display: grid; grid-template-columns: 1fr 280px; gap: 2em; }}
  @media (max-width: 760px) {{ .layout {{ grid-template-columns: 1fr; }} }}
  .body p.seg {{ margin: .25em 0; }}
  .ts {{ display: inline-block; min-width: 3.5em; color: #06c; text-decoration: none;
        font-variant-numeric: tabular-nums; font-size: 12px; }}
  .ts:hover {{ text-decoration: underline; }}
  .ts.inline {{ min-width: 0; margin-right: .25em; }}
  .qa {{ background: #fbf9f1; border-left: 3px solid #d8c97a;
        padding: .5em .75em; margin: 1em 0; border-radius: 3px; }}
  .qa.dim {{ opacity: .55; }}
  .qa-head {{ font-size: 12px; color: #555; margin-bottom: .35em; }}
  .qa-q {{ margin: .35em 0; }}
  .qa-a {{ margin: .35em 0; }}
  .intro-marker {{ border-left: 3px solid #b9d2ec; background: #f6f9fc;
                  padding: .35em .6em; margin: .75em 0; font-size: 13px; }}
  .badge {{ display: inline-block; min-width: 1.6em; padding: 0 .35em; margin-right: .35em;
           font-size: 11px; font-weight: 600; text-align: center; border-radius: 3px;
           color: #fff; background: #999; vertical-align: 1px; }}
  .badge.s5 {{ background: #2a8f43; }}
  .badge.s4 {{ background: #5aa54b; }}
  .badge.s3 {{ background: #999; }}
  .badge.s2 {{ background: #c08a3a; }}
  .badge.s1 {{ background: #b85a4a; }}
  .topic {{ font-size: 11px; color: #888; padding: 0 .35em; border-radius: 3px;
           background: #f0f0f0; }}
  .muted {{ color: #999; }}
  button.play {{ font-size: 11px; margin-left: .5em; padding: 1px 6px;
                border: 1px solid #ccc; border-radius: 3px; background: #fff;
                color: #06c; cursor: pointer; }}
  button.play:hover {{ background: #f6f8fa; }}
  aside h3 {{ font-size: 12px; text-transform: uppercase; letter-spacing: .04em;
             color: #666; margin: 1em 0 .35em; }}
  aside ul {{ list-style: none; margin: 0; padding: 0; font-size: 12px; }}
  aside li {{ padding: .15em 0; line-height: 1.35; }}
  aside a {{ color: #06c; text-decoration: none; }}
  aside a:hover {{ text-decoration: underline; }}
  .audio-missing {{ background: #fff7e6; border: 1px solid #f0d9a8; padding: .5em .75em;
                   border-radius: 3px; font-size: 13px; color: #8a6f2a; }}
</style>
<div class=topbar>
  <div>
    <h1>{title}</h1>
    <div class=meta>{year} · {air} · {duration_min} min · {qa_count} Q&A · {intro_count} intros · <a href="/">« back to search</a></div>
    <div class=meta><code>{rel_path}</code></div>
  </div>
</div>
<audio id=player controls preload=none src="/api/audio/{episode_id}"></audio>
<div id=audio_missing class=audio-missing style="display:none">
  Audio file is not available on this server.
</div>
<div class=layout>
  <div class=body>
    {body}
  </div>
  <aside>
    <h3>Q&A pairs ({qa_count})</h3>
    <ul>{qa_summary}</ul>
    <h3>Intros ({intro_count})</h3>
    <ul>{intros_summary}</ul>
  </aside>
</div>
<script>
(function() {{
  const player = document.getElementById('player');
  const missing = document.getElementById('audio_missing');

  // Start time requested through the URL hash that is still waiting for the
  // audio metadata to load; null when nothing is pending.
  let pendingSeek = null;

  // If the audio element fails to load, hide it and show a notice.
  player.addEventListener('error', () => {{
    player.style.display = 'none';
    missing.style.display = '';
  }});

  function seek(t) {{
    const sec = parseFloat(t);
    if (isNaN(sec)) return;
    // A valid seek request supersedes any deferred hash seek, so a later
    // loadedmetadata event cannot pull playback back to the hash target.
    pendingSeek = null;
    if (player.style.display === 'none') return;
    try {{
      player.currentTime = sec;
      player.play().catch(() => {{}});
    }} catch (e) {{}}
  }}

  // Apply the deferred hash seek, if one is still pending, once metadata is in.
  // A single long-lived handler replaces per-hashchange one-shot listeners, so
  // repeated hash changes cannot stack up several stale seeks.
  player.addEventListener('loadedmetadata', () => {{
    if (pendingSeek !== null) seek(pendingSeek);
  }});

  // Click handler for any element with a data-seek attribute.
  document.body.addEventListener('click', (ev) => {{
    const el = ev.target.closest('[data-seek]');
    if (!el) return;
    // Only intercept if it's a # anchor or button/click — let normal navigation work otherwise.
    const tag = el.tagName.toLowerCase();
    if (tag === 'a' && el.getAttribute('href') && !el.getAttribute('href').startsWith('#')) return;
    ev.preventDefault();
    const t = el.getAttribute('data-seek');
    seek(t);
    const href = el.getAttribute('href') || '';
    if (tag === 'a' && (href.startsWith('#qa-') || href.startsWith('#intro-'))) {{
      // also scroll the anchor into view
      const target = document.getElementById(href.slice(1));
      if (target) target.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
    }}
  }});

  // On page load, if the URL contains #qa-<id>, seek the audio to that Q&A's start.
  function handleHash() {{
    const h = window.location.hash;
    if (!h) return;
    const target = document.getElementById(h.slice(1));
    if (!target) return;
    target.scrollIntoView({{ block: 'start' }});
    // find nearest data-seek descendant for the start time
    const seekEl = target.querySelector('[data-seek]');
    if (seekEl) {{
      const t = seekEl.getAttribute('data-seek');
      // wait for metadata before seeking so currentTime sticks; with
      // preload=none that normally means "when playback first starts".
      if (player.readyState >= 1) seek(t);
      else pendingSeek = t;
    }}
  }}
  handleHash();
  window.addEventListener('hashchange', handleHash);
}})();
</script>
</html>
"""
|
||||
|
||||
Reference in New Issue
Block a user