radio: browseable Q&A — /api/qa, /api/audio range streaming, /episode HTML view

Make the radio archive Q&A pairs actually browseable end to end:

- /api/qa list endpoint (year, min_score, exclude_banter, topic_class, pagination, sort by air_date or score). Returns the same column shape as /api/search Q&A hits.
- /api/audio/{episode_id} streams the MP3 with HTTP Range support so the browser <audio> can seek. 206 + Content-Range when ranged, 200 when full-file. Returns 404 cleanly when episodes/ tree is absent (Jupiter).
- /episode/{id} HTML transcript view: chronological segments with clickable timestamps, Q&A blocks spliced inline (anchor #qa-<id>), intros marked inline, right-rail summary. Hash-anchor on load auto-seeks the audio.
- New question_excerpt / answer_excerpt fields on /api/search Q&A hits and on /api/qa items: trim leading run-on chatter, take ~300 chars, end on a sentence boundary or word boundary with ellipsis.
- Index UI: each Q&A hit now links to /episode/{id}#qa-{qa_id}; new "Browse all Q&A" toggle (year selector, sort, append-load 50 per page, defaults to min_score=3); FTS snippet replaced with the plain excerpt when available.

No new dependencies, no schema changes, no LLM calls. Uses EPISODES_DIR env (default /data/episodes) — Jupiter compose still only mounts /data so audio degrades gracefully to 404 there until episodes are uploaded.
2026-04-30 07:17:07 -07:00
parent e6d7c293db
commit f20a9628c3
1 changed files with 724 additions and 30 deletions
--- a/projects/radio-show/audio-processor/server/main.py
+++ b/projects/radio-show/audio-processor/server/main.py
@@ -7,22 +7,30 @@ Endpoints:
  GET /api/episodes/{id}         Episode detail: intros + qa_pairs
  GET /api/episodes/{id}/transcript  Chronologically merged segments + turns
  GET /api/search?q=...&kind=... FTS over segments and/or qa_pairs
+  GET /api/qa                    List Q&A pairs (no search query, filterable)
+  GET /api/audio/{id}            Stream the episode MP3 (HTTP Range supported)
  GET /api/callers               Top recurring caller_names
+  GET /episode/{id}              HTML transcript view with audio player

 Config via env:
  ARCHIVE_DB    path to archive.db   (default /data/archive.db)
+  EPISODES_DIR  path to mp3 tree     (default /data/episodes)
  PORT          listen port          (default 8765)
 """
+import html as _html
 import json
 import os
+import re
 import sqlite3
 from contextlib import asynccontextmanager
 from pathlib import Path
+from typing import Iterator

-from fastapi import FastAPI, HTTPException, Query
-from fastapi.responses import FileResponse, HTMLResponse
+from fastapi import FastAPI, HTTPException, Query, Request
+from fastapi.responses import FileResponse, HTMLResponse, Response, StreamingResponse

 DB_PATH = os.environ.get("ARCHIVE_DB", "/data/archive.db")
+EPISODES_DIR = os.environ.get("EPISODES_DIR", "/data/episodes")
 PORT = int(os.environ.get("PORT", "8765"))


@@ -49,6 +57,72 @@ def fts_escape(q: str) -> str:
    return " ".join(f'"{tok}"' for tok in q.split() if tok)


+# Excerpt extraction for Q&A texts.
+#
+# Whisper transcripts often start with disfluent run-on chatter that's a
+# leftover from the previous turn. We trim that prefix, take the first 300
+# chars, and try to end on a sentence boundary so the excerpt reads cleanly.
+_EXCERPT_BODY = 300       # primary character budget
+_EXCERPT_LOOKAHEAD = 80   # extra chars allowed to find a sentence end
+_EXCERPT_LEAD_SCAN = 30   # window to look for a leading capital letter


+def _excerpt(text: str | None) -> str:
+    """Return a short, readable excerpt suitable for browsing.

+    Rules (intentionally simple):
+      1. If a word-initial capital letter appears within the first
+         ``_EXCERPT_LEAD_SCAN`` chars, drop everything before it; otherwise
+         keep the original start. Capitals in the middle of a word
+         ("iPhone") are ignored so the excerpt never opens on half a word.
+      2. Text of at most ``_EXCERPT_BODY`` chars is returned whole.
+      3. Otherwise take the first ``_EXCERPT_BODY`` chars; if that cut does
+         not end on . ! or ?, look up to ``_EXCERPT_LOOKAHEAD`` more chars
+         ahead and end on the first terminator found.
+      4. Failing that, back up to the last space and append "..." so we
+         never display half a word.

+    Returns "" for None, empty, or whitespace-only input.
+    """
+    if not text:
+        return ""
+    s = text.strip()
+    if not s:
+        return ""

+    # 1. trim disfluent leading run-on to the first capital letter that
+    #    starts a word; \b stops us from cutting "iPhone" down to "Phone"
+    cap_match = re.search(r"\b[A-Z]", s[:_EXCERPT_LEAD_SCAN])
+    if cap_match and cap_match.start() > 0:
+        s = s[cap_match.start():]

+    # 2. short enough already: nothing to cut
+    if len(s) <= _EXCERPT_BODY:
+        return s

+    body = s[:_EXCERPT_BODY]
+    if body[-1] in ".!?":
+        # the cut happens to land right after a terminator
+        return body

+    # 3. the body ends mid-sentence: look ahead for a terminator
+    ahead = s[_EXCERPT_BODY:_EXCERPT_BODY + _EXCERPT_LOOKAHEAD]
+    m = re.search(r"[.!?]", ahead)
+    if m:
+        return body + ahead[: m.end()]

+    # 4. back up to the last space, drop trailing punctuation, ellipsize
+    cut = body.rfind(" ")
+    if cut > 0:
+        return body[:cut].rstrip(",;:- ") + "..."
+    return body + "..."
+
+
+def _qa_search_excerpts(row: dict) -> dict:
+    """Attach ``question_excerpt`` / ``answer_excerpt`` to a Q&A row.

+    The full texts travel in the row under the private ``_question_text``
+    and ``_answer_text`` keys. Each one is popped (so it is not returned to
+    the client) and replaced by its plain-text excerpt. Any other keys in
+    the row, such as the highlighted q_snippet/a_snippet, are left alone.
+    The row is modified in place and also returned.
+    """
+    for part in ("question", "answer"):
+        full_text = row.pop(f"_{part}_text", None)
+        row[f"{part}_excerpt"] = _excerpt(full_text)
+    return row
+
+
@app.get("/api/episodes")
 def list_episodes(year: int | None = None, limit: int = 1000):
    db: sqlite3.Connection = app.state.db
@@ -175,6 +249,8 @@ def search(
                   p.id AS qa_id, p.caller_name,
                   p.question_start_sec, p.answer_start_sec,
                   p.usefulness_score, p.topic_class, p.is_banter,
+                   p.question_text AS _question_text,
+                   p.answer_text   AS _answer_text,
                   snippet(qa_fts, 0, '<mark>', '</mark>', '...', 16) AS q_snippet,
                   snippet(qa_fts, 1, '<mark>', '</mark>', '...', 16) AS a_snippet,
                   bm25(qa_fts) AS rank
@@ -184,11 +260,200 @@ def search(
            WHERE {' AND '.join(qa_clauses)}
            ORDER BY rank LIMIT :limit
        """
-        qa_results = [dict(r) for r in db.execute(qa_sql, qa_params).fetchall()]
+        qa_results = [
+            _qa_search_excerpts(dict(r))
+            for r in db.execute(qa_sql, qa_params).fetchall()
+        ]

    return {"q": q, "segments": seg_results, "qa": qa_results}


+# Whitelist of sort keys -> fixed ORDER BY fragments. The user-supplied key
+# is only ever looked up here; the mapped fragment, never the raw input, is
+# what gets interpolated into the SQL.
+_QA_SORT_ORDERS: dict[str, str] = dict(
+    air_date_desc="COALESCE(e.air_date, '0000') DESC, p.question_start_sec ASC",
+    air_date_asc="COALESCE(e.air_date, '9999') ASC,  p.question_start_sec ASC",
+    score_desc=", ".join((
+        "COALESCE(p.usefulness_score, 0) DESC",
+        "COALESCE(e.air_date, '0000') DESC",
+        "p.question_start_sec ASC",
+    )),
+)
+
+
+@app.get("/api/qa")
+def list_qa(
+    year: int | None = None,
+    min_score: int = Query(0, ge=0, le=5),
+    exclude_banter: bool = Query(False),
+    topic_class: str | None = None,
+    limit: int = Query(50, ge=1, le=200),
+    offset: int = Query(0, ge=0),
+    order: str = Query("air_date_desc"),
+):
+    """List Q&A pairs without a search query, filtered and paginated.

+    Filters are ANDed together and each is applied only when set:
+      * year           - exact match on the episode year
+      * min_score      - keeps rows scoring >= min_score; rows with a NULL
+                         score also pass
+      * exclude_banter - keeps rows whose is_banter is NULL or 0
+      * topic_class    - exact match

+    ``order`` must be a key of ``_QA_SORT_ORDERS``; anything else is a 400.

+    Returns ``{"total": <rows matching the filters>, "items": [...]}``
+    where each item carries the episode/Q&A columns selected below plus
+    ``question_excerpt`` and ``answer_excerpt``.
+    """
+    db: sqlite3.Connection = app.state.db
+    if order not in _QA_SORT_ORDERS:
+        raise HTTPException(400, f"unknown order: {order}")
+    order_sql = _QA_SORT_ORDERS[order]

+    # (enabled?, SQL clause, bound values) - evaluated in this order so the
+    # WHERE text stays stable for a given combination of filters.
+    optional_filters = (
+        (year is not None, "e.year = :year", {"year": year}),
+        (
+            min_score > 0,
+            "(p.usefulness_score IS NULL OR p.usefulness_score >= :min_score)",
+            {"min_score": min_score},
+        ),
+        (exclude_banter, "(p.is_banter IS NULL OR p.is_banter = 0)", {}),
+        (bool(topic_class), "p.topic_class = :topic_class", {"topic_class": topic_class}),
+    )
+    where = ["1=1"]
+    params: dict[str, object] = {}
+    for enabled, clause, bound in optional_filters:
+        if enabled:
+            where.append(clause)
+            params.update(bound)
+    where_sql = " AND ".join(where)

+    # Total is counted over the filtered set, ignoring limit/offset, so the
+    # client can tell how many pages remain.
+    count_row = db.execute(
+        f"""SELECT COUNT(*) FROM qa_pairs p
+            JOIN episodes e ON e.id = p.episode_id
+            WHERE {where_sql}""",
+        params,
+    ).fetchone()
+    total = count_row[0]

+    page_params = {**params, "limit": limit, "offset": offset}
+    cursor = db.execute(
+        f"""SELECT e.id AS episode_id, e.year, e.title, e.air_date,
+                   p.id AS qa_id, p.caller_name,
+                   p.question_start_sec, p.answer_start_sec,
+                   p.usefulness_score, p.topic_class, p.is_banter,
+                   p.question_text AS _question_text,
+                   p.answer_text   AS _answer_text
+            FROM qa_pairs p
+            JOIN episodes e ON e.id = p.episode_id
+            WHERE {where_sql}
+            ORDER BY {order_sql}
+            LIMIT :limit OFFSET :offset""",
+        page_params,
+    )

+    return {
+        "total": total,
+        "items": [_qa_search_excerpts(dict(r)) for r in cursor.fetchall()],
+    }
+
+
+# --- Audio streaming with HTTP Range support ----------------------------
+
+_AUDIO_CHUNK = 64 * 1024
+
+
+def _resolve_audio_path(rel_path: str) -> Path | None:
+    """Map an ``episodes.rel_path`` value to an existing file under EPISODES_DIR.

+    rel_path looks like "2010/10 - October/10-02-10 HR 1.mp3". Both the
+    root and the joined path are fully resolved before the containment
+    check, so a row whose path climbs out of the episodes root (via ".."
+    or an absolute path) is rejected rather than served.

+    Returns the resolved Path, or None when rel_path is empty, escapes the
+    root, or does not point at a regular file.
+    """
+    if not rel_path:
+        return None
+    root = Path(EPISODES_DIR).resolve()
+    target = (root / rel_path).resolve()
+    if target.is_relative_to(root) and target.is_file():
+        return target
+    return None
+
+
+def _parse_range(header: str, file_size: int) -> tuple[int, int] | None:
+    """Parse a single ``bytes=START-END`` Range header value.

+    Supported forms: ``START-END``, ``START-`` (to end of file) and ``-N``
+    (last N bytes). The end offset is clamped to the last byte of the file.

+    Returns an inclusive ``(start, end)`` pair, or None when the header is
+    missing, is not a ``bytes=`` range, lists several ranges, is malformed,
+    or starts at or beyond the end of the file.
+    """
+    unit = "bytes="
+    if not header or not header.startswith(unit):
+        return None
+    spec = header[len(unit):].strip()
+    # Multi-range requests are not supported; a spec without "-" is invalid.
+    if "," in spec or "-" not in spec:
+        return None

+    first, _, last = spec.partition("-")
+    last_byte = file_size - 1
+    try:
+        if first:
+            start = int(first)
+            end = int(last) if last else last_byte
+        else:
+            # suffix range: "-N" -> last N bytes
+            suffix_len = int(last)
+            if suffix_len <= 0:
+                return None
+            start = max(0, file_size - suffix_len)
+            end = last_byte
+    except ValueError:
+        return None

+    if start < 0 or end < start or start >= file_size:
+        return None
+    return start, min(end, last_byte)
+
+
+def _file_iter(path: Path, start: int, length: int,
+               chunk: int = _AUDIO_CHUNK) -> Iterator[bytes]:
+    """Yield up to ``length`` bytes of ``path`` beginning at offset ``start``.

+    Data is read in pieces of at most ``chunk`` bytes. Iteration stops once
+    ``length`` bytes have been produced or the file runs out, whichever
+    comes first. The file is opened on first iteration and closed when the
+    generator finishes.
+    """
+    left = length
+    with open(path, "rb") as fh:
+        fh.seek(start)
+        # An empty read means EOF arrived before `length` bytes were sent.
+        while left > 0 and (piece := fh.read(min(chunk, left))):
+            left -= len(piece)
+            yield piece
+
+
+@app.get("/api/audio/{episode_id}")
+def stream_audio(episode_id: int, request: Request):
+    """Stream the episode's MP3, honouring a single-range Range header.

+    Responses:
+      * 404 - no such episode row, or its file is not present on disk
+      * 206 - a usable Range header was sent; the body is that byte span
+              and Content-Range describes it
+      * 200 - no Range header, or one that `_parse_range` rejects; the
+              body is the whole file
+    """
+    db: sqlite3.Connection = app.state.db
+    ep = db.execute("SELECT rel_path FROM episodes WHERE id = ?", (episode_id,)).fetchone()
+    if not ep:
+        raise HTTPException(404, "episode not found")
+    path = _resolve_audio_path(ep["rel_path"])
+    if path is None:
+        raise HTTPException(404, "audio file missing")

+    file_size = path.stat().st_size
+    range_header = request.headers.get("range") or request.headers.get("Range")
+    rng = _parse_range(range_header, file_size) if range_header else None

+    # Work out which span to send; with no usable range that is the file.
+    if rng is None:
+        start, length, status = 0, file_size, 200
+    else:
+        start, end = rng
+        length = end - start + 1
+        status = 206

+    headers = {
+        "Accept-Ranges": "bytes",
+        "Cache-Control": "public, max-age=86400",
+        "Content-Type": "audio/mpeg",
+        "Content-Length": str(length),
+    }
+    if rng is not None:
+        headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"

+    return StreamingResponse(
+        _file_iter(path, start, length),
+        status_code=status,
+        headers=headers,
+        media_type="audio/mpeg",
+    )
+
+
@app.get("/api/callers")
 def top_callers(limit: int = 50):
    db: sqlite3.Connection = app.state.db
@@ -240,6 +505,186 @@ def index():
    return INDEX_HTML


+# --- Single-episode HTML transcript view --------------------------------
+
+
+def _fmt_time(sec: float | None) -> str:
+    """Format a second count as ``M:SS``; None becomes the empty string.

+    Fractional seconds are truncated. Minutes are not wrapped into hours,
+    so 3600 renders as "60:00".
+    """
+    if sec is None:
+        return ""
+    minutes, seconds = divmod(int(sec), 60)
+    return f"{minutes}:{seconds:02d}"
+
+
+def _episode_html(episode_id: int) -> str:
+    """Render the single-episode transcript page as an HTML string.

+    Loads the episode row plus its intros, Q&A pairs and segments, then
+    fills the EPISODE_HTML template with:
+      * a right-rail summary (intro list and Q&A list with jump links), and
+      * a chronological body: segments in seg_idx order, with intro markers
+        (anchor ``intro-<id>``) and full Q&A blocks (anchor ``qa-<id>``)
+        spliced in ahead of the first segment that starts at or after them.

+    All database text is HTML-escaped before it is interpolated.

+    Raises:
+        HTTPException: 404 when no episode has this id.
+    """
+    db: sqlite3.Connection = app.state.db
+    ep = db.execute("SELECT * FROM episodes WHERE id = ?", (episode_id,)).fetchone()
+    if not ep:
+        raise HTTPException(404, "episode not found")
+    intros = db.execute(
+        "SELECT id, name, role_hint, intro_time_sec FROM intros "
+        "WHERE episode_id = ? ORDER BY intro_time_sec",
+        (episode_id,),
+    ).fetchall()
+    qa = db.execute(
+        "SELECT id, question_start_sec, question_end_sec, answer_start_sec, "
+        "       answer_end_sec, question_text, answer_text, caller_name, "
+        "       caller_role, usefulness_score, topic_class, is_banter "
+        "FROM qa_pairs WHERE episode_id = ? ORDER BY question_start_sec",
+        (episode_id,),
+    ).fetchall()
+    segments = db.execute(
+        "SELECT seg_idx, start_sec, end_sec, text FROM segments "
+        "WHERE episode_id = ? ORDER BY seg_idx",
+        (episode_id,),
+    ).fetchall()

+    esc = _html.escape
+    title = esc(ep["title"] or f"Episode {episode_id}")
+    air = esc(ep["air_date"] or "")
+    year = ep["year"]
+    duration_min = round((ep["duration_sec"] or 0) / 60.0, 1)
+    rel_path = esc(ep["rel_path"] or "")

+    # Pair each Q&A with its start time so we can splice them into the
+    # segment stream chronologically. Sort on the timestamp ONLY: plain
+    # tuple sorting falls through to comparing the dict payloads when two
+    # start times tie (or are both NULL -> 0.0), and dicts define no
+    # ordering, so the page would fail with TypeError. The sort is stable,
+    # so ties keep the order the query returned.
+    qa_rows = [dict(r) for r in qa]
+    qa_starts = sorted(
+        (((r["question_start_sec"] or 0.0), r) for r in qa_rows),
+        key=lambda pair: pair[0],
+    )

+    # Right rail summary lists
+    intro_items = []
+    for r in intros:
+        t = _fmt_time(r["intro_time_sec"])
+        name = esc(r["name"] or "?")
+        role = esc(r["role_hint"] or "")
+        role_html = f' <span class="muted">({role})</span>' if role else ""
+        intro_items.append(
+            f'<li><a href="#intro-{r["id"]}" data-seek="{r["intro_time_sec"] or 0}">'
+            f'{t}</a> &middot; {name}{role_html}</li>'
+        )
+    intros_html = "\n".join(intro_items) or '<li class="muted">none</li>'

+    qa_items = []
+    for r in qa_rows:
+        t = _fmt_time(r["question_start_sec"])
+        score = r["usefulness_score"]
+        badge = (
+            f'<span class="badge s{score}" title="usefulness {score}/5">{score}</span>'
+            if score is not None else ""
+        )
+        topic = esc(r["topic_class"] or "")
+        topic_html = f'<span class="topic">{topic}</span> ' if topic else ""
+        caller = esc(r["caller_name"] or "")
+        caller_html = f' &middot; {caller}' if caller else ""
+        # Short teaser for the rail: the excerpt, hard-capped at 80 chars.
+        first_q = _excerpt(r["question_text"] or "")[:80]
+        teaser = esc(first_q)
+        qa_items.append(
+            f'<li><a href="#qa-{r["id"]}" data-seek="{r["question_start_sec"] or 0}">'
+            f'{t}</a> {badge}{topic_html}<span class="muted">{teaser}</span>{caller_html}</li>'
+        )
+    qa_summary_html = "\n".join(qa_items) or '<li class="muted">none</li>'

+    # Build the chronological transcript body. We walk segments and, before
+    # any segment whose start_sec >= a Q&A's question_start, we emit the
+    # Q&A block. (Q&A blocks contain the full question/answer text already,
+    # so segment text becomes context around them.)
+    body_parts: list[str] = []
+    qa_iter = iter(qa_starts)
+    next_qa: tuple[float, dict] | None = next(qa_iter, None)

+    # Intros also get inline anchors so the right-rail jump links work.
+    # Same key-only sort as above: sqlite3.Row payloads cannot be ordered
+    # either, so a timestamp tie must never reach them.
+    intro_by_time = sorted(
+        (((r["intro_time_sec"] or 0.0), r) for r in intros),
+        key=lambda pair: pair[0],
+    )
+    intro_iter = iter(intro_by_time)
+    next_intro = next(intro_iter, None)

+    def _flush_inline_at(t_seg: float) -> None:
+        """Emit every pending intro marker and Q&A block timed <= t_seg."""
+        nonlocal next_intro, next_qa
+        while next_intro and next_intro[0] <= t_seg:
+            ir = next_intro[1]
+            tlbl = _fmt_time(ir["intro_time_sec"])
+            name = esc(ir["name"] or "?")
+            role = esc(ir["role_hint"] or "")
+            role_html = f' <span class="muted">({role})</span>' if role else ""
+            body_parts.append(
+                f'<div class="intro-marker" id="intro-{ir["id"]}">'
+                f'<a class="ts" href="#" data-seek="{ir["intro_time_sec"] or 0}">'
+                f'{tlbl}</a> intro: <b>{name}</b>{role_html}'
+                f'</div>'
+            )
+            next_intro = next(intro_iter, None)
+        while next_qa and next_qa[0] <= t_seg:
+            qr = next_qa[1]
+            qstart = qr["question_start_sec"] or 0.0
+            # Fall back to the question time when the answer time is unset.
+            astart = qr["answer_start_sec"] or qstart
+            score = qr["usefulness_score"]
+            badge = (
+                f'<span class="badge s{score}" title="usefulness {score}/5">{score}</span>'
+                if score is not None else ""
+            )
+            topic = esc(qr["topic_class"] or "")
+            topic_html = f'<span class="topic">{topic}</span> ' if topic else ""
+            caller = esc(qr["caller_name"] or "")
+            caller_html = f' &middot; <i>{caller}</i>' if caller else ""
+            qbody = esc(qr["question_text"] or "")
+            abody = esc(qr["answer_text"] or "")
+            # Low-scoring (<= 2) and banter pairs are rendered dimmed.
+            dim = " dim" if (score is not None and score <= 2) or qr["is_banter"] == 1 else ""
+            body_parts.append(
+                f'<div class="qa{dim}" id="qa-{qr["id"]}">'
+                f'<div class="qa-head">{badge}{topic_html}'
+                f'<a class="ts" href="#" data-seek="{qstart}">{_fmt_time(qstart)}</a>'
+                f' Q&amp;A{caller_html}'
+                f'<button class="play" data-seek="{qstart}">play from here</button>'
+                f'</div>'
+                f'<div class="qa-q"><b>Q:</b> {qbody}</div>'
+                f'<div class="qa-a">'
+                f'<a class="ts inline" href="#" data-seek="{astart}">{_fmt_time(astart)}</a>'
+                f' <b>A:</b> {abody}</div>'
+                f'</div>'
+            )
+            next_qa = next(qa_iter, None)

+    for s in segments:
+        t_seg = s["start_sec"] or 0.0
+        _flush_inline_at(t_seg)
+        seg_text = esc(s["text"] or "").strip()
+        if not seg_text:
+            continue
+        body_parts.append(
+            f'<p class="seg">'
+            f'<a class="ts" href="#" data-seek="{t_seg}">{_fmt_time(t_seg)}</a> '
+            f'{seg_text}</p>'
+        )
+    # Flush any tail intros / Q&As after final segment
+    _flush_inline_at(float("inf"))

+    body_html = "\n".join(body_parts) or '<p class="muted">no transcript</p>'

+    qa_count = len(qa_rows)
+    intro_count = len(intros)

+    return EPISODE_HTML.format(
+        title=title,
+        episode_id=episode_id,
+        year=year,
+        air=air,
+        duration_min=duration_min,
+        rel_path=rel_path,
+        qa_count=qa_count,
+        intro_count=intro_count,
+        intros_summary=intros_html,
+        qa_summary=qa_summary_html,
+        body=body_html,
+    )
+
+
+@app.get("/episode/{episode_id}", response_class=HTMLResponse)
+def episode_page(episode_id: int):
+    """Serve the HTML transcript view for one episode."""
+    page = _episode_html(episode_id)
+    return page
+
+
 INDEX_HTML = """<!doctype html>
 <html lang=en>
 <meta charset=utf-8>
@@ -249,6 +694,7 @@ INDEX_HTML = """<!doctype html>
  h1 { margin: 0 0 .25em; }
  .sub { color:#666; margin-bottom: 1.5em; }
  input[type=search] { width: 100%; padding: .6em .8em; font-size: 16px; box-sizing: border-box; }
+  input[type=search]:disabled { background:#f4f4f4; color:#999; }
  .controls { display:flex; gap:.5em; align-items:center; margin: .5em 0 1em; flex-wrap: wrap; }
  .controls label { font-size: 13px; color:#555; }
  .group { border-bottom: 1px solid #eee; padding: 1em 0; }
@@ -257,6 +703,9 @@ INDEX_HTML = """<!doctype html>
  .hit .meta { font-size: 12px; color: #888; }
  .hit a { color: #06c; text-decoration: none; }
  .hit a:hover { text-decoration: underline; }
+  a.hit-link { display:block; color:inherit; text-decoration:none; padding: .5em .35em; margin: 0 -.35em; border-radius: 4px; }
+  a.hit-link:hover { background: #f6f8fa; }
+  a.hit-link .meta a { color:#06c; }
  mark { background: #ffec99; padding: 0 .15em; }
  .stats { font-size: 12px; color:#666; margin-top: 2em; }
  .empty { color:#999; padding: 1em 0; }
@@ -271,7 +720,13 @@ INDEX_HTML = """<!doctype html>
  .badge.s1 { background: #b85a4a; }
  .topic { font-size: 11px; color: #888; padding: 0 .35em; border-radius: 3px;
           background: #f0f0f0; }
-  .hit.dim { opacity: .55; }
+  .hit.dim, a.hit-link.dim { opacity: .55; }
+  button.more { margin: 1em 0; padding: .5em 1em; font-size: 13px; cursor: pointer;
+                background: #f4f4f4; border: 1px solid #ccc; border-radius: 3px; }
+  button.more:hover { background: #ececec; }
+  button.more:disabled { color: #999; cursor: default; }
+  .browse-bar { background:#f8f8f4; border:1px solid #ece8d8; padding:.4em .6em;
+                border-radius: 4px; margin: 0 0 1em; font-size: 13px; color:#665; }
 </style>
 <h1>Computer Guru Radio Archive</h1>
 <div class=sub id=sub>...</div>
@@ -292,6 +747,21 @@ INDEX_HTML = """<!doctype html>
    </label>
    <label><input type=checkbox id=exclude_banter> hide banter</label>
  </span>
+  <span style="border-left:1px solid #ddd; padding-left:.6em">
+    <label><input type=checkbox id=browse_all> Browse all Q&amp;A</label>
+  </span>
+</div>
+<div class=controls id=browse_controls style="display:none">
+  <label>year
+    <select id=browse_year><option value="">any</option></select>
+  </label>
+  <label>sort
+    <select id=browse_order>
+      <option value=air_date_desc>air date (newest first)</option>
+      <option value=air_date_asc>air date (oldest first)</option>
+      <option value=score_desc>usefulness score</option>
+    </select>
+  </label>
 </div>
 <div id=results></div>
 <div class=stats id=stats></div>
@@ -299,21 +769,56 @@ INDEX_HTML = """<!doctype html>
 const q = document.getElementById('q');
 const results = document.getElementById('results');
 const sub = document.getElementById('sub');
-const stats = document.getElementById('stats');
+const browseToggle = document.getElementById('browse_all');
+const browseControls = document.getElementById('browse_controls');
+const browseYear = document.getElementById('browse_year');
+const browseOrder = document.getElementById('browse_order');
+const minScoreEl = document.getElementById('min_score');
+const excludeBanterEl = document.getElementById('exclude_banter');
+
+let browseOffset = 0;
+const BROWSE_LIMIT = 50;
+let browseTotal = 0;

 fetch('/api/stats').then(r => r.json()).then(s => {
  const c = s.counts;
  sub.textContent = `${c.episodes} episodes  /  ${c.qa_pairs} Q&A pairs  /  ${c.intros} intros  /  ${c.segments.toLocaleString()} segments`;
+  const yrs = (s.by_year || []).map(x => x.year).sort((a,b)=>b-a);
+  for (const y of yrs) {
+    const o = document.createElement('option');
+    o.value = y; o.textContent = y;
+    browseYear.appendChild(o);
+  }
 });

 let timer;
 q.addEventListener('input', () => {
+  if (browseToggle.checked) return;
  clearTimeout(timer);
  timer = setTimeout(runSearch, 250);
 });
-document.querySelectorAll('input[name=kind]').forEach(el => el.addEventListener('change', runSearch));
-document.getElementById('min_score').addEventListener('change', runSearch);
-document.getElementById('exclude_banter').addEventListener('change', runSearch);
+document.querySelectorAll('input[name=kind]').forEach(el => el.addEventListener('change', () => { if (!browseToggle.checked) runSearch(); }));
+minScoreEl.addEventListener('change', refresh);
+excludeBanterEl.addEventListener('change', refresh);
+browseToggle.addEventListener('change', () => {
+  const on = browseToggle.checked;
+  browseControls.style.display = on ? 'flex' : 'none';
+  q.disabled = on;
+  if (on) {
+    if (minScoreEl.value === '0') minScoreEl.value = '3';
+    runBrowse(true);
+  } else {
+    results.innerHTML = '';
+    if (q.value.trim().length >= 2) runSearch();
+  }
+});
+browseYear.addEventListener('change', () => runBrowse(true));
+browseOrder.addEventListener('change', () => runBrowse(true));
+
+function refresh() {
+  if (browseToggle.checked) runBrowse(true);
+  else if (q.value.trim().length >= 2) runSearch();
+}

 function fmtTime(s) {
  if (s == null) return '';
@@ -327,12 +832,40 @@ function escapeHtml(s) {
  }[c]));
 }

+function qaHitHtml(h, opts) {
+  const ad = h.air_date ? ` (${h.air_date})` : '';
+  const cn = h.caller_name ? ` &mdash; ${escapeHtml(h.caller_name)}` : '';
+  const score = h.usefulness_score;
+  const topic = h.topic_class;
+  const banter = h.is_banter === 1;
+  const badge = score != null
+    ? `<span class="badge s${score}" title="usefulness ${score}/5">${score}</span>`
+    : '';
+  const topicTag = topic
+    ? `<span class=topic>${escapeHtml(topic)}</span> `
+    : '';
+  const dim = (score != null && score <= 2) || banter ? ' dim' : '';
+  // Prefer plain-text excerpt, fall back to FTS snippet (which may include <mark>).
+  const qBody = h.question_excerpt
+    ? escapeHtml(h.question_excerpt)
+    : (h.q_snippet || '');
+  const aBody = h.answer_excerpt
+    ? escapeHtml(h.answer_excerpt)
+    : (h.a_snippet || '');
+  const href = `/episode/${h.episode_id}#qa-${h.qa_id}`;
+  return `<a class="hit-link${dim}" href="${href}">
+    <div class=meta>${badge}${topicTag}${h.year} &middot; ${escapeHtml(h.title)}${ad}${cn} &middot; @ ${fmtTime(h.question_start_sec)}</div>
+    <div><b>Q:</b> ${qBody}</div>
+    <div><b>A:</b> ${aBody}</div>
+  </a>`;
+}
+
 async function runSearch() {
  const term = q.value.trim();
  if (term.length < 2) { results.innerHTML = ''; return; }
  const kind = document.querySelector('input[name=kind]:checked').value;
-  const minScore = document.getElementById('min_score').value;
-  const excludeBanter = document.getElementById('exclude_banter').checked;
+  const minScore = minScoreEl.value;
+  const excludeBanter = excludeBanterEl.checked;
  const params = new URLSearchParams({ q: term, kind, limit: '40' });
  if (minScore !== '0') params.set('min_score', minScore);
  if (excludeBanter) params.set('exclude_banter', 'true');
@@ -341,25 +874,7 @@ async function runSearch() {
  let html = '';
  if (j.qa.length) {
    html += '<div class=group><h3>Q&amp;A Pairs</h3>';
-    for (const h of j.qa) {
-      const ad = h.air_date ? ` (${h.air_date})` : '';
-      const cn = h.caller_name ? ` — ${escapeHtml(h.caller_name)}` : '';
-      const score = h.usefulness_score;
-      const topic = h.topic_class;
-      const banter = h.is_banter === 1;
-      const badge = score != null
-        ? `<span class="badge s${score}" title="usefulness ${score}/5">${score}</span>`
-        : '';
-      const topicTag = topic
-        ? `<span class=topic>${escapeHtml(topic)}</span> `
-        : '';
-      const dim = (score != null && score <= 2) || banter ? ' dim' : '';
-      html += `<div class="hit${dim}">
-        <div class=meta>${badge}${topicTag}${h.year} · ${escapeHtml(h.title)}${ad}${cn} · @ ${fmtTime(h.question_start_sec)}</div>
-        <div><b>Q:</b> ${h.q_snippet}</div>
-        <div><b>A:</b> ${h.a_snippet}</div>
-      </div>`;
-    }
+    for (const h of j.qa) html += qaHitHtml(h);
    html += '</div>';
  }
  if (j.segments.length) {
@@ -367,7 +882,7 @@ async function runSearch() {
    for (const h of j.segments) {
      const ad = h.air_date ? ` (${h.air_date})` : '';
      html += `<div class=hit>
-        <div class=meta>${h.year} · ${h.title}${ad} · @ ${fmtTime(h.start_sec)}</div>
+        <div class=meta>${h.year} &middot; ${escapeHtml(h.title)}${ad} &middot; @ ${fmtTime(h.start_sec)}</div>
        <div>${h.snippet}</div>
      </div>`;
    }
@@ -378,6 +893,185 @@ async function runSearch() {
  }
  results.innerHTML = html;
 }
+
+// Load one page of Q&A pairs from /api/qa and render it into `results`.
+//   reset = true  -> start again at offset 0 and replace the whole list
+//   reset = false -> append the next page to the existing #qa_browse list
+// Filters come from the shared min-score / exclude-banter controls plus the
+// browse-only year and order selectors. Paging state lives in the
+// browseOffset / browseTotal variables declared elsewhere in this script.
+async function runBrowse(reset) {
+  // Each call takes a ticket; only the newest call may touch the DOM or the
+  // paging state. Without this, a double-click on "load more" (or a filter
+  // change while a page is in flight) appends duplicate rows and advances
+  // browseOffset twice, silently skipping a page.
+  const ticket = (runBrowse.ticket || 0) + 1;
+  runBrowse.ticket = ticket;
+
+  if (reset) browseOffset = 0;
+  const params = new URLSearchParams({
+    limit: String(BROWSE_LIMIT),
+    offset: String(browseOffset),
+    order: browseOrder.value,
+  });
+  if (browseYear.value) params.set('year', browseYear.value);
+  const ms = minScoreEl.value;
+  if (ms !== '0') params.set('min_score', ms);
+  if (excludeBanterEl.checked) params.set('exclude_banter', 'true');
+
+  let j;
+  try {
+    const r = await fetch(`/api/qa?${params}`);
+    // fetch() only rejects on network failure; HTTP errors must be checked.
+    if (!r.ok) throw new Error(`HTTP ${r.status}`);
+    j = await r.json();
+  } catch (err) {
+    if (ticket !== runBrowse.ticket) return;
+    // On a fresh load nothing useful is on screen, so report the failure.
+    // On "load more" keep the rows and the button so the user can retry.
+    if (reset) results.innerHTML = '<div class=empty>could not load Q&amp;A pairs</div>';
+    return;
+  }
+  // A newer call has started since this request was sent: drop this response.
+  if (ticket !== runBrowse.ticket) return;
+
+  const items = j.items || [];
+  browseTotal = Number(j.total) || 0;
+
+  if (reset && items.length === 0) {
+    results.innerHTML = '<div class=empty>no Q&amp;A pairs match these filters</div>';
+    return;
+  }
+
+  const shown = browseOffset + items.length;
+  const newRows = items.map(h => qaHitHtml(h)).join('');
+  const header = `<div class=browse-bar>Showing ${Math.min(shown, browseTotal)} of ${browseTotal} Q&amp;A pairs</div>`;
+  // An empty page means paging cannot make progress, whatever total claims.
+  const moreEnabled = items.length > 0 && shown < browseTotal;
+
+  if (reset) {
+    results.innerHTML = `${header}<div class=group id=qa_browse><h3>Q&amp;A Pairs</h3>${newRows}</div>` +
+      (moreEnabled ? '<button class=more id=load_more>load more</button>' : '');
+  } else {
+    const list = document.getElementById('qa_browse');
+    // The list is gone if something else re-rendered `results` meanwhile.
+    if (!list) return;
+    list.insertAdjacentHTML('beforeend', newRows);
+    // refresh the count bar
+    const bar = results.querySelector('.browse-bar');
+    if (bar) bar.outerHTML = header;
+    if (!moreEnabled) {
+      const stale = document.getElementById('load_more');
+      if (stale) stale.remove();
+    }
+  }
+  browseOffset = shown;
+  const btn = document.getElementById('load_more');
+  if (btn) btn.onclick = () => runBrowse(false);
+}
+</script>
+</html>
+"""
+
+
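+# Single-episode transcript view (HTML page served at /episode/{id}).
+# Literal CSS/JS braces are doubled, so this appears to be a str.format()
+# template. Named fields: title, year, air, duration_min, qa_count,
+# intro_count, rel_path, episode_id, body, qa_summary, intros_summary.
+# NOTE(review): fields are substituted as-is -- confirm the caller HTML-escapes
+# text values such as title and rel_path before formatting.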
+EPISODE_HTML = """<!doctype html>
+<html lang=en>
+<meta charset=utf-8>
+<title>{title} &middot; Computer Guru Radio Archive</title>
+<style>
+  body {{ font: 14px/1.5 ui-sans-serif, system-ui; max-width: 1080px; margin: 1.5em auto;
+         padding: 0 1em; color: #222; }}
+  h1 {{ margin: 0; font-size: 20px; }}
+  .topbar {{ display: flex; gap: 1em; align-items: baseline; flex-wrap: wrap;
+            border-bottom: 1px solid #eee; padding-bottom: .75em; margin-bottom: 1em; }}
+  .topbar .meta {{ color: #666; font-size: 12px; }}
+  .topbar a {{ color: #06c; text-decoration: none; }}
+  .topbar a:hover {{ text-decoration: underline; }}
+  audio {{ width: 100%; margin: .5em 0 1em; }}
+  .layout {{ display: grid; grid-template-columns: 1fr 280px; gap: 2em; }}
+  @media (max-width: 760px) {{ .layout {{ grid-template-columns: 1fr; }} }}
+  .body p.seg {{ margin: .25em 0; }}
+  .ts {{ display: inline-block; min-width: 3.5em; color: #06c; text-decoration: none;
+         font-variant-numeric: tabular-nums; font-size: 12px; }}
+  .ts:hover {{ text-decoration: underline; }}
+  .ts.inline {{ min-width: 0; margin-right: .25em; }}
+  .qa {{ background: #fbf9f1; border-left: 3px solid #d8c97a;
+         padding: .5em .75em; margin: 1em 0; border-radius: 3px; }}
+  .qa.dim {{ opacity: .55; }}
+  .qa-head {{ font-size: 12px; color: #555; margin-bottom: .35em; }}
+  .qa-q {{ margin: .35em 0; }}
+  .qa-a {{ margin: .35em 0; }}
+  .intro-marker {{ border-left: 3px solid #b9d2ec; background: #f6f9fc;
+                   padding: .35em .6em; margin: .75em 0; font-size: 13px; }}
+  .badge {{ display: inline-block; min-width: 1.6em; padding: 0 .35em; margin-right: .35em;
+            font-size: 11px; font-weight: 600; text-align: center; border-radius: 3px;
+            color: #fff; background: #999; vertical-align: 1px; }}
+  .badge.s5 {{ background: #2a8f43; }}
+  .badge.s4 {{ background: #5aa54b; }}
+  .badge.s3 {{ background: #999; }}
+  .badge.s2 {{ background: #c08a3a; }}
+  .badge.s1 {{ background: #b85a4a; }}
+  .topic {{ font-size: 11px; color: #888; padding: 0 .35em; border-radius: 3px;
+            background: #f0f0f0; }}
+  .muted {{ color: #999; }}
+  button.play {{ font-size: 11px; margin-left: .5em; padding: 1px 6px;
+                 border: 1px solid #ccc; border-radius: 3px; background: #fff;
+                 color: #06c; cursor: pointer; }}
+  button.play:hover {{ background: #f6f8fa; }}
+  aside h3 {{ font-size: 12px; text-transform: uppercase; letter-spacing: .04em;
+              color: #666; margin: 1em 0 .35em; }}
+  aside ul {{ list-style: none; margin: 0; padding: 0; font-size: 12px; }}
+  aside li {{ padding: .15em 0; line-height: 1.35; }}
+  aside a {{ color: #06c; text-decoration: none; }}
+  aside a:hover {{ text-decoration: underline; }}
+  .audio-missing {{ background: #fff7e6; border: 1px solid #f0d9a8; padding: .5em .75em;
+                    border-radius: 3px; font-size: 13px; color: #8a6f2a; }}
+</style>
+<div class=topbar>
+  <div>
+    <h1>{title}</h1>
+    <div class=meta>{year} &middot; {air} &middot; {duration_min} min &middot; {qa_count} Q&amp;A &middot; {intro_count} intros &middot; <a href="/">&laquo; back to search</a></div>
+    <div class=meta><code>{rel_path}</code></div>
+  </div>
+</div>
+<audio id=player controls preload=none src="/api/audio/{episode_id}"></audio>
+<div id=audio_missing class=audio-missing style="display:none">
+  Audio file is not available on this server.
+</div>
+<div class=layout>
+  <div class=body>
+    {body}
+  </div>
+  <aside>
+    <h3>Q&amp;A pairs ({qa_count})</h3>
+    <ul>{qa_summary}</ul>
+    <h3>Intros ({intro_count})</h3>
+    <ul>{intros_summary}</ul>
+  </aside>
+</div>
+<script>
+(function() {{
+  // Audio wiring for the episode page: click-to-seek on any [data-seek]
+  // element, and seek-to-anchor when the URL hash names an element on the page.
+  const player = document.getElementById('player');
+  const missing = document.getElementById('audio_missing');
+
+  // Start time requested through the URL hash while metadata was not loaded
+  // yet (the player is declared with preload=none); null when nothing is queued.
+  let pendingSeek = null;
+
+  // If the audio element fails to load, hide it and show a notice.
+  player.addEventListener('error', () => {{
+    player.style.display = 'none';
+    missing.style.display = '';
+  }});
+
+  // Jump to t (seconds, as a number or numeric string) and start playback.
+  // No-op for non-numeric input or once the player has been hidden.
+  function seek(t) {{
+    const sec = parseFloat(t);
+    if (isNaN(sec)) return;
+    if (player.style.display === 'none') return;
+    try {{
+      player.currentTime = sec;
+      // play() may reject (e.g. autoplay policy); that is not an error here.
+      player.play().catch(() => {{}});
+    }} catch (e) {{}}
+  }}
+
+  // A single listener applies whatever hash seek is still queued when the
+  // metadata arrives. Keeping the target in pendingSeek (instead of one
+  // closure per request) lets a later hash change replace it and lets an
+  // explicit click cancel it.
+  player.addEventListener('loadedmetadata', () => {{
+    if (pendingSeek === null) return;
+    const queued = pendingSeek;
+    pendingSeek = null;
+    seek(queued);
+  }});
+
+  // Click handler for any element with a data-seek attribute.
+  document.body.addEventListener('click', (ev) => {{
+    const el = ev.target.closest('[data-seek]');
+    if (!el) return;
+    // Only intercept if it's a # anchor or button/click — let normal navigation work otherwise.
+    const tag = el.tagName.toLowerCase();
+    const href = el.getAttribute('href') || '';
+    if (tag === 'a' && href && !href.startsWith('#')) return;
+    ev.preventDefault();
+    // An explicit click wins over a hash seek that is still waiting for
+    // metadata; otherwise playback would jump back to the hash target as soon
+    // as this click causes the audio to load.
+    pendingSeek = null;
+    seek(el.getAttribute('data-seek'));
+    if (tag === 'a' && (href.startsWith('#qa-') || href.startsWith('#intro-'))) {{
+      // also scroll the anchor into view
+      const target = document.getElementById(href.slice(1));
+      if (target) target.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
+    }}
+  }});
+
+  // On page load (and on later hash changes), scroll to the element named by
+  // the URL hash and seek the audio to the first data-seek time inside it.
+  function handleHash() {{
+    const h = window.location.hash;
+    if (!h) return;
+    const target = document.getElementById(h.slice(1));
+    if (!target) return;
+    target.scrollIntoView({{ block: 'start' }});
+    // find nearest data-seek descendant for the start time
+    const seekEl = target.querySelector('[data-seek]');
+    if (!seekEl) return;
+    const t = seekEl.getAttribute('data-seek');
+    // wait for metadata before seeking so currentTime sticks
+    if (player.readyState >= 1) seek(t);
+    else pendingSeek = t;
+  }}
+  handleHash();
+  window.addEventListener('hashchange', handleHash);
+}})();
 </script>
 </html>
 """