radio: FastAPI/SQLite query server, deployed to Jupiter
Read-only HTTP layer over archive.db. Endpoints: /api/stats,
/api/episodes, /api/episodes/{id}, /api/episodes/{id}/transcript,
/api/search (FTS5 over segments + qa_pairs, bm25-ranked, snippets),
/api/callers. Single-file HTML index with debounced search UI.
Deployed: Jupiter (Unraid Docker), bound to 172.16.3.20:8765, LAN only.
Container path: /mnt/user/appdata/radio-archive/{app,data}.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
14
projects/radio-show/audio-processor/server/Dockerfile
Normal file
14
projects/radio-show/audio-processor/server/Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt /app/
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY main.py /app/
|
||||
|
||||
ENV ARCHIVE_DB=/data/archive.db
|
||||
ENV PORT=8765
|
||||
EXPOSE 8765
|
||||
|
||||
CMD ["python", "-u", "main.py"]
|
||||
13
projects/radio-show/audio-processor/server/compose.yml
Normal file
13
projects/radio-show/audio-processor/server/compose.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
services:
|
||||
radio-archive:
|
||||
image: radio-archive:latest
|
||||
container_name: radio-archive
|
||||
restart: unless-stopped
|
||||
build: .
|
||||
volumes:
|
||||
- /mnt/user/appdata/radio-archive/data:/data:ro
|
||||
ports:
|
||||
- "172.16.3.20:8765:8765"
|
||||
environment:
|
||||
ARCHIVE_DB: /data/archive.db
|
||||
PORT: "8765"
|
||||
308
projects/radio-show/audio-processor/server/main.py
Normal file
308
projects/radio-show/audio-processor/server/main.py
Normal file
@@ -0,0 +1,308 @@
|
||||
"""
|
||||
Radio archive query server. Read-only FastAPI over the SQLite archive.db.
|
||||
|
||||
Endpoints:
|
||||
GET / Landing page with search UI
|
||||
GET /api/episodes List all episodes (year, title, duration)
|
||||
GET /api/episodes/{id} Episode detail: intros + qa_pairs
|
||||
GET /api/episodes/{id}/transcript Chronologically merged segments + turns
|
||||
GET /api/search?q=...&kind=... FTS over segments and/or qa_pairs
|
||||
GET /api/callers Top recurring caller_names
GET /api/stats Row counts per table plus per-year episode totals
|
||||
|
||||
Config via env:
|
||||
ARCHIVE_DB path to archive.db (default /data/archive.db)
|
||||
PORT listen port (default 8765)
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Query
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
# Runtime configuration, read from the environment once at import time.
DB_PATH = os.getenv("ARCHIVE_DB", "/data/archive.db")  # location of the SQLite archive
PORT = int(os.getenv("PORT", "8765"))  # port passed to uvicorn when run as a script
|
||||
|
||||
|
||||
def _connect() -> sqlite3.Connection:
    """Open the archive database read-only and return the connection.

    The location is taken from the module-level ``DB_PATH``. Rows come back as
    ``sqlite3.Row`` so callers can turn them into dicts with ``dict(row)``.

    Returns:
        A connection opened with ``mode=ro``.

    Raises:
        RuntimeError: if no file exists at ``DB_PATH``.
    """
    db_file = Path(DB_PATH)
    if not db_file.exists():
        raise RuntimeError(f"Archive DB not found at {DB_PATH}")
    # Let pathlib build the file: URI so that characters with a meaning in
    # URIs ('?', '#', '%', spaces) are percent-encoded rather than being
    # parsed by SQLite as a query string or fragment. as_uri() requires an
    # absolute path, hence absolute().
    uri = f"{db_file.absolute().as_uri()}?mode=ro"
    # check_same_thread=False: the connection is opened once at startup and
    # then shared by the request handlers, which may run on other threads.
    conn = sqlite3.connect(uri, uri=True, check_same_thread=False)
    conn.row_factory = sqlite3.Row
    return conn
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the shared database connection at startup and close it at shutdown.

    The connection is stored on ``app.state.db``, which is where the route
    handlers read it from.
    """
    app.state.db = _connect()
    try:
        yield
    finally:
        # Release the connection even when the context is left via an
        # exception or cancellation instead of a clean shutdown.
        app.state.db.close()
|
||||
|
||||
|
||||
# Application object; `lifespan` opens the shared SQLite connection on startup and closes it on shutdown.
app = FastAPI(title="Computer Guru Radio Archive", lifespan=lifespan)
|
||||
|
||||
|
||||
def fts_escape(q: str) -> str:
    """Turn free-form user input into a safe FTS5 MATCH expression.

    The input is split on whitespace and every term is emitted as a
    double-quoted FTS5 string, so operators and reserved characters
    (``AND``, ``*``, ``:``, ``-`` ...) are matched literally. Double quotes
    inside a term are doubled, which is how FTS5 escapes them; without that
    a term such as ``foo"bar`` would produce a malformed expression.

    Args:
        q: Raw query text.

    Returns:
        Space-separated quoted terms, or ``""`` when *q* has no terms.
    """
    # str.split() with no separator never yields empty strings, so no
    # additional filtering of tokens is needed.
    return " ".join('"' + tok.replace('"', '""') + '"' for tok in q.split())
|
||||
|
||||
|
||||
@app.get("/api/episodes")
def list_episodes(year: int | None = None, limit: int = 1000):
    """List episodes, optionally restricted to a single year.

    Each item has id, year, title, air_date, the length in minutes (rounded
    to one decimal) and the number of qa_pairs and intros rows that point at
    the episode. Rows are ordered by air_date, then title; a NULL air_date
    is ordered as the string '9999'.

    Args:
        year: If given, only episodes with this year are returned.
        limit: Value bound to the SQL LIMIT clause.
    """
    conn: sqlite3.Connection = app.state.db
    clauses = [
        """
        SELECT id, year, title, air_date, ROUND(duration_sec/60.0,1) AS minutes,
        (SELECT COUNT(*) FROM qa_pairs q WHERE q.episode_id = e.id) AS qa_count,
        (SELECT COUNT(*) FROM intros i WHERE i.episode_id = e.id) AS intro_count
        FROM episodes e
        """
    ]
    bind: list = []
    if year is not None:
        clauses.append(" WHERE year = ?")
        bind.append(year)
    clauses.append(" ORDER BY COALESCE(air_date, '9999') ASC, title ASC LIMIT ?")
    bind.append(limit)
    cursor = conn.execute("".join(clauses), bind)
    return [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
|
||||
@app.get("/api/episodes/{episode_id}")
def episode_detail(episode_id: int):
    """Return one episode together with its intros and Q&A pairs.

    The response has three keys: ``episode`` (the full episodes row),
    ``intros`` (ordered by intro_time_sec) and ``qa_pairs`` (ordered by
    question_start_sec). ``topic_tags`` is stored as JSON text and is decoded
    here; a NULL or empty value becomes an empty list.

    Raises:
        HTTPException: 404 when no episode has the given id.
    """
    conn: sqlite3.Connection = app.state.db
    key = (episode_id,)

    episode_row = conn.execute("SELECT * FROM episodes WHERE id = ?", key).fetchone()
    if not episode_row:
        raise HTTPException(404, "episode not found")

    intro_sql = (
        "SELECT name, role_hint, intro_time_sec, affiliation, fillin_for, source_text "
        "FROM intros WHERE episode_id = ? ORDER BY intro_time_sec"
    )
    qa_sql = (
        "SELECT id, question_start_sec, question_end_sec, "
        "answer_start_sec, answer_end_sec, "
        "question_text, answer_text, caller_name, caller_role, topic, topic_tags "
        "FROM qa_pairs WHERE episode_id = ? ORDER BY question_start_sec"
    )
    intro_rows = conn.execute(intro_sql, key).fetchall()
    qa_rows = conn.execute(qa_sql, key).fetchall()

    qa_pairs = []
    for row in qa_rows:
        pair = dict(row)
        # Replace the raw JSON text with the decoded list.
        pair["topic_tags"] = json.loads(row["topic_tags"] or "[]")
        qa_pairs.append(pair)

    return {
        "episode": dict(episode_row),
        "intros": [dict(row) for row in intro_rows],
        "qa_pairs": qa_pairs,
    }
|
||||
|
||||
|
||||
@app.get("/api/episodes/{episode_id}/transcript")
def episode_transcript(episode_id: int):
    """Return the transcript data of one episode.

    The response has ``episode`` (id, title, year), ``segments`` (seg_idx,
    start_sec, end_sec, text, ordered by seg_idx) and ``turns`` (speaker,
    start_sec, end_sec, confidence, ordered by start_sec). The two lists are
    returned side by side; they are not interleaved here.

    Raises:
        HTTPException: 404 when no episode has the given id.
    """
    conn: sqlite3.Connection = app.state.db
    key = (episode_id,)

    header = conn.execute(
        "SELECT id, title, year FROM episodes WHERE id = ?", key
    ).fetchone()
    if not header:
        raise HTTPException(404, "episode not found")

    segment_sql = (
        "SELECT seg_idx, start_sec, end_sec, text FROM segments "
        "WHERE episode_id = ? ORDER BY seg_idx"
    )
    turn_sql = (
        "SELECT speaker, start_sec, end_sec, confidence FROM turns "
        "WHERE episode_id = ? ORDER BY start_sec"
    )
    segment_rows = conn.execute(segment_sql, key).fetchall()
    turn_rows = conn.execute(turn_sql, key).fetchall()

    payload = {"episode": dict(header)}
    payload["segments"] = [dict(row) for row in segment_rows]
    payload["turns"] = [dict(row) for row in turn_rows]
    return payload
|
||||
|
||||
|
||||
@app.get("/api/search")
def search(
    q: str = Query(..., min_length=2),
    kind: str = Query("both", pattern="^(both|segments|qa)$"),
    limit: int = Query(50, ge=1, le=500),
):
    """Full-text search over transcript segments and/or Q&A pairs.

    Args:
        q: User query; passed through ``fts_escape`` before it reaches MATCH.
        kind: ``segments``, ``qa`` or ``both``; selects which index is queried.
        limit: Maximum number of hits per index.

    Returns:
        ``{"q": q, "segments": [...], "qa": [...]}``. Hits are ordered by
        ``bm25()`` ascending and carry ``snippet()`` excerpts in which matches
        are wrapped in ``<mark>`` tags. A list is empty when its index was
        not requested or when the query contains no terms.
    """
    conn: sqlite3.Connection = app.state.db
    match_expr = fts_escape(q)
    if not match_expr:
        return {"q": q, "segments": [], "qa": []}

    segment_sql = """
        SELECT e.id AS episode_id, e.year, e.title, e.air_date,
        s.start_sec, s.end_sec,
        snippet(segments_fts, 0, '<mark>', '</mark>', '...', 16) AS snippet,
        bm25(segments_fts) AS rank
        FROM segments_fts
        JOIN segments s ON s.id = segments_fts.rowid
        JOIN episodes e ON e.id = s.episode_id
        WHERE segments_fts MATCH ?
        ORDER BY rank LIMIT ?
        """
    qa_sql = """
        SELECT e.id AS episode_id, e.year, e.title, e.air_date,
        p.id AS qa_id, p.caller_name,
        p.question_start_sec, p.answer_start_sec,
        snippet(qa_fts, 0, '<mark>', '</mark>', '...', 16) AS q_snippet,
        snippet(qa_fts, 1, '<mark>', '</mark>', '...', 16) AS a_snippet,
        bm25(qa_fts) AS rank
        FROM qa_fts
        JOIN qa_pairs p ON p.id = qa_fts.rowid
        JOIN episodes e ON e.id = p.episode_id
        WHERE qa_fts MATCH ?
        ORDER BY rank LIMIT ?
        """

    def run(sql: str) -> list[dict]:
        # Both statements take the same two bind values: MATCH text, LIMIT.
        return [dict(row) for row in conn.execute(sql, (match_expr, limit)).fetchall()]

    segment_hits = run(segment_sql) if kind in ("both", "segments") else []
    qa_hits = run(qa_sql) if kind in ("both", "qa") else []
    return {"q": q, "segments": segment_hits, "qa": qa_hits}
|
||||
|
||||
|
||||
@app.get("/api/callers")
def top_callers(limit: int = 50):
    """Return caller names ranked by how many Q&A pairs they appear in.

    Rows with a NULL caller_name are ignored. Each item is
    ``{"caller_name": ..., "pairs": ...}``, most frequent first.

    Args:
        limit: Value bound to the SQL LIMIT clause.
    """
    conn: sqlite3.Connection = app.state.db
    sql = (
        "SELECT caller_name, COUNT(*) AS pairs FROM qa_pairs "
        "WHERE caller_name IS NOT NULL "
        "GROUP BY caller_name ORDER BY pairs DESC LIMIT ?"
    )
    ranked = conn.execute(sql, (limit,)).fetchall()
    return [dict(row) for row in ranked]
|
||||
|
||||
|
||||
@app.get("/api/stats")
def stats():
    """Return archive-wide totals.

    ``counts`` maps each of the five tables (episodes, segments, turns,
    intros, qa_pairs) to its row count. ``by_year`` lists, per year, the
    number of episodes and their summed duration in hours (one decimal).
    """
    conn: sqlite3.Connection = app.state.db

    counts = {}
    for table in ("episodes", "segments", "turns", "intros", "qa_pairs"):
        # The table name is interpolated, but it only ever comes from the
        # fixed tuple above, never from request data.
        counts[table] = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]

    yearly_sql = (
        "SELECT year, COUNT(*) AS episodes, "
        "ROUND(SUM(duration_sec)/3600.0, 1) AS hours "
        "FROM episodes GROUP BY year ORDER BY year"
    )
    by_year = [dict(row) for row in conn.execute(yearly_sql).fetchall()]

    return {"counts": counts, "by_year": by_year}
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
def index():
    """Serve the landing page: the static HTML held in ``INDEX_HTML``."""
    return INDEX_HTML
|
||||
|
||||
|
||||
# Landing page served at "/": one self-contained HTML document with inline CSS
# and JS. The script loads /api/stats once for the subtitle and calls
# /api/search 250 ms after the last keystroke (or immediately when the "kind"
# radio changes).
#
# Notes on the script:
#   * All text from the API is passed through esc() before it is assigned to
#     innerHTML. Snippets go through snip(), which escapes everything and then
#     restores only the <mark>/</mark> tags produced by the server's snippet().
#   * Each search gets a ticket number; a response is rendered only if it is
#     still the latest request, so a slow earlier response cannot overwrite
#     newer results.
#   * Non-2xx responses and network errors show a message instead of throwing
#     while reading the JSON body.
#   * The string is not a raw string, so the script avoids backslashes.
INDEX_HTML = """<!doctype html>
<html lang=en>
<meta charset=utf-8>
<title>Computer Guru Radio Archive</title>
<style>
  body { font: 14px/1.45 ui-sans-serif, system-ui; max-width: 920px; margin: 2em auto; padding: 0 1em; color: #222; }
  h1 { margin: 0 0 .25em; }
  .sub { color:#666; margin-bottom: 1.5em; }
  input[type=search] { width: 100%; padding: .6em .8em; font-size: 16px; box-sizing: border-box; }
  .controls { display:flex; gap:.5em; align-items:center; margin: .5em 0 1em; flex-wrap: wrap; }
  .controls label { font-size: 13px; color:#555; }
  .group { border-bottom: 1px solid #eee; padding: 1em 0; }
  .group h3 { margin: 0 0 .25em; font-size: 13px; color:#666; text-transform: uppercase; letter-spacing: .04em; }
  .hit { padding: .5em 0; }
  .hit .meta { font-size: 12px; color: #888; }
  .hit a { color: #06c; text-decoration: none; }
  .hit a:hover { text-decoration: underline; }
  mark { background: #ffec99; padding: 0 .15em; }
  .stats { font-size: 12px; color:#666; margin-top: 2em; }
  .empty { color:#999; padding: 1em 0; }
</style>
<h1>Computer Guru Radio Archive</h1>
<div class=sub id=sub>...</div>
<input type=search id=q autofocus placeholder="search transcripts and Q&A — e.g. wireless, virus, BIOS">
<div class=controls>
  <label><input type=radio name=kind value=both checked> both</label>
  <label><input type=radio name=kind value=qa> Q&A only</label>
  <label><input type=radio name=kind value=segments> transcript only</label>
</div>
<div id=results></div>
<div class=stats id=stats></div>
<script>
const q = document.getElementById('q');
const results = document.getElementById('results');
const sub = document.getElementById('sub');
const stats = document.getElementById('stats');

fetch('/api/stats')
  .then(r => {
    if (!r.ok) throw new Error('HTTP ' + r.status);
    return r.json();
  })
  .then(s => {
    const c = s.counts;
    sub.textContent = `${c.episodes} episodes / ${c.qa_pairs} Q&A pairs / ${c.intros} intros / ${c.segments.toLocaleString()} segments`;
  })
  .catch(() => { sub.textContent = 'stats unavailable'; });

let timer;
q.addEventListener('input', () => {
  clearTimeout(timer);
  timer = setTimeout(runSearch, 250);
});
document.querySelectorAll('input[name=kind]').forEach(el => el.addEventListener('change', runSearch));

function fmtTime(s) {
  if (s == null) return '';
  const m = Math.floor(s/60), sec = Math.floor(s%60);
  return `${m}:${sec.toString().padStart(2,'0')}`;
}

const ESC = {'&': '&amp;', '<': '&lt;', '>': '&gt;'};
function esc(v) {
  return String(v == null ? '' : v).replace(/[&<>]/g, ch => ESC[ch]);
}

function snip(s) {
  return esc(s).split('&lt;mark&gt;').join('<mark>').split('&lt;/mark&gt;').join('</mark>');
}

let latest = 0;

async function runSearch() {
  const ticket = ++latest;
  const term = q.value.trim();
  if (term.length < 2) { results.innerHTML = ''; return; }
  const kind = document.querySelector('input[name=kind]:checked').value;
  let j;
  try {
    const r = await fetch(`/api/search?q=${encodeURIComponent(term)}&kind=${kind}&limit=40`);
    if (!r.ok) throw new Error('HTTP ' + r.status);
    j = await r.json();
  } catch (err) {
    if (ticket === latest) results.innerHTML = '<div class=empty>search failed</div>';
    return;
  }
  if (ticket !== latest) return;
  let html = '';
  if (j.qa.length) {
    html += '<div class=group><h3>Q&A Pairs</h3>';
    for (const h of j.qa) {
      const ad = h.air_date ? ` (${esc(h.air_date)})` : '';
      const cn = h.caller_name ? ` — ${esc(h.caller_name)}` : '';
      html += `<div class=hit>
        <div class=meta>${esc(h.year)} · ${esc(h.title)}${ad}${cn} · @ ${fmtTime(h.question_start_sec)}</div>
        <div><b>Q:</b> ${snip(h.q_snippet)}</div>
        <div><b>A:</b> ${snip(h.a_snippet)}</div>
      </div>`;
    }
    html += '</div>';
  }
  if (j.segments.length) {
    html += '<div class=group><h3>Transcript Segments</h3>';
    for (const h of j.segments) {
      const ad = h.air_date ? ` (${esc(h.air_date)})` : '';
      html += `<div class=hit>
        <div class=meta>${esc(h.year)} · ${esc(h.title)}${ad} · @ ${fmtTime(h.start_sec)}</div>
        <div>${snip(h.snippet)}</div>
      </div>`;
    }
    html += '</div>';
  }
  if (!j.qa.length && !j.segments.length) {
    html = '<div class=empty>no hits</div>';
  }
  results.innerHTML = html;
}
</script>
</html>
"""
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point (the Dockerfile CMD runs `python -u main.py`).
    # uvicorn is imported here, so it is only needed when run as a script.
    from uvicorn import run as serve

    # Listen on all interfaces, on the port configured via PORT.
    serve(app, host="0.0.0.0", port=PORT)
|
||||
@@ -0,0 +1,2 @@
|
||||
fastapi==0.115.6
|
||||
uvicorn[standard]==0.34.0
|
||||
Reference in New Issue
Block a user