diff --git a/projects/radio-show/audio-processor/import_to_sqlite.py b/projects/radio-show/audio-processor/import_to_sqlite.py index 18b0b6c..8ac478f 100644 --- a/projects/radio-show/audio-processor/import_to_sqlite.py +++ b/projects/radio-show/audio-processor/import_to_sqlite.py @@ -87,10 +87,15 @@ CREATE TABLE IF NOT EXISTS qa_pairs ( caller_name TEXT, caller_role TEXT, topic TEXT, - topic_tags TEXT + topic_tags TEXT, + usefulness_score INTEGER, + topic_class TEXT, + is_banter INTEGER ); CREATE INDEX IF NOT EXISTS idx_qa_episode ON qa_pairs(episode_id); CREATE INDEX IF NOT EXISTS idx_qa_caller ON qa_pairs(caller_name); +-- Indexes on quality columns (usefulness_score, topic_class) are created by +-- _migrate_qa_quality_columns() so they apply to both fresh and migrated DBs. CREATE VIRTUAL TABLE IF NOT EXISTS segments_fts USING fts5( text, @@ -133,9 +138,42 @@ def init_schema(conn: sqlite3.Connection): conn.executescript(SCHEMA) conn.executescript(TRIGGERS) conn.execute("PRAGMA foreign_keys = ON") + _migrate_qa_quality_columns(conn) conn.commit() +# Columns added by the Q&A quality classifier (Track 1). +# Defined here so the migration is idempotent and runs on every invocation: +# the SCHEMA above creates them on a fresh DB; this block ALTERs an existing +# qa_pairs that pre-dates them. Names + types must match SCHEMA exactly. +QA_QUALITY_COLUMNS = ( + ("usefulness_score", "INTEGER"), + ("topic_class", "TEXT"), + ("is_banter", "INTEGER"), +) + + +def _migrate_qa_quality_columns(conn: sqlite3.Connection) -> None: + """Add Q&A quality columns to qa_pairs if they're missing. + + Idempotent: existing column values are untouched; new columns default NULL. + Safe to call on fresh DBs (qa_pairs already has the columns from SCHEMA - + PRAGMA table_info reflects that and we no-op). 
+ """ + existing = {row[1] for row in conn.execute("PRAGMA table_info(qa_pairs)").fetchall()} + if not existing: + # qa_pairs hasn't been created yet (shouldn't happen post-SCHEMA, but be safe) + return + for col, col_type in QA_QUALITY_COLUMNS: + if col not in existing: + conn.execute(f"ALTER TABLE qa_pairs ADD COLUMN {col} {col_type}") + # Indexes on the new columns. SCHEMA deliberately leaves them out, so this is + # the only place they are created; CREATE INDEX IF NOT EXISTS keeps the call + # idempotent on both brand-new and migrated DBs. + conn.execute("CREATE INDEX IF NOT EXISTS idx_qa_usefulness ON qa_pairs(usefulness_score)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_qa_topic_class ON qa_pairs(topic_class)") + + def sha256_file(path: Path) -> str: h = hashlib.sha256() with open(path, "rb") as f: