# claudetools/projects/radio-show/audio-processor/src/show_prep.py

"""
Show prep generator: search the archive index for past caller topics,
extract clips, and generate "then vs now" talking points via Ollama.
"""

import json
from datetime import datetime
from pathlib import Path
from typing import Optional

from rich import box
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

from .clip_extractor import extract_clips_for_results, format_timestamp
from .indexer import ArchiveIndex, QAResult, SearchResult

# Module-wide Rich console; every status line and table in this file is printed through it.
console = Console()


def generate_show_prep(
    index: ArchiveIndex,
    topic: str,
    output_dir: Path,
    extract_clips: bool = True,
    ollama_host: str = "http://localhost:11434",
    ollama_model: str = "qwen3:14b",
    limit: int = 10,
) -> Optional[Path]:
    """
    Build a markdown show-prep file for ``topic`` from the archive index.

    Steps, in order:
      1. Query ``index.search_qa`` (caller exchanges) and ``index.search``
         (raw transcript segments), each capped at ``limit`` results.
      2. Print a summary of the hits to the console.
      3. If ``extract_clips`` is true and there are Q&A hits, extract clips
         for them into ``output_dir / "clips"``.
      4. Ask Ollama (``ollama_host`` / ``ollama_model``) for "then vs now"
         talking points.
      5. Write ``<YYYY-MM-DD>-<topic-slug>-prep.md`` into ``output_dir``.

    ``output_dir`` is created if it does not exist.

    Returns the path of the generated markdown file, or ``None`` when
    neither search produced any result (no file is written in that case).
    """
    # Local import: the topic is free text, so it must be escaped before it is
    # embedded in Rich markup (otherwise "[...]" in a topic is parsed as a tag).
    from rich.markup import escape

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    console.print(Panel.fit(f"[bold]Show Prep:[/bold] {escape(topic)}", border_style="blue"))

    # Search Q&A pairs first (caller exchanges)
    qa_results = index.search_qa(topic, limit=limit)
    # Also search raw segments (for monologue mentions)
    segment_results = index.search(topic, limit=limit)

    if not qa_results and not segment_results:
        console.print(f"[yellow]No results found for: {escape(topic)}[/yellow]")
        return None

    # Display results table
    _print_results_table(qa_results, segment_results, topic)

    # Extract clips (only Q&A results are clipped)
    clip_paths = {}
    if extract_clips and qa_results:
        clips_dir = output_dir / "clips"
        console.print(f"\n[dim]Extracting {len(qa_results)} clip(s)...[/dim]")
        clip_paths = extract_clips_for_results(qa_results, clips_dir)

    # Generate then-vs-now content via Ollama
    then_now = _generate_then_vs_now(topic, qa_results, segment_results,
                                      ollama_host, ollama_model)

    # Build a filename-safe slug. Whitespace, both path separators and the
    # characters Windows forbids in filenames each become "-"; topics made only
    # of spaces and "/" still map to the same slug as before.
    unsafe_chars = ' \t\r\n/\\:*?"<>|'
    slug_table = str.maketrans(unsafe_chars, "-" * len(unsafe_chars))
    # Fall back to a fixed word so an empty topic does not yield "<date>--prep.md".
    safe_topic = topic.lower().translate(slug_table)[:40] or "topic"
    date_str = datetime.now().strftime("%Y-%m-%d")
    prep_path = output_dir / f"{date_str}-{safe_topic}-prep.md"

    _write_prep_file(prep_path, topic, qa_results, segment_results,
                     clip_paths, then_now)

    console.print(f"\n[bold green]Prep file:[/bold green] {prep_path}")
    return prep_path


def _print_results_table(qa_results: list[QAResult], segment_results: list[SearchResult],
                          topic: str):
    """
    Print the search hits for ``topic`` to the module console.

    When ``qa_results`` is non-empty they are shown as a table (date or
    episode id, timestamps, duration, truncated caller question, topic label).
    Transcript segments are listed only as a fallback, when there are no Q&A
    results at all. Prints nothing if both lists are empty.
    """
    # Local import: transcript text and topics are free text and may contain
    # "[...]" sequences that Rich would otherwise interpret as markup tags.
    from rich.markup import escape

    if qa_results:
        table = Table(title=f'Caller Q&A — "{escape(topic)}"', box=box.SIMPLE, show_lines=True)
        table.add_column("Date", style="cyan", width=12)
        table.add_column("Timestamps", style="dim", width=14)
        table.add_column("Duration", style="dim", width=8)
        table.add_column("Caller asked", width=35)
        table.add_column("Topic", style="green", width=20)

        for r in qa_results:
            dur = r.duration()
            # Truncate first, then escape, so an escape sequence is never cut in half.
            question = escape(r.question_text[:80]) + ("…" if len(r.question_text) > 80 else "")
            table.add_row(
                r.date or r.episode_id,
                r.timestamp_str(),
                f"{int(dur//60)}m{int(dur%60):02d}s",
                question,
                escape(r.topic or "—"),
            )
        console.print(table)

    if segment_results and not qa_results:
        console.print(f"\n[dim]No structured Q&A found. Showing {len(segment_results)} "
                      f"transcript mentions:[/dim]")
        for r in segment_results:
            # Only mark the snippet as truncated when it actually was,
            # matching the "Caller asked" column above.
            snippet = escape(r.text[:100]) + ("…" if len(r.text) > 100 else "")
            console.print(f"  [cyan]{r.date}[/cyan] [{r.timestamp_str()}] "
                          f"[dim]{escape(str(r.speaker))}[/dim]: {snippet}")


def _generate_then_vs_now(topic: str, qa_results: list, segment_results: list,
                           ollama_host: str, model: str) -> str:
    """
    Ask an Ollama model for "then vs now" talking points about ``topic``.

    The prompt context is built from at most five Q&A results (question capped
    at 200 characters, answer at 400). If there are no Q&A results, at most
    five transcript segments (300 characters each) are used instead.

    Returns:
        - the model's reply text on success;
        - ``""`` when there is no past context to send (nothing is imported
          or contacted in that case);
        - an italic markdown note when the ``ollama`` package is missing or
          when creating the client / running the chat request fails.

    Ollama problems are reported through the return value rather than raised,
    so a failed generation does not abort the rest of the prep.
    """
    # Build context from past discussions
    excerpts = []
    for r in qa_results[:5]:
        date = r.date or r.episode_id
        excerpts.append(
            f"\n[{date}] Caller: {r.question_text[:200]}\n"
            f"Host answer: {r.answer_text[:400]}\n"
        )

    # Fall back to raw transcript mentions only when no Q&A context exists.
    if not excerpts and segment_results:
        excerpts = [
            f"\n[{r.date}] {r.speaker}: {r.text[:300]}\n"
            for r in segment_results[:5]
        ]

    past_context = "".join(excerpts)
    if not past_context:
        return ""

    # Optional dependency: imported lazily so the rest of the module works without it.
    try:
        import ollama
    except ImportError:
        return "_Ollama not available — install with: pip install ollama_"

    prompt = f"""You are helping prepare talking points for a technology radio show host.
The host discussed "{topic}" in past episodes. Here are excerpts:

{past_context}

The host wants to do a new segment revisiting this topic.

Write talking points in this format:
## What I Said Then
- [2-3 bullets summarizing the past advice/position]

## What's Changed Since Then
- [2-3 bullets on how the technology/situation has evolved]

## Why My Answer Is Different Now
- [2-3 bullets on the updated recommendation/position]

## Suggested Opening
[1-2 sentences the host can use to open the segment, referencing the old clip]

Keep it conversational, radio-friendly. Be specific about what actually changed."""

    # Deliberately broad: this is a best-effort step, and any failure (bad host,
    # server down, unknown model, unexpected response shape) should degrade to a
    # note in the prep file instead of crashing. Client construction is inside
    # the guard for the same reason.
    try:
        client = ollama.Client(host=ollama_host)
        resp = client.chat(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            options={"temperature": 0.3},
        )
        return resp["message"]["content"]
    except Exception as e:
        return f"_Ollama generation failed: {e}_"


def _write_prep_file(path: Path, topic: str, qa_results: list, segment_results: list,
                      clip_paths: dict, then_now: str):
    """
    Render the show-prep markdown document and write it to ``path`` (UTF-8).

    Sections, in order:
      - title and generation timestamp;
      - "Past Caller Exchanges" for ``qa_results`` (answers capped at 600
        characters), or, only when there are no Q&A results, "Transcript
        Mentions" for ``segment_results`` (text capped at 200 characters);
      - "Then vs Now" when ``then_now`` is non-empty;
      - "Clips" when ``clip_paths`` is non-empty.

    ``clip_paths`` is looked up by the position of a result in ``qa_results``
    and its values must expose ``.name``; entries whose index is past the end
    of ``qa_results`` are skipped in the clip list.
    """
    lines = [
        f"# Show Prep: {topic}",
        "",
        f"_Generated {datetime.now().strftime('%Y-%m-%d %H:%M')}_",
        "",
    ]

    if qa_results:
        lines += [f"## Past Caller Exchanges ({len(qa_results)} found)", ""]
        for i, r in enumerate(qa_results):
            clip_info = f" — `{clip_paths[i].name}`" if i in clip_paths else ""
            answer = r.answer_text
            lines += [
                f"### {r.date or r.episode_id} — [{r.timestamp_str()}]{clip_info}",
                f"**Caller:** {r.question_text}",
                "",
                f"**Host:** {answer[:600]}{'…' if len(answer) > 600 else ''}",
                "",
            ]

    elif segment_results:
        lines += [f"## Transcript Mentions ({len(segment_results)} found)", ""]
        lines += [
            f"- **{r.date}** [{r.timestamp_str()}] ({r.speaker}): {r.text[:200]}"
            for r in segment_results
        ]
        lines.append("")

    if then_now:
        lines += ["## Then vs Now", "", then_now, ""]

    if clip_paths:
        lines += [
            "## Clips",
            "",
            "Extracted to `clips/` — drag into Audition/Audacity:",
            "",
        ]
        for i, p in clip_paths.items():
            if i < len(qa_results):
                r = qa_results[i]
                # Same date-or-episode label as the exchange heading above, so a
                # result without a date is not listed as "None".
                lines.append(f"- `{p.name}` — {r.date or r.episode_id} [{r.timestamp_str()}]")
        lines.append("")

    path.write_text("\n".join(lines), encoding="utf-8")