"""Stage 6: Content analysis using Ollama for summary, topics, and post-show debrief.""" import json from dataclasses import dataclass from pathlib import Path from rich.console import Console console = Console() @dataclass class EpisodeAnalysis: summary: str segment_summaries: list[dict] # [{title, summary, key_points}] key_quotes: list[dict] # [{quote, speaker, timestamp}] topics: list[str] tags: list[str] blog_post_candidates: list[dict] # [{title, angle, why}] debrief_draft: str # Markdown debrief template def to_dict(self) -> dict: return { "summary": self.summary, "segment_summaries": self.segment_summaries, "key_quotes": self.key_quotes, "topics": self.topics, "tags": self.tags, "blog_post_candidates": self.blog_post_candidates, } def save(self, output_dir: Path): output_dir.mkdir(parents=True, exist_ok=True) with open(output_dir / "analysis.json", "w") as f: json.dump(self.to_dict(), f, indent=2) with open(output_dir / "post-show-debrief.md", "w") as f: f.write(self.debrief_draft) console.print(f"[green]Analysis saved to {output_dir}[/green]") def analyze_episode(transcript_text: str, diarization_data: dict | None = None, show_prep: str | None = None, segments: list | None = None, model: str = "qwen3:14b", ollama_host: str = "http://localhost:11434") -> EpisodeAnalysis: """Analyze a transcribed episode using a local LLM.""" import ollama as ollama_client console.print(f"[bold]Analyzing episode with {model}[/bold]") client = ollama_client.Client(host=ollama_host) # Build context for the LLM context_parts = [] if show_prep: context_parts.append(f"## Show Prep (planned topics)\n\n{show_prep[:3000]}") context_parts.append(f"## Transcript\n\n{transcript_text[:12000]}") if diarization_data: speakers = diarization_data.get("speaker_map", {}) if speakers: speaker_info = "\n".join(f"- {v}" for v in speakers.values()) context_parts.append(f"## Speakers Identified\n\n{speaker_info}") context = "\n\n---\n\n".join(context_parts) # Query 1: Episode summary and segment summaries summary_prompt = f"""You are analyzing a radio show episode transcript. Provide a JSON response with: 1. "summary": A 2-3 paragraph episode summary suitable for a podcast episode page. Write in third person. Be specific about topics discussed. 2. "segment_summaries": An array of objects, each with: - "title": A compelling segment title - "summary": 3-5 sentence summary - "key_points": Array of key takeaway bullet points 3. "topics": Array of main topics discussed (short phrases) 4. "tags": Array of SEO-friendly tags (lowercase, hyphenated) 5. "key_quotes": Array of notable quotes, each with: - "quote": The quote text - "speaker": Who said it (if identifiable) - "context": Brief context 6. "blog_post_candidates": Array of topics worth expanding into blog posts, each with: - "title": Proposed blog post title - "angle": The specific angle or thesis - "why": Why this topic deserves expansion Respond ONLY with valid JSON, no markdown fencing. {context}""" console.print("[dim]Generating episode analysis...[/dim]") response = client.chat( model=model, messages=[{"role": "user", "content": summary_prompt}], options={"temperature": 0.3, "num_ctx": 16384}, ) # Parse LLM response response_text = response["message"]["content"] # Strip markdown code fences if present if "```json" in response_text: response_text = response_text.split("```json", 1)[1] response_text = response_text.split("```", 1)[0] elif "```" in response_text: response_text = response_text.split("```", 1)[1] response_text = response_text.split("```", 1)[0] try: analysis_data = json.loads(response_text.strip()) except json.JSONDecodeError: console.print("[yellow]LLM response was not valid JSON, using raw text[/yellow]") analysis_data = { "summary": response_text, "segment_summaries": [], "topics": [], "tags": [], "key_quotes": [], "blog_post_candidates": [], } # Query 2: Generate debrief draft debrief_prompt = f"""Based on this radio show transcript, generate a post-show debrief in markdown format. Compare what was discussed against the show prep (planned topics) to identify what made it in, what was cut, and what was added. Format: # Post-Show Debrief ## Episode: [derive title from content] ## Air Date: [today's date if not clear] ### What Made It In [For each planned segment, note: Used / Modified / Cut] ### What Changed Live [Topics expanded, cut short, or reordered vs. prep] ### Caller/Audience Interaction [Any caller topics or audience engagement noted in transcript] ### Unplanned Additions [Topics not in prep that came up] ### Best Moments [Most compelling segments or quotes] ### Topics That Deserve More [Topics that were rushed or generated high interest] ### Suggested Blog Posts [2-3 specific blog post ideas with proposed titles and angles] {context}""" console.print("[dim]Generating debrief draft...[/dim]") debrief_response = client.chat( model=model, messages=[{"role": "user", "content": debrief_prompt}], options={"temperature": 0.4, "num_ctx": 16384}, ) debrief_text = debrief_response["message"]["content"] console.print("[green]Analysis complete[/green]") return EpisodeAnalysis( summary=analysis_data.get("summary", ""), segment_summaries=analysis_data.get("segment_summaries", []), key_quotes=analysis_data.get("key_quotes", []), topics=analysis_data.get("topics", []), tags=analysis_data.get("tags", []), blog_post_candidates=analysis_data.get("blog_post_candidates", []), debrief_draft=debrief_text, )