# Synced files:
#   - Session logs updated
#   - Latest context and credentials
#   - Command/directive updates
# Machine: acg-guru-5070
# Timestamp: 2026-03-22 22:31:46
# Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
# File info: 242 lines, 8.2 KiB, Python
#!/usr/bin/env python3
|
|
"""Test content generation from a transcript using Ollama qwen3:14b.
|
|
|
|
Generates:
|
|
1. Episode analysis (summary, segments, topics, tags, quotes, blog candidates)
|
|
2. Sample forum discussion post
|
|
3. Sample blog post draft
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import ollama
|
|
|
|
# Address of the Ollama server that the shared client talks to.
OLLAMA_HOST = "http://localhost:11434"

# Model tag passed to every chat request.
MODEL = "qwen3:14b"

# Upper bound on how many transcript characters go into the analysis prompt.
# qwen3:14b supports 32k context -- use more of it
MAX_TRANSCRIPT_CHARS = 40_000

# Single module-level client reused by all queries in this script.
client = ollama.Client(host=OLLAMA_HOST)
|
|
|
|
|
|
def load_transcript(transcript_dir: str) -> str:
    """Load the transcript text for one episode.

    Reads ``transcript.txt`` from *transcript_dir*.

    Args:
        transcript_dir: Directory expected to contain ``transcript.txt``.

    Returns:
        The full contents of the transcript file.

    Raises:
        SystemExit: With exit code 1, after printing an error message, when
            the transcript file does not exist.
    """
    txt_path = Path(transcript_dir) / "transcript.txt"
    if not txt_path.exists():
        print(f"ERROR: {txt_path} not found")
        sys.exit(1)

    # Decode explicitly instead of relying on the platform's locale encoding
    # (e.g. cp1252 on Windows), which would garble or reject non-ASCII text.
    # NOTE(review): assumes transcripts are stored as UTF-8 -- confirm.
    return txt_path.read_text(encoding="utf-8")
|
|
|
|
|
|
def timed_query(label: str, prompt: str, temperature: float = 0.3) -> str:
    """Send one prompt to the Ollama model and report how long it took.

    Prints a banner containing *label*, issues a single-message chat request
    through the module-level ``client`` using ``MODEL`` with a 32768-token
    context window, then prints the elapsed seconds and the response length.

    Args:
        label: Human-readable name of the step, shown in the banner.
        prompt: Full prompt text, sent as the only user message.
        temperature: Sampling temperature forwarded in the request options.

    Returns:
        The ``content`` field of the response message.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f" {label}")
    print(banner)

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted during a long request.
    started = time.perf_counter()
    response = client.chat(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        options={"temperature": temperature, "num_ctx": 32768},
    )
    elapsed = time.perf_counter() - started

    result = response["message"]["content"]
    print(f" [{elapsed:.1f}s, {len(result)} chars]")
    return result
|
|
|
|
|
|
def _extract_json_text(raw: str) -> str:
    """Return the part of a model reply most likely to hold the JSON payload."""
    text = raw

    # Remove the reasoning section first. If fences were handled first, a
    # code fence inside the <think> block would be picked as the payload.
    if "</think>" in text:
        text = text.split("</think>")[-1]

    # Strip markdown fences if present
    if "```json" in text:
        text = text.split("```json", 1)[1].split("```", 1)[0]
    elif "```" in text:
        text = text.split("```", 1)[1].split("```", 1)[0]

    return text


def generate_analysis(transcript: str) -> dict:
    """Generate the episode analysis as a dictionary.

    Builds the analysis prompt from the first ``MAX_TRANSCRIPT_CHARS``
    characters of *transcript*, sends it via ``timed_query``, and parses the
    reply as JSON after removing any ``<think>`` section and markdown fences.

    Args:
        transcript: Full transcript text of the episode.

    Returns:
        The parsed JSON object. When the reply is not valid JSON, or is valid
        JSON but not an object, a warning is printed and
        ``{"raw_response": <cleaned reply text>}`` is returned instead.
    """
    prompt = f"""You are analyzing a transcript from "The Computer Guru Show", a live call-in
radio show hosted by Mike Swanson on AM1030 KVOI in Tucson, Arizona. The show covers
technology news, tips, and takes listener calls for free tech support.

Analyze this transcript and provide a JSON response with:

1. "summary": A 2-3 paragraph episode summary suitable for a podcast page. Write in third
person. Be specific about topics and conversations.

2. "segment_summaries": Array of distinct topic segments discussed, each with:
- "title": Compelling segment title
- "summary": 3-5 sentence summary
- "key_points": Array of key takeaway bullet points
- "approximate_position": "early", "mid", or "late" in the show

3. "topics": Array of main topics discussed (short phrases)

4. "tags": Array of SEO-friendly tags (lowercase, hyphenated)

5. "key_quotes": Array of 3-5 notable/quotable moments, each with:
- "quote": The exact quote text
- "speaker": Who said it
- "context": Brief context for why it's notable

6. "blog_post_candidates": Array of 2-3 topics worth expanding into full blog posts, each with:
- "title": Proposed blog post title
- "angle": The specific thesis or angle
- "why": Why this deserves expansion (audience interest, SEO potential, etc.)
- "key_points_to_expand": Array of points from the show to develop further

Respond ONLY with valid JSON. No markdown fencing, no explanation outside the JSON.

## Transcript

{transcript[:MAX_TRANSCRIPT_CHARS]}"""

    result = _extract_json_text(timed_query("Episode Analysis (JSON)", prompt))

    try:
        parsed = json.loads(result.strip())
    except json.JSONDecodeError as e:
        print(f" WARNING: JSON parse failed: {e}")
        print(f" Raw response (first 500 chars): {result[:500]}")
        return {"raw_response": result}

    # json.loads also accepts top-level arrays, strings and numbers; callers
    # use dict methods on the result, so only pass through a real object.
    if not isinstance(parsed, dict):
        print(f" WARNING: expected a JSON object, got {type(parsed).__name__}")
        print(f" Raw response (first 500 chars): {result[:500]}")
        return {"raw_response": result}

    return parsed
|
|
|
|
|
|
def generate_forum_post(transcript: str, analysis: dict) -> str:
    """Generate a forum discussion thread post.

    Builds a prompt from the analysis ``summary`` and ``topics`` plus the
    first 8000 characters of *transcript*, and sends it via ``timed_query``
    with temperature 0.5.

    Args:
        transcript: Full transcript text of the episode.
        analysis: Analysis dictionary; the ``summary`` and ``topics`` keys are
            read and both may be absent.

    Returns:
        The model's reply text.
    """
    summary = analysis.get("summary", "")
    topics = analysis.get("topics", [])

    # ``analysis`` is parsed from model output, so ``topics`` is not guaranteed
    # to be a list of strings. Normalise it rather than letting str.join raise
    # TypeError (non-string items, None) or split a bare string into letters.
    if topics is None:
        topics = []
    elif not isinstance(topics, (list, tuple)):
        topics = [topics]
    topic_line = ", ".join(str(topic) for topic in topics)

    prompt = f"""You are writing a forum discussion post for "The Computer Guru Show" community
forum. The tone should be conversational, engaging, and invite discussion. This is NOT a
formal article -- it's a community post that makes people want to comment.

Show info:
- Host: Mike Swanson ("The Computer Guru")
- Station: AM1030 KVOI, Tucson AZ
- Format: Live call-in tech show

Episode summary: {summary}
Topics covered: {topic_line}

Write a forum discussion post with:
1. A brief, engaging hook (2-3 sentences about the most interesting thing from the episode)
2. Bullet list of topics covered (with one-line teasers, not full summaries)
3. 2-3 discussion questions that invite audience participation
4. A "Listen to the full episode" call-to-action at the end

Keep it under 300 words. Use a casual, friendly tone. No emojis.

Key transcript excerpts for context:
{transcript[:8000]}"""

    return timed_query("Forum Discussion Post", prompt, temperature=0.5)
|
|
|
|
|
|
def generate_blog_post(transcript: str, candidate: dict) -> str:
    """Draft a complete blog post for one blog candidate.

    Args:
        transcript: Full transcript text; its first 12000 characters are
            appended to the prompt as source material.
        candidate: Blog candidate mapping; ``title``, ``angle`` and
            ``key_points_to_expand`` are read, each with a fallback.

    Returns:
        The model's reply text, requested via ``timed_query`` at
        temperature 0.5.
    """
    # Pull the candidate fields out up front so the prompt template stays
    # free of nested lookups.
    title = candidate.get("title", "Untitled")
    angle = candidate.get("angle", "")
    points_json = json.dumps(candidate.get("key_points_to_expand", []))
    excerpt = transcript[:12000]

    prompt = f"""You are writing a blog post for the "Computer Guru Show" website
(radio.azcomputerguru.com). The author is Mike Swanson, a veteran IT professional and
radio host in Tucson, Arizona. His style is:
- Explains complex tech in plain English
- Uses analogies and humor
- Gives practical, actionable advice
- Takes strong positions on consumer rights and privacy
- Speaks directly to the reader

Write a blog post with this info:
- Title: {title}
- Angle: {angle}
- Points to expand: {points_json}

Format:
1. Engaging opening paragraph (hook the reader)
2. 3-5 sections with subheadings
3. Practical "what this means for you" section
4. Key Takeaways (bullet points)
5. Closing paragraph that ties back to the show

Target length: 800-1200 words. Write in first person as Mike Swanson.
Include a note at the bottom: "This topic was discussed on The Computer Guru Show.
Listen to the full episode for more."

Relevant transcript excerpts:
{excerpt}"""

    # The progress banner uses "?" (not "Untitled") when the title is missing.
    step_label = "Blog Post: " + str(candidate.get("title", "?"))
    return timed_query(step_label, prompt, temperature=0.5)
|
|
|
|
|
|
def _slugify(title, max_len: int = 50) -> str:
    """Turn a title into a lowercase, hyphenated, filesystem-safe slug."""
    lowered = str(title).lower()
    # Titles come from model output and may contain "/", ":", "?" and similar
    # characters that are invalid in file names or change the target path.
    hyphenated = "".join(ch if ch.isalnum() else "-" for ch in lowered)
    slug = "-".join(part for part in hyphenated.split("-") if part)
    return slug[:max_len].strip("-") or "draft"


def main():
    """Run the analysis, forum-post, and blog-post steps for one transcript.

    The transcript directory is taken from the first command-line argument and
    defaults to ``training-data/transcripts/2016-s8e42``. Results are printed
    and written to a ``generated`` sub-directory of the transcript directory:
    ``analysis.json``, ``forum-post.md`` and, when the analysis contains at
    least one blog candidate, ``blog-<slug>.md``.
    """
    transcript_dir = (
        sys.argv[1] if len(sys.argv) > 1 else "training-data/transcripts/2016-s8e42"
    )

    print(f"Loading transcript from: {transcript_dir}")
    transcript = load_transcript(transcript_dir)
    print(f"Transcript length: {len(transcript)} chars ({len(transcript.splitlines())} lines)")
    print(f"Sending first {min(len(transcript), MAX_TRANSCRIPT_CHARS)} chars to LLM")

    # Output directory
    output_dir = Path(transcript_dir) / "generated"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: Analysis
    analysis = generate_analysis(transcript)
    if not isinstance(analysis, dict):
        # The steps below use dict methods; wrap any other JSON value.
        analysis = {"raw_response": analysis}
    # Files are written as UTF-8 so model output containing non-ASCII
    # characters does not depend on the platform's locale encoding.
    with open(output_dir / "analysis.json", "w", encoding="utf-8") as f:
        json.dump(analysis, f, indent=2)
    print(f"\n Saved: {output_dir}/analysis.json")

    # Blog candidates come from model output; keep only well-formed entries so
    # the ``.get`` calls below cannot fail on a stray string or number.
    raw_candidates = analysis.get("blog_post_candidates") or []
    if not isinstance(raw_candidates, (list, tuple)):
        raw_candidates = [raw_candidates]
    candidates = [c for c in raw_candidates if isinstance(c, dict)]

    # Print summary
    if "summary" in analysis:
        print("\n--- EPISODE SUMMARY ---")
        print(analysis["summary"])

    if "topics" in analysis:
        print("\n--- TOPICS ---")
        for t in analysis["topics"]:
            print(f" - {t}")

    if "tags" in analysis:
        print("\n--- TAGS ---")
        # str() guards against non-string tags in the model's JSON.
        print(f" {', '.join(str(tag) for tag in analysis['tags'])}")

    if "blog_post_candidates" in analysis:
        print("\n--- BLOG POST CANDIDATES ---")
        for i, c in enumerate(candidates, 1):
            print(f" {i}. {c.get('title', '?')}")
            print(f" Angle: {c.get('angle', '?')}")

    # Step 2: Forum post
    forum_post = generate_forum_post(transcript, analysis)
    with open(output_dir / "forum-post.md", "w", encoding="utf-8") as f:
        f.write(forum_post)
    print(f"\n Saved: {output_dir}/forum-post.md")
    print("\n--- FORUM POST ---")
    print(forum_post)

    # Step 3: Blog post (pick the first candidate)
    if candidates:
        blog_post = generate_blog_post(transcript, candidates[0])
        slug = _slugify(candidates[0].get("title", "draft"))
        with open(output_dir / f"blog-{slug}.md", "w", encoding="utf-8") as f:
            f.write(blog_post)
        print(f"\n Saved: {output_dir}/blog-{slug}.md")
        print("\n--- BLOG POST DRAFT ---")
        print(blog_post)
    else:
        print("\n No blog post candidates found, skipping blog generation")

    print(f"\n{'='*60}")
    print(f" All outputs saved to: {output_dir}/")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()
|