#!/usr/bin/env python3
"""
Ollama MCP Server
Provides local AI assistance to Claude Code via the MCP protocol
"""

import asyncio
import json
import sys
from typing import Any

import httpx

# MCP imports
try:
    from mcp.server import Server
    from mcp.types import Tool, TextContent
except ImportError:
    print("[ERROR] MCP package not installed. Run: pip install mcp", file=sys.stderr)
    sys.exit(1)

# Configuration
OLLAMA_HOST = "http://localhost:11434"
DEFAULT_MODEL = "llama3.1:8b"

# Create MCP server
app = Server("ollama-assistant")


@app.list_tools()
async def list_tools() -> list[Tool]:
    """List available Ollama tools"""
    return [
        Tool(
            name="ask_ollama",
            description="Ask the local Ollama model a question. Use for simple queries, code review, or when you want a second opinion. The model has no context of the conversation.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "The question or task for Ollama"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model to use (default: llama3.1:8b)",
                        "default": DEFAULT_MODEL
                    },
                    "system": {
                        "type": "string",
                        "description": "System prompt to set context/role",
                        "default": "You are a helpful coding assistant."
                    }
                },
                "required": ["prompt"]
            }
        ),
        Tool(
            name="analyze_code_local",
            description="Analyze code using local Ollama model. Good for privacy-sensitive code or large codebases. Returns analysis without sending code to external APIs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Code to analyze"
                    },
                    "language": {
                        "type": "string",
                        "description": "Programming language"
                    },
                    "analysis_type": {
                        "type": "string",
                        "enum": ["security", "performance", "quality", "bugs", "general"],
                        "description": "Type of analysis to perform",
                        "default": "general"
                    }
                },
                "required": ["code", "language"]
            }
        ),
        Tool(
            name="summarize_large_file",
            description="Summarize large files using local model. No size limits or API costs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "content": {
                        "type": "string",
                        "description": "File content to summarize"
                    },
                    "summary_length": {
                        "type": "string",
                        "enum": ["brief", "detailed", "technical"],
                        "default": "brief"
                    }
                },
                "required": ["content"]
            }
        ),
        Tool(
            name="ollama_status",
            description="Check Ollama server status and list available models",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        )
    ]


@app.call_tool()
async def call_tool(name: str, arguments: Any) -> list[TextContent]:
    """Execute Ollama tool"""
    if name == "ask_ollama":
        prompt = arguments["prompt"]
        model = arguments.get("model", DEFAULT_MODEL)
        system = arguments.get("system", "You are a helpful coding assistant.")

        try:
            response = await query_ollama(prompt, model, system)
            return [TextContent(type="text", text=response)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Ollama query failed: {str(e)}")]

    elif name == "analyze_code_local":
        code = arguments["code"]
        language = arguments["language"]
        analysis_type = arguments.get("analysis_type", "general")

        system = f"You are a {language} code analyzer. Focus on {analysis_type} analysis. Be concise and specific."
        prompt = f"Analyze this {language} code for {analysis_type} issues:\n\n```{language}\n{code}\n```\n\nProvide specific findings with line references where possible."
        # Try a code-specific model if available, falling back to the default
        try:
            response = await query_ollama(prompt, "qwen2.5-coder:7b", system)
        except Exception:
            try:
                response = await query_ollama(prompt, "codellama:13b", system)
            except Exception:
                response = await query_ollama(prompt, DEFAULT_MODEL, system)

        return [TextContent(type="text", text=response)]

    elif name == "summarize_large_file":
        content = arguments["content"]
        summary_length = arguments.get("summary_length", "brief")

        length_instructions = {
            "brief": "Create a concise 2-3 sentence summary.",
            "detailed": "Create a comprehensive paragraph summary covering main points.",
            "technical": "Create a technical summary highlighting key functions, classes, and architecture."
        }

        system = f"You are a file summarizer. {length_instructions[summary_length]}"
        prompt = f"Summarize this content:\n\n{content[:50000]}"  # Limit to first 50k chars

        response = await query_ollama(prompt, DEFAULT_MODEL, system)
        return [TextContent(type="text", text=response)]

    elif name == "ollama_status":
        try:
            status = await check_ollama_status()
            return [TextContent(type="text", text=status)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Failed to check Ollama status: {str(e)}")]

    else:
        raise ValueError(f"Unknown tool: {name}")


async def query_ollama(prompt: str, model: str, system: str) -> str:
    """Query the Ollama generate API"""
    async with httpx.AsyncClient(timeout=120.0) as client:
        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "system": system,
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "top_p": 0.9
                    }
                }
            )
            response.raise_for_status()
            result = response.json()
            return result["response"]
        except httpx.ConnectError:
            raise Exception(f"Cannot connect to Ollama at {OLLAMA_HOST}. Is Ollama running? Try: ollama serve")
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                raise Exception(f"Model '{model}' not found. Pull it with: ollama pull {model}")
            raise Exception(f"Ollama API error: {e.response.status_code} - {e.response.text}")


async def check_ollama_status() -> str:
    """Check Ollama server status and list models"""
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            # Check that the server is reachable
            await client.get(f"{OLLAMA_HOST}/")

            # List installed models
            response = await client.get(f"{OLLAMA_HOST}/api/tags")
            response.raise_for_status()

            models = response.json().get("models", [])
            if not models:
                return "[WARNING] Ollama is running but no models are installed. Pull a model with: ollama pull llama3.1:8b"

            status = "[OK] Ollama is running\n\nAvailable models:\n"
            for model in models:
                name = model["name"]
                size = model.get("size", 0) / (1024**3)  # Convert bytes to GB
                status += f" - {name} ({size:.1f} GB)\n"
            return status
        except httpx.ConnectError:
            return "[ERROR] Ollama is not running. Start it with: ollama serve\nOr install from: https://ollama.ai/download"


async def main():
    """Run the MCP server over stdio"""
    try:
        from mcp.server.stdio import stdio_server

        async with stdio_server() as (read_stream, write_stream):
            await app.run(
                read_stream,
                write_stream,
                app.create_initialization_options()
            )
    except Exception as e:
        print(f"[ERROR] MCP server failed: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
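# --- Example registration (sketch, not part of the server) -------------------
# A minimal sketch of how this server could be registered with Claude Code via
# a project-scoped MCP config. The file name "ollama_mcp_server.py" and the
# exact location of the config are assumptions; check the MCP / Claude Code
# documentation for the format your version expects.
#
# {
#   "mcpServers": {
#     "ollama-assistant": {
#       "command": "python3",
#       "args": ["ollama_mcp_server.py"]
#     }
#   }
# }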