Synced files:
- Grepai optimization documentation
- Ollama Assistant MCP server implementation
- Session logs and context updates

Machine: ACG-M-L5090
Timestamp: 2026-01-22 19:22:24
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
#!/usr/bin/env python3
"""
Ollama MCP Server
Provides local AI assistance to Claude Code via MCP protocol
"""

import asyncio
import json
import sys
from typing import Any

import httpx

# MCP imports
try:
    from mcp.server import Server
    from mcp.types import Tool, TextContent
except ImportError:
    print("[ERROR] MCP package not installed. Run: pip install mcp", file=sys.stderr)
    sys.exit(1)

# Configuration
OLLAMA_HOST = "http://localhost:11434"
DEFAULT_MODEL = "llama3.1:8b"

# Create MCP server
app = Server("ollama-assistant")


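# Tool definitions exposed to the MCP client: ask_ollama, analyze_code_local,
# summarize_large_file, and ollama_status.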
@app.list_tools()
async def list_tools() -> list[Tool]:
    """List available Ollama tools"""
    return [
        Tool(
            name="ask_ollama",
            description="Ask the local Ollama model a question. Use for simple queries, code review, or when you want a second opinion. The model has no context of the conversation.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "The question or task for Ollama"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model to use (default: llama3.1:8b)",
                        "default": DEFAULT_MODEL
                    },
                    "system": {
                        "type": "string",
                        "description": "System prompt to set context/role",
                        "default": "You are a helpful coding assistant."
                    }
                },
                "required": ["prompt"]
            }
        ),
        Tool(
            name="analyze_code_local",
            description="Analyze code using the local Ollama model. Good for privacy-sensitive code or large codebases. Returns analysis without sending code to external APIs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Code to analyze"
                    },
                    "language": {
                        "type": "string",
                        "description": "Programming language"
                    },
                    "analysis_type": {
                        "type": "string",
                        "enum": ["security", "performance", "quality", "bugs", "general"],
                        "description": "Type of analysis to perform",
                        "default": "general"
                    }
                },
                "required": ["code", "language"]
            }
        ),
        Tool(
            name="summarize_large_file",
            description="Summarize large files using the local model. No size limits or API costs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "content": {
                        "type": "string",
                        "description": "File content to summarize"
                    },
                    "summary_length": {
                        "type": "string",
                        "enum": ["brief", "detailed", "technical"],
                        "default": "brief"
                    }
                },
                "required": ["content"]
            }
        ),
        Tool(
            name="ollama_status",
            description="Check Ollama server status and list available models",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        )
    ]


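# Tool dispatcher: routes each MCP tool call by name; results and handled errors
# are returned to the client as TextContent, and an unknown tool name raises ValueError.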
@app.call_tool()
async def call_tool(name: str, arguments: Any) -> list[TextContent]:
    """Execute an Ollama tool"""

    if name == "ask_ollama":
        prompt = arguments["prompt"]
        model = arguments.get("model", DEFAULT_MODEL)
        system = arguments.get("system", "You are a helpful coding assistant.")

        try:
            response = await query_ollama(prompt, model, system)
            return [TextContent(type="text", text=response)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Ollama query failed: {str(e)}")]

    elif name == "analyze_code_local":
        code = arguments["code"]
        language = arguments["language"]
        analysis_type = arguments.get("analysis_type", "general")

        system = f"You are a {language} code analyzer. Focus on {analysis_type} analysis. Be concise and specific."
        prompt = f"Analyze this {language} code for {analysis_type} issues:\n\n```{language}\n{code}\n```\n\nProvide specific findings with line references where possible."

        # Try a code-specific model if available, falling back to the default model
        try:
            response = await query_ollama(prompt, "qwen2.5-coder:7b", system)
        except Exception:
            try:
                response = await query_ollama(prompt, "codellama:13b", system)
            except Exception:
                response = await query_ollama(prompt, DEFAULT_MODEL, system)

        return [TextContent(type="text", text=response)]

    elif name == "summarize_large_file":
        content = arguments["content"]
        summary_length = arguments.get("summary_length", "brief")

        length_instructions = {
            "brief": "Create a concise 2-3 sentence summary.",
            "detailed": "Create a comprehensive paragraph summary covering main points.",
            "technical": "Create a technical summary highlighting key functions, classes, and architecture."
        }

        system = f"You are a file summarizer. {length_instructions[summary_length]}"
        prompt = f"Summarize this content:\n\n{content[:50000]}"  # Limit to the first 50k characters

        response = await query_ollama(prompt, DEFAULT_MODEL, system)
        return [TextContent(type="text", text=response)]

    elif name == "ollama_status":
        try:
            status = await check_ollama_status()
            return [TextContent(type="text", text=status)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Failed to check Ollama status: {str(e)}")]

    else:
        raise ValueError(f"Unknown tool: {name}")


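# Single, non-streaming request to Ollama's /api/generate endpoint: the full
# completion comes back in one JSON payload under the "response" key.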
async def query_ollama(prompt: str, model: str, system: str) -> str:
    """Query Ollama API"""
    async with httpx.AsyncClient(timeout=120.0) as client:
        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "system": system,
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "top_p": 0.9
                    }
                }
            )
            response.raise_for_status()
            result = response.json()
            return result["response"]
        except httpx.ConnectError:
            raise Exception(f"Cannot connect to Ollama at {OLLAMA_HOST}. Is Ollama running? Try: ollama serve")
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                raise Exception(f"Model '{model}' not found. Pull it with: ollama pull {model}")
            raise Exception(f"Ollama API error: {e.response.status_code} - {e.response.text}")


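# Health check: verifies the Ollama server responds, then lists installed models
# via /api/tags along with their approximate size.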
async def check_ollama_status() -> str:
    """Check Ollama server status and list models"""
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            # Check server
            await client.get(f"{OLLAMA_HOST}/")

            # List models
            response = await client.get(f"{OLLAMA_HOST}/api/tags")
            response.raise_for_status()
            models = response.json().get("models", [])

            if not models:
                return "[WARNING] Ollama is running but no models are installed. Pull a model with: ollama pull llama3.1:8b"

            status = "[OK] Ollama is running\n\nAvailable models:\n"
            for model in models:
                name = model["name"]
                size = model.get("size", 0) / (1024**3)  # Convert to GB
                status += f"  - {name} ({size:.1f} GB)\n"

            return status

        except httpx.ConnectError:
            return "[ERROR] Ollama is not running. Start it with: ollama serve\nOr install from: https://ollama.ai/download"


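# Entry point: the server speaks MCP over stdio, so the client launches this
# script as a subprocess and exchanges messages via stdin/stdout.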
async def main():
    """Run MCP server"""
    try:
        from mcp.server.stdio import stdio_server

        async with stdio_server() as (read_stream, write_stream):
            await app.run(
                read_stream,
                write_stream,
                app.create_initialization_options()
            )
    except Exception as e:
        print(f"[ERROR] MCP server failed: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
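
As a quick sanity check outside of MCP, the Ollama endpoint this server depends on can be exercised directly. The sketch below is a minimal, illustrative example only: it assumes Ollama is listening on the default http://localhost:11434 and that llama3.1:8b has already been pulled; the prompt text is arbitrary.

#!/usr/bin/env python3
"""Standalone smoke test for the Ollama endpoint used by the MCP server (illustrative)."""

import asyncio

import httpx

OLLAMA_HOST = "http://localhost:11434"  # assumed default Ollama address


async def smoke_test() -> None:
    # Mirrors query_ollama(): one non-streaming POST to /api/generate.
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{OLLAMA_HOST}/api/generate",
            json={
                "model": "llama3.1:8b",  # assumes this model has been pulled
                "prompt": "Reply with the single word: ready",
                "stream": False,
            },
        )
        response.raise_for_status()
        print(response.json()["response"])


if __name__ == "__main__":
    asyncio.run(smoke_test())

If this prints a reply, the query_ollama() path in the server should work as well. Registering the server itself is client-specific: an MCP client such as Claude Code is pointed at this script and launches it over stdio, so consult the client's MCP configuration documentation for the exact format.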