claudetools/mcp-servers/ollama-assistant/server.py

#!/usr/bin/env python3
"""
Ollama MCP Server
Provides local AI assistance to Claude Code via MCP protocol
"""
import asyncio
import json
import sys
from typing import Any
import httpx
# MCP imports
try:
    from mcp.server import Server
    from mcp.types import Tool, TextContent
except ImportError:
    print("[ERROR] MCP package not installed. Run: pip install mcp", file=sys.stderr)
    sys.exit(1)

# Configuration
OLLAMA_HOST = "http://localhost:11434"
DEFAULT_MODEL = "llama3.1:8b"
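# NOTE: the settings above assume a local Ollama instance on its default port (11434)
# and that the default model has already been pulled, e.g. `ollama pull llama3.1:8b`.
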
# Create MCP server
app = Server("ollama-assistant")
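# Registration sketch (an assumption, not part of this file: the exact config file and
# key names depend on the MCP client and version; Claude Desktop / Claude Code use an
# "mcpServers" JSON map along these lines):
#
#   "mcpServers": {
#     "ollama-assistant": {
#       "command": "python",
#       "args": ["/path/to/claudetools/mcp-servers/ollama-assistant/server.py"]
#     }
#   }
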
@app.list_tools()
async def list_tools() -> list[Tool]:
    """List available Ollama tools"""
    return [
        Tool(
            name="ask_ollama",
            description="Ask the local Ollama model a question. Use for simple queries, code review, or when you want a second opinion. The model has no context of the conversation.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "The question or task for Ollama"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model to use (default: llama3.1:8b)",
                        "default": DEFAULT_MODEL
                    },
                    "system": {
                        "type": "string",
                        "description": "System prompt to set context/role",
                        "default": "You are a helpful coding assistant."
                    }
                },
                "required": ["prompt"]
            }
        ),
        Tool(
            name="analyze_code_local",
            description="Analyze code using the local Ollama model. Good for privacy-sensitive code or large codebases. Returns analysis without sending code to external APIs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Code to analyze"
                    },
                    "language": {
                        "type": "string",
                        "description": "Programming language"
                    },
                    "analysis_type": {
                        "type": "string",
                        "enum": ["security", "performance", "quality", "bugs", "general"],
                        "description": "Type of analysis to perform",
                        "default": "general"
                    }
                },
                "required": ["code", "language"]
            }
        ),
        Tool(
            name="summarize_large_file",
            description="Summarize large files using the local model. No size limits or API costs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "content": {
                        "type": "string",
                        "description": "File content to summarize"
                    },
                    "summary_length": {
                        "type": "string",
                        "enum": ["brief", "detailed", "technical"],
                        "default": "brief"
                    }
                },
                "required": ["content"]
            }
        ),
        Tool(
            name="ollama_status",
            description="Check Ollama server status and list available models",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        )
    ]

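# Illustrative only -- an example `arguments` payload an MCP client might send for the
# ask_ollama tool defined above (the values are hypothetical):
#
#   {"prompt": "Review this function for edge cases", "model": "llama3.1:8b"}
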
@app.call_tool()
async def call_tool(name: str, arguments: Any) -> list[TextContent]:
    """Execute Ollama tool"""
    if name == "ask_ollama":
        prompt = arguments["prompt"]
        model = arguments.get("model", DEFAULT_MODEL)
        system = arguments.get("system", "You are a helpful coding assistant.")
        try:
            response = await query_ollama(prompt, model, system)
            return [TextContent(type="text", text=response)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Ollama query failed: {str(e)}")]

    elif name == "analyze_code_local":
        code = arguments["code"]
        language = arguments["language"]
        analysis_type = arguments.get("analysis_type", "general")
        system = f"You are a {language} code analyzer. Focus on {analysis_type} analysis. Be concise and specific."
        prompt = f"Analyze this {language} code for {analysis_type} issues:\n\n```{language}\n{code}\n```\n\nProvide specific findings with line references where possible."
        # Try a code-specific model first; fall back to the default if it is unavailable
        try:
            response = await query_ollama(prompt, "qwen2.5-coder:7b", system)
        except Exception:
            try:
                response = await query_ollama(prompt, "codellama:13b", system)
            except Exception:
                response = await query_ollama(prompt, DEFAULT_MODEL, system)
        return [TextContent(type="text", text=response)]

    elif name == "summarize_large_file":
        content = arguments["content"]
        summary_length = arguments.get("summary_length", "brief")
        length_instructions = {
            "brief": "Create a concise 2-3 sentence summary.",
            "detailed": "Create a comprehensive paragraph summary covering main points.",
            "technical": "Create a technical summary highlighting key functions, classes, and architecture."
        }
        system = f"You are a file summarizer. {length_instructions[summary_length]}"
        prompt = f"Summarize this content:\n\n{content[:50000]}"  # Limit to first 50k chars
        response = await query_ollama(prompt, DEFAULT_MODEL, system)
        return [TextContent(type="text", text=response)]

    elif name == "ollama_status":
        try:
            status = await check_ollama_status()
            return [TextContent(type="text", text=status)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Failed to check Ollama status: {str(e)}")]

    else:
        raise ValueError(f"Unknown tool: {name}")

async def query_ollama(prompt: str, model: str, system: str) -> str:
    """Query Ollama API"""
    async with httpx.AsyncClient(timeout=120.0) as client:
        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "system": system,
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "top_p": 0.9
                    }
                }
            )
            response.raise_for_status()
            result = response.json()
            return result["response"]
        except httpx.ConnectError:
            raise Exception(f"Cannot connect to Ollama at {OLLAMA_HOST}. Is Ollama running? Try: ollama serve")
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                raise Exception(f"Model '{model}' not found. Pull it with: ollama pull {model}")
            raise Exception(f"Ollama API error: {e.response.status_code} - {e.response.text}")
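
# For reference: with "stream": False, /api/generate returns a single JSON object whose
# "response" field holds the generated text (abridged sketch; additional timing and
# stats fields vary by Ollama version):
#
#   {"model": "llama3.1:8b", "response": "...", "done": true}
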
async def check_ollama_status() -> str:
    """Check Ollama server status and list models"""
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            # Check server
            await client.get(f"{OLLAMA_HOST}/")
            # List models
            response = await client.get(f"{OLLAMA_HOST}/api/tags")
            response.raise_for_status()
            models = response.json().get("models", [])
            if not models:
                return "[WARNING] Ollama is running but no models are installed. Pull a model with: ollama pull llama3.1:8b"
            status = "[OK] Ollama is running\n\nAvailable models:\n"
            for model in models:
                name = model["name"]
                size = model.get("size", 0) / (1024**3)  # Convert to GB
                status += f" - {name} ({size:.1f} GB)\n"
            return status
        except httpx.ConnectError:
            return "[ERROR] Ollama is not running. Start it with: ollama serve\nOr install from: https://ollama.ai/download"

async def main():
    """Run MCP server"""
    try:
        from mcp.server.stdio import stdio_server
        async with stdio_server() as (read_stream, write_stream):
            await app.run(
                read_stream,
                write_stream,
                app.create_initialization_options()
            )
    except Exception as e:
        print(f"[ERROR] MCP server failed: {e}", file=sys.stderr)
        sys.exit(1)

if __name__ == "__main__":
    asyncio.run(main())
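
# Quick pre-flight check before wiring this into an MCP client (commands taken from the
# error messages above; adjust model names to whatever is installed locally):
#
#   ollama serve
#   ollama pull llama3.1:8b
#   python server.py    # speaks MCP over stdio and waits for a client to connect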