Synced files:
- Grepai optimization documentation
- Ollama Assistant MCP server implementation
- Session logs and context updates

Machine: ACG-M-L5090
Timestamp: 2026-01-22 19:22:24
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
#!/usr/bin/env python3
"""
Ollama MCP Server
Provides local AI assistance to Claude Code via MCP protocol
"""

import asyncio
import json
import sys
from typing import Any

import httpx

# MCP imports
try:
    from mcp.server import Server
    from mcp.types import Tool, TextContent
except ImportError:
    print("[ERROR] MCP package not installed. Run: pip install mcp", file=sys.stderr)
    sys.exit(1)

# Configuration
OLLAMA_HOST = "http://localhost:11434"
DEFAULT_MODEL = "llama3.1:8b"

# Create MCP server
app = Server("ollama-assistant")


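# Tool definitions exposed to the MCP client: ask_ollama, analyze_code_local,
# summarize_large_file, and ollama_status.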
@app.list_tools()
async def list_tools() -> list[Tool]:
    """List available Ollama tools"""
    return [
        Tool(
            name="ask_ollama",
            description="Ask the local Ollama model a question. Use for simple queries, code review, or when you want a second opinion. The model has no context of the conversation.",
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "The question or task for Ollama"
                    },
                    "model": {
                        "type": "string",
                        "description": "Model to use (default: llama3.1:8b)",
                        "default": DEFAULT_MODEL
                    },
                    "system": {
                        "type": "string",
                        "description": "System prompt to set context/role",
                        "default": "You are a helpful coding assistant."
                    }
                },
                "required": ["prompt"]
            }
        ),
        Tool(
            name="analyze_code_local",
            description="Analyze code using the local Ollama model. Good for privacy-sensitive code or large codebases. Returns analysis without sending code to external APIs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Code to analyze"
                    },
                    "language": {
                        "type": "string",
                        "description": "Programming language"
                    },
                    "analysis_type": {
                        "type": "string",
                        "enum": ["security", "performance", "quality", "bugs", "general"],
                        "description": "Type of analysis to perform",
                        "default": "general"
                    }
                },
                "required": ["code", "language"]
            }
        ),
        Tool(
            name="summarize_large_file",
            description="Summarize large files using the local model. No size limits or API costs.",
            inputSchema={
                "type": "object",
                "properties": {
                    "content": {
                        "type": "string",
                        "description": "File content to summarize"
                    },
                    "summary_length": {
                        "type": "string",
                        "enum": ["brief", "detailed", "technical"],
                        "default": "brief"
                    }
                },
                "required": ["content"]
            }
        ),
        Tool(
            name="ollama_status",
            description="Check Ollama server status and list available models",
            inputSchema={
                "type": "object",
                "properties": {}
            }
        )
    ]


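# Tool dispatcher: routes each MCP tool call by name; results and handled errors
# are returned to the client as TextContent, and an unknown tool name raises ValueError.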
@app.call_tool()
async def call_tool(name: str, arguments: Any) -> list[TextContent]:
    """Execute an Ollama tool"""

    if name == "ask_ollama":
        prompt = arguments["prompt"]
        model = arguments.get("model", DEFAULT_MODEL)
        system = arguments.get("system", "You are a helpful coding assistant.")

        try:
            response = await query_ollama(prompt, model, system)
            return [TextContent(type="text", text=response)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Ollama query failed: {str(e)}")]

    elif name == "analyze_code_local":
        code = arguments["code"]
        language = arguments["language"]
        analysis_type = arguments.get("analysis_type", "general")

        system = f"You are a {language} code analyzer. Focus on {analysis_type} analysis. Be concise and specific."
        prompt = f"Analyze this {language} code for {analysis_type} issues:\n\n```{language}\n{code}\n```\n\nProvide specific findings with line references where possible."

        # Try a code-specific model if available, falling back to the default model
        try:
            response = await query_ollama(prompt, "qwen2.5-coder:7b", system)
        except Exception:
            try:
                response = await query_ollama(prompt, "codellama:13b", system)
            except Exception:
                response = await query_ollama(prompt, DEFAULT_MODEL, system)

        return [TextContent(type="text", text=response)]

    elif name == "summarize_large_file":
        content = arguments["content"]
        summary_length = arguments.get("summary_length", "brief")

        length_instructions = {
            "brief": "Create a concise 2-3 sentence summary.",
            "detailed": "Create a comprehensive paragraph summary covering main points.",
            "technical": "Create a technical summary highlighting key functions, classes, and architecture."
        }

        system = f"You are a file summarizer. {length_instructions[summary_length]}"
        prompt = f"Summarize this content:\n\n{content[:50000]}"  # Limit to the first 50k characters

        response = await query_ollama(prompt, DEFAULT_MODEL, system)
        return [TextContent(type="text", text=response)]

    elif name == "ollama_status":
        try:
            status = await check_ollama_status()
            return [TextContent(type="text", text=status)]
        except Exception as e:
            return [TextContent(type="text", text=f"[ERROR] Failed to check Ollama status: {str(e)}")]

    else:
        raise ValueError(f"Unknown tool: {name}")


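# Single, non-streaming request to Ollama's /api/generate endpoint: the full
# completion comes back in one JSON payload under the "response" key.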
async def query_ollama(prompt: str, model: str, system: str) -> str:
    """Query Ollama API"""
    async with httpx.AsyncClient(timeout=120.0) as client:
        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
                json={
                    "model": model,
                    "prompt": prompt,
                    "system": system,
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "top_p": 0.9
                    }
                }
            )
            response.raise_for_status()
            result = response.json()
            return result["response"]
        except httpx.ConnectError:
            raise Exception(f"Cannot connect to Ollama at {OLLAMA_HOST}. Is Ollama running? Try: ollama serve")
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                raise Exception(f"Model '{model}' not found. Pull it with: ollama pull {model}")
            raise Exception(f"Ollama API error: {e.response.status_code} - {e.response.text}")


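# Health check: verifies the Ollama server responds, then lists installed models
# via /api/tags along with their approximate size.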
async def check_ollama_status() -> str:
    """Check Ollama server status and list models"""
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            # Check server
            await client.get(f"{OLLAMA_HOST}/")

            # List models
            response = await client.get(f"{OLLAMA_HOST}/api/tags")
            response.raise_for_status()
            models = response.json().get("models", [])

            if not models:
                return "[WARNING] Ollama is running but no models are installed. Pull a model with: ollama pull llama3.1:8b"

            status = "[OK] Ollama is running\n\nAvailable models:\n"
            for model in models:
                name = model["name"]
                size = model.get("size", 0) / (1024**3)  # Convert to GB
                status += f"  - {name} ({size:.1f} GB)\n"

            return status

        except httpx.ConnectError:
            return "[ERROR] Ollama is not running. Start it with: ollama serve\nOr install from: https://ollama.ai/download"


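# Entry point: the server speaks MCP over stdio, so the client launches this
# script as a subprocess and exchanges messages via stdin/stdout.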
async def main():
    """Run MCP server"""
    try:
        from mcp.server.stdio import stdio_server

        async with stdio_server() as (read_stream, write_stream):
            await app.run(
                read_stream,
                write_stream,
                app.create_initialization_options()
            )
    except Exception as e:
        print(f"[ERROR] MCP server failed: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    asyncio.run(main())
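
As a quick sanity check outside of MCP, the Ollama endpoint this server depends on can be exercised directly. The sketch below is a minimal, illustrative example only: it assumes Ollama is listening on the default http://localhost:11434 and that llama3.1:8b has already been pulled; the prompt text is arbitrary.

#!/usr/bin/env python3
"""Standalone smoke test for the Ollama endpoint used by the MCP server (illustrative)."""

import asyncio

import httpx

OLLAMA_HOST = "http://localhost:11434"  # assumed default Ollama address


async def smoke_test() -> None:
    # Mirrors query_ollama(): one non-streaming POST to /api/generate.
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{OLLAMA_HOST}/api/generate",
            json={
                "model": "llama3.1:8b",  # assumes this model has been pulled
                "prompt": "Reply with the single word: ready",
                "stream": False,
            },
        )
        response.raise_for_status()
        print(response.json()["response"])


if __name__ == "__main__":
    asyncio.run(smoke_test())

If this prints a reply, the query_ollama() path in the server should work as well. Registering the server itself is client-specific: an MCP client such as Claude Code is pointed at this script and launches it over stdio, so consult the client's MCP configuration documentation for the exact format.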