diff --git a/.claude/OLLAMA.md b/.claude/OLLAMA.md index f838e6b..e81c22a 100644 --- a/.claude/OLLAMA.md +++ b/.claude/OLLAMA.md @@ -83,18 +83,23 @@ If neither endpoint responds: verify Tailscale (`tailscale status`) and whether Use the `/api/chat` endpoint with `think:false` for qwen3 models. The older `/api/generate` endpoint on qwen3 puts output into thinking tokens that don't appear in the `response` field — you'll get an empty response if you use `/api/generate`. -Preferred one-liner: +Preferred one-liner — endpoint **and** model come from `.claude/identity.json` unless `OLLAMA`/`MODEL` are +already set in the environment (consistent with **Endpoints** above; no per-call probe). The old inline auto-detect was REMOVED: it used +`urlopen()` as a truthiness test, but `urlopen()` *raises* `URLError` on a down host instead of +returning a falsy value that would select the fallback — so it crashed on a down localhost rather than failing over to Beast, +and it violated the "no per-call probe" rule. ```bash -python -c " +OLLAMA="${OLLAMA:-$(jq -r '.ollama.endpoint // .ollama.fallback // "http://localhost:11434"' .claude/identity.json)}" +MODEL="${MODEL:-$(jq -r '.ollama.prose_model // "qwen3:14b"' .claude/identity.json)}" +OLLAMA="$OLLAMA" MODEL="$MODEL" python -c " import urllib.request, json, sys, os -OLLAMA = os.environ.get('OLLAMA') or ('http://localhost:11434' if __import__('urllib.request').request.urlopen(urllib.request.Request('http://localhost:11434/api/tags'),timeout=2) else 'http://100.101.122.4:11434') body = json.dumps({ - 'model':'qwen3:14b', + 'model': os.environ['MODEL'], 'messages':[{'role':'user','content': sys.argv[1]}], 'stream':False, 'think':False }).encode() -res = json.loads(urllib.request.urlopen(urllib.request.Request(OLLAMA+'/api/chat', body), timeout=120).read()) +res = json.loads(urllib.request.urlopen(urllib.request.Request(os.environ['OLLAMA']+'/api/chat', body), timeout=120).read()) print(res['message']['content']) " "Your prompt here" ```