# claudetools/tmp_qwen_reason.py

import urllib.request, json, time

def ask(model, prompt, max_tokens=6000, *,
        url="http://localhost:11434/api/generate", timeout=600):
    """Send one non-streaming generate request and split reasoning from answer.

    Args:
        model: Model name placed in the request payload.
        prompt: Prompt text placed in the request payload.
        max_tokens: Value sent as ``options.num_predict``.
        url: Endpoint that receives the JSON payload.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        A 6-tuple ``(visible, elapsed, tokens, tps, think_words, think)``:
        the answer text with any ``<think>...</think>`` block removed, the
        seconds spent on the request, the reported ``eval_count``, tokens
        per second derived from ``eval_duration`` (0 when that field is
        missing or zero), the whitespace-separated word count of the
        reasoning text, and the reasoning text itself ("" when none found).

    Raises:
        KeyError: If the decoded reply has no ``"response"`` key.
        urllib.error.URLError: If the request itself fails.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )

    # perf_counter is monotonic, so a wall-clock adjustment during a long
    # generation cannot skew (or negate) the measured duration.
    start = time.perf_counter()
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        r = json.loads(resp.read())
    elapsed = time.perf_counter() - start

    tokens = r.get("eval_count", 0)
    # Default to 0 (not 1 ns) so a missing duration reports 0 tok/s instead
    # of an absurdly large rate.
    dur_ns = r.get("eval_duration", 0)
    tps = tokens / (dur_ns / 1e9) if dur_ns else 0
    raw = r["response"].strip()

    open_tag, close_tag = "<think>", "</think>"
    open_at = raw.find(open_tag)
    # Look for the closing tag only AFTER the opening tag; a stray earlier
    # "</think>" must not be paired with a later "<think>".
    close_at = raw.find(close_tag, open_at + len(open_tag)) if open_at != -1 else -1

    if close_at != -1:
        think = raw[open_at + len(open_tag):close_at]
        visible = raw[close_at + len(close_tag):].strip()
    else:
        # NOTE(review): some server versions appear to return the reasoning
        # in a separate "thinking" field rather than inline tags -- confirm
        # against the API docs for the deployed version. Absent or
        # non-string values fall back to "no reasoning".
        separate = r.get("thinking")
        think = separate if isinstance(separate, str) else ""
        visible = raw

    think_words = len(think.split())
    return visible, elapsed, tokens, tps, think_words, think

# Word problem sent to every model. By the numbers in the text, capacity is
# 4*8 + 3*6 + 2*4 = 58 tickets against a queue of 45.
prompt = (
    "An MSP has 3 technicians. "
    "Tech A can complete 4 tickets per hour. "
    "Tech B can complete 3 tickets per hour. "
    "Tech C can complete 2 tickets per hour. "
    "They have 45 tickets in the queue. "
    "Tech A works 8 hours, Tech B works 6 hours, Tech C works 4 hours. "
    "Will they clear the queue? "
    "How many tickets will be left or how many ahead of schedule will they finish? "
    "Show your work."
)

print("Running qwen3.6 reasoning test at 6000 token budget...")
print(f"Prompt: {prompt}\n")

# Same 60-character rule above and below each model heading.
banner = "=" * 60

for model in ("qwen3.6:latest", "qwen3:14b"):
    print(f"\n{banner}")
    print(f"MODEL: {model}")
    print(banner)

    # ask() hands back: answer text, seconds taken, token count,
    # tokens/second, reasoning word count, reasoning text.
    answer, seconds, n_tokens, rate, n_think_words, reasoning = ask(model, prompt)
    print(f"Time: {seconds:.1f}s | Total tokens: {n_tokens} | Speed: {rate:.0f} tok/s")

    if not n_think_words:
        print("Thinking: not exposed in output")
    else:
        print(f"Thinking: ~{n_think_words} words")
        print(f"Thinking excerpt (first 300 chars):\n  {reasoning[:300]}...")

    print(f"\nResponse:\n{answer}")