# Author: Mike Swanson
# Machine: DESKTOP-0O8A1RL
# Timestamp: 2026-05-16 15:59:41
# 55 lines, 2.0 KiB, Python
import urllib.request, json, time
|
|
|
|
_THINK_OPEN = "<think>"
_THINK_CLOSE = "</think>"


def _split_thinking(raw):
    """Split *raw* into ``(thinking, visible)`` around the first think block."""
    open_at = raw.find(_THINK_OPEN)
    if open_at == -1:
        return "", raw
    body_at = open_at + len(_THINK_OPEN)
    # Search for the closing tag only *after* the opening tag, so a stray
    # "</think>" earlier in the text cannot produce an inverted (empty) slice.
    close_at = raw.find(_THINK_CLOSE, body_at)
    if close_at == -1:
        # Unterminated block: report no thinking and leave the text untouched.
        return "", raw
    return raw[body_at:close_at], raw[close_at + len(_THINK_CLOSE):].strip()


def ask(
    model,
    prompt,
    max_tokens=6000,
    *,
    url="http://localhost:11434/api/generate",
    timeout=600,
):
    """Send one non-streaming generate request and summarize the reply.

    Args:
        model: Model name placed in the request's ``"model"`` field.
        prompt: Prompt text placed in the request's ``"prompt"`` field.
        max_tokens: Value sent as ``options.num_predict``.
        url: Endpoint that receives the JSON POST.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        A 6-tuple ``(visible, elapsed, tokens, tps, think_words, think)``:
        the response text with any ``<think>...</think>`` block removed, the
        seconds spent waiting for the reply, the reported ``eval_count``,
        tokens per second derived from ``eval_duration`` (0 when the server
        reports no duration), the whitespace-separated word count of the
        thinking text, and the thinking text itself ("" when absent).

    Raises:
        urllib.error.URLError: The request could not be completed.
        ValueError: The reply body is not valid JSON.
        KeyError: The reply JSON has no ``"response"`` field.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )

    # perf_counter is monotonic, so the measurement is immune to wall-clock
    # adjustments that can happen during a multi-minute generation.
    start = time.perf_counter()
    # The context manager guarantees the HTTP response is closed even if
    # reading or JSON decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        r = json.loads(resp.read())
    elapsed = time.perf_counter() - start

    tokens = r.get("eval_count", 0)
    # Default to 0 (not 1 ns): a missing duration must yield tps == 0 rather
    # than a nonsensical billions-of-tokens-per-second figure.
    dur_ns = r.get("eval_duration", 0)
    tps = tokens / (dur_ns / 1e9) if dur_ns else 0

    raw = r["response"].strip()
    think, visible = _split_thinking(raw)
    think_words = len(think.split())
    return visible, elapsed, tokens, tps, think_words, think
|
|
|
|
# Token budget used by both the banner and every ask() call, so the number
# printed can never drift from the number actually requested.
MAX_TOKENS = 6000
# Maximum number of thinking characters echoed to the console per model.
EXCERPT_CHARS = 300
# Models compared by this run, in execution order.
MODELS = ["qwen3.6:latest", "qwen3:14b"]

prompt = (
    "An MSP has 3 technicians. Tech A can complete 4 tickets per hour. "
    "Tech B can complete 3 tickets per hour. Tech C can complete 2 tickets per hour. "
    "They have 45 tickets in the queue. Tech A works 8 hours, Tech B works 6 hours, "
    "Tech C works 4 hours. Will they clear the queue? How many tickets will be left or "
    "how many ahead of schedule will they finish? Show your work."
)

print(f"Running qwen3.6 reasoning test at {MAX_TOKENS} token budget...")
print(f"Prompt: {prompt}\n")

failed_models = []
for model in MODELS:
    print(f"\n{'='*60}")
    print(f"MODEL: {model}")
    print('='*60)
    try:
        visible, t, tokens, tps, think_words, think = ask(model, prompt, MAX_TOKENS)
    except (OSError, ValueError, KeyError) as exc:
        # OSError covers connection/HTTP/timeout failures, ValueError a
        # non-JSON body, KeyError a reply without "response". Report and move
        # on so one unavailable model does not cancel the whole comparison.
        print(f"FAILED: {type(exc).__name__}: {exc}")
        failed_models.append(model)
        continue

    print(f"Time: {t:.1f}s | Total tokens: {tokens} | Speed: {tps:.0f} tok/s")
    if think_words:
        print(f"Thinking: ~{think_words} words")
        excerpt = think[:EXCERPT_CHARS]
        # Only show an ellipsis when text was actually cut off.
        ellipsis = "..." if len(think) > EXCERPT_CHARS else ""
        print(f"Thinking excerpt (first {EXCERPT_CHARS} chars):\n {excerpt}{ellipsis}")
    else:
        print("Thinking: not exposed in output")
    print(f"\nResponse:\n{visible}")

if failed_models:
    # Keep a non-zero exit status, as an uncaught exception would have given.
    print(f"\nModels that failed: {', '.join(failed_models)}")
    raise SystemExit(1)
|