sync: auto-sync from DESKTOP-0O8A1RL at 2026-05-16 15:59:41

Author: Mike Swanson Machine: DESKTOP-0O8A1RL Timestamp: 2026-05-16 15:59:41
2026-05-16 15:59:43 -07:00
parent c9c4c01cdc
commit ee67a8bcf7
3 changed files with 371 additions and 0 deletions
--- a/tmp_qwen_reason.py
+++ b/tmp_qwen_reason.py
@@ -0,0 +1,54 @@
+import urllib.request, json, time
+
+def ask(model, prompt, max_tokens=6000):
+    """Send one non-streaming generate request and split reasoning from answer.
+
+    POSTs a JSON payload to http://localhost:11434/api/generate
+    (NOTE(review): the endpoint shape matches Ollama's generate API --
+    confirm that is the server in use).
+
+    Args:
+        model: Value sent as the request's "model" field.
+        prompt: Value sent as the request's "prompt" field.
+        max_tokens: Value sent as options.num_predict.
+
+    Returns:
+        A 6-tuple ``(visible, elapsed, tokens, tps, think_words, think)``:
+          visible      -- reply text with the reasoning section removed.
+          elapsed      -- wall-clock seconds spent building and sending the
+                          request and reading the reply.
+          tokens       -- the reply's "eval_count" (0 when absent).
+          tps          -- tokens per second derived from "eval_duration",
+                          which is treated as nanoseconds; 0 when the
+                          duration is missing or zero.
+          think_words  -- whitespace-separated word count of ``think``.
+          think        -- reasoning text, or "" when none was found.
+
+    Raises:
+        KeyError: if the decoded reply has no "response" field.
+        Exceptions from ``urllib.request.urlopen`` and ``json.loads`` are
+        not caught here and propagate to the caller.
+    """
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "stream": False,
+        "options": {"num_predict": max_tokens},
+    }
+    start = time.time()
+    req = urllib.request.Request(
+        "http://localhost:11434/api/generate",
+        data=json.dumps(payload).encode(),
+        headers={"Content-Type": "application/json"},
+    )
+    # The context manager closes the HTTP response even when reading or
+    # JSON decoding fails; previously the response object was never closed.
+    with urllib.request.urlopen(req, timeout=600) as resp:
+        r = json.loads(resp.read())
+    elapsed = time.time() - start
+
+    tokens = r.get("eval_count", 0)
+    # Default to 0 rather than 1 ns: a missing duration should report
+    # 0 tok/s, not a rate inflated by a factor of a billion.
+    dur_ns = r.get("eval_duration", 0)
+    tps = tokens / (dur_ns / 1e9) if dur_ns else 0
+
+    raw = r["response"].strip()
+    open_tag, close_tag = "<think>", "</think>"
+    open_at = raw.find(open_tag)
+    # Only accept a closing tag that comes after the opening tag, so a
+    # stray "</think>" earlier in the text cannot truncate the answer.
+    close_at = -1
+    if open_at != -1:
+        close_at = raw.find(close_tag, open_at + len(open_tag))
+
+    if close_at != -1:
+        think = raw[open_at + len(open_tag):close_at]
+        visible = raw[close_at + len(close_tag):].strip()
+    else:
+        # No well-formed inline block. Fall back to a separate "thinking"
+        # field when the reply carries one (NOTE(review): field name taken
+        # from Ollama's API -- confirm against the server version in use).
+        separate = r.get("thinking")
+        think = separate if isinstance(separate, str) else ""
+        visible = raw
+    think_words = len(think.split())
+    return visible, elapsed, tokens, tps, think_words, think
+
+# Word problem sent unchanged to every model under test. The fragments
+# already carry their own trailing spaces, so they are joined with no
+# separator.
+prompt = "".join((
+    "An MSP has 3 technicians. Tech A can complete 4 tickets per hour. ",
+    "Tech B can complete 3 tickets per hour. Tech C can complete 2 tickets per hour. ",
+    "They have 45 tickets in the queue. Tech A works 8 hours, Tech B works 6 hours, ",
+    "Tech C works 4 hours. Will they clear the queue? How many tickets will be left or ",
+    "how many ahead of schedule will they finish? Show your work.",
+))
+
+print("Running qwen3.6 reasoning test at 6000 token budget...")
+print(f"Prompt: {prompt}\n")
+
+# Run the same prompt through each model in turn and print a report.
+for model in ("qwen3.6:latest", "qwen3:14b"):
+    # Banner separating this model's report from the previous one.
+    print(f"\n{'='*60}")
+    print(f"MODEL: {model}")
+    print('='*60)
+
+    result = ask(model, prompt)
+    visible, t, tokens, tps, think_words, think = result
+    print(f"Time: {t:.1f}s | Total tokens: {tokens} | Speed: {tps:.0f} tok/s")
+
+    # A zero word count means ask() found no reasoning text to show.
+    if not think_words:
+        print("Thinking: not exposed in output")
+    else:
+        print(f"Thinking: ~{think_words} words")
+        print(f"Thinking excerpt (first 300 chars):\n  {think[:300]}...")
+    print(f"\nResponse:\n{visible}")