sync: auto-sync from DESKTOP-0O8A1RL at 2026-05-16 15:59:41
Author: Mike Swanson Machine: DESKTOP-0O8A1RL Timestamp: 2026-05-16 15:59:41
This commit is contained in:
54
tmp_qwen_reason.py
Normal file
54
tmp_qwen_reason.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import urllib.request, json, time
|
||||
|
||||
def ask(model, prompt, max_tokens=6000):
    """Send one non-streaming generate request and summarize the reply.

    POSTs a JSON payload to ``http://localhost:11434/api/generate`` (the
    request/response shape looks like Ollama's generate API -- confirm
    against the server actually in use) and splits any ``<think>...</think>``
    reasoning section from the visible answer.

    Args:
        model: Model name placed in the payload's ``"model"`` field.
        prompt: Prompt text placed in the payload's ``"prompt"`` field.
        max_tokens: Value sent as ``options.num_predict``.

    Returns:
        A 6-tuple ``(visible, elapsed, tokens, tps, think_words, think)``:
        the answer text with the reasoning section removed, wall-clock
        seconds for the call, the reply's ``eval_count`` (0 if absent),
        tokens per second derived from ``eval_duration`` (0 if that field
        is absent or zero), the whitespace-separated word count of the
        reasoning section, and the reasoning text itself ("" if none).

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        ValueError: If the reply body is not valid JSON.
        KeyError: If the reply has no ``"response"`` field.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    start = time.time()
    req = urllib.request.Request(
        "http://localhost:11434/api/generate",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    # Context manager so the HTTP response is closed even if parsing fails.
    with urllib.request.urlopen(req, timeout=600) as resp:
        r = json.loads(resp.read())
    elapsed = time.time() - start

    tokens = r.get("eval_count", 0)
    # Default to 0 (not 1 ns): a missing duration must yield "unknown" speed
    # rather than tokens / 1e-9.
    dur_ns = r.get("eval_duration", 0)
    tps = tokens / (dur_ns / 1e9) if dur_ns else 0

    raw = r["response"].strip()

    open_tag = "<think>"
    close_tag = "</think>"
    open_at = raw.find(open_tag)
    # Only accept a closing tag that comes after the opening tag; a stray
    # earlier "</think>" must not be paired with a later "<think>".
    close_at = raw.find(close_tag, open_at + len(open_tag)) if open_at != -1 else -1

    if close_at != -1:
        think = raw[open_at + len(open_tag):close_at]
        visible = raw[close_at + len(close_tag):].strip()
        think_words = len(think.split())
    else:
        think = ""
        visible = raw
        think_words = 0
    return visible, elapsed, tokens, tps, think_words, think
|
||||
|
||||
# Multi-step arithmetic word problem used as the reasoning benchmark; the
# same text is sent unchanged to every model in the loop below.
prompt = (
    "An MSP has 3 technicians. Tech A can complete 4 tickets per hour. "
    "Tech B can complete 3 tickets per hour. Tech C can complete 2 tickets per hour. "
    "They have 45 tickets in the queue. Tech A works 8 hours, Tech B works 6 hours, "
    "Tech C works 4 hours. Will they clear the queue? How many tickets will be left or "
    "how many ahead of schedule will they finish? Show your work."
)

print("Running qwen3.6 reasoning test at 6000 token budget...")
print(f"Prompt: {prompt}\n")

for model in ["qwen3.6:latest", "qwen3:14b"]:
    print(f"\n{'='*60}")
    print(f"MODEL: {model}")
    print('='*60)
    try:
        visible, t, tokens, tps, think_words, think = ask(model, prompt)
    except (OSError, ValueError, KeyError) as exc:
        # OSError covers connection/HTTP/timeout failures, ValueError a
        # non-JSON body, KeyError a reply without "response". Report and
        # move on so one unavailable model does not abort the comparison.
        print(f"Request failed: {exc!r}")
        continue
    print(f"Time: {t:.1f}s | Total tokens: {tokens} | Speed: {tps:.0f} tok/s")
    if think_words:
        print(f"Thinking: ~{think_words} words")
        print(f"Thinking excerpt (first 300 chars):\n {think[:300]}...")
    else:
        print("Thinking: not exposed in output")
    print(f"\nResponse:\n{visible}")
|
||||
Reference in New Issue
Block a user