Author: Mike Swanson Machine: DESKTOP-0O8A1RL Timestamp: 2026-05-16 15:59:41
159 lines
7.7 KiB
Python
159 lines
7.7 KiB
Python
import urllib.request, json, time
|
|
|
|
MODEL = "qwen3.6:latest"
|
|
COMPARE = "qwen3:14b"
|
|
|
|
def ask(model, prompt, system=None, max_tokens=2000, no_think=False,
        endpoint="http://localhost:11434/api/generate"):
    """Send one non-streaming generate request and return the reply with stats.

    Args:
        model: Model tag placed in the request's "model" field.
        prompt: Prompt text sent in the "prompt" field.
        system: Optional system prompt; only sent when truthy.
        max_tokens: Value sent as the "num_predict" option.
        no_think: When true, a "/no_think" line is prepended to the prompt
            (presumably the model's switch for skipping its thinking block;
            confirm against the model documentation).
        endpoint: URL the JSON payload is POSTed to. Defaults to the local
            generate endpoint the script was written against.

    Returns:
        A 5-tuple ``(visible, elapsed, tokens, tps, think_tokens)``:
        the reply text with any ``<think>...</think>`` block removed, the
        wall-clock seconds spent on the request, the reply's "eval_count",
        tokens per second derived from "eval_duration" (treated as
        nanoseconds; 0 when the field is absent or zero), and the number of
        whitespace-separated words inside the think block (0 if none).

    Raises:
        urllib.error.URLError: If the request fails.
        KeyError: If the decoded reply has no "response" field.
    """
    if no_think:
        prompt = "/no_think\n" + prompt

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    if system:
        payload["system"] = system

    req = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments during a long generation.
    start = time.perf_counter()
    # The context manager closes the connection even if read/decoding fails.
    with urllib.request.urlopen(req, timeout=300) as http_resp:
        r = json.loads(http_resp.read())
    elapsed = time.perf_counter() - start

    tokens = r.get("eval_count", 0)
    # Default to 0 (not 1 ns) so a missing duration yields 0 tok/s instead of
    # an absurdly large rate.
    dur_ns = r.get("eval_duration", 0)
    tps = tokens / (dur_ns / 1e9) if dur_ns else 0

    raw = r["response"].strip()

    # Strip the thinking block if present. partition() guarantees the closing
    # tag is searched for *after* the opening tag.
    _, open_tag, after_open = raw.partition("<think>")
    think_content, close_tag, after_close = after_open.partition("</think>")
    if open_tag and close_tag:
        visible = after_close.strip()
        # Word count is only a rough proxy for the thinking token count.
        think_tokens = len(think_content.split())
    else:
        visible = raw
        think_tokens = 0

    return visible, elapsed, tokens, tps, think_tokens
|
|
|
|
def hdr(title):
    """Print *title* as a section banner framed by two 60-character '=' rules.

    A blank line is emitted before the banner to separate it from earlier
    output.
    """
    rule = "=" * 60
    # The leading empty string produces the blank line ahead of the banner.
    print("", rule, title, rule, sep="\n")
|
|
|
|
def report(label, resp, t, tokens, tps, think_tok):
    """Print a short summary of one model run.

    Args:
        label: Heading for the run (printed after a blank line).
        resp: Response text; only the first 600 characters are shown.
        t: Elapsed seconds, printed with one decimal place.
        tokens: Total token count.
        tps: Tokens per second, printed rounded to a whole number.
        think_tok: Approximate word count of the thinking block; a falsy
            value is reported as "no thinking".
    """
    if think_tok:
        tag = f" [thinking: ~{think_tok} words]"
    else:
        tag = " [no thinking]"

    summary = (
        f"\n{label}{tag}",
        f"Time: {t:.1f}s | Total tokens: {tokens} | Speed: {tps:.0f} tok/s",
        f"Response:\n{resp[:600]}",
    )
    print(*summary, sep="\n")
|
|
|
|
# ── TEST 1: Ticket Classification ──────────────────────────────────────────
# Sends the same classification prompt twice, first with the default
# behaviour and then with no_think=True, and prints a report for each run.
hdr("TEST 1: TICKET CLASSIFICATION")
p = "\n\n".join((
    "Classify this IT support ticket into ONE category: "
    "Hardware, Software, Network, Security, or User-Error.",
    "Ticket: Client says Outlook keeps asking for password every morning. "
    "Rebooting does not help. Started after a Windows Update last Tuesday.",
    "Respond with the category name and one sentence of reasoning.",
))
for mode, no_think in (("thinking", False), ("no_think", True)):
    resp, t, tok, tps, think_tok = ask(MODEL, p, no_think=no_think)
    report(f"{MODEL} [{mode}]", resp, t, tok, tps, think_tok)
|
|
|
|
# ── TEST 2: JSON Structured Extraction ─────────────────────────────────────
# Asks for a bare JSON object in both modes and checks that the visible reply
# parses as a JSON object, tolerating an optional Markdown code fence.
hdr("TEST 2: JSON EXTRACTION")
p2 = (
    "Extract from this ticket and return ONLY a valid JSON object, no explanation:\n"
    "Fields: client_name, issue_summary, affected_system, urgency (low/medium/high), suggested_action\n\n"
    "Ticket: Hi, this is Janet from Cascades Dental. Our front desk computer running Windows 10 "
    "is showing a blue screen every time we open Dentrix. This started this morning and we have "
    "patients coming in at 9am. We need this fixed ASAP.\n\n"
    "Return only the JSON object."
)
for no_think in [False, True]:
    mode = "no_think" if no_think else "thinking"
    resp, t, tok, tps, think_tok = ask(MODEL, p2, no_think=no_think)
    report(f"{MODEL} [{mode}]", resp, t, tok, tps, think_tok)

    # Remove a surrounding ``` / ```json fence explicitly. str.strip("```json")
    # would strip the *character set* {`, j, s, o, n} from both ends rather
    # than the literal fence text.
    clean = resp.strip()
    if clean.startswith("```"):
        clean = clean[3:]
        if clean[:4].lower() == "json":
            clean = clean[4:]
    if clean.endswith("```"):
        clean = clean[:-3]
    clean = clean.strip()

    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError as e:
        print(f" [FAIL] {e} | raw: {repr(clean[:100])}")
    else:
        # The prompt asks for a JSON *object*; any other JSON value is a failure.
        if isinstance(parsed, dict):
            print(f" [OK] Valid JSON with keys: {list(parsed.keys())}")
        else:
            print(
                f" [FAIL] expected a JSON object, got {type(parsed).__name__}"
                f" | raw: {repr(clean[:100])}"
            )
|
|
|
|
# ── TEST 3: Summarization ───────────────────────────────────────────────────
# Single no_think run: asks for a short client-facing summary of an incident
# and prints the report.
hdr("TEST 3: SUMMARIZATION (no_think only — faster)")
p3 = "\n\n".join((
    "Summarize this incident in 3 bullet points for a client-facing email. "
    "Professional, non-technical, under 80 words total.",
    "Incident: The GuruRMM agent watchdog on a Windows build server failed to restart the main "
    "agent service after an auto-update because: (1) SCM service.stop() returned access denied, "
    "(2) suppress_until was set to a future timestamp instead of being cleared on failure, "
    "causing the watchdog to skip all restart attempts for 25 minutes. "
    "Fix: sc.exe fallback added for stop, suppress_until cleared on error.",
))
resp3, t3, tok3, tps3, think3 = ask(MODEL, p3, no_think=True)
report(
    f"{MODEL} [no_think]",
    resp=resp3,
    t=t3,
    tokens=tok3,
    tps=tps3,
    think_tok=think3,
)
|
|
|
|
# ── TEST 4: Roadmap Classification (JSON) ──────────────────────────────────
# Single no_think run: asks for a fixed-shape JSON answer and checks that the
# visible reply parses as JSON, tolerating an optional Markdown code fence.
hdr("TEST 4: FEATURE ROADMAP PLACEMENT (no_think)")
p4 = (
    "You are classifying a feature request for GuruRMM, an RMM tool for MSPs. "
    "Roadmap sections: Core Agent Features, Server/API Features, Dashboard & UI, "
    "Platform & Infrastructure, Integrations, Future Considerations.\n\n"
    "Feature request: Add ability to remotely enable or disable Windows Defender "
    "Real-Time Protection on managed endpoints from the dashboard.\n\n"
    'Return ONLY this JSON: {"section": "...", "subsection": "...", "priority": "P1|P2|P3", "summary": "..."}'
)
resp4, t4, tok4, tps4, think4 = ask(MODEL, p4, no_think=True)
report(f"{MODEL} [no_think]", resp4, t4, tok4, tps4, think4)

# Remove a surrounding ``` / ```json fence explicitly. str.strip("```json")
# would strip the *character set* {`, j, s, o, n} from both ends rather than
# the literal fence text.
clean4 = resp4.strip()
if clean4.startswith("```"):
    clean4 = clean4[3:]
    if clean4[:4].lower() == "json":
        clean4 = clean4[4:]
if clean4.endswith("```"):
    clean4 = clean4[:-3]
clean4 = clean4.strip()

try:
    parsed4 = json.loads(clean4)
except json.JSONDecodeError as e:
    print(f" [FAIL] {e}")
else:
    print(f" [OK] {parsed4}")
|
|
|
|
# ── TEST 5: Instruction Following ───────────────────────────────────────────
# Single no_think run: the reply must consist of exactly three lines —
# ALPHA, twice the letter count of 'authenticate', OMEGA — in that order.
hdr("TEST 5: INSTRUCTION FOLLOWING (no_think)")
p5 = (
    "Do exactly these steps:\n"
    "1. Write ALPHA on its own line\n"
    "2. Count letters in 'authenticate' and double that number, write only the result on its own line\n"
    "3. Write OMEGA on its own line\n"
    "Output only the three lines."
)
resp5, t5, tok5, tps5, think5 = ask(MODEL, p5, no_think=True)
report(f"{MODEL} [no_think]", resp5, t5, tok5, tps5, think5)

# Non-empty lines of the reply, with surrounding whitespace removed.
lines = [line.strip() for line in resp5.split("\n") if line.strip()]
expected = str(len("authenticate") * 2)
# Compare the whole list: membership checks alone would also accept
# reordered lines or extra output, which the prompt explicitly forbids.
ok = lines == ["ALPHA", expected, "OMEGA"]
print(f" Lines: {lines} -> [{'OK' if ok else 'FAIL'}] (expect ALPHA, {expected}, OMEGA)")
|
|
|
|
# ── TEST 6: Speed head-to-head ──────────────────────────────────────────────
# Runs one prompt against MODEL and COMPARE with a 600-token cap and prints
# timing, token count, throughput and the first 400 characters of each reply.
hdr("TEST 6: SPEED — qwen3.6 vs qwen3:14b (no_think, same prompt)")
speed_p = "List 5 common Windows 10 issues an MSP technician sees and one fix for each. Be concise, no intro."
print(f"Prompt: {speed_p}\n")
for model in (MODEL, COMPARE):
    # no_think only for 3.6; the comparison model runs in its default mode.
    nt = model == MODEL
    if nt:
        label = f"{model} [no_think]"
    else:
        label = f"{model} [default]"
    resp, t, tok, tps, think_tok = ask(model, speed_p, no_think=nt, max_tokens=600)
    # The trailing empty string yields the blank separator line between models.
    print(f"{label}: {t:.1f}s | {tok} tokens | {tps:.0f} tok/s", resp[:400], "", sep="\n")
|
|
|
|
# ── TEST 7: Thinking mode — where it shines ─────────────────────────────────
# Single run with thinking left enabled (no_think=False) and a 1500-token cap
# on a multi-step arithmetic word problem.
hdr("TEST 7: REASONING — where thinking mode should help")
p7 = " ".join((
    "An MSP has 3 technicians.",
    "Tech A can complete 4 tickets per hour.",
    "Tech B can complete 3 tickets per hour.",
    "Tech C can complete 2 tickets per hour.",
    "They have 45 tickets in the queue.",
    "Tech A works 8 hours, Tech B works 6 hours, Tech C works 4 hours.",
    "Will they clear the queue?",
    "How many tickets will be left or how many ahead of schedule will they finish?",
    "Show your work.",
))
resp7, t7, tok7, tps7, think7 = ask(MODEL, p7, no_think=False, max_tokens=1500)
report(
    f"{MODEL} [thinking]",
    resp=resp7,
    t=t7,
    tokens=tok7,
    tps=tps7,
    think_tok=think7,
)
|