From ee67a8bcf7573e2a6c161c1540e4413daf7cd14e Mon Sep 17 00:00:00 2001
From: Mike Swanson
Date: Sat, 16 May 2026 15:59:43 -0700
Subject: [PATCH] sync: auto-sync from DESKTOP-0O8A1RL at 2026-05-16 15:59:41

Author: Mike Swanson
Machine: DESKTOP-0O8A1RL
Timestamp: 2026-05-16 15:59:41
---
 tmp_qwen_reason.py |  65 +++++++++++++++++
 tmp_qwen_test.py   | 168 ++++++++++++++++++++++++++++++++++++++++++++
 tmp_qwen_test2.py  | 172 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 405 insertions(+)
 create mode 100644 tmp_qwen_reason.py
 create mode 100644 tmp_qwen_test.py
 create mode 100644 tmp_qwen_test2.py

diff --git a/tmp_qwen_reason.py b/tmp_qwen_reason.py
new file mode 100644
index 0000000..8fc1986
--- /dev/null
+++ b/tmp_qwen_reason.py
@@ -0,0 +1,65 @@
+import urllib.request, json, time
+
+def ask(model, prompt, max_tokens=6000):
+    """Send one non-streaming prompt to the local Ollama generate endpoint.
+
+    Returns (visible, elapsed, tokens, tps, think_words, think): the reply
+    with any <think>...</think> block removed, wall-clock seconds, the
+    reported eval_count, tokens per second derived from eval_duration
+    (treated as nanoseconds), the whitespace-split word count of the
+    thinking text, and the thinking text itself.
+    """
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "stream": False,
+        "options": {"num_predict": max_tokens}
+    }
+    start = time.time()
+    req = urllib.request.Request(
+        "http://localhost:11434/api/generate",
+        data=json.dumps(payload).encode(),
+        headers={"Content-Type": "application/json"},
+    )
+    # Close the HTTP response deterministically once the body is read.
+    with urllib.request.urlopen(req, timeout=600) as http_resp:
+        r = json.loads(http_resp.read())
+    elapsed = time.time() - start
+    tokens = r.get("eval_count", 0)
+    dur_ns = r.get("eval_duration", 1)
+    tps = tokens / (dur_ns / 1e9) if dur_ns else 0
+    raw = r["response"].strip()
+    # The offsets 7 and 8 are len("<think>") and len("</think>").
+    if "<think>" in raw and "</think>" in raw:
+        think = raw[raw.index("<think>")+7 : raw.index("</think>")]
+        visible = raw[raw.index("</think>")+8:].strip()
+        think_words = len(think.split())
+    else:
+        think = ""
+        visible = raw
+        think_words = 0
+    return visible, elapsed, tokens, tps, think_words, think
+
+prompt = (
+    "An MSP has 3 technicians. Tech A can complete 4 tickets per hour. "
+    "Tech B can complete 3 tickets per hour. Tech C can complete 2 tickets per hour. "
+    "They have 45 tickets in the queue. Tech A works 8 hours, Tech B works 6 hours, "
+    "Tech C works 4 hours. Will they clear the queue? How many tickets will be left or "
+    "how many ahead of schedule will they finish? Show your work."
+)
+
+print("Running qwen3.6 reasoning test at 6000 token budget...")
+print(f"Prompt: {prompt}\n")
+
+for model in ["qwen3.6:latest", "qwen3:14b"]:
+    print(f"\n{'='*60}")
+    print(f"MODEL: {model}")
+    print('='*60)
+    visible, t, tokens, tps, think_words, think = ask(model, prompt)
+    print(f"Time: {t:.1f}s | Total tokens: {tokens} | Speed: {tps:.0f} tok/s")
+    if think_words:
+        print(f"Thinking: ~{think_words} words")
+        print(f"Thinking excerpt (first 300 chars):\n {think[:300]}...")
+    else:
+        print("Thinking: not exposed in output")
+    print(f"\nResponse:\n{visible}")
diff --git a/tmp_qwen_test.py b/tmp_qwen_test.py
new file mode 100644
index 0000000..3ffad50
--- /dev/null
+++ b/tmp_qwen_test.py
@@ -0,0 +1,168 @@
+import urllib.request, json, time, sys
+
+MODEL = "qwen3.6:latest"
+COMPARE = "qwen3:14b"
+
+def ask(model, prompt, system=None, max_tokens=400):
+    """Send one non-streaming prompt to the local Ollama generate endpoint.
+
+    Returns (response, elapsed, tokens, tps): the stripped reply text,
+    wall-clock seconds, the reported eval_count, and tokens per second
+    derived from eval_duration (treated as nanoseconds).
+    """
+    payload = {"model": model, "prompt": prompt, "stream": False, "options": {"num_predict": max_tokens}}
+    if system:
+        payload["system"] = system
+    start = time.time()
+    req = urllib.request.Request(
+        "http://localhost:11434/api/generate",
+        data=json.dumps(payload).encode(),
+        headers={"Content-Type": "application/json"},
+    )
+    # Close the HTTP response deterministically once the body is read.
+    with urllib.request.urlopen(req, timeout=180) as http_resp:
+        r = json.loads(http_resp.read())
+    elapsed = time.time() - start
+    tokens = r.get("eval_count", 0)
+    dur_ns = r.get("eval_duration", 1)
+    tps = tokens / (dur_ns / 1e9) if dur_ns else 0
+    return r["response"].strip(), elapsed, tokens, tps
+
+def report(label, resp, t, tokens, tps):
+    """Print one labelled result: the response text, then timing and speed."""
+    print(f"\n--- {label} ---")
+    print(f"Response:\n{resp}")
+    print(f"\nTime: {t:.1f}s | Output tokens: {tokens} | Speed: {tps:.0f} tok/s")
+    print()
+
+# ── TEST 1: Ticket Classification ──────────────────────────────────────────
+print("=" * 60)
+print("TEST 1: TICKET CLASSIFICATION")
+print("=" * 60)
+p = (
+    "Classify this IT support ticket into ONE category: "
+    "Hardware, Software, Network, Security, or User-Error.\n\n"
+    "Ticket: Client says Outlook keeps asking for password every morning. "
+    "Rebooting does not help. Started after a Windows Update last Tuesday.\n\n"
+    "Respond with the category name and one sentence of reasoning. No thinking tags."
+)
+resp, t, tok, tps = ask(MODEL, p)
+report(MODEL, resp, t, tok, tps)
+
+# ── TEST 2: JSON Structured Extraction ─────────────────────────────────────
+print("=" * 60)
+print("TEST 2: STRUCTURED JSON EXTRACTION")
+print("=" * 60)
+p2 = (
+    "Extract the following from the ticket and return ONLY valid JSON, no explanation:\n"
+    "Fields: client_name, issue_summary, affected_system, urgency (low/medium/high), suggested_action\n\n"
+    "Ticket: Hi, this is Janet from Cascades Dental. Our front desk computer running Windows 10 "
+    "is showing a blue screen every time we open Dentrix. This started this morning and we have "
+    "patients coming in at 9am. We need this fixed ASAP.\n\n"
+    "Return only the JSON object."
+)
+resp2, t2, tok2, tps2 = ask(MODEL, p2)
+report(MODEL, resp2, t2, tok2, tps2)
+try:
+    # strip thinking tags if present
+    clean = resp2
+    if "</think>" in clean:
+        clean = clean[clean.index("</think>") + 8:].strip()
+    parsed = json.loads(clean)
+    print(f"[OK] Valid JSON: {list(parsed.keys())}")
+except Exception as e:
+    print(f"[FAIL] JSON parse error: {e}")
+
+# ── TEST 3: Summarization ───────────────────────────────────────────────────
+print("\n" + "=" * 60)
+print("TEST 3: TECHNICAL SUMMARIZATION")
+print("=" * 60)
+p3 = (
+    "Summarize the following incident in 3 bullet points for a client-facing email. "
+    "Be professional, non-technical, under 80 words total.\n\n"
+    "Incident: The GuruRMM agent watchdog on Pluto (172.16.3.36) failed to restart the main "
+    "agent service after an auto-update because: (1) SCM service.stop() returned access denied, "
+    "(2) suppress_until was set to a future timestamp instead of being cleared on failure, "
+    "(3) the watchdog then treated the suppression as intentional and skipped all restart attempts "
+    "for 25 minutes. Fix: sc.exe fallback added for stop, suppress_until cleared on error."
+)
+resp3, t3, tok3, tps3 = ask(MODEL, p3)
+report(MODEL, resp3, t3, tok3, tps3)
+
+# ── TEST 4: Code Explanation ────────────────────────────────────────────────
+print("=" * 60)
+print("TEST 4: RUST CODE EXPLANATION")
+print("=" * 60)
+p4 = (
+    "Explain what this Rust code does in 2-3 sentences. Be specific about the error handling strategy.\n\n"
+    "```rust\n"
+    "let stop_result = service.stop();\n"
+    "if let Err(e) = stop_result {\n"
+    " warn!(\"Watchdog: SCM stop failed ({}), falling back to sc.exe\", e);\n"
+    " let _ = std::process::Command::new(\"sc.exe\")\n"
+    " .args([\"stop\", MAIN_SERVICE_NAME])\n"
+    " .status();\n"
+    "}\n"
+    "```"
+)
+resp4, t4, tok4, tps4 = ask(MODEL, p4)
+report(MODEL, resp4, t4, tok4, tps4)
+
+# ── TEST 5: Roadmap Classification ─────────────────────────────────────────
+print("=" * 60)
+print("TEST 5: FEATURE ROADMAP PLACEMENT (MSP context)")
+print("=" * 60)
+p5 = (
+    "You are helping classify a feature request for GuruRMM, an RMM tool for MSPs. "
+    "The roadmap sections are: Core Agent Features, Server/API Features, Dashboard & UI, "
+    "Platform & Infrastructure, Integrations, Future Considerations.\n\n"
+    "Feature request: Add the ability to remotely enable or disable Windows Defender Real-Time "
+    "Protection on managed endpoints from the dashboard.\n\n"
+    'Respond with JSON only: {"section": "...", "subsection": "...", "priority": "P1|P2|P3", "summary": "..."}'
+)
+resp5, t5, tok5, tps5 = ask(MODEL, p5)
+report(MODEL, resp5, t5, tok5, tps5)
+try:
+    clean5 = resp5
+    if "</think>" in clean5:
+        clean5 = clean5[clean5.index("</think>") + 8:].strip()
+    if clean5.startswith("```"):
+        clean5 = clean5.split("\n", 1)[1].rsplit("```", 1)[0].strip()
+    parsed5 = json.loads(clean5)
+    print(f"[OK] Valid JSON: {parsed5}")
+except Exception as e:
+    print(f"[FAIL] JSON parse error: {e}")
+
+# ── TEST 6: Instruction Following ───────────────────────────────────────────
+print("\n" + "=" * 60)
+print("TEST 6: MULTI-STEP INSTRUCTION FOLLOWING")
+print("=" * 60)
+p6 = (
+    "Do exactly these steps in order:\n"
+    "1. Write the word ALPHA on its own line\n"
+    "2. Count the letters in the word 'authenticate'\n"
+    "3. Write that number doubled on its own line\n"
+    "4. Write the word OMEGA on its own line\n"
+    "No explanation, no thinking, just the three lines of output."
+)
+resp6, t6, tok6, tps6 = ask(MODEL, p6)
+report(MODEL, resp6, t6, tok6, tps6)
+lines = [l.strip() for l in resp6.split("\n") if l.strip()]
+if "<think>" in resp6:
+    lines = [l for l in lines if not l.startswith("<")]
+print(f"Lines output: {lines}")
+expected_num = len("authenticate") * 2  # 12*2=24
+ok = "ALPHA" in lines and "OMEGA" in lines and str(expected_num) in lines
+print(f"[{'OK' if ok else 'FAIL'}] Expected ALPHA, {expected_num}, OMEGA")
+
+# ── TEST 7: Speed comparison ─────────────────────────────────────────────
+print("\n" + "=" * 60)
+print("TEST 7: SPEED COMPARISON (same prompt, both models)")
+print("=" * 60)
+speed_prompt = "List 5 common Windows 10 issues an IT support technician encounters and one fix for each. Be concise."
+print(f"Prompt: {speed_prompt}\n")
+for model in [MODEL, COMPARE]:
+    r, t, tok, tps = ask(model, speed_prompt)
+    print(f"{model}: {t:.1f}s | {tok} tokens | {tps:.0f} tok/s")
+    print(f" {r[:200]}...")
+    print()
diff --git a/tmp_qwen_test2.py b/tmp_qwen_test2.py
new file mode 100644
index 0000000..28ba9ee
--- /dev/null
+++ b/tmp_qwen_test2.py
@@ -0,0 +1,172 @@
+import urllib.request, json, time
+
+MODEL = "qwen3.6:latest"
+COMPARE = "qwen3:14b"
+
+def ask(model, prompt, system=None, max_tokens=2000, no_think=False):
+    """Send one non-streaming prompt to the local Ollama generate endpoint.
+
+    When no_think is true, a "/no_think" line is prepended to the prompt.
+    Returns (visible, elapsed, tokens, tps, think_tokens): the reply with any
+    <think>...</think> block removed, wall-clock seconds, the reported
+    eval_count, tokens per second derived from eval_duration (treated as
+    nanoseconds), and the whitespace-split word count of the thinking text.
+    """
+    if no_think:
+        prompt = "/no_think\n" + prompt
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "stream": False,
+        "options": {"num_predict": max_tokens}
+    }
+    if system:
+        payload["system"] = system
+    start = time.time()
+    req = urllib.request.Request(
+        "http://localhost:11434/api/generate",
+        data=json.dumps(payload).encode(),
+        headers={"Content-Type": "application/json"},
+    )
+    # Close the HTTP response deterministically once the body is read.
+    with urllib.request.urlopen(req, timeout=300) as http_resp:
+        r = json.loads(http_resp.read())
+    elapsed = time.time() - start
+    tokens = r.get("eval_count", 0)
+    dur_ns = r.get("eval_duration", 1)
+    tps = tokens / (dur_ns / 1e9) if dur_ns else 0
+    raw = r["response"].strip()
+    # Strip thinking block if present (7 and 8 are the tag lengths)
+    if "<think>" in raw and "</think>" in raw:
+        think_content = raw[raw.index("<think>")+7:raw.index("</think>")]
+        visible = raw[raw.index("</think>")+8:].strip()
+        think_tokens = len(think_content.split())
+    else:
+        visible = raw
+        think_tokens = 0
+    return visible, elapsed, tokens, tps, think_tokens
+
+def hdr(title):
+    """Print a section title framed by two 60-character '=' rules."""
+    print("\n" + "=" * 60)
+    print(title)
+    print("=" * 60)
+
+def report(label, resp, t, tokens, tps, think_tok):
+    """Print one labelled result, truncating the response to 600 characters."""
+    tag = f" [thinking: ~{think_tok} words]" if think_tok else " [no thinking]"
+    print(f"\n{label}{tag}")
+    print(f"Time: {t:.1f}s | Total tokens: {tokens} | Speed: {tps:.0f} tok/s")
+    print(f"Response:\n{resp[:600]}")
+
+# ── TEST 1: Ticket Classification ──────────────────────────────────────────
+hdr("TEST 1: TICKET CLASSIFICATION")
+p = (
+    "Classify this IT support ticket into ONE category: "
+    "Hardware, Software, Network, Security, or User-Error.\n\n"
+    "Ticket: Client says Outlook keeps asking for password every morning. "
+    "Rebooting does not help. Started after a Windows Update last Tuesday.\n\n"
+    "Respond with the category name and one sentence of reasoning."
+)
+for no_think in [False, True]:
+    mode = "no_think" if no_think else "thinking"
+    resp, t, tok, tps, think_tok = ask(MODEL, p, no_think=no_think)
+    report(f"{MODEL} [{mode}]", resp, t, tok, tps, think_tok)
+
+# ── TEST 2: JSON Structured Extraction ─────────────────────────────────────
+hdr("TEST 2: JSON EXTRACTION")
+p2 = (
+    "Extract from this ticket and return ONLY a valid JSON object, no explanation:\n"
+    "Fields: client_name, issue_summary, affected_system, urgency (low/medium/high), suggested_action\n\n"
+    "Ticket: Hi, this is Janet from Cascades Dental. Our front desk computer running Windows 10 "
+    "is showing a blue screen every time we open Dentrix. This started this morning and we have "
+    "patients coming in at 9am. We need this fixed ASAP.\n\n"
+    "Return only the JSON object."
+)
+for no_think in [False, True]:
+    mode = "no_think" if no_think else "thinking"
+    resp, t, tok, tps, think_tok = ask(MODEL, p2, no_think=no_think)
+    report(f"{MODEL} [{mode}]", resp, t, tok, tps, think_tok)
+    # str.strip() takes a character set, not a prefix; safe here only because a JSON object starts with "{" and ends with "}".
+    clean = resp.strip().strip("```json").strip("```").strip()
+    try:
+        parsed = json.loads(clean)
+        print(f" [OK] Valid JSON with keys: {list(parsed.keys())}")
+    except Exception as e:
+        print(f" [FAIL] {e} | raw: {repr(clean[:100])}")
+
+# ── TEST 3: Summarization ───────────────────────────────────────────────────
+hdr("TEST 3: SUMMARIZATION (no_think only — faster)")
+p3 = (
+    "Summarize this incident in 3 bullet points for a client-facing email. "
+    "Professional, non-technical, under 80 words total.\n\n"
+    "Incident: The GuruRMM agent watchdog on a Windows build server failed to restart the main "
+    "agent service after an auto-update because: (1) SCM service.stop() returned access denied, "
+    "(2) suppress_until was set to a future timestamp instead of being cleared on failure, "
+    "causing the watchdog to skip all restart attempts for 25 minutes. "
+    "Fix: sc.exe fallback added for stop, suppress_until cleared on error."
+)
+resp3, t3, tok3, tps3, think3 = ask(MODEL, p3, no_think=True)
+report(f"{MODEL} [no_think]", resp3, t3, tok3, tps3, think3)
+
+# ── TEST 4: Roadmap Classification (JSON) ──────────────────────────────────
+hdr("TEST 4: FEATURE ROADMAP PLACEMENT (no_think)")
+p4 = (
+    "You are classifying a feature request for GuruRMM, an RMM tool for MSPs. "
+    "Roadmap sections: Core Agent Features, Server/API Features, Dashboard & UI, "
+    "Platform & Infrastructure, Integrations, Future Considerations.\n\n"
+    "Feature request: Add ability to remotely enable or disable Windows Defender "
+    "Real-Time Protection on managed endpoints from the dashboard.\n\n"
+    'Return ONLY this JSON: {"section": "...", "subsection": "...", "priority": "P1|P2|P3", "summary": "..."}'
+)
+resp4, t4, tok4, tps4, think4 = ask(MODEL, p4, no_think=True)
+report(f"{MODEL} [no_think]", resp4, t4, tok4, tps4, think4)
+# str.strip() takes a character set, not a prefix; safe here only because a JSON object starts with "{" and ends with "}".
+clean4 = resp4.strip().strip("```json").strip("```").strip()
+try:
+    parsed4 = json.loads(clean4)
+    print(f" [OK] {parsed4}")
+except Exception as e:
+    print(f" [FAIL] {e}")
+
+# ── TEST 5: Instruction Following ───────────────────────────────────────────
+hdr("TEST 5: INSTRUCTION FOLLOWING (no_think)")
+p5 = (
+    "Do exactly these steps:\n"
+    "1. Write ALPHA on its own line\n"
+    "2. Count letters in 'authenticate' and double that number, write only the result on its own line\n"
+    "3. Write OMEGA on its own line\n"
+    "Output only the three lines."
+)
+resp5, t5, tok5, tps5, think5 = ask(MODEL, p5, no_think=True)
+report(f"{MODEL} [no_think]", resp5, t5, tok5, tps5, think5)
+lines = [l.strip() for l in resp5.split("\n") if l.strip()]
+expected = str(len("authenticate") * 2)
+ok = "ALPHA" in lines and "OMEGA" in lines and expected in lines
+print(f" Lines: {lines} -> [{'OK' if ok else 'FAIL'}] (expect ALPHA, {expected}, OMEGA)")
+
+# ── TEST 6: Speed head-to-head ──────────────────────────────────────────────
+hdr("TEST 6: SPEED — qwen3.6 vs qwen3:14b (no_think, same prompt)")
+speed_p = (
+    "List 5 common Windows 10 issues an MSP technician sees and one fix for each. Be concise, no intro."
+)
+print(f"Prompt: {speed_p}\n")
+for model in [MODEL, COMPARE]:
+    nt = model == MODEL  # no_think only for 3.6
+    resp, t, tok, tps, think_tok = ask(model, speed_p, no_think=nt, max_tokens=600)
+    label = f"{model} [{'no_think' if nt else 'default'}]"
+    print(f"{label}: {t:.1f}s | {tok} tokens | {tps:.0f} tok/s")
+    print(resp[:400])
+    print()
+
+# ── TEST 7: Thinking mode — where it shines ─────────────────────────────────
+hdr("TEST 7: REASONING — where thinking mode should help")
+p7 = (
+    "An MSP has 3 technicians. Tech A can complete 4 tickets per hour. "
+    "Tech B can complete 3 tickets per hour. Tech C can complete 2 tickets per hour. "
+    "They have 45 tickets in the queue. Tech A works 8 hours, Tech B works 6 hours, "
+    "Tech C works 4 hours. Will they clear the queue? How many tickets will be left or "
+    "how many ahead of schedule will they finish? Show your work."
+)
+resp7, t7, tok7, tps7, think7 = ask(MODEL, p7, no_think=False, max_tokens=1500)
+report(f"{MODEL} [thinking]", resp7, t7, tok7, tps7, think7)