# claudetools/tmp_qwen_test.py

import urllib.request, json, time, sys

MODEL = "qwen3.6:latest"
COMPARE = "qwen3:14b"

def ask(model, prompt, system=None, max_tokens=400):
    """Send one non-streaming generation request and return the reply with timing.

    The request is POSTed as JSON to http://localhost:11434/api/generate
    (presumably a local Ollama server -- confirm against the deployment).

    Args:
        model: Model tag sent as the "model" field of the payload.
        prompt: Prompt text sent as the "prompt" field.
        system: Optional system prompt; added to the payload only when truthy.
        max_tokens: Sent as options.num_predict.

    Returns:
        A tuple (response_text, elapsed_seconds, output_tokens, tokens_per_sec):
        the stripped "response" field, the client-side duration of the HTTP
        round trip, the reply's "eval_count" (0 if absent), and
        eval_count / eval_duration with eval_duration treated as nanoseconds
        (0 when the duration is missing or zero).

    Raises:
        urllib.error.URLError: if the request fails or times out (180 s).
        KeyError: if the decoded reply has no "response" field.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_predict": max_tokens},
    }
    if system:
        payload["system"] = system
    req = urllib.request.Request(
        "http://localhost:11434/api/generate",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments while a long generation is running.
    start = time.perf_counter()
    # The context manager closes the HTTP response (and its socket) promptly
    # instead of leaving that to garbage collection.
    with urllib.request.urlopen(req, timeout=180) as http_resp:
        r = json.loads(http_resp.read())
    elapsed = time.perf_counter() - start
    tokens = r.get("eval_count", 0)
    # Default to 0 (not 1 ns) so a missing duration reports 0 tok/s rather
    # than a meaningless tokens * 1e9 figure.
    dur_ns = r.get("eval_duration", 0)
    tps = tokens / (dur_ns / 1e9) if dur_ns else 0
    return r["response"].strip(), elapsed, tokens, tps

def report(label, resp, t, tokens, tps):
    """Print one result block: a labelled header, the response, and a stats line.

    Args:
        label: Text shown between the dashes of the header line.
        resp: Response text printed under "Response:".
        t: Elapsed time in seconds, shown with one decimal place.
        tokens: Output token count, printed as-is.
        tps: Tokens per second, shown rounded to a whole number.
    """
    header = f"\n--- {label} ---"
    body = f"Response:\n{resp}"
    stats = f"\nTime: {t:.1f}s | Output tokens: {tokens} | Speed: {tps:.0f} tok/s"
    # The trailing empty item yields the blank line that closes the block.
    print(header, body, stats, "", sep="\n")

# ── TEST 1: Ticket Classification ──────────────────────────────────────────
# Asks the primary model to put a support ticket into one of five categories
# and prints the raw answer with timing; nothing is validated automatically.
rule_1 = "=" * 60
print(rule_1)
print("TEST 1: TICKET CLASSIFICATION")
print(rule_1)
# The prompt is three paragraphs (task, ticket, output format) separated by
# blank lines.
classify_prompt = "\n\n".join([
    "Classify this IT support ticket into ONE category: "
    "Hardware, Software, Network, Security, or User-Error.",
    "Ticket: Client says Outlook keeps asking for password every morning. "
    "Rebooting does not help. Started after a Windows Update last Tuesday.",
    "Respond with the category name and one sentence of reasoning. No thinking tags.",
])
classify_result = ask(MODEL, classify_prompt)
# ask() returns (response, elapsed, tokens, tps), matching report()'s
# positional parameters after the label.
report(MODEL, *classify_result)

# ── TEST 2: JSON Structured Extraction ─────────────────────────────────────
# Asks the primary model to extract ticket fields as a JSON object, then checks
# that the reply actually parses as a JSON object.
print("=" * 60)
print("TEST 2: STRUCTURED JSON EXTRACTION")
print("=" * 60)
p2 = (
    "Extract the following from the ticket and return ONLY valid JSON, no explanation:\n"
    "Fields: client_name, issue_summary, affected_system, urgency (low/medium/high), suggested_action\n\n"
    "Ticket: Hi, this is Janet from Cascades Dental. Our front desk computer running Windows 10 "
    "is showing a blue screen every time we open Dentrix. This started this morning and we have "
    "patients coming in at 9am. We need this fixed ASAP.\n\n"
    "Return only the JSON object."
)
resp2, t2, tok2, tps2 = ask(MODEL, p2)
report(MODEL, resp2, t2, tok2, tps2)

# Normalise the raw reply before parsing so that wrapper text around otherwise
# valid JSON is not reported as a failure.
clean = resp2
if "</think>" in clean:
    # Keep only the text after the first closing think tag.
    clean = clean.partition("</think>")[2].strip()
if clean.startswith("```"):
    # Strip a Markdown code fence (with optional language tag on the opening
    # line), the same wrapper Test 5 already tolerates.
    fenced = clean[3:]
    if "\n" in fenced:
        fenced = fenced.split("\n", 1)[1]
    clean = fenced.rsplit("```", 1)[0].strip()

try:
    parsed = json.loads(clean)
except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
    print(f"[FAIL] JSON parse error: {e}")
else:
    if isinstance(parsed, dict):
        print(f"[OK] Valid JSON: {list(parsed.keys())}")
    else:
        # Valid JSON but not an object (e.g. a list or bare string): report it
        # as such instead of a misleading parse error.
        print(f"[FAIL] Expected a JSON object, got {type(parsed).__name__}")

# ── TEST 3: Summarization ───────────────────────────────────────────────────
# Asks the primary model for a short client-facing summary of a technical
# incident and prints the answer with timing; the word limit is not checked.
rule_3 = "=" * 60
print(f"\n{rule_3}")
print("TEST 3: TECHNICAL SUMMARIZATION")
print(rule_3)
summary_instructions = " ".join([
    "Summarize the following incident in 3 bullet points for a client-facing email.",
    "Be professional, non-technical, under 80 words total.",
])
# Incident text is assembled from fragments joined by single spaces.
incident_text = " ".join([
    "Incident: The GuruRMM agent watchdog on Pluto (172.16.3.36) failed to restart the main",
    "agent service after an auto-update because: (1) SCM service.stop() returned access denied,",
    "(2) suppress_until was set to a future timestamp instead of being cleared on failure,",
    "(3) the watchdog then treated the suppression as intentional and skipped all restart attempts",
    "for 25 minutes. Fix: sc.exe fallback added for stop, suppress_until cleared on error.",
])
summary_prompt = summary_instructions + "\n\n" + incident_text
summary_result = ask(MODEL, summary_prompt)
report(MODEL, *summary_result)

# ── TEST 4: Code Explanation ────────────────────────────────────────────────
# Asks the primary model to explain a short Rust snippet and prints the answer
# with timing; the explanation itself is not validated.
rule_4 = "=" * 60
print(rule_4)
print("TEST 4: RUST CODE EXPLANATION")
print(rule_4)
# The snippet is kept one source line per list item (single-quoted where the
# Rust line contains double quotes) and wrapped in a ```rust fence.
rust_block = "\n".join([
    "```rust",
    "let stop_result = service.stop();",
    "if let Err(e) = stop_result {",
    '    warn!("Watchdog: SCM stop failed ({}), falling back to sc.exe", e);',
    '    let _ = std::process::Command::new("sc.exe")',
    '        .args(["stop", MAIN_SERVICE_NAME])',
    "        .status();",
    "}",
    "```",
])
explain_task = (
    "Explain what this Rust code does in 2-3 sentences. "
    "Be specific about the error handling strategy."
)
explain_prompt = explain_task + "\n\n" + rust_block
explain_result = ask(MODEL, explain_prompt)
report(MODEL, *explain_result)

# ── TEST 5: Roadmap Classification ─────────────────────────────────────────
# Asks the primary model to place a feature request on the roadmap as JSON,
# then checks that the reply (minus think tags / code fences) parses as JSON.
print("=" * 60)
print("TEST 5: FEATURE ROADMAP PLACEMENT (MSP context)")
print("=" * 60)
p5 = (
    "You are helping classify a feature request for GuruRMM, an RMM tool for MSPs. "
    "The roadmap sections are: Core Agent Features, Server/API Features, Dashboard & UI, "
    "Platform & Infrastructure, Integrations, Future Considerations.\n\n"
    "Feature request: Add the ability to remotely enable or disable Windows Defender Real-Time "
    "Protection on managed endpoints from the dashboard.\n\n"
    'Respond with JSON only: {"section": "...", "subsection": "...", "priority": "P1|P2|P3", "summary": "..."}'
)
resp5, t5, tok5, tps5 = ask(MODEL, p5)
report(MODEL, resp5, t5, tok5, tps5)

clean5 = resp5
if "</think>" in clean5:
    # Keep only the text after the first closing think tag.
    clean5 = clean5.partition("</think>")[2].strip()
if clean5.startswith("```"):
    # Strip a Markdown code fence. The opening fence line (with optional
    # language tag) is dropped only when there is a newline to split on, so a
    # single-line fenced reply no longer raises IndexError.
    fenced5 = clean5[3:]
    if "\n" in fenced5:
        fenced5 = fenced5.split("\n", 1)[1]
    clean5 = fenced5.rsplit("```", 1)[0].strip()

# Only json.loads can fail here, so catch just its error type rather than
# labelling every possible exception a "JSON parse error".
try:
    parsed5 = json.loads(clean5)
except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
    print(f"[FAIL] JSON parse error: {e}")
else:
    print(f"[OK] Valid JSON: {parsed5}")

# ── TEST 6: Instruction Following ───────────────────────────────────────────
# Asks the primary model to emit ALPHA, a computed number, and OMEGA on separate
# lines, then checks that all three appear as whole lines of the answer.
print("\n" + "=" * 60)
print("TEST 6: MULTI-STEP INSTRUCTION FOLLOWING")
print("=" * 60)
p6 = (
    "Do exactly these steps in order:\n"
    "1. Write the word ALPHA on its own line\n"
    "2. Count the letters in the word 'authenticate'\n"
    "3. Write that number doubled on its own line\n"
    "4. Write the word OMEGA on its own line\n"
    "No explanation, no thinking, just the three lines of output."
)
resp6, t6, tok6, tps6 = ask(MODEL, p6)
report(MODEL, resp6, t6, tok6, tps6)

# Grade only the final answer: discard everything up to the closing think tag.
# Dropping just the tag lines would leave the reasoning text in place, where a
# stray "ALPHA" or "24" line could produce a false pass.
answer6 = resp6
if "</think>" in answer6:
    answer6 = answer6.partition("</think>")[2]
lines = [line.strip() for line in answer6.split("\n") if line.strip()]
print(f"Lines output: {lines}")
expected_num = len("authenticate") * 2  # 12*2=24
# Membership check: each expected token must be an entire line by itself;
# extra lines and ordering are not penalised.
ok = "ALPHA" in lines and "OMEGA" in lines and str(expected_num) in lines
print(f"[{'OK' if ok else 'FAIL'}] Expected ALPHA, {expected_num}, OMEGA")

# ── TEST 7: Speed comparison ─────────────────────────────────────────────
# Sends one identical prompt to the primary and the comparison model and prints
# a one-line timing summary plus the first 200 characters of each answer.
rule_7 = "=" * 60
print(f"\n{rule_7}")
print("TEST 7: SPEED COMPARISON (same prompt, both models)")
print(rule_7)
speed_prompt = "List 5 common Windows 10 issues an IT support technician encounters and one fix for each. Be concise."
print(f"Prompt: {speed_prompt}\n")
for candidate in (MODEL, COMPARE):
    answer, seconds, n_tokens, rate = ask(candidate, speed_prompt)
    preview = answer[:200]
    print(f"{candidate}: {seconds:.1f}s | {n_tokens} tokens | {rate:.0f} tok/s")
    # The ellipsis is appended unconditionally, even for short answers.
    print(f"  {preview}...")
    print()