rmm dashboard redesign (Gemini live review) + CDP Chrome driver

- .claude/scripts/cdp.py: drive Chrome via DevTools Protocol; screenshots to disk
  (so Gemini/Grok can see the live site). Fixes invisible-window + no-disk-screenshot.
- reference_cdp_chrome_driver.md (+ MEMORY index)
- gururmm submodule pointer -> dashboard redesign docs (local 3cef6ba)
- session log

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-05 13:10:19 -07:00
parent c4ec2ed4b0
commit 47b71b7b3a
7 changed files with 408 additions and 103 deletions

194
.claude/scripts/cdp.py Normal file
View File

@@ -0,0 +1,194 @@
#!/usr/bin/env python
"""
cdp.py - drive Chrome over the DevTools Protocol (CDP), like Antigravity does.
Launches (or attaches to) a Chrome started with --remote-debugging-port and drives
it: navigate, screenshot-to-disk, click, type, key, eval. Screenshots are written
as real PNG files (so they can be fed to Gemini/Grok image tools).
Usage:
py cdp.py launch [url] # start a visible debug Chrome (dedicated profile)
py cdp.py status # /json/version + list page targets
py cdp.py nav <url> [tabid] # navigate (active page if tabid omitted)
py cdp.py shot <out.png> [tabid] # screenshot the page to a PNG file
py cdp.py click <x> <y> [tabid] # left-click at viewport coords
py cdp.py type <text> [tabid] # insert text into the focused element
py cdp.py key <Key> [tabid] # press a key (Enter/Tab/Escape/...)
py cdp.py eval <js> [tabid] # Runtime.evaluate, prints JSON result
Env: CDP_PORT (default 9222), CDP_PROFILE (default %USERPROFILE%\\.claude\\cdp-chrome-profile)
"""
import sys, os, json, time, base64, subprocess, urllib.request
# Remote-debugging port; taken from the CDP_PORT environment variable, default 9222.
PORT = int(os.environ.get("CDP_PORT", "9222"))
# Base URL of the DevTools HTTP endpoint on this machine.
BASE = f"http://localhost:{PORT}"
# Chrome user-data directory; CDP_PROFILE overrides the default under the home dir.
PROFILE = os.environ.get(
    "CDP_PROFILE",
    os.path.join(os.path.expanduser("~"), ".claude", "cdp-chrome-profile"),
)
# First existing chrome.exe among the listed install locations, or None if none exists.
CHROME = next(
    filter(
        os.path.isfile,
        (
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
            os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe"),
        ),
    ),
    None,
)
import websocket # websocket-client
def http_get(path):
    """GET ``BASE + path`` and return the response body parsed as JSON.

    The request uses a 5-second timeout. The body is decoded with the default
    ``bytes.decode()`` codec before being handed to ``json.loads``.
    """
    response = urllib.request.urlopen(BASE + path, timeout=5)
    with response:
        raw = response.read()
    return json.loads(raw.decode())
def page_targets():
    """Return the entries of the ``/json`` listing whose ``type`` is ``"page"``."""
    def is_page(target):
        return target.get("type") == "page"

    return list(filter(is_page, http_get("/json")))
def pick_target(tabid=None):
    """Choose the page target that a command should drive.

    Args:
        tabid: Optional target id. The full id, or any prefix that matches
            exactly one page target, is accepted. Prefixes are needed because
            ``cmd_status`` prints only the first 8 characters of each id.

    Returns:
        The target description dict taken from ``page_targets()``.

    Raises:
        SystemExit: there are no page targets, or ``tabid`` matches no target
            or more than one target.
    """
    targets = page_targets()
    if not targets:
        raise SystemExit("[cdp] no page targets. Run: py cdp.py launch")
    if tabid:
        # An exact id always wins, even if it is also a prefix of another id.
        for t in targets:
            if t["id"] == tabid:
                return t
        matches = [t for t in targets if t["id"].startswith(tabid)]
        if len(matches) == 1:
            return matches[0]
        if matches:
            raise SystemExit(
                f"[cdp] tabid {tabid} is ambiguous ({len(matches)} targets match)"
            )
        raise SystemExit(f"[cdp] tabid {tabid} not found")
    # Prefer a non-devtools, non-blank page; then any non-devtools page; then
    # whatever is listed first.
    candidates = [
        t for t in targets if not t.get("url", "").startswith("devtools://")
    ]
    for t in candidates:
        if t.get("url", "") not in ("", "about:blank"):
            return t
    return candidates[0] if candidates else targets[0]
def send(ws, _id, method, params=None):
    """Send one protocol command over ``ws`` and return its ``result``.

    The request is serialised as ``{"id", "method", "params"}`` (``params``
    defaults to an empty dict). Incoming messages are read until one carries the
    same ``id``; every other message is discarded.

    Returns:
        The reply's ``result`` member, or ``{}`` when the reply has none.

    Raises:
        SystemExit: the matching reply contains an ``error`` member.
    """
    request = {"id": _id, "method": method, "params": params or {}}
    ws.send(json.dumps(request))
    reply = json.loads(ws.recv())
    while reply.get("id") != _id:
        # Not our reply (an event or another id) - drop it and keep reading.
        reply = json.loads(ws.recv())
    if "error" in reply:
        raise SystemExit(f"[cdp] {method} error: {reply['error']}")
    return reply.get("result", {})
def with_ws(tabid, fn):
    """Open a WebSocket to the chosen target, run ``fn(ws)``, and always close it.

    Args:
        tabid: Target id forwarded to ``pick_target`` (``None`` lets it choose).
        fn: Callable that receives the open connection.

    Returns:
        Whatever ``fn`` returns.
    """
    target = pick_target(tabid)
    debugger_url = target["webSocketDebuggerUrl"]
    # NOTE(review): max_size is forwarded unchanged; confirm that the installed
    # websocket-client version actually honours this option.
    conn = websocket.create_connection(debugger_url, max_size=64 * 1024 * 1024)
    try:
        outcome = fn(conn)
    finally:
        conn.close()
    return outcome
def cmd_launch(args):
    """Start Chrome with remote debugging enabled and wait for the port to answer.

    Args:
        args: Optional ``[url]``; ``about:blank`` is opened when omitted.

    Raises:
        SystemExit: no chrome.exe was located, or ``/json/version`` never
            answered within 40 attempts (0.25 s pause after each failure).
    """
    if not CHROME:
        raise SystemExit("[cdp] chrome.exe not found")
    os.makedirs(PROFILE, exist_ok=True)
    start_url = "about:blank" if not args else args[0]
    chrome_argv = [
        CHROME,
        f"--remote-debugging-port={PORT}",
        f"--user-data-dir={PROFILE}",
        "--no-first-run",
        "--no-default-browser-check",
        "--remote-allow-origins=*",
        start_url,
    ]
    subprocess.Popen(chrome_argv, close_fds=True)
    attempts_left = 40
    while attempts_left > 0:
        attempts_left -= 1
        try:
            info = http_get("/json/version")
            browser = info.get('Browser')
            ws_prefix = info.get('webSocketDebuggerUrl', '')[:40]
            print(f"[cdp] launched: {browser} ws={ws_prefix}...")
            print(f"[cdp] profile: {PROFILE}")
            return
        except Exception:
            # Any failure is treated as "port not ready yet": pause and retry.
            time.sleep(0.25)
    raise SystemExit("[cdp] chrome started but debug port never opened")
def cmd_status(args):
    """Print the browser version string followed by one line per page target.

    Each target line shows the first 8 characters of its id, up to 40 characters
    of its title (as a repr) and up to 70 characters of its URL. ``args`` is
    accepted for dispatch uniformity and not used.
    """
    version = http_get("/json/version")
    print(f"Browser: {version.get('Browser')}")
    for target in page_targets():
        short_id = target["id"][:8]
        title = target["title"][:40]
        url = target["url"][:70]
        print(f" [{short_id}] {title!r} {url}")
def cmd_nav(args):
    """Navigate a tab to a URL and wait, best-effort, for its load event.

    Args:
        args: ``[url]`` or ``[url, tabid]``. A URL without a scheme gets
            ``https://`` prepended. ``about:``, ``data:``, ``blob:`` and
            ``javascript:`` URLs are passed through unchanged.

    Raises:
        SystemExit: Chrome reports that the navigation failed.
    """
    url = args[0]
    # These schemes never contain "://", so checking for "://" alone would turn
    # "about:blank" into "https://about:blank".
    opaque_schemes = ("about:", "data:", "blob:", "javascript:")
    if "://" not in url and not url.lower().startswith(opaque_schemes):
        url = "https://" + url
    tabid = args[1] if len(args) > 1 else None

    def fn(ws):
        send(ws, 1, "Page.enable")
        nav = send(ws, 2, "Page.navigate", {"url": url})
        # Page.navigate reports failures (DNS, refused connection, ...) through
        # errorText instead of a protocol error; surface it rather than
        # claiming success.
        if nav.get("errorText"):
            raise SystemExit(f"[cdp] navigate {url} failed: {nav['errorText']}")
        # Wait for the load event (best-effort), never longer than 20 s overall:
        # each recv gets only the time left before the deadline.
        deadline = time.time() + 20
        while True:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            ws.settimeout(remaining)
            try:
                m = json.loads(ws.recv())
            except Exception:
                # Timeout or closed socket: stop waiting, navigation was issued.
                break
            if m.get("method") == "Page.loadEventFired":
                break
        return "ok"

    with_ws(tabid, fn)
    # Short settle time after the load event before reporting.
    time.sleep(1.0)
    print(f"[cdp] navigated -> {url}")
def cmd_shot(args):
    """Capture the page as a PNG and write it to disk.

    Args:
        args: ``[out_path]`` or ``[out_path, tabid]``. The path is made absolute
            before writing.

    Prints the output path and the resulting file size in bytes.
    """
    out = os.path.abspath(args[0])
    tabid = None if len(args) <= 1 else args[1]
    capture_params = {"format": "png", "captureBeyondViewport": False}

    def capture(ws):
        return send(ws, 1, "Page.captureScreenshot", capture_params)

    res = with_ws(tabid, capture)
    with open(out, "wb") as png_file:
        # The protocol returns the image base64-encoded in the "data" member.
        png_file.write(base64.b64decode(res["data"]))
    size = os.path.getsize(out)
    print(f"[cdp] screenshot -> {out} ({size} bytes)")
def cmd_click(args):
    """Left-click at viewport coordinates by sending a press and a release event.

    Args:
        args: ``[x, y]`` or ``[x, y, tabid]``; ``x`` and ``y`` are parsed as floats.
    """
    x = float(args[0])
    y = float(args[1])
    tabid = None if len(args) <= 2 else args[2]

    def press_and_release(ws):
        pointer = {"x": x, "y": y, "button": "left", "clickCount": 1}
        send(ws, 1, "Input.dispatchMouseEvent", {"type": "mousePressed", **pointer})
        send(ws, 1, "Input.dispatchMouseEvent", {"type": "mouseReleased", **pointer})
        return "ok"

    with_ws(tabid, press_and_release)
    print(f"[cdp] click ({x},{y})")
def cmd_type(args):
    """Insert text via ``Input.insertText`` and report how many characters were sent.

    Args:
        args: ``[text]`` or ``[text, tabid]``.
    """
    text = args[0]
    tabid = None if len(args) <= 1 else args[1]

    def insert(ws):
        return send(ws, 1, "Input.insertText", {"text": text})

    with_ws(tabid, insert)
    count = len(text)
    print(f"[cdp] typed {count} chars")
# Windows virtual-key codes for the named keys this tool knows how to press.
# "Return" is kept as a convenience alias for "Enter".
KEYMAP = {
    "Enter": 13, "Return": 13, "Tab": 9, "Escape": 27, "Backspace": 8,
    "Delete": 46, "Home": 36, "End": 35, "PageUp": 33, "PageDown": 34,
    "ArrowLeft": 37, "ArrowUp": 38, "ArrowRight": 39, "ArrowDown": 40,
}
# Text generated by a key press. Without "text" on keyDown, Chrome produces no
# character event, so Enter would neither submit a form nor insert a newline.
_KEY_TEXT = {"Enter": "\r"}


def cmd_key(args):
    """Press and release a single key in the target tab.

    Args:
        args: ``[key]`` or ``[key, tabid]``. Keys listed in ``KEYMAP`` are sent
            with their virtual-key code and ``code`` value; any other key name
            is sent with only its ``key`` field, as before.
    """
    key = args[0]
    tabid = args[1] if len(args) > 1 else None
    code = KEYMAP.get(key)
    # The DOM key value is "Enter"; "Return" is only an alias accepted on the CLI.
    dom_key = "Enter" if key == "Return" else key
    base = {"key": dom_key}
    if code:
        # For every key in KEYMAP the DOM "code" value equals its key name.
        base["code"] = dom_key
        base["windowsVirtualKeyCode"] = code
    text = _KEY_TEXT.get(dom_key)

    def fn(ws):
        down = {"type": "keyDown", **base}
        if text:
            # Only keyDown carries text; keyUp never generates a character.
            down["text"] = text
        send(ws, 1, "Input.dispatchKeyEvent", down)
        send(ws, 2, "Input.dispatchKeyEvent", {"type": "keyUp", **base})
        return "ok"

    with_ws(tabid, fn)
    print(f"[cdp] key {key}")
def cmd_eval(args):
    """Evaluate a JavaScript expression in the page and print its value as JSON.

    Args:
        args: ``[js]`` or ``[js, tabid]``.

    Raises:
        SystemExit: the expression threw. Runtime.evaluate reports this through
            ``exceptionDetails`` rather than a protocol error, so without this
            check a throwing expression would print ``null``.
    """
    js = args[0]
    tabid = args[1] if len(args) > 1 else None
    res = with_ws(tabid, lambda ws: send(ws, 1, "Runtime.evaluate",
                                         {"expression": js, "returnByValue": True}))
    details = res.get("exceptionDetails")
    if details:
        thrown = details.get("exception") or {}
        # Prefer the exception's own description (message plus stack); fall back
        # to the generic text supplied with the details.
        reason = thrown.get("description") or details.get("text") or "unknown error"
        raise SystemExit(f"[cdp] eval threw: {reason}")
    print(json.dumps(res.get("result", {}).get("value"), indent=2, default=str))
# Sub-command name -> handler; each handler receives the remaining argv entries.
CMDS = {"launch": cmd_launch, "status": cmd_status, "nav": cmd_nav, "shot": cmd_shot,
        "click": cmd_click, "type": cmd_type, "key": cmd_key, "eval": cmd_eval}
# Number of positional arguments each handler indexes unconditionally; commands
# not listed here need none.
_MIN_ARGS = {"nav": 1, "shot": 1, "click": 2, "type": 1, "key": 1, "eval": 1}
if __name__ == "__main__":
    if len(sys.argv) < 2 or sys.argv[1] not in CMDS:
        print(__doc__)
        raise SystemExit(1)
    command, rest = sys.argv[1], sys.argv[2:]
    if len(rest) < _MIN_ARGS.get(command, 0):
        # Show usage instead of letting the handler die with an IndexError.
        print(__doc__)
        raise SystemExit(f"[cdp] {command}: missing required argument(s)")
    try:
        CMDS[command](rest)
    except urllib.error.URLError as exc:
        # urllib.error is loaded as a side effect of `import urllib.request`.
        # Raised when nothing answers on the debug port (Chrome not started).
        raise SystemExit(
            f"[cdp] cannot reach Chrome at {BASE}: {exc.reason}. Run: py cdp.py launch"
        ) from None