- .claude/scripts/cdp.py: drive Chrome via DevTools Protocol; screenshots to disk (so Gemini/Grok can see the live site). Fixes invisible-window + no-disk-screenshot. - reference_cdp_chrome_driver.md (+ MEMORY index) - gururmm submodule pointer -> dashboard redesign docs (local 3cef6ba) - session log Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
195 lines
6.6 KiB
Python
195 lines
6.6 KiB
Python
#!/usr/bin/env python
|
|
"""
cdp.py - drive Chrome over the DevTools Protocol (CDP), like Antigravity does.

Launches (or attaches to) a Chrome started with --remote-debugging-port and drives
it: navigate, screenshot-to-disk, click, type, key, eval. Screenshots are written
as real PNG files (so they can be fed to Gemini/Grok image tools).

Usage:
  py cdp.py launch [url]              # start a visible debug Chrome (dedicated profile)
  py cdp.py status                    # /json/version + list page targets
  py cdp.py nav <url> [tabid]         # navigate (active page if tabid omitted)
  py cdp.py shot <out.png> [tabid]    # screenshot the page to a PNG file
  py cdp.py click <x> <y> [tabid]     # left-click at viewport coords
  py cdp.py type <text> [tabid]       # insert text into the focused element
  py cdp.py key <Key> [tabid]         # press a key (Enter/Tab/Escape/...)
  py cdp.py eval <js> [tabid]         # Runtime.evaluate, prints JSON result

Env: CDP_PORT (default 9222), CDP_PROFILE (default %USERPROFILE%\\.claude\\cdp-chrome-profile)
"""
|
|
import sys, os, json, time, base64, subprocess, urllib.request
|
|
|
|
# Debug port Chrome listens on; override with the CDP_PORT environment variable.
PORT = int(os.environ.get("CDP_PORT", "9222"))

# Root of Chrome's HTTP debug endpoints (/json, /json/version, ...).
BASE = f"http://localhost:{PORT}"

# Dedicated user-data-dir for the debug Chrome; override with CDP_PROFILE.
PROFILE = os.environ.get(
    "CDP_PROFILE",
    os.path.join(os.path.expanduser("~"), ".claude", "cdp-chrome-profile"),
)

# First chrome.exe found among the usual install locations, or None.
CHROME = next(
    (
        candidate
        for candidate in (
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
            os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe"),
        )
        if os.path.isfile(candidate)
    ),
    None,
)
|
|
|
|
import websocket # websocket-client
|
|
|
|
|
|
def http_get(path):
    """Fetch ``BASE + path`` from Chrome's debug HTTP endpoint and parse it as JSON.

    path: endpoint path appended to BASE, e.g. "/json/version".
    Returns the decoded JSON value. The request uses a 5 second timeout;
    network and decoding errors propagate to the caller unchanged.
    """
    response = urllib.request.urlopen(BASE + path, timeout=5)
    with response:
        payload = response.read()
        return json.loads(payload.decode())
|
|
|
|
|
|
def page_targets():
    """Return the entries of ``/json`` whose ``type`` is ``"page"``."""
    pages = []
    for target in http_get("/json"):
        if target.get("type") == "page":
            pages.append(target)
    return pages
|
|
|
|
|
|
def pick_target(tabid=None):
    """Choose the page target a command should act on.

    tabid: a full target id, or a unique prefix of one. Prefixes are accepted
        because the ``status`` command prints only the first 8 characters of
        each id. When omitted, the first page that is neither a devtools://
        page nor about:blank is preferred, then any non-devtools page, then
        the first target.
    Returns the target dict as reported by ``/json``.
    Raises SystemExit when there are no page targets, or when tabid matches
    no target or matches more than one as a prefix.
    """
    targets = page_targets()
    if not targets:
        raise SystemExit("[cdp] no page targets. Run: py cdp.py launch")
    if tabid:
        # An exact id always wins, even if it is also a prefix of another id.
        for t in targets:
            if t["id"] == tabid:
                return t
        matches = [t for t in targets if t["id"].startswith(tabid)]
        if len(matches) == 1:
            return matches[0]
        if matches:
            raise SystemExit(f"[cdp] tabid {tabid} is ambiguous ({len(matches)} matches)")
        raise SystemExit(f"[cdp] tabid {tabid} not found")
    # prefer a non-devtools, non-blank page
    usable = [t for t in targets if not t["url"].startswith("devtools://")]
    for t in usable:
        if t["url"] != "about:blank":
            return t
    # Only blank (or only devtools) pages exist: fall back in that order.
    return usable[0] if usable else targets[0]
|
|
|
|
|
|
def send(ws, _id, method, params=None):
    """Send one CDP command and block until its reply arrives.

    ws: open websocket connection to a target (must provide send() and recv()).
    _id: message id used to pair the reply with this request.
    method: CDP method name, e.g. "Page.navigate".
    params: optional dict of method parameters; an empty object is sent if omitted.
    Returns the reply's "result" dict ({} when the reply has none).
    Raises SystemExit when the reply carries an "error", or when the
    connection yields an empty frame before the reply arrives.
    """
    ws.send(json.dumps({"id": _id, "method": method, "params": params or {}}))
    while True:
        raw = ws.recv()
        if not raw:
            # An empty payload is not a CDP message (websocket-client hands
            # back "" for a close frame); json.loads would only produce an
            # unhelpful JSONDecodeError traceback here.
            raise SystemExit(f"[cdp] connection closed while waiting for {method}")
        msg = json.loads(raw)
        if msg.get("id") != _id:
            # ignore events with no matching id
            continue
        if "error" in msg:
            raise SystemExit(f"[cdp] {method} error: {msg['error']}")
        return msg.get("result", {})
|
|
|
|
|
|
def with_ws(tabid, fn):
    """Open a websocket to the chosen target, run ``fn(ws)``, and always close it.

    tabid: target id forwarded to pick_target (None selects the default page).
    fn: callable that receives the open connection; its return value is
        passed back to the caller.
    """
    target = pick_target(tabid)
    debugger_url = target["webSocketDebuggerUrl"]
    # NOTE(review): confirm websocket-client actually honours max_size.
    conn = websocket.create_connection(debugger_url, max_size=64 * 1024 * 1024)
    try:
        outcome = fn(conn)
    finally:
        conn.close()
    return outcome
|
|
|
|
|
|
def cmd_launch(args):
    """Start a debug Chrome on PORT with the dedicated profile and wait for the port.

    args: optional [url]; "about:blank" is opened when no url is given.
    Polls ``/json/version`` up to 40 times, 0.25 s apart, and prints the
    browser version once it answers.
    Raises SystemExit if chrome.exe was not found or the port never opens.
    """
    if not CHROME:
        raise SystemExit("[cdp] chrome.exe not found")
    os.makedirs(PROFILE, exist_ok=True)
    start_url = "about:blank"
    if args:
        start_url = args[0]
    command = [
        CHROME,
        f"--remote-debugging-port={PORT}",
        f"--user-data-dir={PROFILE}",
        "--no-first-run",
        "--no-default-browser-check",
        "--remote-allow-origins=*",
        start_url,
    ]
    subprocess.Popen(command, close_fds=True)
    for _attempt in range(40):
        try:
            version = http_get("/json/version")
            print(f"[cdp] launched: {version.get('Browser')} ws={version.get('webSocketDebuggerUrl','')[:40]}...")
            print(f"[cdp] profile: {PROFILE}")
            return
        except Exception:
            # Port not answering yet; wait and poll again.
            time.sleep(0.25)
    raise SystemExit("[cdp] chrome started but debug port never opened")
|
|
|
|
|
|
def cmd_status(args):
    """Print the browser version followed by one line per page target.

    args: unused. Each target line shows the first 8 characters of its id,
    the first 40 of its title (repr-quoted) and the first 70 of its url.
    """
    version = http_get("/json/version")
    print(f"Browser: {version.get('Browser')}")
    for target in page_targets():
        short_id = target["id"][:8]
        title = target["title"][:40]
        url = target["url"][:70]
        print(f" [{short_id}] {title!r} {url}")
|
|
|
|
|
|
def cmd_nav(args):
    """Navigate a tab to a URL and wait (best-effort) for its load event.

    args: [url, tabid?]. A url without a scheme gets "https://" prepended;
        about:, data: and javascript: URLs are passed through untouched.
    Waits at most 20 seconds for Page.loadEventFired, then one more second
    before reporting.
    Raises SystemExit with a usage message when the url is missing.
    """
    if not args:
        raise SystemExit("[cdp] usage: py cdp.py nav <url> [tabid]")
    url = args[0]
    # "about:blank" and friends are complete URLs even though they lack "://".
    if "://" not in url and not url.startswith(("about:", "data:", "javascript:")):
        url = "https://" + url
    tabid = args[1] if len(args) > 1 else None

    def fn(ws):
        send(ws, 1, "Page.enable")
        send(ws, 2, "Page.navigate", {"url": url})
        # wait for load event (best-effort)
        deadline = time.monotonic() + 20
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Shrink the socket timeout to the time left so a single blocked
            # recv() cannot push the total wait past the deadline.
            ws.settimeout(remaining)
            try:
                m = json.loads(ws.recv())
            except Exception:
                # Timeout, closed socket or a non-JSON frame: stop waiting
                # rather than fail the navigation.
                break
            if m.get("method") == "Page.loadEventFired":
                break
        return "ok"

    with_ws(tabid, fn)
    time.sleep(1.0)
    print(f"[cdp] navigated -> {url}")
|
|
|
|
|
|
def cmd_shot(args):
    """Capture the current viewport of a tab and write it to a PNG file.

    args: [out.png, tabid?]. The output path is made absolute and its parent
        directory is created if it does not exist yet.
    Raises SystemExit with a usage message when the output path is missing.
    """
    if not args:
        raise SystemExit("[cdp] usage: py cdp.py shot <out.png> [tabid]")
    out = os.path.abspath(args[0])
    tabid = args[1] if len(args) > 1 else None

    def fn(ws):
        return send(ws, 1, "Page.captureScreenshot", {"format": "png", "captureBeyondViewport": False})

    res = with_ws(tabid, fn)
    # Create the directory only after a successful capture, so a failed
    # capture leaves nothing behind and a good one is not lost to a bad path.
    os.makedirs(os.path.dirname(out), exist_ok=True)
    with open(out, "wb") as f:
        # CDP returns the image base64-encoded in the "data" field.
        f.write(base64.b64decode(res["data"]))
    print(f"[cdp] screenshot -> {out} ({os.path.getsize(out)} bytes)")
|
|
|
|
|
|
def cmd_click(args):
    """Left-click once at viewport coordinates in a tab.

    args: [x, y, tabid?]; x and y are parsed as floats.
    Dispatches a mousePressed followed by a mouseReleased event.
    Raises SystemExit with a usage message when a coordinate is missing or
    is not a number.
    """
    usage = "[cdp] usage: py cdp.py click <x> <y> [tabid]"
    if len(args) < 2:
        raise SystemExit(usage)
    try:
        x, y = float(args[0]), float(args[1])
    except ValueError:
        raise SystemExit(f"{usage} (x and y must be numbers)") from None
    tabid = args[2] if len(args) > 2 else None

    def fn(ws):
        # Give each command its own message id, as cmd_key does.
        for msg_id, typ in enumerate(("mousePressed", "mouseReleased"), start=1):
            send(ws, msg_id, "Input.dispatchMouseEvent",
                 {"type": typ, "x": x, "y": y, "button": "left", "clickCount": 1})
        return "ok"

    with_ws(tabid, fn)
    print(f"[cdp] click ({x},{y})")
|
|
|
|
|
|
def cmd_type(args):
    """Insert text into the focused element of a tab via Input.insertText.

    args: [text, tabid?].
    Raises SystemExit with a usage message when the text is missing.
    """
    if not args:
        raise SystemExit("[cdp] usage: py cdp.py type <text> [tabid]")
    text = args[0]
    tabid = args[1] if len(args) > 1 else None
    with_ws(tabid, lambda ws: send(ws, 1, "Input.insertText", {"text": text}))
    print(f"[cdp] typed {len(text)} chars")
|
|
|
|
|
|
# Key names that are sent together with a Windows virtual-key code.
KEYMAP = {"Enter": 13, "Return": 13, "Tab": 9, "Escape": 27, "Backspace": 8}


def cmd_key(args):
    """Press and release one key in a tab.

    args: [Key, tabid?]. Keys listed in KEYMAP are sent with their
        windowsVirtualKeyCode; any other name is sent with only its "key".
    Raises SystemExit with a usage message when the key name is missing.
    """
    if not args:
        raise SystemExit("[cdp] usage: py cdp.py key <Key> [tabid]")
    key = args[0]
    tabid = args[1] if len(args) > 1 else None
    code = KEYMAP.get(key)

    def fn(ws):
        base = {"key": key, "windowsVirtualKeyCode": code} if code else {"key": key}
        send(ws, 1, "Input.dispatchKeyEvent", {"type": "keyDown", **base})
        send(ws, 2, "Input.dispatchKeyEvent", {"type": "keyUp", **base})
        return "ok"

    with_ws(tabid, fn)
    print(f"[cdp] key {key}")
|
|
|
|
|
|
def cmd_eval(args):
    """Evaluate a JavaScript expression in a tab and print its value as JSON.

    args: [js, tabid?]. The expression is evaluated with returnByValue so
        the value comes back as plain JSON.
    Raises SystemExit with a usage message when the expression is missing,
    and with the exception description when the expression throws.
    """
    if not args:
        raise SystemExit("[cdp] usage: py cdp.py eval <js> [tabid]")
    js = args[0]
    tabid = args[1] if len(args) > 1 else None
    res = with_ws(tabid, lambda ws: send(ws, 1, "Runtime.evaluate",
                                         {"expression": js, "returnByValue": True}))
    # Runtime.evaluate reports a thrown exception in "exceptionDetails";
    # without this check the failure would be printed as a silent "null".
    details = res.get("exceptionDetails")
    if details:
        thrown = details.get("exception") or {}
        reason = thrown.get("description") or details.get("text") or "unknown error"
        raise SystemExit(f"[cdp] eval threw: {reason}")
    print(json.dumps(res.get("result", {}).get("value"), indent=2, default=str))
|
|
|
|
|
|
# Sub-command name -> handler; every handler takes the remaining argv list.
CMDS = {
    "launch": cmd_launch,
    "status": cmd_status,
    "nav": cmd_nav,
    "shot": cmd_shot,
    "click": cmd_click,
    "type": cmd_type,
    "key": cmd_key,
    "eval": cmd_eval,
}
|
|
|
|
if __name__ == "__main__":
    # Dispatch on the first CLI argument; anything unknown prints the usage text.
    cli_args = sys.argv[1:]
    handler = CMDS.get(cli_args[0]) if cli_args else None
    if handler is None:
        print(__doc__)
        raise SystemExit(1)
    handler(cli_args[1:])
|