#!/usr/bin/env python
"""
cdp.py - drive Chrome over the DevTools Protocol (CDP), like Antigravity does.

Launches (or attaches to) a Chrome started with --remote-debugging-port and drives it:
navigate, screenshot-to-disk, click, type, key, eval. Screenshots are written as real
PNG files (so they can be fed to Gemini/Grok image tools).

Usage:
  py cdp.py launch [url]             # start a visible debug Chrome (dedicated profile)
  py cdp.py status                   # /json/version + list page targets
  py cdp.py nav <url> [tabid]        # navigate (active page if tabid omitted)
  py cdp.py shot <out.png> [tabid]   # screenshot the page to a PNG file
  py cdp.py click <x> <y> [tabid]    # left-click at viewport coords
  py cdp.py type <text> [tabid]      # insert text into the focused element
  py cdp.py key <name> [tabid]       # press a key (Enter/Tab/Escape/...)
  py cdp.py eval <js> [tabid]        # Runtime.evaluate, prints JSON result

[tabid] may be a full target id or any unique prefix of one (e.g. the 8 characters
printed by `status`).

Env:  CDP_PORT (default 9222), CDP_PROFILE (default %USERPROFILE%\\.claude\\cdp-chrome-profile)
"""
import base64
import json
import os
import subprocess
import sys
import time
import urllib.error
import urllib.request

PORT = int(os.environ.get("CDP_PORT", "9222"))
BASE = f"http://localhost:{PORT}"
PROFILE = os.environ.get(
    "CDP_PROFILE",
    os.path.join(os.path.expanduser("~"), ".claude", "cdp-chrome-profile"),
)
# First Chrome install found among the usual Windows locations, or None.
CHROME = next((p for p in [
    r"C:\Program Files\Google\Chrome\Application\chrome.exe",
    r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
    os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe"),
] if os.path.isfile(p)), None)


def _need(args, count, usage):
    """Exit with a usage message unless at least *count* arguments were given."""
    if len(args) < count:
        raise SystemExit(f"[cdp] usage: py cdp.py {usage}")


def http_get(path):
    """GET ``BASE + path`` from the debug HTTP endpoint and return the parsed JSON.

    Network errors (e.g. ``urllib.error.URLError``) propagate to the caller;
    ``cmd_launch`` relies on that to poll until the port opens.
    """
    with urllib.request.urlopen(BASE + path, timeout=5) as r:
        return json.loads(r.read().decode())


def page_targets():
    """Return the ``/json`` target descriptors whose type is ``"page"``."""
    return [t for t in http_get("/json") if t.get("type") == "page"]


def pick_target(tabid=None, targets=None):
    """Choose the page target to drive.

    Args:
        tabid: Full target id or a unique prefix of one. When falsy, the first
            target whose URL is not ``devtools://`` is used, falling back to
            the first target.
        targets: Optional pre-fetched list of target dicts (each with ``id``
            and ``url``). Defaults to ``page_targets()``.

    Returns:
        The selected target dict.

    Raises:
        SystemExit: no targets exist, *tabid* matches nothing, or *tabid* is a
            prefix shared by several targets.
    """
    if targets is None:
        targets = page_targets()
    if not targets:
        raise SystemExit("[cdp] no page targets. Run: py cdp.py launch")
    if tabid:
        for t in targets:
            if t["id"] == tabid:
                return t
        # `status` prints only the first 8 characters of each id, so accept
        # any prefix as long as it identifies exactly one target.
        matches = [t for t in targets if t["id"].startswith(tabid)]
        if len(matches) == 1:
            return matches[0]
        if matches:
            raise SystemExit(f"[cdp] tabid {tabid} is ambiguous ({len(matches)} matches)")
        raise SystemExit(f"[cdp] tabid {tabid} not found")
    # prefer a page that is not a DevTools window
    for t in targets:
        if not t["url"].startswith("devtools://"):
            return t
    return targets[0]


def send(ws, _id, method, params=None):
    """Send one CDP command and block until its reply arrives.

    Messages received in the meantime that do not carry ``_id`` (events) are
    discarded.

    Returns:
        The reply's ``result`` dict (empty dict when absent).

    Raises:
        SystemExit: the reply contains an ``error`` member.
    """
    ws.send(json.dumps({"id": _id, "method": method, "params": params or {}}))
    while True:
        msg = json.loads(ws.recv())
        if msg.get("id") == _id:
            if "error" in msg:
                raise SystemExit(f"[cdp] {method} error: {msg['error']}")
            return msg.get("result", {})
        # ignore events with no matching id


def with_ws(tabid, fn):
    """Open a websocket to the chosen target, run ``fn(ws)``, always close it.

    Returns:
        Whatever ``fn`` returns.

    Raises:
        SystemExit: websocket-client is not installed, or no target matches.
    """
    # Imported lazily so `launch` and `status`, which only use HTTP, keep
    # working when websocket-client is not installed.
    try:
        import websocket  # websocket-client
    except ImportError:
        raise SystemExit(
            "[cdp] missing dependency 'websocket-client' (pip install websocket-client)"
        ) from None
    t = pick_target(tabid)
    ws = websocket.create_connection(t["webSocketDebuggerUrl"], max_size=64 * 1024 * 1024)
    try:
        return fn(ws)
    finally:
        ws.close()


def cmd_launch(args):
    """Start Chrome with remote debugging enabled and wait for the port to open.

    ``args[0]`` is an optional start URL (default ``about:blank``). Polls
    ``/json/version`` up to 40 times, 0.25 s apart.
    """
    if not CHROME:
        raise SystemExit("[cdp] chrome.exe not found")
    os.makedirs(PROFILE, exist_ok=True)
    url = args[0] if args else "about:blank"
    subprocess.Popen([
        CHROME,
        f"--remote-debugging-port={PORT}",
        f"--user-data-dir={PROFILE}",
        "--no-first-run",
        "--no-default-browser-check",
        # NOTE(review): accepts debug websocket connections from any Origin;
        # presumably required for the websocket client's handshake - confirm.
        "--remote-allow-origins=*",
        url,
    ], close_fds=True)
    for _ in range(40):
        try:
            v = http_get("/json/version")
        except Exception:
            # Port not accepting connections yet; retry shortly.
            time.sleep(0.25)
            continue
        print(f"[cdp] launched: {v.get('Browser')} ws={v.get('webSocketDebuggerUrl','')[:40]}...")
        print(f"[cdp] profile: {PROFILE}")
        return
    raise SystemExit("[cdp] chrome started but debug port never opened")


def cmd_status(args):
    """Print the browser version and one line per page target."""
    v = http_get("/json/version")
    print(f"Browser: {v.get('Browser')}")
    for t in page_targets():
        print(f"  [{t['id'][:8]}] {t['title'][:40]!r} {t['url'][:70]}")


def cmd_nav(args):
    """Navigate a page to ``args[0]`` (``https://`` is prepended if no scheme).

    Waits up to 20 s for ``Page.loadEventFired`` (best-effort), then pauses one
    more second before reporting.

    Raises:
        SystemExit: the URL argument is missing, or Chrome reports that the
            navigation failed.
    """
    _need(args, 1, "nav <url> [tabid]")
    url = args[0]
    if "://" not in url:
        url = "https://" + url
    tabid = args[1] if len(args) > 1 else None

    def fn(ws):
        send(ws, 1, "Page.enable")
        nav = send(ws, 2, "Page.navigate", {"url": url})
        # Page.navigate reports failures (DNS, refused, ...) through errorText
        # in an otherwise successful reply.
        if nav.get("errorText"):
            raise SystemExit(f"[cdp] navigation to {url} failed: {nav['errorText']}")
        # wait for load event (best-effort)
        deadline = time.time() + 20
        while True:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            # Bound each recv by the time left so the total wait honours the deadline.
            ws.settimeout(remaining)
            try:
                m = json.loads(ws.recv())
            except Exception:
                # Timeout or closed socket: stop waiting, navigation was issued.
                break
            if m.get("method") == "Page.loadEventFired":
                break
        return "ok"

    with_ws(tabid, fn)
    time.sleep(1.0)
    print(f"[cdp] navigated -> {url}")


def cmd_shot(args):
    """Capture the page viewport as PNG and write it to the path in ``args[0]``."""
    _need(args, 1, "shot <out.png> [tabid]")
    out = os.path.abspath(args[0])
    tabid = args[1] if len(args) > 1 else None

    def fn(ws):
        return send(ws, 1, "Page.captureScreenshot",
                    {"format": "png", "captureBeyondViewport": False})

    res = with_ws(tabid, fn)
    # The screenshot arrives base64-encoded in the reply's "data" field.
    with open(out, "wb") as f:
        f.write(base64.b64decode(res["data"]))
    print(f"[cdp] screenshot -> {out} ({os.path.getsize(out)} bytes)")


def cmd_click(args):
    """Left-click at viewport coordinates ``args[0]``, ``args[1]``.

    Raises:
        SystemExit: coordinates are missing or not numeric.
    """
    _need(args, 2, "click <x> <y> [tabid]")
    try:
        x, y = float(args[0]), float(args[1])
    except ValueError:
        raise SystemExit(
            f"[cdp] click coordinates must be numbers, got {args[0]!r} {args[1]!r}"
        ) from None
    tabid = args[2] if len(args) > 2 else None

    def fn(ws):
        for typ in ("mousePressed", "mouseReleased"):
            send(ws, 1, "Input.dispatchMouseEvent",
                 {"type": typ, "x": x, "y": y, "button": "left", "clickCount": 1})
        return "ok"

    with_ws(tabid, fn)
    print(f"[cdp] click ({x},{y})")


def cmd_type(args):
    """Insert the text ``args[0]`` via ``Input.insertText``."""
    _need(args, 1, "type <text> [tabid]")
    text = args[0]
    tabid = args[1] if len(args) > 1 else None
    with_ws(tabid, lambda ws: send(ws, 1, "Input.insertText", {"text": text}))
    print(f"[cdp] typed {len(text)} chars")


# Key name -> Windows virtual key code sent along with the key event.
KEYMAP = {"Enter": 13, "Return": 13, "Tab": 9, "Escape": 27, "Backspace": 8}


def cmd_key(args):
    """Send a keyDown followed by a keyUp for the key named ``args[0]``.

    Keys listed in ``KEYMAP`` also carry their ``windowsVirtualKeyCode``; any
    other name is sent with the ``key`` field only.
    """
    _need(args, 1, "key <name> [tabid]")
    key = args[0]
    tabid = args[1] if len(args) > 1 else None
    code = KEYMAP.get(key)

    def fn(ws):
        base = {"key": key, "windowsVirtualKeyCode": code} if code else {"key": key}
        send(ws, 1, "Input.dispatchKeyEvent", {"type": "keyDown", **base})
        send(ws, 2, "Input.dispatchKeyEvent", {"type": "keyUp", **base})
        return "ok"

    with_ws(tabid, fn)
    print(f"[cdp] key {key}")


def cmd_eval(args):
    """Evaluate the JavaScript in ``args[0]`` and print its value as JSON.

    Raises:
        SystemExit: the JS argument is missing, or the expression threw.
    """
    _need(args, 1, "eval <js> [tabid]")
    js = args[0]
    tabid = args[1] if len(args) > 1 else None
    res = with_ws(tabid, lambda ws: send(ws, 1, "Runtime.evaluate",
                                         {"expression": js, "returnByValue": True}))
    # A thrown JS exception comes back as exceptionDetails on a successful
    # reply; report it instead of printing a misleading `null`.
    details = res.get("exceptionDetails")
    if details:
        thrown = details.get("exception") or {}
        raise SystemExit(f"[cdp] eval raised: {thrown.get('description') or details.get('text')}")
    print(json.dumps(res.get("result", {}).get("value"), indent=2, default=str))


CMDS = {"launch": cmd_launch, "status": cmd_status, "nav": cmd_nav, "shot": cmd_shot,
        "click": cmd_click, "type": cmd_type, "key": cmd_key, "eval": cmd_eval}


def main(argv=None):
    """Dispatch ``argv`` (default ``sys.argv[1:]``) to the matching command.

    Prints the module usage and exits with status 1 on an unknown or missing
    command.
    """
    argv = sys.argv[1:] if argv is None else argv
    if not argv or argv[0] not in CMDS:
        print(__doc__)
        raise SystemExit(1)
    try:
        CMDS[argv[0]](argv[1:])
    except urllib.error.URLError as exc:
        # The debug HTTP endpoint is unreachable: Chrome is not running with
        # --remote-debugging-port on this port.
        raise SystemExit(
            f"[cdp] cannot reach Chrome at {BASE} ({exc.reason}). Run: py cdp.py launch"
        ) from None


if __name__ == "__main__":
    main()