Files
claudetools/.claude/skills/agy/scripts/ask-gemini.sh
Mike Swanson ac0106f254 feat(agy): add keyless image-analyze + search modes
image-analyze: independent second-model vision over OAuth (pins the
gemini-3.1-pro-preview vision model; the default flash-lite router
hallucinates image content) — reads an image via read_file and describes it.
search: Google-grounded live web results with citation URLs (google_web_search).
Both verified working on the keyless Google OAuth. Image GENERATION
(nano-banana) still needs an AI Studio key + extension and stays Grok's lane.
Includes a scoped best-effort output sanitizer for image-analyze (preview
model occasionally leaks reasoning tokens); text/verify/review/search
unchanged. migrate-identity.sh now upgrades the gemini capabilities array.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 09:03:21 -07:00

361 lines
18 KiB
Bash

#!/usr/bin/env bash
# ask-gemini.sh — Claude -> Google Gemini CLI router (independent second model).
#
# Sibling of ask-grok.sh. Routes a task to the official Google Gemini CLI
# (`gemini`, npm global) for an independent, different-vendor second opinion,
# verification, or a Gemini code review. Headless, safe-by-default, JSON-parsed.
#
# Auth is Google login (OAuth) — NO API key. Creds: ~/.gemini/oauth_creds.json.
# If a call fails with an auth error, run `gemini` interactively once and pick
# "Login with Google".
#
# Output contract (VERIFIED on GURU-5070, gemini 0.45.1):
# - Prefer JSON: `gemini -p ... -o json` -> {session_id, response, stats}.
# The answer text is `.response`. stdout may carry two cosmetic warning lines
# ("True color..." / "Ripgrep is not available...") before the JSON; we extract
# the object starting at the FIRST '{' to ignore them. stderr (429 backoff,
# warnings) is captured SEPARATELY and never fed to the JSON parser.
# - `--skip-trust` is REQUIRED headless (the CWD isn't a trusted folder).
# - stdin is always closed (</dev/null) so `-p` never hangs waiting on stdin.
#
# File reads (review*): Gemini's read_file honors .gitignore AND a workspace
# sandbox (only files under the workspace/included dirs are readable). To make
# review robust for ANY file (tracked, gitignored, with spaces), we copy each
# target into a temp dir and add it to the workspace via --include-directories.
# review-diff runs with the repo dir included so changed files read in place.
#
# Usage:
# ask-gemini.sh text "<prompt>" # one-shot answer
# ask-gemini.sh text --prompt-file <path> # long content
# ask-gemini.sh verify "<claim or finding to refute>" # adversarial check
# ask-gemini.sh verify --prompt-file <path>
# ask-gemini.sh review <file> [instructions] # gemini reads + reviews one file
# ask-gemini.sh review-files [-i "instr"] <f1> [f2 ...] # review a SET of files together
# ask-gemini.sh review-diff [-C <repo-dir>] [-i "instr"] <gitref> [-- <pathspec>]
# ask-gemini.sh image-analyze <image-path> ["question"] # vision: read_file image + describe (PRO model)
# ask-gemini.sh search "<query>" # Google-grounded live web search + sources
# ask-gemini.sh raw <gemini args...> # escape hatch
#
# Exit: 0 ok, 1 no result, 2 usage, 3 not installed here, 127 gemini/python not found.
# Unset variables and failed pipeline stages are errors. `-e` is not enabled:
# the script checks the results it cares about explicitly.
set -uo pipefail
SELF="ask-gemini"
# Pick a python that actually RUNS, not merely one that exists on PATH.
# `command -v` alone also accepts launchers/stubs that cannot execute code (for
# example the Windows "App execution alias" python stub, or a `py` launcher with
# no interpreter behind it); every later JSON parse would then silently come back
# empty. Probe each candidate, in the original preference order, and keep the
# first one that can import the modules the parsers need.
PY=""
for _py in py python python3; do
  _py_path="$(command -v "$_py" 2>/dev/null)" || continue
  if "$_py_path" -c 'import json, sys' </dev/null >/dev/null 2>&1; then
    PY="$_py_path"
    break
  fi
done
unset _py _py_path
if [ -z "$PY" ]; then
  echo "[$SELF] python (py/python/python3) required for JSON parsing" >&2
  exit 127
fi
# --- winpath <path>: print <path> in a form the gemini binary can open ---
# On Windows gemini is a native binary (npm shim -> node.exe) and cannot resolve
# the POSIX-style paths Git Bash uses (/tmp, /c/.., /d/..). Where cygpath exists
# (MSYS/Cygwin) the path is converted explicitly with `cygpath -w`, rather than
# relying on MSYS auto-conversion, which breaks on spaces/edge cases. If the
# conversion fails, or cygpath is absent (Linux/macOS), the path is printed
# unchanged. No trailing newline is added in the pass-through case.
winpath() { printf '%s' "$1"; }
if command -v cygpath >/dev/null 2>&1; then
  winpath() { cygpath -w -- "$1" 2>/dev/null || printf '%s' "$1"; }
fi
# --- identity.json (per-machine, gitignored) declares whether gemini is installed here ---
# SCRIPT_DIR: absolute directory of this script (empty if it cannot be resolved).
# IDFILE:     $CLAUDETOOLS_ROOT/.claude/identity.json when that variable is set
#             and the file exists; otherwise identity.json three levels above the
#             script directory (the file itself may or may not exist there).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" 2>/dev/null && pwd)"
if [ -n "${CLAUDETOOLS_ROOT:-}" ] && [ -f "$CLAUDETOOLS_ROOT/.claude/identity.json" ]; then
  IDFILE="$CLAUDETOOLS_ROOT/.claude/identity.json"
else
  IDFILE="$(cd "$SCRIPT_DIR/../../.." 2>/dev/null && pwd)/identity.json"
fi
# idgem <field> — print field <field> of the "gemini" object in $IDFILE.
#
# Arguments: $1 - key to look up inside identity.json's .gemini object
# Globals:   IDFILE (read), PY (read)
# Outputs:   the value on stdout; booleans are printed lowercase ("true"/"false");
#            an empty line when the file is missing, unparsable, has no usable
#            .gemini object, or the field is absent/null.
#
# The field name is passed to python as an argument (sys.argv) instead of being
# pasted into the program text, so a quote or other punctuation in it can neither
# break nor inject python code. A trailing carriage return is stripped because
# native-Windows python writes CRLF line endings, and a stray "\r" would defeat
# callers' exact string comparisons and -x path tests.
idgem() {
  local field="${1:-}" out
  [ -f "$IDFILE" ] || { echo ""; return; }
  out="$("$PY" -c '
import json, sys
try:
    g = json.load(sys.stdin).get("gemini") or {}
    v = g.get(sys.argv[1], "")
    if v is None:
        v = ""
    elif isinstance(v, bool):
        v = str(v).lower()
    print(v)
except Exception:
    print("")
' "$field" < "$IDFILE")"
  printf '%s\n' "${out%$'\r'}"
}
# Fail fast (exit 3) with routing/install guidance when identity.json explicitly
# records gemini.installed=false. Any other value — including a missing file or
# field — lets the script continue to binary discovery.
case "$(idgem installed)" in
  false)
    echo "[$SELF] gemini is not installed on this machine (identity.json gemini.installed=false)." >&2
    echo "[$SELF] Gemini runs only on the fleet host. Route this request there, or install the gemini CLI (npm i -g @google/gemini-cli) + set identity.json gemini.installed=true." >&2
    exit 3
    ;;
esac
# --- locate the gemini binary ---
# Precedence: GEMINI env var > identity.json gemini.binary > auto-locate.
#
# 1. An explicit GEMINI= that is neither an executable path nor a name found on
#    PATH (e.g. GEMINI=gemini) is treated as a user error: exit 127 up front.
# 2. Otherwise identity.json's gemini.binary is used when it is executable.
# 3. Otherwise `gemini` on PATH, then a list of usual npm-global locations.
# Exit 127 if nothing is found.
GEMINI="${GEMINI:-}"
if [ -n "$GEMINI" ]; then
  if [ ! -x "$GEMINI" ] && ! command -v "$GEMINI" >/dev/null 2>&1; then
    echo "[$SELF] GEMINI='$GEMINI' is not an executable gemini binary." >&2
    exit 127
  fi
fi

cand="$(idgem binary)"
if [ -z "$GEMINI" ] && [ -n "$cand" ] && [ -x "$cand" ]; then
  GEMINI="$cand"
fi

if [ -z "$GEMINI" ]; then
  if command -v gemini >/dev/null 2>&1; then
    GEMINI="$(command -v gemini)"
  else
    # First executable candidate wins.
    for c in \
      "${APPDATA:-}/npm/gemini" \
      "/c/Users/${USERNAME:-${USER:-x}}/AppData/Roaming/npm/gemini" \
      "$HOME/AppData/Roaming/npm/gemini" \
      "/usr/local/bin/gemini" \
      "$HOME/.npm-global/bin/gemini"
    do
      if [ -n "$c" ] && [ -x "$c" ]; then
        GEMINI="$c"
        break
      fi
    done
  fi
fi

if [ -z "$GEMINI" ]; then
  echo "[$SELF] gemini CLI not found (set identity.json gemini.binary, GEMINI=, or install: npm i -g @google/gemini-cli)" >&2
  exit 127
fi
# Model: default routing for text; a strong pinned model for the modes that pass
# -m "$STRONG_MODEL". gemini-3.1-pro-preview verified available on this account
# (2026-06-05); override with GEMINI_MODEL.
STRONG_MODEL="${GEMINI_MODEL:-gemini-3.1-pro-preview}"

# First positional argument selects the mode; the rest stay in "$@" for the mode.
MODE="${1:-}"; shift 2>/dev/null || true
if [ -z "$MODE" ]; then
  echo "usage: $SELF {text|verify|review|review-files|review-diff|image-analyze|search|raw} ..." >&2
  exit 2
fi

# Private scratch dir for the prompt file, captured stdout/stderr and file copies;
# removed on every exit path. A failed mktemp must stop the script: with an empty
# TMP the paths below would resolve to /prompt.txt, /out.txt, /err.txt.
TMP="$(mktemp -d)" || TMP=""
if [ -z "$TMP" ] || [ ! -d "$TMP" ]; then
  echo "[$SELF] could not create a temp directory (mktemp -d failed)" >&2
  exit 1
fi
trap 'rm -rf -- "$TMP"' EXIT
PF="$TMP/prompt.txt"; OUT="$TMP/out.txt"; ERR="$TMP/err.txt"

# Repo root used to resolve relative file arguments: CLAUDETOOLS_ROOT when set,
# otherwise four levels above the script directory.
REPO_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$SCRIPT_DIR/../../../.." 2>/dev/null && pwd)}"

# gtimeout on macOS (brew coreutils), timeout elsewhere.
TIMEOUT_CMD="timeout"
if [[ "${OSTYPE:-}" == "darwin"* ]]; then
  TIMEOUT_CMD="$(command -v gtimeout 2>/dev/null || echo timeout)"
fi
# run_gemini <timeout-secs> [extra gemini flags...]
#
# Runs gemini headless with the contents of $PF as the -p prompt, JSON output and
# --skip-trust, plus any extra flags.
#   stdout -> $OUT, stderr -> $ERR (kept separate so warning/429 noise never
#   reaches the JSON parser); stdin is closed so -p never waits on it.
# Always returns 0: gemini's exit status is ignored on purpose, callers judge by
# the captured output.
# Globals: PF, OUT, ERR, GEMINI, TIMEOUT_CMD (read); LAST_RUN (written) — the
#          arguments of the latest call, so emit_or_fail can replay it once.
#
# If the timeout wrapper is not installed (e.g. macOS without coreutils) gemini
# is run without a time limit and a warning is printed; previously every call
# died with "command not found" and surfaced only as an empty response.
LAST_RUN=()
run_gemini() {
  local to="$1"; shift
  LAST_RUN=("$to" "$@")
  local tcmd="${TIMEOUT_CMD:-timeout}"
  local prompt
  prompt="$(cat "$PF")"
  local -a cmd=("$GEMINI" -p "$prompt" -o json --skip-trust "$@")
  if command -v "$tcmd" >/dev/null 2>&1; then
    cmd=("$tcmd" "$to" "${cmd[@]}")
  else
    echo "[${SELF:-ask-gemini}] warning: '$tcmd' not found — running gemini without a time limit" >&2
  fi
  "${cmd[@]}" >"$OUT" 2>"$ERR" </dev/null || true
}
# gresponse — print the answer text (.response) parsed from gemini's stdout in $OUT.
#
# Input:  $OUT, fed to python on stdin so Windows python never resolves a
#         git-bash (/c/...) path. Parsing starts at the FIRST '{' in the stream,
#         which skips any non-JSON lines printed before the object.
# Output: the response text on stdout. An empty line is printed when no '{' is
#         found, when the JSON cannot be parsed, or when .response is
#         missing/empty; emit_or_fail treats empty output as "no answer".
#
# Sanitizing, in the order the python applies it:
#   1. Leaked-reasoning preamble. Some pinned-pro tool-using turns (notably
#      image-analyze) leak the model's internal reasoning stream into .response:
#      a stray token + a 'thought' marker followed by 'CRITICAL INSTRUCTION N:'
#      lines, then the real answer. The signature check is: 'thought' within the
#      first 40 characters, or 'critical instruction' within the first 600. When
#      it matches, leading lines that are blank, end with 'thought', or start
#      with 'thought:' / 'critical instruction' are dropped up to the first line
#      that is none of those.
#   2. Only when the environment has AGY_CLEAN=1 (set by image-analyze): a more
#      aggressive regex scrub that jumps to a recognised answer pivot and removes
#      a short junk prefix in front of the first capitalized/quoted sentence.
#
# NOTE(review): step 1 is evaluated for every mode (it is not gated on AGY_CLEAN),
#   so an ordinary answer with 'thought' in its first 40 characters also enters
#   the preamble loop — confirm that is intended given the "unchanged" goal.
# NOTE(review): json.loads(raw[i:]) parses from the first '{' to the END of stdout,
#   so any text after the JSON object, or a '{' inside an earlier warning line,
#   makes parsing fail and yields an empty result.
gresponse() { "$PY" -c "import json,sys,re,os
raw=sys.stdin.read()
i=raw.find('{')
if i < 0:
print(''); sys.exit(0)
try:
r=json.loads(raw[i:]).get('response','') or ''
except Exception:
print(''); sys.exit(0)
head=r[:40].lower()
leak=('thought' in head) or ('critical instruction' in r.lower()[:600])
if leak:
lines=r.split('\n')
keep=[]; dropping=True
for ln in lines:
s=ln.strip()
low=s.lower()
if dropping and (
low.endswith('thought') or low.startswith('critical instruction')
or low.startswith('thought:') or low=='' ):
continue
dropping=False
keep.append(ln)
cleaned='\n'.join(keep).strip()
r=cleaned if cleaned else r.strip()
# AGY_CLEAN: aggressive prefix scrub for tool-using turns (image-analyze), which
# can fuse a stray stream/tool token onto the front of the answer (e.g. '.',
# '.94>', 'uem_image_0_0_png}'). Off by default so text/verify/review/search are
# byte-exact. We only remove a junk run that ends in a stream delimiter (} > :)
# or a lone leading punctuation char, immediately before the first real sentence.
if os.environ.get('AGY_CLEAN') == '1' and r:
# The pro-preview tool loop sometimes prepends a numbered/markdown reasoning
# block before the actual answer. If a clear answer pivot follows such a
# preamble, keep from the pivot onward (the user-facing answer).
if re.search(r'(?im)^\s*\d+[.)]\s', r) or 'thought' in r[:60].lower():
pivs=list(re.finditer(r'(?i)(Based on the image\b|\*\*Answer:?\*\*|The image (?:contains|shows|displays)\b)', r))
if pivs:
r=r[pivs[-1].start():]
m=re.match(r'^[^\n]{0,40}?(?:\.png\)|\.jpe?g\)|[}>:)])\s*([\"A-Z].*)$', r, re.S)
if m and m.group(1):
r=m.group(1)
else:
# a short leading junk run (ASCII punctuation/digits or non-Latin stream
# tokens) before a capitalized/quoted sentence start. Bounded length so we
# never eat a real lowercase sentence or real prose.
m=re.match(r'^(?:[^A-Za-z\"]|[^\x00-\x7f]){1,8}([A-Z\"].*)$', r, re.S)
if m and m.group(1):
r=m.group(1)
r=r.strip()
print(r)" < "$OUT"; }
# auth_failed — status 0 when gemini's captured stderr ($ERR) contains an
# authentication-looking keyword (case-insensitive), non-zero otherwise or when
# $ERR cannot be read. Used to pick a precise remediation hint over a retry.
auth_failed() {
  local auth_signs='oauth|unauthor|authenticat|login|credential|invalid_grant|401'
  grep -qiE "$auth_signs" "$ERR" 2>/dev/null
}
# emit_or_fail — print gemini's answer, or exit 1 with a diagnostic.
#
# Pass 1 inspects the output of the call that already ran. If it is empty and
# not an auth failure, pass 2 replays the recorded invocation (LAST_RUN) once —
# gemini occasionally returns an empty turn or absorbs a 429 backoff into the
# timeout. An auth failure is never retried: it is reported with the login hint
# straight away. With nothing recorded in LAST_RUN there is no second pass.
# Returns 0 after printing the answer; otherwise exits the script with status 1.
emit_or_fail() {
  local answer attempt
  for attempt in first retry; do
    if [ "$attempt" = "retry" ]; then
      [ ${#LAST_RUN[@]} -gt 0 ] || break
      echo "[$SELF] empty response — retrying once..." >&2
      run_gemini "${LAST_RUN[@]}"
    fi
    answer="$(gresponse)"
    if [ -n "$answer" ]; then
      printf '%s\n' "$answer"
      return 0
    fi
    if auth_failed; then
      echo "[$SELF] Gemini auth error — run 'gemini' interactively and choose 'Login with Google', then retry." >&2
      exit 1
    fi
  done
  echo "[$SELF] no response from gemini. stderr tail:" >&2
  tail -3 "$ERR" >&2 2>/dev/null || true
  exit 1
}
# INCLUDE_DIR is a scratch "inbox" under $TMP. The file-reading modes copy their
# targets into it and pass it to gemini via --include-directories, so read_file
# can reach them regardless of .gitignore / the workspace sandbox.
# prep_includes only makes sure the directory exists; it prints nothing.
INCLUDE_DIR="${TMP}/inbox"
prep_includes() {
  [ -d "$INCLUDE_DIR" ] || mkdir -p "$INCLUDE_DIR"
}
# --- mode dispatch ---
# Every arm except `raw` writes the prompt to $PF, calls run_gemini with a
# per-mode timeout and flags, then hands off to emit_or_fail (prints the answer
# or exits 1). Usage errors and missing inputs exit 2.
case "$MODE" in
  text|verify)
    # Prompt is either the first argument or the contents of --prompt-file <path>.
    SRC=""
    if [ "${1:-}" = "--prompt-file" ]; then
      [ -f "${2:-}" ] || { echo "[$SELF] prompt file not found: ${2:-}" >&2; exit 2; }
      SRC="$(cat "$2")"
    else
      SRC="${1:-}"
    fi
    [ -z "$SRC" ] && { echo "usage: $SELF $MODE \"<prompt>\" | $SELF $MODE --prompt-file <path>" >&2; exit 2; }
    if [ "$MODE" = "verify" ]; then
      # Adversarial check: pinned strong model.
      printf 'You are an adversarial reviewer giving an independent second opinion. Evaluate the following claim/finding/document: try hard to find any way it is WRONG, incomplete, unsupported, or overstated. Then give a clear VERDICT (e.g. correct / partly correct / incorrect) plus specific justification. Answer in text only; do not use any tools.\n\nContent:\n%s' "$SRC" > "$PF"
      run_gemini 180 -m "$STRONG_MODEL"
    else
      # Plain one-shot answer: default model routing (no -m).
      printf 'Answer the following directly in text. Do not use any tools.\n\n%s' "$SRC" > "$PF"
      run_gemini 180
    fi
    emit_or_fail
    ;;

  review|file)
    # Review ONE file. $1 = path (as given, else relative to REPO_ROOT),
    # $2 = optional instructions.
    [ -z "${1:-}" ] && { echo "usage: $SELF review <file-path> [instructions]" >&2; exit 2; }
    target="$1"
    instr="${2:-Give an independent, critical review of this file: accuracy, gaps/omissions, bugs, and concrete improvements. Be specific.}"
    if [ -f "$target" ]; then resolved="$target"
    elif [ -f "$REPO_ROOT/$target" ]; then resolved="$REPO_ROOT/$target"
    else echo "[$SELF] file not found: $target" >&2; exit 2; fi
    # Copy into the included temp dir so read_file can reach it.
    prep_includes
    base="$(basename "$resolved")"
    cp -f "$resolved" "$INCLUDE_DIR/$base"
    tgt_win="$(winpath "$INCLUDE_DIR/$base")"
    inc_win="$(winpath "$INCLUDE_DIR")"
    printf 'Use your read_file tool to read the file at this absolute path, then perform the task and stop. Do not modify anything.\nPath: %s\n\nTask: %s' "$tgt_win" "$instr" > "$PF"
    run_gemini 240 -m "$STRONG_MODEL" --approval-mode plan --include-directories "$inc_win"
    emit_or_fail
    ;;

  review-files)
    # Review a SET of files together. -i/--instr "<text>" overrides the default
    # instructions; every other argument is a file path.
    instr='Independently review these files together as a unit: correctness/bugs, gaps, cross-file consistency, and concrete improvements. Be specific and cite file:line.'
    files=()
    while [ $# -gt 0 ]; do
      case "$1" in
        -i|--instr) instr="${2:-}"; shift 2 2>/dev/null || shift ;;
        *) files+=("$1"); shift ;;
      esac
    done
    [ ${#files[@]} -eq 0 ] && { echo "usage: $SELF review-files [-i \"instructions\"] <file> [file ...]" >&2; exit 2; }
    prep_includes
    list=""
    for f in "${files[@]}"; do
      if [ -f "$f" ]; then r="$f"
      elif [ -f "$REPO_ROOT/$f" ]; then r="$REPO_ROOT/$f"
      else echo "[$SELF] file not found: $f" >&2; exit 2; fi
      base="$(basename "$r")"
      # De-collide identical basenames from different dirs by prefixing "<n>_".
      # The include dir starts empty, so "already exists there" is exactly
      # "already used". Asking the filesystem (instead of an associative array)
      # keeps this working on bash 3.2, which has no `declare -A`, and also
      # catches names that differ only by case on case-insensitive filesystems,
      # where cp would otherwise silently overwrite the earlier copy.
      if [ -e "$INCLUDE_DIR/$base" ]; then
        n=1
        while [ -e "$INCLUDE_DIR/${n}_${base}" ]; do n=$((n+1)); done
        base="${n}_${base}"
      fi
      cp -f "$r" "$INCLUDE_DIR/$base"
      list+="- $(winpath "$INCLUDE_DIR/$base")"$'\n'
    done
    inc_win="$(winpath "$INCLUDE_DIR")"
    printf 'Use your read_file tool to read EACH of these files (absolute paths), then perform the task across ALL of them and stop. Do not modify anything.\n\nFiles:\n%s\nTask: %s' "$list" "$instr" > "$PF"
    run_gemini 300 -m "$STRONG_MODEL" --approval-mode plan --include-directories "$inc_win"
    emit_or_fail
    ;;

  review-diff)
    # Review `git diff <ref>` of a repo. -C/--dir picks the repo (default
    # REPO_ROOT), -i/--instr overrides the instructions, the first bare argument
    # is the git ref, and anything after it (or after `--`) is a pathspec.
    gdir="$REPO_ROOT"
    instr='Review this git diff: correctness/bugs introduced, regressions, missing edge cases, and concrete fixes. Focus on the CHANGES. Be specific and cite file:line.'
    ref=""; pathspec=()
    while [ $# -gt 0 ]; do
      case "$1" in
        -C|--dir) gdir="${2:-}"; shift 2 2>/dev/null || shift ;;
        -i|--instr) instr="${2:-}"; shift 2 2>/dev/null || shift ;;
        --) shift; while [ $# -gt 0 ]; do pathspec+=("$1"); shift; done ;;
        *) if [ -z "$ref" ]; then ref="$1"; else pathspec+=("$1"); fi; shift ;;
      esac
    done
    [ -z "$ref" ] && { echo "usage: $SELF review-diff [-C <repo-dir>] [-i \"instr\"] <gitref> [-- <pathspec>]" >&2; exit 2; }
    # A repo dir that does not exist as given is retried relative to REPO_ROOT.
    [ -d "$gdir" ] || { [ -d "$REPO_ROOT/$gdir" ] && gdir="$REPO_ROOT/$gdir"; }
    git -C "$gdir" rev-parse --git-dir >/dev/null 2>&1 || { echo "[$SELF] not a git repo: $gdir" >&2; exit 2; }
    if [ ${#pathspec[@]} -gt 0 ]; then
      git -C "$gdir" diff "$ref" -- "${pathspec[@]}" > "$TMP/diff.txt" 2>"$TMP/differr.txt"
    else
      git -C "$gdir" diff "$ref" > "$TMP/diff.txt" 2>"$TMP/differr.txt"
    fi
    # An empty diff and a failed diff are reported the same way (exit 1).
    [ -s "$TMP/diff.txt" ] || { echo "[$SELF] empty/failed diff for '$ref' in $gdir: $(head -1 "$TMP/differr.txt" 2>/dev/null)" >&2; exit 1; }
    gdir_win="$(winpath "$gdir")"
    # The repo dir itself is included so changed files can be read in place.
    {
      printf 'Review the following unified git diff. %s\nYou may use your read_file tool on any changed file for full context (paths in the diff are relative to %s; strip the a/ b/ prefixes). Do not modify anything.\n\n=== BEGIN DIFF ===\n' "$instr" "$gdir_win"
      cat "$TMP/diff.txt"
      printf '\n=== END DIFF ===\n'
    } > "$PF"
    run_gemini 300 -m "$STRONG_MODEL" --approval-mode plan --include-directories "$gdir_win"
    emit_or_fail
    ;;

  image-analyze|image|vision)
    # Independent second-model VISION. The default flash-lite router hallucinates
    # image content, so we PIN the pro vision model (STRONG_MODEL) and run with
    # yolo approval so read_file can execute. The image is copied into an included
    # temp dir (like the review modes) and handed to Gemini by absolute winpath.
    [ -z "${1:-}" ] && { echo "usage: $SELF image-analyze <image-path> [\"question\"]" >&2; exit 2; }
    target="$1"
    question="${2:-Describe exactly what is in this image.}"
    if [ -f "$target" ]; then resolved="$target"
    elif [ -f "$REPO_ROOT/$target" ]; then resolved="$REPO_ROOT/$target"
    else echo "[$SELF] image not found: $target" >&2; exit 2; fi
    prep_includes
    base="$(basename "$resolved")"
    cp -f "$resolved" "$INCLUDE_DIR/$base"
    img_win="$(winpath "$INCLUDE_DIR/$base")"
    inc_win="$(winpath "$INCLUDE_DIR")"
    # Image path goes in via %s (never as a printf format string).
    printf 'Use your read_file tool to read the image at this absolute path, then describe exactly what you see. Report only what is actually present in the image; do not guess or invent content. Then stop. Do not modify anything.\nImage path: %s\n\nQuestion: %s' "$img_win" "$question" > "$PF"
    run_gemini 240 -m "$STRONG_MODEL" --approval-mode yolo --include-directories "$inc_win"
    # AGY_CLEAN=1 applies to this call only and turns on the aggressive output
    # scrub in gresponse.
    AGY_CLEAN=1 emit_or_fail
    ;;

  search|websearch)
    # Google-grounded LIVE web search (mirrors grok xsearch). Gemini's
    # google_web_search tool works on OAuth; run with yolo so the tool can fire.
    # The query is embedded in the prompt file; run_gemini then passes that
    # file's contents to gemini as the single -p argument.
    SRC=""
    if [ "${1:-}" = "--prompt-file" ]; then
      [ -f "${2:-}" ] || { echo "[$SELF] prompt file not found: ${2:-}" >&2; exit 2; }
      SRC="$(cat "$2")"
    else
      SRC="${1:-}"
    fi
    [ -z "$SRC" ] && { echo "usage: $SELF search \"<query>\" | $SELF search --prompt-file <path>" >&2; exit 2; }
    printf 'Use your google_web_search tool to find current, live information answering the following, then stop. Answer concisely and ALWAYS include the source URLs you used (a Sources list of full URLs). Do not fabricate URLs.\n\nQuery: %s' "$SRC" > "$PF"
    run_gemini 180 -m "$STRONG_MODEL" --approval-mode yolo
    emit_or_fail
    ;;

  raw)
    # Escape hatch: pass the remaining arguments straight to gemini, with no
    # prompt file, timeout, output capture or JSON parsing.
    "$GEMINI" "$@"
    ;;

  *)
    echo "[$SELF] unknown mode '$MODE' (use text|verify|review|review-files|review-diff|image-analyze|search|raw)" >&2; exit 2 ;;
esac