diff --git a/.claude/skills/agy/scripts/ask-gemini.sh b/.claude/skills/agy/scripts/ask-gemini.sh index f9cf51a3..16951188 100644 --- a/.claude/skills/agy/scripts/ask-gemini.sh +++ b/.claude/skills/agy/scripts/ask-gemini.sh @@ -184,31 +184,51 @@ if os.environ.get('AGY_CLEAN') == '1' and r: r=r.strip() print(r)" < "$OUT"; } -# detect an auth failure in stderr (so we can give a precise remediation hint) -auth_failed() { grep -qiE 'oauth|unauthor|authenticat|login|credential|invalid_grant|401' "$ERR" 2>/dev/null; } +# detect a GENUINE auth failure in stderr (precise remediation hint). Tightened 2026-06-17 - +# the old broad regex (bare login|credential|authenticat|oauth|401) matched benign mid-run +# token-refresh lines and false-flagged working sessions as auth failures. +auth_failed() { grep -qiE 'invalid_grant|unauthorized|not authenticated|authentication failed|re-?authenticat|please (log|sign).?in|login with google|token (has )?expired|no (valid )?credentials' "$ERR" 2>/dev/null; } +# detect a quota / rate-capacity exhaustion (the pinned strong model can be capped mid-session) +quota_exhausted() { grep -qiE 'exhausted your capacity|quota|resource[_ ]?exhausted|rate limit|too many requests|429' "$ERR" 2>/dev/null; } emit_or_fail() { # print .response; gemini intermittently returns an empty turn, so retry a few # times with backoff before giving up (single retry was insufficient - 2 empties # in a row caused spurious failures during live research, 2026-06-17). + # Do ALL retries first; only classify the failure (auth vs generic) AFTER exhausting them. + # (Checking auth_failed INSIDE the loop caused false aborts: a benign mid-run credential-refresh + # line in stderr matched the auth regex and killed the retries even though auth was fine. 2026-06-17.) local txt tries=0 max="${AGY_MAX_TRIES:-3}" txt="$(gresponse)" - while [ -z "$txt" ]; do - # Auth failures won't be fixed by a retry - report immediately. 
- if auth_failed; then - echo "[$SELF] Gemini auth error - run 'gemini' interactively and choose 'Login with Google', then retry." >&2 - _logerr "gemini auth/login failure" --context "mode=$MODE"; exit 1 - fi + while [ -z "$txt" ] && [ "$tries" -lt $((max-1)) ] && [ ${#LAST_RUN[@]} -gt 0 ]; do tries=$((tries+1)) - { [ "$tries" -ge "$max" ] || [ ${#LAST_RUN[@]} -eq 0 ]; } && break echo "[$SELF] empty response - retry $tries/$((max-1)) (backoff ${tries}x3s)..." >&2 - sleep $((tries*3)) # 3s, 6s, ... backoff (covers transient empties / 429s) + sleep $((tries*3)) # 3s, 6s backoff (covers transient empties / 429s / token refresh) run_gemini "${LAST_RUN[@]}" txt="$(gresponse)" done if [ -n "$txt" ]; then printf '%s\n' "$txt"; return 0; fi - echo "[$SELF] no response from gemini after $max attempts. stderr tail:" >&2 - tail -3 "$ERR" >&2 2>/dev/null || true - _logerr "gemini returned no response (empty after $max attempts)" --context "mode=$MODE err=$(tail -1 "$ERR" 2>/dev/null | tr -d '\n' | cut -c1-80)" + # Quota fallback: if the pinned strong model is capacity/quota-capped, retry ONCE on the default + # (lighter) model by stripping -m from the last invocation - the default model has a separate quota. + if quota_exhausted && [ ${#LAST_RUN[@]} -gt 0 ]; then + echo "[$SELF] '$STRONG_MODEL' quota exhausted - retrying once on the default (lighter) model..." >&2 + local nr=() a skip=0 + for a in "${LAST_RUN[@]}"; do + if [ "$skip" = 1 ]; then skip=0; continue; fi + if [ "$a" = "-m" ]; then skip=1; continue; fi + nr+=("$a") + done + run_gemini "${nr[@]}" + txt="$(gresponse)" + if [ -n "$txt" ]; then printf '%s\n' "$txt"; return 0; fi + fi + if auth_failed; then + echo "[$SELF] Gemini auth error - run 'gemini' interactively and choose 'Login with Google', then retry." >&2 + _logerr "gemini auth/login failure" --context "mode=$MODE" + else + echo "[$SELF] no response from gemini after $max attempts. 
stderr tail:" >&2 + tail -3 "$ERR" >&2 2>/dev/null || true + _logerr "gemini returned no response (empty after $max attempts)" --context "mode=$MODE err=$(tail -1 "$ERR" 2>/dev/null | tr -d '\n' | cut -c1-80)" + fi exit 1 } diff --git a/docs/CT_THOUGHTS.md b/docs/CT_THOUGHTS.md index 3b6cc202..ef52831a 100644 --- a/docs/CT_THOUGHTS.md +++ b/docs/CT_THOUGHTS.md @@ -266,7 +266,13 @@ the docs before probing" workflow - it has to be dependable. the search phase - 183 thoughts, only progress-noise text), and buffered `json` => total loss. GEMINI search = INTERMITTENT empty turn (a clean re-run succeeded in 122s with a real 2.6KB answer); the wrapper only retried once, so two empties in a row failed spuriously. -- **Gemini fix:** `emit_or_fail` now retries up to 3x with 3s/6s backoff (was 1). +- **Gemini fix:** `emit_or_fail` now retries up to 3x with 3s/6s backoff (was 1). Two follow-on bugs + found+fixed same day while using it: (a) the auth check ran INSIDE the retry loop and a benign mid-run + token-refresh line matched the over-broad auth regex -> false "auth error" abort; moved auth-classify + AFTER the retries and tightened the regex. (b) added a QUOTA FALLBACK: when the pinned strong model + (gemini-3.1-pro-preview) returns "exhausted your capacity on this model", retry once on the default + (lighter) model (separate quota) by stripping -m. Validated: a quota-capped pro run fell back and + returned a 2.9KB answer. - **Grok xsearch fix:** switched to `--output-format streaming-json` (salvage any partial that streamed), moderate budget, and **AUTO-FALLBACK to gemini search** when grok doesn't finish (rc!=0 or empty). Validated e2e: grok timed out (rc=124) -> fell back -> gemini returned a real sourced answer. 
diff --git a/errorlog.md b/errorlog.md index 39a4dd7b..961d0dab 100644 --- a/errorlog.md +++ b/errorlog.md @@ -17,6 +17,10 @@ Categories (the `[type]` tag): _(none)_ = skill/command execution failure · +2026-06-17 | GURU-5070 | agy | gemini returned no response (empty after 3 attempts) [ctx: mode=search err=Attempt 1 failed: You have exhausted your capacity on this model. Your quota wil] + +2026-06-17 | GURU-5070 | agy | gemini auth/login failure [ctx: mode=search] + 2026-06-17 | Howard-Home | wiki-compile | [friction] Phase 6 release cmd documented as 'coord.py lock release claudetools ' but coord.py 'lock release' takes the LOCK ID, not the resource path -> inline release no-ops and strands the lock until TTL. Fix: capture lock id from claim and release by id. [ctx: skill=wiki-compile phase=6] 2026-06-17 | GURU-5070 | grok | grok xsearch incomplete (rc=124); auto-fell back to gemini [ctx: mode=xsearch]