diff --git a/.claude/skills/agy/scripts/ask-gemini.sh b/.claude/skills/agy/scripts/ask-gemini.sh
index f9cf51a3..16951188 100644
--- a/.claude/skills/agy/scripts/ask-gemini.sh
+++ b/.claude/skills/agy/scripts/ask-gemini.sh
@@ -184,31 +184,51 @@ if os.environ.get('AGY_CLEAN') == '1' and r:
     r=r.strip()
 print(r)" < "$OUT"; }
 
-# detect an auth failure in stderr (so we can give a precise remediation hint)
-auth_failed() { grep -qiE 'oauth|unauthor|authenticat|login|credential|invalid_grant|401' "$ERR" 2>/dev/null; }
+# detect a GENUINE auth failure in stderr (precise remediation hint). Tightened 2026-06-17 -
+# the old broad regex (bare login|credential|authenticat|oauth|401) matched benign mid-run
+# token-refresh lines and false-flagged working sessions as auth failures.
+auth_failed() { grep -qiE 'invalid_grant|unauthorized|unauthenticated|not authenticated|authentication failed|invalid authentication credentials|re-?authenticat|please (log|sign).?in|login with google|token (has )?expired|no (valid )?credentials' "$ERR" 2>/dev/null; }  # also covers the Google API 401 wording (UNAUTHENTICATED / "invalid authentication credentials")
+# detect a quota / rate-capacity exhaustion (the pinned strong model can be capped mid-session)
+quota_exhausted() { grep -qiE 'exhausted your capacity|quota|resource[_ ]?exhausted|rate[ _-]?limit|too many requests|(^|[^0-9])429([^0-9]|$)' "$ERR" 2>/dev/null; }  # 429 only counts as a standalone number, not inside ids/durations
 
 emit_or_fail() {  # print .response; gemini intermittently returns an empty turn, so retry a few
                   # times with backoff before giving up (single retry was insufficient - 2 empties
                   # in a row caused spurious failures during live research, 2026-06-17).
+  # Do ALL retries first; only classify the failure (auth vs generic) AFTER exhausting them.
+  # (Checking auth_failed INSIDE the loop caused false aborts: a benign mid-run credential-refresh
+  # line in stderr matched the auth regex and killed the retries even though auth was fine. 2026-06-17.)
   local txt tries=0 max="${AGY_MAX_TRIES:-3}"
   txt="$(gresponse)"
-  while [ -z "$txt" ]; do
-    # Auth failures won't be fixed by a retry - report immediately.
-    if auth_failed; then
-      echo "[$SELF] Gemini auth error - run 'gemini' interactively and choose 'Login with Google', then retry." >&2
-      _logerr "gemini auth/login failure" --context "mode=$MODE"; exit 1
-    fi
+  while [ -z "$txt" ] && [ "$tries" -lt $((max-1)) ] && [ ${#LAST_RUN[@]} -gt 0 ]; do
     tries=$((tries+1))
-    { [ "$tries" -ge "$max" ] || [ ${#LAST_RUN[@]} -eq 0 ]; } && break
     echo "[$SELF] empty response - retry $tries/$((max-1)) (backoff ${tries}x3s)..." >&2
-    sleep $((tries*3))                 # 3s, 6s, ... backoff (covers transient empties / 429s)
+    sleep $((tries*3))                 # 3s, 6s backoff (covers transient empties / 429s / token refresh)
     run_gemini "${LAST_RUN[@]}"
     txt="$(gresponse)"
   done
   if [ -n "$txt" ]; then printf '%s\n' "$txt"; return 0; fi
-  echo "[$SELF] no response from gemini after $max attempts. stderr tail:" >&2
-  tail -3 "$ERR" >&2 2>/dev/null || true
-  _logerr "gemini returned no response (empty after $max attempts)" --context "mode=$MODE err=$(tail -1 "$ERR" 2>/dev/null | tr -d '\n' | cut -c1-80)"
+  # Quota fallback: if the pinned strong model is capacity/quota-capped, retry ONCE on the default
+  # (lighter) model by stripping "-m <model>" from the last invocation - the default model has a separate quota.
+  if quota_exhausted && [ ${#LAST_RUN[@]} -gt 0 ]; then
+    local nr=() a skip=0
+    for a in "${LAST_RUN[@]}"; do
+      if [ "$skip" = 1 ]; then skip=0; elif [ "$a" = "-m" ]; then skip=1; else nr+=("$a"); fi
+    done
+    # Only fall back if a -m pin was really removed; otherwise we would just repeat the same capped call.
+    if [ ${#nr[@]} -gt 0 ] && [ ${#nr[@]} -lt ${#LAST_RUN[@]} ]; then
+      echo "[$SELF] '$STRONG_MODEL' quota exhausted - retrying once on the default (lighter) model..." >&2
+      run_gemini "${nr[@]}"; txt="$(gresponse)"
+      if [ -n "$txt" ]; then printf '%s\n' "$txt"; return 0; fi
+    fi
+  fi
+  if auth_failed; then
+    echo "[$SELF] Gemini auth error - run 'gemini' interactively and choose 'Login with Google', then retry." >&2
+    _logerr "gemini auth/login failure" --context "mode=$MODE"
+  else
+    echo "[$SELF] no response from gemini after $max attempts. stderr tail:" >&2
+    tail -3 "$ERR" >&2 2>/dev/null || true
+    _logerr "gemini returned no response (empty after $max attempts)" --context "mode=$MODE err=$(tail -1 "$ERR" 2>/dev/null | tr -d '\n' | cut -c1-80)"
+  fi
   exit 1
 }
 
diff --git a/docs/CT_THOUGHTS.md b/docs/CT_THOUGHTS.md
index 3b6cc202..ef52831a 100644
--- a/docs/CT_THOUGHTS.md
+++ b/docs/CT_THOUGHTS.md
@@ -266,7 +266,13 @@ the docs before probing" workflow - it has to be dependable.
   the search phase - 183 thoughts, only progress-noise text), and buffered `json` => total loss. GEMINI
   search = INTERMITTENT empty turn (a clean re-run succeeded in 122s with a real 2.6KB answer); the
   wrapper only retried once, so two empties in a row failed spuriously.
-- **Gemini fix:** `emit_or_fail` now retries up to 3x with 3s/6s backoff (was 1).
+- **Gemini fix:** `emit_or_fail` now makes up to 3 attempts (2 retries, 3s/6s backoff; was 1 retry). Two follow-on bugs
+  found+fixed same day while using it: (a) the auth check ran INSIDE the retry loop and a benign mid-run
+  token-refresh line matched the over-broad auth regex -> false "auth error" abort; moved auth-classify
+  AFTER the retries and tightened the regex. (b) added a QUOTA FALLBACK: when the pinned strong model
+  (gemini-3.1-pro-preview) returns "exhausted your capacity on this model", retry once on the default
+  (lighter) model (separate quota) by stripping -m. Validated: a quota-capped pro run fell back and
+  returned a 2.9KB answer.
 - **Grok xsearch fix:** switched to `--output-format streaming-json` (salvage any partial that streamed),
   moderate budget, and **AUTO-FALLBACK to gemini search** when grok doesn't finish (rc!=0 or empty).
   Validated e2e: grok timed out (rc=124) -> fell back -> gemini returned a real sourced answer.
diff --git a/errorlog.md b/errorlog.md
index 39a4dd7b..961d0dab 100644
--- a/errorlog.md
+++ b/errorlog.md
@@ -17,6 +17,10 @@ Categories (the `[type]` tag): _(none)_ = skill/command execution failure ·
 
 <!-- Append entries below this line -->
 
+2026-06-17 | GURU-5070 | agy | gemini returned no response (empty after 3 attempts) [ctx: mode=search err=Attempt 1 failed: You have exhausted your capacity on this model. Your quota wil]
+
+2026-06-17 | GURU-5070 | agy | gemini auth/login failure [ctx: mode=search]
+
 2026-06-17 | Howard-Home | wiki-compile | [friction] Phase 6 release cmd documented as 'coord.py lock release claudetools <resource-path>' but coord.py 'lock release' takes the LOCK ID, not the resource path -> inline release no-ops and strands the lock until TTL. Fix: capture lock id from claim and release by id. [ctx: skill=wiki-compile phase=6]
 
 2026-06-17 | GURU-5070 | grok | grok xsearch incomplete (rc=124); auto-fell back to gemini [ctx: mode=xsearch]