harness: fleet-wide functional-error + correction + friction logging

Add .claude/scripts/log-skill-error.sh — the canonical agent error log helper (writes errorlog.md in DATE | MACHINE | skill | [type] error format, soft-fails). Three categories: execution failures (default), user corrections (--correction), and preventable self-inflicted friction (--friction; cite ref= when it repeats a documented gotcha). Goal: stop paying tokens twice for the same avoidable mistake. - CLAUDE.md: make logging mandatory for all skills + corrections + friction. - skill-creator: new skills must wire in the helper (guidance + checklist). - Retrofit every skill script's genuine failure branches to call the helper (b2/bitdefender/mailprotector/packetdial/coord python CLIs; remediation-tool + onboard365 bash; vault, rmm-auth, post-bot-alert, agy, grok, 1password, run-onboarding-diagnostic). Handled conditions + self-tests left alone. - errorlog.md: broaden header to cover skills + harness + corrections; seed this session's corrections (INKY, Mail.Send token-audience, omnibox-strictness) and friction (git-bash /tmp, env-persistence, argv-limit, PowerShell var-case). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 11:39:43 -07:00
parent 927a06a0cf
commit 9960da5f9a
29 changed files with 388 additions and 36 deletions
--- a/.claude/skills/1password/scripts/env_from_op.sh
+++ b/.claude/skills/1password/scripts/env_from_op.sh
@@ -14,6 +14,10 @@

 set -euo pipefail

+# Functional-error logger (skill name "1password"). Repo root is $CLAUDETOOLS_ROOT when set and non-empty, otherwise 4 levels up from this script's directory.
+CLAUDETOOLS_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"
+_logerr() { bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "1password" "$@" >/dev/null 2>&1 || true; }  # args pass through unchanged; helper output is discarded and a failing helper is ignored
+
 VAULT=""
 ITEM=""
 OUTPUT=".env"
@@ -35,6 +39,7 @@ done
 # Check op is available
 if ! command -v op &>/dev/null; then
  echo "❌ 1Password CLI (op) not found. Install: https://developer.1password.com/docs/cli/get-started/"
+  _logerr "op CLI not found on PATH"
  exit 1
 fi

--- a/.claude/skills/1password/scripts/store-mcp-credentials.sh
+++ b/.claude/skills/1password/scripts/store-mcp-credentials.sh
@@ -23,6 +23,10 @@

 set -euo pipefail

+# Functional-error logger (skill name "1password"). Repo root is $CLAUDETOOLS_ROOT when set and non-empty, otherwise 4 levels up from this script's directory.
+CLAUDETOOLS_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"
+_logerr() { bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "1password" "$@" >/dev/null 2>&1 || true; }  # args pass through unchanged; helper output is discarded and a failing helper is ignored
+
 VAULT="Dev"
 ITEM=""
 UPDATE=false
@@ -88,14 +92,16 @@ ALL_FIELDS=("${OP_FIELDS[@]+"${OP_FIELDS[@]}"}" "${SECRET_VALUES[@]+"${SECRET_VA
 echo "Saving to 1Password..."

 if $UPDATE; then
-  op item edit "$ITEM" --vault "$VAULT" "${ALL_FIELDS[@]}"
+  op item edit "$ITEM" --vault "$VAULT" "${ALL_FIELDS[@]}" \
+    || { rc=$?; _logerr "op item edit failed (update MCP creds)" --context "item=$ITEM vault=$VAULT rc=$rc"; exit $rc; }
  echo ""
  echo "✅ Updated '$ITEM' in vault '$VAULT'"
 else
  # Try create, fall back to update if already exists
  if op item get "$ITEM" --vault "$VAULT" &>/dev/null 2>&1; then
    echo "  Item already exists — updating instead..."
-    op item edit "$ITEM" --vault "$VAULT" "${ALL_FIELDS[@]}"
+    op item edit "$ITEM" --vault "$VAULT" "${ALL_FIELDS[@]}" \
+      || { rc=$?; _logerr "op item edit failed (update MCP creds)" --context "item=$ITEM vault=$VAULT rc=$rc"; exit $rc; }
    echo ""
    echo "✅ Updated '$ITEM' in vault '$VAULT'"
  else
@@ -103,7 +109,8 @@ else
      --category API_CREDENTIAL \
      --title "$ITEM" \
      --vault "$VAULT" \
-      "${ALL_FIELDS[@]}"
+      "${ALL_FIELDS[@]}" \
+      || { rc=$?; _logerr "op item create failed (MCP creds)" --context "item=$ITEM vault=$VAULT rc=$rc"; exit $rc; }
    echo ""
    echo "✅ Created '$ITEM' in vault '$VAULT'"
  fi
--- a/.claude/skills/1password/scripts/store_secret.sh
+++ b/.claude/skills/1password/scripts/store_secret.sh
@@ -9,6 +9,10 @@

 set -euo pipefail

+# Functional-error logger (skill name "1password"). Repo root is $CLAUDETOOLS_ROOT when set and non-empty, otherwise 4 levels up from this script's directory.
+CLAUDETOOLS_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"
+_logerr() { bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "1password" "$@" >/dev/null 2>&1 || true; }  # args pass through unchanged; helper output is discarded and a failing helper is ignored
+
 TITLE=""
 FIELD="credential"
 VALUE=""
@@ -67,7 +71,8 @@ VAULT_FLAG=""

 if $UPDATE; then
  echo "Updating '${FIELD}' in '${TITLE}'..."
-  op item edit "$TITLE" $VAULT_FLAG "${FIELD}[password]=${VALUE}"
+  op item edit "$TITLE" $VAULT_FLAG "${FIELD}[password]=${VALUE}" \
+    || { rc=$?; _logerr "op item edit failed (update secret)" --context "item=$TITLE field=$FIELD vault=${VAULT:-default} rc=$rc"; exit $rc; }
  echo "✅ Updated '${FIELD}' in '${TITLE}'"
 else
  echo "Creating '${TITLE}' in 1Password..."
@@ -76,7 +81,8 @@ else
    --title "$TITLE" \
    $VAULT_FLAG \
    "${FIELD}[password]=${VALUE}" \
-    --format=json)
+    --format=json) \
+    || { rc=$?; _logerr "op item create failed" --context "item=$TITLE category=$CATEGORY vault=${VAULT:-default} rc=$rc"; exit $rc; }

  ITEM_ID=$(echo "$RESULT" | jq -r '.id')
  VAULT_NAME=$(echo "$RESULT" | jq -r '.vault.name')
--- a/.claude/skills/agy/scripts/ask-gemini.sh
+++ b/.claude/skills/agy/scripts/ask-gemini.sh
@@ -104,6 +104,8 @@ MODE="${1:-}"; shift 2>/dev/null || true
 TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT
 PF="$TMP/prompt.txt"; OUT="$TMP/out.txt"; ERR="$TMP/err.txt"
 REPO_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$SCRIPT_DIR/../../../.." 2>/dev/null && pwd)}"
+# Functional-error logger: _logerr <message> [--context "..."] forwards its arguments to log-skill-error.sh under $REPO_ROOT with skill name "agy". Output is discarded and a failing helper is ignored (|| true), so the caller's own exit path is unaffected.
+_logerr() { bash "$REPO_ROOT/.claude/scripts/log-skill-error.sh" "agy" "$@" >/dev/null 2>&1 || true; }

 # gtimeout on macOS (brew coreutils), timeout elsewhere.
 TIMEOUT_CMD="timeout"
@@ -191,6 +193,7 @@ emit_or_fail() {  # print .response, or retry once on a transient empty turn, el
  # Auth failures won't be fixed by a retry — report immediately.
  if auth_failed; then
    echo "[$SELF] Gemini auth error — run 'gemini' interactively and choose 'Login with Google', then retry." >&2
+    _logerr "gemini auth/login failure" --context "mode=$MODE"
    exit 1
  fi
  # Gemini occasionally returns an empty turn (or absorbs a 429 backoff into the
@@ -202,11 +205,13 @@ emit_or_fail() {  # print .response, or retry once on a transient empty turn, el
    if [ -n "$txt" ]; then printf '%s\n' "$txt"; return 0; fi
    if auth_failed; then
      echo "[$SELF] Gemini auth error — run 'gemini' interactively and choose 'Login with Google', then retry." >&2
+      _logerr "gemini auth/login failure (after retry)" --context "mode=$MODE"
      exit 1
    fi
  fi
  echo "[$SELF] no response from gemini. stderr tail:" >&2
  tail -3 "$ERR" >&2 2>/dev/null || true
+  _logerr "gemini returned no response (empty after retry)" --context "mode=$MODE err=$(tail -1 "$ERR" 2>/dev/null | tr -d '\n' | cut -c1-80)"
  exit 1
 }

--- a/.claude/skills/b2/scripts/b2.py
+++ b/.claude/skills/b2/scripts/b2.py
@@ -32,12 +32,32 @@ from __future__ import annotations

 import argparse
 import json
+import os
+import subprocess
 import sys
 from typing import Optional

 from b2_client import B2Client, B2Error, RATE_PER_GB_USD, BYTES_PER_GB, BYTES_PER_GIB


+def _log_skill_error(skill, msg, context=""):
+    """Best-effort error logging: run log-skill-error.sh with `skill` and `msg` (plus `--context context` when non-empty); returns None and swallows any Exception."""
+    try:
+        root = os.environ.get("CLAUDETOOLS_ROOT") or os.path.abspath(  # env override wins; unset or empty falls back
+            os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")  # four directories above this file
+        )
+        h = os.path.join(root, ".claude", "scripts", "log-skill-error.sh")  # path to the logging helper script
+        if not os.path.exists(h):
+            return  # helper not present under this root: skip logging silently
+        a = ["bash", h, skill, msg]  # argv list (no shell=True), so msg is passed as a single argument
+        if context:
+            a += ["--context", context]
+        subprocess.run(a, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,  # helper output discarded; exit status not checked
+                       timeout=10)  # bound the wait; a timeout raises and is swallowed below
+    except Exception:
+        pass  # deliberate: a logging problem must not change the caller's outcome
+
+
 def _emit(obj, as_json: bool, table_fn=None) -> None:
    if as_json or table_fn is None:
        print(json.dumps(obj, indent=2, default=str))
@@ -757,6 +777,10 @@ def main(argv=None) -> int:
        return rc if isinstance(rc, int) else 0
    except B2Error as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
+        _log_skill_error("b2", f"{exc}",
+                         context=f"cmd={getattr(args, 'command', '?')}"
+                                 + (f" http={exc.status}" if exc.status else "")
+                                 + (f" code={exc.code}" if exc.code else ""))
        return 1
    except KeyboardInterrupt:
        return 130
--- a/.claude/skills/bitdefender/scripts/gz.py
+++ b/.claude/skills/bitdefender/scripts/gz.py
@@ -37,11 +37,31 @@ from __future__ import annotations
 import argparse
 import dataclasses
 import json
+import os
+import subprocess
 import sys

 from gz_client import GravityZoneClient, GravityZoneError, GZEndpointSummary


+def _log_skill_error(skill, msg, context=""):
+    """Best-effort error logging: run log-skill-error.sh with `skill` and `msg` (plus `--context context` when non-empty); returns None and swallows any Exception."""
+    try:
+        root = os.environ.get("CLAUDETOOLS_ROOT") or os.path.abspath(  # env override wins; unset or empty falls back
+            os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")  # four directories above this file
+        )
+        h = os.path.join(root, ".claude", "scripts", "log-skill-error.sh")  # path to the logging helper script
+        if not os.path.exists(h):
+            return  # helper not present under this root: skip logging silently
+        a = ["bash", h, skill, msg]  # argv list (no shell=True), so msg is passed as a single argument
+        if context:
+            a += ["--context", context]
+        subprocess.run(a, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,  # helper output discarded; exit status not checked
+                       timeout=10)  # bound the wait; a timeout raises and is swallowed below
+    except Exception:
+        pass  # deliberate: a logging problem must not change the caller's outcome
+
+
 def _emit(obj, as_json: bool, table_fn=None) -> None:
    if as_json or table_fn is None:
        print(json.dumps(obj, indent=2, default=_json_default))
@@ -554,6 +574,8 @@ def main(argv=None) -> int:
        return rc if isinstance(rc, int) else 0
    except GravityZoneError as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
+        _log_skill_error("bitdefender", f"{exc}",
+                         context=f"cmd={getattr(args, 'command', '?')}")
        return 1
    except KeyboardInterrupt:
        return 130
--- a/.claude/skills/coord/scripts/coord.py
+++ b/.claude/skills/coord/scripts/coord.py
@@ -21,7 +21,25 @@ Usage:
  coord.py lock release <id>
  coord.py lock list [--project KEY]
 """
-import sys, os, json, argparse, urllib.request, urllib.error, urllib.parse
+import sys, os, json, argparse, subprocess, urllib.request, urllib.error, urllib.parse
+
+
+def _log_skill_error(skill, msg, context=""):
+    """Best-effort error logging: run log-skill-error.sh with `skill` and `msg` (plus `--context context` when non-empty); returns None and swallows any Exception."""
+    try:
+        root = os.environ.get("CLAUDETOOLS_ROOT") or os.path.abspath(  # env override wins; unset or empty falls back
+            os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")  # four directories above this file
+        )
+        h = os.path.join(root, ".claude", "scripts", "log-skill-error.sh")  # path to the logging helper script
+        if not os.path.exists(h):
+            return  # helper not present under this root: skip logging silently
+        a = ["bash", h, skill, msg]  # argv list (no shell=True), so msg is passed as a single argument
+        if context:
+            a += ["--context", context]
+        subprocess.run(a, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,  # helper output discarded; exit status not checked
+                       timeout=10)  # bound the wait; a timeout raises and is swallowed below
+    except Exception:
+        pass  # deliberate: a logging problem must not change the caller's outcome


 def find_identity():
@@ -73,6 +91,10 @@ def call(method, path, body=None, query=None):
 def die(st, resp, ok=(200, 201)):
    if st not in ok:
        print(f"[coord] ERROR HTTP {st}: {json.dumps(resp)[:500]}", file=sys.stderr)
+        _log_skill_error(  # record the failure via the best-effort logger before exiting
+            "coord", f"coord API call failed (HTTP {st})",
+            context=f"http={st} cmd={' '.join(sys.argv[1:3])} resp={json.dumps(resp)[:80]}",  # first two CLI args + first 80 chars of the JSON-encoded response
+        )
        sys.exit(1)


--- a/.claude/skills/grok/scripts/ask-grok.sh
+++ b/.claude/skills/grok/scripts/ask-grok.sh
@@ -83,6 +83,8 @@ WORK="$TMP/work"; mkdir -p "$WORK"
 PF="$TMP/prompt.txt"; OUT="$TMP/out.json"
 RUN_CWD="$WORK"   # grok's working dir; the 'review' mode overrides to the repo so read_file can reach repo files
 REPO_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$SCRIPT_DIR/../../../.." 2>/dev/null && pwd)}"
+# Functional-error logger: _logerr <message> [--context "..."] forwards its arguments to log-skill-error.sh under $REPO_ROOT with skill name "grok". Output is discarded and a failing helper is ignored (|| true), so the caller's own exit path is unaffected.
+_logerr() { bash "$REPO_ROOT/.claude/scripts/log-skill-error.sh" "grok" "$@" >/dev/null 2>&1 || true; }

 # run grok headless. $1=timeout secs; rest=extra flags. Reads $PF -> $OUT.
 # Never fails the script on grok's exit code (Cancelled is expected; we read artifacts).
@@ -170,7 +172,7 @@ case "$MODE" in
    run_grok 180 --disable-web-search --max-turns 3
    txt="$(jfield text)"
    if [ -n "$txt" ]; then printf '%s\n' "$txt"; else
-      echo "[$SELF] no text (stopReason=$(jfield stopReason)); raw: $OUT" >&2; exit 1; fi
+      echo "[$SELF] no text (stopReason=$(jfield stopReason)); raw: $OUT" >&2; _logerr "grok returned no text" --context "mode=$MODE stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  image)
    [ -z "${1:-}" ] && { echo "usage: $SELF image \"<prompt>\" [out.png]" >&2; exit 2; }
@@ -180,7 +182,7 @@ case "$MODE" in
    sid="$(jfield sessionId)"; art="$(find_artifact "$sid" images)"
    if [ -n "$art" ] && [ -f "$art" ]; then cp -f "$art" "$out"
      echo "[$SELF] image OK -> $out  (session $sid)"
-    else echo "[$SELF] no image artifact (session=$sid, stopReason=$(jfield stopReason))" >&2; exit 1; fi
+    else echo "[$SELF] no image artifact (session=$sid, stopReason=$(jfield stopReason))" >&2; _logerr "grok produced no image artifact" --context "session=$sid stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  video)
    [ -z "${1:-}" ] || [ -z "${2:-}" ] && { echo "usage: $SELF video \"<prompt>\" <input-image> [out.mp4]" >&2; exit 2; }
@@ -192,7 +194,7 @@ case "$MODE" in
    sid="$(jfield sessionId)"; art="$(find_artifact "$sid" videos)"
    if [ -n "$art" ] && [ -f "$art" ]; then cp -f "$art" "$out"
      echo "[$SELF] video OK -> $out  (session $sid)"
-    else echo "[$SELF] no video artifact (session=$sid, stopReason=$(jfield stopReason))" >&2; exit 1; fi
+    else echo "[$SELF] no video artifact (session=$sid, stopReason=$(jfield stopReason))" >&2; _logerr "grok produced no video artifact" --context "session=$sid stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  xsearch)
    [ -z "${1:-}" ] && { echo "usage: $SELF xsearch \"<query>\"" >&2; exit 2; }
@@ -200,7 +202,7 @@ case "$MODE" in
    run_grok 150 --max-turns 6
    txt="$(jfield text)"
    if [ -n "$txt" ]; then printf '%s\n' "$txt"; else
-      echo "[$SELF] no result (stopReason=$(jfield stopReason))" >&2; exit 1; fi
+      echo "[$SELF] no result (stopReason=$(jfield stopReason))" >&2; _logerr "grok xsearch returned no result" --context "mode=xsearch stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  review|file)
    [ -z "${1:-}" ] && { echo "usage: $SELF review <file-path> [instructions]" >&2; exit 2; }
@@ -233,7 +235,7 @@ case "$MODE" in
      fi
    fi
    if [ -n "$txt" ]; then printf '%s\n' "$txt"; else
-      echo "[$SELF] no result (session=$(jfield sessionId), stopReason=$(jfield stopReason))" >&2; exit 1; fi
+      echo "[$SELF] no result (session=$(jfield sessionId), stopReason=$(jfield stopReason))" >&2; _logerr "grok review returned no result" --context "mode=$MODE session=$(jfield sessionId) stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  review-files)
    # review-files [-i "instructions"] <file> [file ...]
@@ -283,7 +285,7 @@ case "$MODE" in
      fi
    fi
    if [ -n "$txt" ]; then printf '%s\n' "$txt"; else
-      echo "[$SELF] no result (session=$(jfield sessionId), stopReason=$(jfield stopReason))" >&2; exit 1; fi
+      echo "[$SELF] no result (session=$(jfield sessionId), stopReason=$(jfield stopReason))" >&2; _logerr "grok review returned no result" --context "mode=$MODE session=$(jfield sessionId) stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  review-diff)
    # review-diff [-C <repo-dir>] [-i "instructions"] <gitref> [-- <pathspec...>]
@@ -328,7 +330,7 @@ case "$MODE" in
      fi
    fi
    if [ -n "$txt" ]; then printf '%s\n' "$txt"; else
-      echo "[$SELF] no result (session=$(jfield sessionId), stopReason=$(jfield stopReason))" >&2; exit 1; fi
+      echo "[$SELF] no result (session=$(jfield sessionId), stopReason=$(jfield stopReason))" >&2; _logerr "grok review returned no result" --context "mode=$MODE session=$(jfield sessionId) stopReason=$(jfield stopReason)"; exit 1; fi
    ;;
  raw)
    "$GROK" "$@"
--- a/.claude/skills/mailprotector/scripts/mp.py
+++ b/.claude/skills/mailprotector/scripts/mp.py
@@ -37,11 +37,31 @@ from __future__ import annotations

 import argparse
 import json
+import os
+import subprocess
 import sys

 from mp_client import MailprotectorClient, MailprotectorError, VALID_SCOPES


+def _log_skill_error(skill, msg, context=""):
+    """Best-effort error logging: run log-skill-error.sh with `skill` and `msg` (plus `--context context` when non-empty); returns None and swallows any Exception."""
+    try:
+        root = os.environ.get("CLAUDETOOLS_ROOT") or os.path.abspath(  # env override wins; unset or empty falls back
+            os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")  # four directories above this file
+        )
+        h = os.path.join(root, ".claude", "scripts", "log-skill-error.sh")  # path to the logging helper script
+        if not os.path.exists(h):
+            return  # helper not present under this root: skip logging silently
+        a = ["bash", h, skill, msg]  # argv list (no shell=True), so msg is passed as a single argument
+        if context:
+            a += ["--context", context]
+        subprocess.run(a, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,  # helper output discarded; exit status not checked
+                       timeout=10)  # bound the wait; a timeout raises and is swallowed below
+    except Exception:
+        pass  # deliberate: a logging problem must not change the caller's outcome
+
+
 def _emit(obj) -> None:
    print(json.dumps(obj, indent=2, ensure_ascii=False, default=str))

@@ -314,6 +334,8 @@ def main(argv=None) -> int:
            p.error(f"unknown command {args.cmd}")
    except MailprotectorError as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
+        _log_skill_error("mailprotector", f"{exc}",
+                         context=f"cmd={getattr(args, 'cmd', '?')}")
        return 1
    return 0

--- a/.claude/skills/onboard365/scripts/onboard365.sh
+++ b/.claude/skills/onboard365/scripts/onboard365.sh
@@ -20,6 +20,7 @@ set -euo pipefail
 TENANT_ADMIN_APPID="709e6eed-0711-4875-9c44-2d3518c47063"
 CONSENT_BASE="https://login.microsoftonline.com"
 CONSENT_REDIRECT="https://azcomputerguru.com"
+__ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"  # root used to locate log-skill-error.sh: $CLAUDETOOLS_ROOT if set and non-empty, else 4 dirs above this script

 # ── Locate the reused remediation-tool scripts ────────────────────────────────
 # Prefer the applied global copy (stable path on every fleet machine); fall back
@@ -43,6 +44,7 @@ RT="$(find_rtool)" || {
  echo "[ERROR] remediation-tool scripts not found." >&2
  echo "        Expected: \$HOME/.claude/skills/remediation-tool/scripts/onboard-tenant.sh" >&2
  echo "        Run a repo sync, or check identity.json.claudetools_root." >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "onboard365" "onboard365: remediation-tool scripts not found (onboard-tenant.sh missing on this machine)" >/dev/null 2>&1 || true
  exit 3
 }

--- a/.claude/skills/packetdial/scripts/ns.py
+++ b/.claude/skills/packetdial/scripts/ns.py
@@ -34,11 +34,31 @@ from __future__ import annotations

 import argparse
 import json
+import os
+import subprocess
 import sys

 from ns_client import NetSapiensClient, PacketDialError


+def _log_skill_error(skill, msg, context=""):
+    """Best-effort error logging: run log-skill-error.sh with `skill` and `msg` (plus `--context context` when non-empty); returns None and swallows any Exception."""
+    try:
+        root = os.environ.get("CLAUDETOOLS_ROOT") or os.path.abspath(  # env override wins; unset or empty falls back
+            os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")  # four directories above this file
+        )
+        h = os.path.join(root, ".claude", "scripts", "log-skill-error.sh")  # path to the logging helper script
+        if not os.path.exists(h):
+            return  # helper not present under this root: skip logging silently
+        a = ["bash", h, skill, msg]  # argv list (no shell=True), so msg is passed as a single argument
+        if context:
+            a += ["--context", context]
+        subprocess.run(a, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,  # helper output discarded; exit status not checked
+                       timeout=10)  # bound the wait; a timeout raises and is swallowed below
+    except Exception:
+        pass  # deliberate: a logging problem must not change the caller's outcome
+
+
 def _emit(obj) -> None:
    print(json.dumps(obj, indent=2, ensure_ascii=False, default=str))

@@ -161,6 +181,8 @@ def main(argv=None) -> int:
            p.error(f"unknown command {args.cmd}")
    except PacketDialError as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
+        _log_skill_error("packetdial", f"{exc}",
+                         context=f"cmd={getattr(args, 'cmd', '?')}")
        return 1
    return 0

--- a/.claude/skills/remediation-tool/scripts/assign-exchange-role.sh
+++ b/.claude/skills/remediation-tool/scripts/assign-exchange-role.sh
@@ -86,8 +86,10 @@ process_one() {
    case "$rc" in
        201) echo "ASSIGNED (Exchange Admin -> Exchange Operator SP)" ;;
        400) if echo "$body" | grep -qiE 'conflicting object|already (exist|present)'; then echo "OK    (already assigned)"
-             else echo "ERROR  (HTTP 400: $(echo "$body" | jqr '.error.message // .' | head -c 120))"; fi ;;
-        *)   echo "ERROR  (HTTP $rc: $(echo "$body" | jqr '.error.message // .' | head -c 120))" ;;
+             else echo "ERROR  (HTTP 400: $(echo "$body" | jqr '.error.message // .' | head -c 120))"
+                  bash "$REPO_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "assign-exchange-role: role assignment POST failed" --context "tenant=$tgt http=400 msg=$(echo "$body" | jqr '.error.message // .' | head -c 80)" >/dev/null 2>&1 || true; fi ;;
+        *)   echo "ERROR  (HTTP $rc: $(echo "$body" | jqr '.error.message // .' | head -c 120))"
+             bash "$REPO_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "assign-exchange-role: role assignment POST failed" --context "tenant=$tgt http=$rc msg=$(echo "$body" | jqr '.error.message // .' | head -c 80)" >/dev/null 2>&1 || true ;;
    esac
 }

@@ -95,7 +97,7 @@ echo "=== assign-exchange-role  [mode=$MODE] ==="
 echo "Role: Exchange Administrator ($EXCH_ADMIN_TEMPLATE)  ->  SP: Exchange Operator ($EXCHANGE_OP_APPID)"
 echo "------------------------------------------------------------------------"
 if [ "$TARGET" = "--all" ]; then
-    [ -f "$TENANTS_MD" ] || { echo "[ERROR] tenants.md not found: $TENANTS_MD" >&2; exit 66; }
+    [ -f "$TENANTS_MD" ] || { echo "[ERROR] tenants.md not found: $TENANTS_MD" >&2; bash "$REPO_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "assign-exchange-role: --all run but references/tenants.md not found" --context "path=$TENANTS_MD" >/dev/null 2>&1 || true; exit 66; }
    # extract tenant GUIDs from the markdown table (column 3)
    grep -oE '[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}' "$TENANTS_MD" \
        | sort -u | while read -r tid; do process_one "$tid"; done
--- a/.claude/skills/remediation-tool/scripts/get-token.sh
+++ b/.claude/skills/remediation-tool/scripts/get-token.sh
@@ -239,6 +239,7 @@ case "$AUTH_OVERRIDE" in
    if [[ -z "$CERT_X5T" || -z "$CERT_KEY_B64" ]]; then
      echo "ERROR: REMEDIATION_AUTH=cert but cert fields missing in vault ($VAULT_PATH)" >&2
      echo "  Required fields under credentials: cert_thumbprint_b64url, cert_private_key_pem_b64" >&2
+      bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "get-token: cert auth forced but cert fields missing in vault" --context "tier=$TIER vault=$VAULT_PATH" >/dev/null 2>&1 || true
      exit 4
    fi
    AUTH_METHOD="cert"
@@ -251,6 +252,7 @@ case "$AUTH_OVERRIDE" in
    if [[ -z "$CLIENT_SECRET" ]]; then
      echo "ERROR: REMEDIATION_AUTH=secret but client_secret missing in vault ($VAULT_PATH)" >&2
      echo "  Check field: credentials.client_secret  (or credentials.credential for older entries)" >&2
+      bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "get-token: secret auth forced but client_secret missing in vault" --context "tier=$TIER vault=$VAULT_PATH" >/dev/null 2>&1 || true
      exit 4
    fi
    AUTH_METHOD="secret"
@@ -269,6 +271,7 @@ case "$AUTH_OVERRIDE" in
        echo "ERROR: no usable credential found in $VAULT_PATH" >&2
        echo "  Need either credentials.cert_thumbprint_b64url + credentials.cert_private_key_pem_b64," >&2
        echo "  or credentials.client_secret (or legacy credentials.credential)." >&2
+        bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "get-token: no usable credential (cert or client_secret) in vault" --context "tier=$TIER vault=$VAULT_PATH" >/dev/null 2>&1 || true
        exit 4
      fi
      AUTH_METHOD="secret"
@@ -336,6 +339,7 @@ PY
  if [[ $ASSERT_RC -ne 0 || -z "$CLIENT_ASSERTION" ]]; then
    echo "ERROR: failed to build client_assertion JWT" >&2
    [[ -n "$CLIENT_ASSERTION" ]] && echo "$CLIENT_ASSERTION" >&2
+    bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "get-token: failed to build client_assertion JWT (cert auth)" --context "tier=$TIER rc=$ASSERT_RC" >/dev/null 2>&1 || true
    exit 4
  fi

@@ -371,11 +375,13 @@ if [[ -z "$TOKEN" ]]; then
    echo "  After the admin accepts, run onboard-tenant.sh to assign required directory roles:" >&2
    SCRIPT_DIR_ERR="$(dirname "${BASH_SOURCE[0]}")"
    echo "  bash ${SCRIPT_DIR_ERR}/onboard-tenant.sh ${TARGET}" >&2
+    bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "get-token: AADSTS7000229 — app not consented in tenant" --context "tenant=$TENANT_ID tier=$TIER auth=$AUTH_METHOD" >/dev/null 2>&1 || true
    exit 5
  fi

  echo "ERROR: token request failed (tenant=$TENANT_ID tier=$TIER auth=$AUTH_METHOD)" >&2
  echo "$RESP" >&2
+  bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "get-token: token request failed (no access_token)" --context "tenant=$TENANT_ID tier=$TIER auth=$AUTH_METHOD err=${ERROR_CODE:-none}" >/dev/null 2>&1 || true
  exit 5
 fi

--- a/.claude/skills/remediation-tool/scripts/onboard-tenant.sh
+++ b/.claude/skills/remediation-tool/scripts/onboard-tenant.sh
@@ -23,6 +23,7 @@
 set -euo pipefail

 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+__ROOT="${CLAUDETOOLS_ROOT:-$(cd "$SCRIPT_DIR/../../../.." && pwd)}"  # root used to locate log-skill-error.sh: $CLAUDETOOLS_ROOT if set and non-empty, else 4 dirs above SCRIPT_DIR

 TARGET="${1:?Usage: onboard-tenant.sh <domain-or-tenant-id> [--dry-run]}"
 DRY_RUN=false
@@ -182,6 +183,7 @@ create_sp_if_missing() {
      return 0
    fi
    echo "  [ERROR] Failed to create SP for $app_name: $(echo "$resp" | jq -r '.error.message // empty')" >&2
+    bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "onboard-tenant: failed to create service principal" --context "app=$app_name appId=$app_id msg=$(echo "$resp" | jq -r '.error.message // empty' | head -c 80)" >/dev/null 2>&1 || true
    return 1
  fi

@@ -239,6 +241,7 @@ grant_app_role() {
      return 0
    fi
    echo "    [ERROR] grant_app_role failed for $role_id: $(echo "$resp" | jq -r '.error.message // "unknown"')" >&2
+    bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "onboard-tenant: grant_app_role appRoleAssignment failed" --context "role=$role_id msg=$(echo "$resp" | jq -r '.error.message // "unknown"' | head -c 80)" >/dev/null 2>&1 || true
    return 1
  fi
 }
@@ -380,6 +383,7 @@ assign_role() {
    fi
    echo "    [ERROR] Failed to assign $role_name" >&2
    echo "    Response: $resp" >&2
+    bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "onboard-tenant: failed to assign directory role" --context "role=$role_name sp=$sp_oid msg=$(echo "$resp" | jq -r '.error.message // empty' | head -c 80)" >/dev/null 2>&1 || true
    return 1
  fi
  echo "    [OK] $role_name assigned (assignment id=$assigned_id)"
@@ -390,6 +394,7 @@ echo "[INFO] Resolving tenant: $TARGET"
 TENANT_ID=$("$SCRIPT_DIR/resolve-tenant.sh" "$TARGET")
 if [[ -z "$TENANT_ID" ]]; then
  echo "[ERROR] Could not resolve tenant ID for: $TARGET" >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "onboard-tenant: could not resolve tenant ID" --context "target=$TARGET" >/dev/null 2>&1 || true
  exit 1
 fi

@@ -412,6 +417,7 @@ if [[ $GET_TOKEN_EXIT -ne 0 ]]; then
  fi
  echo "[ERROR] Failed to acquire Tenant Admin token (exit $GET_TOKEN_EXIT)" >&2
  echo "$TOKEN_ERR" >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "onboard-tenant: failed to acquire Tenant Admin token" --context "tenant=$TENANT_ID exit=$GET_TOKEN_EXIT" >/dev/null 2>&1 || true
  exit 5
 fi
 TENANT_ADMIN_TOKEN="$TENANT_ADMIN_TOKEN_OUT"
@@ -440,6 +446,7 @@ DEFENDER_SP_OID=$(get_sp_oid "$TENANT_ADMIN_TOKEN" "$DEFENDER_APP_ID")

 if [[ -z "$GRAPH_SP_OID" ]]; then
  echo "[ERROR] Microsoft Graph SP missing — cannot grant app permissions" >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "onboard-tenant: Microsoft Graph SP missing in tenant — cannot grant app permissions" --context "tenant=$TENANT_ID" >/dev/null 2>&1 || true
  exit 1
 fi

--- a/.claude/skills/remediation-tool/scripts/patch-tenant-admin-manifest.sh
+++ b/.claude/skills/remediation-tool/scripts/patch-tenant-admin-manifest.sh
@@ -20,6 +20,8 @@ ROLE_MGMT_PERMISSION_ID="9e3f62cf-ca93-4989-b6ce-bf83c28f9fe8"
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 CLAUDETOOLS_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
 IDENTITY_FILE="$CLAUDETOOLS_ROOT/.claude/identity.json"
+# Repo root for the functional-error logger (4 levels up from this scripts dir). NOTE(review): the override read here is CLAUDETOOLS_ROOT_ENV, not CLAUDETOOLS_ROOT (which this script reassigns above to SCRIPT_DIR/../../..); nothing visible in this script sets CLAUDETOOLS_ROOT_ENV, so the fallback path is presumably always used — confirm the override is reachable.
+__ELOG_ROOT="${CLAUDETOOLS_ROOT_ENV:-$(cd "$SCRIPT_DIR/../../../.." && pwd)}"

 VAULT_ROOT="${VAULT_PATH:-}"
 if [[ -z "$VAULT_ROOT" && -f "$IDENTITY_FILE" ]]; then
@@ -39,7 +41,7 @@ CLIENT_SECRET=$(bash "$VAULT_ROOT/scripts/vault.sh" get-field "$MANAGEMENT_VAULT
 if [[ -z "$CLIENT_SECRET" ]]; then
  CLIENT_SECRET=$(bash "$VAULT_ROOT/scripts/vault.sh" get-field "$MANAGEMENT_VAULT_PATH" credentials.credential 2>/dev/null | tr -d '\r\n' || true)
 fi
-[[ -z "$CLIENT_SECRET" ]] && { echo "[ERROR] Could not read secret from $MANAGEMENT_VAULT_PATH" >&2; exit 4; }
+[[ -z "$CLIENT_SECRET" ]] && { echo "[ERROR] Could not read secret from $MANAGEMENT_VAULT_PATH" >&2; bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: could not read Management app secret from vault" --context "vault=$MANAGEMENT_VAULT_PATH" >/dev/null 2>&1 || true; exit 4; }
 echo "[OK] Management app secret retrieved"

 # ── Step 2: Get Management app token (home tenant) ───────────────────────────
@@ -55,6 +57,7 @@ MGMT_TOKEN=$(echo "$TOKEN_RESP" | jq -r '.access_token // empty')
 if [[ -z "$MGMT_TOKEN" ]]; then
  echo "[ERROR] Failed to acquire Management app token" >&2
  echo "$TOKEN_RESP" >&2
+  bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: failed to acquire Management app token (home tenant)" --context "err=$(echo "$TOKEN_RESP" | jq -r '.error // empty' 2>/dev/null)" >/dev/null 2>&1 || true
  exit 5
 fi
 echo "[OK] Management app token acquired"
@@ -73,6 +76,7 @@ APP_DISPLAY=$(echo "$APP_RESP" | jq -r '.value[0].displayName // empty')
 if [[ -z "$APP_OBJ_ID" ]]; then
  echo "[ERROR] Tenant Admin application not found (appId=$TENANT_ADMIN_APP_ID)" >&2
  echo "Response: $APP_RESP" >&2
+  bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: Tenant Admin application registration not found" --context "appId=$TENANT_ADMIN_APP_ID" >/dev/null 2>&1 || true
  exit 6
 fi
 echo "[OK] Found app: $APP_DISPLAY (objectId=$APP_OBJ_ID)"
@@ -108,6 +112,7 @@ else
    echo "[OK] App manifest patched (HTTP 204)"
  else
    echo "[ERROR] PATCH returned HTTP $PATCH_RESP" >&2
+    bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: app manifest PATCH failed" --context "appObjId=$APP_OBJ_ID http=$PATCH_RESP" >/dev/null 2>&1 || true
    exit 7
  fi
 fi
@@ -121,7 +126,7 @@ TA_SP_RESP=$(curl -s --max-time 15 \
  --data-urlencode "\$select=id,displayName" \
  "https://graph.microsoft.com/v1.0/servicePrincipals")
 TA_SP_OID=$(echo "$TA_SP_RESP" | jq -r '.value[0].id // empty')
-[[ -z "$TA_SP_OID" ]] && { echo "[ERROR] Tenant Admin SP not found in home tenant" >&2; exit 8; }
+[[ -z "$TA_SP_OID" ]] && { echo "[ERROR] Tenant Admin SP not found in home tenant" >&2; bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: Tenant Admin SP not found in home tenant" --context "appId=$TENANT_ADMIN_APP_ID" >/dev/null 2>&1 || true; exit 8; }
 echo "[OK] Tenant Admin SP: $TA_SP_OID"

 echo "[INFO] Locating Microsoft Graph SP in home tenant..."
@@ -132,7 +137,7 @@ GRAPH_SP_RESP=$(curl -s --max-time 15 \
  --data-urlencode "\$select=id" \
  "https://graph.microsoft.com/v1.0/servicePrincipals")
 GRAPH_SP_OID=$(echo "$GRAPH_SP_RESP" | jq -r '.value[0].id // empty')
-[[ -z "$GRAPH_SP_OID" ]] && { echo "[ERROR] Microsoft Graph SP not found in home tenant" >&2; exit 8; }
+[[ -z "$GRAPH_SP_OID" ]] && { echo "[ERROR] Microsoft Graph SP not found in home tenant" >&2; bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: Microsoft Graph SP not found in home tenant" >/dev/null 2>&1 || true; exit 8; }
 echo "[OK] Microsoft Graph SP: $GRAPH_SP_OID"

 # ── Step 6: Check if appRoleAssignment already granted ────────────────────────
@@ -165,6 +170,7 @@ else
  if [[ -z "$GRANT_ID" ]]; then
    echo "[ERROR] Failed to grant appRoleAssignment" >&2
    echo "$GRANT_RESP" >&2
+    bash "$__ELOG_ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "patch-tenant-admin-manifest: failed to grant RoleManagement.ReadWrite.Directory appRoleAssignment" --context "msg=$(echo "$GRANT_RESP" | jq -r '.error.message // empty' 2>/dev/null | head -c 80)" >/dev/null 2>&1 || true
    exit 9
  fi
  echo "[OK] appRoleAssignment granted (id=$GRANT_ID)"
--- a/.claude/skills/remediation-tool/scripts/reset-password.sh
+++ b/.claude/skills/remediation-tool/scripts/reset-password.sh
@@ -23,6 +23,7 @@
 set -euo pipefail

 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+__ROOT="${CLAUDETOOLS_ROOT:-$(cd "$SCRIPT_DIR/../../../.." && pwd)}"

 TENANT_INPUT="${1:?usage: reset-password.sh <tenant|domain> <upn> <new-password> [--force-change]}"
 UPN="${2:?usage: reset-password.sh <tenant|domain> <upn> <new-password> [--force-change]}"
@@ -43,7 +44,7 @@ G="https://graph.microsoft.com/v1.0"
 # --- resolve target user object id ---
 UID_=$(curl -s "${GH[@]}" "$G/users/${UPN}?\$select=id" | tr -d '\000-\037' \
        | python -c "import sys,json;print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
-[[ -z "$UID_" ]] && { echo "[ERROR] user not found: $UPN" >&2; exit 1; }
+[[ -z "$UID_" ]] && { echo "[ERROR] user not found: $UPN" >&2; bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "reset-password: target user not found / Graph returned no id" --context "tenant=$TENANT_ID upn=$UPN" >/dev/null 2>&1 || true; exit 1; }
 echo "[info] tenant=$TENANT_ID  target=$UPN  id=$UID_  force_change=$FORCE_CHANGE"

 # --- build payload (single-quoted heredoc would block $NEWPW; use python to emit JSON safely) ---
@@ -61,6 +62,7 @@ fi
 if [[ "$CODE" != "403" ]]; then
  echo "[ERROR] unexpected HTTP $CODE on password PATCH" >&2
  curl -s -X PATCH "${GH[@]}" "$G/users/$UID_" --data-binary "$PAYLOAD" | tr -d '\000-\037' >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "reset-password: unexpected HTTP on password PATCH" --context "tenant=$TENANT_ID upn=$UPN http=$CODE" >/dev/null 2>&1 || true
  exit 1
 fi

@@ -69,7 +71,7 @@ echo "[info] 403 on direct reset (target likely holds an admin role) -> JIT elev
 # --- resolve tenant-admin SP object id ---
 SPID=$(curl -s "${GH[@]}" "$G/servicePrincipals(appId='$TENANT_ADMIN_APPID')?\$select=id" | tr -d '\000-\037' \
        | python -c "import sys,json;print(json.load(sys.stdin).get('id',''))")
-[[ -z "$SPID" ]] && { echo "[ERROR] could not resolve Tenant Admin service principal" >&2; exit 1; }
+[[ -z "$SPID" ]] && { echo "[ERROR] could not resolve Tenant Admin service principal" >&2; bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "reset-password: could not resolve Tenant Admin SP for JIT elevation" --context "tenant=$TENANT_ID" >/dev/null 2>&1 || true; exit 1; }

 # --- does the SP already hold Privileged Authentication Administrator? ---
 EXISTING=$(curl -s "${GH[@]}" "$G/roleManagement/directory/roleAssignments?\$filter=principalId+eq+'$SPID'+and+roleDefinitionId+eq+'$PAA_ROLE_ID'" \
@@ -82,7 +84,7 @@ else
  ASSIGN_BODY=$(SPID="$SPID" RID="$PAA_ROLE_ID" python -c "import os,json;print(json.dumps({'principalId':os.environ['SPID'],'roleDefinitionId':os.environ['RID'],'directoryScopeId':'/'}))")
  CREATED_ASSIGNMENT=$(curl -s -X POST "${GH[@]}" "$G/roleManagement/directory/roleAssignments" --data-binary "$ASSIGN_BODY" \
                        | tr -d '\000-\037' | python -c "import sys,json;d=json.load(sys.stdin);print(d.get('id',''))" 2>/dev/null || true)
-  [[ -z "$CREATED_ASSIGNMENT" ]] && { echo "[ERROR] failed to assign Privileged Authentication Administrator to SP" >&2; exit 1; }
+  [[ -z "$CREATED_ASSIGNMENT" ]] && { echo "[ERROR] failed to assign Privileged Authentication Administrator to SP" >&2; bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "reset-password: failed to assign Privileged Authentication Administrator to SP (JIT elevation)" --context "tenant=$TENANT_ID sp=$SPID" >/dev/null 2>&1 || true; exit 1; }
  echo "[info] assigned Privileged Authentication Administrator to SP (assignment $CREATED_ASSIGNMENT)"
 fi

@@ -90,7 +92,7 @@ fi
 cleanup() {
  if [[ -n "$CREATED_ASSIGNMENT" ]]; then
    DC=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "${GH[@]}" "$G/roleManagement/directory/roleAssignments/$CREATED_ASSIGNMENT")
-    if [[ "$DC" == "204" ]]; then echo "[info] removed JIT role assignment (de-elevated)"; else echo "[WARNING] failed to remove JIT role assignment $CREATED_ASSIGNMENT (HTTP $DC) - REMOVE MANUALLY" >&2; fi
+    if [[ "$DC" == "204" ]]; then echo "[info] removed JIT role assignment (de-elevated)"; else echo "[WARNING] failed to remove JIT role assignment $CREATED_ASSIGNMENT (HTTP $DC) - REMOVE MANUALLY" >&2; bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "reset-password: failed to remove JIT Privileged Auth Admin role - standing privilege left behind, REMOVE MANUALLY" --context "tenant=$TENANT_ID assignment=$CREATED_ASSIGNMENT http=$DC" >/dev/null 2>&1 || true; fi
  fi
 }
 trap cleanup EXIT
@@ -108,4 +110,5 @@ done

 echo "[ERROR] password reset still failing after elevation (last HTTP $CODE)" >&2
 curl -s -X PATCH "${GH[@]}" "$G/users/$UID_" --data-binary "$PAYLOAD" | tr -d '\000-\037' >&2
+bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "reset-password: reset still failing after JIT elevation + retries" --context "tenant=$TENANT_ID upn=$UPN http=$CODE" >/dev/null 2>&1 || true
 exit 1
--- a/.claude/skills/remediation-tool/scripts/resolve-tenant.sh
+++ b/.claude/skills/remediation-tool/scripts/resolve-tenant.sh
@@ -4,6 +4,8 @@
 # Output (stdout): tenant GUID. Exit 0 on success, 1 on failure.
 set -euo pipefail

+__ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"
+
 INPUT="${1:?usage: resolve-tenant.sh <domain|upn|tenant-id>}"

 # If it looks like a GUID already, pass through.
@@ -31,6 +33,7 @@ TENANT_ID=$(echo "$RESP" | jq -r '.issuer // empty' | sed -E 's|^https://login\.
 if [[ -z "$TENANT_ID" ]] || [[ ! "$TENANT_ID" =~ ^[0-9a-fA-F]{8}- ]]; then
  echo "ERROR: could not resolve tenant for domain: $DOMAIN" >&2
  echo "Response: $RESP" >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "resolve-tenant: OpenID discovery did not return a tenant GUID" --context "domain=$DOMAIN" >/dev/null 2>&1 || true
  exit 1
 fi

--- a/.claude/skills/remediation-tool/scripts/user-breach-check.sh
+++ b/.claude/skills/remediation-tool/scripts/user-breach-check.sh
@@ -6,6 +6,7 @@
 set -euo pipefail

 SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
+__ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"

 TENANT_INPUT="${1:?usage: user-breach-check.sh <tenant-id|domain> <upn>}"
 UPN="${2:?usage: user-breach-check.sh <tenant-id|domain> <upn>}"
@@ -28,6 +29,7 @@ UID_=$(jq -r '.id // empty' "$OUT/00_user.json")
 if [[ -z "$UID_" ]]; then
  echo "ERROR: user not found or Graph returned error" >&2
  cat "$OUT/00_user.json" >&2
+  bash "$__ROOT/.claude/scripts/log-skill-error.sh" "remediation-tool" "user-breach-check: user not found or Graph returned error resolving user object" --context "tenant=$TENANT_ID upn=$UPN err=$(jq -r '.error.code // empty' "$OUT/00_user.json" 2>/dev/null)" >/dev/null 2>&1 || true
  exit 1
 fi
 echo "[info] object id: $UID_"
--- a/.claude/skills/skill-creator/SKILL.md
+++ b/.claude/skills/skill-creator/SKILL.md
@@ -98,6 +98,23 @@ After creating the files:
   - Commands: Tell them to use `/{name}` or `/{name} arguments`
 3. Remind them to update CLAUDE.md's Commands & Skills table if they want it documented there

+## Mandatory: functional error logging
+
+**Every skill MUST report genuine functional errors to `errorlog.md`** via the canonical
+helper, so failures can be linted and fed back into skill improvements (CLAUDE.md core rule).
+Bake this into the skill from the start:
+
+- In each skill **script's failure branches** (API/auth failure, unexpected response,
+  validation error, unexpected non-zero exit), call:
+  ```bash
+  bash "$ROOT/.claude/scripts/log-skill-error.sh" "<skill-name>" "<brief error>" --context "op=... http=..." >/dev/null 2>&1 || true
+  ```
+  It stamps date+machine, inserts in the standard `YYYY-MM-DD | MACHINE | skill | error`
+  format, and soft-fails (never breaks the caller). Python skills shell out to the same helper.
+- In the **SKILL.md**, add a line under workflow/guidelines: "On a functional error, log it via
+  `log-skill-error.sh` before surfacing it."
+- Do NOT log expected/handled conditions (no results, no unread, user-declined) — only real failures.
+
 ## Quality Checklist

 Before finalizing, verify:
@@ -107,6 +124,7 @@ Before finalizing, verify:
 - [ ] File is in the correct location (`.claude/skills/` or `.claude/commands/`)
 - [ ] Name uses kebab-case and is concise
 - [ ] For skills with auto-triggers: triggers are specific enough to avoid false positives
+- [ ] **Functional errors are logged** via `log-skill-error.sh` in the script's failure branches

 ## Tips for Good Skills/Commands

--- a/.claude/skills/vault/scripts/vault-helper.sh
+++ b/.claude/skills/vault/scripts/vault-helper.sh
@@ -25,6 +25,12 @@
 # Anything you put OUTSIDE those keys is committed in PLAINTEXT — never do that.
 set -euo pipefail

+# ── Functional-error logger ───────────────────────────────────────────────────
+# errorlog.md + log-skill-error.sh live in the ClaudeTools repo (4 levels up from
+# .claude/skills/vault/scripts/), NOT the SOPS vault repo (VAULT_DIR below).
+CLAUDETOOLS_ROOT="${CLAUDETOOLS_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)}"
+_logerr() { bash "$CLAUDETOOLS_ROOT/.claude/scripts/log-skill-error.sh" "vault" "$@" >/dev/null 2>&1 || true; }
+
 # ── Resolve vault root (the #1 thing sessions get wrong) ──────────────────────
 # Order: $VAULT_PATH override → the repo we're standing in (correct identity) →
 # $HOME identity vault_path → $HOME identity claudetools_root → that repo's identity.
@@ -84,7 +90,7 @@ cmd_verify() {
  local f; f=$(abspath "${1:?usage: verify <path>}")
  [[ -f "$f" ]] || { echo "[ERROR] not found: $f" >&2; exit 1; }
  if ! _is_encrypted "$f"; then echo "[FAIL] $1 — NO encrypted values found (plaintext?)"; exit 1; fi
-  if ! ( cd "$VAULT_DIR" && sops -d "$f" >/dev/null 2>&1 ); then echo "[FAIL] $1 — encrypted but does not decrypt (key mismatch?)"; exit 1; fi
+  if ! ( cd "$VAULT_DIR" && sops -d "$f" >/dev/null 2>&1 ); then echo "[FAIL] $1 — encrypted but does not decrypt (key mismatch?)"; _logerr "sops decrypt failed (key mismatch?)" --context "op=verify path=$1"; exit 1; fi
  echo "[OK] $1 — encrypted and decrypts cleanly"
 }

@@ -129,7 +135,7 @@ doc["notes"]=""
 with open(f,"w",encoding="utf-8",newline="\n") as fh:
    yaml.safe_dump(doc,fh,default_flow_style=False,sort_keys=False,allow_unicode=True)
 PY
-  ( cd "$VAULT_DIR" && sops --encrypt --in-place "$f" ) || { echo "[ERROR] sops encrypt failed; removing plaintext" >&2; rm -f "$f"; exit 1; }
+  ( cd "$VAULT_DIR" && sops --encrypt --in-place "$f" ) || { echo "[ERROR] sops encrypt failed; removing plaintext" >&2; rm -f "$f"; _logerr "sops encrypt failed (new entry)" --context "op=new path=$path"; exit 1; }  # rm BEFORE logging: an interrupted/hung logger must never strand the plaintext secret
  cmd_verify "$path"
  echo "[INFO] Created ${f#$VAULT_DIR/}. Publish with:  bash .claude/scripts/sync.sh   (Phase 6 commits+pushes the vault)"
 }
@@ -142,7 +148,7 @@ cmd_set() {
  local f; f=$(abspath "$path")
  [[ -f "$f" ]] || { echo "[ERROR] not found: ${f#$VAULT_DIR/} (use 'new' to create)" >&2; exit 1; }
  local tmp; tmp=$(mktemp)
-  ( cd "$VAULT_DIR" && sops -d "$f" ) > "$tmp" 2>/dev/null || { echo "[ERROR] decrypt failed" >&2; rm -f "$tmp"; exit 1; }
+  ( cd "$VAULT_DIR" && sops -d "$f" ) > "$tmp" 2>/dev/null || { echo "[ERROR] decrypt failed" >&2; rm -f "$tmp"; _logerr "sops decrypt failed (set)" --context "op=set path=$path"; exit 1; }  # drop the temp file first; logging is best-effort and must not delay cleanup
  SETS="$(printf '%s\n' "${sets[@]}")" "$PY" - "$tmp" <<'PY'
 import os,sys,yaml
 f=sys.argv[1]
@@ -154,7 +160,7 @@ for kv in os.environ["SETS"].splitlines():
 yaml.safe_dump(doc,open(f,"w",encoding="utf-8",newline="\n"),default_flow_style=False,sort_keys=False,allow_unicode=True)
 PY
  cp "$tmp" "$f"; rm -f "$tmp"
-  ( cd "$VAULT_DIR" && sops --encrypt --in-place "$f" ) || { echo "[ERROR] re-encrypt failed" >&2; exit 1; }
+  ( cd "$VAULT_DIR" && sops --encrypt --in-place "$f" ) || { echo "[ERROR] re-encrypt failed - ${f#$VAULT_DIR/} is now PLAINTEXT on disk; do NOT commit it, re-encrypt or restore it manually" >&2; _logerr "sops re-encrypt failed (set) - plaintext left on disk" --context "op=set path=$path"; exit 1; }  # the cp above already overwrote $f with decrypted YAML, so a failed in-place encrypt leaves it unencrypted
  cmd_verify "$path"
  echo "[INFO] Updated ${f#$VAULT_DIR/}. Publish with:  bash .claude/scripts/sync.sh"
 }