#!/usr/bin/env bash
# self-check.sh - ClaudeTools harness self-diagnosis / fleet conformance probe.
#
# V1 is a CENSUS tool. Each machine probes its own harness wiring (tools,
# identity, hooks, skills, commands, scripts, connectivity, capability tier),
# grades what it can against the provisional baseline manifest, and can publish
# the result to the coord API so the fleet can be compared and the baseline
# refined from real data. See ../SKILL.md and ../baseline/README.md.
#
# Usage:
#   self-check.sh             Run checks, print a human report. (default)
#   self-check.sh --json      Emit the structured census JSON to stdout only.
#   self-check.sh --publish   Run checks, then PUT the census to coord
#                             (component selfcheck_<host>).
#   self-check.sh fanout      Broadcast a request to ALL_SESSIONS to run
#                             /self-check --publish.
#   self-check.sh aggregate   Read every machine's published census and print a
#                             fleet table plus a proposed-baseline
#                             (intersection/union) summary.
#
# Portable: bash 3.2+ (macOS), Git Bash (Windows), Linux. Deps: jq, curl.
# Read-only. It collects and reports; it changes nothing on the machine.

set -u

# ---------------------------------------------------------------------------
# Bootstrap: resolve repo root, identity, coord API, session id
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
MANIFEST="$SKILL_DIR/baseline/manifest.json"

if ! command -v jq >/dev/null 2>&1; then
  echo "[ERROR] jq is required and not found on PATH. Install jq, then re-run." >&2
  exit 2
fi

# Some Windows jq builds (winget) emit CRLF line endings; a trailing \r corrupts
# every `for x in $(jq ...)` word and `read`-from-@tsv field. Strip \r from all
# jq output (it is insignificant JSON whitespace and never wanted in raw values).
jq() {
  command jq "$@" | tr -d '\r'
  # Hand back jq's own exit status rather than tr's. Without this the wrapper
  # always returns 0, so `jq -e` validity checks and `jq ... || fallback`
  # constructs elsewhere in this script can never observe a jq failure.
  return "${PIPESTATUS[0]}"
}

if ! command -v curl >/dev/null 2>&1; then
  echo "[ERROR] curl is required and not found on PATH." >&2
  exit 2
fi

if [ ! -f "$MANIFEST" ]; then
  echo "[ERROR] Baseline manifest not found: $MANIFEST" >&2
  exit 2
fi

# identity.json: prefer repo copy, then ~/.claude (mirrors check-messages.sh).
IDENTITY=""
for c in "$REPO_ROOT/.claude/identity.json" "$HOME/.claude/identity.json"; do
  if [ -f "$c" ]; then
    IDENTITY="$c"
    break
  fi
done

# idfield <jq-path>
#   Prints the value at <jq-path> in identity.json, or nothing when the file
#   was not found, the path is absent, or the value is null/false.
#   Returns non-zero when no identity file is known or jq fails.
idfield() { # dotted.path -> value or empty
  [ -n "$IDENTITY" ] && jq -r "$1 // empty" "$IDENTITY" 2>/dev/null
}

HOSTNAME_RAW="$(hostname 2>/dev/null || echo unknown)"
HOST="${HOSTNAME_RAW%.local}"   # drop a trailing ".local" suffix if present
SESSION="${HOST}/claude-main"

# Each of the following prefers the value declared in identity.json and falls
# back to a built-in default / live probe only when the field is empty.
API="$(idfield '.coord_api')"
if [ -z "$API" ]; then
  API="http://172.16.3.30:8001"
fi

PLATFORM="$(idfield '.platform')"
if [ -z "$PLATFORM" ]; then
  case "$(uname -s)" in
    Darwin) PLATFORM="macos" ;;
    Linux) PLATFORM="linux" ;;
    CYGWIN*|MINGW*|MSYS*) PLATFORM="windows" ;;
    *) PLATFORM="unknown" ;;
  esac
fi

ARCH="$(idfield '.architecture')"
if [ -z "$ARCH" ]; then
  ARCH="$(uname -m 2>/dev/null || echo unknown)"
fi

# ---------------------------------------------------------------------------
# Results accumulation. Each check appends one compact JSON object.
# status in {PASS, WARN, FAIL, SKIP, INFO}. Grade: any FAIL->RED, WARN->AMBER.
# ---------------------------------------------------------------------------
# Scratch file that collects one JSON object per check; removed on exit.
RESULTS_FILE="$(mktemp 2>/dev/null || echo "${TMPDIR:-/tmp}/selfcheck.$$")"
: > "$RESULTS_FILE"
trap 'rm -f "$RESULTS_FILE" 2>/dev/null' EXIT

# emit <id> <category> <status> <detail> [fix]
#   Appends one compact JSON result object to $RESULTS_FILE. <fix> defaults to
#   the empty string when omitted.
emit() { # id category status detail fix
  jq -nc --arg id "$1" --arg cat "$2" --arg st "$3" --arg detail "$4" --arg fix "${5:-}" \
    '{id:$id,category:$cat,status:$st,detail:$detail,fix:$fix}' >> "$RESULTS_FILE"
}

# reachable <url>
#   Returns curl's exit status for a silent request with a 4-second cap.
#   Deliberately NOT using --fail: any HTTP response counts as reachable.
reachable() { curl -s -o /dev/null -m 4 "$1" 2>/dev/null; } # exit 0 if HTTP responds

# Content-equal ignoring line endings: a repo LF copy and a ~/.claude CRLF copy
# are the SAME content (the cross-machine case this check polices), so compare
# with \r stripped rather than byte-for-byte (cmp would false-flag them).
same_content() { diff -q <(tr -d '\r' < "$1") <(tr -d '\r' < "$2") >/dev/null 2>&1; }

# valid_json <file>
#   Returns 0 only when <file> parses as JSON with a truthy top-level value.
#   Calls the jq binary directly (`command jq`) so the exit status is jq's own
#   and not that of any output-filtering shell wrapper named jq; the output is
#   discarded, so no line-ending clean-up is needed here.
valid_json() { command jq -e . "$1" >/dev/null 2>&1; }

# norm <path>
#   Prints <path> lower-cased, with backslashes turned into forward slashes and
#   trailing slashes removed. When cygpath is available the path is first
#   converted with `cygpath -m` so drive-letter forms compare equal.
norm() { # path -> lowercase, forward-slash, drive-letter, no trailing slash
  local p="$1"
  command -v cygpath >/dev/null 2>&1 && p="$(cygpath -m "$p" 2>/dev/null || echo "$p")"
  printf '%s' "$p" | tr 'A-Z' 'a-z' | sed 's#\\#/#g; s#/\{1,\}$##'
}

# ---------------------------------------------------------------------------
# CHECK: identity
# ---------------------------------------------------------------------------
# Validates identity.json: presence, JSON syntax, manifest-required fields,
# path fields (claudetools_root, vault_path), machine-vs-hostname, and git
# user.name/user.email against the identity. Stops early (after one FAIL) when
# the file is missing or unparseable.
check_identity() {
  local f v missing=""

  if [ -z "$IDENTITY" ]; then
    emit identity.present identity FAIL "identity.json not found (.claude or ~/.claude)" \
      "Run onboarding; create .claude/identity.json then 'bash .claude/scripts/migrate-identity.sh'"
    return
  fi
  if ! valid_json "$IDENTITY"; then
    emit identity.parse identity FAIL "identity.json is not valid JSON: $IDENTITY" "Fix the JSON syntax"
    return
  fi
  emit identity.present identity PASS "identity.json present and valid: $IDENTITY"

  # Field names come from the manifest; they are split on whitespace.
  for f in $(jq -r '.required_identity_fields[]' "$MANIFEST"); do
    v="$(jq -r ".$f // empty" "$IDENTITY" 2>/dev/null)"
    [ -z "$v" ] && missing="$missing $f"
  done
  if [ -n "$missing" ]; then
    emit identity.fields identity WARN "missing/empty identity fields:$missing" \
      "bash .claude/scripts/migrate-identity.sh (populates machine-specific fields)"
  else
    emit identity.fields identity PASS "all required identity fields present"
  fi

  # --- path fields: identity.json is the map of WHERE things live on this box.
  # It is foundational - every later check trusts claudetools_root / vault_path.
  # Verify they resolve to real locations and that claudetools_root is in fact
  # the repo we are running from (a stale clone path is a silent footgun).
  local ctroot; ctroot="$(idfield '.claudetools_root')"
  if [ -z "$ctroot" ]; then
    emit identity.claudetools_root identity FAIL "identity.claudetools_root not set" \
      "Set claudetools_root in identity.json to this repo's absolute path"
  elif [ ! -d "$ctroot" ]; then
    emit identity.claudetools_root identity FAIL "claudetools_root does not exist: $ctroot" \
      "Fix claudetools_root in identity.json (machine moved/renamed the repo?)"
  elif [ "$(norm "$ctroot")" != "$(norm "$REPO_ROOT")" ]; then
    emit identity.claudetools_root identity WARN \
      "claudetools_root ($ctroot) != running repo ($REPO_ROOT)" \
      "Reconcile claudetools_root in identity.json with the repo you actually run from"
  else
    emit identity.claudetools_root identity PASS "claudetools_root resolves to this repo ($ctroot)"
  fi

  local vpath2; vpath2="$(idfield '.vault_path')"
  if [ -z "$vpath2" ]; then
    emit identity.vault_path identity FAIL "identity.vault_path not set (cannot locate the SOPS vault)" \
      "Set vault_path in identity.json to the cloned vault repo path"
  elif [ ! -d "$vpath2" ]; then
    emit identity.vault_path identity FAIL "vault_path does not exist: $vpath2" \
      "Clone the vault repo and set vault_path in identity.json"
  else
    emit identity.vault_path identity PASS "vault_path resolves ($vpath2)"
  fi

  # machine field vs actual hostname (case-insensitive).
  # NOTE(review): an empty .machine takes the PASS branch below.
  local idmach; idmach="$(idfield '.machine')"
  if [ -n "$idmach" ] && [ "$(echo "$idmach" | tr 'A-Z' 'a-z')" != "$(echo "$HOST" | tr 'A-Z' 'a-z')" ]; then
    emit identity.hostname identity WARN "identity.machine='$idmach' != actual hostname '$HOST'" \
      "Update .machine in identity.json (did you clone onto a new box?)"
  else
    emit identity.hostname identity PASS "identity.machine matches hostname ($HOST)"
  fi

  # git config vs identity
  local gn ge idn ide
  gn="$(git -C "$REPO_ROOT" config user.name 2>/dev/null)"
  ge="$(git -C "$REPO_ROOT" config user.email 2>/dev/null)"
  idn="$(idfield '.full_name')"
  ide="$(idfield '.email')"
  if [ -n "$idn" ] && [ "$gn" != "$idn" ]; then
    emit identity.git_name identity WARN "git user.name='$gn' != identity.full_name='$idn'" \
      "git config user.name \"$idn\""
  else
    emit identity.git_name identity PASS "git user.name matches identity ($gn)"
  fi
  if [ -n "$ide" ] && [ "$ge" != "$ide" ]; then
    emit identity.git_email identity WARN "git user.email='$ge' != identity.email='$ide'" \
      "git config user.email \"$ide\""
  else
    emit identity.git_email identity PASS "git user.email matches identity ($ge)"
  fi
}

# ---------------------------------------------------------------------------
# CHECK: tooling (required + capability-gated)
# ---------------------------------------------------------------------------
# toolver <tool>
#   Prints the first line of `<tool> --version`, or nothing on failure.
#   stdin is pinned to /dev/null because this is called from inside
#   `while read ... done < <(jq ...)` loops: a tool that reads stdin would
#   otherwise consume the remaining manifest rows.
toolver() { # best-effort one-line version
  "$1" --version </dev/null 2>/dev/null | head -1 || true
}

# Emits one tool.* result per manifest-required tool (FAIL when missing), one
# tool.python result for the any_of interpreter list, and one INFO-only cap.*
# result per capability tool.
check_tools() {
  local n why
  while IFS=$'\t' read -r n why; do
    if command -v "$n" >/dev/null 2>&1; then
      emit "tool.$n" tooling PASS "$n present ($(toolver "$n"))"
    else
      emit "tool.$n" tooling FAIL "$n MISSING (required: $why)" "Install $n and ensure it is on PATH"
    fi
  done < <(jq -r '.required_tools[] | [.name, .why] | @tsv' "$MANIFEST")

  # python: any_of - the first candidate found on PATH wins.
  local pyok="" pc
  for pc in $(jq -r '.required_python.any_of[]' "$MANIFEST"); do
    if command -v "$pc" >/dev/null 2>&1; then
      pyok="$pc"
      break
    fi
  done
  if [ -n "$pyok" ]; then
    local declared; declared="$(idfield '.python.command')"
    if [ -n "$declared" ] && ! command -v "$declared" >/dev/null 2>&1; then
      emit tool.python tooling WARN "identity.python.command='$declared' not on PATH; '$pyok' is available" \
        "Update .python.command in identity.json or re-run migrate-identity.sh"
    else
      emit tool.python tooling PASS "python available ($pyok; identity declares '${declared:-unset}')"
    fi
  else
    emit tool.python tooling FAIL "no python interpreter found (tried py/python3/python)" "Install Python"
  fi

  # capability tools - presence only, never FAIL
  local cn cap cwhy
  while IFS=$'\t' read -r cn cap cwhy; do
    if command -v "$cn" >/dev/null 2>&1; then
      emit "cap.$cn" capability INFO "$cn present [$cap] ($(toolver "$cn"))"
    else
      emit "cap.$cn" capability INFO "$cn absent [$cap] - capability off ($cwhy)"
    fi
  done < <(jq -r '.capability_tools[] | [.name, .capability, .why] | @tsv' "$MANIFEST")
}

# ---------------------------------------------------------------------------
# CHECK: capability tier (ollama) + effective ruleset
# ---------------------------------------------------------------------------
# Probes localhost:11434 and, if declared, the identity's ollama.fallback, then
# resolves the tier (local > remote > none) and looks up the matching
# tier0_engine rule in the manifest.
check_capability_tier() {
  local declared fb local_ok="" remote_ok="" tier rule
  declared="$(idfield '.ollama.endpoint')"
  fb="$(idfield '.ollama.fallback')"

  reachable "http://localhost:11434/api/tags" && local_ok=1
  [ -n "$fb" ] && reachable "${fb%/}/api/tags" && remote_ok=1

  if [ -n "$local_ok" ]; then
    tier="ollama_local"
  elif [ -n "$remote_ok" ]; then
    tier="ollama_remote"
  else
    tier="ollama_none"
  fi
  rule="$(jq -r ".capability_rules.$tier.tier0_engine" "$MANIFEST")"

  # Does the resolved tier agree with what identity declares?
  if [ "$tier" = "ollama_none" ]; then
    emit captier.ollama capability WARN "Ollama tier = NONE (local + fallback both unreachable). Effective rule: $rule" \
      "Confirm this machine is meant to run without Ollama; ensure Tier-0 work routes to haiku, not blocked"
  else
    local note=""
    if [ "$tier" = "ollama_remote" ] && echo "$declared" | grep -q "localhost"; then
      note=" (identity declares localhost but local is down; using fallback $fb)"
    fi
    emit captier.ollama capability PASS "Ollama tier = ${tier#ollama_}${note}. Effective Tier-0: $rule"
  fi
}

# ---------------------------------------------------------------------------
# CHECK: required scripts + hook files (exist + executable)
# ---------------------------------------------------------------------------
# Paths are repo-relative and come from the manifest; only *.sh / *.template
# files are held to the executable-bit requirement.
check_files() {
  local rel p
  for rel in $(jq -r '.required_scripts[], .required_hook_files[]' "$MANIFEST"); do
    p="$REPO_ROOT/$rel"
    if [ ! -f "$p" ]; then
      emit "file.$rel" files FAIL "missing: $rel" "Restore via /sync (git pull from Gitea)"
    elif [ ! -x "$p" ] && echo "$rel" | grep -qE '\.(sh|template)$'; then
      emit "file.$rel" files WARN "present but not executable: $rel" "chmod +x \"$rel\""
    else
      emit "file.$rel" files PASS "present: $rel"
    fi
  done
}

# ---------------------------------------------------------------------------
# CHECK: settings.json hooks wired correctly
# ---------------------------------------------------------------------------
# For every manifest-required hook, looks for a command under that event in
# .claude/settings.json containing the expected substring; also reports whether
# .claude/current-mode exists.
check_settings_hooks() {
  local settings="$REPO_ROOT/.claude/settings.json"
  if [ ! -f "$settings" ] || ! valid_json "$settings"; then
    emit hooks.settings hooks FAIL "settings.json missing or invalid JSON" "Restore .claude/settings.json via /sync"
    return
  fi

  local ev needle why found
  # NB: omit .matcher from the TSV - an empty middle field collapses under tab
  # IFS (tab is IFS-whitespace), shifting columns. We do not use the matcher here.
  while IFS=$'\t' read -r ev needle why; do
    # any hook command under this event containing the needle
    found="$(jq -r --arg ev "$ev" --arg n "$needle" \
      '(.hooks[$ev] // []) | [.[].hooks[]?.command // ""] | map(select(contains($n))) | length' \
      "$settings" 2>/dev/null)"
    # The trailing 2>/dev/null hides the "integer expected" noise when jq
    # produced no usable number; that case falls through to FAIL.
    if [ "${found:-0}" -gt 0 ] 2>/dev/null; then
      emit "hook.$ev" hooks PASS "$ev hook wired ($needle)"
    else
      emit "hook.$ev" hooks FAIL "$ev hook NOT wired (expected command containing '$needle' - $why)" \
        "Add the $ev hook to .claude/settings.json (see baseline manifest required_settings_hooks)"
    fi
  done < <(jq -r '.required_settings_hooks[] | [.event, .command_contains, .why] | @tsv' "$MANIFEST")

  # current-mode file (created by UserPromptSubmit hook, but flag if absent)
  if [ -f "$REPO_ROOT/.claude/current-mode" ]; then
    emit hook.current-mode hooks PASS "current-mode present ($(tr -d '[:space:]' < "$REPO_ROOT/.claude/current-mode"))"
  else
    emit hook.current-mode hooks WARN "current-mode missing (auto-created on next prompt)" "echo general > .claude/current-mode"
  fi
}

# ---------------------------------------------------------------------------
# CHECK: git remote + post-commit hooks
# ---------------------------------------------------------------------------
# PASS when the origin URL contains either the manifest's host substring or its
# internal IP; optionally checks that .git/hooks/post-commit exists.
check_git() {
  local url want host_ip
  url="$(git -C "$REPO_ROOT" remote get-url origin 2>/dev/null)"
  want="$(jq -r '.git.remote_host_contains' "$MANIFEST")"
  host_ip="$(jq -r '.git.remote_host_internal_ip' "$MANIFEST")"

  if [ -z "$url" ]; then
    emit git.remote git WARN "no 'origin' remote on $REPO_ROOT" "git remote add origin "
  elif echo "$url" | grep -qF "$want" || echo "$url" | grep -qF "$host_ip"; then
    emit git.remote git PASS "origin -> $url"
  else
    emit git.remote git FAIL "origin does not point at ACG Gitea: $url" \
      "git remote set-url origin http://@$host_ip:3000/azcomputerguru/claudetools.git"
  fi

  if [ "$(jq -r '.git.post_commit_hook_expected' "$MANIFEST")" = "true" ]; then
    if [ -f "$REPO_ROOT/.git/hooks/post-commit" ]; then
      emit git.post_commit git PASS "main-repo post-commit hook installed"
    else
      emit git.post_commit git WARN "main-repo post-commit hook NOT installed (HOOKS.md mandates dev-alerts hook)" \
        "cp .claude/hooks/post-commit.template .git/hooks/post-commit && chmod +x .git/hooks/post-commit"
    fi
  fi
}

# ---------------------------------------------------------------------------
# CHECK: skills + commands conformance vs manifest
# ---------------------------------------------------------------------------
# Manifest entries missing on disk FAIL; on-disk entries absent from the
# manifest are reported as INFO (census candidates), never as failures.
check_skills_commands() {
  local name dir f known

  # skills present
  for name in $(jq -r '.skills[]' "$MANIFEST"); do
    dir="$REPO_ROOT/.claude/skills/$name"
    if [ -d "$dir" ]; then
      if [ -f "$dir/SKILL.md" ] || ls "$dir"/*.md >/dev/null 2>&1 || [ -d "$dir/scripts" ]; then
        emit "skill.$name" skills PASS "skill present: $name"
      else
        emit "skill.$name" skills WARN "skill dir present but looks empty: $name" "Restore skill contents via /sync"
      fi
    else
      emit "skill.$name" skills FAIL "skill MISSING: $name" "Restore .claude/skills/$name via /sync"
    fi
  done

  # extra skills not in manifest (drift to report, not fail in V1).
  # $known is "|a|b|...|" so a whole-name match is a simple *"|name|"* glob.
  known="|$(jq -r '.skills[]' "$MANIFEST" | tr '\n' '|')"
  for dir in "$REPO_ROOT"/.claude/skills/*/; do
    [ -d "$dir" ] || continue
    name="$(basename "$dir")"
    case "$known" in
      *"|$name|"*) ;;
      *) emit "skill.extra.$name" skills INFO "skill present but NOT in baseline: $name (census candidate)" ;;
    esac
  done

  # commands present
  for name in $(jq -r '.commands[]' "$MANIFEST"); do
    if [ -f "$REPO_ROOT/.claude/commands/$name.md" ]; then
      emit "cmd.$name" commands PASS "command present: /$name"
    else
      emit "cmd.$name" commands FAIL "command MISSING: /$name" "Restore .claude/commands/$name.md via /sync"
    fi
  done

  # extra commands (README.md is not a command)
  known="|$(jq -r '.commands[]' "$MANIFEST" | tr '\n' '|')"
  for f in "$REPO_ROOT"/.claude/commands/*.md; do
    [ -f "$f" ] || continue
    name="$(basename "$f" .md)"
    [ "$name" = "README" ] && continue
    case "$known" in
      *"|$name|"*) ;;
      *) emit "cmd.extra.$name" commands INFO "command present but NOT in baseline: /$name (census candidate)" ;;
    esac
  done
}

# ---------------------------------------------------------------------------
# CHECK: vault decrypt readiness
# ---------------------------------------------------------------------------
# Verifies vault_path exists, sops and age are on PATH, and - when
# .claude/scripts/vault.sh is executable - that `vault.sh list` succeeds.
check_vault() {
  local vpath; vpath="$(idfield '.vault_path')"
  if [ -z "$vpath" ]; then
    emit vault.path vault WARN "identity.vault_path not set" "Set vault_path in identity.json"
    return
  fi
  if [ ! -d "$vpath" ]; then
    emit vault.path vault FAIL "vault_path does not exist: $vpath" "Clone the vault repo and set vault_path"
    return
  fi
  emit vault.path vault PASS "vault repo present: $vpath"

  if ! command -v sops >/dev/null 2>&1 || ! command -v age >/dev/null 2>&1; then
    emit vault.tools vault FAIL "sops/age missing - cannot decrypt vault" "Install sops and age"
    return
  fi

  # Lightweight readiness: vault.sh list should enumerate entries without error.
  if [ -x "$REPO_ROOT/.claude/scripts/vault.sh" ]; then
    if bash "$REPO_ROOT/.claude/scripts/vault.sh" list >/dev/null 2>&1; then
      emit vault.list vault PASS "vault.sh list succeeded (sops/age wired)"
    else
      emit vault.list vault WARN "vault.sh list failed - check age key + SOPS_AGE_KEY_FILE" \
        "Verify age key at the SOPS recipient path; run: bash .claude/scripts/vault.sh list"
    fi
  fi
}

# ---------------------------------------------------------------------------
# CHECK: connectivity
# ---------------------------------------------------------------------------
# One net.* result per manifest endpoint: unreachable is FAIL only when the
# endpoint is marked required, otherwise WARN.
check_connectivity() {
  local name url req
  while IFS=$'\t' read -r name url req; do
    if reachable "$url"; then
      emit "net.$name" connectivity PASS "$name reachable ($url)"
    elif [ "$req" = "true" ]; then
      emit "net.$name" connectivity FAIL "$name UNREACHABLE ($url)" "Check VPN/Tailscale/network to 172.16.3.x"
    else
      emit "net.$name" connectivity WARN "$name unreachable ($url) - off-network is OK" ""
    fi
  done < <(jq -r '.connectivity[] | [.name, .url, (.required|tostring)] | @tsv' "$MANIFEST")
}
# ---------------------------------------------------------------------------
# CHECK: duplicate command/skill definitions across search roots.
# Claude Code resolves slash commands and skills from BOTH the repo
# (.claude/commands, .claude/skills) and the user profile (~/.claude/...). When
# the same name exists in both with DIFFERENT content, the harness may resolve a
# different one than you expect - the "same /cmd, different behaviour on the Mac"
# bug. Divergent = WARN; identical = INFO (redundant copy that WILL drift).
# ---------------------------------------------------------------------------
check_duplicates() {
  local kind repo_dir user_dir
  for kind in commands skills; do
    repo_dir="$REPO_ROOT/.claude/$kind"
    user_dir="$HOME/.claude/$kind"
    [ -d "$repo_dir" ] || continue
    if [ ! -d "$user_dir" ]; then
      emit "dup.$kind" duplicates PASS "no user-level ~/.claude/$kind (single source: repo)"
      continue
    fi

    # Counters restart for each kind.
    local name rp up dup_div=0 dup_same=0
    if [ "$kind" = "commands" ]; then
      # commands: compare *.md files by content
      for rp in "$repo_dir"/*.md; do
        [ -f "$rp" ] || continue
        name="$(basename "$rp" .md)"
        [ "$name" = "README" ] && continue
        up="$user_dir/$name.md"
        [ -f "$up" ] || continue
        [ "$rp" -ef "$up" ] && continue # symlink to the same file - cannot drift
        if same_content "$rp" "$up"; then
          dup_same=$((dup_same+1))
        else
          dup_div=$((dup_div+1))
          emit "dup.cmd.$name" duplicates WARN \
            "/$name is DIVERGENT: repo and ~/.claude copies differ (harness may run the wrong one)" \
            "Reconcile: diff \"$rp\" \"$up\" then make ~/.claude/commands/$name.md match the repo (or remove it)"
        fi
      done
    else
      for rp in "$repo_dir"/*/; do
        [ -d "$rp" ] || continue
        name="$(basename "$rp")"
        up="$user_dir/$name"
        [ -d "$up" ] || continue
        [ "$rp" -ef "$up" ] && continue # symlinked dir - cannot drift
        # Only compare when BOTH have a SKILL.md; otherwise not comparable
        # (script-only / *.md-only skills) - skip rather than miscount.
        if [ -f "$rp/SKILL.md" ] && [ -f "$up/SKILL.md" ]; then
          if same_content "$rp/SKILL.md" "$up/SKILL.md"; then
            dup_same=$((dup_same+1))
          else
            dup_div=$((dup_div+1))
            emit "dup.skill.$name" duplicates WARN \
              "skill '$name' is DIVERGENT: repo and ~/.claude SKILL.md differ" \
              "Reconcile ~/.claude/skills/$name with the repo copy (or remove the user-level one)"
          fi
        fi
      done
    fi

    # Summary row only when nothing diverged; divergent items already have
    # their own WARN rows above.
    if [ "$dup_div" -eq 0 ] && [ "$dup_same" -gt 0 ]; then
      emit "dup.$kind" duplicates INFO \
        "$dup_same $kind exist in BOTH repo and ~/.claude (identical now, but a redundant copy that can drift)" \
        "Consider a single source of truth for $kind to prevent future divergence"
    elif [ "$dup_div" -eq 0 ] && [ "$dup_same" -eq 0 ]; then
      emit "dup.$kind" duplicates PASS "no duplicate $kind across roots"
    fi
  done
}

# ---------------------------------------------------------------------------
# CHECK: rogue memories that contradict settings/identity.
# Deterministic core only: index integrity + a conservative, manifest-declared
# set of contradiction patterns evaluated against this machine's identity. The
# SEMANTIC contradiction pass (reasoning over all memories vs identity/settings)
# is a judgment task and is delegated to the model in SKILL.md, not grep.
# ---------------------------------------------------------------------------
check_memory() {
  local mdir="$REPO_ROOT/.claude/memory" idx="$REPO_ROOT/.claude/memory/MEMORY.md"
  if [ ! -d "$mdir" ]; then
    emit memory.dir memory WARN "no .claude/memory directory" "Expected the shared memory store; restore via /sync"
    return
  fi

  if [ ! -f "$idx" ]; then
    emit memory.index memory WARN "MEMORY.md index missing" "Create .claude/memory/MEMORY.md (the loaded index)"
  else
    # orphan detection: every *.md (except MEMORY.md) should be referenced in the index
    local f base orphans=0
    for f in "$mdir"/*.md; do
      [ -f "$f" ] || continue
      base="$(basename "$f")"
      [ "$base" = "MEMORY.md" ] && continue
      if ! grep -qF "$base" "$idx" 2>/dev/null; then
        orphans=$((orphans+1))
      fi
    done
    if [ "$orphans" -gt 0 ]; then
      emit memory.orphans memory WARN "$orphans memory file(s) not referenced in MEMORY.md (orphaned)" \
        "Run /memory-dream or add the missing index lines"
    else
      emit memory.index memory PASS "MEMORY.md index present; no orphaned memory files"
    fi
  fi

  # Manifest-declared contradiction patterns. Each entry:
  #   { when_field, when_equals, grep, why } - only evaluated when this
  # machine's identity.<when_field> == when_equals, so a pattern fires only
  # where it is actually a contradiction (e.g. prescribing python3 on a `py` box).
  # NB: fields are read via @tsv, so when_equals/grep MUST NOT contain tab chars.
  local has; has="$(jq -r '(.memory.contradiction_patterns // []) | length' "$MANIFEST" 2>/dev/null)"
  if [ "${has:-0}" -gt 0 ] 2>/dev/null; then
    local wf we gx why hits
    while IFS=$'\t' read -r wf we gx why; do
      [ -n "$wf" ] || continue
      if [ "$(idfield ".$wf")" = "$we" ]; then
        # `--` keeps a pattern that begins with '-' from being read as an
        # option; at most five matching files are listed.
        hits="$(grep -rliE -- "$gx" "$mdir" 2>/dev/null | grep -vF 'MEMORY.md' | head -5 | tr '\n' ' ')"
        if [ -n "$hits" ]; then
          emit "memory.contradiction.$wf" memory WARN \
            "memory may contradict identity.$wf=$we ($why): $hits" \
            "Review the listed memory file(s); correct or delete if they prescribe the wrong behaviour for this machine"
        fi
      fi
    done < <(jq -r '(.memory.contradiction_patterns // [])[] | [.when_field, .when_equals, .grep, .why] | @tsv' "$MANIFEST")
  fi
}

# ---------------------------------------------------------------------------
# Build the census JSON from accumulated results
# ---------------------------------------------------------------------------
# Prints one JSON document: host metadata, grade (RED if any FAIL, else AMBER
# if any WARN, else GREEN), per-status counts, and the full results array.
# Reads RUN_TS, which is assigned in the Main section before this is called.
build_census() {
  local fails warns grade
  fails="$(jq -s '[.[]|select(.status=="FAIL")]|length' "$RESULTS_FILE")"
  warns="$(jq -s '[.[]|select(.status=="WARN")]|length' "$RESULTS_FILE")"
  if [ "$fails" -gt 0 ]; then
    grade="RED"
  elif [ "$warns" -gt 0 ]; then
    grade="AMBER"
  else
    grade="GREEN"
  fi

  jq -s \
    --arg host "$HOST" --arg session "$SESSION" --arg platform "$PLATFORM" --arg arch "$ARCH" \
    --arg grade "$grade" --arg ts "$RUN_TS" \
    --arg mver "$(jq -r '.schema_version' "$MANIFEST")" \
    '{
      host:$host, session:$session, platform:$platform, arch:$arch,
      grade:$grade, generated_at:$ts, manifest_version:$mver,
      summary: {
        pass:([.[]|select(.status=="PASS")]|length),
        warn:([.[]|select(.status=="WARN")]|length),
        fail:([.[]|select(.status=="FAIL")]|length),
        info:([.[]|select(.status=="INFO")]|length)
      },
      results: .
    }' "$RESULTS_FILE"
}

# ---------------------------------------------------------------------------
# Human report
# ---------------------------------------------------------------------------
# print_report <census-json>
#   Writes the banner, then FAIL / WARN / INFO rows (with fix hints where
#   present), then a PASS count per category.
print_report() {
  local census="$1" grade
  grade="$(echo "$census" | jq -r .grade)"

  echo ""
  echo "============================================================"
  echo " ClaudeTools self-check - $HOST ($PLATFORM/$ARCH)"
  echo " Grade: $grade $(echo "$census" | jq -r '.summary | "PASS \(.pass) WARN \(.warn) FAIL \(.fail) INFO \(.info)"')"
  echo " Manifest: $(echo "$census" | jq -r .manifest_version) (provisional) $RUN_TS"
  echo "============================================================"

  # FAIL then WARN then INFO; PASS summarized per category
  echo "$census" | jq -r '
    (.results | map(select(.status=="FAIL"))) as $f
    | (.results | map(select(.status=="WARN"))) as $w
    | (.results | map(select(.status=="INFO"))) as $i
    | (if ($f|length)>0 then "\nFAILURES:" else empty end),
      ($f[] | " [FAIL] \(.category)/\(.id): \(.detail)" + (if .fix!="" then "\n fix: \(.fix)" else "" end)),
      (if ($w|length)>0 then "\nWARNINGS:" else empty end),
      ($w[] | " [WARN] \(.category)/\(.id): \(.detail)" + (if .fix!="" then "\n fix: \(.fix)" else "" end)),
      (if ($i|length)>0 then "\nINFO / capability:" else empty end),
      ($i[] | " [INFO] \(.detail)")
  '

  # per-category PASS counts
  echo ""
  echo "PASS by category:"
  echo "$census" | jq -r '.results | map(select(.status=="PASS")) | group_by(.category)[] | " \(.[0].category): \(length) ok"'
  echo "============================================================"
}

# ---------------------------------------------------------------------------
# Publish census to coord API.
# The coord API uses the PATH-PARAM form: PUT /api/coord/components/{pk}/{comp}
# with a body of {state, version, notes, updated_by} (the body form 405s).
# The component segment must be slash-free (a slash 404s, even URL-encoded), so
# the per-machine component is "selfcheck_<host>" (NOT "selfcheck/<host>").
# ---------------------------------------------------------------------------
COMPONENT="selfcheck_$HOST"

# publish_census <census-json>
#   PUTs the census (grade as state, compact census as notes) to the coord API.
#   On any failure the same request is appended to .claude/coord-queue.jsonl.
publish_census() {
  local census="$1" grade compact body path
  grade="$(echo "$census" | jq -r .grade)"
  compact="$(echo "$census" | jq -c .)"
  path="/api/coord/components/claudetools/$COMPONENT"
  body="$(jq -nc --arg state "$grade" \
    --arg ver "$(jq -r '.schema_version' "$MANIFEST")" --arg notes "$compact" --arg by "$SESSION" \
    '{state:$state, version:$ver, notes:$notes, updated_by:$by}')"

  # -f makes curl exit non-zero on HTTP >= 400. Without it a 404/405 from the
  # API would be reported below as a successful publish.
  if curl -sf -m 8 -X PUT "$API$path" -H "Content-Type: application/json" -d "$body" >/dev/null 2>&1; then
    echo "[OK] Published census to coord: component $COMPONENT = $grade"
  else
    # softfail per coordination protocol - queue the SAME path/body so a
    # later /sync drain replays a request that actually works.
    local q="$REPO_ROOT/.claude/coord-queue.jsonl"
    jq -nc --arg path "$path" --argjson b "$body" --arg ts "$RUN_TS" \
      '{ts:$ts, method:"PUT", path:$path, body:$b}' >> "$q" 2>/dev/null
    echo "[WARN] coord unreachable or request rejected; census queued to .claude/coord-queue.jsonl"
  fi
}

# ---------------------------------------------------------------------------
# Subcommand: fanout - request all instances to run /self-check --publish
# ---------------------------------------------------------------------------
# POSTs one message addressed to ALL_SESSIONS; exits 1 when the POST fails.
do_fanout() {
  local subj body payload
  subj="[self-check] Fleet census + self-remediation request"
  body="On THIS machine: (1) run /self-check ; (2) apply the suggested fix commands it prints for any FAIL/WARN - fix your OWN machine, locally, with your operator present (nobody fixes you remotely) ; (3) re-run /self-check to confirm GREEN ; (4) run /self-check --publish to report your census (component selfcheck_) to coord. The check is read-only; only --publish writes (your census only). Requested by $SESSION at $RUN_TS."
  payload="$(jq -nc --arg from "$SESSION" --arg subj "$subj" --arg body "$body" \
    '{from_session:$from, to_session:"ALL_SESSIONS", project_key:"claudetools", subject:$subj, body:$body}')"

  # -f: treat an HTTP error status as a failed broadcast, not a success.
  if curl -sf -m 8 -X POST "$API/api/coord/messages" -H "Content-Type: application/json" -d "$payload" >/dev/null 2>&1; then
    echo "[OK] Broadcast census request to ALL_SESSIONS."
  else
    echo "[ERROR] Failed to broadcast (coord unreachable or request rejected)." >&2
    exit 1
  fi
}

# ---------------------------------------------------------------------------
# Subcommand: aggregate - read all published censuses, build fleet view
# ---------------------------------------------------------------------------
# Prints a per-machine table, a tools-on-all / tools-on-some summary, and the
# fix list for every non-GREEN machine. Exits 1 when the coord API cannot be
# read; returns early when no census has been published.
do_aggregate() {
  local comps
  # -f: an HTTP error yields empty output and is handled as a failure instead
  # of having its error body parsed as if it were component data.
  comps="$(curl -sf -m 8 "$API/api/coord/components?project_key=claudetools" 2>/dev/null)"
  if [ -z "$comps" ]; then
    echo "[ERROR] coord unreachable or returned an HTTP error." >&2
    exit 1
  fi

  # The coord API returns {states:[...], total:N}; each row's grade is .state and
  # the full census JSON is in .notes. Keep selfcheck_* rows with parseable notes.
  # (.components / bare-array kept as defensive fallbacks.)
  local censuses
  censuses="$(echo "$comps" | jq -c '
    ( .states? // .components? // (if type=="array" then . else [] end) ) as $rows
    | ($rows // [])
    | map(select((.component? // "") | startswith("selfcheck")))
    | map(.notes | try fromjson catch empty)
  ' 2>/dev/null)"

  local n; n="$(echo "$censuses" | jq 'length' 2>/dev/null || echo 0)"
  if [ "${n:-0}" -eq 0 ]; then
    echo "No published censuses found yet. Run 'self-check.sh fanout', then have each machine run /self-check --publish."
    return
  fi

  echo "============================================================"
  echo " Fleet census: $n machine(s) reporting"
  echo "============================================================"
  # Aligned table via column(1); when column is missing or fails, fall back to
  # a plain space-separated listing.
  echo "$censuses" | jq -r '.[] | " \(.grade)\t\(.host)\t\(.platform)/\(.arch)\tP\(.summary.pass) W\(.summary.warn) F\(.summary.fail)\t\(.generated_at)"' | column -t -s$'\t' 2>/dev/null \
    || echo "$censuses" | jq -r '.[] | " \(.grade) \(.host) \(.platform)/\(.arch) P\(.summary.pass) W\(.summary.warn) F\(.summary.fail)"'

  echo ""
  echo "Proposed baseline (intersection = required everywhere; symmetric diff = capability-gated):"
  # Tools present on every machine vs only some, derived from tool.* PASS results.
  echo "$censuses" | jq -r '
    [ .[] | { host:.host, tools:( .results | map(select((.id|startswith("tool."))) | select(.status=="PASS") | (.id|sub("^tool\\.";""))) ) } ] as $m
    | ($m|length) as $count
    | ([ $m[].tools[] ] | unique) as $all
    | " tools on ALL \($count): " + ( [ $all[] | . as $t | select( ([ $m[] | select(.tools|index($t)) ]|length) == $count ) ] | join(", ") ),
      " tools on SOME only: " + ( [ $all[] | . as $t | select( ([ $m[] | select(.tools|index($t)) ]|length) < $count ) ] | join(", ") )
  ' 2>/dev/null

  echo ""
  echo "Machines that must self-remediate (RED/AMBER) - each fixes ITSELF, then re-runs + re-publishes:"
  local needfix
  needfix="$(echo "$censuses" | jq -r '
    .[] | select(.grade!="GREEN")
    | " \(.host) [\(.grade)] should run, in order:\n"
      + ( [ .results[] | select(.status=="FAIL" or .status=="WARN") | select(.fix!="") | " - \(.fix)" ] | join("\n") )
      + "\n then: /self-check --publish"
  ' 2>/dev/null)"
  if [ -n "$needfix" ]; then
    echo "$needfix"
  else
    echo " (none - whole fleet is GREEN)"
  fi
  echo "============================================================"
  echo "We do NOT fix remote machines. Relay each machine's fix list to its operator;"
  echo "they self-remediate locally, re-run /self-check, and re-publish until GREEN."
  echo "Once the fleet is reporting consistently, ratify baseline/manifest.json with Mike."
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
# RUN_TS is passed in by the caller (SKILL.md instructs a real UTC stamp);
# fall back to `date` if available so the script is runnable standalone.
RUN_TS="${SELFCHECK_TS:-$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo unknown)}"

MODE="${1:-report}"

# Subcommands that do not run the local checks.
case "$MODE" in
  fanout)
    do_fanout
    exit 0
    ;;
  aggregate)
    do_aggregate
    exit 0
    ;;
esac

# run all checks
check_identity
check_tools
check_capability_tier
check_files
check_settings_hooks
check_git
check_skills_commands
check_duplicates
check_memory
check_vault
check_connectivity

CENSUS="$(build_census)"

# Any unrecognised mode falls through to the human report.
case "$MODE" in
  --json)
    echo "$CENSUS"
    ;;
  --publish)
    print_report "$CENSUS"
    publish_census "$CENSUS"
    ;;
  report|*)
    print_report "$CENSUS"
    ;;
esac

# exit code reflects grade for scripting (0 GREEN, 1 AMBER, 2 RED)
GR="$(echo "$CENSUS" | jq -r .grade)"
case "$GR" in
  GREEN) exit 0 ;;
  AMBER) exit 1 ;;
  RED) exit 2 ;;
  *) exit 0 ;;
esac