Files
claudetools/.claude/skills/self-check/scripts/self-check.sh
Mike Swanson b153ff158b feat(self-check): add harness self-diagnosis / fleet conformance skill
New /self-check skill: each machine probes its own ClaudeTools harness wiring
(identity.json paths, required tooling, settings.json hooks, skill/command/script
set, vault decrypt, coord/Gitea connectivity, Ollama capability tier) and grades
RED/AMBER/GREEN against a checked-in provisional baseline manifest.

- Capability-tier model: architectural/OS/hardware differences (e.g. no local
  Ollama) select a fallback ruleset instead of failing.
- Duplicate detection: flags command/skill names that diverge between the repo
  and ~/.claude (the "same /cmd, different behaviour" cross-machine bug);
  CRLF-only diffs ignored.
- Memory check: index + orphan detection, plus a model-driven semantic pass for
  memories that contradict identity/settings.
- V1 is a census tool: --publish writes a per-machine census to coord
  (component selfcheck_<host>); fanout requests the fleet to self-check +
  self-remediate + re-publish; aggregate derives the proposed baseline. No
  machine ever fixes another.

Reviewed twice by the Code Review Agent; three CRITICAL coord-API bugs and the
CRLF false-WARN found and fixed, verified live against the coord API.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 14:45:42 -07:00

747 lines
37 KiB
Bash

#!/usr/bin/env bash
# self-check.sh - ClaudeTools harness self-diagnosis / fleet conformance probe.
#
# V1 is a CENSUS tool. Each machine probes its own harness wiring (tools,
# identity, hooks, skills, commands, scripts, connectivity, capability tier),
# grades what it can against the provisional baseline manifest, and can publish
# the result to the coord API so the fleet can be compared and the baseline
# refined from real data. See ../SKILL.md and ../baseline/README.md.
#
# Usage:
#   self-check.sh             Run checks, print a human report. (default)
#   self-check.sh --json      Emit the structured census JSON to stdout only.
#   self-check.sh --publish   Run checks, print the report, then PUT the census to coord
#                             (component selfcheck_<host>).
#   self-check.sh fanout      Broadcast a request to ALL_SESSIONS to run /self-check --publish.
#   self-check.sh aggregate   Read every machine's published census and print a fleet table
#                             plus a proposed-baseline (intersection/union) summary.
#
# Exit status (check modes): 0 = GREEN, 1 = AMBER, 2 = RED or a missing prerequisite.
#
# Portable: bash 3.2+ (macOS), Git Bash (Windows), Linux. Deps: jq, curl.
# Read-only with respect to the harness: it collects and reports, and repairs
# nothing. The only writes are the --publish census (sent to coord, or appended
# to .claude/coord-queue.jsonl when the PUT fails) and a temporary results file.
set -u
# ---------------------------------------------------------------------------
# Bootstrap: resolve repo root, identity, coord API, session id
# ---------------------------------------------------------------------------
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The skill directory is the script directory's parent.
SKILL_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
# The repo root is four levels above the script directory.
REPO_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
# Baseline manifest that the checks below read their expectations from.
MANIFEST="$SKILL_DIR/baseline/manifest.json"
# jq is a hard dependency: abort with exit status 2 when it is not on PATH.
if ! command -v jq >/dev/null 2>&1; then
echo "[ERROR] jq is required and not found on PATH. Install jq, then re-run." >&2
exit 2
fi
# Some Windows jq builds (winget) emit CRLF line endings; a trailing \r corrupts
# every `for x in $(jq ...)` word and `read`-from-@tsv field. Strip \r from all
# jq output (it is insignificant JSON whitespace and never wanted in raw values).
#
# The wrapper returns jq's OWN exit status, not tr's. Without that, the status
# of the pipeline is always tr's (0), so `jq -e . file` validity tests and
# `jq ... || fallback` constructs could never observe a jq failure.
# Arguments: passed through to the real jq unchanged.
# Outputs:   jq's stdout with every carriage return removed.
# Returns:   the exit status of the real jq binary.
jq() {
  command jq "$@" | tr -d '\r'
  # PIPESTATUS must be read immediately after the pipeline it describes.
  return "${PIPESTATUS[0]}"
}
# curl is the second hard dependency: abort with exit status 2 when missing.
if ! command -v curl >/dev/null 2>&1; then
echo "[ERROR] curl is required and not found on PATH." >&2
exit 2
fi
# Without the baseline manifest there is nothing to grade against.
if [ ! -f "$MANIFEST" ]; then
echo "[ERROR] Baseline manifest not found: $MANIFEST" >&2
exit 2
fi
# identity.json: prefer repo copy, then ~/.claude (mirrors check-messages.sh).
# IDENTITY stays empty when neither candidate exists; check_identity reports that.
IDENTITY=""
for c in "$REPO_ROOT/.claude/identity.json" "$HOME/.claude/identity.json"; do
[ -f "$c" ] && { IDENTITY="$c"; break; }
done
# idfield FILTER - print the identity.json value selected by the jq FILTER
# (e.g. '.coord_api'); prints nothing when the value is null, false or absent.
# Returns non-zero without output when no identity.json was located.
idfield() {
  if [ -z "$IDENTITY" ]; then
    return 1
  fi
  jq -r "$1 // empty" "$IDENTITY" 2>/dev/null
}
# Host name as reported by `hostname` ("unknown" if that fails), with a trailing
# ".local" removed; it names both the coord session and the census component.
HOSTNAME_RAW="$(hostname 2>/dev/null || echo unknown)"
HOST="${HOSTNAME_RAW%.local}"
SESSION="${HOST}/claude-main"
# coord API base URL: identity.json wins, otherwise the built-in default below.
API="$(idfield '.coord_api')"
[ -z "$API" ] && API="http://172.16.3.30:8001"
# Platform: identity.json wins, otherwise derived from `uname -s`.
PLATFORM="$(idfield '.platform')"
[ -z "$PLATFORM" ] && case "$(uname -s)" in
Darwin) PLATFORM="macos" ;; Linux) PLATFORM="linux" ;;
CYGWIN*|MINGW*|MSYS*) PLATFORM="windows" ;; *) PLATFORM="unknown" ;;
esac
# Architecture: identity.json wins, otherwise `uname -m` ("unknown" if it fails).
ARCH="$(idfield '.architecture')"
[ -z "$ARCH" ] && ARCH="$(uname -m 2>/dev/null || echo unknown)"
# ---------------------------------------------------------------------------
# Results accumulation. Each check appends one compact JSON object.
# status in {PASS, WARN, FAIL, SKIP, INFO}. Grade: any FAIL->RED, WARN->AMBER.
# ---------------------------------------------------------------------------
# Scratch file that collects one JSON object per line (written by emit).
# Falls back to a PID-based name under $TMPDIR (or /tmp) when mktemp fails.
RESULTS_FILE="$(mktemp 2>/dev/null || echo "${TMPDIR:-/tmp}/selfcheck.$$")"
# Start from an empty file.
: > "$RESULTS_FILE"
# Remove the scratch file when the script exits.
trap 'rm -f "$RESULTS_FILE" 2>/dev/null' EXIT
# emit ID CATEGORY STATUS DETAIL [FIX] - append one check result to
# $RESULTS_FILE as a compact JSON object {id,category,status,detail,fix}.
# FIX is optional and is recorded as an empty string when omitted.
emit() {
  local fix_hint="${5:-}"
  jq -nc \
    --arg id "$1" \
    --arg cat "$2" \
    --arg st "$3" \
    --arg detail "$4" \
    --arg fix "$fix_hint" \
    '{id:$id,category:$cat,status:$st,detail:$detail,fix:$fix}' >> "$RESULTS_FILE"
}
# reachable URL - probe URL with curl (silent, body discarded, 4 second
# limit) and return curl's exit status; 0 means the request completed.
reachable() {
  local url="$1"
  curl -s -o /dev/null -m 4 "$url" 2>/dev/null
}
# same_content FILE_A FILE_B - succeed when the two files are equal once
# carriage returns are removed. A repo copy with LF endings and a ~/.claude copy
# with CRLF endings hold the SAME content for the purposes of the duplicate
# check, so a byte-for-byte compare (cmp) would flag them falsely.
same_content() {
  local lf_a="$1" lf_b="$2"
  diff -q \
    <(tr -d '\r' < "$lf_a") \
    <(tr -d '\r' < "$lf_b") \
    >/dev/null 2>&1
}
# ---------------------------------------------------------------------------
# CHECK: identity
# ---------------------------------------------------------------------------
# norm PATH - print PATH normalised for comparison: lower-cased, backslashes
# turned into forward slashes, trailing slashes removed. When cygpath is
# available the path is first converted with `cygpath -m`; if that conversion
# fails the input is used unchanged.
norm() {
  local p="$1"
  command -v cygpath >/dev/null 2>&1 && p="$(cygpath -m "$p" 2>/dev/null || echo "$p")"
  printf '%s' "$p" | tr 'A-Z' 'a-z' | sed 's#\\#/#g; s#/\{1,\}$##'
}

# check_identity - validate identity.json and the facts it declares.
# Emits results for: presence/validity of the file, the manifest's required
# fields, claudetools_root and vault_path resolving to real directories,
# identity.machine vs the actual hostname, and git user.name/user.email vs
# identity.full_name/email.
# A comparison whose identity field is unset is reported as SKIP rather than
# PASS: nothing was compared, so claiming a "match" would be false.
# Globals: IDENTITY, MANIFEST, REPO_ROOT, HOST (read).
check_identity() {
  # No identity.json anywhere: nothing else in this check can run.
  if [ -z "$IDENTITY" ]; then
    emit identity.present identity FAIL "identity.json not found (.claude or ~/.claude)" \
      "Run onboarding; create .claude/identity.json then 'bash .claude/scripts/migrate-identity.sh'"
    return
  fi
  if ! jq -e . "$IDENTITY" >/dev/null 2>&1; then
    emit identity.parse identity FAIL "identity.json is not valid JSON: $IDENTITY" "Fix the JSON syntax"
    return
  fi
  emit identity.present identity PASS "identity.json present and valid: $IDENTITY"

  # Every field the manifest lists as required must be present and non-empty.
  local field value missing=""
  for field in $(jq -r '.required_identity_fields[]' "$MANIFEST"); do
    value="$(jq -r ".$field // empty" "$IDENTITY" 2>/dev/null)"
    [ -z "$value" ] && missing="$missing $field"
  done
  if [ -n "$missing" ]; then
    emit identity.fields identity WARN "missing/empty identity fields:$missing" \
      "bash .claude/scripts/migrate-identity.sh (populates machine-specific fields)"
  else
    emit identity.fields identity PASS "all required identity fields present"
  fi

  # --- path fields: identity.json is the map of WHERE things live on this box.
  # Later checks trust claudetools_root / vault_path, so verify they resolve to
  # real locations and that claudetools_root is the repo we are running from
  # (a stale clone path is a silent footgun).
  local ctroot
  ctroot="$(idfield '.claudetools_root')"
  if [ -z "$ctroot" ]; then
    emit identity.claudetools_root identity FAIL "identity.claudetools_root not set" \
      "Set claudetools_root in identity.json to this repo's absolute path"
  elif [ ! -d "$ctroot" ]; then
    emit identity.claudetools_root identity FAIL "claudetools_root does not exist: $ctroot" \
      "Fix claudetools_root in identity.json (machine moved/renamed the repo?)"
  elif [ "$(norm "$ctroot")" != "$(norm "$REPO_ROOT")" ]; then
    emit identity.claudetools_root identity WARN \
      "claudetools_root ($ctroot) != running repo ($REPO_ROOT)" \
      "Reconcile claudetools_root in identity.json with the repo you actually run from"
  else
    emit identity.claudetools_root identity PASS "claudetools_root resolves to this repo ($ctroot)"
  fi

  local vpath2
  vpath2="$(idfield '.vault_path')"
  if [ -z "$vpath2" ]; then
    emit identity.vault_path identity FAIL "identity.vault_path not set (cannot locate the SOPS vault)" \
      "Set vault_path in identity.json to the cloned vault repo path"
  elif [ ! -d "$vpath2" ]; then
    emit identity.vault_path identity FAIL "vault_path does not exist: $vpath2" \
      "Clone the vault repo and set vault_path in identity.json"
  else
    emit identity.vault_path identity PASS "vault_path resolves ($vpath2)"
  fi

  # machine field vs actual hostname (case-insensitive)
  local idmach
  idmach="$(idfield '.machine')"
  if [ -z "$idmach" ]; then
    emit identity.hostname identity SKIP "identity.machine not set; hostname '$HOST' not compared"
  elif [ "$(printf '%s' "$idmach" | tr 'A-Z' 'a-z')" != "$(printf '%s' "$HOST" | tr 'A-Z' 'a-z')" ]; then
    emit identity.hostname identity WARN "identity.machine='$idmach' != actual hostname '$HOST'" \
      "Update .machine in identity.json (did you clone onto a new box?)"
  else
    emit identity.hostname identity PASS "identity.machine matches hostname ($HOST)"
  fi

  # git config vs identity
  local gn ge idn ide
  gn="$(git -C "$REPO_ROOT" config user.name 2>/dev/null)"
  ge="$(git -C "$REPO_ROOT" config user.email 2>/dev/null)"
  idn="$(idfield '.full_name')"
  ide="$(idfield '.email')"
  if [ -z "$idn" ]; then
    emit identity.git_name identity SKIP "identity.full_name not set; git user.name ('$gn') not compared"
  elif [ "$gn" != "$idn" ]; then
    emit identity.git_name identity WARN "git user.name='$gn' != identity.full_name='$idn'" \
      "git config user.name \"$idn\""
  else
    emit identity.git_name identity PASS "git user.name matches identity ($gn)"
  fi
  if [ -z "$ide" ]; then
    emit identity.git_email identity SKIP "identity.email not set; git user.email ('$ge') not compared"
  elif [ "$ge" != "$ide" ]; then
    emit identity.git_email identity WARN "git user.email='$ge' != identity.email='$ide'" \
      "git config user.email \"$ide\""
  else
    emit identity.git_email identity PASS "git user.email matches identity ($ge)"
  fi
}
# ---------------------------------------------------------------------------
# CHECK: tooling (required + capability-gated)
# ---------------------------------------------------------------------------
# toolver TOOL - print the first line of `TOOL --version` (stderr discarded).
# Best effort: prints nothing when the tool gives no output, and always
# succeeds.
toolver() {
  local tool="$1"
  { "$tool" --version 2>/dev/null | head -n 1; } || true
}
# check_tools - grade tool availability against the manifest.
#   required_tools       : FAIL when a tool is not on PATH, PASS otherwise.
#   required_python      : PASS when any listed interpreter is on PATH (WARN if
#                          identity.python.command names one that is not),
#                          FAIL when none is found.
#   capability_tools     : INFO only - presence or absence never fails.
# Globals: MANIFEST (read).
check_tools() {
  local tool_name reason
  while IFS=$'\t' read -r tool_name reason; do
    if ! command -v "$tool_name" >/dev/null 2>&1; then
      emit "tool.$tool_name" tooling FAIL "$tool_name MISSING (required: $reason)" "Install $tool_name and ensure it is on PATH"
      continue
    fi
    emit "tool.$tool_name" tooling PASS "$tool_name present ($(toolver "$tool_name"))"
  done < <(jq -r '.required_tools[] | [.name, .why] | @tsv' "$MANIFEST")

  # python: the first any_of candidate found on PATH wins
  local interpreter="" candidate declared
  for candidate in $(jq -r '.required_python.any_of[]' "$MANIFEST"); do
    command -v "$candidate" >/dev/null 2>&1 || continue
    interpreter="$candidate"
    break
  done
  if [ -z "$interpreter" ]; then
    emit tool.python tooling FAIL "no python interpreter found (tried py/python3/python)" "Install Python"
  else
    declared="$(idfield '.python.command')"
    if [ -z "$declared" ] || command -v "$declared" >/dev/null 2>&1; then
      emit tool.python tooling PASS "python available ($interpreter; identity declares '${declared:-unset}')"
    else
      emit tool.python tooling WARN "identity.python.command='$declared' not on PATH; '$interpreter' is available" \
        "Update .python.command in identity.json or re-run migrate-identity.sh"
    fi
  fi

  # capability tools - report presence only, never FAIL
  local cap_tool cap_name cap_reason
  while IFS=$'\t' read -r cap_tool cap_name cap_reason; do
    if command -v "$cap_tool" >/dev/null 2>&1; then
      emit "cap.$cap_tool" capability INFO "$cap_tool present [$cap_name] ($(toolver "$cap_tool"))"
    else
      emit "cap.$cap_tool" capability INFO "$cap_tool absent [$cap_name] - capability off ($cap_reason)"
    fi
  done < <(jq -r '.capability_tools[] | [.name, .capability, .why] | @tsv' "$MANIFEST")
}
# ---------------------------------------------------------------------------
# CHECK: capability tier (ollama) + effective ruleset
# ---------------------------------------------------------------------------
# check_capability_tier - work out which Ollama tier this machine has and
# report the Tier-0 engine the manifest prescribes for it.
# Both the local endpoint (localhost:11434) and, when identity declares one,
# the fallback endpoint are probed; local wins, then fallback, else "none".
# No Ollama at all is a WARN (the fallback ruleset applies), never a FAIL.
# Globals: MANIFEST (read).
check_capability_tier() {
  local endpoint fallback have_local="" have_remote="" tier engine suffix
  endpoint="$(idfield '.ollama.endpoint')"
  fallback="$(idfield '.ollama.fallback')"

  if reachable "http://localhost:11434/api/tags"; then
    have_local=1
  fi
  if [ -n "$fallback" ] && reachable "${fallback%/}/api/tags"; then
    have_remote=1
  fi

  tier="ollama_none"
  if [ -n "$have_local" ]; then
    tier="ollama_local"
  elif [ -n "$have_remote" ]; then
    tier="ollama_remote"
  fi
  engine="$(jq -r ".capability_rules.$tier.tier0_engine" "$MANIFEST")"

  case "$tier" in
    ollama_none)
      emit captier.ollama capability WARN "Ollama tier = NONE (local + fallback both unreachable). Effective rule: $engine" \
        "Confirm this machine is meant to run without Ollama; ensure Tier-0 work routes to haiku, not blocked"
      ;;
    *)
      # Mention the mismatch when identity points at localhost but only the
      # fallback answered.
      suffix=""
      if [ "$tier" = "ollama_remote" ]; then
        case "$endpoint" in
          *localhost*) suffix=" (identity declares localhost but local is down; using fallback $fallback)" ;;
        esac
      fi
      emit captier.ollama capability PASS "Ollama tier = ${tier#ollama_}${suffix}. Effective Tier-0: $engine"
      ;;
  esac
}
# ---------------------------------------------------------------------------
# CHECK: required scripts + hook files (exist + executable)
# ---------------------------------------------------------------------------
# check_files - every path under the manifest's required_scripts and
# required_hook_files must exist below the repo root (FAIL when missing).
# Files ending in .sh or .template must also be executable (WARN otherwise).
# Globals: MANIFEST, REPO_ROOT (read).
check_files() {
  local rel_path abs_path
  for rel_path in $(jq -r '.required_scripts[], .required_hook_files[]' "$MANIFEST"); do
    abs_path="$REPO_ROOT/$rel_path"
    if [ ! -f "$abs_path" ]; then
      emit "file.$rel_path" files FAIL "missing: $rel_path" "Restore via /sync (git pull from Gitea)"
      continue
    fi
    case "$rel_path" in
      *.sh|*.template)
        if [ ! -x "$abs_path" ]; then
          emit "file.$rel_path" files WARN "present but not executable: $rel_path" "chmod +x \"$rel_path\""
          continue
        fi
        ;;
    esac
    emit "file.$rel_path" files PASS "present: $rel_path"
  done
}
# ---------------------------------------------------------------------------
# CHECK: settings.json hooks wired correctly
# ---------------------------------------------------------------------------
# check_settings_hooks - confirm .claude/settings.json wires every hook the
# manifest requires. A required hook counts as wired when at least one hook
# command under its event contains the manifest's command_contains text.
# Also reports whether .claude/current-mode exists.
# Globals: MANIFEST, REPO_ROOT (read).
check_settings_hooks() {
  local settings_file="$REPO_ROOT/.claude/settings.json"
  if ! { [ -f "$settings_file" ] && jq -e . "$settings_file" >/dev/null 2>&1; }; then
    emit hooks.settings hooks FAIL "settings.json missing or invalid JSON" "Restore .claude/settings.json via /sync"
    return
  fi

  # NB: .matcher is deliberately left out of the TSV. Tab is IFS-whitespace, so
  # an empty middle field would collapse and shift the columns; the matcher is
  # not needed here anyway.
  local event needle reason match_count
  while IFS=$'\t' read -r event needle reason; do
    # number of hook commands under this event that contain the needle
    match_count="$(jq -r --arg ev "$event" --arg n "$needle" \
      '(.hooks[$ev] // []) | [.[].hooks[]?.command // ""] | map(select(contains($n))) | length' \
      "$settings_file" 2>/dev/null)"
    if [ "${match_count:-0}" -gt 0 ] 2>/dev/null; then
      emit "hook.$event" hooks PASS "$event hook wired ($needle)"
    else
      emit "hook.$event" hooks FAIL "$event hook NOT wired (expected command containing '$needle' - $reason)" \
        "Add the $event hook to .claude/settings.json (see baseline manifest required_settings_hooks)"
    fi
  done < <(jq -r '.required_settings_hooks[] | [.event, .command_contains, .why] | @tsv' "$MANIFEST")

  # current-mode file (created by UserPromptSubmit hook, but flag if absent)
  local mode_file="$REPO_ROOT/.claude/current-mode"
  if [ ! -f "$mode_file" ]; then
    emit hook.current-mode hooks WARN "current-mode missing (auto-created on next prompt)" "echo general > .claude/current-mode"
  else
    emit hook.current-mode hooks PASS "current-mode present ($(tr -d '[:space:]' < "$mode_file"))"
  fi
}
# ---------------------------------------------------------------------------
# CHECK: git remote + post-commit hooks
# ---------------------------------------------------------------------------
# check_git - verify the repo's 'origin' remote and its post-commit hook.
# The origin URL must contain either the manifest's remote host text or its
# internal IP (FAIL otherwise; WARN when there is no origin at all). When the
# manifest expects a post-commit hook, its absence is a WARN.
# Globals: MANIFEST, REPO_ROOT (read).
check_git() {
  local origin_url gitea_host gitea_ip
  origin_url="$(git -C "$REPO_ROOT" remote get-url origin 2>/dev/null)"
  gitea_host="$(jq -r '.git.remote_host_contains' "$MANIFEST")"
  gitea_ip="$(jq -r '.git.remote_host_internal_ip' "$MANIFEST")"

  case "$origin_url" in
    "")
      emit git.remote git WARN "no 'origin' remote on $REPO_ROOT" "git remote add origin <gitea-url>"
      ;;
    *"$gitea_host"*|*"$gitea_ip"*)
      emit git.remote git PASS "origin -> $origin_url"
      ;;
    *)
      emit git.remote git FAIL "origin does not point at ACG Gitea: $origin_url" \
        "git remote set-url origin http://<user>@$gitea_ip:3000/azcomputerguru/claudetools.git"
      ;;
  esac

  # The hook check only applies when the manifest asks for it.
  if [ "$(jq -r '.git.post_commit_hook_expected' "$MANIFEST")" != "true" ]; then
    return 0
  fi
  if [ ! -f "$REPO_ROOT/.git/hooks/post-commit" ]; then
    emit git.post_commit git WARN "main-repo post-commit hook NOT installed (HOOKS.md mandates dev-alerts hook)" \
      "cp .claude/hooks/post-commit.template .git/hooks/post-commit && chmod +x .git/hooks/post-commit"
  else
    emit git.post_commit git PASS "main-repo post-commit hook installed"
  fi
}
# ---------------------------------------------------------------------------
# CHECK: skills + commands conformance vs manifest
# ---------------------------------------------------------------------------
# check_skills_commands - compare the repo's skills and commands with the
# manifest.
#   - a manifest skill whose directory is missing is a FAIL; a directory with
#     no SKILL.md, no *.md and no scripts/ subdirectory is a WARN
#   - a manifest command without .claude/commands/<name>.md is a FAIL
#   - skills/commands present in the repo but absent from the manifest are
#     reported as INFO (census candidates), never as failures
# All loop variables are function-local so nothing leaks into the caller's
# scope.
# Globals: MANIFEST, REPO_ROOT (read).
check_skills_commands() {
  local name dir cmd_file known

  # skills named by the manifest
  for name in $(jq -r '.skills[]' "$MANIFEST"); do
    dir="$REPO_ROOT/.claude/skills/$name"
    if [ -d "$dir" ]; then
      if [ -f "$dir/SKILL.md" ] || ls "$dir"/*.md >/dev/null 2>&1 || [ -d "$dir/scripts" ]; then
        emit "skill.$name" skills PASS "skill present: $name"
      else
        emit "skill.$name" skills WARN "skill dir present but looks empty: $name" "Restore skill contents via /sync"
      fi
    else
      emit "skill.$name" skills FAIL "skill MISSING: $name" "Restore .claude/skills/$name via /sync"
    fi
  done

  # extra skills not in manifest (drift to report, not fail in V1).
  # `known` is a |-delimited list (|a|b|) so membership is a substring match.
  known="|$(jq -r '.skills[]' "$MANIFEST" | tr '\n' '|')"
  for dir in "$REPO_ROOT"/.claude/skills/*/; do
    [ -d "$dir" ] || continue
    name="$(basename "$dir")"
    case "$known" in
      *"|$name|"*) ;;
      *) emit "skill.extra.$name" skills INFO "skill present but NOT in baseline: $name (census candidate)" ;;
    esac
  done

  # commands named by the manifest
  for name in $(jq -r '.commands[]' "$MANIFEST"); do
    if [ -f "$REPO_ROOT/.claude/commands/$name.md" ]; then
      emit "cmd.$name" commands PASS "command present: /$name"
    else
      emit "cmd.$name" commands FAIL "command MISSING: /$name" "Restore .claude/commands/$name.md via /sync"
    fi
  done

  # extra commands (README.md is documentation, not a command)
  known="|$(jq -r '.commands[]' "$MANIFEST" | tr '\n' '|')"
  for cmd_file in "$REPO_ROOT"/.claude/commands/*.md; do
    [ -f "$cmd_file" ] || continue
    name="$(basename "$cmd_file" .md)"
    [ "$name" = "README" ] && continue
    case "$known" in
      *"|$name|"*) ;;
      *) emit "cmd.extra.$name" commands INFO "command present but NOT in baseline: /$name (census candidate)" ;;
    esac
  done
}
# ---------------------------------------------------------------------------
# CHECK: vault decrypt readiness
# ---------------------------------------------------------------------------
# check_vault - report whether the SOPS vault can be used from this machine:
# identity.vault_path must be set and exist, sops and age must be on PATH, and
# (when .claude/scripts/vault.sh is executable) `vault.sh list` must succeed.
# Nothing is emitted for the list step when vault.sh is not executable.
# Globals: REPO_ROOT (read).
check_vault() {
  local vault_dir
  vault_dir="$(idfield '.vault_path')"
  if [ -z "$vault_dir" ]; then
    emit vault.path vault WARN "identity.vault_path not set" "Set vault_path in identity.json"
    return
  fi
  if [ ! -d "$vault_dir" ]; then
    emit vault.path vault FAIL "vault_path does not exist: $vault_dir" "Clone the vault repo and set vault_path"
    return
  fi
  emit vault.path vault PASS "vault repo present: $vault_dir"

  if ! { command -v sops >/dev/null 2>&1 && command -v age >/dev/null 2>&1; }; then
    emit vault.tools vault FAIL "sops/age missing - cannot decrypt vault" "Install sops and age"
    return
  fi

  # Lightweight readiness: vault.sh list should enumerate entries without error.
  local vault_cli="$REPO_ROOT/.claude/scripts/vault.sh"
  [ -x "$vault_cli" ] || return 0
  if ! bash "$vault_cli" list >/dev/null 2>&1; then
    emit vault.list vault WARN "vault.sh list failed - check age key + SOPS_AGE_KEY_FILE" \
      "Verify age key at the SOPS recipient path; run: bash .claude/scripts/vault.sh list"
  else
    emit vault.list vault PASS "vault.sh list succeeded (sops/age wired)"
  fi
}
# ---------------------------------------------------------------------------
# CHECK: connectivity
# ---------------------------------------------------------------------------
# check_connectivity - probe every endpoint listed under the manifest's
# connectivity array. Reachable is a PASS; unreachable is a FAIL when the
# entry is marked required and a WARN (off-network is acceptable) otherwise.
# Globals: MANIFEST (read).
check_connectivity() {
  local label endpoint required
  while IFS=$'\t' read -r label endpoint required; do
    if reachable "$endpoint"; then
      emit "net.$label" connectivity PASS "$label reachable ($endpoint)"
      continue
    fi
    case "$required" in
      true)
        emit "net.$label" connectivity FAIL "$label UNREACHABLE ($endpoint)" "Check VPN/Tailscale/network to 172.16.3.x"
        ;;
      *)
        emit "net.$label" connectivity WARN "$label unreachable ($endpoint) - off-network is OK" ""
        ;;
    esac
  done < <(jq -r '.connectivity[] | [.name, .url, (.required|tostring)] | @tsv' "$MANIFEST")
}
# ---------------------------------------------------------------------------
# CHECK: duplicate command/skill definitions across search roots.
# Claude Code resolves slash commands and skills from BOTH the repo
# (.claude/commands, .claude/skills) and the user profile (~/.claude/...). When
# the same name exists in both with DIFFERENT content, the harness may resolve a
# different one than you expect - the "same /cmd, different behaviour on the Mac"
# bug. Divergent = WARN; identical = INFO (redundant copy that WILL drift).
# ---------------------------------------------------------------------------
# check_duplicates - find commands and skills defined in BOTH the repo
# (.claude/<kind>) and the user profile (~/.claude/<kind>).
#   commands : compared as <name>.md files (README.md ignored)
#   skills   : compared by SKILL.md, and only when both copies have one
# Pairs that are the same file (-ef, e.g. a symlink) are ignored. Each pair
# whose content differs (line endings aside) is a WARN. Per kind, when nothing
# diverges, a summary is emitted: INFO if identical duplicates exist, PASS if
# there are none.
# Globals: REPO_ROOT, HOME (read).
check_duplicates() {
  local kind repo_dir user_dir
  for kind in commands skills; do
    repo_dir="$REPO_ROOT/.claude/$kind"
    user_dir="$HOME/.claude/$kind"
    [ -d "$repo_dir" ] || continue
    if [ ! -d "$user_dir" ]; then
      emit "dup.$kind" duplicates PASS "no user-level ~/.claude/$kind (single source: repo)"
      continue
    fi

    local name repo_item user_item divergent=0 identical=0
    case "$kind" in
      commands)
        for repo_item in "$repo_dir"/*.md; do
          [ -f "$repo_item" ] || continue
          name="$(basename "$repo_item" .md)"
          [ "$name" = "README" ] && continue
          user_item="$user_dir/$name.md"
          [ -f "$user_item" ] || continue
          # same underlying file (symlink) - cannot drift
          [ "$repo_item" -ef "$user_item" ] && continue
          if ! same_content "$repo_item" "$user_item"; then
            divergent=$((divergent+1))
            emit "dup.cmd.$name" duplicates WARN \
              "/$name is DIVERGENT: repo and ~/.claude copies differ (harness may run the wrong one)" \
              "Reconcile: diff \"$repo_item\" \"$user_item\" then make ~/.claude/commands/$name.md match the repo (or remove it)"
            continue
          fi
          identical=$((identical+1))
        done
        ;;
      *)
        for repo_item in "$repo_dir"/*/; do
          [ -d "$repo_item" ] || continue
          name="$(basename "$repo_item")"
          user_item="$user_dir/$name"
          [ -d "$user_item" ] || continue
          # same underlying directory (symlink) - cannot drift
          [ "$repo_item" -ef "$user_item" ] && continue
          # Script-only / *.md-only skills are not comparable: skip them rather
          # than miscount.
          [ -f "$repo_item/SKILL.md" ] && [ -f "$user_item/SKILL.md" ] || continue
          if ! same_content "$repo_item/SKILL.md" "$user_item/SKILL.md"; then
            divergent=$((divergent+1))
            emit "dup.skill.$name" duplicates WARN \
              "skill '$name' is DIVERGENT: repo and ~/.claude SKILL.md differ" \
              "Reconcile ~/.claude/skills/$name with the repo copy (or remove the user-level one)"
            continue
          fi
          identical=$((identical+1))
        done
        ;;
    esac

    # Summary line only when nothing diverged (divergences were reported above).
    if [ "$divergent" -eq 0 ]; then
      if [ "$identical" -gt 0 ]; then
        emit "dup.$kind" duplicates INFO \
          "$identical $kind exist in BOTH repo and ~/.claude (identical now, but a redundant copy that can drift)" \
          "Consider a single source of truth for $kind to prevent future divergence"
      else
        emit "dup.$kind" duplicates PASS "no duplicate $kind across roots"
      fi
    fi
  done
}
# ---------------------------------------------------------------------------
# CHECK: rogue memories that contradict settings/identity.
# Deterministic core only: index integrity + a conservative, manifest-declared
# set of contradiction patterns evaluated against this machine's identity. The
# SEMANTIC contradiction pass (reasoning over all memories vs identity/settings)
# is a judgment task and is delegated to the model in SKILL.md, not grep.
# ---------------------------------------------------------------------------
# check_memory - deterministic memory checks only.
#   1. .claude/memory must exist, with a MEMORY.md index.
#   2. Orphans: every other *.md in that directory should have its file name
#      appear somewhere in MEMORY.md.
#   3. Manifest-declared contradiction patterns, each
#      { when_field, when_equals, grep, why }: a pattern is evaluated only when
#      identity.<when_field> equals when_equals, so it fires only where it
#      really is a contradiction (e.g. prescribing python3 on a `py` box).
#      Up to five matching files are listed in the WARN.
# Globals: MANIFEST, REPO_ROOT (read).
check_memory() {
  local memory_dir="$REPO_ROOT/.claude/memory"
  local index_file="$REPO_ROOT/.claude/memory/MEMORY.md"
  if [ ! -d "$memory_dir" ]; then
    emit memory.dir memory WARN "no .claude/memory directory" "Expected the shared memory store; restore via /sync"
    return
  fi

  if [ -f "$index_file" ]; then
    # orphan detection: count memory files whose name the index never mentions
    local note note_name orphan_count=0
    for note in "$memory_dir"/*.md; do
      [ -f "$note" ] || continue
      note_name="$(basename "$note")"
      [ "$note_name" = "MEMORY.md" ] && continue
      grep -qF "$note_name" "$index_file" 2>/dev/null || orphan_count=$((orphan_count+1))
    done
    if [ "$orphan_count" -eq 0 ]; then
      emit memory.index memory PASS "MEMORY.md index present; no orphaned memory files"
    else
      emit memory.orphans memory WARN "$orphan_count memory file(s) not referenced in MEMORY.md (orphaned)" \
        "Run /memory-dream or add the missing index lines"
    fi
  else
    emit memory.index memory WARN "MEMORY.md index missing" "Create .claude/memory/MEMORY.md (the loaded index)"
  fi

  # NB: pattern fields are read via @tsv, so when_equals/grep MUST NOT contain
  # tab characters.
  local pattern_count
  pattern_count="$(jq -r '(.memory.contradiction_patterns // []) | length' "$MANIFEST" 2>/dev/null)"
  [ "${pattern_count:-0}" -gt 0 ] 2>/dev/null || return 0

  local when_field when_value pattern reason matches
  while IFS=$'\t' read -r when_field when_value pattern reason; do
    [ -n "$when_field" ] || continue
    [ "$(idfield ".$when_field")" = "$when_value" ] || continue
    matches="$(grep -rliE "$pattern" "$memory_dir" 2>/dev/null | grep -vF 'MEMORY.md' | head -5 | tr '\n' ' ')"
    [ -n "$matches" ] || continue
    emit "memory.contradiction.$when_field" memory WARN \
      "memory may contradict identity.$when_field=$when_value ($reason): $matches" \
      "Review the listed memory file(s); correct or delete if they prescribe the wrong behaviour for this machine"
  done < <(jq -r '(.memory.contradiction_patterns // [])[] | [.when_field, .when_equals, .grep, .why] | @tsv' "$MANIFEST")
}
# ---------------------------------------------------------------------------
# Build the census JSON from accumulated results
# ---------------------------------------------------------------------------
# build_census - print the census JSON document for this run on stdout.
# The grade is RED when any result is FAIL, AMBER when any is WARN, GREEN
# otherwise. The document carries host/session/platform/arch, the grade, the
# run timestamp, the manifest schema version, per-status counts and the full
# result list read from $RESULTS_FILE.
# Globals: RESULTS_FILE, MANIFEST, HOST, SESSION, PLATFORM, ARCH, RUN_TS (read).
build_census() {
  local fail_count warn_count grade manifest_version
  fail_count="$(jq -s '[.[]|select(.status=="FAIL")]|length' "$RESULTS_FILE")"
  warn_count="$(jq -s '[.[]|select(.status=="WARN")]|length' "$RESULTS_FILE")"
  if [ "$fail_count" -gt 0 ]; then
    grade="RED"
  elif [ "$warn_count" -gt 0 ]; then
    grade="AMBER"
  else
    grade="GREEN"
  fi
  manifest_version="$(jq -r '.schema_version' "$MANIFEST")"
  jq -s \
    --arg host "$HOST" \
    --arg session "$SESSION" \
    --arg platform "$PLATFORM" \
    --arg arch "$ARCH" \
    --arg grade "$grade" \
    --arg ts "$RUN_TS" \
    --arg mver "$manifest_version" \
'{
host:$host, session:$session, platform:$platform, arch:$arch,
grade:$grade, generated_at:$ts, manifest_version:$mver,
summary: { pass:([.[]|select(.status=="PASS")]|length),
warn:([.[]|select(.status=="WARN")]|length),
fail:([.[]|select(.status=="FAIL")]|length),
info:([.[]|select(.status=="INFO")]|length) },
results: .
}' "$RESULTS_FILE"
}
# ---------------------------------------------------------------------------
# Human report
# ---------------------------------------------------------------------------
# print_report CENSUS_JSON - render the census as a human-readable report on
# stdout: a header (host, platform/arch, grade, status counts, manifest
# version, run timestamp), then FAIL, WARN and INFO results in that order
# (with fix hints for FAIL/WARN), then the PASS count for each category.
# Globals: HOST, PLATFORM, ARCH, RUN_TS (read).
print_report() {
  local census="$1" grade counts manifest_version
  local rule="============================================================"
  grade="$(printf '%s\n' "$census" | jq -r .grade)"
  counts="$(printf '%s\n' "$census" | jq -r '.summary | "PASS \(.pass) WARN \(.warn) FAIL \(.fail) INFO \(.info)"')"
  manifest_version="$(printf '%s\n' "$census" | jq -r .manifest_version)"

  printf '\n'
  printf '%s\n' "$rule"
  printf '%s\n' " ClaudeTools self-check - $HOST ($PLATFORM/$ARCH)"
  printf '%s\n' " Grade: $grade $counts"
  printf '%s\n' " Manifest: $manifest_version (provisional) $RUN_TS"
  printf '%s\n' "$rule"

  # FAIL then WARN then INFO; PASS summarized per category
  printf '%s\n' "$census" | jq -r '
def mark(s): if s=="FAIL" then "[FAIL]" elif s=="WARN" then "[WARN]"
elif s=="INFO" then "[INFO]" elif s=="SKIP" then "[SKIP]" else "[ OK ]" end;
(.results | map(select(.status=="FAIL"))) as $f
| (.results | map(select(.status=="WARN"))) as $w
| (.results | map(select(.status=="INFO"))) as $i
| (if ($f|length)>0 then "\nFAILURES:" else empty end),
($f[] | " [FAIL] \(.category)/\(.id): \(.detail)" + (if .fix!="" then "\n fix: \(.fix)" else "" end)),
(if ($w|length)>0 then "\nWARNINGS:" else empty end),
($w[] | " [WARN] \(.category)/\(.id): \(.detail)" + (if .fix!="" then "\n fix: \(.fix)" else "" end)),
(if ($i|length)>0 then "\nINFO / capability:" else empty end),
($i[] | " [INFO] \(.detail)")
'

  # per-category PASS counts
  printf '\n'
  printf '%s\n' "PASS by category:"
  printf '%s\n' "$census" | jq -r '.results | map(select(.status=="PASS")) | group_by(.category)[] | " \(.[0].category): \(length) ok"'
  printf '%s\n' "$rule"
}
# ---------------------------------------------------------------------------
# Publish census to coord API.
# The coord API uses the PATH-PARAM form: PUT /api/coord/components/{pk}/{comp}
# with a body of {state, version, notes, updated_by} (the body form 405s).
# The component segment must be slash-free (a slash 404s, even URL-encoded), so
# the per-machine component is "selfcheck_<host>" (NOT "selfcheck/<host>").
# ---------------------------------------------------------------------------
# Per-machine coord component name (slash-free, see the note above).
COMPONENT="selfcheck_$HOST"

# publish_census CENSUS_JSON - PUT this machine's census to the coord API.
# The grade becomes the component state and the compact census JSON travels in
# the notes field. curl runs with -f so an HTTP error status (>= 400) counts
# as a failure instead of being reported as a successful publish. On any
# failure the same method/path/body is appended to .claude/coord-queue.jsonl
# so a later drain can replay it; if that append fails too, an error is
# printed rather than a false "queued" message.
# Globals: API, COMPONENT, MANIFEST, SESSION, REPO_ROOT, RUN_TS (read).
publish_census() {
  local census="$1" grade compact body path
  grade="$(echo "$census" | jq -r .grade)"
  compact="$(echo "$census" | jq -c .)"
  path="/api/coord/components/claudetools/$COMPONENT"
  body="$(jq -nc --arg state "$grade" \
    --arg ver "$(jq -r '.schema_version' "$MANIFEST")" --arg notes "$compact" --arg by "$SESSION" \
    '{state:$state, version:$ver, notes:$notes, updated_by:$by}')"
  if curl -sf -m 8 -X PUT "$API$path" -H "Content-Type: application/json" -d "$body" >/dev/null 2>&1; then
    echo "[OK] Published census to coord: component $COMPONENT = $grade"
  else
    # softfail per coordination protocol - queue the SAME path/body so a
    # later /sync drain replays a request that actually works.
    local q="$REPO_ROOT/.claude/coord-queue.jsonl"
    if jq -nc --arg path "$path" --argjson b "$body" --arg ts "$RUN_TS" \
      '{ts:$ts, method:"PUT", path:$path, body:$b}' >> "$q" 2>/dev/null; then
      echo "[WARN] coord unreachable or request rejected; census queued to .claude/coord-queue.jsonl"
    else
      echo "[ERROR] coord publish failed and the census could not be queued to .claude/coord-queue.jsonl" >&2
    fi
  fi
}
# ---------------------------------------------------------------------------
# Subcommand: fanout - request all instances to run /self-check --publish
# ---------------------------------------------------------------------------
# do_fanout - broadcast a coord message to ALL_SESSIONS asking every machine
# to run /self-check, remediate itself, and publish its census.
# curl runs with -f so an HTTP error status (>= 400) is treated as a failed
# broadcast rather than reported as success.
# Exits the script with status 1 when the broadcast fails.
# Globals: API, SESSION, RUN_TS (read).
do_fanout() {
  local subj body payload
  subj="[self-check] Fleet census + self-remediation request"
  body="On THIS machine: (1) run /self-check ; (2) apply the suggested fix commands it prints for any FAIL/WARN - fix your OWN machine, locally, with your operator present (nobody fixes you remotely) ; (3) re-run /self-check to confirm GREEN ; (4) run /self-check --publish to report your census (component selfcheck_<host>) to coord. The check is read-only; only --publish writes (your census only). Requested by $SESSION at $RUN_TS."
  payload="$(jq -nc --arg from "$SESSION" --arg subj "$subj" --arg body "$body" \
    '{from_session:$from, to_session:"ALL_SESSIONS", project_key:"claudetools", subject:$subj, body:$body}')"
  if curl -sf -m 8 -X POST "$API/api/coord/messages" -H "Content-Type: application/json" -d "$payload" >/dev/null 2>&1; then
    echo "[OK] Broadcast census request to ALL_SESSIONS."
  else
    echo "[ERROR] Failed to broadcast (coord unreachable or request rejected)." >&2
    exit 1
  fi
}
# ---------------------------------------------------------------------------
# Subcommand: aggregate - read all published censuses, build fleet view
# ---------------------------------------------------------------------------
# do_aggregate - fetch every published census from coord and print a fleet
# view: one line per machine, the tools present on all vs only some machines
# (derived from tool.* PASS results), and the fix list each non-GREEN machine
# should apply to itself.
# curl runs with -f so an HTTP error status (>= 400) yields no output and is
# reported as an error, instead of the error body being parsed and shown as
# "no censuses published yet".
# Exits the script with status 1 when coord cannot be queried.
# Globals: API (read).
do_aggregate() {
  local comps
  comps="$(curl -sf -m 8 "$API/api/coord/components?project_key=claudetools" 2>/dev/null)"
  if [ -z "$comps" ]; then
    echo "[ERROR] coord unreachable or returned an HTTP error." >&2
    exit 1
  fi

  # The coord API returns {states:[...], total:N}; each row's grade is .state and
  # the full census JSON is in .notes. Keep selfcheck_* rows with parseable notes.
  # (.components / bare-array kept as defensive fallbacks.)
  local censuses
  censuses="$(echo "$comps" | jq -c '
    ( .states? // .components? // (if type=="array" then . else [] end) ) as $rows
    | ($rows // [])
    | map(select((.component? // "") | startswith("selfcheck")))
    | map(.notes | try fromjson catch empty)
  ' 2>/dev/null)"

  local n
  n="$(echo "$censuses" | jq 'length' 2>/dev/null || echo 0)"
  if [ "${n:-0}" -eq 0 ]; then
    echo "No published censuses found yet. Run 'self-check.sh fanout', then have each machine run /self-check --publish."
    return
  fi

  echo "============================================================"
  echo " Fleet census: $n machine(s) reporting"
  echo "============================================================"
  # Tab-separated rows aligned by `column`; plain rows when column is unavailable.
  echo "$censuses" | jq -r '.[] | " \(.grade)\t\(.host)\t\(.platform)/\(.arch)\tP\(.summary.pass) W\(.summary.warn) F\(.summary.fail)\t\(.generated_at)"' | column -t -s$'\t' 2>/dev/null \
    || echo "$censuses" | jq -r '.[] | " \(.grade) \(.host) \(.platform)/\(.arch) P\(.summary.pass) W\(.summary.warn) F\(.summary.fail)"'
  echo ""
  echo "Proposed baseline (intersection = required everywhere; symmetric diff = capability-gated):"
  # Tools present on every machine vs only some, derived from tool.* PASS results.
  echo "$censuses" | jq -r '
    [ .[] | { host:.host, tools:( .results | map(select((.id|startswith("tool."))) | select(.status=="PASS") | (.id|sub("^tool.";""))) ) } ] as $m
    | ($m|length) as $count
    | ([ $m[].tools[] ] | unique) as $all
    | " tools on ALL \($count): " + ( [ $all[] | . as $t | select( ([ $m[] | select(.tools|index($t)) ]|length) == $count ) ] | join(", ") ),
      " tools on SOME only: " + ( [ $all[] | . as $t | select( ([ $m[] | select(.tools|index($t)) ]|length) < $count ) ] | join(", ") )
  ' 2>/dev/null
  echo ""
  echo "Machines that must self-remediate (RED/AMBER) - each fixes ITSELF, then re-runs + re-publishes:"
  local needfix
  needfix="$(echo "$censuses" | jq -r '
    .[] | select(.grade!="GREEN")
    | " \(.host) [\(.grade)] should run, in order:\n"
      + ( [ .results[] | select(.status=="FAIL" or .status=="WARN") | select(.fix!="")
            | " - \(.fix)" ] | join("\n") )
      + "\n then: /self-check --publish"
  ' 2>/dev/null)"
  if [ -n "$needfix" ]; then
    echo "$needfix"
  else
    echo " (none - whole fleet is GREEN)"
  fi
  echo "============================================================"
  echo "We do NOT fix remote machines. Relay each machine's fix list to its operator;"
  echo "they self-remediate locally, re-run /self-check, and re-publish until GREEN."
  echo "Once the fleet is reporting consistently, ratify baseline/manifest.json with Mike."
}
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
# RUN_TS is passed in by the caller (SKILL.md instructs a real UTC stamp);
# fall back to `date` if available so the script is runnable standalone.
# Timestamp recorded in the census: SELFCHECK_TS when the caller sets it,
# otherwise the current UTC time, otherwise the literal "unknown".
RUN_TS="${SELFCHECK_TS:-$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo unknown)}"
# First argument selects the mode; no argument means the human report.
MODE="${1:-report}"
# fanout and aggregate talk to coord only and never run the local checks.
case "$MODE" in
fanout) do_fanout; exit 0 ;;
aggregate) do_aggregate; exit 0 ;;
esac
# run all checks
check_identity
check_tools
check_capability_tier
check_files
check_settings_hooks
check_git
check_skills_commands
check_duplicates
check_memory
check_vault
check_connectivity
# Fold the accumulated results into the census document.
CENSUS="$(build_census)"
# Any mode other than --json / --publish (including unrecognised arguments)
# prints the human report.
case "$MODE" in
--json) echo "$CENSUS" ;;
--publish) print_report "$CENSUS"; publish_census "$CENSUS" ;;
report|*) print_report "$CENSUS" ;;
esac
# exit code reflects grade for scripting (0 GREEN, 1 AMBER, 2 RED)
# An unrecognised or empty grade also exits 0.
GR="$(echo "$CENSUS" | jq -r .grade)"
case "$GR" in GREEN) exit 0 ;; AMBER) exit 1 ;; RED) exit 2 ;; *) exit 0 ;; esac