New /self-check skill: each machine probes its own ClaudeTools harness wiring (identity.json paths, required tooling, settings.json hooks, skill/command/script set, vault decrypt, coord/Gitea connectivity, Ollama capability tier) and grades RED/AMBER/GREEN against a checked-in provisional baseline manifest. - Capability-tier model: architectural/OS/hardware differences (e.g. no local Ollama) select a fallback ruleset instead of failing. - Duplicate detection: flags command/skill names that diverge between the repo and ~/.claude (the "same /cmd, different behaviour" cross-machine bug); CRLF-only diffs ignored. - Memory check: index + orphan detection, plus a model-driven semantic pass for memories that contradict identity/settings. - V1 is a census tool: --publish writes a per-machine census to coord (component selfcheck_<host>); fanout requests the fleet to self-check + self-remediate + re-publish; aggregate derives the proposed baseline. No machine ever fixes another. Reviewed twice by the Code Review Agent; three CRITICAL coord-API bugs and the CRLF false-WARN found and fixed, verified live against the coord API. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
747 lines
37 KiB
Bash
747 lines
37 KiB
Bash
#!/usr/bin/env bash
|
|
# self-check.sh - ClaudeTools harness self-diagnosis / fleet conformance probe.
|
|
#
|
|
# V1 is a CENSUS tool. Each machine probes its own harness wiring (tools,
|
|
# identity, hooks, skills, commands, scripts, connectivity, capability tier),
|
|
# grades what it can against the provisional baseline manifest, and can publish
|
|
# the result to the coord API so the fleet can be compared and the baseline
|
|
# refined from real data. See ../SKILL.md and ../baseline/README.md.
|
|
#
|
|
# Usage:
|
|
# self-check.sh Run checks, print a human report. (default)
|
|
# self-check.sh --json Emit the structured census JSON to stdout only.
|
|
# self-check.sh --publish Run checks, then PUT the census to coord (component selfcheck_<host>).
|
|
# self-check.sh fanout Broadcast a request to ALL_SESSIONS to run /self-check --publish.
|
|
# self-check.sh aggregate Read every machine's published census and print a fleet table
|
|
# plus a proposed-baseline (intersection/union) summary.
|
|
#
|
|
# Portable: bash 3.2+ (macOS), Git Bash (Windows), Linux. Deps: jq, curl.
|
|
# Read-only. It collects and reports; it changes nothing on the machine.
|
|
|
|
set -u

# ---------------------------------------------------------------------------
# Bootstrap: locate this script, the skill directory one level up, the repo
# root four levels up, and the baseline manifest inside the skill directory.
# ---------------------------------------------------------------------------
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
SKILL_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
REPO_ROOT="$(cd "$SKILL_DIR/../../.." && pwd)"
MANIFEST="$SKILL_DIR/baseline/manifest.json"
|
|
|
|
# Hard dependency: abort with exit code 2 when jq is not on PATH.
command -v jq >/dev/null 2>&1 || {
  echo "[ERROR] jq is required and not found on PATH. Install jq, then re-run." >&2
  exit 2
}
|
|
# Some Windows jq builds (winget) emit CRLF line endings; a trailing \r corrupts
# every `for x in $(jq ...)` word and `read`-from-@tsv field. Strip \r from all
# jq output (it is insignificant JSON whitespace and never wanted in raw values).
#
# Arguments: passed through unchanged to the real jq binary.
# Outputs:   jq's stdout with every \r removed.
# Returns:   the exit status of jq itself. Without this the function would
#            return the status of `tr` (always 0), so `jq -e` validity checks
#            and parse-error detection could never fail.
jq() {
  command jq "$@" | tr -d '\r'
  return "${PIPESTATUS[0]}"
}
|
|
# Hard dependency: abort with exit code 2 when curl is not on PATH.
command -v curl >/dev/null 2>&1 || {
  echo "[ERROR] curl is required and not found on PATH." >&2
  exit 2
}

# The baseline manifest drives every check; without it nothing can be graded.
[ -f "$MANIFEST" ] || {
  echo "[ERROR] Baseline manifest not found: $MANIFEST" >&2
  exit 2
}
|
|
|
|
# identity.json: prefer repo copy, then ~/.claude (mirrors check-messages.sh).
# IDENTITY stays empty when neither candidate exists.
IDENTITY=""
for c in "$REPO_ROOT/.claude/identity.json" "$HOME/.claude/identity.json"; do
  if [ -f "$c" ]; then
    IDENTITY="$c"
    break
  fi
done
|
|
|
|
# idfield <jq-path>
# Print the value at the given jq path in identity.json, or nothing when the
# value is null/false/absent. Returns non-zero without output when no
# identity file was located.
idfield() {
  if [ -z "$IDENTITY" ]; then
    return 1
  fi
  jq -r "$1 // empty" "$IDENTITY" 2>/dev/null
}
|
|
|
|
# Host and session id: a trailing ".local" is dropped from the hostname.
HOSTNAME_RAW="$(hostname 2>/dev/null || echo unknown)"
HOST="${HOSTNAME_RAW%.local}"
SESSION="${HOST}/claude-main"

# Coord API base URL: identity.json wins, otherwise the built-in default.
API="$(idfield '.coord_api')"
API="${API:-http://172.16.3.30:8001}"

# Platform: identity.json wins, otherwise derived from `uname -s`.
PLATFORM="$(idfield '.platform')"
if [ -z "$PLATFORM" ]; then
  case "$(uname -s)" in
    Darwin) PLATFORM="macos" ;;
    Linux) PLATFORM="linux" ;;
    CYGWIN*|MINGW*|MSYS*) PLATFORM="windows" ;;
    *) PLATFORM="unknown" ;;
  esac
fi

# Architecture: identity.json wins, otherwise `uname -m`.
ARCH="$(idfield '.architecture')"
if [ -z "$ARCH" ]; then
  ARCH="$(uname -m 2>/dev/null || echo unknown)"
fi
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Results accumulation. Each check appends one compact JSON object.
|
|
# status in {PASS, WARN, FAIL, SKIP, INFO}. Grade: any FAIL->RED, WARN->AMBER.
|
|
# ---------------------------------------------------------------------------
|
|
# Scratch file that collects one JSON object per check result. Falls back to
# a PID-based name under TMPDIR (or /tmp) when mktemp is unavailable.
if ! RESULTS_FILE="$(mktemp 2>/dev/null)"; then
  RESULTS_FILE="${TMPDIR:-/tmp}/selfcheck.$$"
fi
: > "$RESULTS_FILE"
trap 'rm -f "$RESULTS_FILE" 2>/dev/null' EXIT
|
|
|
|
# emit <id> <category> <status> <detail> [fix]
# Append one compact JSON result object to RESULTS_FILE. The fix hint is
# optional and defaults to the empty string.
emit() {
  local id="$1" category="$2" status="$3" detail="$4" fix="${5:-}"
  jq -nc --arg id "$id" --arg cat "$category" --arg st "$status" --arg detail "$detail" --arg fix "$fix" \
    '{id:$id,category:$cat,status:$st,detail:$detail,fix:$fix}' >> "$RESULTS_FILE"
}
|
|
|
|
# reachable <url>
# Succeeds (exit 0) when curl completes a request to the URL within 4 seconds;
# the response body is discarded and curl's own diagnostics are silenced.
reachable() {
  curl --silent --output /dev/null --max-time 4 "$1" 2>/dev/null
}
|
|
|
|
# same_content <file-a> <file-b>
# Line-ending-insensitive equality: a repo LF copy and a ~/.claude CRLF copy
# count as the SAME content (the cross-machine case this check polices), so
# both sides are compared with \r stripped instead of byte-for-byte on the
# raw files. Succeeds only when the stripped streams are identical.
same_content() {
  local left="$1" right="$2"
  cmp -s <(tr -d '\r' < "$left") <(tr -d '\r' < "$right")
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: identity
|
|
# ---------------------------------------------------------------------------
|
|
# check_identity
# Validates identity.json and the facts it declares about this machine.
# Globals read: IDENTITY, MANIFEST, REPO_ROOT, HOST.
# Emits (via emit): identity.present / identity.parse, identity.fields,
#   identity.claudetools_root, identity.vault_path, identity.hostname,
#   identity.git_name, identity.git_email.
# Stops after the first result when the file is missing or is not valid JSON.
check_identity() {
  local f v missing=""
  local ctroot vpath2 idmach gn ge idn ide

  if [ -z "$IDENTITY" ]; then
    emit identity.present identity FAIL "identity.json not found (.claude or ~/.claude)" \
      "Run onboarding; create .claude/identity.json then 'bash .claude/scripts/migrate-identity.sh'"
    return
  fi
  # Invoke the jq binary directly for the validity test: only the exit status
  # matters here, and it must not be masked by a pipeline stage appended to
  # jq's output (a masked status would report corrupt JSON as valid).
  if ! command jq -e . "$IDENTITY" >/dev/null 2>&1; then
    emit identity.parse identity FAIL "identity.json is not valid JSON: $IDENTITY" "Fix the JSON syntax"
    return
  fi
  emit identity.present identity PASS "identity.json present and valid: $IDENTITY"

  # Required fields come from the manifest; collect every one that is
  # absent or empty in identity.json.
  for f in $(jq -r '.required_identity_fields[]' "$MANIFEST"); do
    v="$(jq -r ".$f // empty" "$IDENTITY" 2>/dev/null)"
    [ -z "$v" ] && missing="$missing $f"
  done
  if [ -n "$missing" ]; then
    emit identity.fields identity WARN "missing/empty identity fields:$missing" \
      "bash .claude/scripts/migrate-identity.sh (populates machine-specific fields)"
  else
    emit identity.fields identity PASS "all required identity fields present"
  fi

  # --- path fields: identity.json is the map of WHERE things live on this box.
  # It is foundational - every later check trusts claudetools_root / vault_path.
  # Verify they resolve to real locations and that claudetools_root is in fact
  # the repo we are running from (a stale clone path is a silent footgun).
  norm() { # path -> lowercase, forward-slash, drive-letter, no trailing slash
    local p="$1"
    command -v cygpath >/dev/null 2>&1 && p="$(cygpath -m "$p" 2>/dev/null || echo "$p")"
    printf '%s' "$p" | tr 'A-Z' 'a-z' | sed 's#\\#/#g; s#/\{1,\}$##'
  }
  ctroot="$(idfield '.claudetools_root')"
  if [ -z "$ctroot" ]; then
    emit identity.claudetools_root identity FAIL "identity.claudetools_root not set" \
      "Set claudetools_root in identity.json to this repo's absolute path"
  elif [ ! -d "$ctroot" ]; then
    emit identity.claudetools_root identity FAIL "claudetools_root does not exist: $ctroot" \
      "Fix claudetools_root in identity.json (machine moved/renamed the repo?)"
  elif [ "$(norm "$ctroot")" != "$(norm "$REPO_ROOT")" ]; then
    emit identity.claudetools_root identity WARN \
      "claudetools_root ($ctroot) != running repo ($REPO_ROOT)" \
      "Reconcile claudetools_root in identity.json with the repo you actually run from"
  else
    emit identity.claudetools_root identity PASS "claudetools_root resolves to this repo ($ctroot)"
  fi

  vpath2="$(idfield '.vault_path')"
  if [ -z "$vpath2" ]; then
    emit identity.vault_path identity FAIL "identity.vault_path not set (cannot locate the SOPS vault)" \
      "Set vault_path in identity.json to the cloned vault repo path"
  elif [ ! -d "$vpath2" ]; then
    emit identity.vault_path identity FAIL "vault_path does not exist: $vpath2" \
      "Clone the vault repo and set vault_path in identity.json"
  else
    emit identity.vault_path identity PASS "vault_path resolves ($vpath2)"
  fi

  # machine field vs actual hostname (case-insensitive comparison)
  idmach="$(idfield '.machine')"
  if [ -n "$idmach" ] && [ "$(echo "$idmach" | tr 'A-Z' 'a-z')" != "$(echo "$HOST" | tr 'A-Z' 'a-z')" ]; then
    emit identity.hostname identity WARN "identity.machine='$idmach' != actual hostname '$HOST'" \
      "Update .machine in identity.json (did you clone onto a new box?)"
  else
    emit identity.hostname identity PASS "identity.machine matches hostname ($HOST)"
  fi

  # git config vs identity (only compared when identity declares the value)
  gn="$(git -C "$REPO_ROOT" config user.name 2>/dev/null)"
  ge="$(git -C "$REPO_ROOT" config user.email 2>/dev/null)"
  idn="$(idfield '.full_name')"
  ide="$(idfield '.email')"
  if [ -n "$idn" ] && [ "$gn" != "$idn" ]; then
    emit identity.git_name identity WARN "git user.name='$gn' != identity.full_name='$idn'" \
      "git config user.name \"$idn\""
  else
    emit identity.git_name identity PASS "git user.name matches identity ($gn)"
  fi
  if [ -n "$ide" ] && [ "$ge" != "$ide" ]; then
    emit identity.git_email identity WARN "git user.email='$ge' != identity.email='$ide'" \
      "git config user.email \"$ide\""
  else
    emit identity.git_email identity PASS "git user.email matches identity ($ge)"
  fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: tooling (required + capability-gated)
|
|
# ---------------------------------------------------------------------------
|
|
# toolver <command>
# Best-effort one-line version string: the first line of `<command> --version`
# on stdout, or nothing. Never fails.
# The probed command gets /dev/null as stdin: toolver is called from inside
# `while read ... done < <(...)` loops, and a tool that reads its stdin would
# otherwise swallow the rows the loop has not consumed yet.
toolver() {
  "$1" --version 2>/dev/null </dev/null | head -1 || true
}
|
|
# check_tools
# Grades tooling against the manifest: every required tool must be on PATH
# (FAIL otherwise), at least one listed python interpreter must exist, and
# capability tools are reported as INFO only (present or absent).
check_tools() {
  local tool reason
  while IFS=$'\t' read -r tool reason; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      emit "tool.$tool" tooling FAIL "$tool MISSING (required: $reason)" "Install $tool and ensure it is on PATH"
      continue
    fi
    emit "tool.$tool" tooling PASS "$tool present ($(toolver "$tool"))"
  done < <(jq -r '.required_tools[] | [.name, .why] | @tsv' "$MANIFEST")

  # python: the first manifest candidate found on PATH wins
  local interp="" candidate declared
  for candidate in $(jq -r '.required_python.any_of[]' "$MANIFEST"); do
    if command -v "$candidate" >/dev/null 2>&1; then
      interp="$candidate"
      break
    fi
  done
  if [ -z "$interp" ]; then
    emit tool.python tooling FAIL "no python interpreter found (tried py/python3/python)" "Install Python"
  else
    declared="$(idfield '.python.command')"
    if [ -z "$declared" ] || command -v "$declared" >/dev/null 2>&1; then
      emit tool.python tooling PASS "python available ($interp; identity declares '${declared:-unset}')"
    else
      emit tool.python tooling WARN "identity.python.command='$declared' not on PATH; '$interp' is available" \
        "Update .python.command in identity.json or re-run migrate-identity.sh"
    fi
  fi

  # capability tools - presence only, never FAIL
  local cap_tool cap_name cap_reason
  while IFS=$'\t' read -r cap_tool cap_name cap_reason; do
    if ! command -v "$cap_tool" >/dev/null 2>&1; then
      emit "cap.$cap_tool" capability INFO "$cap_tool absent [$cap_name] - capability off ($cap_reason)"
      continue
    fi
    emit "cap.$cap_tool" capability INFO "$cap_tool present [$cap_name] ($(toolver "$cap_tool"))"
  done < <(jq -r '.capability_tools[] | [.name, .capability, .why] | @tsv' "$MANIFEST")
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: capability tier (ollama) + effective ruleset
|
|
# ---------------------------------------------------------------------------
|
|
# check_capability_tier
# Probes the local Ollama endpoint and, when identity declares one, the
# fallback endpoint; resolves the tier (local > remote > none) and reports the
# manifest's Tier-0 engine for that tier. No Ollama at all is a WARN, not FAIL.
check_capability_tier() {
  local declared fb tier rule note
  local have_local="" have_remote=""
  declared="$(idfield '.ollama.endpoint')"
  fb="$(idfield '.ollama.fallback')"

  if reachable "http://localhost:11434/api/tags"; then
    have_local=1
  fi
  if [ -n "$fb" ] && reachable "${fb%/}/api/tags"; then
    have_remote=1
  fi

  tier="ollama_none"
  if [ -n "$have_local" ]; then
    tier="ollama_local"
  elif [ -n "$have_remote" ]; then
    tier="ollama_remote"
  fi
  rule="$(jq -r ".capability_rules.$tier.tier0_engine" "$MANIFEST")"

  # Does the resolved tier agree with what identity declares?
  case "$tier" in
    ollama_none)
      emit captier.ollama capability WARN "Ollama tier = NONE (local + fallback both unreachable). Effective rule: $rule" \
        "Confirm this machine is meant to run without Ollama; ensure Tier-0 work routes to haiku, not blocked"
      ;;
    *)
      note=""
      if [ "$tier" = "ollama_remote" ] && grep -q "localhost" <<<"$declared"; then
        note=" (identity declares localhost but local is down; using fallback $fb)"
      fi
      emit captier.ollama capability PASS "Ollama tier = ${tier#ollama_}${note}. Effective Tier-0: $rule"
      ;;
  esac
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: required scripts + hook files (exist + executable)
|
|
# ---------------------------------------------------------------------------
|
|
# check_files
# For every manifest-listed script and hook file (paths relative to the repo
# root): FAIL when missing, WARN when a .sh/.template file lacks the
# executable bit, PASS otherwise.
check_files() {
  local rel target
  for rel in $(jq -r '.required_scripts[], .required_hook_files[]' "$MANIFEST"); do
    target="$REPO_ROOT/$rel"
    if [ ! -f "$target" ]; then
      emit "file.$rel" files FAIL "missing: $rel" "Restore via /sync (git pull from Gitea)"
      continue
    fi
    if [ ! -x "$target" ]; then
      case "$rel" in
        *.sh|*.template)
          emit "file.$rel" files WARN "present but not executable: $rel" "chmod +x \"$rel\""
          continue
          ;;
      esac
    fi
    emit "file.$rel" files PASS "present: $rel"
  done
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: settings.json hooks wired correctly
|
|
# ---------------------------------------------------------------------------
|
|
# check_settings_hooks
# Verifies that .claude/settings.json exists, parses as JSON, and wires every
# hook the manifest requires (an event counts as wired when any hook command
# under it contains the manifest's `command_contains` needle). Also reports
# whether the .claude/current-mode file exists.
# Globals read: REPO_ROOT, MANIFEST.
# Emits (via emit): hooks.settings (only on failure), hook.<event>,
#   hook.current-mode.
check_settings_hooks() {
  local settings="$REPO_ROOT/.claude/settings.json"
  # Invoke the jq binary directly for the validity test: only the exit status
  # matters here, and it must not be masked by a pipeline stage appended to
  # jq's output (a masked status would let invalid JSON through and produce
  # misleading per-hook "NOT wired" failures instead).
  if [ ! -f "$settings" ] || ! command jq -e . "$settings" >/dev/null 2>&1; then
    emit hooks.settings hooks FAIL "settings.json missing or invalid JSON" "Restore .claude/settings.json via /sync"
    return
  fi
  local ev needle why found
  # NB: omit .matcher from the TSV - an empty middle field collapses under tab
  # IFS (tab is IFS-whitespace), shifting columns. We do not use the matcher here.
  while IFS=$'\t' read -r ev needle why; do
    # count of hook commands under this event that contain the needle
    found="$(jq -r --arg ev "$ev" --arg n "$needle" \
      '(.hooks[$ev] // []) | [.[].hooks[]?.command // ""] | map(select(contains($n))) | length' \
      "$settings" 2>/dev/null)"
    # A non-numeric/empty count makes the test error out (silenced) and is
    # treated as "not wired".
    if [ "${found:-0}" -gt 0 ] 2>/dev/null; then
      emit "hook.$ev" hooks PASS "$ev hook wired ($needle)"
    else
      emit "hook.$ev" hooks FAIL "$ev hook NOT wired (expected command containing '$needle' - $why)" \
        "Add the $ev hook to .claude/settings.json (see baseline manifest required_settings_hooks)"
    fi
  done < <(jq -r '.required_settings_hooks[] | [.event, .command_contains, .why] | @tsv' "$MANIFEST")

  # current-mode file (created by UserPromptSubmit hook, but flag if absent)
  if [ -f "$REPO_ROOT/.claude/current-mode" ]; then
    emit hook.current-mode hooks PASS "current-mode present ($(tr -d '[:space:]' < "$REPO_ROOT/.claude/current-mode"))"
  else
    emit hook.current-mode hooks WARN "current-mode missing (auto-created on next prompt)" "echo general > .claude/current-mode"
  fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: git remote + post-commit hooks
|
|
# ---------------------------------------------------------------------------
|
|
# check_git
# Confirms the repo's `origin` remote contains either the manifest's expected
# host string or its internal IP, and - when the manifest expects one - that a
# post-commit hook file exists under .git/hooks.
check_git() {
  local origin_url expected_host internal_ip hook_expected
  origin_url="$(git -C "$REPO_ROOT" remote get-url origin 2>/dev/null)"
  expected_host="$(jq -r '.git.remote_host_contains' "$MANIFEST")"
  internal_ip="$(jq -r '.git.remote_host_internal_ip' "$MANIFEST")"

  if [ -z "$origin_url" ]; then
    emit git.remote git WARN "no 'origin' remote on $REPO_ROOT" "git remote add origin <gitea-url>"
  elif grep -qF "$expected_host" <<<"$origin_url" || grep -qF "$internal_ip" <<<"$origin_url"; then
    emit git.remote git PASS "origin -> $origin_url"
  else
    emit git.remote git FAIL "origin does not point at ACG Gitea: $origin_url" \
      "git remote set-url origin http://<user>@$internal_ip:3000/azcomputerguru/claudetools.git"
  fi

  hook_expected="$(jq -r '.git.post_commit_hook_expected' "$MANIFEST")"
  if [ "$hook_expected" = "true" ]; then
    if [ ! -f "$REPO_ROOT/.git/hooks/post-commit" ]; then
      emit git.post_commit git WARN "main-repo post-commit hook NOT installed (HOOKS.md mandates dev-alerts hook)" \
        "cp .claude/hooks/post-commit.template .git/hooks/post-commit && chmod +x .git/hooks/post-commit"
    else
      emit git.post_commit git PASS "main-repo post-commit hook installed"
    fi
  fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: skills + commands conformance vs manifest
|
|
# ---------------------------------------------------------------------------
|
|
# check_skills_commands
# Conformance of .claude/skills and .claude/commands against the manifest:
# a listed item that is absent is a FAIL, a skill directory with no SKILL.md,
# no *.md and no scripts/ is a WARN, and anything present on disk but not in
# the manifest is reported as INFO (census candidate).
check_skills_commands() {
  local name dir
  # skills present
  for name in $(jq -r '.skills[]' "$MANIFEST"); do
    dir="$REPO_ROOT/.claude/skills/$name"
    if [ ! -d "$dir" ]; then
      emit "skill.$name" skills FAIL "skill MISSING: $name" "Restore .claude/skills/$name via /sync"
      continue
    fi
    if [ -f "$dir/SKILL.md" ] || ls "$dir"/*.md >/dev/null 2>&1 || [ -d "$dir/scripts" ]; then
      emit "skill.$name" skills PASS "skill present: $name"
    else
      emit "skill.$name" skills WARN "skill dir present but looks empty: $name" "Restore skill contents via /sync"
    fi
  done

  # extra skills not in manifest (drift to report, not fail in V1);
  # `known` is a |-delimited list so a name can be matched as "|name|"
  local known
  known="|$(jq -r '.skills[]' "$MANIFEST" | tr '\n' '|')"
  for dir in "$REPO_ROOT"/.claude/skills/*/; do
    [ -d "$dir" ] || continue
    name="$(basename "$dir")"
    case "$known" in
      *"|$name|"*) continue ;;
    esac
    emit "skill.extra.$name" skills INFO "skill present but NOT in baseline: $name (census candidate)"
  done

  # commands present
  for name in $(jq -r '.commands[]' "$MANIFEST"); do
    if [ ! -f "$REPO_ROOT/.claude/commands/$name.md" ]; then
      emit "cmd.$name" commands FAIL "command MISSING: /$name" "Restore .claude/commands/$name.md via /sync"
      continue
    fi
    emit "cmd.$name" commands PASS "command present: /$name"
  done

  # extra commands (README.md is not a command)
  known="|$(jq -r '.commands[]' "$MANIFEST" | tr '\n' '|')"
  for f in "$REPO_ROOT"/.claude/commands/*.md; do
    [ -f "$f" ] || continue
    name="$(basename "$f" .md)"
    if [ "$name" = "README" ]; then
      continue
    fi
    case "$known" in
      *"|$name|"*) continue ;;
    esac
    emit "cmd.extra.$name" commands INFO "command present but NOT in baseline: /$name (census candidate)"
  done
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: vault decrypt readiness
|
|
# ---------------------------------------------------------------------------
|
|
# check_vault
# Vault decrypt readiness, in order: identity.vault_path is set, the directory
# exists, sops and age are both on PATH, and (when vault.sh is executable)
# `vault.sh list` runs without error. Stops at the first blocking problem.
check_vault() {
  local vault_dir helper="$REPO_ROOT/.claude/scripts/vault.sh"
  vault_dir="$(idfield '.vault_path')"

  if [ -z "$vault_dir" ]; then
    emit vault.path vault WARN "identity.vault_path not set" "Set vault_path in identity.json"
    return
  fi
  if [ ! -d "$vault_dir" ]; then
    emit vault.path vault FAIL "vault_path does not exist: $vault_dir" "Clone the vault repo and set vault_path"
    return
  fi
  emit vault.path vault PASS "vault repo present: $vault_dir"

  if ! { command -v sops >/dev/null 2>&1 && command -v age >/dev/null 2>&1; }; then
    emit vault.tools vault FAIL "sops/age missing - cannot decrypt vault" "Install sops and age"
    return
  fi

  # Lightweight readiness: vault.sh list should enumerate entries without error.
  [ -x "$helper" ] || return 0
  if bash "$helper" list >/dev/null 2>&1; then
    emit vault.list vault PASS "vault.sh list succeeded (sops/age wired)"
  else
    emit vault.list vault WARN "vault.sh list failed - check age key + SOPS_AGE_KEY_FILE" \
      "Verify age key at the SOPS recipient path; run: bash .claude/scripts/vault.sh list"
  fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: connectivity
|
|
# ---------------------------------------------------------------------------
|
|
# check_connectivity
# Probes every manifest connectivity target. A reachable target is a PASS; an
# unreachable one is a FAIL when the manifest marks it required, else a WARN.
check_connectivity() {
  local label target required
  while IFS=$'\t' read -r label target required; do
    if reachable "$target"; then
      emit "net.$label" connectivity PASS "$label reachable ($target)"
      continue
    fi
    case "$required" in
      true)
        emit "net.$label" connectivity FAIL "$label UNREACHABLE ($target)" "Check VPN/Tailscale/network to 172.16.3.x"
        ;;
      *)
        emit "net.$label" connectivity WARN "$label unreachable ($target) - off-network is OK" ""
        ;;
    esac
  done < <(jq -r '.connectivity[] | [.name, .url, (.required|tostring)] | @tsv' "$MANIFEST")
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: duplicate command/skill definitions across search roots.
|
|
# Claude Code resolves slash commands and skills from BOTH the repo
|
|
# (.claude/commands, .claude/skills) and the user profile (~/.claude/...). When
|
|
# the same name exists in both with DIFFERENT content, the harness may resolve a
|
|
# different one than you expect - the "same /cmd, different behaviour on the Mac"
|
|
# bug. Divergent = WARN; identical = INFO (redundant copy that WILL drift).
|
|
# ---------------------------------------------------------------------------
|
|
# check_duplicates
# Compares same-named commands (*.md) and skills (SKILL.md) between the repo's
# .claude tree and the user's ~/.claude tree, ignoring line-ending differences.
# Each divergent pair is its own WARN; when nothing diverges, a single summary
# per kind is emitted: INFO if identical copies exist, PASS if there are none.
check_duplicates() {
  local kind repo_dir user_dir
  for kind in commands skills; do
    repo_dir="$REPO_ROOT/.claude/$kind"
    user_dir="$HOME/.claude/$kind"
    [ -d "$repo_dir" ] || continue
    if [ ! -d "$user_dir" ]; then
      emit "dup.$kind" duplicates PASS "no user-level ~/.claude/$kind (single source: repo)"
      continue
    fi

    # counters restart for each kind
    local name repo_item user_item diverged=0 identical=0
    case "$kind" in
      commands)
        for repo_item in "$repo_dir"/*.md; do
          [ -f "$repo_item" ] || continue
          name="$(basename "$repo_item" .md)"
          [ "$name" = "README" ] && continue
          user_item="$user_dir/$name.md"
          [ -f "$user_item" ] || continue
          # symlink to the same file - cannot drift
          [ "$repo_item" -ef "$user_item" ] && continue
          if same_content "$repo_item" "$user_item"; then
            identical=$((identical+1))
            continue
          fi
          diverged=$((diverged+1))
          emit "dup.cmd.$name" duplicates WARN \
            "/$name is DIVERGENT: repo and ~/.claude copies differ (harness may run the wrong one)" \
            "Reconcile: diff \"$repo_item\" \"$user_item\" then make ~/.claude/commands/$name.md match the repo (or remove it)"
        done
        ;;
      *)
        for repo_item in "$repo_dir"/*/; do
          [ -d "$repo_item" ] || continue
          name="$(basename "$repo_item")"
          user_item="$user_dir/$name"
          [ -d "$user_item" ] || continue
          # symlinked dir - cannot drift
          [ "$repo_item" -ef "$user_item" ] && continue
          # Only compare when BOTH have a SKILL.md; otherwise not comparable
          # (script-only / *.md-only skills) - skip rather than miscount.
          { [ -f "$repo_item/SKILL.md" ] && [ -f "$user_item/SKILL.md" ]; } || continue
          if same_content "$repo_item/SKILL.md" "$user_item/SKILL.md"; then
            identical=$((identical+1))
            continue
          fi
          diverged=$((diverged+1))
          emit "dup.skill.$name" duplicates WARN \
            "skill '$name' is DIVERGENT: repo and ~/.claude SKILL.md differ" \
            "Reconcile ~/.claude/skills/$name with the repo copy (or remove the user-level one)"
        done
        ;;
    esac

    # Divergent pairs were already reported individually; no summary then.
    [ "$diverged" -eq 0 ] || continue
    if [ "$identical" -gt 0 ]; then
      emit "dup.$kind" duplicates INFO \
        "$identical $kind exist in BOTH repo and ~/.claude (identical now, but a redundant copy that can drift)" \
        "Consider a single source of truth for $kind to prevent future divergence"
    else
      emit "dup.$kind" duplicates PASS "no duplicate $kind across roots"
    fi
  done
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CHECK: rogue memories that contradict settings/identity.
|
|
# Deterministic core only: index integrity + a conservative, manifest-declared
|
|
# set of contradiction patterns evaluated against this machine's identity. The
|
|
# SEMANTIC contradiction pass (reasoning over all memories vs identity/settings)
|
|
# is a judgment task and is delegated to the model in SKILL.md, not grep.
|
|
# ---------------------------------------------------------------------------
|
|
# check_memory
# Deterministic memory checks only: the .claude/memory directory and its
# MEMORY.md index exist, every other *.md there is mentioned by file name in
# the index, and manifest-declared contradiction patterns (each gated on an
# identity field having a given value) do not match any memory file.
check_memory() {
  local mdir="$REPO_ROOT/.claude/memory"
  local idx="$mdir/MEMORY.md"

  if [ ! -d "$mdir" ]; then
    emit memory.dir memory WARN "no .claude/memory directory" "Expected the shared memory store; restore via /sync"
    return
  fi

  if [ -f "$idx" ]; then
    # orphan detection: every *.md (except MEMORY.md) should be referenced in the index
    local entry fname orphan_count=0
    for entry in "$mdir"/*.md; do
      [ -f "$entry" ] || continue
      fname="$(basename "$entry")"
      [ "$fname" = "MEMORY.md" ] && continue
      grep -qF "$fname" "$idx" 2>/dev/null || orphan_count=$((orphan_count+1))
    done
    if [ "$orphan_count" -gt 0 ]; then
      emit memory.orphans memory WARN "$orphan_count memory file(s) not referenced in MEMORY.md (orphaned)" \
        "Run /memory-dream or add the missing index lines"
    else
      emit memory.index memory PASS "MEMORY.md index present; no orphaned memory files"
    fi
  else
    emit memory.index memory WARN "MEMORY.md index missing" "Create .claude/memory/MEMORY.md (the loaded index)"
  fi

  # Manifest-declared contradiction patterns. Each entry:
  #   { when_field, when_equals, grep, why } - only evaluated when this
  # machine's identity.<when_field> == when_equals, so a pattern fires only
  # where it is actually a contradiction (e.g. prescribing python3 on a `py` box).
  # NB: fields are read via @tsv, so when_equals/grep MUST NOT contain tab chars.
  local pattern_count
  pattern_count="$(jq -r '(.memory.contradiction_patterns // []) | length' "$MANIFEST" 2>/dev/null)"
  if [ "${pattern_count:-0}" -gt 0 ] 2>/dev/null; then
    local field expected regex reason matches
    while IFS=$'\t' read -r field expected regex reason; do
      [ -n "$field" ] || continue
      [ "$(idfield ".$field")" = "$expected" ] || continue
      # at most five matching files, space-separated; the index itself is excluded
      matches="$(grep -rliE "$regex" "$mdir" 2>/dev/null | grep -vF 'MEMORY.md' | head -5 | tr '\n' ' ')"
      [ -n "$matches" ] || continue
      emit "memory.contradiction.$field" memory WARN \
        "memory may contradict identity.$field=$expected ($reason): $matches" \
        "Review the listed memory file(s); correct or delete if they prescribe the wrong behaviour for this machine"
    done < <(jq -r '(.memory.contradiction_patterns // [])[] | [.when_field, .when_equals, .grep, .why] | @tsv' "$MANIFEST")
  fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Build the census JSON from accumulated results
|
|
# ---------------------------------------------------------------------------
|
|
# build_census
# Folds the accumulated per-check results in RESULTS_FILE into one census JSON
# document on stdout: host/session/platform/arch, overall grade (RED on any
# FAIL, else AMBER on any WARN, else GREEN), timestamp, manifest schema
# version, per-status counts, and the full result list.
# NOTE(review): reads RUN_TS, which is assigned outside this function - confirm
# it is always set before this is called (the script runs under `set -u`).
build_census() {
  local fail_count warn_count grade manifest_version
  fail_count="$(jq -s '[.[]|select(.status=="FAIL")]|length' "$RESULTS_FILE")"
  warn_count="$(jq -s '[.[]|select(.status=="WARN")]|length' "$RESULTS_FILE")"

  if [ "$fail_count" -gt 0 ]; then
    grade="RED"
  elif [ "$warn_count" -gt 0 ]; then
    grade="AMBER"
  else
    grade="GREEN"
  fi

  manifest_version="$(jq -r '.schema_version' "$MANIFEST")"
  jq -s \
    --arg host "$HOST" --arg session "$SESSION" --arg platform "$PLATFORM" --arg arch "$ARCH" \
    --arg grade "$grade" --arg ts "$RUN_TS" \
    --arg mver "$manifest_version" \
    '{
      host:$host, session:$session, platform:$platform, arch:$arch,
      grade:$grade, generated_at:$ts, manifest_version:$mver,
      summary: { pass:([.[]|select(.status=="PASS")]|length),
                 warn:([.[]|select(.status=="WARN")]|length),
                 fail:([.[]|select(.status=="FAIL")]|length),
                 info:([.[]|select(.status=="INFO")]|length) },
      results: .
    }' "$RESULTS_FILE"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Human report
|
|
# ---------------------------------------------------------------------------
|
|
# print_report <census-json>
# Renders the census for humans: a banner with host, grade and counts, then
# every FAIL, WARN and INFO result (fix hints included for FAIL/WARN when
# non-empty), then the number of PASS results per category.
print_report() {
  local doc="$1" grade counts manifest_version
  local rule="============================================================"
  grade="$(jq -r .grade <<<"$doc")"
  counts="$(jq -r '.summary | "PASS \(.pass) WARN \(.warn) FAIL \(.fail) INFO \(.info)"' <<<"$doc")"
  manifest_version="$(jq -r .manifest_version <<<"$doc")"

  printf '%s\n' ""
  printf '%s\n' "$rule"
  printf '%s\n' " ClaudeTools self-check - $HOST ($PLATFORM/$ARCH)"
  printf '%s\n' " Grade: $grade $counts"
  printf '%s\n' " Manifest: $manifest_version (provisional) $RUN_TS"
  printf '%s\n' "$rule"

  # FAIL then WARN then INFO; PASS summarized per category
  jq -r '
    def mark(s): if s=="FAIL" then "[FAIL]" elif s=="WARN" then "[WARN]"
      elif s=="INFO" then "[INFO]" elif s=="SKIP" then "[SKIP]" else "[ OK ]" end;
    (.results | map(select(.status=="FAIL"))) as $f
    | (.results | map(select(.status=="WARN"))) as $w
    | (.results | map(select(.status=="INFO"))) as $i
    | (if ($f|length)>0 then "\nFAILURES:" else empty end),
      ($f[] | " [FAIL] \(.category)/\(.id): \(.detail)" + (if .fix!="" then "\n fix: \(.fix)" else "" end)),
      (if ($w|length)>0 then "\nWARNINGS:" else empty end),
      ($w[] | " [WARN] \(.category)/\(.id): \(.detail)" + (if .fix!="" then "\n fix: \(.fix)" else "" end)),
      (if ($i|length)>0 then "\nINFO / capability:" else empty end),
      ($i[] | " [INFO] \(.detail)")
  ' <<<"$doc"

  # per-category PASS counts
  printf '%s\n' ""
  printf '%s\n' "PASS by category:"
  jq -r '.results | map(select(.status=="PASS")) | group_by(.category)[] | " \(.[0].category): \(length) ok"' <<<"$doc"
  printf '%s\n' "$rule"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Publish census to coord API.
|
|
# The coord API uses the PATH-PARAM form: PUT /api/coord/components/{pk}/{comp}
|
|
# with a body of {state, version, notes, updated_by} (the body form 405s).
|
|
# The component segment must be slash-free (a slash 404s, even URL-encoded), so
|
|
# the per-machine component is "selfcheck_<host>" (NOT "selfcheck/<host>").
|
|
# ---------------------------------------------------------------------------
|
|
COMPONENT="selfcheck_$HOST"

# publish_census CENSUS_JSON
#   PUTs this machine's census to coord as component $COMPONENT of project
#   "claudetools". The body is {state, version, notes, updated_by}: state is
#   the census grade, version the manifest schema_version, notes the compact
#   census JSON (as a string) and updated_by the session id.
#
#   Soft-fails by design: if the PUT does not succeed, the identical
#   method/path/body is appended to $REPO_ROOT/.claude/coord-queue.jsonl so it
#   can be replayed later. The function always returns 0.
#   Globals read: API, COMPONENT, MANIFEST, SESSION, REPO_ROOT, RUN_TS.
publish_census() {
  local census="$1" grade compact body path queue

  grade="$(echo "$census" | jq -r .grade)"
  compact="$(echo "$census" | jq -c .)"
  path="/api/coord/components/claudetools/$COMPONENT"
  body="$(jq -nc --arg state "$grade" \
    --arg ver "$(jq -r '.schema_version' "$MANIFEST")" --arg notes "$compact" --arg by "$SESSION" \
    '{state:$state, version:$ver, notes:$notes, updated_by:$by}')"

  # --fail is essential: without it curl exits 0 on HTTP 4xx/5xx, so a PUT the
  # server rejected (404/405/500) would be reported as published.
  if curl -s --fail -m 8 -X PUT "$API$path" -H "Content-Type: application/json" -d "$body" >/dev/null 2>&1; then
    echo "[OK] Published census to coord: component $COMPONENT = $grade"
    return 0
  fi

  # Softfail per coordination protocol - queue the SAME path/body so a later
  # /sync drain replays a request that actually works. Only claim "queued"
  # when the append really succeeded (the directory may not exist).
  queue="$REPO_ROOT/.claude/coord-queue.jsonl"
  if { jq -nc --arg path "$path" --argjson b "$body" --arg ts "$RUN_TS" \
        '{ts:$ts, method:"PUT", path:$path, body:$b}' >> "$queue"; } 2>/dev/null; then
    echo "[WARN] coord unreachable or rejected the request; census queued to .claude/coord-queue.jsonl"
  else
    echo "[WARN] coord unreachable or rejected the request; could NOT queue census to $queue"
  fi
  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommand: fanout - request all instances to run /self-check --publish
|
|
# ---------------------------------------------------------------------------
|
|
# do_fanout
#   Broadcasts a coord message to ALL_SESSIONS (project "claudetools") asking
#   every instance to run /self-check, remediate itself locally, and then
#   publish its census with /self-check --publish.
#   Prints "[OK] ..." on success. On failure prints an error to stderr and
#   EXITS the script with status 1 (it does not return).
#   Globals read: API, SESSION, RUN_TS.
do_fanout() {
  local subj body payload

  subj="[self-check] Fleet census + self-remediation request"
  body="On THIS machine: (1) run /self-check ; (2) apply the suggested fix commands it prints for any FAIL/WARN - fix your OWN machine, locally, with your operator present (nobody fixes you remotely) ; (3) re-run /self-check to confirm GREEN ; (4) run /self-check --publish to report your census (component selfcheck_<host>) to coord. The check is read-only; only --publish writes (your census only). Requested by $SESSION at $RUN_TS."
  payload="$(jq -nc --arg from "$SESSION" --arg subj "$subj" --arg body "$body" \
    '{from_session:$from, to_session:"ALL_SESSIONS", project_key:"claudetools", subject:$subj, body:$body}')"

  # --fail: without it curl exits 0 on HTTP 4xx/5xx and a rejected POST would
  # be announced as a successful broadcast.
  if curl -s --fail -m 8 -X POST "$API/api/coord/messages" -H "Content-Type: application/json" -d "$payload" >/dev/null 2>&1; then
    echo "[OK] Broadcast census request to ALL_SESSIONS."
  else
    echo "[ERROR] Failed to broadcast (coord unreachable or returned an HTTP error)." >&2
    exit 1
  fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommand: aggregate - read all published censuses, build fleet view
|
|
# ---------------------------------------------------------------------------
|
|
# do_aggregate
#   Reads every component row of project "claudetools" from coord, keeps the
#   selfcheck* rows whose .notes parse as a census OBJECT, and prints:
#     1. a one-line-per-machine fleet table (grade, host, platform, counts),
#     2. a proposed tool baseline (tools passing on ALL machines vs only SOME),
#     3. for every non-GREEN machine, the fix commands its own census suggests.
#   This is read-only reporting: nothing is written and no machine is fixed.
#   If coord cannot be read, prints an error to stderr and EXITS with status 1.
#   Globals read: API.
do_aggregate() {
  local comps censuses n needfix

  # --fail: without it an HTTP 4xx/5xx still yields a non-empty error body,
  # which would be misread below as "no censuses published yet".
  if ! comps="$(curl -s --fail -m 8 "$API/api/coord/components?project_key=claudetools" 2>/dev/null)" \
    || [ -z "$comps" ]; then
    echo "[ERROR] coord unreachable or returned an HTTP error." >&2
    exit 1
  fi

  # The coord API returns {states:[...], total:N}; each row's grade is .state and
  # the full census JSON is in .notes. Keep selfcheck* rows with parseable notes.
  # (.components / bare-array kept as defensive fallbacks.)
  # Notes that parse to something other than an object (e.g. "42") are dropped:
  # they are not censuses and would abort the jq programs further down.
  censuses="$(echo "$comps" | jq -c '
    ( .states? // .components? // (if type=="array" then . else [] end) ) as $rows
    | ($rows // [])
    | map(select((.component? // "") | tostring | startswith("selfcheck")))
    | map(.notes? | (try fromjson catch empty) | select(type=="object"))
  ' 2>/dev/null)"

  n="$(echo "$censuses" | jq 'length' 2>/dev/null || echo 0)"
  if [ "${n:-0}" -eq 0 ]; then
    echo "No published censuses found yet. Run 'self-check.sh fanout', then have each machine run /self-check --publish."
    return
  fi

  echo "============================================================"
  echo " Fleet census: $n machine(s) reporting"
  echo "============================================================"
  # Aligned table via column(1); plain space-separated fallback if it fails.
  echo "$censuses" | jq -r '.[] | " \(.grade)\t\(.host)\t\(.platform)/\(.arch)\tP\(.summary.pass) W\(.summary.warn) F\(.summary.fail)\t\(.generated_at)"' | column -t -s$'\t' 2>/dev/null \
    || echo "$censuses" | jq -r '.[] | " \(.grade) \(.host) \(.platform)/\(.arch) P\(.summary.pass) W\(.summary.warn) F\(.summary.fail)"'

  echo ""
  echo "Proposed baseline (intersection = required everywhere; symmetric diff = capability-gated):"
  # Tools present on every machine vs only some, derived from tool.* PASS results.
  # The dot in the prefix regex is escaped so only a literal "tool." is stripped.
  echo "$censuses" | jq -r '
    [ .[] | { host:.host, tools:( .results | map(select((.id|startswith("tool."))) | select(.status=="PASS") | (.id|sub("^tool\\.";""))) ) } ] as $m
    | ($m|length) as $count
    | ([ $m[].tools[] ] | unique) as $all
    | " tools on ALL \($count): " + ( [ $all[] | . as $t | select( ([ $m[] | select(.tools|index($t)) ]|length) == $count ) ] | join(", ") ),
      " tools on SOME only: " + ( [ $all[] | . as $t | select( ([ $m[] | select(.tools|index($t)) ]|length) < $count ) ] | join(", ") )
  ' 2>/dev/null

  echo ""
  echo "Machines that must self-remediate (RED/AMBER) - each fixes ITSELF, then re-runs + re-publishes:"
  # (.fix // "") treats a missing/null fix like an empty one instead of
  # printing a literal "- null" entry.
  needfix="$(echo "$censuses" | jq -r '
    .[] | select(.grade!="GREEN")
    | " \(.host) [\(.grade)] should run, in order:\n"
      + ( [ .results[] | select(.status=="FAIL" or .status=="WARN") | select((.fix // "") != "")
            | " - \(.fix)" ] | join("\n") )
      + "\n then: /self-check --publish"
  ' 2>/dev/null)"
  if [ -n "$needfix" ]; then
    echo "$needfix"
  else
    echo " (none - whole fleet is GREEN)"
  fi

  echo "============================================================"
  echo "We do NOT fix remote machines. Relay each machine's fix list to its operator;"
  echo "they self-remediate locally, re-run /self-check, and re-publish until GREEN."
  echo "Once the fleet is reporting consistently, ratify baseline/manifest.json with Mike."
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
# RUN_TS is passed in by the caller (SKILL.md instructs a real UTC stamp);
# fall back to `date` if available so the script is runnable standalone.
RUN_TS="${SELFCHECK_TS:-$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo unknown)}"

# First argument selects the mode; with no argument the human report is printed.
MODE="${1:-report}"

# Fleet subcommands do not probe this machine: they talk to coord and stop.
case "$MODE" in
  fanout) do_fanout; exit 0 ;;
  aggregate) do_aggregate; exit 0 ;;
esac

# run all checks
check_identity
check_tools
check_capability_tier
check_files
check_settings_hooks
check_git
check_skills_commands
check_duplicates
check_memory
check_vault
check_connectivity

CENSUS="$(build_census)"

# Validate the grade BEFORE reporting or publishing. If the census could not be
# built (empty/malformed JSON) the grade is not one of the three known values;
# previously that fell through to "exit 0", i.e. a broken run looked GREEN to
# scripts and --publish could send a malformed census to coord.
GR="$(echo "$CENSUS" | jq -r .grade 2>/dev/null)"
case "$GR" in
  GREEN|AMBER|RED) ;;
  *)
    echo "[ERROR] self-check could not build a valid census (grade: '${GR:-<empty>}'); nothing reported or published." >&2
    exit 3
    ;;
esac

case "$MODE" in
  --json) echo "$CENSUS" ;;
  --publish) print_report "$CENSUS"; publish_census "$CENSUS" ;;
  report|*) print_report "$CENSUS" ;;
esac

# exit code reflects grade for scripting (0 GREEN, 1 AMBER, 2 RED);
# 3 (above) means no valid census could be produced.
case "$GR" in
  GREEN) exit 0 ;;
  AMBER) exit 1 ;;
  RED) exit 2 ;;
esac
|