#!/usr/bin/env bash
# test-harness-guard.sh — false-positive / true-positive test matrix for harness-guard.sh.
#
# WHY: the guard is WARN-ONLY today; before it is promoted to FATAL (blocking) the
# harness-optimization plan requires proof of ZERO false positives on legitimate content
# plus reliable detection of the real footguns. This script is that proof, repeatable.
#
# It spins up a throwaway git repo, stages synthetic files, runs the REAL harness-guard.sh
# inside it (the guard cd's to its repo root and inspects the staged blobs), and asserts
# WARN / no-WARN per case. It also scans the actual tracked tree for content that the
# guard's detection patterns would flag, to size the real-world false-positive blast radius.
#
# Read-only against the real repo (the synthetic staging happens in a temp repo under TMP).
# Exit 0 = all cases passed; exit 1 = at least one mismatch (promotion NOT yet safe);
# exit 2 = the environment is unusable (no git repo, guard missing, scratch repo failed).

# Deliberately no `-e`: a failing case must be counted and reported, not abort the run.
set -uo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || {
  echo "[ERROR] not in a git repo" >&2
  exit 2
}
GUARD="$REPO_ROOT/.claude/scripts/harness-guard.sh"
[ -f "$GUARD" ] || {
  echo "[ERROR] guard not found: $GUARD" >&2
  exit 2
}

# Scratch directory; the PID-based name is only a fallback for hosts where `mktemp -d` fails.
TMP="$(mktemp -d 2>/dev/null || echo "${TMPDIR:-/tmp}/guardtest.$$")"
mkdir -p "$TMP"

# Remove the scratch repo on every exit path. `${TMP:?}` refuses to expand to an empty path.
cleanup() { rm -rf -- "${TMP:?}" 2>/dev/null; }
trap cleanup EXIT

# --- isolated temp repo so we can stage synthetic content without touching the real tree
git -C "$TMP" init -q || {
  echo "[ERROR] could not initialise temp repo: $TMP" >&2
  exit 2
}
git -C "$TMP" config user.name "guard-test"
git -C "$TMP" config user.email "guard-test@local"
mkdir -p "$TMP/.claude/harness"   # so the guard's log path mkdir is a no-op

PASS=0
FAIL=0
FAILED_CASES=""

#######################################
# Stage one synthetic file in the temp repo, run the real guard, compare the verdict.
# Globals:   TMP, GUARD (read); PASS, FAIL, FAILED_CASES (written)
# Arguments: $1 - case name (used in the report)
#            $2 - expected verdict: "warn" or "clean"
#            $3 - path of the synthetic file, relative to the temp repo root
# Stdin:     content of the synthetic file
# Outputs:   one [PASS]/[FAIL] line on stdout, plus diagnostics on failure
#######################################
run_case() {
  local name="$1" expect="$2" file="$3"
  local out rc got

  # reset the temp index/worktree
  git -C "$TMP" reset -q --hard >/dev/null 2>&1 || true
  git -C "$TMP" rm -rq --cached . >/dev/null 2>&1 || true
  rm -f "$TMP"/*.* "$TMP"/* 2>/dev/null || true

  # Write the fixture and stage ONLY that path. `git add -A` would also stage anything the
  # guard left in the worktree during earlier cases; `-f` keeps a user-level ignore rule
  # from silently dropping a fixture. A case whose fixture never reached the index proves
  # nothing, so a setup failure is reported as a FAIL rather than evaluated.
  if ! mkdir -p "$TMP/$(dirname "$file")" 2>/dev/null \
    || ! cat > "$TMP/$file" \
    || ! git -C "$TMP" add -f -- "$file" >/dev/null 2>&1; then
    FAIL=$((FAIL+1))
    FAILED_CASES="$FAILED_CASES $name"
    printf ' [FAIL] %-34s could not stage fixture: %s\n' "$name" "$file"
    return
  fi

  # run the REAL guard from inside the temp repo
  out="$( cd "$TMP" && bash "$GUARD" 2>&1 )"; rc=$?

  if printf '%s\n' "$out" | grep -q '\[harness-guard\]\[WARN\]'; then
    got="warn"
  else
    got="clean"
  fi

  if [ "$got" = "$expect" ]; then
    PASS=$((PASS+1))
    printf ' [PASS] %-34s expected=%-5s got=%-5s\n' "$name" "$expect" "$got"
  else
    FAIL=$((FAIL+1))
    FAILED_CASES="$FAILED_CASES $name"
    printf ' [FAIL] %-34s expected=%-5s got=%-5s\n' "$name" "$expect" "$got"
    printf ' guard said: %s\n' "$(printf '%s' "$out" | grep WARN | head -2 | tr '\n' '|')"
    printf ' guard exit status: %s\n' "$rc"
  fi
}

echo "============================================================"
echo " harness-guard false-positive / true-positive matrix"
echo " guard: $GUARD"
echo "============================================================"
echo ""
echo "TRUE POSITIVES (must WARN):"

run_case "real-conflict-hunk" warn "src/app.rs" <<'EOF'
fn main() {
<<<<<<< HEAD
    let x = 1;
=======
    let x = 2;
>>>>>>> feature
}
EOF

run_case "unencrypted-sops" warn "infra/secret.sops.yaml" <<'EOF'
api_key: super-secret-plaintext
password: hunter2
EOF

run_case "private-key-openssh" warn "keys/id_ed25519" <<'EOF'
-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAAB
-----END OPENSSH PRIVATE KEY-----
EOF

run_case "private-key-rsa" warn "keys/id_rsa" <<'EOF'
-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEA...
-----END RSA PRIVATE KEY-----
EOF

echo ""
echo "FALSE-POSITIVE VECTORS (must stay CLEAN):"

# markdown setext H1 underline (long run) — must stay clean
run_case "markdown-setext-underline-long" clean "docs/title.md" <<'EOF'
My Document Title
=================

Body text here.
EOF

# the precise edge: a setext underline that is EXACTLY seven equals (git's conflict-middle
# marker). The old standalone '=======$' rule false-positived here; the pair-required rule
# must keep it clean (no open/close markers present).
run_case "setext-underline-exactly-7" clean "docs/short.md" <<'EOF'
Title X
=======
body
EOF

# a horizontal divider of exactly seven equals in a comment — must stay clean
run_case "divider-exactly-7-equals" clean "notes/changelog.md" <<'EOF'
## Release notes
=======
- item one
EOF

# a doc that *mentions* a single conflict marker (a git tutorial) — no real hunk
run_case "doc-mentions-open-marker" clean "docs/git-tutorial.md" <<'EOF'
When git hits a conflict it inserts a line starting with `<<<<<<< HEAD`.
You then edit the file to resolve it. (No closing marker in this doc.)
EOF

# already-encrypted sops file — has ENC[ / sops: markers, must NOT warn
run_case "encrypted-sops" clean "infra/real.sops.yaml" <<'EOF'
api_key: ENC[AES256_GCM,data:abc==,iv:xyz==,tag:q==,type:str]
sops:
    kms: []
    age:
        - recipient: age1xyz
EOF

# public key — guard targets PRIVATE keys only; a public key must not warn
run_case "public-key-ssh" clean "keys/id_ed25519.pub" <<'EOF'
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIabc123 user@host
-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE
-----END PUBLIC KEY-----
EOF

# a .sops.yaml.example template (not a real vault file path) with placeholder text
run_case "sops-example-template" clean "infra/secret.sops.yaml.example" <<'EOF'
api_key:
note: copy to secret.sops.yaml and encrypt with sops
EOF

# normal source with '=======' inside a comment banner (not its own 7-char line)
run_case "comment-banner-equals" clean "src/lib.rs" <<'EOF'
// ======= section: helpers =======
fn helper() {}
EOF

echo ""
echo "REAL-CORPUS BLAST RADIUS:"
# Informational only: these counts are printed but do not affect the exit status.
# NOTE(review): the conflict-hunk fixture above puts both an open and a close marker at
# column 0 of this script, so if this file is tracked it is probably counted in BOTH —
# confirm whether it should be excluded from the scan.

# Old standalone rule surface (for context): exactly-7-equals lines that USED to false-positive.
OLD_EQ="$(git -C "$REPO_ROOT" grep -lE '^=======$' 2>/dev/null | wc -l | tr -d '[:space:]')"

# New rule surface: files with BOTH an open and a close marker = a real conflict (should be 0).
OPEN_HITS="$(git -C "$REPO_ROOT" grep -lE '^<<<<<<< ' 2>/dev/null | sort)"
CLOSE_HITS="$(git -C "$REPO_ROOT" grep -lE '^>>>>>>> ' 2>/dev/null | sort)"
# comm -12 keeps only paths present in both sorted lists; grep -c . ignores the empty line
# produced when a list is empty.
BOTH="$(comm -12 <(printf '%s\n' "$OPEN_HITS") <(printf '%s\n' "$CLOSE_HITS") | grep -c . )"

echo " tracked files with a lone '^=======\$' line (OLD rule false-positive surface): $OLD_EQ"
echo " tracked files with BOTH open+close markers (NEW rule = real conflicts): $BOTH"
echo " -> NEW rule flags only genuine conflict hunks; lone dividers/underlines are clean."
echo ""
echo "============================================================"
echo " RESULT: PASS $PASS FAIL $FAIL"
if [ -n "$FAILED_CASES" ]; then
  echo " failed:$FAILED_CASES"
fi
echo "============================================================"

if [ "$FAIL" -eq 0 ]; then
  exit 0
else
  exit 1
fi