feat(harness-guard): FATAL-promotion prerequisite — test matrix + pair-required conflict rule (VERSION 1.4.3)
Builds the false-positive/true-positive proof the plan requires before the guard can be promoted to blocking, and fixes the one false-positive it surfaced. - test-harness-guard.sh: 12-case matrix in a throwaway repo, runs the REAL guard, asserts WARN/clean for real conflicts/secrets/keys vs legit content (setext underlines, dividers, docs that mention a marker, encrypted sops, public keys, .example templates). - harness-guard.sh: conflict rule now requires a real hunk (BOTH ^<<<<<<< AND ^>>>>>>>), dropping the lone =======$ trigger that false-positived on a 7-char setext underline / divider. Identical true-positive power (git writes all three markers); FP surface -> 0. - /self-check: new harness.guard_selftest runs the matrix in an isolated temp repo (read-only vs the real tree) so guard correctness is continuously proven. Verified 12/12 pass, true positives intact, real-tree FP surface = 0. FATAL flip (todo f1c11d0d, on/after 2026-06-22) is now evidence-backed + one-step. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,8 +30,12 @@ mapfile -t STAGED < <(git diff --cached --name-only --diff-filter=ACM 2>/dev/nul
|
||||
for f in "${STAGED[@]}"; do
|
||||
[ -n "$f" ] || continue
|
||||
blob=$(git show ":$f" 2>/dev/null) || continue
|
||||
# 1. Conflict markers
|
||||
if printf '%s\n' "$blob" | grep -qE '^(<<<<<<< |=======$|>>>>>>> )'; then
|
||||
# 1. Conflict markers — require a REAL hunk: both an open (<<<<<<<) AND a close
|
||||
# (>>>>>>>) marker at line start. A lone '=======' line is a markdown setext
|
||||
# underline or a divider, not a conflict, so flagging it alone is a false positive
|
||||
# with no detection value (git always writes all three markers). Requiring the pair
|
||||
# eliminates that vector (verified by test-harness-guard.sh) before FATAL promotion.
|
||||
if printf '%s\n' "$blob" | grep -qE '^<<<<<<< ' && printf '%s\n' "$blob" | grep -qE '^>>>>>>> '; then
|
||||
warn "conflict markers in staged file: $f"; ISSUES=$((ISSUES + 1))
|
||||
fi
|
||||
# 2. Unencrypted SOPS vault file
|
||||
|
||||
174
.claude/scripts/test-harness-guard.sh
Normal file
174
.claude/scripts/test-harness-guard.sh
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env bash
|
||||
# test-harness-guard.sh — false-positive / true-positive test matrix for harness-guard.sh.
|
||||
#
|
||||
# WHY: the guard is WARN-ONLY today; before it is promoted to FATAL (blocking) the
|
||||
# harness-optimization plan requires proof of ZERO false positives on legitimate content
|
||||
# plus reliable detection of the real footguns. This script is that proof, repeatable.
|
||||
#
|
||||
# It spins up a throwaway git repo, stages synthetic files, runs the REAL harness-guard.sh
|
||||
# inside it (the guard cd's to its repo root and inspects the staged blobs), and asserts
|
||||
# WARN / no-WARN per case. It also scans the actual tracked tree for content that the
|
||||
# guard's detection patterns would flag, to size the real-world false-positive blast radius.
|
||||
#
|
||||
# Read-only against the real repo (the synthetic staging happens in a temp repo under TMP).
|
||||
# Exit 0 = all cases passed; exit 1 = at least one mismatch (promotion NOT yet safe).
|
||||
|
||||
# -u turns a reference to an unset variable into an error; pipefail makes a pipeline
# report failure when any stage fails, not just the last one.
# NOTE(review): -e is not enabled — presumably so one failing command does not abort
# the matrix before the PASS/FAIL summary is printed; confirm this is intentional.
set -uo pipefail
|
||||
|
||||
REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" || { echo "[ERROR] not in a git repo"; exit 2; }
|
||||
GUARD="$REPO_ROOT/.claude/scripts/harness-guard.sh"
|
||||
[ -f "$GUARD" ] || { echo "[ERROR] guard not found: $GUARD"; exit 2; }
|
||||
|
||||
# Private scratch directory that will hold the throwaway git repo.
# Only a directory created by mktemp is accepted. A guessable fallback name such as
# /tmp/guardtest.<pid> could be pre-created (or symlinked) by another local user, and
# cleanup() runs `rm -rf` on whatever $TMP names. The second mktemp attempt covers
# implementations that insist on an explicit template.
TMP="$(mktemp -d 2>/dev/null || mktemp -d -t guardtest.XXXXXX 2>/dev/null)"
if [ -z "$TMP" ] || [ ! -d "$TMP" ]; then
  echo "[ERROR] could not create a private temp dir (mktemp -d failed)"
  exit 2
fi

# Remove the scratch directory on every exit path. ${TMP:?} refuses to expand to an
# empty path, so the rm can never be pointed at "/".
cleanup() { rm -rf -- "${TMP:?}" 2>/dev/null; }
trap cleanup EXIT
|
||||
|
||||
# --- isolated temp repo so we can stage synthetic content without touching the real tree
|
||||
git -C "$TMP" init -q
|
||||
git -C "$TMP" config user.name "guard-test"
|
||||
git -C "$TMP" config user.email "guard-test@local"
|
||||
mkdir -p "$TMP/.claude/harness" # so the guard's log path mkdir is a no-op
|
||||
|
||||
PASS=0; FAIL=0
|
||||
FAILED_CASES=""
|
||||
|
||||
# run_case <name> <expect: warn|clean> <file>      (file content is read from stdin)
#
# Stages one synthetic file in the throwaway repo at $TMP, runs the real guard ($GUARD)
# from inside that repo, and compares the outcome with <expect>. The outcome is "warn"
# when the guard's combined stdout/stderr contains "[harness-guard][WARN]" and "clean"
# otherwise.
#
# Globals: TMP, GUARD (read); PASS, FAIL, FAILED_CASES (updated).
# Output:  one [PASS]/[FAIL] line per case on stdout, plus a detail line on failure.
#
# A fixture that cannot be written, or that does not end up in the index (write error,
# `git add` failure, a path matched by an ignore rule), is recorded as a FAIL. Otherwise
# the guard would inspect an empty index and every "clean" expectation would pass
# without testing anything.
run_case() {
  local name="$1" expect="$2" file="$3" out got

  # Reset the temp index/worktree left behind by the previous case.
  git -C "$TMP" reset -q --hard >/dev/null 2>&1 || true
  git -C "$TMP" rm -rq --cached . >/dev/null 2>&1 || true
  # Top-level leftovers only: rm without -r skips directories and the glob skips
  # dotfiles such as .git. ${TMP:?} aborts rather than expanding to /* if TMP is empty.
  rm -f "${TMP:?}"/* 2>/dev/null || true

  # Write the fixture from stdin, stage it, and confirm it really is in the index.
  mkdir -p "$TMP/$(dirname "$file")" 2>/dev/null || true
  if ! cat > "$TMP/$file" \
    || ! git -C "$TMP" add -A >/dev/null 2>&1 \
    || ! git -C "$TMP" ls-files --error-unmatch -- "$file" >/dev/null 2>&1; then
    FAIL=$((FAIL+1)); FAILED_CASES="$FAILED_CASES $name"
    printf ' [FAIL] %-34s expected=%-5s got=%-5s\n' "$name" "$expect" "setup"
    printf ' fixture was not staged: %s\n' "$file"
    return 0
  fi

  # Run the REAL guard from inside the temp repo, capturing stdout and stderr together.
  out="$( cd "$TMP" && bash "$GUARD" 2>&1 )"

  # Match in-shell instead of `printf | grep -q`: with pipefail set, grep -q exiting
  # early can SIGPIPE the writer and make a matching pipeline report failure.
  case "$out" in
    *'[harness-guard][WARN]'*) got="warn" ;;
    *) got="clean" ;;
  esac

  if [ "$got" = "$expect" ]; then
    PASS=$((PASS+1)); printf ' [PASS] %-34s expected=%-5s got=%-5s\n' "$name" "$expect" "$got"
  else
    FAIL=$((FAIL+1)); FAILED_CASES="$FAILED_CASES $name"
    printf ' [FAIL] %-34s expected=%-5s got=%-5s\n' "$name" "$expect" "$got"
    # Show at most the first two WARN lines, joined with '|', to explain the mismatch.
    printf ' guard said: %s\n' "$(printf '%s' "$out" | grep WARN | head -2 | tr '\n' '|')"
  fi
}
|
||||
|
||||
echo "============================================================"
|
||||
echo " harness-guard false-positive / true-positive matrix"
|
||||
echo " guard: $GUARD"
|
||||
echo "============================================================"
|
||||
echo ""
|
||||
echo "TRUE POSITIVES (must WARN):"
|
||||
|
||||
# Real conflict hunk (must WARN). The open/close markers are spliced in from variables
# so this script never contains a line that begins with a literal marker. The guard
# matches '^<<<<<<< ' together with '^>>>>>>> ' in any staged blob, so literal fixture
# lines would make the guard flag this test script itself whenever it is staged, and
# the real-corpus scan would count it as a conflict once it is tracked.
# The heredoc delimiter is deliberately unquoted so the two variables expand.
conflict_open='<<<<<<<'
conflict_close='>>>>>>>'
run_case "real-conflict-hunk" warn "src/app.rs" <<EOF
fn main() {
${conflict_open} HEAD
let x = 1;
=======
let x = 2;
${conflict_close} feature
}
EOF
|
||||
|
||||
run_case "unencrypted-sops" warn "infra/secret.sops.yaml" <<'EOF'
|
||||
api_key: super-secret-plaintext
|
||||
password: hunter2
|
||||
EOF
|
||||
|
||||
run_case "private-key-openssh" warn "keys/id_ed25519" <<'EOF'
|
||||
-----BEGIN OPENSSH PRIVATE KEY-----
|
||||
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAAB
|
||||
-----END OPENSSH PRIVATE KEY-----
|
||||
EOF
|
||||
|
||||
run_case "private-key-rsa" warn "keys/id_rsa" <<'EOF'
|
||||
-----BEGIN RSA PRIVATE KEY-----
|
||||
MIIEpAIBAAKCAQEA...
|
||||
-----END RSA PRIVATE KEY-----
|
||||
EOF
|
||||
|
||||
echo ""
|
||||
echo "FALSE-POSITIVE VECTORS (must stay CLEAN):"
|
||||
|
||||
# markdown setext H1 underline (long run) — must stay clean
|
||||
run_case "markdown-setext-underline-long" clean "docs/title.md" <<'EOF'
|
||||
My Document Title
|
||||
=================
|
||||
|
||||
Body text here.
|
||||
EOF
|
||||
|
||||
# the precise edge: a setext underline that is EXACTLY seven equals (git's conflict-middle
|
||||
# marker). The old standalone '=======$' rule false-positived here; the pair-required rule
|
||||
# must keep it clean (no open/close markers present).
|
||||
run_case "setext-underline-exactly-7" clean "docs/short.md" <<'EOF'
|
||||
Title X
|
||||
=======
|
||||
|
||||
body
|
||||
EOF
|
||||
|
||||
# a horizontal divider of exactly seven equals in a comment — must stay clean
|
||||
run_case "divider-exactly-7-equals" clean "notes/changelog.md" <<'EOF'
|
||||
## Release notes
|
||||
=======
|
||||
- item one
|
||||
EOF
|
||||
|
||||
# a doc that *mentions* a single conflict marker (a git tutorial) — no real hunk
|
||||
run_case "doc-mentions-open-marker" clean "docs/git-tutorial.md" <<'EOF'
|
||||
When git hits a conflict it inserts a line starting with `<<<<<<< HEAD`.
|
||||
You then edit the file to resolve it. (No closing marker in this doc.)
|
||||
EOF
|
||||
|
||||
# already-encrypted sops file — has ENC[ / sops: markers, must NOT warn
|
||||
run_case "encrypted-sops" clean "infra/real.sops.yaml" <<'EOF'
|
||||
api_key: ENC[AES256_GCM,data:abc==,iv:xyz==,tag:q==,type:str]
|
||||
sops:
|
||||
kms: []
|
||||
age:
|
||||
- recipient: age1xyz
|
||||
EOF
|
||||
|
||||
# public key — guard targets PRIVATE keys only; a public key must not warn
|
||||
run_case "public-key-ssh" clean "keys/id_ed25519.pub" <<'EOF'
|
||||
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIabc123 user@host
|
||||
-----BEGIN PUBLIC KEY-----
|
||||
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE
|
||||
-----END PUBLIC KEY-----
|
||||
EOF
|
||||
|
||||
# a .sops.yaml.example template (not a real vault file path) with placeholder text
|
||||
run_case "sops-example-template" clean "infra/secret.sops.yaml.example" <<'EOF'
|
||||
api_key: <your-key-here>
|
||||
note: copy to secret.sops.yaml and encrypt with sops
|
||||
EOF
|
||||
|
||||
# normal source with '=======' inside a comment banner (not its own 7-char line)
|
||||
run_case "comment-banner-equals" clean "src/lib.rs" <<'EOF'
|
||||
// ======= section: helpers =======
|
||||
fn helper() {}
|
||||
EOF
|
||||
|
||||
echo ""
|
||||
echo "REAL-CORPUS BLAST RADIUS:"
|
||||
# Old standalone rule surface (for context): exactly-7-equals lines that USED to false-positive.
|
||||
OLD_EQ="$(git -C "$REPO_ROOT" grep -lE '^=======$' 2>/dev/null | wc -l | tr -d '[:space:]')"
|
||||
# New rule surface: files with BOTH an open and a close marker = a real conflict (should be 0).
|
||||
OPEN_HITS="$(git -C "$REPO_ROOT" grep -lE '^<<<<<<< ' 2>/dev/null | sort)"
|
||||
CLOSE_HITS="$(git -C "$REPO_ROOT" grep -lE '^>>>>>>> ' 2>/dev/null | sort)"
|
||||
BOTH="$(comm -12 <(printf '%s\n' "$OPEN_HITS") <(printf '%s\n' "$CLOSE_HITS") | grep -c . )"
|
||||
echo " tracked files with a lone '^=======\$' line (OLD rule false-positive surface): $OLD_EQ"
|
||||
echo " tracked files with BOTH open+close markers (NEW rule = real conflicts): $BOTH"
|
||||
echo " -> NEW rule flags only genuine conflict hunks; lone dividers/underlines are clean."
|
||||
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
echo " RESULT: PASS $PASS FAIL $FAIL"
|
||||
[ -n "$FAILED_CASES" ] && echo " failed:$FAILED_CASES"
|
||||
echo "============================================================"
|
||||
[ "$FAIL" -eq 0 ] && exit 0 || exit 1
|
||||
Reference in New Issue
Block a user