diff --git a/.claude/scripts/sync.sh b/.claude/scripts/sync.sh index ff0e173..a2e514e 100755 --- a/.claude/scripts/sync.sh +++ b/.claude/scripts/sync.sh @@ -21,12 +21,19 @@ NC='\033[0m' # content is never auto-deleted (use `git rm` for that, deliberately). purge_garbled_paths() { local rec st p removed=0 + # POSIX ERE (not PCRE): `grep -P` refuses to run under non-UTF-8/unibyte locales on + # some platforms (Git Bash printed "grep: -P supports only unibyte and UTF-8 locales", + # silently disabling this guard). ERE has no such restriction; LC_ALL=C makes the match + # byte-wise. Same byte set as before: control chars / backslash / colon, plus the MSYS2 + # Private-Use-Area substitutes (0xEE 0x80-0xBF = U+E0xx, 0xEF 0x80-0xA3 = U+F0xx) that + # stand in for : \ newline etc. in Windows path-as-filename cruft. + local garble_re=$'[\001-\037\\:]|\356[\200-\277]|\357[\200-\243]' while IFS= read -r -d '' rec; do st="${rec:0:2}" p="${rec:3}" [ -z "$p" ] && continue [ "$st" = "??" ] || continue # untracked only - if printf '%s' "$p" | LC_ALL=C grep -qaP '[\x00-\x1f\\:]|\xee[\x80-\xbf]|\xef[\x80-\xa3]'; then + if printf '%s' "$p" | LC_ALL=C grep -qaE "$garble_re"; then echo -e "${YELLOW}[WARNING]${NC} Removing garbled untracked path before staging: $(printf '%s' "$p" | cat -v)" rm -f -- "$p" 2>/dev/null || true removed=1 diff --git a/session-logs/2026-05-25-beast-chrome-fetch-and-identity-audit.md b/session-logs/2026-05-25-beast-chrome-fetch-and-identity-audit.md index 0180fb1..0a2a975 100644 --- a/session-logs/2026-05-25-beast-chrome-fetch-and-identity-audit.md +++ b/session-logs/2026-05-25-beast-chrome-fetch-and-identity-audit.md @@ -125,3 +125,22 @@ another machine has a hyphenated `git config user.name`. ("Web Research / Bot-Blocked Sites"). - Coord reply id: `ac1e3767-f085-4c83-8d74-fbd9cc821d63` (to GURU-5070/claude-main). - Extension id `fcoeoabgfenejglbffodgkkbkcdhcgfn`; native host `com.anthropic.claude_browser_extension`. 
+ +--- + +## Update: 13:17 PT — harden sync.sh purge_garbled_paths (locale fix) + +During the prior save's sync, `purge_garbled_paths()` printed `grep: -P supports only unibyte and +UTF-8 locales` — its `grep -P` (PCRE) guard refuses to run on BEAST's Git Bash locale, so the +garbled-path protection was a no-op here: the error was printed, but the check failed open. + +**Fix:** switched the detector from PCRE (`grep -qaP`) to POSIX ERE (`grep -qaE`), which has no +such locale restriction, keeping `LC_ALL=C` for byte-wise matching and the **same effective byte set** +(control chars / `\` / `:` plus the MSYS2 PUA substitutes `0xEE 0x80-0xBF` and `0xEF 0x80-0xA3`). +Pattern built via `$'...'`, so the control-char range now starts at `\001` rather than `\x00` (a NUL cannot occur in a bash string or a path, so matching is unaffected). No behavior change other than that it now actually runs. + +Verified on BEAST: `bash -n` clean; PUA-garbled name (U+F03A) detected; literal `:` detected; +normal repo paths not flagged; no `-P` locale error. Applies to all machines on next sync (sync.sh +is shared). + +- File: `.claude/scripts/sync.sh` — `purge_garbled_paths()` grep line + comment.