sync: auto-sync from GURU-5070 at 2026-06-05 17:57:59

Author: Mike Swanson Machine: GURU-5070 Timestamp: 2026-06-05 17:57:59
2026-06-05 17:58:07 -07:00
parent 2402566782
commit 7a7b4da75e
3 changed files with 145 additions and 5 deletions
--- a/.claude/scripts/sync.sh
+++ b/.claude/scripts/sync.sh
@@ -121,6 +121,111 @@ cd "$REPO_ROOT"

 echo -e "${GREEN}[OK]${NC} Working directory: $(pwd)"

+# --- Concurrency lock --------------------------------------------------------
+# WHY: multiple sync runs on ONE machine must NOT overlap. An interactive /sync
+# or /save can collide with the scheduled-task sync, or two concurrent Claude
+# sessions can each stage + commit + fetch + rebase + push and interleave their
+# git state — corrupting an in-progress rebase, orphaning commits, or pushing a
+# half-built tree. We serialize the whole claudetools critical section (Phase 1a
+# submodule update, staging, commit, fetch, rebase, push — and by extension the
+# vault phase) behind a single per-machine lock.
+#
+# PORTABILITY: `flock` is frequently ABSENT on Git Bash (MSYS2), so we can't
+# depend on it. An atomic `mkdir` is the lowest common denominator — it fails if
+# the directory already exists, atomically, on every platform we run on (Windows
+# Git Bash, macOS, Linux). The lock lives under .git/ (never tracked, so a blind
+# `git add -A` can't stage it) and is scoped to this repo.
+SYNC_LOCK_DIR="${REPO_ROOT}/.git/claudetools-sync.lock"   # lock directory, kept inside .git so it is never tracked
+SYNC_LOCK_WAIT=$(( 2 * 60 ))     # longest wait (seconds) for a held lock before this run is skipped
+SYNC_LOCK_STALE=$(( 10 * 60 ))   # age (seconds) beyond which a held lock is considered stale
+SYNC_LOCK_OWNED=0                # set to 1 only while THIS run holds the lock (gates release)
+
+# Idempotent release — only removes the lock if THIS process actually owns it
+# (stored PID == $$), so a "skipping this run" exit can never clobber the lock
+# held by the live sync we deferred to. Installed as an EXIT trap because the
+# script runs under `set -e`: the lock must be released on error exits too.
+# (There is no pre-existing EXIT trap in this script, so this adds a fresh one.)
+release_sync_lock() {
+    local recorded_pid
+    # No-op unless this run took the lock and the lock directory is still present.
+    if [ "$SYNC_LOCK_OWNED" != "1" ] || [ ! -d "$SYNC_LOCK_DIR" ]; then return 0; fi
+    recorded_pid=$(cat "$SYNC_LOCK_DIR/owner.pid" 2>/dev/null || echo "")
+    case "$recorded_pid" in
+        ""|"$$") rm -rf "$SYNC_LOCK_DIR" 2>/dev/null || true ;;   # unreadable, or recorded as ours
+    esac
+    SYNC_LOCK_OWNED=0
+}
+trap release_sync_lock EXIT; trap 'exit 130' INT; trap 'exit 143' TERM   # signals must exit (firing EXIT), else the run resumes unlocked
+
+# Portable liveness check. `kill -0 <pid>` works on Git Bash (it maps to the
+# Windows process table), macOS, and Linux; guarded so a bad/empty PID is "dead".
+sync_pid_alive() {
+    local pid="$1"
+    [ -n "$pid" ] || return 1
+    kill -0 "$pid" 2>/dev/null
+}
+
+acquire_sync_lock() {   # Block until this run owns the lock (return 0), or exit 75 once a live holder outlasts SYNC_LOCK_WAIT.
+    local waited=0 owner_pid owner_ts now mtime lock_age stale_aside
+    while true; do
+        if mkdir "$SYNC_LOCK_DIR" 2>/dev/null; then
+            SYNC_LOCK_OWNED=1
+            printf '%s' "$$" > "$SYNC_LOCK_DIR/owner.pid" 2>/dev/null || true
+            # PID + ISO timestamp inside the lock dir, for diagnostics.
+            {
+                printf 'pid=%s\n'     "$$"
+                printf 'iso=%s\n'     "$(date -u "+%Y-%m-%dT%H:%M:%SZ")"
+                printf 'machine=%s\n' "$MACHINE"
+            } > "$SYNC_LOCK_DIR/owner" 2>/dev/null || true
+            # Defense-in-depth: confirm we still own the dir we just created. If
+            # owner.pid isn't ours, drop ownership and re-evaluate (never fatal
+            # under set -e — comparison is cheap and the body just loops).
+            if [ "$(cat "$SYNC_LOCK_DIR/owner.pid" 2>/dev/null)" != "$$" ]; then
+                SYNC_LOCK_OWNED=0; continue
+            fi
+            return 0
+        fi
+
+        # mkdir failed -> the lock is held. Decide whether it's stale or live.
+        owner_pid=$(cat "$SYNC_LOCK_DIR/owner.pid" 2>/dev/null || echo "")
+        owner_ts=$(sed -n 's/^iso=//p' "$SYNC_LOCK_DIR/owner" 2>/dev/null | head -1)
+        [ -n "$owner_ts" ] || owner_ts="unknown"
+
+        # Stale if the dir is older than the threshold OR the owner PID is dead.
+        # `stat -c` is GNU/Git-Bash, `stat -f` is BSD/macOS; fall back to 0.
+        now=$(date +%s 2>/dev/null || echo 0)
+        mtime=$(stat -c %Y "$SYNC_LOCK_DIR" 2>/dev/null || stat -f %m "$SYNC_LOCK_DIR" 2>/dev/null || echo 0)
+        lock_age=$(( now - mtime ))
+        if { [ "$mtime" -gt 0 ] && [ "$lock_age" -ge "$SYNC_LOCK_STALE" ]; } \
+           || { [ -n "$owner_pid" ] && ! sync_pid_alive "$owner_pid"; }; then
+            echo -e "${YELLOW}[WARNING]${NC} removing stale sync lock (held by PID ${owner_pid:-?} since ${owner_ts}, age ${lock_age}s)"
+            # Claim the right to clear the stale lock by renaming it aside: of racers that
+            # target the SAME directory only one mv succeeds; the losers' mv fails and they
+            # re-evaluate next pass. NOTE(review): the staleness verdict above and this mv are
+            # not one atomic step — a racer that judged the old lock stale can still rename a
+            # lock another run re-created in between. Window is narrow; confirm it is acceptable.
+            stale_aside="${SYNC_LOCK_DIR}.stale.$$"
+            if mv "$SYNC_LOCK_DIR" "$stale_aside" 2>/dev/null; then
+                rm -rf "$stale_aside" 2>/dev/null || true
+            fi
+            continue   # retry mkdir immediately
+        fi
+
+        # Live lock. If we've waited the full budget, skip (a duplicate sync is
+        # harmless to drop — the next scheduled/interactive run catches up).
+        if [ "$waited" -ge "$SYNC_LOCK_WAIT" ]; then
+            echo -e "${YELLOW}[WARNING]${NC} another sync is in progress (held by PID ${owner_pid:-?} since ${owner_ts}); skipping this run"
+            exit 75   # EX_TEMPFAIL: deferred (another sync in progress), not a real success
+        fi
+        sleep 2
+        waited=$(( waited + 2 ))
+    done
+}
+
+acquire_sync_lock   # returns only once this run owns the lock; when it gives up waiting the function itself exits 75
+echo -e "${GREEN}[OK]${NC} Acquired sync lock ($SYNC_LOCK_DIR)"
+# --- end concurrency lock ----------------------------------------------------
+
 # Detect Python interpreter — read from identity.json first, fall back to detection
 PYTHON=""
 if [ -f ".claude/identity.json" ] && command -v jq >/dev/null 2>&1; then