feat(sync): serialize sync.sh with a per-machine lock; per-session log filenames
Multiple concurrent Claude sessions (and the scheduled-task sync) were stepping on each other's git state. sync.sh now takes an atomic mkdir lock in .git/ around the whole run (stage/commit/fetch/rebase/push + vault), exits 75 (EX_TEMPFAIL = deferred) on contention instead of racing, and reclaims stale/dead-owner locks with a re-verify-before-clear guard (closes two TOCTOU races caught in review). /save now mandates per-session-unique log filenames (never the bare YYYY-MM-DD-session.md). Docs updated for the lock + deferred-exit semantics. Note: git add -A is still the catch-all sweep; full per-session commit isolation and routing /scc + /checkpoint through the lock are follow-ups.
This commit is contained in:
@@ -166,7 +166,7 @@ sync_pid_alive() {
|
||||
}
|
||||
|
||||
acquire_sync_lock() {
|
||||
local waited=0 owner_pid owner_ts now mtime lock_age stale_aside
|
||||
local waited=0 owner_pid owner_ts now mtime lock_age stale_aside re_pid re_now re_mtime re_age
|
||||
while true; do
|
||||
if mkdir "$SYNC_LOCK_DIR" 2>/dev/null; then
|
||||
SYNC_LOCK_OWNED=1
|
||||
@@ -198,17 +198,26 @@ acquire_sync_lock() {
|
||||
lock_age=$(( now - mtime ))
|
||||
if { [ "$mtime" -gt 0 ] && [ "$lock_age" -ge "$SYNC_LOCK_STALE" ]; } \
|
||||
|| { [ -n "$owner_pid" ] && ! sync_pid_alive "$owner_pid"; }; then
|
||||
echo -e "${YELLOW}[WARNING]${NC} removing stale sync lock (held by PID ${owner_pid:-?} since ${owner_ts}, age ${lock_age}s)"
|
||||
# Atomically claim the right to clear the stale lock. Only ONE racer can rename
|
||||
# the canonical dir aside (rename source vanishes after the first; the loser's mv
|
||||
# fails and it re-evaluates next pass). The canonical lock name is thereafter only
|
||||
# ever recreated by the atomic mkdir at the top, so a live freshly-acquired lock
|
||||
# can never be rm'd out from under its owner.
|
||||
stale_aside="${SYNC_LOCK_DIR}.stale.$$"
|
||||
if mv "$SYNC_LOCK_DIR" "$stale_aside" 2>/dev/null; then
|
||||
rm -rf "$stale_aside" 2>/dev/null || true
|
||||
# Re-verify staleness IMMEDIATELY before clearing. Between the check
|
||||
# above and here, another racer may have already cleared the stale
|
||||
# lock and acquired a fresh, LIVE one. Re-read owner.pid + mtime NOW;
|
||||
# only rename-aside if it is STILL stale this instant. A freshly
|
||||
# acquired winner has a live PID and fresh mtime, so the loser falls
|
||||
# through to the live-lock wait path instead of stealing the lock.
|
||||
re_pid=$(cat "$SYNC_LOCK_DIR/owner.pid" 2>/dev/null || echo "")
|
||||
re_now=$(date +%s 2>/dev/null || echo 0)
|
||||
re_mtime=$(stat -c %Y "$SYNC_LOCK_DIR" 2>/dev/null || stat -f %m "$SYNC_LOCK_DIR" 2>/dev/null || echo 0)
|
||||
re_age=$(( re_now - re_mtime ))
|
||||
if { [ "$re_mtime" -gt 0 ] && [ "$re_age" -ge "$SYNC_LOCK_STALE" ]; } \
|
||||
|| { [ -n "$re_pid" ] && ! sync_pid_alive "$re_pid"; }; then
|
||||
echo -e "${YELLOW}[WARNING]${NC} removing stale sync lock (held by PID ${re_pid:-?} since ${owner_ts}, age ${re_age}s)"
|
||||
stale_aside="${SYNC_LOCK_DIR}.stale.$$"
|
||||
if mv "$SYNC_LOCK_DIR" "$stale_aside" 2>/dev/null; then
|
||||
rm -rf "$stale_aside" 2>/dev/null || true
|
||||
fi
|
||||
fi
|
||||
continue # retry mkdir immediately
|
||||
sleep 1 # insurance: never tight-spin if clearing persistently fails
|
||||
continue
|
||||
fi
|
||||
|
||||
# Live lock. If we've waited the full budget, skip (a duplicate sync is
|
||||
|
||||
Reference in New Issue
Block a user