Add memory-dream skill + additive cross-machine memory sync

memory-dream: read-only memory lint/consolidation analyzer (index, backlinks, stale refs, dup clusters, profile drift); additive-only --apply-safe, all merges/deletes are proposals. sync-memory.sh: additive repo<->harness-profile union (no delete/overwrite, conflicts surfaced), wired to a SessionStart hook. Migrates the useful profile-only memories into the synced repo store. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 15:21:56 -07:00
parent a00069a020
commit 2a1ccfac73
24 changed files with 1875 additions and 0 deletions
--- a/.claude/skills/memory-dream/scripts/memory_dream.py
+++ b/.claude/skills/memory-dream/scripts/memory_dream.py
@@ -0,0 +1,903 @@
+#!/usr/bin/env python3
+"""
+memory_dream.py -- memory lint + consolidation analyzer for the ClaudeTools REPO
+memory store (.claude/memory/).
+
+ADDITIVE-ONLY by design. The default run is READ-ONLY and mutates nothing.
+The only mutating mode is --apply-safe, which performs ONLY additive,
+non-destructive actions:
+  * append missing index lines to MEMORY.md for orphan memory files
+  * copy profile-only memory files INTO the repo store (never overwriting)
+It NEVER deletes a file, NEVER removes an index line, NEVER overwrites differing
+content, and NEVER performs a proposed merge. Every destructive idea stays in
+the report as a PROPOSED action for a human to approve.
+
+Stdlib only. Python launcher on Windows fleet is `py`; also runs under
+python3/python.
+
+Usage:
+  py memory_dream.py                 # REPORT ONLY (default)
+  py memory_dream.py --apply-safe    # additive-only fixes + report
+  py memory_dream.py --no-file       # report to stdout only, skip _reports/ file
+  py memory_dream.py --report-file X # write report to an explicit path
+"""
+
+from __future__ import annotations
+
+import argparse
+import datetime
+import os
+import re
+import shutil
+import sys
+from pathlib import Path
+
+# Windows consoles default to cp1252, but memory bodies contain Unicode (arrows,
+# em dashes). Force UTF-8 stdout/stderr with errors="replace" so printing never
+# crashes; a stream that cannot be reconfigured is left as-is (best effort).
+for _stream in (sys.stdout, sys.stderr):
+    try:
+        _stream.reconfigure(encoding="utf-8", errors="replace")
+    except Exception:
+        pass
+
+# --------------------------------------------------------------------------
+# Path resolution -- no hardcoded drive letters.
+# --------------------------------------------------------------------------
+
+STALE_MONTHS = 6  # months, counted as 30 days each; older dated claims -> re-verify
+
+
+def _read_identity_root(repo_guess: Path) -> str | None:
+    """Return claudetools_root from .claude/identity.json, or None (best effort)."""
+    import json
+
+    identity_file = repo_guess / ".claude" / "identity.json"
+    if not identity_file.is_file():
+        return None
+    try:
+        payload = json.loads(identity_file.read_text(encoding="utf-8"))
+        candidate = payload.get("claudetools_root")
+        usable = bool(candidate) and Path(candidate).is_dir()
+    except Exception:
+        # Unreadable, malformed or unexpectedly shaped identity file: give up.
+        return None
+    return candidate if usable else None
+
+
+def resolve_claudetools_root() -> Path:
+    """
+    Locate the ClaudeTools repo root. First match wins:
+      1. env CLAUDETOOLS_ROOT, when it names an existing directory
+      2. claudetools_root from .claude/identity.json of the nearest ancestor
+         of this script that contains a .claude dir (when that value is usable)
+      3. that nearest ancestor itself
+      4. the fixed layout ROOT/.claude/skills/memory-dream/scripts/<this file>
+    """
+    from_env = os.environ.get("CLAUDETOOLS_ROOT")
+    if from_env and Path(from_env).is_dir():
+        return Path(from_env).resolve()
+
+    script_path = Path(__file__).resolve()
+    # Nearest ancestor of this file that holds a .claude directory, if any.
+    anchor = next(
+        (up for up in script_path.parents if (up / ".claude").is_dir()),
+        None,
+    )
+    if anchor is None:
+        # Last resort: scripts/ -> memory-dream/ -> skills/ -> .claude/ -> ROOT.
+        return script_path.parents[4].resolve()
+
+    # identity.json may redirect to a different checkout; otherwise trust the
+    # directory found by the upward walk.
+    declared_root = _read_identity_root(anchor)
+    chosen = Path(declared_root) if declared_root else anchor
+    return chosen.resolve()
+
+
+def profile_memory_dir(repo_root: Path) -> Path | None:
+    """
+    Derive the harness profile memory dir for this project.
+    The project path (CLAUDE_PROJECT_DIR if set, else repo_root) is slugged by
+    replacing each run of non-alphanumeric chars with '-'; candidates live under
+    $HOME/.claude/projects/<slug>/memory/. Returns the chosen memory dir, or
+    None when nothing matches or the fallback tail-scan is ambiguous.
+    NOTE(review): if only the slug dir exists, <slug>/memory is returned even
+    though it does not exist -- confirm callers tolerate a missing dir.
+    """
+    home = Path(os.environ.get("HOME") or os.path.expanduser("~"))
+    project_dir = os.environ.get("CLAUDE_PROJECT_DIR") or str(repo_root)
+    abspath = str(Path(project_dir).resolve())
+    projects_root = home / ".claude" / "projects"
+
+    # The single-dash collapse: replace every run of non-alphanumeric chars with
+    # a single '-'. This is the historical/POSIX-style derivation.
+    slug_single = re.sub(r"[^A-Za-z0-9]+", "-", abspath)
+
+    # The Claude Code harness maps a Windows drive colon to '--' (so
+    # "D:\\claudetools" -> "D--claudetools"), but the single-dash collapse above
+    # produces "D-claudetools". Reproduce the harness rule by doubling a leading
+    # "<drive>-" into "<drive>--".
+    slug_double = re.sub(r"^([A-Za-z])-", r"\1--", slug_single)
+
+    # Try the EXACT candidate slugs in priority order; use the first whose
+    # profile memory dir actually exists. The double-dash (harness) variant is
+    # primary; the single-dash collapse is the secondary exact candidate.
+    seen: set[str] = set()
+    for slug in (slug_double, slug_single):
+        if slug in seen:
+            continue
+        seen.add(slug)
+        base = projects_root / slug
+        for candidate in (base / "memory", base):
+            if candidate.is_dir():
+                # If the slug dir itself was matched (no nested memory/), use the
+                # conventional memory subdir under it.
+                return (base / "memory") if candidate == base else candidate
+
+    # ONLY if none of the exact candidates exist, fall back to a case-insensitive
+    # tail-scan of $HOME/.claude/projects/*/memory for a dir whose slug "looks
+    # like" this repo (tail match on the last path component). If MORE THAN ONE
+    # dir matches, do NOT guess -- report the ambiguity and skip.
+    if projects_root.is_dir():
+        tail = re.sub(r"[^A-Za-z0-9]+", "-", repo_root.name).lower()
+        matches: list[Path] = []
+        for child in sorted(projects_root.iterdir()):
+            if not child.is_dir():
+                continue
+            if child.name.lower().endswith(tail):
+                mem = child / "memory"
+                if mem.is_dir():
+                    matches.append(mem)
+        if len(matches) > 1:
+            names = ", ".join(str(m.parent.name) for m in matches)
+            print(
+                f"[WARNING] multiple profile dirs matched ({names}); "
+                "skipping profile drift analysis to avoid cross-project contamination"
+            )
+            return None
+        if len(matches) == 1:
+            return matches[0]
+    return None
+
+
+# --------------------------------------------------------------------------
+# Frontmatter / memory file parsing
+# --------------------------------------------------------------------------
+
+
+class Memory:
+    """One memory file: path-derived identity, frontmatter fields, body text."""
+
+    def __init__(self, path: Path):
+        self.path = path
+        self.filename = path.name
+        self.slug = path.stem
+        self.name: str | None = None
+        self.description: str | None = None
+        self.type: str | None = None
+        self.body: str = ""
+        self._parse()
+
+    def _parse(self) -> None:
+        """Read the file and split it into frontmatter fields and body."""
+        raw_text = self.path.read_text(encoding="utf-8", errors="replace")
+        rows = raw_text.splitlines()
+        closing = None
+        if rows and rows[0].strip() == "---":
+            # Index of the fence that closes the frontmatter, if there is one.
+            closing = next(
+                (n for n in range(1, len(rows)) if rows[n].strip() == "---"),
+                None,
+            )
+        if closing is None:
+            # No frontmatter, or it never closes: the whole file is body.
+            self.body = raw_text
+            return
+        self.body = "\n".join(rows[closing + 1 :])
+        self._parse_frontmatter(rows[1:closing])
+
+    def _parse_frontmatter(self, fm_lines: list[str]) -> None:
+        """
+        Tolerant YAML-ish parse of the frontmatter lines. Recognizes:
+          name: X          (ignored when nested under metadata:)
+          description: X   (inline, or a '>' / '|' block scalar that follows)
+          type: X          (top-level or nested under metadata:; lowercased)
+        Unknown keys are skipped. Inline values have surrounding whitespace
+        and quote characters stripped; block scalars are folded with spaces.
+        """
+        key_value = re.compile(r"^([A-Za-z_][\w\-]*):\s*(.*)$")
+        block_markers = (">-", ">", "|", "|-", "|+")
+
+        def indent_of(row: str) -> int:
+            return len(row) - len(row.lstrip())
+
+        # Manual cursor: a block scalar consumes several lines in one step.
+        total = len(fm_lines)
+        pos = 0
+        inside_metadata = False
+        while pos < total:
+            current = fm_lines[pos].rstrip("\n")
+            content = current.strip()
+            depth = indent_of(current)
+
+            if not content:
+                pos += 1
+                continue
+            if content == "metadata:":
+                inside_metadata = True
+                pos += 1
+                continue
+            # An unindented key ends the metadata block.
+            if inside_metadata and depth == 0 and ":" in content:
+                inside_metadata = False
+
+            hit = key_value.match(content)
+            if hit is None:
+                pos += 1
+                continue
+            key, val = hit.groups()
+
+            if val in block_markers:
+                # Block scalar: gather the blank or deeper-indented lines that
+                # follow and fold them into one space-separated string.
+                pieces = []
+                pos += 1
+                while pos < total:
+                    follower = fm_lines[pos]
+                    if follower.strip() and indent_of(follower) <= depth:
+                        break
+                    pieces.append(follower.strip())
+                    pos += 1
+                val = " ".join(piece for piece in pieces if piece)
+            else:
+                val = val.strip().strip('"').strip("'")
+                pos += 1
+
+            if key == "description":
+                self.description = val
+            elif key == "type":
+                # Both top-level and metadata.type land here.
+                self.type = (val or "").lower() or None
+            elif key == "name" and not inside_metadata:
+                self.name = val
+
+
+# --------------------------------------------------------------------------
+# Index (MEMORY.md) parsing
+# --------------------------------------------------------------------------
+
+INDEX_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")  # markdown [title](target)
+# Wiki-style body backlinks like [[some-name]]; group 1 is the referenced name.
+BACKLINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
+# Dated claims: a trigger word ("as of", "updated", ...) followed by an ISO date.
+DATE_RE = re.compile(
+    r"(?:as of|updated|corrected|lesson|fixed|live)\s+"
+    r"(\d{4}-\d{2}-\d{2})",
+    re.IGNORECASE,
+)
+ISO_DATE_RE = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")  # any bare YYYY-MM-DD
+
+# Memory type -> '## <Header>' name in MEMORY.md (note: 'user' maps to 'Users').
+TYPE_HEADER = {
+    "reference": "Reference",
+    "feedback": "Feedback",
+    "project": "Project",
+    "user": "Users",
+}
+
+
+def parse_index(index_path: Path):
+    """
+    Scan MEMORY.md for '## ' section headers and bullet links.
+    Returns a 3-tuple:
+      links: list of (title, target, lineno, raw_line); lineno is 0-based
+      headers: dict header-name -> 0-based lineno (a repeated name keeps the last)
+      lines: the file split on newlines; [] when the index file is missing
+    """
+    if not index_path.is_file():
+        return [], {}, []
+    header_re = re.compile(r"^##\s+(.+?)\s*$")
+    found_links, found_headers = [], {}
+    rows = index_path.read_text(encoding="utf-8", errors="replace").split("\n")
+    for lineno, row in enumerate(rows):
+        header = header_re.match(row)
+        if header is not None:
+            found_headers[header.group(1).strip()] = lineno
+        elif row.lstrip().startswith("- "):
+            # Only the first [title](target) on a bullet line is recorded.
+            link = INDEX_LINK_RE.search(row)
+            if link is not None:
+                found_links.append((link.group(1), link.group(2), lineno, row))
+    return found_links, found_headers, rows
+
+
+# --------------------------------------------------------------------------
+# Referenced-artifact extraction (conservative)
+# --------------------------------------------------------------------------
+
+# Referenced-artifact extraction is intentionally CONSERVATIVE: it only inspects
+# backtick-wrapped spans (`...`) and only treats a span as a repo path when the
+# whole span is a single path-like token. The extension match is anchored to the
+# end of the token, so `identity.json` is never mistaken for `identity.js`. We do
+# NOT scan bare prose -- too many false positives.
+PATHISH_RE = re.compile(r"`([^`\n]+?)`")
+
+# Recognized extensions. EXT_RE anchors the alternation with `$`, which is what
+# prevents json->js style truncation; the order of this tuple does not matter.
+KNOWN_EXTS = (
+    "tsx", "json", "yaml", "toml", "service",
+    "py", "sh", "rs", "ts", "js", "md", "yml", "sql", "ps1",
+)
+EXT_RE = re.compile(r"\.(?:" + "|".join(KNOWN_EXTS) + r")$", re.IGNORECASE)
+
+# Vault-style secret paths live in the SEPARATE vault repo, not claudetools.
+VAULT_HINT_RE = re.compile(r"\.sops\.ya?ml$", re.IGNORECASE)
+
+# Absolute (server-side) path prefixes that are never treated as repo paths.
+ABS_PREFIXES = ("/api/", "/home/", "/var/", "/opt/", "/etc/", "/tmp/",
+                "/proc/", "/dev/", "/data/", "/usr/")
+
+
+def looks_like_repo_path(token: str) -> bool:
+    """
+    Decide whether a backtick-span token plausibly names a file in this repo.
+
+    Conservative filter: returns True only when the token has no whitespace or
+    template/URL characters, is not a URL, server absolute path or vault secret
+    ref, and ends in one of KNOWN_EXTS. Bare filenames and slash-separated
+    paths are both accepted.
+    """
+    token = token.strip()
+    if not token:
+        return False
+    # Reject anything with whitespace, glob/placeholder/url/colon characters --
+    # those are descriptions or templates, not concrete repo paths.
+    if any(c in token for c in (" ", "<", ">", "*", "?", ":", "|", "\\")):
+        return False
+    # Colon-free URL-ish forms (anything containing ':' was rejected above).
+    if token.startswith(("http://", "https://", "//", "git@", "vault:")):
+        return False
+    if token.startswith(ABS_PREFIXES):
+        return False  # server absolute paths, not repo-relative
+    # Vault secret refs belong to the vault repo -- not a staleness signal here.
+    if VAULT_HINT_RE.search(token):
+        return False
+    # Must end in a recognized extension. Bare filenames and slash-separated
+    # paths are treated alike, so this check is the final verdict.
+    return bool(EXT_RE.search(token))
+
+
+def extract_referenced_paths(body: str) -> list[str]:
+    """Return sorted, de-duplicated repo-path tokens found in backtick spans."""
+    found = set()
+    for m in PATHISH_RE.finditer(body):
+        span = m.group(1).strip()
+        if not span or " " in span:
+            continue  # commands/prose, not a single path token
+        # Strip literal "./" only; lstrip("./") is a char SET and mangles ".claude/x".
+        token = re.sub(r"^(?:\./)+", "", span)
+        if looks_like_repo_path(token):
+            found.add(token)
+    return sorted(found)
+
+
+def repo_path_exists(repo_root: Path, token: str) -> bool:
+    """
+    Return True when `token` names something that exists under `repo_root`.
+
+    Tried in order: the repo-relative path; for a bare filename, any file of
+    that name anywhere in the repo; for a multi-segment path, any file whose
+    last two segments match (memories often cite subproject-relative paths).
+    """
+    # Drop leading "/", "./" and "../" segments only. The previous
+    # lstrip("./") removed a character SET and so turned ".claude/x" into
+    # "claude/x", making the direct lookup below miss dot-directories.
+    token = re.sub(r"^(?:\.{0,2}/)+", "", token)
+    if (repo_root / token).exists():
+        return True
+    parts = token.split("/")
+    tail = "/".join(parts[-2:])
+    try:
+        for p in repo_root.rglob(parts[-1]):
+            if len(parts) == 1 or str(p).replace("\\", "/").endswith(tail):
+                return True
+    except (OSError, ValueError):
+        return False
+    return False
+
+
+# --------------------------------------------------------------------------
+# Similarity / duplicate clustering (token-overlap heuristic)
+# --------------------------------------------------------------------------
+
+STOPWORDS = {  # words ignored by tokenize(), which also drops tokens of <= 2 chars
+    "the", "a", "an", "and", "or", "to", "of", "in", "on", "for", "with",
+    "is", "are", "be", "not", "via", "use", "used", "uses", "no", "never",
+    "always", "only", "via", "from", "by", "at", "as", "it", "this", "that",
+    "when", "if", "then", "do", "don't", "we", "our", "you", "your",
+}
+
+
+def tokenize(text: str) -> set[str]:
+    words = re.findall(r"[a-z0-9]+", (text or "").lower())  # lowercase alnum runs
+    return {w for w in words if len(w) > 2 and w not in STOPWORDS}
+
+
+def jaccard(a: set[str], b: set[str]) -> float:
+    """Jaccard similarity |a & b| / |a | b|; 0.0 when either set is empty."""
+    if not (a and b):
+        return 0.0
+    # Both sets are non-empty here, so the union can never be empty.
+    return len(a & b) / len(a | b)
+
+
+def cluster_overlaps(mems: list[Memory], threshold: float = 0.34):
+    """
+    Group related memories of the same type into candidate merge clusters.
+
+    Two memories are linked when the Jaccard overlap of their name /
+    description / slug tokens is >= threshold, or when their slugs share the
+    same first two '_'-separated words (e.g. feedback_syncro_*). Links are
+    merged transitively. Returns a list of (type, sorted filenames) for every
+    cluster with more than one member; untyped memories use type "untyped".
+    """
+    per_type: dict[str, list[Memory]] = {}
+    for mem in mems:
+        per_type.setdefault(mem.type or "untyped", []).append(mem)
+
+    result = []
+    for kind, bucket in per_type.items():
+        names = [mem.filename for mem in bucket]
+        # Token signature per memory: name + description + slug words.
+        signature = {
+            mem.filename: tokenize(
+                " ".join(filter(None, [mem.name, mem.description, mem.slug.replace("_", " ")]))
+            )
+            for mem in bucket
+        }
+        # Two-word slug prefix, or None when the slug has fewer than two words.
+        prefix = {}
+        for mem in bucket:
+            words = mem.slug.split("_")
+            prefix[mem.filename] = "_".join(words[:2]) if len(words) >= 2 else None
+
+        # Union-find over filenames (with path halving).
+        root_of = {name: name for name in names}
+
+        def find(node):
+            while root_of[node] != node:
+                root_of[node] = root_of[root_of[node]]
+                node = root_of[node]
+            return node
+
+        # Link every pair that is similar enough or shares a slug prefix.
+        for left_pos, left in enumerate(names):
+            for right in names[left_pos + 1 :]:
+                linked = (
+                    prefix[left] is not None and prefix[left] == prefix[right]
+                ) or jaccard(signature[left], signature[right]) >= threshold
+                if linked:
+                    a, b = find(left), find(right)
+                    if a != b:
+                        root_of[a] = b
+
+        clustered: dict[str, list[str]] = {}
+        for name in names:
+            clustered.setdefault(find(name), []).append(name)
+        result.extend(
+            (kind, sorted(members)) for members in clustered.values() if len(members) > 1
+        )
+    return result
+
+
+# --------------------------------------------------------------------------
+# Stale dated facts
+# --------------------------------------------------------------------------
+
+
+def find_stale_dates(mem: Memory, today: datetime.date):
+    """Return (date_str, age_days) per distinct date in mem.body older than STALE_MONTHS."""
+    stale = []
+    handled = set()
+    # Keyword-prefixed dates (DATE_RE) are reported first, then any remaining
+    # bare ISO dates; a given date string is only ever reported once.
+    for pattern in (DATE_RE, ISO_DATE_RE):
+        for stamp in (found.group(1) for found in pattern.finditer(mem.body)):
+            if stamp in handled:
+                continue
+            handled.add(stamp)
+            try:
+                age_days = (today - datetime.date.fromisoformat(stamp)).days
+            except ValueError:
+                continue  # shaped like a date but not a real calendar day
+            if age_days > STALE_MONTHS * 30:
+                stale.append((stamp, age_days))
+    return stale
+
+
+# --------------------------------------------------------------------------
+# Report
+# --------------------------------------------------------------------------
+
+
+class Report:  # ordered buffer of report lines; str(report) joins them with newlines
+    def __init__(self):
+        self.lines: list[str] = []
+
+    def add(self, s: str = ""):  # no argument -> appends an empty (blank) line
+        self.lines.append(s)
+
+    def __str__(self):
+        return "\n".join(self.lines)
+
+
+def slugify_link_target(target: str) -> str:
+    return Path(target).stem  # final path component without its last suffix
+
+
+def run(args) -> int:
+    """Build and print the memory report (plus --apply-safe fixes); return exit code."""
+    repo_root = resolve_claudetools_root()
+    mem_dir = repo_root / ".claude" / "memory"
+    index_path = mem_dir / "MEMORY.md"
+
+    if not mem_dir.is_dir():
+        print(f"[ERROR] memory dir not found: {mem_dir}")
+        return 2
+
+    today = datetime.date.today()
+    rpt = Report()
+    rpt.add("# Memory Dream Report")
+    rpt.add(f"Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}")
+    rpt.add(f"Repo root: {repo_root}")
+    rpt.add(f"Memory store: {mem_dir}")
+    rpt.add(f"Mode: {'APPLY-SAFE (additive)' if args.apply_safe else 'REPORT-ONLY'}")
+    rpt.add("")
+
+    # Load memories.
+    mem_files = sorted(p for p in mem_dir.glob("*.md") if p.name != "MEMORY.md")
+    mems = [Memory(p) for p in mem_files]
+    mem_by_file = {m.filename: m for m in mems}
+    rpt.add(f"Loaded {len(mems)} memory files (excluding MEMORY.md).")
+    rpt.add("")
+
+    # ----- 1. INDEX RECONCILE -----
+    links, headers, index_lines = parse_index(index_path)
+    indexed_targets = {slugify_link_target(t): (title, t, ln)
+                       for (title, t, ln, _raw) in links}
+    rpt.add("## 1. INDEX RECONCILE")
+    rpt.add("")
+
+    orphans = []  # files with no index line
+    for m in mems:
+        if m.slug not in indexed_targets:
+            orphans.append(m)
+    rpt.add(f"### Orphan files (no index line): {len(orphans)}")
+    for m in orphans:
+        rpt.add(f"- [INFO] {m.filename}  (type={m.type or '?'})")
+    rpt.add("")
+
+    missing_targets = []  # index lines whose file is missing
+    for title, target, ln, _raw in links:
+        # Only consider links that look like local memory files.
+        tgt = target.strip()
+        if tgt.startswith(("http://", "https://")):
+            continue
+        resolved = (mem_dir / tgt).resolve()
+        if not resolved.is_file():
+            missing_targets.append((title, target, ln))
+    rpt.add(f"### Index lines pointing at missing files: {len(missing_targets)}")
+    for title, target, ln in missing_targets:
+        rpt.add(f"- [WARNING] line {ln + 1}: [{title}]({target}) -> file not found")
+    rpt.add("")
+
+    name_mismatches = []  # frontmatter name vs filename slug
+    for m in mems:
+        if m.name is None:
+            name_mismatches.append((m.filename, "(no name in frontmatter)"))
+            continue
+        # The convention is loose: name may be a title, not the slug. Only flag
+        # when name itself looks like a slug AND differs from the filename slug.
+        if re.fullmatch(r"[a-z0-9_]+", m.name.strip()) and m.name.strip() != m.slug:
+            name_mismatches.append((m.filename, f"name='{m.name}' != slug='{m.slug}'"))
+    rpt.add(f"### Frontmatter name vs filename signals: {len(name_mismatches)}")
+    for fn, note in name_mismatches:
+        rpt.add(f"- [INFO] {fn}: {note}")
+    rpt.add("")
+
+    # ----- 2. BACKLINKS -----
+    rpt.add("## 2. BACKLINKS ([[name]] references)")
+    rpt.add("")
+    known_slugs = {m.slug for m in mems}
+    broken_backlinks = []
+    for m in mems:
+        for bm in BACKLINK_RE.finditer(m.body):
+            ref = bm.group(1).strip()
+            ref_slug = slugify_link_target(ref)
+            if ref_slug not in known_slugs and ref not in known_slugs:
+                broken_backlinks.append((m.filename, ref))
+    rpt.add(f"### Broken backlinks: {len(broken_backlinks)}")
+    for fn, ref in broken_backlinks:
+        rpt.add(f"- [WARNING] {fn}: [[{ref}]] has no matching memory file")
+    if not broken_backlinks:
+        rpt.add("- [OK] no broken backlinks found")
+    rpt.add("")
+
+    # ----- 3. REFERENCED-ARTIFACT VALIDITY -----
+    rpt.add("## 3. REFERENCED-ARTIFACT VALIDITY (conservative; 'verify', not 'delete')")
+    rpt.add("")
+    artifact_flags = []
+    for m in mems:
+        for tok in extract_referenced_paths(m.body):
+            if not repo_path_exists(repo_root, tok):
+                artifact_flags.append((m.filename, tok))
+    rpt.add(f"### Referenced paths not found in repo: {len(artifact_flags)}")
+    for fn, tok in artifact_flags:
+        rpt.add(f"- [VERIFY] {fn}: `{tok}` not found under repo (may be server-side "
+                f"or renamed -- verify, do not auto-delete)")
+    if not artifact_flags:
+        rpt.add("- [OK] no clearly-stale repo paths detected")
+    rpt.add("")
+
+    # ----- 4. DUPLICATE / OVERLAP CLUSTERS -----
+    rpt.add("## 4. DUPLICATE / OVERLAP CLUSTERS (PROPOSED merges -- never auto-applied)")
+    rpt.add("")
+    clusters = cluster_overlaps(mems)
+    clusters.sort(key=lambda c: (-len(c[1]), c[0]))
+    rpt.add(f"### Candidate clusters: {len(clusters)}")
+    for typ, members in clusters:
+        rpt.add(f"- [{typ}] {len(members)} related memories:")
+        for f in members:
+            mm = mem_by_file.get(f)
+            desc = (mm.description or mm.name or "") if mm else ""
+            desc = desc[:90]
+            rpt.add(f"    - {f}  -- {desc}")
+    if not clusters:
+        rpt.add("- [OK] no overlap clusters above threshold")
+    rpt.add("")
+
+    # ----- 5. STALE DATED FACTS -----
+    rpt.add(f"## 5. STALE DATED FACTS (project-type, dated > {STALE_MONTHS} months)")
+    rpt.add("")
+    stale_hits = []
+    for m in mems:
+        if (m.type or "") != "project":
+            continue
+        hits = find_stale_dates(m, today)
+        if hits:
+            stale_hits.append((m.filename, hits))
+    rpt.add(f"### Project memories with stale dated claims: {len(stale_hits)}")
+    for fn, hits in stale_hits:
+        for ds, age in hits:
+            rpt.add(f"- [VERIFY] {fn}: dated {ds} (~{age} days old) -- re-verify")
+    if not stale_hits:
+        rpt.add("- [OK] no stale dated project facts")
+    rpt.add("")
+
+    # ----- 6. DRIFT vs PROFILE STORE -----
+    rpt.add("## 6. DRIFT vs HARNESS PROFILE STORE")
+    rpt.add("")
+    prof_dir = profile_memory_dir(repo_root)
+    profile_only = []
+    repo_only = []
+    conflicts = []
+    if prof_dir is None:
+        rpt.add("- [INFO] profile memory dir not found; skipping drift check.")
+    else:
+        rpt.add(f"Profile store: {prof_dir}")
+        rpt.add("")
+        prof_files = {p.name for p in prof_dir.glob("*.md") if p.name != "MEMORY.md"}
+        repo_files = {m.filename for m in mems}
+
+        for pf in sorted(prof_files - repo_files):
+            profile_only.append(pf)
+        for rf in sorted(repo_files - prof_files):
+            repo_only.append(rf)
+        for both in sorted(prof_files & repo_files):
+            a = (prof_dir / both).read_text(encoding="utf-8", errors="replace")
+            b = (mem_dir / both).read_text(encoding="utf-8", errors="replace")
+            if a != b:
+                conflicts.append(both)
+
+        rpt.add(f"### Profile-only (candidates to MIGRATE INTO repo): {len(profile_only)}")
+        for f in profile_only:
+            rpt.add(f"- [INFO] {f}")
+        rpt.add("")
+        rpt.add(f"### Repo-only (candidates to PUSH OUT to profile): {len(repo_only)}")
+        for f in repo_only:
+            rpt.add(f"- [INFO] {f}")
+        rpt.add("")
+        rpt.add(f"### Present in BOTH but differing (CONFLICT -- human review): "
+                f"{len(conflicts)}")
+        for f in conflicts:
+            rpt.add(f"- [WARNING] {f}: content differs between repo and profile")
+        rpt.add("")
+
+    # ----- APPLY-SAFE ACTIONS (additive-only) -----
+    actions_taken = []
+    if args.apply_safe:
+        rpt.add("## APPLY-SAFE ACTIONS PERFORMED (additive-only)")
+        rpt.add("")
+
+        # (a) Append missing index lines for orphan files.
+        if orphans and index_path.is_file():
+            appended = append_index_lines(index_path, orphans, index_lines, headers)
+            for line, hdr in appended:
+                actions_taken.append(f"INDEX += [{hdr}] {line}")
+                rpt.add(f"- [OK] appended index line under ## {hdr}: {line}")
+        elif orphans:
+            rpt.add("- [WARNING] orphans exist but MEMORY.md missing; nothing appended")
+
+        # (b) Copy profile-only files INTO repo (never overwrite).
+        if prof_dir is not None:
+            for f in profile_only:
+                src = prof_dir / f
+                dst = mem_dir / f
+                if dst.exists():
+                    rpt.add(f"- [SKIP] {f}: already exists in repo (not overwriting)")
+                    continue
+                shutil.copy2(src, dst)
+                actions_taken.append(f"COPIED profile->repo: {f}")
+                rpt.add(f"- [OK] copied profile-only file into repo: {f}")
+        if not actions_taken:
+            rpt.add("- [INFO] no additive actions were necessary")
+        rpt.add("")
+
+    # ----- SUMMARY -----
+    rpt.add("## SUMMARY")
+    rpt.add("")
+    rpt.add(f"- memory files:                 {len(mems)}")
+    rpt.add(f"- orphan files (no index):      {len(orphans)}")
+    rpt.add(f"- index -> missing file:        {len(missing_targets)}")
+    rpt.add(f"- name/filename signals:        {len(name_mismatches)}")
+    rpt.add(f"- broken backlinks:             {len(broken_backlinks)}")
+    rpt.add(f"- stale referenced paths:       {len(artifact_flags)}")
+    rpt.add(f"- overlap clusters:             {len(clusters)}")
+    rpt.add(f"- stale dated project facts:    {len(stale_hits)}")
+    rpt.add(f"- profile-only files:           {len(profile_only)}")
+    rpt.add(f"- repo-only files:              {len(repo_only)}")
+    rpt.add(f"- repo<->profile conflicts:     {len(conflicts)}")
+    if args.apply_safe:
+        rpt.add(f"- additive actions performed:   {len(actions_taken)}")
+    rpt.add("")
+    rpt.add("## PROPOSED (needs human approval -- NEVER auto-applied)")
+    rpt.add("")
+    n_prop = 0
+    for typ, members in clusters:
+        n_prop += 1
+        rpt.add(f"- [MERGE?] consolidate {len(members)} '{typ}' memories: "
+                f"{', '.join(members)}")
+    for fn, hits in stale_hits:
+        n_prop += 1
+        rpt.add(f"- [REVERIFY?] {fn} (dated facts) -- confirm still true, then update")
+    for fn, tok in artifact_flags:
+        n_prop += 1
+        rpt.add(f"- [STALE-REF?] {fn} references `{tok}` -- confirm/repoint or note moved")
+    for title, target, ln in missing_targets:
+        n_prop += 1
+        rpt.add(f"- [INDEX-CLEANUP?] MEMORY.md line {ln + 1} points at missing "
+                f"{target} -- human decides keep/remove")
+    if prof_dir is not None:
+        for f in conflicts:
+            n_prop += 1
+            rpt.add(f"- [DRIFT-RESOLVE?] {f} differs repo vs profile -- human picks "
+                    f"winner (sync-memory.sh leaves both untouched)")
+    if n_prop == 0:
+        rpt.add("- [OK] nothing proposed; memory store is clean")
+    rpt.add("")
+
+    out = str(rpt)
+    print(out)
+
+    # Write report file unless suppressed.
+    if not args.no_file:
+        if args.report_file:
+            rpath = Path(args.report_file)
+        else:
+            stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H%M")
+            rpath = mem_dir / "_reports" / f"{stamp}-dream.md"
+        # Create only the directory that is actually written to.
+        rpath.parent.mkdir(parents=True, exist_ok=True)
+        rpath.write_text(out + "\n", encoding="utf-8")
+        print(f"\n[INFO] report written: {rpath}")
+
+    return 0
+
+
+def append_index_lines(index_path: Path, orphans, index_lines, headers):
+    """
+    Additive only: append a '- [Name](file.md) -- description' line for each
+    orphan under the '## <Header>' section for its type (unknown or missing
+    types go under 'Project'). Never reorders or removes existing lines. A
+    missing header becomes a new section after the last non-blank line, so a
+    trailing newline at end of file is preserved. `index_lines` and `headers`
+    are accepted for call compatibility but unused (the file is re-read).
+
+    Returns list of (line_text, header_used).
+    """
+    text = index_path.read_text(encoding="utf-8", errors="replace")
+    lines = text.split("\n")
+    appended = []
+    header_re = re.compile(r"^##\s+(.+?)\s*$")
+
+    # Group orphans by target header.
+    by_header: dict[str, list[Memory]] = {}
+    for m in orphans:
+        # "Project" is the safe default bucket; a human can recategorize.
+        hdr = TYPE_HEADER.get(m.type or "", "Project")
+        by_header.setdefault(hdr, []).append(m)
+
+    def build_line(m: Memory) -> str:
+        title = m.name or m.slug
+        hook = (m.description or "").strip()
+        if hook:
+            return f"- [{title}]({m.filename}) -- {hook}"
+        return f"- [{title}]({m.filename})"
+
+    for hdr, members in by_header.items():
+        new_lines = [build_line(m) for m in members]
+
+        # Find this header's line index (first exact match), if present.
+        hidx = None
+        for i, ln in enumerate(lines):
+            hm = header_re.match(ln)
+            if hm and hm.group(1).strip() == hdr:
+                hidx = i
+                break
+
+        # Section end: the next '## ' header after hidx, or EOF. For a missing
+        # header the "section" is the whole file (floor -1, end EOF).
+        floor = -1 if hidx is None else hidx
+        end = len(lines)
+        if hidx is not None:
+            for j in range(hidx + 1, len(lines)):
+                if re.match(r"^##\s+", lines[j]):
+                    end = j
+                    break
+        # Insert after the last non-blank line so trailing blank lines (and
+        # the file's final newline) stay at the end.
+        insert_at = end
+        while insert_at - 1 > floor and lines[insert_at - 1].strip() == "":
+            insert_at -= 1
+
+        block = new_lines
+        if hidx is None:
+            # Fresh section; blank separator unless the file has no content.
+            block = ([""] if insert_at > 0 else []) + [f"## {hdr}"] + new_lines
+        lines[insert_at:insert_at] = block
+        appended.extend((nl, hdr) for nl in new_lines)
+
+    index_path.write_text("\n".join(lines), encoding="utf-8")
+    return appended
+
+
+def main() -> int:
+    """Parse CLI flags, run the analyzer, and return the process exit code."""
+    parser = argparse.ArgumentParser(
+        description="Memory lint + consolidation analyzer (additive-only)."
+    )
+    switches = (
+        (
+            "--apply-safe",
+            "Perform ONLY additive fixes (append index lines, copy profile-only "
+            "files into repo). Never deletes/overwrites/merges.",
+        ),
+        ("--no-file", "Print report to stdout only; do not write a _reports/ file."),
+    )
+    for flag, help_text in switches:
+        parser.add_argument(flag, action="store_true", help=help_text)
+    parser.add_argument(
+        "--report-file",
+        default=None,
+        help="Explicit path for the report file (overrides _reports/ default).",
+    )
+    cli_args = parser.parse_args()
+    try:
+        return run(cli_args)
+    except KeyboardInterrupt:
+        print("[ERROR] interrupted")
+        return 130
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # main()'s return value becomes the process exit status
--- a/.claude/skills/memory-dream/scripts/selftest.py
+++ b/.claude/skills/memory-dream/scripts/selftest.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+"""
+selftest.py -- exercises memory_dream.py against a synthetic fixture store.
+
+Builds a throwaway repo + profile memory store in a temp dir, runs the analyzer
+both in report-only and --apply-safe modes (as a subprocess, with
+CLAUDETOOLS_ROOT / HOME / CLAUDE_PROJECT_DIR pointed at the fixtures), and
+asserts:
+  * each detector fires (orphan, missing index target, broken backlink, stale
+    referenced path, overlap cluster, stale dated project fact, profile drift,
+    repo<->profile content conflict),
+  * --apply-safe is strictly additive (no file deleted, no file overwritten,
+    orphan index line appended, profile-only file migrated, differing file
+    skipped not clobbered).
+
+Stdlib only. Exit 0 on success, 1 on any failed assertion.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+# Analyzer under test: memory_dream.py located in the same directory as this file.
+SCRIPT = Path(__file__).resolve().with_name("memory_dream.py")
+# Interpreter used for the subprocess runs; falls back to "python" when
+# sys.executable is empty.
+PY = sys.executable or "python"
+
+# Messages of failed assertions; appended to by check() and summarized by main().
+FAILURES: list[str] = []
+
+
+def check(cond: bool, msg: str) -> None:
+    """Print one pass/fail line for an assertion and remember failures.
+
+    A truthy ``cond`` prints ``[OK] <msg>``. Otherwise ``[ERROR] <msg>`` is
+    printed and ``msg`` is appended to the module-level FAILURES list.
+    """
+    if cond:
+        print(f"[OK] {msg}")
+        return
+    print(f"[ERROR] {msg}")
+    FAILURES.append(msg)
+
+
+def write(path: Path, text: str) -> None:
+    """Write ``text`` to ``path`` as UTF-8, creating missing parent directories."""
+    parent_dir = path.parent
+    parent_dir.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as handle:
+        handle.write(text)
+
+
+def build_fixture(root: Path) -> tuple[Path, Path, Path, Path]:
+    """Create the synthetic repo + profile memory stores under ``root``.
+
+    Layout written:
+      * ``root/repo/.claude/memory`` -- repo store: five memory files plus a
+        MEMORY.md index that omits feedback_orphan.md and lists a
+        reference_ghost.md that is never created.
+      * ``root/repo/.claude/scripts/real.sh`` -- an existing path referenced
+        by reference_alpha.md.
+      * ``root/home/.claude/projects/<slug>/memory`` -- profile store: one
+        profile-only file and a reference_alpha.md whose body differs from
+        the repo copy.
+
+    Returns:
+        ``(repo_root, project_dir, home, prof)``; ``project_dir`` is the same
+        path as ``repo_root`` and ``prof`` is the profile memory directory.
+    """
+    repo_root = root / "repo"
+    mem = repo_root / ".claude" / "memory"
+    mem.mkdir(parents=True, exist_ok=True)
+
+    # A real script the memory can reference (exists -> must NOT be flagged).
+    write(repo_root / ".claude" / "scripts" / "real.sh", "#!/bin/sh\necho hi\n")
+
+    # --- memory files ---
+    # indexed + clean
+    write(mem / "reference_alpha.md",
+          "---\nname: Alpha\ndescription: alpha thing\nmetadata:\n  type: reference\n---\n"
+          "Uses `.claude/scripts/real.sh` which exists.\n")
+    # orphan (no index line) + broken backlink + stale referenced path
+    write(mem / "feedback_orphan.md",
+          "---\nname: Orphan Feedback\ndescription: an orphan\ntype: feedback\n---\n"
+          "See [[no_such_memory]] and `scripts/ghost_missing.py` which is gone.\n")
+    # two overlapping feedback memories (same slug prefix -> cluster)
+    write(mem / "feedback_syncro_aaa.md",
+          "---\nname: Syncro AAA\ndescription: syncro billing rule one\ntype: feedback\n---\nbody\n")
+    write(mem / "feedback_syncro_bbb.md",
+          "---\nname: Syncro BBB\ndescription: syncro billing rule two\ntype: feedback\n---\nbody\n")
+    # stale dated project fact
+    write(mem / "project_old.md",
+          "---\nname: Old Project\ndescription: ancient\ntype: project\n---\n"
+          "Migration completed as of 2019-01-01 and never touched since.\n")
+
+    # --- MEMORY.md index ---
+    # references reference_alpha + a MISSING target; omits feedback_orphan (orphan)
+    write(mem / "MEMORY.md",
+          "# Memory Index\n\n"
+          "## Reference\n"
+          "- [Alpha](reference_alpha.md) -- alpha thing\n"
+          "- [Ghost](reference_ghost.md) -- points at a missing file\n\n"
+          "## Feedback\n"
+          "- [Syncro AAA](feedback_syncro_aaa.md) -- syncro billing rule one\n"
+          "- [Syncro BBB](feedback_syncro_bbb.md) -- syncro billing rule two\n\n"
+          "## Project\n"
+          "- [Old Project](project_old.md) -- ancient\n")
+
+    # --- profile store ---
+    # slug derivation mirrors memory_dream.profile_memory_dir
+    # NOTE(review): that helper is not visible from this file -- confirm the
+    # two derivations stay in sync.
+    project_dir = repo_root  # we set CLAUDE_PROJECT_DIR to repo_root
+    home = root / "home"
+    # Every run of non-alphanumeric characters in the resolved path becomes "-".
+    slug = re.sub(r"[^A-Za-z0-9]+", "-", str(project_dir.resolve()))
+    prof = home / ".claude" / "projects" / slug / "memory"
+    prof.mkdir(parents=True, exist_ok=True)
+
+    # profile-only file (candidate to migrate INTO repo)
+    write(prof / "feedback_profile_only.md",
+          "---\nname: Profile Only\ndescription: lives only in profile\ntype: feedback\n---\nkeep me\n")
+    # same-named in BOTH but DIFFERING content (must be skipped, not overwritten)
+    write(prof / "reference_alpha.md",
+          "---\nname: Alpha\ndescription: alpha thing\nmetadata:\n  type: reference\n---\n"
+          "PROFILE VERSION -- different content.\n")
+
+    return repo_root, project_dir, home, prof
+
+
+def run_analyzer(repo_root: Path, project_dir: Path, home: Path, *extra: str) -> str:
+    """Run memory_dream.py as a subprocess against the fixture stores.
+
+    Args:
+        repo_root: exported to the child as CLAUDETOOLS_ROOT.
+        project_dir: exported to the child as CLAUDE_PROJECT_DIR.
+        home: fixture home directory, exported as HOME and USERPROFILE.
+        *extra: additional command-line arguments appended after ``--no-file``.
+
+    Returns:
+        The child's stdout and stderr joined by a newline. The exit status is
+        not inspected; callers assert on the text.
+    """
+    env = dict(os.environ)
+    env["CLAUDETOOLS_ROOT"] = str(repo_root)
+    env["CLAUDE_PROJECT_DIR"] = str(project_dir)
+    env["HOME"] = str(home)
+    # On Windows, Path.home() / os.path.expanduser ignore HOME (Python 3.8+)
+    # and read USERPROFILE, so redirect that too; otherwise the child could
+    # resolve the real user profile instead of the fixture.
+    # NOTE(review): how the analyzer looks up the home directory is not
+    # visible from this file; setting both variables covers either lookup.
+    env["USERPROFILE"] = str(home)
+    env["PYTHONIOENCODING"] = "utf-8"
+    cmd = [PY, str(SCRIPT), "--no-file", *extra]
+    res = subprocess.run(cmd, env=env, capture_output=True, text=True,
+                         encoding="utf-8", errors="replace")
+    return res.stdout + "\n" + res.stderr
+
+
+def main() -> int:
+    """Build the fixture, run the analyzer twice, and report the assertions.
+
+    The first run is report-only and checks that each detector shows up in the
+    output. The second run uses --apply-safe and checks that the repo store
+    only grew. Returns 0 when every assertion passed, 1 otherwise.
+    """
+
+    def snapshot(folder: Path) -> dict:
+        # Map file name -> text for every top-level *.md file in ``folder``.
+        return {p.name: p.read_text(encoding="utf-8") for p in folder.glob("*.md")}
+
+    with tempfile.TemporaryDirectory() as tmp:
+        repo_root, project_dir, home, prof = build_fixture(Path(tmp))
+        mem_dir = repo_root / ".claude" / "memory"
+
+        # ---- report-only run ----
+        report = run_analyzer(repo_root, project_dir, home)
+
+        # real.sh exists, so it must not appear in the text between the last
+        # REFERENCED-ARTIFACT marker and the next "##". With no marker at all
+        # the check passes trivially.
+        if "REFERENCED-ARTIFACT" in report:
+            artifact_part = report.split("REFERENCED-ARTIFACT")[-1].split("##")[0]
+            real_not_flagged = "real.sh" not in artifact_part
+        else:
+            real_not_flagged = True
+
+        report_checks = [
+            ("Mode: REPORT-ONLY" in report, "default run is report-only"),
+            ("feedback_orphan.md" in report
+             and "Orphan files (no index line): 1" in report,
+             "detects the orphan file"),
+            ("reference_ghost.md" in report and "missing files: 1" in report,
+             "detects index line pointing at missing file"),
+            ("[[no_such_memory]]" in report, "detects broken backlink"),
+            ("ghost_missing.py" in report, "flags stale referenced path"),
+            (real_not_flagged,
+             "does NOT flag an existing referenced path (real.sh)"),
+            ("feedback_syncro_aaa.md" in report
+             and "feedback_syncro_bbb.md" in report
+             and "CLUSTER" in report.upper(),
+             "detects overlap cluster"),
+            ("project_old.md" in report and "2019-01-01" in report,
+             "detects stale dated project fact"),
+            ("feedback_profile_only.md" in report
+             and "MIGRATE INTO repo" in report,
+             "detects profile-only drift"),
+            ("reference_alpha.md" in report
+             and "differs between repo and profile" in report,
+             "detects repo<->profile content conflict"),
+            ("PROPOSED (needs human approval" in report,
+             "emits PROPOSED section"),
+        ]
+        for passed, label in report_checks:
+            check(passed, label)
+
+        # ---- apply-safe run (additive only), bracketed by snapshots ----
+        before = snapshot(mem_dir)
+        apply_report = run_analyzer(repo_root, project_dir, home, "--apply-safe")
+        after = snapshot(mem_dir)
+
+        # No file deleted.
+        check(set(before) <= set(after), "apply-safe deleted no repo file")
+        # Every memory body except the index must be unchanged.
+        for fname, original_text in before.items():
+            if fname != "MEMORY.md":
+                check(after.get(fname) == original_text,
+                      f"apply-safe did not alter memory body: {fname}")
+
+        # MEMORY.md gained the orphan line and kept all old non-blank lines.
+        old_index = before["MEMORY.md"]
+        new_index = after["MEMORY.md"]
+        check("feedback_orphan.md" in new_index,
+              "apply-safe appended orphan index line")
+        kept_lines = [ln for ln in old_index.splitlines() if ln.strip()]
+        check(all(ln in new_index for ln in kept_lines),
+              "apply-safe preserved every existing index line")
+
+        # Profile-only file migrated INTO repo.
+        check("feedback_profile_only.md" in after,
+              "apply-safe migrated profile-only file into repo")
+        # Differing same-named file was SKIPPED, not overwritten ...
+        check(after["reference_alpha.md"] == before["reference_alpha.md"],
+              "apply-safe did NOT overwrite differing repo file (skipped)")
+        # ... and surfaced as a conflict for human review.
+        check("reference_alpha.md" in apply_report
+              and "differs between repo and profile" in apply_report,
+              "apply-safe reported the differing file as a conflict (not overwritten)")
+        # The profile-side source file is still there after migration.
+        check((prof / "feedback_profile_only.md").exists(),
+              "profile-only source still present after migration")
+
+    print()
+    if not FAILURES:
+        print("[SUCCESS] all self-test assertions passed")
+        return 0
+    print(f"[ERROR] {len(FAILURES)} self-test assertion(s) failed:")
+    for failure in FAILURES:
+        print(f"  - {failure}")
+    return 1
+
+
+# Script entry point: main() returns 0 on success and 1 on any failed
+# assertion, which becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())