#!/usr/bin/env python3 """ memory_dream.py -- memory lint + consolidation analyzer for the ClaudeTools REPO memory store (.claude/memory/). ADDITIVE-ONLY by design. The default run is READ-ONLY and mutates nothing. The only mutating mode is --apply-safe, which performs ONLY additive, non-destructive actions: * append missing index lines to MEMORY.md for orphan memory files * copy profile-only memory files INTO the repo store (never overwriting) It NEVER deletes a file, NEVER removes an index line, NEVER overwrites differing content, and NEVER performs a proposed merge. Every destructive idea stays in the report as a PROPOSED action for a human to approve. Stdlib only. Python launcher on Windows fleet is `py`; also runs under python3/python. Usage: py memory_dream.py # REPORT ONLY (default) py memory_dream.py --apply-safe # additive-only fixes + report py memory_dream.py --no-file # report to stdout only, skip _reports/ file py memory_dream.py --report-file X # write report to an explicit path """ from __future__ import annotations import argparse import datetime import os import re import shutil import sys from pathlib import Path # Windows consoles default to cp1252; memory bodies contain Unicode (arrows, # em dashes). Force UTF-8 stdout/stderr with replacement so printing never # crashes regardless of the active code page. for _stream in (sys.stdout, sys.stderr): try: _stream.reconfigure(encoding="utf-8", errors="replace") except Exception: pass # -------------------------------------------------------------------------- # Path resolution -- no hardcoded drive letters. 
# --------------------------------------------------------------------------

STALE_MONTHS = 6  # project facts older than this (in "as of <date>") -> re-verify


def _read_identity_root(repo_guess: Path) -> str | None:
    """Best-effort read of claudetools_root from .claude/identity.json.

    Returns the configured root only when the file exists, parses as a JSON
    object, and names an existing directory; returns None in every other case.
    """
    ident = repo_guess / ".claude" / "identity.json"
    if not ident.is_file():
        return None

    import json  # local import: only this helper needs it

    try:
        # utf-8-sig: a leading BOM would otherwise make json.loads() fail.
        data = json.loads(ident.read_text(encoding="utf-8-sig"))
    except (OSError, ValueError):
        # Unreadable file, undecodable bytes or malformed JSON -> no answer.
        return None
    if not isinstance(data, dict):
        return None
    root = data.get("claudetools_root")
    if isinstance(root, str) and root and Path(root).is_dir():
        return root
    return None


def resolve_claudetools_root() -> Path:
    """
    Resolve CLAUDETOOLS_ROOT:
      1. env CLAUDETOOLS_ROOT
      2. .claude/identity.json claudetools_root (found by walking up from script)
      3. derive from this script's location
         (.../.claude/skills/memory-dream/scripts/)
    """
    env_root = os.environ.get("CLAUDETOOLS_ROOT")
    if env_root and Path(env_root).is_dir():
        return Path(env_root).resolve()

    # Walk up from this file looking for the nearest ancestor with a .claude dir.
    here = Path(__file__).resolve()
    derived = next((p for p in here.parents if (p / ".claude").is_dir()), None)
    if derived is not None:
        ident_root = _read_identity_root(derived)
        if ident_root:
            return Path(ident_root).resolve()
        return derived.resolve()

    # Last resort: assume scripts/ -> memory-dream/ -> skills/ -> .claude/ -> ROOT
    # (script is at ROOT/.claude/skills/memory-dream/scripts/memory_dream.py).
    # Clamp the index so a shallow install path yields the topmost ancestor
    # instead of raising IndexError.
    ancestors = here.parents
    depth = min(4, len(ancestors) - 1)
    return ancestors[depth].resolve()


def profile_memory_dir(repo_root: Path) -> Path | None:
    """
    Derive the harness profile memory dir for this project.

    Slug: take the absolute project path, replace every run of non-alphanumeric
    chars with '-', then look under $HOME/.claude/projects/<slug>/memory/.
    Prefers CLAUDE_PROJECT_DIR if set; falls back to repo_root.

    Returns:
      * <slug>/memory when an exact slug dir exists (the memory/ subdir itself
        may not exist yet if only the slug dir is present);
      * the single fallback match's existing memory/ dir;
      * None when nothing matches or the fallback match is ambiguous.
    """
    home = Path(os.environ.get("HOME") or os.path.expanduser("~"))
    project_dir = os.environ.get("CLAUDE_PROJECT_DIR") or str(repo_root)
    abspath = str(Path(project_dir).resolve())
    projects_root = home / ".claude" / "projects"

    # The single-dash collapse: replace every run of non-alphanumeric chars with
    # a single '-'. This is the historical/POSIX-style derivation.
    slug_single = re.sub(r"[^A-Za-z0-9]+", "-", abspath)
    # The Claude Code harness maps a Windows drive colon to '--' (so
    # "D:\\claudetools" -> "D--claudetools"), but the single-dash collapse above
    # produces "D-claudetools". Reproduce the harness rule by doubling the dash
    # that follows a leading drive letter.
    slug_double = re.sub(r"^([A-Za-z])-", r"\1--", slug_single)

    # Try the EXACT candidate slugs in priority order. The double-dash (harness)
    # variant is primary; the single-dash collapse is secondary. dict.fromkeys
    # removes the duplicate when both slugs are identical.
    for slug in dict.fromkeys((slug_double, slug_single)):
        base = projects_root / slug
        # Whether memory/ or only the slug dir exists, the conventional
        # memory subdir under the slug dir is what gets returned.
        if (base / "memory").is_dir() or base.is_dir():
            return base / "memory"

    # ONLY if none of the exact candidates exist, fall back to a case-insensitive
    # tail-scan of $HOME/.claude/projects/*/memory for a dir whose slug "looks
    # like" this repo (tail match on the last path component). If MORE THAN ONE
    # dir matches, do NOT guess -- report the ambiguity and skip.
    if projects_root.is_dir():
        tail = re.sub(r"[^A-Za-z0-9]+", "-", repo_root.name).strip("-").lower()
        matches: list[Path] = []
        # An empty tail would match every directory, so skip the scan entirely.
        if tail:
            suffix = "-" + tail
            for child in sorted(projects_root.iterdir()):
                if not child.is_dir():
                    continue
                name = child.name.lower().strip("-")
                # Require a '-' boundary: repo "tools" must not match
                # "D--claudetools", which belongs to a different project.
                if name == tail or name.endswith(suffix):
                    mem = child / "memory"
                    if mem.is_dir():
                        matches.append(mem)
        if len(matches) > 1:
            names = ", ".join(str(m.parent.name) for m in matches)
            print(
                f"[WARNING] multiple profile dirs matched ({names}); "
                "skipping profile drift analysis to avoid cross-project contamination"
            )
            return None
        if len(matches) == 1:
            return matches[0]
    return None


# --------------------------------------------------------------------------
# Frontmatter / memory file parsing
# --------------------------------------------------------------------------

class Memory:
    """One memory file: its path-derived identity plus parsed frontmatter.

    Attributes set from the path: path, filename, slug (file stem).
    Attributes set from frontmatter (None when absent): name, description, type.
    body holds the text after the closing '---' fence, or the whole file when
    there is no complete frontmatter block.
    """

    def __init__(self, path: Path):
        self.path = path
        self.filename = path.name
        self.slug = path.stem
        self.name: str | None = None
        self.description: str | None = None
        self.type: str | None = None
        self.body: str = ""
        self._parse()

    def _parse(self) -> None:
        """Read the file and split it into frontmatter fields and body."""
        # utf-8-sig drops a leading BOM; with plain utf-8 the BOM would stay
        # glued to the first '---' and the frontmatter would go undetected.
        text = self.path.read_text(encoding="utf-8-sig", errors="replace")
        lines = text.splitlines()
        if not lines or lines[0].strip() != "---":
            # No frontmatter; whole file is body.
            self.body = text
            return

        # Find the closing fence.
        end = next(
            (i for i in range(1, len(lines)) if lines[i].strip() == "---"),
            None,
        )
        if end is None:
            # Opening fence without a closing one: treat everything as body.
            self.body = text
            return

        self.body = "\n".join(lines[end + 1:])
        self._parse_frontmatter(lines[1:end])

    def _parse_frontmatter(self, fm_lines: list[str]) -> None:
        """
        Tolerant YAML-ish parse. Handles:
            name: X
            description: X      (or '>-' folded block following)
            type: X             (top-level)
            metadata:
              type: X           (nested)
        """
        block_markers = (">-", ">", "|", "|-", "|+")
        i = 0
        in_metadata = False
        while i < len(fm_lines):
            line = fm_lines[i]
            stripped = line.strip()
            indent = len(line) - len(line.lstrip())
            if not stripped:
                i += 1
                continue
            if stripped == "metadata:":
                in_metadata = True
                i += 1
                continue
            # Detect leaving the metadata block (a top-level key reappears).
            if in_metadata and indent == 0 and ":" in stripped:
                in_metadata = False

            m = re.match(r"^([A-Za-z_][\w\-]*):\s*(.*)$", stripped)
            if not m:
                i += 1
                continue
            key, val = m.group(1), m.group(2)

            if val in block_markers:
                # Folded/literal block scalar -> capture the following blank or
                # more-indented lines and fold them into one space-joined string.
                block_lines = []
                j = i + 1
                while j < len(fm_lines):
                    nxt = fm_lines[j]
                    nxt_indent = len(nxt) - len(nxt.lstrip())
                    if nxt.strip() == "" or nxt_indent > indent:
                        block_lines.append(nxt.strip())
                        j += 1
                    else:
                        break
                val = " ".join(x for x in block_lines if x)
                i = j
            else:
                val = val.strip().strip('"').strip("'")
                i += 1

            if key == "name" and not in_metadata:
                self.name = val
            elif key == "description":
                self.description = val
            elif key == "type":
                # Both top-level and metadata.type land here; the last one wins.
                self.type = (val or "").lower() or None


# --------------------------------------------------------------------------
# Index (MEMORY.md) parsing
# --------------------------------------------------------------------------

INDEX_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
# Body backlinks like [[some-name]]
BACKLINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
# "as of <date>" style dated claims.
DATE_RE = re.compile(
    r"(?:as of|updated|corrected|lesson|fixed|live)\s+"
    r"(\d{4}-\d{2}-\d{2})",
    re.IGNORECASE,
)
ISO_DATE_RE = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")

# Type -> index header. Index headers are singular except 'user' -> 'Users'.
TYPE_HEADER = {
    "reference": "Reference",
    "feedback": "Feedback",
    "project": "Project",
    "user": "Users",
}


def parse_index(index_path: Path):
    """
    Parse MEMORY.md into its links and section headers.

    Returns:
        links:   list of (title, target, lineno, raw_line) for every list item
                 ('- ...') containing a markdown link; lineno is 0-based
        headers: dict header-name -> 0-based lineno of each '## ' header
        lines:   original file lines (no newline)

    A missing index file yields ([], {}, []).
    """
    links = []
    headers = {}
    if not index_path.is_file():
        return links, headers, []

    # utf-8-sig: a leading BOM would otherwise hide a '## ' header on line 1.
    text = index_path.read_text(encoding="utf-8-sig", errors="replace")
    lines = text.split("\n")
    for idx, line in enumerate(lines):
        header = re.match(r"^##\s+(.+?)\s*$", line)
        if header:
            headers[header.group(1).strip()] = idx
            continue
        if not line.lstrip().startswith("- "):
            continue
        # Only the first markdown link on a list line is recorded.
        link = INDEX_LINK_RE.search(line)
        if link:
            links.append((link.group(1), link.group(2), idx, line))
    return links, headers, lines


# --------------------------------------------------------------------------
# Referenced-artifact extraction (conservative)
# --------------------------------------------------------------------------
# Referenced-artifact extraction is intentionally CONSERVATIVE: it only inspects
# backtick-wrapped spans (`...`) and only treats a span as a repo path when the
# whole span is a single path-like token. We do NOT scan bare prose -- too many
# false positives.
PATHISH_RE = re.compile(r"`([^`\n]+?)`")

# Recognized file extensions. The tuple's own order carries no meaning.
KNOWN_EXTS = (
    "tsx", "json", "yaml", "toml", "service",
    "py", "sh", "rs", "ts", "js", "md",
    "yml", "sql", "ps1",
)
# The alternation is built longest-first and anchored to end-of-token, so
# `identity.json` is never truncated to `identity.js` (nor yaml to yml).
# sorted() is stable, so equal-length extensions keep their tuple order.
EXT_RE = re.compile(
    r"\.(?:" + "|".join(sorted(KNOWN_EXTS, key=len, reverse=True)) + r")$",
    re.IGNORECASE,
)

# Vault-style secret paths live in the SEPARATE vault repo, not claudetools.
VAULT_HINT_RE = re.compile(r"\.sops\.ya?ml$", re.IGNORECASE)

# Tokens we never treat as repo paths.
ABS_PREFIXES = (
    "/api/", "/home/", "/var/", "/opt/", "/etc/", "/tmp/", "/proc/", "/dev/",
    "/data/", "/usr/",
)


def _strip_relative_prefix(token: str) -> str:
    """Drop leading './' and '../' segments, keeping dot-dirs and a leading '/'.

    str.lstrip("./") must not be used for this: it strips a character SET, so
    it would turn '.claude/x.json' into 'claude/x.json' and '/etc/x.yaml' into
    'etc/x.yaml'.
    """
    while True:
        if token.startswith("./"):
            token = token[2:]
        elif token.startswith("../"):
            token = token[3:]
        else:
            return token


def looks_like_repo_path(token: str) -> bool:
    """Return True when `token` plausibly names a concrete file in this repo.

    Rejects empty tokens, tokens containing whitespace/glob/placeholder/colon
    characters, URLs, server-side absolute paths (ABS_PREFIXES), vault secret
    refs, anything not ending in a recognized extension, and a bare extension
    such as '.md'.
    """
    token = token.strip()
    if not token:
        return False
    # Reject anything with whitespace, glob/placeholder/url/colon characters --
    # those are descriptions or templates, not concrete repo paths.
    if any(c in token for c in (" ", "<", ">", "*", "?", ":", "|", "\\")):
        return False
    if token.startswith(("http://", "https://", "//", "git@", "vault:")):
        return False
    if token.startswith(ABS_PREFIXES):
        return False  # server absolute paths, not repo-relative
    # Vault secret refs belong to the vault repo -- not a staleness signal here.
    if VAULT_HINT_RE.search(token):
        return False
    # Must end in a recognized extension (anchored).
    if not EXT_RE.search(token):
        return False
    # The file name must be more than the extension itself: '.md' in prose
    # means "markdown files", while '.eslintrc.json' is a real file.
    basename = token.rsplit("/", 1)[-1]
    return bool(EXT_RE.sub("", basename))


def extract_referenced_paths(body: str) -> list[str]:
    """Return the sorted, de-duplicated repo-path tokens cited in backticks."""
    found = set()
    for m in PATHISH_RE.finditer(body):
        span = m.group(1).strip()
        # A backtick span counts only if the ENTIRE span is one token (a path).
        # Spans with spaces are commands/prose -> skip (avoids `cmd args` noise).
        if not span or " " in span:
            continue
        # Keep a leading '/' so the ABS_PREFIXES filter can still see it.
        token = _strip_relative_prefix(span)
        if looks_like_repo_path(token):
            found.add(token)
    return sorted(found)


def repo_path_exists(repo_root: Path, token: str) -> bool:
    """Return True when `token` resolves to something inside `repo_root`.

    Tries, in order: the repo-relative path; for a bare filename, any file of
    that name anywhere in the repo; otherwise any file whose last two path
    components equal the token's last two.
    """
    # Drop './', '../' and leading '/' so the join below stays under repo_root.
    token = _strip_relative_prefix(token).lstrip("/")
    # Try repo-relative.
    if (repo_root / token).exists():
        return True
    # Bare filename -> search anywhere in repo (cheap, bounded).
    if "/" not in token:
        try:
            return any(True for _ in repo_root.rglob(token))
        except OSError:
            return False
    # Also try matching just the tail (last 2 segments) anywhere, since memories
    # often cite paths relative to a subproject root. Compare whole path
    # components so 'scripts/x.sh' does not match 'myscripts/x.sh'.
    parts = token.split("/")
    if len(parts) >= 2:
        tail = tuple(parts[-2:])
        try:
            for p in repo_root.rglob(parts[-1]):
                if p.parts[-2:] == tail:
                    return True
        except OSError:
            return False
    return False


# --------------------------------------------------------------------------
# Similarity / duplicate clustering (token-overlap heuristic)
# --------------------------------------------------------------------------

STOPWORDS = {
    "the", "a", "an", "and", "or", "to", "of", "in", "on", "for", "with",
    "is", "are", "be", "not", "via", "use", "used", "uses", "no", "never",
    "always", "only", "from", "by", "at", "as", "it", "this", "that",
    "when", "if", "then", "do", "don't", "we", "our", "you", "your",
    # tokenize() splits on the apostrophe, so "don't" arrives as "don".
    "don",
}


def tokenize(text: str) -> set[str]:
    """Lower-case `text` and return its alphanumeric words of 3+ characters
    that are not stopwords."""
    words = re.findall(r"[a-z0-9]+", (text or "").lower())
    return {w for w in words if len(w) > 2 and w not in STOPWORDS}


def jaccard(a: set[str], b: set[str]) -> float:
    """Jaccard similarity |a & b| / |a | b|; 0.0 when either set is empty."""
    if not a or not b:
        return 0.0
    union = len(a | b)
    return len(a & b) / union if union else 0.0


def _find(parent: dict, x):
    """Union-find root lookup with path halving."""
    while parent[x] != x:
        parent[x] = parent[parent[x]]
        x = parent[x]
    return x


def _union(parent: dict, x, y) -> None:
    """Merge the union-find sets containing x and y."""
    rx, ry = _find(parent, x), _find(parent, y)
    if rx != ry:
        parent[rx] = ry


def cluster_overlaps(mems: list[Memory], threshold: float = 0.34):
    """
    Within each type, find pairs with token-overlap >= threshold (or sharing a
    two-part slug prefix such as feedback_syncro_*), then union them into
    clusters. Returns list of (type, [filenames]) for clusters >1.
    """
    clusters_out = []
    by_type: dict[str, list[Memory]] = {}
    for m in mems:
        by_type.setdefault(m.type or "untyped", []).append(m)

    for typ, group in by_type.items():
        files = [m.filename for m in group]
        parent = {f: f for f in files}
        sigs = {}
        slug_prefix = {}
        for m in group:
            # token signature per memory: name + description + slug words
            base = " ".join(
                filter(None, [m.name, m.description, m.slug.replace("_", " ")])
            )
            sigs[m.filename] = tokenize(base)
            # Also bias by shared slug prefix (e.g. feedback_syncro_*).
            parts = m.slug.split("_")
            slug_prefix[m.filename] = (
                "_".join(parts[:2]) if len(parts) >= 2 else m.slug
            )

        for i, fi in enumerate(files):
            for fj in files[i + 1:]:
                same_prefix = (
                    slug_prefix[fi] == slug_prefix[fj]
                    and len(slug_prefix[fi].split("_")) >= 2
                )
                if same_prefix or jaccard(sigs[fi], sigs[fj]) >= threshold:
                    _union(parent, fi, fj)

        groups: dict[str, list[str]] = {}
        for f in files:
            groups.setdefault(_find(parent, f), []).append(f)
        for members in groups.values():
            if len(members) > 1:
                clusters_out.append((typ, sorted(members)))
    return clusters_out


# --------------------------------------------------------------------------
# Stale dated facts
# --------------------------------------------------------------------------

def find_stale_dates(mem: Memory, today: datetime.date):
    """Return list of (date_str, age_days) for dated claims older than STALE_MONTHS.

    Each distinct date string in the body is reported at most once; strings
    that are not valid calendar dates are ignored. A month counts as 30 days.
    """
    hits = []
    seen = set()
    limit_days = STALE_MONTHS * 30
    for rx in (DATE_RE, ISO_DATE_RE):
        for m in rx.finditer(mem.body):
            ds = m.group(1)
            if ds in seen:
                continue
            seen.add(ds)
            try:
                d = datetime.date.fromisoformat(ds)
            except ValueError:
                continue
            age = (today - d).days
            if age > limit_days:
                hits.append((ds, age))
    return hits


# --------------------------------------------------------------------------
# Report
# --------------------------------------------------------------------------

class Report:
    """Accumulates report lines; str() joins them with newlines."""

    def __init__(self):
        self.lines: list[str] = []

    def add(self, s: str = ""):
        self.lines.append(s)

    def __str__(self):
        return "\n".join(self.lines)


def slugify_link_target(target: str) -> str:
    """Return the file stem of a link target ('sub/foo.md' -> 'foo')."""
    return Path(target).stem


def run(args) -> int:
    """Build the memory report and, with --apply-safe, do the additive fixes.

    Reads args.apply_safe, args.no_file and args.report_file. Prints the report
    to stdout and, unless args.no_file is set, writes it to args.report_file or
    to <memory>/_reports/<timestamp>-dream.md. Returns 0 on success, 2 when the
    memory directory does not exist.
    """
    repo_root = resolve_claudetools_root()
    mem_dir = repo_root / ".claude" / "memory"
    index_path = mem_dir / "MEMORY.md"
    if not mem_dir.is_dir():
        print(f"[ERROR] memory dir not found: {mem_dir}")
        return 2

    today = datetime.date.today()
    rpt = Report()
    rpt.add("# Memory Dream Report")
    rpt.add(f"Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}")
    rpt.add(f"Repo root: {repo_root}")
    rpt.add(f"Memory store: {mem_dir}")
    rpt.add(f"Mode: {'APPLY-SAFE (additive)' if args.apply_safe else 'REPORT-ONLY'}")
    rpt.add("")

    # Load memories. The glob is non-recursive, so _reports/ is never scanned.
    mem_files = sorted(p for p in mem_dir.glob("*.md") if p.name != "MEMORY.md")
    mems = [Memory(p) for p in mem_files]
    mem_by_file = {m.filename: m for m in mems}
    rpt.add(f"Loaded {len(mems)} memory files (excluding MEMORY.md).")
    rpt.add("")

    # ----- 1. INDEX RECONCILE -----
    links, headers, index_lines = parse_index(index_path)
    indexed_slugs = {slugify_link_target(t) for (_title, t, _ln, _raw) in links}
    rpt.add("## 1. INDEX RECONCILE")
    rpt.add("")

    # Files with no index line.
    orphans = [m for m in mems if m.slug not in indexed_slugs]
    rpt.add(f"### Orphan files (no index line): {len(orphans)}")
    for m in orphans:
        rpt.add(f"- [INFO] {m.filename} (type={m.type or '?'})")
    rpt.add("")

    missing_targets = []  # index lines whose file is missing
    for title, target, ln, _raw in links:
        # Only consider links that look like local memory files.
        tgt = target.strip()
        if tgt.startswith(("http://", "https://")):
            continue
        if not (mem_dir / tgt).resolve().is_file():
            missing_targets.append((title, target, ln))
    rpt.add(f"### Index lines pointing at missing files: {len(missing_targets)}")
    for title, target, ln in missing_targets:
        rpt.add(f"- [WARNING] line {ln + 1}: [{title}]({target}) -> file not found")
    rpt.add("")

    name_mismatches = []  # frontmatter name vs filename slug
    for m in mems:
        if m.name is None:
            name_mismatches.append((m.filename, "(no name in frontmatter)"))
            continue
        # The convention is loose: name may be a title, not the slug. Only flag
        # when name itself looks like a slug AND differs from the filename slug.
        declared = m.name.strip()
        if re.fullmatch(r"[a-z0-9_]+", declared) and declared != m.slug:
            name_mismatches.append((m.filename, f"name='{m.name}' != slug='{m.slug}'"))
    rpt.add(f"### Frontmatter name vs filename signals: {len(name_mismatches)}")
    for fn, note in name_mismatches:
        rpt.add(f"- [INFO] {fn}: {note}")
    rpt.add("")

    # ----- 2. BACKLINKS -----
    rpt.add("## 2. BACKLINKS ([[name]] references)")
    rpt.add("")
    known_slugs = {m.slug for m in mems}
    broken_backlinks = []
    for m in mems:
        for bm in BACKLINK_RE.finditer(m.body):
            ref = bm.group(1).strip()
            ref_slug = slugify_link_target(ref)
            if ref_slug not in known_slugs and ref not in known_slugs:
                broken_backlinks.append((m.filename, ref))
    rpt.add(f"### Broken backlinks: {len(broken_backlinks)}")
    for fn, ref in broken_backlinks:
        rpt.add(f"- [WARNING] {fn}: [[{ref}]] has no matching memory file")
    if not broken_backlinks:
        rpt.add("- [OK] no broken backlinks found")
    rpt.add("")

    # ----- 3. REFERENCED-ARTIFACT VALIDITY -----
    rpt.add("## 3. REFERENCED-ARTIFACT VALIDITY (conservative; 'verify', not 'delete')")
    rpt.add("")
    artifact_flags = []
    for m in mems:
        for tok in extract_referenced_paths(m.body):
            if not repo_path_exists(repo_root, tok):
                artifact_flags.append((m.filename, tok))
    rpt.add(f"### Referenced paths not found in repo: {len(artifact_flags)}")
    for fn, tok in artifact_flags:
        rpt.add(f"- [VERIFY] {fn}: `{tok}` not found under repo (may be server-side "
                f"or renamed -- verify, do not auto-delete)")
    if not artifact_flags:
        rpt.add("- [OK] no clearly-stale repo paths detected")
    rpt.add("")

    # ----- 4. DUPLICATE / OVERLAP CLUSTERS -----
    rpt.add("## 4. DUPLICATE / OVERLAP CLUSTERS (PROPOSED merges -- never auto-applied)")
    rpt.add("")
    clusters = cluster_overlaps(mems)
    clusters.sort(key=lambda c: (-len(c[1]), c[0]))  # biggest clusters first
    rpt.add(f"### Candidate clusters: {len(clusters)}")
    for typ, members in clusters:
        rpt.add(f"- [{typ}] {len(members)} related memories:")
        for f in members:
            mm = mem_by_file.get(f)
            desc = (mm.description or mm.name or "") if mm else ""
            desc = desc[:90]
            rpt.add(f" - {f} -- {desc}")
    if not clusters:
        rpt.add("- [OK] no overlap clusters above threshold")
    rpt.add("")

    # ----- 5. STALE DATED FACTS -----
    rpt.add(f"## 5. STALE DATED FACTS (project-type, dated > {STALE_MONTHS} months)")
    rpt.add("")
    stale_hits = []
    for m in mems:
        if (m.type or "") != "project":
            continue
        hits = find_stale_dates(m, today)
        if hits:
            stale_hits.append((m.filename, hits))
    rpt.add(f"### Project memories with stale dated claims: {len(stale_hits)}")
    for fn, hits in stale_hits:
        for ds, age in hits:
            rpt.add(f"- [VERIFY] {fn}: dated {ds} (~{age} days old) -- re-verify")
    if not stale_hits:
        rpt.add("- [OK] no stale dated project facts")
    rpt.add("")

    # ----- 6. DRIFT vs PROFILE STORE -----
    rpt.add("## 6. DRIFT vs HARNESS PROFILE STORE")
    rpt.add("")
    prof_dir = profile_memory_dir(repo_root)
    profile_only = []
    repo_only = []
    conflicts = []
    if prof_dir is None:
        rpt.add("- [INFO] profile memory dir not found; skipping drift check.")
    else:
        rpt.add(f"Profile store: {prof_dir}")
        rpt.add("")
        prof_files = {p.name for p in prof_dir.glob("*.md") if p.name != "MEMORY.md"}
        repo_files = {m.filename for m in mems}
        profile_only.extend(sorted(prof_files - repo_files))
        repo_only.extend(sorted(repo_files - prof_files))
        for both in sorted(prof_files & repo_files):
            a = (prof_dir / both).read_text(encoding="utf-8", errors="replace")
            b = (mem_dir / both).read_text(encoding="utf-8", errors="replace")
            if a != b:
                conflicts.append(both)

        rpt.add(f"### Profile-only (candidates to MIGRATE INTO repo): {len(profile_only)}")
        for f in profile_only:
            rpt.add(f"- [INFO] {f}")
        rpt.add("")
        rpt.add(f"### Repo-only (candidates to PUSH OUT to profile): {len(repo_only)}")
        for f in repo_only:
            rpt.add(f"- [INFO] {f}")
        rpt.add("")
        rpt.add(f"### Present in BOTH but differing (CONFLICT -- human review): "
                f"{len(conflicts)}")
        for f in conflicts:
            rpt.add(f"- [WARNING] {f}: content differs between repo and profile")
    rpt.add("")

    # ----- APPLY-SAFE ACTIONS (additive-only) -----
    actions_taken = []
    if args.apply_safe:
        rpt.add("## APPLY-SAFE ACTIONS PERFORMED (additive-only)")
        rpt.add("")

        # (a) Append missing index lines for orphan files.
        if orphans and index_path.is_file():
            appended = append_index_lines(index_path, orphans, index_lines, headers)
            for line, hdr in appended:
                actions_taken.append(f"INDEX += [{hdr}] {line}")
                rpt.add(f"- [OK] appended index line under ## {hdr}: {line}")
        elif orphans:
            rpt.add("- [WARNING] orphans exist but MEMORY.md missing; nothing appended")

        # (b) Copy profile-only files INTO repo (never overwrite).
        if prof_dir is not None:
            for f in profile_only:
                src = prof_dir / f
                dst = mem_dir / f
                if dst.exists():
                    rpt.add(f"- [SKIP] {f}: already exists in repo (not overwriting)")
                    continue
                shutil.copy2(src, dst)
                actions_taken.append(f"COPIED profile->repo: {f}")
                rpt.add(f"- [OK] copied profile-only file into repo: {f}")

        if not actions_taken:
            rpt.add("- [INFO] no additive actions were necessary")
        rpt.add("")

    # ----- SUMMARY -----
    rpt.add("## SUMMARY")
    rpt.add("")
    rpt.add(f"- memory files: {len(mems)}")
    rpt.add(f"- orphan files (no index): {len(orphans)}")
    rpt.add(f"- index -> missing file: {len(missing_targets)}")
    rpt.add(f"- name/filename signals: {len(name_mismatches)}")
    rpt.add(f"- broken backlinks: {len(broken_backlinks)}")
    rpt.add(f"- stale referenced paths: {len(artifact_flags)}")
    rpt.add(f"- overlap clusters: {len(clusters)}")
    rpt.add(f"- stale dated project facts: {len(stale_hits)}")
    rpt.add(f"- profile-only files: {len(profile_only)}")
    rpt.add(f"- repo-only files: {len(repo_only)}")
    rpt.add(f"- repo<->profile conflicts: {len(conflicts)}")
    if args.apply_safe:
        rpt.add(f"- additive actions performed: {len(actions_taken)}")
    rpt.add("")

    rpt.add("## PROPOSED (needs human approval -- NEVER auto-applied)")
    rpt.add("")
    n_prop = 0
    for typ, members in clusters:
        n_prop += 1
        rpt.add(f"- [MERGE?] consolidate {len(members)} '{typ}' memories: "
                f"{', '.join(members)}")
    for fn, hits in stale_hits:
        n_prop += 1
        rpt.add(f"- [REVERIFY?] {fn} (dated facts) -- confirm still true, then update")
    for fn, tok in artifact_flags:
        n_prop += 1
        rpt.add(f"- [STALE-REF?] {fn} references `{tok}` -- confirm/repoint or note moved")
    for title, target, ln in missing_targets:
        n_prop += 1
        rpt.add(f"- [INDEX-CLEANUP?] MEMORY.md line {ln + 1} points at missing "
                f"{target} -- human decides keep/remove")
    if prof_dir is not None:
        for f in conflicts:
            n_prop += 1
            rpt.add(f"- [DRIFT-RESOLVE?] {f} differs repo vs profile -- human picks "
                    f"winner (sync-memory.sh leaves both untouched)")
    if n_prop == 0:
        rpt.add("- [OK] nothing proposed; memory store is clean")
    rpt.add("")

    out = str(rpt)
    print(out)

    # Write report file unless suppressed.
    if not args.no_file:
        if args.report_file:
            rpath = Path(args.report_file)
        else:
            stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H%M")
            rpath = mem_dir / "_reports" / f"{stamp}-dream.md"
        # Create only the directory actually written to: _reports/ for the
        # default path, the explicit path's own parent for --report-file.
        rpath.parent.mkdir(parents=True, exist_ok=True)
        rpath.write_text(out + "\n", encoding="utf-8")
        print(f"\n[INFO] report written: {rpath}")
    return 0


def append_index_lines(index_path: Path, orphans, index_lines, headers):
    """
    Additive only: append a '- [Name](file.md) -- description' line for each
    orphan under the correct '## <Header>' section. Never reorders or removes
    existing lines. If a header doesn't exist, append it at end of file.

    `index_lines` and `headers` are accepted for interface compatibility but
    not used; the index is re-read from disk.

    Returns list of (line_text, header_used).
    """
    if not orphans:
        return []  # nothing to add -> leave the file untouched

    text = index_path.read_text(encoding="utf-8", errors="replace")
    # Set a leading BOM aside so it neither hides a first-line header nor gets
    # lost when the file is written back.
    bom = "\ufeff" if text.startswith("\ufeff") else ""
    lines = text[len(bom):].split("\n")
    appended = []

    # Group orphans by target header.
    by_header: dict[str, list[Memory]] = {}
    for m in orphans:
        # Unknown or missing type -> "Project": safe default bucket; a human
        # can recategorize.
        hdr = TYPE_HEADER.get(m.type or "") or "Project"
        by_header.setdefault(hdr, []).append(m)

    def build_line(m: Memory) -> str:
        title = m.name or m.slug
        hook = (m.description or "").strip()
        if hook:
            return f"- [{title}]({m.filename}) -- {hook}"
        return f"- [{title}]({m.filename})"

    for hdr, members in by_header.items():
        new_lines = [build_line(m) for m in members]

        # Find header line index.
        hidx = None
        for i, ln in enumerate(lines):
            hm = re.match(r"^##\s+(.+?)\s*$", ln)
            if hm and hm.group(1).strip() == hdr:
                hidx = i
                break

        if hidx is None:
            # Append a fresh section at end of file.
            if lines and lines[-1].strip() != "":
                lines.append("")
            lines.append(f"## {hdr}")
            lines.extend(new_lines)
        else:
            # Find end of this section: next '## ' or EOF.
            end = len(lines)
            for j in range(hidx + 1, len(lines)):
                if re.match(r"^##\s+", lines[j]):
                    end = j
                    break
            # Insert after the last non-blank line of the section.
            insert_at = end
            while insert_at - 1 > hidx and lines[insert_at - 1].strip() == "":
                insert_at -= 1
            lines[insert_at:insert_at] = new_lines
        appended.extend((nl, hdr) for nl in new_lines)

    out = "\n".join(lines)
    if not out.endswith("\n"):
        out += "\n"  # a section appended at EOF must not leave an unterminated line
    index_path.write_text(bom + out, encoding="utf-8")
    return appended


def main() -> int:
    """Parse CLI arguments and run the analyzer; returns the process exit code."""
    ap = argparse.ArgumentParser(
        description="Memory lint + consolidation analyzer (additive-only)."
    )
    ap.add_argument(
        "--apply-safe",
        action="store_true",
        help="Perform ONLY additive fixes (append index lines, copy profile-only "
             "files into repo). Never deletes/overwrites/merges.",
    )
    ap.add_argument(
        "--no-file",
        action="store_true",
        help="Print report to stdout only; do not write a _reports/ file.",
    )
    ap.add_argument(
        "--report-file",
        default=None,
        help="Explicit path for the report file (overrides _reports/ default).",
    )
    args = ap.parse_args()
    try:
        return run(args)
    except KeyboardInterrupt:
        print("[ERROR] interrupted")
        return 130  # conventional exit status for SIGINT


if __name__ == "__main__":
    sys.exit(main())