Files
claudetools/.claude/skills/memory-dream/scripts/memory_dream.py
Mike Swanson 2a1ccfac73 Add memory-dream skill + additive cross-machine memory sync
memory-dream: read-only memory lint/consolidation analyzer (index, backlinks,
stale refs, dup clusters, profile drift); additive-only --apply-safe, all
merges/deletes are proposals. sync-memory.sh: additive repo<->harness-profile
union (no delete/overwrite, conflicts surfaced), wired to a SessionStart hook.
Migrates the useful profile-only memories into the synced repo store.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 15:22:12 -07:00

904 lines
33 KiB
Python

#!/usr/bin/env python3
"""
memory_dream.py -- memory lint + consolidation analyzer for the ClaudeTools REPO
memory store (.claude/memory/).
ADDITIVE-ONLY by design. The default run is READ-ONLY and mutates nothing.
The only mutating mode is --apply-safe, which performs ONLY additive,
non-destructive actions:
* append missing index lines to MEMORY.md for orphan memory files
* copy profile-only memory files INTO the repo store (never overwriting)
It NEVER deletes a file, NEVER removes an index line, NEVER overwrites differing
content, and NEVER performs a proposed merge. Every destructive idea stays in
the report as a PROPOSED action for a human to approve.
Stdlib only. Python launcher on Windows fleet is `py`; also runs under
python3/python.
Usage:
py memory_dream.py # REPORT ONLY (default)
py memory_dream.py --apply-safe # additive-only fixes + report
py memory_dream.py --no-file # report to stdout only, skip _reports/ file
py memory_dream.py --report-file X # write report to an explicit path
"""
from __future__ import annotations
import argparse
import datetime
import os
import re
import shutil
import sys
from pathlib import Path
# Windows consoles default to cp1252; memory bodies contain Unicode (arrows,
# em dashes). Force UTF-8 stdout/stderr with replacement so printing never
# crashes regardless of the active code page.
for _stream in (sys.stdout, sys.stderr):
    try:
        # Switch the stream to UTF-8 and substitute unencodable characters
        # instead of raising while printing.
        _stream.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best-effort: any failure to reconfigure is ignored and the stream
        # keeps its current settings.
        pass
# --------------------------------------------------------------------------
# Path resolution -- no hardcoded drive letters.
# --------------------------------------------------------------------------
# Age threshold, in months, for dated claims. find_stale_dates() compares
# against STALE_MONTHS * 30 days, i.e. a month is approximated as 30 days.
STALE_MONTHS = 6  # project facts older than this (in "as of <date>") -> re-verify
def _read_identity_root(repo_guess: Path) -> str | None:
    """Return ``claudetools_root`` from ``<repo_guess>/.claude/identity.json``.

    Best-effort lookup. The stored value is returned only when it is truthy
    and names an existing directory. A missing file, unreadable or invalid
    JSON, or any other error raised while inspecting the value yields ``None``.
    """
    identity_file = repo_guess / ".claude" / "identity.json"
    if not identity_file.is_file():
        return None
    try:
        import json

        payload = json.loads(identity_file.read_text(encoding="utf-8"))
        candidate = payload.get("claudetools_root")
        usable = bool(candidate) and Path(candidate).is_dir()
    except Exception:
        return None
    return candidate if usable else None
def resolve_claudetools_root() -> Path:
    """
    Resolve CLAUDETOOLS_ROOT, trying in order:

    1. env CLAUDETOOLS_ROOT (only when it names an existing directory)
    2. the nearest ancestor of this script that contains a ``.claude`` dir;
       if that ancestor's ``.claude/identity.json`` supplies a usable
       ``claudetools_root`` it wins, otherwise the ancestor itself is used
    3. a positional guess from this script's location
       (.../.claude/skills/memory-dream/scripts/)

    Returns the resolved root as an absolute ``Path``.
    """
    env_root = os.environ.get("CLAUDETOOLS_ROOT")
    if env_root and Path(env_root).is_dir():
        return Path(env_root).resolve()

    # Walk up from this file looking for a directory that holds .claude/.
    here = Path(__file__).resolve()
    derived = next(
        (parent for parent in here.parents if (parent / ".claude").is_dir()),
        None,
    )
    if derived is not None:
        ident_root = _read_identity_root(derived)
        if ident_root:
            return Path(ident_root).resolve()
        return derived.resolve()

    # Last resort: assume scripts/ -> memory-dream/ -> skills/ -> .claude/ -> ROOT
    # (script is at ROOT/.claude/skills/memory-dream/scripts/memory_dream.py).
    # If the script sits fewer than five levels deep, parents[4] does not
    # exist; clamp to the topmost ancestor instead of raising IndexError.
    ancestors = here.parents
    return ancestors[min(4, len(ancestors) - 1)].resolve()
def profile_memory_dir(repo_root: Path) -> Path | None:
    """
    Locate the harness profile memory dir for this project.

    The project path is CLAUDE_PROJECT_DIR when set, otherwise ``repo_root``.
    Its resolved absolute form is turned into a slug by replacing every run of
    non-alphanumeric characters with '-'. Two exact slugs are tried under
    ``$HOME/.claude/projects/``, in this order:

      1. the harness variant, where a leading "<drive>-" is doubled to
         "<drive>--" (so "D:\\claudetools" -> "D--claudetools")
      2. the plain single-dash collapse

    For the first slug whose directory (or its ``memory`` subdir) exists,
    ``<slug>/memory`` is returned -- note that when only the slug directory
    exists, the returned ``memory`` path itself may not exist yet.

    If neither exact slug exists, ``projects/*/memory`` is scanned for
    directories whose slug ends (case-insensitively) with the collapsed name of
    ``repo_root``. Exactly one match is returned; several matches print a
    warning and return None; no match returns None.
    """
    home_dir = Path(os.environ.get("HOME") or os.path.expanduser("~"))
    project = os.environ.get("CLAUDE_PROJECT_DIR") or str(repo_root)
    resolved = str(Path(project).resolve())
    projects_root = home_dir / ".claude" / "projects"

    collapsed = re.sub(r"[^A-Za-z0-9]+", "-", resolved)
    harness = re.sub(r"^([A-Za-z])-", r"\1--", collapsed)

    # dict.fromkeys keeps priority order while dropping a duplicate slug
    # (the two variants are identical for non-drive-letter paths).
    for slug in dict.fromkeys((harness, collapsed)):
        slug_dir = projects_root / slug
        if (slug_dir / "memory").is_dir() or slug_dir.is_dir():
            return slug_dir / "memory"

    # Fuzzy fallback, only reached when no exact slug directory exists.
    if not projects_root.is_dir():
        return None
    tail = re.sub(r"[^A-Za-z0-9]+", "-", repo_root.name).lower()
    matches: list[Path] = [
        child / "memory"
        for child in sorted(projects_root.iterdir())
        if child.is_dir()
        and child.name.lower().endswith(tail)
        and (child / "memory").is_dir()
    ]
    if len(matches) > 1:
        # Ambiguous: refuse to guess between projects.
        names = ", ".join(str(m.parent.name) for m in matches)
        print(
            f"[WARNING] multiple profile dirs matched ({names}); "
            "skipping profile drift analysis to avoid cross-project contamination"
        )
        return None
    return matches[0] if matches else None
# --------------------------------------------------------------------------
# Frontmatter / memory file parsing
# --------------------------------------------------------------------------
class Memory:
    """A single memory file: identifiers derived from its path plus parsed frontmatter.

    Attributes set by the constructor:
        path / filename / slug: the source path, its name, and its stem.
        name, description, type: frontmatter values, or None when absent
            (type is lower-cased).
        body: the text after the closing ``---`` fence, or the whole file when
            no complete frontmatter block is present.
    """

    # YAML block-scalar indicators: folded ('>') and literal ('|') styles with
    # their optional chomping marks. The value continues on the following,
    # more-indented lines.
    _BLOCK_SCALARS = (">", ">-", ">+", "|", "|-", "|+")

    def __init__(self, path: Path):
        self.path = path
        self.filename = path.name
        self.slug = path.stem
        self.name: str | None = None
        self.description: str | None = None
        self.type: str | None = None
        self.body: str = ""
        self._parse()

    def _parse(self) -> None:
        """Read the file and split it into frontmatter fields and body."""
        text = self.path.read_text(encoding="utf-8", errors="replace")
        # A UTF-8 BOM would sit in front of the opening fence and hide the
        # frontmatter, because str.strip() does not remove U+FEFF.
        if text.startswith("\ufeff"):
            text = text[1:]
        lines = text.splitlines()
        if not lines or lines[0].strip() != "---":
            # No frontmatter; whole file is body.
            self.body = text
            return
        # Find closing fence.
        end = None
        for i in range(1, len(lines)):
            if lines[i].strip() == "---":
                end = i
                break
        if end is None:
            # Unterminated frontmatter is treated as plain body text.
            self.body = text
            return
        self.body = "\n".join(lines[end + 1:])
        self._parse_frontmatter(lines[1:end])

    def _parse_frontmatter(self, fm_lines: list[str]) -> None:
        """
        Tolerant YAML-ish parse. Handles:
          name: X
          description: X      (or a '>' / '|' block scalar on following lines)
          type: X             (top-level)
          metadata:
            type: X           (nested)

        A ``name`` key nested under ``metadata:`` is ignored; ``description``
        and ``type`` are accepted at either level. Unknown keys are skipped.
        """
        i = 0
        in_metadata = False
        while i < len(fm_lines):
            line = fm_lines[i]
            stripped = line.strip()
            indent = len(line) - len(line.lstrip())
            if not stripped:
                i += 1
                continue
            if stripped == "metadata:":
                in_metadata = True
                i += 1
                continue
            # Detect leaving the metadata block (a top-level key reappears).
            if in_metadata and indent == 0 and ":" in stripped:
                in_metadata = False
            m = re.match(r"^([A-Za-z_][\w\-]*):\s*(.*)$", stripped)
            if not m:
                i += 1
                continue
            key, val = m.group(1), m.group(2)
            if val in self._BLOCK_SCALARS:
                # Block scalar -> capture following blank or more-indented
                # lines and fold them into one space-joined string.
                block_lines = []
                j = i + 1
                while j < len(fm_lines):
                    nxt = fm_lines[j]
                    nxt_indent = len(nxt) - len(nxt.lstrip())
                    if nxt.strip() == "" or nxt_indent > indent:
                        block_lines.append(nxt.strip())
                        j += 1
                    else:
                        break
                val = " ".join(x for x in block_lines if x)
                i = j
            else:
                val = val.strip().strip('"').strip("'")
                i += 1
            if key == "name" and not in_metadata:
                self.name = val
            elif key == "description":
                self.description = val
            elif key == "type":
                # Both top-level and metadata.type land here.
                self.type = (val or "").lower() or None
# --------------------------------------------------------------------------
# Index (MEMORY.md) parsing
# --------------------------------------------------------------------------
# Markdown inline link: group 1 is the link text, group 2 the target.
INDEX_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
# Body backlinks like [[some-name]]
BACKLINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
# Keyword-introduced dated claims such as "as of 2025-01-31" (case-insensitive);
# group 1 is the YYYY-MM-DD date.
DATE_RE = re.compile(
    r"(?:as of|updated|corrected|lesson|fixed|live)\s+"
    r"(\d{4}-\d{2}-\d{2})",
    re.IGNORECASE,
)
# Any bare YYYY-MM-DD date delimited by word boundaries.
ISO_DATE_RE = re.compile(r"\b(\d{4}-\d{2}-\d{2})\b")
# Memory type -> MEMORY.md section header.
# NOTE(review): headers are singular except "Users" -- confirm this matches the
# section names actually used in MEMORY.md.
TYPE_HEADER = {
    "reference": "Reference",
    "feedback": "Feedback",
    "project": "Project",
    "user": "Users",
}
def parse_index(index_path: Path):
    """
    Parse MEMORY.md into its link lines and section headers.

    Returns a 3-tuple:
      links:   list of (title, target, lineno, raw_line) for every line whose
               stripped form starts with "- " and contains a markdown link
               (first link on the line only); lineno is 0-based
      headers: dict mapping each "## " header name to its 0-based line number
      lines:   the file content split on newlines

    A missing index file yields ([], {}, []).
    """
    if not index_path.is_file():
        return [], {}, []

    lines = index_path.read_text(encoding="utf-8", errors="replace").split("\n")
    links = []
    headers = {}
    header_re = re.compile(r"^##\s+(.+?)\s*$")
    for lineno, raw in enumerate(lines):
        header = header_re.match(raw)
        if header is not None:
            headers[header.group(1).strip()] = lineno
        elif raw.lstrip().startswith("- "):
            link = INDEX_LINK_RE.search(raw)
            if link is not None:
                links.append((link.group(1), link.group(2), lineno, raw))
    return links, headers, lines
# --------------------------------------------------------------------------
# Referenced-artifact extraction (conservative)
# --------------------------------------------------------------------------
# Referenced-artifact extraction is intentionally CONSERVATIVE: it only inspects
# backtick-wrapped spans (`...`) and only treats a span as a repo path when the
# whole span is a single path-like token. The extension pattern is anchored to
# the end of the token, so `identity.json` is never truncated to `identity.js`.
# We do NOT scan bare prose -- too many false positives.
PATHISH_RE = re.compile(r"`([^`\n]+?)`")
# Recognized file extensions. EXT_RE anchors the alternation with `$`, which is
# what prevents json->js / yaml->yml style truncation; the order of this tuple
# does not affect whether a token matches.
KNOWN_EXTS = (
    "tsx", "json", "yaml", "toml", "service",
    "py", "sh", "rs", "ts", "js", "md", "yml", "sql", "ps1",
)
EXT_RE = re.compile(r"\.(?:" + "|".join(KNOWN_EXTS) + r")$", re.IGNORECASE)
# Vault-style secret paths live in the SEPARATE vault repo, not claudetools.
VAULT_HINT_RE = re.compile(r"\.sops\.ya?ml$", re.IGNORECASE)
# Absolute-path prefixes of tokens we never treat as repo paths.
ABS_PREFIXES = ("/api/", "/home/", "/var/", "/opt/", "/etc/", "/tmp/",
                "/proc/", "/dev/", "/data/", "/usr/")
def looks_like_repo_path(token: str) -> bool:
    """Return True when *token* plausibly names a concrete file in the repo.

    The token is stripped, then rejected when it:
      * is empty
      * contains whitespace, glob/placeholder characters, ':', '|' or '\\'
      * starts with a URL/remote-style prefix or a known server absolute prefix
      * is a vault secret reference (``.sops.yaml`` / ``.sops.yml``)

    Anything that survives is accepted if and only if it ends in one of the
    recognized extensions. Bare filenames and slash-separated paths are
    treated alike.
    """
    token = token.strip()
    if not token:
        return False
    # Reject anything with whitespace, glob/placeholder/url/colon characters --
    # those are descriptions or templates, not concrete repo paths.
    if any(c in token for c in (" ", "<", ">", "*", "?", ":", "|", "\\")):
        return False
    if token.startswith(("http://", "https://", "//", "git@", "vault:")):
        return False
    if token.startswith(ABS_PREFIXES):
        return False  # server absolute paths, not repo-relative
    # Vault secret refs belong to the vault repo -- not a staleness signal here.
    if VAULT_HINT_RE.search(token):
        return False
    # Must end in a recognized extension (anchored to end of token).
    return bool(EXT_RE.search(token))
def extract_referenced_paths(body: str) -> list[str]:
    """Return the sorted, de-duplicated repo-path tokens cited in *body*.

    Only backtick-wrapped spans are inspected, and a span counts only when the
    entire span is a single token accepted by ``looks_like_repo_path``.
    Leading "./" prefixes are removed; nothing else is stripped, so dot-dirs
    such as ``.claude/...`` keep their dot and absolute paths keep their
    leading slash (which lets the absolute-prefix filter reject them).
    """
    found = set()
    for m in PATHISH_RE.finditer(body):
        span = m.group(1).strip()
        # A backtick span counts only if the ENTIRE span is one token (a path).
        # Spans with spaces are commands/prose -> skip (avoids `cmd args` noise).
        if not span or " " in span:
            continue
        # str.lstrip("./") would strip a *character set*, turning ".claude/x"
        # into "claude/x" and "/home/x" into "home/x". Remove only literal
        # "./" prefixes.
        token = span
        while token.startswith("./"):
            token = token[2:]
        if looks_like_repo_path(token):
            found.add(token)
    return sorted(found)
def repo_path_exists(repo_root: Path, token: str) -> bool:
    """Return True when *token* can be located somewhere under *repo_root*.

    Leading empty, "." and ".." path segments are dropped first (so "./a/b",
    "/a/b" and "../a/b" all become "a/b"), while a leading dot that is part of
    a name (".claude", ".eslintrc.json") is preserved. Lookup order:

      1. the token as a path relative to *repo_root*
      2. for a bare filename: any file of that name anywhere in the repo
      3. otherwise: any file whose path ends with the token's last two
         segments, since memories often cite paths relative to a subproject

    An OSError raised while walking the tree is treated as "not found".
    """
    segments = token.split("/")
    while segments and segments[0] in ("", ".", ".."):
        segments.pop(0)
    token = "/".join(segments)

    # Try repo-relative.
    if (repo_root / token).exists():
        return True
    # Bare filename -> search anywhere in repo.
    if "/" not in token:
        try:
            return any(True for _ in repo_root.rglob(token))
        except OSError:
            return False
    # Also try matching just the tail (last 2 segments) anywhere.
    parts = token.split("/")
    tail = "/".join(parts[-2:])
    try:
        for p in repo_root.rglob(parts[-1]):
            if str(p).replace("\\", "/").endswith(tail):
                return True
    except OSError:
        return False
    return False
# --------------------------------------------------------------------------
# Similarity / duplicate clustering (token-overlap heuristic)
# --------------------------------------------------------------------------
# Common words ignored when building token signatures for overlap clustering.
STOPWORDS = {
    "the", "a", "an", "and", "or", "to", "of", "in", "on", "for", "with",
    "is", "are", "be", "not", "via", "use", "used", "uses", "no", "never",
    "always", "only", "from", "by", "at", "as", "it", "this", "that",
    "when", "if", "then", "do", "don't", "we", "our", "you", "your",
}
def tokenize(text: str) -> set[str]:
    """Return the set of significant lowercase alphanumeric words in *text*.

    Words are runs of [a-z0-9] taken from the lower-cased text. Stopwords and
    words of two characters or fewer are dropped. A falsy *text* (including
    None) yields an empty set.
    """
    words = re.findall(r"[a-z0-9]+", (text or "").lower())
    kept = set()
    for word in words:
        if word not in STOPWORDS and len(word) > 2:
            kept.add(word)
    return kept
def jaccard(a: set[str], b: set[str]) -> float:
    """Return the Jaccard similarity |a & b| / |a | b| of two token sets.

    The result is 0.0 when either set is empty.
    """
    if not (a and b):
        return 0.0
    shared = len(a.intersection(b))
    combined = len(a.union(b))
    if combined == 0:
        return 0.0
    return shared / combined
def cluster_overlaps(mems: list[Memory], threshold: float = 0.34):
    """
    Group related memories of the same type into candidate clusters.

    Two memories of one type are linked when either
      * the Jaccard overlap of their token signatures (name + description +
        slug words) is >= threshold, or
      * their slugs share the same first two '_'-separated segments
        (e.g. feedback_syncro_*).
    Links are transitive. Memories with no type are grouped as "untyped".

    Returns a list of (type, sorted filenames) for every cluster with more
    than one member.
    """
    grouped: dict[str, list[Memory]] = {}
    for mem in mems:
        grouped.setdefault(mem.type or "untyped", []).append(mem)

    found = []
    for kind, members in grouped.items():
        names = [mem.filename for mem in members]

        # Per-file token signature and two-segment slug prefix.
        tokens = {}
        prefixes = {}
        for mem in members:
            text = " ".join(
                part
                for part in (mem.name, mem.description, mem.slug.replace("_", " "))
                if part
            )
            tokens[mem.filename] = tokenize(text)
            pieces = mem.slug.split("_")
            prefixes[mem.filename] = (
                "_".join(pieces[:2]) if len(pieces) >= 2 else mem.slug
            )

        # Union-find over filenames; leader[x] points toward x's set root.
        leader = {name: name for name in names}

        def root_of(name):
            while leader[name] != name:
                leader[name] = leader[leader[name]]
                name = leader[name]
            return name

        for pos, first in enumerate(names):
            for second in names[pos + 1:]:
                score = jaccard(tokens[first], tokens[second])
                shared_prefix = (
                    prefixes[first] == prefixes[second]
                    and len(prefixes[first].split("_")) >= 2
                )
                if score >= threshold or shared_prefix:
                    root_a, root_b = root_of(first), root_of(second)
                    if root_a != root_b:
                        leader[root_a] = root_b

        clusters: dict[str, list[str]] = {}
        for name in names:
            clusters.setdefault(root_of(name), []).append(name)
        found.extend(
            (kind, sorted(group)) for group in clusters.values() if len(group) > 1
        )
    return found
# --------------------------------------------------------------------------
# Stale dated facts
# --------------------------------------------------------------------------
def find_stale_dates(mem: Memory, today: datetime.date):
    """Return (date_str, age_days) for each dated claim in ``mem.body`` older than STALE_MONTHS.

    Dates are collected first from keyword-introduced claims and then from any
    bare ISO date; each distinct date string is considered once. Strings that
    are not valid calendar dates are skipped. A date is stale when its age in
    days exceeds STALE_MONTHS * 30.
    """
    threshold_days = STALE_MONTHS * 30
    stale = []
    visited = set()
    for pattern in (DATE_RE, ISO_DATE_RE):
        for match in pattern.finditer(mem.body):
            stamp = match.group(1)
            if stamp in visited:
                continue
            visited.add(stamp)
            try:
                when = datetime.date.fromisoformat(stamp)
            except ValueError:
                continue
            age_days = (today - when).days
            if age_days > threshold_days:
                stale.append((stamp, age_days))
    return stale
# --------------------------------------------------------------------------
# Report
# --------------------------------------------------------------------------
class Report:
    """Line-oriented text accumulator; ``str()`` joins the lines with newlines."""

    def __init__(self):
        self.lines: list[str] = []

    def add(self, s: str = ""):
        """Append one line (an empty line by default)."""
        self.lines += [s]

    def __str__(self):
        return "\n".join(self.lines)
def slugify_link_target(target: str) -> str:
    """Return the final path component of *target* without its last suffix."""
    as_path = Path(target)
    return as_path.stem
def run(args) -> int:
    """Analyze the repo memory store, print the report, optionally apply additive fixes.

    Report sections: index reconcile, backlinks, referenced-artifact validity,
    overlap clusters, stale dated facts, drift against the harness profile
    store, then a summary and the list of PROPOSED (never applied) actions.

    With ``args.apply_safe`` two additive actions are performed: missing index
    lines are appended to MEMORY.md for orphan files, and profile-only files
    are copied into the repo store when no file of that name exists there.

    Unless ``args.no_file`` is set the report is also written to
    ``args.report_file`` or, by default, to a timestamped file under
    ``<memory dir>/_reports/``.

    Returns 0 on success, 2 when the memory directory does not exist.
    """
    repo_root = resolve_claudetools_root()
    mem_dir = repo_root / ".claude" / "memory"
    index_path = mem_dir / "MEMORY.md"
    if not mem_dir.is_dir():
        print(f"[ERROR] memory dir not found: {mem_dir}")
        return 2

    today = datetime.date.today()
    rpt = Report()
    rpt.add("# Memory Dream Report")
    rpt.add(f"Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}")
    rpt.add(f"Repo root: {repo_root}")
    rpt.add(f"Memory store: {mem_dir}")
    rpt.add(f"Mode: {'APPLY-SAFE (additive)' if args.apply_safe else 'REPORT-ONLY'}")
    rpt.add("")

    # Load memories.
    mem_files = sorted(p for p in mem_dir.glob("*.md") if p.name != "MEMORY.md")
    mems = [Memory(p) for p in mem_files]
    mem_by_file = {m.filename: m for m in mems}
    rpt.add(f"Loaded {len(mems)} memory files (excluding MEMORY.md).")
    rpt.add("")

    # ----- 1. INDEX RECONCILE -----
    links, headers, index_lines = parse_index(index_path)
    # Keyed by the stem of each link target so it can be compared to file slugs.
    indexed_targets = {slugify_link_target(t): (title, t, ln)
                       for (title, t, ln, _raw) in links}
    rpt.add("## 1. INDEX RECONCILE")
    rpt.add("")
    orphans = [m for m in mems if m.slug not in indexed_targets]  # no index line
    rpt.add(f"### Orphan files (no index line): {len(orphans)}")
    for m in orphans:
        rpt.add(f"- [INFO] {m.filename} (type={m.type or '?'})")
    rpt.add("")

    missing_targets = []  # index lines whose file is missing
    for title, target, ln, _raw in links:
        # Only consider links that look like local memory files.
        tgt = target.strip()
        if tgt.startswith(("http://", "https://")):
            continue
        resolved = (mem_dir / tgt).resolve()
        if not resolved.is_file():
            missing_targets.append((title, target, ln))
    rpt.add(f"### Index lines pointing at missing files: {len(missing_targets)}")
    for title, target, ln in missing_targets:
        rpt.add(f"- [WARNING] line {ln + 1}: [{title}]({target}) -> file not found")
    rpt.add("")

    name_mismatches = []  # frontmatter name vs filename slug
    for m in mems:
        if m.name is None:
            name_mismatches.append((m.filename, "(no name in frontmatter)"))
            continue
        # The convention is loose: name may be a title, not the slug. Only flag
        # when name itself looks like a slug AND differs from the filename slug.
        if re.fullmatch(r"[a-z0-9_]+", m.name.strip()) and m.name.strip() != m.slug:
            name_mismatches.append((m.filename, f"name='{m.name}' != slug='{m.slug}'"))
    rpt.add(f"### Frontmatter name vs filename signals: {len(name_mismatches)}")
    for fn, note in name_mismatches:
        rpt.add(f"- [INFO] {fn}: {note}")
    rpt.add("")

    # ----- 2. BACKLINKS -----
    rpt.add("## 2. BACKLINKS ([[name]] references)")
    rpt.add("")
    known_slugs = {m.slug for m in mems}
    broken_backlinks = []
    for m in mems:
        for bm in BACKLINK_RE.finditer(m.body):
            ref = bm.group(1).strip()
            ref_slug = slugify_link_target(ref)
            if ref_slug not in known_slugs and ref not in known_slugs:
                broken_backlinks.append((m.filename, ref))
    rpt.add(f"### Broken backlinks: {len(broken_backlinks)}")
    for fn, ref in broken_backlinks:
        rpt.add(f"- [WARNING] {fn}: [[{ref}]] has no matching memory file")
    if not broken_backlinks:
        rpt.add("- [OK] no broken backlinks found")
    rpt.add("")

    # ----- 3. REFERENCED-ARTIFACT VALIDITY -----
    rpt.add("## 3. REFERENCED-ARTIFACT VALIDITY (conservative; 'verify', not 'delete')")
    rpt.add("")
    artifact_flags = []
    for m in mems:
        for tok in extract_referenced_paths(m.body):
            if not repo_path_exists(repo_root, tok):
                artifact_flags.append((m.filename, tok))
    rpt.add(f"### Referenced paths not found in repo: {len(artifact_flags)}")
    for fn, tok in artifact_flags:
        rpt.add(f"- [VERIFY] {fn}: `{tok}` not found under repo (may be server-side "
                f"or renamed -- verify, do not auto-delete)")
    if not artifact_flags:
        rpt.add("- [OK] no clearly-stale repo paths detected")
    rpt.add("")

    # ----- 4. DUPLICATE / OVERLAP CLUSTERS -----
    rpt.add("## 4. DUPLICATE / OVERLAP CLUSTERS (PROPOSED merges -- never auto-applied)")
    rpt.add("")
    clusters = cluster_overlaps(mems)
    # Largest clusters first, then by type name.
    clusters.sort(key=lambda c: (-len(c[1]), c[0]))
    rpt.add(f"### Candidate clusters: {len(clusters)}")
    for typ, members in clusters:
        rpt.add(f"- [{typ}] {len(members)} related memories:")
        for f in members:
            mm = mem_by_file.get(f)
            desc = (mm.description or mm.name or "") if mm else ""
            desc = desc[:90]
            rpt.add(f" - {f} -- {desc}")
    if not clusters:
        rpt.add("- [OK] no overlap clusters above threshold")
    rpt.add("")

    # ----- 5. STALE DATED FACTS -----
    rpt.add(f"## 5. STALE DATED FACTS (project-type, dated > {STALE_MONTHS} months)")
    rpt.add("")
    stale_hits = []
    for m in mems:
        if (m.type or "") != "project":
            continue
        hits = find_stale_dates(m, today)
        if hits:
            stale_hits.append((m.filename, hits))
    rpt.add(f"### Project memories with stale dated claims: {len(stale_hits)}")
    for fn, hits in stale_hits:
        for ds, age in hits:
            rpt.add(f"- [VERIFY] {fn}: dated {ds} (~{age} days old) -- re-verify")
    if not stale_hits:
        rpt.add("- [OK] no stale dated project facts")
    rpt.add("")

    # ----- 6. DRIFT vs PROFILE STORE -----
    rpt.add("## 6. DRIFT vs HARNESS PROFILE STORE")
    rpt.add("")
    prof_dir = profile_memory_dir(repo_root)
    profile_only = []
    repo_only = []
    conflicts = []
    if prof_dir is None:
        rpt.add("- [INFO] profile memory dir not found; skipping drift check.")
    else:
        rpt.add(f"Profile store: {prof_dir}")
        rpt.add("")
        prof_files = {p.name for p in prof_dir.glob("*.md") if p.name != "MEMORY.md"}
        repo_files = {m.filename for m in mems}
        profile_only.extend(sorted(prof_files - repo_files))
        repo_only.extend(sorted(repo_files - prof_files))
        for both in sorted(prof_files & repo_files):
            a = (prof_dir / both).read_text(encoding="utf-8", errors="replace")
            b = (mem_dir / both).read_text(encoding="utf-8", errors="replace")
            if a != b:
                conflicts.append(both)
        rpt.add(f"### Profile-only (candidates to MIGRATE INTO repo): {len(profile_only)}")
        for f in profile_only:
            rpt.add(f"- [INFO] {f}")
        rpt.add("")
        rpt.add(f"### Repo-only (candidates to PUSH OUT to profile): {len(repo_only)}")
        for f in repo_only:
            rpt.add(f"- [INFO] {f}")
        rpt.add("")
        rpt.add(f"### Present in BOTH but differing (CONFLICT -- human review): "
                f"{len(conflicts)}")
        for f in conflicts:
            rpt.add(f"- [WARNING] {f}: content differs between repo and profile")
    rpt.add("")

    # ----- APPLY-SAFE ACTIONS (additive-only) -----
    actions_taken = []
    if args.apply_safe:
        rpt.add("## APPLY-SAFE ACTIONS PERFORMED (additive-only)")
        rpt.add("")
        # (a) Append missing index lines for orphan files.
        if orphans and index_path.is_file():
            appended = append_index_lines(index_path, orphans, index_lines, headers)
            for line, hdr in appended:
                actions_taken.append(f"INDEX += [{hdr}] {line}")
                rpt.add(f"- [OK] appended index line under ## {hdr}: {line}")
        elif orphans:
            rpt.add("- [WARNING] orphans exist but MEMORY.md missing; nothing appended")
        # (b) Copy profile-only files INTO repo (never overwrite).
        if prof_dir is not None:
            for f in profile_only:
                src = prof_dir / f
                dst = mem_dir / f
                if dst.exists():
                    rpt.add(f"- [SKIP] {f}: already exists in repo (not overwriting)")
                    continue
                shutil.copy2(src, dst)
                actions_taken.append(f"COPIED profile->repo: {f}")
                rpt.add(f"- [OK] copied profile-only file into repo: {f}")
        if not actions_taken:
            rpt.add("- [INFO] no additive actions were necessary")
        rpt.add("")

    # ----- SUMMARY -----
    rpt.add("## SUMMARY")
    rpt.add("")
    rpt.add(f"- memory files: {len(mems)}")
    rpt.add(f"- orphan files (no index): {len(orphans)}")
    rpt.add(f"- index -> missing file: {len(missing_targets)}")
    rpt.add(f"- name/filename signals: {len(name_mismatches)}")
    rpt.add(f"- broken backlinks: {len(broken_backlinks)}")
    rpt.add(f"- stale referenced paths: {len(artifact_flags)}")
    rpt.add(f"- overlap clusters: {len(clusters)}")
    rpt.add(f"- stale dated project facts: {len(stale_hits)}")
    rpt.add(f"- profile-only files: {len(profile_only)}")
    rpt.add(f"- repo-only files: {len(repo_only)}")
    rpt.add(f"- repo<->profile conflicts: {len(conflicts)}")
    if args.apply_safe:
        rpt.add(f"- additive actions performed: {len(actions_taken)}")
    rpt.add("")

    rpt.add("## PROPOSED (needs human approval -- NEVER auto-applied)")
    rpt.add("")
    n_prop = 0
    for typ, members in clusters:
        n_prop += 1
        rpt.add(f"- [MERGE?] consolidate {len(members)} '{typ}' memories: "
                f"{', '.join(members)}")
    for fn, hits in stale_hits:
        n_prop += 1
        rpt.add(f"- [REVERIFY?] {fn} (dated facts) -- confirm still true, then update")
    for fn, tok in artifact_flags:
        n_prop += 1
        rpt.add(f"- [STALE-REF?] {fn} references `{tok}` -- confirm/repoint or note moved")
    for title, target, ln in missing_targets:
        n_prop += 1
        rpt.add(f"- [INDEX-CLEANUP?] MEMORY.md line {ln + 1} points at missing "
                f"{target} -- human decides keep/remove")
    if prof_dir is not None:
        for f in conflicts:
            n_prop += 1
            rpt.add(f"- [DRIFT-RESOLVE?] {f} differs repo vs profile -- human picks "
                    f"winner (sync-memory.sh leaves both untouched)")
    if n_prop == 0:
        rpt.add("- [OK] nothing proposed; memory store is clean")
    rpt.add("")

    out = str(rpt)
    print(out)

    # Write report file unless suppressed.
    if not args.no_file:
        if args.report_file:
            rpath = Path(args.report_file)
        else:
            stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H%M")
            rpath = mem_dir / "_reports" / f"{stamp}-dream.md"
        # Create only the directory that will actually hold the report, so an
        # explicit --report-file does not leave an unused _reports/ dir in the
        # memory store.
        rpath.parent.mkdir(parents=True, exist_ok=True)
        rpath.write_text(out + "\n", encoding="utf-8")
        print(f"\n[INFO] report written: {rpath}")
    return 0
def append_index_lines(index_path: Path, orphans, index_lines, headers):
    """
    Additive only: append a '- [Name](file.md) -- description' line for each
    orphan under the correct '## <Header>' section. Never reorders or removes
    existing lines. If a header doesn't exist, a new section is appended at
    end of file. Orphans whose type has no known header go under "Project".

    The index is re-read from ``index_path``; ``index_lines`` and ``headers``
    are accepted for call compatibility but not used. A trailing newline in
    the original file is preserved.

    Returns list of (line_text, header_used).
    """
    text = index_path.read_text(encoding="utf-8", errors="replace")
    lines = text.split("\n")
    ends_with_newline = text.endswith("\n")
    appended = []

    # Group orphans by target header.
    by_header: dict[str, list[Memory]] = {}
    for m in orphans:
        # "Project" is the safe default bucket; a human can recategorize.
        hdr = TYPE_HEADER.get(m.type or "", "Project")
        by_header.setdefault(hdr, []).append(m)

    def build_line(m: Memory) -> str:
        title = m.name or m.slug
        hook = (m.description or "").strip()
        if hook:
            return f"- [{title}]({m.filename}) -- {hook}"
        return f"- [{title}]({m.filename})"

    for hdr, members in by_header.items():
        new_lines = [build_line(m) for m in members]

        # Find header line index (recomputed each time: earlier insertions
        # shift line numbers).
        hidx = None
        for i, ln in enumerate(lines):
            hm = re.match(r"^##\s+(.+?)\s*$", ln)
            if hm and hm.group(1).strip() == hdr:
                hidx = i
                break

        if hidx is None:
            # Append a fresh section at end of file.
            if lines and lines[-1].strip() != "":
                lines.append("")
            lines.append(f"## {hdr}")
            lines.extend(new_lines)
        else:
            # Find end of this section: next '## ' or EOF.
            end = len(lines)
            for j in range(hidx + 1, len(lines)):
                if re.match(r"^##\s+", lines[j]):
                    end = j
                    break
            # Insert after the last non-blank line of the section.
            insert_at = end
            while insert_at - 1 > hidx and lines[insert_at - 1].strip() == "":
                insert_at -= 1
            lines[insert_at:insert_at] = new_lines
        appended.extend((nl, hdr) for nl in new_lines)

    # A section appended at EOF would otherwise leave the file without the
    # trailing newline it originally had.
    if ends_with_newline and lines[-1] != "":
        lines.append("")
    index_path.write_text("\n".join(lines), encoding="utf-8")
    return appended
def main() -> int:
    """Parse the command-line flags, run the analyzer, and return its exit code.

    A keyboard interrupt during the run is reported and mapped to exit code 130.
    """
    parser = argparse.ArgumentParser(
        description="Memory lint + consolidation analyzer (additive-only)."
    )
    parser.add_argument(
        "--apply-safe",
        action="store_true",
        help=(
            "Perform ONLY additive fixes (append index lines, copy profile-only "
            "files into repo). Never deletes/overwrites/merges."
        ),
    )
    parser.add_argument(
        "--no-file",
        action="store_true",
        help="Print report to stdout only; do not write a _reports/ file.",
    )
    parser.add_argument(
        "--report-file",
        default=None,
        help="Explicit path for the report file (overrides _reports/ default).",
    )
    options = parser.parse_args()

    try:
        exit_code = run(options)
    except KeyboardInterrupt:
        print("[ERROR] interrupted")
        exit_code = 130
    return exit_code
# Script entry point: exit the process with the code returned by main().
if __name__ == "__main__":
    sys.exit(main())