348 lines
13 KiB
Python
348 lines
13 KiB
Python
#!/usr/bin/env python
|
|
"""errorlog-dream: lint the fleet error log (errorlog.md).
|
|
|
|
Read-only by default. Parses the canonical entry format
|
|
YYYY-MM-DD | MACHINE | skill/context | [type] message [ctx: k=v ...] (xN)
|
|
and reports the patterns the log exists to surface: which contexts generate
|
|
the most failures, which documented rules keep getting violated (repeat ref=
|
|
citations), which identical failures recur across days (noise clusters that
|
|
need a skill-side fix, not more logging), resolved entries, machine-name
|
|
drift, and entries old enough to archive.
|
|
|
|
The single mutating mode, --apply-archive, moves entries older than --days
|
|
(default 60) into errorlog-archive/YYYY-MM.md. Everything judgment-shaped
|
|
stays in the PROPOSED section for the operator, mirroring memory-dream.
|
|
"""
|
|
|
|
import argparse
|
|
import io
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from collections import defaultdict
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
# Sentinel line in errorlog.md: parse_log treats everything up to and
# including this line as the header and parses entries only from what follows.
MARKER = "<!-- Append entries below this line -->"

# First line of an entry: DATE | MACHINE | skill/context | rest-of-message.
# Groups: (date, machine, skill, message).
ENTRY_RE = re.compile(r"^(\d{4}-\d{2}-\d{2}) \| ([^|]+?) \| ([^|]+?) \| (.*)$")

# Leading "[type] " tag of a message; group 1 is the type name.
TYPE_RE = re.compile(r"^\[(correction|friction|[a-z-]+)\]\s+")

# "[ctx: k=v ...]" block; group 1 is the text between "[ctx: " and "]".
CTX_RE = re.compile(r"\[ctx: ([^\]]*)\]")

# "ref=<target>" citation; group 1 is the cited target.
REF_RE = re.compile(r"ref=([A-Za-z0-9_./#-]+)")

# Trailing " (xN)" repeat counter at the very end of a message.
COUNT_RE = re.compile(r" \(x(\d+)\)\s*$")

# "[RESOLVED ...]" tag, matched case-insensitively.
RESOLVED_RE = re.compile(r"\[RESOLVED[^\]]*\]", re.IGNORECASE)
|
|
|
|
|
|
def find_root():
    """Locate the repo root directory.

    Resolution order:
      1. the CLAUDETOOLS_ROOT environment variable, if it names a directory;
      2. the "claudetools_root" value in <fallback>/.claude/identity.json,
         if that file can be read and the value names a directory;
      3. the fallback itself: four directory levels above this script's
         own directory.

    Any failure while reading identity.json is ignored (best effort) and
    the fallback is returned.
    """
    override = os.environ.get("CLAUDETOOLS_ROOT")
    if override and os.path.isdir(override):
        return override

    script_dir = os.path.dirname(__file__)
    fallback = os.path.abspath(os.path.join(script_dir, "..", "..", "..", ".."))
    identity_path = os.path.join(fallback, ".claude", "identity.json")
    if not os.path.isfile(identity_path):
        return fallback

    try:
        with io.open(identity_path, encoding="utf-8") as fh:
            configured = json.load(fh).get("claudetools_root")
        if configured and os.path.isdir(configured):
            return configured
    except Exception:
        # Deliberately best-effort: an unreadable or malformed identity file
        # must not stop the tool; fall through to the computed fallback.
        pass
    return fallback
|
|
|
|
|
|
class Entry:
    """One parsed log entry.

    Built from the four ENTRY_RE groups of the block's first line plus the
    verbatim block lines. The trailing "(xN)" counter, the leading "[type]"
    tag, the "[ctx: ...]" text and any "ref=" citations are read from the
    first-line message; the resolved flag is searched across all lines.
    """

    __slots__ = ("date", "machine", "skill", "msg", "etype", "ctx", "refs",
                 "count", "resolved", "lines", "raw_first")

    def __init__(self, date, machine, skill, msg, lines):
        self.date = date
        self.machine = machine.strip()
        self.skill = skill.strip()
        # Verbatim block lines, kept so the entry can be written back or
        # archived unchanged.
        self.lines = lines
        self.raw_first = lines[0]

        # Pull the repeat counter off the end first so the remaining
        # fields are parsed from the message without it.
        counter = COUNT_RE.search(msg)
        self.count = 1 if counter is None else int(counter.group(1))
        body = COUNT_RE.sub("", msg)

        tag = TYPE_RE.match(body)
        # Messages without a "[type]" tag are classified as "exec".
        self.etype = tag.group(1) if tag else "exec"

        ctx = CTX_RE.search(body)
        self.ctx = ctx.group(1) if ctx else ""

        self.refs = REF_RE.findall(body)
        self.resolved = RESOLVED_RE.search(" ".join(lines)) is not None
        self.msg = body

    @property
    def context_group(self):
        """The part of ``skill`` before the first "/", stripped."""
        head = self.skill.split("/", 1)[0]
        return head.strip()

    def norm_msg(self):
        """Return the message normalized for grouping recurring failures.

        The type tag and ctx block are removed, then volatile tokens are
        collapsed in this order: uuid-shaped runs, long hex runs, digit
        runs. Whitespace is squeezed and the result lower-cased.
        """
        text = CTX_RE.sub("", TYPE_RE.sub("", self.msg))
        # Order matters: uuids and hex must be collapsed before bare digits.
        substitutions = (
            (r"[0-9a-fA-F]{8}-[0-9a-fA-F-]{27,}", "<uuid>"),
            (r"\b[0-9a-fA-F]{12,}\b", "<hex>"),
            (r"\d+", "<n>"),
        )
        for pattern, token in substitutions:
            text = re.sub(pattern, token, text)
        return re.sub(r"\s+", " ", text).strip().lower()
|
|
|
|
|
|
def parse_log(path):
    """Return (header_lines, entries, unparsed_blocks) parsed from *path*.

    header_lines is every line up to and including the first MARKER line;
    if the file has no marker, the header is empty and the whole file is
    treated as entry territory.

    Blocks are runs of consecutive non-blank lines after the marker. A block
    whose first line matches ENTRY_RE is an Entry (continuation lines belong
    to it -- the pre-helper era wrote multi-line entries by hand); anything
    else is an unparsed block, returned verbatim as a list of lines.
    """
    with io.open(path, encoding="utf-8") as fh:
        lines = fh.read().splitlines()

    # Split at the first marker line; the marker itself stays in the header.
    header, rest = [], lines
    for idx, ln in enumerate(lines):
        if ln.strip() == MARKER:
            header, rest = lines[:idx + 1], lines[idx + 1:]
            break

    # Group the remainder into blank-line-separated blocks.
    blocks, cur = [], []
    for ln in rest:
        if ln.strip():
            cur.append(ln)
        elif cur:
            blocks.append(cur)
            cur = []
    if cur:
        blocks.append(cur)

    entries, unparsed = [], []
    for block in blocks:
        m = ENTRY_RE.match(block[0])
        if m is None:
            unparsed.append(block)
        else:
            entries.append(Entry(m.group(1), m.group(2), m.group(3), m.group(4), block))
    return header, entries, unparsed
|
|
|
|
|
|
def analyze(entries, unparsed, root, archive_days, today):
    """Aggregate parsed entries into the statistics dict the report renders.

    Args:
        entries: entry objects; this function reads ``date``, ``machine``,
            ``skill``, ``count``, ``etype``, ``refs``, ``resolved``,
            ``context_group`` and calls ``norm_msg()``.
        unparsed: unparsed blocks; only their number is recorded.
        root: repo root; cited refs are looked up under
            ``<root>/.claude/memory``.
        archive_days: age threshold in days for archive candidates.
        today: datetime the cutoff date is computed from.

    Returns:
        dict with keys ``total``, ``weighted``, ``unparsed``, ``span``
        (only when there are entries), ``by_type``, ``top_contexts``,
        ``repeat_refs``, ``noise``, ``resolved``, ``machine_drift``,
        ``by_machine``, ``cutoff`` and ``archive``.
    """
    def weight(es):
        # An entry with an (xN) counter stands for N occurrences.
        return sum(e.count for e in es)

    r = {
        "total": len(entries),
        "weighted": weight(entries),
        "unparsed": len(unparsed),
    }
    if entries:
        r["span"] = (min(e.date for e in entries), max(e.date for e in entries))

    by_type = defaultdict(int)
    for e in entries:
        by_type[e.etype] += 1
    r["by_type"] = dict(by_type)

    ctxs = defaultdict(list)
    for e in entries:
        ctxs[e.context_group].append(e)
    r["top_contexts"] = sorted(
        ((c, weight(es), len(es)) for c, es in ctxs.items()),
        key=lambda t: -t[1])[:15]

    # repeat ref= citations: >=2 means a documented rule/memory is not sticking
    refs = defaultdict(list)
    for e in entries:
        for ref in e.refs:
            refs[ref].append(e)
    mem_dir = os.path.join(root, ".claude", "memory")
    rep = []
    for ref, es in sorted(refs.items(), key=lambda kv: -len(kv[1])):
        if len(es) < 2:
            continue
        # Map the citation to a memory file name: drop any "#anchor", keep
        # the last path component, and make sure it ends in ".md".
        base = ref.split("#", 1)[0].split("/")[-1]
        cand = base if base.endswith(".md") else base + ".md"
        exists = os.path.isfile(os.path.join(mem_dir, cand))
        rep.append((ref, len(es), exists, sorted({e.date for e in es})[-3:]))
    r["repeat_refs"] = rep

    # noise clusters: same machine+skill+normalized message seen on >=3
    # distinct days, OR with a total weight of >=5 (which also catches a
    # heavy burst collapsed into few entries by the (xN) counter)
    clusters = defaultdict(list)
    for e in entries:
        clusters[(e.machine, e.skill, e.norm_msg())].append(e)
    noise = []
    for (mach, skill, norm), es in clusters.items():
        days = {e.date for e in es}
        total = weight(es)
        if len(days) >= 3 or total >= 5:
            noise.append((mach, skill, norm[:110], total, len(days)))
    r["noise"] = sorted(noise, key=lambda t: -t[3])[:15]

    r["resolved"] = [e for e in entries if e.resolved]

    # Group machine names case-insensitively; more than one spelling under
    # the same lowered key is drift. Weights are accumulated in the same
    # pass instead of re-scanning all entries once per machine.
    spellings = defaultdict(set)
    machine_weight = defaultdict(int)
    for e in entries:
        key = e.machine.lower()
        spellings[key].add(e.machine)
        machine_weight[key] += e.count
    r["machine_drift"] = {k: sorted(v) for k, v in spellings.items() if len(v) > 1}
    r["by_machine"] = sorted(machine_weight.items(), key=lambda t: -t[1])

    # Dates are YYYY-MM-DD strings, so plain string comparison orders them.
    cutoff = (today - timedelta(days=archive_days)).strftime("%Y-%m-%d")
    r["cutoff"] = cutoff
    r["archive"] = [e for e in entries if e.date < cutoff]
    return r
|
|
|
|
|
|
def render(r, archive_days):
    """Format the analysis dict *r* as the Markdown report text.

    Sections, in order: SUMMARY, TOP CONTEXTS, REPEAT REFS, NOISE CLUSTERS,
    RESOLVED entries, MACHINE-NAME DRIFT, ARCHIVE CANDIDATES and PROPOSED.
    A list section with nothing to show prints "- none" (PROPOSED prints
    "- nothing to propose"). *archive_days* only appears in the ARCHIVE
    CANDIDATES heading. Returns one newline-joined string that ends with
    a newline.
    """
    repeat_refs = r["repeat_refs"]
    noise = r["noise"]
    resolved = r["resolved"]
    drift = r["machine_drift"]
    stale_count = len(r["archive"])

    out = ["# errorlog-dream report", "", "## SUMMARY"]
    out.append("- entries: %d parsed (%d weighted with (xN) counters), %d unparsed legacy block(s)"
               % (r["total"], r["weighted"], r["unparsed"]))
    span = r.get("span")  # absent when there are no entries
    if span:
        out.append("- span: %s .. %s" % span)
    type_pairs = sorted(r["by_type"].items())
    out.append("- by type: " + ", ".join("%s=%d" % kv for kv in type_pairs))
    out.append("- by machine: " + ", ".join("%s=%d" % kv for kv in r["by_machine"]))
    out.append("")

    out.append("## TOP CONTEXTS (weighted)")
    out.extend("- %-22s %4d (%d entries)" % (c, w, n) for c, w, n in r["top_contexts"])
    out.append("")

    out.append("## REPEAT REFS -- documented rules that are NOT sticking")
    ref_lines = [
        "- ref=%s cited %dx (last: %s) -- memory file %s"
        % (ref, n, ", ".join(dates), "exists" if exists else "NOT FOUND")
        for ref, n, exists, dates in repeat_refs
    ]
    out.extend(ref_lines or ["- none"])
    out.append("")

    out.append("## NOISE CLUSTERS -- identical failures recurring across days")
    noise_lines = [
        "- %s | %s | %dx over %d day(s): %s" % (mach, skill, w, days, norm)
        for mach, skill, norm, w, days in noise
    ]
    out.extend(noise_lines or ["- none"])
    out.append("")

    out.append("## RESOLVED entries (archive candidates regardless of age)")
    resolved_lines = ["- %s | %s | %s" % (e.date, e.machine, e.skill) for e in resolved]
    out.extend(resolved_lines or ["- none"])
    out.append("")

    out.append("## MACHINE-NAME DRIFT")
    drift_lines = [
        "- %s spelled %s -- normalize identity.json .machine on the odd one out"
        % (k, " / ".join(variants))
        for k, variants in sorted(drift.items())
    ]
    out.extend(drift_lines or ["- none"])
    out.append("")

    out.append("## ARCHIVE CANDIDATES (older than %d days, cutoff %s)" % (archive_days, r["cutoff"]))
    out.append("- %d entr%s -- run --apply-archive to move them to errorlog-archive/YYYY-MM.md"
               % (stale_count, "y" if stale_count == 1 else "ies"))
    out.append("")

    # Judgment calls are only proposed, never applied.
    out.append("## PROPOSED (needs human approval)")
    proposals = []
    for ref, n, exists, dates in repeat_refs:
        missing_note = "" if exists else " (and the cited memory file is MISSING)"
        proposals.append(
            "- [STRENGTHEN?] ref=%s keeps repeating (%dx)%s -- the prose rule failed; "
            "add a mechanical guard (hook/wrapper/preflight) or rewrite the memory"
            % (ref, n, missing_note))
    for mach, skill, norm, w, days in noise:
        proposals.append(
            "- [SUPPRESS?] %s/%s fails identically %dx over %d days -- fix the skill "
            "(backoff, expected-condition filter, or health-gate), don't keep logging it"
            % (mach, skill, w, days))
    for e in resolved:
        proposals.append(
            "- [ARCHIVE?] resolved entry %s | %s | %s can move to the archive now"
            % (e.date, e.machine, e.skill))
    out.extend(proposals or ["- nothing to propose"])
    out.append("")
    return "\n".join(out)
|
|
|
|
|
|
def apply_archive(log_path, root, header, entries, unparsed, cutoff_entries):
    """Move cutoff_entries' blocks into errorlog-archive/YYYY-MM.md and
    rewrite errorlog.md without them.

    Archive files are appended to (created with a short heading when new),
    one file per YYYY-MM prefix of the entry date, with entries written in
    the order given. The log is then rewritten as: header, the kept entries
    in their original order, then every unparsed block -- each preceded by
    a blank line. Unparsed blocks are never archived, but note that they
    are written back AFTER the kept entries regardless of where they sat
    in the original file.

    The log rewrite goes to a temporary sibling file that then replaces
    the log in one step, so a failure part-way through writing cannot
    leave a truncated errorlog.md behind.

    Args:
        log_path: path of the log file to rewrite.
        root: directory under which ``errorlog-archive/`` lives.
        header: header lines to write back verbatim.
        entries: all parsed entries (each has ``date`` and ``lines``).
        unparsed: unparsed blocks (lists of lines).
        cutoff_entries: the subset of *entries* to archive (same objects).
    """
    arch_dir = os.path.join(root, "errorlog-archive")
    os.makedirs(arch_dir, exist_ok=True)

    by_month = defaultdict(list)
    for e in cutoff_entries:
        by_month[e.date[:7]].append(e)

    for month, es in sorted(by_month.items()):
        p = os.path.join(arch_dir, "%s.md" % month)
        is_new = not os.path.isfile(p)
        with io.open(p, "a", encoding="utf-8", newline="\n") as fh:
            if is_new:
                fh.write("# Error Log archive -- %s\n\nMoved out of errorlog.md by "
                         "errorlog-dream --apply-archive.\n" % month)
            for e in es:
                fh.write("\n" + "\n".join(e.lines) + "\n")
        print("[OK] archived %d entr%s -> errorlog-archive/%s.md"
              % (len(es), "y" if len(es) == 1 else "ies", month))

    # Entries are matched by object identity: cutoff_entries holds the very
    # same objects as entries.
    archived_ids = {id(e) for e in cutoff_entries}
    kept = [e for e in entries if id(e) not in archived_ids]

    out = list(header)
    for e in kept:
        out.append("")
        out.extend(e.lines)
    for b in unparsed:
        out.append("")
        out.extend(b)
    out.append("")

    # Write to a temp sibling and swap it in; resolve symlinks first so a
    # symlinked log keeps pointing at the (replaced) real file.
    target = os.path.realpath(log_path)
    tmp_path = target + ".tmp"
    try:
        with io.open(tmp_path, "w", encoding="utf-8", newline="\n") as fh:
            fh.write("\n".join(out))
        os.replace(tmp_path, target)
    except Exception:
        # Leave the original log untouched and do not litter a partial file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    print("[OK] errorlog.md rewritten: %d entries kept, %d archived, %d unparsed block(s) untouched"
          % (len(kept), len(cutoff_entries), len(unparsed)))
|
|
|
|
|
|
def main(argv=None):
    """Command-line entry point: parse the log, print and save the report,
    and optionally archive old entries.

    Args:
        argv: argument list for argparse; None lets argparse read the
            process arguments.

    Returns:
        0 on success, 2 when the log file does not exist.
    """
    parser = argparse.ArgumentParser(description="lint errorlog.md")
    parser.add_argument("--days", type=int, default=60,
                        help="archive-candidate age threshold (default 60)")
    parser.add_argument("--apply-archive", action="store_true",
                        help="move entries older than --days to errorlog-archive/")
    parser.add_argument("--no-file", action="store_true",
                        help="print report to stdout only")
    parser.add_argument("--report-file", default=None)
    parser.add_argument("--log", default=None, help="path to errorlog.md (for tests)")
    parser.add_argument("--root", default=None, help="repo root override (for tests)")
    opts = parser.parse_args(argv)

    root = opts.root or find_root()
    log_path = opts.log or os.path.join(root, "errorlog.md")
    if not os.path.isfile(log_path):
        print("[ERROR] %s not found" % log_path, file=sys.stderr)
        return 2

    header, entries, unparsed = parse_log(log_path)
    now = datetime.now(timezone.utc)
    result = analyze(entries, unparsed, root, opts.days, now)
    report = render(result, opts.days)
    print(report)

    if not opts.no_file:
        report_path = opts.report_file
        if not report_path:
            # Default location: a timestamped file under the archive dir.
            report_dir = os.path.join(root, "errorlog-archive", "_reports")
            if not os.path.isdir(report_dir):
                os.makedirs(report_dir)
            stamp = now.strftime("%Y-%m-%d-%H%M")
            report_path = os.path.join(report_dir, stamp + "-dream.md")
        with io.open(report_path, "w", encoding="utf-8", newline="\n") as fh:
            fh.write(report + "\n")
        print("[OK] report written: %s" % os.path.relpath(report_path, root))

    if opts.apply_archive:
        stale = result["archive"]
        if not stale:
            print("[OK] nothing old enough to archive (cutoff %s)" % result["cutoff"])
        else:
            apply_archive(log_path, root, header, entries, unparsed, stale)
    return 0
|
|
|
|
|
|
# Run as a script: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|