# Reconstructed from a whitespace-collapsed git patch:
#   commit 8a094529abe18610c9ca4a3c354a93dff8c9e1a8 (Mike Swanson, 2026-04-16)
#   "Add session import tool + fix audit gaps (GrepAI, Ollama, MCP, settings)"
# The patch creates this module as tools/import-sessions.py.
"""
Import existing Claude Code session data into ClaudeTools.

Scans ~/.claude/projects/ for conversation transcripts, extracts
summaries + key details, and stages them for review before committing.

Usage:
    python tools/import-sessions.py                      # scan + summarize
    python tools/import-sessions.py --output staging/    # custom output dir
    python tools/import-sessions.py --limit 10           # only last 10 sessions
    python tools/import-sessions.py --since 2026-03-01   # sessions after date
"""

import argparse
import json
import os
import re
import sys
from collections import Counter, deque
from datetime import datetime, timedelta
from pathlib import Path

# Keywords that flag a message as "mentions a credential".
_CREDENTIAL_RE = re.compile(r"password|api[_-]?key|token|secret|credential", re.I)
_URL_RE = re.compile(r"https?://[^\s\"'<>]+")
# Tool-input keys whose values are treated as touched paths.
_PATH_INPUT_KEYS = ("file_path", "path", "command")


def find_claude_projects():
    """Find all Claude Code project directories.

    Returns:
        A list of dicts with keys ``dir`` (Path), ``name`` (str),
        ``sessions`` (``*.jsonl`` Paths, newest first) and ``last_modified``
        (naive local ``datetime`` of the newest session), sorted so the most
        recently modified project comes first. Directories without any
        ``*.jsonl`` file are omitted. Returns ``[]`` when
        ``~/.claude/projects`` does not exist.
    """
    claude_dir = Path.home() / ".claude" / "projects"
    if not claude_dir.exists():
        print(f"[!] No Claude projects found at {claude_dir}")
        return []

    projects = []
    for project_dir in claude_dir.iterdir():
        if not project_dir.is_dir():
            continue
        jsonl_files = sorted(project_dir.glob("*.jsonl"), key=os.path.getmtime, reverse=True)
        if jsonl_files:
            projects.append({
                "dir": project_dir,
                "name": project_dir.name,
                "sessions": jsonl_files,
                "last_modified": datetime.fromtimestamp(jsonl_files[0].stat().st_mtime),
            })
    return sorted(projects, key=lambda p: p["last_modified"], reverse=True)


def _parse_timestamp(value):
    """Return *value* parsed as an ISO-8601 datetime, or None if unusable."""
    if not isinstance(value, str) or not value:
        return None
    try:
        # Rewrite a trailing "Z" so older fromisoformat() versions accept it.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None


def _role_and_content(record):
    """Return ``(role, content)`` for one transcript record.

    Flat records (``role``/``content`` at the top level) are read directly.
    Otherwise the pair is taken from a nested ``message`` dict when present.
    NOTE(review): Claude Code transcripts appear to use the nested shape --
    confirm against a real transcript.
    """
    if "role" not in record and "content" not in record:
        nested = record.get("message")
        if isinstance(nested, dict):
            record = nested
    return record.get("role", ""), record.get("content", "")


def _paths_from_tool_input(tool_input):
    """Return path-like string values (truncated to 200 chars) from a tool input."""
    if not isinstance(tool_input, dict):
        return []
    found = []
    for key in _PATH_INPUT_KEYS:
        val = tool_input.get(key)
        # Non-string values (numbers, lists, None) cannot be paths; skip them
        # instead of failing on the substring test.
        if isinstance(val, str) and ("/" in val or "\\" in val):
            found.append(val[:200])
    return found


def extract_session_summary(jsonl_path, max_messages=200):
    """Extract key information from a single session JSONL file.

    The file is streamed line by line. Every JSON-object line counts towards
    ``message_count`` and the first/last timestamps, but only the last
    *max_messages* records are analysed for user messages, tool calls, files,
    credential mentions and URLs. Lines that are blank, not valid JSON, or not
    JSON objects are ignored.

    Args:
        jsonl_path: Path of the ``*.jsonl`` transcript.
        max_messages: Size of the trailing window to analyse. A non-positive
            value (or None) analyses every record.

    Returns:
        ``{"error": ..., "path": ...}`` if the file cannot be read,
        ``{"empty": True, "path": ...}`` if it holds no records, otherwise a
        dict with keys ``path``, ``message_count``, ``first_timestamp``,
        ``last_timestamp``, ``user_messages``, ``tool_calls_summary``,
        ``files_touched``, ``credentials_flagged``, ``credential_contexts``
        and ``urls``.
    """
    window_size = max_messages if max_messages and max_messages > 0 else None
    window = deque(maxlen=window_size)
    message_count = 0
    first_msg_time = None
    last_msg_time = None

    try:
        with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except (json.JSONDecodeError, RecursionError):
                    continue
                if not isinstance(record, dict):
                    continue
                message_count += 1
                window.append(record)
                stamp = _parse_timestamp(record.get("timestamp"))
                if stamp is not None:
                    if first_msg_time is None:
                        first_msg_time = stamp
                    last_msg_time = stamp
    except OSError as e:
        return {"error": str(e), "path": str(jsonl_path)}

    if message_count == 0:
        return {"empty": True, "path": str(jsonl_path)}

    user_messages = []
    tool_counts = Counter()
    files_touched = set()
    credentials_mentioned = []
    urls_mentioned = set()

    for record in window:
        role, content = _role_and_content(record)

        # Content may be a list of blocks: collect the text and tool usage.
        if isinstance(content, list):
            text_parts = []
            for block in content:
                if isinstance(block, dict):
                    block_type = block.get("type")
                    if block_type == "text":
                        text = block.get("text", "")
                        if isinstance(text, str):
                            text_parts.append(text)
                    elif block_type == "tool_use":
                        tool_counts[str(block.get("name") or "unknown")] += 1
                        files_touched.update(_paths_from_tool_input(block.get("input")))
                elif isinstance(block, str):
                    text_parts.append(block)
            content = "\n".join(text_parts)

        if not isinstance(content, str):
            continue

        if role == "user":
            # Keep first 500 chars of each user message.
            # NOTE(review): this excerpt is stored verbatim, so it can still
            # contain a secret the user typed -- review staged output.
            cleaned = content.strip()[:500]
            if cleaned and not cleaned.startswith("<"):  # skip system messages
                user_messages.append(cleaned)

        # Flag credential mentions, one per message. Only the matched keyword
        # is recorded: the text after it is where the secret itself would be,
        # and it must never be written to the staging directory.
        hit = _CREDENTIAL_RE.search(content)
        if hit:
            credentials_mentioned.append(f"{hit.group(0)} [REDACTED]")

        for url in _URL_RE.findall(content):
            urls_mentioned.add(url[:200])

    return {
        "path": str(jsonl_path),
        "message_count": message_count,
        "first_timestamp": str(first_msg_time) if first_msg_time else None,
        "last_timestamp": str(last_msg_time) if last_msg_time else None,
        # First 30 user messages of the analysed window; index 0 is what the
        # index and overview report as the "first ask".
        "user_messages": user_messages[:30],
        "tool_calls_summary": dict(tool_counts.most_common(15)),
        "files_touched": sorted(files_touched)[:50],
        "credentials_flagged": len(credentials_mentioned),
        "credential_contexts": credentials_mentioned[:10],
        "urls": sorted(urls_mentioned)[:20],
    }


def write_summary(projects, output_dir, limit=None, since=None):
    """Write extracted summaries to output directory.

    Sessions from all projects are ordered globally by modification time
    (newest first) before *since* and *limit* are applied, so ``limit=10``
    yields the ten most recent sessions overall.

    Args:
        projects: Project dicts as returned by ``find_claude_projects``.
        output_dir: Directory for the output; created if missing.
        limit: Maximum number of sessions to summarise; None means no limit
            and 0 means none.
        since: Naive ``datetime``; sessions modified before it are skipped.

    Returns:
        The list of summary dicts, in the order they were written.

    Side effects:
        Writes ``session_NNN.json`` per session plus ``INDEX.json`` and
        ``OVERVIEW.md`` into *output_dir*, and prints progress to stdout.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Stat every session once so ordering, filtering and reporting agree.
    candidates = []
    for project in projects:
        for jsonl_path in project["sessions"]:
            try:
                info = jsonl_path.stat()
            except OSError as e:
                print(f"  [!] skipping {jsonl_path}: {e}")
                continue
            candidates.append((info.st_mtime, info.st_size, project, jsonl_path))
    candidates.sort(key=lambda c: c[0], reverse=True)

    summaries = []
    for mtime, size, project, jsonl_path in candidates:
        if limit is not None and len(summaries) >= limit:
            break
        file_mtime = datetime.fromtimestamp(mtime)
        if since is not None and file_mtime < since:
            break  # newest first, so every remaining session is older too

        print(f"  scanning {jsonl_path.name} ({size / 1024:.0f} KB)...")
        summary = extract_session_summary(jsonl_path)
        summary["project_dir"] = str(project["dir"])
        summary["file_modified"] = str(file_mtime)
        summaries.append(summary)

    # Write individual summaries
    for i, s in enumerate(summaries):
        out_path = output_dir / f"session_{i+1:03d}.json"
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(s, f, indent=2, default=str)

    # Write master index
    index = {
        "scan_date": datetime.now().isoformat(),
        "total_projects": len(projects),
        "sessions_scanned": len(summaries),
        "sessions": [
            {
                "file": f"session_{i+1:03d}.json",
                "messages": s.get("message_count", 0),
                "modified": s.get("file_modified", ""),
                "first_user_message": (s["user_messages"][0][:100]
                                       if s.get("user_messages") else "(empty)"),
                "tools_used": list(s.get("tool_calls_summary", {}))[:5],
                "credentials_flagged": s.get("credentials_flagged", 0),
            }
            for i, s in enumerate(summaries)
        ],
    }
    index_path = output_dir / "INDEX.json"
    with open(index_path, "w", encoding="utf-8") as f:
        json.dump(index, f, indent=2, default=str)

    # Write human-readable overview
    overview_path = output_dir / "OVERVIEW.md"
    with open(overview_path, "w", encoding="utf-8") as f:
        f.write("# Imported Session Overview\n\n")
        f.write(f"Scanned: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
        f.write(f"Projects found: {len(projects)}\n")
        f.write(f"Sessions scanned: {len(summaries)}\n\n")
        f.write("## Sessions (most recent first)\n\n")
        for i, s in enumerate(summaries):
            msgs = s.get("user_messages", [])
            first = msgs[0][:120] if msgs else "(empty session)"
            f.write(f"### Session {i+1} — {s.get('file_modified', 'unknown date')}\n")
            f.write(f"- Messages: {s.get('message_count', 0)}\n")
            f.write(f"- First ask: {first}\n")
            if s.get("error"):
                # Make unreadable files distinguishable from empty sessions.
                f.write(f"- [!] Could not read session: {s['error']}\n")
            tools = s.get("tool_calls_summary", {})
            if tools:
                f.write(f"- Tools: {', '.join(list(tools)[:5])}\n")
            files = s.get("files_touched", [])
            if files:
                f.write(f"- Files touched: {len(files)} (see session_{i+1:03d}.json)\n")
            if s.get("credentials_flagged", 0) > 0:
                f.write(f"- [!] Credentials mentioned: {s['credentials_flagged']} instances\n")
            f.write("\n")

    print(f"\n[OK] Wrote {len(summaries)} session summaries to {output_dir}/")
    print(f"     Index:    {index_path}")
    print(f"     Overview: {overview_path}")
    print("\nNext step: ask Claude to review OVERVIEW.md and organize")
    print("     the extracted data into the ClaudeTools folder structure.")
    return summaries


def main():
    """Parse command-line options, discover projects, and write the summaries."""
    parser = argparse.ArgumentParser(description="Import Claude Code sessions into ClaudeTools")
    parser.add_argument("--output", "-o", default="imported-sessions",
                        help="Output directory for summaries (default: imported-sessions/)")
    parser.add_argument("--limit", "-n", type=int, default=None,
                        help="Max sessions to scan")
    parser.add_argument("--since", "-s", type=str, default=None,
                        help="Only sessions modified after this date (YYYY-MM-DD)")
    args = parser.parse_args()

    since = None
    if args.since:
        try:
            since = datetime.strptime(args.since, "%Y-%m-%d")
        except ValueError:
            # Report a usage error instead of a traceback.
            parser.error(f"--since must be a date in YYYY-MM-DD format, got {args.since!r}")
    if args.limit is not None and args.limit < 0:
        parser.error("--limit must be zero or greater")

    print("=== Claude Code Session Import Tool ===")
    print()

    projects = find_claude_projects()
    if not projects:
        print("No sessions found. Nothing to import.")
        sys.exit(0)

    print(f"Found {len(projects)} project(s) with session data:")
    for p in projects[:10]:
        name = p["name"]
        shown = f"{name[:20]}..." if len(name) > 20 else name
        print(f"  {shown} — {len(p['sessions'])} session(s), "
              f"last: {p['last_modified'].strftime('%Y-%m-%d %H:%M')}")
    if len(projects) > 10:
        print(f"  ... and {len(projects) - 10} more")
    print()

    write_summary(projects, args.output, limit=args.limit, since=since)


if __name__ == "__main__":
    main()