#!/usr/bin/env python3
"""
Reorganize imported Claude conversations from flat structure to organized hierarchy.

The script reads ``sessions-index.json`` from ``BASE_DIR`` and moves each
listed session's files into ``organized/sessions-by-date/``. Agent files and
conversation files left at the root afterwards are archived under
``organized/legacy-original/``.
"""

import json
import os
import shutil
import sys
import traceback
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple

# Base directory
BASE_DIR = Path(r"D:\ClaudeTools\imported-conversations\general-work\claude-projects")
ORGANIZED_DIR = BASE_DIR / "organized"
METADATA_DIR = ORGANIZED_DIR / "_metadata"
SESSIONS_DIR = ORGANIZED_DIR / "sessions-by-date"
LEGACY_DIR = ORGANIZED_DIR / "legacy-original"

# Statistics tracking (mutated by the phase functions, reported at the end)
stats = {
    "sessions_organized": 0,
    "agent_files_moved": 0,
    "tool_files_moved": 0,
    "orphaned_agents": 0,
    "flat_sessions": 0,
    "errors": [],
}


def load_session_index() -> Dict:
    """Load sessions-index.json metadata.

    Returns:
        The parsed JSON document found at ``BASE_DIR / "sessions-index.json"``.

    Raises:
        OSError: If the index file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    index_path = BASE_DIR / "sessions-index.json"
    with open(index_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def create_directory_structure():
    """Create the new organized directory structure."""
    print("[Phase 1] Creating directory structure...")

    # Create main directories
    METADATA_DIR.mkdir(parents=True, exist_ok=True)
    SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
    LEGACY_DIR.mkdir(parents=True, exist_ok=True)

    # Create legacy subdirectories
    (LEGACY_DIR / "orphaned-agents").mkdir(exist_ok=True)
    (LEGACY_DIR / "flat-sessions").mkdir(exist_ok=True)

    print(f" [OK] Created {ORGANIZED_DIR}")
    print(f" [OK] Created {METADATA_DIR}")
    print(f" [OK] Created {SESSIONS_DIR}")
    print(f" [OK] Created {LEGACY_DIR}")


def copy_sessions_index():
    """Copy sessions-index.json to metadata directory."""
    src = BASE_DIR / "sessions-index.json"
    dst = METADATA_DIR / "sessions-index.json"
    shutil.copy2(src, dst)
    print(" [OK] Copied sessions-index.json to _metadata/")


def _session_date(session_id: str, created_date) -> str:
    """Return ``YYYY-MM-DD`` for *created_date*, or ``"unknown-date"``.

    A value that cannot be parsed is recorded in ``stats["errors"]``; an empty
    or missing value is silently mapped to ``"unknown-date"``.
    """
    if not created_date:
        return "unknown-date"
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z", so spell
        # the UTC offset out explicitly.
        dt = datetime.fromisoformat(created_date.replace('Z', '+00:00'))
    except (ValueError, TypeError, AttributeError) as e:
        stats["errors"].append(f"Date parse error for {session_id}: {e}")
        return "unknown-date"
    return dt.strftime("%Y-%m-%d")


def _pick_session_dir(date_str: str, session_id: str):
    """Choose a session directory that does not already hold a conversation.

    The short form uses the first 6 characters of the session id. If that
    directory is already occupied (two sessions sharing a date and prefix),
    the full session id is used instead. Returns ``None`` when both are taken,
    so the caller can skip the session rather than overwrite existing data.
    """
    for suffix in (session_id[:6], session_id):
        candidate = SESSIONS_DIR / f"{date_str}_session-{suffix}"
        if not (candidate / "conversation.jsonl").exists():
            return candidate
    return None


def _format_size(size_bytes: int) -> str:
    """Render a byte count as MB when above 1 MB, otherwise as KB."""
    size_mb = size_bytes / (1024 * 1024)
    if size_mb > 1:
        return f"{size_mb:.2f} MB"
    return f"{size_bytes / 1024:.2f} KB"


def _move_session_extras(session_id: str, session_dir: Path):
    """Move a session's ``subagents``/``tool-results`` folders, if present."""
    uuid_subdir = BASE_DIR / session_id
    if not uuid_subdir.is_dir():
        return

    # Move subagents
    subagents_src = uuid_subdir / "subagents"
    if subagents_src.exists():
        subagents_dst = session_dir / "agents"
        shutil.move(str(subagents_src), str(subagents_dst))
        agent_count = len(list(subagents_dst.glob("*.jsonl")))
        stats["agent_files_moved"] += agent_count
        print(f" -> Moved {agent_count} agent files")

    # Move tool-results
    tools_src = uuid_subdir / "tool-results"
    if tools_src.exists():
        tools_dst = session_dir / "tools"
        shutil.move(str(tools_src), str(tools_dst))
        tool_count = len(list(tools_dst.glob("*.txt")))
        stats["tool_files_moved"] += tool_count
        print(f" -> Moved {tool_count} tool result files")

    # Remove now-empty UUID directory
    try:
        uuid_subdir.rmdir()
    except OSError:
        # Directory not empty (it holds something other than the two known
        # folders); deliberately leave it in place.
        pass


def organize_sessions(session_index: Dict):
    """Organize sessions by date and topic.

    For every entry in ``session_index["entries"]`` the matching
    ``<sessionId>.jsonl`` file is moved to
    ``sessions-by-date/<date>_session-<id prefix>/conversation.jsonl``,
    together with the session's ``subagents`` and ``tool-results`` folders.
    Problems with individual entries are appended to ``stats["errors"]`` and
    do not stop the remaining entries from being processed.

    Args:
        session_index: Parsed contents of sessions-index.json.
    """
    print("\n[Phase 2] Organizing sessions by date...")

    for entry in session_index.get("entries", []):
        session_id = entry.get("sessionId")
        if not session_id:
            # One malformed entry must not abort the whole reorganization.
            stats["errors"].append("Index entry without sessionId skipped")
            continue

        date_str = _session_date(session_id, entry.get("created", ""))

        # Move conversation file (UUID.jsonl)
        src_conversation = BASE_DIR / f"{session_id}.jsonl"
        if not src_conversation.exists():
            stats["errors"].append(f"Conversation file not found: {session_id}.jsonl")
            continue

        session_dir = _pick_session_dir(date_str, session_id)
        if session_dir is None:
            stats["errors"].append(
                f"Session directory already occupied for {session_id}; left in place"
            )
            continue

        # Create the directory only once there is something to put in it.
        session_dir.mkdir(parents=True, exist_ok=True)
        dst_conversation = session_dir / "conversation.jsonl"
        shutil.move(str(src_conversation), str(dst_conversation))
        stats["sessions_organized"] += 1

        size_str = _format_size(dst_conversation.stat().st_size)
        print(f" [OK] {session_dir.name}/ ({size_str})")

        _move_session_extras(session_id, session_dir)


def handle_orphaned_agents():
    """Move orphaned agent files to legacy directory."""
    print("\n[Phase 3] Handling orphaned agent files...")
    orphaned_agents_dir = LEGACY_DIR / "orphaned-agents"

    # Find all agent-*.jsonl files at root level
    agent_files = list(BASE_DIR.glob("agent-*.jsonl"))
    for agent_file in agent_files:
        dst = orphaned_agents_dir / agent_file.name
        shutil.move(str(agent_file), str(dst))
        stats["orphaned_agents"] += 1

    print(f" [OK] Moved {stats['orphaned_agents']} orphaned agent files")

    # Create README
    readme_content = """# Orphaned Agent Files

These agent files were found at the root level without a parent session.

**Total Files:** {count}

These files represent sub-agent conversations that were not associated with any identifiable parent session in the sessions-index.json metadata. They are preserved here for reference but may not have complete context.
""".format(count=stats['orphaned_agents'])

    readme_path = orphaned_agents_dir / "README.md"
    readme_path.write_text(readme_content, encoding='utf-8')


def archive_remaining_flat_files():
    """Archive any remaining flat session files."""
    print("\n[Phase 4] Archiving remaining flat files...")
    flat_sessions_dir = LEGACY_DIR / "flat-sessions"

    # Find remaining .jsonl files at root. sessions-index.json has a .json
    # suffix, so this glob never matches it; only agent files need excluding.
    jsonl_files = [
        f for f in BASE_DIR.glob("*.jsonl")
        if not f.name.startswith("agent-")
    ]

    for jsonl_file in jsonl_files:
        dst = flat_sessions_dir / jsonl_file.name
        shutil.move(str(jsonl_file), str(dst))
        stats["flat_sessions"] += 1

    if stats["flat_sessions"] > 0:
        print(f" [OK] Moved {stats['flat_sessions']} flat session files")
    else:
        print(" [OK] No additional flat files to archive")


def calculate_directory_size(directory: Path) -> Tuple[int, int]:
    """Calculate total size and file count in directory.

    Args:
        directory: Root of the tree to scan recursively.

    Returns:
        A ``(total_bytes, file_count)`` tuple covering every regular file
        below *directory*.
    """
    total_size = 0
    file_count = 0
    for item in directory.rglob("*"):
        if item.is_file():
            total_size += item.stat().st_size
            file_count += 1
    return total_size, file_count


def create_documentation():
    """Create README.md for organized directory."""
    print("\n[Phase 5] Creating documentation...")

    # Calculate organized directory size
    total_bytes, file_count = calculate_directory_size(ORGANIZED_DIR)
    total_mb = total_bytes / (1024 * 1024)

    # Measure the largest conversation instead of quoting a fixed figure.
    largest_bytes = max(
        (p.stat().st_size for p in SESSIONS_DIR.glob("*/conversation.jsonl")),
        default=0,
    )
    largest_mb = largest_bytes / (1024 * 1024)
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    readme_content = f"""# Organized Claude Conversations

This directory contains reorganized conversation sessions from Claude Code exports.

## Structure

```
organized/
├── _metadata/
│   └── sessions-index.json          # Original session metadata
├── sessions-by-date/
│   ├── YYYY-MM-DD_session-XXXXXX/   # Organized by creation date
│   │   ├── conversation.jsonl       # Main conversation file
│   │   ├── agents/                  # Sub-agent conversations (if any)
│   │   └── tools/                   # Tool result files (if any)
│   └── ...
└── legacy-original/
    ├── orphaned-agents/             # Agent files without parent sessions
    └── flat-sessions/               # Unmapped session files
```

## Statistics

- **Total Sessions Organized:** {stats['sessions_organized']}
- **Agent Files Moved:** {stats['agent_files_moved']}
- **Tool Result Files Moved:** {stats['tool_files_moved']}
- **Orphaned Agent Files:** {stats['orphaned_agents']}
- **Flat Sessions Archived:** {stats['flat_sessions']}
- **Total Files:** {file_count}
- **Total Size:** {total_mb:.2f} MB

## Finding Sessions

Sessions are organized by creation date in `sessions-by-date/`. Each directory name follows the pattern:

```
YYYY-MM-DD_session-XXXXXX/
```

Where:
- `YYYY-MM-DD` is the creation date
- `XXXXXX` is the first 6 characters of the session UUID (the full UUID is used when two sessions would otherwise share a directory)

## Legacy Original Files

The `legacy-original/` directory contains:

- **orphaned-agents/**: Agent files found at root level with no identifiable parent session
- **flat-sessions/**: Session files that could not be mapped to metadata

## Large Files

The largest conversation file is approximately {largest_mb:.2f} MB. All files were preserved during reorganization.

## Notes

- No files were deleted during reorganization
- All original .jsonl and .txt files were preserved
- Session metadata was extracted from `sessions-index.json`
- Sessions without metadata entries were archived in `legacy-original/`

---

**Reorganization Date:** {generated_at}
**Original Location:** `imported-conversations/general-work/claude-projects/`
"""

    readme_path = ORGANIZED_DIR / "README.md"
    readme_path.write_text(readme_content, encoding='utf-8')
    print(" [OK] Created README.md")


def print_summary():
    """Print final summary report."""
    print("\n" + "="*70)
    print("REORGANIZATION COMPLETE")
    print("="*70)

    print(f"\n1. Sessions Organized: {stats['sessions_organized']}")
    print(f"2. Agent Files Moved: {stats['agent_files_moved']}")
    print(f"3. Tool Result Files Moved: {stats['tool_files_moved']}")
    print(f"4. Orphaned Agents: {stats['orphaned_agents']}")
    print(f"5. Flat Sessions Archived: {stats['flat_sessions']}")

    # Calculate final size
    total_bytes, file_count = calculate_directory_size(ORGANIZED_DIR)
    total_mb = total_bytes / (1024 * 1024)
    print(f"\nTotal Files in organized/: {file_count}")
    print(f"Total Size: {total_mb:.2f} MB")

    if stats["errors"]:
        print(f"\n[WARNING] {len(stats['errors'])} errors encountered:")
        for error in stats["errors"]:
            print(f" - {error}")
    else:
        print("\n[SUCCESS] No errors encountered")

    print(f"\nOrganized directory: {ORGANIZED_DIR}")
    print("="*70)


def main():
    """Main execution function.

    Returns:
        ``0`` on success, ``1`` if any phase raised an exception.
    """
    print("="*70)
    print("CLAUDE CONVERSATIONS REORGANIZATION")
    print("="*70)
    print(f"\nBase directory: {BASE_DIR}")
    print(f"Target directory: {ORGANIZED_DIR}\n")

    try:
        # Load session index
        session_index = load_session_index()
        print(f"[OK] Loaded sessions-index.json ({len(session_index.get('entries', []))} sessions)\n")

        # Execute phases
        create_directory_structure()
        copy_sessions_index()
        organize_sessions(session_index)
        handle_orphaned_agents()
        archive_remaining_flat_files()
        create_documentation()

        # Print summary
        print_summary()
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"\n[ERROR] Fatal error: {e}")
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and is absent under `python -S` or in frozen builds.
    sys.exit(main())