#!/usr/bin/env python3
"""
Direct Database Import Script

Imports Claude conversation contexts directly to the database,
bypassing the API. Useful when the API is on a remote server
but the conversation files are local.

Usage:
    python scripts/direct_db_import.py --folder "C:\\Users\\MikeSwanson\\.claude\\projects" --dry-run
    python scripts/direct_db_import.py --folder "C:\\Users\\MikeSwanson\\.claude\\projects" --execute
"""

import argparse
import json
import os
import sys
import traceback
from pathlib import Path
from typing import Optional

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from api.utils.conversation_parser import (
    extract_context_from_conversation,
    parse_jsonl_conversation,
    scan_folder_for_conversations,
)
from api.models.conversation_context import ConversationContext
from api.schemas.conversation_context import ConversationContextCreate


def get_database_url():
    """Return the database URL read from the ``DATABASE_URL`` environment variable.

    If a ``.env`` file exists in the parent of this script's directory it is
    loaded first. When ``DATABASE_URL`` is missing or empty, an error is
    printed and the process exits with status 1.

    Returns:
        The database URL string.
    """
    # Load from .env file
    env_path = Path(__file__).parent.parent / ".env"
    if env_path.exists():
        load_dotenv(env_path)

    db_url = os.getenv("DATABASE_URL")
    if not db_url:
        print("[ERROR] DATABASE_URL not found in .env file")
        sys.exit(1)

    # Show only what follows the LAST '@' (host/database). Splitting on the
    # first '@' could echo part of a password that itself contains '@'.
    location = db_url.rsplit("@", 1)[-1] if "@" in db_url else "configured"
    print(f"[OK] Database: {location}")
    return db_url


def _build_context(raw_context: dict, project_id: Optional[str]):
    """Map an extracted conversation context onto the database column layout.

    Returns:
        A ``(context, tags)`` tuple: ``context`` is the dict of column values
        (decisions and tags JSON-encoded, or ``None`` when empty) and ``tags``
        is the un-encoded tag list kept for the human-readable preview.
    """
    metadata = raw_context.get("raw_metadata", {})
    summary_obj = raw_context.get("summary", {})

    # Get title from metadata or fall back to a generic one
    title = metadata.get("title") or metadata.get("conversation_id") or "Conversation"

    # Get dense summary from summary object
    dense_summary = (
        summary_obj.get("summary")
        or summary_obj.get("dense_summary")
        or "No summary available"
    )

    # Decisions and tags are stored as JSON strings; empty values become None
    decisions = raw_context.get("decisions", [])
    tags = raw_context.get("tags", [])

    context = {
        "project_id": project_id,
        "session_id": None,
        "machine_id": None,
        "context_type": raw_context.get("category", "general_context"),
        "title": title,
        "dense_summary": dense_summary,
        "key_decisions": json.dumps(decisions) if decisions else None,
        "current_state": None,
        "tags": json.dumps(tags) if tags else None,
        "relevance_score": raw_context.get("metrics", {}).get("quality_score", 5.0),
    }

    if isinstance(tags, (list, tuple)):
        preview_tags = list(tags)
    else:
        preview_tags = [tags] if tags else []
    return context, preview_tags


def _save_contexts(contexts: list) -> int:
    """Insert the prepared context dicts in one transaction; return the count added.

    The session is rolled back if adding or committing raises, and is always
    closed. The exception is re-raised for the caller to report.
    """
    engine = create_engine(get_database_url())
    SessionLocal = sessionmaker(bind=engine)
    db = SessionLocal()

    saved_count = 0
    try:
        for context_data in contexts:
            try:
                context_obj = ConversationContext(
                    project_id=context_data.get("project_id"),
                    session_id=context_data.get("session_id"),
                    machine_id=context_data.get("machine_id"),
                    context_type=context_data["context_type"],
                    title=context_data["title"],
                    dense_summary=context_data["dense_summary"],
                    key_decisions=context_data.get("key_decisions"),
                    current_state=context_data.get("current_state"),
                    tags=context_data.get("tags", []),
                    relevance_score=context_data.get("relevance_score", 5.0),
                )
                db.add(context_obj)
                saved_count += 1
            except Exception as e:
                print(f"[WARNING] Failed to save context '{context_data.get('title', 'Unknown')}': {e}")

        # Commit all changes
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()

    return saved_count


def _print_results(result: dict, dry_run: bool) -> None:
    """Print the summary counters plus the first five previews and errors."""
    print("\n" + "=" * 70)
    print("IMPORT RESULTS")
    print("=" * 70)
    print(f"\nFiles scanned: {result['files_scanned']}")
    print(f"Files processed: {result['files_processed']}")
    print(f"Contexts created: {result['contexts_created'] if not dry_run else 'N/A (dry run)'}")
    print(f"Errors: {len(result['errors'])}")

    # Show preview of contexts
    if result["contexts_preview"]:
        print("\n[PREVIEW] First 5 contexts:")
        for i, ctx in enumerate(result["contexts_preview"][:5], 1):
            # Preview tags are a real list here, so joining yields tag names
            shown_tags = ", ".join(str(tag) for tag in ctx.get("tags", [])[:5])
            print(f"\n {i}. {ctx['title']}")
            print(f" Type: {ctx['type']}")
            print(f" Messages: {ctx['message_count']}")
            print(f" Tags: {shown_tags}")
            print(f" Relevance: {ctx.get('relevance_score', 0.0):.1f}/10.0")

    # Show errors
    if result["errors"]:
        print("\n[ERRORS] First 5 errors:")
        for i, err in enumerate(result["errors"][:5], 1):
            print(f"\n {i}. File: {Path(err['file']).name}")
            print(f" Error: {err['error']}")
        if len(result["errors"]) > 5:
            print(f"\n ... and {len(result['errors']) - 5} more errors")

    print("\n" + "=" * 70)


def import_conversations(folder_path: str, dry_run: bool = True, project_id: Optional[str] = None):
    """
    Import conversations directly to database.

    Scans ``folder_path`` for conversation files, parses each one into a
    context record, and (unless ``dry_run``) inserts the records into the
    database. Progress and a final summary are printed to stdout.

    Args:
        folder_path: Path to folder containing .jsonl files
        dry_run: If True, preview without saving
        project_id: Optional project ID to associate contexts with

    Returns:
        A dict with the keys ``files_scanned``, ``files_processed``,
        ``contexts_created``, ``errors`` (per-file ``{"file", "error"}``
        dicts), ``contexts_preview``, ``contexts_data`` and ``fatal_error``
        (``None``, or the message of a scan/database failure that aborted
        the import).
    """
    print("\n" + "=" * 70)
    print("DIRECT DATABASE IMPORT")
    print("=" * 70)
    print(f"Mode: {'DRY RUN (preview only)' if dry_run else 'EXECUTE (will save to database)'}")
    print(f"Folder: {folder_path}")
    print("")

    # Results tracking
    result = {
        "files_scanned": 0,
        "files_processed": 0,
        "contexts_created": 0,
        "errors": [],
        "contexts_preview": [],
        "contexts_data": [],  # Store full context data for database insert
        "fatal_error": None,  # Set when the scan or the database save fails
    }

    # Step 1: Scan for conversation files
    print("[1/3] Scanning folder for conversation files...")
    try:
        conversation_files = scan_folder_for_conversations(folder_path)
    except Exception as e:
        print(f"[ERROR] Failed to scan folder: {e}")
        result["fatal_error"] = f"Failed to scan folder: {e}"
        return result

    result["files_scanned"] = len(conversation_files)
    print(f" Found {len(conversation_files)} .jsonl files")

    if not conversation_files:
        print("[WARNING] No conversation files found")
        return result

    # Step 2: Parse conversations
    print("\n[2/3] Parsing conversations...")
    print(f"[DEBUG] dry_run = {dry_run}")

    for file_path in conversation_files:
        try:
            conversation = parse_jsonl_conversation(file_path)
            if not conversation.get("messages"):
                result["errors"].append({
                    "file": file_path,
                    "error": "No messages found",
                })
                continue

            raw_context = extract_context_from_conversation(conversation)
            context, preview_tags = _build_context(raw_context, project_id)

            result["files_processed"] += 1
            result["contexts_preview"].append({
                "title": context["title"],
                "type": context["context_type"],
                "message_count": len(conversation["messages"]),
                "tags": preview_tags,
                "relevance_score": context.get("relevance_score", 0.0),
            })

            # Store full context data for database insert
            if not dry_run:
                result["contexts_data"].append(context)
                print(f" [DEBUG] Stored context: {context['title'][:50]}")
        except Exception as e:
            # One unreadable file must not abort the batch; record and move on
            print(f"[DEBUG] Exception for {Path(file_path).name}: {e}")
            result["errors"].append({
                "file": file_path,
                "error": str(e),
            })

    print(f" Processed {result['files_processed']} files successfully")
    print(f" Errors: {len(result['errors'])}")

    # Step 3: Save to database (if execute mode)
    if dry_run:
        print("\n[3/3] Skipping database save (dry run mode)")
    else:
        print("\n[3/3] Saving to database...")
        try:
            saved_count = _save_contexts(result["contexts_data"])
        except Exception as e:
            print(f"[ERROR] Database error: {e}")
            result["fatal_error"] = f"Database error: {e}"
            return result
        result["contexts_created"] = saved_count
        print(f" Saved {saved_count} contexts to database")

    _print_results(result, dry_run)
    return result


def main():
    """Main entry point.

    Parses the command line, validates the folder, runs the import and exits
    with status 0 on success or 1 on any failure (missing folder, scan or
    database failure, or an unexpected exception).
    """
    parser = argparse.ArgumentParser(
        description="Import Claude conversations directly to database (bypasses API)"
    )
    parser.add_argument(
        "--folder",
        required=True,
        help="Path to folder containing .jsonl conversation files",
    )

    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview import without saving",
    )
    mode_group.add_argument(
        "--execute",
        action="store_true",
        help="Execute import and save to database",
    )

    parser.add_argument(
        "--project-id",
        help="Associate all contexts with this project ID",
    )

    args = parser.parse_args()

    # Validate folder
    folder_path = Path(args.folder)
    if not folder_path.exists():
        print(f"[ERROR] Folder does not exist: {folder_path}")
        sys.exit(1)

    # Run import
    try:
        result = import_conversations(
            folder_path=str(folder_path),
            dry_run=args.dry_run,
            project_id=args.project_id,
        )
    except Exception as e:
        print(f"\n[ERROR] Import failed: {e}")
        traceback.print_exc()
        sys.exit(1)

    # A scan or database failure was already reported in detail; do not
    # announce success or exit 0 after it.
    if result.get("fatal_error"):
        print(f"\n[ERROR] Import failed: {result['fatal_error']}")
        sys.exit(1)

    # Success message
    if args.dry_run:
        print("\n[SUCCESS] Dry run completed")
        print(" Run with --execute to save to database")
    else:
        print("\n[SUCCESS] Import completed")
        print(f" Created {result['contexts_created']} contexts")
    sys.exit(0)


if __name__ == "__main__":
    main()