""" Test script for conversation_parser.py Tests all four main functions with sample data. """ import json import os import tempfile from api.utils.conversation_parser import ( parse_jsonl_conversation, categorize_conversation, extract_context_from_conversation, scan_folder_for_conversations, batch_process_conversations, ) def test_parse_jsonl_conversation(): """Test parsing .jsonl conversation files.""" print("\n=== Test 1: parse_jsonl_conversation ===") # Create a temporary .jsonl file with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False, encoding='utf-8') as f: # Write sample conversation data f.write(json.dumps({ "role": "user", "content": "Build a FastAPI authentication system with PostgreSQL", "timestamp": 1705449600000 }) + "\n") f.write(json.dumps({ "role": "assistant", "content": "I'll help you build an auth system using FastAPI and PostgreSQL. Let me create the api/auth.py file.", "timestamp": 1705449620000 }) + "\n") f.write(json.dumps({ "role": "user", "content": "Also add JWT token support", "timestamp": 1705449640000 }) + "\n") temp_file = f.name try: result = parse_jsonl_conversation(temp_file) print(f"Messages: {result['message_count']}") print(f"Duration: {result['duration_seconds']} seconds") print(f"File paths extracted: {result['file_paths']}") print(f"First message: {result['messages'][0]['content'][:50]}...") assert result['message_count'] == 3, "Should have 3 messages" assert result['duration_seconds'] == 40, "Duration should be 40 seconds" assert 'api/auth.py' in result['file_paths'], "Should extract file path" print("[PASS] parse_jsonl_conversation test passed!") finally: os.unlink(temp_file) def test_categorize_conversation(): """Test conversation categorization.""" print("\n=== Test 2: categorize_conversation ===") # Test MSP conversation msp_messages = [ {"role": "user", "content": "Client reported firewall blocking Office365 connection"}, {"role": "assistant", "content": "I'll check the firewall rules for the client site"} ] msp_category = categorize_conversation(msp_messages) print(f"MSP category: {msp_category}") assert msp_category == "msp", "Should categorize as MSP" # Test Development conversation dev_messages = [ {"role": "user", "content": "Build API endpoint for user authentication with FastAPI"}, {"role": "assistant", "content": "I'll create the endpoint using SQLAlchemy and implement JWT tokens"} ] dev_category = categorize_conversation(dev_messages) print(f"Development category: {dev_category}") assert dev_category == "development", "Should categorize as development" # Test General conversation general_messages = [ {"role": "user", "content": "What's the weather like today?"}, {"role": "assistant", "content": "I don't have access to current weather data"} ] general_category = categorize_conversation(general_messages) print(f"General category: {general_category}") assert general_category == "general", "Should categorize as general" print("[PASS] categorize_conversation test passed!") def test_extract_context_from_conversation(): """Test context extraction from conversation.""" print("\n=== Test 3: extract_context_from_conversation ===") # Create a sample conversation conversation = { "messages": [ { "role": "user", "content": "Build a FastAPI REST API with PostgreSQL database", "timestamp": 1705449600000 }, { "role": "assistant", "content": "I'll create the API using FastAPI and SQLAlchemy. Decided to use Alembic for migrations because it integrates well with SQLAlchemy.", "timestamp": 1705449620000 }, { "role": "user", "content": "Add authentication with JWT tokens", "timestamp": 1705449640000 } ], "metadata": { "title": "Build API system", "model": "claude-opus-4", "sessionId": "test-123" }, "file_paths": ["api/main.py", "api/auth.py", "api/models.py"], "tool_calls": [ {"tool": "write", "count": 5}, {"tool": "read", "count": 3} ], "duration_seconds": 40, "message_count": 3 } context = extract_context_from_conversation(conversation) print(f"Category: {context['category']}") print(f"Tags: {context['tags'][:5]}") print(f"Decisions: {len(context['decisions'])}") print(f"Quality score: {context['metrics']['quality_score']}/10") print(f"Key files: {context['key_files']}") assert context['category'] in ['msp', 'development', 'general'], "Should have valid category" assert len(context['tags']) > 0, "Should have extracted tags" assert context['metrics']['message_count'] == 3, "Should have correct message count" print("[PASS] extract_context_from_conversation test passed!") def test_scan_folder_for_conversations(): """Test scanning folder for conversation files.""" print("\n=== Test 4: scan_folder_for_conversations ===") # Create a temporary directory structure with tempfile.TemporaryDirectory() as tmpdir: # Create some conversation files conv1_path = os.path.join(tmpdir, "conversation1.jsonl") conv2_path = os.path.join(tmpdir, "session", "conversation2.json") config_path = os.path.join(tmpdir, "config.json") # Should be skipped os.makedirs(os.path.dirname(conv2_path), exist_ok=True) # Create files with open(conv1_path, 'w') as f: f.write('{"role": "user", "content": "test"}\n') with open(conv2_path, 'w') as f: f.write('{"role": "user", "content": "test"}') with open(config_path, 'w') as f: f.write('{"setting": "value"}') # Scan folder files = scan_folder_for_conversations(tmpdir) print(f"Found {len(files)} conversation files") print(f"Files: {[os.path.basename(f) for f in files]}") assert len(files) == 2, "Should find 2 conversation files" assert any("conversation1.jsonl" in f for f in files), "Should find jsonl file" assert any("conversation2.json" in f for f in files), "Should find json file" assert not any("config.json" in f for f in files), "Should skip config.json" print("[PASS] scan_folder_for_conversations test passed!") def test_batch_process(): """Test batch processing of conversations.""" print("\n=== Test 5: batch_process_conversations ===") with tempfile.TemporaryDirectory() as tmpdir: # Create sample conversations conv1_path = os.path.join(tmpdir, "msp_work.jsonl") conv2_path = os.path.join(tmpdir, "dev_work.jsonl") # MSP conversation with open(conv1_path, 'w') as f: f.write(json.dumps({ "role": "user", "content": "Client ticket: firewall blocking Office365", "timestamp": 1705449600000 }) + "\n") f.write(json.dumps({ "role": "assistant", "content": "I'll check the client firewall configuration", "timestamp": 1705449620000 }) + "\n") # Development conversation with open(conv2_path, 'w') as f: f.write(json.dumps({ "role": "user", "content": "Build FastAPI endpoint for authentication", "timestamp": 1705449600000 }) + "\n") f.write(json.dumps({ "role": "assistant", "content": "Creating API endpoint with SQLAlchemy", "timestamp": 1705449620000 }) + "\n") # Process all conversations processed_count = [0] def progress_callback(file_path, context): processed_count[0] += 1 print(f" Processed: {os.path.basename(file_path)} -> {context['category']}") contexts = batch_process_conversations(tmpdir, progress_callback) print(f"\nTotal processed: {len(contexts)}") assert len(contexts) == 2, "Should process 2 conversations" assert processed_count[0] == 2, "Callback should be called twice" categories = [ctx['category'] for ctx in contexts] print(f"Categories: {categories}") print("[PASS] batch_process_conversations test passed!") def test_real_conversation_file(): """Test with real conversation file if available.""" print("\n=== Test 6: Real conversation file ===") real_file = r"C:\Users\MikeSwanson\AppData\Roaming\Claude\claude-code-sessions\0c32bde5-dc29-49ac-8c80-5adeaf1cdb33\299a238a-5ebf-44f4-948b-eedfa5c1f57c\local_feb419c2-b7a6-4c31-a7ce-38f6c0ccc523.json" if os.path.exists(real_file): try: conversation = parse_jsonl_conversation(real_file) print(f"Real file - Messages: {conversation['message_count']}") print(f"Real file - Metadata: {conversation['metadata'].get('title', 'No title')}") if conversation['message_count'] > 0: context = extract_context_from_conversation(conversation) print(f"Real file - Category: {context['category']}") print(f"Real file - Quality: {context['metrics']['quality_score']}/10") except Exception as e: print(f"Note: Real file test skipped - {e}") else: print("Real conversation file not found - skipping this test") if __name__ == "__main__": print("=" * 60) print("Testing conversation_parser.py") print("=" * 60) try: test_parse_jsonl_conversation() test_categorize_conversation() test_extract_context_from_conversation() test_scan_folder_for_conversations() test_batch_process() test_real_conversation_file() print("\n" + "=" * 60) print("All tests passed! [OK]") print("=" * 60) except AssertionError as e: print(f"\n[FAIL] Test failed: {e}") raise except Exception as e: print(f"\n[ERROR] Unexpected error: {e}") raise