# NOTE(review): the text below is a pasted commit message plus file-listing
# metadata, not Python source; commented out so the module stays importable.
#
# Reorganized project structure for better maintainability and reduced disk
# usage by 95.9% (11 GB -> 451 MB).
# Directory Reorganization (85% reduction in root files):
# - Created docs/ with subdirectories (deployment, testing, database, etc.)
# - Created infrastructure/vpn-configs/ for VPN scripts
# - Moved 90+ files from root to organized locations
# - Archived obsolete documentation (context system, offline mode, zombie debugging)
# - Moved all test files to tests/ directory
# - Root directory: 119 files -> 18 files
# Disk Cleanup (10.55 GB recovered):
# - Deleted Rust build artifacts: 9.6 GB (target/ directories)
# - Deleted Python virtual environments: 161 MB (venv/ directories)
# - Deleted Python cache: 50 KB (__pycache__/)
# New Structure:
# - docs/ - All documentation organized by category
# - docs/archives/ - Obsolete but preserved documentation
# - infrastructure/ - VPN configs and SSH setup
# - tests/ - All test files consolidated
# - logs/ - Ready for future logs
# Benefits:
# - Cleaner root directory (18 vs 119 files)
# - Logical organization of documentation
# - 95.9% disk space reduction
# - Faster navigation and discovery
# - Better portability (build artifacts excluded)
# Build artifacts can be regenerated:
# - Rust: cargo build --release (5-15 min per project)
# - Python: pip install -r requirements.txt (2-3 min)
# Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
#
# File-listing metadata: 287 lines, 10 KiB, Python.
"""
|
|
Test script for conversation_parser.py
|
|
|
|
Tests all four main functions with sample data.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
from api.utils.conversation_parser import (
|
|
parse_jsonl_conversation,
|
|
categorize_conversation,
|
|
extract_context_from_conversation,
|
|
scan_folder_for_conversations,
|
|
batch_process_conversations,
|
|
)
|
|
|
|
|
|
def test_parse_jsonl_conversation():
    """Test parsing .jsonl conversation files."""
    print("\n=== Test 1: parse_jsonl_conversation ===")

    sample_messages = [
        {"role": "user",
         "content": "Build a FastAPI authentication system with PostgreSQL",
         "timestamp": 1705449600000},
        {"role": "assistant",
         "content": "I'll help you build an auth system using FastAPI and PostgreSQL. Let me create the api/auth.py file.",
         "timestamp": 1705449620000},
        {"role": "user",
         "content": "Also add JWT token support",
         "timestamp": 1705449640000},
    ]

    # Write the sample conversation as one JSON object per line.
    # delete=False so the parser can reopen the file by path afterwards.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False, encoding='utf-8') as f:
        for message in sample_messages:
            f.write(json.dumps(message) + "\n")
        temp_file = f.name

    try:
        result = parse_jsonl_conversation(temp_file)

        print(f"Messages: {result['message_count']}")
        print(f"Duration: {result['duration_seconds']} seconds")
        print(f"File paths extracted: {result['file_paths']}")
        print(f"First message: {result['messages'][0]['content'][:50]}...")

        assert result['message_count'] == 3, "Should have 3 messages"
        assert result['duration_seconds'] == 40, "Duration should be 40 seconds"
        assert 'api/auth.py' in result['file_paths'], "Should extract file path"

        print("[PASS] parse_jsonl_conversation test passed!")

    finally:
        # Always remove the temp file, even when an assertion fails.
        os.unlink(temp_file)
def test_categorize_conversation():
    """Test conversation categorization."""
    print("\n=== Test 2: categorize_conversation ===")

    # (label, expected category, assertion message, messages) per case.
    cases = [
        ("MSP", "msp", "Should categorize as MSP", [
            {"role": "user", "content": "Client reported firewall blocking Office365 connection"},
            {"role": "assistant", "content": "I'll check the firewall rules for the client site"},
        ]),
        ("Development", "development", "Should categorize as development", [
            {"role": "user", "content": "Build API endpoint for user authentication with FastAPI"},
            {"role": "assistant", "content": "I'll create the endpoint using SQLAlchemy and implement JWT tokens"},
        ]),
        ("General", "general", "Should categorize as general", [
            {"role": "user", "content": "What's the weather like today?"},
            {"role": "assistant", "content": "I don't have access to current weather data"},
        ]),
    ]

    for label, expected, failure_message, messages in cases:
        category = categorize_conversation(messages)
        print(f"{label} category: {category}")
        assert category == expected, failure_message

    print("[PASS] categorize_conversation test passed!")
def test_extract_context_from_conversation():
    """Test context extraction from conversation."""
    print("\n=== Test 3: extract_context_from_conversation ===")

    # Assemble a parsed-conversation dict in the shape that
    # parse_jsonl_conversation produces.
    messages = [
        {
            "role": "user",
            "content": "Build a FastAPI REST API with PostgreSQL database",
            "timestamp": 1705449600000,
        },
        {
            "role": "assistant",
            "content": "I'll create the API using FastAPI and SQLAlchemy. Decided to use Alembic for migrations because it integrates well with SQLAlchemy.",
            "timestamp": 1705449620000,
        },
        {
            "role": "user",
            "content": "Add authentication with JWT tokens",
            "timestamp": 1705449640000,
        },
    ]
    conversation = {
        "messages": messages,
        "metadata": {
            "title": "Build API system",
            "model": "claude-opus-4",
            "sessionId": "test-123",
        },
        "file_paths": ["api/main.py", "api/auth.py", "api/models.py"],
        "tool_calls": [
            {"tool": "write", "count": 5},
            {"tool": "read", "count": 3},
        ],
        "duration_seconds": 40,
        "message_count": 3,
    }

    context = extract_context_from_conversation(conversation)

    print(f"Category: {context['category']}")
    print(f"Tags: {context['tags'][:5]}")
    print(f"Decisions: {len(context['decisions'])}")
    print(f"Quality score: {context['metrics']['quality_score']}/10")
    print(f"Key files: {context['key_files']}")

    assert context['category'] in ['msp', 'development', 'general'], "Should have valid category"
    assert len(context['tags']) > 0, "Should have extracted tags"
    assert context['metrics']['message_count'] == 3, "Should have correct message count"

    print("[PASS] extract_context_from_conversation test passed!")
def test_scan_folder_for_conversations():
    """Test scanning folder for conversation files.

    Builds a temporary tree containing two conversation files (a top-level
    .jsonl and a nested .json) plus a config.json that the scanner is
    expected to ignore.
    """
    print("\n=== Test 4: scan_folder_for_conversations ===")

    # Create a temporary directory structure
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create some conversation files
        conv1_path = os.path.join(tmpdir, "conversation1.jsonl")
        conv2_path = os.path.join(tmpdir, "session", "conversation2.json")
        config_path = os.path.join(tmpdir, "config.json")  # Should be skipped

        os.makedirs(os.path.dirname(conv2_path), exist_ok=True)

        # Create files. encoding pinned to utf-8 for consistency with the
        # other tests and to avoid platform-default encoding differences.
        with open(conv1_path, 'w', encoding='utf-8') as f:
            f.write('{"role": "user", "content": "test"}\n')

        with open(conv2_path, 'w', encoding='utf-8') as f:
            f.write('{"role": "user", "content": "test"}')

        with open(config_path, 'w', encoding='utf-8') as f:
            f.write('{"setting": "value"}')

        # Scan folder
        files = scan_folder_for_conversations(tmpdir)

        print(f"Found {len(files)} conversation files")
        print(f"Files: {[os.path.basename(f) for f in files]}")

        assert len(files) == 2, "Should find 2 conversation files"
        assert any("conversation1.jsonl" in f for f in files), "Should find jsonl file"
        assert any("conversation2.json" in f for f in files), "Should find json file"
        assert not any("config.json" in f for f in files), "Should skip config.json"

        print("[PASS] scan_folder_for_conversations test passed!")
def test_batch_process():
    """Test batch processing of conversations.

    Writes one MSP-flavoured and one development-flavoured .jsonl
    conversation, then checks that batch_process_conversations processes
    both files and invokes the progress callback once per file.
    """
    print("\n=== Test 5: batch_process_conversations ===")

    def _write_jsonl(path, messages):
        # One JSON object per line — the format parse_jsonl_conversation
        # reads. encoding pinned for consistency with the other tests.
        with open(path, 'w', encoding='utf-8') as f:
            for message in messages:
                f.write(json.dumps(message) + "\n")

    with tempfile.TemporaryDirectory() as tmpdir:
        # MSP conversation
        _write_jsonl(os.path.join(tmpdir, "msp_work.jsonl"), [
            {"role": "user",
             "content": "Client ticket: firewall blocking Office365",
             "timestamp": 1705449600000},
            {"role": "assistant",
             "content": "I'll check the client firewall configuration",
             "timestamp": 1705449620000},
        ])

        # Development conversation
        _write_jsonl(os.path.join(tmpdir, "dev_work.jsonl"), [
            {"role": "user",
             "content": "Build FastAPI endpoint for authentication",
             "timestamp": 1705449600000},
            {"role": "assistant",
             "content": "Creating API endpoint with SQLAlchemy",
             "timestamp": 1705449620000},
        ])

        # Count callback invocations via nonlocal instead of the
        # single-element-list mutation workaround.
        processed_count = 0

        def progress_callback(file_path, context):
            nonlocal processed_count
            processed_count += 1
            print(f"  Processed: {os.path.basename(file_path)} -> {context['category']}")

        contexts = batch_process_conversations(tmpdir, progress_callback)

        print(f"\nTotal processed: {len(contexts)}")

        assert len(contexts) == 2, "Should process 2 conversations"
        assert processed_count == 2, "Callback should be called twice"

        categories = [ctx['category'] for ctx in contexts]
        print(f"Categories: {categories}")

        print("[PASS] batch_process_conversations test passed!")
def test_real_conversation_file():
    """Test with real conversation file if available."""
    print("\n=== Test 6: Real conversation file ===")

    real_file = r"C:\Users\MikeSwanson\AppData\Roaming\Claude\claude-code-sessions\0c32bde5-dc29-49ac-8c80-5adeaf1cdb33\299a238a-5ebf-44f4-948b-eedfa5c1f57c\local_feb419c2-b7a6-4c31-a7ce-38f6c0ccc523.json"

    # Guard clause: this path only exists on the original dev machine.
    if not os.path.exists(real_file):
        print("Real conversation file not found - skipping this test")
        return

    try:
        conversation = parse_jsonl_conversation(real_file)
        print(f"Real file - Messages: {conversation['message_count']}")
        print(f"Real file - Metadata: {conversation['metadata'].get('title', 'No title')}")

        if conversation['message_count'] > 0:
            context = extract_context_from_conversation(conversation)
            print(f"Real file - Category: {context['category']}")
            print(f"Real file - Quality: {context['metrics']['quality_score']}/10")
    except Exception as e:
        # Best-effort smoke test against local data; never fail the suite.
        print(f"Note: Real file test skipped - {e}")
if __name__ == "__main__":
|
|
print("=" * 60)
|
|
print("Testing conversation_parser.py")
|
|
print("=" * 60)
|
|
|
|
try:
|
|
test_parse_jsonl_conversation()
|
|
test_categorize_conversation()
|
|
test_extract_context_from_conversation()
|
|
test_scan_folder_for_conversations()
|
|
test_batch_process()
|
|
test_real_conversation_file()
|
|
|
|
print("\n" + "=" * 60)
|
|
print("All tests passed! [OK]")
|
|
print("=" * 60)
|
|
|
|
except AssertionError as e:
|
|
print(f"\n[FAIL] Test failed: {e}")
|
|
raise
|
|
except Exception as e:
|
|
print(f"\n[ERROR] Unexpected error: {e}")
|
|
raise
|