Files
claudetools/tests/test_conversation_parser.py
Mike Swanson 06f7617718 feat: Major directory reorganization and cleanup
Reorganized project structure for better maintainability and reduced
disk usage by 95.9% (11 GB -> 451 MB).

Directory Reorganization (85% reduction in root files):
- Created docs/ with subdirectories (deployment, testing, database, etc.)
- Created infrastructure/vpn-configs/ for VPN scripts
- Moved 90+ files from root to organized locations
- Archived obsolete documentation (context system, offline mode, zombie debugging)
- Moved all test files to tests/ directory
- Root directory: 119 files -> 18 files

Disk Cleanup (10.55 GB recovered):
- Deleted Rust build artifacts: 9.6 GB (target/ directories)
- Deleted Python virtual environments: 161 MB (venv/ directories)
- Deleted Python cache: 50 KB (__pycache__/)

New Structure:
- docs/ - All documentation organized by category
- docs/archives/ - Obsolete but preserved documentation
- infrastructure/ - VPN configs and SSH setup
- tests/ - All test files consolidated
- logs/ - Ready for future logs

Benefits:
- Cleaner root directory (18 vs 119 files)
- Logical organization of documentation
- 95.9% disk space reduction
- Faster navigation and discovery
- Better portability (build artifacts excluded)

Build artifacts can be regenerated:
- Rust: cargo build --release (5-15 min per project)
- Python: pip install -r requirements.txt (2-3 min)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 20:42:28 -07:00

287 lines
10 KiB
Python

"""
Test script for conversation_parser.py
Tests the five imported parser functions with sample data.
"""
import json
import os
import tempfile
from api.utils.conversation_parser import (
parse_jsonl_conversation,
categorize_conversation,
extract_context_from_conversation,
scan_folder_for_conversations,
batch_process_conversations,
)
def test_parse_jsonl_conversation():
    """Check parse_jsonl_conversation against a small temporary .jsonl file.

    Three JSON records (user, assistant, user) are written one per line with
    millisecond timestamps spanning 40000 ms. The test asserts on the
    reported message count, the duration in seconds, and that the path
    mentioned in the assistant message shows up in ``file_paths``.
    The temporary file is always removed afterwards.
    """
    print("\n=== Test 1: parse_jsonl_conversation ===")

    # Sample conversation, one record per output line.
    sample_records = [
        {
            "role": "user",
            "content": "Build a FastAPI authentication system with PostgreSQL",
            "timestamp": 1705449600000,
        },
        {
            "role": "assistant",
            "content": "I'll help you build an auth system using FastAPI and PostgreSQL. Let me create the api/auth.py file.",
            "timestamp": 1705449620000,
        },
        {
            "role": "user",
            "content": "Also add JWT token support",
            "timestamp": 1705449640000,
        },
    ]

    # delete=False so the file survives the ``with`` and can be re-opened by
    # the parser; it is unlinked explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.jsonl', delete=False, encoding='utf-8'
    ) as handle:
        for record in sample_records:
            handle.write(json.dumps(record) + "\n")
        temp_file = handle.name

    try:
        result = parse_jsonl_conversation(temp_file)
        first_content = result['messages'][0]['content']

        print(f"Messages: {result['message_count']}")
        print(f"Duration: {result['duration_seconds']} seconds")
        print(f"File paths extracted: {result['file_paths']}")
        print(f"First message: {first_content[:50]}...")

        assert result['message_count'] == 3, "Should have 3 messages"
        assert result['duration_seconds'] == 40, "Duration should be 40 seconds"
        assert 'api/auth.py' in result['file_paths'], "Should extract file path"
        print("[PASS] parse_jsonl_conversation test passed!")
    finally:
        os.unlink(temp_file)
def test_categorize_conversation():
    """Check categorize_conversation on MSP, development and general samples.

    Each case is a two-message conversation; the test asserts the returned
    category string equals "msp", "development" and "general" respectively.
    """
    print("\n=== Test 2: categorize_conversation ===")

    # (printed label, messages, expected category, assertion message)
    cases = (
        (
            "MSP",
            [
                {"role": "user", "content": "Client reported firewall blocking Office365 connection"},
                {"role": "assistant", "content": "I'll check the firewall rules for the client site"},
            ],
            "msp",
            "Should categorize as MSP",
        ),
        (
            "Development",
            [
                {"role": "user", "content": "Build API endpoint for user authentication with FastAPI"},
                {"role": "assistant", "content": "I'll create the endpoint using SQLAlchemy and implement JWT tokens"},
            ],
            "development",
            "Should categorize as development",
        ),
        (
            "General",
            [
                {"role": "user", "content": "What's the weather like today?"},
                {"role": "assistant", "content": "I don't have access to current weather data"},
            ],
            "general",
            "Should categorize as general",
        ),
    )

    for label, sample_messages, expected, failure_note in cases:
        actual = categorize_conversation(sample_messages)
        print(f"{label} category: {actual}")
        assert actual == expected, failure_note

    print("[PASS] categorize_conversation test passed!")
def test_extract_context_from_conversation():
    """Check extract_context_from_conversation on a hand-built conversation.

    The input dict carries messages, metadata, file paths, tool-call counts,
    a duration and a message count. The test asserts that the returned
    context has one of the three known categories, at least one tag, and a
    ``metrics['message_count']`` of 3.
    """
    print("\n=== Test 3: extract_context_from_conversation ===")

    sample_messages = [
        {
            "role": "user",
            "content": "Build a FastAPI REST API with PostgreSQL database",
            "timestamp": 1705449600000,
        },
        {
            "role": "assistant",
            "content": "I'll create the API using FastAPI and SQLAlchemy. Decided to use Alembic for migrations because it integrates well with SQLAlchemy.",
            "timestamp": 1705449620000,
        },
        {
            "role": "user",
            "content": "Add authentication with JWT tokens",
            "timestamp": 1705449640000,
        },
    ]
    sample_metadata = {
        "title": "Build API system",
        "model": "claude-opus-4",
        "sessionId": "test-123",
    }
    sample_tool_calls = [
        {"tool": "write", "count": 5},
        {"tool": "read", "count": 3},
    ]

    # Assemble the sample conversation passed to the extractor.
    conversation = {
        "messages": sample_messages,
        "metadata": sample_metadata,
        "file_paths": ["api/main.py", "api/auth.py", "api/models.py"],
        "tool_calls": sample_tool_calls,
        "duration_seconds": 40,
        "message_count": 3,
    }

    context = extract_context_from_conversation(conversation)

    print(f"Category: {context['category']}")
    print(f"Tags: {context['tags'][:5]}")
    print(f"Decisions: {len(context['decisions'])}")
    print(f"Quality score: {context['metrics']['quality_score']}/10")
    print(f"Key files: {context['key_files']}")

    valid_categories = ['msp', 'development', 'general']
    assert context['category'] in valid_categories, "Should have valid category"
    assert len(context['tags']) > 0, "Should have extracted tags"
    assert context['metrics']['message_count'] == 3, "Should have correct message count"
    print("[PASS] extract_context_from_conversation test passed!")
def test_scan_folder_for_conversations():
    """Check scan_folder_for_conversations on a temporary directory tree.

    The tree holds a top-level ``conversation1.jsonl``, a nested
    ``session/conversation2.json`` and a top-level ``config.json``. The test
    asserts that exactly the two conversation files are returned and that
    ``config.json`` is not among them.
    """
    print("\n=== Test 4: scan_folder_for_conversations ===")

    with tempfile.TemporaryDirectory() as scratch_dir:
        jsonl_file = os.path.join(scratch_dir, "conversation1.jsonl")
        nested_json_file = os.path.join(scratch_dir, "session", "conversation2.json")
        settings_file = os.path.join(scratch_dir, "config.json")  # Should be skipped

        # The nested file needs its "session" sub-directory to exist first.
        os.makedirs(os.path.dirname(nested_json_file), exist_ok=True)

        # (destination, exact file content)
        fixtures = (
            (jsonl_file, '{"role": "user", "content": "test"}\n'),
            (nested_json_file, '{"role": "user", "content": "test"}'),
            (settings_file, '{"setting": "value"}'),
        )
        for destination, payload in fixtures:
            with open(destination, 'w') as out:
                out.write(payload)

        files = scan_folder_for_conversations(scratch_dir)
        base_names = [os.path.basename(f) for f in files]

        print(f"Found {len(files)} conversation files")
        print(f"Files: {base_names}")

        assert len(files) == 2, "Should find 2 conversation files"
        assert any("conversation1.jsonl" in f for f in files), "Should find jsonl file"
        assert any("conversation2.json" in f for f in files), "Should find json file"
        assert all("config.json" not in f for f in files), "Should skip config.json"
        print("[PASS] scan_folder_for_conversations test passed!")
def test_batch_process():
    """Check batch_process_conversations over two temporary .jsonl files.

    One file holds an MSP-flavoured exchange and the other a development
    one. The test asserts that two contexts come back and that the progress
    callback was invoked twice; the resulting categories are printed but not
    asserted.
    """
    print("\n=== Test 5: batch_process_conversations ===")

    def write_jsonl(path, records):
        # One JSON document per line, newline-terminated.
        with open(path, 'w') as out:
            for record in records:
                out.write(json.dumps(record) + "\n")

    with tempfile.TemporaryDirectory() as scratch_dir:
        msp_path = os.path.join(scratch_dir, "msp_work.jsonl")
        dev_path = os.path.join(scratch_dir, "dev_work.jsonl")

        # MSP conversation
        write_jsonl(msp_path, [
            {
                "role": "user",
                "content": "Client ticket: firewall blocking Office365",
                "timestamp": 1705449600000,
            },
            {
                "role": "assistant",
                "content": "I'll check the client firewall configuration",
                "timestamp": 1705449620000,
            },
        ])

        # Development conversation
        write_jsonl(dev_path, [
            {
                "role": "user",
                "content": "Build FastAPI endpoint for authentication",
                "timestamp": 1705449600000,
            },
            {
                "role": "assistant",
                "content": "Creating API endpoint with SQLAlchemy",
                "timestamp": 1705449620000,
            },
        ])

        callback_hits = 0

        def progress_callback(file_path, context):
            # Count invocations and echo each processed file with its category.
            nonlocal callback_hits
            callback_hits += 1
            print(f" Processed: {os.path.basename(file_path)} -> {context['category']}")

        contexts = batch_process_conversations(scratch_dir, progress_callback)
        print(f"\nTotal processed: {len(contexts)}")

        assert len(contexts) == 2, "Should process 2 conversations"
        assert callback_hits == 2, "Callback should be called twice"

        categories = [ctx['category'] for ctx in contexts]
        print(f"Categories: {categories}")
        print("[PASS] batch_process_conversations test passed!")
def test_real_conversation_file():
    """Smoke-test the parser against a real session file when one is available.

    The file location defaults to a session export under one developer's
    Windows profile. Set the ``CLAUDE_REAL_CONVERSATION_FILE`` environment
    variable to point the check at a different file so it can run on other
    machines. When the file does not exist, a notice is printed and the
    parser is never called.

    This is a best-effort check with no assertions: any exception raised
    while parsing or extracting context is reported as a skip, not a failure.
    """
    print("\n=== Test 6: Real conversation file ===")

    default_file = r"C:\Users\MikeSwanson\AppData\Roaming\Claude\claude-code-sessions\0c32bde5-dc29-49ac-8c80-5adeaf1cdb33\299a238a-5ebf-44f4-948b-eedfa5c1f57c\local_feb419c2-b7a6-4c31-a7ce-38f6c0ccc523.json"
    # An unset or empty override falls back to the original hard-coded path.
    real_file = os.environ.get("CLAUDE_REAL_CONVERSATION_FILE") or default_file

    if not os.path.exists(real_file):
        print("Real conversation file not found - skipping this test")
        return

    try:
        conversation = parse_jsonl_conversation(real_file)
        print(f"Real file - Messages: {conversation['message_count']}")
        print(f"Real file - Metadata: {conversation['metadata'].get('title', 'No title')}")

        # Context extraction is only attempted when there is something to analyse.
        if conversation['message_count'] > 0:
            context = extract_context_from_conversation(conversation)
            print(f"Real file - Category: {context['category']}")
            print(f"Real file - Quality: {context['metrics']['quality_score']}/10")
    except Exception as e:
        # Deliberately broad: real-world files vary, and this optional check
        # must not fail the suite.
        print(f"Note: Real file test skipped - {e}")
if __name__ == "__main__":
    # Script entry point: run every test in order, print a summary banner,
    # and re-raise on the first failure after printing a tagged message.
    banner = "=" * 60
    print(banner)
    print("Testing conversation_parser.py")
    print(banner)

    try:
        # Order matches the numbered "Test 1" .. "Test 6" banners.
        ordered_tests = (
            test_parse_jsonl_conversation,
            test_categorize_conversation,
            test_extract_context_from_conversation,
            test_scan_folder_for_conversations,
            test_batch_process,
            test_real_conversation_file,
        )
        for run_test in ordered_tests:
            run_test()

        print("\n" + banner)
        print("All tests passed! [OK]")
        print(banner)
    except AssertionError as failure:
        print(f"\n[FAIL] Test failed: {failure}")
        raise
    except Exception as error:
        print(f"\n[ERROR] Unexpected error: {error}")
        raise