# NOTE(review): the text below is a pasted commit message plus file-listing
# metadata, not Python source; commented out so the module stays importable.
#
# Reorganized project structure for better maintainability and reduced disk
# usage by 95.9% (11 GB -> 451 MB).
# Directory Reorganization (85% reduction in root files):
# - Created docs/ with subdirectories (deployment, testing, database, etc.)
# - Created infrastructure/vpn-configs/ for VPN scripts
# - Moved 90+ files from root to organized locations
# - Archived obsolete documentation (context system, offline mode, zombie debugging)
# - Moved all test files to tests/ directory
# - Root directory: 119 files -> 18 files
# Disk Cleanup (10.55 GB recovered):
# - Deleted Rust build artifacts: 9.6 GB (target/ directories)
# - Deleted Python virtual environments: 161 MB (venv/ directories)
# - Deleted Python cache: 50 KB (__pycache__/)
# New Structure:
# - docs/ - All documentation organized by category
# - docs/archives/ - Obsolete but preserved documentation
# - infrastructure/ - VPN configs and SSH setup
# - tests/ - All test files consolidated
# - logs/ - Ready for future logs
# Benefits:
# - Cleaner root directory (18 vs 119 files)
# - Logical organization of documentation
# - 95.9% disk space reduction
# - Faster navigation and discovery
# - Better portability (build artifacts excluded)
# Build artifacts can be regenerated:
# - Rust: cargo build --release (5-15 min per project)
# - Python: pip install -r requirements.txt (2-3 min)
# Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
#
# File-listing metadata: 287 lines, 10 KiB, Python.
"""
|
|
Test script for conversation_parser.py
|
|
|
|
Tests all four main functions with sample data.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
from api.utils.conversation_parser import (
|
|
parse_jsonl_conversation,
|
|
categorize_conversation,
|
|
extract_context_from_conversation,
|
|
scan_folder_for_conversations,
|
|
batch_process_conversations,
|
|
)
|
|
|
|
|
|
def test_parse_jsonl_conversation():
    """Test parsing .jsonl conversation files."""
    print("\n=== Test 1: parse_jsonl_conversation ===")

    sample_messages = [
        {"role": "user",
         "content": "Build a FastAPI authentication system with PostgreSQL",
         "timestamp": 1705449600000},
        {"role": "assistant",
         "content": "I'll help you build an auth system using FastAPI and PostgreSQL. Let me create the api/auth.py file.",
         "timestamp": 1705449620000},
        {"role": "user",
         "content": "Also add JWT token support",
         "timestamp": 1705449640000},
    ]

    # Write the sample conversation as one JSON object per line.
    # delete=False so the parser can reopen the file by path afterwards.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False, encoding='utf-8') as f:
        for message in sample_messages:
            f.write(json.dumps(message) + "\n")
        temp_file = f.name

    try:
        result = parse_jsonl_conversation(temp_file)

        print(f"Messages: {result['message_count']}")
        print(f"Duration: {result['duration_seconds']} seconds")
        print(f"File paths extracted: {result['file_paths']}")
        print(f"First message: {result['messages'][0]['content'][:50]}...")

        assert result['message_count'] == 3, "Should have 3 messages"
        assert result['duration_seconds'] == 40, "Duration should be 40 seconds"
        assert 'api/auth.py' in result['file_paths'], "Should extract file path"

        print("[PASS] parse_jsonl_conversation test passed!")

    finally:
        # Always remove the temp file, even when an assertion fails.
        os.unlink(temp_file)
def test_categorize_conversation():
    """Test conversation categorization."""
    print("\n=== Test 2: categorize_conversation ===")

    # (label, expected category, assertion message, messages) per case.
    cases = [
        ("MSP", "msp", "Should categorize as MSP", [
            {"role": "user", "content": "Client reported firewall blocking Office365 connection"},
            {"role": "assistant", "content": "I'll check the firewall rules for the client site"},
        ]),
        ("Development", "development", "Should categorize as development", [
            {"role": "user", "content": "Build API endpoint for user authentication with FastAPI"},
            {"role": "assistant", "content": "I'll create the endpoint using SQLAlchemy and implement JWT tokens"},
        ]),
        ("General", "general", "Should categorize as general", [
            {"role": "user", "content": "What's the weather like today?"},
            {"role": "assistant", "content": "I don't have access to current weather data"},
        ]),
    ]

    for label, expected, failure_message, messages in cases:
        category = categorize_conversation(messages)
        print(f"{label} category: {category}")
        assert category == expected, failure_message

    print("[PASS] categorize_conversation test passed!")
def test_extract_context_from_conversation():
    """Test context extraction from conversation."""
    print("\n=== Test 3: extract_context_from_conversation ===")

    # Assemble a parsed-conversation dict in the shape that
    # parse_jsonl_conversation produces.
    messages = [
        {
            "role": "user",
            "content": "Build a FastAPI REST API with PostgreSQL database",
            "timestamp": 1705449600000,
        },
        {
            "role": "assistant",
            "content": "I'll create the API using FastAPI and SQLAlchemy. Decided to use Alembic for migrations because it integrates well with SQLAlchemy.",
            "timestamp": 1705449620000,
        },
        {
            "role": "user",
            "content": "Add authentication with JWT tokens",
            "timestamp": 1705449640000,
        },
    ]
    conversation = {
        "messages": messages,
        "metadata": {
            "title": "Build API system",
            "model": "claude-opus-4",
            "sessionId": "test-123",
        },
        "file_paths": ["api/main.py", "api/auth.py", "api/models.py"],
        "tool_calls": [
            {"tool": "write", "count": 5},
            {"tool": "read", "count": 3},
        ],
        "duration_seconds": 40,
        "message_count": 3,
    }

    context = extract_context_from_conversation(conversation)

    print(f"Category: {context['category']}")
    print(f"Tags: {context['tags'][:5]}")
    print(f"Decisions: {len(context['decisions'])}")
    print(f"Quality score: {context['metrics']['quality_score']}/10")
    print(f"Key files: {context['key_files']}")

    assert context['category'] in ['msp', 'development', 'general'], "Should have valid category"
    assert len(context['tags']) > 0, "Should have extracted tags"
    assert context['metrics']['message_count'] == 3, "Should have correct message count"

    print("[PASS] extract_context_from_conversation test passed!")
def test_scan_folder_for_conversations():
    """Test scanning folder for conversation files.

    Builds a temporary tree containing two conversation files (a top-level
    .jsonl and a nested .json) plus a config.json that the scanner is
    expected to ignore.
    """
    print("\n=== Test 4: scan_folder_for_conversations ===")

    # Create a temporary directory structure
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create some conversation files
        conv1_path = os.path.join(tmpdir, "conversation1.jsonl")
        conv2_path = os.path.join(tmpdir, "session", "conversation2.json")
        config_path = os.path.join(tmpdir, "config.json")  # Should be skipped

        os.makedirs(os.path.dirname(conv2_path), exist_ok=True)

        # Create files. encoding pinned to utf-8 for consistency with the
        # other tests and to avoid platform-default encoding differences.
        with open(conv1_path, 'w', encoding='utf-8') as f:
            f.write('{"role": "user", "content": "test"}\n')

        with open(conv2_path, 'w', encoding='utf-8') as f:
            f.write('{"role": "user", "content": "test"}')

        with open(config_path, 'w', encoding='utf-8') as f:
            f.write('{"setting": "value"}')

        # Scan folder
        files = scan_folder_for_conversations(tmpdir)

        print(f"Found {len(files)} conversation files")
        print(f"Files: {[os.path.basename(f) for f in files]}")

        assert len(files) == 2, "Should find 2 conversation files"
        assert any("conversation1.jsonl" in f for f in files), "Should find jsonl file"
        assert any("conversation2.json" in f for f in files), "Should find json file"
        assert not any("config.json" in f for f in files), "Should skip config.json"

        print("[PASS] scan_folder_for_conversations test passed!")
def test_batch_process():
    """Test batch processing of conversations.

    Writes one MSP-flavoured and one development-flavoured .jsonl
    conversation, then checks that batch_process_conversations processes
    both files and invokes the progress callback once per file.
    """
    print("\n=== Test 5: batch_process_conversations ===")

    def _write_jsonl(path, messages):
        # One JSON object per line — the format parse_jsonl_conversation
        # reads. encoding pinned for consistency with the other tests.
        with open(path, 'w', encoding='utf-8') as f:
            for message in messages:
                f.write(json.dumps(message) + "\n")

    with tempfile.TemporaryDirectory() as tmpdir:
        # MSP conversation
        _write_jsonl(os.path.join(tmpdir, "msp_work.jsonl"), [
            {"role": "user",
             "content": "Client ticket: firewall blocking Office365",
             "timestamp": 1705449600000},
            {"role": "assistant",
             "content": "I'll check the client firewall configuration",
             "timestamp": 1705449620000},
        ])

        # Development conversation
        _write_jsonl(os.path.join(tmpdir, "dev_work.jsonl"), [
            {"role": "user",
             "content": "Build FastAPI endpoint for authentication",
             "timestamp": 1705449600000},
            {"role": "assistant",
             "content": "Creating API endpoint with SQLAlchemy",
             "timestamp": 1705449620000},
        ])

        # Count callback invocations via nonlocal instead of the
        # single-element-list mutation workaround.
        processed_count = 0

        def progress_callback(file_path, context):
            nonlocal processed_count
            processed_count += 1
            print(f"  Processed: {os.path.basename(file_path)} -> {context['category']}")

        contexts = batch_process_conversations(tmpdir, progress_callback)

        print(f"\nTotal processed: {len(contexts)}")

        assert len(contexts) == 2, "Should process 2 conversations"
        assert processed_count == 2, "Callback should be called twice"

        categories = [ctx['category'] for ctx in contexts]
        print(f"Categories: {categories}")

        print("[PASS] batch_process_conversations test passed!")
def test_real_conversation_file():
    """Test with real conversation file if available."""
    print("\n=== Test 6: Real conversation file ===")

    real_file = r"C:\Users\MikeSwanson\AppData\Roaming\Claude\claude-code-sessions\0c32bde5-dc29-49ac-8c80-5adeaf1cdb33\299a238a-5ebf-44f4-948b-eedfa5c1f57c\local_feb419c2-b7a6-4c31-a7ce-38f6c0ccc523.json"

    # Guard clause: this path only exists on the original dev machine.
    if not os.path.exists(real_file):
        print("Real conversation file not found - skipping this test")
        return

    try:
        conversation = parse_jsonl_conversation(real_file)
        print(f"Real file - Messages: {conversation['message_count']}")
        print(f"Real file - Metadata: {conversation['metadata'].get('title', 'No title')}")

        if conversation['message_count'] > 0:
            context = extract_context_from_conversation(conversation)
            print(f"Real file - Category: {context['category']}")
            print(f"Real file - Quality: {context['metrics']['quality_score']}/10")
    except Exception as e:
        # Best-effort smoke test against local data; never fail the suite.
        print(f"Note: Real file test skipped - {e}")
if __name__ == "__main__":
|
|
print("=" * 60)
|
|
print("Testing conversation_parser.py")
|
|
print("=" * 60)
|
|
|
|
try:
|
|
test_parse_jsonl_conversation()
|
|
test_categorize_conversation()
|
|
test_extract_context_from_conversation()
|
|
test_scan_folder_for_conversations()
|
|
test_batch_process()
|
|
test_real_conversation_file()
|
|
|
|
print("\n" + "=" * 60)
|
|
print("All tests passed! [OK]")
|
|
print("=" * 60)
|
|
|
|
except AssertionError as e:
|
|
print(f"\n[FAIL] Test failed: {e}")
|
|
raise
|
|
except Exception as e:
|
|
print(f"\n[ERROR] Unexpected error: {e}")
|
|
raise
|