feat: Major directory reorganization and cleanup

Reorganized project structure for better maintainability and reduced
disk usage by 95.9% (11 GB -> 451 MB).

Directory Reorganization (85% reduction in root files):
- Created docs/ with subdirectories (deployment, testing, database, etc.)
- Created infrastructure/vpn-configs/ for VPN scripts
- Moved 90+ files from root to organized locations
- Archived obsolete documentation (context system, offline mode, zombie debugging)
- Moved all test files to tests/ directory
- Root directory: 119 files -> 18 files

Disk Cleanup (10.55 GB recovered):
- Deleted Rust build artifacts: 9.6 GB (target/ directories)
- Deleted Python virtual environments: 161 MB (venv/ directories)
- Deleted Python cache: 50 KB (__pycache__/)

New Structure:
- docs/ - All documentation organized by category
- docs/archives/ - Obsolete but preserved documentation
- infrastructure/ - VPN configs and SSH setup
- tests/ - All test files consolidated
- logs/ - Ready for future logs

Benefits:
- Cleaner root directory (18 vs 119 files)
- Logical organization of documentation
- 95.9% disk space reduction
- Faster navigation and discovery
- Better portability (build artifacts excluded)

Build artifacts can be regenerated:
- Rust: cargo build --release (5-15 min per project)
- Python: pip install -r requirements.txt (2-3 min)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-18 20:42:28 -07:00
parent 89e5118306
commit 06f7617718
96 changed files with 54 additions and 2639 deletions

View File

@@ -0,0 +1,333 @@
"""
SQL Injection Security Tests for Context Recall API
Tests that the recall API is properly protected against SQL injection attacks.
Validates both the input validation layer and the parameterized query layer.
"""
import os
import unittest
from typing import Dict, Any

import requests

# Import auth utilities for token creation
from api.middleware.auth import create_access_token
# Test configuration
# The target API can be redirected without editing this file by setting the
# CONTEXT_RECALL_API_BASE_URL environment variable; when it is unset the
# original hard-coded host is used. Trailing slashes are stripped because the
# tests append "/conversation-contexts/recall" themselves.
API_BASE_URL = os.environ.get(
    "CONTEXT_RECALL_API_BASE_URL", "http://172.16.3.30:8001/api"
).rstrip("/")
# Subject ("sub" claim) placed in the JWT that authenticates every request.
TEST_USER_EMAIL = "admin@claudetools.local"
class TestSQLInjectionSecurity(unittest.TestCase):
    """Test suite for SQL injection attack prevention.

    These are live integration tests: every test issues an authenticated
    HTTP GET against ``{API_BASE_URL}/conversation-contexts/recall`` and
    checks the returned status code (and, for some tests, the JSON body).

    NOTE(review): unittest's default loader runs test methods in name order,
    so ``test_database_not_compromised`` and ``test_fulltext_index_still_works``
    execute *before* the ``test_sql_injection_*`` methods, not after them.
    """

    # Seconds to wait for the API. Without a timeout ``requests`` waits
    # indefinitely, so an unreachable host would hang the whole suite.
    REQUEST_TIMEOUT = 10

    @classmethod
    def setUpClass(cls):
        """Create test JWT token for authentication."""
        # Create token directly without login endpoint
        cls.token = create_access_token({"sub": TEST_USER_EMAIL})
        cls.headers = {"Authorization": f"Bearer {cls.token}"}

    def _recall(self, **params):
        """Send an authenticated GET to the recall endpoint.

        Args:
            **params: Query-string parameters (e.g. ``search_term``, ``tags``,
                ``limit``). List values are sent as repeated parameters.

        Returns:
            The ``requests.Response`` from the endpoint.
        """
        return requests.get(
            f"{API_BASE_URL}/conversation-contexts/recall",
            params=params,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )

    # SQL Injection Test Cases for search_term parameter

    def test_sql_injection_search_term_basic_attack(self):
        """Test basic SQL injection attempt via search_term."""
        response = self._recall(search_term="' OR '1'='1")
        # Should reject due to pattern validation (contains single quotes)
        self.assertEqual(
            response.status_code, 422, "Failed to reject SQL injection attack"
        )
        error_detail = response.json()["detail"]
        # At least one structured error entry must mention the pattern mismatch.
        self.assertTrue(
            any(
                "pattern" in str(err).lower() or "match" in str(err).lower()
                for err in error_detail
                if isinstance(err, dict)
            )
        )

    def test_sql_injection_search_term_union_attack(self):
        """Test UNION-based SQL injection attempt."""
        response = self._recall(search_term="' UNION SELECT * FROM users--")
        # Should reject due to pattern validation
        self.assertEqual(response.status_code, 422, "Failed to reject UNION attack")

    def test_sql_injection_search_term_comment_injection(self):
        """Test comment-based SQL injection."""
        response = self._recall(search_term="test' --")
        # Should reject due to pattern validation (contains single quote)
        self.assertEqual(
            response.status_code, 422, "Failed to reject comment injection"
        )

    def test_sql_injection_search_term_semicolon_attack(self):
        """Test semicolon-based SQL injection for multiple statements."""
        response = self._recall(
            search_term="test'; DROP TABLE conversation_contexts;--"
        )
        # Should reject due to pattern validation (contains semicolon and quotes)
        self.assertEqual(
            response.status_code, 422, "Failed to reject DROP TABLE attack"
        )

    def test_sql_injection_search_term_encoded_attack(self):
        """Test URL-encoded SQL injection attempt."""
        # URL encoding of "' OR 1=1--"
        malicious_input = "%27%20OR%201%3D1--"
        # NOTE(review): requests percent-encodes query parameters itself, so
        # each "%" here goes on the wire as "%25" and the server sees the
        # literal text "%27%20OR%201%3D1--" rather than a decoded quote.
        # Presumably the 422 comes from the pattern disallowing "%" - confirm
        # against the endpoint's validator.
        response = self._recall(search_term=malicious_input)
        self.assertEqual(response.status_code, 422, "Failed to reject encoded attack")

    # SQL Injection Test Cases for tags parameter

    def test_sql_injection_tags_basic_attack(self):
        """Test SQL injection via tags parameter."""
        response = self._recall(tags=["' OR '1'='1"])
        # Should reject due to tag validation (contains single quotes and spaces)
        self.assertEqual(
            response.status_code, 400, "Failed to reject SQL injection via tags"
        )
        self.assertIn("Invalid tag format", response.json()["detail"])

    def test_sql_injection_tags_union_attack(self):
        """Test UNION attack via tags parameter."""
        response = self._recall(tags=["tag' UNION SELECT password FROM users--"])
        # Should reject due to tag validation
        self.assertEqual(
            response.status_code, 400, "Failed to reject UNION attack via tags"
        )

    def test_sql_injection_tags_multiple_malicious(self):
        """Test multiple malicious tags."""
        malicious_tags = [
            "tag1' OR '1'='1",
            "tag2'; DROP TABLE tags;--",
            "tag3' UNION SELECT NULL--",
        ]
        response = self._recall(tags=malicious_tags)
        # Should reject due to tag validation
        self.assertEqual(
            response.status_code, 400, "Failed to reject multiple malicious tags"
        )

    # Valid Input Tests (should succeed)

    def test_valid_search_term_alphanumeric(self):
        """Test that valid alphanumeric search terms work."""
        response = self._recall(search_term="API development")
        # Should succeed
        self.assertEqual(
            response.status_code, 200, f"Valid input rejected: {response.text}"
        )
        data = response.json()
        self.assertIn("contexts", data)
        self.assertIsInstance(data["contexts"], list)

    def test_valid_search_term_with_punctuation(self):
        """Test valid search terms with allowed punctuation."""
        response = self._recall(search_term="database-migration (phase-1)!")
        # Should succeed
        self.assertEqual(
            response.status_code, 200, f"Valid input rejected: {response.text}"
        )

    def test_valid_tags(self):
        """Test that valid tags work."""
        response = self._recall(tags=["api", "database", "phase-1", "test_tag"])
        # Should succeed
        self.assertEqual(
            response.status_code, 200, f"Valid tags rejected: {response.text}"
        )
        self.assertIn("contexts", response.json())

    # Boundary Tests

    def test_search_term_max_length(self):
        """Test search term at maximum allowed length (200 chars)."""
        response = self._recall(search_term="a" * 200)
        # Should succeed
        self.assertEqual(response.status_code, 200, "Max length valid input rejected")

    def test_search_term_exceeds_max_length(self):
        """Test search term exceeding maximum length."""
        response = self._recall(search_term="a" * 201)
        # Should reject
        self.assertEqual(response.status_code, 422, "Overlong input not rejected")

    def test_tags_max_items(self):
        """Test maximum number of tags (20)."""
        response = self._recall(tags=[f"tag{i}" for i in range(20)])
        # Should succeed
        self.assertEqual(response.status_code, 200, "Max tags rejected")

    def test_tags_exceeds_max_items(self):
        """Test exceeding maximum number of tags."""
        response = self._recall(tags=[f"tag{i}" for i in range(21)])
        # Should reject
        self.assertEqual(response.status_code, 422, "Too many tags not rejected")

    # Advanced SQL Injection Techniques

    def test_sql_injection_hex_encoding(self):
        """Test hex-encoded SQL injection."""
        malicious_input = "0x27204f522031203d2031"  # Hex for "' OR 1 = 1"
        response = self._recall(search_term=malicious_input)
        # Pattern allows alphanumeric, so this passes input validation
        # Should be safe due to parameterized queries
        self.assertEqual(response.status_code, 200, "Hex encoding caused error")
        # Verify it's treated as literal search, not executed as SQL
        self.assertIsInstance(response.json()["contexts"], list)

    def test_sql_injection_time_based_blind(self):
        """Test time-based blind SQL injection attempt."""
        response = self._recall(search_term="' AND SLEEP(5)--")
        # Should reject due to pattern validation
        self.assertEqual(response.status_code, 422, "Time-based attack not rejected")

    def test_sql_injection_stacked_queries(self):
        """Test stacked query injection."""
        response = self._recall(
            search_term="test; DELETE FROM conversation_contexts WHERE 1=1"
        )
        # Should reject due to pattern validation (semicolon not allowed)
        self.assertEqual(
            response.status_code, 422, "Stacked query attack not rejected"
        )

    # Verify Database Integrity

    def test_database_not_compromised(self):
        """Verify the recall endpoint still answers a plain query.

        NOTE(review): intended as a post-attack check, but name ordering
        means it runs before the injection tests in this class.
        """
        # Simple query to verify database is intact
        response = self._recall(limit=5)
        self.assertEqual(response.status_code, 200, "Database may be compromised")
        data = response.json()
        self.assertIn("contexts", data)
        self.assertIsInstance(data["contexts"], list)

    def test_fulltext_index_still_works(self):
        """Verify an ordinary search_term query still succeeds.

        NOTE(review): intended as a post-attack check, but name ordering
        means it runs before the injection tests in this class.
        """
        # Test normal search that should use FULLTEXT index
        response = self._recall(search_term="test")
        self.assertEqual(response.status_code, 200, "FULLTEXT search failed")
        self.assertIsInstance(response.json()["contexts"], list)
if __name__ == "__main__":
    # Announce which endpoint is under test, then hand control to unittest.
    rule = "=" * 70
    banner_lines = (
        rule,
        "SQL INJECTION SECURITY TEST SUITE",
        rule,
        "",
        "Testing Context Recall API endpoint security...",
        f"Target: {API_BASE_URL}/conversation-contexts/recall",
        "",
    )
    for banner_line in banner_lines:
        print(banner_line)
    # Run tests
    unittest.main(verbosity=2)