feat: Major directory reorganization and cleanup

Reorganized project structure for better maintainability and reduced disk usage by 95.9% (11 GB -> 451 MB). Directory Reorganization (85% reduction in root files): - Created docs/ with subdirectories (deployment, testing, database, etc.) - Created infrastructure/vpn-configs/ for VPN scripts - Moved 90+ files from root to organized locations - Archived obsolete documentation (context system, offline mode, zombie debugging) - Moved all test files to tests/ directory - Root directory: 119 files -> 18 files Disk Cleanup (10.55 GB recovered): - Deleted Rust build artifacts: 9.6 GB (target/ directories) - Deleted Python virtual environments: 161 MB (venv/ directories) - Deleted Python cache: 50 KB (__pycache__/) New Structure: - docs/ - All documentation organized by category - docs/archives/ - Obsolete but preserved documentation - infrastructure/ - VPN configs and SSH setup - tests/ - All test files consolidated - logs/ - Ready for future logs Benefits: - Cleaner root directory (18 vs 119 files) - Logical organization of documentation - 95.9% disk space reduction - Faster navigation and discovery - Better portability (build artifacts excluded) Build artifacts can be regenerated: - Rust: cargo build --release (5-15 min per project) - Python: pip install -r requirements.txt (2-3 min) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-18 20:42:28 -07:00
parent 89e5118306
commit 06f7617718
96 changed files with 54 additions and 2639 deletions
--- a/tests/test_sql_injection_security.py
+++ b/tests/test_sql_injection_security.py
@@ -0,0 +1,333 @@
+"""
+SQL Injection Security Tests for Context Recall API
+
+Tests that the recall API is properly protected against SQL injection attacks.
+Validates both the input validation layer and the parameterized query layer.
+"""
+
+import unittest
+import requests
+from typing import Dict, Any
+
+# Import auth utilities for token creation
+from api.middleware.auth import create_access_token
+
+
+# Test configuration
+API_BASE_URL = "http://172.16.3.30:8001/api"
+TEST_USER_EMAIL = "admin@claudetools.local"
+
+
+class TestSQLInjectionSecurity(unittest.TestCase):
+    """Test suite for SQL injection attack prevention."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Create test JWT token for authentication."""
+        # Create token directly without login endpoint
+        cls.token = create_access_token({"sub": TEST_USER_EMAIL})
+        cls.headers = {"Authorization": f"Bearer {cls.token}"}
+
+    # SQL Injection Test Cases for search_term parameter
+
+    def test_sql_injection_search_term_basic_attack(self):
+        """Test basic SQL injection attempt via search_term."""
+        malicious_input = "' OR '1'='1"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation (contains single quotes)
+        assert response.status_code == 422, "Failed to reject SQL injection attack"
+        error_detail = response.json()["detail"]
+        assert any("pattern" in str(err).lower() or "match" in str(err).lower()
+                   for err in error_detail if isinstance(err, dict))
+
+    def test_sql_injection_search_term_union_attack(self):
+        """Test UNION-based SQL injection attempt."""
+        malicious_input = "' UNION SELECT * FROM users--"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation
+        assert response.status_code == 422, "Failed to reject UNION attack"
+
+    def test_sql_injection_search_term_comment_injection(self):
+        """Test comment-based SQL injection."""
+        malicious_input = "test' --"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation (contains single quote)
+        assert response.status_code == 422, "Failed to reject comment injection"
+
+    def test_sql_injection_search_term_semicolon_attack(self):
+        """Test semicolon-based SQL injection for multiple statements."""
+        malicious_input = "test'; DROP TABLE conversation_contexts;--"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation (contains semicolon and quotes)
+        assert response.status_code == 422, "Failed to reject DROP TABLE attack"
+
+    def test_sql_injection_search_term_encoded_attack(self):
+        """Test URL-encoded SQL injection attempt."""
+        # URL encoding of "' OR 1=1--"
+        malicious_input = "%27%20OR%201%3D1--"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation after decoding
+        assert response.status_code == 422, "Failed to reject encoded attack"
+
+    # SQL Injection Test Cases for tags parameter
+
+    def test_sql_injection_tags_basic_attack(self):
+        """Test SQL injection via tags parameter."""
+        malicious_tag = "' OR '1'='1"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"tags": [malicious_tag]},
+            headers=self.headers
+        )
+
+        # Should reject due to tag validation (contains single quotes and spaces)
+        assert response.status_code == 400, "Failed to reject SQL injection via tags"
+        assert "Invalid tag format" in response.json()["detail"]
+
+    def test_sql_injection_tags_union_attack(self):
+        """Test UNION attack via tags parameter."""
+        malicious_tag = "tag' UNION SELECT password FROM users--"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"tags": [malicious_tag]},
+            headers=self.headers
+        )
+
+        # Should reject due to tag validation
+        assert response.status_code == 400, "Failed to reject UNION attack via tags"
+
+    def test_sql_injection_tags_multiple_malicious(self):
+        """Test multiple malicious tags."""
+        malicious_tags = [
+            "tag1' OR '1'='1",
+            "tag2'; DROP TABLE tags;--",
+            "tag3' UNION SELECT NULL--"
+        ]
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"tags": malicious_tags},
+            headers=self.headers
+        )
+
+        # Should reject due to tag validation
+        assert response.status_code == 400, "Failed to reject multiple malicious tags"
+
+    # Valid Input Tests (should succeed)
+
+    def test_valid_search_term_alphanumeric(self):
+        """Test that valid alphanumeric search terms work."""
+        valid_input = "API development"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": valid_input},
+            headers=self.headers
+        )
+
+        # Should succeed
+        assert response.status_code == 200, f"Valid input rejected: {response.text}"
+        data = response.json()
+        assert "contexts" in data
+        assert isinstance(data["contexts"], list)
+
+    def test_valid_search_term_with_punctuation(self):
+        """Test valid search terms with allowed punctuation."""
+        valid_input = "database-migration (phase-1)!"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": valid_input},
+            headers=self.headers
+        )
+
+        # Should succeed
+        assert response.status_code == 200, f"Valid input rejected: {response.text}"
+
+    def test_valid_tags(self):
+        """Test that valid tags work."""
+        valid_tags = ["api", "database", "phase-1", "test_tag"]
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"tags": valid_tags},
+            headers=self.headers
+        )
+
+        # Should succeed
+        assert response.status_code == 200, f"Valid tags rejected: {response.text}"
+        data = response.json()
+        assert "contexts" in data
+
+    # Boundary Tests
+
+    def test_search_term_max_length(self):
+        """Test search term at maximum allowed length (200 chars)."""
+        valid_input = "a" * 200
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": valid_input},
+            headers=self.headers
+        )
+
+        # Should succeed
+        assert response.status_code == 200, "Max length valid input rejected"
+
+    def test_search_term_exceeds_max_length(self):
+        """Test search term exceeding maximum length."""
+        invalid_input = "a" * 201
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": invalid_input},
+            headers=self.headers
+        )
+
+        # Should reject
+        assert response.status_code == 422, "Overlong input not rejected"
+
+    def test_tags_max_items(self):
+        """Test maximum number of tags (20)."""
+        valid_tags = [f"tag{i}" for i in range(20)]
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"tags": valid_tags},
+            headers=self.headers
+        )
+
+        # Should succeed
+        assert response.status_code == 200, "Max tags rejected"
+
+    def test_tags_exceeds_max_items(self):
+        """Test exceeding maximum number of tags."""
+        invalid_tags = [f"tag{i}" for i in range(21)]
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"tags": invalid_tags},
+            headers=self.headers
+        )
+
+        # Should reject
+        assert response.status_code == 422, "Too many tags not rejected"
+
+    # Advanced SQL Injection Techniques
+
+    def test_sql_injection_hex_encoding(self):
+        """Test hex-encoded SQL injection."""
+        malicious_input = "0x27204f522031203d2031"  # Hex for "' OR 1 = 1"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Pattern allows alphanumeric, so this passes input validation
+        # Should be safe due to parameterized queries
+        assert response.status_code == 200, "Hex encoding caused error"
+        # Verify it's treated as literal search, not executed as SQL
+        data = response.json()
+        assert isinstance(data["contexts"], list)
+
+    def test_sql_injection_time_based_blind(self):
+        """Test time-based blind SQL injection attempt."""
+        malicious_input = "' AND SLEEP(5)--"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation
+        assert response.status_code == 422, "Time-based attack not rejected"
+
+    def test_sql_injection_stacked_queries(self):
+        """Test stacked query injection."""
+        malicious_input = "test; DELETE FROM conversation_contexts WHERE 1=1"
+
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": malicious_input},
+            headers=self.headers
+        )
+
+        # Should reject due to pattern validation (semicolon not allowed)
+        assert response.status_code == 422, "Stacked query attack not rejected"
+
+    # Verify Database Integrity
+
+    def test_database_not_compromised(self):
+        """Verify database still functions after attack attempts."""
+        # Simple query to verify database is intact
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"limit": 5},
+            headers=self.headers
+        )
+
+        assert response.status_code == 200, "Database may be compromised"
+        data = response.json()
+        assert "contexts" in data
+        assert isinstance(data["contexts"], list)
+
+    def test_fulltext_index_still_works(self):
+        """Verify FULLTEXT index functionality after attacks."""
+        # Test normal search that should use FULLTEXT index
+        response = requests.get(
+            f"{API_BASE_URL}/conversation-contexts/recall",
+            params={"search_term": "test"},
+            headers=self.headers
+        )
+
+        assert response.status_code == 200, "FULLTEXT search failed"
+        data = response.json()
+        assert isinstance(data["contexts"], list)
+
+
+if __name__ == "__main__":
+    print("=" * 70)
+    print("SQL INJECTION SECURITY TEST SUITE")
+    print("=" * 70)
+    print()
+    print("Testing Context Recall API endpoint security...")
+    print(f"Target: {API_BASE_URL}/conversation-contexts/recall")
+    print()
+
+    # Run tests
+    unittest.main(verbosity=2)