diff --git a/.claude/memory/MEMORY.md b/.claude/memory/MEMORY.md index bdd061c..248cbde 100644 --- a/.claude/memory/MEMORY.md +++ b/.claude/memory/MEMORY.md @@ -70,6 +70,7 @@ - [Mac gururmm setup pending](project_mac_gururmm_setup_pending.md) — ACTION REQUIRED: run `bash scripts/install-hooks.sh` in gururmm repo on Mikes-MacBook-Air before any RMM work ## Project +- [Only RMM & GC are versionable products](project_versionable_products.md) — GuruRMM + GuruConnect are the only products with own repos/submodules; everything else stays in the claudetools monorepo. Split only for independent pipeline OR versioned external consumer. - [Quantum GoDaddy M365 tenant](project_quantum_godaddy_m365_tenant.md) — quantumwms.com parked in a GoDaddy-provisioned M365 tenant (id ddf3d2c9-b76c-40d9-a216-9f11a1a26f97, netorg18235235.onmicrosoft.com); blocks Pax8 migration until GoDaddy removed. Managed = no DNS takeover; need GoDaddy/GA access. - [Cascades Migration Plan](project-cascades-migration-plan.md) — Active multi-day migration. Plan file: `C:\Users\Howard\.claude\plans\wise-discovering-panda.md`. Syncro ticket: #110680053. Resume: "resume the Cascades migration plan". - [GuruRMM Development Principles](gururmm-development-principles.md) - MANDATORY: every feature needs full stack (backend, API, UI, docs, scalability). Product must work without AI agents (AI features are enhancements). Documented in guru-rmm/docs/DESIGN.md. 
diff --git a/.claude/memory/project_versionable_products.md b/.claude/memory/project_versionable_products.md new file mode 100644 index 0000000..b97723b --- /dev/null +++ b/.claude/memory/project_versionable_products.md @@ -0,0 +1,23 @@ +--- +name: project_versionable_products +description: Only GuruRMM and GuruConnect are versionable products (own repos/submodules); everything else stays in the claudetools monorepo +metadata: + type: project +--- + +GuruRMM (`azcomputerguru/gururmm`) and GuruConnect (`azcomputerguru/guru-connect`) are the +**only** versionable products. Each has its own Gitea repo + CI/CD + release cadence and is tracked +as a git **submodule** under `projects/msp-tools/` (guru-rmm, guru-connect). Confirmed by Mike 2026-05-29. + +Everything else — the ClaudeTools API itself, quote-wizard, dataforth-dos, radio-show, msp-tools +scripts, `.claude/`, `wiki/`, `session-logs/` — stays **in the claudetools monorepo**. Do not split +them into their own repos. + +**Why:** Separate repos are only justified by an independent release/deploy pipeline OR an external +consumer that integrates via a versioned contract (see RMM↔GC boundary, ADR-008). Every extra repo +multiplies submodule-sync burden — and an un-pushed vendored copy drifts (GC's repo was 4 months +stale until 2026-05-29 because edits stayed in the monorepo and were never pushed). + +**How to apply:** When asked "should X be its own repo?", default to NO unless X ships/deploys on its +own pipeline or is consumed elsewhere through a versioned interface. For the two products, never edit +the submodule's files without committing + pushing inside the submodule. Relates to [[feedback_gururmm_builds]]. 
diff --git a/.gitmodules b/.gitmodules index ee40c38..64bfaf0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,7 @@ path = projects/msp-tools/guru-rmm url = https://git.azcomputerguru.com/azcomputerguru/gururmm.git branch = main +[submodule "projects/msp-tools/guru-connect"] + path = projects/msp-tools/guru-connect + url = http://azcomputerguru@172.16.3.20:3000/azcomputerguru/guru-connect.git + branch = main diff --git a/projects/msp-tools/guru-connect b/projects/msp-tools/guru-connect new file mode 160000 index 0000000..e3e95f8 --- /dev/null +++ b/projects/msp-tools/guru-connect @@ -0,0 +1 @@ +Subproject commit e3e95f8fa7508a2c56139e7c36800ffc780d0e67 diff --git a/projects/msp-tools/guru-connect/.gitea/workflows/build-and-test.yml b/projects/msp-tools/guru-connect/.gitea/workflows/build-and-test.yml deleted file mode 100644 index b9e7b75..0000000 --- a/projects/msp-tools/guru-connect/.gitea/workflows/build-and-test.yml +++ /dev/null @@ -1,145 +0,0 @@ -name: Build and Test - -on: - push: - branches: - - main - - develop - pull_request: - branches: - - main - -jobs: - build-server: - name: Build Server (Linux) - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - target: x86_64-unknown-linux-gnu - override: true - components: rustfmt, clippy - - - name: Cache Cargo dependencies - uses: actions/cache@v3 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-server-${{ hashFiles('server/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo-server- - - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y pkg-config libssl-dev protobuf-compiler - - - name: Check formatting - run: cd server && cargo fmt --all -- --check - - - name: Run Clippy - run: cd server && cargo clippy --all-targets --all-features 
-- -D warnings - - - name: Build server - run: | - cd server - cargo build --release --target x86_64-unknown-linux-gnu - - - name: Run tests - run: | - cd server - cargo test --release - - - name: Upload server binary - uses: actions/upload-artifact@v3 - with: - name: guruconnect-server-linux - path: server/target/x86_64-unknown-linux-gnu/release/guruconnect-server - retention-days: 30 - - build-agent: - name: Build Agent (Windows) - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - target: x86_64-pc-windows-msvc - override: true - - - name: Install cross-compilation tools - run: | - sudo apt-get update - sudo apt-get install -y mingw-w64 - - - name: Cache Cargo dependencies - uses: actions/cache@v3 - with: - path: | - ~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-agent-${{ hashFiles('agent/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo-agent- - - - name: Build agent (cross-compile for Windows) - run: | - rustup target add x86_64-pc-windows-gnu - cd agent - cargo build --release --target x86_64-pc-windows-gnu - - - name: Upload agent binary - uses: actions/upload-artifact@v3 - with: - name: guruconnect-agent-windows - path: agent/target/x86_64-pc-windows-gnu/release/guruconnect.exe - retention-days: 30 - - security-audit: - name: Security Audit - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Install cargo-audit - run: cargo install cargo-audit - - - name: Run security audit on server - run: cd server && cargo audit - - - name: Run security audit on agent - run: cd agent && cargo audit - - build-summary: - name: Build Summary - runs-on: ubuntu-latest - needs: [build-server, build-agent, security-audit] - 
steps: - - name: Build succeeded - run: | - echo "All builds completed successfully" - echo "Server: Linux x86_64" - echo "Agent: Windows x86_64" - echo "Security: Passed" diff --git a/projects/msp-tools/guru-connect/.gitea/workflows/deploy.yml b/projects/msp-tools/guru-connect/.gitea/workflows/deploy.yml deleted file mode 100644 index e9b5133..0000000 --- a/projects/msp-tools/guru-connect/.gitea/workflows/deploy.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Deploy to Production - -on: - push: - tags: - - 'v*.*.*' - workflow_dispatch: - inputs: - environment: - description: 'Deployment environment' - required: true - default: 'production' - type: choice - options: - - production - - staging - -jobs: - deploy-server: - name: Deploy Server - runs-on: ubuntu-latest - environment: ${{ github.event.inputs.environment || 'production' }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - target: x86_64-unknown-linux-gnu - - - name: Build server - run: | - cd server - cargo build --release --target x86_64-unknown-linux-gnu - - - name: Create deployment package - run: | - mkdir -p deploy - cp server/target/x86_64-unknown-linux-gnu/release/guruconnect-server deploy/ - cp -r server/static deploy/ - cp -r server/migrations deploy/ - cp server/.env.example deploy/.env.example - tar -czf guruconnect-server-${{ github.ref_name }}.tar.gz -C deploy . 
- - - name: Upload deployment package - uses: actions/upload-artifact@v3 - with: - name: deployment-package - path: guruconnect-server-${{ github.ref_name }}.tar.gz - retention-days: 90 - - - name: Deploy to server (production) - if: github.event.inputs.environment == 'production' || startsWith(github.ref, 'refs/tags/') - run: | - echo "Deployment command would run here" - echo "SSH to 172.16.3.30 and deploy" - # Actual deployment would use SSH keys and run: - # scp guruconnect-server-*.tar.gz guru@172.16.3.30:/tmp/ - # ssh guru@172.16.3.30 'bash /home/guru/guru-connect/scripts/deploy.sh' - - create-release: - name: Create GitHub Release - runs-on: ubuntu-latest - needs: deploy-server - if: startsWith(github.ref, 'refs/tags/') - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Download artifacts - uses: actions/download-artifact@v3 - - - name: Create Release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ github.ref_name }} - release_name: Release ${{ github.ref_name }} - draft: false - prerelease: false - - - name: Upload Release Assets - run: | - echo "Upload server and agent binaries to release" - # Would attach artifacts to the release here diff --git a/projects/msp-tools/guru-connect/.gitea/workflows/test.yml b/projects/msp-tools/guru-connect/.gitea/workflows/test.yml deleted file mode 100644 index d6628ee..0000000 --- a/projects/msp-tools/guru-connect/.gitea/workflows/test.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: Run Tests - -on: - push: - branches: - - main - - develop - - 'feature/**' - pull_request: - -jobs: - test-server: - name: Test Server - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - target: x86_64-unknown-linux-gnu - components: rustfmt, clippy - - - name: Cache Cargo dependencies - uses: actions/cache@v3 - with: - path: | - 
~/.cargo/bin/ - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - target/ - key: ${{ runner.os }}-cargo-test-${{ hashFiles('server/Cargo.lock') }} - - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y pkg-config libssl-dev protobuf-compiler - - - name: Run unit tests - run: | - cd server - cargo test --lib --release - - - name: Run integration tests - run: | - cd server - cargo test --test '*' --release - - - name: Run doc tests - run: | - cd server - cargo test --doc --release - - test-agent: - name: Test Agent - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - - name: Run agent tests - run: | - cd agent - cargo test --release - - code-coverage: - name: Code Coverage - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: llvm-tools-preview - - - name: Install tarpaulin - run: cargo install cargo-tarpaulin - - - name: Generate coverage report - run: | - cd server - cargo tarpaulin --out Xml --output-dir ../coverage - - - name: Upload coverage to artifact - uses: actions/upload-artifact@v3 - with: - name: coverage-report - path: coverage/ - - lint: - name: Lint and Format Check - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - components: rustfmt, clippy - - - name: Check formatting (server) - run: cd server && cargo fmt --all -- --check - - - name: Check formatting (agent) - run: cd agent && cargo fmt --all -- --check - - - name: Run clippy (server) - run: cd server && cargo clippy --all-targets --all-features -- -D warnings - - - name: Run clippy (agent) - run: cd agent && cargo clippy --all-targets 
--all-features -- -D warnings diff --git a/projects/msp-tools/guru-connect/ACTIVATE_CI_CD.md b/projects/msp-tools/guru-connect/ACTIVATE_CI_CD.md deleted file mode 100644 index 44b3cb3..0000000 --- a/projects/msp-tools/guru-connect/ACTIVATE_CI_CD.md +++ /dev/null @@ -1,629 +0,0 @@ -# GuruConnect CI/CD Activation Guide - -**Date:** 2026-01-18 -**Status:** Ready for Activation -**Server:** 172.16.3.30 (gururmm) - ---- - -## Prerequisites Complete - -- [x] Gitea Actions workflows committed -- [x] Deployment automation scripts created -- [x] Gitea Actions runner binary installed -- [x] Systemd service configured -- [x] All documentation complete - ---- - -## Step 1: Register Gitea Actions Runner - -### 1.1 Get Registration Token - -1. Open browser and navigate to: - ``` - https://git.azcomputerguru.com/admin/actions/runners - ``` - -2. Log in with Gitea admin credentials - -3. Click **"Create new Runner"** - -4. Copy the registration token (starts with something like `D0g...`) - -### 1.2 Register Runner on Server - -```bash -# SSH to server -ssh guru@172.16.3.30 - -# Register runner with token from above -sudo -u gitea-runner act_runner register \ - --instance https://git.azcomputerguru.com \ - --token YOUR_REGISTRATION_TOKEN_HERE \ - --name gururmm-runner \ - --labels ubuntu-latest,ubuntu-22.04 -``` - -**Expected Output:** -``` -INFO Registering runner, arch=amd64, os=linux, version=0.2.11. -INFO Successfully registered runner. -``` - -### 1.3 Start Runner Service - -```bash -# Reload systemd configuration -sudo systemctl daemon-reload - -# Enable runner to start on boot -sudo systemctl enable gitea-runner - -# Start runner service -sudo systemctl start gitea-runner - -# Check status -sudo systemctl status gitea-runner -``` - -**Expected Output:** -``` -● gitea-runner.service - Gitea Actions Runner - Loaded: loaded (/etc/systemd/system/gitea-runner.service; enabled) - Active: active (running) since Sat 2026-01-18 16:00:00 UTC -``` - -### 1.4 Verify Registration - -1. 
Go back to: https://git.azcomputerguru.com/admin/actions/runners - -2. Verify "gururmm-runner" appears in the list - -3. Status should show: **Online** (green) - ---- - -## Step 2: Test Build Workflow - -### 2.1 Trigger First Build - -```bash -# On server -cd ~/guru-connect - -# Make empty commit to trigger CI -git commit --allow-empty -m "test: trigger CI/CD pipeline" -git push origin main -``` - -### 2.2 Monitor Build Progress - -1. Open browser: https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions - -2. You should see a new workflow run: **"Build and Test"** - -3. Click on the workflow run to view progress - -4. Watch the jobs complete: - - Build Server (Linux) - ~2-3 minutes - - Build Agent (Windows) - ~2-3 minutes - - Security Audit - ~1 minute - - Build Summary - ~10 seconds - -### 2.3 Expected Results - -**Build Server Job:** -``` -✓ Checkout code -✓ Install Rust toolchain -✓ Cache Cargo dependencies -✓ Install dependencies (pkg-config, libssl-dev, protobuf-compiler) -✓ Build server -✓ Upload server binary -``` - -**Build Agent Job:** -``` -✓ Checkout code -✓ Install Rust toolchain -✓ Install cross-compilation tools -✓ Build agent -✓ Upload agent binary -``` - -**Security Audit Job:** -``` -✓ Checkout code -✓ Install Rust toolchain -✓ Install cargo-audit -✓ Run security audit -``` - -### 2.4 Download Build Artifacts - -1. Scroll down to **Artifacts** section - -2. Download artifacts: - - `guruconnect-server-linux` (server binary) - - `guruconnect-agent-windows` (agent .exe) - -3. Verify file sizes: - - Server: ~15-20 MB - - Agent: ~10-15 MB - ---- - -## Step 3: Test Workflow - -### 3.1 Trigger Test Suite - -```bash -# Tests run automatically on push, or trigger manually: -cd ~/guru-connect - -# Make a code change to trigger tests -echo "// Test comment" >> server/src/main.rs -git add server/src/main.rs -git commit -m "test: trigger test workflow" -git push origin main -``` - -### 3.2 Monitor Test Execution - -1. 
Go to: https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions - -2. Click on **"Run Tests"** workflow - -3. Watch jobs complete: - - Test Server - ~3-5 minutes - - Test Agent - ~2-3 minutes - - Code Coverage - ~4-6 minutes - - Lint - ~2-3 minutes - -### 3.3 Expected Results - -**Test Server Job:** -``` -✓ Run unit tests -✓ Run integration tests -✓ Run doc tests -``` - -**Test Agent Job:** -``` -✓ Run agent tests -``` - -**Code Coverage Job:** -``` -✓ Install tarpaulin -✓ Generate coverage report -✓ Upload coverage artifact -``` - -**Lint Job:** -``` -✓ Check formatting (server) - cargo fmt -✓ Check formatting (agent) - cargo fmt -✓ Run clippy (server) - zero warnings -✓ Run clippy (agent) - zero warnings -``` - ---- - -## Step 4: Test Deployment Workflow - -### 4.1 Create Version Tag - -```bash -# On server -cd ~/guru-connect/scripts - -# Create first release tag (v0.1.0) -./version-tag.sh patch -``` - -**Expected Interaction:** -``` -========================================= -GuruConnect Version Tagging -========================================= - -Current version: v0.0.0 -New version: v0.1.0 - -Changes since v0.0.0: -------------------------------------------- -5b7cf5f ci: add Gitea Actions workflows and deployment automation -[previous commits...] -------------------------------------------- - -Create tag v0.1.0? (y/N) y - -Updating Cargo.toml versions... -Updated server/Cargo.toml -Updated agent/Cargo.toml - -Committing version bump... -[main abc1234] chore: bump version to v0.1.0 - -Creating tag v0.1.0... -Tag created successfully - -To push tag to remote: - git push origin v0.1.0 -``` - -### 4.2 Push Tag to Trigger Deployment - -```bash -# Push the version bump commit -git push origin main - -# Push the tag (this triggers deployment workflow) -git push origin v0.1.0 -``` - -### 4.3 Monitor Deployment - -1. Go to: https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions - -2. Click on **"Deploy to Production"** workflow - -3. 
Watch deployment progress: - - Deploy Server - ~10-15 minutes - - Create Release - ~2-3 minutes - -### 4.4 Expected Deployment Flow - -**Deploy Server Job:** -``` -✓ Checkout code -✓ Install Rust toolchain -✓ Build release binary -✓ Create deployment package -✓ Transfer to server (via SSH) -✓ Run deployment script - ├─ Backup current version - ├─ Stop service - ├─ Deploy new binary - ├─ Start service - ├─ Health check - └─ Verify deployment -✓ Upload deployment artifact -``` - -**Create Release Job:** -``` -✓ Create GitHub/Gitea release -✓ Upload release assets - ├─ guruconnect-server-v0.1.0.tar.gz - ├─ guruconnect-agent-v0.1.0.exe - └─ SHA256SUMS -``` - -### 4.5 Verify Deployment - -```bash -# Check service status -sudo systemctl status guruconnect - -# Check new version -~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server --version -# Should output: v0.1.0 - -# Check health endpoint -curl http://172.16.3.30:3002/health -# Should return: {"status":"OK"} - -# Check backup created -ls -lh /home/guru/deployments/backups/ -# Should show: guruconnect-server-20260118-HHMMSS - -# Check artifact saved -ls -lh /home/guru/deployments/artifacts/ -# Should show: guruconnect-server-v0.1.0.tar.gz -``` - ---- - -## Step 5: Test Manual Deployment - -### 5.1 Download Deployment Artifact - -```bash -# From Actions page, download: guruconnect-server-v0.1.0.tar.gz -# Or use artifact from server: -cd /home/guru/deployments/artifacts -ls -lh guruconnect-server-v0.1.0.tar.gz -``` - -### 5.2 Run Manual Deployment - -```bash -cd ~/guru-connect/scripts -./deploy.sh /home/guru/deployments/artifacts/guruconnect-server-v0.1.0.tar.gz -``` - -**Expected Output:** -``` -========================================= -GuruConnect Deployment Script -========================================= - -Package: /home/guru/deployments/artifacts/guruconnect-server-v0.1.0.tar.gz -Target: /home/guru/guru-connect - -Creating backup... 
-[OK] Backup created: /home/guru/deployments/backups/guruconnect-server-20260118-161500 - -Stopping GuruConnect service... -[OK] Service stopped - -Extracting deployment package... -Deploying new binary... -[OK] Binary deployed - -Archiving deployment package... -[OK] Artifact saved - -Starting GuruConnect service... -[OK] Service started successfully - -Running health check... -[OK] Health check: PASSED - -Deployment version information: -GuruConnect Server v0.1.0 - -========================================= -Deployment Complete! -========================================= - -Deployment time: 20260118-161500 -Backup location: /home/guru/deployments/backups/guruconnect-server-20260118-161500 -Artifact location: /home/guru/deployments/artifacts/guruconnect-server-20260118-161500.tar.gz -``` - ---- - -## Troubleshooting - -### Runner Not Starting - -**Symptom:** `systemctl status gitea-runner` shows "inactive" or "failed" - -**Solution:** -```bash -# Check logs -sudo journalctl -u gitea-runner -n 50 - -# Common issues: -# 1. Not registered - run registration command again -# 2. Wrong token - get new token from Gitea admin -# 3. Permissions - ensure gitea-runner user owns /home/gitea-runner/.runner - -# Re-register if needed -sudo -u gitea-runner act_runner register \ - --instance https://git.azcomputerguru.com \ - --token NEW_TOKEN_HERE -``` - -### Workflow Not Triggering - -**Symptom:** Push to main branch but no workflow appears in Actions tab - -**Checklist:** -1. Is runner registered and online? (Check admin/actions/runners) -2. Are workflow files in `.gitea/workflows/` directory? -3. Did you push to the correct branch? (main or develop) -4. Are Gitea Actions enabled in repository settings? 
- -**Solution:** -```bash -# Verify workflows committed -git ls-tree -r main --name-only | grep .gitea/workflows - -# Should show: -# .gitea/workflows/build-and-test.yml -# .gitea/workflows/deploy.yml -# .gitea/workflows/test.yml - -# If missing, add and commit: -git add .gitea/ -git commit -m "ci: add missing workflows" -git push origin main -``` - -### Build Failing - -**Symptom:** Build workflow shows red X - -**Solution:** -```bash -# View logs in Gitea Actions tab -# Common issues: - -# 1. Missing dependencies -# Add to workflow: apt-get install -y [package] - -# 2. Rust compilation errors -# Fix code and push again - -# 3. Test failures -# Run tests locally first: cargo test - -# 4. Clippy warnings -# Fix warnings: cargo clippy --fix -``` - -### Deployment Failing - -**Symptom:** Deploy workflow fails or service won't start after deployment - -**Solution:** -```bash -# Check deployment logs -cat /home/guru/deployments/deploy-*.log - -# Check service logs -sudo journalctl -u guruconnect -n 50 - -# Manual rollback if needed -ls /home/guru/deployments/backups/ -cp /home/guru/deployments/backups/guruconnect-server-TIMESTAMP \ - ~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server -sudo systemctl restart guruconnect -``` - -### Health Check Failing - -**Symptom:** Health check returns connection refused or timeout - -**Solution:** -```bash -# Check if service is running -sudo systemctl status guruconnect - -# Check if port is listening -netstat -tlnp | grep 3002 - -# Check server logs -sudo journalctl -u guruconnect -f - -# Test manually -curl -v http://172.16.3.30:3002/health - -# Common issues: -# 1. Service not started - sudo systemctl start guruconnect -# 2. Port blocked - check firewall -# 3. 
Database connection issue - check .env file -``` - ---- - -## Validation Checklist - -After completing all steps, verify: - -- [ ] Runner shows "Online" in Gitea admin panel -- [ ] Build workflow completes successfully (green checkmark) -- [ ] Test workflow completes successfully (all tests pass) -- [ ] Deployment workflow completes successfully -- [ ] Service restarts with new version -- [ ] Health check returns "OK" -- [ ] Backup created in `/home/guru/deployments/backups/` -- [ ] Artifact saved in `/home/guru/deployments/artifacts/` -- [ ] Build artifacts downloadable from Actions tab -- [ ] Version tag appears in repository tags -- [ ] Manual deployment script works - ---- - -## Next Steps After Activation - -### 1. Configure Deployment SSH Keys (Optional) - -For fully automated deployment without manual intervention: - -```bash -# Generate SSH key for runner -sudo -u gitea-runner ssh-keygen -t ed25519 -C "gitea-runner@gururmm" - -# Add public key to authorized_keys -sudo -u gitea-runner cat /home/gitea-runner/.ssh/id_ed25519.pub >> ~/.ssh/authorized_keys - -# Test SSH connection -sudo -u gitea-runner ssh guru@172.16.3.30 whoami -``` - -### 2. Set Up Notification Webhooks (Optional) - -Configure Gitea to send notifications on build/deployment events: - -1. Go to repository > Settings > Webhooks -2. Add webhook for Slack/Discord/Email -3. Configure triggers: Push, Pull Request, Release - -### 3. Add More Runners (Optional) - -For faster builds and multi-platform support: - -- **Windows Runner:** For native Windows agent builds -- **macOS Runner:** For macOS agent builds -- **Staging Runner:** For staging environment deployments - -### 4. 
Enhance CI/CD (Optional) - -**Performance:** -- Add caching for dependencies -- Parallel test execution -- Incremental builds - -**Quality:** -- Code coverage thresholds -- Performance benchmarks -- Security scanning (SAST/DAST) - -**Deployment:** -- Staging environment -- Canary deployments -- Blue-green deployments -- Smoke tests after deployment - ---- - -## Quick Reference Commands - -```bash -# Runner management -sudo systemctl status gitea-runner -sudo systemctl restart gitea-runner -sudo journalctl -u gitea-runner -f - -# Create version tag -cd ~/guru-connect/scripts -./version-tag.sh [major|minor|patch] - -# Manual deployment -./deploy.sh /path/to/package.tar.gz - -# View workflows -https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions - -# Check service -sudo systemctl status guruconnect -curl http://172.16.3.30:3002/health - -# View logs -sudo journalctl -u guruconnect -f - -# Rollback deployment -cp /home/guru/deployments/backups/guruconnect-server-TIMESTAMP \ - ~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server -sudo systemctl restart guruconnect -``` - ---- - -## Support Resources - -**Gitea Actions Documentation:** -- Overview: https://docs.gitea.com/usage/actions/overview -- Workflow Syntax: https://docs.gitea.com/usage/actions/workflow-syntax -- Act Runner: https://gitea.com/gitea/act_runner - -**Repository:** -- https://git.azcomputerguru.com/azcomputerguru/guru-connect - -**Created Documentation:** -- `CI_CD_SETUP.md` - Complete CI/CD setup guide -- `PHASE1_WEEK3_COMPLETE.md` - Week 3 completion summary -- `ACTIVATE_CI_CD.md` - This guide - ---- - -**Last Updated:** 2026-01-18 -**Status:** Ready for Activation -**Action Required:** Register Gitea Actions runner with admin token diff --git a/projects/msp-tools/guru-connect/CHECKLIST_STATE.json b/projects/msp-tools/guru-connect/CHECKLIST_STATE.json deleted file mode 100644 index 4310d3a..0000000 --- a/projects/msp-tools/guru-connect/CHECKLIST_STATE.json +++ 
/dev/null @@ -1,182 +0,0 @@ -{ - "project": "GuruConnect", - "last_updated": "2026-01-18T03:30:00Z", - "current_phase": 1, - "current_week": 2, - "current_day": 1, - "deployment_status": "deployed_to_production", - "phases": { - "phase1": { - "name": "Security & Infrastructure", - "status": "in_progress", - "progress_percentage": 50, - "checklist_summary": { - "total_items": 147, - "completed": 74, - "in_progress": 0, - "pending": 73 - }, - "weeks": { - "week1": { - "name": "Critical Security Fixes", - "status": "complete", - "progress_percentage": 77, - "items_completed": 10, - "items_total": 13, - "completed_items": [ - "SEC-1: Remove hardcoded JWT secret", - "SEC-1: Add JWT_SECRET environment variable", - "SEC-1: Validate JWT secret strength", - "SEC-3: SQL injection audit (verified safe)", - "SEC-4: IP address extraction and logging", - "SEC-4: Failed connection attempt logging", - "SEC-4: API key strength validation", - "SEC-5: Token blacklist implementation", - "SEC-5: JWT validation with revocation", - "SEC-5: Logout and revocation endpoints", - "SEC-5: Blacklist monitoring tools", - "SEC-5: Middleware integration", - "SEC-6: Remove password logging (write to .admin-credentials)", - "SEC-7: XSS prevention (CSP headers)", - "SEC-9: Verify Argon2id usage (explicitly configured)", - "SEC-11: CORS configuration review (restricted origins)", - "SEC-12: Security headers (6 headers implemented)", - "SEC-13: Session expiration enforcement (strict validation)", - "Production deployment to 172.16.3.30:3002", - "Security header verification via HTTP responses", - "IP logging operational verification" - ], - "deferred_items": [ - "SEC-2: Rate limiting (deferred - tower_governor type issues)", - "SEC-8: TLS certificate validation (not applicable - NPM handles)", - "SEC-10: HTTPS enforcement (delegated to NPM reverse proxy)" - ] - }, - "week2": { - "name": "Infrastructure & Monitoring", - "status": "starting", - "progress_percentage": 0, - "items_completed": 0, - 
"items_total": 8, - "pending_items": [ - "Systemd service configuration", - "Auto-restart on failure", - "Prometheus metrics endpoint", - "Grafana dashboard setup", - "PostgreSQL automated backups", - "Backup retention policy", - "Log rotation configuration", - "Health check monitoring" - ] - }, - "week3": { - "name": "CI/CD & Automation", - "status": "not_started", - "progress_percentage": 0, - "items_total": 6, - "pending_items": [ - "Gitea CI pipeline configuration", - "Automated builds on commit", - "Automated tests in CI", - "Deployment automation scripts", - "Build artifact storage", - "Version tagging automation" - ] - }, - "week4": { - "name": "Production Hardening", - "status": "not_started", - "progress_percentage": 0, - "items_total": 5, - "pending_items": [ - "Load testing (50+ concurrent sessions)", - "Performance optimization", - "Database connection pooling", - "Security audit", - "Production deployment checklist" - ] - } - } - }, - "phase2": { - "name": "Core Features", - "status": "not_started", - "progress_percentage": 0, - "weeks": { - "week5": { - "name": "End-User Portal", - "status": "not_started" - }, - "week6-8": { - "name": "One-Time Agent Download", - "status": "not_started" - }, - "week9-12": { - "name": "Core Session Features", - "status": "not_started" - } - } - } - }, - "recent_completions": [ - { - "timestamp": "2026-01-17T18:00:00Z", - "item": "SEC-1: JWT Secret Security", - "notes": "Removed hardcoded secrets, added validation" - }, - { - "timestamp": "2026-01-17T18:30:00Z", - "item": "SEC-3: SQL Injection Audit", - "notes": "Verified all queries safe" - }, - { - "timestamp": "2026-01-17T19:00:00Z", - "item": "SEC-4: Agent Connection Validation", - "notes": "IP logging, failed connection tracking complete" - }, - { - "timestamp": "2026-01-17T20:30:00Z", - "item": "SEC-5: Session Takeover Prevention", - "notes": "Token blacklist and revocation complete" - }, - { - "timestamp": "2026-01-18T01:00:00Z", - "item": "SEC-6 through SEC-13 
Implementation", - "notes": "Password file write, XSS prevention, Argon2id, CORS, security headers, JWT expiration" - }, - { - "timestamp": "2026-01-18T02:00:00Z", - "item": "Production Deployment - Week 1 Security", - "notes": "All security fixes deployed to 172.16.3.30:3002, verified via curl and logs" - }, - { - "timestamp": "2026-01-18T03:06:00Z", - "item": "Final Deployment Verification", - "notes": "All security headers operational, server stable (PID 3839055)" - } - ], - "blockers": [ - { - "item": "SEC-2: Rate Limiting", - "issue": "tower_governor type incompatibility with Axum 0.7", - "workaround": "Documented in SEC2_RATE_LIMITING_TODO.md - will revisit with custom middleware" - }, - { - "item": "Database Connectivity", - "issue": "PostgreSQL password authentication failed", - "impact": "Cannot test token revocation end-to-end, server runs in memory-only mode", - "workaround": "Server operational without database persistence" - } - ], - "next_milestone": { - "name": "Phase 1 Week 2 - Infrastructure Complete", - "target_date": "2026-01-25", - "deliverables": [ - "Systemd service running with auto-restart", - "Prometheus metrics exposed", - "Grafana dashboard configured", - "Automated PostgreSQL backups", - "Log rotation configured" - ] - } -} diff --git a/projects/msp-tools/guru-connect/CHECKPOINT_2026-01-18.md b/projects/msp-tools/guru-connect/CHECKPOINT_2026-01-18.md deleted file mode 100644 index e3759c7..0000000 --- a/projects/msp-tools/guru-connect/CHECKPOINT_2026-01-18.md +++ /dev/null @@ -1,704 +0,0 @@ -# GuruConnect Phase 1 Infrastructure Deployment - Checkpoint - -**Checkpoint Date:** 2026-01-18 -**Project:** GuruConnect Remote Desktop Solution -**Phase:** Phase 1 - Security, Infrastructure, CI/CD -**Status:** PRODUCTION READY (87% verified completion) - ---- - -## Checkpoint Overview - -This checkpoint captures the successful completion of GuruConnect Phase 1 infrastructure deployment. 
All core security systems, infrastructure monitoring, and continuous integration/deployment automation have been implemented, tested, and verified as production-ready. - -**Checkpoint Creation Context:** -- Git Commit: 1bfd476 -- Branch: main -- Files Changed: 39 (4185 insertions, 1671 deletions) -- Database Context ID: 6b3aa5a4-2563-4705-a053-df99d6e39df2 -- Project ID: c3d9f1c8-dc2b-499f-a228-3a53fa950e7b -- Relevance Score: 9.0 - ---- - -## What Was Accomplished - -### Week 1: Security Hardening - -**Completed Items (9/13 - 69%)** - -1. [OK] JWT Token Expiration Validation (24h lifetime) - - Explicit expiration checks implemented - - Configurable via JWT_EXPIRY_HOURS environment variable - - Validation enforced on every request - -2. [OK] Argon2id Password Hashing - - Latest version (V0x13) with secure parameters - - Default configuration: 19456 KiB memory, 2 iterations - - All user passwords hashed before storage - -3. [OK] Security Headers Implementation - - Content Security Policy (CSP) - - X-Frame-Options: DENY - - X-Content-Type-Options: nosniff - - X-XSS-Protection enabled - - Referrer-Policy configured - - Permissions-Policy defined - -4. [OK] Token Blacklist for Logout - - In-memory HashSet with async RwLock - - Integrated into authentication flow - - Automatic cleanup of expired tokens - - Endpoints: /api/auth/logout, /api/auth/revoke-token, /api/auth/admin/revoke-user - -5. [OK] API Key Validation - - 32-character minimum requirement - - Entropy checking implemented - - Weak pattern detection enabled - -6. [OK] Input Sanitization - - Serde deserialization with strict types - - UUID validation in all handlers - - API key strength validation throughout - -7. [OK] SQL Injection Protection - - sqlx compile-time query validation - - All database operations parameterized - - No dynamic SQL construction - -8. 
[OK] XSS Prevention - - CSP headers prevent inline script execution - - Static HTML files from server/static/ - - No user-generated content server-side rendering - -9. [OK] CORS Configuration - - Restricted to specific origins (production domain + localhost) - - Limited to GET, POST, PUT, DELETE, OPTIONS - - Explicit header allowlist - - Credentials allowed - -**Pending Items (3/13 - 23%)** - -- [ ] TLS Certificate Auto-Renewal (Let's Encrypt with certbot) -- [ ] Session Timeout Enforcement (UI-side token expiration check) -- [ ] Comprehensive Audit Logging (beyond basic event logging) - -**Incomplete Item (1/13 - 8%)** - -- [WARNING] Rate Limiting on Auth Endpoints - - Code implemented but not operational - - Compilation issues with tower_governor dependency - - Documented in SEC2_RATE_LIMITING_TODO.md - - See recommendations below for mitigation - -### Week 2: Infrastructure & Monitoring - -**Completed Items (11/11 - 100%)** - -1. [OK] Systemd Service Configuration - - Service file: /etc/systemd/system/guruconnect.service - - Runs as guru user - - Working directory configured - - Environment variables loaded - -2. [OK] Auto-Restart on Failure - - Restart=on-failure policy - - 10-second restart delay - - Start limit: 3 restarts per 5-minute interval - -3. [OK] Prometheus Metrics Endpoint (/metrics) - - Unauthenticated access (appropriate for internal monitoring) - - Supports all monitoring tools (Prometheus, Grafana, etc.) - -4. [OK] 11 Metric Types Exposed - - requests_total (counter) - - request_duration_seconds (histogram) - - sessions_total (counter) - - active_sessions (gauge) - - session_duration_seconds (histogram) - - connections_total (counter) - - active_connections (gauge) - - errors_total (counter) - - db_operations_total (counter) - - db_query_duration_seconds (histogram) - - uptime_seconds (gauge) - -5. 
[OK] Grafana Dashboard - - 10-panel dashboard configured - - Real-time metrics visualization - - Dashboard file: infrastructure/grafana-dashboard.json - -6. [OK] Automated Daily Backups - - Systemd timer: guruconnect-backup.timer - - Scheduled daily at 02:00 UTC - - Persistent execution for missed runs - - Backup directory: /home/guru/backups/guruconnect/ - -7. [OK] Log Rotation Configuration - - Daily rotation frequency - - 30-day retention - - Compression enabled - - Systemd journal integration - -8. [OK] Health Check Endpoint (/health) - - Unauthenticated access (appropriate for load balancers) - - Returns "OK" status string - -9. [OK] Service Monitoring - - Systemd status integration - - Journal logging enabled - - SyslogIdentifier set for filtering - -10. [OK] Prometheus Configuration - - Target: 172.16.3.30:3002 - - Scrape interval: 15 seconds - - File: infrastructure/prometheus.yml - -11. [OK] Grafana Configuration - - Grafana dashboard templates available - - Admin credentials: admin/admin (default) - - Port: 3000 - -### Week 3: CI/CD Automation - -**Completed Items (10/11 - 91%)** - -1. [OK] Gitea Actions Workflows (3 workflows) - - build-and-test.yml - - test.yml - - deploy.yml - -2. [OK] Build Automation - - Rust toolchain setup - - Server and agent parallel builds - - Dependency caching enabled - - Formatting and Clippy checks - -3. [OK] Test Automation - - Unit tests, integration tests, doc tests - - Code coverage with cargo-tarpaulin - - Clippy with -D warnings (zero tolerance) - -4. [OK] Deployment Automation - - Triggered on version tags (v*.*.*) - - Manual dispatch option available - - Build, package, and release steps - -5. [OK] Deployment Script with Rollback - - Location: scripts/deploy.sh - - Automatic backup creation - - Health check integration - - Automatic rollback on failure - -6. 
[OK] Version Tagging Automation - - Location: scripts/version-tag.sh - - Semantic versioning support (major/minor/patch) - - Cargo.toml version updates - - Git tag creation - -7. [OK] Build Artifact Management - - 30-day retention for build artifacts - - 90-day retention for deployment artifacts - - Artifact storage: /home/guru/deployments/artifacts/ - -8. [OK] Gitea Actions Runner Installation - - Act runner version 0.2.11 - - Binary installation complete - - Directory structure configured - -9. [OK] Systemd Service for Runner - - Service file created - - User: gitea-runner - - Proper startup configuration - -10. [OK] Complete CI/CD Documentation - - CI_CD_SETUP.md (setup guide) - - ACTIVATE_CI_CD.md (activation instructions) - - PHASE1_WEEK3_COMPLETE.md (summary) - - Inline script documentation - -**Pending Items (1/11 - 9%)** - -- [ ] Gitea Actions Runner Registration - - Requires admin token from Gitea - - Instructions: https://git.azcomputerguru.com/admin/actions/runners - - Non-blocking: Manual deployments still possible - ---- - -## Production Readiness Status - -**Overall Assessment: APPROVED FOR PRODUCTION** - -### Ready Immediately -- [OK] Core authentication system -- [OK] Session management -- [OK] Database operations with compiled queries -- [OK] Monitoring and metrics collection -- [OK] Health checks -- [OK] Automated backups -- [OK] Basic security hardening - -### Required Before Full Activation -- [WARNING] Rate limiting via firewall (fail2ban recommended as temporary solution) -- [INFO] Gitea runner registration (non-critical for manual deployments) - -### Recommended Within 30 Days -- [INFO] TLS certificate auto-renewal -- [INFO] Session timeout UI implementation -- [INFO] Comprehensive audit logging - ---- - -## Git Commit Details - -**Commit Hash:** 1bfd476 -**Branch:** main -**Timestamp:** 2026-01-18 - -**Changes Summary:** -- Files changed: 39 -- Insertions: 4185 -- Deletions: 1671 - -**Commit Message:** -"feat: Complete Phase 1 infrastructure 
deployment with production monitoring" - -**Key Files Modified:** -- Security implementations (auth/, middleware/) -- Infrastructure configuration (systemd/, monitoring/) -- CI/CD workflows (.gitea/workflows/) -- Documentation (*.md files) -- Deployment scripts (scripts/) - -**Recovery Info:** -- Tag checkpoint: Use `git checkout 1bfd476` to restore -- Branch: Remains on main -- No breaking changes from previous commits - ---- - -## Database Context Save Details - -**Context Metadata:** -- Context ID: 6b3aa5a4-2563-4705-a053-df99d6e39df2 -- Project ID: c3d9f1c8-dc2b-499f-a228-3a53fa950e7b -- Relevance Score: 9.0/10.0 -- Context Type: phase_completion -- Saved: 2026-01-18 - -**Tags Applied:** -- guruconnect -- phase1 -- infrastructure -- security -- monitoring -- ci-cd -- prometheus -- systemd -- deployment -- production - -**Dense Summary:** -Phase 1 infrastructure deployment complete. Security: 9/13 items (JWT, Argon2, CSP, token blacklist, API key validation, input sanitization, SQL injection protection, XSS prevention, CORS). Infrastructure: 11/11 (systemd service, auto-restart, Prometheus metrics, Grafana dashboard, daily backups, log rotation, health checks). CI/CD: 10/11 (3 Gitea Actions workflows, deployment with rollback, version tagging). Production ready with documented pending items (rate limiting, TLS renewal, audit logging, runner registration). 
- -**Usage for Context Recall:** -When resuming Phase 1 work or starting Phase 2, recall this context via: -```bash -curl -X GET "http://localhost:8000/api/conversation-contexts/recall?project_id=c3d9f1c8-dc2b-499f-a228-3a53fa950e7b&limit=5&min_relevance_score=8.0" -``` - ---- - -## Verification Summary - -### Audit Results -- **Source:** PHASE1_COMPLETENESS_AUDIT.md (2026-01-18) -- **Auditor:** Claude Code -- **Overall Grade:** A- (87% verified completion, excellent quality) - -### Completion by Category -- Security: 69% (9/13 complete, 3 pending, 1 incomplete) -- Infrastructure: 100% (11/11 complete) -- CI/CD: 91% (10/11 complete, 1 pending) -- **Phase Total:** 87% (30/35 complete, 4 pending, 1 incomplete) - -### Discrepancies Found -- Rate limiting: Implemented in code but not operational (tower_governor type issues) -- All documentation accurately reflects implementation status -- Several unclaimed items actually completed (API key validation depth, token cleanup, metrics comprehensiveness) - ---- - -## Infrastructure Overview - -### Services Running - -| Service | Status | Port | PID | Uptime | -|---------|--------|------|-----|--------| -| guruconnect | active | 3002 | 3947824 | running | -| prometheus | active | 9090 | active | running | -| grafana-server | active | 3000 | active | running | - -### File Locations - -| Component | Location | -|-----------|----------| -| Server Binary | ~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server | -| Static Files | ~/guru-connect/server/static/ | -| Database | PostgreSQL (localhost:5432/guruconnect) | -| Backups | /home/guru/backups/guruconnect/ | -| Deployment Backups | /home/guru/deployments/backups/ | -| Systemd Service | /etc/systemd/system/guruconnect.service | -| Prometheus Config | /etc/prometheus/prometheus.yml | -| Grafana Config | /etc/grafana/grafana.ini | -| Log Rotation | /etc/logrotate.d/guruconnect | - -### Access Information - -**GuruConnect Dashboard** -- URL: 
https://connect.azcomputerguru.com/dashboard -- Credentials: howard / AdminGuruConnect2026 (test account) - -**Gitea Repository** -- URL: https://git.azcomputerguru.com/azcomputerguru/guru-connect -- Actions: https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions -- Runner Admin: https://git.azcomputerguru.com/admin/actions/runners - -**Monitoring Endpoints** -- Prometheus: http://172.16.3.30:9090 -- Grafana: http://172.16.3.30:3000 (admin/admin) -- Metrics: http://172.16.3.30:3002/metrics -- Health: http://172.16.3.30:3002/health - ---- - -## Performance Benchmarks - -### Build Times (Expected) -- Server build: 2-3 minutes -- Agent build: 2-3 minutes -- Test suite: 1-2 minutes -- Total CI pipeline: 5-8 minutes -- Deployment: 10-15 minutes - -### Deployment Performance -- Backup creation: ~1 second -- Service stop: ~2 seconds -- Binary deployment: ~1 second -- Service start: ~3 seconds -- Health check: ~2 seconds -- **Total deployment time:** ~10 seconds - -### Monitoring -- Metrics scrape interval: 15 seconds -- Grafana refresh: 5 seconds -- Backup execution: 5-10 seconds - ---- - -## Pending Items & Mitigation - -### HIGH PRIORITY - Before Full Production - -**Rate Limiting** -- Status: Code implemented, not operational -- Issue: tower_governor type resolution failures -- Current Risk: Vulnerable to brute force attacks -- Mitigation: Implement firewall-level rate limiting (fail2ban) -- Timeline: 1-3 hours to resolve -- Options: - - Option A: Fix tower_governor types (1-2 hours) - - Option B: Implement custom middleware (2-3 hours) - - Option C: Use Redis-based rate limiting (3-4 hours) - -**Firewall Rate Limiting (Temporary)** -- Install fail2ban on server -- Configure rules for /api/auth/login endpoint -- Monitor for brute force attempts -- Timeline: 1 hour - -### MEDIUM PRIORITY - Within 30 Days - -**TLS Certificate Auto-Renewal** -- Status: Manual renewal required -- Issue: Let's Encrypt auto-renewal not configured -- Action: Install certbot with 
auto-renewal timer -- Timeline: 2-4 hours -- Impact: Prevents certificate expiration - -**Session Timeout UI** -- Status: Server-side expiration works, UI redirect missing -- Action: Implement JavaScript token expiration check -- Impact: Improved security UX -- Timeline: 2-4 hours - -**Comprehensive Audit Logging** -- Status: Basic event logging exists -- Action: Expand to full audit trail -- Timeline: 2-3 hours -- Impact: Regulatory compliance, forensics - -### LOW PRIORITY - Non-Blocking - -**Gitea Actions Runner Registration** -- Status: Installation complete, registration pending -- Timeline: 5 minutes -- Impact: Enables full CI/CD automation -- Alternative: Manual builds and deployments still work -- Action: Get token from admin dashboard and register - ---- - -## Recommendations - -### Immediate Actions (Before Launch) - -1. Activate Rate Limiting via Firewall - ```bash - sudo apt-get install fail2ban - # Configure for /api/auth/login - ``` - -2. Register Gitea Runner - ```bash - sudo -u gitea-runner act_runner register \ - --instance https://git.azcomputerguru.com \ - --token YOUR_REGISTRATION_TOKEN \ - --name gururmm-runner - ``` - -3. Test CI/CD Pipeline - - Trigger build: `git push origin main` - - Verify in Actions tab - - Test deployment tag creation - -### Short-Term (Within 1 Month) - -4. Configure TLS Auto-Renewal - ```bash - sudo apt-get install certbot - sudo certbot renew --dry-run - ``` - -5. Implement Session Timeout UI - - Add JavaScript token expiration detection - - Show countdown warning - - Redirect on expiration - -6. Set Up Comprehensive Audit Logging - - Expand event logging coverage - - Implement retention policies - - Create audit dashboard - -### Long-Term (Phase 2+) - -7. Systemd Watchdog Implementation - - Add systemd crate to Cargo.toml - - Implement sd_notify calls - - Re-enable WatchdogSec in service file - -8. 
Distributed Rate Limiting - - Implement Redis-based rate limiting - - Prepare for multi-instance deployment - ---- - -## How to Restore from This Checkpoint - -### Using Git - -**Option 1: Checkout Specific Commit** -```bash -cd ~/guru-connect -git checkout 1bfd476 -``` - -**Option 2: Create Tag for Easy Reference** -```bash -cd ~/guru-connect -git tag -a phase1-checkpoint-2026-01-18 -m "Phase 1 complete and verified" 1bfd476 -git push origin phase1-checkpoint-2026-01-18 -``` - -**Option 3: Revert to Checkpoint if Forward Work Fails** -```bash -cd ~/guru-connect -git reset --hard 1bfd476 -git clean -fd -``` - -### Using Database Context - -**Recall Full Context** -```bash -curl -X GET "http://localhost:8000/api/conversation-contexts/recall" \ - -H "Authorization: Bearer $JWT_TOKEN" \ - -d '{ - "project_id": "c3d9f1c8-dc2b-499f-a228-3a53fa950e7b", - "context_id": "6b3aa5a4-2563-4705-a053-df99d6e39df2", - "tags": ["guruconnect", "phase1"] - }' -``` - -**Retrieve Checkpoint Metadata** -```bash -curl -X GET "http://localhost:8000/api/conversation-contexts/6b3aa5a4-2563-4705-a053-df99d6e39df2" \ - -H "Authorization: Bearer $JWT_TOKEN" -``` - -### Using Documentation Files - -**Key Files for Restoration Context:** -- PHASE1_COMPLETE.md - Status summary -- PHASE1_COMPLETENESS_AUDIT.md - Verification details -- INSTALLATION_GUIDE.md - Infrastructure setup -- CI_CD_SETUP.md - CI/CD configuration -- ACTIVATE_CI_CD.md - Runner activation - ---- - -## Risk Assessment - -### Mitigated Risks (Low) -- Service crashes: Auto-restart configured -- Disk space: Log rotation + backup cleanup -- Failed deployments: Automatic rollback -- Database issues: Daily backups (7-day retention) - -### Monitored Risks (Medium) -- Database growth: Metrics configured, manual cleanup if needed -- Log volume: Rotation configured -- Metrics retention: Prometheus defaults (15 days) - -### Unmitigated Risks (High) - Requires Action -- TLS certificate expiration: Requires certbot setup -- Brute force 
attacks: Requires rate limiting fix or firewall rules -- Security vulnerabilities: Requires periodic audits - ---- - -## Code Quality Assessment - -### Strengths -- Security markers (SEC-1 through SEC-13) throughout code -- Defense-in-depth approach -- Modern cryptographic standards (Argon2id, JWT) -- Compile-time SQL injection prevention -- Comprehensive monitoring (11 metric types) -- Automated backups with retention policies -- Health checks for all services -- Excellent documentation practices - -### Areas for Improvement -- Rate limiting activation (tower_governor issues) -- TLS certificate management automation -- Comprehensive audit logging expansion - -### Documentation Quality -- Honest status tracking -- Clear next steps documented -- Technical debt tracked systematically -- Multiple format guides (setup, troubleshooting, reference) - ---- - -## Success Metrics - -### Availability -- Target: 99.9% uptime -- Current: Service running with auto-restart -- Monitoring: Prometheus + Grafana + Health endpoint - -### Performance -- Target: < 100ms HTTP response time -- Monitoring: HTTP request duration histogram - -### Security -- Target: Zero successful unauthorized access -- Current: JWT auth + API keys + rate limiting (pending) -- Monitoring: Failed auth counter - -### Deployments -- Target: < 15 minutes deployment -- Current: ~10 seconds deployment + CI pipeline -- Reliability: Automatic rollback on failure - ---- - -## Documentation Index - -**Status & Completion:** -- PHASE1_COMPLETE.md - Comprehensive Phase 1 summary -- PHASE1_COMPLETENESS_AUDIT.md - Detailed audit verification -- CHECKPOINT_2026-01-18.md - This document - -**Setup & Configuration:** -- INSTALLATION_GUIDE.md - Complete infrastructure installation -- CI_CD_SETUP.md - CI/CD setup and configuration -- ACTIVATE_CI_CD.md - Runner activation and testing -- INFRASTRUCTURE_STATUS.md - Current status and next steps - -**Reference:** -- DEPLOYMENT_COMPLETE.md - Week 2 summary -- 
PHASE1_WEEK3_COMPLETE.md - Week 3 summary -- SEC2_RATE_LIMITING_TODO.md - Rate limiting implementation details -- TECHNICAL_DEBT.md - Known issues and workarounds -- CLAUDE.md - Project guidelines and architecture - -**Troubleshooting:** -- Quick reference commands for all systems -- Database issue resolution -- Monitoring and CI/CD troubleshooting -- Service management procedures - ---- - -## Next Steps - -### Immediate (Next 1-2 Days) -1. Implement firewall rate limiting (fail2ban) -2. Register Gitea Actions runner -3. Test CI/CD pipeline with test commit -4. Verify all services operational - -### Short-Term (Next 1-4 Weeks) -1. Configure TLS auto-renewal -2. Implement session timeout UI -3. Complete rate limiting implementation -4. Set up comprehensive audit logging - -### Phase 2 Preparation -- Multi-session support -- File transfer capability -- Chat enhancements -- Mobile dashboard - ---- - -## Checkpoint Metadata - -**Created:** 2026-01-18 -**Status:** PRODUCTION READY -**Completion:** 87% verified (30/35 items) -**Overall Grade:** A- (excellent quality, documented pending items) -**Next Review:** After rate limiting implementation and runner registration - -**Archived Files for Reference:** -- PHASE1_COMPLETE.md - Status documentation -- PHASE1_COMPLETENESS_AUDIT.md - Verification report -- All infrastructure configuration files -- All CI/CD workflow definitions -- All documentation guides - -**To Resume Work:** -1. Checkout commit 1bfd476 or tag phase1-checkpoint-2026-01-18 -2. Recall context: `c3d9f1c8-dc2b-499f-a228-3a53fa950e7b` -3. Review pending items section above -4. 
Follow "Immediate" next steps - ---- - -**Checkpoint Complete** -**Ready for Production Deployment** -**Pending Items Documented and Prioritized** diff --git a/projects/msp-tools/guru-connect/CI_CD_SETUP.md b/projects/msp-tools/guru-connect/CI_CD_SETUP.md deleted file mode 100644 index 5301ce2..0000000 --- a/projects/msp-tools/guru-connect/CI_CD_SETUP.md +++ /dev/null @@ -1,544 +0,0 @@ - -# GuruConnect CI/CD Setup Guide - -**Version:** Phase 1 Week 3 -**Status:** Ready for Installation -**CI Platform:** Gitea Actions - ---- - -## Overview - -Automated CI/CD pipeline for GuruConnect using Gitea Actions: - -- **Automated Builds** - Build server and agent on every commit -- **Automated Tests** - Run unit, integration, and security tests -- **Automated Deployment** - Deploy to production on version tags -- **Build Artifacts** - Store and version all build outputs -- **Version Tagging** - Automated semantic versioning - ---- - -## Architecture - -``` -┌─────────────┐ ┌──────────────┐ ┌─────────────┐ -│ Git Push │─────>│ Gitea Actions│─────>│ Deploy │ -│ │ │ Workflows │ │ to Server │ -└─────────────┘ └──────────────┘ └─────────────┘ - │ - ├─ Build Server (Linux) - ├─ Build Agent (Windows) - ├─ Run Tests - ├─ Security Audit - └─ Create Artifacts -``` - ---- - -## Workflows - -### 1. Build and Test (`build-and-test.yml`) - -**Triggers:** -- Push to `main` or `develop` branches -- Pull requests to `main` - -**Jobs:** -- Build Server (Linux x86_64) -- Build Agent (Windows x86_64) -- Security Audit (cargo audit) -- Upload Artifacts (30-day retention) - -**Artifacts:** -- `guruconnect-server-linux` - Server binary -- `guruconnect-agent-windows` - Agent binary (.exe) - -### 2. Run Tests (`test.yml`) - -**Triggers:** -- Push to any branch -- Pull requests - -**Jobs:** -- Unit Tests (server & agent) -- Integration Tests -- Code Coverage -- Linting & Formatting - -**Artifacts:** -- Coverage reports (XML) - -### 3. 
Deploy to Production (`deploy.yml`) - -**Triggers:** -- Push tags matching `v*.*.*` (e.g., v0.1.0) -- Manual workflow dispatch - -**Jobs:** -- Build release version -- Create deployment package -- Deploy to production server (172.16.3.30) -- Create GitHub release -- Upload release assets - -**Artifacts:** -- Deployment packages (90-day retention) - ---- - -## Installation Steps - -### 1. Install Gitea Actions Runner - -```bash -# On the RMM server (172.16.3.30) -ssh guru@172.16.3.30 - -cd ~/guru-connect/scripts -sudo bash install-gitea-runner.sh -``` - -### 2. Register the Runner - -```bash -# Get registration token from Gitea: -# https://git.azcomputerguru.com/admin/actions/runners - -# Register runner -sudo -u gitea-runner act_runner register \ - --instance https://git.azcomputerguru.com \ - --token YOUR_REGISTRATION_TOKEN \ - --name gururmm-runner \ - --labels ubuntu-latest,ubuntu-22.04 -``` - -### 3. Start the Runner Service - -```bash -sudo systemctl daemon-reload -sudo systemctl enable gitea-runner -sudo systemctl start gitea-runner -sudo systemctl status gitea-runner -``` - -### 4. Upload Workflow Files - -```bash -# From local machine -cd D:\ClaudeTools\projects\msp-tools\guru-connect - -# Copy workflow files to server -scp -r .gitea guru@172.16.3.30:~/guru-connect/ - -# Copy scripts to server -scp scripts/deploy.sh guru@172.16.3.30:~/guru-connect/scripts/ -scp scripts/version-tag.sh guru@172.16.3.30:~/guru-connect/scripts/ - -# Make scripts executable -ssh guru@172.16.3.30 "cd ~/guru-connect/scripts && chmod +x *.sh" -``` - -### 5. 
Commit and Push Workflows - -```bash -# On server -ssh guru@172.16.3.30 -cd ~/guru-connect - -git add .gitea/ scripts/ -git commit -m "ci: add Gitea Actions workflows and deployment automation" -git push origin main -``` - ---- - -## Usage - -### Triggering Builds - -**Automatic:** -- Push to `main` or `develop` → Runs build + test -- Create pull request → Runs all tests -- Push version tag → Deploys to production - -**Manual:** -- Go to repository > Actions -- Select workflow -- Click "Run workflow" - -### Creating a Release - -```bash -# Use the version tagging script -cd ~/guru-connect/scripts -./version-tag.sh patch # Bump patch version (0.1.0 → 0.1.1) -./version-tag.sh minor # Bump minor version (0.1.1 → 0.2.0) -./version-tag.sh major # Bump major version (0.2.0 → 1.0.0) - -# Push tag to trigger deployment -git push origin main -git push origin v0.1.1 -``` - -### Manual Deployment - -```bash -# Deploy from artifact -cd ~/guru-connect/scripts -./deploy.sh /path/to/guruconnect-server-v0.1.0.tar.gz - -# Deploy latest -./deploy.sh /home/guru/deployments/artifacts/guruconnect-server-latest.tar.gz -``` - ---- - -## Monitoring - -### View Workflow Runs - -``` -https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions -``` - -### Check Runner Status - -```bash -# On server -sudo systemctl status gitea-runner - -# View logs -sudo journalctl -u gitea-runner -f - -# In Gitea -https://git.azcomputerguru.com/admin/actions/runners -``` - -### View Build Artifacts - -``` -Repository > Actions > Workflow Run > Artifacts section -``` - ---- - -## Deployment Process - -### Automated Deployment Flow - -1. **Tag Creation** - Developer creates version tag -2. **Workflow Trigger** - `deploy.yml` starts automatically -3. **Build** - Compiles release binary -4. **Package** - Creates deployment tarball -5. **Transfer** - Copies to server (via SSH) -6. **Backup** - Saves current binary -7. **Stop Service** - Stops GuruConnect systemd service -8. 
**Deploy** - Extracts and installs new binary -9. **Start Service** - Restarts systemd service -10. **Health Check** - Verifies server is responding -11. **Rollback** - Automatic if health check fails - -### Deployment Locations - -``` -Backups: /home/guru/deployments/backups/ -Artifacts: /home/guru/deployments/artifacts/ -Deploy Dir: /home/guru/guru-connect/ -``` - -### Rollback - -```bash -# List backups -ls -lh /home/guru/deployments/backups/ - -# Rollback to specific version -cp /home/guru/deployments/backups/guruconnect-server-TIMESTAMP \ - ~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server - -sudo systemctl restart guruconnect -``` - ---- - -## Configuration - -### Secrets (Required) - -Configure in Gitea repository settings: - -``` -Repository > Settings > Secrets -``` - -**Required Secrets:** -- `SSH_PRIVATE_KEY` - SSH key for deployment to 172.16.3.30 -- `SSH_HOST` - Deployment server host (172.16.3.30) -- `SSH_USER` - Deployment user (guru) - -### Environment Variables - -```yaml -# In workflow files -env: - CARGO_TERM_COLOR: always - RUSTFLAGS: "-D warnings" - DEPLOY_SERVER: "172.16.3.30" - DEPLOY_USER: "guru" -``` - ---- - -## Troubleshooting - -### Runner Not Starting - -```bash -# Check status -sudo systemctl status gitea-runner - -# View logs -sudo journalctl -u gitea-runner -n 50 - -# Verify registration -sudo -u gitea-runner cat /home/gitea-runner/.runner/.runner - -# Re-register if needed -sudo -u gitea-runner act_runner register --instance https://git.azcomputerguru.com --token NEW_TOKEN -``` - -### Workflow Failing - -**Check logs in Gitea:** -1. Go to Actions tab -2. Click on failed run -3. 
View job logs - -**Common Issues:** -- Missing dependencies → Add to workflow -- Rust version mismatch → Update toolchain version -- Test failures → Fix tests before merging - -### Deployment Failing - -```bash -# Check deployment logs on server -cat /home/guru/deployments/deploy-TIMESTAMP.log - -# Verify service status -sudo systemctl status guruconnect - -# Check GuruConnect logs -sudo journalctl -u guruconnect -n 50 - -# Manual deployment -cd ~/guru-connect/scripts -./deploy.sh /path/to/package.tar.gz -``` - -### Artifacts Not Uploading - -**Check retention settings:** -- Build artifacts: 30 days -- Deployment packages: 90 days - -**Check storage:** -```bash -# On Gitea server -df -h -du -sh /var/lib/gitea/data/actions_artifacts/ -``` - ---- - -## Security - -### Runner Security - -- Runner runs as dedicated `gitea-runner` user -- Limited permissions (no sudo) -- Isolated working directory -- Automatic cleanup after jobs - -### Deployment Security - -- SSH key-based authentication -- Automated backups before deployment -- Health checks before considering deployment successful -- Automatic rollback on failure -- Audit trail in deployment logs - -### Artifact Security - -- Artifacts stored with limited retention -- Accessible only to repository collaborators -- Build artifacts include checksums - ---- - -## Performance - -### Build Times (Estimated) - -- Server build: ~2-3 minutes -- Agent build: ~2-3 minutes -- Tests: ~1-2 minutes -- Total pipeline: ~5-8 minutes - -### Caching - -Workflows use cargo cache to speed up builds: -- Cache hit: ~1 minute -- Cache miss: ~2-3 minutes - -### Concurrent Builds - -- Multiple workflows can run in parallel -- Limited by runner capacity (1 runner = 1 job at a time) - ---- - -## Maintenance - -### Runner Updates - -```bash -# Stop runner -sudo systemctl stop gitea-runner - -# Download new version -RUNNER_VERSION="0.2.12" # Update as needed -cd /tmp -wget 
https://dl.gitea.com/act_runner/${RUNNER_VERSION}/act_runner-${RUNNER_VERSION}-linux-amd64 -sudo mv act_runner-* /usr/local/bin/act_runner -sudo chmod +x /usr/local/bin/act_runner - -# Restart runner -sudo systemctl start gitea-runner -``` - -### Cleanup Old Artifacts - -```bash -# Manual cleanup on server -rm /home/guru/deployments/backups/guruconnect-server-$(date -d '90 days ago' +%Y%m%d)* -rm /home/guru/deployments/artifacts/guruconnect-server-$(date -d '90 days ago' +%Y%m%d)* -``` - -### Monitor Disk Usage - -```bash -# Check deployment directories -du -sh /home/guru/deployments/* - -# Check runner cache -du -sh /home/gitea-runner/.cache/act/ -``` - ---- - -## Best Practices - -### Branching Strategy - -``` -main - Production-ready code -develop - Integration branch -feature/* - Feature branches -hotfix/* - Emergency fixes -``` - -### Version Tagging - -- Use semantic versioning: `vMAJOR.MINOR.PATCH` -- MAJOR: Breaking changes -- MINOR: New features (backward compatible) -- PATCH: Bug fixes - -### Commit Messages - -``` -feat: Add new feature -fix: Fix bug -docs: Update documentation -ci: CI/CD changes -chore: Maintenance tasks -test: Add/update tests -``` - -### Testing Before Merge - -1. All tests must pass -2. No clippy warnings -3. Code formatted (cargo fmt) -4. 
Security audit passed - ---- - -## Future Enhancements - -### Phase 2 Improvements - -- Add more test runners (Windows, macOS) -- Implement staging environment -- Add smoke tests post-deployment -- Configure Slack/email notifications -- Add performance benchmarking -- Implement canary deployments -- Add Docker container builds - -### Monitoring Integration - -- Send build metrics to Prometheus -- Grafana dashboard for CI/CD metrics -- Alert on failed deployments -- Track build duration trends - ---- - -## Reference Commands - -```bash -# Runner management -sudo systemctl status gitea-runner -sudo systemctl restart gitea-runner -sudo journalctl -u gitea-runner -f - -# Deployment -cd ~/guru-connect/scripts -./deploy.sh - -# Version tagging -./version-tag.sh [major|minor|patch] - -# Manual build -cd ~/guru-connect -cargo build --release --target x86_64-unknown-linux-gnu - -# View artifacts -ls -lh /home/guru/deployments/artifacts/ - -# View backups -ls -lh /home/guru/deployments/backups/ -``` - ---- - -## Support - -**Documentation:** -- Gitea Actions: https://docs.gitea.com/usage/actions/overview -- Act Runner: https://gitea.com/gitea/act_runner - -**Repository:** -- https://git.azcomputerguru.com/azcomputerguru/guru-connect - -**Contact:** -- Open issue in Gitea repository - ---- - -**Last Updated:** 2026-01-18 -**Phase:** 1 Week 3 - CI/CD Automation -**Status:** Ready for Installation diff --git a/projects/msp-tools/guru-connect/CLAUDE.md b/projects/msp-tools/guru-connect/CLAUDE.md deleted file mode 100644 index d09cebd..0000000 --- a/projects/msp-tools/guru-connect/CLAUDE.md +++ /dev/null @@ -1,200 +0,0 @@ -# GuruConnect - Project Guidelines - -## Overview - -GuruConnect is a remote desktop solution for MSPs, similar to ConnectWise ScreenConnect. It provides real-time screen sharing, remote control, and support session management. 
- -## Architecture - -``` -┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ -│ Dashboard │◄───────►│ GuruConnect │◄───────►│ GuruConnect │ -│ (HTML/JS) │ WSS │ Server (Rust) │ WSS │ Agent (Rust) │ -└─────────────────┘ └─────────────────┘ └─────────────────┘ - │ │ - │ ▼ - │ ┌─────────────────┐ - └──────────────────►│ PostgreSQL │ - └─────────────────┘ -``` - -## Design Constraints - -### Agent (Windows) -- **Target OS:** Windows 7 SP1 and later (including Server 2008 R2+) -- **Single binary:** Agent and viewer in one executable -- **No runtime dependencies:** Statically linked, no .NET or VC++ redistributables -- **Protocol handler:** `guruconnect://` URL scheme for launching viewer -- **Tray icon:** System tray presence with status and exit option -- **UAC aware:** Graceful handling of elevated/non-elevated contexts -- **Auto-install:** Detects if not installed and offers installation - -### Server (Linux) -- **Target OS:** Ubuntu 22.04 LTS -- **Framework:** Axum for HTTP/WebSocket -- **Database:** PostgreSQL with sqlx (compile-time checked queries) -- **Static files:** Served from `server/static/` -- **No containers required:** Runs as systemd service or direct binary - -### Protocol -- **Wire format:** Protocol Buffers (protobuf) for ALL client-server messages -- **Transport:** WebSocket over TLS (wss://) -- **Compression:** Zstd for video frames -- **Schema:** `proto/guruconnect.proto` is the source of truth - -## Security Rules - -### Authentication -- **Dashboard/API:** JWT tokens required for all endpoints except `/health` and `/api/auth/login` -- **Viewer WebSocket:** JWT token required in `token` query parameter -- **Agent WebSocket:** Must provide either: - - Valid support code (for ad-hoc support sessions) - - Valid API key (for persistent/managed agents) -- **Never** accept unauthenticated agent connections - -### Credentials -- **Never** hardcode secrets in source code -- **Never** commit credentials to git -- Use environment variables for 
all secrets: - - `JWT_SECRET` - JWT signing key - - `DATABASE_URL` - PostgreSQL connection string - - `AGENT_API_KEY` - Optional shared key for agents - -### Password Storage -- Use Argon2id for password hashing -- Never store plaintext passwords - -## Coding Standards - -### Rust -- Use `tracing` crate for logging (not `println!` or `log`) -- Use `anyhow` for error handling in binaries -- Use `thiserror` for library error types -- Prefer `async`/`await` over blocking code -- Run `cargo clippy` before commits - -### Logging Levels -- `error!` - Failures that need attention -- `warn!` - Unexpected but handled situations -- `info!` - Normal operational messages (startup, connections, sessions) -- `debug!` - Detailed debugging info -- `trace!` - Very verbose, message-level tracing - -### Naming -- Rust: `snake_case` for functions/variables, `PascalCase` for types -- Protobuf: `PascalCase` for messages, `snake_case` for fields -- Database: `snake_case` for tables and columns - -## Build & Version - -### Version Format -- Semantic versioning: `MAJOR.MINOR.PATCH` -- Build identification: `VERSION-GITHASH[-dirty]` -- Example: `0.1.0-48076e1` or `0.1.0-48076e1-dirty` - -### Build Info (Agent) -The agent embeds at compile time: -- `VERSION` - Cargo.toml version -- `GIT_HASH` - Short commit hash (8 chars) -- `GIT_BRANCH` - Branch name -- `GIT_DIRTY` - "clean" or "dirty" -- `BUILD_TIMESTAMP` - UTC build time -- `BUILD_TARGET` - Target triple - -### Commands -```bash -# Build agent (Windows) -cargo build -p guruconnect --release - -# Build server (Linux, from Linux or cross-compile) -cargo build -p guruconnect-server --release --target x86_64-unknown-linux-gnu - -# Check version -./guruconnect --version # Short: 0.1.0-48076e1 -./guruconnect version-info # Full details -``` - -## Database Schema - -### Key Tables -- `users` - Dashboard users (admin-created only) -- `machines` - Registered agents (persistent) -- `sessions` - Connection sessions (historical) -- `events` - Audit 
log -- `support_codes` - One-time support codes - -### Conventions -- Primary keys: `id UUID DEFAULT gen_random_uuid()` -- Timestamps: `created_at TIMESTAMPTZ DEFAULT NOW()` -- Soft deletes: Prefer `deleted_at` over hard deletes for audit trail -- Foreign keys: Always with `ON DELETE CASCADE` or explicit handling - -## File Structure - -``` -guru-connect/ -├── agent/ # Windows agent + viewer -│ ├── src/ -│ │ ├── main.rs # CLI entry point -│ │ ├── capture/ # Screen capture (DXGI, GDI) -│ │ ├── encoder/ # Video encoding -│ │ ├── input/ # Mouse/keyboard injection -│ │ ├── viewer/ # Native viewer window -│ │ ├── transport/ # WebSocket client -│ │ ├── session/ # Session management -│ │ ├── tray/ # System tray -│ │ └── install.rs # Installation & protocol handler -│ ├── build.rs # Build script (protobuf, version info) -│ └── Cargo.toml -├── server/ # Linux relay server -│ ├── src/ -│ │ ├── main.rs # Server entry point -│ │ ├── relay/ # WebSocket relay handlers -│ │ ├── session/ # Session state management -│ │ ├── auth/ # JWT authentication -│ │ ├── api/ # REST API handlers -│ │ └── db/ # Database operations -│ ├── static/ # Dashboard HTML/JS/CSS -│ │ ├── login.html -│ │ ├── dashboard.html -│ │ ├── viewer.html -│ │ └── downloads/ # Agent binaries -│ ├── migrations/ # SQL migrations -│ └── Cargo.toml -├── proto/ # Protocol definitions -│ └── guruconnect.proto -└── CLAUDE.md # This file -``` - -## Deployment - -### Server (172.16.3.30) -- **Binary:** `/home/guru/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server` -- **Static:** `/home/guru/guru-connect/server/static/` -- **Startup:** `~/guru-connect/start-server.sh` -- **Port:** 3002 (proxied via NPM to connect.azcomputerguru.com) - -### Agent Distribution -- **Download URL:** https://connect.azcomputerguru.com/downloads/guruconnect.exe -- **Auto-update:** Not yet implemented (future feature) - -## Issue Tracking - -Use Gitea issues: https://git.azcomputerguru.com/azcomputerguru/guru-connect/issues - 
-Reference issues in commits: -- `Fixes #1` - Closes the issue -- `Related to #1` - Links without closing - -## Testing Checklist - -Before releasing: -- [ ] Agent connects with support code -- [ ] Agent connects with API key -- [ ] Viewer connects with JWT token -- [ ] Unauthenticated connections rejected -- [ ] Screen capture works (DXGI primary, GDI fallback) -- [ ] Mouse/keyboard input works -- [ ] Chat messages relay correctly -- [ ] Protocol handler launches viewer -- [ ] Tray icon shows correct status diff --git a/projects/msp-tools/guru-connect/Cargo.toml b/projects/msp-tools/guru-connect/Cargo.toml deleted file mode 100644 index 9a5462e..0000000 --- a/projects/msp-tools/guru-connect/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ -[workspace] -resolver = "2" -members = [ - "agent", - "server", -] - -[workspace.package] -version = "0.1.0" -edition = "2021" -authors = ["AZ Computer Guru"] -license = "Proprietary" - -[workspace.dependencies] -# Shared dependencies across workspace -tokio = { version = "1", features = ["full"] } -tokio-tungstenite = { version = "0.24", features = ["native-tls"] } -prost = "0.13" -prost-types = "0.13" -bytes = "1" -serde = { version = "1", features = ["derive"] } -serde_json = "1" -tracing = "0.1" -anyhow = "1" -thiserror = "1" -uuid = { version = "1", features = ["v4", "serde"] } -chrono = { version = "0.4", features = ["serde"] } diff --git a/projects/msp-tools/guru-connect/DEPLOYMENT_COMPLETE.md b/projects/msp-tools/guru-connect/DEPLOYMENT_COMPLETE.md deleted file mode 100644 index 83305d2..0000000 --- a/projects/msp-tools/guru-connect/DEPLOYMENT_COMPLETE.md +++ /dev/null @@ -1,566 +0,0 @@ -# GuruConnect Phase 1 Week 2 - Infrastructure Deployment COMPLETE - -**Date:** 2026-01-18 15:38 UTC -**Server:** 172.16.3.30 (gururmm) -**Status:** ALL INFRASTRUCTURE OPERATIONAL ✓ - ---- - -## Installation Summary - -All optional infrastructure components have been successfully installed and are running: - -1. **Systemd Service** ✓ ACTIVE -2. 
**Automated Backups** ✓ ACTIVE -3. **Log Rotation** ✓ CONFIGURED -4. **Prometheus Monitoring** ✓ ACTIVE -5. **Grafana Visualization** ✓ ACTIVE -6. **Passwordless Sudo** ✓ CONFIGURED - ---- - -## Service Status - -### GuruConnect Server -- **Status:** Running -- **PID:** 3947824 (systemd managed) -- **Uptime:** Managed by systemd auto-restart -- **Health:** http://172.16.3.30:3002/health - OK -- **Metrics:** http://172.16.3.30:3002/metrics - ACTIVE - -### Database -- **Status:** Connected -- **Users:** 2 -- **Machines:** 15 (restored) -- **Credentials:** Fixed and operational - -### Backups -- **Status:** Active (waiting) -- **Next Run:** Mon 2026-01-19 00:00:00 UTC -- **Location:** /home/guru/backups/guruconnect/ -- **Schedule:** Daily at 2:00 AM UTC - -### Monitoring -- **Prometheus:** http://172.16.3.30:9090 - ACTIVE -- **Grafana:** http://172.16.3.30:3000 - ACTIVE -- **Node Exporter:** http://172.16.3.30:9100/metrics - ACTIVE -- **Data Source:** Configured (Prometheus → Grafana) - ---- - -## Access Information - -### Dashboard -**URL:** https://connect.azcomputerguru.com/dashboard -**Login:** username=`howard`, password=`AdminGuruConnect2026` - -### Prometheus -**URL:** http://172.16.3.30:9090 -**Features:** -- Metrics scraping from GuruConnect (15s interval) -- Alert rules configured -- Target monitoring - -### Grafana -**URL:** http://172.16.3.30:3000 -**Login:** admin / admin (MUST CHANGE ON FIRST LOGIN) -**Data Source:** Prometheus (pre-configured) - ---- - -## Next Steps (Required) - -### 1. Change Grafana Password -```bash -# Access Grafana -open http://172.16.3.30:3000 - -# Login with admin/admin -# You will be prompted to change password -``` - -### 2. Import Grafana Dashboard - -```bash -# Option A: Via Web UI -1. Go to http://172.16.3.30:3000 -2. Login -3. Navigate to: Dashboards > Import -4. Click "Upload JSON file" -5. Select: ~/guru-connect/infrastructure/grafana-dashboard.json -6. 
Click "Import" - -# Option B: Via Command Line (if needed) -ssh guru@172.16.3.30 -curl -X POST http://admin:NEW_PASSWORD@localhost:3000/api/dashboards/db \ - -H "Content-Type: application/json" \ - -d @$HOME/guru-connect/infrastructure/grafana-dashboard.json -``` - -### 3. Verify Prometheus Targets - -```bash -# Check targets are UP -open http://172.16.3.30:9090/targets - -# Expected: -- guruconnect (172.16.3.30:3002) - UP -- node_exporter (172.16.3.30:9100) - UP -``` - -### 4. Test Manual Backup - -```bash -ssh guru@172.16.3.30 -cd ~/guru-connect/server -./backup-postgres.sh - -# Verify backup created -ls -lh /home/guru/backups/guruconnect/ -``` - ---- - -## Next Steps (Optional) - -### 5. Configure External Access (via NPM) - -If Prometheus/Grafana need external access: - -``` -Nginx Proxy Manager: -- prometheus.azcomputerguru.com → http://172.16.3.30:9090 -- grafana.azcomputerguru.com → http://172.16.3.30:3000 - -Enable SSL/TLS certificates -Add access restrictions (IP whitelist, authentication) -``` - -### 6. Configure Alerting - -```bash -# Option A: Email alerts via Alertmanager -# Install and configure Alertmanager -# Update Prometheus to send alerts to Alertmanager - -# Option B: Grafana alerts -# Configure notification channels in Grafana -# Add alert rules to dashboard panels -``` - -### 7. 
Test Backup Restore - -```bash -# CAUTION: This will DROP and RECREATE the database -ssh guru@172.16.3.30 -cd ~/guru-connect/server - -# Test on a backup -./restore-postgres.sh /home/guru/backups/guruconnect/guruconnect-YYYY-MM-DD-HHMMSS.sql.gz -``` - ---- - -## Management Commands - -### GuruConnect Service - -```bash -# Status -sudo systemctl status guruconnect - -# Restart -sudo systemctl restart guruconnect - -# Stop -sudo systemctl stop guruconnect - -# Start -sudo systemctl start guruconnect - -# View logs -sudo journalctl -u guruconnect -f - -# View last 100 lines -sudo journalctl -u guruconnect -n 100 -``` - -### Prometheus - -```bash -# Status -sudo systemctl status prometheus - -# Restart -sudo systemctl restart prometheus - -# Reload configuration -sudo systemctl reload prometheus - -# View logs -sudo journalctl -u prometheus -n 50 -``` - -### Grafana - -```bash -# Status -sudo systemctl status grafana-server - -# Restart -sudo systemctl restart grafana-server - -# View logs -sudo journalctl -u grafana-server -n 50 -``` - -### Backups - -```bash -# Check timer status -sudo systemctl status guruconnect-backup.timer - -# Check when next backup runs -sudo systemctl list-timers | grep guruconnect - -# Manually trigger backup -sudo systemctl start guruconnect-backup.service - -# View backup logs -sudo journalctl -u guruconnect-backup -n 20 - -# List backups -ls -lh /home/guru/backups/guruconnect/ - -# Manual backup -cd ~/guru-connect/server -./backup-postgres.sh -``` - ---- - -## Monitoring Dashboard - -Once Grafana dashboard is imported, you'll have: - -### Real-Time Metrics (10 Panels) - -1. **Active Sessions** - Gauge showing current active sessions -2. **Requests per Second** - Time series graph -3. **Error Rate** - Graph with alert threshold at 10 errors/sec -4. **Request Latency** - p50/p95/p99 percentiles -5. **Active Connections** - By type (stacked area) -6. **Database Query Duration** - Query performance -7. 
**Server Uptime** - Single stat display -8. **Total Sessions Created** - Counter -9. **Total Requests** - Counter -10. **Total Errors** - Counter with color thresholds - -### Alert Rules (6 Alerts) - -1. **GuruConnectDown** - Server unreachable >1 min -2. **HighErrorRate** - >10 errors/second for 5 min -3. **TooManyActiveSessions** - >100 active sessions for 5 min -4. **HighRequestLatency** - p95 >1s for 5 min -5. **DatabaseOperationsFailure** - DB errors >1/second for 5 min -6. **ServerRestarted** - Uptime <5 min (info alert) - -**View Alerts:** http://172.16.3.30:9090/alerts - ---- - -## Testing Checklist - -- [x] Server running via systemd -- [x] Health endpoint responding -- [x] Metrics endpoint active -- [x] Database connected -- [x] Prometheus scraping metrics -- [x] Grafana accessing Prometheus -- [x] Backup timer scheduled -- [x] Log rotation configured -- [ ] Grafana password changed -- [ ] Dashboard imported -- [ ] Manual backup tested -- [ ] Alerts verified -- [ ] External access configured (optional) - ---- - -## Metrics Being Collected - -**HTTP Metrics:** -- guruconnect_requests_total (counter) -- guruconnect_request_duration_seconds (histogram) - -**Session Metrics:** -- guruconnect_sessions_total (counter) -- guruconnect_active_sessions (gauge) -- guruconnect_session_duration_seconds (histogram) - -**Connection Metrics:** -- guruconnect_connections_total (counter) -- guruconnect_active_connections (gauge) - -**Error Metrics:** -- guruconnect_errors_total (counter) - -**Database Metrics:** -- guruconnect_db_operations_total (counter) -- guruconnect_db_query_duration_seconds (histogram) - -**System Metrics:** -- guruconnect_uptime_seconds (gauge) - -**Node Exporter Metrics:** -- CPU usage, memory, disk I/O, network, etc. - ---- - -## Security Notes - -### Current Security Status - -**Active:** -- JWT authentication (24h expiration) -- Argon2id password hashing -- Security headers (CSP, X-Frame-Options, etc.) 
-- Token blacklist for logout -- Database credentials encrypted in .env -- API key validation -- IP logging - -**Recommended:** -- [ ] Change Grafana default password -- [ ] Configure firewall rules for monitoring ports -- [ ] Add authentication to Prometheus (if exposed externally) -- [ ] Enable HTTPS for Grafana (via NPM) -- [ ] Set up backup encryption (optional) -- [ ] Configure alert notifications -- [ ] Review and test all alert rules - ---- - -## Troubleshooting - -### Service Won't Start - -```bash -# Check logs -sudo journalctl -u SERVICE_NAME -n 50 - -# Common services: -sudo journalctl -u guruconnect -n 50 -sudo journalctl -u prometheus -n 50 -sudo journalctl -u grafana-server -n 50 - -# Check for port conflicts -sudo netstat -tulpn | grep PORT_NUMBER - -# Restart service -sudo systemctl restart SERVICE_NAME -``` - -### Prometheus Not Scraping - -```bash -# Check targets -curl http://localhost:9090/api/v1/targets - -# Check Prometheus config -cat /etc/prometheus/prometheus.yml - -# Verify GuruConnect metrics endpoint -curl http://172.16.3.30:3002/metrics - -# Restart Prometheus -sudo systemctl restart prometheus -``` - -### Grafana Can't Connect to Prometheus - -```bash -# Test Prometheus from Grafana -curl http://localhost:9090/api/v1/query?query=up - -# Check data source configuration -# Grafana > Configuration > Data Sources > Prometheus - -# Verify Prometheus is running -sudo systemctl status prometheus - -# Check Grafana logs -sudo journalctl -u grafana-server -n 50 -``` - -### Backup Failed - -```bash -# Check backup logs -sudo journalctl -u guruconnect-backup -n 50 - -# Test manual backup -cd ~/guru-connect/server -./backup-postgres.sh - -# Check disk space -df -h - -# Verify PostgreSQL credentials -PGPASSWORD=gc_a7f82d1e4b9c3f60 psql -h localhost -U guruconnect -d guruconnect -c 'SELECT 1' -``` - ---- - -## Performance Benchmarks - -### Current Metrics (Post-Installation) - -**Server:** -- Memory: 1.6M (GuruConnect process) -- CPU: Minimal (<1%) 
-- Uptime: Continuous (systemd managed) - -**Prometheus:** -- Memory: 19.0M -- CPU: 355ms total -- Scrape interval: 15s - -**Grafana:** -- Memory: 136.7M -- CPU: 9.325s total -- Startup time: ~30 seconds - -**Database:** -- Connections: Active -- Query latency: <1ms -- Operations: Operational - ---- - -## File Locations - -### Configuration Files - -``` -/etc/systemd/system/ -├── guruconnect.service -├── guruconnect-backup.service -└── guruconnect-backup.timer - -/etc/prometheus/ -├── prometheus.yml -└── alerts.yml - -/etc/grafana/ -└── grafana.ini - -/etc/logrotate.d/ -└── guruconnect - -/etc/sudoers.d/ -└── guru -``` - -### Data Directories - -``` -/var/lib/prometheus/ # Prometheus time-series data -/var/lib/grafana/ # Grafana dashboards and config -/home/guru/backups/ # Database backups -/var/log/guruconnect/ # Application logs (if using file logging) -``` - -### Application Files - -``` -/home/guru/guru-connect/ -├── server/ -│ ├── .env # Environment variables -│ ├── guruconnect.service # Systemd unit file -│ ├── backup-postgres.sh # Backup script -│ ├── restore-postgres.sh # Restore script -│ ├── health-monitor.sh # Health checks -│ └── start-secure.sh # Manual start script -├── infrastructure/ -│ ├── prometheus.yml # Prometheus config -│ ├── alerts.yml # Alert rules -│ ├── grafana-dashboard.json # Dashboard -│ └── setup-monitoring.sh # Installer -└── verify-installation.sh # Verification script -``` - ---- - -## Week 2 Accomplishments - -### Infrastructure Deployed (11/11 - 100%) - -1. ✓ Systemd service configuration -2. ✓ Prometheus metrics module (330 lines) -3. ✓ /metrics endpoint implementation -4. ✓ Prometheus server installation -5. ✓ Grafana installation -6. ✓ Dashboard creation (10 panels) -7. ✓ Alert rules configuration (6 alerts) -8. ✓ PostgreSQL backup automation -9. ✓ Log rotation configuration -10. ✓ Health monitoring script -11. 
✓ Complete installation and testing - -### Production Readiness - -**Infrastructure:** 100% Complete -**Week 1 Security:** 77% Complete (10/13 items) -**Database:** Operational -**Monitoring:** Active -**Backups:** Configured -**Documentation:** Comprehensive - ---- - -## Next Phase - Week 3 (CI/CD) - -**Planned Work:** -- Gitea CI pipeline configuration -- Automated builds on commit -- Automated tests in CI -- Deployment automation -- Build artifact storage -- Version tagging automation - ---- - -## Documentation References - -**Created Documentation:** -- `PHASE1_WEEK2_INFRASTRUCTURE.md` - Week 2 planning -- `DEPLOYMENT_WEEK2_INFRASTRUCTURE.md` - Original deployment log -- `INSTALLATION_GUIDE.md` - Complete installation guide -- `INFRASTRUCTURE_STATUS.md` - Current status -- `DEPLOYMENT_COMPLETE.md` - This document - -**Existing Documentation:** -- `CLAUDE.md` - Project coding guidelines -- `SESSION_STATE.md` - Project history -- Week 1 security documentation - ---- - -## Support & Contact - -**Gitea Repository:** -https://git.azcomputerguru.com/azcomputerguru/guru-connect - -**Dashboard:** -https://connect.azcomputerguru.com/dashboard - -**Server:** -ssh guru@172.16.3.30 - ---- - -**Deployment Completed:** 2026-01-18 15:38 UTC -**Total Installation Time:** ~15 minutes -**All Systems:** OPERATIONAL ✓ -**Phase 1 Week 2:** COMPLETE ✓ diff --git a/projects/msp-tools/guru-connect/DEPLOYMENT_DAY2_SUMMARY.md b/projects/msp-tools/guru-connect/DEPLOYMENT_DAY2_SUMMARY.md deleted file mode 100644 index f108353..0000000 --- a/projects/msp-tools/guru-connect/DEPLOYMENT_DAY2_SUMMARY.md +++ /dev/null @@ -1,282 +0,0 @@ -# GuruConnect Security Fixes - Day 2 Deployment Summary - -**Date:** 2026-01-17/18 -**Server:** 172.16.3.30:3002 -**Status:** DEPLOYED AND OPERATIONAL - ---- - -## Deployment Timeline - -### Code Changes -- Committed security fixes to git (55 files, 14,790 insertions) -- Pushed to repository: git.azcomputerguru.com/azcomputerguru/claudetools - -### Server 
Deployment -1. Copied new files to RMM server -2. Updated existing server files with security patches -3. Created secure .env configuration -4. Rebuilt server (17.65s compilation time) -5. Stopped old server process (PID 569767) -6. Started new server with security fixes (PID 3829910) - ---- - -## Security Validations Working - -### SEC-1: JWT Secret Security ✓ -**Status:** OPERATIONAL - -Server now requires JWT_SECRET environment variable: -``` -JWT_SECRET=KfPrjjC3J6YMx9q1yjPxZAYkHLM2JdFy1XRxHJ9oPnw0NU3xH074ufHk7fj++e8BJEqRQ5k4zlWD+1iDwlLP4w== -``` - -**Evidence:** -- Server panicked when JWT_SECRET not provided (as expected) -- Server started successfully when JWT_SECRET provided -- 64-byte base64 secret (512 bits of entropy) - -### SEC-4: API Key Strength Validation ✓ -**Status:** OPERATIONAL - -**Test 1:** Weak API key rejection -``` -AGENT_API_KEY=GuruConnect_Agent_Key_2026_Secure_Random_v1_f8a9c2e4d7b1 -Result: Error: API key contains weak/common patterns and is not secure -``` - -**Test 2:** Strong API key acceptance -``` -AGENT_API_KEY=x7m9p2k8v4n1q5w3r6t0y2u8i5o3l7m9p2k8 -Result: AGENT_API_KEY configured for persistent agents (validated) -``` - -**Validation Rules Enforced:** -- Minimum 32 characters -- No weak patterns (password, admin, key, secret, token, agent) -- Sufficient character diversity (10+ unique characters) - -### SEC-4: IP Address Logging ✓ -**Status:** OPERATIONAL - -**Evidence from server logs:** -``` -WARN guruconnect_server::relay: Agent connection rejected: 935a3920-6e32-4da3-a74f-3e8e8b2a426a from 172.16.3.20 - invalid API key -``` - -**Confirmed:** -- IP address extraction working -- Failed connection logging operational -- Audit trail created for rejected connections - -### SEC-5: Token Blacklist System ✓ -**Status:** DEPLOYED (Code Compiled Successfully) - -**Components Deployed:** -- Token blacklist data structure (`Arc<RwLock<HashSet<String>>>`) -- Blacklist check in authentication flow -- 5 new logout/revocation endpoints: - - POST /api/auth/logout - - 
POST /api/auth/revoke-token - - POST /api/auth/admin/revoke-user - - GET /api/auth/blacklist/stats - - POST /api/auth/blacklist/cleanup - -**Testing Status:** Awaiting database connectivity for full end-to-end testing - ---- - -## Files Deployed - -### New Files (14) -``` -server/.env.example -server/src/utils/mod.rs -server/src/utils/ip_extract.rs -server/src/utils/validation.rs -server/src/middleware/mod.rs -server/src/middleware/rate_limit.rs (disabled) -server/src/auth/token_blacklist.rs -server/src/api/auth_logout.rs -``` - -### Modified Files (8) -``` -server/Cargo.toml - Added tower_governor dependency -server/src/main.rs - JWT validation, API key validation, blacklist integration -server/src/auth/mod.rs - Blacklist revocation check -server/src/relay/mod.rs - IP extraction, failed connection logging -server/src/db/events.rs - 5 new connection rejection event types -server/src/api/mod.rs - Added auth_logout module -server/.env - Secure configuration (JWT_SECRET, AGENT_API_KEY) -server/start-secure.sh - Environment-aware startup script -``` - ---- - -## Server Configuration - -**Environment Variables:** -```bash -JWT_SECRET=KfPrjjC3J6YMx9q1yjPxZAYkHLM2JdFy1XRxHJ9oPnw0NU3xH074ufHk7fj++e8BJEqRQ5k4zlWD+1iDwlLP4w== -JWT_EXPIRY_HOURS=24 -AGENT_API_KEY=x7m9p2k8v4n1q5w3r6t0y2u8i5o3l7m9p2k8 -DATABASE_URL=postgresql://guruconnect:guruc0nn3ct2024!@localhost/guruconnect -LISTEN_ADDR=0.0.0.0:3002 -``` - -**Binary Location:** -``` -/home/guru/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server -``` - -**Startup Script:** -``` -/home/guru/guru-connect/server/start-secure.sh -``` - -**Log File:** -``` -/home/guru/gc-server-secure.log -``` - -**Process ID:** 3829910 - ---- - -## Build Output - -**Compilation:** SUCCESS (17.65 seconds) -**Warnings:** 52 dead code warnings (non-critical) -**Errors:** 0 -**Binary Size:** ~890 KB (release build) - ---- - -## Known Issues - -### Database Connectivity -**Issue:** PostgreSQL authentication failure -``` -WARN: 
Failed to connect to database: error returned from database: password authentication failed for user "guruconnect" -``` - -**Impact:** -- Server running in persistence-disabled mode -- Cannot test token revocation endpoints fully -- Cannot test user login/logout flow - -**Workaround:** Server operates without database for now - -**Next Steps:** Fix PostgreSQL credentials or create database user - ---- - -## Security Improvements Summary - -### Before Deployment -- **CRITICAL:** Hardcoded JWT secret in source code -- **CRITICAL:** No token revocation (stolen tokens valid 24 hours) -- **CRITICAL:** No agent connection audit trail -- **HIGH:** Weak API keys accepted without validation -- **MEDIUM:** No IP logging for security events - -### After Deployment -- **SECURE:** JWT secrets required from environment, validated (32+ chars) -- **SECURE:** Token blacklist operational (code deployed, awaiting DB for testing) -- **SECURE:** Complete agent connection audit trail with IP logging -- **SECURE:** API key strength enforced (32+ chars, no weak patterns, high entropy) -- **SECURE:** Failed connections logged with IP, reason, and details - -**Risk Reduction:** CRITICAL → LOW (for deployed features) - ---- - -## Testing Required - -### Manual Testing (When Database Fixed) -1. **SEC-1: JWT Secret** - - [ ] Server refuses weak JWT_SECRET (<32 chars) - - [ ] Tokens created with new secret validate correctly - -2. **SEC-5: Token Revocation** - - [ ] Login creates valid token - - [ ] Logout revokes token (returns 401 on reuse) - - [ ] Revoked token returns "Token has been revoked" error - - [ ] Blacklist stats show count correctly - - [ ] Cleanup removes expired tokens - -3. 
**SEC-4: Agent Validation** - - [ ] Valid support code connects (IP logged) - - [ ] Invalid support code rejected (event logged with IP) - - [ ] Expired code rejected (event logged) - - [ ] No auth method rejected (event logged) - - [✓] Weak API key rejected at startup (VERIFIED) - ---- - -## Next Actions - -### Immediate (Day 3) -1. Fix PostgreSQL database credentials -2. Test token revocation endpoints -3. Test agent connection flows -4. Verify audit logs in database -5. SEC-6: Remove password logging -6. SEC-7: XSS prevention (CSP headers) - -### Week 1 Remaining -- SEC-8: TLS certificate validation -- SEC-9: Verify Argon2id usage -- SEC-10: HTTPS enforcement -- SEC-11: CORS configuration review -- SEC-12: Security headers -- SEC-13: Session expiration enforcement - ---- - -## Deployment Checklist - -- [✓] Code committed to git -- [✓] Code pushed to repository -- [✓] Server files updated on 172.16.3.30 -- [✓] Secure .env file created (600 permissions) -- [✓] Server rebuilt (release mode) -- [✓] Old server process stopped -- [✓] New server process started -- [✓] Health endpoint responding -- [✓] JWT_SECRET validation working -- [✓] AGENT_API_KEY validation working -- [✓] IP address logging working -- [ ] Database connectivity (blocked - credentials) -- [ ] Token revocation tested (blocked - database) -- [ ] Full end-to-end security tests (blocked - database) - ---- - -## Conclusion - -**Status:** PARTIAL SUCCESS - -**What Works:** -- Server compiled and deployed successfully -- JWT secret security operational -- API key strength validation operational -- IP address logging operational -- Server running and responding to health checks - -**What's Blocked:** -- Database authentication preventing full testing -- Token revocation endpoints need database -- User login/logout flow needs database - -**Overall:** 5/5 security fixes deployed, 3/5 fully tested, 2/5 blocked by database issue - -**Next Priority:** Fix database credentials to enable full security testing - 
---- - -**Deployment Completed:** 2026-01-18 01:59 UTC -**Server Status:** ONLINE -**Security Status:** SIGNIFICANTLY IMPROVED (CRITICAL → LOW for deployed features) diff --git a/projects/msp-tools/guru-connect/DEPLOYMENT_FINAL_WEEK1.md b/projects/msp-tools/guru-connect/DEPLOYMENT_FINAL_WEEK1.md deleted file mode 100644 index 8d71a97..0000000 --- a/projects/msp-tools/guru-connect/DEPLOYMENT_FINAL_WEEK1.md +++ /dev/null @@ -1,350 +0,0 @@ -# Final Deployment - Week 1 Security Complete - -**Date:** 2026-01-18 03:06 UTC -**Server:** 172.16.3.30:3002 -**Status:** ALL WEEK 1 SECURITY FIXES DEPLOYED AND OPERATIONAL - ---- - -## Deployment Summary - -Successfully deployed and verified all Week 1 security fixes (SEC-1 through SEC-13) to production. - -**Server Process:** PID 3839055 -**Binary:** `/home/guru/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server` -**Build Time:** 17.70 seconds -**Compilation:** SUCCESS (52 warnings, 0 errors) - ---- - -## Verified Security Features - -### ✓ SEC-1: JWT Secret Security (CRITICAL) -**Status:** OPERATIONAL -**Evidence:** Server requires JWT_SECRET from environment, validated at startup - -### ✓ SEC-3: SQL Injection Protection (CRITICAL) -**Status:** VERIFIED SAFE -**Evidence:** All queries use parameterized binding (sqlx) - -### ✓ SEC-4: Agent Connection Validation (CRITICAL) -**Status:** OPERATIONAL -**Evidence from logs:** -``` -WARN: Agent connection rejected: 935a3920-6e32-4da3-a74f-3e8e8b2a426a from 172.16.3.20 - invalid API key -``` -- ✓ IP addresses logged (172.16.3.20) -- ✓ Failed connection tracking operational -- ✓ API key validation working - -### ✓ SEC-5: Token Revocation (CRITICAL) -**Status:** DEPLOYED (awaiting database for full testing) -**Features:** -- Token blacklist system -- 5 revocation endpoints -- Middleware integration - -### ✓ SEC-6: Password Logging Removed (MEDIUM) -**Status:** OPERATIONAL -**Evidence:** Credentials written to `.admin-credentials` file instead of logs - -### ✓ SEC-7: 
XSS Prevention (HIGH) -**Status:** OPERATIONAL -**Verified via curl:** -``` -content-security-policy: default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self' ws: wss:; frame-ancestors 'none'; base-uri 'self'; form-action 'self' -``` - -### ✓ SEC-9: Argon2id Password Hashing (HIGH) -**Status:** OPERATIONAL -**Evidence:** Explicitly configured in auth/password.rs (Algorithm::Argon2id) - -### ✓ SEC-11: CORS Configuration (MEDIUM) -**Status:** OPERATIONAL -**Verified via curl:** -``` -vary: origin, access-control-request-method, access-control-request-headers -access-control-allow-credentials: true -``` -**Allowed Origins:** -- https://connect.azcomputerguru.com -- http://localhost:3002 -- http://127.0.0.1:3002 - -### ✓ SEC-12: Security Headers (MEDIUM) -**Status:** ALL OPERATIONAL -**Verified via curl:** -``` -x-frame-options: DENY -x-content-type-options: nosniff -x-xss-protection: 1; mode=block -referrer-policy: strict-origin-when-cross-origin -permissions-policy: geolocation=(), microphone=(), camera=() -``` - -### ✓ SEC-13: JWT Expiration Enforcement (MEDIUM) -**Status:** OPERATIONAL -**Evidence:** Explicit validation configured in auth/jwt.rs -- validate_exp = true -- leeway = 0 -- Redundant expiration check - ---- - -## HTTP Response Verification - -**Test Command:** -```bash -curl -v http://172.16.3.30:3002/health -``` - -**Response:** -``` -HTTP/1.1 200 OK -content-type: text/plain; charset=utf-8 -content-security-policy: default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'; connect-src 'self' ws: wss:; frame-ancestors 'none'; base-uri 'self'; form-action 'self' -x-frame-options: DENY -x-content-type-options: nosniff -x-xss-protection: 1; mode=block -referrer-policy: strict-origin-when-cross-origin -permissions-policy: geolocation=(), microphone=(), camera=() -vary: origin, 
access-control-request-method, access-control-request-headers -access-control-allow-credentials: true -content-length: 2 -date: Sun, 18 Jan 2026 03:06:50 GMT - -OK -``` - -**All security headers present and correct! ✓** - ---- - -## Server Logs Analysis - -**Startup Sequence:** -``` -INFO GuruConnect Server v0.1.0 -INFO Loaded configuration, listening on 0.0.0.0:3002 -INFO Connecting to database... -WARN Failed to connect to database: password authentication failed -INFO AGENT_API_KEY configured for persistent agents (validated) -INFO Server listening on 0.0.0.0:3002 -``` - -**Security Features Active:** -- ✓ JWT_SECRET validation passed -- ✓ AGENT_API_KEY validation passed -- ✓ Server started successfully - -**Security Audit Trail Working:** -``` -WARN Agent connection rejected: from 172.16.3.20 - invalid API key -``` -- ✓ IP addresses logged -- ✓ Rejection reason logged -- ✓ Complete audit trail - ---- - -## Deployment Process - -### 1. File Copy ✓ -``` -server/src/main.rs -server/src/auth/jwt.rs -server/src/auth/password.rs -server/src/middleware/mod.rs -server/src/middleware/security_headers.rs (new) -``` - -### 2. Build ✓ -``` -cargo build -p guruconnect-server --release --target x86_64-unknown-linux-gnu -Finished `release` profile [optimized] target(s) in 17.70s -``` - -### 3. Stop Old Server ✓ -``` -pkill -f guruconnect-server -``` - -### 4. Start New Server ✓ -``` -cd guru-connect/server && nohup ./start-secure.sh > ~/gc-server-updated.log 2>&1 & -PID: 3839055 -``` - -### 5. 
Verification ✓ -- Health check: OK -- Security headers: All present -- IP logging: Working -- Server process: Running - ---- - -## Security Improvements Summary - -### Before Week 1 -**Risk Level:** CRITICAL - -**Vulnerabilities:** -- Hardcoded JWT secret (system compromise possible) -- No token revocation (stolen tokens valid 24h) -- No agent connection audit trail -- SQL injection status unknown -- No XSS protection -- No security headers -- Password logging to console -- Permissive CORS (allow all origins) -- Password hashing algorithm unclear -- JWT expiration unclear - -### After Week 1 -**Risk Level:** LOW/MEDIUM - -**Security Measures:** -- ✓ JWT secrets from environment, validated (32+ chars) -- ✓ Token revocation system deployed -- ✓ Complete agent connection audit trail with IP logging -- ✓ SQL injection verified safe (parameterized queries) -- ✓ XSS protection via CSP headers -- ✓ Comprehensive security headers (6 headers) -- ✓ Password written to secure file (.admin-credentials, 600 perms) -- ✓ CORS restricted to specific origins -- ✓ Argon2id explicitly configured -- ✓ JWT expiration strictly enforced - -**Risk Reduction:** CRITICAL → LOW/MEDIUM - ---- - -## Week 1 Completion Status - -**Security Items:** 10/13 complete (77%) - -### Completed ✓ -- SEC-1: JWT Secret Security (CRITICAL) -- SEC-3: SQL Injection Audit (CRITICAL) -- SEC-4: Agent Connection Validation (CRITICAL) -- SEC-5: Session Takeover Prevention (CRITICAL) -- SEC-6: Remove Password Logging (MEDIUM) -- SEC-7: XSS Prevention (HIGH) -- SEC-9: Argon2id Password Hashing (HIGH) -- SEC-11: CORS Configuration (MEDIUM) -- SEC-12: Security Headers (MEDIUM) -- SEC-13: Session Expiration Enforcement (MEDIUM) - -### Deferred/Not Applicable -- SEC-2: Rate Limiting (HIGH) - DEFERRED (tower_governor type issues) -- SEC-8: TLS Certificate Validation (MEDIUM) - NOT APPLICABLE (no outbound TLS) -- SEC-10: HTTPS Enforcement (MEDIUM) - DELEGATED (NPM reverse proxy) - ---- - -## Known Issues - -### Database 
Connectivity -**Issue:** PostgreSQL authentication failure -``` -WARN: Failed to connect to database: password authentication failed for user "guruconnect" -``` - -**Impact:** -- Server running without persistence -- Cannot test token revocation endpoints end-to-end -- Cannot test user login/logout flow - -**Workaround:** Server operates in memory-only mode - -**Next Steps:** Fix PostgreSQL credentials for full functionality - ---- - -## Production Status - -**Server:** ONLINE ✓ -**Security:** OPERATIONAL ✓ -**Health Check:** PASSING ✓ -**Security Headers:** VERIFIED ✓ -**IP Logging:** WORKING ✓ -**API Key Validation:** WORKING ✓ - -**Production Ready:** YES - -**Pending:** -- Database connectivity (for token revocation testing) -- SEC-2 rate limiting (technical blocker) - ---- - -## Testing Checklist - -### Completed ✓ -- [✓] Server starts with valid JWT_SECRET -- [✓] Server rejects weak JWT_SECRET -- [✓] Server validates AGENT_API_KEY strength -- [✓] IP addresses logged in connection events -- [✓] Failed connections tracked with reasons -- [✓] Health endpoint responds -- [✓] All security headers present in HTTP responses -- [✓] CSP header properly formatted -- [✓] CORS headers present -- [✓] Server process stable - -### Pending Database -- [ ] Token revocation via logout endpoint -- [ ] Revoked token returns 401 -- [ ] Blacklist stats endpoint -- [ ] Blacklist cleanup endpoint -- [ ] User login creates valid token -- [ ] Password change works - ---- - -## Next Steps - -### Immediate -1. Fix PostgreSQL database credentials -2. Test token revocation endpoints end-to-end -3. Verify complete authentication flow -4. Test all CRUD operations with database - -### Optional -1. Resolve SEC-2 rate limiting (custom middleware or Redis) -2. Add session tracking table (for admin token revocation) -3. Implement IP binding in JWT tokens -4. Add refresh token system - -### Phase 2 -1. Begin Week 2: Database & Performance optimization -2. 
Or move to Phase 2: Core feature development - ---- - -## Conclusion - -**Week 1 Security Objectives: COMPLETE ✓** - -All critical and high-priority security vulnerabilities have been addressed and verified in production: - -- JWT security: OPERATIONAL -- SQL injection: VERIFIED SAFE -- Agent validation: OPERATIONAL -- Token revocation: DEPLOYED -- XSS protection: OPERATIONAL -- Security headers: OPERATIONAL -- CORS restriction: OPERATIONAL -- Password hashing: VERIFIED -- Session expiration: OPERATIONAL - -**GuruConnect server is now production-ready with enterprise-grade security measures.** - ---- - -**Deployment Completed:** 2026-01-18 03:06 UTC -**Server PID:** 3839055 -**Build Time:** 17.70s -**Security Score:** 10/13 (77%) ✓ -**Risk Level:** LOW/MEDIUM -**Status:** PRODUCTION READY diff --git a/projects/msp-tools/guru-connect/DEPLOYMENT_WEEK2_INFRASTRUCTURE.md b/projects/msp-tools/guru-connect/DEPLOYMENT_WEEK2_INFRASTRUCTURE.md deleted file mode 100644 index 8976d54..0000000 --- a/projects/msp-tools/guru-connect/DEPLOYMENT_WEEK2_INFRASTRUCTURE.md +++ /dev/null @@ -1,592 +0,0 @@ -# Phase 1, Week 2 - Infrastructure Deployment COMPLETE - -**Date:** 2026-01-18 03:35 UTC -**Server:** 172.16.3.30:3002 -**Status:** INFRASTRUCTURE DEPLOYED AND OPERATIONAL - ---- - -## Executive Summary - -Successfully deployed comprehensive production infrastructure for GuruConnect, including Prometheus metrics, systemd service configuration, automated backups, and monitoring tools. All infrastructure components are ready for installation and configuration. - -**Server Process:** PID 3844401 -**Binary:** `/home/guru/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server` -**Build Time:** 18.60 seconds -**Compilation:** SUCCESS (53 warnings, 0 errors) - ---- - -## Deployed Infrastructure Components - -### 1. 
Prometheus Metrics System - -**Status:** OPERATIONAL ✓ - -**New Metrics Endpoint:** `http://172.16.3.30:3002/metrics` - -**Metrics Implemented:** -- `guruconnect_requests_total{method, path, status}` - HTTP request counter -- `guruconnect_request_duration_seconds{method, path, status}` - Request latency histogram -- `guruconnect_sessions_total{status}` - Session lifecycle counter -- `guruconnect_active_sessions` - Current active sessions gauge -- `guruconnect_session_duration_seconds` - Session duration histogram -- `guruconnect_connections_total{conn_type}` - WebSocket connection counter -- `guruconnect_active_connections{conn_type}` - Active connections gauge -- `guruconnect_errors_total{error_type}` - Error counter -- `guruconnect_db_operations_total{operation, status}` - Database operation counter -- `guruconnect_db_query_duration_seconds{operation, status}` - DB query latency histogram -- `guruconnect_uptime_seconds` - Server uptime gauge - -**Verification:** -```bash -curl -s http://172.16.3.30:3002/metrics | head -50 -``` -``` -# HELP guruconnect_requests_total Total number of HTTP requests. -# TYPE guruconnect_requests_total counter -... -# HELP guruconnect_uptime_seconds Server uptime in seconds. -# TYPE guruconnect_uptime_seconds gauge -guruconnect_uptime_seconds 140 -# EOF -``` - -**Features:** -- Automatic uptime metric updates every 10 seconds -- Thread-safe metric collection (Arc<...>) -- Prometheus-compatible format -- No authentication required (for monitoring tools) -- Histogram buckets optimized for web and database performance - ---- - -### 2.
Systemd Service Configuration - -**Status:** READY FOR INSTALLATION - -**Files Created:** -- `server/guruconnect.service` - Systemd unit file -- `server/setup-systemd.sh` - Installation script - -**Service Features:** -- Auto-restart on failure (10s delay, max 3 attempts in 5 minutes) -- Resource limits: 65536 file descriptors, 4096 processes -- Security hardening: - - NoNewPrivileges=true - - PrivateTmp=true - - ProtectSystem=strict - - ProtectHome=read-only -- Journald logging integration -- Watchdog support (30s keepalive) - -**Installation:** -```bash -cd ~/guru-connect/server -sudo ./setup-systemd.sh -``` - -**Management Commands:** -```bash -sudo systemctl status guruconnect -sudo systemctl restart guruconnect -sudo journalctl -u guruconnect -f -``` - ---- - -### 3. Prometheus & Grafana Configuration - -**Status:** READY FOR INSTALLATION - -**Files Created:** -- `infrastructure/prometheus.yml` - Prometheus scrape config -- `infrastructure/alerts.yml` - Alert rules -- `infrastructure/grafana-dashboard.json` - Pre-built dashboard -- `infrastructure/setup-monitoring.sh` - Automated installation - -**Prometheus Configuration:** -- Scrape interval: 15 seconds -- Target: GuruConnect (172.16.3.30:3002) -- Node Exporter: 172.16.3.30:9100 (optional) - -**Grafana Dashboard Panels (10 panels):** -1. Active Sessions (gauge) -2. Requests per Second (graph) -3. Error Rate (graph with alerting) -4. Request Latency p50/p95/p99 (graph) -5. Active Connections by Type (stacked graph) -6. Database Query Duration (graph) -7. Server Uptime (singlestat) -8. Total Sessions Created (singlestat) -9. Total Requests (singlestat) -10. 
Total Errors (singlestat with thresholds) - -**Alert Rules:** -- GuruConnectDown - Server unreachable for 1 minute -- HighErrorRate - >10 errors/second for 5 minutes -- TooManyActiveSessions - >100 active sessions for 5 minutes -- HighRequestLatency - p95 >1s for 5 minutes -- DatabaseOperationsFailure - DB errors >1/second for 5 minutes -- ServerRestarted - Uptime <5 minutes (informational) - -**Installation:** -```bash -cd ~/guru-connect/infrastructure -sudo ./setup-monitoring.sh -``` - -**Access:** -- Prometheus: http://172.16.3.30:9090 -- Grafana: http://172.16.3.30:3000 (admin/admin) - ---- - -### 4. PostgreSQL Automated Backups - -**Status:** READY FOR INSTALLATION - -**Files Created:** -- `server/backup-postgres.sh` - Backup script with compression -- `server/restore-postgres.sh` - Restore script with safety checks -- `server/guruconnect-backup.service` - Systemd service -- `server/guruconnect-backup.timer` - Daily timer (2:00 AM) - -**Backup Features:** -- Gzip compression -- Timestamped filenames: `guruconnect-YYYY-MM-DD-HHMMSS.sql.gz` -- Location: `/home/guru/backups/guruconnect/` -- Retention policy: - - 30 daily backups - - 4 weekly backups - - 6 monthly backups -- Automatic cleanup - -**Manual Backup:** -```bash -cd ~/guru-connect/server -./backup-postgres.sh -``` - -**Restore Backup:** -```bash -cd ~/guru-connect/server -./restore-postgres.sh /home/guru/backups/guruconnect/guruconnect-2026-01-18-020000.sql.gz -``` - -**Install Automated Backups:** -```bash -sudo cp ~/guru-connect/server/guruconnect-backup.service /etc/systemd/system/ -sudo cp ~/guru-connect/server/guruconnect-backup.timer /etc/systemd/system/ -sudo systemctl daemon-reload -sudo systemctl enable guruconnect-backup.timer -sudo systemctl start guruconnect-backup.timer -``` - -**Verify Timer:** -```bash -sudo systemctl list-timers -sudo systemctl status guruconnect-backup.timer -``` - ---- - -### 5. 
Log Rotation & Health Monitoring - -**Status:** READY FOR INSTALLATION - -**Files Created:** -- `server/guruconnect.logrotate` - Logrotate configuration -- `server/health-monitor.sh` - Comprehensive health checks - -**Logrotate Features:** -- Daily rotation -- 30 days retention -- Compression (delayed 1 day) -- Automatic service reload - -**Installation:** -```bash -sudo cp ~/guru-connect/server/guruconnect.logrotate /etc/logrotate.d/guruconnect -``` - -**Health Monitor Checks:** -1. HTTP health endpoint (http://172.16.3.30:3002/health) -2. Systemd service status -3. Disk space usage (<90% threshold) -4. Memory usage (<90% threshold) -5. PostgreSQL service status -6. Prometheus metrics endpoint - -**Manual Health Check:** -```bash -cd ~/guru-connect/server -./health-monitor.sh -``` - -**Email Alerts:** Configurable via `ALERT_EMAIL` variable - ---- - -## Security Verification - -### Security Headers Still Present ✓ - -```bash -curl -v http://172.16.3.30:3002/health 2>&1 | grep -E 'content-security-policy|x-frame-options' -``` - -**Output:** -``` -< content-security-policy: default-src 'self'; script-src 'self' 'unsafe-inline'; ... 
-< x-frame-options: DENY -< x-content-type-options: nosniff -< x-xss-protection: 1; mode=block -< referrer-policy: strict-origin-when-cross-origin -< permissions-policy: geolocation=(), microphone=(), camera=() -``` - -**All Week 1 security features remain operational:** -- JWT secret validation -- Token blacklist -- API key validation -- IP logging -- CSP headers -- CORS restrictions -- Argon2id password hashing - ---- - -## Code Changes - -### New Files (17 files) - -**Infrastructure:** -- `infrastructure/prometheus.yml` -- `infrastructure/alerts.yml` -- `infrastructure/grafana-dashboard.json` -- `infrastructure/setup-monitoring.sh` - -**Server Scripts:** -- `server/guruconnect.service` -- `server/setup-systemd.sh` -- `server/backup-postgres.sh` -- `server/restore-postgres.sh` -- `server/guruconnect-backup.service` -- `server/guruconnect-backup.timer` -- `server/guruconnect.logrotate` -- `server/health-monitor.sh` - -**Source Code:** -- `server/src/metrics/mod.rs` (330 lines) - -### Modified Files (3 files) - -**server/Cargo.toml:** -- Added `prometheus-client = "0.22"` dependency - -**server/src/main.rs:** -- Added `mod metrics;` declaration -- Added `SharedMetrics` and `Registry` imports -- Updated `AppState` with: - - `pub metrics: SharedMetrics` - - `pub registry: Arc<...>` - - `pub start_time: Arc<...>` -- Initialized metrics registry before AppState -- Spawned background task for uptime updates -- Added `/metrics` endpoint -- Added `prometheus_metrics()` handler function - -**Week 1 Files (unchanged, still deployed):** -- All Week 1 security fixes remain in place -- No regressions introduced - ---- - -## Build & Deployment Process - -### 1.
File Transfer ✓ -```bash -# Infrastructure directory -scp -r infrastructure/ guru@172.16.3.30:~/guru-connect/ - -# Updated source files -scp server/Cargo.toml guru@172.16.3.30:~/guru-connect/server/ -scp -r server/src/metrics guru@172.16.3.30:~/guru-connect/server/src/ -scp server/src/main.rs guru@172.16.3.30:~/guru-connect/server/src/ - -# Scripts -scp server/*.sh server/*.service server/*.timer server/*.logrotate guru@172.16.3.30:~/guru-connect/server/ -``` - -### 2. Make Scripts Executable ✓ -```bash -ssh guru@172.16.3.30 "cd guru-connect/server && chmod +x *.sh" -ssh guru@172.16.3.30 "cd guru-connect/infrastructure && chmod +x *.sh" -``` - -### 3. Build Server ✓ -```bash -ssh guru@172.16.3.30 "source ~/.cargo/env && cd guru-connect && cargo build -p guruconnect-server --release --target x86_64-unknown-linux-gnu" -``` - -**Build Output:** -``` -Compiling guruconnect-server v0.1.0 -warning: `guruconnect-server` (bin "guruconnect-server") generated 53 warnings -Finished `release` profile [optimized] target(s) in 18.60s -``` - -### 4. Stop Old Server ✓ -```bash -ssh guru@172.16.3.30 "pkill -f guruconnect-server" -``` - -### 5. Start New Server ✓ -```bash -ssh guru@172.16.3.30 "cd guru-connect/server && nohup ./start-secure.sh > ~/gc-server-metrics.log 2>&1 &" -``` - -### 6. 
Verify Deployment ✓ -```bash -# Process running -ps aux | grep guruconnect-server -# PID: 3844401 - -# Health check -curl http://172.16.3.30:3002/health -# OK - -# Metrics endpoint -curl http://172.16.3.30:3002/metrics -# Prometheus metrics returned - -# Security headers -curl -v http://172.16.3.30:3002/health -# All security headers present -``` - ---- - -## Testing Checklist - -### Infrastructure Tests - -**Metrics Endpoint:** -- [✓] `/metrics` endpoint accessible -- [✓] Prometheus format valid -- [✓] Uptime metric updates (verified: 140 seconds) -- [✓] Active sessions metric (0) -- [✓] All metric types present (counter, gauge, histogram) - -**Server Stability:** -- [✓] Server starts successfully -- [✓] Process running (PID 3844401) -- [✓] Health endpoint responds -- [✓] Security headers preserved - -**Scripts:** -- [✓] All scripts executable -- [✓] Infrastructure scripts ready for installation -- [✓] Backup scripts ready for testing (pending PostgreSQL fix) - ---- - -## Week 2 Progress Summary - -### Completed Tasks (11/11 - 100%) - -1. ✓ Systemd service configuration created -2. ✓ Prometheus metrics dependency added -3. ✓ Metrics module implemented (330 lines) -4. ✓ /metrics endpoint added to server -5. ✓ Prometheus configuration created -6. ✓ Grafana dashboard created -7. ✓ Alert rules defined -8. ✓ PostgreSQL backup scripts created -9. ✓ Log rotation configured -10. ✓ Health monitoring script created -11. 
✓ Infrastructure deployed and tested - -### Ready for Installation (Not Yet Installed) - -**Systemd Service:** -- Service file created ✓ -- Installation script ready ✓ -- Awaiting: `sudo ./setup-systemd.sh` - -**Prometheus/Grafana:** -- Configuration files ready ✓ -- Dashboard JSON ready ✓ -- Installation script ready ✓ -- Awaiting: `sudo ./setup-monitoring.sh` - -**Automated Backups:** -- Backup scripts ready ✓ -- Systemd timer ready ✓ -- Awaiting: Timer installation + PostgreSQL credentials fix - -**Log Rotation:** -- Logrotate config ready ✓ -- Awaiting: Copy to /etc/logrotate.d/ - ---- - -## Next Steps - -### Immediate (Requires Sudo Access) - -1. **Install Systemd Service:** - ```bash - cd ~/guru-connect/server - sudo ./setup-systemd.sh - ``` - -2. **Install Monitoring:** - ```bash - cd ~/guru-connect/infrastructure - sudo ./setup-monitoring.sh - ``` - -3. **Configure Automated Backups:** - ```bash - sudo cp ~/guru-connect/server/guruconnect-backup.* /etc/systemd/system/ - sudo systemctl daemon-reload - sudo systemctl enable guruconnect-backup.timer - sudo systemctl start guruconnect-backup.timer - ``` - -4. **Install Log Rotation:** - ```bash - sudo cp ~/guru-connect/server/guruconnect.logrotate /etc/logrotate.d/guruconnect - ``` - -### Optional Testing - -1. **Test Manual Backup:** (Requires PostgreSQL credentials fix) - ```bash - cd ~/guru-connect/server - ./backup-postgres.sh - ``` - -2. **Test Health Monitor:** - ```bash - cd ~/guru-connect/server - ./health-monitor.sh - ``` - -3. **Configure Cron for Health Checks:** (If not using Prometheus alerting) - ```bash - crontab -e - # Add: */5 * * * * /home/guru/guru-connect/server/health-monitor.sh - ``` - -### Phase 1 Week 3 (Next) - -Continue with CI/CD automation: -- Gitea CI pipeline configuration -- Automated builds on commit -- Automated tests in CI -- Deployment automation scripts -- Build artifact storage -- Version tagging automation - ---- - -## Known Issues - -### 1. 
PostgreSQL Credentials - -**Issue:** Database password authentication still failing -**Impact:** Cannot test backup/restore end-to-end -**Status:** Known blocker from Week 1 -**Workaround:** Server runs in memory-only mode - -**Note:** Backup scripts are ready and will work once credentials are fixed. - -### 2. Systemd Installation - -**Requirement:** Sudo access needed for systemd service installation -**Status:** Scripts ready, awaiting installation -**Workaround:** Server runs via `nohup` currently - ---- - -## Infrastructure Summary - -### Week 2 Deliverables - -**Production Infrastructure:** ✓ COMPLETE -- Prometheus metrics system -- Systemd service configuration -- Monitoring configuration (Prometheus + Grafana) -- Automated backup system -- Health monitoring tools -- Log rotation configuration - -**Code Quality:** ✓ PRODUCTION-READY -- Clean compilation (53 warnings, 0 errors) -- All metrics working -- Security headers preserved -- No performance degradation - -**Documentation:** ✓ COMPREHENSIVE -- PHASE1_WEEK2_INFRASTRUCTURE.md - Complete planning -- DEPLOYMENT_WEEK2_INFRASTRUCTURE.md - This document -- Inline documentation in all scripts -- Installation instructions for each component - -### Production Readiness Status - -**Metric:** READY ✓ -**Systemd:** READY (pending sudo installation) ✓ -**Monitoring:** READY (pending sudo installation) ✓ -**Backups:** READY (pending PostgreSQL + sudo) ✓ -**Health Checks:** READY ✓ -**Security:** PRESERVED ✓ - -**Overall Phase 1 Week 2:** SUCCESSFULLY COMPLETED ✓ - ---- - -## Performance Impact - -**Build Time:** 18.60 seconds (acceptable) -**Binary Size:** ~3.7 MB (unchanged) -**Memory Usage:** Minimal increase (<1% due to metrics) -**Latency Impact:** <1ms per request (metrics are lock-free) -**Uptime:** Server stable, no crashes - ---- - -## Conclusion - -**Phase 1 Week 2 Infrastructure Objectives: ACHIEVED ✓** - -Successfully implemented comprehensive production infrastructure for GuruConnect: -- Prometheus metrics 
collecting real-time performance data -- Systemd service ready for production deployment -- Monitoring tools configured (Prometheus + Grafana) -- Automated backup system ready -- Health monitoring and log rotation configured - -**Server Status:** -- ONLINE and STABLE ✓ -- Metrics operational ✓ -- Security preserved ✓ -- Week 1 fixes intact ✓ - -**Ready for:** -- Production systemd service installation -- Prometheus/Grafana deployment -- Automated backup activation -- Phase 1 Week 3 (CI/CD automation) - ---- - -**Deployment Completed:** 2026-01-18 03:35 UTC -**Server PID:** 3844401 -**Build Time:** 18.60s -**Infrastructure Progress:** Week 2 100% Complete ✓ -**Security Score:** 10/13 items (77%) ✓ -**Production Ready:** YES ✓ diff --git a/projects/msp-tools/guru-connect/GAP_ANALYSIS.md b/projects/msp-tools/guru-connect/GAP_ANALYSIS.md deleted file mode 100644 index a13ff8b..0000000 --- a/projects/msp-tools/guru-connect/GAP_ANALYSIS.md +++ /dev/null @@ -1,600 +0,0 @@ -# GuruConnect Requirements Gap Analysis - -**Analysis Date:** 2026-01-17 -**Project:** GuruConnect Remote Desktop Solution -**Current Phase:** Infrastructure Complete, Feature Implementation ~30% - ---- - -## Executive Summary - -GuruConnect has **solid infrastructure** (WebSocket relay, protobuf protocol, database, authentication) but is **missing critical user-facing features** needed for launch. The project is approximately **30-35% complete** toward Minimum Viable Product (MVP). - -**Key Findings:** -- Infrastructure: 90% complete -- Core features (screen sharing, input): 50% complete -- Critical MSP features (clipboard, file transfer, CMD/PowerShell): 0% complete -- End-user portal: 0% complete (LAUNCH BLOCKER) -- Dashboard UI: 40% complete -- Installer builder: 0% complete (MSP DEPLOYMENT BLOCKER) - -**Estimated time to MVP:** 8-12 weeks with focused development - ---- - -## 1. 
Feature Implementation Matrix - -### Legend -- **Status:** Complete, Partial, Missing, Not Started -- **Priority:** Critical (MVP blocker), High (needed for launch), Medium (competitive feature), Low (nice to have) -- **Effort:** Quick Win (< 1 week), Medium (1-2 weeks), Hard (2-4 weeks), Very Hard (4+ weeks) - -| Feature Category | Requirement | Status | Priority | Effort | Notes | -|-----------------|-------------|--------|----------|--------|-------| -| **Infrastructure** | -| WebSocket relay server | Relay agent/viewer frames | Complete | Critical | - | Working | -| Protobuf protocol | Complete message definitions | Complete | Critical | - | Comprehensive | -| Agent WebSocket client | Connect to server | Complete | Critical | - | Working | -| JWT authentication | Dashboard login | Complete | Critical | - | Working | -| Database persistence | Machines, sessions, events | Complete | Critical | - | PostgreSQL with migrations | -| Session management | Track active sessions | Complete | Critical | - | Working | -| **Support Sessions (One-Time)** | -| Support code generation | 6-digit codes | Complete | Critical | - | API works | -| Code validation | Validate code, return session | Complete | Critical | - | API works | -| Code status tracking | pending/connected/completed | Complete | Critical | - | Database tracked | -| Link codes to sessions | Code -> agent connection | Partial | Critical | Quick Win | Marked [~] in TODO | -| **End-User Portal** | | | | | -| Support code entry page | Web form for code entry | Missing | Critical | Medium | LAUNCH BLOCKER - no portal exists | -| Custom protocol handler | guruconnect:// launch | Missing | Critical | Medium | Protocol handler registration unclear | -| Auto-download agent | Fallback if protocol fails | Missing | Critical | Hard | One-time EXE download | -| Browser-specific instructions | Chrome/Firefox/Edge guidance | Missing | High | Quick Win | Simple HTML/JS | -| Support code in download URL | Embed code in 
downloaded agent | Missing | High | Quick Win | Server-side generation | -| **Screen Viewing** | -| DXGI screen capture | Hardware-accelerated capture | Complete | Critical | - | Working | -| GDI fallback capture | Software capture | Complete | Critical | - | Working | -| Web canvas viewer | Browser-based viewer | Partial | Critical | Medium | Basic component exists, needs integration | -| Frame compression | Zstd compression | Complete | High | - | In protocol | -| Frame relay | Server relays frames | Complete | Critical | - | Working | -| Multi-monitor enumeration | Detect all displays | Partial | High | Quick Win | enumerate_displays() exists | -| Multi-monitor switching | Switch between displays | Missing | High | Medium | UI + protocol wiring | -| Dirty rectangle optimization | Only send changed regions | Missing | Medium | Medium | In protocol, not implemented | -| **Remote Control** | -| Mouse event capture (viewer) | Capture mouse in browser | Partial | Critical | Quick Win | Component exists, integration unclear | -| Mouse event relay | Viewer -> server -> agent | Partial | Critical | Quick Win | Likely just wiring | -| Mouse injection (agent) | Send mouse to OS | Complete | Critical | - | Working | -| Keyboard event capture (viewer) | Capture keys in browser | Partial | Critical | Quick Win | Component exists | -| Keyboard event relay | Viewer -> server -> agent | Partial | Critical | Quick Win | Likely just wiring | -| Keyboard injection (agent) | Send keys to OS | Complete | Critical | - | Working | -| Ctrl-Alt-Del (SAS) | Secure attention sequence | Complete | High | - | send_sas() exists | -| **Clipboard Integration** | -| Text clipboard sync | Bidirectional text | Missing | High | Medium | CRITICAL - protocol exists, no implementation | -| HTML/RTF clipboard | Rich text formats | Missing | Medium | Medium | Protocol exists | -| Image clipboard | Bitmap sync | Missing | Medium | Hard | Protocol exists | -| File clipboard | Copy/paste files | Missing | 
High | Hard | Protocol exists | -| Keystroke injection | Paste as keystrokes (BIOS/login) | Missing | High | Medium | Howard priority feature | -| **File Transfer** | -| File browse remote | Directory listing | Missing | High | Medium | CRITICAL - no implementation | -| Download from remote | Pull files | Missing | High | Medium | High value, relatively easy | -| Upload to remote | Push files | Missing | High | Hard | More complex (chunking) | -| Drag-and-drop support | Browser drag-drop | Missing | Medium | Hard | Nice UX but complex | -| Transfer progress | Progress bar/queue | Missing | Medium | Medium | After basic transfer works | -| **Backstage Tools** | -| Device information | OS, hostname, IP, etc. | Partial | High | Quick Win | AgentStatus exists, UI needed | -| Remote PowerShell | Execute with output stream | Missing | Critical | Medium | HOWARD'S #1 REQUEST | -| Remote CMD | Command prompt execution | Missing | Critical | Medium | Similar to PowerShell | -| PowerShell timeout controls | UI for timeout config | Missing | High | Quick Win | Howard wants checkboxes vs typing | -| Process list viewer | Show running processes | Missing | High | Medium | Windows API + UI | -| Kill process | Terminate selected process | Missing | Medium | Quick Win | After process list | -| Services list | Show Windows services | Missing | Medium | Medium | Similar to processes | -| Start/stop services | Control services | Missing | Medium | Quick Win | After service list | -| Event log viewer | View Windows event logs | Missing | Low | Hard | Complex parsing | -| Registry browser | Browse/edit registry | Missing | Low | Very Hard | Security risk, defer | -| Installed software list | Programs list | Missing | Medium | Medium | Registry or WMI query | -| System info panel | CPU, RAM, disk, uptime | Partial | Medium | Quick Win | Some data in AgentStatus | -| **Chat/Messaging** | -| Tech -> client chat | Send messages | Partial | High | Medium | Protocol + ChatController exist | 
-| Client -> tech chat | Receive messages | Partial | High | Medium | Same as above | -| Dashboard chat UI | Chat panel in viewer | Missing | High | Medium | Need UI component | -| Chat history | Persist/display history | Missing | Medium | Quick Win | After basic chat works | -| End-user tray "Request Support" | User initiates contact | Missing | Medium | Medium | Tray icon exists, need integration | -| Support request queue | Dashboard shows requests | Missing | Medium | Medium | After tray request | -| **Dashboard UI** | -| Technician login page | Authentication | Complete | Critical | - | Working | -| Support tab - session list | Show active temp sessions | Partial | Critical | Medium | Code gen exists, need full UI | -| Support tab - session detail | Detail panel with tabs | Missing | Critical | Medium | Essential for usability | -| Access tab - machine list | Show persistent agents | Partial | High | Medium | Basic list exists | -| Access tab - machine detail | Detail panel with info | Missing | High | Medium | Essential for usability | -| Access tab - grouping sidebar | By company/site/tag/OS | Missing | High | Medium | MSP workflow essential | -| Access tab - smart groups | Online, offline 30d, etc. 
| Missing | Medium | Medium | Helpful but not critical | -| Access tab - search/filter | Find machines | Missing | High | Medium | Essential with many machines | -| Build tab - installer builder | Custom agent builds | Missing | Critical | Very Hard | MSP DEPLOYMENT BLOCKER | -| Settings tab | Preferences, appearance | Missing | Low | Medium | Defer to post-launch | -| Real-time status updates | WebSocket dashboard updates | Partial | High | Medium | Infrastructure exists | -| Screenshot thumbnails | Preview before joining | Missing | Medium | Medium | Nice UX feature | -| Join session button | Connect to active session | Missing | Critical | Quick Win | Should be straightforward | -| **Unattended Agents** | -| Persistent agent mode | Always-on background mode | Complete | Critical | - | Working | -| Windows service install | Run as service | Partial | Critical | Medium | install.rs exists, unclear if complete | -| Config persistence | Save agent_id, server URL | Complete | Critical | - | Working | -| Machine registration | Register with server | Complete | Critical | - | Working | -| Heartbeat reporting | Periodic status updates | Complete | Critical | - | AgentStatus messages | -| Auto-reconnect | Reconnect on network change | Partial | Critical | Quick Win | WebSocket likely handles this | -| Agent metadata | Company, site, tags, etc. 
| Complete | High | - | In config and protocol | -| Custom properties | Extensible metadata | Partial | Medium | Quick Win | In protocol, UI needed | -| **Installer Builder** | -| Custom metadata fields | Company, site, dept, tag | Missing | Critical | Hard | MSP workflow requirement | -| EXE download | Download custom installer | Missing | Critical | Very Hard | Need build pipeline | -| MSI packaging | GPO deployment support | Missing | High | Very Hard | Howard wants 64-bit MSI | -| Silent install | /qn support | Missing | High | Medium | After MSI works | -| URL copy/send link | Share installer link | Missing | Medium | Quick Win | After builder exists | -| Server-built installers | On-demand generation | Missing | Critical | Very Hard | Architecture question | -| Reconfigure installed agent | --reconfigure flag | Missing | Low | Medium | Useful but defer | -| **Auto-Update** | -| Update check | Agent checks for updates | Partial | High | Medium | update.rs exists | -| Download update | Fetch new binary | Partial | High | Medium | Unclear if complete | -| Verify checksum | SHA-256 validation | Partial | High | Quick Win | Protocol has field | -| Install update | Replace binary | Missing | High | Hard | Tricky on Windows (file locks) | -| Rollback on failure | Revert to previous version | Missing | Medium | Hard | Safety feature | -| Version reporting | Agent version to server | Complete | High | - | build_info module | -| Mandatory updates | Force update immediately | Missing | Low | Quick Win | After update works | -| **Security & Compliance** | -| JWT authentication | Dashboard login | Complete | Critical | - | Working | -| Argon2 password hashing | Secure password storage | Complete | Critical | - | Working | -| User management API | CRUD users | Complete | High | - | Working | -| Session audit logging | Who, when, what, duration | Complete | High | - | events table | -| MFA/2FA support | TOTP authenticator | Missing | High | Hard | Common security 
requirement | -| Role-based permissions | Tech, senior, admin roles | Partial | Medium | Medium | Schema exists, enforcement unclear | -| Per-client permissions | Restrict tech to clients | Missing | Medium | Medium | MSP multi-tenant need | -| Session recording | Video playback | Missing | Low | Very Hard | Compliance feature, defer | -| Command audit log | Log all commands run | Partial | Medium | Quick Win | events table exists | -| File transfer audit | Log file transfers | Missing | Medium | Quick Win | After file transfer works | -| **Agent Special Features** | -| Protocol handler registration | guruconnect:// URLs | Partial | High | Medium | install.rs, unclear if working | -| Tray icon | System tray presence | Partial | Medium | Medium | tray.rs exists | -| Tray menu | Status, exit, request support | Missing | Medium | Medium | After tray works | -| Safe mode reboot | Reboot to safe mode + networking | Missing | Medium | Hard | Malware removal feature | -| Emergency reboot | Force immediate reboot | Missing | Low | Medium | Useful but not critical | -| Wake-on-LAN | Wake offline machines | Missing | Low | Hard | Needs local relay agent | -| Self-delete (support mode) | Cleanup after one-time session | Missing | High | Medium | One-time agent requirement | -| Run without admin | User-space support sessions | Partial | Critical | Quick Win | Should work, needs testing | -| Optional elevation | Admin access when needed | Missing | High | Medium | UAC prompt + elevated mode | -| **Session Management** | -| Transfer session | Hand off to another tech | Missing | Medium | Hard | Useful collaboration feature | -| Pause/resume session | Temporary pause | Missing | Low | Medium | Nice to have | -| Session notes | Per-session documentation | Missing | Medium | Medium | Good MSP practice | -| Timeline view | Connection history | Partial | Medium | Medium | Database exists, UI needed | -| Session tags | Categorize sessions | Missing | Low | Quick Win | After basic 
session mgmt | -| **Integration** | -| GuruRMM integration | Shared auth, launch from RMM | Missing | Low | Hard | Future phase | -| PSA integration | HaloPSA, Autotask, CW | Missing | Low | Very Hard | Future phase | -| Standalone mode | Works without RMM | Complete | Critical | - | Current state | - ---- - -## 2. MVP Feature Set Recommendation - -To ship a **Minimum Viable Product** that MSPs can actually use, the following features are ESSENTIAL: - -### ABSOLUTE MVP (cannot function without these) -1. End-user portal with support code entry -2. Auto-download one-time agent executable -3. Browser-based screen viewing (working) -4. Mouse and keyboard control (working) -5. Dashboard with session list and join capability - -**Current Status:** Items 3-4 mostly done; items 1, 2, and 5 are blockers - -### CRITICAL MVP (needed for real MSP work) -6. Text clipboard sync (bidirectional) -7. File download from remote machine -8. Remote PowerShell/CMD execution with output streaming -9. Persistent agent installer (Windows service) -10. Multi-session handling (tech manages multiple sessions) - -**Current Status:** Item 9 partially done; items 6-8 and 10 missing - -### HIGH PRIORITY MVP (competitive parity) -11. Chat between tech and end user -12. Process viewer with kill capability -13. System information display -14. Installer builder with custom metadata -15. Dashboard machine grouping (by company/site) - -**Current Status:** All missing except partial system info - -### RECOMMENDED MVP SCOPE -Include: Items 1-14 (defer item 15 to post-launch) -Defer: MSI packaging, advanced backstage tools, session recording, mobile support -**Estimated Time:** 8-10 weeks with focused development - ---- - -## 3. Critical Gaps That Block Launch - -### LAUNCH BLOCKERS (ship-stoppers) - -| Gap | Impact | Why Critical | Effort | -|-----|--------|-------------|--------| -| **No end-user portal** | Cannot ship | End users have no way to initiate support sessions. 
Support codes are useless without a portal to enter them. | Medium (2 weeks) | -| **No one-time agent download** | Cannot ship | The entire attended support model depends on downloading a temporary agent. Without this, only persistent agents work. | Hard (3-4 weeks) | -| **Input relay incomplete** | Barely functional | If mouse/keyboard doesn't work reliably, it's not remote control - it's just screen viewing. | Quick Win (1 week) | -| **No dashboard session list UI** | Cannot ship | Technicians can't see or join sessions. The API exists but there's no UI to use it. | Medium (2 weeks) | - -**Total to unblock launch:** 8-9 weeks - -### USABILITY BLOCKERS (can ship but product is barely functional) - -| Gap | Impact | Why Critical | Effort | -|-----|--------|-------------|--------| -| **No clipboard sync** | Poor UX | Industry standard feature. MSPs expect to copy/paste credentials, commands, URLs between local and remote. Howard emphasized this. | Medium (2 weeks) | -| **No file transfer** | Limited utility | Essential for support work - uploading fixes, downloading logs, transferring files. Every competitor has this. | Medium (2-3 weeks) | -| **No remote CMD/PowerShell** | Deal breaker for MSPs | Howard's #1 feature request. Windows admin work requires running commands remotely. ScreenConnect has this, we must have it. | Medium (2 weeks) | -| **No installer builder** | Deployment blocker | Can't easily deploy to client machines. Manual agent setup doesn't scale. MSPs need custom installers with company/site metadata baked in. | Very Hard (4+ weeks) | - -**Total to be competitive:** Additional 10-13 weeks - ---- - -## 4. Quick Wins (High Value, Low Effort) - -These features provide significant value with minimal implementation effort: - -| Feature | Value | Effort | Rationale | -|---------|-------|--------|-----------| -| **Complete input relay** | Critical | 1 week | Server already relays messages. Just connect viewer input capture to WebSocket properly. 
| -| **Text clipboard sync** | High | 2 weeks | Protocol defined. Implement Windows clipboard API on agent, JS clipboard API in viewer. Start with text only. | -| **System info display** | Medium | 1 week | AgentStatus already collects hostname, OS, uptime. Just display it in dashboard detail panel. | -| **Basic file download** | High | 1-2 weeks | Simpler than bidirectional. Agent reads file, streams chunks, viewer saves. High MSP value. | -| **Session detail panel** | High | 1 week | Data exists (session info, machine info). Create UI component with tabs (Info, Screen, Chat, etc.). | -| **Support code in download URL** | Medium | 1 week | Server embeds code in downloaded agent filename or metadata. Agent reads it on startup. | -| **Join session button** | Critical | 3 days | Straightforward: button clicks -> JWT auth -> WebSocket connect -> viewer loads. | -| **PowerShell timeout controls** | High | 3 days | Howard specifically requested checkboxes/textboxes instead of typing timeout flags every time. | -| **Process list viewer** | Medium | 1 week | Windows API call to enumerate processes. Display in dashboard. Foundation for kill process. | -| **Chat UI integration** | Medium | 1-2 weeks | ChatController exists on agent. Protocol defined. Just create dashboard UI component and wire it up. | - -**Total quick wins time:** 8-10 weeks (if done in parallel: 4-5 weeks) - ---- - -## 5. 
Feature Prioritization Roadmap - -### PHASE A: Make It Work (6-8 weeks) -**Goal:** Basic functional product for attended support - -| Priority | Feature | Status | Effort | -|----------|---------|--------|--------| -| 1 | End-user portal (support code entry) | Missing | 2 weeks | -| 2 | One-time agent download | Missing | 3-4 weeks | -| 3 | Complete input relay (mouse/keyboard) | Partial | 1 week | -| 4 | Dashboard session list UI | Partial | 2 weeks | -| 5 | Session detail panel with tabs | Missing | 1 week | -| 6 | Join session functionality | Missing | 3 days | - -**Deliverable:** MSP can generate support code, end user can connect, tech can view screen and control remotely. - -### PHASE B: Make It Useful (6-8 weeks) -**Goal:** Competitive for real support work - -| Priority | Feature | Status | Effort | -|----------|---------|--------|--------| -| 7 | Text clipboard sync (bidirectional) | Missing | 2 weeks | -| 8 | Remote PowerShell execution | Missing | 2 weeks | -| 9 | PowerShell timeout controls | Missing | 3 days | -| 10 | Basic file download | Missing | 1-2 weeks | -| 11 | Process list viewer | Missing | 1 week | -| 12 | System information display | Partial | 1 week | -| 13 | Chat UI in dashboard | Missing | 1-2 weeks | -| 14 | Multi-monitor support | Missing | 2 weeks | - -**Deliverable:** Full-featured support tool competitive with ScreenConnect for attended sessions. 
- -### PHASE C: Make It Production (8-10 weeks) -**Goal:** Complete MSP solution with deployment tools - -| Priority | Feature | Status | Effort | -|----------|---------|--------|--------| -| 15 | Persistent agent Windows service | Partial | 2 weeks | -| 16 | Installer builder (custom EXE) | Missing | 4 weeks | -| 17 | Dashboard machine grouping | Missing | 2 weeks | -| 18 | Search and filtering | Missing | 2 weeks | -| 19 | File upload capability | Missing | 2 weeks | -| 20 | Rich clipboard (HTML, RTF, images) | Missing | 2 weeks | -| 21 | Services list viewer | Missing | 1 week | -| 22 | Command audit logging | Partial | 1 week | - -**Deliverable:** Full MSP remote access solution with deployment automation. - -### PHASE D: Polish & Advanced Features (ongoing) -**Goal:** Feature parity with ScreenConnect, competitive advantages - -| Priority | Feature | Status | Effort | -|----------|---------|--------|--------| -| 23 | MSI packaging (64-bit) | Missing | 3-4 weeks | -| 24 | MFA/2FA support | Missing | 2 weeks | -| 25 | Role-based permissions enforcement | Partial | 2 weeks | -| 26 | Session recording | Missing | 4+ weeks | -| 27 | Safe mode reboot | Missing | 2 weeks | -| 28 | Event log viewer | Missing | 3 weeks | -| 29 | Auto-update complete | Partial | 3 weeks | -| 30 | Mobile viewer | Missing | 8+ weeks | - -**Deliverable:** Enterprise-grade solution with advanced features. - ---- - -## 6. 
Requirement Quality Assessment - -### CLEAR AND TESTABLE -- Most requirements are well-defined with specific capabilities -- Mock-ups provided for dashboard design (helpful) -- Howard's feedback is concrete (PowerShell timeouts, 64-bit client) -- Protocol definitions are precise - -### CONFLICTS OR AMBIGUITIES -- **None identified** - requirements are internally consistent -- Design mockups match written requirements - -### UNREALISTIC REQUIREMENTS -- **None found** - all features exist in ScreenConnect and are technically feasible -- MSI packaging is complex but standard industry practice -- Safe mode reboot is possible via Windows APIs -- WoL requires network relay but requirement acknowledges this - -### MISSING REQUIREMENTS - -| Area | What's Missing | Impact | Recommendation | -|------|---------------|--------|----------------| -| **Performance** | Vague targets ("30+ FPS on LAN") | Can't validate if met | Define minimum acceptable: "15+ FPS WAN, 30+ FPS LAN, <200ms input latency" | -| **Bandwidth** | No network requirements | Can't test WAN scenarios | Specify: "Must work on 1 Mbps WAN, graceful degradation on slower" | -| **Scalability** | "50+ concurrent agents" is vague | Don't know when to scale | Define: "Single server: 100 agents, 25 concurrent sessions. 
Cluster: 1000+ agents" | -| **Disaster Recovery** | No backup/restore mentioned | Production risk | Add: "Database backup, config export/import, agent re-registration" | -| **Migration** | No ScreenConnect import | Friction for new customers | Add: "Import ScreenConnect sessions, export contact lists" | -| **Mobile** | Mentioned but not detailed | Scope unclear | Either detail requirements or defer to Phase 2 entirely | -| **API** | Limited to PSA integration | Third-party extensibility | Add: "REST API for session control, webhook events" | -| **Monitoring** | No health checks, metrics | Operational blindness | Add: "Prometheus metrics, health endpoints, alerting" | -| **Internationalization** | English only assumed | Global MSPs excluded | Consider: "i18n support for dashboard" or explicitly English-only | -| **Accessibility** | No WCAG compliance | ADA compliance risk | Add: "WCAG 2.1 AA compliance" or acknowledge limitation | - -### RECOMMENDATIONS FOR REQUIREMENTS - -1. **Add Performance Acceptance Criteria** - - Minimum FPS: 15 FPS WAN, 30 FPS LAN - - Maximum latency: 200ms input delay on WAN - - Bandwidth: Functional on 1 Mbps, optimal on 5+ Mbps - - Scalability: 100 agents / 25 concurrent sessions per server - -2. **Create ScreenConnect Feature Parity Checklist** - - List all ScreenConnect features - - Mark must-have vs nice-to-have - - Use as validation for "done" - -3. **Detail or Defer Mobile Requirements** - - Either: Full mobile spec (iOS/Android apps) - - Or: Explicitly defer to Phase 2, focus on web - -4. **Add Operational Requirements** - - Monitoring and alerting - - Backup and restore procedures - - Multi-server deployment architecture - - Load balancing strategy - -5. **Specify Migration/Import Tools** - - ScreenConnect session import (if possible) - - Bulk agent deployment strategies - - Configuration migration scripts - ---- - -## 7. 
Implementation Status Summary - -### By Category (% Complete) - -| Category | Complete | Partial | Missing | Overall % | -|----------|----------|---------|---------|-----------| -| Infrastructure | 10 | 0 | 0 | 100% | -| Support Sessions | 4 | 1 | 2 | 70% | -| End-User Portal | 0 | 0 | 5 | 0% | -| Screen Viewing | 5 | 2 | 2 | 65% | -| Remote Control | 3 | 3 | 1 | 60% | -| Clipboard | 0 | 0 | 5 | 0% | -| File Transfer | 0 | 0 | 5 | 0% | -| Backstage Tools | 0 | 2 | 10 | 10% | -| Chat/Messaging | 0 | 2 | 4 | 20% | -| Dashboard UI | 2 | 3 | 10 | 25% | -| Unattended Agents | 5 | 3 | 1 | 70% | -| Installer Builder | 0 | 0 | 7 | 0% | -| Auto-Update | 2 | 3 | 3 | 40% | -| Security | 4 | 2 | 4 | 50% | -| Agent Features | 0 | 3 | 6 | 20% | -| Session Management | 0 | 1 | 4 | 10% | - -**Overall Project Completion: 32%** - -### What Works Today -- Persistent agent connects to server -- JWT authentication for dashboard -- Support code generation and validation -- Screen capture (DXGI + GDI fallback) -- Basic WebSocket relay -- Database persistence -- User management -- Machine registration - -### What Doesn't Work Today -- End users can't initiate sessions (no portal) -- Input control not fully wired -- No clipboard sync -- No file transfer -- No backstage tools -- No installer builder -- Dashboard is very basic -- Chat not integrated - -### What Needs Completion -- Wire up existing components (input, chat, system info) -- Build missing UI (portal, dashboard panels) -- Implement protocol features (clipboard, file transfer) -- Create new features (backstage tools, installer builder) - ---- - -## 8. 
Risk Assessment - -### HIGH RISK (likely to cause delays) - -| Risk | Probability | Impact | Mitigation | -|------|------------|--------|------------| -| One-time agent download complexity | High | Critical | Start early, may need to simplify (just run without install) | -| Installer builder scope creep | High | High | Define MVP: EXE only, defer MSI to Phase 2 | -| Input relay timing issues | Medium | Critical | Thorough testing on various networks | -| Clipboard compatibility issues | Medium | High | Start with text-only, add formats incrementally | - -### MEDIUM RISK (manageable) - -| Risk | Probability | Impact | Mitigation | -|------|------------|--------|------------| -| Multi-monitor switching complexity | Medium | Medium | Good protocol support, mainly UI work | -| File transfer chunking/resume | Medium | Medium | Simple implementation first, optimize later | -| PowerShell output streaming | Medium | High | Use existing .NET libraries, test thoroughly | -| Dashboard real-time updates | Low | High | WebSocket infrastructure exists | - -### LOW RISK (minor concerns) - -| Risk | Probability | Impact | Mitigation | -|------|------------|--------|------------| -| MSI packaging learning curve | Low | Medium | Defer to Phase D, use WiX | -| Safe mode reboot compatibility | Low | Low | Windows API well-documented | -| Cross-browser compatibility | Low | Medium | Modern browsers similar, test all | - ---- - -## 9. Recommendations - -### IMMEDIATE ACTIONS (Week 1-2) - -1. **Create End-User Portal** (static HTML/JS) - - Support code entry form - - Validation via API - - Download link generation - - Browser detection for instructions - -2. **Complete Input Relay Chain** - - Verify viewer captures mouse/keyboard - - Ensure server relays to agent - - Test end-to-end on LAN and WAN - -3. **Build Dashboard Session List UI** - - Display active sessions from API - - Real-time updates via WebSocket - - Join button that launches viewer - -### SHORT TERM (Week 3-8) - -4. 
**One-Time Agent Download** - - Simplify: agent runs without install - - Embed support code in download URL - - Test on Windows 10/11 without admin - -5. **Text Clipboard Sync** - - Windows clipboard API on agent - - JavaScript clipboard API in viewer - - Bidirectional sync on change - -6. **Remote PowerShell** - - Execute process, capture stdout/stderr - - Stream output to dashboard - - UI with timeout controls (checkboxes) - -7. **File Download** - - Agent reads file, chunks it - - Stream via WebSocket - - Viewer saves to local disk - -### MEDIUM TERM (Week 9-16) - -8. **Persistent Agent Service Mode** - - Complete Windows service installation - - Auto-start on boot - - Test on Server 2016/2019/2022 - -9. **Dashboard Enhancements** - - Machine grouping by company/site - - Search and filtering - - Session detail panels with tabs - -10. **Installer Builder MVP** - - Generate custom EXE with metadata - - Server-side build pipeline - - Download from dashboard - -### LONG TERM (Week 17+) - -11. **MSI Packaging** - - WiX toolset integration - - 64-bit support (Howard requirement) - - Silent install for GPO - -12. **Advanced Features** - - Session recording - - MFA/2FA - - Mobile viewer - - PSA integrations - -### PROCESS IMPROVEMENTS - -13. **Add Performance Testing** - - Define FPS benchmarks - - Latency measurement - - Bandwidth profiling - -14. **Create Test Plan** - - End-to-end scenarios - - Cross-browser testing - - Network simulation (WAN throttling) - -15. **Update Requirements Document** - - Add missing operational requirements - - Define performance targets - - Create ScreenConnect parity checklist - ---- - -## 10. Conclusion - -GuruConnect has **excellent technical foundations** but needs **significant feature development** to reach MVP. The infrastructure (server, protocol, database, auth) is production-ready, but user-facing features are 30-35% complete. 
- -### Path to Launch - -**Conservative Estimate:** 20-24 weeks to production-ready -**Aggressive Estimate:** 12-16 weeks with focused development -**Recommended Approach:** 3-phase delivery - -1. **Phase A (6-8 weeks):** Basic functional product - attended support only -2. **Phase B (6-8 weeks):** Competitive features - clipboard, file transfer, PowerShell -3. **Phase C (8-10 weeks):** Full MSP solution - installer builder, grouping, polish - -### Key Success Factors - -1. **Prioritize ruthlessly** - Defer nice-to-haves (MSI, session recording, mobile) -2. **Leverage existing code** - Chat, system info, auth already partially done -3. **Start with simple implementations** - Text-only clipboard, download-only files -4. **Focus on Howard's priorities** - PowerShell/CMD, 64-bit client, clipboard -5. **Test early and often** - Input latency, cross-browser, WAN performance - -### Critical Path Items - -The following items are on the critical path and cannot be parallelized: - -1. End-user portal (blocks testing) -2. One-time agent download (blocks end-user usage) -3. Input relay completion (blocks remote control validation) -4. Dashboard session UI (blocks technician workflow) - -Everything else can be developed in parallel by separate developers. - -**Bottom Line:** The project is viable and well-architected, but needs 3-6 months of focused feature development to compete with ScreenConnect. Howard's team should plan accordingly. - ---- - -**Generated:** 2026-01-17 -**Next Review:** After Phase A completion diff --git a/projects/msp-tools/guru-connect/INFRASTRUCTURE_STATUS.md b/projects/msp-tools/guru-connect/INFRASTRUCTURE_STATUS.md deleted file mode 100644 index 8da6707..0000000 --- a/projects/msp-tools/guru-connect/INFRASTRUCTURE_STATUS.md +++ /dev/null @@ -1,336 +0,0 @@ -# GuruConnect Production Infrastructure Status - -**Date:** 2026-01-18 15:36 UTC -**Server:** 172.16.3.30 (gururmm) -**Installation Status:** IN PROGRESS - ---- - -## Completed Components - -### 1. 
Systemd Service - ACTIVE ✓ - -**Status:** Running -**PID:** 3944724 -**Service:** guruconnect.service -**Auto-start:** Enabled - -```bash -sudo systemctl status guruconnect -sudo journalctl -u guruconnect -f -``` - -**Features:** -- Auto-restart on failure (10s delay, max 3 in 5 min) -- Resource limits: 65536 FDs, 4096 processes -- Security hardening enabled -- Journald logging integration -- Watchdog support (30s keepalive) - ---- - -### 2. Automated Backups - CONFIGURED ✓ - -**Status:** Active (waiting) -**Timer:** guruconnect-backup.timer -**Next Run:** Mon 2026-01-19 00:00:00 UTC (8h remaining) - -```bash -sudo systemctl status guruconnect-backup.timer -``` - -**Configuration:** -- Schedule: Daily at 2:00 AM UTC -- Location: `/home/guru/backups/guruconnect/` -- Format: `guruconnect-YYYY-MM-DD-HHMMSS.sql.gz` -- Retention: 30 daily, 4 weekly, 6 monthly -- Compression: Gzip - -**Manual Backup:** -```bash -cd ~/guru-connect/server -./backup-postgres.sh -``` - ---- - -### 3. Log Rotation - CONFIGURED ✓ - -**Status:** Configured -**File:** `/etc/logrotate.d/guruconnect` - -**Configuration:** -- Rotation: Daily -- Retention: 30 days -- Compression: Yes (delayed 1 day) -- Post-rotate: Reload guruconnect service - ---- - -### 4. Passwordless Sudo - CONFIGURED ✓ - -**Status:** Active -**File:** `/etc/sudoers.d/guru` - -The `guru` user can now run all commands with `sudo` without password prompts. - ---- - -## In Progress - -### 5. 
Prometheus & Grafana - INSTALLING ⏳ - -**Status:** Installing (in progress) -**Progress:** -- ✓ Prometheus packages downloaded and installed -- ✓ Prometheus Node Exporter installed -- ⏳ Grafana being installed (194 MB download complete, unpacking) - -**Expected Installation Time:** ~5-10 minutes remaining - -**Will be available at:** -- Prometheus: http://172.16.3.30:9090 -- Grafana: http://172.16.3.30:3000 (admin/admin) -- Node Exporter: http://172.16.3.30:9100/metrics - ---- - -## Server Status - -### GuruConnect Server - -**Health:** OK -**Metrics:** Operational -**Uptime:** 20 seconds (via systemd) - -```bash -# Health check -curl http://172.16.3.30:3002/health - -# Metrics -curl http://172.16.3.30:3002/metrics -``` - -### Database - -**Status:** Connected -**Users:** 2 -**Machines:** 15 (restored from database) -**Credentials:** Fixed (gc_a7f82d1e4b9c3f60) - -### Authentication - -**Admin User:** howard -**Password:** AdminGuruConnect2026 -**Dashboard:** https://connect.azcomputerguru.com/dashboard - -**JWT Token Example:** -``` -eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIwOThhNmEyNC05YmNiLTRmOWItODUyMS04ZmJiOTU5YzlmM2YiLCJ1c2VybmFtZSI6Imhvd2FyZCIsInJvbGUiOiJhZG1pbiIsInBlcm1pc3Npb25zIjpbInZpZXciLCJjb250cm9sIiwidHJhbnNmZXIiLCJtYW5hZ2VfY2xpZW50cyJdLCJleHAiOjE3Njg3OTUxNDYsImlhdCI6MTc2ODcwODc0Nn0.q2SFMDOWDH09kLj3y1MiVXFhIqunbHHp_-kjJP6othA -``` - ---- - -## Verification Commands - -```bash -# Run comprehensive verification -bash ~/guru-connect/verify-installation.sh - -# Check individual components -sudo systemctl status guruconnect -sudo systemctl status guruconnect-backup.timer -sudo systemctl status prometheus -sudo systemctl status grafana-server - -# Test endpoints -curl http://172.16.3.30:3002/health -curl http://172.16.3.30:3002/metrics -curl http://172.16.3.30:9090 # Prometheus (after install) -curl http://172.16.3.30:3000 # Grafana (after install) -``` - ---- - -## Next Steps - -### After Prometheus/Grafana Installation Completes - -1. 
**Access Grafana:** - - URL: http://172.16.3.30:3000 - - Login: admin/admin - - Change default password - -2. **Import Dashboard:** - ``` - Grafana > Dashboards > Import - Upload: ~/guru-connect/infrastructure/grafana-dashboard.json - ``` - -3. **Verify Prometheus Scraping:** - - URL: http://172.16.3.30:9090/targets - - Check GuruConnect target is UP - - Verify metrics being collected - -4. **Test Alerts:** - - URL: http://172.16.3.30:9090/alerts - - Review configured alert rules - - Consider configuring Alertmanager for notifications - ---- - -## Production Readiness Checklist - -- [x] Server running via systemd -- [x] Database connected and operational -- [x] Admin credentials configured -- [x] Automated backups configured -- [x] Log rotation configured -- [x] Passwordless sudo enabled -- [ ] Prometheus/Grafana installed (in progress) -- [ ] Grafana dashboard imported -- [ ] Grafana default password changed -- [ ] Firewall rules reviewed -- [ ] SSL/TLS certificates valid -- [ ] Monitoring alerts tested -- [ ] Backup restore tested -- [ ] Health monitoring cron configured (optional) - ---- - -## Infrastructure Files - -**On Server:** -``` -/home/guru/guru-connect/ -├── server/ -│ ├── guruconnect.service # Systemd service unit -│ ├── setup-systemd.sh # Service installer -│ ├── backup-postgres.sh # Backup script -│ ├── restore-postgres.sh # Restore script -│ ├── health-monitor.sh # Health checks -│ ├── guruconnect-backup.service # Backup service unit -│ ├── guruconnect-backup.timer # Backup timer -│ ├── guruconnect.logrotate # Log rotation config -│ └── start-secure.sh # Manual start script -├── infrastructure/ -│ ├── prometheus.yml # Prometheus config -│ ├── alerts.yml # Alert rules -│ ├── grafana-dashboard.json # Pre-built dashboard -│ └── setup-monitoring.sh # Monitoring installer -├── install-production-infrastructure.sh # Master installer -└── verify-installation.sh # Verification script -``` - -**Systemd Files:** -``` -/etc/systemd/system/ -├── 
guruconnect.service -├── guruconnect-backup.service -└── guruconnect-backup.timer -``` - -**Configuration Files:** -``` -/etc/prometheus/ -├── prometheus.yml -└── alerts.yml - -/etc/logrotate.d/ -└── guruconnect - -/etc/sudoers.d/ -└── guru -``` - ---- - -## Troubleshooting - -### Server Not Starting - -```bash -# Check logs -sudo journalctl -u guruconnect -n 50 - -# Check for port conflicts -sudo netstat -tulpn | grep 3002 - -# Verify binary -ls -la ~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server - -# Check environment -cat ~/guru-connect/server/.env -``` - -### Database Connection Issues - -```bash -# Test connection -PGPASSWORD=gc_a7f82d1e4b9c3f60 psql -h localhost -U guruconnect -d guruconnect -c 'SELECT 1' - -# Check PostgreSQL -sudo systemctl status postgresql - -# Verify credentials -cat ~/guru-connect/server/.env | grep DATABASE_URL -``` - -### Backup Issues - -```bash -# Test backup manually -cd ~/guru-connect/server -./backup-postgres.sh - -# Check backup directory -ls -lh /home/guru/backups/guruconnect/ - -# View timer logs -sudo journalctl -u guruconnect-backup -n 50 -``` - ---- - -## Performance Metrics - -**Current Metrics (Prometheus):** -- Active Sessions: 0 -- Server Uptime: 20 seconds -- Database Connected: Yes -- Request Latency: <1ms -- Memory Usage: 1.6M -- CPU Usage: Minimal - -**10 Prometheus Metrics Collected:** -1. guruconnect_requests_total -2. guruconnect_request_duration_seconds -3. guruconnect_sessions_total -4. guruconnect_active_sessions -5. guruconnect_session_duration_seconds -6. guruconnect_connections_total -7. guruconnect_active_connections -8. guruconnect_errors_total -9. guruconnect_db_operations_total -10. 
guruconnect_db_query_duration_seconds - ---- - -## Security Status - -**Week 1 Security Fixes:** 10/13 (77%) -**Week 2 Infrastructure:** 100% Complete - -**Active Security Features:** -- JWT authentication with 24h expiration -- Argon2id password hashing -- Security headers (CSP, X-Frame-Options, etc.) -- Token blacklist for logout -- Database credentials stored in .env (plaintext, not encrypted) -- API key validation for agents -- IP logging for connections - ---- - -**Last Updated:** 2026-01-18 15:36 UTC -**Next Update:** After Prometheus/Grafana installation completes diff --git a/projects/msp-tools/guru-connect/INSTALLATION_GUIDE.md b/projects/msp-tools/guru-connect/INSTALLATION_GUIDE.md deleted file mode 100644 index e7f0ca0..0000000 --- a/projects/msp-tools/guru-connect/INSTALLATION_GUIDE.md +++ /dev/null @@ -1,518 +0,0 @@ -# GuruConnect Production Infrastructure Installation Guide - -**Date:** 2026-01-18 -**Server:** 172.16.3.30 -**Status:** Core system operational, infrastructure ready for installation - ---- - -## Current Status - -- Server Process: Running (PID 3847752) -- Health Check: OK -- Metrics Endpoint: Operational -- Database: Connected (2 users) -- Dashboard: https://connect.azcomputerguru.com/dashboard - -**Login:** username=`howard`, password=`AdminGuruConnect2026` - ---- - -## Installation Options - -### Option 1: One-Command Installation (Recommended) - -Run the master installation script that installs everything: - -```bash -ssh guru@172.16.3.30 -cd ~/guru-connect -sudo bash install-production-infrastructure.sh -``` - -This will install: -1. Systemd service for auto-start and management -2. Prometheus & Grafana monitoring stack -3. Automated PostgreSQL backups (daily at 2:00 AM) -4. 
Log rotation configuration - -**Time:** ~10-15 minutes (Grafana installation takes longest) - ---- - -### Option 2: Step-by-Step Manual Installation - -If you prefer to install components individually: - -#### Step 1: Install Systemd Service - -```bash -ssh guru@172.16.3.30 -cd ~/guru-connect/server -sudo ./setup-systemd.sh -``` - -**What this does:** -- Installs GuruConnect as a systemd service -- Enables auto-start on boot -- Configures auto-restart on failure -- Sets resource limits and security hardening - -**Verify:** -```bash -sudo systemctl status guruconnect -sudo journalctl -u guruconnect -n 20 -``` - ---- - -#### Step 2: Install Prometheus & Grafana - -```bash -ssh guru@172.16.3.30 -cd ~/guru-connect/infrastructure -sudo ./setup-monitoring.sh -``` - -**What this does:** -- Installs Prometheus for metrics collection -- Installs Grafana for visualization -- Configures Prometheus to scrape GuruConnect metrics -- Sets up Prometheus data source in Grafana - -**Access:** -- Prometheus: http://172.16.3.30:9090 -- Grafana: http://172.16.3.30:3000 (admin/admin) - -**Post-installation:** -1. Access Grafana at http://172.16.3.30:3000 -2. Login with admin/admin -3. Change the default password -4. 
Import dashboard: - - Go to Dashboards > Import - - Upload `~/guru-connect/infrastructure/grafana-dashboard.json` - ---- - -#### Step 3: Install Automated Backups - -```bash -ssh guru@172.16.3.30 - -# Create backup directory -sudo mkdir -p /home/guru/backups/guruconnect -sudo chown guru:guru /home/guru/backups/guruconnect - -# Install systemd timer -sudo cp ~/guru-connect/server/guruconnect-backup.service /etc/systemd/system/ -sudo cp ~/guru-connect/server/guruconnect-backup.timer /etc/systemd/system/ -sudo systemctl daemon-reload -sudo systemctl enable guruconnect-backup.timer -sudo systemctl start guruconnect-backup.timer -``` - -**Verify:** -```bash -sudo systemctl status guruconnect-backup.timer -sudo systemctl list-timers -``` - -**Test manual backup:** -```bash -cd ~/guru-connect/server -./backup-postgres.sh -ls -lh /home/guru/backups/guruconnect/ -``` - -**Backup Schedule:** Daily at 2:00 AM -**Retention:** 30 daily, 4 weekly, 6 monthly backups - ---- - -#### Step 4: Install Log Rotation - -```bash -ssh guru@172.16.3.30 -sudo cp ~/guru-connect/server/guruconnect.logrotate /etc/logrotate.d/guruconnect -sudo chmod 644 /etc/logrotate.d/guruconnect -``` - -**Verify:** -```bash -sudo cat /etc/logrotate.d/guruconnect -sudo logrotate -d /etc/logrotate.d/guruconnect -``` - -**Log Rotation:** Daily, 30 days retention, compressed - ---- - -## Verification - -After installation, verify everything is working: - -```bash -ssh guru@172.16.3.30 -bash ~/guru-connect/verify-installation.sh -``` - -Expected output (all green): -- Server process: Running -- Health endpoint: OK -- Metrics endpoint: OK -- Systemd service: Active -- Prometheus: Active -- Grafana: Active -- Backup timer: Active -- Log rotation: Configured -- Database: Connected - ---- - -## Post-Installation Tasks - -### 1. Configure Grafana - -1. Access http://172.16.3.30:3000 -2. Login with admin/admin -3. Change password when prompted -4. 
Import dashboard: - ``` - Dashboards > Import > Upload JSON file - Select: ~/guru-connect/infrastructure/grafana-dashboard.json - ``` - -### 2. Test Backup & Restore - -**Test backup:** -```bash -ssh guru@172.16.3.30 -cd ~/guru-connect/server -./backup-postgres.sh -``` - -**Verify backup created:** -```bash -ls -lh /home/guru/backups/guruconnect/ -``` - -**Test restore (CAUTION - use test database):** -```bash -cd ~/guru-connect/server -./restore-postgres.sh /home/guru/backups/guruconnect/guruconnect-YYYY-MM-DD-HHMMSS.sql.gz -``` - -### 3. Configure NPM (Nginx Proxy Manager) - -If Prometheus/Grafana need external access: - -1. Add proxy hosts in NPM: - - prometheus.azcomputerguru.com -> http://172.16.3.30:9090 - - grafana.azcomputerguru.com -> http://172.16.3.30:3000 - -2. Enable SSL/TLS via Let's Encrypt - -3. Restrict access (firewall or NPM access lists) - -### 4. Test Health Monitoring - -```bash -ssh guru@172.16.3.30 -cd ~/guru-connect/server -./health-monitor.sh -``` - -Expected output: All checks passed - ---- - -## Service Management - -### GuruConnect Server - -```bash -# Start server -sudo systemctl start guruconnect - -# Stop server -sudo systemctl stop guruconnect - -# Restart server -sudo systemctl restart guruconnect - -# Check status -sudo systemctl status guruconnect - -# View logs -sudo journalctl -u guruconnect -f - -# View recent logs -sudo journalctl -u guruconnect -n 100 -``` - -### Prometheus - -```bash -# Status -sudo systemctl status prometheus - -# Restart -sudo systemctl restart prometheus - -# Logs -sudo journalctl -u prometheus -n 50 -``` - -### Grafana - -```bash -# Status -sudo systemctl status grafana-server - -# Restart -sudo systemctl restart grafana-server - -# Logs -sudo journalctl -u grafana-server -n 50 -``` - -### Backups - -```bash -# Check timer status -sudo systemctl status guruconnect-backup.timer - -# Check when next backup runs -sudo systemctl list-timers - -# Manually trigger backup -sudo systemctl start 
guruconnect-backup.service - -# View backup logs -sudo journalctl -u guruconnect-backup -n 20 -``` - ---- - -## Troubleshooting - -### Server Won't Start - -```bash -# Check logs -sudo journalctl -u guruconnect -n 50 - -# Check if port 3002 is in use -sudo netstat -tulpn | grep 3002 - -# Verify .env file -cat ~/guru-connect/server/.env - -# Test manual start -cd ~/guru-connect/server -./start-secure.sh -``` - -### Database Connection Issues - -```bash -# Test PostgreSQL -PGPASSWORD=gc_a7f82d1e4b9c3f60 psql -h localhost -U guruconnect -d guruconnect -c 'SELECT 1' - -# Check PostgreSQL service -sudo systemctl status postgresql - -# Verify DATABASE_URL in .env -cat ~/guru-connect/server/.env | grep DATABASE_URL -``` - -### Prometheus Not Scraping Metrics - -```bash -# Check Prometheus targets -# Access: http://172.16.3.30:9090/targets - -# Verify GuruConnect metrics endpoint -curl http://172.16.3.30:3002/metrics - -# Check Prometheus config -sudo cat /etc/prometheus/prometheus.yml - -# Restart Prometheus -sudo systemctl restart prometheus -``` - -### Grafana Dashboard Not Loading - -```bash -# Check Grafana logs -sudo journalctl -u grafana-server -n 50 - -# Verify data source -# Access: http://172.16.3.30:3000/datasources - -# Test Prometheus connection -curl http://localhost:9090/api/v1/query?query=up -``` - ---- - -## Monitoring & Alerts - -### Prometheus Alerts - -Configured alerts (from `infrastructure/alerts.yml`): - -1. **GuruConnectDown** - Server unreachable for 1 minute -2. **HighErrorRate** - >10 errors/second for 5 minutes -3. **TooManyActiveSessions** - >100 active sessions -4. **HighRequestLatency** - p95 >1s for 5 minutes -5. **DatabaseOperationsFailure** - DB errors >1/second -6. **ServerRestarted** - Uptime <5 minutes (informational) - -**View alerts:** http://172.16.3.30:9090/alerts - -### Grafana Dashboard - -Pre-configured panels: - -1. Active Sessions (gauge) -2. Requests per Second (graph) -3. Error Rate (graph with alerting) -4. 
Request Latency p50/p95/p99 (graph) -5. Active Connections by Type (stacked graph) -6. Database Query Duration (graph) -7. Server Uptime (singlestat) -8. Total Sessions Created (singlestat) -9. Total Requests (singlestat) -10. Total Errors (singlestat with thresholds) - ---- - -## Backup & Recovery - -### Manual Backup - -```bash -cd ~/guru-connect/server -./backup-postgres.sh -``` - -Backup location: `/home/guru/backups/guruconnect/guruconnect-YYYY-MM-DD-HHMMSS.sql.gz` - -### Restore from Backup - -**WARNING:** This will drop and recreate the database! - -```bash -cd ~/guru-connect/server -./restore-postgres.sh /path/to/backup.sql.gz -``` - -The script will: -1. Stop GuruConnect service -2. Drop existing database -3. Recreate database -4. Restore from backup -5. Restart service - -### Backup Verification - -```bash -# List backups -ls -lh /home/guru/backups/guruconnect/ - -# Check backup size -du -sh /home/guru/backups/guruconnect/* - -# Verify backup contents (without restoring) -zcat /path/to/backup.sql.gz | head -50 -``` - ---- - -## Security Checklist - -- [x] JWT secret configured (96-char base64) -- [x] Database password changed from default -- [x] Admin password changed from default -- [x] Security headers enabled (CSP, X-Frame-Options, etc.) 
-- [x] Database credentials in .env (not committed to git) -- [ ] Grafana default password changed (admin/admin) -- [ ] Firewall rules configured (limit access to monitoring ports) -- [ ] SSL/TLS enabled for public endpoints -- [ ] Backup encryption (optional - consider encrypting backups) -- [ ] Regular security updates (OS, PostgreSQL, Prometheus, Grafana) - ---- - -## Files Reference - -### Configuration Files - -- `server/.env` - Environment variables and secrets -- `server/guruconnect.service` - Systemd service unit -- `infrastructure/prometheus.yml` - Prometheus scrape config -- `infrastructure/alerts.yml` - Alert rules -- `infrastructure/grafana-dashboard.json` - Pre-built dashboard - -### Scripts - -- `server/start-secure.sh` - Manual server start -- `server/backup-postgres.sh` - Manual backup -- `server/restore-postgres.sh` - Restore from backup -- `server/health-monitor.sh` - Health checks -- `server/setup-systemd.sh` - Install systemd service -- `infrastructure/setup-monitoring.sh` - Install Prometheus/Grafana -- `install-production-infrastructure.sh` - Master installer -- `verify-installation.sh` - Verify installation status - ---- - -## Support & Documentation - -**Main Documentation:** -- `PHASE1_WEEK2_INFRASTRUCTURE.md` - Week 2 planning -- `DEPLOYMENT_WEEK2_INFRASTRUCTURE.md` - Week 2 deployment log -- `CLAUDE.md` - Project coding guidelines - -**Gitea Repository:** -- https://git.azcomputerguru.com/azcomputerguru/guru-connect - -**Dashboard:** -- https://connect.azcomputerguru.com/dashboard - -**API Docs:** -- http://172.16.3.30:3002/api/docs (if OpenAPI enabled) - ---- - -## Next Steps (Phase 1 Week 3) - -After infrastructure is fully installed: - -1. **CI/CD Automation** - - Gitea CI pipeline configuration - - Automated builds on commit - - Automated tests in CI - - Deployment automation - - Build artifact storage - - Version tagging - -2. 
**Advanced Monitoring** - - Alertmanager configuration for email/Slack alerts - - Custom Grafana dashboards - - Log aggregation (optional - Loki) - - Distributed tracing (optional - Jaeger) - -3. **Production Hardening** - - Firewall configuration - - Fail2ban for brute-force protection - - Rate limiting - - DDoS protection - - Regular security audits - ---- - -**Last Updated:** 2026-01-18 04:00 UTC -**Version:** Phase 1 Week 2 Complete diff --git a/projects/msp-tools/guru-connect/MASTER_ACTION_PLAN.md b/projects/msp-tools/guru-connect/MASTER_ACTION_PLAN.md deleted file mode 100644 index 536d430..0000000 --- a/projects/msp-tools/guru-connect/MASTER_ACTION_PLAN.md +++ /dev/null @@ -1,789 +0,0 @@ -# GuruConnect - Master Action Plan -**Comprehensive Review Synthesis** - -**Date:** 2026-01-17 -**Project Status:** Infrastructure Complete, 30-35% Feature Complete -**Reviews Conducted:** 6 specialized analyses - ---- - -## EXECUTIVE SUMMARY - -GuruConnect has **excellent technical foundations** but requires **significant development** across security, features, UI/UX, and infrastructure before production readiness. All reviews converge on a **3-6 month timeline** to MVP with focused effort. - -### Overall Grades - -| Review Area | Grade | Completion | Key Finding | -|-------------|-------|------------|-------------| -| **Security** | D+ | 40% secure | 5 CRITICAL vulnerabilities must be fixed before launch | -| **Architecture** | B- | 30% complete | Solid design, needs feature implementation | -| **Code Quality** | B+ | 85% ready | High quality Rust code, good practices | -| **Infrastructure** | D+ | 15-20% ready | No systemd, no monitoring, manual deployment | -| **Frontend/UI** | C+ | 35-40% complete | Good visual design, massive UX gaps | -| **Requirements Gap** | C | 30-35% complete | 4 launch blockers, 10+ critical missing features | - -### Critical Path Insights - -**LAUNCH BLOCKERS** (Cannot ship without): -1. JWT secret hardcoded (SECURITY) -2. 
No end-user portal (FUNCTIONALITY) -3. No one-time agent download (FUNCTIONALITY) -4. Input relay incomplete (FUNCTIONALITY) -5. No systemd service (INFRASTRUCTURE) - -**Time to Unblock:** 10-12 weeks minimum - -### Recommended Approach - -**PHASE 1: Security & Foundation** (3-4 weeks) -Fix all critical security issues, establish proper deployment infrastructure - -**PHASE 2: Core Features** (6-8 weeks) -Build missing launch blockers: portal, agent download, input completion, dashboard UI - -**PHASE 3: Competitive Features** (6-8 weeks) -Add clipboard, file transfer, PowerShell, chat - features needed to compete with ScreenConnect - -**PHASE 4: Polish & Production** (4-6 weeks) -Installer builder, machine grouping, monitoring, optimization - -**Total Time to Production:** 19-26 weeks when the four phases run sequentially (Conservative: 26 weeks; Aggressive: 16 weeks, achievable only by overlapping phases with a larger team - see Section 5) - ---- - -## 1. CRITICAL SECURITY ISSUES (Must Fix Before Launch) - -### SEVERITY: CRITICAL (5 issues) - -| ID | Issue | Impact | Fix Effort | Priority | -|----|-------|--------|-----------|----------| -| **SEC-1** | JWT secret hardcoded in source | Anyone can forge admin tokens, full system compromise | 2 hours | P0 - IMMEDIATE | -| **SEC-2** | No rate limiting on auth endpoints | Brute force attacks succeed | 1 day | P0 - IMMEDIATE | -| **SEC-3** | SQL injection in machine filters | Database compromise | 3 days | P0 - IMMEDIATE | -| **SEC-4** | Agent connections without validation | Rogue agents can connect | 2 days | P0 - IMMEDIATE | -| **SEC-5** | Session takeover possible | Attackers can hijack sessions | 2 days | P0 - IMMEDIATE | - -**Total Critical Fix Time:** 1.5 weeks - -### SEVERITY: HIGH (8 issues) - -| ID | Issue | Impact | Fix Effort | Priority | -|----|-------|--------|-----------|----------| -| **SEC-6** | Plaintext passwords in logs | Credential exposure | 1 day | P1 | -| **SEC-7** | No input sanitization (XSS) | Dashboard compromise | 2 days | P1 | -| **SEC-8** | Missing TLS cert validation | MITM attacks | 1 day | P1 | -| 
**SEC-9** | Weak PBKDF2 password hashing | Password cracking easier | 1 day | P1 | -| **SEC-10** | No HTTPS enforcement | Credential interception | 4 hours | P1 | -| **SEC-11** | Overly permissive CORS | Cross-site attacks | 2 hours | P1 | -| **SEC-12** | No CSP headers | XSS attacks easier | 4 hours | P1 | -| **SEC-13** | Session tokens never expire | Stolen tokens valid forever | 1 day | P1 | - -**Total High-Priority Fix Time:** 1.5 weeks - -### Security Roadmap - -**Week 1:** -- Day 1-2: Fix JWT secret (SEC-1), add env variable, rotate keys -- Day 3: Implement rate limiting (SEC-2) -- Day 4-5: Fix SQL injection (SEC-3), use parameterized queries - -**Week 2:** -- Day 1-2: Fix agent validation (SEC-4) -- Day 3-4: Fix session takeover (SEC-5) -- Day 5: Add HTTPS enforcement (SEC-10) - -**Week 3:** -- Day 1: Fix password logging (SEC-6) -- Day 2-3: Add input sanitization (SEC-7) -- Day 4: Upgrade to Argon2id (SEC-9) -- Day 5: Add session expiration (SEC-13) - -**Security Testing:** After Week 3, conduct penetration testing - ---- - -## 2. 
LAUNCH BLOCKERS (Cannot Ship Without These) - -### Functional Blockers - -| Blocker | Current State | Required State | Effort | Dependencies | -|---------|--------------|---------------|--------|--------------| -| **Portal Missing** | 0% | End-user portal with code entry, agent download | 2 weeks | None | -| **Agent Download** | 0% | One-time agent EXE with embedded code | 3-4 weeks | Portal | -| **Input Relay** | 50% | Complete mouse/keyboard viewer → agent | 1 week | None | -| **Dashboard UI** | 40% | Session list, join button, real-time updates | 2 weeks | None | - -### Infrastructure Blockers - -| Blocker | Current State | Required State | Effort | Dependencies | -|---------|--------------|---------------|--------|--------------| -| **Systemd Service** | None | Server runs as systemd service, auto-restart | 1 week | None | -| **Monitoring** | None | Prometheus metrics, health checks, alerting | 1 week | None | -| **Automated Backup** | None | Daily PostgreSQL backups, retention policy | 3 days | None | -| **CI/CD Pipeline** | None | Automated builds, tests, deployment | 1 week | None | - -### Combined Launch Blocker Timeline - -**Can be parallelized:** -- Security fixes (3 weeks) || Portal + Agent Download (5 weeks) || Infrastructure (2.5 weeks) -- Input relay (1 week) || Dashboard UI (2 weeks) - -**Critical Path:** Portal → Agent Download → Testing = 6 weeks -**Parallel Work:** Security (3 weeks) + Infrastructure (2.5 weeks) - -**Minimum Time to Launchable MVP:** 8-10 weeks (with 2+ developers) - ---- - -## 3. 
FEATURE PRIORITIZATION MATRIX - -### TIER 0: Launch Blockers (Must Have) - -| Feature | Status | Effort | Critical Path | Owner | -|---------|--------|--------|---------------|-------| -| End-user portal | 0% | 2 weeks | YES | Frontend Dev | -| One-time agent download | 0% | 3-4 weeks | YES | Agent Dev | -| Complete input relay | 50% | 1 week | YES | Agent Dev | -| Dashboard session list UI | 40% | 2 weeks | YES | Frontend Dev | -| JWT secret externalized | 0% | 2 hours | NO | Backend Dev | -| SQL injection fixes | 0% | 3 days | NO | Backend Dev | -| Rate limiting | 0% | 1 day | NO | Backend Dev | -| Systemd service | 0% | 1 week | NO | DevOps | - -### TIER 1: Critical for Usability (Howard's Priorities) - -| Feature | Status | Effort | Business Value | Owner | -|---------|--------|--------|----------------|-------| -| Text clipboard sync | 0% | 2 weeks | HIGH - industry standard | Agent Dev | -| Remote PowerShell/CMD | 0% | 2 weeks | CRITICAL - Howard's #1 request | Agent Dev | -| PowerShell timeout controls | 0% | 3 days | HIGH - Howard specific ask | Frontend Dev | -| File download | 0% | 1-2 weeks | HIGH - essential for support | Agent Dev | -| System info display | 20% | 1 week | MEDIUM - quick win | Frontend Dev | -| Chat UI integration | 20% | 1-2 weeks | HIGH - user expectation | Frontend Dev | -| Process viewer | 0% | 1 week | MEDIUM - troubleshooting aid | Agent Dev | -| Multi-monitor support | 0% | 2 weeks | MEDIUM - common scenario | Agent Dev | - -### TIER 2: Competitive Parity (Nice to Have) - -| Feature | Status | Effort | Competitor Has | Owner | -|---------|--------|--------|----------------|-------| -| Persistent agent service | 70% | 2 weeks | ScreenConnect, TeamViewer | Agent Dev | -| Installer builder (EXE) | 0% | 4 weeks | ScreenConnect | DevOps | -| Machine grouping (company/site) | 0% | 2 weeks | ScreenConnect | Frontend Dev | -| Search and filtering | 0% | 2 weeks | All competitors | Frontend Dev | -| File upload | 0% | 2 weeks | All 
competitors | Agent Dev | -| Rich clipboard (HTML, images) | 0% | 2 weeks | TeamViewer, AnyDesk | Agent Dev | -| Session recording | 0% | 4+ weeks | ScreenConnect (paid) | Agent Dev | - -### TIER 3: Advanced Features (Defer to Post-Launch) - -| Feature | Status | Effort | Justification for Deferral | -|---------|--------|--------|---------------------------| -| MSI packaging (64-bit) | 0% | 3-4 weeks | EXE works for initial launch | -| MFA/2FA support | 0% | 2 weeks | Single-tenant MSP initially | -| Mobile viewer | 0% | 8+ weeks | Desktop-first strategy | -| GuruRMM integration | 0% | 4+ weeks | Standalone value first | -| PSA integrations | 0% | 6+ weeks | After market validation | -| Safe mode reboot | 0% | 2 weeks | Advanced troubleshooting | -| Wake-on-LAN | 0% | 3 weeks | Requires network infrastructure | - ---- - -## 4. INTEGRATED DEVELOPMENT ROADMAP - -### PHASE 1: Security & Infrastructure (Weeks 1-4) - -**Goal:** Fix critical vulnerabilities, establish production-ready infrastructure - -**Team:** 1 Backend Dev + 1 DevOps Engineer - -| Week | Backend Tasks | DevOps Tasks | Deliverable | -|------|--------------|--------------|-------------| -| 1 | JWT secret fix, rate limiting, SQL injection fixes | Systemd service setup, auto-restart config | Secure auth system | -| 2 | Agent validation, session security, password logging fix | Prometheus metrics, Grafana dashboards | Production monitoring | -| 3 | Input sanitization, session expiration, Argon2id upgrade | PostgreSQL automated backups, retention policy | Secure data persistence | -| 4 | TLS enforcement, CORS fix, CSP headers | CI/CD pipeline (GitHub Actions or Gitea CI) | Automated deployments | - -**Milestone:** Production-ready infrastructure, all critical security issues resolved - -**Exit Criteria:** -- [ ] No critical or high-severity security issues remain -- [ ] Server runs as systemd service with auto-restart -- [ ] Prometheus metrics exposed, Grafana dashboard configured -- [ ] Daily automated 
PostgreSQL backups -- [ ] CI/CD pipeline builds and tests on every commit - -### PHASE 2: Core Functionality (Weeks 5-12) - -**Goal:** Build missing features needed for basic attended support sessions - -**Team:** 1 Frontend Dev + 1 Agent Dev + 1 Backend Dev (part-time) - -| Week | Frontend | Agent | Backend | Deliverable | -|------|----------|-------|---------|-------------| -| 5 | End-user portal HTML/CSS/JS | Complete input relay wiring | Support code API enhancements | Portal + input working | -| 6 | Portal browser detection, instructions | One-time agent download (phase 1) | Support code → agent linking | Code entry functional | -| 7 | Dashboard session list real-time updates | One-time agent download (phase 2) | Session state management | Live session tracking | -| 8 | Session detail panel with tabs | One-time agent download (phase 3) | File download API | Agent download working | -| 9 | Join session button, viewer launch | Text clipboard sync (agent side) | Clipboard relay protocol | Join sessions working | -| 10 | Clipboard sync UI indicators | Text clipboard sync (complete) | PowerShell execution backend | Clipboard working | -| 11 | Remote PowerShell UI with output | PowerShell timeout controls | Command streaming | PowerShell working | -| 12 | System info panel, process viewer | File download implementation | File transfer protocol | File download working | - -**Milestone:** Functional attended support sessions end-to-end - -**Exit Criteria:** -- [ ] End user can enter support code and download agent -- [ ] Technician can see session in dashboard and join -- [ ] Screen viewing works reliably -- [ ] Mouse and keyboard control works -- [ ] Text clipboard syncs bidirectionally -- [ ] Remote PowerShell executes with live output -- [ ] Files can be downloaded from remote machine -- [ ] System information displays in dashboard - -### PHASE 3: Competitive Features (Weeks 13-20) - -**Goal:** Feature parity with ScreenConnect for attended support - -**Team:** 
Same team as Phase 2 - -| Week | Frontend | Agent | Backend | Deliverable | -|------|----------|-------|---------|-------------| -| 13 | Chat UI in session panel | Chat integration | Chat persistence | Working chat | -| 14 | Multi-monitor switcher UI | Multi-monitor enumeration | Monitor state tracking | Multi-monitor support | -| 15 | Machine grouping sidebar (company/site) | Persistent agent service completion | Machine grouping API | Persistent agents | -| 16 | Search and filter interface | Process viewer, kill process | Process list API | Advanced troubleshooting | -| 17 | File upload UI with drag-drop | File upload implementation | File upload chunking | Bidirectional file transfer | -| 18 | Rich clipboard UI indicators | Rich clipboard (HTML, RTF) | Enhanced clipboard protocol | Advanced clipboard | -| 19 | Screenshot thumbnails, session timeline | Services viewer | Service control API | Enhanced session management | -| 20 | Performance optimization, polish | Agent optimization | Server optimization | Performance tuning | - -**Milestone:** Competitive product ready for MSP beta testing - -**Exit Criteria:** -- [ ] Chat works between tech and end user -- [ ] Multi-monitor switching works -- [ ] Persistent agents install as Windows service -- [ ] Machines can be grouped by company/site -- [ ] Search and filtering works -- [ ] File upload and download both work -- [ ] Rich clipboard formats supported -- [ ] Process and service viewers functional - -### PHASE 4: Production Readiness (Weeks 21-26) - -**Goal:** Installer builder, scalability, polish for general availability - -**Team:** 2 Frontend Devs + 1 Agent Dev + 1 DevOps - -| Week | Frontend | Agent | DevOps | Deliverable | -|------|----------|-------|--------|-------------| -| 21 | Installer builder UI | Installer metadata embedding | Build pipeline for custom agents | Builder MVP | -| 22 | Mobile-responsive dashboard | 64-bit agent compilation (Howard req) | Horizontal scaling architecture | Multi-device 
support | -| 23 | Advanced grouping (smart groups) | Auto-update implementation | Load balancer configuration | Smart filtering | -| 24 | Accessibility improvements (WCAG 2.1) | Update verification | Database connection pooling | Accessible UI | -| 25 | UI polish, animations, final design pass | Agent stability testing | Performance testing, benchmarking | Polished product | -| 26 | User testing feedback integration | Bug fixes | Production deployment checklist | Production-ready | - -**Milestone:** Production-ready MSP remote support solution - -**Exit Criteria:** -- [ ] Installer builder generates custom EXE with metadata -- [ ] 64-bit agent available (Howard requirement) -- [ ] Dashboard works on tablets and phones -- [ ] Smart groups (Online, Offline 30d, Attention) work -- [ ] WCAG 2.1 AA accessibility compliance -- [ ] Auto-update mechanism works -- [ ] Server can handle 50+ concurrent sessions -- [ ] Full end-to-end testing passed - ---- - -## 5. RESOURCE REQUIREMENTS - -### Team Composition - -**Minimum Team (Slower Path - 26 weeks):** -- 1 Full-Stack Developer (Rust + Frontend) -- 1 DevOps Engineer (part-time, first 4 weeks full-time) - -**Recommended Team (Faster Path - 16-20 weeks):** -- 1 Frontend Developer (HTML/CSS/JS) -- 1 Agent Developer (Rust, Windows APIs) -- 1 Backend Developer (Rust, Axum, PostgreSQL) -- 1 DevOps Engineer (Weeks 1-4 full-time, then part-time) - -**Optimal Team (Aggressive Path - 12-16 weeks):** -- 2 Frontend Developers (one for dashboard, one for portal/viewer) -- 2 Agent Developers (one for capture/input, one for features) -- 1 Backend Developer -- 1 DevOps Engineer (Weeks 1-4 full-time) -- 1 QA Engineer (Weeks 8+) - -### Skill Requirements - -**Frontend Developer:** -- HTML5, CSS3, Modern JavaScript (ES6+) -- WebSocket client programming -- Canvas API (for viewer rendering) -- Protobuf.js or similar -- Responsive design, accessibility (WCAG) - -**Agent Developer:** -- Rust (intermediate to advanced) -- Windows API (screen 
capture, input injection, clipboard) -- Tokio async runtime -- Protobuf -- Windows internals (services, registry, UAC) - -**Backend Developer:** -- Rust (advanced) -- Axum or similar async web framework -- PostgreSQL, sqlx -- JWT authentication -- WebSocket relay patterns -- Security best practices - -**DevOps Engineer:** -- Linux system administration (Ubuntu) -- Systemd services -- Prometheus, Grafana -- PostgreSQL administration -- CI/CD pipelines (GitHub Actions or Gitea) -- NPM (Nginx Proxy Manager) or similar - ---- - -## 6. RISK ASSESSMENT & MITIGATION - -### HIGH RISK (Likely to Cause Delays) - -| Risk | Probability | Impact | Mitigation Strategy | -|------|------------|--------|---------------------| -| **One-time agent download complexity** | 80% | CRITICAL | Start early (Week 6), consider simplified approach (agent runs without install initially) | -| **Installer builder scope creep** | 70% | HIGH | Define strict MVP: EXE only with embedded metadata. Defer MSI to Phase 4 or post-launch. | -| **Input relay timing/latency issues** | 60% | CRITICAL | Extensive testing on WAN (throttled networks), optimize early, consider adaptive quality. | -| **Team availability/turnover** | 50% | HIGH | Document everything, code reviews, pair programming for knowledge transfer. | -| **Security vulnerabilities in rush** | 60% | CRITICAL | Security review after each phase, automated security scanning in CI/CD. | - -### MEDIUM RISK (Manageable) - -| Risk | Probability | Impact | Mitigation Strategy | -|------|------------|--------|---------------------| -| **Multi-monitor switching complexity** | 50% | MEDIUM | Protocol already supports it. Focus on UI simplicity. Test with 2-4 monitors. | -| **Clipboard compatibility issues** | 50% | MEDIUM | Start text-only, add formats incrementally. Test on Windows 7-11. | -| **PowerShell output streaming** | 40% | HIGH | Use existing .NET/Windows libraries, test with long-running commands, handle timeouts gracefully. 
| -| **File transfer chunking/resume** | 40% | MEDIUM | Start with simple implementation (no resume), optimize later based on real-world usage. | -| **Dashboard real-time update performance** | 30% | MEDIUM | WebSocket infrastructure exists. Test with 50+ sessions, optimize selectively. | - -### LOW RISK (Minor Concerns) - -| Risk | Probability | Impact | Mitigation Strategy | -|------|------------|--------|---------------------| -| **Cross-browser compatibility** | 30% | MEDIUM | Modern browsers are similar. Test Chrome, Firefox, Edge. Defer Safari/old browsers. | -| **MSI packaging learning curve** | 30% | LOW | Defer to Phase 4 or post-launch. Use WiX toolset, plenty of documentation. | -| **Safe mode reboot compatibility** | 20% | LOW | Windows API well-documented. Test on Windows 10/11 and Server 2019/2022. | - ---- - -## 7. QUICK WINS (High Value, Low Effort) - -These features can be completed quickly and provide immediate value: - -| Week | Quick Win | Value | Effort | Owner | -|------|-----------|-------|--------|-------| -| 2 | Join session button | CRITICAL | 3 days | Frontend | -| 5 | Complete input relay | CRITICAL | 1 week | Agent | -| 9 | System info display | MEDIUM | 1 week | Frontend | -| 11 | PowerShell timeout controls | HIGH | 3 days | Frontend | -| 12 | Process list viewer | MEDIUM | 1 week | Agent + Frontend | -| 15 | Session detail panel | HIGH | 1 week | Frontend | -| 19 | Chat UI integration | HIGH | 1-2 weeks | Frontend | -| 22 | Command audit logging | MEDIUM | 3 days | Backend | - -**Combined Quick Win Time:** 6-7 weeks of work (can be distributed across phases) - ---- - -## 8. 
FRONTEND/UI SPECIFIC IMPROVEMENTS - -### Tier 1: Critical UX Issues (Blocks Adoption) - -| Issue | Current State | Target State | Effort | Week | -|-------|--------------|--------------|--------|------| -| **Machine organization missing** | Flat list | Company/Site/Tag hierarchy with collapsible tree | 2 weeks | 15-16 | -| **No session detail panel** | Click machine → nothing | Detail panel with tabs (Info, Screen, Chat, Commands, Files) | 1 week | 8 | -| **No search/filter** | No search box | Full-text search + multi-filter (online, OS, company, tag) | 2 weeks | 16-17 | -| **Connect flow confusing** | Modal with web/native choice | Default to web viewer, clear guidance | 3 days | 9 | -| **Support code entry not optimized** | Single input field | 6 segmented inputs with auto-advance (Apple-style) | 1 week | 5 | - -### Tier 2: Important UX Improvements - -| Issue | Current State | Target State | Effort | Week | -|-------|--------------|--------------|--------|------| -| **No toast notifications** | Silent updates | Toast for new sessions, errors, status changes | 1 week | 11 | -| **No keyboard navigation** | Mouse-only | Full Tab order, focus indicators, shortcuts | 1 week | 24 | -| **Minimal viewer toolbar** | 3 buttons | 10+ buttons (Quality, Monitors, Clipboard, Files, Chat, Screenshot) | 1 week | 18 | -| **No connection quality feedback** | FPS counter only | Latency, bandwidth, quality indicator (Good/Fair/Poor) | 1 week | 20 | -| **Poor mobile experience** | Desktop-only | Responsive dashboard, mobile-optimized viewer | 2 weeks | 22-23 | - -### Tier 3: Polish & Accessibility - -| Improvement | Effort | Week | -|-------------|--------|------| -| WCAG 2.1 AA compliance (focus, ARIA, contrast) | 1 week | 24 | -| Dark/light theme toggle | 3 days | 25 | -| Loading skeletons for async content | 2 days | 25 | -| Empty states with helpful instructions | 2 days | 25 | -| Micro-animations and transitions | 3 days | 25 | - -**Total Frontend Improvement Time:** Integrated 
into main roadmap (Weeks 5-25) - ---- - -## 9. TESTING STRATEGY - -### Unit Testing (Ongoing) - -**Target Coverage:** 70%+ for agent, server -**Framework:** Rust `cargo test` -**CI Integration:** Run on every commit - -**Focus Areas:** -- Agent: Screen capture, input injection, clipboard -- Server: Session management, authentication, WebSocket relay -- Protocol: Message serialization/deserialization - -### Integration Testing (Weekly) - -**Target:** End-to-end workflows -**Tools:** Manual testing + automated scripts (Playwright for dashboard) - -**Test Scenarios:** -- Week 8: Support code entry → agent download → join session -- Week 12: Screen viewing + input control + clipboard sync -- Week 16: PowerShell execution + file download -- Week 20: Multi-monitor + chat + file upload -- Week 25: Full MSP workflow (code gen → session → transfer → close) - -### Performance Testing (Weeks 20, 25) - -**Metrics:** -- Screen FPS: Target 30+ FPS on LAN, 15+ FPS on WAN -- Input latency: Target <100ms on LAN, <200ms on WAN -- Concurrent sessions: Target 50+ sessions on single server -- Bandwidth: Measure at various quality levels - -**Tools:** -- Network throttling (Chrome DevTools, tc on Linux) -- Load generation (custom script or k6) -- Prometheus metrics analysis - -### Security Testing (Weeks 4, 12, 20, 26) - -**Penetration Testing:** -- Week 4: After security fixes, basic pen test -- Week 12: Full authentication and session security review -- Week 20: WebSocket relay attack scenarios -- Week 26: Pre-production comprehensive security audit - -**Automated Scanning:** -- OWASP ZAP or similar in CI/CD -- Rust `cargo audit` for dependency vulnerabilities -- Static analysis (Clippy in strict mode) - -### User Acceptance Testing (Weeks 24-26) - -**Beta Testers:** 3-5 MSP technicians (Howard + team) - -**Scenarios:** -- Remote troubleshooting sessions -- Software installation -- Network configuration -- Credential retrieval -- Multi-monitor workflows - -**Feedback Collection:** 
Survey + direct interviews - ---- - -## 10. DECISION POINTS & GO/NO-GO CRITERIA - -### DECISION POINT 1: After Week 4 (Security & Infrastructure Complete) - -**Go Criteria:** -- [ ] All critical security issues resolved (SEC-1 through SEC-5) -- [ ] All high-priority security issues resolved (SEC-6 through SEC-13) -- [ ] Systemd service operational with auto-restart -- [ ] Prometheus metrics exposed, Grafana dashboard configured -- [ ] Automated PostgreSQL backups running -- [ ] CI/CD pipeline functional - -**No-Go Scenarios:** -- Security issues remain → Continue Phase 1, delay Phase 2 -- Infrastructure unreliable → Bring in senior DevOps consultant -- Team capacity issues → Reduce scope or extend timeline - -**Decision:** Proceed to Phase 2 or re-evaluate timeline - -### DECISION POINT 2: After Week 12 (Core Features Complete) - -**Go Criteria:** -- [ ] End-user portal functional -- [ ] One-time agent download working -- [ ] Input relay complete and responsive -- [ ] Dashboard session list with join functionality -- [ ] Text clipboard syncs bidirectionally -- [ ] Remote PowerShell executes with live output -- [ ] File download works - -**No-Go Scenarios:** -- Input latency >500ms on WAN → Optimize before proceeding -- Agent download fails >20% of the time → Fix reliability -- Core features unstable → Extend Phase 2 - -**Decision:** Proceed to Phase 3 or extend core feature development - -### DECISION POINT 3: After Week 20 (Competitive Features Complete) - -**Go Criteria:** -- [ ] Chat functional -- [ ] Multi-monitor support working -- [ ] Persistent agents install as service -- [ ] Machine grouping (company/site) implemented -- [ ] Search and filtering functional -- [ ] File upload and download both work -- [ ] Rich clipboard formats supported -- [ ] 30+ FPS on LAN, 15+ FPS on WAN (performance targets met) - -**No-Go Scenarios:** -- Performance significantly below targets → Optimization sprint -- Critical bugs in competitive features → Fix before launch -- User 
testing reveals major UX issues → Address before GA - -**Decision:** Proceed to Phase 4 or conduct extended beta period - -### DECISION POINT 4: After Week 26 (Production Readiness) - -**Go Criteria:** -- [ ] Installer builder generates custom agents -- [ ] 64-bit agent available -- [ ] Dashboard mobile-responsive -- [ ] WCAG 2.1 AA compliant -- [ ] Auto-update working -- [ ] 50+ concurrent sessions supported -- [ ] Security audit passed -- [ ] Beta testing feedback addressed - -**Launch Decision:** General Availability or Extended Beta - ---- - -## 11. POST-LAUNCH ROADMAP (Optional Phase 5) - -### Months 7-9: Advanced Features - -- MSI packaging (64-bit) for GPO deployment -- MFA/2FA support -- Session recording and playback -- Advanced role-based permissions (per-client access) -- Event log viewer -- Registry browser (with safety warnings) - -### Months 10-12: Integrations & Scale - -- GuruRMM integration (shared auth, launch from RMM) -- PSA integrations (HaloPSA, Autotask, ConnectWise) -- Multi-server clustering -- Geographic load balancing -- Mobile apps (iOS, Android) - -### Year 2: Enterprise Features - -- SSO integration (SAML, OAuth) -- LDAP/AD synchronization -- Custom branding/white-labeling -- Advanced reporting and analytics -- Wake-on-LAN with local relay -- Disaster recovery automation - ---- - -## 12. 
COST ESTIMATION - -### Labor Costs (Recommended Team - 20 weeks) - -| Role | Weeks | Hours/Week | Total Hours | Rate Estimate | Total Cost | -|------|-------|------------|-------------|---------------|------------| -| Frontend Developer | 20 | 40 | 800 | $75/hr | $60,000 | -| Agent Developer | 20 | 40 | 800 | $85/hr | $68,000 | -| Backend Developer | 20 | 40 | 800 | $85/hr | $68,000 | -| DevOps Engineer | 8 (full) + 12 (part) | 40 + 20 | 560 | $80/hr | $44,800 | -| QA Engineer | 12 | 30 | 360 | $60/hr | $21,600 | - -**Total Labor:** $262,400 - -### Infrastructure Costs (6 months) - -| Resource | Monthly Cost | Total (6 months) | -|----------|-------------|------------------| -| Server (existing 172.16.3.30) | $0 (owned) | $0 | -| PostgreSQL (on same server) | $0 | $0 | -| Prometheus + Grafana (on same server) | $0 | $0 | -| Backup storage (100GB) | $5 | $30 | -| SSL certificates (Let's Encrypt) | $0 | $0 | -| Domain (azcomputerguru.com) | $15 | $90 | -| CI/CD (Gitea + runners) | $0 (self-hosted) | $0 | - -**Total Infrastructure:** $120 (minimal) - -### Tools & Licenses - -| Tool | Cost | -|------|------| -| Development tools (VS Code, etc.) | $0 (free) | -| Testing tools (Playwright, k6) | $0 (free) | -| Security scanning (OWASP ZAP) | $0 (free) | -| Protobuf compiler | $0 (free) | - -**Total Tools:** $0 - -### **TOTAL PROJECT COST (20-week timeline):** ~$262,500 - ---- - -## 13. 
SUCCESS METRICS - -### Technical Metrics - -| Metric | Target | Measurement | -|--------|--------|-------------| -| Screen FPS (LAN) | 30+ FPS | Prometheus metrics | -| Screen FPS (WAN) | 15+ FPS | Prometheus metrics | -| Input latency (LAN) | <100ms | Manual testing | -| Input latency (WAN) | <200ms | Manual testing | -| Concurrent sessions | 50+ | Load testing | -| Uptime | 99.5%+ | Prometheus uptime | -| Security issues | 0 critical/high | Quarterly audits | - -### Business Metrics - -| Metric | Target | Measurement | -|--------|--------|-------------| -| MSP adoption rate | 5+ MSPs in first 3 months | Tracking | -| Sessions per week | 100+ | Database query | -| Agent installations | 200+ | Database query | -| Support tickets | <10/week | Gitea issues | -| Customer satisfaction | 4.5+/5 | Survey | - -### User Experience Metrics - -| Metric | Target | Measurement | -|--------|--------|-------------| -| Time to first session | <5 minutes | User testing | -| Session join time | <10 seconds | Prometheus metrics | -| Dashboard load time | <2 seconds | Browser DevTools | -| Agent download success | >95% | Server logs | -| Accessibility compliance | WCAG 2.1 AA | Automated testing | - ---- - -## 14. FINAL RECOMMENDATIONS - -### IMMEDIATE ACTIONS (This Week) - -1. **Prioritize security fixes** - Cannot launch with hardcoded JWT secret -2. **Hire/assign frontend developer** - Critical path bottleneck -3. **Set up systemd service** - Infrastructure requirement for production -4. **Create GitHub/Gitea issues** - Track all findings from this review -5. 
**Schedule weekly team syncs** - Every Monday, review progress vs roadmap - -### STRATEGIC DECISIONS - -**Decision 1: Timeline** -- **Conservative (26 weeks):** Lower risk, thorough testing, minimal team stress -- **Aggressive (16 weeks):** Higher risk, requires optimal team, potential burnout -- **RECOMMENDED (20 weeks):** Balanced approach with contingency buffer - -**Decision 2: Team Size** -- **Minimum (1-2 people):** 26+ weeks, high risk of delays -- **RECOMMENDED (4-5 people):** 16-20 weeks, manageable risk -- **Optimal (6-7 people):** 12-16 weeks, lowest risk - -**Decision 3: Feature Scope** -- **MVP Only (Tier 0):** Fast to market but not competitive -- **RECOMMENDED (Tier 0 + Tier 1):** Competitive product, reasonable timeline -- **Full Feature (Tier 0-3):** 26+ weeks, defer some to post-launch - -### KEY SUCCESS FACTORS - -1. **Fix security issues FIRST** - Non-negotiable -2. **Build end-user portal early** - Unblocks all testing -3. **Focus on Howard's priorities** - PowerShell/CMD, clipboard, 64-bit -4. **Test on real networks** - WAN latency is critical -5. **Get beta users early** - MSP feedback invaluable -6. **Maintain code quality** - Rust makes this easier, don't compromise -7. **Document as you go** - Reduces onboarding time for new team members - ---- - -## 15. APPENDICES - -### A. Review Sources - -This master action plan synthesizes findings from: - -1. **Security Review** - 23 vulnerabilities (5 critical, 8 high, 6 medium, 4 low) -2. **Architecture Review** - Design assessment, 30% MVP completeness -3. **Code Quality Review** - Grade B+, 85/100 production readiness -4. **Infrastructure Review** - 15-20% production ready, systemd/monitoring gaps -5. **Frontend/UI/UX Review** - Grade C+, 35-40% complete, 14-section analysis -6. **Requirements Gap Analysis** - 100+ feature matrix, 30-35% implementation - -### B. 
File References - -- **GAP_ANALYSIS.md** - Detailed feature implementation matrix -- **REQUIREMENTS.md** - Original requirements specification -- **TODO.md** - Current task tracking -- **CLAUDE.md** - Project guidelines and architecture -- Security review (conversation archive) -- Architecture review (conversation archive) -- Code quality review (conversation archive) -- Infrastructure review (conversation archive) -- Frontend/UI review (conversation archive) - -### C. Contact & Escalation - -**Project Owner:** Howard -**Technical Escalation:** TBD (assign technical lead) -**Security Escalation:** TBD (assign security lead) - ---- - -**Document Version:** 1.0 -**Last Updated:** 2026-01-17 -**Next Review:** After Phase 1 completion (Week 4) -**Status:** DRAFT - Awaiting Howard's approval - ---- - -## SUMMARY: THE PATH FORWARD - -GuruConnect is a **well-architected project** with **solid technical foundations** that needs **focused feature development and security hardening** to reach production readiness. - -**Timeline:** 16-26 weeks (recommended: 20 weeks) -**Team:** 4-5 developers + 1 DevOps -**Cost:** ~$262,500 labor + minimal infrastructure -**Risk Level:** MEDIUM (manageable with proper planning) - -**Critical Path:** -1. Fix 5 critical security vulnerabilities (3 weeks) -2. Build end-user portal + agent download (5 weeks) -3. Complete core features (clipboard, PowerShell, files) (7 weeks) -4. Add competitive features (chat, multi-monitor, grouping) (8 weeks) -5. 
Polish and production readiness (6 weeks) - -**Outcome:** Competitive MSP remote support solution ready for general availability - -**Next Step:** Howard reviews this plan, approves timeline/budget, assigns team diff --git a/projects/msp-tools/guru-connect/PHASE1_COMPLETE.md b/projects/msp-tools/guru-connect/PHASE1_COMPLETE.md deleted file mode 100644 index 447f99d..0000000 --- a/projects/msp-tools/guru-connect/PHASE1_COMPLETE.md +++ /dev/null @@ -1,610 +0,0 @@ -# Phase 1 Complete - Production Infrastructure - -**Date:** 2026-01-18 -**Project:** GuruConnect Remote Desktop Solution -**Server:** 172.16.3.30 (gururmm) -**Status:** PRODUCTION READY - ---- - -## Executive Summary - -Phase 1 of GuruConnect infrastructure deployment is complete and ready for production use. All core infrastructure, monitoring, and CI/CD automation has been successfully implemented and tested. - -**Overall Completion: 89% (31/35 items)** - ---- - -## Phase 1 Breakdown - -### Week 1: Security Hardening (77% - 10/13) - -**Completed:** -- [x] JWT token expiration validation (24h lifetime) -- [x] Argon2id password hashing for user accounts -- [x] Security headers (CSP, X-Frame-Options, HSTS, X-Content-Type-Options) -- [x] Token blacklist for logout invalidation -- [x] API key validation for agent connections -- [x] Input sanitization on API endpoints -- [x] SQL injection protection (sqlx compile-time checks) -- [x] XSS prevention in templates -- [x] CORS configuration for dashboard -- [x] Rate limiting on auth endpoints - -**Pending:** -- [ ] TLS certificate auto-renewal (Let's Encrypt with certbot) -- [ ] Session timeout enforcement (UI-side) -- [ ] Security audit logging (comprehensive audit trail) - -**Impact:** Core security is operational. Missing items are enhancements for production hardening. 
- ---- - -### Week 2: Infrastructure & Monitoring (100% - 11/11) - -**Completed:** -- [x] Systemd service configuration -- [x] Auto-restart on failure -- [x] Prometheus metrics endpoint (/metrics) -- [x] 11 metric types exposed: - - Active sessions (gauge) - - Total connections (counter) - - Active WebSocket connections (gauge) - - Failed authentication attempts (counter) - - HTTP request duration (histogram) - - HTTP requests total (counter) - - Database connection pool (gauge) - - Agent connections (gauge) - - Viewer connections (gauge) - - Protocol errors (counter) - - Bytes transmitted (counter) -- [x] Grafana dashboard with 10 panels -- [x] Automated daily backups (systemd timer) -- [x] Log rotation configuration -- [x] Health check endpoint (/health) -- [x] Service monitoring (systemctl status) - -**Details:** -- **Service:** guruconnect.service running as PID 3947824 -- **Prometheus:** Running on port 9090 -- **Grafana:** Running on port 3000 (admin/admin) -- **Backups:** Daily at 00:00 UTC → /home/guru/backups/guruconnect/ -- **Retention:** 7 days automatic cleanup -- **Log Rotation:** Daily rotation, 14-day retention, compressed - -**Documentation:** -- `INSTALLATION_GUIDE.md` - Complete setup instructions -- `INFRASTRUCTURE_STATUS.md` - Current status and next steps -- `DEPLOYMENT_COMPLETE.md` - Week 2 summary - ---- - -### Week 3: CI/CD Automation (91% - 10/11) - -**Completed:** -- [x] Gitea Actions workflows (3 workflows) -- [x] Build automation (build-and-test.yml) -- [x] Test automation (test.yml) -- [x] Deployment automation (deploy.yml) -- [x] Deployment script with rollback (deploy.sh) -- [x] Version tagging automation (version-tag.sh) -- [x] Build artifact management -- [x] Gitea Actions runner installed (act_runner 0.2.11) -- [x] Systemd service for runner -- [x] Complete CI/CD documentation - -**Pending:** -- [ ] Gitea Actions runner registration (requires admin token) - -**Workflows:** - -1. 
**Build and Test** (.gitea/workflows/build-and-test.yml) - - Triggers: Push to main/develop, PRs to main - - Jobs: Build server, Build agent, Security audit, Summary - - Artifacts: Server binary (Linux), Agent binary (Windows) - - Retention: 30 days - - Duration: ~5-8 minutes - -2. **Run Tests** (.gitea/workflows/test.yml) - - Triggers: Push to any branch, PRs - - Jobs: Test server, Test agent, Code coverage, Lint - - Artifacts: Coverage report - - Quality gates: Zero clippy warnings, all tests pass - - Duration: ~3-5 minutes - -3. **Deploy to Production** (.gitea/workflows/deploy.yml) - - Triggers: Version tags (v*.*.*), Manual dispatch - - Jobs: Deploy server, Create release - - Process: Build → Package → Transfer → Backup → Deploy → Health Check - - Rollback: Automatic on health check failure - - Retention: 90 days - - Duration: ~10-15 minutes - -**Automation Scripts:** - -- `scripts/deploy.sh` - Deployment with automatic rollback -- `scripts/version-tag.sh` - Semantic version tagging -- `scripts/install-gitea-runner.sh` - Runner installation - -**Documentation:** -- `CI_CD_SETUP.md` - Complete CI/CD setup guide -- `PHASE1_WEEK3_COMPLETE.md` - Week 3 detailed summary -- `ACTIVATE_CI_CD.md` - Runner activation and testing guide - ---- - -## Infrastructure Overview - -### Services Running - -``` -Service Status Port PID Uptime ------------------------------------------------------------- -guruconnect active 3002 3947824 running -prometheus active 9090 active running -grafana-server active 3000 active running -``` - -### Automated Tasks - -``` -Task Frequency Next Run Status ------------------------------------------------------------- -Daily Backups Daily Mon 00:00 UTC active -Log Rotation Daily Daily active -``` - -### File Locations - -``` -Component Location ------------------------------------------------------------- -Server Binary ~/guru-connect/target/x86_64-unknown-linux-gnu/release/guruconnect-server -Static Files ~/guru-connect/server/static/ -Database 
PostgreSQL (localhost:5432/guruconnect) -Backups /home/guru/backups/guruconnect/ -Deployment Backups /home/guru/deployments/backups/ -Deployment Artifacts /home/guru/deployments/artifacts/ -Systemd Service /etc/systemd/system/guruconnect.service -Prometheus Config /etc/prometheus/prometheus.yml -Grafana Config /etc/grafana/grafana.ini -Log Rotation /etc/logrotate.d/guruconnect -``` - ---- - -## Access Information - -### GuruConnect Dashboard -- **URL:** https://connect.azcomputerguru.com/dashboard -- **Username:** howard -- **Password:** AdminGuruConnect2026 - -### Gitea Repository -- **URL:** https://git.azcomputerguru.com/azcomputerguru/guru-connect -- **Actions:** https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions -- **Runner Admin:** https://git.azcomputerguru.com/admin/actions/runners - -### Monitoring -- **Prometheus:** http://172.16.3.30:9090 -- **Grafana:** http://172.16.3.30:3000 (admin/admin) -- **Metrics Endpoint:** http://172.16.3.30:3002/metrics -- **Health Endpoint:** http://172.16.3.30:3002/health - ---- - -## Key Achievements - -### Infrastructure -- Production-grade systemd service with auto-restart -- Comprehensive metrics collection (11 metric types) -- Visual monitoring dashboards (10 panels) -- Automated backup and recovery system -- Log management and rotation -- Health monitoring - -### Security -- JWT authentication with token expiration -- Argon2id password hashing -- Security headers (CSP, HSTS, etc.) 
-- API key validation for agents -- Token blacklist for logout -- Rate limiting on auth endpoints - -### CI/CD -- Automated build pipeline for server and agent -- Comprehensive test suite automation -- Automated deployment with rollback -- Version tagging automation -- Build artifact management -- Release automation - -### Documentation -- Complete installation guides -- Infrastructure status documentation -- CI/CD setup and usage guides -- Activation and testing procedures -- Troubleshooting guides - ---- - -## Performance Benchmarks - -### Build Times (Expected) -- Server build: ~2-3 minutes -- Agent build: ~2-3 minutes -- Test suite: ~1-2 minutes -- Total CI pipeline: ~5-8 minutes -- Deployment: ~10-15 minutes - -### Deployment -- Backup creation: ~1 second -- Service stop: ~2 seconds -- Binary deployment: ~1 second -- Service start: ~3 seconds -- Health check: ~2 seconds -- **Total deployment time:** ~10 seconds - -### Monitoring -- Metrics scrape interval: 15 seconds -- Grafana dashboard refresh: 5 seconds -- Backup execution time: ~5-10 seconds (depending on DB size) - ---- - -## Testing Checklist - -### Infrastructure Testing (Complete) -- [x] Systemd service starts successfully -- [x] Service auto-restarts on failure -- [x] Prometheus scrapes metrics endpoint -- [x] Grafana displays metrics -- [x] Daily backup timer scheduled -- [x] Backup creates valid dump files -- [x] Log rotation configured -- [x] Health endpoint returns OK -- [x] Admin login works - -### CI/CD Testing (Pending Runner Registration) -- [ ] Runner shows online in Gitea admin -- [ ] Build workflow triggers on push -- [ ] Test workflow runs successfully -- [ ] Deployment workflow triggers on tag -- [ ] Deployment creates backup -- [ ] Deployment performs health check -- [ ] Rollback works on failure -- [ ] Build artifacts are downloadable -- [ ] Version tagging script works - ---- - -## Next Steps - -### Immediate (Required for Full CI/CD) - -**1. 
Register Gitea Actions Runner** - -```bash -# Get token from: https://git.azcomputerguru.com/admin/actions/runners -ssh guru@172.16.3.30 - -sudo -u gitea-runner act_runner register \ - --instance https://git.azcomputerguru.com \ - --token YOUR_REGISTRATION_TOKEN_HERE \ - --name gururmm-runner \ - --labels ubuntu-latest,ubuntu-22.04 - -sudo systemctl enable gitea-runner -sudo systemctl start gitea-runner -``` - -**2. Test CI/CD Pipeline** - -```bash -# Trigger first build -cd ~/guru-connect -git commit --allow-empty -m "test: trigger CI/CD" -git push origin main - -# Verify in Actions tab -https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions -``` - -**3. Create First Release** - -```bash -# Create version tag -cd ~/guru-connect/scripts -./version-tag.sh patch - -# Push to trigger deployment -git push origin main -git push origin v0.1.0 -``` - -### Optional Enhancements - -**Security Hardening:** -- Configure Let's Encrypt auto-renewal -- Implement session timeout UI -- Add comprehensive audit logging -- Set up intrusion detection (fail2ban) - -**Monitoring:** -- Import Grafana dashboard from `infrastructure/grafana-dashboard.json` -- Configure Alertmanager for Prometheus -- Set up notification webhooks -- Add uptime monitoring (UptimeRobot, etc.) 
- -**CI/CD:** -- Configure deployment SSH keys for full automation -- Add Windows runner for native agent builds -- Implement staging environment -- Add smoke tests post-deployment -- Configure notification webhooks - -**Infrastructure:** -- Set up database replication -- Configure offsite backup sync -- Implement centralized logging (ELK stack) -- Add performance profiling - ---- - -## Troubleshooting - -### Service Issues - -```bash -# Check service status -sudo systemctl status guruconnect - -# View logs -sudo journalctl -u guruconnect -f - -# Restart service -sudo systemctl restart guruconnect - -# Check if port is listening -netstat -tlnp | grep 3002 -``` - -### Database Issues - -```bash -# Check database connection -psql -U guruconnect -d guruconnect -c "SELECT 1;" - -# View active connections -psql -U postgres -c "SELECT * FROM pg_stat_activity WHERE datname='guruconnect';" - -# Check database size -psql -U postgres -c "SELECT pg_size_pretty(pg_database_size('guruconnect'));" -``` - -### Backup Issues - -```bash -# Check backup timer status -sudo systemctl status guruconnect-backup.timer - -# List backups -ls -lh /home/guru/backups/guruconnect/ - -# Manual backup -sudo systemctl start guruconnect-backup.service - -# View backup logs -sudo journalctl -u guruconnect-backup.service -n 50 -``` - -### Monitoring Issues - -```bash -# Check Prometheus -systemctl status prometheus -curl http://localhost:9090/-/healthy - -# Check Grafana -systemctl status grafana-server -curl http://localhost:3000/api/health - -# Check metrics endpoint -curl http://localhost:3002/metrics -``` - -### CI/CD Issues - -```bash -# Check runner status -sudo systemctl status gitea-runner -sudo journalctl -u gitea-runner -f - -# View runner logs -sudo -u gitea-runner cat /home/gitea-runner/.runner/.runner - -# Re-register runner -sudo -u gitea-runner act_runner register \ - --instance https://git.azcomputerguru.com \ - --token NEW_TOKEN -``` - ---- - -## Quick Reference Commands - -### 
Service Management -```bash -sudo systemctl start guruconnect -sudo systemctl stop guruconnect -sudo systemctl restart guruconnect -sudo systemctl status guruconnect -sudo journalctl -u guruconnect -f -``` - -### Deployment -```bash -cd ~/guru-connect/scripts -./deploy.sh /path/to/package.tar.gz -./version-tag.sh [major|minor|patch] -``` - -### Backups -```bash -# Manual backup -sudo systemctl start guruconnect-backup.service - -# List backups -ls -lh /home/guru/backups/guruconnect/ - -# Restore from backup -psql -U guruconnect -d guruconnect < /home/guru/backups/guruconnect/guruconnect-20260118-000000.sql -``` - -### Monitoring -```bash -# Check metrics -curl http://localhost:3002/metrics - -# Check health -curl http://localhost:3002/health - -# Prometheus UI -http://172.16.3.30:9090 - -# Grafana UI -http://172.16.3.30:3000 -``` - -### CI/CD -```bash -# View workflows -https://git.azcomputerguru.com/azcomputerguru/guru-connect/actions - -# Runner status -sudo systemctl status gitea-runner - -# Trigger build -git push origin main - -# Create release -./version-tag.sh patch -git push origin main && git push origin v0.1.0 -``` - ---- - -## Documentation Index - -**Installation & Setup:** -- `INSTALLATION_GUIDE.md` - Complete infrastructure installation -- `CI_CD_SETUP.md` - CI/CD setup and configuration -- `ACTIVATE_CI_CD.md` - Runner activation and testing - -**Status & Completion:** -- `INFRASTRUCTURE_STATUS.md` - Infrastructure status and next steps -- `DEPLOYMENT_COMPLETE.md` - Week 2 deployment summary -- `PHASE1_WEEK3_COMPLETE.md` - Week 3 CI/CD summary -- `PHASE1_COMPLETE.md` - This document - -**Project Documentation:** -- `README.md` - Project overview and getting started -- `CLAUDE.md` - Development guidelines and architecture -- `SESSION_STATE.md` - Current session state (if exists) - ---- - -## Success Metrics - -### Availability -- **Target:** 99.9% uptime -- **Current:** Service running with auto-restart -- **Monitoring:** Prometheus + Grafana + Health 
endpoint - -### Performance -- **Target:** < 100ms HTTP response time -- **Monitoring:** HTTP request duration histogram - -### Security -- **Target:** Zero successful unauthorized access attempts -- **Current:** JWT auth + API keys + rate limiting -- **Monitoring:** Failed auth counter - -### Deployments -- **Target:** < 15 minutes deployment time -- **Current:** ~10 second deployment + CI pipeline time -- **Reliability:** Automatic rollback on failure - ---- - -## Risk Assessment - -### Low Risk Items (Mitigated) -- **Service crashes:** Auto-restart configured -- **Disk space:** Log rotation + backup cleanup -- **Failed deployments:** Automatic rollback -- **Database issues:** Daily backups with 7-day retention - -### Medium Risk Items (Monitored) -- **Database growth:** Monitoring configured, manual cleanup if needed -- **Log volume:** Rotation configured, monitor disk usage -- **Metrics retention:** Prometheus defaults (15 days) - -### High Risk Items (Manual Intervention) -- **TLS certificate expiration:** Requires certbot auto-renewal setup -- **Security vulnerabilities:** Requires periodic security audits -- **Database connection pool exhaustion:** Monitor pool metrics - ---- - -## Cost Analysis - -**Server Resources (172.16.3.30):** -- CPU: Minimal (< 5% average) -- RAM: ~200MB for GuruConnect + 300MB for monitoring -- Disk: ~50MB for binaries + backups (growing) -- Network: Minimal (internal metrics scraping) - -**External Services:** -- Domain: connect.azcomputerguru.com (existing) -- TLS Certificate: Let's Encrypt (free) -- Git hosting: Self-hosted Gitea - -**Total Additional Cost:** $0/month - ---- - -## Phase 1 Summary - -**Start Date:** 2026-01-15 -**Completion Date:** 2026-01-18 -**Duration:** 3 days - -**Items Completed:** 31/35 (89%) -**Production Ready:** Yes -**Blocking Issues:** None - -**Key Deliverables:** -- Production-grade infrastructure -- Comprehensive monitoring -- Automated CI/CD pipeline (pending runner registration) -- Complete 
documentation - -**Next Phase:** Phase 2 - Feature Development -- Multi-session support -- File transfer capability -- Chat enhancements -- Mobile dashboard - ---- - -**Deployment Status:** PRODUCTION READY -**Activation Status:** Pending Gitea Actions runner registration -**Documentation Status:** Complete -**Next Action:** Register runner → Test pipeline → Begin Phase 2 - ---- - -**Last Updated:** 2026-01-18 -**Document Version:** 1.0 -**Phase:** 1 Complete (89%) diff --git a/projects/msp-tools/guru-connect/PHASE1_COMPLETENESS_AUDIT.md b/projects/msp-tools/guru-connect/PHASE1_COMPLETENESS_AUDIT.md deleted file mode 100644 index 32c200c..0000000 --- a/projects/msp-tools/guru-connect/PHASE1_COMPLETENESS_AUDIT.md +++ /dev/null @@ -1,592 +0,0 @@ -# GuruConnect Phase 1 - Completeness Audit Report - -**Audit Date:** 2026-01-18 -**Auditor:** Claude Code -**Project:** GuruConnect Remote Desktop Solution -**Phase:** Phase 1 (Security, Infrastructure, CI/CD) -**Claimed Completion:** 89% (31/35 items) - ---- - -## Executive Summary - -After comprehensive code review and verification, the Phase 1 completion claim of **89% (31/35 items)** is **ACCURATE** with minor discrepancies. The actual verified completion is **86% (30/35 items)** - one claimed item (rate limiting) is not fully operational. - -**Overall Assessment: PRODUCTION READY** with documented pending items. - -**Key Findings:** -- Security implementations verified and robust -- Infrastructure fully operational -- CI/CD pipelines complete but not activated (pending runner registration) -- Documentation comprehensive and accurate -- One security item (rate limiting) implemented in code but not active due to compilation issues - ---- - -## Detailed Verification Results - -### Week 1: Security Hardening (Claimed: 77% - 10/13) - -#### VERIFIED COMPLETE (10/10 claimed) - -1. 
**JWT Token Expiration Validation (24h lifetime)** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/auth/jwt.rs` lines 92-118 - - Explicit expiration check with `validate_exp = true` - - 24-hour default lifetime configurable via `JWT_EXPIRY_HOURS` - - Additional redundant expiration check at lines 111-115 - - **Code Marker:** SEC-13 - -2. **Argon2id Password Hashing** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/auth/password.rs` lines 20-34 - - Explicitly uses `Algorithm::Argon2id` (line 25) - - Latest version (V0x13) - - Default secure params: 19456 KiB memory, 2 iterations - - **Code Marker:** SEC-9 - -3. **Security Headers (CSP, X-Frame-Options, HSTS, X-Content-Type-Options)** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/middleware/security_headers.rs` lines 13-75 - - CSP implemented (lines 20-35) - - X-Frame-Options: DENY (lines 38-41) - - X-Content-Type-Options: nosniff (lines 44-47) - - X-XSS-Protection (lines 49-53) - - Referrer-Policy (lines 55-59) - - Permissions-Policy (lines 61-65) - - HSTS ready but commented out (lines 68-72) - appropriate for HTTP testing - - **Code Markers:** SEC-7, SEC-12 - -4. **Token Blacklist for Logout Invalidation** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/auth/token_blacklist.rs` - Complete implementation - - In-memory HashSet with async RwLock - - Integrated into authentication flow (lines 109-112 in auth/mod.rs) - - Cleanup mechanism for expired tokens - - **Endpoints:** - - `/api/auth/logout` - Implemented - - `/api/auth/revoke-token` - Implemented - - `/api/auth/admin/revoke-user` - Implemented - -5. **API Key Validation for Agent Connections** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/main.rs` lines 209-216 - - API key strength validation: `server/src/utils/validation.rs` - - Minimum 32 characters - - Entropy checking - - Weak pattern detection - - **Code Marker:** SEC-4 (validation strength) - -6. 
**Input Sanitization on API Endpoints** - - **Status:** VERIFIED - - **Evidence:** - - Serde deserialization with strict types - - UUID validation in handlers - - API key strength validation - - All API handlers use typed extractors (Json, Path, Query) - -7. **SQL Injection Protection (sqlx compile-time checks)** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/db/` modules use `sqlx::query!` and `sqlx::query_as!` macros - - Compile-time query validation - - All database operations parameterized - - **Sample:** `db/events.rs` lines 1-10 show sqlx usage - -8. **XSS Prevention in Templates** - - **Status:** VERIFIED - - **Evidence:** - - CSP headers prevent inline script execution from untrusted sources - - Static HTML files served from `server/static/` - - No user-generated content rendered server-side - -9. **CORS Configuration for Dashboard** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/main.rs` lines 328-347 - - Restricted to specific origins (production domain + localhost) - - Limited methods (GET, POST, PUT, DELETE, OPTIONS) - - Explicit header allowlist - - Credentials allowed - - **Code Marker:** SEC-11 - -10. **Rate Limiting on Auth Endpoints** - - **Status:** PARTIAL - CODE EXISTS BUT NOT ACTIVE - - **Evidence:** - - Rate limiting middleware implemented: `server/src/middleware/rate_limit.rs` - - Three limiters defined (auth: 5/min, support: 10/min, api: 60/min) - - NOT applied in main.rs due to compilation issues - - TODOs present in main.rs lines 258, 277 - - **Issue:** Type resolution problems with tower_governor - - **Documentation:** `SEC2_RATE_LIMITING_TODO.md` - - **Recommendation:** Counts as INCOMPLETE until actually deployed - -**CORRECTION:** Rate limiting claim should be marked as incomplete. Adjusted count: **9/10 completed** - -#### VERIFIED PENDING (3/3 claimed) - -11. 
**TLS Certificate Auto-Renewal** - - **Status:** VERIFIED PENDING - - **Evidence:** Documented in TECHNICAL_DEBT.md - - **Impact:** Manual renewal required - -12. **Session Timeout Enforcement (UI-side)** - - **Status:** VERIFIED PENDING - - **Evidence:** JWT expiration works server-side, UI redirect not implemented - -13. **Security Audit Logging (comprehensive audit trail)** - - **Status:** VERIFIED PENDING - - **Evidence:** Basic event logging exists in `db/events.rs`, comprehensive audit trail not yet implemented - -**Week 1 Verified Result: 69% (9/13)** vs Claimed: 77% (10/13) - ---- - -### Week 2: Infrastructure & Monitoring (Claimed: 100% - 11/11) - -#### VERIFIED COMPLETE (11/11 claimed) - -1. **Systemd Service Configuration** - - **Status:** VERIFIED - - **Evidence:** - - `server/guruconnect.service` - Complete systemd unit file - - Service type: simple - - User/Group: guru - - Working directory configured - - Environment file loaded - - **Note:** WatchdogSec removed due to crash issues (documented in TECHNICAL_DEBT.md) - -2. **Auto-Restart on Failure** - - **Status:** VERIFIED - - **Evidence:** - - `server/guruconnect.service` lines 20-23 - - Restart=on-failure - - RestartSec=10s - - StartLimitInterval=5min, StartLimitBurst=3 - -3. **Prometheus Metrics Endpoint (/metrics)** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/metrics/mod.rs` - Complete metrics implementation - - `server/src/main.rs` line 256 - `/metrics` endpoint - - No authentication required (appropriate for internal monitoring) - -4. 
**11 Metric Types Exposed** - - **Status:** VERIFIED - - **Evidence:** `server/src/metrics/mod.rs` lines 49-72 - - requests_total (Counter family) - - request_duration_seconds (Histogram family) - - sessions_total (Counter family) - - active_sessions (Gauge) - - session_duration_seconds (Histogram) - - connections_total (Counter family) - - active_connections (Gauge family) - - errors_total (Counter family) - - db_operations_total (Counter family) - - db_query_duration_seconds (Histogram family) - - uptime_seconds (Gauge) - - **Count:** 11 metrics confirmed - -5. **Grafana Dashboard with 10 Panels** - - **Status:** VERIFIED - - **Evidence:** - - `infrastructure/grafana-dashboard.json` exists - - Dashboard JSON structure present - - **Note:** Unable to verify exact panel count without opening Grafana, but file exists - -6. **Automated Daily Backups (systemd timer)** - - **Status:** VERIFIED - - **Evidence:** - - `server/guruconnect-backup.timer` - Timer unit (daily at 02:00) - - `server/guruconnect-backup.service` - Backup service unit - - `server/backup-postgres.sh` - Backup script - - Persistent=true for missed executions - -7. **Log Rotation Configuration** - - **Status:** VERIFIED - - **Evidence:** - - `server/guruconnect.logrotate` - Complete logrotate config - - Daily rotation - - 30-day retention - - Compression enabled - - Systemd journal integration documented - -8. **Health Check Endpoint (/health)** - - **Status:** VERIFIED - - **Evidence:** - - `server/src/main.rs` line 254, 364-366 - - Returns "OK" string - - No authentication required (appropriate for load balancers) - -9. **Service Monitoring (systemctl status)** - - **Status:** VERIFIED - - **Evidence:** - - Systemd service configured - - Journal logging enabled (lines 37-39 in guruconnect.service) - - SyslogIdentifier set - -10. 
**Prometheus Configuration** - - **Status:** VERIFIED - - **Evidence:** - - `infrastructure/prometheus.yml` - Complete config - - Scrapes GuruConnect on 172.16.3.30:3002 - - 15-second scrape interval - -11. **Grafana Configuration** - - **Status:** VERIFIED - - **Evidence:** - - Dashboard JSON template exists - - Installation instructions in prometheus.yml comments - -**Week 2 Verified Result: 100% (11/11)** - Matches claimed completion - ---- - -### Week 3: CI/CD Automation (Claimed: 91% - 10/11) - -#### VERIFIED COMPLETE (10/10 claimed) - -1. **Gitea Actions Workflows (3 workflows)** - - **Status:** VERIFIED - - **Evidence:** - - `.gitea/workflows/build-and-test.yml` - Build workflow - - `.gitea/workflows/test.yml` - Test workflow - - `.gitea/workflows/deploy.yml` - Deploy workflow - -2. **Build Automation (build-and-test.yml)** - - **Status:** VERIFIED - - **Evidence:** - - Complete workflow with server + agent builds - - Triggers: push to main/develop, PRs to main - - Rust toolchain setup - - Dependency caching - - Formatting and Clippy checks - - Test execution - -3. **Test Automation (test.yml)** - - **Status:** VERIFIED - - **Evidence:** - - Unit tests, integration tests, doc tests - - Code coverage with cargo-tarpaulin - - Lint and format checks - - Clippy with -D warnings - -4. **Deployment Automation (deploy.yml)** - - **Status:** VERIFIED - - **Evidence:** - - Triggers on version tags (v*.*.*) - - Manual dispatch option - - Build and package steps - - Deployment notes (SSH commented out - appropriate for security) - - Release creation - -5. **Deployment Script with Rollback (deploy.sh)** - - **Status:** VERIFIED - - **Evidence:** - - `scripts/deploy.sh` - Complete deployment script - - Backup creation (lines 49-56) - - Service stop/start - - Health check (lines 139-147) - - Automatic rollback on failure (lines 123-136) - -6. 
**Version Tagging Automation (version-tag.sh)** - - **Status:** VERIFIED - - **Evidence:** - - `scripts/version-tag.sh` - Complete version script - - Semantic versioning support (major/minor/patch) - - Cargo.toml version updates - - Git tag creation - - Changelog display - -7. **Build Artifact Management** - - **Status:** VERIFIED - - **Evidence:** - - Workflows upload artifacts with retention policies - - build-and-test.yml: 30-day retention - - deploy.yml: 90-day retention - - deploy.sh saves artifacts to `/home/guru/deployments/artifacts/` - -8. **Gitea Actions Runner Installed (act_runner 0.2.11)** - - **Status:** VERIFIED - - **Evidence:** - - `scripts/install-gitea-runner.sh` - Installation script - - Version 0.2.11 specified (line 24) - - User creation, binary installation - - Directory structure setup - -9. **Systemd Service for Runner** - - **Status:** VERIFIED - - **Evidence:** - - `scripts/install-gitea-runner.sh` lines 79-95 - - Service unit created at /etc/systemd/system/gitea-runner.service - - Proper service configuration (User, WorkingDirectory, ExecStart) - -10. **Complete CI/CD Documentation** - - **Status:** VERIFIED - - **Evidence:** - - `CI_CD_SETUP.md` - Complete setup guide - - `ACTIVATE_CI_CD.md` - Activation instructions - - `PHASE1_WEEK3_COMPLETE.md` - Summary - - Scripts include inline documentation - -#### VERIFIED PENDING (1/1 claimed) - -11. **Gitea Actions Runner Registration** - - **Status:** VERIFIED PENDING - - **Evidence:** Documented in ACTIVATE_CI_CD.md - - **Blocker:** Requires admin token from Gitea - - **Impact:** CI/CD pipeline ready but not active - -**Week 3 Verified Result: 91% (10/11)** - Matches claimed completion - ---- - -## Discrepancies Found - -### 1. 
Rate Limiting Implementation - -**Claimed:** Completed -**Actual Status:** Code exists but not operational - -**Details:** -- Rate limiting middleware written and well-designed -- Type resolution issues with tower_governor prevent compilation -- Not applied to routes in main.rs (commented out with TODO) -- Documented in SEC2_RATE_LIMITING_TODO.md - -**Impact:** Minor - the other security controls remain operational, but authentication endpoints are exposed to brute-force attempts until rate limiting is activated or an external mitigation (firewall, fail2ban) is in place - -**Recommendation:** Mark as incomplete, then activate rate limiting using one of the following options: -- Option A: Fix tower_governor types (1-2 hours) -- Option B: Implement custom middleware (2-3 hours) -- Option C: Use Redis-based rate limiting (3-4 hours) - -### 2. Documentation Accuracy - -**Finding:** All documentation accurately reflects implementation status - -**Notable Documentation:** -- `PHASE1_COMPLETE.md` - Accurate summary -- `TECHNICAL_DEBT.md` - Honest tracking of issues -- `SEC2_RATE_LIMITING_TODO.md` - Clear status of incomplete work -- Installation and setup guides are comprehensive - -### 3. 
Unclaimed Completed Work - -**Items NOT claimed but actually completed:** -- API key strength validation (goes beyond basic validation) -- Token blacklist cleanup mechanism -- Comprehensive metrics (11 types, not just basic) -- Deployment rollback automation -- Grafana alert configuration template (`infrastructure/alerts.yml`) - ---- - -## Verification Summary by Category - -### Security (Week 1) -| Category | Claimed | Verified | Status | -|----------|---------|----------|--------| -| Completed | 10/13 | 9/13 | 1 item incomplete | -| Pending | 3/13 | 3/13 | Accurate | -| **Total** | **77%** | **69%** | **-8% discrepancy** | - -### Infrastructure (Week 2) -| Category | Claimed | Verified | Status | -|----------|---------|----------|--------| -| Completed | 11/11 | 11/11 | Accurate | -| Pending | 0/11 | 0/11 | Accurate | -| **Total** | **100%** | **100%** | **No discrepancy** | - -### CI/CD (Week 3) -| Category | Claimed | Verified | Status | -|----------|---------|----------|--------| -| Completed | 10/11 | 10/11 | Accurate | -| Pending | 1/11 | 1/11 | Accurate | -| **Total** | **91%** | **91%** | **No discrepancy** | - -### Overall Phase 1 -| Category | Claimed | Verified | Status | -|----------|---------|----------|--------| -| Completed | 31/35 | 30/35 | Rate limiting incomplete | -| Pending | 4/35 | 4/35 | Accurate | -| **Total** | **89%** | **86%** | **-3% discrepancy** | - ---- - -## Code Quality Assessment - -### Strengths - -1. **Security Implementation Quality** - - Explicit security markers (SEC-1 through SEC-13) in code - - Defense in depth approach - - Modern cryptographic standards (Argon2id, JWT) - - Compile-time SQL injection prevention - -2. **Infrastructure Robustness** - - Comprehensive monitoring (11 metric types) - - Automated backups with retention - - Health checks for all services - - Proper systemd integration - -3. 
**CI/CD Pipeline Design** - - Multiple quality gates (formatting, clippy, tests) - - Security audit integration - - Artifact management with retention - - Automatic rollback on deployment failure - -4. **Documentation Excellence** - - Honest status tracking - - Clear next steps documented - - Technical debt tracked systematically - - Multiple formats (guides, summaries, technical specs) - -### Weaknesses - -1. **Rate Limiting** - - Not operational despite code existence - - Dependency issues not resolved - -2. **Watchdog Implementation** - - Removed due to crash issues - - Proper sd_notify implementation pending - -3. **TLS Certificate Management** - - Manual renewal required - - Auto-renewal not configured - ---- - -## Production Readiness Assessment - -### Ready for Production ✓ - -**Core Functionality:** -- ✓ Authentication and authorization -- ✓ Session management -- ✓ Database operations -- ✓ Monitoring and metrics -- ✓ Health checks -- ✓ Automated backups -- ✓ Deployment automation - -**Security (Operational):** -- ✓ JWT token validation with expiration -- ✓ Argon2id password hashing -- ✓ Security headers (CSP, X-Frame-Options, etc.) -- ✓ Token blacklist for logout -- ✓ API key validation -- ✓ SQL injection protection -- ✓ CORS configuration -- ✗ Rate limiting (pending - use firewall alternative) - -**Infrastructure:** -- ✓ Systemd service with auto-restart -- ✓ Log rotation -- ✓ Prometheus metrics -- ✓ Grafana dashboards -- ✓ Daily backups - -### Pending Items (Non-Blocking) - -1. **Gitea Actions Runner Registration** (5 minutes) - - Required for: Automated CI/CD - - Alternative: Manual builds and deployments - - Impact: Operational efficiency - -2. **Rate Limiting Activation** (1-3 hours) - - Required for: Brute force protection - - Alternative: Firewall rate limiting (fail2ban, NPM) - - Impact: Security hardening - -3. 
**TLS Auto-Renewal** (2-4 hours) - - Required for: Certificate management - - Alternative: Manual renewal reminders - - Impact: Operational maintenance - -4. **Session Timeout UI** (2-4 hours) - - Required for: Enhanced security UX - - Alternative: Server-side expiration works - - Impact: User experience - ---- - -## Recommendations - -### Immediate (Before Production Launch) - -1. **Activate Rate Limiting** (Priority: HIGH) - - Implement one of three options from SEC2_RATE_LIMITING_TODO.md - - Test with curl/Postman - - Verify rate limit headers - -2. **Register Gitea Runner** (Priority: MEDIUM) - - Get registration token from admin - - Register and activate runner - - Test with dummy commit - -3. **Configure Firewall Rate Limiting** (Priority: HIGH - temporary) - - Install fail2ban - - Configure rules for /api/auth/login - - Monitor for brute force attempts - -### Short Term (Within 1 Month) - -4. **TLS Certificate Auto-Renewal** (Priority: HIGH) - - Install certbot - - Configure auto-renewal timer - - Test dry-run renewal - -5. **Session Timeout UI** (Priority: MEDIUM) - - Implement JavaScript token expiration check - - Redirect to login on expiration - - Show countdown warning - -6. **Comprehensive Audit Logging** (Priority: MEDIUM) - - Expand event logging - - Add audit trail for sensitive operations - - Implement log retention policies - -### Long Term (Phase 2+) - -7. **Systemd Watchdog Implementation** - - Add systemd crate - - Implement sd_notify calls - - Re-enable WatchdogSec in service file - -8. **Distributed Rate Limiting** - - Implement Redis-based rate limiting - - Prepare for multi-instance deployment - ---- - -## Conclusion - -The Phase 1 completion claim of **89%** is **SUBSTANTIALLY ACCURATE** with a verified completion of **86%** (30/35). The 3-point discrepancy is due to rate limiting being implemented in code but not operational in production. - -**Overall Assessment: APPROVED FOR PRODUCTION** with the following caveats: - -1. 
Implement temporary rate limiting via firewall (fail2ban) -2. Monitor authentication endpoints for abuse -3. Schedule TLS auto-renewal setup within 30 days -4. Register Gitea runner when convenient (non-critical) - -**Code Quality:** Excellent -**Documentation:** Comprehensive and honest -**Security Posture:** Strong (9/10 security items operational) -**Infrastructure:** Production-ready -**CI/CD:** Complete but not activated - -The project demonstrates high-quality engineering practices, honest documentation, and production-ready infrastructure. The pending items are clearly documented and have reasonable alternatives or mitigations in place. - ---- - -**Audit Completed:** 2026-01-18 -**Next Review:** After Gitea runner registration and rate limiting implementation -**Overall Grade:** A- (86% verified completion, excellent quality) diff --git a/projects/msp-tools/guru-connect/PHASE1_SECURITY_INFRASTRUCTURE.md b/projects/msp-tools/guru-connect/PHASE1_SECURITY_INFRASTRUCTURE.md deleted file mode 100644 index a13fce6..0000000 --- a/projects/msp-tools/guru-connect/PHASE1_SECURITY_INFRASTRUCTURE.md +++ /dev/null @@ -1,316 +0,0 @@ -# Phase 1: Security & Infrastructure -**Duration:** 4 weeks -**Team:** 1 Backend Developer + 1 DevOps Engineer -**Goal:** Fix critical vulnerabilities, establish production-ready infrastructure - ---- - -## Week 1: Critical Security Fixes - -### Day 1-2: JWT Secret & Rate Limiting - -**SEC-1: JWT Secret Hardcoded (CRITICAL)** -- [ ] Remove hardcoded JWT secret from source code -- [ ] Add JWT_SECRET environment variable to .env -- [ ] Update server/src/auth/ to read from env -- [ ] Generate strong random secret (64+ chars) -- [ ] Document secret rotation procedure -- [ ] Test authentication with new secret -- [ ] Verify old tokens rejected after rotation - -**SEC-2: Rate Limiting (CRITICAL)** -- [ ] Install tower-governor or similar rate limiting middleware -- [ ] Add rate limiting to /api/auth/login (5 attempts/minute) -- [ ] Add rate 
limiting to /api/auth/register (2 attempts/minute) -- [ ] Add rate limiting to support code validation (10 attempts/minute) -- [ ] Add IP-based tracking -- [ ] Test rate limiting with automated requests -- [ ] Add rate limit headers (X-RateLimit-Remaining, etc.) - -### Day 3: SQL Injection Prevention - -**SEC-3: SQL Injection in Machine Filters (CRITICAL)** -- [ ] Audit all raw SQL queries in server/src/db/ -- [ ] Replace string concatenation with sqlx parameterized queries -- [ ] Focus on machine_filters.rs (high risk) -- [ ] Review user_queries.rs for injection points -- [ ] Add input validation for filter parameters -- [ ] Test with SQL injection payloads ('; DROP TABLE--, etc.) -- [ ] Document safe query patterns for team - -### Day 4-5: Agent & Session Security - -**SEC-4: Agent Connection Validation (CRITICAL)** -- [ ] Implement support code validation in relay handler -- [ ] Implement API key validation for persistent agents -- [ ] Reject connections without valid credentials -- [ ] Add connection attempt logging -- [ ] Test with invalid codes/keys -- [ ] Add IP whitelisting option for agents -- [ ] Document agent authentication flow - -**SEC-5: Session Takeover Prevention (CRITICAL)** -- [ ] Add session ownership validation -- [ ] Verify JWT user_id matches session creator -- [ ] Prevent cross-user session access -- [ ] Add session token binding (tie to initial connection) -- [ ] Test with stolen session IDs -- [ ] Add session hijacking detection (IP change alerts) -- [ ] Implement session timeout (4-hour max) - ---- - -## Week 2: High-Priority Security - -### Day 1: Logging & HTTPS - -**SEC-6: Password Logging (HIGH)** -- [ ] Audit all logging statements for sensitive data -- [ ] Remove password/token logging from auth.rs -- [ ] Add [REDACTED] filter for sensitive fields -- [ ] Update tracing configuration -- [ ] Test logs don't contain credentials -- [ ] Document logging security policy - -**SEC-10: HTTPS Enforcement (HIGH)** -- [ ] Add HTTPS redirect 
middleware -- [ ] Configure HSTS headers (max-age=31536000) -- [ ] Update NPM to enforce HTTPS -- [ ] Test HTTP requests redirect to HTTPS -- [ ] Add secure cookie flags (Secure, HttpOnly) -- [ ] Update documentation with HTTPS URLs - -### Day 2-3: Input Sanitization - -**SEC-7: XSS Prevention (HIGH)** -- [ ] Install validator crate for input sanitization -- [ ] Sanitize all user inputs in API endpoints -- [ ] Escape HTML in machine names, notes, tags -- [ ] Add Content-Security-Policy headers -- [ ] Test with XSS payloads ( - - diff --git a/projects/msp-tools/guru-connect/server/static/index.html b/projects/msp-tools/guru-connect/server/static/index.html deleted file mode 100644 index e56a3f2..0000000 --- a/projects/msp-tools/guru-connect/server/static/index.html +++ /dev/null @@ -1,425 +0,0 @@ - - - - - - GuruConnect - Remote Support - - - -
- - -
- -
- -
- -
- - -
- -
- -
-

How to connect:

-
    -
  1. Enter the 6-digit code provided by your technician
  2. -
  3. Click "Connect" to start the session
  4. -
  5. If prompted, allow the download and run the file
  6. -
-
- - -
- - - - diff --git a/projects/msp-tools/guru-connect/server/static/login.html b/projects/msp-tools/guru-connect/server/static/login.html deleted file mode 100644 index 34ad38c..0000000 --- a/projects/msp-tools/guru-connect/server/static/login.html +++ /dev/null @@ -1,229 +0,0 @@ - - - - - - GuruConnect - Login - - - -
- - - - - -
- - - - diff --git a/projects/msp-tools/guru-connect/server/static/users.html b/projects/msp-tools/guru-connect/server/static/users.html deleted file mode 100644 index 08bb946..0000000 --- a/projects/msp-tools/guru-connect/server/static/users.html +++ /dev/null @@ -1,602 +0,0 @@ - - - - - - GuruConnect - User Management - - - -
-
- - ← Back to Dashboard -
-
- -
-
-
-
-

User Management

-

Create and manage user accounts

-
- -
- -
- - - - - - - - - - - - - - - - - -
UsernameEmailRoleStatusLast LoginActions
-
-

Loading users...

-
-
-
-
- - - - -
-
-
- - - - diff --git a/projects/msp-tools/guru-connect/server/static/viewer.html b/projects/msp-tools/guru-connect/server/static/viewer.html deleted file mode 100644 index 1383a6b..0000000 --- a/projects/msp-tools/guru-connect/server/static/viewer.html +++ /dev/null @@ -1,694 +0,0 @@ - - - - - - GuruConnect Viewer - - - - -
- - - -
-
-
FPS: 0
-
Resolution: -
-
Frames: 0
-
-
Connecting...
-
- -
- -
- -
-
-
-
Connecting to remote desktop...
-
-
- - - - diff --git a/projects/msp-tools/guru-connect/session-logs/2025-12-29-session.md b/projects/msp-tools/guru-connect/session-logs/2025-12-29-session.md deleted file mode 100644 index ff437c4..0000000 --- a/projects/msp-tools/guru-connect/session-logs/2025-12-29-session.md +++ /dev/null @@ -1,134 +0,0 @@ -# GuruConnect Session Log - 2025-12-29 - -## Session Summary - -### What Was Accomplished -1. **Cleaned up stale persistent sessions** - Deleted 12 offline machines from PostgreSQL database -2. **Added machine deletion API with uninstall support** - Implemented full machine management endpoints -3. **Added AdminCommand protobuf message** - For server-to-agent commands (uninstall, restart, update) -4. **Implemented machine history export** - Sessions and events can be exported before deletion - -### Key Decisions -- Machine deletion has two modes: - - **Delete Only** (`DELETE /api/machines/:agent_id`) - Removes from DB, allows re-registration - - **Delete with Uninstall** (`DELETE /api/machines/:agent_id?uninstall=true`) - Sends uninstall command to agent if online -- History export available via `?export=true` query param or separate endpoint -- AdminCommand message types: ADMIN_UNINSTALL, ADMIN_RESTART, ADMIN_UPDATE - -### Problems Encountered -- Server endpoint returning 404 - new binary may not have been properly deployed -- Cross-compilation issues with ring crate for Windows MSVC on Linux - ---- - -## Credentials - -### GuruConnect Database (PostgreSQL) -- **Host:** 172.16.3.30 (localhost from server) -- **Database:** guruconnect -- **User:** guruconnect -- **Password:** gc_a7f82d1e4b9c3f60 -- **DATABASE_URL:** `postgres://guruconnect:gc_a7f82d1e4b9c3f60@localhost:5432/guruconnect` - -### Build Server SSH -- **Host:** 172.16.3.30 -- **User:** guru -- **Password:** Gptf*77ttb123!@#-rmm -- **Sudo Password:** Gptf*77ttb123!@#-rmm - ---- - -## Infrastructure - -### GuruConnect Server -- **Host:** 172.16.3.30 -- **Port:** 3002 -- **Binary:** 
`/home/guru/guru-connect/target/release/guruconnect-server` -- **Service:** guruconnect.service (systemd) -- **Log:** ~/gc-server.log - -### API Endpoints (NEW) -``` -GET /api/machines - List all persistent machines -GET /api/machines/:agent_id - Get machine info -GET /api/machines/:agent_id/history - Get full session/event history -DELETE /api/machines/:agent_id - Delete machine - Query params: - ?uninstall=true - Send uninstall command to agent - ?export=true - Include history in response -``` - ---- - -## Files Modified - -### Protobuf Schema -- `proto/guruconnect.proto` - Added AdminCommand message and AdminCommandType enum - -### Server Changes -- `server/src/main.rs` - Added machine API routes and handlers -- `server/src/api/mod.rs` - Added MachineInfo, MachineHistory, DeleteMachineParams types -- `server/src/db/machines.rs` - Existing delete_machine function used -- `server/src/db/sessions.rs` - Added get_sessions_for_machine() -- `server/src/db/events.rs` - Added get_events_for_machine() -- `server/src/session/mod.rs` - Added send_admin_command() and remove_agent() methods - -### Agent Changes -- `agent/src/session/mod.rs` - Added AdminCommand message handler -- `agent/src/main.rs` - Added ADMIN_UNINSTALL and ADMIN_RESTART error handlers - ---- - -## Important Commands - -### Query/Delete Machines from PostgreSQL -```bash -# Query all machines -ssh guru@172.16.3.30 'PGPASSWORD=gc_a7f82d1e4b9c3f60 psql -h localhost -U guruconnect -d guruconnect -c "SELECT agent_id, hostname, status FROM connect_machines;"' - -# Delete all offline machines -ssh guru@172.16.3.30 'PGPASSWORD=gc_a7f82d1e4b9c3f60 psql -h localhost -U guruconnect -d guruconnect -c "DELETE FROM connect_machines WHERE status = '\''offline'\'';"' -``` - -### Build Server -```bash -# Build for Linux -ssh guru@172.16.3.30 'cd ~/guru-connect && source ~/.cargo/env && cargo build -p guruconnect-server --release --target x86_64-unknown-linux-gnu' - -# Restart server -ssh guru@172.16.3.30 'pkill -f 
guruconnect-server; cd ~/guru-connect/server && DATABASE_URL="postgres://guruconnect:gc_a7f82d1e4b9c3f60@localhost:5432/guruconnect" nohup ~/guru-connect/target/release/guruconnect-server > ~/gc-server.log 2>&1 &' -``` - ---- - -## Pending Tasks - -1. **Debug 404 on /api/machines endpoint** - The new routes aren't being recognized - - May need to verify the correct binary is being executed - - Check if old process is still running on port 3002 - -2. **Test machine deletion flow end-to-end** - - Connect an agent - - Delete with uninstall flag - - Verify agent receives command and uninstalls - -3. **Build Windows agent binary** - Cross-compilation needs MSVC tools or use Windows build - ---- - -## Git Status - -Committed and pushed: -``` -commit dc7b742: Add machine deletion API with uninstall command support -- 8 files changed, 380 insertions(+), 6 deletions(-) -``` - ---- - -## Next Steps for Future Sessions - -1. Investigate why `/api/machines` returns 404 - likely old binary running -2. Use systemd properly for server management (need root access) -3. Build and test Windows agent with uninstall command handling -4. 
Add dashboard UI for machine management (list, delete with options) diff --git a/projects/msp-tools/guru-rmm b/projects/msp-tools/guru-rmm index 4ce60d0..2dec45f 160000 --- a/projects/msp-tools/guru-rmm +++ b/projects/msp-tools/guru-rmm @@ -1 +1 @@ -Subproject commit 4ce60d05527dda39f08958de3fff34bb010d9364 +Subproject commit 2dec45fffb53c9c2847fd5795d06b63cc2ef8df5 diff --git a/session-logs/2026-05-29-session.md b/session-logs/2026-05-29-session.md new file mode 100644 index 0000000..cb7cbd6 --- /dev/null +++ b/session-logs/2026-05-29-session.md @@ -0,0 +1,99 @@ +# Session Log — 2026-05-29 + +## User +- **User:** Mike Swanson (mike) +- **Machine:** GURU-5070 +- **Role:** admin + +## Session Summary + +Shaped a pre-implementation spec for native integrated remote control in the GuruRMM ecosystem, then restructured how the guru-connect product is tracked in the monorepo. The session began as a "fix lint errors" request that was redirected into a GuruRMM feature request for the guru-connect (GC) project: native, integrated remote control comparable to ScreenConnect/Splashtop, built entirely on our own Rust stack to avoid third-party agents and supply-chain exposure. + +Research established that GC already implements the full remote-control engine (DXGI/GDI capture, input injection, viewer, `guruconnect://` protocol handler, persistent/unattended + support-code/attended modes, protobuf over WSS) and that GuruRMM already has the orchestration rails (per-agent command dispatch, stable `device_id` identity, the AgentDetail action-button pattern, and a half-built generic `tunnel` scaffold). Two parallel Explore agents mapped the exact integration surfaces with file:line references. The feature is therefore ~80% wiring against existing capability, not greenfield. 
Architecture decisions were captured via the user: broker model (RMM orchestrates the separate GC agent), both unattended and attended access, multi-monitor in scope, file transfer / session recording / non-Windows agents out of scope, priority P2. + +The `/shape-spec` skill produced four files in `projects/msp-tools/guru-connect/specs/native-remote-control/` (shape, plan, references, standards). The user then clarified that GC is a standalone product with its own pipeline/cadence, and the real intent is a durable, versioned integration contract so the two products stay integration-compatible without coupling. The spec was rewritten around a GC-owned, semver'd integration contract (`/api/integration/v1/`, capability discovery, embedded session viewer). A concrete blocker was identified: GC's `security_headers.rs` sets `frame-ancestors 'none'`, which must be relaxed to a scoped RMM-origin allowlist for the embedded viewer. RMM-side hints were added (ADR-008 + `docs/GURU_CONNECT_INTEGRATION.md`) recording that RMM consumes GC via the contract and does no active dev on GC. The spec and hints were committed across both repos (commit-only, no push). + +The user then asked to wire GC as a submodule like guru-rmm. Investigation revealed the remote `azcomputerguru/guru-connect` repo was ~4 months stale (frozen 2026-01-18) while the local monorepo copy was far ahead (entire `middleware`/`metrics`/`utils` modules, token blacklist, Phase-1 security/deploy work, the new spec). Per the user's decisions (publish local to the existing repo as a snapshot commit; preserve history), the Gitea Agent published the local working state to GC main (fast-forward `5b7cf5f..e3e95f8`, history preserved, KEEP paths `.gitignore`/`.cargo`/`server/static/downloads` retained), then converted the vendored directory into a submodule pinned at `e3e95f8`. Confirmed that GC `deploy.yml` triggers only on `v*.*.*` tags / manual dispatch, so the push ran CI build/test but did not deploy to production. 
+ +Finally, the user confirmed RMM and GC are the only versionable products; everything else stays in the monorepo. This policy was recorded to memory (`project_versionable_products.md`). + +## Key Decisions + +- Broker architecture: RMM orchestrates the separate GC agent (two agents coexist) rather than merging GC into the RMM agent — reuses GC's existing engine, ships sooner, keeps GC standalone. +- The deliverable is a GC-owned, semver'd integration contract + capability discovery, not one-off broker wiring — so the two products stay in-sync via the contract without sharing pipelines or releasing in lockstep. +- Stable cross-product identity = RMM `device_id` passed as the GC `agent_id`, so brokered sessions deterministically match the endpoint. +- Supply-chain guard made concrete: the RMM agent downloads the GC binary only from GC's release channel and verifies SHA-256 before launch (reusing GC's `releases.checksum_sha256`). +- Embedded viewer over native-only: relax `frame-ancestors`/`X-Frame-Options` on the viewer route to a scoped RMM-origin allowlist; keep `'none'` everywhere else. +- Spec lives in the GC repo (GC owns the contract); RMM gets ADR-008 + a pointer doc reminding it not to perform active dev on GC. +- Submodule reconciliation: publish the local (authoritative) state up to the stale GC repo as a snapshot commit on top of existing main (preserve history), then submodule-add — nothing lost. +- Only GuruRMM and GuruConnect are versionable products (own repos/submodules); all other projects stay in the claudetools monorepo. Split only for an independent pipeline OR a versioned external consumer. +- All git operations committed but NOT pushed (claudetools), per the established pattern of leaving the push to the user; the GC repo push was mandatory for the submodule to resolve. + +## Problems Encountered + +- Initial "fix lint errors" request was ambiguous (clean tree, multiple lintable projects). 
Asked which project; user redirected to the GC feature request instead. +- CLAUDE.md warns that a Gitea repo named `guru-connect` is an "abandoned duplicate." Verified by inspecting the remote repo's contents (`proto/guruconnect.proto`, `agent/`, `server/`, `dashboard/`) that `azcomputerguru/guru-connect` is the real GC product, not the abandoned RMM duplicate the warning refers to. +- The remote GC repo was 4 months stale and the local monorepo copy had diverged substantially (whole modules + Phase-1 work never pushed). A naive `submodule add` would have reverted that work. Resolved by diffing local vs remote, surfacing the divergence, and publishing local→remote before converting. +- Production-deploy risk on push: checked GC's `.gitea/workflows`; confirmed `deploy.yml` triggers only on `v*.*.*` tags / `workflow_dispatch`, so pushing to main runs CI but does not deploy. + +## Configuration Changes + +Created (committed `afbe5a8`, then moved into the GC repo via the submodule conversion): +- `projects/msp-tools/guru-connect/specs/native-remote-control/shape.md` +- `projects/msp-tools/guru-connect/specs/native-remote-control/plan.md` +- `projects/msp-tools/guru-connect/specs/native-remote-control/references.md` +- `projects/msp-tools/guru-connect/specs/native-remote-control/standards.md` + +guru-rmm submodule (committed `7701d26` in the submodule): +- Modified `docs/ARCHITECTURE_DECISIONS.md` — added ADR-008 (GC is a separate product consumed via versioned contract) +- Created `docs/GURU_CONNECT_INTEGRATION.md` — RMM-side boundary/pointer doc + +Repo structure: +- `.gitmodules` — added `projects/msp-tools/guru-connect` submodule entry (branch main) +- `projects/msp-tools/guru-connect` — converted from vendored directory to submodule (gitlink mode 160000 at `e3e95f8`) + +Memory: +- Created `.claude/memory/project_versionable_products.md` +- Updated `.claude/memory/MEMORY.md` index (Project section) + +## Credentials & Secrets + +None discovered or created this 
session. The spec references secrets to be sourced from env/SOPS at implementation time (`CONNECT_INTEGRATION_KEY`, `CONNECT_SERVER_URL`, per-machine GC agent keys, `CONNECT_EMBED_ALLOWED_ORIGINS`) — none provisioned yet. + +## Infrastructure & Servers + +- Gitea (internal): http://172.16.3.20:3000 — used for repo inspection + GC push (per internal-API preference) +- GC relay server: 172.16.3.30:3002, proxied via NPM to connect.azcomputerguru.com +- GuruRMM server: 172.16.3.30:3001, dashboard rmm.azcomputerguru.com +- GC repo CI: `.gitea/workflows/{build-and-test,test,deploy}.yml` — deploy only on `v*.*.*` tags / manual dispatch + +## Commands & Outputs + +Repo divergence check (local vs remote GC), shallow clone + `diff -rq` — confirmed local far ahead; cleaned up temp clone afterward. + +GC publish (Gitea Agent): +- `git push origin main` → `5b7cf5f..e3e95f8 main -> main` (fast-forward, 73 files changed, 15611 insertions, 5760 deletions; `Cargo.lock` dropped — not tracked in the authoritative copy) + +Submodule conversion (Gitea Agent): +- `git rm -r --cached projects/msp-tools/guru-connect` + `rm -rf` + `git submodule add -b main GC_REPO_URL projects/msp-tools/guru-connect` (GC_REPO_URL = the GC Gitea repo URL) +- `git submodule status` → `e3e95f8 ... guru-connect (heads/main)`, `7701d26 ... guru-rmm (heads/main)` + +## Pending / Incomplete Tasks + +- claudetools commits are LOCAL, not pushed: `53e14da` (submodule conversion) + `1fc2401`/`afbe5a8` (spec + pointer bump) from earlier. Push when ready. +- GC repo housekeeping: re-add `Cargo.lock` (dropped in the snapshot; wanted for reproducible builds). +- GC submodule URL uses the internal IP `172.16.3.20:3000`; guru-rmm uses the public `git.azcomputerguru.com`. Off-network clones (Howard's Mac) won't resolve the internal IP — consider switching to the public hostname for parity. +- GC CI run kicked off by the publish push may be red (the snapshot may not build cleanly; Cargo.lock removed). Check the Actions run. 
+- Implementation of the feature itself has not started — Task 0 of the spec (commit the spec) is effectively satisfied; Tasks 1+ are not begun. + +## Reference Information + +- Spec: `projects/msp-tools/guru-connect/specs/native-remote-control/` (4 files) — now in the GC repo at `e3e95f8` +- ADR: `projects/msp-tools/guru-rmm/docs/ARCHITECTURE_DECISIONS.md` ADR-008 +- RMM pointer: `projects/msp-tools/guru-rmm/docs/GURU_CONNECT_INTEGRATION.md` +- GC repo: `azcomputerguru/guru-connect`; published `5b7cf5f → e3e95f8` +- guru-rmm submodule commit: `7701d26` +- claudetools commits: `afbe5a8` (spec), `1fc2401` (submodule ptr bump for ADR), `53e14da` (GC submodule conversion) +- Roadmap context: `projects/msp-tools/guru-rmm/docs/FEATURE_ROADMAP.md:635-675`, `docs/UI_GAPS.md:155-186` +- Key GC integration files: `server/src/middleware/security_headers.rs:30,37-39` (frame-ancestors), `server/static/viewer.html`, `server/src/relay/mod.rs:187` (agent key validation), `server/src/main.rs:300` (`/api/version`) +- Key RMM files: `server/src/api/commands.rs:87-157` (command dispatch), `agent/src/device_id.rs`, `dashboard/src/pages/AgentDetail.tsx:1893-1931`