chore: sync repository to current working state
Some checks failed
Build and Test / Build Server (Linux) (push) Has been cancelled
Build and Test / Build Agent (Windows) (push) Has been cancelled
Build and Test / Security Audit (push) Has been cancelled
Build and Test / Build Summary (push) Has been cancelled
Run Tests / Test Server (push) Has been cancelled
Run Tests / Test Agent (push) Has been cancelled
Run Tests / Code Coverage (push) Has been cancelled
Run Tests / Lint and Format Check (push) Has been cancelled

Brings azcomputerguru/guru-connect up to the authoritative working copy that
had been maintained in the claudetools monorepo: Phase 1 security and
infrastructure (middleware, metrics, utils, token blacklist, deployment
scripts, security audits) plus the native-remote-control integration spec.
Preserves the repo .gitignore, .cargo, and server/static/downloads.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-29 06:15:29 -07:00
parent 5b7cf5fb07
commit e3e95f8fa7
73 changed files with 15608 additions and 5757 deletions

68
infrastructure/alerts.yml Normal file
View File

@@ -0,0 +1,68 @@
---
# GuruConnect alerting rules for Prometheus.
#
# Health and performance alerts for the GuruConnect server.
# Install this file as /etc/prometheus/alerts.yml and list that path under
# `rule_files` in prometheus.yml.
groups:
  - name: guruconnect_alerts
    interval: 30s
    rules:
      # The guruconnect scrape job reports the target as down.
      - alert: GuruConnectDown
        expr: 'up{job="guruconnect"} == 0'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'GuruConnect server is down'
          description: 'GuruConnect server on {{ $labels.instance }} has been down for more than 1 minute'

      # Error counter growing faster than 10 per second.
      - alert: HighErrorRate
        expr: 'rate(guruconnect_errors_total[5m]) > 10'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High error rate detected'
          description: 'Error rate is {{ $value | humanize }} errors/second over the last 5 minutes'

      # Active session gauge above 100.
      - alert: TooManyActiveSessions
        expr: 'guruconnect_active_sessions > 100'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Too many active sessions'
          description: 'There are {{ $value }} active sessions, exceeding threshold of 100'

      # 95th percentile request duration above 1 second.
      - alert: HighRequestLatency
        expr: 'histogram_quantile(0.95, rate(guruconnect_request_duration_seconds_bucket[5m])) > 1'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High request latency'
          description: '95th percentile request latency is {{ $value | humanize }}s'

      # Database operations with status="error" exceeding 1 per second.
      - alert: DatabaseOperationsFailure
        expr: 'rate(guruconnect_db_operations_total{status="error"}[5m]) > 1'
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: 'Database operations failing'
          description: 'Database error rate is {{ $value | humanize }} errors/second'

      # Uptime below 300 seconds, i.e. the server restarted recently.
      - alert: ServerRestarted
        expr: 'guruconnect_uptime_seconds < 300'
        for: 1m
        labels:
          severity: info
        annotations:
          summary: 'Server recently restarted'
          description: 'Server uptime is only {{ $value | humanize }}s, indicating a recent restart'

View File

@@ -0,0 +1,228 @@
{
  "dashboard": {
    "title": "GuruConnect Monitoring",
    "tags": [
      "guruconnect",
      "monitoring"
    ],
    "timezone": "browser",
    "schemaVersion": 16,
    "version": 1,
    "refresh": "10s",
    "panels": [
      {
        "id": 1,
        "title": "Active Sessions",
        "type": "graph",
        "gridPos": {
          "h": 8,
          "w": 12,
          "x": 0,
          "y": 0
        },
        "targets": [
          {
            "refId": "A",
            "expr": "guruconnect_active_sessions",
            "legendFormat": "Active Sessions"
          }
        ],
        "yaxes": [
          {
            "label": "Sessions",
            "show": true
          },
          {
            "show": false
          }
        ],
        "lines": true,
        "fill": 1,
        "linewidth": 2,
        "tooltip": {
          "shared": true
        }
      },
      {
        "id": 2,
        "title": "Requests per Second",
        "type": "graph",
        "gridPos": {
          "h": 8,
          "w": 12,
          "x": 12,
          "y": 0
        },
        "targets": [
          {
            "refId": "A",
            "expr": "rate(guruconnect_requests_total[1m])",
            "legendFormat": "{{method}} {{path}}"
          }
        ],
        "yaxes": [
          {
            "label": "Requests/sec",
            "show": true
          },
          {
            "show": false
          }
        ],
        "lines": true,
        "fill": 1,
        "linewidth": 2,
        "tooltip": {
          "shared": true
        }
      },
      {
        "id": 3,
        "title": "Error Rate",
        "type": "graph",
        "gridPos": {
          "h": 8,
          "w": 12,
          "x": 0,
          "y": 8
        },
        "targets": [
          {
            "refId": "A",
            "expr": "rate(guruconnect_errors_total[1m])",
            "legendFormat": "{{error_type}}"
          }
        ],
        "yaxes": [
          {
            "label": "Errors/sec",
            "show": true
          },
          {
            "show": false
          }
        ],
        "lines": true,
        "fill": 1,
        "linewidth": 2,
        "tooltip": {
          "shared": true
        },
        "alert": {
          "name": "High Error Rate",
          "frequency": "60s",
          "handler": 1,
          "conditions": [
            {
              "type": "query",
              "query": {
                "params": ["A", "1m", "now"]
              },
              "reducer": {
                "params": [],
                "type": "avg"
              },
              "evaluator": {
                "params": [10],
                "type": "gt"
              },
              "operator": {
                "type": "and"
              }
            }
          ],
          "executionErrorState": "alerting",
          "noDataState": "no_data",
          "notifications": []
        }
      },
      {
        "id": 4,
        "title": "Request Latency (p50, p95, p99)",
        "type": "graph",
        "gridPos": {
          "h": 8,
          "w": 12,
          "x": 12,
          "y": 8
        },
        "targets": [
          {
            "refId": "A",
            "expr": "histogram_quantile(0.50, rate(guruconnect_request_duration_seconds_bucket[5m]))",
            "legendFormat": "p50"
          },
          {
            "refId": "B",
            "expr": "histogram_quantile(0.95, rate(guruconnect_request_duration_seconds_bucket[5m]))",
            "legendFormat": "p95"
          },
          {
            "refId": "C",
            "expr": "histogram_quantile(0.99, rate(guruconnect_request_duration_seconds_bucket[5m]))",
            "legendFormat": "p99"
          }
        ],
        "yaxes": [
          {
            "label": "Latency (seconds)",
            "show": true,
            "format": "s"
          },
          {
            "show": false
          }
        ],
        "lines": true,
        "fill": 0,
        "linewidth": 2,
        "tooltip": {
          "shared": true
        }
      },
      {
        "id": 5,
        "title": "Active Connections by Type",
        "type": "graph",
        "gridPos": {
          "h": 8,
          "w": 12,
          "x": 0,
          "y": 16
        },
        "targets": [
          {
            "refId": "A",
            "expr": "guruconnect_active_connections",
            "legendFormat": "{{conn_type}}"
          }
        ],
        "yaxes": [
          {
            "label": "Connections",
            "show": true
          },
          {
            "show": false
          }
        ],
        "lines": true,
        "fill": 1,
        "linewidth": 2,
        "stack": true,
        "tooltip": {
          "shared": true
        }
      },
      {
        "id": 6,
        "title": "Database Query Duration",
        "type": "graph",
        "gridPos": {
          "h": 8,
          "w": 12,
          "x": 12,
          "y": 16
        },
        "targets": [
          {
            "refId": "A",
            "expr": "histogram_quantile(0.95, rate(guruconnect_db_query_duration_seconds_bucket[5m]))",
            "legendFormat": "{{operation}} p95"
          }
        ],
        "yaxes": [
          {
            "label": "Duration (seconds)",
            "show": true,
            "format": "s"
          },
          {
            "show": false
          }
        ],
        "lines": true,
        "fill": 0,
        "linewidth": 2,
        "tooltip": {
          "shared": true
        }
      },
      {
        "id": 7,
        "title": "Server Uptime",
        "type": "singlestat",
        "gridPos": {
          "h": 4,
          "w": 6,
          "x": 0,
          "y": 24
        },
        "targets": [
          {
            "refId": "A",
            "expr": "guruconnect_uptime_seconds"
          }
        ],
        "format": "s",
        "valueName": "current",
        "sparkline": {
          "show": true
        }
      },
      {
        "id": 8,
        "title": "Total Sessions Created",
        "type": "singlestat",
        "gridPos": {
          "h": 4,
          "w": 6,
          "x": 6,
          "y": 24
        },
        "targets": [
          {
            "refId": "A",
            "expr": "guruconnect_sessions_total{status=\"created\"}"
          }
        ],
        "format": "short",
        "valueName": "current",
        "sparkline": {
          "show": true
        }
      },
      {
        "id": 9,
        "title": "Total Requests",
        "type": "singlestat",
        "gridPos": {
          "h": 4,
          "w": 6,
          "x": 12,
          "y": 24
        },
        "targets": [
          {
            "refId": "A",
            "expr": "sum(guruconnect_requests_total)"
          }
        ],
        "format": "short",
        "valueName": "current",
        "sparkline": {
          "show": true
        }
      },
      {
        "id": 10,
        "title": "Total Errors",
        "type": "singlestat",
        "gridPos": {
          "h": 4,
          "w": 6,
          "x": 18,
          "y": 24
        },
        "targets": [
          {
            "refId": "A",
            "expr": "sum(guruconnect_errors_total)"
          }
        ],
        "format": "short",
        "valueName": "current",
        "sparkline": {
          "show": true
        },
        "thresholds": "10,100",
        "colors": [
          "#299c46",
          "#e0b400",
          "#d44a3a"
        ]
      }
    ]
  }
}

View File

@@ -0,0 +1,45 @@
---
# Prometheus scrape configuration for GuruConnect.
#
# Setup:
#   1. Install Prometheus:
#        sudo apt-get install prometheus
#   2. Deploy this file:
#        sudo cp prometheus.yml /etc/prometheus/prometheus.yml
#   3. Restart the service:
#        sudo systemctl restart prometheus
global:
  scrape_interval: 15s      # scrape metrics every 15 seconds
  evaluation_interval: 15s  # evaluate rules every 15 seconds
  external_labels:
    cluster: guruconnect-production
    environment: production

# Targets to scrape
scrape_configs:
  # Metrics exposed by the GuruConnect server
  - job_name: guruconnect
    static_configs:
      - targets:
          - '172.16.3.30:3002'
        labels:
          service: guruconnect-server
          instance: rmm-server

  # System metrics from Node Exporter
  # Install: sudo apt-get install prometheus-node-exporter
  - job_name: node_exporter
    static_configs:
      - targets:
          - '172.16.3.30:9100'
        labels:
          instance: rmm-server

# Alert rules (optional) - uncomment to load them
# rule_files:
#   - '/etc/prometheus/alerts.yml'

# Alertmanager configuration (optional) - uncomment to send alerts
# alerting:
#   alertmanagers:
#     - static_configs:
#         - targets: ['localhost:9093']

View File

@@ -0,0 +1,102 @@
#!/bin/bash
# GuruConnect Monitoring Setup Script
#
# Installs and configures Prometheus, the Prometheus node exporter and Grafana
# on an apt-based host, then registers Prometheus as Grafana's default data
# source.
#
# Must be run as root (sudo). prometheus.yml (required) and alerts.yml
# (optional) are read from the directory that contains this script, so the
# script can be invoked from any working directory.

# Abort on command failure, on use of unset variables, and on failures inside
# pipelines (e.g. a failed key download piped into gpg).
set -euo pipefail

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Directory holding this script and the configuration files shipped with it
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Local Grafana endpoint and how long to wait for it to come up
GRAFANA_URL="http://localhost:3000"
GRAFANA_WAIT_SECONDS=60

echo "========================================="
echo "GuruConnect Monitoring Setup"
echo "========================================="

# Check if running as root
if [ "$EUID" -ne 0 ]; then
    echo -e "${RED}ERROR: This script must be run as root (sudo)${NC}"
    exit 1
fi

# Fail before installing anything if the required configuration is missing
if [ ! -f "$SCRIPT_DIR/prometheus.yml" ]; then
    echo -e "${RED}ERROR: prometheus.yml not found in ${SCRIPT_DIR}${NC}"
    exit 1
fi

# Update package list
echo "Updating package list..."
apt-get update

# Install Prometheus
echo ""
echo "Installing Prometheus..."
apt-get install -y prometheus prometheus-node-exporter

# Copy Prometheus configuration
echo "Copying Prometheus configuration..."
cp "$SCRIPT_DIR/prometheus.yml" /etc/prometheus/prometheus.yml
# NOTE: copying alerts.yml does not load the rules by itself; prometheus.yml
# must list it under rule_files.
if [ -f "$SCRIPT_DIR/alerts.yml" ]; then
    cp "$SCRIPT_DIR/alerts.yml" /etc/prometheus/alerts.yml
fi

# Set permissions
chown prometheus:prometheus /etc/prometheus/prometheus.yml
if [ -f "/etc/prometheus/alerts.yml" ]; then
    chown prometheus:prometheus /etc/prometheus/alerts.yml
fi

# Restart Prometheus
echo "Restarting Prometheus..."
systemctl restart prometheus
systemctl enable prometheus
systemctl restart prometheus-node-exporter
systemctl enable prometheus-node-exporter

# Install Grafana
echo ""
echo "Installing Grafana..."
apt-get install -y apt-transport-https wget gnupg curl
# Store the repository key in a dedicated keyring referenced via signed-by
# instead of the deprecated `apt-key add`, and import it BEFORE the repository
# is registered so the following `apt-get update` can verify it.
mkdir -p /etc/apt/keyrings
wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor > /etc/apt/keyrings/grafana.gpg
chmod 644 /etc/apt/keyrings/grafana.gpg
# Overwrite (not append) so re-running the script does not duplicate the entry
echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" \
    > /etc/apt/sources.list.d/grafana.list
apt-get update
apt-get install -y grafana

# Start Grafana
echo "Starting Grafana..."
systemctl start grafana-server
systemctl enable grafana-server

# Wait for Grafana to start: poll the health endpoint instead of sleeping for
# a fixed time, which may be too short on slow hosts.
echo "Waiting for Grafana to become ready..."
grafana_ready=false
for _ in $(seq 1 "$GRAFANA_WAIT_SECONDS"); do
    if curl -fs -o /dev/null "$GRAFANA_URL/api/health"; then
        grafana_ready=true
        break
    fi
    sleep 1
done

# Configure Grafana data source (Prometheus)
# Best effort: a failure here (for example the data source already exists or
# the admin password was changed) must not abort the setup, but it is reported.
echo ""
echo "Configuring Grafana data source..."
if [ "$grafana_ready" != true ]; then
    echo -e "${YELLOW}WARNING: Grafana did not become ready within ${GRAFANA_WAIT_SECONDS}s; skipping data source setup${NC}"
elif curl -fsS -X POST -H "Content-Type: application/json" \
    -d '{
        "name":"Prometheus",
        "type":"prometheus",
        "url":"http://localhost:9090",
        "access":"proxy",
        "isDefault":true
    }' \
    http://admin:admin@localhost:3000/api/datasources; then
    echo ""
    echo -e "${GREEN}Grafana data source configured${NC}"
else
    echo -e "${YELLOW}WARNING: Could not create the Prometheus data source (it may already exist); add it manually in Grafana if needed${NC}"
fi

echo ""
echo "========================================="
echo "Monitoring Setup Complete!"
echo "========================================="
echo ""
echo "Services:"
echo " Prometheus: http://172.16.3.30:9090"
echo " Grafana: http://172.16.3.30:3000 (default login: admin/admin)"
echo " Node Exporter: http://172.16.3.30:9100/metrics"
echo ""
echo "Next steps:"
echo "1. Access Grafana at http://172.16.3.30:3000"
echo "2. Login with default credentials (admin/admin)"
echo "3. Change the default password"
echo "4. Import the dashboard from grafana-dashboard.json"
echo "5. Configure alerting (optional)"
echo ""
echo "To import the dashboard:"
echo " Grafana > Dashboards > Import > Upload JSON file"
echo " Select: infrastructure/grafana-dashboard.json"
echo ""