Added comprehensive production infrastructure:

Systemd Service:
- guruconnect.service with auto-restart, resource limits, security hardening
- setup-systemd.sh installation script

Prometheus Metrics:
- Added prometheus-client dependency
- Created metrics module tracking:
  - HTTP requests (count, latency)
  - Sessions (created, closed, active)
  - Connections (WebSocket, by type)
  - Errors (by type)
  - Database operations (count, latency)
  - Server uptime
- Added /metrics endpoint
- Background task for uptime updates

Monitoring Configuration:
- prometheus.yml with scrape configs for GuruConnect and node_exporter
- alerts.yml with alerting rules
- grafana-dashboard.json with 10 panels
- setup-monitoring.sh installation script

PostgreSQL Backups:
- backup-postgres.sh with gzip compression
- restore-postgres.sh with safety checks
- guruconnect-backup.service and .timer for automated daily backups
- Retention policy: 30 daily, 4 weekly, 6 monthly

Health Monitoring:
- health-monitor.sh checking HTTP, disk, memory, database, metrics
- guruconnect.logrotate for log rotation
- Email alerts on failures

Updated CHECKLIST_STATE.json to reflect Week 1 completion (77%) and Week 2 start.
Created PHASE1_WEEK2_INFRASTRUCTURE.md with comprehensive planning.

Ready for deployment and testing on RMM server.
46 lines
1.1 KiB
YAML
46 lines
1.1 KiB
YAML
# Prometheus configuration for GuruConnect
#
# Install Prometheus:
#   sudo apt-get install prometheus
#
# Copy this file into place:
#   sudo cp prometheus.yml /etc/prometheus/prometheus.yml
#
# Restart Prometheus:
#   sudo systemctl restart prometheus
|
|
|
|
# Global settings. scrape_interval is the default for every job in
# scrape_configs; evaluation_interval controls how often rule files
# (if any are enabled) are evaluated.
global:
  scrape_interval: 15s  # Scrape metrics every 15 seconds
  evaluation_interval: 15s  # Evaluate rules every 15 seconds
  # Labels added to series/alerts when Prometheus talks to external systems
  # (federation, remote storage, Alertmanager). They identify this server.
  external_labels:
    cluster: 'guruconnect-production'
    environment: 'production'
|
|
|
|
# Scrape configurations
# Each list item is one job; `targets` and `labels` belong to the same
# static_configs entry, so the labels are attached to every listed target.
scrape_configs:
  # GuruConnect server metrics
  # No metrics_path is set, so Prometheus uses its default path (/metrics).
  - job_name: 'guruconnect'
    static_configs:
      - targets: ['172.16.3.30:3002']
        labels:
          service: 'guruconnect-server'
          # Explicit instance label replaces the default host:port value, so
          # both jobs below report the same instance name for this host.
          instance: 'rmm-server'

  # Node Exporter (system metrics)
  # Install: sudo apt-get install prometheus-node-exporter
  - job_name: 'node_exporter'
    static_configs:
      - targets: ['172.16.3.30:9100']
        labels:
          instance: 'rmm-server'
|
|
|
|
# Alert rules (optional)
# Uncomment to load alerting rules; keep the indentation shown.
# rule_files:
#   - '/etc/prometheus/alerts.yml'
|
|
|
|
# Alertmanager configuration (optional)
# Uncomment to send alerts to an Alertmanager; keep the indentation shown.
# alerting:
#   alertmanagers:
#     - static_configs:
#         - targets: ['localhost:9093']
|