Per the 2026-05-25 re-audit + Mike's decision (option b): the safe-rollout promotion gating these docs describe/test is NOT live (update_rollouts / update_health_metrics written-but-never-read; crash detection dead until the unmerged BUG-002 fix). Added a [WARNING] STATUS banner to the test plan, verify script, and the two 'complete' summaries so they aren't trusted as validating a working feature. Automation is a roadmap Phase-2 item requiring a full re-spec.
293 lines
8.9 KiB
Bash
Executable File
293 lines
8.9 KiB
Bash
Executable File
#!/usr/bin/env bash
# Verification script for Safe Agent Rollout System
# Run on gururmm-build (172.16.3.30) to verify Phase 1-5 implementation
#
# =====================================================================
# [WARNING] STATUS - 2026-05-25 re-audit: the safe-rollout gating this script
# verifies is NOT live. update_rollouts / update_health_metrics are written but
# never read to gate promotion (gururmm docs/FEATURE_ROADMAP.md BUG-004); crash
# detection was dead code until the BUG-002 fix (branch fix/audit-2-remediation,
# unmerged). Promotion is currently 100% manual. This verifies INERT scaffolding
# - Phase-2 aspirational until the gating is re-spec'd and wired. Decision
# 2026-05-25 (Mike): keep the feature inert and labeled.
# =====================================================================

# Stop at the first unhandled command failure.
set -e

# Colour codes are kept as literal backslash sequences; `echo -e` turns them
# into real escape characters when a message is printed.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Title banner, followed by one empty line.
cat <<'BANNER'
==========================================
GuruRMM Safe Rollout System Verification
==========================================

BANNER

#######################################
# Report the outcome of the command that ran immediately before this call.
# The caller's exit status is read from $?, so nothing may run between the
# command being judged and the call to check.
# Globals:   GREEN, RED, NC (read)
# Arguments: $1 - label printed after the status tag
# Outputs:   "[OK] <label>" or "[FAIL] <label>" on stdout
# Returns:   0 if the preceding command succeeded, 1 otherwise
#######################################
check() {
  # Must be the very first statement: any other command would overwrite $?.
  local prev_status=$?

  if [ "$prev_status" -ne 0 ]; then
    echo -e "${RED}[FAIL]${NC} $1"
    return 1
  fi

  echo -e "${GREEN}[OK]${NC} $1"
  return 0
}

#######################################
# Print an informational line tagged with [INFO].
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by echo -e)
# Outputs:   "[INFO] <message>" on stdout
#######################################
info() {
  local message=$1
  echo -e "${YELLOW}[INFO]${NC} ${message}"
}

# Number of failed checks; read by the summary at the end of the script.
FAIL_COUNT=0

# ===== Phase 1: Build Scripts =====
echo "Phase 1: Build Scripts"
echo "----------------------"

# Both build scripts must contain the beta-marking step.
#
# check() reads the result of the `if` condition through $?, so it has to be
# the first command in each branch. On the failure path check() returns 1;
# `|| true` keeps `set -e` from aborting the run before the failure is
# counted. The counter uses $(( )) rather than ((FAIL_COUNT++)) because the
# latter yields exit status 1 when the old value is 0, which also trips
# `set -e`.
for build_script in build-linux.sh build-windows.sh; do
  if grep -q "Mark all new builds as beta" "/opt/gururmm/${build_script}"; then
    check "${build_script} has beta marking code"
  else
    check "${build_script} missing beta marking code" || true
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
done

# Check for actual .channel files
downloads_dir="/var/www/gururmm/downloads"
CHANNEL_COUNT=$(find "$downloads_dir" -name "*.channel" 2>/dev/null | wc -l)
if [ "$CHANNEL_COUNT" -gt 0 ]; then
  check ".channel files exist in downloads directory ($CHANNEL_COUNT found)"

  # Look up the sample once and reuse it for both info lines.
  SAMPLE_FILE=$(find "$downloads_dir" -name "*.channel" 2>/dev/null | head -1)
  info "Sample: $SAMPLE_FILE"
  if [ -f "$SAMPLE_FILE" ]; then
    # An unreadable sample is only informational; do not let it abort the run.
    SAMPLE_CONTENT=$(cat -- "$SAMPLE_FILE" 2>/dev/null) || SAMPLE_CONTENT="(unreadable)"
    info "Content: $SAMPLE_CONTENT"
  fi
else
  check ".channel files in downloads directory" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
  info "No .channel files found - may need to trigger a build"
fi

echo ""

# ===== Phase 2: Database Migration =====
echo "Phase 2: Database Migration"
echo "---------------------------"

# Check tables exist.
#
# check() reads the result of the `if` condition through $?, so it has to be
# the first command in each branch. On the failure path check() returns 1;
# `|| true` keeps `set -e` from aborting the run before the failure is
# counted, and $(( )) is used because ((FAIL_COUNT++)) yields exit status 1
# when the old value is 0.
# NOTE(review): this relies on psql exiting non-zero when \d finds no such
# relation - confirm for the psql version installed on the build host.
for table in update_rollouts update_health_metrics agent_update_events; do
  if sudo -u postgres psql gururmm_production -t -c "\d ${table}" &>/dev/null; then
    check "${table} table exists"
  else
    check "${table} table exists" || true
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
done

# Check for data. These are informational only; `xargs` trims the padding
# psql adds around the value. If the query fails the result is empty, so an
# explicit "unknown" is shown instead of a blank.
ROLLOUT_COUNT=$(sudo -u postgres psql gururmm_production -t -c "SELECT COUNT(*) FROM update_rollouts" 2>/dev/null | xargs)
info "Rollouts tracked: ${ROLLOUT_COUNT:-unknown}"

EVENT_COUNT=$(sudo -u postgres psql gururmm_production -t -c "SELECT COUNT(*) FROM agent_update_events" 2>/dev/null | xargs)
info "Update events logged: ${EVENT_COUNT:-unknown}"

METRIC_COUNT=$(sudo -u postgres psql gururmm_production -t -c "SELECT COUNT(*) FROM update_health_metrics" 2>/dev/null | xargs)
info "Health metrics tracked: ${METRIC_COUNT:-unknown}"

echo ""

# ===== Phase 3: Health Monitoring =====
echo "Phase 3: Health Monitoring"
echo "--------------------------"

# check() reads the result of the `if` condition through $?, so it has to be
# the first command in each branch. On the failure path check() returns 1;
# `|| true` keeps `set -e` from aborting the run before the failure is
# counted, and $(( )) is used because ((FAIL_COUNT++)) yields exit status 1
# when the old value is 0.

# Check source files exist
if [ -f "/opt/gururmm/server/src/updates/health.rs" ]; then
  check "health.rs source file exists"
else
  check "health.rs source file exists" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

# Check if server is running
if systemctl is-active --quiet gururmm-server; then
  check "GuruRMM server is running"

  # Check for health monitor in logs. A missing log line is only a warning
  # and is not added to FAIL_COUNT.
  if sudo journalctl -u gururmm-server --since "1 hour ago" | grep -q "Health monitoring task spawned"; then
    check "Health monitor task spawned (found in logs)"
  else
    echo -e "${YELLOW}[WARN]${NC} Health monitor spawn message not found in recent logs"
    info "May need to restart service if code just deployed"
  fi
else
  check "GuruRMM server is running" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

echo ""

# ===== Phase 4: API Endpoints =====
echo "Phase 4: API Endpoints"
echo "----------------------"

# check() reads the result of the `if` condition through $?, so it has to be
# the first command in each branch. On the failure path check() returns 1;
# `|| true` keeps `set -e` from aborting the run before the failure is
# counted, and $(( )) is used because ((FAIL_COUNT++)) yields exit status 1
# when the old value is 0.
updates_api="/opt/gururmm/server/src/api/updates.rs"
if [ -f "$updates_api" ]; then
  check "updates.rs API file exists"

  # Check for key functions
  for endpoint in list_rollouts promote_version rollback_version; do
    if grep -q "pub async fn ${endpoint}" "$updates_api"; then
      check "${endpoint} endpoint defined"
    else
      check "${endpoint} endpoint defined" || true
      FAIL_COUNT=$((FAIL_COUNT + 1))
    fi
  done
else
  check "updates.rs API file exists" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

# Check routes registered
if grep -q "api::updates::list_rollouts" /opt/gururmm/server/src/api/mod.rs; then
  check "API routes registered in mod.rs"
else
  check "API routes registered in mod.rs" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

echo ""

# ===== Phase 5: Dashboard UI =====
echo "Phase 5: Dashboard UI"
echo "---------------------"

# check() reads the result of the `if` condition through $?, so it has to be
# the first command in each branch. On the failure path check() returns 1;
# `|| true` keeps `set -e` from aborting the run before the failure is
# counted, and $(( )) is used because ((FAIL_COUNT++)) yields exit status 1
# when the old value is 0.
updates_page="/opt/gururmm/dashboard/src/pages/Updates.tsx"
if [ -f "$updates_page" ]; then
  check "Updates.tsx page exists"

  # Check for key components. Each entry is "<grep pattern>:<check label>".
  for spec in \
    "RolloutInfo:RolloutInfo interface defined" \
    "handlePromote:Promote functionality implemented" \
    "handleRollback:Rollback functionality implemented"; do
    pattern=${spec%%:*}
    label=${spec#*:}
    if grep -q "$pattern" "$updates_page"; then
      check "$label"
    else
      check "$label" || true
      FAIL_COUNT=$((FAIL_COUNT + 1))
    fi
  done
else
  check "Updates.tsx page exists" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

# Check navigation
if grep -q "/updates" /opt/gururmm/dashboard/src/App.tsx; then
  check "Updates route registered in App.tsx"
else
  check "Updates route registered in App.tsx" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

if grep -q "updates" /opt/gururmm/dashboard/src/components/Layout.tsx; then
  check "Updates navigation link added"
else
  check "Updates navigation link added" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

echo ""

# ===== Build Status =====
echo "Build Status"
echo "------------"

# Check server binary
server_bin="/opt/gururmm/gururmm-server"
if [ -f "$server_bin" ]; then
  # Try the BSD-style stat flags first, then the GNU-style ones. A failure of
  # both is tolerated (`|| true`) so `set -e` does not abort the run over a
  # purely informational value.
  SERVER_SIZE=$(stat -f%z "$server_bin" 2>/dev/null || stat -c%s "$server_bin" 2>/dev/null) || true
  # The pipe binds tighter than ||, so `cut` trims only the GNU-style output.
  SERVER_DATE=$(stat -f%Sm "$server_bin" 2>/dev/null || stat -c%y "$server_bin" 2>/dev/null | cut -d' ' -f1) || true

  # Plain assignments leave $? at 0, which is what check() must see here:
  # the binary exists regardless of whether stat could describe it.
  SERVER_SIZE=${SERVER_SIZE:-unknown}
  SERVER_DATE=${SERVER_DATE:-unknown}
  check "Server binary exists (${SERVER_SIZE} bytes, ${SERVER_DATE})"
else
  # check() returns 1 here; `|| true` keeps `set -e` from aborting before the
  # failure is counted. $(( )) is used because ((FAIL_COUNT++)) yields exit
  # status 1 when the old value is 0.
  check "Server binary exists" || true
  FAIL_COUNT=$((FAIL_COUNT + 1))
fi

# Check dashboard build. A missing dist/ directory is only a warning and is
# not added to FAIL_COUNT.
if [ -d "/opt/gururmm/dashboard/dist" ]; then
  check "Dashboard build exists"
else
  echo -e "${YELLOW}[WARN]${NC} Dashboard dist/ directory not found - may need to run 'npm run build'"
fi

echo ""

# ===== Summary =====
# Print the final verdict and exit 0 when no check failed, 1 otherwise.
cat <<'EOF'
==========================================
Verification Summary
==========================================
EOF

if [ "$FAIL_COUNT" -eq 0 ]; then
  echo -e "${GREEN}✓ All checks passed!${NC}"
  cat <<'EOF'

Safe Agent Rollout System is ready for Phase 6 testing.

Next steps:
 1. Review PHASE_6_TEST_PLAN.md
 2. Execute Test 1: Beta-first build workflow
 3. Execute Test 2-4: Health monitoring, promotion, rollback
 4. Execute Test 5: Dashboard UI testing
 5. Execute Test 6: Integration testing
EOF
  exit 0
else
  echo -e "${RED}✗ ${FAIL_COUNT} check(s) failed${NC}"
  cat <<'EOF'

Review failures above and fix before proceeding to Phase 6.

Common issues:
 - Code not deployed (git pull + rebuild needed)
 - Migration not applied (run migration 046)
 - Service not restarted (systemctl restart gururmm-server)
 - Build not triggered (no .channel files yet)
EOF
  exit 1
fi