feat(rmm): onboarding diagnostic (Phase 1) - probe + triage + baseline
/rmm diagnose: dispatches a Windows security/health probe to a newly onboarded agent, grades RED/AMBER/GREEN, writes an immutable per-client baseline (clients/<slug>/onboarding-baselines/), diffs vs prior, and alerts CRITICALs to #dev-alerts. Probe is PS5.1/ASCII/SYSTEM-safe, never-abort, base64 chunked upload around the agent command-size cap. Code-reviewed (no blockers); folded in immutability guard, severity-independent finding ids, Defender-unknown sentinel, expanded competitor/backup detection. First baselines captured: Rednour FRONTDESKRECEPT + LEGALASST (both RED - prior MSP ScreenConnect/Splashtop/Syncro still live; LEGALASST OS EOL). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
1275
.claude/scripts/onboarding-diagnostic.ps1
Normal file
1275
.claude/scripts/onboarding-diagnostic.ps1
Normal file
File diff suppressed because it is too large
Load Diff
574
.claude/scripts/run-onboarding-diagnostic.sh
Normal file
574
.claude/scripts/run-onboarding-diagnostic.sh
Normal file
@@ -0,0 +1,574 @@
|
||||
#!/usr/bin/env bash
|
||||
# run-onboarding-diagnostic.sh - GuruRMM onboarding diagnostic runner (Phase 1).
|
||||
#
|
||||
# Dispatches .claude/scripts/onboarding-diagnostic.ps1 to a Windows agent via the
|
||||
# GuruRMM RMM API, extracts the fenced JSON result, grades it RED/AMBER/GREEN,
|
||||
# writes an immutable baseline (JSON + Markdown report) under
|
||||
# clients/<slug>/onboarding-baselines/, diffs against any prior baseline, and
|
||||
# alerts #dev-alerts on RED / critical findings.
|
||||
#
|
||||
# Usage:
|
||||
# bash run-onboarding-diagnostic.sh <hostname-or-uuid> [client-slug]
|
||||
#
|
||||
# Mirrors the plumbing in .claude/commands/rmm.md (vault auth -> JWT -> dispatch
|
||||
# -> poll -> command_text/stdout). Read-only against the endpoint; the probe only
|
||||
# collects, it changes nothing.
|
||||
|
||||
set -u

# ---------------------------------------------------------------------------
# Arguments
#   $1  agent hostname or UUID (required)
#   $2  client slug (optional; derived from the agent's client name if omitted)
# ---------------------------------------------------------------------------
TARGET="${1:-}"
CLIENT_SLUG="${2:-}"

# Nothing can be dispatched without a target: print usage and stop.
[ -n "$TARGET" ] || {
  echo "[ERROR] Usage: bash run-onboarding-diagnostic.sh <hostname-or-uuid> [client-slug]" >&2
  exit 1
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Bootstrap (resolve repo root, vault, RMM base)
# ---------------------------------------------------------------------------
# Paths are derived from this script's own location so the runner works from
# any current directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
VAULT="$REPO_ROOT/.claude/scripts/vault.sh"
PROBE="$SCRIPT_DIR/onboarding-diagnostic.ps1"
ALERT="$REPO_ROOT/.claude/scripts/post-bot-alert.sh"
RMM="http://172.16.3.30:3001"

if [ ! -f "$PROBE" ]; then
  echo "[ERROR] Probe script not found: $PROBE" >&2
  exit 1
fi

# The vault helper supplies the API credentials; report its absence directly
# instead of letting it surface later as a generic credentials error.
if [ ! -f "$VAULT" ]; then
  echo "[ERROR] Vault helper not found: $VAULT" >&2
  exit 1
fi

# Check every external tool the run needs up front, so a missing one is
# reported before any command has been dispatched to the endpoint.
for tool in jq curl base64 split awk sed; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "[ERROR] Required tool not found: $tool" >&2
    exit 1
  fi
done
|
||||
|
||||
# Soft-fail wrapper for the bot alert so an alerting failure never aborts the run.
#
# Arguments: $1 - message text, passed to the alert script as its only argument
# Globals:   ALERT (read) - path of the alert script, executed with bash
# Outputs:   nothing on stdout; a warning on stderr when the alert was not sent
# Returns:   always 0
post_alert() {
  local msg="$1"
  local alert_script="${ALERT:-}"

  # A missing script is not fatal, but say so: otherwise a RED result could go
  # unannounced without anyone noticing.
  if [ ! -f "$alert_script" ]; then
    echo "[WARNING] Alert script not found ($alert_script); alert not sent." >&2
    return 0
  fi

  # The alert script's own output is discarded; only its exit status matters.
  if ! bash "$alert_script" "$msg" >/dev/null 2>&1; then
    echo "[WARNING] Alert script failed; alert not sent." >&2
  fi
  return 0
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Authenticate
# ---------------------------------------------------------------------------
# Credentials come from the vault helper; its stderr is suppressed and an
# empty or "null" value is treated as "could not read".
RMM_EMAIL="$(bash "$VAULT" get-field infrastructure/gururmm-server.sops.yaml credentials.gururmm-api.admin-email 2>/dev/null)"
RMM_PASS="$(bash "$VAULT" get-field infrastructure/gururmm-server.sops.yaml credentials.gururmm-api.admin-password 2>/dev/null)"

if [ -z "$RMM_EMAIL" ] || [ -z "$RMM_PASS" ] \
  || [ "$RMM_EMAIL" = "null" ] || [ "$RMM_PASS" = "null" ]; then
  echo "[ERROR] Could not read GuruRMM credentials from vault (infrastructure/gururmm-server.sops.yaml)" >&2
  exit 1
fi

# Build the login body from the environment and stream it to curl on stdin, so
# the password never appears in the argument list of jq or curl (argv is
# visible to other local users via ps).
TOKEN="$(
  RMM_EMAIL="$RMM_EMAIL" RMM_PASS="$RMM_PASS" \
    jq -nc '{email: env.RMM_EMAIL, password: env.RMM_PASS}' \
    | curl -s -m 30 -X POST "$RMM/api/auth/login" \
      -H "Content-Type: application/json" \
      --data-binary @- \
    | jq -r '.token // empty'
)"

if [ -z "$TOKEN" ]; then
  echo "[ERROR] RMM login failed (no token returned)" >&2
  exit 1
fi
echo "[OK] Authenticated to GuruRMM"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Resolve agent (by exact UUID, exact hostname, then partial hostname)
# ---------------------------------------------------------------------------
AGENTS="$(curl -s -m 30 "$RMM/api/agents" -H "Authorization: Bearer $TOKEN")"
if [ -z "$AGENTS" ] || ! echo "$AGENTS" | jq -e 'type=="array"' >/dev/null 2>&1; then
  echo "[ERROR] Could not retrieve agent list" >&2
  exit 1
fi

# UUID-shaped target -> match by id; otherwise match by hostname.
# All comparisons are case-insensitive, and a null/non-string id or hostname
# is coerced to text first so one odd record cannot break the whole lookup.
AGENT=""
if echo "$TARGET" | grep -qiE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
  AGENT="$(echo "$AGENTS" | jq --arg id "$TARGET" '
    [.[] | select(((.id // "") | tostring | ascii_downcase) == ($id | ascii_downcase))]
    | .[0] // empty')"
else
  # exact hostname first
  AGENT="$(echo "$AGENTS" | jq --arg h "$TARGET" '
    [.[] | select(((.hostname // "") | tostring | ascii_downcase) == ($h | ascii_downcase))]
    | .[0] // empty')"
  if [ -z "$AGENT" ] || [ "$AGENT" = "null" ]; then
    # partial (substring) match
    MATCHES="$(echo "$AGENTS" | jq --arg h "$TARGET" '
      [.[] | select(((.hostname // "") | tostring | ascii_downcase) | contains($h | ascii_downcase))]')"
    COUNT="$(echo "$MATCHES" | jq 'length' 2>/dev/null)"
    case "$COUNT" in
      # An empty count means the filter failed; treat it as "no match" rather
      # than falling through to the ambiguity error.
      0 | "")
        AGENT=""
        ;;
      1)
        AGENT="$(echo "$MATCHES" | jq '.[0]')"
        ;;
      *)
        echo "[ERROR] Multiple agents match '$TARGET' - be more specific:" >&2
        echo "$MATCHES" | jq -r '.[] | " \(.hostname) (\(.os_type)) id=\(.id) client=\(.client_name)"' >&2
        exit 1
        ;;
    esac
  fi
fi

if [ -z "$AGENT" ] || [ "$AGENT" = "null" ]; then
  echo "[ERROR] No agent found matching '$TARGET'. Run /rmm agents to list enrolled agents." >&2
  exit 1
fi
|
||||
|
||||
# Pull the fields used below out of the resolved agent record.
AGENT_ID="$(echo "$AGENT" | jq -r '.id // empty')"
AGENT_HOST="$(echo "$AGENT" | jq -r '.hostname // empty')"
AGENT_OS="$(echo "$AGENT" | jq -r '.os_type // empty')"
AGENT_STATUS="$(echo "$AGENT" | jq -r '.status // "unknown"')"
# tostring (not `// "null"`): jq's alternative operator treats false like a
# missing value, which would display an explicit false as "null".
AGENT_CONNECTED="$(echo "$AGENT" | jq -r '.is_connected | tostring')"
AGENT_CLIENT="$(echo "$AGENT" | jq -r '.client_name // empty')"
AGENT_LAST="$(echo "$AGENT" | jq -r '.last_seen // "never"')"

echo "[OK] Agent: $AGENT_HOST ($AGENT_OS) status=$AGENT_STATUS connected=$AGENT_CONNECTED client=$AGENT_CLIENT last_seen=$AGENT_LAST id=$AGENT_ID"

# The probe is a PowerShell script, so only Windows agents are accepted.
if [ "$AGENT_OS" != "windows" ]; then
  echo "[ERROR] This diagnostic is Windows-only. Agent os_type='$AGENT_OS'." >&2
  exit 1
fi

# Treat online if status==online OR is_connected==true (is_connected can be null even when online).
# Offline is only a warning; the run continues.
if [ "$AGENT_STATUS" != "online" ] && [ "$AGENT_CONNECTED" != "true" ]; then
  echo "[WARNING] Agent appears offline (status=$AGENT_STATUS). The command will queue and run when it reconnects."
fi
|
||||
|
||||
# slugify_client_name <name> -> echoes a lowercase slug of [a-z0-9-].
# Runs of any other characters collapse to a single '-', and leading/trailing
# '-' are trimmed. The result is empty when the name has no ASCII letters or
# digits. LC_ALL=C keeps the character classes byte-wise and locale-independent.
slugify_client_name() {
  printf '%s\n' "${1:-}" \
    | LC_ALL=C tr '[:upper:]' '[:lower:]' \
    | LC_ALL=C sed -E 's/[^a-z0-9]+/-/g; s/^-+//; s/-+$//'
}

# Derive client slug if not supplied: prefer explicit arg; else slugify client_name.
# Falls back to '_unsorted' when there is no client name or the name slugifies
# to nothing, so the baseline directory path never contains an empty component.
if [ -z "${CLIENT_SLUG:-}" ]; then
  CLIENT_SLUG="$(slugify_client_name "${AGENT_CLIENT:-}")"
  if [ -n "$CLIENT_SLUG" ]; then
    echo "[INFO] No client slug supplied; derived '$CLIENT_SLUG' from client name '$AGENT_CLIENT'."
  else
    CLIENT_SLUG="_unsorted"
    if [ -n "${AGENT_CLIENT:-}" ]; then
      echo "[WARNING] Client name '$AGENT_CLIENT' has no slug-safe characters; using '_unsorted'."
    else
      echo "[WARNING] No client slug and no client name; using '_unsorted'."
    fi
  fi
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
# Command dispatch helper
# ---------------------------------------------------------------------------
# The agent caps the inline command body at roughly 32-40 KB (above that it
# returns "Failed to execute command" before PowerShell ever runs). The probe is
# ~60 KB, so we cannot send it inline. Instead we:
#   1. base64-encode the probe locally,
#   2. upload it to a temp file on the endpoint in <24 KB chunks (one command
#      each: first writes, the rest append),
#   3. send a final small command that decodes the file to a .ps1, runs it,
#      prints the fenced JSON, and deletes both temp files.
# Each dispatched command stays well under the agent limit, so this scales no
# matter how large the probe grows in later phases.

# Private scratch directory for payloads, chunks and the last command id.
# Only mktemp-created directories are accepted (never a predictable /tmp
# name); the second form is the template-style invocation for mktemp
# implementations that reject a bare -d.
WORK_DIR="$(mktemp -d 2>/dev/null || mktemp -d -t onboard-diag.XXXXXX 2>/dev/null)" || WORK_DIR=""
if [ -z "$WORK_DIR" ] || [ ! -d "$WORK_DIR" ]; then
  echo "[ERROR] Could not create a temporary work directory" >&2
  exit 1
fi

# Remove the scratch directory on every exit path. ${WORK_DIR:?} refuses to
# expand to an empty path, so rm -rf can never be aimed at "/".
cleanup() { rm -rf -- "${WORK_DIR:?}" 2>/dev/null || true; }
trap cleanup EXIT
|
||||
|
||||
# dispatch_one <command-file-with-script> <timeout_seconds> -> echoes result JSON, returns 0/1
#
# Sends the contents of the script file to the agent as one PowerShell command
# and polls until the command reaches a terminal state.
#
# Globals:   RMM, AGENT_ID, TOKEN, WORK_DIR (read)
# Arguments: $1 - file whose contents become the command body
#            $2 - timeout_seconds sent with the command (must be a JSON number)
# Outputs:   stdout - the command's result JSON once it is
#                     completed/failed/cancelled/interrupted
#            stderr - error text on failure
#            file   - $WORK_DIR/last_cmd_id holds the id of the finished command
# Returns:   0 when a terminal state was reached (including "failed"; callers
#            inspect .status), 1 when the payload could not be built, the
#            dispatch was rejected, or polling ran out (72 polls x 5 s).
dispatch_one() {
  local script_file="$1"
  local to="$2"
  local payload_file resp cmd_id result polls
  local status=""
  local -r max_polls=72 poll_interval=5

  payload_file="$WORK_DIR/payload.json"
  # Fail here instead of posting an empty body when jq cannot build the payload
  # (unreadable script file, non-numeric timeout).
  if ! jq -nc --rawfile cmd "$script_file" --argjson to "$to" \
    '{command_type:"powershell", command:$cmd, timeout_seconds:$to}' > "$payload_file"; then
    echo "[ERROR] Could not build command payload from $script_file (timeout=$to)" >&2
    return 1
  fi

  resp="$(curl -s -m 30 -X POST "$RMM/api/agents/$AGENT_ID/command" \
    -H "Authorization: Bearer $TOKEN" \
    -H "Content-Type: application/json" \
    --data-binary "@$payload_file")"
  # A non-JSON response simply yields no command id; the raw response is shown below.
  cmd_id="$(printf '%s' "$resp" | jq -r '.command_id // empty' 2>/dev/null)"
  if [ -z "$cmd_id" ]; then
    echo "[ERROR] Dispatch failed: $resp" >&2
    return 1
  fi

  polls=0
  while [ "$polls" -lt "$max_polls" ]; do
    result="$(curl -s -m 30 "$RMM/api/commands/$cmd_id" -H "Authorization: Bearer $TOKEN")"
    status="$(printf '%s' "$result" | jq -r '.status // empty' 2>/dev/null)"
    case "$status" in
      completed | failed | cancelled | interrupted)
        # Persist the command id to a file: this function runs in a $( )
        # subshell, so a plain variable assignment would not survive.
        printf '%s' "$cmd_id" > "$WORK_DIR/last_cmd_id" 2>/dev/null || true
        printf '%s\n' "$result"
        return 0
        ;;
    esac
    # Any other status (running, pending, empty, unrecognised) means "not done yet".
    polls=$((polls + 1))
    sleep "$poll_interval"
  done

  echo "[ERROR] Command $cmd_id did not finish (last status=$status)" >&2
  return 1
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Upload probe (base64, chunked) then execute
# ---------------------------------------------------------------------------
echo "[INFO] Uploading probe to endpoint (chunked base64)..."

# Stable-ish remote temp names; unique per run via timestamp+pid.
# The leading \$ keeps "$env:TEMP" literal here so PowerShell expands it on the
# endpoint rather than bash expanding it locally.
REMOTE_TAG="grmm_onboard_$(date -u +%Y%m%d%H%M%S)_$$"
REMOTE_B64="\$env:TEMP\\${REMOTE_TAG}.b64"
REMOTE_PS1="\$env:TEMP\\${REMOTE_TAG}.ps1"

# Produce base64 (single line) and split into chunks.
# -w0 is tried first; if that invocation fails, fall back to plain base64 with
# the newlines stripped.
B64_FILE="$WORK_DIR/probe.b64"
if ! base64 -w0 "$PROBE" > "$B64_FILE" 2>/dev/null; then
  base64 "$PROBE" | tr -d '\n' > "$B64_FILE"
fi
if [ ! -s "$B64_FILE" ]; then
  echo "[ERROR] Could not base64-encode probe (or probe is empty): $PROBE" >&2
  exit 1
fi

CHUNK_DIR="$WORK_DIR/chunks"
mkdir -p "$CHUNK_DIR"
split -b 24000 "$B64_FILE" "$CHUNK_DIR/chunk_"

# Collect the chunk files with a glob into an array (sorted by the shell), so
# paths containing spaces survive and no `ls` output is parsed.
CHUNK_FILES=("$CHUNK_DIR"/chunk_*)
if [ ! -e "${CHUNK_FILES[0]}" ]; then
  echo "[ERROR] Splitting the encoded probe produced no chunks" >&2
  exit 1
fi
N_CHUNKS="${#CHUNK_FILES[@]}"
echo "[INFO] Probe is $(wc -c < "$PROBE") bytes -> $N_CHUNKS chunk(s)"

IDX=0
for ch in "${CHUNK_FILES[@]}"; do
  IDX=$((IDX + 1))
  # INVARIANT: DATA is RFC4648 standard base64 (alphabet A-Za-z0-9+/ with '='
  # padding). None of those characters are PowerShell metacharacters, so DATA
  # is safe to interpolate raw into the here-doc below. If this is ever changed
  # to base64url (alphabet adds '-' and '_'), it stays safe too - but revisit
  # this assertion before swapping the encoder, do not assume silently.
  DATA="$(< "$ch")"
  SCRIPT_FILE="$WORK_DIR/chunkcmd.ps1"

  # First chunk creates/overwrites the remote file; later chunks append.
  # Neither call adds a newline, so the remote file stays one base64 string.
  if [ "$IDX" -eq 1 ]; then
    PS_WRITE="WriteAllText"
  else
    PS_WRITE="AppendAllText"
  fi
  cat > "$SCRIPT_FILE" <<PS
\$ErrorActionPreference = 'Stop'
[System.IO.File]::${PS_WRITE}("$REMOTE_B64", "$DATA")
Write-Output "CHUNK $IDX OK"
PS

  CH_RESULT="$(dispatch_one "$SCRIPT_FILE" 60)" || {
    echo "[ERROR] Chunk $IDX dispatch failed" >&2
    exit 1
  }
  CH_STATUS="$(echo "$CH_RESULT" | jq -r '.status')"
  if [ "$CH_STATUS" != "completed" ]; then
    echo "[ERROR] Chunk $IDX upload failed: status=$CH_STATUS stderr=$(echo "$CH_RESULT" | jq -r '.stderr' | head -c 200)" >&2
    exit 1
  fi
  echo "[OK] Uploaded chunk $IDX/$N_CHUNKS"
done
|
||||
|
||||
echo "[INFO] Decoding and executing probe on endpoint (timeout 240s)..."

# Last, small command: rebuild the .ps1 from the uploaded base64 file, run it
# in a child powershell.exe, and remove both temp files whatever happens.
RUN_SCRIPT="$WORK_DIR/runcmd.ps1"
{
  cat <<PS
\$ErrorActionPreference = 'Continue'
try {
\$b64 = [System.IO.File]::ReadAllText("$REMOTE_B64")
\$bytes = [System.Convert]::FromBase64String(\$b64)
[System.IO.File]::WriteAllBytes("$REMOTE_PS1", \$bytes)
& powershell.exe -NonInteractive -ExecutionPolicy Bypass -File "$REMOTE_PS1"
} catch {
Write-Output ("PROBE_RUN_ERROR: " + \$_.Exception.Message)
} finally {
Remove-Item -Path "$REMOTE_B64" -Force -ErrorAction SilentlyContinue
Remove-Item -Path "$REMOTE_PS1" -Force -ErrorAction SilentlyContinue
}
PS
} > "$RUN_SCRIPT"

if ! RESULT="$(dispatch_one "$RUN_SCRIPT" 240)"; then
  echo "[ERROR] Probe execution dispatch failed" >&2
  exit 1
fi

# dispatch_one leaves the id of the finished command in a file.
CMD_ID="$(cat "$WORK_DIR/last_cmd_id" 2>/dev/null)" || CMD_ID="unknown"

# Unpack the pieces of the result record used from here on.
FINAL_STATUS="$(jq -r '.status // empty' <<<"$RESULT")"
EXIT_CODE="$(jq -r '.exit_code // "null"' <<<"$RESULT")"
STDOUT="$(jq -r '.stdout // ""' <<<"$RESULT")"
STDERR="$(jq -r '.stderr // ""' <<<"$RESULT")"

echo "[INFO] Probe finished: status=$FINAL_STATUS exit_code=$EXIT_CODE stdout_len=${#STDOUT} stderr_len=${#STDERR} cmd=$CMD_ID"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Extract fenced JSON from stdout
# ---------------------------------------------------------------------------
# Keep only the lines between the START and END marker lines; anything the
# command printed before or after the fence is dropped.
DIAG_JSON="$(printf '%s' "$STDOUT" | awk '
/===DIAG-JSON-START===/ { capture=1; next }
/===DIAG-JSON-END===/ { capture=0 }
capture { print }
')"

# The payload counts as usable only if it is non-empty and jq finds a truthy .host.
DIAG_VALID=0
if [ -n "$DIAG_JSON" ] && echo "$DIAG_JSON" | jq -e '.host' >/dev/null 2>&1; then
  DIAG_VALID=1
fi

if [ "$DIAG_VALID" -ne 1 ]; then
  # Dump what came back (bounded) to stderr so the failure can be diagnosed.
  {
    echo "[ERROR] Could not extract valid diagnostic JSON from probe output."
    echo "[ERROR] status=$FINAL_STATUS exit_code=$EXIT_CODE"
    if [ -n "$STDERR" ]; then
      echo "--- stderr ---"
      printf '%s\n' "$STDERR" | head -40
    fi
    echo "--- stdout (first 60 lines) ---"
    printf '%s\n' "$STDOUT" | head -60
  } >&2
  exit 1
fi

echo "[OK] Extracted diagnostic JSON ($(echo "$DIAG_JSON" | wc -c | tr -d ' ') bytes)"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Grade: RED (any critical) / AMBER (any warning, no critical) / GREEN (none)
# ---------------------------------------------------------------------------
# Count findings per severity in one jq pass. A missing or non-array
# .findings makes the filter fail, and that aborts the run: silently counting
# zero would grade a host GREEN on the strength of a malformed probe result.
COUNTS="$(printf '%s\n' "$DIAG_JSON" | jq -r '
  if (.findings | type) != "array" then error("findings is not an array") else .findings end
  | . as $all
  | [ ("critical", "warning", "unknown", "info") as $s
      | ($all | map(select(.severity == $s)) | length) ]
  | @tsv
' 2>/dev/null)" || COUNTS=""

IFS=$'\t' read -r N_CRIT N_WARN N_UNK N_INFO <<<"$COUNTS"

# Every count must be a plain non-negative integer before it is compared.
for n in "$N_CRIT" "$N_WARN" "$N_UNK" "$N_INFO"; do
  case "$n" in
    '' | *[!0-9]*)
      echo "[ERROR] Probe JSON has no usable findings array; refusing to grade." >&2
      exit 1
      ;;
  esac
done

# "unknown" and "info" findings are counted and reported but do not change the grade.
if [ "$N_CRIT" -gt 0 ]; then
  GRADE="RED"
elif [ "$N_WARN" -gt 0 ]; then
  GRADE="AMBER"
else
  GRADE="GREEN"
fi

# Prefer the hostname the probe reported; fall back to the RMM record.
PROBE_HOST="$(echo "$DIAG_JSON" | jq -r '.host // empty')"
if [ -z "$PROBE_HOST" ]; then
  PROBE_HOST="$AGENT_HOST"
fi
COLLECTED="$(echo "$DIAG_JSON" | jq -r '.collected_at_utc // empty')"

echo "[INFO] Grade=$GRADE critical=$N_CRIT warning=$N_WARN unknown=$N_UNK info=$N_INFO"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Output paths
# ---------------------------------------------------------------------------
BASE_DIR="$REPO_ROOT/clients/$CLIENT_SLUG/onboarding-baselines"
# Without the directory neither the snapshot nor the report can be written, so
# stop here rather than carry on to alerts and a summary naming missing files.
if ! mkdir -p "$BASE_DIR"; then
  echo "[ERROR] Could not create baseline directory: $BASE_DIR" >&2
  exit 1
fi

UTC_STAMP="$(date -u +%Y%m%dT%H%M%S)"
# Anything outside [A-Za-z0-9._-] in the hostname becomes '_' so it is safe in a filename.
SAFE_HOST="$(echo "$PROBE_HOST" | sed -E 's/[^A-Za-z0-9._-]+/_/g')"
JSON_PATH="$BASE_DIR/${SAFE_HOST}-${UTC_STAMP}.json"
MD_PATH="$BASE_DIR/${SAFE_HOST}-${UTC_STAMP}.md"

# Immutability guard: the per-second UTC_STAMP can collide if two runs land in
# the same second (or a re-run of the same dispatch). A baseline is immutable
# once written, so never truncate an existing one - append a PID uniquifier
# instead so the prior baseline survives intact.
if [ -e "$JSON_PATH" ]; then
  JSON_PATH="${JSON_PATH%.json}-$$.json"
  MD_PATH="${MD_PATH%.md}-$$.md"
fi
|
||||
|
||||
# find_prior_baseline <baseline-dir> <safe-host> -> echoes the newest baseline
# JSON path for exactly that host, or nothing when there is none.
#
# The glob "<host>-*.json" alone also matches other hosts whose names extend
# this one (host "PC" would pick up "PC-A-<stamp>.json"), so the part after
# "<host>-" must be precisely a <YYYYMMDD>T<HHMMSS> stamp, optionally followed
# by the "-<pid>" collision suffix. Stamps are fixed-width digits, so plain
# string comparison orders them chronologically.
find_prior_baseline() {
  local dir="${1:-}" host="${2:-}"
  local candidate stem best_path="" best_stem=""

  if [ -z "$dir" ] || [ ! -d "$dir" ]; then
    return 0
  fi

  for candidate in "$dir/${host}-"*.json; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [ -f "$candidate" ] || continue
    stem="${candidate##*/}"
    stem="${stem#"${host}-"}"
    stem="${stem%.json}"
    if [[ "$stem" =~ ^[0-9]{8}T[0-9]{6}(-[0-9]+)?$ ]]; then
      if [ -z "$best_stem" ] || [[ "$stem" > "$best_stem" ]]; then
        best_stem="$stem"
        best_path="$candidate"
      fi
    fi
  done

  if [ -n "$best_path" ]; then
    printf '%s\n' "$best_path"
  fi
  return 0
}

# Find the most recent PRIOR baseline json for this host (before we write the new one).
PRIOR_JSON="$(find_prior_baseline "${BASE_DIR:-}" "${SAFE_HOST:-}")"
|
||||
|
||||
# Write the immutable raw snapshot (pretty-printed for readability/diffing).
# The report's diff section reads this file back, so a failed write is fatal
# rather than something to discover later.
if ! printf '%s\n' "$DIAG_JSON" | jq '.' > "$JSON_PATH"; then
  echo "[ERROR] Could not write baseline snapshot: $JSON_PATH" >&2
  exit 1
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
# Build the Markdown report
# ---------------------------------------------------------------------------
# jq helpers prepended to the report filters below:
#   txt - render a value as text: strings as-is, null as "", anything else as JSON
#   tri - render a flag without losing an explicit false: null -> "?", else text
#         (jq's `//` treats false like a missing value, so `x // "?"` would
#         show a disabled feature as "?")
#   lst - coerce to a list: arrays as-is, null -> [], any other value -> [value]
#         (defensive: a one-element list may arrive as a bare scalar - an
#         assumption about the probe's JSON, not something visible here)
JQ_REPORT_DEFS='
def txt: if type == "string" then . elif . == null then "" else tojson end;
def tri: if . == null then "?" else txt end;
def lst: if type == "array" then . elif . == null then [] else [.] end;
'

# emit_bullets <json-array> <jq-filter>
# Prints "- (none)" for an empty array, otherwise the filter's raw output.
emit_bullets() {
  local items="$1" filter="$2"
  if [ "$(echo "$items" | jq 'length')" = "0" ]; then
    echo "- (none)"
  else
    echo "$items" | jq -r "$filter"
  fi
}

{
  echo "# Onboarding Diagnostic Baseline - $PROBE_HOST"
  echo ""
  echo "- **Grade:** $GRADE"
  echo "- **Host:** $PROBE_HOST"
  echo "- **Client:** ${AGENT_CLIENT:-$CLIENT_SLUG} (\`$CLIENT_SLUG\`)"
  echo "- **Collected (UTC):** $COLLECTED"
  echo "- **Agent ID:** $AGENT_ID"
  echo "- **Command ID:** $CMD_ID"
  echo "- **Findings:** $N_CRIT critical / $N_WARN warning / $N_INFO info / $N_UNK unknown"
  echo ""
  OS_CAPTION="$(echo "$DIAG_JSON" | jq -r '.os.caption // "?"')"
  OS_BUILD="$(echo "$DIAG_JSON" | jq -r '.os.build // "?"')"
  echo "- **OS:** $OS_CAPTION (build $OS_BUILD)"
  echo ""
  echo "---"
  echo ""

  # One section per severity, most severe first; empty severities are skipped.
  for sev in critical warning info unknown; do
    SEV_COUNT="$(echo "$DIAG_JSON" | jq --arg s "$sev" '[(.findings // [])[] | select(.severity==$s)] | length')"
    case "$SEV_COUNT" in
      '' | 0) continue ;;
    esac
    SEV_LABEL="$(echo "$sev" | tr '[:lower:]' '[:upper:]')"
    echo "## $SEV_LABEL ($SEV_COUNT)"
    echo ""
    # txt keeps one odd field (e.g. structured evidence) from aborting the
    # filter and dropping the remaining findings of this section.
    echo "$DIAG_JSON" | jq -r --arg s "$sev" "$JQ_REPORT_DEFS"'
      (.findings // [])[] | select(.severity==$s) |
      (.evidence | txt) as $ev |
      "### " + (.title | txt) + "\n" +
      "- **Category:** " + ((.category // "?") | txt) + "\n" +
      "- **ID:** `" + ((.id // "?") | txt) + "`\n" +
      "- " + (.detail | txt) + "\n" +
      (if $ev != "" then "\n```\n" + $ev + "\n```\n" else "" end)
    '
    echo ""
  done

  echo "---"
  echo ""
  echo "## Inventory Baseline Summary"
  echo ""
  echo "$DIAG_JSON" | jq -r "$JQ_REPORT_DEFS"'
    .facts as $f |
    "- **Manufacturer / Model:** " + (($f.hardware.manufacturer // "?") | txt) + " / " + (($f.hardware.model // "?") | txt) + "\n" +
    "- **Serial:** " + (($f.hardware.serial // "?") | txt) + "\n" +
    "- **CPU:** " + (($f.hardware.cpu // "?") | txt) + " (" + (($f.hardware.cpu_cores // 0)|tostring) + " cores / " + (($f.hardware.cpu_logical // 0)|tostring) + " logical)\n" +
    "- **RAM (GB):** " + (($f.hardware.ram_gb // 0)|tostring) + "\n" +
    "- **BIOS:** " + (($f.hardware.bios_version // "?") | txt) + " (" + (($f.hardware.bios_date // "?") | txt) + ")\n" +
    "- **Chassis is laptop:** " + (($f.is_laptop // false)|tostring) + "\n" +
    "- **TPM present / Secure Boot:** " + ($f.tpm.present | tri) + " / " + ($f.secure_boot | tri) + "\n" +
    "- **Domain joined:** " + (($f.domain_joined // false)|tostring) + " (" + (($f.domain // "?") | txt) + ")\n" +
    "- **OS activation licensed:** " + ($f.activation.licensed | tri) + "\n" +
    "- **Uptime (days):** " + (($f.uptime_days // "?")|tostring) + "\n" +
    "- **Pending reboot:** " + (($f.pending_reboot // false)|tostring) + "\n" +
    "- **Installed software count:** " + (($f.installed_software_count // 0)|tostring) + "\n" +
    "- **Scheduled tasks (non-MS, enabled):** " + (($f.scheduled_tasks_count // 0)|tostring) + "\n" +
    "- **Local administrators:** " + ($f.local_administrators | lst | map(txt) | join(", "))
  '
  echo ""
  echo "### Fixed volumes"
  echo ""
  echo "$DIAG_JSON" | jq -r "$JQ_REPORT_DEFS"'
    (.facts.volumes | lst)[] |
    "- " + ((.drive // "?") | txt) + " - " + ((.free_gb // 0)|tostring) + " GB free of " + ((.size_gb // 0)|tostring) + " GB (" + ((.free_pct // 0)|tostring) + "%)"
  '
  echo ""
  echo "### Network adapters"
  echo ""
  echo "$DIAG_JSON" | jq -r "$JQ_REPORT_DEFS"'
    (.facts.network_adapters | lst)[] |
    "- " + ((.description // "?") | txt) + " - IP: " + (.ip | lst | map(txt) | join(", ")) + " - DNS: " + (.dns | lst | map(txt) | join(", ")) + " - DHCP: " + ((.dhcp // false)|tostring)
  '
  echo ""

  # -----------------------------------------------------------------------
  # DIFF section vs prior baseline
  # -----------------------------------------------------------------------
  # Compares the snapshot just written ($JSON_PATH) with $PRIOR_JSON. Findings
  # are matched by id; info-level findings are left out of new/resolved.
  if [ -n "$PRIOR_JSON" ] && [ -f "$PRIOR_JSON" ]; then
    PRIOR_STAMP="$(basename "$PRIOR_JSON")"
    echo "---"
    echo ""
    echo "## Diff vs Prior Baseline"
    echo ""
    echo "- **Compared against:** \`$PRIOR_STAMP\`"
    echo ""

    # New findings: ids present now but not before.
    NEW_FINDINGS="$(jq -n \
      --slurpfile cur "$JSON_PATH" \
      --slurpfile old "$PRIOR_JSON" '
      ($old[0].findings // []) as $o |
      ($cur[0].findings // []) as $c |
      ($o | map(.id)) as $oids |
      [ $c[] | select(.severity!="info") | select(.id as $id | ($oids | index($id)) | not) ]
    ')"
    # Resolved findings: ids present before but not now.
    RESOLVED_FINDINGS="$(jq -n \
      --slurpfile cur "$JSON_PATH" \
      --slurpfile old "$PRIOR_JSON" '
      ($old[0].findings // []) as $o |
      ($cur[0].findings // []) as $c |
      ($c | map(.id)) as $cids |
      [ $o[] | select(.severity!="info") | select(.id as $id | ($cids | index($id)) | not) ]
    ')"
    # Regressed: same id, severity got worse (info<warning<critical; unknown treated as warning-level).
    REGRESSED="$(jq -n \
      --slurpfile cur "$JSON_PATH" \
      --slurpfile old "$PRIOR_JSON" '
      def rank(s): if s=="critical" then 3 elif s=="warning" then 2 elif s=="unknown" then 2 elif s=="info" then 1 else 0 end;
      ($old[0].findings // []) as $o |
      ($cur[0].findings // []) as $c |
      ($o | map({key:.id, value:.severity}) | from_entries) as $om |
      [ $c[] | select(.id as $id | $om[$id] != null) | select(rank(.severity) > rank($om[.id])) |
        {id, title, was: $om[.id], now: .severity} ]
    ')"

    echo "**New findings:**"
    echo ""
    emit_bullets "$NEW_FINDINGS" '.[] | "- [" + (.severity|ascii_upcase) + "] " + .title'
    echo ""
    echo "**Resolved findings:**"
    echo ""
    emit_bullets "$RESOLVED_FINDINGS" '.[] | "- [" + (.severity|ascii_upcase) + "] " + .title'
    echo ""
    echo "**Regressed findings:**"
    echo ""
    emit_bullets "$REGRESSED" '.[] | "- " + .title + " (" + .was + " -> " + .now + ")"'
    echo ""

    # Installed-software deltas (compared by unique name)
    SW_ADDED="$(jq -n \
      --slurpfile cur "$JSON_PATH" \
      --slurpfile old "$PRIOR_JSON" '
      ((($old[0].facts.installed_software // []) | map(.name)) | unique) as $o |
      ((($cur[0].facts.installed_software // []) | map(.name)) | unique) as $c |
      [ $c[] | select(. as $n | ($o | index($n)) | not) ]
    ')"
    SW_REMOVED="$(jq -n \
      --slurpfile cur "$JSON_PATH" \
      --slurpfile old "$PRIOR_JSON" '
      ((($old[0].facts.installed_software // []) | map(.name)) | unique) as $o |
      ((($cur[0].facts.installed_software // []) | map(.name)) | unique) as $c |
      [ $o[] | select(. as $n | ($c | index($n)) | not) ]
    ')"

    echo "**Software added:**"
    echo ""
    emit_bullets "$SW_ADDED" '.[] | "- " + .'
    echo ""
    echo "**Software removed:**"
    echo ""
    emit_bullets "$SW_REMOVED" '.[] | "- " + .'
    echo ""
  else
    echo "---"
    echo ""
    echo "## Diff vs Prior Baseline"
    echo ""
    echo "- No prior baseline found for this host. This is the first baseline."
    echo ""
  fi

  echo "---"
  echo ""
  echo "_Generated by run-onboarding-diagnostic.sh (GuruRMM onboarding diagnostic, Phase 1). Raw snapshot: \`$(basename "$JSON_PATH")\` (immutable)._"
} > "$MD_PATH"
|
||||
|
||||
# ---------------------------------------------------------------------------
# Alerts (soft-fail)
#   RED   -> one message: critical count plus the first three critical titles
#            (a "(+N more)" suffix when there are more than three)
#   AMBER -> one message with the warning count
#   GREEN -> no alert
# ---------------------------------------------------------------------------
case "$GRADE" in
  RED)
    CRIT_TITLES="$(echo "$DIAG_JSON" | jq -r '[.findings[] | select(.severity=="critical") | .title] | .[0:3] | join("; ")')"
    MORE=""
    if [ "$N_CRIT" -gt 3 ]; then
      MORE=" (+$((N_CRIT - 3)) more)"
    fi
    post_alert "[RMM] Onboarding diag $PROBE_HOST ($CLIENT_SLUG) = RED: $N_CRIT critical - ${CRIT_TITLES}${MORE}"
    ;;
  AMBER)
    post_alert "[RMM] Onboarding diag $PROBE_HOST ($CLIENT_SLUG) = AMBER: $N_WARN warning, 0 critical"
    ;;
esac
|
||||
|
||||
# ---------------------------------------------------------------------------
# Final console summary
# ---------------------------------------------------------------------------
# One printf, one argument per output line; the last line repeats the report
# path on its own.
printf '%s\n' \
  "" \
  "==========================================================" \
  " Onboarding diagnostic complete" \
  " Host: $PROBE_HOST" \
  " Client: ${AGENT_CLIENT:-$CLIENT_SLUG} ($CLIENT_SLUG)" \
  " Grade: $GRADE ($N_CRIT critical / $N_WARN warning / $N_INFO info / $N_UNK unknown)" \
  " JSON: $JSON_PATH" \
  " Report: $MD_PATH" \
  "==========================================================" \
  "" \
  "Report path: $MD_PATH"
|
||||
Reference in New Issue
Block a user