From 60d0a2bf8759a5c48fb5784b76c5686c468c335b Mon Sep 17 00:00:00 2001 From: Howard Enos Date: Mon, 15 Jun 2026 23:25:05 -0700 Subject: [PATCH] sync: auto-sync from HOWARD-HOME at 2026-06-15 23:24:56 Author: Howard Enos Machine: HOWARD-HOME Timestamp: 2026-06-15 23:24:56 --- .../unifi-wifi/scripts/neighbor-collect.sh | 158 ++++++++++++++++++ ...026-06-15-howard-cascades-wifi-rf-audit.md | 48 ++++++ 2 files changed, 206 insertions(+) create mode 100644 .claude/skills/unifi-wifi/scripts/neighbor-collect.sh diff --git a/.claude/skills/unifi-wifi/scripts/neighbor-collect.sh b/.claude/skills/unifi-wifi/scripts/neighbor-collect.sh new file mode 100644 index 0000000..acce1ba --- /dev/null +++ b/.claude/skills/unifi-wifi/scripts/neighbor-collect.sh @@ -0,0 +1,158 @@ +#!/usr/bin/env bash +# neighbor-collect.sh — harvest the AP-to-AP RF-neighbor SNR matrix for a UniFi site. +# +# THE KEY DISCOVERY (2026-06-15): UniFi does NOT expose managed-AP-to-managed-AP visibility +# through any documented API, the controller `rogue`/`stat/rogueap` (both filter out our own +# APs), Mongo, 802.11k hostapd, or Channel AI's channelplan. BUT each AP keeps the data +# internally in /proc, populated NON-DISRUPTIVELY by UniFi's own background RRM scanning: +# /proc/ui_neighbor/ess_ap_list — full list of our APs this AP hears (serial/band/channel) +# /proc/ui_neighbor/ssid/ — same neighbors WITH SNR (per scan-vap, per band) +# This script SSHes each AP, reads those, maps neighbor BSSIDs/serials -> AP names, and emits +# the AP-to-AP SNR adjacency matrix + a redundancy summary (which APs are heard strongly by >=2 +# neighbors = candidates whose radio can be powered-down/disabled without a coverage hole). +# /proc/ui_neighbor exists on every UniFi AP, so this is fleet-generic (any UOS site). +# +# Reads are non-disruptive. 
Needs: controller cred (infrastructure/uos-server-network-api-rw, +# for the BSSID/mac->name map) + per-site AP device-auth SSH cred + L3 reach to the AP mgmt VLAN +# (the site VPN). AP SSH uses sshpass if present, else an SSH_ASKPASS fallback (Windows has no +# sshpass; the fallback needs `ssh` from PATH = MSYS ssh on Windows). +# +# Usage: bash .claude/skills/unifi-wifi/scripts/neighbor-collect.sh [ap-ssh-vault-path] [snr_min] +# e.g. bash .../neighbor-collect.sh cascades # default cred + SNR>=20 +# bash .../neighbor-collect.sh cascades clients/acme/unifi-ap-ssh 25 +set -uo pipefail +REPO="$(git rev-parse --show-toplevel 2>/dev/null || echo .)" +VAULT="$REPO/.claude/scripts/vault.sh"; UOS="$REPO/.claude/scripts/uos-mongo.sh" +HOST="${UOS_HOST:-172.16.3.29}"; PORT="${UOS_HTTPS_PORT:-11443}" +SITEARG="${1:?usage: neighbor-collect.sh [ap-ssh-vault-path] [snr_min]}" +VP="${2:-clients/cascades-tucson/unifi-ap-ssh}"; SNR_MIN="${3:-20}" +TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT + +# --- controller creds + login (RW admin reads fine) --- +CU="$(bash "$VAULT" get-field infrastructure/uos-server-network-api-rw credentials.username 2>/dev/null)" +CP="$(bash "$VAULT" get-field infrastructure/uos-server-network-api-rw credentials.password 2>/dev/null)" +[ -n "$CU" ] && [ -n "$CP" ] || { echo "[ERROR] no controller cred (infrastructure/uos-server-network-api-rw)"; exit 1; } +base="https://$HOST:$PORT"; CJ="$TMP/cj" +code=$(curl -sk -c "$CJ" -o /dev/null -w '%{http_code}' -X POST "$base/api/auth/login" -H 'Content-Type: application/json' \ + --data-binary "$(python -c 'import json,sys;print(json.dumps({"username":sys.argv[1],"password":sys.argv[2]}))' "$CU" "$CP")") +[ "$code" = "200" ] || { echo "[ERROR] controller login HTTP $code"; exit 1; } + +# resolve site short name +SHORT="$(curl -sk -b "$CJ" "$base/proxy/network/api/self/sites" | python -c " +import sys,json; d=json.load(sys.stdin).get('data',[]); q='''$SITEARG'''.lower() +for s in d: + if 
s.get('_id')=='''$SITEARG''' or s.get('name')=='''$SITEARG''' or q in (s.get('desc','').lower()): print(s.get('name')); break +")" +[ -n "$SHORT" ] || SHORT="$SITEARG" +echo "[INFO] site=$SHORT SNR_MIN=$SNR_MIN" + +# --- build maps (mac->name for ess_ap_list, bssid->name for ssid/*, name->ip to SSH) --- +curl -sk -b "$CJ" "$base/proxy/network/api/s/$SHORT/stat/device" -o "$TMP/dev.json" +python - "$TMP/dev.json" "$TMP" <<'PY' +import json,sys +d=[a for a in json.load(open(sys.argv[1])).get('data',[]) if a.get('type')=='uap'] +macmap={}; bssmap={}; aps=[] +for a in d: + nm=a.get('name') or a.get('mac'); mac=(a.get('mac') or '').lower(); ip=a.get('ip') + if mac: macmap[mac]=nm + for v in a.get('vap_table',[]): + b=(v.get('bssid') or '').lower() + if b: bssmap[b]=nm + if ip and a.get('state')==1: aps.append((nm,ip)) # online only +json.dump(macmap,open(sys.argv[2]+'/macmap.json','w')) +json.dump(bssmap,open(sys.argv[2]+'/bssmap.json','w')) +open(sys.argv[2]+'/aps.tsv','w',newline='\n').write('\n'.join(f"{n}\t{i}" for n,i in aps)) # force LF (Windows text mode would write CRLF -> \r breaks ssh target) +print(f"[INFO] {len(aps)} online APs; {len(macmap)} mac + {len(bssmap)} bssid map entries") +PY + +# --- AP SSH auth: sshpass if present, else SSH_ASKPASS fallback --- +AU="$(bash "$VAULT" get-field "$VP" credentials.username 2>/dev/null)" +AP_PW="$(bash "$VAULT" get-field "$VP" credentials.password 2>/dev/null)"; export AP_PW +[ -n "$AU" ] && [ -n "$AP_PW" ] || { echo "[ERROR] no AP device-auth cred at vault:$VP"; exit 1; } +SSH_OPTS=(-o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/dev/null \ + -o PreferredAuthentications=password -o PubkeyAuthentication=no -o NumberOfPasswordPrompts=1) +# NOTE: /dev/null 2>&1; then + ap_ssh() { SSHPASS="$AP_PW" sshpass -e ssh "${SSH_OPTS[@]}" "$@" "$ASKP"; chmod +x "$ASKP" + ap_ssh() { SSH_ASKPASS="$ASKP" SSH_ASKPASS_REQUIRE=force DISPLAY="${DISPLAY:-:0}" ssh "${SSH_OPTS[@]}" "$@" > "$RAW" + if ap_ssh 
"$AU@$ip" 'echo "@@ESS"; cat /proc/ui_neighbor/ess_ap_list 2>/dev/null; for s in /proc/ui_neighbor/ssid/*; do echo "@@SSID $s"; cat "$s" 2>/dev/null; done' >> "$RAW" 2>/dev/null; then + ok=$((ok+1)) + else + echo "@@UNREACHABLE" >> "$RAW" + fi + printf '\r[INFO] harvested %d/%d (reachable %d) ' "$n" "$(wc -l < "$TMP/aps.tsv")" "$ok" >&2 +done < "$TMP/aps.tsv" +echo "" >&2 + +# --- parse + map + emit adjacency matrix + redundancy summary --- +python - "$RAW" "$TMP/macmap.json" "$TMP/bssmap.json" "$SNR_MIN" <<'PY' +import json,re,sys +raw=open(sys.argv[1],encoding='utf-8',errors='replace').read().splitlines() +macmap=json.load(open(sys.argv[2])); bssmap=json.load(open(sys.argv[3])); SNR_MIN=int(sys.argv[4]) +BN={'0':'2.4','1':'5','2':'6'}; EB={'2.4ghz':'2.4','5ghz':'5','6ghz':'6'} +# edges[(src, band)] = {neighbor_name: best_snr}; presence[(src,band)] = set(names) from ess_ap_list +edges={}; presence={}; cur=None; mode=None; band=None +def add_edge(src,band,name,snr): + k=(src,band); edges.setdefault(k,{}); + if name not in edges[k] or snr>edges[k][name]: edges[k][name]=snr +for ln in raw: + if ln.startswith('###AP'): + p=ln.split('\t'); cur=p[1] if len(p)>1 else None; mode=None; band=None; continue + if ln.startswith('@@ESS'): mode='ess'; continue + if ln.startswith('@@SSID'): mode='ssid'; band=None; continue + if ln.startswith('@@UNREACHABLE') or cur is None: continue + if mode=='ess': + p=ln.split() + if len(p)>=3 and re.match(r'^[0-9a-f:]{17}$',p[0]): + b=EB.get(p[1].lower()); nm=macmap.get(p[0].lower()) + if b and nm and nm!=cur: presence.setdefault((cur,b),set()).add(nm) + elif mode=='ssid': + mb=re.match(r'band\((\d)\)',ln.strip()) + if mb: band=BN.get(mb.group(1)); continue + p=ln.split() + if band and len(p)>=2 and re.match(r'^[0-9a-f:]{17}$',p[0]) and p[1].lstrip('-').isdigit(): + nm=bssmap.get(p[0].lower()) + if nm and nm!=cur: add_edge(cur,band,nm,int(p[1])) + +aps=sorted({k[0] for k in list(edges)+list(presence)}) +print(f"\n==== AP-to-AP RF NEIGHBOR 
MATRIX ({len(aps)} APs reporting) ====") +print("(SNR from /proc/ui_neighbor/ssid; '+N more' = additional neighbors seen in ess_ap_list w/o fresh SNR)\n") +for ap in aps: + line=[f"{ap}:"] + for b in ('2.4','5','6'): + snrs=edges.get((ap,b),{}) + seen=presence.get((ap,b),set()) + extra=len(seen - set(snrs)) + if snrs or seen: + top=sorted(snrs.items(),key=lambda x:-x[1])[:5] + s=", ".join(f"{n}({v})" for n,v in top) + more=f" +{extra} more" if extra>0 else "" + line.append(f"\n {b}GHz: {s}{more}") + print("".join(line)) + +# redundancy summary: APs whose radio is "safe-ish" to power-down/disable = heard by >=2 neighbors at strong SNR +print(f"\n==== REDUNDANCY (neighbors at SNR>={SNR_MIN}; >=2 strong same-band neighbors = coverage-redundant) ====") +for b in ('2.4','5','6'): + rows=[] + for ap in aps: + strong=[n for n,v in edges.get((ap,b),{}).items() if v>=SNR_MIN] + rows.append((len(strong), ap, strong)) + redund=[r for r in rows if r[0]>=2] + print(f"\n-- {b}GHz: {len(redund)}/{len(rows)} APs have >=2 strong neighbors (disable/power-down candidates) --") + for cnt,ap,strong in sorted(redund,reverse=True)[:12]: + print(f" {ap}: {cnt} strong ({', '.join(strong[:4])}{'...' if len(strong)>4 else ''})") +PY +echo "" +echo "[next] feed the redundancy list to optimize-radios.sh; validate per-zone with watch-ap.sh before any --apply." diff --git a/clients/cascades-tucson/session-logs/2026-06/2026-06-15-howard-cascades-wifi-rf-audit.md b/clients/cascades-tucson/session-logs/2026-06/2026-06-15-howard-cascades-wifi-rf-audit.md index 7088e87..62130fe 100644 --- a/clients/cascades-tucson/session-logs/2026-06/2026-06-15-howard-cascades-wifi-rf-audit.md +++ b/clients/cascades-tucson/session-logs/2026-06/2026-06-15-howard-cascades-wifi-rf-audit.md @@ -227,3 +227,51 @@ real measured RF data that supports a DATA-DRIVEN CHANNEL PLAN. Sample AP 132 (R - [ ] (Howard) has more information to add — pending. 
- Confirmed env: firmware 8.6.11.18870 on U7-Pro; AP device-auth SSH = uid 0 (root); split-tunnel VPN gives AP reach on 192.168.2.x/3.x (192.168.0.x shadowed by home LAN). + +--- + +## Update: 23:24 PT — BREAKTHROUGH: AP-to-AP SNR matrix IS obtainable; neighbor-collect.sh built; disables now data-supportable + +**REVERSES the earlier "AP-neighbor table unobtainable / disables not data-supportable" conclusion.** +The data DOES exist — not via any controller API/DB (all of which filter our managed APs) but on +each AP in `/proc`, populated NON-DISRUPTIVELY by UniFi's background RRM scanning: +- `/proc/ui_neighbor/ess_ap_list` — full list of managed APs this AP hears (serial/band/channel) +- `/proc/ui_neighbor/ssid/` — same neighbors WITH SNR (per scan-vap, per band) +Discovered via `strings $(which mcad) | grep neighbor` → `/proc/ui_neighbor/ess_ap_list` + +`load_ssid_neighbor_cache`. `/proc/ui_neighbor` exists on every UniFi AP → fleet-generic. + +### NEW: .claude/skills/unifi-wifi/scripts/neighbor-collect.sh (built + validated, Mike notified 3dbe2437) +Logs into controller for the mac/bssid->AP-name map, SSHes each online AP (sshpass OR SSH_ASKPASS +fallback), reads `/proc/ui_neighbor`, maps neighbors to AP names, emits the AP-to-AP SNR adjacency +matrix + a redundancy summary (APs heard by >=2 neighbors at SNR>=N = power-down/disable candidates). +Usage: `neighbor-collect.sh [ap-ssh-vault-path] [snr_min]`. Run FOREGROUND. +**Validated on Cascades: 74/74 APs reporting.** Physically sensible (115<->116 SNR 63, 121<->221 +SNR 65 = adjacent). Redundancy: **73/74 APs have >=2 strong (SNR>=20) 2.4GHz neighbors**, 72/74 on +5GHz, 65/74 on 6GHz → nearly every AP is coverage-redundant on 2.4 → aggressive 2.4 pruning is +data-supported, and individual radio DISABLES are now decidable from data. + +### Three bugs found+fixed building it (apply to any loop-over-AP script) +1. 
Python wrote the AP-list temp file in Windows TEXT mode -> CRLF -> bash `read` left `\r` on the + IP -> ssh "hostname contains invalid characters" on EVERY AP (the "reachable 0" symptom). Fix: + `open(...,newline='\n')` + `ip="${ip%$'\r'}"` strip in the read loop. +2. Must run FOREGROUND — a fully detached background process can't spawn the SSH_ASKPASS helper + (both background runs got reachable 0; foreground after the CRLF fix got 74). +3. `ssh` in a `while read` loop needs `</dev/null` — otherwise ssh reads the loop's stdin (the AP list) and the loop stops after the first AP. [NOTE: an angle-bracketed span was stripped from this log at this point, taking the rest of item 3 and the start of the following scope paragraph with it; restore from the original commit.] ... AP device-auth SSH cred at `clients/<client>/unifi-ap-ssh` vaulted (overridable via arg). Out of current scope (it's +unifi-WIFI): switch/PoE, gateway/WAN/firewall, adoption — reachable via the same access layer +(uos-mongo/controller API/device SSH) but not yet wrapped as scripts. + +### Pending follow-ups +- [ ] Wire neighbor-collect redundancy output into optimize-radios.sh (data-backed disables). +- [ ] Fold survey-dump (per-channel busy/noise) + dmesg DFS-radar recipes into the skill as + reusable collectors (the two ad-hoc datasets from this session). +- [ ] Per-client AP device-auth creds for other clients when extending beyond Cascades. +- [ ] Floor-4 2.4 power-down pilot (still the next live change; nothing applied yet). +- Coord this update: neighbor-collect.sh announce 3dbe2437.