From 9ab36352ae82628c787e35ded7bb3626d2971236 Mon Sep 17 00:00:00 2001 From: Mike Swanson Date: Mon, 13 Apr 2026 15:59:49 -0700 Subject: [PATCH] Session log: Tunnel expansion + WHM fix (ix. grey-cloud) Audited all 25 proxied zone records and expanded tunnel ingress to cover 9 hostnames total (azcomputerguru + analytics + community + radio + git + plexrequest + rmm + rmm-api + sync). All verified HTTP 200. Reverted 3 hostnames to original A records after discovering they require backend work, not tunnel changes: - plex/rustdesk: NPM on Jupiter has no vhost for these (returned 'tls: unrecognized name' when tunneled) - secure: Jupiter can't route to its backend subnet 172.16.1.0/24 Reverted ix.azcomputerguru.com to DNS-only A record after user reported :2087 WHM access broken. Cloudflare Tunnel is hostname-bound, not port-bound, so non-standard admin ports can't pass through. Direct NAT to 72.194.62.5 restored WHM/cPanel access. Adds four new helper scripts under clients/internal-infrastructure/ scripts/cloudflared-tunnel-setup/ (audit_proxied, discover_backends, expand_tunnel, revert_broken). All use SOPS vault / env var for creds. 
Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../cloudflared-tunnel-setup/audit_proxied.py | 154 ++++++++++++++++
 .../discover_backends.py                      |  73 ++++++++
 .../cloudflared-tunnel-setup/expand_tunnel.py | 171 ++++++++++++++++++
 .../cloudflared-tunnel-setup/revert_broken.py | 139 +++++++++++++++
 .../session-logs/2026-04-13-session.md        | 167 ++++++++++++++++++
 5 files changed, 704 insertions(+)
 create mode 100644 clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/audit_proxied.py
 create mode 100644 clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/discover_backends.py
 create mode 100644 clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/expand_tunnel.py
 create mode 100644 clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/revert_broken.py

diff --git a/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/audit_proxied.py b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/audit_proxied.py
new file mode 100644
index 0000000..88c9604
--- /dev/null
+++ b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/audit_proxied.py
@@ -0,0 +1,154 @@
+"""Audit proxied Cloudflare hosts vs. current tunnel ingress.
+
+For each proxied record in the zone:
+  - classify origin (internal LAN, public IP owned by us, external)
+  - test HTTPS through CF (currently 2xx/3xx/4xx/5xx?)
+  - cross-check against ingress list in config.yml
+
+Flags which proxied hosts would benefit from being added to the tunnel.
+"""
+import json, os, re, socket, subprocess, urllib.error, urllib.request
+import paramiko, yaml
+
+ZONE = '1beb9917c22b54be32e5215df2c227ce'
+CF_TOKEN = os.environ.get('CF_API_TOKEN_FULL_DNS', '')
+if not CF_TOKEN:
+    raise SystemExit('set CF_API_TOKEN_FULL_DNS env var')
+
+# Our public IPs (from pfSense WAN)
+OUR_PUBLIC_IPS = {
+    '72.194.62.' + str(n) for n in range(2, 11)
+} | {
+    '70.175.28.' + str(n) for n in list(range(51, 55)) + [56, 57]
+} | {'98.181.90.163'}
+
+# Known internal LAN reachability from Jupiter (where tunnel runs)
+LAN_HOSTS = {
+    '172.16.3.10': 'IX (cPanel/WHM)',
+    '172.16.3.20': 'Jupiter (this tunnel host)',
+    '172.16.3.22': 'gitea',
+    '172.16.3.29': 'UniFi OS Server VM',
+    '172.16.0.1': 'pfSense',
+}
+
+# CNAME target domains that belong to third-party SaaS providers
+SAAS_DOMAINS = (
+    'outlook.com','msftonline.com','microsoft.com','office.com','microsoftonline.com',
+    'sendgrid.net','unbouncepages.com','msp360.com','secureserver.net',
+    'azurestaticapps.net','azurefd.net','aws.com','amazonaws.com','acm-validations.aws','ucaasnetwork.com',
+    'itglue.com','manage.microsoft.com','windows.net','mtasv.net','onmicrosoft.com',
+)
+
+def cfapi(path):
+    """GET a Cloudflare v4 API path and return the decoded JSON response."""
+    req = urllib.request.Request(
+        f'https://api.cloudflare.com/client/v4{path}',
+        headers={'Authorization': f'Bearer {CF_TOKEN}'},
+    )
+    with urllib.request.urlopen(req, timeout=30) as r:
+        return json.load(r)
+
+def fetch_records(rtype):
+    """Return every DNS record of type rtype in ZONE, following API pagination."""
+    records, page = [], 1
+    while True:
+        r = cfapi(f'/zones/{ZONE}/dns_records?type={rtype}&per_page=100&page={page}')
+        records += r.get('result') or []
+        # result_info may be missing on an error response; treat that as a single page
+        if page >= (r.get('result_info') or {}).get('total_pages', 1):
+            return records
+        page += 1
+
+def probe(host):
+    """HEAD https://host/ with a browser UA, return (status, cf_ray_or_server)."""
+    try:
+        req = urllib.request.Request(f'https://{host}/', method='HEAD',
+            headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'})
+        with urllib.request.urlopen(req, timeout=12) as r:
+            return r.status, r.headers.get('Server', '-')
+    except urllib.error.HTTPError as e:
+        return e.code, e.headers.get('Server', '-') if hasattr(e,'headers') else '-'
+    except Exception as e:
+        return 'ERR', str(e)[:40]
+
+def load_current_ingress():
+    """Pull config.yml from Jupiter and return the set of hostnames already tunneled."""
+    creds = yaml.safe_load(subprocess.run(
+        ['sops','-d','D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml'],
+        capture_output=True, text=True, timeout=30, check=True,
+    ).stdout)
+    c = paramiko.SSHClient(); c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    try:
+        c.connect('172.16.3.20', username='root', password=creds['credentials']['password'],
+                  timeout=30, look_for_keys=False, allow_agent=False)
+        _, o, _ = c.exec_command('cat /mnt/cache/appdata/cloudflared/config.yml', timeout=30)
+        # An empty or unreadable config.yml parses to None; treat it as "nothing tunneled"
+        cfg = yaml.safe_load(o.read().decode()) or {}
+    finally:
+        c.close()  # release the SSH session even when connect/read/parse fails
+    return {i.get('hostname') for i in cfg.get('ingress') or [] if i.get('hostname')}
+
+def _under(name, domain):
+    """True when name is domain itself or a subdomain of it (label-boundary match)."""
+    return name == domain or name.endswith('.' + domain)
+
+def classify(content, ctype):
+    """Bucket the origin of a DNS record from its type and target."""
+    if ctype == 'A':
+        if content in OUR_PUBLIC_IPS:
+            return 'OUR_PUBLIC_IP'
+        if content in LAN_HOSTS:
+            return 'LAN'
+        return 'EXTERNAL_IP'
+    if ctype == 'CNAME':
+        # Normalise case and any trailing root dot before matching domains
+        low = content.lower().rstrip('.')
+        if _under(low, 'cfargotunnel.com'):
+            return 'TUNNEL_CNAME'
+        if any(_under(low, d) for d in SAAS_DOMAINS):
+            return 'EXTERNAL_SAAS'
+        if _under(low, 'azcomputerguru.com'):
+            return 'SELF_CNAME'
+        return 'EXTERNAL_CNAME'
+    return 'OTHER'
+
+def main():
+    print('[INFO] fetching DNS records...')
+    a_recs = fetch_records('A')
+    cname_recs = fetch_records('CNAME')
+    all_recs = [r for r in a_recs + cname_recs if r.get('proxied')]
+    print(f'[INFO] {len(all_recs)} proxied records')
+
+    print('[INFO] reading current tunnel ingress...')
+    tunneled = load_current_ingress()
+    print(f'[INFO] currently tunneled hostnames: {sorted(tunneled)}')
+
+    print()
+    print(f'{"HOSTNAME":42} {"TYPE":6} {"TARGET":35} {"CLASS":14} {"IN_TUNNEL":10} {"HTTPS":>5} {"SERVER":10}')
+    print('-' * 130)
+
+    candidates = []
+    for r in sorted(all_recs, key=lambda x: x['name']):
+        name = r['name']
+        ctype = r['type']
+        content = r['content']
+        cls = classify(content, ctype)
+        in_tunnel = 'YES' if name in tunneled else ''
+        status, server = probe(name)
+        line = f'{name:42} {ctype:6} {content[:35]:35} {cls:14} {in_tunnel:10} {status!s:>5} {server[:10]:10}'
+        print(line)
+        # Candidates for tunnel: our origin (LAN or OUR_PUBLIC_IP) + not already in tunnel
+        if cls in ('LAN','OUR_PUBLIC_IP') and name not in tunneled:
+            candidates.append((name, content, cls, status))
+
+    print()
+    print('=' * 60)
+    print('CANDIDATES FOR TUNNEL INGRESS (own origin, not yet tunneled):')
+    print('=' * 60)
+    if not candidates:
+        print('(none)')
+    for name, content, cls, status in candidates:
+        print(f'  {name:42} -> {content:20} ({cls}, currently HTTP {status})')
+
+if __name__ == '__main__':
+    main()
diff --git a/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/discover_backends.py b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/discover_backends.py
new file mode 100644
index 0000000..b4db7ea
--- /dev/null
+++ b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/discover_backends.py
@@ -0,0 +1,73 @@
+"""Discover internal backends for each proxied hostname by tracing NAT rules.
+
+For each public IP in the 72.194.62.x block, pull pfSense port forwards on 443
+(and other ports if visible) and map them to internal LAN IPs:ports.
+Also pull NPM hosts from Jupiter to map hostnames -> backend services.
+"""
+import json, os, re, subprocess
+import paramiko, yaml
+
+def _pwd(vault_path):
+    """Decrypt a SOPS vault file and return its credentials.password value."""
+    r = subprocess.run(['sops','-d',vault_path], capture_output=True, text=True, timeout=30, check=True)
+    return yaml.safe_load(r.stdout)['credentials']['password']
+
+def ssh(host, user, pwd, port=22):
+    """Open a password-authenticated SSH session to host and return the client."""
+    c = paramiko.SSHClient(); c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    c.connect(host, port=port, username=user, password=pwd, timeout=30, look_for_keys=False, allow_agent=False)
+    return c
+
+def run(c, cmd, to=60):
+    """Run cmd over SSH client c and return its stdout as text (stderr is not read)."""
+    _, o, _ = c.exec_command(cmd, timeout=to)
+    return o.read().decode('utf-8','replace')
+
+# -----------------------------------------------------------------
+print('=== [1] pfSense NAT rules: public 72.194.62.x -> internal ===')
+pf_pwd = _pwd('D:/vault/infrastructure/pfsense-firewall.sops.yaml')
+pf = ssh('172.16.0.1', 'admin', pf_pwd, port=2248)
+# Pull rdr rules referencing each public IP on :443
+out = run(pf, r'pfctl -s nat 2>/dev/null | grep -E "rdr on igc0 .*tcp.*72\.194\.62\.[0-9]+ port = (https|2083|2087|3389|3000|8000)" | sort -u | head -40')
+print(out.strip())
+print()
+
+# -----------------------------------------------------------------
+print('=== [2] Jupiter docker ps + NPM inspection for :4 traffic ===')
+j_pwd = _pwd('D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml')
+j = ssh('172.16.3.20', 'root', j_pwd)
+
+# NPM container: find its config file
+out = run(j, 'docker ps --format "{{.Names}}\\t{{.Image}}\\t{{.Ports}}" | grep -iE "npm|nginx-proxy|proxy"')
+print('-- NPM container --')
+print(out.strip())
+print()
+
+# Find NPM hosts config (usually /data/nginx/proxy_host or in database)
+out = run(j, 'ls /mnt/user/appdata/NginxProxyManager*/data/nginx/proxy_host/ /mnt/user/appdata/npm/nginx/proxy_host/ 2>/dev/null | head')
+print('-- NPM proxy_host configs --')
+print(out.strip())
+print()
+
+# Show the first few proxy_host configs to extract hostname -> upstream mappings
+# No redirection inside the "for ... in" word list: the shell rejects it as a syntax
+# error (hidden here because stderr is not read); [ -f ] already skips unmatched globs.
+out = run(j, r'''
+for f in /mnt/user/appdata/npm/nginx/proxy_host/*.conf /mnt/user/appdata/NginxProxyManager-v3/data/nginx/proxy_host/*.conf /mnt/user/appdata/NginxProxyManager/data/nginx/proxy_host/*.conf; do
+  if [ -f "$f" ]; then
+    srv=$(grep -oP "server_name \K[^;]+" "$f" | head -1)
+    ups=$(grep -oP "(proxy_pass|set \$server) \K[^;\"]+" "$f" | head -2 | tr '\n' '|')
+    echo "$(basename $f): server=$srv upstream=$ups"
+  fi
+done 2>/dev/null
+''', to=60)
+print('-- server_name -> upstream --')
+print(out.strip())
+print()
+
+# Also dump docker ps for the services themselves
+out = run(j, 'docker ps --format "{{.Names}}\\t{{.Ports}}" | head -30')
+print('-- all docker containers + ports --')
+print(out.strip())
+
+pf.close(); j.close()
diff --git a/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/expand_tunnel.py b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/expand_tunnel.py
new file mode 100644
index 0000000..193efbe
--- /dev/null
+++ b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/expand_tunnel.py
@@ -0,0 +1,171 @@
+"""Expand cloudflared ingress to cover the 9 additional proxied hostnames.
+
+Mapping (per pfSense NAT discovery):
+  ix.       .5 -> 172.16.3.10:443 (IX direct, like the existing 4)
+  git./plex./plexrequest./rmm./rmm-api./sync./rustdesk. -> 172.16.3.20:18443 via NPM
+  secure.   .2 -> 172.16.1.16:443 (unknown host, try with SNI)
+
+NPM routes on SNI, so every ingress gets originServerName = <hostname>.
+
+The tunnel is restarted and confirmed connected with the new ingress first; only
+then is their DNS flipped (A 72.194.62.* proxied) -> CNAME tunnel proxied, so a
+hostname never points at a tunnel that does not serve it yet.
+"""
+import json, os, subprocess, time, urllib.request, urllib.error
+import paramiko, yaml
+
+ZONE = '1beb9917c22b54be32e5215df2c227ce'
+CF_TOKEN = os.environ.get('CF_API_TOKEN_FULL_DNS', '')
+if not CF_TOKEN:
+    raise SystemExit('set CF_API_TOKEN_FULL_DNS')
+
+APPDATA = '/mnt/cache/appdata/cloudflared'
+
+# (hostname, service-url)
+IX = 'https://172.16.3.10:443'
+JNPM = 'https://172.16.3.20:18443'
+FULL_INGRESS = [
+    # Existing 4 (IX cPanel)
+    ('azcomputerguru.com', IX),
+    ('analytics.azcomputerguru.com', IX),
+    ('community.azcomputerguru.com', IX),
+    ('radio.azcomputerguru.com', IX),
+    # New IX-origin
+    ('ix.azcomputerguru.com', IX),
+    # Jupiter NPM-served
+    ('git.azcomputerguru.com', JNPM),
+    ('plex.azcomputerguru.com', JNPM),
+    ('plexrequest.azcomputerguru.com', JNPM),
+    ('rmm.azcomputerguru.com', JNPM),
+    ('rmm-api.azcomputerguru.com', JNPM),
+    ('sync.azcomputerguru.com', JNPM),
+    ('rustdesk.azcomputerguru.com', JNPM),
+    # Different subnet, likely pfSense-routable
+    ('secure.azcomputerguru.com', 'https://172.16.1.16:443'),
+]
+
+NEW_HOSTS = [h for h,_ in FULL_INGRESS if h not in {
+    'azcomputerguru.com','analytics.azcomputerguru.com',
+    'community.azcomputerguru.com','radio.azcomputerguru.com'
+}]
+
+def cfapi(method, path, body=None):
+    """Call the Cloudflare v4 API; return the decoded JSON body (also for HTTP errors)."""
+    req = urllib.request.Request(
+        f'https://api.cloudflare.com/client/v4{path}',
+        data=json.dumps(body).encode() if body else None,
+        method=method,
+        headers={'Authorization': f'Bearer {CF_TOKEN}', 'Content-Type':'application/json'},
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=30) as r:
+            return json.loads(r.read())
+    except urllib.error.HTTPError as e:
+        # Cloudflare normally sends a JSON error body; synthesize one if it is not JSON
+        try: return json.loads(e.read())
+        except ValueError: return {'success':False,'errors':[{'message':str(e)}]}
+
+# -- Jupiter SSH --
+def _pwd(v): return yaml.safe_load(subprocess.run(['sops','-d',v],capture_output=True,text=True,timeout=30,check=True).stdout)['credentials']['password']
+j = paramiko.SSHClient(); j.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+j.connect('172.16.3.20', username='root', password=_pwd('D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml'),
+          timeout=30, look_for_keys=False, allow_agent=False)
+
+def jrun(cmd, to=60, check=False):
+    """Run cmd on Jupiter and return its stdout; check=True exits on a non-zero status."""
+    _, o, e = j.exec_command(cmd, timeout=to)
+    out = o.read().decode('utf-8','replace')
+    if check and o.channel.recv_exit_status() != 0:
+        err = e.read().decode('utf-8','replace').strip()
+        raise SystemExit(f'[FAIL] remote command exited non-zero: {cmd.splitlines()[0]}: {err}')
+    return out
+
+try:
+    # Read current tunnel UUID
+    out = jrun(f'grep "^tunnel:" {APPDATA}/config.yml')
+    if ':' not in out:
+        raise SystemExit(f'[FAIL] no "tunnel:" line in {APPDATA}/config.yml')
+    UUID = out.split(':',1)[1].strip()
+    print(f'[INFO] tunnel UUID: {UUID}')
+
+    # Build new config.yml
+    config = f'tunnel: {UUID}\n'
+    config += f'credentials-file: /home/nonroot/.cloudflared/{UUID}.json\n'
+    config += 'ingress:\n'
+    for h, svc in FULL_INGRESS:
+        config += f'  - hostname: {h}\n'
+        config += f'    service: {svc}\n'
+        config += f'    originRequest:\n'
+        config += f'      originServerName: {h}\n'
+        config += f'      noTLSVerify: true\n'
+    config += '  - service: http_status:404\n'
+
+    print('\n=== [1] write new config.yml ===')
+    print(config)
+
+    # Backup then write; check=True aborts before a missing backup or partial write is activated
+    jrun(f'cp {APPDATA}/config.yml {APPDATA}/config.yml.bak-$(date +%Y%m%d-%H%M%S)', check=True)
+    HEREDOC = "'EOF_CFG'"
+    jrun(f"cat > {APPDATA}/config.yml <<{HEREDOC}\n{config}\nEOF_CFG", check=True)
+    jrun(f'chown 65532:65532 {APPDATA}/config.yml', check=True)
+    print('\n[OK] config.yml written')
+
+    print('\n=== [2] restart cloudflared ===')
+    print(jrun('docker restart cloudflared').rstrip())
+
+    print('\n=== [3] wait for reconnect ===')
+    # --since StartedAt: ignore "Registered" lines left over from before the restart
+    since = "$(docker inspect -f '{{.State.StartedAt}}' cloudflared)"
+    for i in range(25):
+        time.sleep(3)
+        logs = jrun(f'docker logs --since "{since}" cloudflared 2>&1')
+        conns = logs.count('Registered tunnel connection')
+        if conns >= 4:
+            print(f'  [try {i+1}] {conns} connections registered')
+            break
+        print(f'  [try {i+1}] connections: {conns}')
+    else:
+        raise SystemExit(f'[FAIL] tunnel did not register 4 connections; DNS left untouched '
+                         f'(restore a backup from {APPDATA}/config.yml.bak-*)')
+
+    print('\n=== [4] DNS cutover for new hostnames ===')
+    tunnel_target = f'{UUID}.cfargotunnel.com'
+    for h in NEW_HOSTS:
+        r = cfapi('GET', f'/zones/{ZONE}/dns_records?name={h}')
+        if not r.get('success') or not r['result']:
+            print(f'  [SKIP] {h}: no record found')
+            continue
+        rec = r['result'][0]
+        print(f'  [{h}] current: type={rec["type"]} content={rec["content"]} proxied={rec["proxied"]}')
+        if rec['type']=='CNAME' and rec['content']==tunnel_target:
+            print(f'    already tunneled, skipping')
+            continue
+        d = cfapi('DELETE', f'/zones/{ZONE}/dns_records/{rec["id"]}')
+        if not d.get('success'):
+            print(f'    [FAIL delete] {d.get("errors")}')
+            continue
+        body = {'type':'CNAME','name':h,'content':tunnel_target,'proxied':True,'ttl':1}
+        cr = cfapi('POST', f'/zones/{ZONE}/dns_records', body)
+        if cr.get('success'):
+            print(f'    [OK] -> CNAME tunnel proxied')
+            continue
+        print(f'    [FAIL create] {cr.get("errors")}')
+        # The old record is already deleted: put it back so the hostname still resolves
+        old = {k: rec[k] for k in ('type','name','content','proxied','ttl')}
+        rb = cfapi('POST', f'/zones/{ZONE}/dns_records', old)
+        print(f'    [ROLLBACK] {"restored original record" if rb.get("success") else rb.get("errors")}')
+finally:
+    j.close()
+
+# External verification
+print('\n=== [5] external probe all 13 hostnames ===')
+for h, _ in FULL_INGRESS:
+    try:
+        req = urllib.request.Request(f'https://{h}/', method='HEAD',
+            headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'})
+        with urllib.request.urlopen(req, timeout=15) as r:
+            print(f'  {h:42} HTTP {r.status} {r.headers.get("Server","-")}')
+    except urllib.error.HTTPError as e:
+        print(f'  {h:42} HTTP {e.code}')
+    except Exception as e:
+        print(f'  {h:42} ERR {str(e)[:40]}')
diff --git a/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/revert_broken.py b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/revert_broken.py
new file mode 100644
index 0000000..a7b20de
--- /dev/null
+++ b/clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/revert_broken.py
@@ -0,0 +1,139 @@
+"""Revert the 3 hostnames that have no functional backend:
+- plex (NPM has no vhost)
+- rustdesk (NPM has no vhost)
+- secure (Jupiter can't route to 172.16.1.16)
+
+Removes them from tunnel ingress and restores their original A records.
+"""
+import json, os, subprocess, urllib.error, urllib.request, time
+import paramiko, yaml
+
+ZONE = '1beb9917c22b54be32e5215df2c227ce'
+CF_TOKEN = os.environ.get('CF_API_TOKEN_FULL_DNS','')
+if not CF_TOKEN: raise SystemExit('set CF_API_TOKEN_FULL_DNS')
+
+APPDATA = '/mnt/cache/appdata/cloudflared'
+IX = 'https://172.16.3.10:443'
+JNPM = 'https://172.16.3.20:18443'
+
+REVERT = {
+    # hostname: original A content
+    'plex.azcomputerguru.com': '72.194.62.4',
+    'rustdesk.azcomputerguru.com': '72.194.62.10',
+    'secure.azcomputerguru.com': '72.194.62.2',
+}
+
+# Hostnames that stay in the tunnel: (hostname, service-url)
+KEEP = [
+    ('azcomputerguru.com', IX),
+    ('analytics.azcomputerguru.com', IX),
+    ('community.azcomputerguru.com', IX),
+    ('radio.azcomputerguru.com', IX),
+    ('ix.azcomputerguru.com', IX),
+    ('git.azcomputerguru.com', JNPM),
+    ('plexrequest.azcomputerguru.com', JNPM),
+    ('rmm.azcomputerguru.com', JNPM),
+    ('rmm-api.azcomputerguru.com', JNPM),
+    ('sync.azcomputerguru.com', JNPM),
+]
+
+def cfapi(method, path, body=None):
+    """Call the Cloudflare v4 API; return the decoded JSON body (also for HTTP errors)."""
+    req = urllib.request.Request(
+        f'https://api.cloudflare.com/client/v4{path}',
+        data=json.dumps(body).encode() if body else None,
+        method=method,
+        headers={'Authorization': f'Bearer {CF_TOKEN}','Content-Type':'application/json'},
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=30) as r:
+            return json.loads(r.read())
+    except urllib.error.HTTPError as e:
+        # Cloudflare normally sends a JSON error body; synthesize one if it is not JSON
+        try: return json.loads(e.read())
+        except ValueError: return {'success':False,'errors':[{'message':str(e)}]}
+
+def _pwd(v): return yaml.safe_load(subprocess.run(['sops','-d',v],capture_output=True,text=True,timeout=30,check=True).stdout)['credentials']['password']
+
+j = paramiko.SSHClient(); j.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+j.connect('172.16.3.20', username='root', password=_pwd('D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml'),
+          timeout=30, look_for_keys=False, allow_agent=False)
+
+def jrun(cmd, to=60, check=False):
+    """Run cmd on Jupiter and return its stdout; check=True exits on a non-zero status."""
+    _, o, e = j.exec_command(cmd, timeout=to)
+    out = o.read().decode('utf-8','replace')
+    if check and o.channel.recv_exit_status() != 0:
+        err = e.read().decode('utf-8','replace').strip()
+        raise SystemExit(f'[FAIL] remote command exited non-zero: {cmd.splitlines()[0]}: {err}')
+    return out
+
+try:
+    print('=== [1] rewrite config.yml without the 3 broken hosts ===')
+    # Read UUID
+    out = jrun(f'grep "^tunnel:" {APPDATA}/config.yml')
+    if ':' not in out:
+        raise SystemExit(f'[FAIL] no "tunnel:" line in {APPDATA}/config.yml')
+    UUID = out.split(':',1)[1].strip()
+
+    config = f'tunnel: {UUID}\ncredentials-file: /home/nonroot/.cloudflared/{UUID}.json\ningress:\n'
+    for h, svc in KEEP:
+        config += f'  - hostname: {h}\n    service: {svc}\n    originRequest:\n      originServerName: {h}\n      noTLSVerify: true\n'
+    config += '  - service: http_status:404\n'
+    # check=True: abort before touching DNS or restarting if the backup or write fails
+    jrun(f'cp {APPDATA}/config.yml {APPDATA}/config.yml.bak-$(date +%Y%m%d-%H%M%S)', check=True)
+    HD = "'EOF_CFG'"
+    jrun(f"cat > {APPDATA}/config.yml <<{HD}\n{config}\nEOF_CFG", check=True)
+    jrun(f'chown 65532:65532 {APPDATA}/config.yml', check=True)
+    print(f'  {len(KEEP)} ingress hostnames kept (plex/rustdesk/secure removed)')
+
+    print('\n=== [2] revert DNS for 3 hosts ===')
+    for host, orig_ip in REVERT.items():
+        r = cfapi('GET', f'/zones/{ZONE}/dns_records?name={host}')
+        if not r.get('success') or not r['result']:
+            print(f'  [{host}] no record, skipping'); continue
+        rec = r['result'][0]
+        print(f'  [{host}] current: type={rec["type"]} content={rec["content"]}')
+        d = cfapi('DELETE', f'/zones/{ZONE}/dns_records/{rec["id"]}')
+        if not d.get('success'):
+            print(f'    [FAIL delete] {d.get("errors")}'); continue
+        body = {'type':'A','name':host,'content':orig_ip,'proxied':True,'ttl':1}
+        cr = cfapi('POST', f'/zones/{ZONE}/dns_records', body)
+        if cr.get('success'):
+            print(f'    [OK] restored A {orig_ip} proxied')
+            continue
+        print(f'    [FAIL create] {cr.get("errors")}')
+        # The old record is already deleted: put it back so the hostname still resolves
+        old = {k: rec[k] for k in ('type','name','content','proxied','ttl')}
+        rb = cfapi('POST', f'/zones/{ZONE}/dns_records', old)
+        print(f'    [ROLLBACK] {"restored previous record" if rb.get("success") else rb.get("errors")}')
+
+    print('\n=== [3] restart cloudflared ===')
+    print(jrun('docker restart cloudflared').rstrip())
+
+    print('\n=== [4] wait for reconnect ===')
+    # --since StartedAt: ignore "Registered" lines left over from before the restart
+    since = "$(docker inspect -f '{{.State.StartedAt}}' cloudflared)"
+    for i in range(20):
+        time.sleep(3)
+        logs = jrun(f'docker logs --since "{since}" cloudflared 2>&1')
+        conns = logs.count('Registered tunnel connection')
+        if conns >= 4:
+            print(f'  [try {i+1}] {conns} connections')
+            break
+    else:
+        print('  [WARN] tunnel did not report 4 connections; check "docker logs cloudflared"')
+finally:
+    j.close()
+
+print('\n=== [5] external probe all 10 tunneled hostnames ===')
+for h, _ in KEEP:
+    try:
+        req = urllib.request.Request(f'https://{h}/', method='HEAD',
+            headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'})
+        with urllib.request.urlopen(req, timeout=15) as r:
+            print(f'  {h:42} HTTP {r.status} {r.headers.get("Server","-")}')
+    except urllib.error.HTTPError as e:
+        print(f'  {h:42} HTTP {e.code}')
+    except Exception as e:
+        print(f'  {h:42} ERR {str(e)[:40]}')
diff --git a/clients/internal-infrastructure/session-logs/2026-04-13-session.md b/clients/internal-infrastructure/session-logs/2026-04-13-session.md index 190276e..e708fa5 100644 --- a/clients/internal-infrastructure/session-logs/2026-04-13-session.md +++ b/clients/internal-infrastructure/session-logs/2026-04-13-session.md @@ -351,3 +351,170 @@ curl -sI -A "Mozilla/5.0 Chrome/120.0" https://azcomputerguru.com/ **Last Updated:** 2026-04-13 **Next Actions:** submit Cox ticket; consider populating Cloudflare vault entry; monitor tunnel for 24h; cleanup misplaced helper scripts. + +--- + +## Update: 15:56 — Tunnel expansion audit + ix.azcomputerguru.com grey-cloud revert + +Post-initial-deploy work to assess which other proxied records in the zone would benefit from the tunnel, then fix a regression on WHM access. + +### Work done + +1. **Audit of all 25 proxied zone records** (`audit_proxied.py`). Classified each by origin: + - Tunneled (4): azcomputerguru.com, analytics, community, radio + - External SaaS (8): msp360, Microsoft, SendGrid, GoDaddy, etc. 
— not eligible + - Our-origin not-yet-tunneled (9): ix, git, plex, plexrequest, rmm, rmm-api, sync, rustdesk, secure + - Of those 9, 4 were actively broken (ix=521, plex=525, rustdesk=525, secure=ERR) and 5 working (git/plexrequest/rmm/rmm-api/sync=200) + +2. **Mapped NAT rules and NPM backends** (`discover_backends.py`): + - pfSense `pfctl -s nat` shows: `.4`, `.9`, `.10` all rdr to `172.16.3.20:18443` (Jupiter NPM) + - `.5 -> 172.16.3.10:443` (IX Apache) + - `.2 -> 172.16.1.16:443` (different subnet; no route from Jupiter) + - NPM_Server pfSense alias resolves to `172.16.3.20` only (single-member) + - Jupiter NPM active config dir: `/mnt/user/appdata/npm/nginx/proxy_host/` (separate from `NginxProxyManager/` which is a stale v1 copy; there's also an empty `NginxProxyManager-v3/`) + - NPM has proxy_host entries for: emby, plexrequest, unifi, git, rmm-api+rmm, sync, connect + - NPM has **NO** entries for: plex, rustdesk, secure -- so routing them to `https://172.16.3.20:18443` with that Host header returned `tls: unrecognized name` (default cert fallback) + +3. **Expanded tunnel to 13 hostnames** (`expand_tunnel.py`) via CF DNS API cutovers, then immediately rolled back 3: + - plex/rustdesk -> cloudflared error `Unable to reach the origin service ... remote error: tls: unrecognized name` (NPM returned default cert because no vhost matched). 502 to users. + - secure -> cloudflared error `no route to host` (Jupiter can't reach 172.16.1.16/24). 502 to users. + - All 3 were already broken BEFORE the tunnel (525/525/ERR). No user-visible regression, but not a *fix* either -- reverted their DNS back to original A records. + +4. **Final state after `revert_broken.py`: 10 hostnames tunneled, all HTTP 200**: + - azcomputerguru.com, analytics, community, radio, ix, git, plexrequest, rmm, rmm-api, sync + +5. **User reported "IX generated blank screen"** -> root cause: `https://ix.azcomputerguru.com:2087/` is the WHM admin URL. 
Cloudflare Tunnel is hostname-bound, not port-bound; ingress rules route ALL port traffic (Cloudflare normalizes at edge) to the single backend specified (`https://172.16.3.10:443`). So `:2087` -> landed at Apache:443, not WHM:2087. Apache returned the default vhost redirect instead of WHM. + + **Fix: grey-clouded `ix.azcomputerguru.com`** (proxied=False) pointing directly to A `72.194.62.5`. pfSense NAT rules for 2087/2083 are intact and route the traffic to IX. Verified: + - `ix.azcomputerguru.com:443` -> 200 (default vhost redirect, fine) + - `ix.azcomputerguru.com:2087` -> 200 (WHM) + - `ix.azcomputerguru.com:2083` -> 200 (cPanel) + + Trade-off: `ix.` no longer benefits from CF's DDoS/caching, but it's admin-only access. If the Cox BGP issue resurfaces specifically for traffic to 72.194.62.5 from certain geographies, `ix.azcomputerguru.com:2087` would fail for users in those regions -- but admin access typically comes from your own network which works fine. + +### Key decisions & rationale + +- **Tunnel ingress reconfigured to 9 hostnames** (dropped ix. after WHM issue surfaced, kept 3-broken removal from earlier). All 9 serve via tunnel, all verified 200. +- **Grey-cloud (DNS-only) rather than tunnel** for `ix.` because port 2087/2083 admin needs can't be satisfied by the tunnel. +- **Not investigated further**: the 3 unfixable hostnames (plex, rustdesk, secure) -- require NPM vhost additions and/or Jupiter routing changes, beyond today's tunnel scope. Captured as follow-ups. + +### Problems encountered and resolutions + +| Problem | Resolution | +|---|---| +| plex/rustdesk = 502 (`tls: unrecognized name`) | NPM has no vhost for these hostnames; it returned default cert. Reverted DNS to original A records (no worse than pre-tunnel state). | +| secure = 502 (`no route to host`) | Jupiter (172.16.3.20) can't route to 172.16.1.16 (different subnet). Reverted DNS. | +| WHM blank screen (`:2087`) | Tunnel is hostname-only, can't preserve non-standard ports. 
Grey-clouded `ix.` so direct NAT handles the admin ports. | +| Tailscale stopped mid-session (again) | User re-enabled after prompt; resumed. | +| Unicode arrow character crashed Python print on Windows cp1252 | Re-ran verify with ASCII chars. Harmless -- DNS/tunnel changes had already succeeded. | + +--- + +## Credentials (unchanged from this session) + +Same set as the earlier 2026-04-13 entry above (secret values are deliberately not recorded in this log; rotate any value that was previously committed in plaintext): +- Cloudflare Full DNS token: `[REDACTED -- supplied to the scripts via the CF_API_TOKEN_FULL_DNS env var]` +- Cloudflare Legacy token: `[REDACTED]` +- Zone ID: `1beb9917c22b54be32e5215df2c227ce` +- Jupiter: `root / [REDACTED -- see D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml]` at 172.16.3.20:22 +- IX: `root / [REDACTED]` at 172.16.3.10:22 (public 72.194.62.5) +- pfSense: `admin / [REDACTED -- see D:/vault/infrastructure/pfsense-firewall.sops.yaml]` at 172.16.0.1:2248 + +--- + +## DNS changes summary (all of 2026-04-13) + +| Hostname | Before session | After session | +|---|---|---| +| azcomputerguru.com | A 72.194.62.5 (mis-configured as proxied=False) | CNAME tunnel proxied | +| analytics.azcomputerguru.com | A 72.194.62.5 proxied | CNAME tunnel proxied | +| community.azcomputerguru.com | A 72.194.62.5 proxied | CNAME tunnel proxied | +| radio.azcomputerguru.com | A 72.194.62.5 proxied | CNAME tunnel proxied | +| ix.azcomputerguru.com | A 72.194.62.5 proxied | **A 72.194.62.5 DNS-only (grey cloud)** (supports :2087/:2083) | +| git.azcomputerguru.com | A 72.194.62.4 proxied | CNAME tunnel proxied | +| plex.azcomputerguru.com | A 72.194.62.4 proxied | A 72.194.62.4 proxied (unchanged net effect) | +| plexrequest.azcomputerguru.com | A 72.194.62.4 proxied | CNAME tunnel proxied | +| rmm.azcomputerguru.com | A 72.194.62.4 proxied | CNAME tunnel proxied | +| rmm-api.azcomputerguru.com | A 72.194.62.4 proxied | CNAME tunnel proxied | +| sync.azcomputerguru.com | A 72.194.62.9 proxied | CNAME tunnel proxied | +| rustdesk.azcomputerguru.com | A 72.194.62.10 proxied | A 72.194.62.10 proxied (unchanged net effect) | +| secure.azcomputerguru.com | A 72.194.62.2 proxied | A 
72.194.62.2 proxied (unchanged net effect) | + +--- + +## Current tunnel ingress (9 hostnames -- /mnt/cache/appdata/cloudflared/config.yml) + +Tunnel: `78d3e58f-1979-4f0e-a28b-98d6b3c3d867` (name `acg-origin`) + +- azcomputerguru.com -> https://172.16.3.10:443 (SNI + noTLSVerify) +- analytics.azcomputerguru.com -> https://172.16.3.10:443 +- community.azcomputerguru.com -> https://172.16.3.10:443 +- radio.azcomputerguru.com -> https://172.16.3.10:443 +- git.azcomputerguru.com -> https://172.16.3.20:18443 +- plexrequest.azcomputerguru.com -> https://172.16.3.20:18443 +- rmm.azcomputerguru.com -> https://172.16.3.20:18443 +- rmm-api.azcomputerguru.com -> https://172.16.3.20:18443 +- sync.azcomputerguru.com -> https://172.16.3.20:18443 +- catch-all -> http_status:404 + +Backups of config.yml kept as `config.yml.bak-YYYYMMDD-HHMMSS` in same dir. + +--- + +## Final verification outputs + +``` +azcomputerguru.com HTTP 200 cloudflare (tunnel -> IX) +analytics.azcomputerguru.com HTTP 200 cloudflare (tunnel -> IX) +community.azcomputerguru.com HTTP 200 cloudflare (tunnel -> IX) +radio.azcomputerguru.com HTTP 200 cloudflare (tunnel -> IX) +git.azcomputerguru.com HTTP 200 cloudflare (tunnel -> Jupiter NPM) +plexrequest.azcomputerguru.com HTTP 200 cloudflare (tunnel -> Jupiter NPM) +rmm.azcomputerguru.com HTTP 200 cloudflare (tunnel -> Jupiter NPM) +rmm-api.azcomputerguru.com HTTP 200 cloudflare (tunnel -> Jupiter NPM) +sync.azcomputerguru.com HTTP 200 cloudflare (tunnel -> Jupiter NPM) + +ix.azcomputerguru.com:443 HTTP 200 (direct, default vhost) +ix.azcomputerguru.com:2087 HTTP 200 (direct, WHM) +ix.azcomputerguru.com:2083 HTTP 200 (direct, cPanel) +``` + +--- + +## Scripts created (in clients/internal-infrastructure/scripts/cloudflared-tunnel-setup/) + +- `audit_proxied.py` -- list all proxied zone records, classify origin, external probe each +- `discover_backends.py` -- extract pfSense NAT rules and Jupiter NPM server_name mappings +- `expand_tunnel.py` -- extend tunnel 
ingress to 13 hostnames + DNS cutover +- `revert_broken.py` -- remove plex/rustdesk/secure from tunnel and restore their A records + +All have been sanitized to use SOPS vault for credentials / env var for CF token. + +--- + +## Pending / Incomplete / Open Items + +Additions to the list from the earlier 2026-04-13 entry: + +1. **`plex.azcomputerguru.com` is still broken** (525) -- requires NPM proxy_host entry on Jupiter. Likely target: `binhex-plexpass` container at `172.16.3.20:32400` (or whatever internal IP Plex uses with `network_mode: host`). Once NPM has the vhost, can add to tunnel with a single config.yml change. + +2. **`rustdesk.azcomputerguru.com` is still broken** (525) -- requires: + - Finding where the rustdesk server is actually running (no `rustdesk` container visible in `docker ps` on Jupiter; may be on a different host, or decommissioned) + - Adding NPM vhost for it + - Then tunnel ingress + +3. **`secure.azcomputerguru.com` is still broken** (ERR) -- requires either: + - A static route on Jupiter to 172.16.1.0/24 so cloudflared can reach 172.16.1.16 + - Or move the service behind Jupiter NPM + - Or grey-cloud to DNS-only like we did for `ix.` (bypass CF entirely) + +4. **Still TODO from the earlier block:** + - Submit Cox BGP ticket (`clients/internal-infrastructure/vendor-tickets/2026-04-13-cox-bgp-cloudflare-routing.md`) + - Populate CF tokens in SOPS vault (currently 1Password only) + - Fix stale `Paper123\!@#` in Dataforth AD2 vault entry + - Verify rsync covers Dataforth `VASLOG - Engineering Tested` subfolder + +--- + +**Last Updated:** 2026-04-13 15:56 +**Next Actions:** consider adding NPM vhost for plex, investigate rustdesk host, commit today's additions.