Session log: Tunnel expansion + WHM fix (ix. grey-cloud)

Audited all 25 proxied zone records and expanded tunnel ingress to cover
9 hostnames total (azcomputerguru + analytics + community + radio +
git + plexrequest + rmm + rmm-api + sync). All verified HTTP 200.

Reverted 3 hostnames to original A records after discovering they
require backend work, not tunnel changes:
- plex/rustdesk: NPM on Jupiter has no vhost for these (returned
  'tls: unrecognized name' when tunneled)
- secure: Jupiter can't route to its backend subnet 172.16.1.0/24

Reverted ix.azcomputerguru.com to DNS-only A record after user
reported :2087 WHM access broken. Cloudflare Tunnel is hostname-bound,
not port-bound, so non-standard admin ports can't pass through. Direct
NAT to 72.194.62.5 restored WHM/cPanel access.

Adds four new helper scripts under clients/internal-infrastructure/
scripts/cloudflared-tunnel-setup/ (audit_proxied, discover_backends,
expand_tunnel, revert_broken). All use SOPS vault / env var for creds.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 15:59:49 -07:00
parent 5169936cfc
commit 9ab36352ae
5 changed files with 640 additions and 0 deletions

View File

@@ -0,0 +1,131 @@
"""Audit proxied Cloudflare hosts vs. current tunnel ingress.
For each proxied record in the zone:
- classify origin (internal LAN, public IP owned by us, external)
- test HTTPS through CF (currently 2xx/3xx/4xx/5xx?)
- cross-check against ingress list in config.yml
Flags which proxied hosts would benefit from being added to the tunnel.
"""
import json, os, re, socket, subprocess, urllib.error, urllib.request
import paramiko, yaml
# Cloudflare zone whose DNS records this script audits.
ZONE = '1beb9917c22b54be32e5215df2c227ce'

# The API token is only ever read from the environment; abort before any
# work is done when it is missing or empty.
CF_TOKEN = os.environ.get('CF_API_TOKEN_FULL_DNS', '')
if CF_TOKEN == '':
    raise SystemExit('set CF_API_TOKEN_FULL_DNS env var')
# Our public IPs (from pfSense WAN): 72.194.62.2-10, 70.175.28.51-54/56/57
# and one standalone address.
OUR_PUBLIC_IPS = set()
OUR_PUBLIC_IPS.update(f'72.194.62.{octet}' for octet in range(2, 11))
OUR_PUBLIC_IPS.update(f'70.175.28.{octet}' for octet in (51, 52, 53, 54, 56, 57))
OUR_PUBLIC_IPS.add('98.181.90.163')

# Known internal LAN reachability from Jupiter (where tunnel runs):
# LAN address -> human-readable label.
LAN_HOSTS = dict([
    ('172.16.3.10', 'IX (cPanel/WHM)'),
    ('172.16.3.20', 'Jupiter (this tunnel host)'),
    ('172.16.3.22', 'gitea'),
    ('172.16.3.29', 'UniFi OS Server VM'),
    ('172.16.0.1', 'pfSense'),
])
def cfapi(path):
    """GET a Cloudflare v4 API path and return the decoded JSON body.

    ``path`` is appended verbatim to the API base URL, so it must begin
    with ``/``. The request carries the module-level ``CF_TOKEN`` as a
    bearer token and times out after 30 seconds. Errors raised by
    ``urlopen`` (HTTP error statuses, network failures) are not caught here.
    """
    url = 'https://api.cloudflare.com/client/v4' + path
    auth_header = {'Authorization': 'Bearer ' + CF_TOKEN}
    request = urllib.request.Request(url, headers=auth_header)
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = json.load(response)
    return payload
def probe(host):
    """Send ``HEAD https://<host>/`` with a browser-like User-Agent.

    Returns a ``(status, detail)`` pair:

    * response received: the HTTP status and the ``Server`` header
      (``'-'`` when the header is absent);
    * HTTP error status: the error code and the ``Server`` header of the
      error response;
    * any other failure: the string ``'ERR'`` and the first 40 characters
      of the exception text.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0',
    }
    try:
        request = urllib.request.Request(
            f'https://{host}/', method='HEAD', headers=browser_headers,
        )
        with urllib.request.urlopen(request, timeout=12) as response:
            return response.status, response.headers.get('Server', '-')
    except urllib.error.HTTPError as http_err:
        if hasattr(http_err, 'headers'):
            server = http_err.headers.get('Server', '-')
        else:
            server = '-'
        return http_err.code, server
    except Exception as err:
        # Deliberately broad: DNS, TLS and timeout failures are all reported
        # as a table cell rather than aborting the audit.
        return 'ERR', str(err)[:40]
def load_current_ingress():
    """Pull config.yml from Jupiter and return the set of hostnames already tunneled.

    Decrypts the Jupiter credentials with ``sops``, opens a password SSH
    session to 172.16.3.20 as root, reads the cloudflared ``config.yml``
    and collects the ``hostname`` of every ingress rule that has one
    (rules without a hostname are skipped).

    Returns:
        set[str]: hostnames found in the ingress list; empty when the file
        is empty or has no ``ingress`` section.

    Raises:
        subprocess.CalledProcessError: if ``sops -d`` exits non-zero.
        Exceptions raised by paramiko or the YAML parser propagate unchanged.
    """
    decrypted = subprocess.run(
        ['sops', '-d', 'D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml'],
        capture_output=True, text=True, timeout=30, check=True,
    )
    creds = yaml.safe_load(decrypted.stdout)

    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(
            '172.16.3.20', username='root',
            password=creds['credentials']['password'],
            timeout=30, look_for_keys=False, allow_agent=False,
        )
        _, stdout, _ = client.exec_command(
            'cat /mnt/cache/appdata/cloudflared/config.yml', timeout=30,
        )
        raw_config = stdout.read().decode()
    finally:
        # Always release the SSH session, including when connect/exec/read fails.
        client.close()

    # safe_load returns None for an empty document; treat that as "no ingress".
    cfg = yaml.safe_load(raw_config) or {}
    rules = cfg.get('ingress') or []
    return {rule.get('hostname') for rule in rules if rule.get('hostname')}
def classify(content, ctype):
    """Bucket the origin of a DNS record.

    Args:
        content: record content (an IP for ``A`` records, a target
            hostname for ``CNAME`` records).
        ctype: record type string, e.g. ``'A'`` or ``'CNAME'``.

    Returns:
        str: for ``A`` records ``'OUR_PUBLIC_IP'``, ``'LAN'`` or
        ``'EXTERNAL_IP'``; for ``CNAME`` records ``'TUNNEL_CNAME'``,
        ``'EXTERNAL_SAAS'``, ``'SELF_CNAME'`` or ``'EXTERNAL_CNAME'``;
        ``'OTHER'`` for every other record type.
    """
    if ctype == 'A':
        if content in OUR_PUBLIC_IPS:
            return 'OUR_PUBLIC_IP'
        if content in LAN_HOSTS:
            return 'LAN'
        return 'EXTERNAL_IP'

    if ctype == 'CNAME':
        # Compare case-insensitively and ignore a trailing root dot.
        target = content.lower().rstrip('.')

        def under(domain):
            # Match the domain itself or a real subdomain of it. A bare
            # endswith() would also accept look-alikes such as
            # "evil-azcomputerguru.com".
            return target == domain or target.endswith('.' + domain)

        if under('cfargotunnel.com'):
            return 'TUNNEL_CNAME'

        saas_domains = (
            'outlook.com', 'msftonline.com', 'microsoft.com', 'office.com', 'microsoftonline.com',
            'sendgrid.net', 'unbouncepages.com', 'msp360.com', 'secureserver.net',
            'azurestaticapps.net', 'azurefd.net', 'aws.com', 'acm-validations.aws', 'ucaasnetwork.com',
            'itglue.com', 'manage.microsoft.com', 'windows.net', 'mtasv.net', 'onmicrosoft.com',
            # Previously matched only by accident through the unanchored
            # 'aws.com' suffix; listed explicitly now that matching is
            # label-aligned.
            'amazonaws.com',
        )
        if any(under(domain) for domain in saas_domains):
            return 'EXTERNAL_SAAS'
        if under('azcomputerguru.com'):
            return 'SELF_CNAME'
        return 'EXTERNAL_CNAME'

    return 'OTHER'
def main():
    """Audit every proxied A/CNAME record in the zone against the tunnel ingress.

    Fetches the zone's A and CNAME records (one page of up to 100 each),
    keeps the proxied ones, reads the hostnames already present in the
    tunnel config, then prints one table row per record with its origin
    class, tunnel membership and the result of an HTTPS probe. Records
    classified as LAN or OUR_PUBLIC_IP that are not yet tunneled are listed
    again at the end as candidates.
    """
    print('[INFO] fetching DNS records...')
    records = []
    for rtype in ('A', 'CNAME'):
        records.extend(cfapi(f'/zones/{ZONE}/dns_records?type={rtype}&per_page=100')['result'])
    proxied = [rec for rec in records if rec.get('proxied')]
    print(f'[INFO] {len(proxied)} proxied records')

    print('[INFO] reading current tunnel ingress...')
    tunneled = load_current_ingress()
    print(f'[INFO] currently tunneled hostnames: {sorted(tunneled)}')
    print()

    header = f'{"HOSTNAME":42} {"TYPE":6} {"TARGET":35} {"CLASS":14} {"IN_TUNNEL":10} {"HTTPS":>5} {"SERVER":10}'
    print(header)
    print('-' * 130)

    candidates = []
    proxied.sort(key=lambda rec: rec['name'])
    for rec in proxied:
        name, ctype, content = rec['name'], rec['type'], rec['content']
        cls = classify(content, ctype)
        already_tunneled = name in tunneled
        status, server = probe(name)
        flag = 'YES' if already_tunneled else ''
        print(f'{name:42} {ctype:6} {content[:35]:35} {cls:14} {flag:10} {status!s:>5} {server[:10]:10}')
        # Candidates for tunnel: our origin (LAN or OUR_PUBLIC_IP) + not already in tunnel
        if not already_tunneled and cls in ('LAN', 'OUR_PUBLIC_IP'):
            candidates.append((name, content, cls, status))

    print()
    rule = '=' * 60
    print(rule)
    print('CANDIDATES FOR TUNNEL INGRESS (own origin, not yet tunneled):')
    print(rule)
    if not candidates:
        print('(none)')
    for name, content, cls, status in candidates:
        print(f' {name:42} -> {content:20} ({cls}, currently HTTP {status})')


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,68 @@
"""Discover internal backends for each proxied hostname by tracing NAT rules.
For each public IP in the 72.194.62.x block, pull pfSense port forwards on 443
(and other ports if visible) and map them to internal LAN IPs:ports.
Also pull NPM hosts from Jupiter to map hostnames -> backend services.
"""
import json, os, re, subprocess
import paramiko, yaml
def _pwd(vault_path):
    """Decrypt a SOPS vault file and return its ``credentials.password`` value.

    Runs ``sops -d <vault_path>`` (30 s timeout) and parses the plaintext
    as YAML. Raises ``subprocess.CalledProcessError`` when sops exits
    non-zero.
    """
    decrypted = subprocess.run(
        ['sops', '-d', vault_path],
        capture_output=True, text=True, timeout=30, check=True,
    )
    document = yaml.safe_load(decrypted.stdout)
    return document['credentials']['password']
def ssh(host, user, pwd, port=22):
    """Open a password-authenticated SSH session and return the connected client.

    Unknown host keys are accepted automatically (AutoAddPolicy); key files
    and the SSH agent are not consulted. The caller is responsible for
    closing the returned client.
    """
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(
        host,
        port=port,
        username=user,
        password=pwd,
        timeout=30,
        look_for_keys=False,
        allow_agent=False,
    )
    return client
def run(c, cmd, to=60):
    """Execute ``cmd`` on SSH client ``c`` and return its stdout as text.

    ``to`` is passed as the command timeout. Output is decoded as UTF-8
    with undecodable bytes replaced; stderr and the exit status are not
    inspected.
    """
    _stdin, stdout, _stderr = c.exec_command(cmd, timeout=to)
    raw = stdout.read()
    return raw.decode('utf-8', 'replace')
# -----------------------------------------------------------------
# [1] pfSense: list the port-forward (rdr) rules for our public block.
print('=== [1] pfSense NAT rules: public 72.194.62.x -> internal ===')
pf_pwd = _pwd('D:/vault/infrastructure/pfsense-firewall.sops.yaml')
pf = ssh('172.16.0.1', 'admin', pf_pwd, port=2248)
try:
    # rdr rules on igc0 for TCP towards 72.194.62.x on https/2083/2087/3389/3000/8000
    out = run(pf, r'pfctl -s nat 2>/dev/null | grep -E "rdr on igc0 .*tcp.*72\.194\.62\.[0-9]+ port = (https|2083|2087|3389|3000|8000)" | sort -u | head -40')
    print(out.strip())
    print()
finally:
    # Release the firewall session even if the command fails.
    pf.close()

# -----------------------------------------------------------------
# [2] Jupiter: find the NPM container and map server_name -> upstream.
# NOTE(review): ':4' in the banner below looks like a truncated ':443' - confirm.
print('=== [2] Jupiter docker ps + NPM inspection for :4 traffic ===')
j_pwd = _pwd('D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml')
j = ssh('172.16.3.20', 'root', j_pwd)
try:
    # NPM container: find its config file
    out = run(j, 'docker ps --format "{{.Names}}\\t{{.Image}}\\t{{.Ports}}" | grep -iE "npm|nginx-proxy|proxy"')
    print('-- NPM container --')
    print(out.strip())
    print()

    # Find NPM hosts config (usually /data/nginx/proxy_host or in database)
    out = run(j, 'ls /mnt/user/appdata/NginxProxyManager*/data/nginx/proxy_host/ 2>/dev/null | head')
    print('-- NPM proxy_host configs --')
    print(out.strip())
    print()

    # Show the proxy_host configs to extract hostname -> upstream mappings.
    # A redirection is not allowed inside a `for` word list (it is a shell
    # syntax error, and run() discards stderr, so the whole section printed
    # nothing). Unmatched globs are already skipped by the `[ -f "$f" ]` test.
    out = run(j, r'''
for f in /mnt/user/appdata/NginxProxyManager-v3/data/nginx/proxy_host/*.conf /mnt/user/appdata/NginxProxyManager/data/nginx/proxy_host/*.conf; do
  if [ -f "$f" ]; then
    srv=$(grep -oP "server_name \K[^;]+" "$f" | head -1)
    ups=$(grep -oP "(proxy_pass|set \$server) \K[^;\"]+" "$f" | head -2 | tr '\n' '|')
    echo "$(basename "$f"): server=$srv upstream=$ups"
  fi
done 2>/dev/null
''', to=60)
    print('-- server_name -> upstream --')
    print(out.strip())
    print()

    # Also dump docker ps for the services themselves
    out = run(j, 'docker ps --format "{{.Names}}\\t{{.Ports}}" | head -30')
    print('-- all docker containers + ports --')
    print(out.strip())
finally:
    j.close()

View File

@@ -0,0 +1,151 @@
"""Expand cloudflared ingress to cover the 9 additional proxied hostnames.
Mapping (per pfSense NAT discovery):
ix. .5 -> 172.16.3.10:443 (IX direct, like the existing 4)
git./plex./plexrequest./rmm./rmm-api./sync./rustdesk. -> 172.16.3.20:18443 via NPM
secure. .2 -> 172.16.1.16:443 (unknown host, try with SNI)
NPM routes on SNI, so every ingress gets originServerName = <hostname>.
Then flips their DNS (A 72.194.62.* proxied) -> CNAME tunnel proxied.
"""
import json, os, subprocess, time, urllib.request, urllib.error
import paramiko, yaml
# Cloudflare zone whose DNS records are switched over to the tunnel.
ZONE = '1beb9917c22b54be32e5215df2c227ce'

# The API token is only ever read from the environment; abort before any
# remote change is attempted when it is missing or empty.
CF_TOKEN = os.environ.get('CF_API_TOKEN_FULL_DNS', '')
if CF_TOKEN == '':
    raise SystemExit('set CF_API_TOKEN_FULL_DNS')
# Directory on Jupiter that holds the cloudflared config.
APPDATA = '/mnt/cache/appdata/cloudflared'

# Origin service URLs shared by several hostnames.
IX = 'https://172.16.3.10:443'
JNPM = 'https://172.16.3.20:18443'

# Existing 4 (IX cPanel) - already tunneled, their DNS is left untouched.
_EXISTING_INGRESS = [
    (host, IX)
    for host in (
        'azcomputerguru.com',
        'analytics.azcomputerguru.com',
        'community.azcomputerguru.com',
        'radio.azcomputerguru.com',
    )
]

# Hostnames added by this run, as (hostname, service-url).
_ADDED_INGRESS = [
    # New IX-origin
    ('ix.azcomputerguru.com', IX),
    # Jupiter NPM-served
    ('git.azcomputerguru.com', JNPM),
    ('plex.azcomputerguru.com', JNPM),
    ('plexrequest.azcomputerguru.com', JNPM),
    ('rmm.azcomputerguru.com', JNPM),
    ('rmm-api.azcomputerguru.com', JNPM),
    ('sync.azcomputerguru.com', JNPM),
    ('rustdesk.azcomputerguru.com', JNPM),
    # Different subnet, likely pfSense-routable
    ('secure.azcomputerguru.com', 'https://172.16.1.16:443'),
]

# Complete ingress list in config order: existing entries first, then new ones.
FULL_INGRESS = _EXISTING_INGRESS + _ADDED_INGRESS

# Hostnames whose DNS record must be flipped to the tunnel CNAME.
NEW_HOSTS = [host for host, _service in _ADDED_INGRESS]
def cfapi(method, path, body=None):
    """Call the Cloudflare v4 API and return the decoded JSON response.

    Args:
        method: HTTP method, e.g. ``'GET'``, ``'POST'``, ``'DELETE'``.
        path: API path appended verbatim to the v4 base URL (starts with ``/``).
        body: optional JSON-serialisable payload; a falsy value sends no body.

    Returns:
        dict: the parsed response. For an HTTP error status the JSON error
        body is returned when it can be parsed; otherwise a synthetic
        ``{'success': False, 'errors': [{'message': ...}]}`` is returned.

    Non-HTTP failures raised by ``urlopen`` (e.g. network errors) are not
    caught here.
    """
    req = urllib.request.Request(
        f'https://api.cloudflare.com/client/v4{path}',
        data=json.dumps(body).encode() if body else None,
        method=method,
        headers={'Authorization': f'Bearer {CF_TOKEN}', 'Content-Type': 'application/json'},
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            return json.loads(r.read())
    except urllib.error.HTTPError as e:
        try:
            return json.loads(e.read())
        except (ValueError, OSError):
            # Error body unreadable or not JSON: fall back to the exception
            # text. Narrowed from a bare `except:` so Ctrl-C and SystemExit
            # are no longer swallowed.
            return {'success': False, 'errors': [{'message': str(e)}]}
# -- Jupiter SSH --
def _pwd(v):
    """Decrypt SOPS vault file ``v`` and return its ``credentials.password`` value.

    Raises ``subprocess.CalledProcessError`` when ``sops -d`` exits non-zero.
    """
    decrypted = subprocess.run(
        ['sops', '-d', v],
        capture_output=True, text=True, timeout=30, check=True,
    )
    document = yaml.safe_load(decrypted.stdout)
    return document['credentials']['password']
# Single SSH session to Jupiter, shared by every jrun() call in this script.
# Unknown host keys are accepted automatically (AutoAddPolicy).
j = paramiko.SSHClient()
j.set_missing_host_key_policy(paramiko.AutoAddPolicy())
j.connect(
    '172.16.3.20',
    username='root',
    password=_pwd('D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml'),
    timeout=30,
    look_for_keys=False,
    allow_agent=False,
)


def jrun(cmd, to=60):
    """Run ``cmd`` on Jupiter over the shared session and return its stdout.

    ``to`` is passed as the command timeout. Output is decoded as UTF-8
    with undecodable bytes replaced; stderr and the exit status are not
    inspected.
    """
    _stdin, stdout, _stderr = j.exec_command(cmd, timeout=to)
    raw = stdout.read()
    return raw.decode('utf-8', 'replace')
try:
    # Read current tunnel UUID from the live config so the rewrite keeps
    # pointing at the same tunnel.
    out = jrun(f'grep "^tunnel:" {APPDATA}/config.yml')
    if ':' not in out:
        # jrun() discards stderr and the exit status, so a missing or
        # unreadable config only shows up as empty output here.
        raise SystemExit(f'[FAIL] no "tunnel:" line found in {APPDATA}/config.yml')
    UUID = out.split(':', 1)[1].strip()
    print(f'[INFO] tunnel UUID: {UUID}')

    # Build new config.yml. Nesting is expressed by indentation: each
    # ingress rule is a list item whose originRequest is a nested mapping.
    config = f'tunnel: {UUID}\n'
    config += f'credentials-file: /home/nonroot/.cloudflared/{UUID}.json\n'
    config += 'ingress:\n'
    for h, svc in FULL_INGRESS:
        config += (
            f'  - hostname: {h}\n'
            f'    service: {svc}\n'
            f'    originRequest:\n'
            # The rule's own hostname is sent as the TLS server name to the origin.
            f'      originServerName: {h}\n'
            f'      noTLSVerify: true\n'
        )
    # Final rule without a hostname: answers 404 for anything not listed above.
    config += '  - service: http_status:404\n'

    print('\n=== [1] write new config.yml ===')
    print(config)

    # Backup then write
    jrun(f'cp {APPDATA}/config.yml {APPDATA}/config.yml.bak-$(date +%Y%m%d-%H%M%S)')
    # Quoted heredoc delimiter: the remote shell must not expand anything in the body.
    HEREDOC = "'EOF_CFG'"
    jrun(f"cat > {APPDATA}/config.yml <<{HEREDOC}\n{config}\nEOF_CFG")
    jrun(f'chown 65532:65532 {APPDATA}/config.yml')
    print('\n[OK] config.yml written')

    print('\n=== [2] DNS cutover for new hostnames ===')
    tunnel_target = f'{UUID}.cfargotunnel.com'
    for h in NEW_HOSTS:
        r = cfapi('GET', f'/zones/{ZONE}/dns_records?name={h}')
        # The query filters by name only, so it can return records of any
        # type. Only address/alias records are candidates for replacement;
        # taking result[0] blindly could delete an unrelated record.
        found = (r.get('result') or []) if r.get('success') else []
        swappable = [x for x in found if x.get('type') in ('A', 'AAAA', 'CNAME')]
        if not swappable:
            print(f' [SKIP] {h}: no record found')
            continue
        rec = swappable[0]
        print(f' [{h}] current: type={rec["type"]} content={rec["content"]} proxied={rec["proxied"]}')
        if rec['type'] == 'CNAME' and rec['content'] == tunnel_target:
            print(f' already tunneled, skipping')
            continue
        d = cfapi('DELETE', f'/zones/{ZONE}/dns_records/{rec["id"]}')
        if not d.get('success'):
            print(f' [FAIL delete] {d.get("errors")}')
            continue
        body = {'type': 'CNAME', 'name': h, 'content': tunnel_target, 'proxied': True, 'ttl': 1}
        cr = cfapi('POST', f'/zones/{ZONE}/dns_records', body)
        if cr.get('success'):
            print(f' [OK] -> CNAME tunnel proxied')
        else:
            print(f' [FAIL create] {cr.get("errors")}')
            # The old record is already gone at this point; put it back so
            # the hostname does not disappear from DNS.
            restore = {k: rec[k] for k in ('type', 'name', 'content', 'proxied', 'ttl') if k in rec}
            rb = cfapi('POST', f'/zones/{ZONE}/dns_records', restore)
            if rb.get('success'):
                print(f' [ROLLBACK] restored {rec["type"]} {rec["content"]}')
            else:
                print(f' [FAIL rollback] {rb.get("errors")}')

    print('\n=== [3] restart cloudflared ===')
    print(jrun('docker restart cloudflared').rstrip())

    print('\n=== [4] wait for reconnect ===')
    # Poll the container log (up to 25 tries, 3 s apart) until four
    # "Registered tunnel connection" lines are visible in the tail.
    for i in range(25):
        time.sleep(3)
        logs = jrun('docker logs cloudflared 2>&1 | tail -40')
        conns = logs.count('Registered tunnel connection')
        if conns >= 4 and ('INF Starting metrics' in logs or 'initiating connection' in logs or 'Registered tunnel connection connIndex=3' in logs):
            print(f' [try {i+1}] {conns} connections registered')
            break
        print(f' [try {i+1}] connections: {conns}')
    else:
        # Loop ran out without a break: make the timeout visible.
        print(' [WARN] tunnel did not report 4 registered connections in time')
finally:
    j.close()
# External verification: HEAD every ingress hostname over public HTTPS and
# print one result line per host.
print('\n=== [5] external probe all 13 hostnames ===')
for host, _service in FULL_INGRESS:
    try:
        request = urllib.request.Request(
            f'https://{host}/',
            method='HEAD',
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'},
        )
        with urllib.request.urlopen(request, timeout=15) as response:
            outcome = f'HTTP {response.status} {response.headers.get("Server","-")}'
    except urllib.error.HTTPError as http_err:
        # Error statuses still prove the host answered; report the code.
        outcome = f'HTTP {http_err.code}'
    except Exception as err:
        outcome = f'ERR {str(err)[:40]}'
    print(f' {host:42} {outcome}')

View File

@@ -0,0 +1,123 @@
"""Revert the 3 hostnames that have no functional backend:
- plex (NPM has no vhost)
- rustdesk (NPM has no vhost)
- secure (Jupiter can't route to 172.16.1.16)
Removes them from tunnel ingress and restores their original A records.
"""
import json, os, subprocess, urllib.error, urllib.request, time
import paramiko, yaml
# Cloudflare zone whose DNS records are reverted.
ZONE = '1beb9917c22b54be32e5215df2c227ce'

# The API token is only ever read from the environment; abort before any
# remote change is attempted when it is missing or empty.
CF_TOKEN = os.environ.get('CF_API_TOKEN_FULL_DNS', '')
if CF_TOKEN == '':
    raise SystemExit('set CF_API_TOKEN_FULL_DNS')
# hostname -> original A-record content to restore for each reverted host
REVERT = dict([
    ('plex.azcomputerguru.com', '72.194.62.4'),
    ('rustdesk.azcomputerguru.com', '72.194.62.10'),
    ('secure.azcomputerguru.com', '72.194.62.2'),
])
def cfapi(method, path, body=None):
    """Call the Cloudflare v4 API and return the decoded JSON response.

    Args:
        method: HTTP method, e.g. ``'GET'``, ``'POST'``, ``'DELETE'``.
        path: API path appended verbatim to the v4 base URL (starts with ``/``).
        body: optional JSON-serialisable payload; a falsy value sends no body.

    Returns:
        dict: the parsed response. For an HTTP error status the JSON error
        body is returned when it can be parsed; otherwise a synthetic
        ``{'success': False, 'errors': [{'message': ...}]}`` is returned.

    Non-HTTP failures raised by ``urlopen`` (e.g. network errors) are not
    caught here.
    """
    req = urllib.request.Request(
        f'https://api.cloudflare.com/client/v4{path}',
        data=json.dumps(body).encode() if body else None,
        method=method,
        headers={'Authorization': f'Bearer {CF_TOKEN}', 'Content-Type': 'application/json'},
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            return json.loads(r.read())
    except urllib.error.HTTPError as e:
        try:
            return json.loads(e.read())
        except (ValueError, OSError):
            # Error body unreadable or not JSON: fall back to the exception
            # text. Narrowed from a bare `except:` so Ctrl-C and SystemExit
            # are no longer swallowed.
            return {'success': False, 'errors': [{'message': str(e)}]}
def _pwd(v):
    """Decrypt SOPS vault file ``v`` and return its ``credentials.password`` value.

    Raises ``subprocess.CalledProcessError`` when ``sops -d`` exits non-zero.
    """
    decrypted = subprocess.run(
        ['sops', '-d', v],
        capture_output=True, text=True, timeout=30, check=True,
    )
    document = yaml.safe_load(decrypted.stdout)
    return document['credentials']['password']
# Single SSH session to Jupiter, shared by every jrun() call in this script.
# Unknown host keys are accepted automatically (AutoAddPolicy).
j = paramiko.SSHClient()
j.set_missing_host_key_policy(paramiko.AutoAddPolicy())
j.connect(
    '172.16.3.20',
    username='root',
    password=_pwd('D:/vault/infrastructure/jupiter-unraid-primary.sops.yaml'),
    timeout=30,
    look_for_keys=False,
    allow_agent=False,
)


def jrun(cmd, to=60):
    """Run ``cmd`` on Jupiter over the shared session and return its stdout.

    ``to`` is passed as the command timeout. stderr and the exit status are
    not inspected.

    Output is decoded as UTF-8 with undecodable bytes replaced. A strict
    decode would raise UnicodeDecodeError on a single stray byte (for
    example in container logs) and abort the revert halfway through.
    """
    _, o, _ = j.exec_command(cmd, timeout=to)
    return o.read().decode('utf-8', 'replace')
try:
    print('=== [1] rewrite config.yml without the 3 broken hosts ===')
    APPDATA = '/mnt/cache/appdata/cloudflared'
    # Read UUID from the live config so the rewrite keeps the same tunnel.
    uuid_line = jrun(f'grep "^tunnel:" {APPDATA}/config.yml')
    if ':' not in uuid_line:
        # jrun() discards stderr and the exit status, so a missing or
        # unreadable config only shows up as empty output here.
        raise SystemExit(f'[FAIL] no "tunnel:" line found in {APPDATA}/config.yml')
    UUID = uuid_line.split(':', 1)[1].strip()
    IX = 'https://172.16.3.10:443'
    JNPM = 'https://172.16.3.20:18443'
    # Ingress entries that stay in the tunnel, as (hostname, service-url).
    KEEP = [
        ('azcomputerguru.com', IX),
        ('analytics.azcomputerguru.com', IX),
        ('community.azcomputerguru.com', IX),
        ('radio.azcomputerguru.com', IX),
        ('ix.azcomputerguru.com', IX),
        ('git.azcomputerguru.com', JNPM),
        ('plexrequest.azcomputerguru.com', JNPM),
        ('rmm.azcomputerguru.com', JNPM),
        ('rmm-api.azcomputerguru.com', JNPM),
        ('sync.azcomputerguru.com', JNPM),
    ]
    # Nesting is expressed by indentation: each ingress rule is a list item
    # whose originRequest is a nested mapping.
    config = f'tunnel: {UUID}\ncredentials-file: /home/nonroot/.cloudflared/{UUID}.json\ningress:\n'
    for h, svc in KEEP:
        config += (
            f'  - hostname: {h}\n'
            f'    service: {svc}\n'
            f'    originRequest:\n'
            f'      originServerName: {h}\n'
            f'      noTLSVerify: true\n'
        )
    # Final rule without a hostname: answers 404 for anything not listed above.
    config += '  - service: http_status:404\n'
    jrun(f'cp {APPDATA}/config.yml {APPDATA}/config.yml.bak-$(date +%Y%m%d-%H%M%S)')
    # Quoted heredoc delimiter: the remote shell must not expand anything in the body.
    HD = "'EOF_CFG'"
    jrun(f"cat > {APPDATA}/config.yml <<{HD}\n{config}\nEOF_CFG")
    jrun(f'chown 65532:65532 {APPDATA}/config.yml')
    # Count comes from KEEP so the message cannot drift from the list.
    print(f' {len(KEEP)} ingress hostnames kept (plex/rustdesk/secure removed)')

    print('\n=== [2] revert DNS for 3 hosts ===')
    for host, orig_ip in REVERT.items():
        r = cfapi('GET', f'/zones/{ZONE}/dns_records?name={host}')
        # The query filters by name only, so it can return records of any
        # type. Only address/alias records are candidates for replacement;
        # taking result[0] blindly could delete an unrelated record.
        found = (r.get('result') or []) if r.get('success') else []
        swappable = [x for x in found if x.get('type') in ('A', 'AAAA', 'CNAME')]
        if not swappable:
            print(f' [{host}] no record, skipping')
            continue
        rec = swappable[0]
        print(f' [{host}] current: type={rec["type"]} content={rec["content"]}')
        d = cfapi('DELETE', f'/zones/{ZONE}/dns_records/{rec["id"]}')
        if not d.get('success'):
            print(f' [FAIL delete] {d.get("errors")}')
            continue
        body = {'type': 'A', 'name': host, 'content': orig_ip, 'proxied': True, 'ttl': 1}
        cr = cfapi('POST', f'/zones/{ZONE}/dns_records', body)
        if cr.get('success'):
            print(f' [OK] restored A {orig_ip} proxied')
        else:
            print(f' [FAIL create] {cr.get("errors")}')
            # The previous record is already gone at this point; put it
            # back so the hostname does not disappear from DNS.
            restore = {k: rec[k] for k in ('type', 'name', 'content', 'proxied', 'ttl') if k in rec}
            rb = cfapi('POST', f'/zones/{ZONE}/dns_records', restore)
            if rb.get('success'):
                print(f' [ROLLBACK] restored {rec["type"]} {rec["content"]}')
            else:
                print(f' [FAIL rollback] {rb.get("errors")}')

    print('\n=== [3] restart cloudflared ===')
    print(jrun('docker restart cloudflared').rstrip())

    print('\n=== [4] wait for reconnect ===')
    # Poll the container log (up to 20 tries, 3 s apart) until four
    # "Registered tunnel connection" lines are visible in the tail.
    for i in range(20):
        time.sleep(3)
        logs = jrun('docker logs cloudflared 2>&1 | tail -30')
        conns = logs.count('Registered tunnel connection')
        if conns >= 4:
            print(f' [try {i+1}] {conns} connections')
            break
    else:
        # Loop ran out without a break: make the timeout visible.
        print(' [WARN] tunnel did not report 4 registered connections in time')
finally:
    j.close()
# External verification: HEAD every hostname that remains in the tunnel over
# public HTTPS and print one result line per host.
print('\n=== [5] external probe all 10 tunneled hostnames ===')
import urllib.request
_TUNNELED_HOSTS = (
    'azcomputerguru.com',
    'analytics.azcomputerguru.com',
    'community.azcomputerguru.com',
    'radio.azcomputerguru.com',
    'ix.azcomputerguru.com',
    'git.azcomputerguru.com',
    'plexrequest.azcomputerguru.com',
    'rmm.azcomputerguru.com',
    'rmm-api.azcomputerguru.com',
    'sync.azcomputerguru.com',
)
for host in _TUNNELED_HOSTS:
    try:
        request = urllib.request.Request(
            f'https://{host}/',
            method='HEAD',
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0'},
        )
        with urllib.request.urlopen(request, timeout=15) as response:
            outcome = f'HTTP {response.status} {response.headers.get("Server","-")}'
    except urllib.error.HTTPError as http_err:
        # Error statuses still prove the host answered; report the code.
        outcome = f'HTTP {http_err.code}'
    except Exception as err:
        outcome = f'ERR {str(err)[:40]}'
    print(f' {host:42} {outcome}')