Files
claudetools/projects/dataforth-dos/datasheet-pipeline/run_full_drain.py
Mike Swanson dd5c5afd4b Session log + DFWDS Node port + Hoffman API uploader pipeline
Built the missing piece between the test datasheet pipeline and Dataforth's
new product API. End-to-end:

- Pulled DFWDS (Dataforth Web Datasheet System) VB6 source from
  AD1\Engineering\ENGR\ATE\Test Datasheets\DFWDS to local for analysis
- Decoded its filename validation: A-J prefix decodes (A=10..J=19), all-
  numeric WO# valid (no leading 0), anything else bad
- Ported the validation + move logic to Node (dfwds-process.js)
- Built bulk uploader (upload-delta.js) for Hoffman's Swagger API
  (POST /api/v1/TestReportDataFiles/bulk with OAuth client_credentials)

Sanitized 3 prior reference scripts (fetch-server-inventory, test-scenarios,
test-upload-two) to read CF_* env vars instead of hardcoded creds.

Live drain results:
- 897 files moved Test_Datasheets -> For_Web (all valid, no renames, no
  bad), DFWDS port summary in 1.1s
- Pushed entire For_Web (7,061 files) to Hoffman API in 49.7s @ 142/s:
  Created=803 Updated=114 Unchanged=6,144 Errors=0
- Server count: 489,579 -> 490,382 (+803 net new)

Also:
- Added clients/dataforth/.gitignore to exclude plaintext Oauth.txt note
- Added clients/instrumental-music-center/docs/2026-04-13-ticket-notes.md
  (ticket write-up of 2026-04-11/12/13 IMC1 RDS removal/SQL migration work)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 21:06:50 -07:00

122 lines
5.0 KiB
Python

"""Drain Test_Datasheets through DFWDS-Node, refresh inventory+delta, push to API.
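
Usage:
    run_full_drain.py [--limit N] [--dry-run]

    --limit N   forwarded to dfwds-process.js as ``--limit N`` (0 or omitted = all files)
    --dry-run   stop after the DFWDS dry-run in step 2; steps 3-6 are skipped

Steps:
1. SFTP dfwds-process.js to AD2 (fetch-server-inventory and compute-delta run
   locally from the existing Python scripts; upload-delta.js is used as-is and
   is expected to already be in the remote directory)
2. Run DFWDS dry-run on AD2 to preview what the pending files would do
3. Run DFWDS for real
4. Re-build for_web inventory on AD2 (PowerShell one-liner)
5. SFTP for_web inventory back, fetch server inventory locally, compute delta locally
6. SFTP delta to AD2, run upload-delta.js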
"""
import base64, paramiko, subprocess, sys, time, yaml, os, threading
def parse_cli_args(argv):
    """Parse the script's command-line flags.

    Recognised flags:
        --limit N   integer limit forwarded to dfwds-process.js (0 = all files)
        --dry-run   stop after the DFWDS dry-run step

    Unrecognised arguments are ignored, and a repeated ``--limit`` keeps the
    last value, matching the original hand-rolled loop.

    Args:
        argv: argument list without the program name (``sys.argv[1:]``).

    Returns:
        ``(limit, dry_run)`` as ``(int, bool)``.

    Raises:
        SystemExit: if ``--limit`` has no value or the value is not an integer.
    """
    limit = 0
    dry_run = False
    for pos, arg in enumerate(argv):
        if arg == '--limit':
            # Previously a trailing --limit blew up with a bare IndexError.
            if pos + 1 >= len(argv):
                raise SystemExit('--limit requires an integer value')
            try:
                limit = int(argv[pos + 1])
            except ValueError:
                raise SystemExit(
                    f'--limit expects an integer, got {argv[pos + 1]!r}') from None
        elif arg == '--dry-run':
            dry_run = True
    return limit, dry_run


# LIMIT: 0 means "no limit" (process every pending file).
# DRY: when True the script stops after the DFWDS dry-run.
LIMIT, DRY = parse_cli_args(sys.argv[1:])
def _load_sops_yaml(path):
    """Decrypt *path* with the ``sops`` CLI and return the parsed YAML document.

    Raises ``subprocess.CalledProcessError`` if ``sops -d`` exits non-zero and
    ``subprocess.TimeoutExpired`` if it takes longer than 30 seconds.
    """
    decrypted = subprocess.run(
        ['sops', '-d', path],
        capture_output=True, text=True, timeout=30, check=True,
    )
    return yaml.safe_load(decrypted.stdout)


# SSH password for AD2. Backslashes are stripped from the stored value
# (presumably escaping artifacts in the vault entry -- TODO confirm).
_ad2_doc = _load_sops_yaml('D:/vault/clients/dataforth/ad2.sops.yaml')
ad2_pwd = _ad2_doc['credentials']['password'].replace('\\', '')

# OAuth settings for the upload API; the 'endpoints' and 'credentials'
# sections are read when the upload command is built.
api = _load_sops_yaml('D:/vault/clients/dataforth/api-oauth.sops.yaml')
# Working directory on AD2 that holds the Node scripts and exchanged files.
REMOTE_DIR = 'C:/Users/sysadmin/Documents/dataforth-uploader'
# Local checkout containing dfwds-process.js and the Python helper scripts.
LOCAL = r'D:\claudetools\projects\dataforth-dos\datasheet-pipeline'
# Local scratch directory used to exchange the inventory and delta files.
TEMP_DIR = r'C:\Users\guru\AppData\Local\Temp'

# One SSH session to AD2 is shared by every remote command and SFTP transfer.
c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
_connect_opts = {
    'username': 'sysadmin',
    'password': ad2_pwd,
    'timeout': 30,
    'banner_timeout': 45,
    # Password auth only: do not try local key files or an SSH agent.
    'look_for_keys': False,
    'allow_agent': False,
}
c.connect('192.168.0.6', **_connect_opts)
def run(cmd, to=300):
    """Execute a PowerShell snippet on the remote host and return its stdout.

    The snippet is passed via ``-EncodedCommand`` (base64 of UTF-16LE), so it
    needs no shell quoting. Stderr and the exit status are not inspected.

    Args:
        cmd: PowerShell source text to run.
        to: timeout in seconds handed to ``exec_command``.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes replaced.
    """
    encoded = base64.b64encode(cmd.encode('utf-16-le')).decode()
    remote_cmd = f'powershell -NoProfile -EncodedCommand {encoded}'
    _stdin, stdout, _stderr = c.exec_command(remote_cmd, timeout=to)
    raw = stdout.read()
    return raw.decode('utf-8', 'replace')
def stream(cmd, to=7200):
    """Run a PowerShell snippet on the remote host, echoing its output live.

    Stdout and stderr are each drained by a daemon thread that prints lines as
    they arrive, while the caller polls once per second for the exit status.

    Args:
        cmd: PowerShell source text; sent via ``-EncodedCommand`` (base64 of
            UTF-16LE) so it needs no shell quoting.
        to: seconds to wait for completion; also passed to ``exec_command`` as
            the channel timeout.

    Returns:
        The remote exit status, or -1 if the command did not finish within
        ``to`` seconds.
    """
    enc = base64.b64encode(cmd.encode('utf-16-le')).decode()
    _stdin, stdout, stderr = c.exec_command(
        f'powershell -NoProfile -EncodedCommand {enc}', timeout=to)
    channel = stdout.channel

    def reader(pipe):
        # Echo lines until EOF ('' from readline).
        try:
            for line in iter(pipe.readline, ''):
                print(line.rstrip(), flush=True)
        except Exception as exc:
            # Still best-effort, but report why the echo stopped instead of
            # swallowing the error silently.
            print(f'[stream] output reader stopped: {exc!r}',
                  file=sys.stderr, flush=True)

    readers = [
        threading.Thread(target=reader, args=(pipe,), daemon=True)
        for pipe in (stdout, stderr)
    ]
    for worker in readers:
        worker.start()

    # Monotonic clock: the deadline is immune to wall-clock adjustments.
    deadline = time.monotonic() + to
    while time.monotonic() < deadline:
        if channel.exit_status_ready():
            break
        time.sleep(1)

    timed_out = not channel.exit_status_ready()
    if timed_out:
        # Closing the channel unblocks the readers and stops a timed-out
        # command from lingering on the session.
        channel.close()

    for worker in readers:
        worker.join(timeout=5)
    return -1 if timed_out else channel.recv_exit_status()
# Step 1: push the local dfwds-process.js so AD2 runs the current version.
print('[1] sftp dfwds-process.js to AD2')
# The context manager closes the SFTP session even if the upload raises.
with c.open_sftp() as sftp:
    sftp.put(os.path.join(LOCAL, 'dfwds-process.js'), f'{REMOTE_DIR}/dfwds-process.js')
# Step 2: always do a DFWDS dry-run first so the planned actions are visible
# in the log before anything is run for real.
print(f'\n[2] dry-run DFWDS on Test_Datasheets (limit={LIMIT or "all"})')
dry_flags = ' '.join(['--dry-run'] + ([f'--limit {LIMIT}'] if LIMIT else []))
rc = stream(f'cd "{REMOTE_DIR}"; & node dfwds-process.js {dry_flags} 2>&1', to=300)
print(f'[dry-run rc={rc}]')

# With --dry-run on this outer script, stop here; the exit code is 0
# regardless of the dry-run's own return code.
if DRY:
    print('\n--dry-run flag set on outer script -- stopping here')
    c.close()
    sys.exit(0)
# Step 3: the real DFWDS run, with the same optional --limit as the dry-run.
print('\n[3] LIVE DFWDS run')
live_flags = f'--limit {LIMIT}' if LIMIT else ''
rc = stream(f'cd "{REMOTE_DIR}"; & node dfwds-process.js {live_flags} 2>&1', to=600)
# The return code is reported only; it does not stop the pipeline.
print(f'[live rc={rc}]')
# Step 4: rebuild the For_Web inventory on AD2 and copy it to the workstation.
print('\n[4] regenerate for_web inventory on AD2')
# Writes one ASCII line per *.TXT file in For_Web:
#   full path | name without extension | size in bytes | last-write time ("o" format)
# and finally emits the number of lines written.
ps_inv = (
    r'$out = "C:\Users\sysadmin\Documents\dataforth-uploader\for_web_inventory.txt"; '
    r'Get-ChildItem "C:\Shares\webshare\For_Web" -File -Filter *.TXT | '
    r'ForEach-Object { "$($_.FullName)|$([System.IO.Path]::GetFileNameWithoutExtension($_.Name))|$($_.Length)|$($_.LastWriteTime.ToString("o"))" } | '
    r'Set-Content -Path $out -Encoding ASCII; '
    r'(Get-Content $out).Count'
)
out = run(ps_inv, to=120)
print(f' for_web entries: {out.strip()}')

# Pull inventory back to workstation. The context manager closes the SFTP
# session even if the download raises.
inventory_local = os.path.join(TEMP_DIR, 'for_web_inventory.txt')
with c.open_sftp() as sftp:
    sftp.get(f'{REMOTE_DIR}/for_web_inventory.txt', inventory_local)
print(f' pulled to {TEMP_DIR}\\for_web_inventory.txt')
# Step 5: refresh the server-side inventory and compute the upload delta on
# the workstation, then hand the delta file to AD2.
print('\n[5] fetch fresh server inventory + compute delta locally')
for script_name, limit_s in (('fetch-server-inventory', 600), ('compute-delta', 120)):
    rc = subprocess.run(
        [sys.executable, '-u', os.path.join(LOCAL, f'{script_name}.py')],
        timeout=limit_s,
    ).returncode
    print(f' {script_name} rc={rc}')
    if rc != 0:
        # Continuing would push whatever delta file is left from an earlier
        # run, so abort instead of uploading stale data.
        c.close()
        sys.exit(f'[5] {script_name}.py failed (rc={rc}); aborting before upload')

# Push fresh delta to AD2 (compute-delta.py is expected to have written
# delta_to_upload.txt into TEMP_DIR -- TODO confirm against that script).
# The context manager closes the SFTP session even if the upload raises.
with c.open_sftp() as sftp:
    sftp.put(os.path.join(TEMP_DIR, 'delta_to_upload.txt'), f'{REMOTE_DIR}/delta_to_upload.txt')
def ps_literal(value):
    """Return *value* as a PowerShell single-quoted string literal.

    Single-quoted PowerShell strings perform no ``$`` or backtick expansion;
    the only character needing escaping is ``'``, which is doubled.
    """
    return "'" + str(value).replace("'", "''") + "'"


# Step 6: run the bulk uploader on AD2 with the OAuth settings supplied as
# CF_* environment variables for that one PowerShell session.
print('\n[6] run upload-delta.js with fresh delta')
upload_env = {
    'CF_TOKEN_URL': api['endpoints']['token-url'],
    'CF_API_BASE': api['endpoints']['api-base'],
    'CF_CLIENT_ID': api['credentials']['client-id'],
    'CF_CLIENT_SECRET': api['credentials']['client-secret'],
    'CF_SCOPE': api['credentials']['scope'],
}
# Values are emitted as single-quoted literals so a secret containing `$`,
# a backtick or `"` reaches Node unchanged instead of being expanded by, or
# breaking, the PowerShell command.
ps_upload = (
    ''.join(f'$env:{name} = {ps_literal(value)}; ' for name, value in upload_env.items())
    + f'cd "{REMOTE_DIR}"; '
    + '& node upload-delta.js --batch 100 2>&1'
)
rc = stream(ps_upload, to=3600)
print(f'\n[upload rc={rc}]')
c.close()
if rc != 0:
    # Covers both a failing uploader and a stream() timeout (-1); do not
    # report success in either case.
    sys.exit(f'[FAIL] upload-delta.js did not complete successfully (rc={rc})')
print('\n[OK] full drain complete')