"""Pull a few just-backfilled files for byte-level verification.

Workflow:

1. Upload a small Node script to the remote host and run it to list the
   five most recently exported SCMVAS / SCMHVAS / VASLOG_ENG records.
2. For each record, fetch the exported ``For_Web\\<serial>.TXT`` file as raw
   bytes and save it under ``LOCAL_OUT``.
3. For ``VASLOG_ENG`` records that carry a ``source_file``, also download the
   source file over SFTP.
4. Compare every ``<serial>-source.txt`` / ``<serial>-exported.TXT`` pair
   found in ``LOCAL_OUT`` byte-for-byte and print the result.
"""
import base64
import json
import os
import subprocess

import paramiko
import yaml

LOCAL_OUT = r'D:\claudetools\projects\dataforth-dos\datasheet-pipeline\scmvas-hvas-research\samples\backfill-verify'
os.makedirs(LOCAL_OUT, exist_ok=True)

NODE_QUERY = r'''
const db = require('./database/db');
(async () => {
  const rows = await db.query(
    "SELECT serial_number, model_number, log_type, source_file FROM test_records " +
    "WHERE forweb_exported_at IS NOT NULL " +
    "AND ((model_number LIKE 'SCMVAS%' OR model_number LIKE 'SCMHVAS%') OR log_type='VASLOG_ENG') " +
    "ORDER BY forweb_exported_at DESC LIMIT 5"
  );
  console.log(JSON.stringify(rows, null, 2));
  await db.close();
})();
'''

# Temporary location of the uploaded query script on the remote host.
REMOTE_QUERY_JS = 'C:/Shares/testdatadb/_q.js'
# Local filename suffixes used to pair a source file with its export.
SOURCE_SUFFIX = '-source.txt'
EXPORTED_SUFFIX = '-exported.TXT'


def pwd():
    """Return the SSH password decrypted from the sops vault file.

    Runs ``sops -d`` on the vault file and reads ``credentials.password``
    from the resulting YAML. Backslashes are stripped from the value
    (presumably escape characters stored in the vault -- confirm).

    Raises:
        subprocess.CalledProcessError: if ``sops`` exits non-zero.
        subprocess.TimeoutExpired: if ``sops`` runs longer than 30 seconds.
    """
    r = subprocess.run(
        ['sops', '-d', 'D:/vault/clients/dataforth/ad2.sops.yaml'],
        capture_output=True, text=True, timeout=30, check=True,
    )
    return yaml.safe_load(r.stdout)['credentials']['password'].replace('\\', '')


def ps(c, cmd, to=120):
    """Run a PowerShell command on the remote host.

    The command is passed as base64-encoded UTF-16-LE via ``-EncodedCommand``
    so that it needs no shell quoting.

    Args:
        c: connected ``paramiko.SSHClient``.
        cmd: PowerShell source text to execute.
        to: channel timeout in seconds.

    Returns:
        ``(stdout_text, stderr_text, exit_status)``; both streams are decoded
        as UTF-8 with undecodable bytes replaced.
    """
    enc = base64.b64encode(cmd.encode('utf-16-le')).decode()
    stdin, stdout, stderr = c.exec_command(
        f'powershell -NoProfile -EncodedCommand {enc}', timeout=to)
    return (
        stdout.read().decode('utf-8', 'replace'),
        stderr.read().decode('utf-8', 'replace'),
        stdout.channel.recv_exit_status(),
    )


def _parse_rows(out):
    """Extract the JSON array of row objects from the Node script's stdout.

    Other text may surround the JSON, so every ``[`` is tried as a candidate
    start and the first one that decodes to a list of dicts wins.

    Raises:
        ValueError: if no such array is present in ``out``.
    """
    decoder = json.JSONDecoder()
    pos = out.find('[')
    while pos != -1:
        try:
            value, _ = decoder.raw_decode(out, pos)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(value, list) and all(isinstance(v, dict) for v in value):
                return value
        pos = out.find('[', pos + 1)
    raise ValueError('no JSON array of records found in query output')


def _fetch_remote_bytes(c, path, to=120):
    """Return the exact bytes of a file as seen from the remote host.

    The file is read with ``[IO.File]::ReadAllBytes`` and sent back as
    base64, so no text decoding or line-ending translation can alter the
    content on the way.

    Raises:
        RuntimeError: if the file is missing or unreadable, or if the
            returned payload is not valid base64.
    """
    quoted = path.replace("'", "''")  # escape for a single-quoted PS literal
    cmd = (
        f"$p = '{quoted}'; "
        "if (Test-Path -LiteralPath $p -PathType Leaf) { "
        "[Convert]::ToBase64String([System.IO.File]::ReadAllBytes($p)) "
        "} else { exit 3 }"
    )
    out, err, rc = ps(c, cmd, to)
    if rc != 0:
        raise RuntimeError(f'rc={rc} {err.strip()}'.strip())
    try:
        # Drop the trailing newline and any incidental whitespace.
        return base64.b64decode(''.join(out.split()), validate=True)
    except ValueError as exc:
        raise RuntimeError(f'invalid base64 payload: {exc}') from exc


def _pull_record(c, sftp, row):
    """Download the exported file (and the source, if applicable) for one row."""
    sn = row['serial_number']
    model = row['model_number']
    ltype = row['log_type']
    src_file = row.get('source_file', '')

    # The export is addressed by UNC path, so it is read through PowerShell
    # rather than SFTP.
    try:
        exported = _fetch_remote_bytes(c, fr'\\ad2\webshare\For_Web\{sn}.TXT')
    except RuntimeError as e:
        print(f'[WARN] {sn} ({model} / {ltype}) exported file not read: {e}')
        return

    local_exp = os.path.join(LOCAL_OUT, f'{sn}{EXPORTED_SUFFIX}')
    with open(local_exp, 'wb') as fh:
        fh.write(exported)
    print(f'[INFO] {sn} ({model} / {ltype}) exported size={len(exported)} bytes')

    # If it's a passthrough, also pull the source file for diff.
    if ltype == 'VASLOG_ENG' and src_file:
        src_posix = src_file.replace('\\', '/')
        local_src = os.path.join(LOCAL_OUT, f'{sn}{SOURCE_SUFFIX}')
        try:
            sftp.get(src_posix, local_src)
        except Exception as e:  # best-effort: one bad source must not stop the run
            print(f'  [WARN] source pull fail: {e}')
        else:
            print(f'  [INFO] source pulled: {local_src}')


c = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
c.connect('192.168.0.6', username='sysadmin', password=pwd(), timeout=30,
          banner_timeout=45, look_for_keys=False, allow_agent=False)
try:
    sftp = c.open_sftp()
    try:
        try:
            with sftp.open(REMOTE_QUERY_JS, 'w') as fh:
                fh.write(NODE_QUERY)
            out, err, rc = ps(c, r'cd C:\Shares\testdatadb; & node ./_q.js')
        finally:
            # Always remove the temporary script, even if the query failed.
            try:
                sftp.remove(REMOTE_QUERY_JS)
            except Exception as e:  # best-effort cleanup
                print(f'[WARN] could not remove {REMOTE_QUERY_JS}: {e}')

        try:
            rows = _parse_rows(out)
        except ValueError as exc:
            raise RuntimeError(
                f'record query failed (rc={rc}): {exc}; stderr={err.strip()!r}'
            ) from exc
        print(f'[INFO] {len(rows)} recently-exported records')

        for row in rows:
            _pull_record(c, sftp, row)
    finally:
        sftp.close()
finally:
    c.close()

# Byte-level compare of every source/exported pair present in LOCAL_OUT.
print('\n=== Byte-level compare ===')
for fn in sorted(os.listdir(LOCAL_OUT)):
    if not fn.endswith(SOURCE_SUFFIX):
        continue
    sn = fn[:-len(SOURCE_SUFFIX)]
    src = os.path.join(LOCAL_OUT, fn)
    exp = os.path.join(LOCAL_OUT, f'{sn}{EXPORTED_SUFFIX}')
    if os.path.exists(exp):
        with open(src, 'rb') as f1, open(exp, 'rb') as f2:
            s = f1.read()
            e = f2.read()
        print(f'{sn}: src={len(s)}B exp={len(e)}B identical={s == e}')