Files
Mike Swanson dd5c5afd4b Session log + DFWDS Node port + Hoffman API uploader pipeline
Built the missing piece between the test datasheet pipeline and Dataforth's
new product API. End-to-end:

- Pulled DFWDS (Dataforth Web Datasheet System) VB6 source from
  AD1\Engineering\ENGR\ATE\Test Datasheets\DFWDS to local for analysis
- Decoded its filename validation: A-J prefix decodes (A=10..J=19), all-
  numeric WO# valid (no leading 0), anything else bad
- Ported the validation + move logic to Node (dfwds-process.js)
- Built bulk uploader (upload-delta.js) for Hoffman's Swagger API
  (POST /api/v1/TestReportDataFiles/bulk with OAuth client_credentials)

Sanitized 3 prior reference scripts (fetch-server-inventory, test-scenarios,
test-upload-two) to read CF_* env vars instead of hardcoded creds.

Live drain results:
- 897 files moved Test_Datasheets -> For_Web (all valid, no renames, no
  bad), DFWDS port summary in 1.1s
- Pushed entire For_Web (7,061 files) to Hoffman API in 49.7s @ 142/s:
  Created=803 Updated=114 Unchanged=6,144 Errors=0
- Server count: 489,579 -> 490,382 (+803 net new)

Also:
- Added clients/dataforth/.gitignore to exclude plaintext Oauth.txt note
- Added clients/instrumental-music-center/docs/2026-04-13-ticket-notes.md
  (ticket write-up of 2026-04-11/12/13 IMC1 RDS removal/SQL migration work)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 21:06:50 -07:00

95 lines
3.6 KiB
Python

"""Upload two real datasheets, fetch them back, diff byte-for-byte."""
import hashlib
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
# OAuth and API settings are read from CF_* environment variables so that no
# credentials live in the source; only the endpoints and scope have defaults.
TOKEN_URL = os.environ.get("CF_TOKEN_URL", "https://login.dataforth.com/connect/token")
# Trim trailing slashes so a CF_API_BASE such as "https://host/" does not
# produce "https://host//api/v1".
API_BASE = os.environ.get("CF_API_BASE", "https://www.dataforth.com").rstrip("/") + "/api/v1"
CLIENT_ID = os.environ.get("CF_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("CF_CLIENT_SECRET", "")
SCOPE = os.environ.get("CF_SCOPE", "dataforth.web")

# Stop immediately when either credential is missing or empty; the message
# names the variables to set.
if not CLIENT_ID or not CLIENT_SECRET:
    sys.exit("set CF_CLIENT_ID + CF_CLIENT_SECRET (vault: clients/dataforth/api-oauth.sops.yaml)")
# Directory that holds the local reference datasheets.
_SAMPLE_DIR = r"D:\claudetools\projects\dataforth-dos\datasheet-pipeline\scmvas-hvas-research\samples\backfill-verify"

# (serial number, local source file) pairs; each file is "<serial>-source.txt".
SAMPLES = [
    (serial, "\\".join((_SAMPLE_DIR, f"{serial}-source.txt")))
    for serial in ("179377-5", "179377-6")
]
def get_token(timeout=60):
    """Fetch an OAuth2 access token with the client-credentials grant.

    Sends CLIENT_ID, CLIENT_SECRET and SCOPE as a form-encoded body to
    TOKEN_URL and returns the ``access_token`` field of the JSON reply.

    Args:
        timeout: Seconds to wait on the token endpoint. Without a timeout a
            stalled connection would hang the script indefinitely.

    Returns:
        The access token string.

    Raises:
        urllib.error.HTTPError: The endpoint answered with an error status.
        urllib.error.URLError: The endpoint could not be reached.
        KeyError: The JSON reply has no ``access_token`` field.
    """
    form = urllib.parse.urlencode({
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": SCOPE,
    }).encode()
    # Supplying a request body makes urllib send this as a POST.
    req = urllib.request.Request(TOKEN_URL, data=form)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return json.loads(r.read())["access_token"]
def api(method, path, token, body=None, timeout=60):
    """Call the API with a bearer token and return ``(status, text)``.

    Args:
        method: HTTP method, e.g. "GET" or "POST".
        path: Path appended to API_BASE; it should start with "/".
        token: OAuth access token sent in the Authorization header.
        body: Optional JSON-serialisable payload. When given it is sent as
            ``application/json``.
        timeout: Seconds to wait on the server. Without a timeout a stalled
            connection would hang the script indefinitely.

    Returns:
        A ``(status_code, response_text)`` tuple. HTTP error statuses are
        returned the same way instead of being raised, so callers can print
        the server's error body.

    Raises:
        urllib.error.URLError: The server could not be reached.
        UnicodeDecodeError: A successful response body is not valid UTF-8.
    """
    url = API_BASE + path
    headers = {"Authorization": f"Bearer {token}"}
    payload = None
    if body is not None:
        payload = json.dumps(body).encode()
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, data=payload, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as r:
            return r.status, r.read().decode()
    except urllib.error.HTTPError as e:
        # Error pages are not guaranteed to be UTF-8. Decode leniently so the
        # failure is reported rather than masked by a UnicodeDecodeError.
        return e.code, e.read().decode("utf-8", errors="replace")
def _print_first_diff(local, remote):
    """Print where two differing strings first diverge.

    Reports the first index at which the characters differ, with a slice of
    the local text around it. When no character differs within the common
    length, only the two lengths are reported.
    """
    for i, (a, b) in enumerate(zip(local, remote)):
        if a != b:
            print(f" First diff at char {i}: local={a!r} server={b!r}")
            print(f" context: ...{local[max(0, i - 20):i + 20]!r}")
            break
    else:
        # No mismatch in the overlapping part: the texts differ only in length.
        print(f" Length diff: local={len(local)} server={len(remote)}")


def main():
    """Upload each sample datasheet, fetch it back, and report any difference.

    For every (serial number, path) pair in SAMPLES, the file is read and
    decoded as UTF-8. The text is POSTed to /TestReportDataFiles and then
    fetched again with a GET by serial number. The stored text is compared
    with the uploaded text, and sizes, truncated SHA-256 hashes, timestamps
    and the first difference (if any) are printed.
    """
    token = get_token()
    print(f"[OK] Got access token (len={len(token)})\n")
    for sn, path in SAMPLES:
        with open(path, "rb") as f:
            content_bytes = f.read()
        content = content_bytes.decode("utf-8", errors="replace")
        uploaded_bytes = content.encode()
        # Hash the text as uploaded (not the raw file) so it is directly
        # comparable with the hash of the text the server returns.
        local_hash = hashlib.sha256(uploaded_bytes).hexdigest()[:16]
        print(f"=== {sn} ===")
        print(f" Local file: {path}")
        print(f" Local bytes: {len(content_bytes)} sha256[16]: {local_hash}")
        if uploaded_bytes != content_bytes:
            # errors="replace" altered the data, so what is uploaded is not
            # the file's exact bytes; say so rather than report a clean match.
            print(" !! Local file is not valid UTF-8; undecodable bytes were replaced before upload")
        status, body = api("POST", "/TestReportDataFiles", token,
                           {"SerialNumber": sn, "Content": content})
        print(f" POST -> HTTP {status}")
        print(f" Server response: {body}")
        status, body = api("GET", f"/TestReportDataFiles/{sn}", token)
        print(f" GET -> HTTP {status}")
        if status != 200:
            print(f" !! Fetch failed: {body}")
            continue
        try:
            obj = json.loads(body)
        except ValueError:
            # A 200 with a non-JSON body (e.g. an HTML page) should be
            # reported for this sample, not abort the whole run.
            print(f" !! Fetch returned a non-JSON body: {body[:200]!r}")
            continue
        # "Content" may be absent or JSON null; treat both as empty text so
        # the comparison reports a DIFF instead of crashing on None.
        fetched = obj.get("Content") or ""
        fetched_hash = hashlib.sha256(fetched.encode()).hexdigest()[:16]
        print(f" Server bytes: {len(fetched.encode('utf-8'))} sha256[16]: {fetched_hash}")
        match = "MATCH" if content == fetched else "DIFF"
        print(f" Content match: {match}")
        print(f" CreatedAtUtc: {obj.get('CreatedAtUtc')}")
        print(f" UpdatedAtUtc: {obj.get('UpdatedAtUtc')}")
        if content != fetched:
            _print_first_diff(content, fetched)
        print()


# Run the round-trip check only when executed as a script, not on import.
if __name__ == "__main__":
    main()