"""Upload two real datasheets, fetch them back, diff byte-for-byte.""" import json import sys import urllib.request import urllib.parse import hashlib import os, sys TOKEN_URL = os.environ.get("CF_TOKEN_URL", "https://login.dataforth.com/connect/token") API_BASE = os.environ.get("CF_API_BASE", "https://www.dataforth.com") + "/api/v1" CLIENT_ID = os.environ.get("CF_CLIENT_ID", "") CLIENT_SECRET = os.environ.get("CF_CLIENT_SECRET", "") SCOPE = os.environ.get("CF_SCOPE", "dataforth.web") if not CLIENT_ID or not CLIENT_SECRET: sys.exit("set CF_CLIENT_ID + CF_CLIENT_SECRET (vault: clients/dataforth/api-oauth.sops.yaml)") SAMPLES = [ ("179377-5", r"D:\claudetools\projects\dataforth-dos\datasheet-pipeline\scmvas-hvas-research\samples\backfill-verify\179377-5-source.txt"), ("179377-6", r"D:\claudetools\projects\dataforth-dos\datasheet-pipeline\scmvas-hvas-research\samples\backfill-verify\179377-6-source.txt"), ] def get_token(): data = urllib.parse.urlencode({ "grant_type": "client_credentials", "client_id": CLIENT_ID, "client_secret": CLIENT_SECRET, "scope": SCOPE, }).encode() req = urllib.request.Request(TOKEN_URL, data=data) with urllib.request.urlopen(req) as r: return json.loads(r.read())["access_token"] def api(method, path, token, body=None): url = API_BASE + path headers = {"Authorization": f"Bearer {token}"} if body is not None: body = json.dumps(body).encode() headers["Content-Type"] = "application/json" req = urllib.request.Request(url, data=body, headers=headers, method=method) try: with urllib.request.urlopen(req) as r: return r.status, r.read().decode() except urllib.error.HTTPError as e: return e.code, e.read().decode() def main(): token = get_token() print(f"[OK] Got access token (len={len(token)})\n") for sn, path in SAMPLES: with open(path, "rb") as f: content_bytes = f.read() content = content_bytes.decode("utf-8", errors="replace") local_hash = hashlib.sha256(content.encode()).hexdigest()[:16] print(f"=== {sn} ===") print(f" Local file: {path}") print(f" Local bytes: {len(content_bytes)} sha256[16]: {local_hash}") status, body = api("POST", "/TestReportDataFiles", token, {"SerialNumber": sn, "Content": content}) print(f" POST -> HTTP {status}") print(f" Server response: {body}") status, body = api("GET", f"/TestReportDataFiles/{sn}", token) print(f" GET -> HTTP {status}") if status != 200: print(f" !! Fetch failed: {body}") continue obj = json.loads(body) fetched = obj.get("Content", "") fetched_hash = hashlib.sha256(fetched.encode()).hexdigest()[:16] print(f" Server bytes: {len(fetched.encode('utf-8'))} sha256[16]: {fetched_hash}") match = "MATCH" if content == fetched else "DIFF" print(f" Content match: {match}") print(f" CreatedAtUtc: {obj.get('CreatedAtUtc')}") print(f" UpdatedAtUtc: {obj.get('UpdatedAtUtc')}") if content != fetched: # Show first diff for i, (a, b) in enumerate(zip(content, fetched)): if a != b: print(f" First diff at char {i}: local={a!r} server={b!r}") print(f" context: ...{content[max(0,i-20):i+20]!r}") break else: print(f" Length diff: local={len(content)} server={len(fetched)}") print() if __name__ == "__main__": main()