// Parsing-fidelity validation (READ-ONLY): every staged original .TXT vs the DB record. // Compares scale-invariant data: SN, model, date, and the 5 Error(%) accuracy values // (error% is dimensionless -> immune to mV scaling / current-output conversion, so a // mismatch means a real parsing/segmentation/identity fault, not a rendering transform). const fs = require('fs'); const path = require('path'); const db = require('./database/db'); const STAGE = 'C:/Shares/test/STAGE'; const ERR_TOL = 0.003; // half-unit of 3-decimal display + margin const REPORT = process.argv[2] || null; function walk(dir, out) { let items = []; try { items = fs.readdirSync(dir, { withFileTypes: true }); } catch { return out; } for (const it of items) { const p = path.join(dir, it.name); if (it.isDirectory()) walk(p, out); else if (/\.txt$/i.test(it.name)) out.push(p); } return out; } function parseTxt(txt) { const lines = txt.split(/\r?\n/); const get = re => { for (const l of lines) { const m = l.match(re); if (m) return m[1]; } return null; }; const sn = get(/^\s*SN:\s*(\S+)/); const model = get(/^\s*Model:\s*(\S+)/); const date = get(/^\s*Date:\s*(\d{2}-\d{2}-\d{4})/); // accuracy rows: lines ending in PASS/FAIL with >=4 numeric tokens, before FINAL TEST const errs = []; const stims = []; for (const l of lines) { if (/FINAL TEST/i.test(l)) break; if (!/\b(PASS|FAIL)\b/.test(l)) continue; const nums = (l.match(/[+-]?\d*\.\d+|[+-]?\d+/g) || []).map(Number); if (nums.length >= 4) { errs.push(nums[3]); stims.push(nums[0]); } // [0]=stim [3]=Error(%) if (errs.length === 5) break; } return { sn, model, date, errs, stims }; } // Decode hex-prefix encoded serial (A-prefix files store the ENCODED SN inside): // leading [A-Z] -> (charCode-55) numeric prefix. H9553-13-style files already store // the decoded SN, which is numeric, so they don't match and pass through unchanged. 
/**
 * Convert a serial number as written in a .TXT file into the DB lookup key.
 *
 * When the serial starts with a letter immediately followed by a digit, the letter
 * is replaced by (upper-case char code - 55), i.e. 'A' -> "10", 'B' -> "11", ...
 * Any other value is returned unchanged.
 *
 * @param {string} sn - Serial number text from the file.
 * @returns {string} Decoded serial number.
 */
function decodeSn(sn) {
  if (/^[A-Za-z]\d/.test(sn)) {
    const prefix = sn.toUpperCase().charCodeAt(0) - 55;
    return String(prefix) + sn.slice(1);
  }
  return sn;
}

/** Upper-case a model string and drop a leading "SCM"; null/undefined becomes ''. */
const normModel = m => (m || '').toUpperCase().replace(/^SCM/, '');

/**
 * Pull up to 5 accuracy rows out of the comma-separated `raw_data` text of a DB record.
 *
 * The first non-blank line is skipped (presumably a header row — confirm against the
 * writer of raw_data). A row counts when it has at least 5 comma-separated fields,
 * contains a quoted "PASS" or "FAIL", and its 4th field parses as a number.
 *
 * @param {(string|null|undefined)} raw - The raw_data column value.
 * @returns {{errs: number[], stims: number[]}} 4th-field and 1st-field values of the
 *   accepted rows. A stim that fails to parse is stored as NaN.
 */
function parseRawAcc(raw) {
  if (!raw) return { errs: [], stims: [] };
  const lines = raw.split('\n').map(s => s.trim()).filter(Boolean);
  const errs = [];
  const stims = [];
  for (let i = 1; i < lines.length && errs.length < 5; i++) {
    const fields = lines[i].split(',');
    if (fields.length < 5 || !/"(PASS|FAIL)"/.test(lines[i])) continue;
    const err = parseFloat(fields[3]);
    if (Number.isNaN(err)) continue;
    errs.push(err);
    stims.push(parseFloat(fields[0]));
  }
  return { errs, stims };
}

// scale-aware + relative stim match (mV display = V*1000; analog inputs vary run-to-run).
// Matching the 5-point setpoint pattern proves same unit/test -> correct segmentation.

/**
 * True when `t` equals `r`, `r * 1000` or `r / 1000` within
 * max(0.3, 0.5 % of the candidate's magnitude).
 *
 * @param {number} t - Stimulus value from the .TXT file.
 * @param {number} r - Stimulus value from raw_data.
 * @returns {boolean}
 */
function stimMatch1(t, r) {
  return [r, r * 1000, r / 1000].some(
    c => Math.abs(t - c) <= Math.max(0.3, 0.005 * Math.abs(c))
  );
}

/**
 * True when both lists hold exactly 5 stimulus values and every pair matches
 * according to stimMatch1.
 *
 * @param {number[]} txt - Stimulus values from the .TXT file.
 * @param {number[]} raw - Stimulus values from raw_data.
 * @returns {boolean}
 */
function stimsMatch(txt, raw) {
  return txt.length === 5 && raw.length === 5 && txt.every((t, i) => stimMatch1(t, raw[i]));
}

/**
 * Normalize a DB `test_date` value to a "YYYY-MM-DD" string.
 *
 * @param {*} value - Date-like object (anything with toISOString) or string.
 * @returns {(string|null)} The day, or null when the value is missing, is an invalid
 *   Date, or does not start with an ISO day.
 */
function toIsoDay(value) {
  if (value == null || value === '') return null;
  if (typeof value.toISOString === 'function') {
    // toISOString() throws a RangeError on an invalid Date; treat that as "no date".
    if (typeof value.getTime === 'function' && Number.isNaN(value.getTime())) return null;
    // NOTE(review): toISOString() reports the UTC day. If the DB driver builds Date
    // objects at local midnight, zones east of UTC would get the previous day here —
    // confirm how ./database/db parses date columns.
    return value.toISOString().slice(0, 10);
  }
  const text = String(value).slice(0, 10);
  return /^\d{4}-\d{2}-\d{2}$/.test(text) ? text : null;
}

/**
 * Decide which report bucket one parsed .TXT record belongs to.
 *
 * Checks run in this order: DB row missing -> model mismatch -> date ordering ->
 * error% comparison. The first check that applies determines the bucket.
 *
 * @param {{sn: string, key: string, model: (string|null), date: (string|null),
 *          errs: number[], stims: number[]}} rec - Parsed .TXT record plus DB key.
 * @param {(Object|undefined)} row - DB row (serial_number, model_number, test_date,
 *   raw_data), or undefined when the serial is not in the DB.
 * @param {number} [tol=ERR_TOL] - Largest accepted |txt error% - DB error%|.
 * @returns {{bucket: string, detail: (string|undefined)}} `bucket` is one of
 *   missing, collision, model, badDate, retest, dbOlder, idOnly, vasFmt,
 *   errRowCount, retestSameDay, err, ok. `detail` is the report text for the
 *   list-type buckets and is absent for the counter-type ones.
 */
function classifyRecord(rec, row, tol = ERR_TOL) {
  if (!row) {
    return {
      bucket: 'missing',
      detail: rec.sn + (rec.key !== rec.sn ? ' (dec ' + rec.key + ')' : ''),
    };
  }

  // Collision: same SN but a genuinely different product family in DB (generic serials like 1-1 reused)
  if (rec.model && row.model_number && normModel(rec.model) !== normModel(row.model_number)) {
    // NOTE(review): the family is whatever precedes the first digit or hyphen, so
    // models that begin with a digit after SCM-stripping all share the empty family
    // and land in "model" rather than "collision" — confirm this is intended.
    const family = m => normModel(m).replace(/[-0-9].*$/, '');
    const detail = `${rec.sn}: txt=${rec.model} db=${row.model_number}`;
    const sameFamily = family(rec.model) === family(row.model_number);
    return { bucket: sameFamily ? 'model' : 'collision', detail };
  }

  if (rec.date) {
    const [mm, dd, yyyy] = rec.date.split('-');
    const txtDate = `${yyyy}-${mm}-${dd}`;
    const dbDate = toIsoDay(row.test_date);
    // A missing/unparseable DB date used to be stringified and compared as text,
    // which sorted "null" after every real date and hid the record as a retest.
    if (dbDate === null) {
      return { bucket: 'badDate', detail: `${rec.sn}: txt=${rec.date} db=${String(row.test_date)}` };
    }
    // Retest: DB date newer than the staged file -> ON-CONFLICT updated DB to a later test. Expected.
    if (dbDate > txtDate) return { bucket: 'retest' };
    if (dbDate < txtDate) {
      return { bucket: 'dbOlder', detail: `${rec.sn}: txt=${rec.date} db=${dbDate}` };
    }
  }

  // Without 5 accuracy rows in the .TXT there is nothing to compare, so the record
  // must not be counted as an error% match.
  if (rec.errs.length !== 5) return { bucket: 'idOnly' };

  // Same test run -> error% must match
  const acc = parseRawAcc(row.raw_data);
  const dbErrs = acc.errs;
  // VAS/single-point format, no 5-row accuracy block in raw_data
  if (dbErrs.length === 0) return { bucket: 'vasFmt' };
  if (dbErrs.length !== 5) {
    return {
      bucket: 'errRowCount',
      detail: `${rec.sn} (${row.model_number}): txt 5 rows, raw_data ${dbErrs.length}`,
    };
  }

  const maxDiff = Math.max(...rec.errs.map((e, i) => Math.abs(e - dbErrs[i])));
  if (!(maxDiff > tol)) return { bucket: 'ok' };

  // Same SN+model+date but error% differs. If the STIM SETPOINTS match, it's the
  // same unit/test points -> a same-day retest (DB kept a different run). If stim
  // does NOT match, the wrong record's data is in raw_data -> genuine parse fault.
  if (stimsMatch(rec.stims, acc.stims)) return { bucket: 'retestSameDay' };
  return {
    bucket: 'err',
    detail: `${rec.sn} (${row.model_number}): STIM txt=[${rec.stims.join(',')}] raw=[${acc.stims.map(x => x.toFixed(4)).join(',')}] | err txt=[${rec.errs.join(',')}] db=[${dbErrs.map(x => x.toFixed(4)).join(',')}]`,
  };
}

// Entry point: scan STAGE, look every serial up in the DB, classify, print the report.
(async () => {
  console.log('Scanning staged .TXT files...');
  const files = walk(STAGE, []);
  console.log('Found ' + files.length + ' staged .TXT files');

  // Parse all files, collect SNs
  const recs = [];
  let unreadable = 0;
  let noSn = 0;
  let noAcc = 0;
  for (const f of files) {
    let text;
    try {
      text = fs.readFileSync(f, 'utf8');
    } catch {
      unreadable++; // still skipped, but now visible in the report
      continue;
    }
    const p = parseTxt(text);
    if (!p.sn) { noSn++; continue; }
    if (p.errs.length < 5) noAcc++;
    recs.push({ file: f, ...p, key: decodeSn(p.sn) }); // key = DB lookup key (decoded)
  }

  // Bulk-load DB rows for these SNs (decoded keys)
  const sns = [...new Set(recs.map(r => r.key))];
  const dbMap = new Map();
  for (let i = 0; i < sns.length; i += 1000) {
    const chunk = sns.slice(i, i + 1000);
    const rows = await db.query(
      'SELECT serial_number, model_number, test_date, raw_data FROM test_records WHERE serial_number = ANY($1)',
      [chunk]);
    for (const r of rows) dbMap.set(r.serial_number, r);
  }

  // Array buckets keep a detail line per record; numeric buckets only count.
  const out = {
    missing: [], collision: [], model: [], dbOlder: [], badDate: [], err: [], errRowCount: [],
    retest: 0, retestSameDay: 0, vasFmt: 0, idOnly: 0, ok: 0
  };
  for (const r of recs) {
    const { bucket, detail } = classifyRecord(r, dbMap.get(r.key));
    if (Array.isArray(out[bucket])) out[bucket].push(detail);
    else out[bucket]++;
  }

  const lines = [];
  const L = s => { lines.push(s); console.log(s); };
  // NOTE(review): any column padding inside these labels appears to have been lost
  // when the source was collapsed; re-align if the report should be tabular.
  L('========== PARSING FIDELITY REPORT ==========');
  L('Staged .TXT files scanned : ' + files.length);
  L(' - unreadable (skipped) : ' + unreadable);
  L(' - no SN line (non-standard fmt): ' + noSn);
  L(' - SN found / compared : ' + recs.length);
  L(' - .TXT w/o 5 accuracy rows : ' + noAcc);
  L('Unique SNs looked up in DB : ' + sns.length);
  // dbMap has one entry per serial the DB returned, so its size is the exact count.
  L('SNs present in DB : ' + dbMap.size);
  L('');
  L('EXPLAINED (not parsing faults):');
  L(' Consistent (SN+model+date+5 error% match) : ' + out.ok);
  L(' Identity only (.TXT w/o 5 rows, error% not compared): ' + out.idOnly);
  L(' Retest, DB newer date than .TXT : ' + out.retest);
  L(' Retest same-day (stim matches, run differs): ' + out.retestSameDay);
  L(' VAS/single-point fmt (no 5-row block) : ' + out.vasFmt);
  L(' Serial collision (generic SN, diff family): ' + out.collision.length);
  L('');
  L('NEEDS REVIEW (potential genuine issues):');
  L(' Missing from DB (after hex-decode) : ' + out.missing.length);
  L(' Model variant mismatch (same family) : ' + out.model.length);
  L(' DB OLDER than .TXT (stale DB?) : ' + out.dbOlder.length);
  L(' DB date missing/unparseable : ' + out.badDate.length);
  L(' GENUINE error% fault (stim ALSO differs) : ' + out.err.length);
  L(' Accuracy-row-count diff : ' + out.errRowCount.length);

  // Print up to 20 detail lines of a bucket under its own heading.
  const sample = (label, arr) => {
    if (arr.length) {
      L('');
      L(label + ' (first 20):');
      arr.slice(0, 20).forEach(x => L(' ' + x));
    }
  };
  sample('COLLISION (informational)', out.collision);
  sample('MODEL VARIANT MISMATCH', out.model);
  sample('DB OLDER THAN .TXT', out.dbOlder);
  sample('DB DATE MISSING/UNPARSEABLE', out.badDate);
  sample('GENUINE FAULT (stim+error differ)', out.err);
  sample('ROW-COUNT DIFF', out.errRowCount);
  if (out.missing.length) {
    L('');
    L('MISSING-FROM-DB (first 30): ' + out.missing.slice(0, 30).join(', '));
  }

  if (REPORT) {
    fs.writeFileSync(REPORT, lines.join('\n') + '\n');
    console.log('\n[written] ' + REPORT);
  }
  await db.close();
})().catch(e => { console.error(e); process.exit(1); });