Files
claudetools/projects/dataforth-dos/tools/derive-dsca-slotmaps.js
Mike Swanson 9c04c23ab0 dataforth(datasheet): wire DSCA33/45 Hoffman-mined templates (gated; accuracy-data WIP)
Per the 5070 handoff (DSCA33-45-HOFFMAN-RECOVERY): the lost DSCA33/45 specs are
recoverable from Hoffman, not John. Wired the mined dsca33-45-templates.json (56
models) into the renderer:

- datasheet-exact.js: load DSCA3345_TEMPLATES; for family DSCA, the Hoffman-mined
  template takes PRECEDENCE over the stale staged-extraction entry (which shadowed 25
  models with accOut "?"/no accHeader). Emit the verbatim 2-line accHeader for these
  families (Vin (mVAC)/Iin (AAC)/Frequency (Hz), Output (VDC)/(mADC)). Per-model
  `validated` GATE: a DSCA33/45 model renders only after byte-matching its Hoffman
  original; until then it returns null (skipped) so an unverified render can never
  overwrite a pristine live original. DSCA_VALIDATE_MODE env opens the gate for the
  validation harness only. Exposed rendersWithoutSpecs().
- render-datasheet.js: allow a null-specs render for DSCA33/45 (their spec files were
  lost; template-driven) instead of bailing on missing specs.
- derive-dsca-slotmaps.js: DSCA_TPL env to target the 3345 templates; derived 43 slot
  maps into them (22 models need none, 8 DSCA33 still below threshold).
- validate-dsca3345.js (new): renders each model's _srcSerial, fetches the live
  Hoffman original (GET TestReportDataFiles/{serial}, deployed uploader token — no
  vault needed), content-normalized compare; --apply marks validated.

STATUS: gate is CLOSED — 0 models validated, all DSCA33/45 still render null, nothing
published, no risk. Final-Test block + accuracy headers now byte-match the Hoffman
originals for all 56 models; the remaining blocker is accuracy-DATA numeric quirks that
must match to pass the gate:
  - DSCA33 calc column stored in A but displayed in mADC (x1000); measured stored in
    mA (not scaled) — an original-software unit quirk.
  - sign conventions differ per layout (DSCA33 stim/calc/meas unsigned, error signed;
    DSCA45 stim unsigned, calc/meas/error signed).
  - DSCA45 frequency-input stim formatting.
These need per-layout reverse-engineering against the originals (the validation harness
is the oracle). 8 DSCA33 models (DSCA33-02/03/03A/04/04A/05/05A/1642) also lack a slot
map (below threshold). DSCA33-1948 + DSCA45-1746 (24 units) have no Hoffman original.

Cleanups: deleted superseded memory project_dsca33_45_spec_gap; struck the obsolete
"ask John" TODO 2 from the handoff note.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-18 13:32:37 -07:00

174 lines
9.2 KiB
JavaScript

// Fix 2 — derive per-model DSCA slot maps for the ambiguous layouts.
//
// Problem: for some DSCA subtypes the raw_data STATUS groups carry MORE (or fewer)
// value-bearing entries than the template's spec-bearing rows — the test program
// measures slots (e.g. an extra 5mA load pair) that the printed sheet omits. A
// simple in-order zip then misaligns values onto rows, so those models are skipped
// by the renderer's count-guard.
//
// Fix: for each such model, pair a staged original with its DB raw_data and greedily
// match each printed measured value to a STATUS entry (same fround formatting the
// renderer uses), recording the ABSOLUTE statusEntries index per spec-bearing row.
// That ordered subsequence is the slotMap; the renderer reads statusEntries[slotMap[s]]
// for the s-th spec-bearing row. Stored as `slotMap` in the templates JSON this tool
// rewrites (dsca-templates.json by default, or the file named by the DSCA_TPL env var).
//
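// Data-vintage safe: a staged unit that is a retest (printed values != DB record)
// won't match cleanly; we sample multiple staged units per layout group, derive a
// candidate map from each, and keep the candidate that reproduces the most units
// (a lone sample must match itself; otherwise it needs at least 2 units and 60% of the
// samples). Read-only except the templates JSON it rewrites.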
const fs = require('fs'), path = require('path');
// This tool is specific to the deployed pipeline (it reads the staged originals and
// rewrites the deployed templates JSON), so it requires the deployed modules by
// absolute path and is runnable from anywhere.
const DEPLOY = 'C:/Shares/testdatadb';
const db = require(DEPLOY + '/database/db');
const dse = require(DEPLOY + '/templates/datasheet-exact');
const STAGE = 'C:/Shares/test/STAGE';
// Default to the staged-original templates; point at the Hoffman-mined DSCA33/45 set
// via DSCA_TPL when deriving slotMaps for those families.
const OUT = process.env.DSCA_TPL || (DEPLOY + '/dsca-templates.json');
// Recursively collect the paths of every *.txt file (extension matched
// case-insensitively) under directory `d`, appending them to `out`.
// A directory that cannot be listed is skipped on purpose (best-effort scan).
// Returns the same `out` array that was passed in.
function walk(d, out) {
  let entries;
  try {
    entries = fs.readdirSync(d, { withFileTypes: true });
  } catch {
    return out;
  }
  entries.forEach((entry) => {
    const full = path.join(d, entry.name);
    if (entry.isDirectory()) {
      walk(full, out);
    } else if (/\.txt$/i.test(entry.name)) {
      out.push(full);
    }
  });
  return out;
}
// Locate the column boundaries in a table separator line: every maximal run of '='
// becomes one [start, end) pair of character offsets, in left-to-right order.
function colSpans(sep) {
  const runs = /=+/g;
  const spans = [];
  for (let hit = runs.exec(sep); hit !== null; hit = runs.exec(sep)) {
    const start = hit.index;
    spans.push([start, start + hit[0].length]);
  }
  return spans;
}
// Format one STATUS entry the way the sheet prints it. Per the original note this must
// stay in step with the renderer's formatMeasuredExact.
//   - the first four characters are skipped; the value text starts at index 4
//   - the final character is the decimal-count code passed to toFixed
//   - the value is narrowed to single precision (Math.fround) before formatting
// Returns null for a missing/too-short entry, the raw trimmed text when it is not
// numeric, otherwise the fixed-point string (1 decimal when the code is not a digit).
function fmt(statusStr) {
  if (!statusStr || statusStr.length <= 4) return null;
  const lastIndex = statusStr.length - 1;
  const body = statusStr.substring(4, lastIndex).trim();
  const numeric = parseFloat(body);
  if (Number.isNaN(numeric)) return body;
  const places = parseInt(statusStr[lastIndex], 10);
  const single = Math.fround(numeric);
  return single.toFixed(Number.isNaN(places) ? 1 : places);
}
// Extract, in print order, the measured value of every spec-bearing row from the
// Final-Test table of a staged sheet's text `t`.
//
// The table is located by the "FINAL TEST RESULTS" title, then the first following
// "Parameter   Measured" header; the line after the header is the '=' separator whose
// runs give the column offsets (at least 4 columns are required).
//
// Returns an array of value strings (unit suffix removed), or null when the table
// cannot be located or a spec-bearing row has no printed value (unit unusable).
function stagedPrintedValues(t) {
  const lines = t.replace(/\r\n/g, '\n').split('\n');

  const titleAt = lines.findIndex((line) => /FINAL TEST RESULTS/.test(line));
  if (titleAt < 0) return null;

  const headerAt = lines.findIndex((line, i) => i > titleAt && /Parameter\s+Measured/.test(line));
  if (headerAt < 0) return null;

  const spans = colSpans(lines[headerAt + 1] || '');
  if (spans.length < 4) return null;
  const measuredStart = spans[1][0];
  const specStart = spans[2][0];
  const statusStart = spans[3][0];

  const printed = [];
  for (const line of lines.slice(headerAt + 2)) {
    // The table ends at the check list or at a ruled line of underscores.
    if (/Check List|^\s*_{5,}/.test(line)) break;
    if (!line.trim()) continue;
    if (/^\s*Standard output load/i.test(line)) continue;

    // Spec column only, never the trailing Status column (PASS), so rows printed
    // without a spec (240VAC Withstand / Hi-Pot) are skipped.
    const spec = line.slice(specStart, statusStart).trim();
    if (!spec) continue;

    // First whitespace-delimited token of the measured column = value without unit.
    const [value] = line.slice(measuredStart, specStart).trim().split(/\s+/);
    if (value === '') return null;
    printed.push(value);
  }
  return printed;
}
// Greedy, order-preserving alignment of printed values onto STATUS entries.
// For each printed value, take the first not-yet-passed entry whose fmt() output is
// identical, and record that entry's absolute index. Returns the index list, or null
// as soon as a printed value has no remaining match.
function greedyMap(printed, statusEntries) {
  const indices = [];
  let cursor = 0;
  for (const wanted of printed) {
    let hit = cursor;
    while (hit < statusEntries.length && fmt(statusEntries[hit]) !== wanted) hit += 1;
    if (hit >= statusEntries.length) return null;
    indices.push(hit);
    cursor = hit + 1; // later rows may only use later entries
  }
  return indices;
}
// True when `map` has one slot per printed value and every referenced STATUS entry
// formats (via fmt) to exactly the printed string for its row.
function mapMatches(map, printed, statusEntries) {
  return map.length === printed.length
    && map.every((slot, rowIndex) => fmt(statusEntries[slot]) === printed[rowIndex]);
}
// Entry point.
//   node derive-dsca-slotmaps.js [MODEL ...]
// Positional args name the models to solve (args starting with "--" are ignored);
// with no args every model in the templates JSON is a seed.
//
// Steps: index staged DSCA sheets by model -> group template models by printed layout
// -> for each group containing a seed, pair staged sheets with their DB raw_data,
// derive a candidate slotMap per sample, and keep the candidate that reproduces the
// most samples -> store it on every model of the group and rewrite OUT.
(async () => {
  try {
    const onlyModels = process.argv.slice(2).filter((a) => !a.startsWith('--'));
    const tpl = JSON.parse(fs.readFileSync(OUT, 'utf8'));
    // Own-property check so an argument such as "constructor" is not mistaken for a model.
    const hasTemplate = (m) => Object.prototype.hasOwnProperty.call(tpl, m) && Boolean(tpl[m]);

    // index staged DSCA files by model
    const byModel = {};
    for (const f of walk(STAGE, [])) {
      let t;
      try { t = fs.readFileSync(f, 'utf8'); } catch { continue; } // unreadable file: skip (best-effort scan)
      const model = ((t.match(/^\s*Model:\s*(\S+)/m) || [])[1] || '').trim();
      if (!/^DSCA/i.test(model)) continue;
      const sn = ((t.match(/^\s*SN:\s*(\S+)/m) || [])[1] || '').trim();
      if (!sn) continue;
      (byModel[model] = byModel[model] || []).push({ f, sn, text: t });
    }

    let derived = 0;
    const failed = [];
    // UNITS_PER_MODEL caps the staged units tried per model, MAX_SAMPLES the usable
    // samples collected per layout group; together they bound the DB lookups.
    const UNITS_PER_MODEL = 8;
    const MAX_SAMPLES = 48;

    // Seeds = the ambiguous models to solve (args), or all models if none given.
    // Requested models without a template are reported instead of silently dropped,
    // so a typo does not look like a successful no-op run.
    const unknown = onlyModels.filter((m) => !hasTemplate(m));
    if (unknown.length) console.warn('no template for requested model(s), ignored: ' + unknown.join(', '));
    const seeds = new Set(onlyModels.length ? onlyModels.filter(hasTemplate) : Object.keys(tpl));

    // Group ALL models by identical layout signature. Same printed layout => same
    // canonical-slot mapping, so one slotMap serves the whole group; pooling units
    // across the group lets siblings disambiguate duplicate values (e.g. a unit where
    // 5mA != 50mA linearity forces the correct slot). Only process groups that contain
    // a seed, so a targeted run touches only the relevant families.
    //
    // The signature covers the row names AND which rows carry a spec: a slotMap holds
    // one index per spec-bearing row, so models that differ in their spec-bearing rows
    // must not share a map even when their row names agree.
    const isSpecBearing = (r) => Boolean((r.spec || '').trim());
    const sigOf = (m) => tpl[m].rows.map((r) => r.name + (isSpecBearing(r) ? '\u0000spec' : '')).join('|');
    const groups = new Map();
    for (const model of Object.keys(tpl)) {
      const sig = sigOf(model);
      if (!groups.has(sig)) groups.set(sig, []);
      groups.get(sig).push(model);
    }

    for (const models of groups.values()) {
      if (!models.some((m) => seeds.has(m))) continue;
      const specRowCount = tpl[models[0]].rows.filter(isSpecBearing).length;

      const samples = [];
      for (const model of models) {
        if (samples.length >= MAX_SAMPLES) break;
        const units = (byModel[model] || []).slice(0, UNITS_PER_MODEL);
        for (const u of units) {
          if (samples.length >= MAX_SAMPLES) break;
          // Parse the staged sheet first: it is cheap and rejects unusable units
          // before any database round trip is spent on them.
          const printed = stagedPrintedValues(u.text);
          if (!printed || printed.length !== specRowCount) continue;
          let row = await db.queryOne('SELECT raw_data FROM test_records WHERE serial_number=$1 AND model_number=$2 LIMIT 1', [u.sn, model]);
          if (!row) row = await db.queryOne('SELECT raw_data FROM test_records WHERE raw_serial_number=$1 AND model_number=$2 LIMIT 1', [u.sn, model]);
          if (!row || !row.raw_data) continue;
          const p = dse.parseRawData(row.raw_data, 'DSCA');
          if (!p) continue;
          samples.push({ printed, status: p.statusEntries });
        }
      }
      if (!samples.length) continue;

      // Candidate slotMaps = greedy map from each sample; the TRUE map reproduces the
      // most units (a duplicate-confused map fails where the duplicated slots differ;
      // retest-vintage units fail every map and are ignored).
      const cands = new Map();
      for (const s of samples) {
        const m = greedyMap(s.printed, s.status);
        if (m) cands.set(m.join(','), m); // keyed by content to de-duplicate
      }
      let best = null;
      let bestScore = -1;
      for (const m of cands.values()) {
        const score = samples.filter((s) => mapMatches(m, s.printed, s.status)).length;
        if (score > bestScore) { bestScore = score; best = m; } // ties keep the earliest candidate
      }

      // A lone sample must reproduce itself; otherwise require at least two agreeing
      // units and 60% of the samples.
      const ratio = bestScore / samples.length;
      const accept = best && (samples.length === 1 ? bestScore === 1 : (ratio >= 0.6 && bestScore >= 2));
      if (accept) {
        // Apply to every model in the group. The renderer only consults slotMap when
        // the sequential value-zip fails (value count != spec-row count), so clean
        // models keep their current path and only ambiguous ones use the map.
        for (const model of models) tpl[model].slotMap = best;
        derived += models.length;
        const siblings = models.length > 1 ? ` (+${models.length - 1} siblings)` : '';
        console.log(` [${models.length}] ${models[0].padEnd(13)} slotMap=[${best.join(',')}] matched ${bestScore}/${samples.length} units${siblings}`);
      } else if (best) {
        failed.push(`${models.join('/')}(best ${bestScore}/${samples.length})`);
      }
    }

    // Leave the deployed templates file untouched when nothing was derived.
    if (derived > 0) fs.writeFileSync(OUT, JSON.stringify(tpl));
    console.log('\nderived slotMaps: ' + derived);
    if (failed.length) console.log('no clean match (left as-is): ' + failed.join(', '));
  } finally {
    // Release the DB handle on failure paths too, before the outer handler exits.
    await db.close();
  }
})().catch(e => { console.error('ERR', e.message, e.stack); process.exit(1); });