// claudetools/projects/dataforth-dos/tools/derive-dsca-slotmaps.js

// Fix 2 — derive per-model DSCA slot maps for the ambiguous layouts.
//
// Problem: for some DSCA subtypes the raw_data STATUS groups carry MORE (or fewer)
// value-bearing entries than the template's spec-bearing rows — the test program
// measures slots (e.g. an extra 5mA load pair) that the printed sheet omits. A
// simple in-order zip then misaligns values onto rows, so those models are skipped
// by the renderer's count-guard.
//
// Fix: for each such model, pair a staged original with its DB raw_data and greedily
// match each printed measured value to a STATUS entry (same fround formatting the
// renderer uses), recording the ABSOLUTE statusEntries index per spec-bearing row.
// That ordered subsequence is the slotMap; the renderer reads statusEntries[slotMap[s]]
// for the s-th spec-bearing row. Stored in dsca-templates.json as `slotMap`.
//
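// Data-vintage safe: a staged unit that is a retest (printed values != DB record)
// won't match cleanly, so we pool several staged units per layout group, build a
// candidate map from every unit whose printed values ALL match, and keep the
// candidate that reproduces the most units (it must reproduce at least 60% of the
// pool and at least two units, or the only unit when the pool has one).
// Read-only except the templates JSON it rewrites.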
const fs = require('fs'), path = require('path');
// This tool is specific to the deployed pipeline (it reads the staged originals and
// rewrites the deployed templates JSON), so it requires the deployed modules by
// absolute path and is runnable from anywhere.
const DEPLOY = 'C:/Shares/testdatadb';
const db = require(DEPLOY + '/database/db');
const dse = require(DEPLOY + '/templates/datasheet-exact');

const STAGE = 'C:/Shares/test/STAGE';
// Default to the staged-original templates; point at the Hoffman-mined DSCA33/45 set
// via DSCA_TPL when deriving slotMaps for those families.
const OUT = process.env.DSCA_TPL || (DEPLOY + '/dsca-templates.json');

// Recursively collect the paths of every `.txt` file (extension matched
// case-insensitively) under directory `d`, appending them to `out`.
// A directory that cannot be listed contributes nothing. Returns `out` itself.
function walk(d, out) {
    let entries;
    try {
        entries = fs.readdirSync(d, { withFileTypes: true });
    } catch {
        // Unreadable / missing directory: best effort, keep whatever we have so far.
        return out;
    }
    entries.forEach((entry) => {
        const full = path.join(d, entry.name);
        if (entry.isDirectory()) {
            walk(full, out);
            return;
        }
        if (/\.txt$/i.test(entry.name)) {
            out.push(full);
        }
    });
    return out;
}
// Turn a separator row made of runs of `=` into column spans: one
// [start, endExclusive] pair per run, in left-to-right order.
function colSpans(sep) {
    return Array.from(
        String(sep).matchAll(/=+/g),
        (run) => [run.index, run.index + run[0].length],
    );
}

// Format one STATUS entry the way a printed measured value looks.
//   - the first four characters are skipped; the value text starts at index 4
//   - the LAST character is the decimal-places code
//   - the value is narrowed with Math.fround before toFixed
// Returns null for an empty entry or one of four characters or fewer, the trimmed
// text itself when it is not numeric, otherwise the fixed-point string (one decimal
// when the trailing code is not a digit).
// Per the original note this must stay in step with formatMeasuredExact.
function fmt(statusStr) {
    if (!statusStr || statusStr.length <= 4) return null;

    const lastIndex = statusStr.length - 1;
    const text = statusStr.substring(4, lastIndex).trim();
    const numeric = parseFloat(text);
    if (Number.isNaN(numeric)) return text;

    const places = parseInt(statusStr[lastIndex], 10);
    return Math.fround(numeric).toFixed(Number.isNaN(places) ? 1 : places);
}

// Parse the staged Final-Test section -> ordered list of spec-bearing printed values.
//
// Locates the "FINAL TEST RESULTS" title, then the "Parameter  Measured" header row
// after it; the row below the header (runs of `=`) defines the column boundaries.
// Data rows are read until a "Check List" line or a line that begins with five or
// more underscores.
//
// Returns null when the section, header, or at least four columns cannot be found,
// or when a spec-bearing row has no printed measured value (unit is unusable).
function stagedPrintedValues(t) {
    const lines = t.replace(/\r\n/g, '\n').split('\n');

    const titleAt = lines.findIndex((line) => /FINAL TEST RESULTS/.test(line));
    if (titleAt < 0) return null;

    const headerOffset = lines
        .slice(titleAt + 1)
        .findIndex((line) => /Parameter\s+Measured/.test(line));
    if (headerOffset < 0) return null;
    const headerAt = titleAt + 1 + headerOffset;

    const spans = colSpans(lines[headerAt + 1] || '');
    if (spans.length < 4) return null;
    // Column starts for Measured / Specification / Status (Parameter is not needed).
    const measuredStart = spans[1][0];
    const specStart = spans[2][0];
    const statusStart = spans[3][0];

    const values = [];
    for (const line of lines.slice(headerAt + 2)) {
        if (/Check List|^\s*_{5,}/.test(line)) break;
        if (line.trim() === '') continue;
        if (/^\s*Standard output load/i.test(line)) continue;

        // Look at the Specification column only, never the trailing Status column,
        // so rows that print a status but no spec are skipped.
        const specText = line.slice(specStart, statusStart).trim();
        if (specText === '') continue;

        // First whitespace-delimited token of the Measured column (drops the unit).
        const [value] = line.slice(measuredStart, specStart).trim().split(/\s+/);
        if (value === '') return null;
        values.push(value);
    }
    return values;
}

// Greedy in-order match of printed values onto STATUS entries.
// For each printed value, take the first not-yet-passed entry whose formatted text
// equals it and record that entry's absolute index. Returns the index list, or null
// as soon as some printed value has no match at or after the current position.
function greedyMap(printed, statusEntries) {
    const slots = [];
    let cursor = 0;
    for (const wanted of printed) {
        while (cursor < statusEntries.length && fmt(statusEntries[cursor]) !== wanted) {
            cursor += 1;
        }
        if (cursor >= statusEntries.length) return null;
        slots.push(cursor);
        cursor += 1; // the next printed value must come from a later entry
    }
    return slots;
}

// Does this slotMap reproduce a unit's printed values exactly?
// True only when the map has one slot per printed value and, row by row, the
// formatted STATUS entry at the mapped index equals the printed value.
function mapMatches(map, printed, statusEntries) {
    if (map.length !== printed.length) return false;
    return map.every((slot, rowIndex) => fmt(statusEntries[slot]) === printed[rowIndex]);
}

// Entry point: derive one slotMap per row-name signature group and rewrite OUT.
//
// CLI: positional arguments are the seed models to solve; arguments starting with
// `--` are ignored. With no positional arguments every model in the templates JSON
// is a seed. Any failure is printed and the process exits with status 1.
(async () => {
    const UNITS_PER_MODEL = 8, MAX_SAMPLES = 48; // bounds DB lookups per signature group
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    const onlyModels = process.argv.slice(2).filter(a => !a.startsWith('--'));
    const tpl = JSON.parse(fs.readFileSync(OUT, 'utf8'));

    // A requested model that is not in the templates JSON cannot seed anything. Say
    // so instead of dropping it silently, so a typo does not look like "nothing to do".
    const unknown = onlyModels.filter(m => !hasOwn(tpl, m));
    if (unknown.length) console.warn('unknown model(s) ignored: ' + unknown.join(', '));

    // Index staged DSCA files by model. Only the first UNITS_PER_MODEL units of a model
    // are ever sampled, so only those keep their text in memory.
    const byModel = Object.create(null);
    for (const f of walk(STAGE, [])) {
        let text;
        try { text = fs.readFileSync(f, 'utf8'); } catch { continue; }
        const model = (text.match(/^\s*Model:\s*(\S+)/m) || [])[1] || '';
        if (!/^DSCA/i.test(model)) continue;
        const sn = (text.match(/^\s*SN:\s*(\S+)/m) || [])[1] || '';
        if (!sn) continue;
        const units = byModel[model] || (byModel[model] = []);
        if (units.length < UNITS_PER_MODEL) units.push({ f, sn, text });
    }

    // Seeds = the ambiguous models to solve (args), or all models if none given.
    const seeds = new Set(onlyModels.length ? onlyModels.filter(m => hasOwn(tpl, m)) : Object.keys(tpl));

    // Group ALL models by identical row-name signature. Same printed layout => same
    // canonical-slot mapping, so one slotMap serves the whole group; pooling units
    // across the group lets siblings disambiguate duplicate values (e.g. a unit where
    // 5mA != 50mA linearity forces the correct slot). Only process groups that contain
    // a seed, so a targeted run touches only the relevant families.
    // Null-prototype dictionary: signatures are data-derived strings and must never
    // collide with inherited Object.prototype keys.
    const groups = Object.create(null);
    for (const model of Object.keys(tpl)) {
        const sig = tpl[model].rows.map(r => r.name).join('|');
        (groups[sig] = groups[sig] || []).push(model);
    }

    let derived = 0;
    const failed = [];

    for (const models of Object.values(groups)) {
        if (!models.some(m => seeds.has(m))) continue;
        const specRowCount = tpl[models[0]].rows.filter(r => (r.spec || '').trim()).length;

        const samples = [];
        for (const model of models) {
            if (samples.length >= MAX_SAMPLES) break;
            for (const u of byModel[model] || []) {
                if (samples.length >= MAX_SAMPLES) break;
                // Cheap local parse first: a staged unit whose printed rows cannot be
                // used is skipped without any DB round-trip.
                const printed = stagedPrintedValues(u.text);
                if (!printed || printed.length !== specRowCount) continue;
                let row = await db.queryOne('SELECT raw_data FROM test_records WHERE serial_number=$1 AND model_number=$2 LIMIT 1', [u.sn, model]);
                if (!row) row = await db.queryOne('SELECT raw_data FROM test_records WHERE raw_serial_number=$1 AND model_number=$2 LIMIT 1', [u.sn, model]);
                if (!row || !row.raw_data) continue;
                const p = dse.parseRawData(row.raw_data, 'DSCA');
                if (!p) continue;
                samples.push({ printed, status: p.statusEntries });
            }
        }
        if (!samples.length) continue;

        // Candidate slotMaps = greedy map from each sample; the TRUE map reproduces the
        // most units (a duplicate-confused map fails where the duplicated slots differ;
        // retest-vintage units fail every map and only lower the match ratio).
        const cands = new Map();
        for (const s of samples) {
            const m = greedyMap(s.printed, s.status);
            if (m) cands.set(m.join(','), m);
        }
        let best = null, bestScore = -1;
        for (const m of cands.values()) {
            const score = samples.filter(s => mapMatches(m, s.printed, s.status)).length;
            if (score > bestScore) { bestScore = score; best = m; } // first candidate wins ties
        }

        // A lone sample must match itself; a pool needs >= 60% agreement and >= 2 units.
        const ratio = bestScore / samples.length;
        const accept = best && (samples.length === 1 ? bestScore === 1 : (ratio >= 0.6 && bestScore >= 2));
        if (accept) {
            // Apply to every model in the group. The renderer only consults slotMap when
            // the sequential value-zip fails (value count != spec-row count), so clean
            // models keep their current path and only ambiguous ones use the map.
            for (const model of models) tpl[model].slotMap = best;
            derived += models.length;
            const siblings = models.length > 1 ? '  (+' + (models.length - 1) + ' siblings)' : '';
            console.log('  [' + models.length + '] ' + models[0].padEnd(13) + ' slotMap=[' + best.join(',') + ']  matched ' + bestScore + '/' + samples.length + ' units' + siblings);
        } else if (best) {
            failed.push(models.join('/') + '(best ' + bestScore + '/' + samples.length + ')');
        }
    }

    fs.writeFileSync(OUT, JSON.stringify(tpl));
    console.log('\nderived slotMaps: ' + derived);
    if (failed.length) console.log('no clean match (left as-is): ' + failed.join(', '));
    await db.close();
})().catch(e => { console.error('ERR', e.message, e.stack); process.exit(1); });