Whole-source sweep (981,716 records / 406,549 serials): 6,515 same-day multi-run events; the DB holds a NON-latest run for 311 of them (the strictly-greater-date conflict rule freezes on an arbitrary same-day run). Corrects the verdict doc to flag same-day retests as a latest-wins faithfulness violation (not benign). Adds the proposed `>=`-with-data-differs conflict-rule fix (diagnose-only) and the sweep tool. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
105 lines
5.2 KiB
JavaScript
105 lines
5.2 KiB
JavaScript
// Whole-source sweep (READ-ONLY): find serials with same-day multi-runs (distinct values)
// and count those for which the DB does NOT hold the latest run. Scans the import's .DAT sources.
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const db = require('./database/db');
|
|
|
|
// Directories scanned first: the central combined history logs.
const ROOTS = ['C:/Shares/test/Ate/HISTLOGS'];
// Parent folder searched for station directories (names matching TS-<digits> with an
// optional L/R suffix); each station's LOGS sub-folder is scanned after ROOTS.
const STATION_BASE = 'C:/Shares/test';
|
|
|
|
/**
 * Recursively collect every file whose name ends in `.dat` (case-insensitive)
 * under `dir`.
 *
 * Entries are visited in the order `fs.readdirSync` returns them, and a
 * sub-directory is descended into as soon as it is met, so the result is a
 * depth-first listing. Callers that treat "last seen" as "latest" depend on
 * that order, which is why the walk is kept recursive rather than queued.
 *
 * @param {string} dir - Directory to walk.
 * @param {string[]} [out=[]] - Accumulator; matching paths are appended to it.
 * @returns {string[]} The same `out` array, for convenience.
 */
function datFiles(dir, out = []) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    // Best-effort scan: a missing or unreadable directory contributes nothing.
    return out;
  }
  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      datFiles(full, out);
    } else if (/\.dat$/i.test(entry.name)) {
      out.push(full);
    }
  }
  return out;
}
|
|
|
|
// signature of a record = the 5 Error(%) columns joined (distinguishes runs)
|
|
/**
 * Build the signature used to tell one test run from another.
 *
 * Walks `block` in order and, for each line that contains `,"PASS"` or
 * `,"FAIL"` and splits on commas into at least five fields, takes the trimmed
 * fourth field (index 3 — the Error(%) column per the file's own notes).
 * Collection stops after five values.
 *
 * @param {Iterable<string>} block - Lines of one record.
 * @returns {string|null} The five values joined with `|`, or `null` when fewer
 *   than five qualifying lines were found.
 */
function recSig(block) {
  const errs = [];
  for (const line of block) {
    if (!/,"(PASS|FAIL)"/.test(line)) continue;
    const fields = line.split(',');
    if (fields.length < 5) continue;
    errs.push(fields[3].trim());
    if (errs.length === 5) break;
  }
  return errs.length === 5 ? errs.join('|') : null;
}
|
|
|
|
// Entry point (read-only sweep):
//   1. list every .DAT file under ROOTS, then under each station's LOGS folder;
//   2. parse them into serial -> date -> { distinct signatures, last signature seen };
//   3. keep the (serial, date) pairs that have two or more distinct signatures;
//   4. compare the DB row of each affected serial with the last signature seen;
//   5. print the tally and, when a path is given as the first CLI argument,
//      write the same report to that file.
(async () => {
  // Gather files: HISTLOGS first, then station LOGS. Scan order matters because
  // the last signature seen for a (serial, date) is what counts as the latest run.
  const files = [];
  for (const root of ROOTS) datFiles(root, files);

  let stations = [];
  try {
    stations = fs
      .readdirSync(STATION_BASE, { withFileTypes: true })
      .filter((d) => d.isDirectory() && /^TS-\d+[LR]?$/i.test(d.name))
      .map((d) => d.name);
  } catch {
    // Best-effort: if the station base cannot be listed, only ROOTS are scanned.
  }
  for (const station of stations) datFiles(path.join(STATION_BASE, station, 'LOGS'), files);
  console.log(`Scanning ${files.length} .DAT files (${stations.length} stations + HISTLOGS)...`);

  // serial -> date (YYYY-MM-DD) -> { sigs: Set of distinct signatures, last: most recent signature }
  const map = new Map();
  // A `"serial","MM-DD-YYYY"` line closes a record: its signature is computed from
  // the non-empty lines accumulated since the previous such line.
  const recordLine = /^"(\d+-\d+[A-Za-z]?)","(\d{2}-\d{2}-\d{4})"$/;
  let recCount = 0;
  let fi = 0;
  let unreadable = 0;
  for (const file of files) {
    fi++;
    if (fi % 3000 === 0) console.log(` ...${fi}/${files.length} files, ${recCount} records`);

    let lines;
    try {
      lines = fs.readFileSync(file, 'utf8').split('\n');
    } catch {
      // Keep going, but remember the gap so the totals are not silently incomplete.
      unreadable++;
      continue;
    }

    let block = [];
    for (const raw of lines) {
      const t = raw.trim();
      const sd = t.match(recordLine);
      if (!sd) {
        if (t) block.push(t);
        continue;
      }
      const sig = recSig(block);
      block = [];
      if (!sig) continue; // fewer than five usable result lines: not counted as a record

      recCount++;
      const sn = sd[1];
      const [mm, dd, yy] = sd[2].split('-');
      const date = `${yy}-${mm}-${dd}`; // ISO order, so plain string comparison sorts by day

      let byDate = map.get(sn);
      if (!byDate) {
        byDate = new Map();
        map.set(sn, byDate);
      }
      let entry = byDate.get(date);
      if (!entry) {
        entry = { sigs: new Set(), last: null };
        byDate.set(date, entry);
      }
      entry.sigs.add(sig);
      entry.last = sig;
    }
  }
  if (unreadable) {
    console.warn(`Skipped ${unreadable} unreadable .DAT file(s); the totals below exclude them.`);
  }
  console.log(`Parsed ${recCount} records, ${map.size} distinct serials.`);

  // Same-day multi-runs: two or more distinct signatures on one date.
  const multi = []; // { sn, date, runs, lastSig }
  for (const [sn, byDate] of map) {
    for (const [date, entry] of byDate) {
      if (entry.sigs.size >= 2) {
        multi.push({ sn, date, runs: entry.sigs.size, lastSig: entry.last });
      }
    }
  }
  console.log(`Serials*date with same-day multi-runs (distinct values): ${multi.length}`);
  const multiSerials = new Set(multi.map((m) => m.sn));
  console.log(`Distinct serials affected: ${multiSerials.size}`);

  // Fetch the DB rows of the affected serials, 1000 serials per query.
  const sns = [...multiSerials];
  const dbMap = new Map();
  for (let i = 0; i < sns.length; i += 1000) {
    const rows = await db.query('SELECT serial_number, test_date, raw_data FROM test_records WHERE serial_number = ANY($1)', [sns.slice(i, i + 1000)]);
    // NOTE(review): keyed by serial only — if test_records can hold several rows per
    // serial, the last row returned wins here. Confirm one-row-per-serial.
    for (const r of rows) dbMap.set(r.serial_number, r);
  }

  let notLatest = 0;
  let dbNewer = 0;
  let dbAbsent = 0;
  let dbMatches = 0;
  const examples = [];
  for (const m of multi) {
    const d = dbMap.get(m.sn);
    if (!d) {
      dbAbsent++;
      continue;
    }
    // Reduce the DB date to YYYY-MM-DD. The string branch is cut to 10 characters too,
    // so a value carrying a time part cannot compare greater than its own day.
    // NOTE(review): toISOString() yields the UTC calendar day; if the driver builds
    // local-midnight Dates this can shift by one day east of UTC — confirm.
    // NOTE(review): a null test_date becomes 'null', which sorts above any date and
    // therefore lands in the "DB newer" bucket — confirm the column is never null.
    const dbDate = d.test_date && d.test_date.toISOString
      ? d.test_date.toISOString().slice(0, 10)
      : String(d.test_date).slice(0, 10);

    if (dbDate > m.date) {
      dbNewer++; // DB has an even later test -> fine
      continue;
    }
    if (dbDate < m.date) {
      notLatest++;
      if (examples.length < 15) examples.push(`${m.sn}: DB date ${dbDate} < multirun ${m.date}`);
      continue;
    }

    // Same day: the DB row must carry the signature of the last run seen in the sources.
    const dbSig = recSig((d.raw_data || '').split('\n').map((s) => s.trim()));
    if (dbSig === m.lastSig) {
      dbMatches++;
    } else {
      notLatest++;
      if (examples.length < 15) examples.push(`${m.sn} (${m.date}, ${m.runs} runs): DB sig != latest`);
    }
  }

  // Print each report line and keep it for the optional report file.
  const report = [];
  const emit = (s) => {
    report.push(s);
    console.log(s);
  };
  emit('\n========== SAME-DAY RETEST EXPOSURE (whole source) ==========');
  emit(`Records parsed : ${recCount}`);
  emit(`Distinct serials in source : ${map.size}`);
  emit(`Serial+date with same-day multi-runs : ${multi.length}`);
  emit(`Distinct serials affected : ${multiSerials.size}`);
  emit('');
  emit('Of those same-day multi-run (serial,date) groups, the DB row:');
  emit(` matches the LATEST same-day run : ${dbMatches}`);
  emit(` does NOT hold the latest run : ${notLatest} <-- faithfulness violations`);
  emit(` holds an even newer-date test (ok) : ${dbNewer}`);
  emit(` serial absent from DB : ${dbAbsent}`);
  if (examples.length) {
    emit('');
    emit('Examples (not-latest):');
    examples.forEach((x) => emit(` ${x}`));
  }
  if (process.argv[2]) fs.writeFileSync(process.argv[2], `${report.join('\n')}\n`);
  await db.close();
})().catch((e) => {
  console.error(e);
  process.exit(1);
});
|