// Root-cause + scope of the 608 missing staged units (READ-ONLY) for the report to John.
// Hypothesis: importer serial/date regex requires a leading digit, so hex-encoded
// (leading-letter) serials in the .DAT are never matched -> records dropped.
//
// Usage: node <this-script> [reportFile]
//   The report is always printed to stdout; when reportFile is given it is also
//   written there. The database is only queried, never modified.
'use strict';

const fs = require('fs');
const path = require('path');
const db = require('./database/db');

const STAGE = 'C:/Shares/test/STAGE';
const STRICT = /^"(\d+-\d+[A-Za-z]?)","(\d{2}-\d{2}-\d{4})"$/; // current importer regex
const LOOSE = /^"([^"]+)","(\d{2}-\d{2}-\d{4})"$/; // any serial before a date
const LETTER_PREFIXED = /^[A-Za-z]\d/; // encoded serial: one letter followed by a digit

// NOTE(review): both database lookups were truncated in the copy of this script
// that was reviewed (everything between a '<' and the next '>' was lost). The
// batch size, table name, column name and the db.query(sql, params) call shape
// below are a reconstruction -- confirm them against ./database/db and the real
// schema before trusting the report.
const DB_BATCH_SIZE = 500;
const DB_TABLE = 'test_records';
const DB_SERIAL_COLUMN = 'serial_number';

/**
 * Decode a letter-prefixed serial: the leading letter is replaced by its
 * base-36-style value (A -> 10, B -> 11, ...). Other serials pass through.
 * @param {string} sn raw serial as found in the .TXT/.DAT file
 * @returns {string} decoded serial
 */
const decode = (sn) =>
  LETTER_PREFIXED.test(sn)
    ? String(sn.toUpperCase().charCodeAt(0) - 55) + sn.slice(1)
    : sn;

/**
 * Recursively collect files under `dir` whose name matches `re`.
 * Directories that cannot be read are skipped on purpose (best-effort scan).
 * @param {string} dir directory to scan
 * @param {RegExp} re  file-name filter
 * @param {string[]} out accumulator, appended to in place
 * @returns {string[]} the same `out` array
 */
function walk(dir, re, out) {
  let entries = [];
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return out;
  }
  for (const entry of entries) {
    const full = path.join(dir, entry.name);
    if (entry.isDirectory()) walk(full, re, out);
    else if (re.test(entry.name)) out.push(full);
  }
  return out;
}

/** First capture group of `re` in `text`, or '' when there is no match. */
const firstGroup = (text, re) => (text.match(re) || [])[1] || '';

/**
 * Count items per key, most frequent first.
 * @param {object[]} items
 * @param {(item: object) => string} keyOf key extractor; falsy keys count as '?'
 * @returns {[string, number][]} [key, count] pairs sorted by descending count
 */
function countBy(items, keyOf) {
  const counts = new Map();
  for (const item of items) {
    const key = keyOf(item) || '?';
    counts.set(key, (counts.get(key) || 0) + 1);
  }
  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
}

/** Render [key, count] pairs as "key=count, key=count". */
const formatCounts = (pairs) => pairs.map(([key, n]) => key + '=' + n).join(', ');

/**
 * Sort key that puts the 4-digit year first so dates order correctly across
 * years (a plain string sort of NN-NN-YYYY does not).
 * NOTE(review): assumes the first field is the month (MM-DD-YYYY) -- confirm.
 */
const dateSortKey = (d) => d.slice(6) + d.slice(0, 5);

/** "earliest .. latest" of the given units' dates, or 'n/a' when none have one. */
function dateRange(units) {
  const dates = units
    .map((u) => u.date)
    .filter(Boolean)
    .sort((a, b) => {
      const ka = dateSortKey(a);
      const kb = dateSortKey(b);
      if (ka === kb) return 0;
      return ka < kb ? -1 : 1;
    });
  return dates.length ? dates[0] + ' .. ' + dates[dates.length - 1] : 'n/a';
}

/**
 * Return the subset of `serials` that exist in the database (read-only query,
 * issued in batches). See the NOTE(review) on the DB_* constants: the query is
 * reconstructed and must be verified.
 * @param {string[]} serials decoded serial numbers
 * @returns {Promise<Set<string>>}
 */
async function findSerialsInDb(serials) {
  const found = new Set();
  for (let i = 0; i < serials.length; i += DB_BATCH_SIZE) {
    const batch = serials.slice(i, i + DB_BATCH_SIZE);
    const result = await db.query(
      `SELECT DISTINCT ${DB_SERIAL_COLUMN} AS sn FROM ${DB_TABLE} WHERE ${DB_SERIAL_COLUMN} = ANY($1)`,
      [batch],
    );
    // Accept both a bare row array and a { rows } result object.
    const rows = Array.isArray(result) ? result : result.rows;
    for (const row of rows) found.add(row.sn);
  }
  return found;
}

/** Inventory every staged .TXT that carries an "SN:" line. */
function readStagedUnits() {
  const staged = [];
  for (const file of walk(STAGE, /\.txt$/i, [])) {
    let text;
    try {
      text = fs.readFileSync(file, 'utf8');
    } catch {
      continue; // unreadable file: skip, keep scanning
    }
    const sn = firstGroup(text, /^\s*SN:\s*(\S+)/m);
    if (!sn) continue;
    staged.push({
      sn,
      dec: decode(sn),
      model: firstGroup(text, /^\s*Model:\s*(\S+)/m),
      date: firstGroup(text, /^\s*Date:\s*(\d{2}-\d{2}-\d{4})/m),
      station: firstGroup(file, /STAGE[\\\/]([^\\\/]+)/), // first folder below STAGE
      file,
    });
  }
  return staged;
}

/** Collect every .DAT source: the HISTLOGS tree plus each TS-<n>[L|R] station's LOGS. */
function collectDatFiles() {
  const dats = [];
  walk('C:/Shares/test/Ate/HISTLOGS', /\.dat$/i, dats);
  let stations = [];
  try {
    stations = fs
      .readdirSync('C:/Shares/test', { withFileTypes: true })
      .filter((d) => d.isDirectory() && /^TS-\d+[LR]?$/i.test(d.name))
      .map((d) => d.name);
  } catch {
    // share root unreadable: fall back to HISTLOGS only
  }
  for (const station of stations) {
    walk(path.join('C:/Shares/test', station, 'LOGS'), /\.dat$/i, dats);
  }
  return dats;
}

/**
 * Scan .DAT files for "serial","date" record lines.
 * @param {string[]} dats .DAT file paths
 * @returns {{looseSet: Set<string>, letterSet: Set<string>, letterRecs: number, letterModels: Set<string>}}
 *   looseSet: every serial token seen; letterSet: letter-prefixed serials the
 *   strict importer regex rejects; letterRecs: number of such record lines;
 *   letterModels: model headers those records appeared under.
 */
function scanDatFiles(dats) {
  const looseSet = new Set();
  const letterSet = new Set();
  const letterModels = new Set();
  let letterRecs = 0;
  let scanned = 0;

  for (const file of dats) {
    scanned++;
    if (scanned % 5000 === 0) console.log(' scan ' + scanned + '/' + dats.length);
    let lines;
    try {
      lines = fs.readFileSync(file, 'utf8').split('\n');
    } catch {
      continue;
    }
    let lastModel = '';
    for (const raw of lines) {
      const line = raw.trim();
      // A lone quoted token (no comma, not a PASS/FAIL result) is treated as a model header.
      const modelMatch = line.match(/^"([A-Z0-9][A-Z0-9 \-]*)"$/i);
      if (modelMatch && !/PASS|FAIL/.test(line) && !line.includes(',')) {
        lastModel = modelMatch[1].trim();
        continue;
      }
      const rec = line.match(LOOSE);
      if (!rec) continue;
      const sn = rec[1];
      looseSet.add(sn);
      if (LETTER_PREFIXED.test(sn) && !STRICT.test(line)) {
        letterSet.add(sn);
        letterRecs++;
        if (lastModel) letterModels.add(lastModel);
      }
    }
  }
  return { looseSet, letterSet, letterRecs, letterModels };
}

/** Split the missing staged units into root-cause buckets. */
function categorize(missing, looseSet, letterSet) {
  const cat = { parserDrop: [], absent: [], decInDbButMiss: [] };
  for (const unit of missing) {
    const encodedInDat =
      letterSet.has(unit.sn) || (LETTER_PREFIXED.test(unit.sn) && looseSet.has(unit.sn));
    if (encodedInDat) cat.parserDrop.push(unit);
    else if (!looseSet.has(unit.sn) && !looseSet.has(unit.dec)) cat.absent.push(unit);
    else cat.decInDbButMiss.push(unit);
  }
  return cat;
}

async function main() {
  try {
    // ---- staged .TXT inventory ----
    const staged = readStagedUnits();

    // ---- which staged decoded serials are in DB ----
    const decs = [...new Set(staged.map((s) => s.dec))];
    const inDb = await findSerialsInDb(decs);
    const missing = staged.filter((s) => !inDb.has(s.dec));

    // ---- scan ALL .DAT sources: which serial tokens appear, strict vs letter-prefixed ----
    const { looseSet, letterSet, letterRecs, letterModels } = scanDatFiles(collectDatFiles());

    // ---- categorize the missing ----
    const cat = categorize(missing, looseSet, letterSet);

    // ---- full letter-prefixed population in .DAT and how much is absent from DB ----
    const letterDecs = [...letterSet].map(decode);
    const letterInDb = await findSerialsInDb(letterDecs);
    const letterMissingDistinct = letterDecs.filter((d) => !letterInDb.has(d)).length;

    // ---- report ----
    const out = [];
    const L = (s) => {
      out.push(s);
      console.log(s);
    };
    const sample = (s) => L(' ' + s.sn + ' -> ' + s.dec + ' ' + s.model + ' ' + s.date + ' ' + s.station);

    L('========== MISSING-UNITS ROOT CAUSE & SCOPE ==========');
    L('Staged .TXT with SN : ' + staged.length);
    L('Staged units MISSING from DB : ' + missing.length);
    L('');
    L('ROOT-CAUSE CATEGORIES (of the missing):');
    L(' Parser-drop (encoded serial w/ leading letter present in .DAT, regex rejects): ' + cat.parserDrop.length);
    L(' Source .DAT has no record for this unit (data absent) : ' + cat.absent.length);
    L(' Other (decoded present, still missing - investigate) : ' + cat.decInDbButMiss.length);
    L('');
    // The original built this header with `'...' + x ? '' : ''`, which always
    // evaluated to '' because `+` binds tighter than `?:`.
    L('PARSER-DROP breakdown:');
    L(' by leading letter: ' + formatCounts(countBy(cat.parserDrop, (s) => s.sn[0].toUpperCase())));
    L(' by station : ' + formatCounts(countBy(cat.parserDrop, (s) => s.station)));
    L(' by model (top 12): ' + formatCounts(countBy(cat.parserDrop, (s) => s.model).slice(0, 12)));
    L(' date range : ' + dateRange(cat.parserDrop));
    L(' samples :');
    cat.parserDrop.slice(0, 12).forEach(sample);
    if (cat.absent.length) {
      L('');
      L('DATA-ABSENT samples:');
      cat.absent.slice(0, 10).forEach(sample);
    }
    if (cat.decInDbButMiss.length) {
      L('');
      L('OTHER samples:');
      cat.decInDbButMiss.slice(0, 10).forEach(sample);
    }
    L('');
    L('FULL .DAT BLIND SPOT (all letter-prefixed serials the importer skips, not just staged):');
    L(' distinct letter-prefixed serials in .DAT : ' + letterSet.size);
    L(' total letter-prefixed records (dropped) : ' + letterRecs);
    L(' distinct models affected : ' + letterModels.size);
    L(' of those serials, DECODED form absent from DB: ' + letterMissingDistinct + ' / ' + letterSet.size);

    if (process.argv[2]) fs.writeFileSync(process.argv[2], out.join('\n') + '\n');
  } finally {
    // Release the DB handle even when the scan or a query fails.
    await db.close();
  }
}

main().catch((e) => {
  console.error(e);
  process.exit(1);
});