Files
claudetools/projects/dataforth-dos/datasheet-pipeline/implementation/database/import.js
Mike Swanson 45083f4735 Add SCMVAS/SCMHVAS datasheet pipeline extension (Dataforth)
Extends the Test Datasheet Pipeline on AD2:C:\Shares\testdatadb to
generate web-published datasheets for the SCMVAS-Mxxx (obsolete) and
SCMHVAS-Mxxxx (replacement) High Voltage Input Module product lines.
Both are tested either with the existing TESTHV3 software (production
VASLOG .DAT logs) or in Engineering with plain .txt output.

Key changes on AD2 (all deployed 2026-04-12 with dated backups):

- parsers/spec-reader.js: getSpecs() returns a `{_family:'SCMVAS',
  _noSpecs:true}` sentinel for SCMVAS/SCMHVAS/VAS-M/HVAS-M model prefixes
  so the export pipeline does not silently skip them for missing specs.
- templates/datasheet-exact.js: new Accuracy-only template branch
  (generateSCMVASDatasheet + helpers) that mirrors the existing shipped
  format byte-for-byte. Extraction regex covers both QuickBASIC STR$()
  output formats: scientific-with-trailing-status-digit (98.4% of
  records) and plain-decimal (1.6% of records above QB's threshold).
- parsers/vaslog-engtxt.js (new): parses the Engineering-Tested .txt
  files in TS-3R\LOGS\VASLOG\VASLOG - Engineering Tested\. Filename SN
  regex strips optional trailing 14-digit timestamp; in-file "SN:"
  header is the authoritative source when the filename is malformed.
- database/import.js: LOG_TYPES grows a VASLOG_ENG entry with
  subfolder + recursive flags. Pre-existing 7 log types keep their
  implicit recursive=true behaviour (config.recursive !== false).
  importFiles() routes VASLOG_ENG paths before the generic loop so a
  VASLOG - Engineering Tested/*.txt path does not mis-dispatch to the
  multiline parser.
- database/export-datasheets.js: VASLOG_ENG records are written
  verbatim via fs.copyFileSync(source_file, For_Web/<SN>.TXT) for true
  byte-level pass-through, with a graceful raw_data fallback when the
  source file is no longer on disk.

Deploy outcome:
- 27,503 SCMVAS/SCMHVAS datasheets rendered (27,065 from scientific +
  438 from plain-decimal PASS lines, post-patch rerun)
- 434 Engineering-Tested .txt files pass-through-copied to For_Web
- 0 errors across both batches

Repo layout added here:
- scmvas-hvas-research/: discovery artifacts (source .BAS, hvin.dat,
  sample .DAT + .txt, binary-format notes, IMPLEMENTATION_PLAN.md)
- implementation/: staged final code + deploy helpers + local test
  harness + per-step verification scripts
- backups/pre-deploy-20260412/: independent local snapshot of the 4
  AD2 files replaced, pulled byte-for-byte before deploy

All helper scripts fetch the AD2 password at runtime from the SOPS
vault (clients/dataforth/ad2.sops.yaml). None of the committed files
contain the plaintext credential. Known vault-entry hygiene issue
(stale shell-escape backslash before the `!`) is documented in the
fetcher comments and stripped at read-time; flagged separately for
cleanup.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 07:36:45 -07:00

397 lines
14 KiB
JavaScript

/**
* Data Import Script
* Imports test data from DAT and SHT files into PostgreSQL database
*/
const fs = require('fs');
const path = require('path');
const db = require('./db');
const { parseMultilineFile, extractTestStation } = require('../parsers/multiline');
const { parseCsvFile } = require('../parsers/csvline');
const { parseShtFile } = require('../parsers/shtfile');
const { parseVaslogEngTxt } = require('../parsers/vaslog-engtxt');
// Data source paths
const TEST_PATH = 'C:/Shares/test';
const RECOVERY_PATH = 'C:/Shares/Recovery-TEST';
const HISTLOGS_PATH = path.join(TEST_PATH, 'Ate/HISTLOGS');
// Log types and their parsers.
// NOTE: `recursive` defaults to TRUE when absent (walk subfolders by default,
// preserving pre-existing production behavior for DSCLOG/5BLOG/8BLOG/PWRLOG/
// SCTLOG/7BLOG). Set it to FALSE explicitly on VASLOG so the .DAT walk does
// NOT descend into the "VASLOG - Engineering Tested" subfolder (belt-and-
// suspenders: the .DAT glob wouldn't match .txt, but be explicit anyway).
// VASLOG_ENG also sets recursive:false -- the eng-tested dir is flat.
const LOG_TYPES = {
'DSCLOG': { parser: 'multiline', ext: '.DAT' },
'5BLOG': { parser: 'multiline', ext: '.DAT' },
'8BLOG': { parser: 'multiline', ext: '.DAT' },
'PWRLOG': { parser: 'multiline', ext: '.DAT' },
'SCTLOG': { parser: 'multiline', ext: '.DAT' },
'VASLOG': { parser: 'multiline', ext: '.DAT', recursive: false },
'7BLOG': { parser: 'csvline', ext: '.DAT' },
// Engineering-tested SCMHVAS pre-rendered datasheets live under VASLOG/"VASLOG - Engineering Tested"/
'VASLOG_ENG': { parser: 'vaslog-engtxt', ext: '.txt', dir: 'VASLOG/VASLOG - Engineering Tested', recursive: false }
};
// Find all files of a specific type in a directory
function findFiles(dir, pattern, recursive = true) {
const results = [];
try {
if (!fs.existsSync(dir)) return results;
const items = fs.readdirSync(dir, { withFileTypes: true });
for (const item of items) {
const fullPath = path.join(dir, item.name);
if (item.isDirectory() && recursive) {
results.push(...findFiles(fullPath, pattern, recursive));
} else if (item.isFile()) {
if (pattern.test(item.name)) {
results.push(fullPath);
}
}
}
} catch (err) {
// Ignore permission errors
}
return results;
}
// Parse records from a file (sync -- file I/O only)
function parseFile(filePath, logType, parser) {
const testStation = extractTestStation(filePath);
switch (parser) {
case 'multiline':
return parseMultilineFile(filePath, logType, testStation);
case 'csvline':
return parseCsvFile(filePath, testStation);
case 'shtfile':
return parseShtFile(filePath, testStation);
case 'vaslog-engtxt':
return parseVaslogEngTxt(filePath, testStation);
default:
return [];
}
}
// Batch insert records into PostgreSQL
async function insertBatch(txClient, records) {
let imported = 0;
for (const record of records) {
try {
const result = await txClient.execute(
`INSERT INTO test_records
(log_type, model_number, serial_number, test_date, test_station, overall_result, raw_data, source_file)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (log_type, model_number, serial_number, test_date, test_station)
DO UPDATE SET raw_data = EXCLUDED.raw_data, overall_result = EXCLUDED.overall_result`,
[
record.log_type,
record.model_number,
record.serial_number,
record.test_date,
record.test_station,
record.overall_result,
record.raw_data,
record.source_file
]
);
if (result.rowCount > 0) imported++;
} catch (err) {
// Constraint error - skip
}
}
return imported;
}
// Import records from a file
async function importFile(txClient, filePath, logType, parser) {
let records = [];
try {
records = parseFile(filePath, logType, parser);
const imported = await insertBatch(txClient, records);
return { total: records.length, imported };
} catch (err) {
console.error(`Error importing ${filePath}: ${err.message}`);
return { total: 0, imported: 0 };
}
}
// Import from HISTLOGS (master consolidated logs)
async function importHistlogs(txClient) {
console.log('\n=== Importing from HISTLOGS ===');
let totalImported = 0;
let totalRecords = 0;
for (const [logType, config] of Object.entries(LOG_TYPES)) {
const subdir = config.dir || logType;
const logDir = path.join(HISTLOGS_PATH, subdir);
if (!fs.existsSync(logDir)) {
console.log(` ${logType}: directory not found`);
continue;
}
const files = findFiles(logDir, new RegExp(`\\${config.ext}$`, 'i'), config.recursive !== false);
console.log(` ${logType}: found ${files.length} files`);
for (const file of files) {
const { total, imported } = await importFile(txClient, file, logType, config.parser);
totalRecords += total;
totalImported += imported;
}
}
console.log(` HISTLOGS total: ${totalImported} records imported (${totalRecords} parsed)`);
return totalImported;
}
// Import from test station logs
async function importStationLogs(txClient, basePath, label) {
console.log(`\n=== Importing from ${label} ===`);
let totalImported = 0;
let totalRecords = 0;
const stationPattern = /^TS-\d+[LR]?$/i;
let stations = [];
try {
const items = fs.readdirSync(basePath, { withFileTypes: true });
stations = items
.filter(i => i.isDirectory() && stationPattern.test(i.name))
.map(i => i.name);
} catch (err) {
console.log(` Error reading ${basePath}: ${err.message}`);
return 0;
}
console.log(` Found stations: ${stations.join(', ')}`);
for (const station of stations) {
const logsDir = path.join(basePath, station, 'LOGS');
if (!fs.existsSync(logsDir)) continue;
for (const [logType, config] of Object.entries(LOG_TYPES)) {
const subdir = config.dir || logType;
const logDir = path.join(logsDir, subdir);
if (!fs.existsSync(logDir)) continue;
const files = findFiles(logDir, new RegExp(`\\${config.ext}$`, 'i'), config.recursive !== false);
for (const file of files) {
const { total, imported } = await importFile(txClient, file, logType, config.parser);
totalRecords += total;
totalImported += imported;
}
}
}
// Also import SHT files
const shtFiles = findFiles(basePath, /\.SHT$/i, true);
console.log(` Found ${shtFiles.length} SHT files`);
for (const file of shtFiles) {
const { total, imported } = await importFile(txClient, file, 'SHT', 'shtfile');
totalRecords += total;
totalImported += imported;
}
console.log(` ${label} total: ${totalImported} records imported (${totalRecords} parsed)`);
return totalImported;
}
// Import from Recovery-TEST backups (newest first)
async function importRecoveryBackups(txClient) {
console.log('\n=== Importing from Recovery-TEST backups ===');
if (!fs.existsSync(RECOVERY_PATH)) {
console.log(' Recovery-TEST directory not found');
return 0;
}
const backups = fs.readdirSync(RECOVERY_PATH, { withFileTypes: true })
.filter(i => i.isDirectory() && /^\d{2}-\d{2}-\d{2}$/.test(i.name))
.map(i => i.name)
.sort()
.reverse();
console.log(` Found backup dates: ${backups.join(', ')}`);
let totalImported = 0;
for (const backup of backups) {
const backupPath = path.join(RECOVERY_PATH, backup);
const imported = await importStationLogs(txClient, backupPath, `Recovery-TEST/${backup}`);
totalImported += imported;
}
return totalImported;
}
// Main import function
async function runImport() {
console.log('========================================');
console.log('Test Data Import');
console.log('========================================');
console.log(`Start time: ${new Date().toISOString()}`);
let grandTotal = 0;
await db.transaction(async (txClient) => {
grandTotal += await importHistlogs(txClient);
grandTotal += await importRecoveryBackups(txClient);
grandTotal += await importStationLogs(txClient, TEST_PATH, 'test');
});
const stats = await db.queryOne('SELECT COUNT(*) as count FROM test_records');
console.log('\n========================================');
console.log('Import Complete');
console.log('========================================');
console.log(`Total records in database: ${stats.count}`);
console.log(`End time: ${new Date().toISOString()}`);
await db.close();
}
// Import a single file (for incremental imports from sync)
async function importSingleFile(filePath) {
console.log(`Importing: ${filePath}`);
let logType = null;
let parser = null;
// VASLOG_ENG subpath must be checked before VASLOG (substring overlap).
if (filePath.includes('VASLOG - Engineering Tested')) {
logType = 'VASLOG_ENG';
parser = LOG_TYPES['VASLOG_ENG'].parser;
} else {
for (const [type, config] of Object.entries(LOG_TYPES)) {
if (type === 'VASLOG_ENG') continue;
if (filePath.includes(type)) {
logType = type;
parser = config.parser;
break;
}
}
}
if (!logType) {
if (/\.SHT$/i.test(filePath)) {
logType = 'SHT';
parser = 'shtfile';
} else {
console.log(` Unknown log type for: ${filePath}`);
return { total: 0, imported: 0 };
}
}
let result;
await db.transaction(async (txClient) => {
result = await importFile(txClient, filePath, logType, parser);
});
console.log(` Imported ${result.imported} of ${result.total} records`);
return result;
}
// Import multiple files (for batch incremental imports)
async function importFiles(filePaths) {
console.log(`\n========================================`);
console.log(`Incremental Import: ${filePaths.length} files`);
console.log(`========================================`);
let totalImported = 0;
let totalRecords = 0;
await db.transaction(async (txClient) => {
for (const filePath of filePaths) {
let logType = null;
let parser = null;
// VASLOG_ENG subpath must be checked before the generic loop --
// otherwise `includes('VASLOG')` hits first and the eng .txt gets
// dispatched to the multiline parser. Mirror importSingleFile().
if (filePath.includes('VASLOG - Engineering Tested')) {
logType = 'VASLOG_ENG';
parser = LOG_TYPES['VASLOG_ENG'].parser;
} else {
for (const [type, config] of Object.entries(LOG_TYPES)) {
if (type === 'VASLOG_ENG') continue;
if (filePath.includes(type)) {
logType = type;
parser = config.parser;
break;
}
}
}
if (!logType) {
if (/\.SHT$/i.test(filePath)) {
logType = 'SHT';
parser = 'shtfile';
} else {
console.log(` Skipping unknown type: ${filePath}`);
continue;
}
}
const { total, imported } = await importFile(txClient, filePath, logType, parser);
totalRecords += total;
totalImported += imported;
console.log(` ${path.basename(filePath)}: ${imported}/${total} records`);
}
});
console.log(`\nTotal: ${totalImported} records imported (${totalRecords} parsed)`);
// Export datasheets for newly imported records
if (totalImported > 0) {
try {
const { loadAllSpecs } = require('../parsers/spec-reader');
const { exportNewRecords } = require('./export-datasheets');
const specMap = loadAllSpecs();
await exportNewRecords(specMap, filePaths);
} catch (err) {
console.error(`[EXPORT] Datasheet export failed: ${err.message}`);
}
}
return { total: totalRecords, imported: totalImported };
}
// Run if called directly
if (require.main === module) {
const args = process.argv.slice(2);
if (args.length > 0 && args[0] === '--file') {
const files = args.slice(1);
if (files.length === 0) {
console.log('Usage: node import.js --file <file1> [file2] ...');
process.exit(1);
}
importFiles(files).then(() => db.close()).catch(console.error);
} else if (args.length > 0 && args[0] === '--help') {
console.log('Usage:');
console.log(' node import.js Full import from all sources');
console.log(' node import.js --file <f> Import specific file(s)');
process.exit(0);
} else {
runImport().catch(console.error);
}
}
module.exports = { runImport, importSingleFile, importFiles };