From 63089c45c9c2dffacbe42289060c1fdeb9ebcabb Mon Sep 17 00:00:00 2001 From: Mike Swanson Date: Tue, 21 Apr 2026 18:46:49 -0700 Subject: [PATCH] sync: auto-sync from DESKTOP-0O8A1RL at 2026-04-21 18:46:45 Author: Mike Swanson Machine: DESKTOP-0O8A1RL Timestamp: 2026-04-21 18:46:45 --- projects/dataforth-dos/CONTEXT.md | 889 +++++++++--------- .../datasheet-pipeline/.gitignore | 19 +- .../database/export-datasheets.js | 530 ++++++----- .../implementation/database/import.js | 803 ++++++++-------- .../implementation/deploy-to-ad2.py | 524 ++++++----- .../implementation/server/notify.js | 63 ++ 6 files changed, 1504 insertions(+), 1324 deletions(-) create mode 100644 projects/dataforth-dos/datasheet-pipeline/implementation/server/notify.js diff --git a/projects/dataforth-dos/CONTEXT.md b/projects/dataforth-dos/CONTEXT.md index 7759fe0..7b27d20 100644 --- a/projects/dataforth-dos/CONTEXT.md +++ b/projects/dataforth-dos/CONTEXT.md @@ -1,439 +1,450 @@ -# Dataforth DOS Project - Context - -**Last Updated:** 2026-04-14 -**Status:** Active - Datasheet Pipeline Extended for SCMVAS/SCMHVAS - -## Quick Start - Infrastructure Overview - -| Component | IP/Location | Access | Notes | -|-----------|-------------|--------|-------| -| **AD2** (Primary) | 192.168.0.6 | SSH: sysadmin / vault | Windows Server 2022, hosts testdatadb service | -| **AD1** (Secondary) | 192.168.0.27 | SSH: sysadmin / vault | Hosts Engineering share at \\AD1\Engineering | -| **D2TESTNAS** | 192.168.0.9 | SMB1 only | Bridge for DOS test stations (TS-xx machines) | -| **VPN** | Required | FortiClient | Access to 192.168.0.x network | - -**Get credentials:** -```bash -# AD2 password (has stale backslash escape - strip it) -bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad2.sops.yaml credentials.password | sed 's/\\//g' - -# AD1 password -bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad1.sops.yaml credentials.password -``` - -**All passwords:** `Paper123!@#` (stored in vault, note 
backslash escape issue in ad2.sops.yaml) - -## Current State (READ THIS FIRST) - -### Recent Work (2026-04-11/12) -**Extended Test Datasheet Pipeline for SCMVAS-Mxxx and SCMHVAS-Mxxxx families** -- Added VASLOG parser support (multiline CSV .DAT format) -- Created accuracy-only datasheet template (simple format, no hvin.dat lookup) -- Implemented pass-through for Engineering-Tested .txt files -- **Backfilled 27,503 historical records** (438 required regex patch for QB STR$() format quirk) -- **434 Engineering .txt files** imported and published -- Deployed to AD2, service restarted, web publishing verified - -**Status:** ✅ Complete, production-deployed - -**Critical Files Changed:** 5 modified, 1 new parser -- server/parsers/vaslog.js (new) -- server/templates/datasheet-exact.js (SCMVAS/SCMHVAS branch added) -- server/database/import.js (recursive flag fix, VASLOG_ENG support) -- server/parsers/spec-reader.js (stub for SCMVAS/SCMHVAS) -- deploy/deploy-to-ad2.py (vault-based credentials) - -**Session Logs:** -- **2026-04-12-session.md** - Implementation, deploy, backfill, patch (DEFINITIVE) -- **2026-04-11-discovery-session.md** - Discovery phase - -### testdatadb Service (on AD2) -- **Service Name:** testdatadb -- **Status:** Running -- **Service Account:** INTRANET\svc_testdatadb -- **Working Directory:** C:\Shares\testdatadb -- **API Port:** 3000 (http://192.168.0.6:3000) -- **Database:** SQLite at C:\Shares\testdatadb\database/testdata.db (4.1GB) -- **Web Output:** X:\For_Web (= \\ad2\webshare\For_Web UNC path) - -### File Shares on AD2 -``` -C:\Shares\test\ # Mirror of D2TESTNAS test data -├── TS-xx\LOGS\ # Test logs from DOS stations -│ ├── 5BLOG\ # SCM5B family -│ ├── 8BLOG\ # 8B family -│ ├── VASLOG\ # SCMVAS/SCMHVAS .DAT files -│ │ ├── HVAS-M01.DAT # Production logs -│ │ ├── VAS-M100.DAT -│ │ └── VASLOG - Engineering Tested\ # 434 .txt files -│ └── ... 
-└── Corrected HVAS Files\ # 200 pre-generated datasheets - -C:\Shares\testdatadb\ # Node.js application -├── server/ -│ ├── parsers/ # Log file parsers -│ ├── templates/ # Datasheet formatters -│ └── database/ # Import/export scripts -├── database/ -│ └── testdata.db # SQLite (4.1GB, not in git) -└── node_modules/ -``` - -### File Shares on AD1 -``` -\\AD1\Engineering\ -└── ENGR\ATE\High Voltage Input Module Test\ - ├── HVDATA\ - │ └── hvin.dat # Spec database (33 records, engineering MODNAMEs) - └── Released\ - ├── TESTHV3.BAS # Primary test program (2020) - ├── TESTHV4.BAS # Alternate test program (2017) - ├── NLIBATE3.BAS # ATE library - └── DBHV.BAS # Database editor (TYPE DBASE definition) -``` - -## Anti-Patterns (DON'T DO THIS) - -❌ **DO NOT hardcode Paper123!@#** - Always fetch from vault: -```bash -bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad2.sops.yaml credentials.password | sed 's/\\//g' -``` - -❌ **DO NOT use X: drive in SSH sessions** - It's only mapped under service account. Use UNC path instead: -```powershell -# Wrong: -node database/export-datasheets.js # Fails: "X:\For_Web does not exist" - -# Right: -$env:OUTPUT_DIR = "\\ad2\webshare\For_Web" -node database/export-datasheets.js -``` - -❌ **DO NOT assume hvin.dat lookup works** - Marketing names (SCMHVAS-M0100) ≠ engineering MODNAMEs (SCM5B41-1181). SCMVAS/SCMHVAS use simplified accuracy-only template WITHOUT hvin.dat. - -❌ **DO NOT pass 50+ file paths on PowerShell command line** - Hits "Command line too long". Use inline node script with fs.readdirSync instead. - -❌ **DO NOT commit testdata.db or large samples** - 4.1GB database is in .gitignore. Keep research samples local only. - -❌ **DO NOT use SMB1 on AD2** - Disabled for security. Use SSH/SFTP (port 22) or SMB2+ shares. - -❌ **DO NOT expect immediate output from exec_command** - paramiko buffers stdout. Use progress markers or drain at completion. - -❌ **DO NOT assume VPN is stable** - Dataforth VPN can drop mid-session. 
Save work frequently, use local samples for offline analysis. - -## Where to Find Things - -### Codebase Structure -``` -projects/dataforth-dos/ -├── datasheet-pipeline/ -│ ├── implementation/ # Staged code (approved by Code Review) -│ ├── scmvas-hvas-research/ # Discovery scripts and source files -│ │ ├── source/ # TESTHV3.BAS, hvin.dat, etc. -│ │ ├── samples/ # .DAT and .txt samples (local) -│ │ ├── parse_hvin.py # hvin.dat binary parser -│ │ └── pull-*.py # SSH download scripts -│ └── IMPLEMENTATION_PLAN.md # Approved plan (2026-04-11) -├── deploy/ -│ └── deploy-to-ad2.py # Deployment script (vault-based auth) -├── session-logs/ -│ ├── 2026-04-12-session.md # SCMVAS/SCMHVAS implementation (DEFINITIVE) -│ └── 2026-04-11-discovery-session.md -└── CONTEXT.md # This file -``` - -### Production Files on AD2 -``` -C:\Shares\testdatadb\ -├── server.js # Main entry point -├── server/ -│ ├── parsers/ -│ │ ├── multiline.js # Handles VASLOG .DAT (CSV format) -│ │ ├── vaslog.js # VASLOG-specific logic (new) -│ │ └── spec-reader.js # Spec DB loader (stub for SCMVAS/SCMHVAS) -│ ├── templates/ -│ │ └── datasheet-exact.js # Datasheet formatter (SCMVAS/SCMHVAS branch added) -│ └── database/ -│ ├── import.js # LOG_TYPES registry, importFiles() -│ └── export-datasheets.js # Batch export script -└── database/ - └── testdata.db # SQLite (27k+ records after backfill) -``` - -## Common Operations - -### Deploy Code to AD2 -```bash -# From projects/dataforth-dos/deploy/ -python3 deploy-to-ad2.py - -# What it does: -# 1. Fetches password from vault (D:/vault/scripts/vault.sh) -# 2. Connects via paramiko SFTP to 192.168.0.6:22 -# 3. Creates .bak-YYYYMMDD timestamped backups -# 4. Uploads modified files from implementation/ -# 5. Restarts testdatadb service via SSH exec_command -# 6. 
Verifies API responds 200 OK on port 3000 -``` - -**Manual deployment (if script unavailable):** -```bash -# Get password -AD2_PASS=$(bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad2.sops.yaml credentials.password | sed 's/\\//g') - -# Connect -sshpass -p "${AD2_PASS}" ssh sysadmin@192.168.0.6 - -# Backup + copy -cd C:\Shares\testdatadb\server\parsers -copy multiline.js multiline.js.bak-20260414 -# ... upload new files via SFTP ... - -# Restart service -Restart-Service -Name testdatadb - -# Verify -curl http://localhost:3000 -``` - -### Import New Test Data -```bash -# SSH to AD2 -ssh sysadmin@192.168.0.6 - -# Run import for specific log type -cd C:\Shares\testdatadb -node database/import.js - -# Import specific files (avoid "Command line too long") -node -e " -const importFiles = require('./server/database/import').importFiles; -const fs = require('fs'); -const files = fs.readdirSync('C:/Shares/test/TS-3R/LOGS/VASLOG/VASLOG - Engineering Tested') - .filter(f => f.endsWith('.txt')) - .map(f => 'C:/Shares/test/TS-3R/LOGS/VASLOG/VASLOG - Engineering Tested/' + f); -importFiles(files, 'VASLOG_ENG').then(() => console.log('Done')); -" -``` - -### Export Datasheets for Web -```bash -# SSH to AD2 -ssh sysadmin@192.168.0.6 - -# Export all pending datasheets -cd C:\Shares\testdatadb -$env:OUTPUT_DIR = "\\ad2\webshare\For_Web" # NOT X:\For_Web in SSH -node database/export-datasheets.js - -# Export specific model family -node database/export-datasheets.js --family SCMHVAS -``` - -### Backfill Historical Data -```bash -# SSH to AD2, run as inline script to avoid command-line length limits -node -e " -const db = require('./server/database/db'); -const exportDatasheet = require('./server/templates/datasheet-exact'); - -db.all(\` - SELECT * FROM test_records - WHERE log_type IN ('VASLOG', 'VASLOG_ENG') - AND exported_at IS NULL - ORDER BY id -\`, (err, rows) => { - if (err) throw err; - console.log(\`[INFO] Found \${rows.length} records to export\`); - let count = 
0; - rows.forEach(row => { - try { - exportDatasheet(row); - count++; - if (count % 100 === 0) console.log(\`[PROGRESS] \${count}/\${rows.length}\`); - } catch (e) { - console.error(\`[SKIP] \${row.model_name}: \${e.message}\`); - } - }); - console.log(\`[DONE] Exported \${count} datasheets\`); -}); -" -``` - -### Check Service Status -```powershell -# On AD2 (via SSH or RDP) -Get-Service testdatadb - -# View service logs (if logging enabled) -Get-EventLog -LogName Application -Source testdatadb -Newest 50 - -# Test API -Invoke-WebRequest http://localhost:3000 | Select-Object StatusCode - -# Check process -Get-Process | Where-Object { $_.ProcessName -like "*node*" } -``` - -### Access Shares from macOS/Linux -```bash -# Mount AD2 share (SMB2+) -mkdir -p ~/mnt/ad2-testdatadb -mount_smbfs //sysadmin:Password@192.168.0.6/testdatadb ~/mnt/ad2-testdatadb - -# Mount AD1 Engineering share -mkdir -p ~/mnt/ad1-engineering -mount_smbfs //sysadmin:Password@192.168.0.27/Engineering ~/mnt/ad1-engineering - -# Unmount -umount ~/mnt/ad2-testdatadb -``` - -## Key Technical Decisions (ADRs) - -**2026-04-12:** Use Option C (simple accuracy-only template, no hvin.dat lookup) -- Reason: Marketing names (SCMHVAS-M0100) ≠ engineering MODNAMEs (SCM5B41-1181) in hvin.dat -- Sample datasheets show simple 1-parameter format (Accuracy only) -- Spec-reader stub lets SCMVAS/SCMHVAS pass through pipeline without schema changes - -**2026-04-12:** Pass-through for VASLOG_ENG .txt files (not re-render) -- Reason: Engineering-Tested files already match target format exactly -- fs.copyFileSync() guarantees byte-level fidelity, avoids encoding round-trip -- Fallback to writeFileSync(raw_data, 'utf8') if source file missing - -**2026-04-12:** Fix recursive=false default regression with `config.recursive !== false` -- Reason: Adding `recursive` field to LOG_TYPES must not break 7 pre-existing families -- Treats absent/undefined as true (legacy behavior), explicit false as false - -**2026-04-12:** 
Vault-based credentials in deploy script (no hardcoding, no prompts) -- Reason: Never commit passwords, even to private repo -- deploy-to-ad2.py calls vault.sh with 30s timeout, fails loud if unavailable -- No env-var fallback, no interactive prompt - -**2026-04-12:** MM/DD/YYYY date normalization for datasheet Date field -- Reason: Matches newest Engineering-Tested samples -- Older "Corrected HVAS Files" used MM-DD-YYYY (hyphens) - backfill rewrites with slashes -- Intentional visible change, documented in implementation plan - -**2026-04-12:** Patch regex with plain-decimal fallback for QuickBASIC STR$() quirk -- Reason: QB STR$() emits scientific notation for most values, plain decimal for ~1.6% -- Not a version difference or bug - purely QB float-to-string formatting threshold -- Two-regex approach: try scientific first, fall back to plain decimal - -## QuickBASIC Artifacts & Log Formats - -### VASLOG .DAT Structure -``` -"SCMHVAS-M0100 " # Header: model name (marketing, NOT engineering MODNAME) -20,0.0034 # CSV line 1: measurement data -40,0.0126 # CSV line 2 -60,-0.0046 # CSV line 3 -80,0.0141 # CSV line 4 -100,-0.00325 # CSV line 5 -"PASS-7.005501E-033",... 
# Status line: PASS/FAIL + accuracy (scientific OR plain decimal) -"179379-1","04-09-2026" # Footer: serial number, test date (MM-DD-YYYY) -``` - -### VASLOG_ENG .txt Structure (Engineering-Tested) -``` -SCMHVAS - M0100 -SN: 171087-1 -Date: 04/08/2024 -Test: PASS -Accuracy: -7.0055E-03 % -``` - -### QuickBASIC STR$() Formatting Quirk -```basic -' QB emits TWO formats for floats: -PRINT STR$(-7.005501E-03) ' → "-7.005501E-033" (scientific + status digit) -PRINT STR$(0.01599373) ' → " .01599373" (plain decimal, leading space) - -' Threshold: ~0.01 magnitude -' Affects ~1.6% of records (438/27503) -' NOT a bug - documented QB behavior -``` - -### hvin.dat Binary Format -``` -TYPE DBASE (from DBHV.BAS) - MODNAME AS STRING * 13 ' Engineering ID: "SCM5B41-1181 " - INTYPE AS STRING * 3 - OUTSIGTYPE AS STRING * 7 - WAVESHPCAL AS STRING * 8 - ' ... 42 SINGLE floats (IEEE 754, 4 bytes each) ... -END TYPE - -' Total: 13+3+7+8 + (42*4) = 199 bytes/record -' File size: 6567 bytes = 33 records -``` - -## Troubleshooting - -### "Output directory does not exist: X:\For_Web" -- **Cause:** X: drive only mapped under service account, not in SSH session -- **Fix:** Use UNC path: `\\ad2\webshare\For_Web` -```powershell -$env:OUTPUT_DIR = "\\ad2\webshare\For_Web" -node database/export-datasheets.js -``` - -### "Command line is too long" (PowerShell) -- **Cause:** Passing 50+ file paths as arguments exceeds PowerShell limit -- **Fix:** Use inline node script with fs.readdirSync (see Common Operations above) - -### VPN Drops Mid-Session -- **Symptom:** AD2/AD1 become unreachable, SSH hangs -- **Fix:** - 1. Work offline on local samples for analysis - 2. Restore VPN (FortiClient) - 3. 
Resume deployment/import when connection stable - -### Vault Returns `Paper123\!@#` (Backslash) -- **Cause:** Legacy shell escape stored in ad2.sops.yaml -- **Fix:** Strip backslash at read-time: `sed 's/\\//g'` -- **TODO:** Clean vault entry to remove backslash - -### Paramiko "No Output" for Long-Running Commands -- **Cause:** exec_command buffers stdout until completion -- **Fix:** Either: - 1. Accept final output when command completes - 2. Add progress markers that flush every N records - 3. Drain channel periodically: `while not channel.exit_status_ready(): channel.recv(1024)` - -### 438 Records Skipped During Backfill -- **Cause:** Plain-decimal format not matching scientific-notation-only regex -- **Fix:** Already patched (2026-04-12). Regex now tries both formats. -- **Verification:** Rerun backfill on stragglers → 438/438 rendered - -## Recent Commit History - -**2026-04-12 (commit 0dd3d82):** SCMVAS/SCMHVAS pipeline extension -- 114 files changed, 35,486 insertions -- 5 production files modified, 1 new parser -- All research scripts sanitized (vault-based credentials) -- .gitignore updated (exclude testdata.db) - -## Useful Links - -- **Latest Session:** session-logs/2026-04-12-session.md (DEFINITIVE) -- **Discovery Session:** session-logs/2026-04-11-discovery-session.md -- **Implementation Plan:** datasheet-pipeline/scmvas-hvas-research/IMPLEMENTATION_PLAN.md -- **Credentials (vault):** D:\vault\clients\dataforth\ - -## Quick Reference - Log Types - -| Family | Log Type | Format | Parser | Location | -|--------|----------|--------|--------|----------| -| SCM5B | 5BLOG | Multiline CSV .DAT | multiline.js | TS-xx/LOGS/5BLOG | -| 8B | 8BLOG | Multiline CSV .DAT | multiline.js | TS-xx/LOGS/8BLOG | -| DSCA | DSCLOG | Multiline CSV .DAT | multiline.js | TS-xx/LOGS/DSCLOG | -| SCMVAS | VASLOG | Multiline CSV .DAT | vaslog.js | TS-3R/LOGS/VASLOG | -| SCMHVAS (prod) | VASLOG | Multiline CSV .DAT | vaslog.js | TS-3R/LOGS/VASLOG | -| SCMHVAS (eng) | VASLOG_ENG | 
.txt (pass-through) | vaslog.js | TS-3R/LOGS/VASLOG/VASLOG - Engineering Tested | - ---- - -**Before starting work:** Read session-logs/2026-04-12-session.md for complete context -**For AD2 access:** Ensure Dataforth VPN connected (FortiClient) -**For credentials:** Always use vault - never hardcode passwords +# Dataforth DOS Project - Context + +**Last Updated:** 2026-04-14 +**Status:** Active - Datasheet Pipeline Extended for SCMVAS/SCMHVAS + +## Quick Start - Infrastructure Overview + +| Component | IP/Location | Access | Notes | +|-----------|-------------|--------|-------| +| **AD2** (Primary) | 192.168.0.6 | SSH: sysadmin / vault | Windows Server 2022, hosts testdatadb service | +| **AD1** (Secondary) | 192.168.0.27 | SSH: sysadmin / vault | Hosts Engineering share at \\AD1\Engineering | +| **D2TESTNAS** | 192.168.0.9 | SMB1 only | Bridge for DOS test stations (TS-xx machines) | +| **VPN** | Required | FortiClient | Access to 192.168.0.x network | + +**Get credentials:** +```bash +# AD2 password (has stale backslash escape - strip it) +bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad2.sops.yaml credentials.password | sed 's/\\//g' + +# AD1 password +bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad1.sops.yaml credentials.password +``` + +**All passwords:** `Paper123!@#` (stored in vault, note backslash escape issue in ad2.sops.yaml) + +## Current State (READ THIS FIRST) + +### Recent Work (2026-04-11/12) +**Extended Test Datasheet Pipeline for SCMVAS-Mxxx and SCMHVAS-Mxxxx families** +- Added VASLOG parser support (multiline CSV .DAT format) +- Created accuracy-only datasheet template (simple format, no hvin.dat lookup) +- Implemented pass-through for Engineering-Tested .txt files +- **Backfilled 27,503 historical records** (438 required regex patch for QB STR$() format quirk) +- **434 Engineering .txt files** imported and published +- Deployed to AD2, service restarted, web publishing verified + +**Status:** ✅ Complete, 
production-deployed + +**Critical Files Changed:** 5 modified, 1 new parser +- server/parsers/vaslog.js (new) +- server/templates/datasheet-exact.js (SCMVAS/SCMHVAS branch added) +- server/database/import.js (recursive flag fix, VASLOG_ENG support) +- server/parsers/spec-reader.js (stub for SCMVAS/SCMHVAS) +- deploy/deploy-to-ad2.py (vault-based credentials) + +**Session Logs:** +- **2026-04-12-session.md** - Implementation, deploy, backfill, patch (DEFINITIVE) +- **2026-04-11-discovery-session.md** - Discovery phase + +### testdatadb Service (on AD2) +- **Service Name:** testdatadb +- **Status:** Running +- **Service Account:** INTRANET\svc_testdatadb +- **Working Directory:** C:\Shares\testdatadb +- **API Port:** 3000 (http://192.168.0.6:3000) +- **Database:** SQLite at C:\Shares\testdatadb\database/testdata.db (4.1GB) +- **Web Output:** X:\For_Web (= \\ad2\webshare\For_Web UNC path) + +### File Shares on AD2 +``` +C:\Shares\test\ # Mirror of D2TESTNAS test data +├── TS-xx\LOGS\ # Test logs from DOS stations +│ ├── 5BLOG\ # SCM5B family +│ ├── 8BLOG\ # 8B family +│ ├── VASLOG\ # SCMVAS/SCMHVAS .DAT files +│ │ ├── HVAS-M01.DAT # Production logs +│ │ ├── VAS-M100.DAT +│ │ └── VASLOG - Engineering Tested\ # 434 .txt files +│ └── ... 
+└── Corrected HVAS Files\ # 200 pre-generated datasheets + +C:\Shares\testdatadb\ # Node.js application +├── server/ +│ ├── parsers/ # Log file parsers +│ ├── templates/ # Datasheet formatters +│ └── database/ # Import/export scripts +├── database/ +│ └── testdata.db # SQLite (4.1GB, not in git) +└── node_modules/ +``` + +### File Shares on AD1 +``` +\\AD1\Engineering\ +└── ENGR\ATE\High Voltage Input Module Test\ + ├── HVDATA\ + │ └── hvin.dat # Spec database (33 records, engineering MODNAMEs) + └── Released\ + ├── TESTHV3.BAS # Primary test program (2020) + ├── TESTHV4.BAS # Alternate test program (2017) + ├── NLIBATE3.BAS # ATE library + └── DBHV.BAS # Database editor (TYPE DBASE definition) +``` + +## Email / SMTP + +Dataforth is **M365 hybrid** — Exchange Online is the mail system. Use SMTP via M365: + +- **SMTP host:** smtp.office365.com **Port:** 587 (STARTTLS) +- **Auth:** sysadmin@dataforth.com (vault: `clients/dataforth/m365.sops.yaml` → `credentials.password`) +- **Tenant ID:** `7dfa3ce8-c496-4b51-ab8d-bd3dcd78b584` +- **Neptune Exchange (neptune.acghosting.com):** ACG infrastructure — NOT Dataforth's, do not use + +--- + +## Anti-Patterns (DON'T DO THIS) + +❌ **DO NOT hardcode Paper123!@#** - Always fetch from vault: +```bash +bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad2.sops.yaml credentials.password | sed 's/\\//g' +``` + +❌ **DO NOT use X: drive in SSH sessions** - It's only mapped under service account. Use UNC path instead: +```powershell +# Wrong: +node database/export-datasheets.js # Fails: "X:\For_Web does not exist" + +# Right: +$env:OUTPUT_DIR = "\\ad2\webshare\For_Web" +node database/export-datasheets.js +``` + +❌ **DO NOT assume hvin.dat lookup works** - Marketing names (SCMHVAS-M0100) ≠ engineering MODNAMEs (SCM5B41-1181). SCMVAS/SCMHVAS use simplified accuracy-only template WITHOUT hvin.dat. + +❌ **DO NOT pass 50+ file paths on PowerShell command line** - Hits "Command line too long". 
Use inline node script with fs.readdirSync instead. + +❌ **DO NOT commit testdata.db or large samples** - 4.1GB database is in .gitignore. Keep research samples local only. + +❌ **DO NOT use SMB1 on AD2** - Disabled for security. Use SSH/SFTP (port 22) or SMB2+ shares. + +❌ **DO NOT expect immediate output from exec_command** - paramiko buffers stdout. Use progress markers or drain at completion. + +❌ **DO NOT assume VPN is stable** - Dataforth VPN can drop mid-session. Save work frequently, use local samples for offline analysis. + +## Where to Find Things + +### Codebase Structure +``` +projects/dataforth-dos/ +├── datasheet-pipeline/ +│ ├── implementation/ # Staged code (approved by Code Review) +│ ├── scmvas-hvas-research/ # Discovery scripts and source files +│ │ ├── source/ # TESTHV3.BAS, hvin.dat, etc. +│ │ ├── samples/ # .DAT and .txt samples (local) +│ │ ├── parse_hvin.py # hvin.dat binary parser +│ │ └── pull-*.py # SSH download scripts +│ └── IMPLEMENTATION_PLAN.md # Approved plan (2026-04-11) +├── deploy/ +│ └── deploy-to-ad2.py # Deployment script (vault-based auth) +├── session-logs/ +│ ├── 2026-04-12-session.md # SCMVAS/SCMHVAS implementation (DEFINITIVE) +│ └── 2026-04-11-discovery-session.md +└── CONTEXT.md # This file +``` + +### Production Files on AD2 +``` +C:\Shares\testdatadb\ +├── server.js # Main entry point +├── server/ +│ ├── parsers/ +│ │ ├── multiline.js # Handles VASLOG .DAT (CSV format) +│ │ ├── vaslog.js # VASLOG-specific logic (new) +│ │ └── spec-reader.js # Spec DB loader (stub for SCMVAS/SCMHVAS) +│ ├── templates/ +│ │ └── datasheet-exact.js # Datasheet formatter (SCMVAS/SCMHVAS branch added) +│ └── database/ +│ ├── import.js # LOG_TYPES registry, importFiles() +│ └── export-datasheets.js # Batch export script +└── database/ + └── testdata.db # SQLite (27k+ records after backfill) +``` + +## Common Operations + +### Deploy Code to AD2 +```bash +# From projects/dataforth-dos/deploy/ +python3 deploy-to-ad2.py + +# What it does: +# 1. 
Fetches password from vault (D:/vault/scripts/vault.sh) +# 2. Connects via paramiko SFTP to 192.168.0.6:22 +# 3. Creates .bak-YYYYMMDD timestamped backups +# 4. Uploads modified files from implementation/ +# 5. Restarts testdatadb service via SSH exec_command +# 6. Verifies API responds 200 OK on port 3000 +``` + +**Manual deployment (if script unavailable):** +```bash +# Get password +AD2_PASS=$(bash D:/vault/scripts/vault.sh get-field clients/dataforth/ad2.sops.yaml credentials.password | sed 's/\\//g') + +# Connect +sshpass -p "${AD2_PASS}" ssh sysadmin@192.168.0.6 + +# Backup + copy +cd C:\Shares\testdatadb\server\parsers +copy multiline.js multiline.js.bak-20260414 +# ... upload new files via SFTP ... + +# Restart service +Restart-Service -Name testdatadb + +# Verify +curl http://localhost:3000 +``` + +### Import New Test Data +```bash +# SSH to AD2 +ssh sysadmin@192.168.0.6 + +# Run import for specific log type +cd C:\Shares\testdatadb +node database/import.js + +# Import specific files (avoid "Command line too long") +node -e " +const importFiles = require('./server/database/import').importFiles; +const fs = require('fs'); +const files = fs.readdirSync('C:/Shares/test/TS-3R/LOGS/VASLOG/VASLOG - Engineering Tested') + .filter(f => f.endsWith('.txt')) + .map(f => 'C:/Shares/test/TS-3R/LOGS/VASLOG/VASLOG - Engineering Tested/' + f); +importFiles(files, 'VASLOG_ENG').then(() => console.log('Done')); +" +``` + +### Export Datasheets for Web +```bash +# SSH to AD2 +ssh sysadmin@192.168.0.6 + +# Export all pending datasheets +cd C:\Shares\testdatadb +$env:OUTPUT_DIR = "\\ad2\webshare\For_Web" # NOT X:\For_Web in SSH +node database/export-datasheets.js + +# Export specific model family +node database/export-datasheets.js --family SCMHVAS +``` + +### Backfill Historical Data +```bash +# SSH to AD2, run as inline script to avoid command-line length limits +node -e " +const db = require('./server/database/db'); +const exportDatasheet = 
require('./server/templates/datasheet-exact'); + +db.all(\` + SELECT * FROM test_records + WHERE log_type IN ('VASLOG', 'VASLOG_ENG') + AND exported_at IS NULL + ORDER BY id +\`, (err, rows) => { + if (err) throw err; + console.log(\`[INFO] Found \${rows.length} records to export\`); + let count = 0; + rows.forEach(row => { + try { + exportDatasheet(row); + count++; + if (count % 100 === 0) console.log(\`[PROGRESS] \${count}/\${rows.length}\`); + } catch (e) { + console.error(\`[SKIP] \${row.model_name}: \${e.message}\`); + } + }); + console.log(\`[DONE] Exported \${count} datasheets\`); +}); +" +``` + +### Check Service Status +```powershell +# On AD2 (via SSH or RDP) +Get-Service testdatadb + +# View service logs (if logging enabled) +Get-EventLog -LogName Application -Source testdatadb -Newest 50 + +# Test API +Invoke-WebRequest http://localhost:3000 | Select-Object StatusCode + +# Check process +Get-Process | Where-Object { $_.ProcessName -like "*node*" } +``` + +### Access Shares from macOS/Linux +```bash +# Mount AD2 share (SMB2+) +mkdir -p ~/mnt/ad2-testdatadb +mount_smbfs //sysadmin:Password@192.168.0.6/testdatadb ~/mnt/ad2-testdatadb + +# Mount AD1 Engineering share +mkdir -p ~/mnt/ad1-engineering +mount_smbfs //sysadmin:Password@192.168.0.27/Engineering ~/mnt/ad1-engineering + +# Unmount +umount ~/mnt/ad2-testdatadb +``` + +## Key Technical Decisions (ADRs) + +**2026-04-12:** Use Option C (simple accuracy-only template, no hvin.dat lookup) +- Reason: Marketing names (SCMHVAS-M0100) ≠ engineering MODNAMEs (SCM5B41-1181) in hvin.dat +- Sample datasheets show simple 1-parameter format (Accuracy only) +- Spec-reader stub lets SCMVAS/SCMHVAS pass through pipeline without schema changes + +**2026-04-12:** Pass-through for VASLOG_ENG .txt files (not re-render) +- Reason: Engineering-Tested files already match target format exactly +- fs.copyFileSync() guarantees byte-level fidelity, avoids encoding round-trip +- Fallback to writeFileSync(raw_data, 'utf8') if 
source file missing + +**2026-04-12:** Fix recursive=false default regression with `config.recursive !== false` +- Reason: Adding `recursive` field to LOG_TYPES must not break 7 pre-existing families +- Treats absent/undefined as true (legacy behavior), explicit false as false + +**2026-04-12:** Vault-based credentials in deploy script (no hardcoding, no prompts) +- Reason: Never commit passwords, even to private repo +- deploy-to-ad2.py calls vault.sh with 30s timeout, fails loud if unavailable +- No env-var fallback, no interactive prompt + +**2026-04-12:** MM/DD/YYYY date normalization for datasheet Date field +- Reason: Matches newest Engineering-Tested samples +- Older "Corrected HVAS Files" used MM-DD-YYYY (hyphens) - backfill rewrites with slashes +- Intentional visible change, documented in implementation plan + +**2026-04-12:** Patch regex with plain-decimal fallback for QuickBASIC STR$() quirk +- Reason: QB STR$() emits scientific notation for most values, plain decimal for ~1.6% +- Not a version difference or bug - purely QB float-to-string formatting threshold +- Two-regex approach: try scientific first, fall back to plain decimal + +## QuickBASIC Artifacts & Log Formats + +### VASLOG .DAT Structure +``` +"SCMHVAS-M0100 " # Header: model name (marketing, NOT engineering MODNAME) +20,0.0034 # CSV line 1: measurement data +40,0.0126 # CSV line 2 +60,-0.0046 # CSV line 3 +80,0.0141 # CSV line 4 +100,-0.00325 # CSV line 5 +"PASS-7.005501E-033",... 
# Status line: PASS/FAIL + accuracy (scientific OR plain decimal) +"179379-1","04-09-2026" # Footer: serial number, test date (MM-DD-YYYY) +``` + +### VASLOG_ENG .txt Structure (Engineering-Tested) +``` +SCMHVAS - M0100 +SN: 171087-1 +Date: 04/08/2024 +Test: PASS +Accuracy: -7.0055E-03 % +``` + +### QuickBASIC STR$() Formatting Quirk +```basic +' QB emits TWO formats for floats: +PRINT STR$(-7.005501E-03) ' → "-7.005501E-033" (scientific + status digit) +PRINT STR$(0.01599373) ' → " .01599373" (plain decimal, leading space) + +' Threshold: ~0.01 magnitude +' Affects ~1.6% of records (438/27503) +' NOT a bug - documented QB behavior +``` + +### hvin.dat Binary Format +``` +TYPE DBASE (from DBHV.BAS) + MODNAME AS STRING * 13 ' Engineering ID: "SCM5B41-1181 " + INTYPE AS STRING * 3 + OUTSIGTYPE AS STRING * 7 + WAVESHPCAL AS STRING * 8 + ' ... 42 SINGLE floats (IEEE 754, 4 bytes each) ... +END TYPE + +' Total: 13+3+7+8 + (42*4) = 199 bytes/record +' File size: 6567 bytes = 33 records +``` + +## Troubleshooting + +### "Output directory does not exist: X:\For_Web" +- **Cause:** X: drive only mapped under service account, not in SSH session +- **Fix:** Use UNC path: `\\ad2\webshare\For_Web` +```powershell +$env:OUTPUT_DIR = "\\ad2\webshare\For_Web" +node database/export-datasheets.js +``` + +### "Command line is too long" (PowerShell) +- **Cause:** Passing 50+ file paths as arguments exceeds PowerShell limit +- **Fix:** Use inline node script with fs.readdirSync (see Common Operations above) + +### VPN Drops Mid-Session +- **Symptom:** AD2/AD1 become unreachable, SSH hangs +- **Fix:** + 1. Work offline on local samples for analysis + 2. Restore VPN (FortiClient) + 3. 
Resume deployment/import when connection stable + +### Vault Returns `Paper123\!@#` (Backslash) +- **Cause:** Legacy shell escape stored in ad2.sops.yaml +- **Fix:** Strip backslash at read-time: `sed 's/\\//g'` +- **TODO:** Clean vault entry to remove backslash + +### Paramiko "No Output" for Long-Running Commands +- **Cause:** exec_command buffers stdout until completion +- **Fix:** Either: + 1. Accept final output when command completes + 2. Add progress markers that flush every N records + 3. Drain channel periodically: `while not channel.exit_status_ready(): channel.recv(1024)` + +### 438 Records Skipped During Backfill +- **Cause:** Plain-decimal format not matching scientific-notation-only regex +- **Fix:** Already patched (2026-04-12). Regex now tries both formats. +- **Verification:** Rerun backfill on stragglers → 438/438 rendered + +## Recent Commit History + +**2026-04-12 (commit 0dd3d82):** SCMVAS/SCMHVAS pipeline extension +- 114 files changed, 35,486 insertions +- 5 production files modified, 1 new parser +- All research scripts sanitized (vault-based credentials) +- .gitignore updated (exclude testdata.db) + +## Useful Links + +- **Latest Session:** session-logs/2026-04-12-session.md (DEFINITIVE) +- **Discovery Session:** session-logs/2026-04-11-discovery-session.md +- **Implementation Plan:** datasheet-pipeline/scmvas-hvas-research/IMPLEMENTATION_PLAN.md +- **Credentials (vault):** D:\vault\clients\dataforth\ + +## Quick Reference - Log Types + +| Family | Log Type | Format | Parser | Location | +|--------|----------|--------|--------|----------| +| SCM5B | 5BLOG | Multiline CSV .DAT | multiline.js | TS-xx/LOGS/5BLOG | +| 8B | 8BLOG | Multiline CSV .DAT | multiline.js | TS-xx/LOGS/8BLOG | +| DSCA | DSCLOG | Multiline CSV .DAT | multiline.js | TS-xx/LOGS/DSCLOG | +| SCMVAS | VASLOG | Multiline CSV .DAT | vaslog.js | TS-3R/LOGS/VASLOG | +| SCMHVAS (prod) | VASLOG | Multiline CSV .DAT | vaslog.js | TS-3R/LOGS/VASLOG | +| SCMHVAS (eng) | VASLOG_ENG | 
.txt (pass-through) | vaslog-engtxt.js | TS-3R/LOGS/VASLOG/VASLOG - Engineering Tested | + +--- + +**Before starting work:** Read session-logs/2026-04-12-session.md for complete context +**For AD2 access:** Ensure the Dataforth VPN is connected (FortiClient) +**For credentials:** Always use vault - never hardcode passwords diff --git a/projects/dataforth-dos/datasheet-pipeline/.gitignore b/projects/dataforth-dos/datasheet-pipeline/.gitignore index 47afa88..4f1bd8b 100644 --- a/projects/dataforth-dos/datasheet-pipeline/.gitignore +++ b/projects/dataforth-dos/datasheet-pipeline/.gitignore @@ -1,8 +1,11 @@ -# Python cache -__pycache__/ -*.pyc - -# SQLite snapshot pulled during discovery (4+ GB, customer data) -scmvas-hvas-research/existing-database/testdata.db -scmvas-hvas-research/existing-database/testdata.db-shm -scmvas-hvas-research/existing-database/testdata.db-wal +# SMTP credentials written by deploy-to-ad2.py (never commit) +implementation/config/notify.json + +# Python cache +__pycache__/ +*.pyc + +# SQLite snapshot pulled during discovery (4+ GB, customer data) +scmvas-hvas-research/existing-database/testdata.db +scmvas-hvas-research/existing-database/testdata.db-shm +scmvas-hvas-research/existing-database/testdata.db-wal diff --git a/projects/dataforth-dos/datasheet-pipeline/implementation/database/export-datasheets.js b/projects/dataforth-dos/datasheet-pipeline/implementation/database/export-datasheets.js index a5b3cc4..b243f0c 100644 --- a/projects/dataforth-dos/datasheet-pipeline/implementation/database/export-datasheets.js +++ b/projects/dataforth-dos/datasheet-pipeline/implementation/database/export-datasheets.js @@ -1,257 +1,273 @@ -/** - * Export Datasheets - * - * Generates TXT datasheets for unexported PASS records and writes them to X:\For_Web\. - * Updates forweb_exported_at after successful export. 
- * - * Usage: - * node export-datasheets.js Export all pending (batch mode) - * node export-datasheets.js --limit 100 Export up to 100 records - * node export-datasheets.js --file Export records matching specific source files - * node export-datasheets.js --serial 178439-1 Export a specific serial number - * node export-datasheets.js --dry-run Show what would be exported without writing - */ - -const fs = require('fs'); -const path = require('path'); -const db = require('./db'); - -const { loadAllSpecs, getSpecs } = require('../parsers/spec-reader'); -const { generateExactDatasheet } = require('../templates/datasheet-exact'); - -// Configuration -const OUTPUT_DIR = 'X:\\For_Web'; -const BATCH_SIZE = 500; - -async function run() { - const args = process.argv.slice(2); - const dryRun = args.includes('--dry-run'); - const limitIdx = args.indexOf('--limit'); - const limit = limitIdx >= 0 ? parseInt(args[limitIdx + 1]) : 0; - const serialIdx = args.indexOf('--serial'); - const serial = serialIdx >= 0 ? args[serialIdx + 1] : null; - const fileIdx = args.indexOf('--file'); - const files = fileIdx >= 0 ? 
args.slice(fileIdx + 1).filter(f => !f.startsWith('--')) : null; - - console.log('========================================'); - console.log('Datasheet Export'); - console.log('========================================'); - console.log(`Output: ${OUTPUT_DIR}`); - console.log(`Dry run: ${dryRun}`); - if (limit) console.log(`Limit: ${limit}`); - if (serial) console.log(`Serial: ${serial}`); - console.log(`Start: ${new Date().toISOString()}`); - - if (!dryRun && !fs.existsSync(OUTPUT_DIR)) { - console.error(`ERROR: Output directory does not exist: ${OUTPUT_DIR}`); - process.exit(1); - } - - console.log('\nLoading model specs...'); - const specMap = loadAllSpecs(); - - // Build query - const conditions = [`overall_result = 'PASS'`, `forweb_exported_at IS NULL`]; - const params = []; - let paramIdx = 0; - - if (serial) { - paramIdx++; - conditions.push(`serial_number = $${paramIdx}`); - params.push(serial); - } - - if (files && files.length > 0) { - const placeholders = files.map(() => { paramIdx++; return `$${paramIdx}`; }).join(','); - conditions.push(`source_file IN (${placeholders})`); - params.push(...files); - } - - let sql = `SELECT * FROM test_records WHERE ${conditions.join(' AND ')} ORDER BY test_date DESC`; - - if (limit) { - paramIdx++; - sql += ` LIMIT $${paramIdx}`; - params.push(limit); - } - - const records = await db.query(sql, params); - console.log(`\nFound ${records.length} records to export`); - - if (records.length === 0) { - console.log('Nothing to export.'); - await db.close(); - return { exported: 0, skipped: 0, errors: 0 }; - } - - let exported = 0; - let skipped = 0; - let errors = 0; - let noSpecs = 0; - let pendingUpdates = []; - - for (const record of records) { - try { - const filename = record.serial_number + '.TXT'; - const outputPath = path.join(OUTPUT_DIR, filename); - - // VASLOG_ENG: verbatim byte-for-byte copy of the original file. 
- // Using fs.copyFileSync avoids any utf-8 round-trip that would - // corrupt non-ASCII bytes (CP1252 etc.) in customer datasheets. - // Fall back to writing raw_data if the source file is gone. - if (record.log_type === 'VASLOG_ENG') { - if (dryRun) { - console.log(` [DRY RUN] Would copy: ${record.source_file} -> ${filename}`); - exported++; - continue; - } - if (record.source_file && fs.existsSync(record.source_file)) { - fs.copyFileSync(record.source_file, outputPath); - } else { - console.warn(`[WARN] source file missing, writing decoded raw_data for ${record.serial_number}`); - if (!record.raw_data) { - skipped++; - continue; - } - fs.writeFileSync(outputPath, record.raw_data, 'utf8'); - } - pendingUpdates.push(record.id); - exported++; - - if (pendingUpdates.length >= BATCH_SIZE) { - await flushUpdates(pendingUpdates); - pendingUpdates = []; - process.stdout.write(`\r Exported: ${exported} / ${records.length}`); - } - continue; - } - - // Template-generated datasheet path. - const specs = getSpecs(specMap, record.model_number); - if (!specs) { - noSpecs++; - skipped++; - continue; - } - const txt = generateExactDatasheet(record, specs); - if (!txt) { - skipped++; - continue; - } - - if (dryRun) { - console.log(` [DRY RUN] Would write: ${filename}`); - exported++; - } else { - fs.writeFileSync(outputPath, txt, 'utf8'); - pendingUpdates.push(record.id); - exported++; - - // Batch commit - if (pendingUpdates.length >= BATCH_SIZE) { - await flushUpdates(pendingUpdates); - pendingUpdates = []; - process.stdout.write(`\r Exported: ${exported} / ${records.length}`); - } - } - } catch (err) { - console.error(`\n ERROR exporting ${record.serial_number}: ${err.message}`); - errors++; - } - } - - // Flush remaining updates - if (pendingUpdates.length > 0) { - await flushUpdates(pendingUpdates); - } - - console.log(`\n\n========================================`); - console.log(`Export Complete`); - console.log(`========================================`); - 
console.log(`Exported: ${exported}`); - console.log(`Skipped: ${skipped} (${noSpecs} missing specs)`); - console.log(`Errors: ${errors}`); - console.log(`End: ${new Date().toISOString()}`); - - await db.close(); - return { exported, skipped, errors }; -} - -async function flushUpdates(ids) { - const now = new Date().toISOString(); - await db.transaction(async (txClient) => { - for (const id of ids) { - await txClient.execute( - 'UPDATE test_records SET forweb_exported_at = $1 WHERE id = $2', - [now, id] - ); - } - }); -} - -// Export function for use by import.js (no db argument -- uses shared pool) -async function exportNewRecords(specMap, filePaths) { - if (!fs.existsSync(OUTPUT_DIR)) { - console.log(`[EXPORT] Output directory not found: ${OUTPUT_DIR}`); - return 0; - } - - const conditions = [`overall_result = 'PASS'`, `forweb_exported_at IS NULL`]; - const params = []; - let paramIdx = 0; - - if (filePaths && filePaths.length > 0) { - const placeholders = filePaths.map(() => { paramIdx++; return `$${paramIdx}`; }).join(','); - conditions.push(`source_file IN (${placeholders})`); - params.push(...filePaths); - } - - const sql = `SELECT * FROM test_records WHERE ${conditions.join(' AND ')}`; - const records = await db.query(sql, params); - if (records.length === 0) return 0; - - let exported = 0; - - await db.transaction(async (txClient) => { - for (const record of records) { - const filename = record.serial_number + '.TXT'; - const outputPath = path.join(OUTPUT_DIR, filename); - - try { - // VASLOG_ENG: verbatim copy, preserving original bytes. 
- if (record.log_type === 'VASLOG_ENG') { - if (record.source_file && fs.existsSync(record.source_file)) { - fs.copyFileSync(record.source_file, outputPath); - } else { - console.warn(`[WARN] source file missing, writing decoded raw_data for ${record.serial_number}`); - if (!record.raw_data) continue; - fs.writeFileSync(outputPath, record.raw_data, 'utf8'); - } - } else { - const specs = getSpecs(specMap, record.model_number); - if (!specs) continue; - const txt = generateExactDatasheet(record, specs); - if (!txt) continue; - fs.writeFileSync(outputPath, txt, 'utf8'); - } - - await txClient.execute( - 'UPDATE test_records SET forweb_exported_at = $1 WHERE id = $2', - [new Date().toISOString(), record.id] - ); - exported++; - } catch (err) { - console.error(`[EXPORT] Error writing ${filename}: ${err.message}`); - } - } - }); - - console.log(`[EXPORT] Generated ${exported} datasheet(s)`); - return exported; -} - -if (require.main === module) { - run().catch(console.error); -} - -module.exports = { exportNewRecords }; +/** + * Export Datasheets + * + * Generates TXT datasheets for unexported PASS records and writes them to X:\For_Web\. + * Updates forweb_exported_at after successful export. 
+ * + * Usage: + * node export-datasheets.js Export all pending (batch mode) + * node export-datasheets.js --limit 100 Export up to 100 records + * node export-datasheets.js --file <path> [path ...] Export records matching specific source files + * node export-datasheets.js --serial 178439-1 Export a specific serial number + * node export-datasheets.js --dry-run Show what would be exported without writing + */ + +const fs = require('fs'); +const path = require('path'); +const db = require('./db'); + +const { loadAllSpecs, getSpecs } = require('../parsers/spec-reader'); +const { generateExactDatasheet } = require('../templates/datasheet-exact'); +const { sendFailureEmail } = require('../server/notify'); + +// Configuration +const OUTPUT_DIR = 'X:\\For_Web'; +const BATCH_SIZE = 500; + +async function run() { + const args = process.argv.slice(2); + const dryRun = args.includes('--dry-run'); + const limitIdx = args.indexOf('--limit'); + const limit = limitIdx >= 0 ? parseInt(args[limitIdx + 1]) : 0; + const serialIdx = args.indexOf('--serial'); + const serial = serialIdx >= 0 ? args[serialIdx + 1] : null; + const fileIdx = args.indexOf('--file'); + const files = fileIdx >= 0 ? 
args.slice(fileIdx + 1).filter(f => !f.startsWith('--')) : null; + + console.log('========================================'); + console.log('Datasheet Export'); + console.log('========================================'); + console.log(`Output: ${OUTPUT_DIR}`); + console.log(`Dry run: ${dryRun}`); + if (limit) console.log(`Limit: ${limit}`); + if (serial) console.log(`Serial: ${serial}`); + console.log(`Start: ${new Date().toISOString()}`); + + if (!dryRun && !fs.existsSync(OUTPUT_DIR)) { + console.error(`ERROR: Output directory does not exist: ${OUTPUT_DIR}`); + process.exit(1); + } + + console.log('\nLoading model specs...'); + const specMap = loadAllSpecs(); + + // Build query + const conditions = [`overall_result = 'PASS'`, `forweb_exported_at IS NULL`]; + const params = []; + let paramIdx = 0; + + if (serial) { + paramIdx++; + conditions.push(`serial_number = $${paramIdx}`); + params.push(serial); + } + + if (files && files.length > 0) { + const placeholders = files.map(() => { paramIdx++; return `$${paramIdx}`; }).join(','); + conditions.push(`source_file IN (${placeholders})`); + params.push(...files); + } + + let sql = `SELECT * FROM test_records WHERE ${conditions.join(' AND ')} ORDER BY test_date DESC`; + + if (limit) { + paramIdx++; + sql += ` LIMIT $${paramIdx}`; + params.push(limit); + } + + const records = await db.query(sql, params); + console.log(`\nFound ${records.length} records to export`); + + if (records.length === 0) { + console.log('Nothing to export.'); + await db.close(); + return { exported: 0, skipped: 0, errors: 0 }; + } + + let exported = 0; + let skipped = 0; + let errors = 0; + let noSpecs = 0; + let pendingUpdates = []; + + for (const record of records) { + try { + const filename = record.serial_number + '.TXT'; + const outputPath = path.join(OUTPUT_DIR, filename); + + // VASLOG_ENG: verbatim byte-for-byte copy of the original file. 
+ // Using fs.copyFileSync avoids any utf-8 round-trip that would + // corrupt non-ASCII bytes (CP1252 etc.) in customer datasheets. + // Fall back to writing raw_data if the source file is gone. + if (record.log_type === 'VASLOG_ENG') { + if (dryRun) { + console.log(` [DRY RUN] Would copy: ${record.source_file} -> ${filename}`); + exported++; + continue; + } + if (record.source_file && fs.existsSync(record.source_file)) { + fs.copyFileSync(record.source_file, outputPath); + } else { + console.warn(`[WARN] source file missing, writing decoded raw_data for ${record.serial_number}`); + if (!record.raw_data) { + skipped++; + continue; + } + fs.writeFileSync(outputPath, record.raw_data, 'utf8'); + } + pendingUpdates.push(record.id); + exported++; + + if (pendingUpdates.length >= BATCH_SIZE) { + await flushUpdates(pendingUpdates); + pendingUpdates = []; + process.stdout.write(`\r Exported: ${exported} / ${records.length}`); + } + continue; + } + + // Template-generated datasheet path. + const specs = getSpecs(specMap, record.model_number); + if (!specs) { + noSpecs++; + skipped++; + continue; + } + const txt = generateExactDatasheet(record, specs); + if (!txt) { + skipped++; + continue; + } + + if (dryRun) { + console.log(` [DRY RUN] Would write: ${filename}`); + exported++; + } else { + fs.writeFileSync(outputPath, txt, 'utf8'); + pendingUpdates.push(record.id); + exported++; + + // Batch commit + if (pendingUpdates.length >= BATCH_SIZE) { + await flushUpdates(pendingUpdates); + pendingUpdates = []; + process.stdout.write(`\r Exported: ${exported} / ${records.length}`); + } + } + } catch (err) { + console.error(`\n ERROR exporting ${record.serial_number}: ${err.message}`); + errors++; + } + } + + // Flush remaining updates + if (pendingUpdates.length > 0) { + await flushUpdates(pendingUpdates); + } + + console.log(`\n\n========================================`); + console.log(`Export Complete`); + console.log(`========================================`); + 
console.log(`Exported: ${exported}`); + console.log(`Skipped: ${skipped} (${noSpecs} missing specs)`); + console.log(`Errors: ${errors}`); + console.log(`End: ${new Date().toISOString()}`); + + await db.close(); + return { exported, skipped, errors }; +} + +async function flushUpdates(ids) { + const now = new Date().toISOString(); + await db.transaction(async (txClient) => { + for (const id of ids) { + await txClient.execute( + 'UPDATE test_records SET forweb_exported_at = $1 WHERE id = $2', + [now, id] + ); + } + }); +} + +// Export function for use by import.js (no db argument -- uses shared pool) +async function exportNewRecords(specMap, filePaths) { + if (!fs.existsSync(OUTPUT_DIR)) { + console.log(`[EXPORT] Output directory not found: ${OUTPUT_DIR}`); + return 0; + } + + const conditions = [`overall_result = 'PASS'`, `forweb_exported_at IS NULL`]; + const params = []; + let paramIdx = 0; + + if (filePaths && filePaths.length > 0) { + const placeholders = filePaths.map(() => { paramIdx++; return `$${paramIdx}`; }).join(','); + conditions.push(`source_file IN (${placeholders})`); + params.push(...filePaths); + } + + const sql = `SELECT * FROM test_records WHERE ${conditions.join(' AND ')}`; + const records = await db.query(sql, params); + if (records.length === 0) return 0; + + let exported = 0; + + await db.transaction(async (txClient) => { + for (const record of records) { + const filename = record.serial_number + '.TXT'; + const outputPath = path.join(OUTPUT_DIR, filename); + + try { + // VASLOG_ENG: verbatim copy, preserving original bytes. 
+ if (record.log_type === 'VASLOG_ENG') { + if (record.source_file && fs.existsSync(record.source_file)) { + fs.copyFileSync(record.source_file, outputPath); + } else { + console.warn(`[WARN] source file missing, writing decoded raw_data for ${record.serial_number}`); + if (!record.raw_data) continue; + fs.writeFileSync(outputPath, record.raw_data, 'utf8'); + } + } else { + const specs = getSpecs(specMap, record.model_number); + if (!specs) continue; + const txt = generateExactDatasheet(record, specs); + if (!txt) continue; + fs.writeFileSync(outputPath, txt, 'utf8'); + } + + await txClient.execute( + 'UPDATE test_records SET forweb_exported_at = $1 WHERE id = $2', + [new Date().toISOString(), record.id] + ); + exported++; + } catch (err) { + console.error(`[EXPORT] Error writing ${filename}: ${err.message}`); + } + } + }); + + console.log(`[EXPORT] Generated ${exported} datasheet(s)`); + return exported; +} + +if (require.main === module) { + run() + .then(({ exported, skipped, errors }) => { + if (errors > 0) { + return sendFailureEmail( + `[testdatadb] Datasheet export completed with ${errors} error(s)`, + `Export finished but ${errors} record(s) failed to write to the web directory.\n\nExported: ${exported}\nSkipped: ${skipped}\nErrors: ${errors}\n\nCheck the service log on AD2 for details.` + ); + } + }) + .catch(async (err) => { + console.error(err); + await sendFailureEmail( + '[testdatadb] Datasheet export failed', + `Export task crashed before completion.\n\nError: ${err.message}\n\nStack:\n${err.stack || '(none)'}` + ); + }); +} + +module.exports = { exportNewRecords }; diff --git a/projects/dataforth-dos/datasheet-pipeline/implementation/database/import.js b/projects/dataforth-dos/datasheet-pipeline/implementation/database/import.js index a90228b..8d15810 100644 --- a/projects/dataforth-dos/datasheet-pipeline/implementation/database/import.js +++ b/projects/dataforth-dos/datasheet-pipeline/implementation/database/import.js @@ -1,396 +1,407 @@ -/** - * 
Data Import Script - * Imports test data from DAT and SHT files into PostgreSQL database - */ - -const fs = require('fs'); -const path = require('path'); -const db = require('./db'); - -const { parseMultilineFile, extractTestStation } = require('../parsers/multiline'); -const { parseCsvFile } = require('../parsers/csvline'); -const { parseShtFile } = require('../parsers/shtfile'); -const { parseVaslogEngTxt } = require('../parsers/vaslog-engtxt'); - -// Data source paths -const TEST_PATH = 'C:/Shares/test'; -const RECOVERY_PATH = 'C:/Shares/Recovery-TEST'; -const HISTLOGS_PATH = path.join(TEST_PATH, 'Ate/HISTLOGS'); - -// Log types and their parsers. -// NOTE: `recursive` defaults to TRUE when absent (walk subfolders by default, -// preserving pre-existing production behavior for DSCLOG/5BLOG/8BLOG/PWRLOG/ -// SCTLOG/7BLOG). Set it to FALSE explicitly on VASLOG so the .DAT walk does -// NOT descend into the "VASLOG - Engineering Tested" subfolder (belt-and- -// suspenders: the .DAT glob wouldn't match .txt, but be explicit anyway). -// VASLOG_ENG also sets recursive:false -- the eng-tested dir is flat. 
-const LOG_TYPES = { - 'DSCLOG': { parser: 'multiline', ext: '.DAT' }, - '5BLOG': { parser: 'multiline', ext: '.DAT' }, - '8BLOG': { parser: 'multiline', ext: '.DAT' }, - 'PWRLOG': { parser: 'multiline', ext: '.DAT' }, - 'SCTLOG': { parser: 'multiline', ext: '.DAT' }, - 'VASLOG': { parser: 'multiline', ext: '.DAT', recursive: false }, - '7BLOG': { parser: 'csvline', ext: '.DAT' }, - // Engineering-tested SCMHVAS pre-rendered datasheets live under VASLOG/"VASLOG - Engineering Tested"/ - 'VASLOG_ENG': { parser: 'vaslog-engtxt', ext: '.txt', dir: 'VASLOG/VASLOG - Engineering Tested', recursive: false } -}; - -// Find all files of a specific type in a directory -function findFiles(dir, pattern, recursive = true) { - const results = []; - - try { - if (!fs.existsSync(dir)) return results; - - const items = fs.readdirSync(dir, { withFileTypes: true }); - - for (const item of items) { - const fullPath = path.join(dir, item.name); - - if (item.isDirectory() && recursive) { - results.push(...findFiles(fullPath, pattern, recursive)); - } else if (item.isFile()) { - if (pattern.test(item.name)) { - results.push(fullPath); - } - } - } - } catch (err) { - // Ignore permission errors - } - - return results; -} - -// Parse records from a file (sync -- file I/O only) -function parseFile(filePath, logType, parser) { - const testStation = extractTestStation(filePath); - - switch (parser) { - case 'multiline': - return parseMultilineFile(filePath, logType, testStation); - case 'csvline': - return parseCsvFile(filePath, testStation); - case 'shtfile': - return parseShtFile(filePath, testStation); - case 'vaslog-engtxt': - return parseVaslogEngTxt(filePath, testStation); - default: - return []; - } -} - -// Batch insert records into PostgreSQL -async function insertBatch(txClient, records) { - let imported = 0; - for (const record of records) { - try { - const result = await txClient.execute( - `INSERT INTO test_records - (log_type, model_number, serial_number, test_date, test_station, 
overall_result, raw_data, source_file) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) - ON CONFLICT (log_type, model_number, serial_number, test_date, test_station) - DO UPDATE SET raw_data = EXCLUDED.raw_data, overall_result = EXCLUDED.overall_result`, - [ - record.log_type, - record.model_number, - record.serial_number, - record.test_date, - record.test_station, - record.overall_result, - record.raw_data, - record.source_file - ] - ); - if (result.rowCount > 0) imported++; - } catch (err) { - // Constraint error - skip - } - } - return imported; -} - -// Import records from a file -async function importFile(txClient, filePath, logType, parser) { - let records = []; - - try { - records = parseFile(filePath, logType, parser); - const imported = await insertBatch(txClient, records); - return { total: records.length, imported }; - } catch (err) { - console.error(`Error importing ${filePath}: ${err.message}`); - return { total: 0, imported: 0 }; - } -} - -// Import from HISTLOGS (master consolidated logs) -async function importHistlogs(txClient) { - console.log('\n=== Importing from HISTLOGS ==='); - - let totalImported = 0; - let totalRecords = 0; - - for (const [logType, config] of Object.entries(LOG_TYPES)) { - const subdir = config.dir || logType; - const logDir = path.join(HISTLOGS_PATH, subdir); - - if (!fs.existsSync(logDir)) { - console.log(` ${logType}: directory not found`); - continue; - } - - const files = findFiles(logDir, new RegExp(`\\${config.ext}$`, 'i'), config.recursive !== false); - console.log(` ${logType}: found ${files.length} files`); - - for (const file of files) { - const { total, imported } = await importFile(txClient, file, logType, config.parser); - totalRecords += total; - totalImported += imported; - } - } - - console.log(` HISTLOGS total: ${totalImported} records imported (${totalRecords} parsed)`); - return totalImported; -} - -// Import from test station logs -async function importStationLogs(txClient, basePath, label) { - 
console.log(`\n=== Importing from ${label} ===`); - - let totalImported = 0; - let totalRecords = 0; - - const stationPattern = /^TS-\d+[LR]?$/i; - let stations = []; - - try { - const items = fs.readdirSync(basePath, { withFileTypes: true }); - stations = items - .filter(i => i.isDirectory() && stationPattern.test(i.name)) - .map(i => i.name); - } catch (err) { - console.log(` Error reading ${basePath}: ${err.message}`); - return 0; - } - - console.log(` Found stations: ${stations.join(', ')}`); - - for (const station of stations) { - const logsDir = path.join(basePath, station, 'LOGS'); - - if (!fs.existsSync(logsDir)) continue; - - for (const [logType, config] of Object.entries(LOG_TYPES)) { - const subdir = config.dir || logType; - const logDir = path.join(logsDir, subdir); - - if (!fs.existsSync(logDir)) continue; - - const files = findFiles(logDir, new RegExp(`\\${config.ext}$`, 'i'), config.recursive !== false); - - for (const file of files) { - const { total, imported } = await importFile(txClient, file, logType, config.parser); - totalRecords += total; - totalImported += imported; - } - } - } - - // Also import SHT files - const shtFiles = findFiles(basePath, /\.SHT$/i, true); - console.log(` Found ${shtFiles.length} SHT files`); - - for (const file of shtFiles) { - const { total, imported } = await importFile(txClient, file, 'SHT', 'shtfile'); - totalRecords += total; - totalImported += imported; - } - - console.log(` ${label} total: ${totalImported} records imported (${totalRecords} parsed)`); - return totalImported; -} - -// Import from Recovery-TEST backups (newest first) -async function importRecoveryBackups(txClient) { - console.log('\n=== Importing from Recovery-TEST backups ==='); - - if (!fs.existsSync(RECOVERY_PATH)) { - console.log(' Recovery-TEST directory not found'); - return 0; - } - - const backups = fs.readdirSync(RECOVERY_PATH, { withFileTypes: true }) - .filter(i => i.isDirectory() && /^\d{2}-\d{2}-\d{2}$/.test(i.name)) - .map(i => 
i.name) - .sort() - .reverse(); - - console.log(` Found backup dates: ${backups.join(', ')}`); - - let totalImported = 0; - - for (const backup of backups) { - const backupPath = path.join(RECOVERY_PATH, backup); - const imported = await importStationLogs(txClient, backupPath, `Recovery-TEST/${backup}`); - totalImported += imported; - } - - return totalImported; -} - -// Main import function -async function runImport() { - console.log('========================================'); - console.log('Test Data Import'); - console.log('========================================'); - console.log(`Start time: ${new Date().toISOString()}`); - - let grandTotal = 0; - - await db.transaction(async (txClient) => { - grandTotal += await importHistlogs(txClient); - grandTotal += await importRecoveryBackups(txClient); - grandTotal += await importStationLogs(txClient, TEST_PATH, 'test'); - }); - - const stats = await db.queryOne('SELECT COUNT(*) as count FROM test_records'); - - console.log('\n========================================'); - console.log('Import Complete'); - console.log('========================================'); - console.log(`Total records in database: ${stats.count}`); - console.log(`End time: ${new Date().toISOString()}`); - - await db.close(); -} - -// Import a single file (for incremental imports from sync) -async function importSingleFile(filePath) { - console.log(`Importing: ${filePath}`); - - let logType = null; - let parser = null; - - // VASLOG_ENG subpath must be checked before VASLOG (substring overlap). 
- if (filePath.includes('VASLOG - Engineering Tested')) { - logType = 'VASLOG_ENG'; - parser = LOG_TYPES['VASLOG_ENG'].parser; - } else { - for (const [type, config] of Object.entries(LOG_TYPES)) { - if (type === 'VASLOG_ENG') continue; - if (filePath.includes(type)) { - logType = type; - parser = config.parser; - break; - } - } - } - - if (!logType) { - if (/\.SHT$/i.test(filePath)) { - logType = 'SHT'; - parser = 'shtfile'; - } else { - console.log(` Unknown log type for: ${filePath}`); - return { total: 0, imported: 0 }; - } - } - - let result; - await db.transaction(async (txClient) => { - result = await importFile(txClient, filePath, logType, parser); - }); - - console.log(` Imported ${result.imported} of ${result.total} records`); - return result; -} - -// Import multiple files (for batch incremental imports) -async function importFiles(filePaths) { - console.log(`\n========================================`); - console.log(`Incremental Import: ${filePaths.length} files`); - console.log(`========================================`); - - let totalImported = 0; - let totalRecords = 0; - - await db.transaction(async (txClient) => { - for (const filePath of filePaths) { - let logType = null; - let parser = null; - - // VASLOG_ENG subpath must be checked before the generic loop -- - // otherwise `includes('VASLOG')` hits first and the eng .txt gets - // dispatched to the multiline parser. Mirror importSingleFile(). 
- if (filePath.includes('VASLOG - Engineering Tested')) { - logType = 'VASLOG_ENG'; - parser = LOG_TYPES['VASLOG_ENG'].parser; - } else { - for (const [type, config] of Object.entries(LOG_TYPES)) { - if (type === 'VASLOG_ENG') continue; - if (filePath.includes(type)) { - logType = type; - parser = config.parser; - break; - } - } - } - - if (!logType) { - if (/\.SHT$/i.test(filePath)) { - logType = 'SHT'; - parser = 'shtfile'; - } else { - console.log(` Skipping unknown type: ${filePath}`); - continue; - } - } - - const { total, imported } = await importFile(txClient, filePath, logType, parser); - totalRecords += total; - totalImported += imported; - console.log(` ${path.basename(filePath)}: ${imported}/${total} records`); - } - }); - - console.log(`\nTotal: ${totalImported} records imported (${totalRecords} parsed)`); - - // Export datasheets for newly imported records - if (totalImported > 0) { - try { - const { loadAllSpecs } = require('../parsers/spec-reader'); - const { exportNewRecords } = require('./export-datasheets'); - const specMap = loadAllSpecs(); - await exportNewRecords(specMap, filePaths); - } catch (err) { - console.error(`[EXPORT] Datasheet export failed: ${err.message}`); - } - } - - return { total: totalRecords, imported: totalImported }; -} - -// Run if called directly -if (require.main === module) { - const args = process.argv.slice(2); - - if (args.length > 0 && args[0] === '--file') { - const files = args.slice(1); - if (files.length === 0) { - console.log('Usage: node import.js --file [file2] ...'); - process.exit(1); - } - importFiles(files).then(() => db.close()).catch(console.error); - } else if (args.length > 0 && args[0] === '--help') { - console.log('Usage:'); - console.log(' node import.js Full import from all sources'); - console.log(' node import.js --file Import specific file(s)'); - process.exit(0); - } else { - runImport().catch(console.error); - } -} - -module.exports = { runImport, importSingleFile, importFiles }; +/** + * Data 
Import Script + * Imports test data from DAT and SHT files into PostgreSQL database + */ + +const fs = require('fs'); +const path = require('path'); +const db = require('./db'); + +const { parseMultilineFile, extractTestStation } = require('../parsers/multiline'); +const { parseCsvFile } = require('../parsers/csvline'); +const { parseShtFile } = require('../parsers/shtfile'); +const { parseVaslogEngTxt } = require('../parsers/vaslog-engtxt'); +const { sendFailureEmail } = require('../server/notify'); + +// Data source paths +const TEST_PATH = 'C:/Shares/test'; +const RECOVERY_PATH = 'C:/Shares/Recovery-TEST'; +const HISTLOGS_PATH = path.join(TEST_PATH, 'Ate/HISTLOGS'); + +// Log types and their parsers. +// NOTE: `recursive` defaults to TRUE when absent (walk subfolders by default, +// preserving pre-existing production behavior for DSCLOG/5BLOG/8BLOG/PWRLOG/ +// SCTLOG/7BLOG). Set it to FALSE explicitly on VASLOG so the .DAT walk does +// NOT descend into the "VASLOG - Engineering Tested" subfolder (belt-and- +// suspenders: the .DAT glob wouldn't match .txt, but be explicit anyway). +// VASLOG_ENG also sets recursive:false -- the eng-tested dir is flat. 
// ---------------------------------------------------------------------------
// File: projects/dataforth-dos/datasheet-pipeline/implementation/database/import.js
// (continued) -- log-type registry, file discovery, parser dispatch,
// PostgreSQL import routines and the CLI entry point.
// ---------------------------------------------------------------------------

/**
 * Registry of known log types, keyed by log-type name.
 *
 *   parser    - parser id understood by parseFile()
 *   ext       - file extension to look for (matched case-insensitively)
 *   dir       - optional sub-directory override (defaults to the key itself)
 *   recursive - set to false to scan only the top level of the directory;
 *               any other value (including absent) means "recurse"
 */
const LOG_TYPES = {
  'DSCLOG': { parser: 'multiline', ext: '.DAT' },
  '5BLOG': { parser: 'multiline', ext: '.DAT' },
  '8BLOG': { parser: 'multiline', ext: '.DAT' },
  'PWRLOG': { parser: 'multiline', ext: '.DAT' },
  'SCTLOG': { parser: 'multiline', ext: '.DAT' },
  'VASLOG': { parser: 'multiline', ext: '.DAT', recursive: false },
  '7BLOG': { parser: 'csvline', ext: '.DAT' },
  // Engineering-tested SCMHVAS pre-rendered datasheets live under VASLOG/"VASLOG - Engineering Tested"/
  'VASLOG_ENG': { parser: 'vaslog-engtxt', ext: '.txt', dir: 'VASLOG/VASLOG - Engineering Tested', recursive: false }
};

// Path fragment that identifies an Engineering-Tested datasheet file.
const ENG_TESTED_MARKER = 'VASLOG - Engineering Tested';

// PostgreSQL SQLSTATE "in_failed_sql_transaction": once raised, every further
// statement in the same transaction fails until it is rolled back.
const PG_IN_FAILED_TRANSACTION = '25P02';

// Directory-read errors that are expected on the shares and skipped quietly.
const QUIET_FS_ERRORS = new Set(['EACCES', 'EPERM', 'ENOENT']);

/**
 * Build a case-insensitive "ends with this extension" pattern.
 * Every regex metacharacter in `ext` is escaped, not just the leading dot.
 *
 * @param {string} ext - extension including the dot, e.g. '.DAT'
 * @returns {RegExp}
 */
function extensionPattern(ext) {
  const escaped = ext.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  return new RegExp(`${escaped}$`, 'i');
}

/**
 * Work out the log type and parser for a file from its path.
 *
 * The Engineering-Tested marker is checked first because its path also
 * contains 'VASLOG'; otherwise the generic substring match would dispatch the
 * .txt file to the multiline parser. Falls back to the .SHT extension.
 *
 * @param {string} filePath
 * @returns {{logType: string, parser: string}|null} null when nothing matches
 */
function detectLogType(filePath) {
  if (filePath.includes(ENG_TESTED_MARKER)) {
    return { logType: 'VASLOG_ENG', parser: LOG_TYPES.VASLOG_ENG.parser };
  }

  for (const [type, config] of Object.entries(LOG_TYPES)) {
    if (type === 'VASLOG_ENG') continue;
    if (filePath.includes(type)) {
      return { logType: type, parser: config.parser };
    }
  }

  if (/\.SHT$/i.test(filePath)) {
    return { logType: 'SHT', parser: 'shtfile' };
  }
  return null;
}

/**
 * Find all files under `dir` whose name matches `pattern`.
 *
 * Best-effort: a directory that is missing or not readable yields no results
 * instead of throwing. Permission / not-found errors are skipped quietly;
 * anything else is logged so real problems are not hidden.
 *
 * @param {string} dir
 * @param {RegExp} pattern - tested against the bare file name
 * @param {boolean} [recursive=true] - descend into sub-directories
 * @returns {string[]} full paths of matching files
 */
function findFiles(dir, pattern, recursive = true) {
  const results = [];

  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (err) {
    if (!QUIET_FS_ERRORS.has(err.code)) {
      console.warn(`[IMPORT] Skipping unreadable directory ${dir}: ${err.message}`);
    }
    return results;
  }

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      if (recursive) {
        // Push one by one: spreading a very large array into push() can
        // exceed the engine's argument limit.
        for (const nested of findFiles(fullPath, pattern, recursive)) {
          results.push(nested);
        }
      }
    } else if (entry.isFile() && pattern.test(entry.name)) {
      results.push(fullPath);
    }
  }

  return results;
}

/**
 * Parse records from a file (sync -- file I/O only).
 *
 * @param {string} filePath
 * @param {string} logType - only forwarded to the multiline parser
 * @param {string} parser - parser id from LOG_TYPES, or 'shtfile'
 * @returns {Array} parsed records; empty for an unknown parser id
 */
function parseFile(filePath, logType, parser) {
  const testStation = extractTestStation(filePath);

  switch (parser) {
    case 'multiline':
      return parseMultilineFile(filePath, logType, testStation);
    case 'csvline':
      return parseCsvFile(filePath, testStation);
    case 'shtfile':
      return parseShtFile(filePath, testStation);
    case 'vaslog-engtxt':
      return parseVaslogEngTxt(filePath, testStation);
    default:
      console.warn(`[IMPORT] Unknown parser '${parser}' for ${filePath}`);
      return [];
  }
}

/**
 * Upsert records into test_records, one statement per record.
 *
 * A record whose insert fails is skipped, and a summary is logged at the end.
 * The exception is SQLSTATE 25P02: the surrounding transaction is already
 * aborted, so every remaining statement would fail too. That error is
 * rethrown so the caller sees it instead of a silent "0 imported".
 *
 * @param {{execute: Function}} txClient - transaction client; execute(sql, params)
 *   must resolve to an object with a `rowCount`
 * @param {Array<object>} records
 * @returns {Promise<number>} number of rows inserted or updated
 */
async function insertBatch(txClient, records) {
  let imported = 0;
  let failed = 0;
  let firstError = null;

  for (const record of records) {
    try {
      const result = await txClient.execute(
        `INSERT INTO test_records
           (log_type, model_number, serial_number, test_date, test_station,
            overall_result, raw_data, source_file)
         VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
         ON CONFLICT (log_type, model_number, serial_number, test_date, test_station)
         DO UPDATE SET raw_data = EXCLUDED.raw_data, overall_result = EXCLUDED.overall_result`,
        [
          record.log_type,
          record.model_number,
          record.serial_number,
          record.test_date,
          record.test_station,
          record.overall_result,
          record.raw_data,
          record.source_file
        ]
      );
      if (result.rowCount > 0) imported++;
    } catch (err) {
      if (err && err.code === PG_IN_FAILED_TRANSACTION) {
        console.error('[IMPORT] Transaction is aborted; remaining inserts cannot succeed');
        throw err;
      }
      failed++;
      if (firstError === null) firstError = err;
    }
  }

  if (failed > 0) {
    const reason = firstError && firstError.message ? firstError.message : String(firstError);
    console.error(`[IMPORT] Skipped ${failed} of ${records.length} record(s); first error: ${reason}`);
  }
  return imported;
}

/**
 * Parse one file and upsert its records. Never throws: a failure is logged
 * and reported as zero records so a batch import can continue.
 *
 * @returns {Promise<{total: number, imported: number}>}
 */
async function importFile(txClient, filePath, logType, parser) {
  try {
    const records = parseFile(filePath, logType, parser);
    const imported = await insertBatch(txClient, records);
    return { total: records.length, imported };
  } catch (err) {
    console.error(`Error importing ${filePath}: ${err.message}`);
    return { total: 0, imported: 0 };
  }
}

// List the files of one log type inside its directory, honouring `recursive`.
function listLogFiles(logDir, config) {
  return findFiles(logDir, extensionPattern(config.ext), config.recursive !== false);
}

// Import a list of files sequentially and sum the per-file counters.
async function importFileList(txClient, files, logType, parser) {
  let total = 0;
  let imported = 0;
  for (const file of files) {
    const outcome = await importFile(txClient, file, logType, parser);
    total += outcome.total;
    imported += outcome.imported;
  }
  return { total, imported };
}

// Report a failure by email without ever letting the notifier throw.
async function notifyFailure(subject, body) {
  try {
    await sendFailureEmail(subject, body);
  } catch (err) {
    console.error(`[NOTIFY] Failure notification itself failed: ${err.message}`);
  }
}

// Close the database connection; log instead of throwing on failure.
async function closeDbQuietly() {
  try {
    await db.close();
  } catch (err) {
    console.error(`[IMPORT] Failed to close database: ${err.message}`);
  }
}

/**
 * Import from HISTLOGS (master consolidated logs).
 *
 * @returns {Promise<number>} records inserted or updated
 */
async function importHistlogs(txClient) {
  console.log('\n=== Importing from HISTLOGS ===');

  let totalImported = 0;
  let totalRecords = 0;

  for (const [logType, config] of Object.entries(LOG_TYPES)) {
    const logDir = path.join(HISTLOGS_PATH, config.dir || logType);

    if (!fs.existsSync(logDir)) {
      console.log(`  ${logType}: directory not found`);
      continue;
    }

    const files = listLogFiles(logDir, config);
    console.log(`  ${logType}: found ${files.length} files`);

    const { total, imported } = await importFileList(txClient, files, logType, config.parser);
    totalRecords += total;
    totalImported += imported;
  }

  console.log(`  HISTLOGS total: ${totalImported} records imported (${totalRecords} parsed)`);
  return totalImported;
}

/**
 * Import from test station logs: every TS-nn[L|R] directory below `basePath`
 * that has a LOGS folder, plus every .SHT file anywhere below `basePath`.
 *
 * @param {string} basePath - directory holding the station folders
 * @param {string} label - name used in log output
 * @returns {Promise<number>} records inserted or updated (0 if basePath is unreadable)
 */
async function importStationLogs(txClient, basePath, label) {
  console.log(`\n=== Importing from ${label} ===`);

  const stationPattern = /^TS-\d+[LR]?$/i;
  let stations;

  try {
    stations = fs.readdirSync(basePath, { withFileTypes: true })
      .filter((entry) => entry.isDirectory() && stationPattern.test(entry.name))
      .map((entry) => entry.name);
  } catch (err) {
    console.log(`  Error reading ${basePath}: ${err.message}`);
    return 0;
  }

  console.log(`  Found stations: ${stations.join(', ')}`);

  let totalImported = 0;
  let totalRecords = 0;

  for (const station of stations) {
    const logsDir = path.join(basePath, station, 'LOGS');
    if (!fs.existsSync(logsDir)) continue;

    for (const [logType, config] of Object.entries(LOG_TYPES)) {
      const logDir = path.join(logsDir, config.dir || logType);
      if (!fs.existsSync(logDir)) continue;

      const files = listLogFiles(logDir, config);
      const { total, imported } = await importFileList(txClient, files, logType, config.parser);
      totalRecords += total;
      totalImported += imported;
    }
  }

  // Also import SHT files
  const shtFiles = findFiles(basePath, /\.SHT$/i, true);
  console.log(`  Found ${shtFiles.length} SHT files`);

  const sht = await importFileList(txClient, shtFiles, 'SHT', 'shtfile');
  totalRecords += sht.total;
  totalImported += sht.imported;

  console.log(`  ${label} total: ${totalImported} records imported (${totalRecords} parsed)`);
  return totalImported;
}

/**
 * Import from Recovery-TEST backups, visiting the nn-nn-nn folders in reverse
 * lexicographic order.
 *
 * NOTE(review): reverse lexicographic order is "newest first" only if the
 * folder names are YY-MM-DD -- confirm. Also, insertBatch() upserts, so for
 * duplicate keys the LAST folder visited wins, not the first -- confirm that
 * this ordering is still what is wanted.
 *
 * @returns {Promise<number>} records inserted or updated
 */
async function importRecoveryBackups(txClient) {
  console.log('\n=== Importing from Recovery-TEST backups ===');

  if (!fs.existsSync(RECOVERY_PATH)) {
    console.log('  Recovery-TEST directory not found');
    return 0;
  }

  const backups = fs.readdirSync(RECOVERY_PATH, { withFileTypes: true })
    .filter((entry) => entry.isDirectory() && /^\d{2}-\d{2}-\d{2}$/.test(entry.name))
    .map((entry) => entry.name)
    .sort()
    .reverse();

  console.log(`  Found backup dates: ${backups.join(', ')}`);

  let totalImported = 0;
  for (const backup of backups) {
    const backupPath = path.join(RECOVERY_PATH, backup);
    totalImported += await importStationLogs(txClient, backupPath, `Recovery-TEST/${backup}`);
  }
  return totalImported;
}

/**
 * Full import from all sources (HISTLOGS, Recovery-TEST backups, live test
 * share) inside a single transaction. Closes the database connection when
 * done, whether the import succeeded or failed.
 */
async function runImport() {
  console.log('========================================');
  console.log('Test Data Import');
  console.log('========================================');
  console.log(`Start time: ${new Date().toISOString()}`);

  try {
    let grandTotal = 0;

    await db.transaction(async (txClient) => {
      grandTotal += await importHistlogs(txClient);
      grandTotal += await importRecoveryBackups(txClient);
      grandTotal += await importStationLogs(txClient, TEST_PATH, 'test');
    });

    const stats = await db.queryOne('SELECT COUNT(*) as count FROM test_records');

    console.log('\n========================================');
    console.log('Import Complete');
    console.log('========================================');
    console.log(`Records imported this run: ${grandTotal}`);
    console.log(`Total records in database: ${stats.count}`);
    console.log(`End time: ${new Date().toISOString()}`);
  } finally {
    await closeDbQuietly();
  }
}

/**
 * Import a single file (for incremental imports from sync).
 *
 * @param {string} filePath
 * @returns {Promise<{total: number, imported: number}>} zeros for an unknown type
 */
async function importSingleFile(filePath) {
  console.log(`Importing: ${filePath}`);

  const detected = detectLogType(filePath);
  if (!detected) {
    console.log(`  Unknown log type for: ${filePath}`);
    return { total: 0, imported: 0 };
  }

  let result;
  await db.transaction(async (txClient) => {
    result = await importFile(txClient, filePath, detected.logType, detected.parser);
  });

  console.log(`  Imported ${result.imported} of ${result.total} records`);
  return result;
}

/**
 * Import multiple files (for batch incremental imports) in one transaction,
 * then export datasheets if anything was imported. An export failure is
 * logged and emailed but does not fail the import.
 *
 * @param {string[]} filePaths
 * @returns {Promise<{total: number, imported: number}>}
 */
async function importFiles(filePaths) {
  console.log(`\n========================================`);
  console.log(`Incremental Import: ${filePaths.length} files`);
  console.log(`========================================`);

  let totalImported = 0;
  let totalRecords = 0;

  await db.transaction(async (txClient) => {
    for (const filePath of filePaths) {
      const detected = detectLogType(filePath);
      if (!detected) {
        console.log(`  Skipping unknown type: ${filePath}`);
        continue;
      }

      const { total, imported } = await importFile(txClient, filePath, detected.logType, detected.parser);
      totalRecords += total;
      totalImported += imported;
      console.log(`  ${path.basename(filePath)}: ${imported}/${total} records`);
    }
  });

  console.log(`\nTotal: ${totalImported} records imported (${totalRecords} parsed)`);

  // Export datasheets for newly imported records
  if (totalImported > 0) {
    try {
      const { loadAllSpecs } = require('../parsers/spec-reader');
      const { exportNewRecords } = require('./export-datasheets');
      const specMap = loadAllSpecs();
      await exportNewRecords(specMap, filePaths);
    } catch (err) {
      console.error(`[EXPORT] Datasheet export failed: ${err.message}`);
      await notifyFailure(
        '[testdatadb] Datasheet export failed after import',
        `Export step failed after importing ${totalImported} record(s).\n\nError: ${err.message}\n\nStack:\n${err.stack || '(none)'}`
      );
    }
  }

  return { total: totalRecords, imported: totalImported };
}

// Run if called directly
if (require.main === module) {
  const args = process.argv.slice(2);

  if (args[0] === '--file') {
    const files = args.slice(1);
    if (files.length === 0) {
      console.log('Usage: node import.js --file FILE [FILE2 ...]');
      process.exit(1);
    }
    importFiles(files)
      .catch((err) => {
        console.error(err);
        process.exitCode = 1; // let a scheduler see the failure
      })
      .finally(() => closeDbQuietly());
  } else if (args[0] === '--help') {
    console.log('Usage:');
    console.log('  node import.js                       Full import from all sources');
    console.log('  node import.js --file FILE [FILE2 ...]  Import specific file(s)');
    process.exit(0);
  } else {
    // runImport() closes the database itself, on success and on failure.
    runImport().catch(async (err) => {
      console.error(err);
      process.exitCode = 1;
      await notifyFailure(
        '[testdatadb] DB import failed',
        `The scheduled import job crashed.\n\nError: ${err.message}\n\nStack:\n${err.stack || '(none)'}`
      );
    });
  }
}

module.exports = { runImport, importSingleFile, importFiles };
- """ - try: - result = subprocess.run( - ['bash', VAULT_SH, 'get-field', VAULT_ENTRY, VAULT_FIELD], - capture_output=True, text=True, timeout=30, check=False, - ) - except FileNotFoundError as e: - raise RuntimeError( - f'[FAIL] vault helper not runnable: {VAULT_SH} ({e})' - ) from e - except subprocess.TimeoutExpired as e: - raise RuntimeError( - f'[FAIL] vault read timed out after 30s for {VAULT_ENTRY}' - ) from e - - if result.returncode != 0: - stderr = (result.stderr or '').strip() - raise RuntimeError( - f'[FAIL] vault read failed (rc={result.returncode}) for ' - f'{VAULT_ENTRY}:{VAULT_FIELD}: {stderr}' - ) - - pwd = (result.stdout or '').strip() - if not pwd: - raise RuntimeError( - f'[FAIL] vault returned empty value for {VAULT_ENTRY}:{VAULT_FIELD}' - ) - return pwd - -REMOTE_ROOT = 'C:/Shares/testdatadb' -LOCAL_ROOT = os.path.dirname(os.path.abspath(__file__)) - -# --------------------------------------------------------------------------- -# Deployment file lists. Each list has different semantics: -# -# UPDATE_FILES -- file MUST already exist on AD2. Backup-then-overwrite. -# Fails loud if the remote file is missing (that's a drift -# signal -- something changed on the box we didn't expect). -# -# NEW_FILES -- file must NOT already exist on AD2. Creates it. -# Fails loud if the remote file is already present (we would -# otherwise silently clobber something we didn't back up). -# --------------------------------------------------------------------------- - -# Files that already exist on AD2 and will be backed up + overwritten. -UPDATE_FILES = [ - ('parsers/spec-reader.js', 'parsers/spec-reader.js'), - ('templates/datasheet-exact.js', 'templates/datasheet-exact.js'), - ('database/import.js', 'database/import.js'), - ('database/export-datasheets.js', 'database/export-datasheets.js'), -] - -# Files that do NOT yet exist on AD2 and must be created fresh. 
-NEW_FILES = [ - ('parsers/vaslog-engtxt.js', 'parsers/vaslog-engtxt.js'), -] - - -def connect() -> paramiko.SSHClient: - pwd = get_ad2_password() - c = paramiko.SSHClient() - c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - c.connect( - HOST, username=USER, password=pwd, - timeout=15, look_for_keys=False, allow_agent=False, banner_timeout=30, - ) - return c - - -def remote_exists(sftp: paramiko.SFTPClient, path: str) -> bool: - try: - sftp.stat(path) - return True - except IOError: - return False - - -def to_remote(rel: str) -> str: - return f'{REMOTE_ROOT}/{rel}' - - -def backup_and_copy(sftp: paramiko.SFTPClient, ssh: paramiko.SSHClient, - local_rel: str, remote_rel: str, dry_run: bool, stamp: str) -> None: - local_path = os.path.join(LOCAL_ROOT, local_rel.replace('/', os.sep)) - remote_path = to_remote(remote_rel) - backup_path = f'{remote_path}.bak-{stamp}' - - if not os.path.isfile(local_path): - raise FileNotFoundError(f'[FAIL] local file missing: {local_path}') - - if not remote_exists(sftp, remote_path): - raise FileNotFoundError(f'[FAIL] remote file missing on AD2: {remote_path}') - - print(f'[INFO] {remote_rel}') - if dry_run: - print(f' would back up to: {backup_path}') - print(f' would upload: {local_path} -> {remote_path}') - return - - # Backup via SFTP copy (read + re-upload). Paramiko has no server-side copy. - with sftp.open(remote_path, 'rb') as src: - data = src.read() - with sftp.open(backup_path, 'wb') as dst: - dst.write(data) - print(f' backup: {backup_path} ({len(data)} bytes)') - - sftp.put(local_path, remote_path) - size = os.path.getsize(local_path) - print(f' uploaded: {local_path} -> {remote_path} ({size} bytes)') - - -def create_new(sftp: paramiko.SFTPClient, local_rel: str, remote_rel: str, - dry_run: bool) -> None: - """Create a file that is expected to be NEW on AD2. - - Fails loud if the remote file already exists -- NEW_FILES declares this - is a brand-new file, so pre-existence is a drift signal. 
If a previous - deploy partially ran, clean up manually or move the entry to - UPDATE_FILES. - """ - local_path = os.path.join(LOCAL_ROOT, local_rel.replace('/', os.sep)) - remote_path = to_remote(remote_rel) - - if not os.path.isfile(local_path): - raise FileNotFoundError(f'[FAIL] local file missing: {local_path}') - - print(f'[INFO] {remote_rel} (NEW)') - - if remote_exists(sftp, remote_path): - raise FileExistsError( - f'[FAIL] remote target already exists but is declared NEW: {remote_path} ' - f'-- move to UPDATE_FILES or remove remote manually' - ) - - if dry_run: - print(f' would create: {local_path} -> {remote_path}') - return - - sftp.put(local_path, remote_path) - size = os.path.getsize(local_path) - print(f' created: {remote_path} ({size} bytes)') - - -def main() -> int: - ap = argparse.ArgumentParser(description=__doc__) - ap.add_argument('--dry-run', action='store_true', help='print actions without writing') - args = ap.parse_args() - - stamp = datetime.date.today().strftime('%Y%m%d') - - print('=' * 72) - print('Deploy staged pipeline changes to AD2') - print('=' * 72) - print(f'Host: {HOST}') - print(f'Remote root: {REMOTE_ROOT}') - print(f'Local root: {LOCAL_ROOT}') - print(f'Dry run: {args.dry_run}') - print(f'Backup tag: .bak-{stamp}') - print('') - - ssh = connect() - try: - sftp = ssh.open_sftp() - try: - for local_rel, remote_rel in UPDATE_FILES: - backup_and_copy(sftp, ssh, local_rel, remote_rel, args.dry_run, stamp) - - for local_rel, remote_rel in NEW_FILES: - create_new(sftp, local_rel, remote_rel, args.dry_run) - finally: - sftp.close() - finally: - ssh.close() - - print('') - print('[OK] done' if not args.dry_run else '[OK] dry-run complete (no changes made)') - return 0 - - -if __name__ == '__main__': - try: - sys.exit(main()) - except Exception as e: - print(f'[FAIL] {e}', file=sys.stderr) - sys.exit(1) +""" +Deploy staged pipeline changes to AD2:C:\\Shares\\testdatadb\\. 
+ +Backs up each existing target to .bak-YYYYMMDD before overwriting. +Fails if a target file does not exist on AD2 (excluding brand-new files +declared in NEW_FILES below). + +Usage: + python deploy-to-ad2.py --dry-run + python deploy-to-ad2.py + +Credentials: fetched at runtime from the SOPS vault +(clients/dataforth/ad2.sops.yaml -> credentials.password). No hardcoded +password; no env-var / prompt fallback. Fails loud if the vault read fails. +""" +from __future__ import annotations + +import argparse +import datetime +import os +import subprocess +import sys + +import paramiko + +HOST = '192.168.0.6' +USER = 'sysadmin' + +VAULT_SH = 'D:/vault/scripts/vault.sh' +VAULT_ENTRY = 'clients/dataforth/ad2.sops.yaml' +VAULT_FIELD = 'credentials.password' + +SMTP_VAULT_ENTRY = 'clients/dataforth/m365.sops.yaml' +SMTP_VAULT_FIELD = 'credentials.password' +SMTP_USER = 'sysadmin@dataforth.com' +SMTP_HOST = 'smtp.office365.com' +SMTP_PORT = 587 +NOTIFY_TO = 'mike@azcomputerguru.com' + + +def get_ad2_password() -> str: + """Fetch the AD2 sysadmin password from the SOPS vault. + + Fails loud (raises) on any error: missing vault, decryption failure, + empty value. Do NOT fall back to env vars or prompts -- per CLAUDE.md + deploy scripts must not hold credentials. 
+ """ + try: + result = subprocess.run( + ['bash', VAULT_SH, 'get-field', VAULT_ENTRY, VAULT_FIELD], + capture_output=True, text=True, timeout=30, check=False, + ) + except FileNotFoundError as e: + raise RuntimeError( + f'[FAIL] vault helper not runnable: {VAULT_SH} ({e})' + ) from e + except subprocess.TimeoutExpired as e: + raise RuntimeError( + f'[FAIL] vault read timed out after 30s for {VAULT_ENTRY}' + ) from e + + if result.returncode != 0: + stderr = (result.stderr or '').strip() + raise RuntimeError( + f'[FAIL] vault read failed (rc={result.returncode}) for ' + f'{VAULT_ENTRY}:{VAULT_FIELD}: {stderr}' + ) + + pwd = (result.stdout or '').strip() + if not pwd: + raise RuntimeError( + f'[FAIL] vault returned empty value for {VAULT_ENTRY}:{VAULT_FIELD}' + ) + return pwd + + +def get_smtp_password() -> str: + try: + result = subprocess.run( + ['bash', VAULT_SH, 'get-field', SMTP_VAULT_ENTRY, SMTP_VAULT_FIELD], + capture_output=True, text=True, timeout=30, check=False, + ) + except (FileNotFoundError, subprocess.TimeoutExpired) as e: + raise RuntimeError(f'[FAIL] vault read failed for SMTP creds: {e}') from e + + if result.returncode != 0: + raise RuntimeError( + f'[FAIL] vault read failed (rc={result.returncode}) for ' + f'{SMTP_VAULT_ENTRY}:{SMTP_VAULT_FIELD}: {result.stderr.strip()}' + ) + + pwd = (result.stdout or '').strip().replace('\\', '') + if not pwd: + raise RuntimeError(f'[FAIL] vault returned empty SMTP password') + return pwd + +REMOTE_ROOT = 'C:/Shares/testdatadb' +LOCAL_ROOT = os.path.dirname(os.path.abspath(__file__)) + +# --------------------------------------------------------------------------- +# Deployment file lists. Each list has different semantics: +# +# UPDATE_FILES -- file MUST already exist on AD2. Backup-then-overwrite. +# Fails loud if the remote file is missing (that's a drift +# signal -- something changed on the box we didn't expect). +# +# NEW_FILES -- file must NOT already exist on AD2. Creates it. 
+# Fails loud if the remote file is already present (we would +# otherwise silently clobber something we didn't back up). +# --------------------------------------------------------------------------- + +# Files that already exist on AD2 and will be backed up + overwritten. +UPDATE_FILES = [ + ('parsers/spec-reader.js', 'parsers/spec-reader.js'), + ('templates/datasheet-exact.js', 'templates/datasheet-exact.js'), + ('database/import.js', 'database/import.js'), + ('database/export-datasheets.js', 'database/export-datasheets.js'), +] + +# Files that do NOT yet exist on AD2 and must be created fresh. +NEW_FILES = [ + ('parsers/vaslog-engtxt.js', 'parsers/vaslog-engtxt.js'), + ('server/notify.js', 'server/notify.js'), +] + + +def connect() -> paramiko.SSHClient: + pwd = get_ad2_password() + c = paramiko.SSHClient() + c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + c.connect( + HOST, username=USER, password=pwd, + timeout=15, look_for_keys=False, allow_agent=False, banner_timeout=30, + ) + return c + + +def remote_exists(sftp: paramiko.SFTPClient, path: str) -> bool: + try: + sftp.stat(path) + return True + except IOError: + return False + + +def to_remote(rel: str) -> str: + return f'{REMOTE_ROOT}/{rel}' + + +def backup_and_copy(sftp: paramiko.SFTPClient, ssh: paramiko.SSHClient, + local_rel: str, remote_rel: str, dry_run: bool, stamp: str) -> None: + local_path = os.path.join(LOCAL_ROOT, local_rel.replace('/', os.sep)) + remote_path = to_remote(remote_rel) + backup_path = f'{remote_path}.bak-{stamp}' + + if not os.path.isfile(local_path): + raise FileNotFoundError(f'[FAIL] local file missing: {local_path}') + + if not remote_exists(sftp, remote_path): + raise FileNotFoundError(f'[FAIL] remote file missing on AD2: {remote_path}') + + print(f'[INFO] {remote_rel}') + if dry_run: + print(f' would back up to: {backup_path}') + print(f' would upload: {local_path} -> {remote_path}') + return + + # Backup via SFTP copy (read + re-upload). 
Paramiko has no server-side copy. + with sftp.open(remote_path, 'rb') as src: + data = src.read() + with sftp.open(backup_path, 'wb') as dst: + dst.write(data) + print(f' backup: {backup_path} ({len(data)} bytes)') + + sftp.put(local_path, remote_path) + size = os.path.getsize(local_path) + print(f' uploaded: {local_path} -> {remote_path} ({size} bytes)') + + +def create_new(sftp: paramiko.SFTPClient, local_rel: str, remote_rel: str, + dry_run: bool) -> None: + """Create a file that is expected to be NEW on AD2. + + Fails loud if the remote file already exists -- NEW_FILES declares this + is a brand-new file, so pre-existence is a drift signal. If a previous + deploy partially ran, clean up manually or move the entry to + UPDATE_FILES. + """ + local_path = os.path.join(LOCAL_ROOT, local_rel.replace('/', os.sep)) + remote_path = to_remote(remote_rel) + + if not os.path.isfile(local_path): + raise FileNotFoundError(f'[FAIL] local file missing: {local_path}') + + print(f'[INFO] {remote_rel} (NEW)') + + if remote_exists(sftp, remote_path): + raise FileExistsError( + f'[FAIL] remote target already exists but is declared NEW: {remote_path} ' + f'-- move to UPDATE_FILES or remove remote manually' + ) + + if dry_run: + print(f' would create: {local_path} -> {remote_path}') + return + + sftp.put(local_path, remote_path) + size = os.path.getsize(local_path) + print(f' created: {remote_path} ({size} bytes)') + + +def main() -> int: + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument('--dry-run', action='store_true', help='print actions without writing') + args = ap.parse_args() + + stamp = datetime.date.today().strftime('%Y%m%d') + + print('=' * 72) + print('Deploy staged pipeline changes to AD2') + print('=' * 72) + print(f'Host: {HOST}') + print(f'Remote root: {REMOTE_ROOT}') + print(f'Local root: {LOCAL_ROOT}') + print(f'Dry run: {args.dry_run}') + print(f'Backup tag: .bak-{stamp}') + print('') + + smtp_pass = get_smtp_password() + + ssh = connect() + 
try: + sftp = ssh.open_sftp() + try: + for local_rel, remote_rel in UPDATE_FILES: + backup_and_copy(sftp, ssh, local_rel, remote_rel, args.dry_run, stamp) + + for local_rel, remote_rel in NEW_FILES: + create_new(sftp, local_rel, remote_rel, args.dry_run) + + # Write notify config (creds fetched from vault, never committed to git) + import json + notify_cfg = json.dumps({ + 'smtp': { + 'host': SMTP_HOST, + 'port': SMTP_PORT, + 'user': SMTP_USER, + 'pass': smtp_pass, + }, + 'from': SMTP_USER, + 'to': NOTIFY_TO, + }, indent=2) + notify_remote = f'{REMOTE_ROOT}/config/notify.json' + print(f'[INFO] config/notify.json (SMTP creds)') + if not args.dry_run: + # Ensure config dir exists + stdin, stdout, stderr = ssh.exec_command( + f'powershell -Command "New-Item -ItemType Directory -Force -Path ' + f'C:\\Shares\\testdatadb\\config | Out-Null"' + ) + stdout.channel.recv_exit_status() + with sftp.open(notify_remote, 'w') as f: + f.write(notify_cfg) + print(f' written: {notify_remote}') + else: + print(f' would write: {notify_remote}') + + finally: + sftp.close() + + # Install nodemailer if not already present + print('[INFO] npm install nodemailer') + if not args.dry_run: + cmd = 'cd C:\\Shares\\testdatadb && npm list nodemailer --depth=0 2>nul || npm install nodemailer' + stdin, stdout, stderr = ssh.exec_command(f'cmd /c "{cmd}"') + exit_code = stdout.channel.recv_exit_status() + out = stdout.read().decode(errors='replace').strip() + if out: + print(f' {out}') + if exit_code != 0: + err = stderr.read().decode(errors='replace').strip() + raise RuntimeError(f'[FAIL] npm install nodemailer failed: {err}') + print('[OK] nodemailer ready') + else: + print(' would run: npm install nodemailer (if not already installed)') + + finally: + ssh.close() + + print('') + print('[OK] done' if not args.dry_run else '[OK] dry-run complete (no changes made)') + return 0 + + +if __name__ == '__main__': + try: + sys.exit(main()) + except Exception as e: + print(f'[FAIL] {e}', file=sys.stderr) + 
sys.exit(1) diff --git a/projects/dataforth-dos/datasheet-pipeline/implementation/server/notify.js b/projects/dataforth-dos/datasheet-pipeline/implementation/server/notify.js new file mode 100644 index 0000000..88b90f6 --- /dev/null +++ b/projects/dataforth-dos/datasheet-pipeline/implementation/server/notify.js @@ -0,0 +1,63 @@ +/** + * Failure notification via email. + * + * Reads SMTP config from config/notify.json (gitignored, written by deploy-to-ad2.py). + * Silently swallows send errors so a notification failure never masks the real error. + */ + +const fs = require('fs'); +const path = require('path'); + +const CONFIG_PATH = path.join(__dirname, '..', 'config', 'notify.json'); + +function loadConfig() { + try { + return JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8')); + } catch (err) { + console.error(`[NOTIFY] Could not read ${CONFIG_PATH}: ${err.message}`); + return null; + } +} + +/** + * Send a failure notification email. + * @param {string} subject + * @param {string} body - plain text + */ +async function sendFailureEmail(subject, body) { + const cfg = loadConfig(); + if (!cfg) return; + + let nodemailer; + try { + nodemailer = require('nodemailer'); + } catch (err) { + console.error('[NOTIFY] nodemailer not installed — skipping email'); + return; + } + + const transporter = nodemailer.createTransport({ + host: cfg.smtp.host, + port: cfg.smtp.port, + secure: false, + requireTLS: true, + auth: { + user: cfg.smtp.user, + pass: cfg.smtp.pass, + }, + }); + + try { + await transporter.sendMail({ + from: cfg.from, + to: cfg.to, + subject, + text: body, + }); + console.log(`[NOTIFY] Failure email sent: ${subject}`); + } catch (err) { + console.error(`[NOTIFY] Failed to send email: ${err.message}`); + } +} + +module.exports = { sendFailureEmail };