diff --git a/projects/dataforth-dos/datasheet-pipeline/fix_run_pipeline.py b/projects/dataforth-dos/datasheet-pipeline/fix_run_pipeline.py new file mode 100644 index 0000000..f790272 --- /dev/null +++ b/projects/dataforth-dos/datasheet-pipeline/fix_run_pipeline.py @@ -0,0 +1,105 @@ +"""Patch run-pipeline.ps1 to use full node.exe path and retry.""" +import base64, paramiko, subprocess, time, yaml + +ad2_pwd = yaml.safe_load(subprocess.run(['sops','-d','D:/vault/clients/dataforth/ad2.sops.yaml'], + capture_output=True, text=True, timeout=30, check=True).stdout)['credentials']['password'].replace('\\','') + +PROD_DIR = r'C:\ProgramData\dataforth-uploader' + +RUN_PS1 = r'''# Dataforth Test Datasheet Uploader (hourly) +$ErrorActionPreference = 'Stop' +$prod = 'C:\ProgramData\dataforth-uploader' +$logDir = Join-Path $prod 'logs' +$nodeExe = 'C:\Program Files\nodejs\node.exe' +New-Item -ItemType Directory -Force -Path $logDir | Out-Null +$stamp = Get-Date -Format 'yyyy-MM-dd_HH-mm-ss' +$log = Join-Path $logDir "pipeline-$stamp.log" + +function Log([string]$m) { + $line = "[$(Get-Date -Format o)] $m" + Write-Host $line + Add-Content -Path $log -Value $line -Encoding utf8 +} + +try { + Log "=== pipeline start (pid=$PID) ===" + + # Load credentials + $creds = Get-Content (Join-Path $prod 'credentials.json') -Raw | ConvertFrom-Json + $env:CF_TOKEN_URL = $creds.CF_TOKEN_URL + $env:CF_API_BASE = $creds.CF_API_BASE + $env:CF_CLIENT_ID = $creds.CF_CLIENT_ID + $env:CF_CLIENT_SECRET = $creds.CF_CLIENT_SECRET + $env:CF_SCOPE = $creds.CF_SCOPE + + # [1] DFWDS process + Log '[1] dfwds-process.js' + $dfwdsJs = Join-Path $prod 'dfwds-process.js' + $out = & $nodeExe $dfwdsJs 2>&1 + $out | ForEach-Object { Log $_ } + + # [2] Enumerate For_Web + Log '[2] enumerate For_Web' + $delta = Join-Path $prod 'delta_for_web_all.txt' + Get-ChildItem 'C:\Shares\webshare\For_Web' -File -Filter *.TXT | + ForEach-Object { + $sn = [System.IO.Path]::GetFileNameWithoutExtension($_.Name) + 
def psb64(cmd, to=120):
    """Run a PowerShell snippet over the module-level SSH client ``c``.

    The snippet is UTF-16LE encoded and base64 wrapped, then handed to
    ``powershell -NoProfile -EncodedCommand`` so the remote command line
    carries only base64 characters.

    Args:
        cmd: PowerShell source text to execute remotely.
        to: Value passed as ``timeout`` to ``exec_command`` (seconds).

    Returns:
        A ``(stdout_text, stderr_text, exit_status)`` tuple. Both streams are
        decoded as UTF-8 with undecodable bytes replaced.
    """
    payload = base64.b64encode(cmd.encode('utf-16-le')).decode()
    _stdin, stdout, stderr = c.exec_command(
        f'powershell -NoProfile -EncodedCommand {payload}',
        timeout=to,
    )
    # Drain stdout first, then stderr, then ask the channel for the status.
    stdout_text = stdout.read().decode('utf-8', 'replace')
    stderr_text = stderr.read().decode('utf-8', 'replace')
    exit_status = stdout.channel.recv_exit_status()
    return stdout_text, stderr_text, exit_status
def psb64(cmd, to=120):
    """Run a PowerShell snippet on the remote host and return its stdout.

    The snippet is UTF-16LE encoded, base64 wrapped and passed to
    ``powershell -NoProfile -EncodedCommand`` through the module-level SSH
    client ``c``.

    Remote failures are no longer silently discarded: the stderr stream and
    the exit status are read as well, and a short diagnostic is printed when
    either indicates a problem. Callers still receive only the stdout text.

    Args:
        cmd: PowerShell source text to execute remotely.
        to: Value passed as ``timeout`` to ``exec_command`` (seconds).

    Returns:
        The remote stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    enc = base64.b64encode(cmd.encode('utf-16-le')).decode()
    _, o, e = c.exec_command(f'powershell -NoProfile -EncodedCommand {enc}', timeout=to)
    out = o.read().decode('utf-8', 'replace')
    err = e.read().decode('utf-8', 'replace')
    rc = o.channel.recv_exit_status()
    # Skip stderr that contains CLIXML, as the companion scripts in this
    # project do. NOTE(review): presumably that is PowerShell's serialized
    # progress stream - confirm it never hides a real error record.
    if err.strip() and 'CLIXML' not in err:
        print('[stderr]', err[:500])
    # The exit status is reported separately so a failure is still visible
    # when stderr was filtered out above.
    if rc != 0:
        print(f'[exit status {rc}]')
    return out
+ +Creates C:\\ProgramData\\dataforth-uploader\\ with: + - credentials.json (SYSTEM+Admin ACL only) + - run-pipeline.ps1 (DFWDS process -> enumerate For_Web -> upload) + - dfwds-process.js (copied from current install) + - upload-delta.js (copied from current install) + - logs\\ (directory for per-run logs) + +Registers Scheduled Task 'DataforthTestDatasheetUploader' to run as SYSTEM hourly. +""" +import base64, json, paramiko, subprocess, time, yaml + +ad2_pwd = yaml.safe_load(subprocess.run(['sops','-d','D:/vault/clients/dataforth/ad2.sops.yaml'], + capture_output=True, text=True, timeout=30, check=True).stdout)['credentials']['password'].replace('\\','') +api = yaml.safe_load(subprocess.run(['sops','-d','D:/vault/clients/dataforth/api-oauth.sops.yaml'], + capture_output=True, text=True, timeout=30, check=True).stdout) + +PROD_DIR = r'C:\ProgramData\dataforth-uploader' +OLD_DIR = r'C:\Users\sysadmin\Documents\dataforth-uploader' +TASK_NAME = 'DataforthTestDatasheetUploader' + +creds_json = json.dumps({ + 'CF_TOKEN_URL': api['endpoints']['token-url'], + 'CF_API_BASE': api['endpoints']['api-base'], + 'CF_CLIENT_ID': api['credentials']['client-id'], + 'CF_CLIENT_SECRET': api['credentials']['client-secret'], + 'CF_SCOPE': api['credentials']['scope'], +}, indent=2) + +RUN_PS1 = r'''# Dataforth Test Datasheet Uploader (nightly/hourly) +# Loads credentials from local JSON, runs DFWDS-process then upload-delta. 
+ +$ErrorActionPreference = 'Stop' +$prod = 'C:\ProgramData\dataforth-uploader' +$logDir = Join-Path $prod 'logs' +New-Item -ItemType Directory -Force -Path $logDir | Out-Null +$stamp = Get-Date -Format 'yyyy-MM-dd_HH-mm-ss' +$log = Join-Path $logDir "pipeline-$stamp.log" + +function Log([string]$m) { + $line = "[$(Get-Date -Format o)] $m" + Write-Host $line + Add-Content -Path $log -Value $line -Encoding utf8 +} + +Log "=== pipeline start ===" + +# Load credentials +$creds = Get-Content (Join-Path $prod 'credentials.json') -Raw | ConvertFrom-Json +$env:CF_TOKEN_URL = $creds.CF_TOKEN_URL +$env:CF_API_BASE = $creds.CF_API_BASE +$env:CF_CLIENT_ID = $creds.CF_CLIENT_ID +$env:CF_CLIENT_SECRET = $creds.CF_CLIENT_SECRET +$env:CF_SCOPE = $creds.CF_SCOPE + +# [1] DFWDS process: Test_Datasheets -> For_Web +Log '[1] dfwds-process.js' +$out = & node (Join-Path $prod 'dfwds-process.js') 2>&1 +$out | ForEach-Object { Log $_ } + +# [2] Enumerate For_Web to delta file +Log '[2] enumerate For_Web' +$delta = Join-Path $prod 'delta_for_web_all.txt' +Get-ChildItem 'C:\Shares\webshare\For_Web' -File -Filter *.TXT | + ForEach-Object { + $sn = [System.IO.Path]::GetFileNameWithoutExtension($_.Name) + "$sn|$($_.FullName)|$($_.Length)|$($_.LastWriteTime.ToString('o'))" + } | Set-Content -Path $delta -Encoding ASCII +$count = (Get-Content $delta).Count +Log " enumerated $count files" + +# [3] Upload delta (idempotent; server dedups) +Log '[3] upload-delta.js' +$out = & node (Join-Path $prod 'upload-delta.js') --delta $delta --batch 100 2>&1 +$out | ForEach-Object { Log $_ } + +Log '=== pipeline end ===' + +# Retention: keep 60 days of pipeline logs +Get-ChildItem $logDir -Filter 'pipeline-*.log' | + Where-Object { $_.LastWriteTime -lt (Get-Date).AddDays(-60) } | + Remove-Item -Force +''' + +c = paramiko.SSHClient(); c.set_missing_host_key_policy(paramiko.AutoAddPolicy()) +c.connect('192.168.0.6', username='sysadmin', password=ad2_pwd, + timeout=30, banner_timeout=45, look_for_keys=False, 
def psb64(cmd, to=120):
    """Execute PowerShell source on the remote host via ``-EncodedCommand``.

    Args:
        cmd: PowerShell source text; it is encoded as UTF-16LE and then
            base64 before being placed on the remote command line.
        to: Value passed as ``timeout`` to ``exec_command`` (seconds).

    Returns:
        ``(stdout, stderr, exit_status)`` where both streams are UTF-8
        decoded with replacement of undecodable bytes.
    """
    utf16_bytes = cmd.encode('utf-16-le')
    encoded = base64.b64encode(utf16_bytes).decode()
    remote_cmd = f'powershell -NoProfile -EncodedCommand {encoded}'

    # ``c`` is the module-level SSH client; stdin is not used.
    _unused, out_stream, err_stream = c.exec_command(remote_cmd, timeout=to)

    captured_out = out_stream.read().decode('utf-8', 'replace')
    captured_err = err_stream.read().decode('utf-8', 'replace')
    status = out_stream.channel.recv_exit_status()
    return captured_out, captured_err, status
-TaskName $taskName -Confirm:$false -ErrorAction SilentlyContinue | Out-Null; ' + f'$action = New-ScheduledTaskAction -Execute "powershell.exe" ' + f' -Argument "-NoProfile -ExecutionPolicy Bypass -File \\"{PROD_DIR}\\run-pipeline.ps1\\""; ' + f'$trigger = New-ScheduledTaskTrigger -Once -At (Get-Date).Date.AddHours(1) ' + f' -RepetitionInterval (New-TimeSpan -Hours 1); ' + f'$principal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest; ' + f'$settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries ' + f' -StartWhenAvailable -ExecutionTimeLimit (New-TimeSpan -Minutes 30); ' + f'Register-ScheduledTask -TaskName $taskName -Action $action -Trigger $trigger ' + f' -Principal $principal -Settings $settings -Description "Dataforth Test Datasheet Uploader (DFWDS + Hoffman API)" | Out-Null; ' + f'Get-ScheduledTask -TaskName $taskName | Select TaskName,State,@{{N="LastRunTime";E={{(Get-ScheduledTaskInfo $_).LastRunTime}}}},@{{N="NextRunTime";E={{(Get-ScheduledTaskInfo $_).NextRunTime}}}} | Format-List' +) +out, err, rc = psb64(register_cmd, to=60) +print(out.strip()) +if err.strip() and 'CLIXML' not in err: print('[stderr]', err[:500]) + +print('\n[6] kick it off NOW for verification') +out, err, rc = psb64( + f'Start-ScheduledTask -TaskName "{TASK_NAME}"; Start-Sleep -Seconds 2; ' + f'Get-ScheduledTaskInfo -TaskName "{TASK_NAME}" | Select LastRunTime,LastTaskResult | Format-List' +) +print(out.strip()) + +print('\n[7] wait for run to finish, show tail of log') +time.sleep(15) +out, _, _ = psb64( + f'$latest = Get-ChildItem "{PROD_DIR}\\logs" -Filter "pipeline-*.log" | Sort-Object LastWriteTime -Descending | Select -First 1; ' + f'"Log: $($latest.FullName)"; Get-Content $latest.FullName -Tail 40 -ErrorAction SilentlyContinue' +) +print(out.strip()) + +c.close() +print('\n[OK] scheduled task installed') diff --git a/projects/dataforth-dos/datasheet-pipeline/reregister_task.py 
b/projects/dataforth-dos/datasheet-pipeline/reregister_task.py new file mode 100644 index 0000000..125b7b0 --- /dev/null +++ b/projects/dataforth-dos/datasheet-pipeline/reregister_task.py @@ -0,0 +1,67 @@ +"""Re-register scheduled task with clean argument escaping. + +Uses an external file for the PowerShell registration script rather than +inline base64 (which was mangling backslashes). +""" +import base64, paramiko, subprocess, time, yaml + +pwd_raw = yaml.safe_load(subprocess.run(['sops','-d','D:/vault/clients/dataforth/ad2.sops.yaml'], + capture_output=True, text=True, timeout=30, check=True).stdout)['credentials']['password'] +PWD = pwd_raw.replace('\\', '') + +REG_SCRIPT = r'''# register-task.ps1 — re-register DataforthTestDatasheetUploader cleanly +$taskName = 'DataforthTestDatasheetUploader' +$scriptPath = 'C:\ProgramData\dataforth-uploader\run-pipeline.ps1' +Unregister-ScheduledTask -TaskName $taskName -Confirm:$false -ErrorAction SilentlyContinue | Out-Null + +# Argument uses single quotes inside to avoid double-quote escaping issues +$argStr = '-NoProfile -ExecutionPolicy Bypass -File ' + '"' + $scriptPath + '"' +$action = New-ScheduledTaskAction -Execute 'powershell.exe' -Argument $argStr -WorkingDirectory 'C:\ProgramData\dataforth-uploader' +$trigger = New-ScheduledTaskTrigger -Once -At (Get-Date).Date.AddHours(1) -RepetitionInterval (New-TimeSpan -Hours 1) +$principal = New-ScheduledTaskPrincipal -UserId 'SYSTEM' -LogonType ServiceAccount -RunLevel Highest +$settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries -StartWhenAvailable -ExecutionTimeLimit (New-TimeSpan -Minutes 30) +Register-ScheduledTask -TaskName $taskName -Action $action -Trigger $trigger -Principal $principal -Settings $settings -Description 'Dataforth Test Datasheet Uploader (DFWDS port + Hoffman API)' | Out-Null + +Write-Host '=== registered task definition ===' +(Get-ScheduledTask -TaskName $taskName).Actions | Format-List + +Write-Host '=== 
def psb64(cmd, to=60):
    """Run a PowerShell snippet on the remote host and return its stdout.

    The snippet is UTF-16LE encoded, base64 wrapped and passed to
    ``powershell -NoProfile -EncodedCommand`` through the module-level SSH
    client ``c``.

    The stderr stream and exit status are now read instead of being thrown
    away, and are reported with the same ``[stderr]`` convention this script
    uses for its direct ``exec_command`` call. Callers still receive only the
    stdout text.

    Args:
        cmd: PowerShell source text to execute remotely.
        to: Value passed as ``timeout`` to ``exec_command`` (seconds).

    Returns:
        The remote stdout decoded as UTF-8 (undecodable bytes replaced).
    """
    enc = base64.b64encode(cmd.encode('utf-16-le')).decode()
    _, o, e = c.exec_command(f'powershell -NoProfile -EncodedCommand {enc}', timeout=to)
    out = o.read().decode('utf-8', 'replace')
    err = e.read().decode('utf-8', 'replace')
    rc = o.channel.recv_exit_status()
    # Same filter as the script's other stderr check. NOTE(review):
    # presumably CLIXML output is PowerShell's serialized progress stream -
    # confirm it never hides a real error record.
    if err.strip() and 'CLIXML' not in err:
        print('[stderr]', err[:500])
    # Report the exit status on its own so a failure stays visible even when
    # stderr was filtered out above.
    if rc != 0:
        print(f'[exit status {rc}]')
    return out