sync: Auto-sync from acg-guru-5070 at 2026-03-21 16:34:05

Synced files:
- Session logs updated
- Latest context and credentials
- Command/directive updates

Machine: acg-guru-5070
Timestamp: 2026-03-21 16:34:05

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-03-21 16:34:05 -07:00
parent 37aaa6660b
commit a29d00c6b2
2 changed files with 539 additions and 0 deletions
--- a/projects/radio-show/audio-processor/gpu_debug_transcribe.py
+++ b/projects/radio-show/audio-processor/gpu_debug_transcribe.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+"""GPU-monitored batch transcription with diagnostics.
+
+Monitors GPU health before, during, and after each episode transcription.
+Logs temperature, power, utilization, and memory to detect what triggers
+the NVRM rpcSendMessage failure (status 0x00000062).
+"""
+
+import subprocess
+import sys
+import time
+import signal
+import threading
+import os
+from datetime import datetime
+from pathlib import Path
+
+# Log directory, relative to the current working directory. It is created as
+# a side effect of loading this module so log() can append to LOG_FILE at once.
+LOG_DIR = Path("gpu-debug-logs")
+LOG_DIR.mkdir(exist_ok=True)
+# One log file per run; the name embeds the timestamp (YYYYMMDD_HHMMSS) taken
+# when the module is loaded.
+LOG_FILE = LOG_DIR / f"gpu_monitor_{datetime.now():%Y%m%d_%H%M%S}.log"
+
+# Episodes to transcribe (remaining ones)
+# Paths are relative; each file's stem is used as the transcript directory
+# name under training-data/transcripts/.
+EPISODES = [
+    "training-data/episodes/2011-06-04-hr1.mp3",
+    "training-data/episodes/2011-09-10-hr1.mp3",
+    "training-data/episodes/2014-s6e05.mp3",
+    "training-data/episodes/2015-s7e30.mp3",
+    "training-data/episodes/2016-s8e42.mp3",
+    "training-data/episodes/2017-s9e26.mp3",
+    "training-data/episodes/2018-s10e17.mp3",
+    "training-data/episodes/2018-s10e21.mp3",
+]
+
+# Shutdown flag for the background monitor: main() sets it, and
+# monitor_thread() both tests it and sleeps on it between polls.
+stop_monitor = threading.Event()
+
+
+def log(msg: str):
+    """Print a timestamped message and append the same line to LOG_FILE.
+
+    The prefix is the local wall-clock time as ``HH:MM:SS.mmm``. The log file
+    is opened in append mode and closed again on every call.
+    """
+    stamp = datetime.now().time().isoformat(timespec="milliseconds")
+    entry = f"[{stamp}] {msg}"
+    print(entry)
+    with open(LOG_FILE, "a") as handle:
+        handle.write(f"{entry}\n")
+
+
+def gpu_query() -> dict | None:
+    """Query the first GPU's stats via ``nvidia-smi``.
+
+    Returns:
+        ``None`` if ``nvidia-smi`` exits with a non-zero status.
+
+        ``{"error": True, "raw": <text>}`` if the call times out or cannot be
+        started, if the output has fewer fields than requested, or if any of
+        the first four readings (temperature, power, GPU utilization, memory
+        utilization) contains ``ERR`` or ``[N/A]``.
+
+        Otherwise a dict of string readings under the keys ``temp_c``,
+        ``power_w``, ``gpu_util``, ``mem_util``, ``mem_used_mb``,
+        ``mem_total_mb``, ``sm_clock_mhz``, ``mem_clock_mhz``, ``pstate`` and
+        ``fan``, plus ``"error": False``.
+    """
+    # Result keys, in the same order as the --query-gpu field list below.
+    keys = ("temp_c", "power_w", "gpu_util", "mem_util", "mem_used_mb",
+            "mem_total_mb", "sm_clock_mhz", "mem_clock_mhz", "pstate", "fan")
+    try:
+        result = subprocess.run(
+            ["nvidia-smi",
+             "--query-gpu=temperature.gpu,power.draw,utilization.gpu,utilization.memory,"
+             "memory.used,memory.total,clocks.current.sm,clocks.current.memory,"
+             "pstate,fan.speed",
+             "--format=csv,noheader,nounits"],
+            capture_output=True, text=True, timeout=5
+        )
+    except Exception as e:
+        # Includes subprocess.TimeoutExpired (hung nvidia-smi) and OSError
+        # (binary missing); the caller gets the message instead of a crash.
+        return {"error": True, "raw": str(e)}
+
+    if result.returncode != 0:
+        return None
+
+    raw = result.stdout.strip()
+    # nvidia-smi prints one CSV row per GPU. Parse only the first row so a
+    # multi-GPU host does not spill a second device's values into the fields.
+    first_row = raw.splitlines()[0] if raw else ""
+    parts = [p.strip() for p in first_row.split(",")]
+    if len(parts) < len(keys):
+        # Empty or truncated output: report it rather than index past the end.
+        return {"error": True, "raw": raw}
+
+    # Only the first four readings mark the GPU as failed. Later fields are
+    # passed through as-is (presumably because some of them, e.g. fan speed,
+    # can be unavailable on a healthy GPU - TODO confirm).
+    if any("ERR" in p or "[N/A]" in p for p in parts[:4]):
+        return {"error": True, "raw": raw}
+
+    stats = dict(zip(keys, parts))
+    stats["error"] = False
+    return stats
+
+
+def gpu_health_check() -> bool:
+    """Return True when a fresh ``gpu_query()`` reading is usable.
+
+    A missing reading (``None``) or one flagged with ``"error"`` is logged as
+    a ``GPU ERROR`` line and reported as False.
+    """
+    reading = gpu_query()
+    healthy = reading is not None and not reading.get("error")
+    if not healthy:
+        log(f"GPU ERROR: {reading}")
+    return healthy
+
+
+def gpu_status_str(stats: dict) -> str:
+    """Format a ``gpu_query()`` result dict as a single log line.
+
+    An error-flagged dict is rendered as ``ERR! raw=<raw text>`` (``unknown``
+    when no raw text is present); otherwise every reading is listed as a
+    space-separated ``LABEL=value`` pair.
+    """
+    if stats.get("error"):
+        raw = stats.get("raw", "unknown")
+        return f"ERR! raw={raw}"
+    segments = [
+        f"T={stats['temp_c']}C",
+        f"P={stats['power_w']}W",
+        f"GPU={stats['gpu_util']}%",
+        f"MEM={stats['mem_util']}%",
+        f"VRAM={stats['mem_used_mb']}/{stats['mem_total_mb']}MB",
+        f"SM={stats['sm_clock_mhz']}MHz",
+        f"MEMCLK={stats['mem_clock_mhz']}MHz",
+        f"PState={stats['pstate']}",
+        f"Fan={stats['fan']}",
+    ]
+    return " ".join(segments)
+
+
+def monitor_thread(interval: float = 2.0):
+    """Background loop that logs GPU stats until ``stop_monitor`` is set.
+
+    Each pass logs one ``MONITOR:`` status line. When the reading is an error
+    (including ``gpu_query()`` returning ``None``), it also logs the last five
+    NVIDIA-related warning/error lines from ``dmesg``.
+
+    Args:
+        interval: Seconds to wait between polls. The wait is on
+            ``stop_monitor``, so setting the event ends the wait early.
+    """
+    while not stop_monitor.is_set():
+        stats = gpu_query()
+        if stats is None:
+            # gpu_query() returns None when nvidia-smi exits non-zero. That is
+            # a failure worth recording, so turn it into an error reading
+            # instead of skipping the poll silently.
+            stats = {"error": True, "raw": "nvidia-smi exited with non-zero status"}
+        log(f"MONITOR: {gpu_status_str(stats)}")
+        if stats.get("error"):
+            log("MONITOR: GPU ENTERED ERROR STATE!")
+            # Check dmesg for the smoking gun
+            try:
+                # -n makes sudo fail instead of prompting for a password,
+                # which a background thread could never answer.
+                result = subprocess.run(
+                    ["sudo", "-n", "dmesg", "-T", "--level=err,warn"],
+                    capture_output=True, text=True, timeout=5
+                )
+                if result.returncode != 0:
+                    log(f"MONITOR: dmesg unavailable (exit {result.returncode}): "
+                        f"{result.stderr.strip()}")
+                nvrm_lines = [ln for ln in result.stdout.splitlines()
+                              if "NVRM" in ln or "nvidia" in ln.lower()]
+                for line in nvrm_lines[-5:]:
+                    log(f"DMESG: {line}")
+            except Exception as e:
+                # Best effort: never let a dmesg problem kill the monitor,
+                # but leave a trace of why no DMESG lines were logged.
+                log(f"MONITOR: dmesg lookup failed: {e}")
+        stop_monitor.wait(interval)
+
+
+def check_runtime_d3(pci_addr: str = "0000:02:00.0"):
+    """Check and log Runtime D3 power management status for one GPU.
+
+    Logs the NVIDIA driver's ``power`` file (if present) and the PCI device's
+    ``runtime_status``, ``control`` and ``runtime_enabled`` attributes. Each
+    file is read independently, so one unreadable file does not hide the rest.
+
+    Args:
+        pci_addr: PCI address of the GPU as it appears under
+            ``/proc/driver/nvidia/gpus/`` and ``/sys/bus/pci/devices/``.
+            Defaults to the address this script was written for.
+    """
+    power_file = Path(f"/proc/driver/nvidia/gpus/{pci_addr}/power")
+    try:
+        if power_file.exists():
+            log(f"GPU Power Management:\n{power_file.read_text()}")
+    except OSError as e:
+        log(f"Power check error: {e}")
+
+    # Read the files directly instead of spawning `cat` for each one.
+    sysfs_power = Path(f"/sys/bus/pci/devices/{pci_addr}/power")
+    for label, attr in (
+        ("PCI runtime_status", "runtime_status"),
+        ("PCI power control", "control"),
+        ("PCI runtime_enabled", "runtime_enabled"),
+    ):
+        try:
+            value = (sysfs_power / attr).read_text().strip()
+        except OSError as e:
+            # A missing or unreadable attribute is reported explicitly
+            # rather than logged as an empty value.
+            log(f"Power check error: {label}: {e}")
+            continue
+        log(f"{label}: {value}")
+
+
+def check_nvidia_persistence():
+    """Log the persistence mode reported by ``nvidia-smi``.
+
+    Any failure to run the query is logged as a ``Persistence check error``
+    line instead of being raised.
+    """
+    cmd = ["nvidia-smi", "--query-gpu=persistence_mode", "--format=csv,noheader"]
+    try:
+        mode = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=5
+        ).stdout.strip()
+        log(f"Persistence mode: {mode}")
+    except Exception as err:
+        log(f"Persistence check error: {err}")
+
+
+def transcribe_one(episode_path: str) -> bool:
+    """Transcribe a single episode with GPU health checks around it.
+
+    Steps: skip if a transcript already exists, check the GPU with
+    ``gpu_query()`` and a small CUDA matmul, run the transcriber and save the
+    result, log the GPU state afterwards, then clear the CUDA cache and idle
+    for 10 seconds.
+
+    Args:
+        episode_path: Path to the audio file. Its stem names the output
+            directory under ``training-data/transcripts/``.
+
+    Returns:
+        True if the episode was already transcribed or was transcribed and
+        saved (even if the post-run GPU reading is an error; that is only
+        logged). False if a pre-flight check failed or transcription raised.
+    """
+    name = Path(episode_path).stem
+    output_dir = Path("training-data/transcripts") / name
+
+    # transcript.json can only exist if its directory does, so one check is enough.
+    if (output_dir / "transcript.json").exists():
+        log(f"SKIP: {name} already transcribed")
+        return True
+
+    # Pre-flight GPU check
+    log(f"PRE-FLIGHT: Checking GPU before {name}")
+    stats = gpu_query()
+    if not stats or stats.get("error"):
+        log(f"PRE-FLIGHT FAIL: GPU already in error state! Stats: {stats}")
+        return False
+    log(f"PRE-FLIGHT: {gpu_status_str(stats)}")
+
+    # Quick CUDA test
+    log("PRE-FLIGHT: Testing CUDA...")
+    try:
+        import torch
+        if not torch.cuda.is_available():
+            log("PRE-FLIGHT FAIL: torch.cuda.is_available() = False")
+            return False
+        # Small allocation test
+        x = torch.randn(100, 100, device="cuda")
+        y = x @ x
+        del x, y
+        torch.cuda.synchronize()
+        torch.cuda.empty_cache()
+        log(f"PRE-FLIGHT: CUDA OK, allocated={torch.cuda.memory_allocated() / 1024**2:.0f}MB")
+    except Exception as e:
+        log(f"PRE-FLIGHT FAIL: CUDA test error: {e}")
+        return False
+
+    # Transcribe
+    log(f"START: {name} ({episode_path})")
+    start_time = time.time()
+
+    try:
+        # Imported here so an import failure is logged as a FAIL for this
+        # episode instead of aborting the whole script at start-up.
+        from src.transcriber import transcribe
+        transcript = transcribe(episode_path)
+        transcript.save(output_dir)
+        elapsed = time.time() - start_time
+        log(f"DONE: {name} in {elapsed:.1f}s ({elapsed/60:.1f}min), "
+            f"{len(transcript.segments)} segments")
+    except Exception as e:
+        elapsed = time.time() - start_time
+        log(f"FAIL: {name} after {elapsed:.1f}s: {type(e).__name__}: {e}")
+
+        # Post-failure GPU check
+        stats = gpu_query()
+        log(f"POST-FAIL: {gpu_status_str(stats) if stats else 'query failed'}")
+        return False
+
+    # Post-transcription GPU check
+    stats = gpu_query()
+    if stats and not stats.get("error"):
+        log(f"POST: {gpu_status_str(stats)}")
+    else:
+        log(f"POST: GPU entered error state after transcription! {stats}")
+
+    # Cool-down: clear CUDA cache, let GPU idle briefly
+    try:
+        import torch
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+    except Exception as e:
+        # Still best effort, but a CUDA call failing here is exactly the
+        # kind of evidence this script exists to capture, so log it.
+        log(f"COOLDOWN: CUDA cleanup failed: {type(e).__name__}: {e}")
+
+    log("COOLDOWN: Waiting 10s between episodes...")
+    time.sleep(10)
+
+    return True
+
+
+def main():
+    """Run the monitored batch: log the environment, then transcribe the queue.
+
+    Exits with status 1 if the GPU is already in an error state at start-up.
+    Otherwise it starts the background monitor, transcribes every episode in
+    ``EPISODES`` that has no ``transcript.json`` yet, and stops at the first
+    failure. The monitor is stopped and the SUMMARY lines are written even if
+    the batch is interrupted (e.g. Ctrl-C) or raises.
+    """
+    log("=" * 60)
+    log("GPU Debug Batch Transcription")
+    log(f"Driver: {subprocess.getoutput('nvidia-smi --query-gpu=driver_version --format=csv,noheader')}")
+    log(f"CUDA version: {subprocess.getoutput('nvidia-smi --query-gpu=cuda_version --format=csv,noheader 2>/dev/null') or 'N/A'}")
+    log("=" * 60)
+
+    # Check power management
+    check_runtime_d3()
+    check_nvidia_persistence()
+
+    # Initial GPU state
+    stats = gpu_query()
+    if not stats or stats.get("error"):
+        log(f"ABORT: GPU already in error state at startup: {stats}")
+        sys.exit(1)
+    log(f"INITIAL: {gpu_status_str(stats)}")
+
+    # Start background monitor (every 5 seconds during transcription)
+    monitor = threading.Thread(target=monitor_thread, args=(5.0,), daemon=True)
+    monitor.start()
+
+    remaining = []
+    completed = 0
+    failed = 0
+    try:
+        # Filter to only episodes that need transcription
+        for ep in EPISODES:
+            name = Path(ep).stem
+            out = Path(f"training-data/transcripts/{name}/transcript.json")
+            if out.exists():
+                log(f"ALREADY DONE: {name}")
+            else:
+                remaining.append(ep)
+
+        log(f"QUEUE: {len(remaining)} episodes to transcribe")
+
+        for ep in remaining:
+            success = transcribe_one(ep)
+            if success:
+                completed += 1
+            else:
+                failed += 1
+                log(f"STOPPING: GPU failure detected after {completed} episodes, {failed} failed")
+                # Log final state
+                stats = gpu_query()
+                log(f"FINAL: {gpu_status_str(stats) if stats else 'query failed'}")
+                break
+    finally:
+        # Runs on normal completion, on the early break, and on an
+        # interrupt or exception, so the log always ends with a summary.
+        stop_monitor.set()
+        # Give the monitor a bounded time to finish its current poll so its
+        # last line lands before the summary; it is a daemon thread, so a
+        # stuck poll cannot block process exit.
+        monitor.join(timeout=10)
+        log(f"SUMMARY: {completed} completed, {failed} failed, "
+            f"{len(remaining) - completed - failed} remaining")
+        log(f"Log saved to: {LOG_FILE}")
+
+
+if __name__ == "__main__":
+    main()
--- a/session-logs/2026-03-21-session.md
+++ b/session-logs/2026-03-21-session.md
@@ -575,3 +575,248 @@ done
 ```

 Then: run speaker identification across all transcribed episodes, cluster non-host voices, begin element fingerprinting.
+
+## Update: 15:00 — Dataforth Email, GPU Debug, VWP Citrix→Hyper-V Migration, ScreenConnect
+
+### Session Summary
+
+Multi-task session: Dataforth email forwarding, GPU error diagnosis for voice training, and major VWP infrastructure migration (Citrix XenServer → Hyper-V). Installed ScreenConnect on VWP-FILES via PowerShell Direct.
+
+### 1. Dataforth Email Forwarding (dataforthgit@)
+
+**Task:** AJ (Angel Lopez) at Dataforth needs messages sent to dataforthgit@dataforth.com forwarded to him.
+
+**Discovery:** `dataforthgit@dataforth.com` is an existing alias on the **Support** shared mailbox (`support@dataforth.com`).
+
+**Solution:** Created inbox rule on Support mailbox via Graph API:
+- **Rule:** "Forward dataforthgit@ to AJ Lopez"
+- **Trigger:** recipientContains `dataforthgit@dataforth.com`
+- **Action:** Forward to `alopez@dataforth.com`
+- **Rule ID:** `AQAAAFO12jE=`
+
+**Auth used:** Claude-MSP-Access multi-tenant app:
+- Tenant ID: `7dfa3ce8-c496-4b51-ab8d-bd3dcd78b584`
+- App ID: `fabb3421-8b34-484b-bc17-e46de9703418`
+- Client Secret: `~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO`
+
+### 2. GPU Error Diagnosis (RTX 5070 Ti)
+
+**Problem:** GPU entered error state during voice training batch transcription (same issue as previous session). `nvidia-smi` shows ERR! across all fields. The GPU failed ~40 min into transcription.
+
+**Root cause investigation:**
+- `NVRM: _issueRpcLarge: rpcSendMessage failed with status 0x00000062 for fn 76!` — repeating every 100ms
+- No Xid errors in dmesg — only RPC communication failures
+- **Runtime D3 (fine-grained power management) is enabled** — prime suspect for GPU hang during sustained compute
+- GPU is in D0 power state, video memory active
+- Error first appeared at 4335 seconds after boot (~72 min)
+- `torch.cuda.is_available()` returned True initially, GPU loaded model into VRAM then failed
+
+**Fix applied (pending reboot):**
+- Created `/etc/modprobe.d/nvidia-no-d3.conf`: `options nvidia NVreg_DynamicPowerManagement=0`
+- Plan: After reboot, run `sudo nvidia-smi -pm 1` (persistence mode)
+
+**Diagnostic script created:** `projects/radio-show/audio-processor/gpu_debug_transcribe.py`
+- Monitors GPU temp, power, utilization, VRAM, clocks every 5 seconds
+- Pre-flight CUDA health check before each episode
+- 10-second cooldown between episodes
+- Stops at first GPU error and logs state
+- Saves logs to `gpu-debug-logs/`
+
+**Transcription status:** Only `2010-10-02-hr1` completed. 8 episodes remaining:
+- 2011-06-04-hr1, 2011-09-10-hr1, 2014-s6e05, 2015-s7e30, 2016-s8e42, 2017-s9e26, 2018-s10e17, 2018-s10e21
+
+**After reboot commands:**
+```bash
+sudo nvidia-smi -pm 1
+source /home/guru/.local/share/radio-processor/bin/activate
+cd /home/guru/ClaudeTools/projects/radio-show/audio-processor
+python3 gpu_debug_transcribe.py
+```
+
+### 3. VWP Citrix XenServer → Hyper-V Migration
+
+#### VPN Access
+
+**Critical:** Run `sudo tailscale down` before connecting to the VWP VPN — D2TESTNAS advertises the `192.168.0.0/24` route for Dataforth, which conflicts with VWP's use of the same subnet.
+
+**Starlink subnet conflict:** Starlink was on `192.168.4.0/24`, same as VPN tunnel. User changed Starlink to `10.0.3.x/16` to resolve.
+
+**Working VPN command (split tunnel):**
+```bash
+sudo tailscale down
+sudo openvpn --config ~/Downloads/OpenVPN-Server.ovpn --auth-user-pass /etc/openvpn/vwp-auth.txt --group nobody --daemon vwp-vpn --log /tmp/vwp-vpn.log --route-noexec
+# Then manually add split routes:
+sudo ip route add 172.16.9.0/24 dev tun0
+sudo ip route add 192.168.0.0/24 dev tun0
+sudo ip route add 192.168.3.0/24 dev tun0
+```
+
+**Key:** Must use `--route-noexec` to prevent full-tunnel `0.0.0.0/1` redirect, then manually add split routes.
+
+#### VPN Credentials
+- **Auth file:** `/etc/openvpn/vwp-auth.txt` (sysadmin / r3tr0gradE99#)
+- **Remote:** 4.18.160.106:1194 TCP
+- **VPN IP assigned:** 192.168.4.2 or 192.168.4.3
+
+#### WinRM Access to Hyper-V
+
+**Installed `pywinrm`** (`pip install --user --break-system-packages pywinrm`) for remote PowerShell via WinRM.
+
+**WinRM enabled on VWP-HYPERV1** (user ran on console):
+```powershell
+Enable-PSRemoting -Force
+Set-Item WSMan:\localhost\Client\TrustedHosts -Value "*" -Force
+New-NetFirewallRule -DisplayName "WinRM All" -Direction Inbound -Protocol TCP -LocalPort 5985 -Action Allow
+```
+
+**Python WinRM usage:**
+```python
+import winrm
+s = winrm.Session('http://172.16.9.184:5985/wsman', auth=('sysadmin', 'r3tr0gradE99#'), transport='ntlm')
+r = s.run_ps("hostname")
+print(r.std_out.decode().strip())
+```
+
+#### Hyper-V Host Status (VWP-HYPERV1)
+
+- **Hostname:** VWP-HYPERV1
+- **IP:** 172.16.9.184
+- **OS:** Windows Server 2025 Standard
+- **Specs:** 64 vCPUs (Xeon Platinum 8180M), 256GB RAM, PowerEdge R740
+- **Disk:** 10.5TB free on C:
+- **vSwitch:** "Intel(R) Ethernet 10G 4P X550/I350 rNDC - Virtual Switch" (External, NIC1 at 1Gbps)
+- **Physical NICs:** NIC1 (up, 1Gbps), NIC2/3/4 (disconnected)
+- **Native VLAN:** 172.16.9.x (untagged)
+
+**Existing VMs on Hyper-V:**
+| VM | State | Gen | RAM | vCPUs |
+|----|-------|-----|-----|-------|
+| VWP-DC1 | Running | 2 | ~7.4GB | 56 |
+| VWP-FILES | Running | 2 | 2GB | 16 |
+
+**VLAN configuration:**
+- Native/untagged: 172.16.9.0/24 (VWP LAN)
+- VLAN 2: 192.168.0.0/24 (OldNet)
+- VLAN 99: 192.168.3.0/24 (Mgt)
+- UDM trunks all VLANs, defaults to selected VLAN for untagged
+
+#### XenServer VM Inventory (source)
+
+| VM | OS | IP | State | vCPUs | RAM | Disk |
+|----|----|----|-------|-------|-----|------|
+| server 2012 R2 | Server 2012 R2 Standard | 192.168.0.19 | running | 4 | 16GB | 200GB |
+| BACKUP-SRV | Server 2019 Datacenter | 192.168.0.22 | running | 2 | 15GB | 240GB |
+| server 2003 | Server 2003 Enterprise SP2 | 192.168.0.20 | running | 4 | 3GB | 130GB |
+| XP | Windows XP | none | running | 2 | 3GB | 40GB |
+| Windows 7 (32-bit) | Windows 7 | 192.168.0.40 | halted | 2 | 4GB | 80GB |
+
+#### Server 2012 R2 Migration (IN PROGRESS)
+
+**VDI Export running on XenServer:**
+- VDI UUID: `e65ccf95-0bc7-4530-ac91-c418e667e1de`
+- VM UUID: `298da244-79b5-84ed-d6e0-694825697096`
+- Export command: `xe vdi-export uuid=e65ccf95-0bc7-4530-ac91-c418e667e1de filename=/mnt/hyperv/server2012r2.vhd format=vhd`
+- PID: 26610 (nohup, survives disconnects)
+- Destination: `//172.16.9.184/Migration` mounted at `/mnt/hyperv` (SMBv2)
+- Progress at last check: **65GB of ~200GB** (~4GB/min, ~35 min remaining)
+- Transfer rate: ~4GB/min over 1Gbps link
+
+**SMB share created on Hyper-V:**
+```powershell
+New-SmbShare -Name 'Migration' -Path 'C:\Migration' -FullAccess 'Everyone'
+New-NetFirewallRule -DisplayName 'SMB from XenServer' -Direction Inbound -Protocol TCP -LocalPort 445 -RemoteAddress 192.168.0.0/24 -Action Allow
+```
+
+**Mount on XenServer:**
+```bash
+mount.cifs //172.16.9.184/Migration /mnt/hyperv -o username=sysadmin,password=r3tr0gradE99#,domain=VWP,vers=2.0
+```
+
+**Planned VM creation (after export completes):**
+- Generation 1 (BIOS/MBR from XenServer)
+- 4 vCPUs, 16GB RAM
+- NIC on VLAN 2 (192.168.0.x)
+- Attach server2012r2.vhd from C:\Migration
+- Boot and install Hyper-V integration services
+
+#### ITSvc Share (C:\Shares\ITSvc on VWP-HYPERV1)
+Contains installers:
+- VWP-ScreenConnect.ClientSetup.msi (27.6MB)
+- VWPScreenConnect.ClientSetup.exe (19.1MB)
+- Ninite installers (Chrome, Firefox, .NET, WizTree)
+- ISO subfolder
+
+### 4. ScreenConnect on VWP-FILES
+
+**VWP-FILES VM details:**
+- **Hostname:** VWP-FILES.VWP.US
+- **IP:** 172.16.9.107
+- **OS:** Windows Server 2019 Standard
+- **Hyper-V Gen:** 2
+- **RAM / vCPUs:** 2GB / 16 vCPUs
+
+**PowerShell Direct credentials:** `VWP\sysadmin` / `r3tr0gradE99#`
+
+**Installation:** MSI copied via `Copy-VMFile` (Hyper-V Guest Service Interface), installed via PowerShell Direct:
+```powershell
+Copy-VMFile -Name 'VWP-FILES' -SourcePath 'C:\Shares\ITSvc\VWP-ScreenConnect.ClientSetup.msi' -DestinationPath 'C:\Temp\VWP-ScreenConnect.ClientSetup.msi' -CreateFullPath -FileSource Host
+Invoke-Command -VMName 'VWP-FILES' -Credential ... -ScriptBlock { Start-Process msiexec.exe -ArgumentList '/i C:\Temp\VWP-ScreenConnect.ClientSetup.msi /quiet /norestart' -Wait }
+```
+
+**Issue:** Service installed but stopped immediately — "Your host has ended the remote session." User had accidentally deleted the unit in ScreenConnect console.
+
+**Fix:** Uninstalled (`msiexec /x ... /quiet /norestart`), reinstalled same MSI. Service now **Running**.
+
+**Service:** `ScreenConnect Client (1912bf3444b41a08)` — connects to `instance-kgc7jt-relay.screenconnect.com:443`
+
+### 5. Memory Saved
+
+- `reference_dataforth_contact.md` — AJ at Dataforth, dataforthgit@ email forwarding
+
+### Credentials Used This Session
+
+```
+### Dataforth M365 (Graph API)
+- Tenant ID: 7dfa3ce8-c496-4b51-ab8d-bd3dcd78b584
+- App ID: fabb3421-8b34-484b-bc17-e46de9703418
+- Client Secret: ~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO
+
+### VWP VPN
+- Auth file: /etc/openvpn/vwp-auth.txt
+- User: sysadmin / r3tr0gradE99#
+- Remote: 4.18.160.106:1194 TCP
+
+### VWP XenServer (192.168.0.104)
+- SSH: root / r3tr0gradE99!
+- Note: $'...' quoting for !
+
+### VWP-HYPERV1 (172.16.9.184)
+- WinRM: sysadmin / r3tr0gradE99# (NTLM)
+- URL: http://172.16.9.184:5985/wsman
+
+### VWP-DC1 (172.16.9.2)
+- Domain: VWP\sysadmin / r3tr0gradE99#
+
+### VWP-FILES (172.16.9.107)
+- PowerShell Direct: VWP\sysadmin / r3tr0gradE99#
+
+### VWP iDRAC - XenServer R720 (192.168.3.30)
+- SSH: root / r3tr0gradE99#
+- SSH flags: -o KexAlgorithms=+diffie-hellman-group14-sha1 -o HostKeyAlgorithms=+ssh-rsa -o Ciphers=+aes128-cbc,aes256-cbc
+```
+
+### Pending/Incomplete Tasks
+
+1. **VDI export in progress** — Server 2012 R2 exporting from XenServer to Hyper-V, ~65GB/200GB done, PID 26610 on XenServer
+2. **Create Server 2012 R2 VM on Hyper-V** — After export: Gen1, 4 vCPU, 16GB RAM, VLAN 2, attach VHD
+3. **GPU debug after reboot** — Run `sudo nvidia-smi -pm 1` then `python3 gpu_debug_transcribe.py`
+4. **Server 2003 data migration** — Move shares/data from 192.168.0.20 (G: drive) to VWP-FILES after 2012 R2 migration
+5. **Remaining XenServer VMs** — BACKUP-SRV, server 2003, XP, Windows 7 all need migration
+6. **pywinrm (already done, listed for reference)** — installed on workstation via `pip install --user --break-system-packages pywinrm`
+
+### Files Created/Modified This Session
+
+- `/etc/modprobe.d/nvidia-no-d3.conf` — Disable GPU Runtime D3 power management
+- `projects/radio-show/audio-processor/gpu_debug_transcribe.py` — GPU diagnostic batch transcription script
+- `~/.claude/projects/-home-guru-ClaudeTools/memory/reference_dataforth_contact.md` — AJ/dataforthgit memory
+- `~/.claude/projects/-home-guru-ClaudeTools/memory/MEMORY.md` — Updated index