"""
Download the full Computer Guru Show archive from IX server (172.16.3.10).

Mirrors the year-based directory structure as-is to archive-data/episodes/.
Resumable: skips files already present with matching size. Each file is
fetched to a temporary ``.part`` name and renamed into place on success, so
an interrupted transfer never leaves a truncated file at its final path.
Requires Tailscale.
"""
import os
import shlex
import sys
import time
from pathlib import Path

import paramiko

password = os.environ.get("IX_PASSWORD")
if not password:
    print("IX_PASSWORD env var not set", file=sys.stderr)
    sys.exit(1)

HOST = "172.16.3.10"
LOCAL_ROOT = Path(__file__).parent / "archive-data" / "episodes"
LOCAL_ROOT.mkdir(parents=True, exist_ok=True)
REMOTE_ROOT = "/home/gurushow/public_html/archive"
# NOTE(review): 2013 is not listed here -- confirm that is intentional.
YEARS = ["2010", "2011", "2012", "2014", "2015", "2016", "2017", "2018"]

MAX_ATTEMPTS = 3        # download attempts per file before giving up
RECONNECT_DELAY = 5     # seconds to wait before opening a new session
MAX_ERRORS_SHOWN = 20   # cap on error lines printed in the final report
GIB = 1024 ** 3


def connect():
    """Open an SSH session to HOST and an SFTP channel on top of it.

    Returns:
        A ``(client, sftp)`` tuple: the connected ``paramiko.SSHClient`` and
        the SFTP client opened from it.

    Raises:
        Exception: whatever paramiko raises on connect/auth/channel failure.
            The half-open SSH client is closed before the error propagates.
    """
    c = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any host key without verification.
    # Left as-is to keep behavior; consider pinning the key via known_hosts.
    c.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        c.connect(
            HOST,
            username="root",
            password=password,
            look_for_keys=False,
            allow_agent=False,
            timeout=30,
            banner_timeout=30,
            auth_timeout=30,
        )
        transport = c.get_transport()
        if transport is not None:
            transport.set_keepalive(30)  # send keepalive every 30s
        s = c.open_sftp()
        s.get_channel().settimeout(120)  # per-operation timeout
    except Exception:
        # Do not leak the socket when any setup step fails.
        c.close()
        raise
    return c, s


def list_remote_mp3s(year: str) -> list[str]:
    """Return the remote paths of all MP3 files under REMOTE_ROOT/<year>.

    Runs ``find`` over the current module-level SSH ``client``. Errors from
    ``find`` itself are discarded (``2>/dev/null``), so a missing year
    directory yields an empty list.
    """
    target = shlex.quote(f"{REMOTE_ROOT}/{year}")
    cmd = f"find {target} -iname '*.mp3' 2>/dev/null"
    _stdin, stdout, _stderr = client.exec_command(cmd)
    stripped = (raw.strip() for raw in stdout.read().decode().splitlines())
    return [line for line in stripped if line]


def _close_session() -> None:
    """Close the SFTP channel and SSH client, ignoring errors from either."""
    for closable in (sftp, client):
        try:
            closable.close()
        except Exception:
            # Best effort: the session may already be dead.
            pass


def _session_alive() -> bool:
    """Return True when the current SSH transport exists and is active."""
    transport = client.get_transport()
    return transport is not None and transport.is_active()


def _reconnect() -> None:
    """Tear down the current session and rebind ``client``/``sftp`` to a new one."""
    global client, sftp
    _close_session()
    time.sleep(RECONNECT_DELAY)
    client, sftp = connect()


def _ensure_session() -> bool:
    """Reconnect if the session is dead; return False when that fails."""
    if _session_alive():
        return True
    try:
        _reconnect()
    except Exception as e:
        errors.append(f"reconnect: {e}")
        return False
    return True


def _discard(path: Path) -> None:
    """Best-effort removal of a leftover partial download."""
    try:
        path.unlink(missing_ok=True)
    except OSError:
        # Cleanup only; a later attempt overwrites the file anyway.
        pass


print(f"Connecting to {HOST}...", flush=True)
client, sftp = connect()
print("Connected.", flush=True)

total_files = 0
total_bytes = 0
skipped_files = 0
skipped_bytes = 0
downloaded_files = 0
downloaded_bytes = 0
errors = []
aborted = False

t_start = time.monotonic()

try:
    for year in YEARS:
        print(f"\n=== {year} ===", flush=True)
        if not _ensure_session():
            aborted = True
            break
        try:
            remote_paths = list_remote_mp3s(year)
        except Exception as e:
            # A failed listing costs this year only, not the whole run.
            errors.append(f"list {year}: {e}")
            print(f" listing failed: {e}", flush=True)
            continue
        print(f" {len(remote_paths)} MP3 files found on remote", flush=True)

        for remote in remote_paths:
            # A dead session would make every remaining file fail; restore
            # it first, or stop early if the server is unreachable.
            if not _ensure_session():
                aborted = True
                break

            rel = remote[len(REMOTE_ROOT) + 1:]
            local = LOCAL_ROOT / rel
            local.parent.mkdir(parents=True, exist_ok=True)

            try:
                remote_size = sftp.stat(remote).st_size
            except Exception as e:
                errors.append(f"stat {remote}: {e}")
                continue

            total_files += 1
            total_bytes += remote_size

            if local.exists() and local.stat().st_size == remote_size:
                skipped_files += 1
                skipped_bytes += remote_size
                continue

            size_mb = remote_size / 1024 / 1024
            print(f" [{downloaded_files + 1:3d}] {rel} ({size_mb:.1f} MB)...", end="", flush=True)

            # Download next to the target and rename on success so the final
            # path only ever holds a complete file.
            partial = local.with_name(local.name + ".part")
            for attempt in range(1, MAX_ATTEMPTS + 1):
                t0 = time.monotonic()  # time this attempt only, not the retries
                try:
                    sftp.get(remote, str(partial))
                    os.replace(partial, local)
                except Exception as e:
                    _discard(partial)
                    if attempt >= MAX_ATTEMPTS:
                        print(f" FAILED after {attempt} attempts: {e}", flush=True)
                        errors.append(f"get {remote}: {e}")
                        # Session state is suspect; force a fresh one for the
                        # next file instead of cascading failures.
                        _close_session()
                        break
                    print(f" retry {attempt} ({e})...", end="", flush=True)
                    # Reconnect on failure
                    try:
                        _reconnect()
                    except Exception as reconnect_error:
                        # Counted via the next attempt, which fails fast on
                        # the closed session and retries the reconnect.
                        print(f" reconnect failed ({reconnect_error})...", end="", flush=True)
                else:
                    elapsed = time.monotonic() - t0
                    mbps = size_mb / elapsed if elapsed > 0 else 0
                    print(f" done ({elapsed:.1f}s, {mbps:.1f} MB/s)", flush=True)
                    downloaded_files += 1
                    downloaded_bytes += remote_size
                    break

        if aborted:
            break
finally:
    _close_session()

if aborted:
    print("\nConnection lost and could not be re-established; stopping early.", flush=True)

elapsed_total = time.monotonic() - t_start
print("\n=== Summary ===", flush=True)
print(f" Total remote files : {total_files}", flush=True)
print(f" Total remote bytes : {total_bytes / GIB:.2f} GB", flush=True)
print(f" Already present : {skipped_files} files / {skipped_bytes / GIB:.2f} GB", flush=True)
print(f" Newly downloaded : {downloaded_files} files / {downloaded_bytes / GIB:.2f} GB", flush=True)
print(f" Errors : {len(errors)}", flush=True)
print(f" Wall time : {elapsed_total:.1f}s", flush=True)

if errors:
    print("\n=== Errors ===", flush=True)
    for e in errors[:MAX_ERRORS_SHOWN]:
        print(f" {e}", flush=True)
    hidden = len(errors) - MAX_ERRORS_SHOWN
    if hidden > 0:
        print(f" ... and {hidden} more", flush=True)