"""Size up candidate source folders we might want to copy off D:.

Reads a WizTree-style CSV export (one banner line, one header line, then one
row per file or folder), keeps the folder rows whose leaf name (last path
component, compared case-insensitively) is one of the names of interest, and
prints the matches grouped by leaf, largest first.

Only the largest MAX_ROWS_PER_LEAF matches of each leaf are listed; when more
exist, a trailing line says how many were left out.

Usage: python <this script> [path-to-export.csv]
"""
import csv
import sys
import os
from collections import defaultdict

DEFAULT_CSV = 'clients/valleywide/app-modernization/WizTree_20260516172207.csv'
CSV = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV

# Leaf names to look for, in report order: source code targets first, then VMs.
REPORT_ORDER = [
    'vwp_current', 'vwp_update', 'vwp_inv', 'vwp_current_0317',
    'project', 'source', 'source hold',
    'kingston', 'full', 'recovery',
    'virtualbox', 'vm_vdi', 'virtual box', 'virtual box copy', 'xp box',
    'darv',
]

# Match folders whose leaf name (the last path component) is in this set.
# Derived from REPORT_ORDER so a leaf can never be matched but not printed
# (or listed for printing but never matched).
LEAVES_OF_INTEREST = set(REPORT_ORDER)

# How many matches to list per leaf before summarising the remainder.
MAX_ROWS_PER_LEAF = 10

BYTES_PER_GB = 1024 ** 3


def leaf_name(path):
    """Return the lower-cased last component of a backslash-separated path."""
    return path.rstrip('\\').rsplit('\\', 1)[-1].lower()


def collect_matches(rows, leaves=None):
    """Group the folder rows of interest by leaf name.

    Args:
        rows: iterable of CSV rows (lists of strings). The code reads column 0
            as the path, 1 as the size, 3 as the modified stamp, 5 as the file
            count and 6 as the folder count.
        leaves: lower-cased leaf names to keep; defaults to LEAVES_OF_INTEREST.

    Returns:
        Dict mapping leaf -> list of (size, files, folders, modified, path).
    """
    wanted = LEAVES_OF_INTEREST if leaves is None else leaves
    groups = defaultdict(list)
    for row in rows:
        # Blank or truncated rows cannot carry the columns we need.
        if not row or len(row) < 7:
            continue
        path = row[0]
        # Only folder rows end with a backslash; everything else is skipped.
        if not path.endswith('\\'):
            continue
        leaf = leaf_name(path)
        if leaf not in wanted:
            continue
        try:
            size = int(row[1])
            files = int(row[5]) if row[5].strip() else 0
            folders = int(row[6]) if row[6].strip() else 0
        except ValueError:
            # Non-numeric size/count: skip the row rather than abort the report.
            continue
        groups[leaf].append((size, files, folders, row[3], path))
    return groups


def format_report(groups, order=None, limit=MAX_ROWS_PER_LEAF):
    """Build the report as a list of output lines.

    Args:
        groups: mapping leaf -> list of (size, files, folders, modified, path).
        order: leaf names in the order their sections should appear; defaults
            to REPORT_ORDER. Leaves without matches produce no section.
        limit: maximum rows listed per leaf, or None for no limit.

    Returns:
        List of strings; section headers start with a newline character so
        that printing the lines one per line leaves a blank line before each.
    """
    lines = [
        # "Modified" is padded to the same width as the data column below so
        # that the "Path" heading lines up with the paths.
        f'{"GB":>8} {"Files":>8} {"Folders":>7} {"Modified":<19} Path',
        '-' * 140,
    ]
    for leaf in (REPORT_ORDER if order is None else order):
        # Tuples sort by size first, so reverse order puts the largest on top.
        items = sorted(groups.get(leaf, ()), reverse=True)
        if not items:
            continue
        count = len(items)
        plural = '' if count == 1 else 'es'
        lines.append(f'\n--- leaf "{leaf}" ({count} match{plural}) ---')
        shown = items if limit is None else items[:limit]
        for size, files, folders, modified, path in shown:
            lines.append(
                f'{size / BYTES_PER_GB:>8.2f} {files:>8} {folders:>7} '
                f'{modified:<19} {path}'
            )
        hidden = count - len(shown)
        if hidden > 0:
            # Make the truncation visible instead of silently dropping rows.
            lines.append(f'{"...":>8} {hidden} more not shown')
    return lines


def main(csv_path=None):
    """Read the CSV at *csv_path* (default: CSV) and print the report."""
    path = CSV if csv_path is None else csv_path
    # newline='' is what the csv module requires so that it can handle line
    # endings inside quoted fields itself.
    with open(path, encoding='utf-8-sig', errors='replace', newline='') as f:
        reader = csv.reader(f)
        # Skip the banner and header lines; tolerate a file too short to
        # contain them instead of dying with StopIteration.
        next(reader, None)  # banner
        next(reader, None)  # header
        groups = collect_matches(reader)
    print('\n'.join(format_report(groups)))


if __name__ == '__main__':
    main()