# claudetools/clients/valleywide/app-modernization/source-analysis/drive2_inspect.py

"""Inspect specific drive 2 paths of interest."""
import csv
from collections import defaultdict

# CSV export that this script scans. The path is relative, so it is resolved
# against the current working directory when the file is opened.
WIZ = 'clients/valleywide/app-modernization/WizTree_20260516173603.csv'

# Directory roots that each get their own largest-files listing.
# Order matters: a file is attributed to the first root it falls under.
PATHS_OF_INTEREST = (
    r'D:\Archive\Darv-Win7-PC',
    r'D:\Office-Estimates\Darv\000_ASource',
    r'D:\Archive\98_Server\O Drive\G-Drive\Darv',
)

# Files found under each path of interest, stored as (size, row[3], path)
# tuples; the report further down prints the 20 largest per path.
by_path = defaultdict(list)
# Every .vdi file anywhere in the export, whether or not it also falls under
# one of the paths of interest.
all_vdi = []

# Pair each root with its "root + backslash" form once, rather than rebuilding
# the string for every CSV row. The trailing backslash keeps a root such as
# 'D:\A' from matching a sibling directory such as 'D:\AB'.
_roots = [(root, root + '\\') for root in PATHS_OF_INTEREST]

# newline='' is what the csv module asks for, so that it (not the text layer)
# decides how line endings inside quoted fields are handled.
with open(WIZ, encoding='utf-8-sig', errors='replace', newline='') as f:
    r = csv.reader(f)
    # Skip the two leading non-data lines (presumably a generator banner and
    # the column header -- confirm against the export). The default argument
    # keeps an empty or truncated file from raising StopIteration.
    next(r, None)
    next(r, None)
    for row in r:
        if len(row) < 4:
            continue  # blank or malformed line
        p = row[0]
        if p.endswith('\\'):
            continue  # files only
        # First matching root wins, mirroring the order of PATHS_OF_INTEREST.
        matched = next(
            (root for root, root_dir in _roots if p.startswith(root_dir)),
            None,
        )
        is_vdi = p.lower().endswith('.vdi')
        if matched is None and not is_vdi:
            continue  # row is irrelevant; skip the size parse entirely
        # Parse the size once for both collections; an unparseable size is
        # recorded as 0 rather than dropping the file from the report.
        try:
            sz = int(row[1])
        except ValueError:
            sz = 0
        entry = (sz, row[3], p)
        if matched is not None:
            by_path[matched].append(entry)
        # also catch all .vdi anywhere on this drive
        if is_vdi:
            all_vdi.append(entry)

# Per-root report: the 20 largest files, biggest first.
for prefix in PATHS_OF_INTEREST:
    items = list(by_path[prefix])
    # Entries are (size, row[3], path) tuples, so a descending sort orders by
    # size first and falls back to the remaining fields on ties.
    items.sort(reverse=True)
    print(f'\n=== Top 20 files under {prefix} ({len(items)} files total) ===')
    for size_bytes, stamp, file_path in items[:20]:
        size_mb = size_bytes / 1024 / 1024
        print(f'{size_mb:>10.1f} MB  {stamp:<19}  {file_path}')

# Drive-wide report: every .vdi file, largest first, shown in GB.
print(f'\n=== All .vdi files on drive 2 ({len(all_vdi)} total), sorted by size ===')
vdi_desc = list(all_vdi)
vdi_desc.sort(reverse=True)
for size_bytes, stamp, file_path in vdi_desc:
    size_gb = size_bytes / 1024 / 1024 / 1024
    print(f'{size_gb:>8.2f} GB  {stamp:<19}  {file_path}')