"""Summarize VWP2 folder on drive 3 — size, newest content, file type breakdown.""" import csv from collections import defaultdict WIZ = 'clients/valleywide/app-modernization/WizTree_20260516174356.csv' # Use raw string to avoid \v -> vertical tab interpretation SEARCH_FOLDER = r'\97-server-g-drive\g$ 2024-04-10 21;13;45\vwp2\\'.lower() folder_sizes = {} files_by_ext = defaultdict(list) all_files = [] prefix = SEARCH_FOLDER.rstrip('\\') print(f'DEBUG prefix: {prefix!r}') with open(WIZ, encoding='utf-8-sig', errors='replace') as f: r = csv.reader(f); next(r); next(r) rows_checked = 0 for row in r: if not row or len(row) < 4: continue rows_checked += 1 p = row[0]; pl = p.lower() if prefix not in pl: continue try: sz = int(row[1]) except (ValueError, IndexError): continue if p.endswith('\\'): try: files = int(row[5]); folders = int(row[6]) except (ValueError, IndexError): files, folders = 0, 0 folder_sizes[p] = (sz, files, folders, row[3]) else: ext = p.rsplit('.', 1)[-1].lower() if '.' in p.rsplit('\\', 1)[-1] else '(none)' files_by_ext[ext].append((row[3], sz, p)) all_files.append((row[3], sz, p, ext)) # Top-level VWP2 folder top = next((k for k in folder_sizes if k.lower().rstrip('\\').endswith('\\vwp2')), None) if top: sz, files, folders, mod = folder_sizes[top] print(f'=== {top} ===') print(f' {sz/1024/1024/1024:.2f} GB, {files:,} files, {folders} subfolders, modified {mod}') print() print('=== File type breakdown ===') totals = [] for ext, items in files_by_ext.items(): total_mb = sum(s for _,s,_ in items) / 1024 / 1024 totals.append((total_mb, ext, len(items))) for total_mb, ext, n in sorted(totals, reverse=True)[:15]: print(f' .{ext:<8} {n:>5} files, {total_mb:>10.1f} MB') print() print('=== Newest 30 files in \\VWP2\\ tree ===') all_files.sort(reverse=True) for mod, sz, p, ext in all_files[:30]: print(f' {mod:<19} {sz/1024/1024:>8.1f} MB {p}') print() print('=== Top-level subfolders of \\VWP2\\ ===') for path, (sz, files, folders, mod) in sorted(folder_sizes.items(), key=lambda x: -x[1][0]): if path.lower().rstrip('\\').endswith('\\vwp2'): continue rel = path.lower().split('\\vwp2\\', 1)[1].rstrip('\\') if '\\' in rel: continue # only direct children print(f' {sz/1024/1024:>8.1f} MB {files:>5} files {mod:<19} {path}')