"""Check current state of Bardach Temp contacts folder and compare to previous snapshot.

The script:
  1. acquires an app-only Microsoft Graph token (client-credentials flow),
  2. locates the contact folder named ``Temp`` in the mailbox of ``USER``,
  3. pulls every contact in that folder, following ``@odata.nextLink``,
  4. prints a duplicate analysis keyed on the lower-cased display name,
  5. compares the pulled contact IDs with a previously saved JSON snapshot.

All HTTP traffic goes through the ``curl`` executable, which must be on PATH.
"""
import json
import os
import subprocess
import sys
from collections import Counter, defaultdict
from urllib.parse import urlencode

TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLAUDE_APP = "fabb3421-8b34-484b-bc17-e46de9703418"
# NOTE(review): a client secret committed to source should be rotated and then
# supplied only through the environment. The literal is kept as a fallback so
# existing invocations keep working until that happens.
CLAUDE_SECRET = os.environ.get(
    "BARDACH_GRAPH_CLIENT_SECRET",
    "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO",
)
USER = "barbara@bardach.net"
SELECT = ("id,displayName,givenName,surname,emailAddresses,"
          "homePhones,businessPhones,companyName,jobTitle,"
          "personalNotes,lastModifiedDateTime")

GRAPH_BASE = "https://graph.microsoft.com/v1.0"
TEMP_FOLDER_NAME = 'Temp'
PREV_FILE = 'D:/ClaudeTools/temp/bardach_temp_all.json'


def run_curl_json(args, stdin_text=None):
    """Run ``curl`` with *args* and return its stdout parsed as JSON.

    *stdin_text*, when given, is fed to curl's standard input (used to keep
    the POST body out of the process argument list).

    Raises RuntimeError when curl exits non-zero or prints non-JSON output.
    """
    proc = subprocess.run(
        ['curl', '-s', '-S', *args],  # -S keeps error text on stderr despite -s
        input=stdin_text,
        capture_output=True,
        # Decode explicitly as UTF-8: text=True would use the locale codec,
        # which garbles or rejects non-ASCII contact names on Windows.
        encoding='utf-8',
    )
    if proc.returncode != 0:
        raise RuntimeError(
            f"curl exited with code {proc.returncode}: {proc.stderr.strip()}")
    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError as err:
        raise RuntimeError(
            f"Response was not valid JSON: {proc.stdout[:200]!r}") from err


def error_message(payload):
    """Return the human-readable message from a response holding an ``error`` key."""
    err = payload.get('error')
    if isinstance(err, dict):
        return err.get('message') or ''
    return str(err)


def get_token():
    """Request a client-credentials access token and return it.

    Raises RuntimeError when the response carries no ``access_token``.
    """
    form = urlencode({
        'client_id': CLAUDE_APP,
        'client_secret': CLAUDE_SECRET,
        'scope': 'https://graph.microsoft.com/.default',
        'grant_type': 'client_credentials',
    })
    tok_data = run_curl_json(
        ['-X', 'POST',
         f'https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token',
         '-d', '@-'],  # '@-' makes curl read the body (with the secret) from stdin
        stdin_text=form,
    )
    if 'access_token' not in tok_data:
        raise RuntimeError(
            f"Token failed: {tok_data.get('error_description', tok_data)}")
    return tok_data['access_token']


def graph_get(url, token):
    """GET *url* with a bearer *token* and return the decoded JSON body."""
    return run_curl_json(['-H', f'Authorization: Bearer {token}', url])


def find_contact_folder(token, folder_name):
    """Look up a contact folder of ``USER`` by exact display name.

    Follows ``@odata.nextLink`` so folders beyond the first result page are
    found as well.

    Returns ``(folder_id, other_names)``; ``folder_id`` is None when no folder
    matches, and ``other_names`` lists the display names inspected without a
    match. Raises RuntimeError when the service returns an error payload.
    """
    url = f"{GRAPH_BASE}/users/{USER}/contactFolders?$select=displayName,id"
    other_names = []
    while url:
        data = graph_get(url, token)
        if 'error' in data:
            raise RuntimeError(
                f"Folder listing failed: {error_message(data)[:200]}")
        for folder in data.get('value', []):
            if folder['displayName'] == folder_name:
                return folder['id'], other_names
            other_names.append(folder['displayName'])
        url = data.get('@odata.nextLink')
    return None, other_names


def pull_contacts(token, folder_id):
    """Download every contact in *folder_id*, page by page.

    Returns ``(contacts, pages, complete)``. ``complete`` is False when an
    error payload stopped the pull early, in which case ``contacts`` holds
    only the pages fetched before the error.
    """
    url = (f"{GRAPH_BASE}/users/{USER}/contactFolders/{folder_id}"
           f"/contacts?$top=100&$select={SELECT}")
    contacts = []
    page = 0
    complete = True
    while url:
        page += 1
        data = graph_get(url, token)
        if 'error' in data:
            print(f"[ERROR] Page {page}: {error_message(data)[:200]}")
            complete = False
            break
        items = data.get('value', [])
        contacts.extend(items)
        url = data.get('@odata.nextLink')
        if page % 10 == 0:
            print(f" Page {page}: {len(contacts)} contacts so far...")
        if not items:
            break
    return contacts, page, complete


def group_by_display_name(contacts):
    """Group contacts by stripped, lower-cased ``displayName``.

    Returns ``(name_groups, no_name_contacts)``: a dict mapping the normalized
    name to its contacts (input order preserved), and the list of contacts
    whose display name is missing or blank.
    """
    name_groups = defaultdict(list)
    no_name_contacts = []
    for contact in contacts:
        name = (contact.get('displayName') or '').strip()
        if name:
            name_groups[name.lower()].append(contact)
        else:
            no_name_contacts.append(contact)
    return name_groups, no_name_contacts


def collect_emails(contacts):
    """Return the sorted, de-duplicated, lower-cased email addresses of *contacts*."""
    emails = set()
    for contact in contacts:
        # The key may be present with a null value, hence `or []`.
        for entry in contact.get('emailAddresses') or []:
            address = (entry or {}).get('address')
            if address:
                emails.add(address.lower())
    return sorted(emails)


def compare_contact_ids(prev_contacts, curr_contacts):
    """Compare two contact lists by their ``id`` field.

    Returns three sets ``(unchanged, removed, added)``: IDs in both lists,
    IDs only in *prev_contacts*, and IDs only in *curr_contacts*.
    """
    prev_ids = {c.get('id') for c in prev_contacts}
    curr_ids = {c.get('id') for c in curr_contacts}
    return prev_ids & curr_ids, prev_ids - curr_ids, curr_ids - prev_ids


def report_duplicates(all_contacts):
    """Print the duplicate analysis (summary, distribution, top 20) for *all_contacts*."""
    print(f"\n{'='*60}")
    print("DUPLICATE ANALYSIS BY displayName")
    print(f"{'='*60}")

    name_groups, no_name_contacts = group_by_display_name(all_contacts)
    unique_names = len(name_groups)
    dupe_names = {k: v for k, v in name_groups.items() if len(v) > 1}
    single_names = {k: v for k, v in name_groups.items() if len(v) == 1}
    total_dupe_entries = sum(len(v) for v in dupe_names.values())
    # One entry per duplicated name survives dedup; the rest are removable.
    total_removable = sum(len(v) - 1 for v in dupe_names.values())

    print(f"Total contacts: {len(all_contacts)}")
    print(f"Contacts with no name: {len(no_name_contacts)}")
    print(f"Unique display names: {unique_names}")
    print(f" - Names appearing once: {len(single_names)}")
    print(f" - Names with duplicates: {len(dupe_names)}")
    print(f"Total entries in dupe groups: {total_dupe_entries}")
    print(f"Removable duplicates: {total_removable}")
    print(f"Estimated after dedup: {len(single_names) + len(dupe_names) + len(no_name_contacts)}")

    # Duplicate distribution
    dupe_dist = Counter(len(v) for v in dupe_names.values())
    print("\nDuplicate distribution (how many names appear N times):")
    for count, num_names in sorted(dupe_dist.items()):
        print(f" {count}x: {num_names} names")

    # Top 20 most duplicated (stable sort: ties keep first-seen order)
    sorted_dupes = sorted(dupe_names.items(), key=lambda x: -len(x[1]))
    print("\nTop 20 most duplicated names:")
    print(f" {'Count':<6} {'Name':<35} {'Emails'}")
    print(f" {'-'*5:<6} {'-'*34:<35} {'-'*30}")
    for name, contacts in sorted_dupes[:20]:
        emails = collect_emails(contacts)
        email_str = ', '.join(emails[:3]) if emails else '(no email)'
        # Grab original-case name from first contact
        orig_name = contacts[0].get('displayName', name)
        print(f" {len(contacts):<6} {orig_name[:34]:<35} {email_str[:60]}")


def report_snapshot_comparison(all_contacts, prev_file=PREV_FILE):
    """Print how *all_contacts* differs from the JSON snapshot at *prev_file*."""
    print(f"\n{'='*60}")
    print("COMPARISON TO PREVIOUS SNAPSHOT")
    print(f"{'='*60}")

    if not os.path.exists(prev_file):
        print(f"[WARNING] Previous file not found: {prev_file}")
        print("No comparison available.")
        return

    try:
        # JSON is UTF-8 by specification; do not depend on the locale codec.
        with open(prev_file, 'r', encoding='utf-8') as fh:
            prev_contacts = json.load(fh)
    except (OSError, ValueError) as err:
        print(f"[WARNING] Could not read previous file {prev_file}: {err}")
        print("No comparison available.")
        return

    prev_count = len(prev_contacts)
    curr_count = len(all_contacts)
    diff = curr_count - prev_count
    sign = '+' if diff > 0 else ''
    print(f"Previous count: {prev_count}")
    print(f"Current count: {curr_count}")
    print(f"Difference: {sign}{diff}")

    # Check IDs overlap
    unchanged, removed, added = compare_contact_ids(prev_contacts, all_contacts)
    print("\nBy contact ID:")
    print(f" Still present (unchanged ID): {len(unchanged)}")
    print(f" Removed since last snapshot: {len(removed)}")
    print(f" New since last snapshot: {len(added)}")


def main():
    """Run the full check; exits with status 1 on any fatal error."""
    try:
        # --- 1. Get token ---
        token = get_token()
        print("[OK] Token acquired")

        # --- 2. Get Temp folder ID ---
        temp_id, folder_names = find_contact_folder(token, TEMP_FOLDER_NAME)
        if not temp_id:
            print("[ERROR] Temp folder not found. Folders:", folder_names)
            sys.exit(1)
        print(f"[OK] Temp folder ID: {temp_id[:20]}...")

        # --- 3. Pull ALL contacts with pagination ---
        print("Pulling Temp contacts...")
        all_contacts, pages, complete = pull_contacts(token, temp_id)
    except (RuntimeError, OSError) as err:
        # OSError covers a missing curl executable.
        print(f"[ERROR] {err}")
        sys.exit(1)

    print(f"[OK] Total Temp contacts pulled: {len(all_contacts)} ({pages} pages)")
    if not complete:
        print("[WARNING] Pull stopped early; the figures below are based on partial data.")

    # --- 4. Duplicate analysis ---
    report_duplicates(all_contacts)

    # --- 5. Compare to previous snapshot ---
    report_snapshot_comparison(all_contacts)

    print("\n[INFO] Script complete.")


if __name__ == "__main__":
    main()