#!/usr/bin/env python3
"""Step 5: Verify deduplication - pull contacts again and check for remaining duplicates."""

import json
import os
import subprocess
import sys
import urllib.parse
from collections import defaultdict

TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: a client secret should not live in source control. Prefer
# supplying it through the GRAPH_CLIENT_SECRET environment variable; the
# literal fallback is kept only so existing invocations keep working and
# should be rotated and removed.
CLIENT_SECRET = os.environ.get(
    "GRAPH_CLIENT_SECRET", "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
)
SCOPE = "https://graph.microsoft.com/.default"
USER = "barbara@bardach.net"
FOLDER_ID = "AAMkADNiYWE4ZDYxLWE4M2EtNGY1MS05YWQwLWY2OWYzMWI3YjZjNAAuAAAAAADrk4YN-mpcR5zROC2646l9AQCo_dM7bg-DQY5RuVpcPz_JAAQU2EZxAAA="
SELECT_FIELDS = "id,displayName"

# Contact count recorded before deduplication; used as the baseline in the
# verification report.
PRE_DEDUP_COUNT = 10404


def _curl_json(args, stdin_text=None):
    """Run ``curl`` with *args* and return the response body parsed as JSON.

    Args:
        args: curl command-line arguments (everything after ``curl -sS``).
        stdin_text: Optional text piped to curl's stdin (used with ``@-``).

    Returns:
        The decoded JSON object. If curl cannot be started, exits non-zero,
        or the body is not a JSON object, a dict with a single ``"error"``
        key (holding ``code`` and ``message``) is returned instead of
        raising, so callers can treat transport failures like API errors.
    """
    try:
        result = subprocess.run(
            ["curl", "-sS", *args],
            input=stdin_text,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as exc:  # curl missing or not executable
        return {"error": {"code": "curlUnavailable", "message": str(exc)}}

    if result.returncode != 0:
        return {
            "error": {
                "code": f"curlExit{result.returncode}",
                "message": result.stderr.strip(),
            }
        }

    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        return {
            "error": {
                "code": "invalidJson",
                "message": f"{exc}; body starts with {result.stdout[:200]!r}",
            }
        }

    if not isinstance(payload, dict):
        return {
            "error": {
                "code": "unexpectedJson",
                "message": f"expected a JSON object, got {type(payload).__name__}",
            }
        }
    return payload


def get_token():
    """Request an access token using the client-credentials grant.

    Returns:
        The value of the ``access_token`` field in the token response.

    Prints an error and exits the process with status 1 when the response
    does not contain ``access_token``.
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    # urlencode so that reserved characters in any value cannot corrupt the
    # form body.
    form = urllib.parse.urlencode(
        {
            "client_id": CLIENT_ID,
            "scope": SCOPE,
            "client_secret": CLIENT_SECRET,
            "grant_type": "client_credentials",
        }
    )
    # The body is sent on stdin ("--data @-") so the secret does not appear
    # in the process argument list.
    data = _curl_json(
        [
            "-X", "POST", url,
            "-H", "Content-Type: application/x-www-form-urlencoded",
            "--data", "@-",
        ],
        stdin_text=form,
    )
    if "access_token" not in data:
        print(f"[ERROR] Failed to get token: {data}", flush=True)
        sys.exit(1)
    return data["access_token"]


def graph_get(token, url):
    """GET *url* with a bearer *token* and return the decoded JSON object.

    Transport failures and non-JSON responses are returned as a dict with an
    ``"error"`` key rather than raised (see ``_curl_json``).
    """
    return _curl_json(
        [
            "-X", "GET", url,
            "-H", f"Authorization: Bearer {token}",
            "-H", "Content-Type: application/json",
        ]
    )


def _display_key(contact):
    """Return the contact's displayName stripped and lower-cased ('' if absent)."""
    return (contact.get("displayName") or "").strip().lower()


def group_contacts_by_name(contacts):
    """Map each normalized, non-empty display name to the list of contact ids.

    Contacts whose displayName is missing, ``None`` or whitespace-only are
    left out of the mapping.
    """
    groups = defaultdict(list)
    for contact in contacts:
        key = _display_key(contact)
        if key:
            groups[key].append(contact["id"])
    return dict(groups)


def find_duplicates(groups):
    """Return only the entries of *groups* that hold two or more ids."""
    return {name: ids for name, ids in groups.items() if len(ids) >= 2}


def count_unnamed(contacts):
    """Count contacts whose displayName is missing, ``None`` or blank."""
    return sum(1 for contact in contacts if not _display_key(contact))


def _fetch_all_contacts(token):
    """Page through the contact folder and return every contact dict.

    Only ``SELECT_FIELDS`` are requested to keep responses small. The token
    is re-acquired every 50 pages. If any page lacks a ``"value"`` list the
    error is printed and the process exits with status 1: a partial listing
    must not be reported as a verification result.
    """
    contacts = []
    url = (
        f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{FOLDER_ID}"
        f"/contacts?$top=100&$select={SELECT_FIELDS}"
    )
    page = 1
    while url:
        data = graph_get(token, url)
        if "value" not in data:
            print(f"[ERROR] {data}", flush=True)
            print(
                f"[ERROR] Contact listing incomplete after {len(contacts)} "
                "contacts; aborting verification",
                flush=True,
            )
            sys.exit(1)
        contacts.extend(data["value"])
        if page % 20 == 0:
            print(f"  Page {page}, total so far: {len(contacts)}", flush=True)
        url = data.get("@odata.nextLink")
        page += 1
        if page % 50 == 0:
            token = get_token()
    return contacts


def main(old_count=PRE_DEDUP_COUNT):
    """Re-pull all contacts and report counts and any remaining duplicates.

    Args:
        old_count: Number of contacts that existed before deduplication.
    """
    banner = "=" * 60
    print(banner, flush=True)
    print("STEP 5: Verify deduplication", flush=True)
    print(banner, flush=True)

    token = get_token()
    print("[OK] Token acquired", flush=True)

    # Pull all contacts (just id and displayName for speed)
    contacts = _fetch_all_contacts(token)
    new_count = len(contacts)

    print(f"\n{banner}", flush=True)
    print("VERIFICATION RESULTS", flush=True)
    print(banner, flush=True)
    print(f"  Old count (pre-dedup):  {old_count}", flush=True)
    print(f"  New count (post-dedup): {new_count}", flush=True)
    print(f"  Contacts removed:       {old_count - new_count}", flush=True)

    # Check for remaining duplicates
    groups = group_contacts_by_name(contacts)
    remaining_dups = find_duplicates(groups)

    if remaining_dups:
        print(f"\n[WARNING] Remaining duplicate groups: {len(remaining_dups)}", flush=True)
        for name, ids in sorted(remaining_dups.items())[:10]:
            print(f"  {name}: {len(ids)} copies", flush=True)
    else:
        print("\n[OK] No duplicates remain! Deduplication complete.", flush=True)

    print(f"\n  Unique contact names: {len(groups)}", flush=True)
    print(f"  Contacts without name: {count_unnamed(contacts)}", flush=True)


if __name__ == "__main__":
    main()