Files
claudetools/temp/bardach_dedup_step5_verify.py
Mike Swanson fa15b03180 sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files:
- Quote wizard frontend (all components, hooks, types, config)
- API updates (config, models, routers, schemas, services)
- Client work (bg-builders, gurushow)
- Scripts (BGB Lesley termination, CIPP, Datto, migration)
- Temp files (Bardach contacts, VWP investigation, misc)
- Credentials and session logs
- Email service, PHP API, session logs

Machine: ACG-M-L5090
Timestamp: 2026-03-10 19:11:00

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 19:59:08 -07:00

100 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""Step 5: Verify deduplication - pull contacts again and check for remaining duplicates."""
import json
import os
import subprocess
import sys
from collections import defaultdict
from urllib.parse import urlencode
# Identifiers for the OAuth2 client-credentials token request made by
# get_token() against login.microsoftonline.com.
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: a client secret is committed in this file. Treat it as compromised:
# rotate it and supply the replacement via BARDACH_GRAPH_CLIENT_SECRET.
# The literal is kept only as a fallback so existing invocations keep working;
# delete it once the environment variable is in place.
CLIENT_SECRET = (
    os.environ.get("BARDACH_GRAPH_CLIENT_SECRET")
    or "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
)
SCOPE = "https://graph.microsoft.com/.default"
# Mailbox owner and contact folder whose contacts are listed by main().
USER = "barbara@bardach.net"
FOLDER_ID = "AAMkADNiYWE4ZDYxLWE4M2EtNGY1MS05YWQwLWY2OWYzMWI3YjZjNAAuAAAAAADrk4YN-mpcR5zROC2646l9AQCo_dM7bg-DQY5RuVpcPz_JAAQU2EZxAAA="
# Fields requested through $select; only id and displayName are used.
SELECT_FIELDS = "id,displayName"
def get_token():
    """Request an access token using the OAuth2 client-credentials grant.

    Posts CLIENT_ID, CLIENT_SECRET and SCOPE to the tenant's v2.0 token
    endpoint by shelling out to ``curl``.

    Returns:
        The ``access_token`` string from the token response.

    Raises:
        SystemExit: with status 1, after printing an ``[ERROR]`` line, when
            curl cannot be started, exits non-zero, returns output that is
            not JSON, or the response contains no ``access_token``.
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    # urlencode() escapes reserved characters so the form body stays
    # well-formed whatever the scope or secret contains.
    form = urlencode({
        "client_id": CLIENT_ID,
        "scope": SCOPE,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    })
    try:
        # "--data @-" makes curl read the body from stdin, which keeps the
        # client secret out of the process argument list. "-S" keeps curl's
        # own error message available on stderr despite "-s".
        result = subprocess.run(
            ["curl", "-sS", "-X", "POST", url,
             "-H", "Content-Type: application/x-www-form-urlencoded",
             "--data", "@-"],
            input=form, capture_output=True, text=True,
        )
    except OSError as exc:
        print(f"[ERROR] Failed to run curl: {exc}", flush=True)
        sys.exit(1)
    if result.returncode != 0:
        detail = result.stderr.strip()
        print(f"[ERROR] curl exited with {result.returncode}: {detail}", flush=True)
        sys.exit(1)
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        snippet = result.stdout[:200]
        print(f"[ERROR] Token endpoint returned non-JSON output: {snippet!r}", flush=True)
        sys.exit(1)
    if not isinstance(data, dict) or "access_token" not in data:
        print(f"[ERROR] Failed to get token: {data}", flush=True)
        sys.exit(1)
    return data["access_token"]
def graph_get(token, url):
    """GET *url* with a bearer token via ``curl`` and return the parsed JSON.

    Args:
        token: Access token sent in the ``Authorization: Bearer`` header.
        url: Absolute URL to request.

    Returns:
        The decoded JSON response body. If curl cannot be started, exits
        non-zero, or produces output that is not JSON, a dict shaped like
        ``{"error": {"code": ..., "message": ...}}`` is returned instead of
        raising, so a caller that checks for an expected key (for example
        ``"value"``) sees the failure through its normal error path.
    """
    try:
        # "-S" keeps curl's error text on stderr even though "-s" hides progress.
        result = subprocess.run(
            ["curl", "-sS", "-X", "GET", url,
             "-H", f"Authorization: Bearer {token}",
             "-H", "Content-Type: application/json"],
            capture_output=True, text=True,
        )
    except OSError as exc:
        return {"error": {"code": "curlNotRun", "message": str(exc)}}
    if result.returncode != 0:
        detail = result.stderr.strip()
        message = f"curl exited with {result.returncode}: {detail}"
        return {"error": {"code": "curlFailed", "message": message}}
    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        snippet = result.stdout[:200]
        message = f"non-JSON response: {snippet!r}"
        return {"error": {"code": "invalidJson", "message": message}}
def _fetch_contacts(token):
    """Page through the contact folder and return every contact record."""
    contacts = []
    url = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{FOLDER_ID}/contacts?$top=100&$select={SELECT_FIELDS}"
    page = 1
    while url:
        data = graph_get(token, url)
        if not isinstance(data, dict) or "value" not in data:
            # A partial listing cannot prove that no duplicates remain, so
            # stop here instead of reporting results on incomplete data.
            print(f"[ERROR] {data}", flush=True)
            print("[ERROR] Contact listing incomplete; verification aborted.", flush=True)
            sys.exit(1)
        contacts.extend(data["value"])
        if page % 20 == 0:
            print(f"  Page {page}, total so far: {len(contacts)}", flush=True)
        url = data.get("@odata.nextLink")
        page += 1
        # Re-acquire the token every 50 pages (presumably to stay ahead of
        # token expiry on long runs); skipped once there is no next page.
        if url and page % 50 == 0:
            token = get_token()
    return contacts


def _group_by_name(contacts):
    """Map each stripped, lower-cased non-empty displayName to its contact ids."""
    groups = defaultdict(list)
    for c in contacts:
        name = (c.get("displayName") or "").strip().lower()
        if name:
            groups[name].append(c["id"])
    return groups


def main(old_count=10404):
    """Re-pull the contact folder and report any duplicates that remain.

    Acquires a token, lists every contact in FOLDER_ID, prints the old and
    new contact counts, then groups contacts by case-insensitive, stripped
    displayName and prints up to ten names that still occur more than once.

    Args:
        old_count: Contact count before deduplication, used for the
            "Contacts removed" line. Defaults to 10404.

    Raises:
        SystemExit: with status 1 if a token cannot be obtained or any page
            of contacts fails to load.
    """
    print("=" * 60, flush=True)
    print("STEP 5: Verify deduplication", flush=True)
    print("=" * 60, flush=True)
    token = get_token()
    print("[OK] Token acquired", flush=True)
    # Pull all contacts (just id and displayName for speed)
    contacts = _fetch_contacts(token)
    new_count = len(contacts)
    print(f"\n{'=' * 60}", flush=True)
    print("VERIFICATION RESULTS", flush=True)
    print(f"{'=' * 60}", flush=True)
    print(f"  Old count (pre-dedup):  {old_count}", flush=True)
    print(f"  New count (post-dedup): {new_count}", flush=True)
    print(f"  Contacts removed:       {old_count - new_count}", flush=True)
    # Check for remaining duplicates
    groups = _group_by_name(contacts)
    remaining_dups = {name: ids for name, ids in groups.items() if len(ids) >= 2}
    if remaining_dups:
        print(f"\n[WARNING] Remaining duplicate groups: {len(remaining_dups)}", flush=True)
        # Only the first ten names (alphabetically) are listed.
        for name, ids in sorted(remaining_dups.items())[:10]:
            print(f"  {name}: {len(ids)} copies", flush=True)
    else:
        print("\n[OK] No duplicates remain! Deduplication complete.", flush=True)
    print(f"\n  Unique contact names: {len(groups)}", flush=True)
    no_name = sum(1 for c in contacts if not (c.get("displayName") or "").strip())
    print(f"  Contacts without name: {no_name}", flush=True)
# Run the verification only when this file is executed as a script.
if __name__ == "__main__":
    main()