Files
claudetools/temp/bardach_temp_check_current.py
Mike Swanson fa15b03180 sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files:
- Quote wizard frontend (all components, hooks, types, config)
- API updates (config, models, routers, schemas, services)
- Client work (bg-builders, gurushow)
- Scripts (BGB Lesley termination, CIPP, Datto, migration)
- Temp files (Bardach contacts, VWP investigation, misc)
- Credentials and session logs
- Email service, PHP API, session logs

Machine: ACG-M-L5090
Timestamp: 2026-03-10 19:11:00

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 19:59:08 -07:00

149 lines
5.6 KiB
Python

"""Check current state of Bardach Temp contacts folder and compare to previous snapshot."""
import json
import os
import subprocess
import sys
from collections import Counter, defaultdict
from urllib.parse import urlencode
# Tenant and app registration used for the client-credentials token request
# against login.microsoftonline.com.
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLAUDE_APP = "fabb3421-8b34-484b-bc17-e46de9703418"
# The client secret must never live in source control: it is read from the
# CLAUDE_SECRET environment variable and defaults to an empty string when the
# variable is unset (the token request will then be rejected).
# NOTE(review): a literal secret was previously committed in this file; treat
# that value as compromised and rotate it in the app registration.
CLAUDE_SECRET = os.environ.get("CLAUDE_SECRET", "")
# Mailbox whose contact folders are inspected.
USER = "barbara@bardach.net"
# Contact properties requested through $select when pulling contacts.
SELECT = ("id,displayName,givenName,surname,emailAddresses,"
          "homePhones,businessPhones,companyName,jobTitle,"
          "personalNotes,lastModifiedDateTime")
# --- 1. Get token ---
# Request an app-only (client_credentials) token for Microsoft Graph.
# The form body contains the client secret, so it is URL-encoded and fed to
# curl on stdin ("--data @-") instead of being placed in argv, where it would
# be visible in the process list.
if not CLAUDE_SECRET:
    print("[ERROR] Token failed: client secret is empty")
    sys.exit(1)
token_form = urlencode({
    'client_id': CLAUDE_APP,
    'client_secret': CLAUDE_SECRET,
    'scope': 'https://graph.microsoft.com/.default',
    'grant_type': 'client_credentials',
})
r = subprocess.run([
    'curl', '-s', '-X', 'POST',
    f'https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token',
    '--data', '@-',
], input=token_form, capture_output=True, text=True)
# curl runs with -s, so a transport failure yields an empty body; report it
# explicitly rather than letting json.loads raise a traceback.
if r.returncode != 0:
    print(f"[ERROR] Token failed: curl exited with code {r.returncode}")
    sys.exit(1)
try:
    tok_data = json.loads(r.stdout)
except json.JSONDecodeError:
    print(f"[ERROR] Token failed: non-JSON response: {r.stdout[:200]!r}")
    sys.exit(1)
if 'access_token' not in tok_data:
    print(f"[ERROR] Token failed: {tok_data.get('error_description', tok_data)}")
    sys.exit(1)
token = tok_data['access_token']
print("[OK] Token acquired")
# --- 2. Get Temp folder ID ---
# List the mailbox's contact folders and locate the one named exactly 'Temp'.
# The listing is followed across pages via @odata.nextLink so that a folder
# on a later page is not misreported as missing.
folders = []
folders_url = f'https://graph.microsoft.com/v1.0/users/{USER}/contactFolders?$select=displayName,id'
while folders_url:
    r2 = subprocess.run(['curl', '-s', '-H', f'Authorization: Bearer {token}', folders_url],
                        capture_output=True, text=True)
    try:
        folder_page = json.loads(r2.stdout)
    except json.JSONDecodeError:
        print(f"[ERROR] Folder listing failed: non-JSON response (curl exit code {r2.returncode})")
        sys.exit(1)
    # A Graph error payload has no 'value'; surface it instead of reporting
    # an empty folder list.
    if 'error' in folder_page:
        print(f"[ERROR] Folder listing failed: {folder_page['error'].get('message', '')[:200]}")
        sys.exit(1)
    folders.extend(folder_page.get('value', []))
    folders_url = folder_page.get('@odata.nextLink')
temp_id = None
for f in folders:
    if f['displayName'] == 'Temp':
        temp_id = f['id']
        break
if not temp_id:
    print("[ERROR] Temp folder not found. Folders:", [f['displayName'] for f in folders])
    sys.exit(1)
# Only a prefix of the ID is printed to keep the log line short.
print(f"[OK] Temp folder ID: {temp_id[:20]}...")
# --- 3. Pull ALL contacts with pagination ---
# Fetch the Temp folder's contacts 100 at a time, following @odata.nextLink
# until no further page is advertised or an empty page is returned.
print("Pulling Temp contacts...")
url = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{temp_id}/contacts?$top=100&$select={SELECT}"
all_contacts = []
page = 0
# Stays True only if every page was fetched and parsed without error.
pull_complete = True
while url:
    page += 1
    r = subprocess.run(['curl', '-s', '-H', f'Authorization: Bearer {token}', url],
                       capture_output=True, text=True)
    try:
        data = json.loads(r.stdout)
    except json.JSONDecodeError:
        # Treat an empty or non-JSON body like an API error so it is reported
        # through the same path instead of crashing with a traceback.
        data = {'error': {'message': f'non-JSON response (curl exit code {r.returncode})'}}
    if 'error' in data:
        print(f"[ERROR] Page {page}: {data['error'].get('message','')[:200]}")
        pull_complete = False
        break
    items = data.get('value', [])
    all_contacts.extend(items)
    url = data.get('@odata.nextLink')
    if page % 10 == 0:
        print(f" Page {page}: {len(all_contacts)} contacts so far...")
    if not items:
        break
if pull_complete:
    print(f"[OK] Total Temp contacts pulled: {len(all_contacts)} ({page} pages)")
else:
    # Best effort: keep going with what was retrieved, but make it obvious
    # that every figure printed after this point is based on partial data.
    print(f"[WARNING] Pull stopped early on page {page}; only {len(all_contacts)} contacts retrieved - results below are based on partial data")
# --- 4. Duplicate analysis ---
# Group contacts by case-insensitive, whitespace-trimmed displayName and
# print summary counts. Contacts without a usable name are tracked separately.
banner = '=' * 60
print(f"\n{banner}")
print("DUPLICATE ANALYSIS BY displayName")
print(banner)

name_groups = defaultdict(list)
no_name_contacts = []
for contact in all_contacts:
    display = (contact.get('displayName') or '').strip()
    if not display:
        no_name_contacts.append(contact)
        continue
    name_groups[display.lower()].append(contact)

# Split the groups into names seen once and names seen more than once.
dupe_names = {}
single_names = {}
for key, members in name_groups.items():
    if len(members) > 1:
        dupe_names[key] = members
    elif len(members) == 1:
        single_names[key] = members

unique_names = len(name_groups)
total_dupe_entries = sum(map(len, dupe_names.values()))
# One entry per duplicated name would be kept; the rest are removable.
total_removable = total_dupe_entries - len(dupe_names)
estimated_after = len(single_names) + len(dupe_names) + len(no_name_contacts)

summary_rows = [
    ("Total contacts: ", len(all_contacts)),
    ("Contacts with no name: ", len(no_name_contacts)),
    ("Unique display names: ", unique_names),
    (" - Names appearing once: ", len(single_names)),
    (" - Names with duplicates: ", len(dupe_names)),
    ("Total entries in dupe groups: ", total_dupe_entries),
    ("Removable duplicates: ", total_removable),
    ("Estimated after dedup: ", estimated_after),
]
for label, value in summary_rows:
    print(f"{label}{value}")
# Duplicate distribution
# Histogram: group size -> number of display names with that many entries.
dupe_dist = Counter()
for members in dupe_names.values():
    dupe_dist[len(members)] += 1
print("\nDuplicate distribution (how many names appear N times):")
for group_size in sorted(dupe_dist):
    print(f" {group_size}x: {dupe_dist[group_size]} names")
# Top 20 most duplicated
def _table_row(count, name, emails):
    """Format one line of the top-duplicates table with fixed-width columns."""
    return f" {count:<6} {name:<35} {emails}"


# Largest groups first; the sort is stable, so equally sized groups keep
# their original relative order.
sorted_dupes = sorted(dupe_names.items(), key=lambda kv: len(kv[1]), reverse=True)
print("\nTop 20 most duplicated names:")
print(_table_row('Count', 'Name', 'Emails'))
print(_table_row('-' * 5, '-' * 34, '-' * 30))
for name, contacts in sorted_dupes[:20]:
    # Distinct lower-cased addresses across every contact in the group.
    emails = {
        entry['address'].lower()
        for member in contacts
        for entry in member.get('emailAddresses', [])
        if entry.get('address')
    }
    if emails:
        email_str = ', '.join(sorted(emails)[:3])
    else:
        email_str = '(no email)'
    # Grab original-case name from first contact
    orig_name = contacts[0].get('displayName', name)
    print(_table_row(len(contacts), orig_name[:34], email_str[:60]))
# --- 5. Compare to previous snapshot ---
# Compare the contacts just pulled against a previously saved JSON snapshot,
# by total count and by contact ID.
print(f"\n{'='*60}")
print("COMPARISON TO PREVIOUS SNAPSHOT")
print(f"{'='*60}")
prev_file = 'D:/ClaudeTools/temp/bardach_temp_all.json'
prev_contacts = None
if os.path.exists(prev_file):
    try:
        # Decode explicitly as UTF-8 (tolerating a BOM) instead of relying on
        # the platform's locale encoding.
        # NOTE(review): assumes the snapshot was written as UTF-8/ASCII JSON -
        # confirm against the script that produced it.
        with open(prev_file, 'r', encoding='utf-8-sig') as f:
            prev_contacts = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        print(f"[WARNING] Could not read previous file {prev_file}: {exc}")
else:
    print(f"[WARNING] Previous file not found: {prev_file}")
if prev_contacts is None:
    print("No comparison available.")
else:
    prev_count = len(prev_contacts)
    curr_count = len(all_contacts)
    diff = curr_count - prev_count
    # Negative numbers already carry their sign; zero is printed bare.
    sign = '+' if diff > 0 else ''
    print(f"Previous count: {prev_count}")
    print(f"Current count: {curr_count}")
    print(f"Difference: {sign}{diff}")
    # Check IDs overlap
    prev_ids = {c.get('id') for c in prev_contacts}
    curr_ids = {c.get('id') for c in all_contacts}
    removed = prev_ids - curr_ids
    added = curr_ids - prev_ids
    unchanged = prev_ids & curr_ids
    print("\nBy contact ID:")
    print(f" Still present (unchanged ID): {len(unchanged)}")
    print(f" Removed since last snapshot: {len(removed)}")
    print(f" New since last snapshot: {len(added)}")
print("\n[INFO] Script complete.")