sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
158
temp/bardach_temp_dupes.py
Normal file
158
temp/bardach_temp_dupes.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Pull all Temp contacts and analyze internal duplicates."""
|
||||
import subprocess, json, sys
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
import os

# Tenant and app registration used for the OAuth2 client-credentials request
# against login.microsoftonline.com.
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLAUDE_APP = "fabb3421-8b34-484b-bc17-e46de9703418"

# SECURITY: the client secret must never live in source control. It is read
# from the CLAUDE_SECRET environment variable instead. The value that was
# previously committed here has to be treated as compromised and rotated.
CLAUDE_SECRET = os.environ.get("CLAUDE_SECRET", "")
if not CLAUDE_SECRET:
    print("[WARNING] CLAUDE_SECRET environment variable is not set; "
          "the token request will be rejected", file=sys.stderr)

# Mailbox whose contact folders are queried.
USER = "barbara@bardach.net"

# Contact properties requested through the $select query parameter.
SELECT = ("id,displayName,givenName,surname,emailAddresses,"
          "homePhones,businessPhones,companyName,jobTitle,"
          "personalNotes,homeAddress,businessAddress,lastModifiedDateTime")
|
||||
|
||||
# Get token (OAuth2 client-credentials grant).
# Each form field is passed with --data-urlencode so that secrets containing
# characters such as '+', '&' or '%' are transmitted intact.
r = subprocess.run([
    'curl', '-s', '-X', 'POST',
    f'https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token',
    '--data-urlencode', f'client_id={CLAUDE_APP}',
    '--data-urlencode', f'client_secret={CLAUDE_SECRET}',
    '--data-urlencode', 'scope=https://graph.microsoft.com/.default',
    '--data-urlencode', 'grant_type=client_credentials',
], capture_output=True, text=True)

# Fail with a readable message instead of a bare KeyError/JSONDecodeError
# when curl fails or the identity platform rejects the request.
try:
    token_response = json.loads(r.stdout)
except json.JSONDecodeError:
    sys.exit(f"[ERROR] Token request returned no JSON "
             f"(curl exit code {r.returncode}): {r.stderr.strip()[:200]}")

token = token_response.get('access_token')
if not token:
    reason = (token_response.get('error_description')
              or token_response.get('error')
              or 'no access_token in response')
    sys.exit(f"[ERROR] Token request failed: {str(reason)[:300]}")
print("[OK] Token acquired")
|
||||
|
||||
# Get Temp folder ID.
# The folder listing is followed across @odata.nextLink pages so that a
# mailbox with more folders than fit on one page is still searched fully.
folders = []
folders_url = f'https://graph.microsoft.com/v1.0/users/{USER}/contactFolders?$select=displayName,id'
while folders_url:
    r2 = subprocess.run(['curl', '-s', '-H', f'Authorization: Bearer {token}',
                         folders_url],
                        capture_output=True, text=True)
    try:
        folder_page = json.loads(r2.stdout)
    except json.JSONDecodeError:
        sys.exit(f"[ERROR] Folder listing returned no JSON "
                 f"(curl exit code {r2.returncode})")
    if 'error' in folder_page:
        sys.exit(f"[ERROR] Folder listing failed: "
                 f"{folder_page['error'].get('message', '')[:200]}")
    folders.extend(folder_page.get('value', []))
    folders_url = folder_page.get('@odata.nextLink')

# Use a default so a missing folder produces a clear message rather than a
# bare StopIteration traceback.
temp_id = next((f['id'] for f in folders if f.get('displayName') == 'Temp'), None)
if temp_id is None:
    sys.exit(f"[ERROR] No contact folder named 'Temp' found for {USER}")
|
||||
|
||||
# Pull all Temp contacts, following @odata.nextLink until the listing ends.
print("Pulling Temp contacts...")
all_contacts = []
page = 0
url = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{temp_id}/contacts?$top=100&$select={SELECT}"

while url:
    page += 1
    curl_cmd = ['curl', '-s', '-H', f'Authorization: Bearer {token}', url]
    r = subprocess.run(curl_cmd, capture_output=True, text=True)
    data = json.loads(r.stdout)

    # An error payload stops paging; whatever was fetched so far is kept.
    if 'error' in data:
        print(f"Error page {page}: {data['error'].get('message','')[:200]}")
        break

    items = data.get('value', [])
    all_contacts += items
    url = data.get('@odata.nextLink')

    # Progress line every 20 pages.
    if page % 20 == 0:
        print(f" Page {page}: {len(all_contacts)} contacts...")

    # An empty page also ends the loop, even if a next link was returned.
    if not items:
        break

print(f"\nTotal Temp contacts pulled: {len(all_contacts)}")
|
||||
|
||||
# Persist the raw contact list as JSON so it can be re-read later.
raw_dump_path = 'D:/ClaudeTools/temp/bardach_temp_all.json'
with open(raw_dump_path, 'w') as raw_dump:
    json.dump(all_contacts, raw_dump)
print("Saved to bardach_temp_all.json")
|
||||
|
||||
# Group contacts by normalised displayName (trimmed, lower-cased) and report
# how many entries share a name.
print(f"\n{'='*60}")
print("DUPLICATE ANALYSIS BY NAME")
print(f"{'='*60}")

name_groups = defaultdict(list)
no_name = []
for contact in all_contacts:
    normalised = (contact.get('displayName') or '').strip().lower()
    if normalised:
        name_groups[normalised].append(contact)
    else:
        # Contacts with a missing or blank displayName are tracked separately.
        no_name.append(contact)

unique_names = len(name_groups)
dupe_names = {
    group_name: members
    for group_name, members in name_groups.items()
    if len(members) > 1
}
# Every member beyond the first in a group counts as a removable duplicate.
total_dupes = sum(map(len, dupe_names.values())) - len(dupe_names)

print(f"Total contacts: {len(all_contacts)}")
print(f"Contacts with no name: {len(no_name)}")
print(f"Unique names: {unique_names}")
print(f"Names with duplicates: {len(dupe_names)}")
print(f"Total duplicate entries (removable): {total_dupes}")
print(f"Estimated after dedup: {unique_names + len(no_name)}")
|
||||
|
||||
# Histogram: group size -> number of names that have that many entries.
dupe_dist = Counter(map(len, dupe_names.values()))
print(f"\nDuplicate distribution:")
for count in sorted(dupe_dist):
    num_names = dupe_dist[count]
    print(f" {count}x duplicated: {num_names} names")
|
||||
|
||||
# Top duplicated names, largest groups first. The sort is stable, so groups
# of equal size keep their original relative order.
sorted_dupes = sorted(dupe_names.items(), key=lambda x: -len(x[1]))
print(f"\nTop 30 most duplicated:")
for name, contacts in sorted_dupes[:30]:
    emails = set()
    notes_count = 0
    for c in contacts:
        # `or []` guards against an explicit null emailAddresses value.
        for e in c.get('emailAddresses') or []:
            if e.get('address'):
                emails.add(e['address'].lower())
        if (c.get('personalNotes') or '').strip():
            notes_count += 1
    # Sort before slicing: set iteration order for strings varies between
    # runs, which made the two displayed addresses nondeterministic.
    email_str = ', '.join(sorted(emails)[:2]) if emails else '(no email)'
    print(f" {len(contacts)}x - {name} | {email_str} | {notes_count} have notes")
|
||||
|
||||
# Scan personalNotes for known boilerplate lines to find cleanup patterns.
print(f"\n{'='*60}")
print("NOTES CLEANUP PATTERNS")
print(f"{'='*60}")

# Collect every non-blank note, trimmed.
trimmed_notes = ((c.get('personalNotes') or '').strip() for c in all_contacts)
all_notes = [text for text in trimmed_notes if text]

print(f"Contacts with notes: {len(all_notes)}")

# Classify each note line; a line counts towards at most one pattern, and
# the first matching rule wins.
patterns_found = defaultdict(int)
for note_text in all_notes:
    for raw_line in note_text.split('\n'):
        line = raw_line.strip()
        lowered = line.lower()
        has_read_only = 'read-only' in lowered

        if has_read_only and 'outlook' in lowered:
            label = 'read-only outlook warning'
        elif 'tap the link' in lowered:
            label = 'tap the link instruction'
        elif 'edit in outlook' in lowered:
            label = 'edit in outlook'
        elif (line.startswith('20') and len(line) > 10
              and ('This contact' in line or has_read_only)):
            label = 'dated read-only warning'
        else:
            continue
        patterns_found[label] += 1

print(f"\nKnown junk patterns found:")
# reverse=True keeps sort stability, so ties stay in first-seen order.
for pattern, count in sorted(patterns_found.items(),
                             key=lambda kv: kv[1], reverse=True):
    print(f" {pattern}: {count} occurrences")
|
||||
|
||||
# Show sample notes with the junk pattern
print(f"\nSample notes containing 'read-only' (first 5):")
junk_notes = [text for text in all_notes if 'read-only' in text.lower()]
for notes in junk_notes[:5]:
    print(f" ---")
    # Show first 300 chars
    print(f" {notes[:300]}")

# Show sample of notes that DON'T have the junk pattern (real data);
# notes of 5 characters or fewer are skipped.
print(f"\nSample notes WITHOUT 'read-only' junk (first 5):")
real_notes = [
    text for text in all_notes
    if len(text) > 5 and 'read-only' not in text.lower()
]
for notes in real_notes[:5]:
    print(f" ---")
    print(f" {notes[:300]}")
|
||||
Reference in New Issue
Block a user