# claudetools/temp/bardach_merge_analysis.py

"""Analyze Temp vs Contacts folders for merge strategy."""
import subprocess, json, sys

# Values used by get_token() for the OAuth2 client-credentials request.
# TENANT_ID is interpolated into the login.microsoftonline.com token URL.
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
# Sent as client_id in the token request.
CLAUDE_APP = "fabb3421-8b34-484b-bc17-e46de9703418"
# NOTE(review): the client secret is hard-coded in source. Anyone who can read
# this file can request tokens for this app registration; consider rotating it
# and loading it from the environment or a secret store instead.
CLAUDE_SECRET = "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
# Mailbox whose contacts are read through the /users/{USER}/... Graph endpoints.
USER = "barbara@bardach.net"

# Contact properties requested through $select; everything listed here is what
# the analysis wants to keep from each contact.
SELECT_FIELDS = ",".join([
    # identity and name parts
    "id", "displayName", "givenName", "surname", "middleName", "nickName",
    # email and phone
    "emailAddresses", "homePhones", "businessPhones",
    # employment
    "companyName", "jobTitle", "department",
    # postal addresses
    "homeAddress", "businessAddress", "otherAddress",
    # personal details
    "birthday", "personalNotes",
    # miscellaneous
    "categories", "title", "generation", "imAddresses",
    # folder membership
    "parentFolderId",
])

def get_token():
    """Request an access token for Microsoft Graph via curl.

    Posts an OAuth2 client-credentials request to the token endpoint of
    TENANT_ID using CLAUDE_APP / CLAUDE_SECRET and the
    ``https://graph.microsoft.com/.default`` scope.

    Returns:
        The ``access_token`` string from the token response.

    Raises:
        RuntimeError: if curl exits non-zero, the response is not JSON, or
            the response contains no ``access_token`` (for example when the
            credentials are rejected).
    """
    form = {
        'client_id': CLAUDE_APP,
        'client_secret': CLAUDE_SECRET,
        'scope': 'https://graph.microsoft.com/.default',
        'grant_type': 'client_credentials',
    }
    cmd = ['curl', '-s', '-X', 'POST',
           f'https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token']
    # --data-urlencode lets curl percent-encode each value, so a secret that
    # contains '&', '+', '=' or similar cannot corrupt the form body.
    for field, value in form.items():
        cmd += ['--data-urlencode', f'{field}={value}']

    r = subprocess.run(cmd, capture_output=True, text=True)
    if r.returncode != 0:
        raise RuntimeError(
            f"Token request failed: curl exited with {r.returncode} {r.stderr.strip()}".rstrip())

    try:
        data = json.loads(r.stdout)
    except json.JSONDecodeError as err:
        raise RuntimeError(
            f"Token request returned a non-JSON response: {r.stdout[:200]!r}") from err

    token = data.get('access_token') if isinstance(data, dict) else None
    if not token:
        # Surface the identity platform's own explanation instead of a bare KeyError.
        reason = ''
        if isinstance(data, dict):
            reason = f"{data.get('error', '')}: {data.get('error_description', '')}"
        raise RuntimeError(f"Token response has no access_token ({reason[:300]})")
    return token

def pull_contacts(token, folder_id=None, folder_name="default"):
    """Download all contacts from one contact folder of USER, page by page.

    Args:
        token: bearer token sent in the Authorization header.
        folder_id: id of the contact folder to read; when falsy, the user's
            ``/contacts`` collection is read instead.
        folder_name: label used only in progress and error output.

    Returns:
        List of contact dicts accumulated over every page fetched. If a
        request fails (API error or unparseable response), the problem is
        printed and the contacts collected so far are returned.
    """
    if folder_id:
        base = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{folder_id}/contacts"
    else:
        base = f"https://graph.microsoft.com/v1.0/users/{USER}/contacts"

    url = f"{base}?$top=100&$select={SELECT_FIELDS}"
    all_contacts = []
    page = 0

    while url:
        page += 1
        r = subprocess.run(['curl', '-s', '-H', f'Authorization: Bearer {token}', url],
                           capture_output=True, text=True)
        try:
            data = json.loads(r.stdout)
        except json.JSONDecodeError:
            # curl failed or returned a non-JSON body (empty output, HTML
            # error page, ...): report it like an API error instead of crashing.
            print(f"  Error: curl exit {r.returncode}, unparseable response: {r.stdout[:200]!r}")
            break

        if 'error' in data:
            # 'message' may be present but null; slicing None would raise.
            print(f"  Error: {(data['error'].get('message') or '')[:200]}")
            break

        items = data.get('value', [])
        all_contacts.extend(items)
        # The next page URL is absent on the last page, which ends the loop.
        url = data.get('@odata.nextLink')

        if page % 10 == 0:
            print(f"  {folder_name}: page {page}, {len(all_contacts)} contacts...")

        # An empty page also ends the loop, even if a nextLink was returned.
        if not items:
            break

    print(f"  {folder_name}: {len(all_contacts)} total")
    return all_contacts

token = get_token()
print("[OK] Token acquired\n")

# Get folder IDs.
# The folder listing can be paged, so follow @odata.nextLink until it runs
# out; reading only the first page could miss the Temp folder.
print("Getting contact folders...")
folders = []
folders_url = f'https://graph.microsoft.com/v1.0/users/{USER}/contactFolders?$select=displayName,id'
while folders_url:
    r = subprocess.run(['curl', '-s', '-H', f'Authorization: Bearer {token}', folders_url],
        capture_output=True, text=True)
    listing = json.loads(r.stdout)
    if 'error' in listing:
        # Report the API error rather than letting it look like a missing folder.
        print(f"  Error: {(listing['error'].get('message') or '')[:200]}")
        break
    folders.extend(listing.get('value', []))
    folders_url = listing.get('@odata.nextLink')

# Pick out the two folders of interest by exact display name.
temp_id = None
contacts_id = None
for f in folders:
    print(f"  {f['displayName']}: {f['id'][:40]}...")
    if f['displayName'] == 'Temp':
        temp_id = f['id']
    elif f['displayName'] == 'Contacts':
        contacts_id = f['id']

# Only Temp is mandatory: pull_contacts() falls back to /contacts when
# contacts_id is None.
if not temp_id:
    print("[ERROR] Temp folder not found!")
    sys.exit(1)

# Download both folders: Contacts first, then Temp
print("\nPulling Contacts folder...")
main_contacts = pull_contacts(token, folder_id=contacts_id, folder_name="Contacts")

print("\nPulling Temp folder...")
temp_contacts = pull_contacts(token, folder_id=temp_id, folder_name="Temp")

# Write the raw downloads to disk, one JSON file per folder
for dump_path, payload in (
    ('D:/ClaudeTools/temp/bardach_main_contacts.json', main_contacts),
    ('D:/ClaudeTools/temp/bardach_temp_contacts.json', temp_contacts),
):
    with open(dump_path, 'w') as out:
        json.dump(payload, out, indent=2)
print(f"\nSaved raw data files")

# Build matching keys
def make_key(c):
    """Return the contact's displayName trimmed and lower-cased, or '' if it has none."""
    display = c.get('displayName')
    if not display:
        # Missing, None and empty names all collapse to the empty key.
        return ''
    return display.strip().lower()

def make_email_keys(c):
    """Collect the contact's email addresses as normalized matching keys.

    Args:
        c: contact dict. ``emailAddresses`` is read as a list of dicts with
            an ``address`` entry; it may be missing or None.

    Returns:
        Set of trimmed, lower-cased addresses. Entries whose address is
        missing, None, or whitespace-only are skipped, so the set never
        contains '' (an empty key would make unrelated contacts "match" and
        would make a contact with no usable email look non-blank).
    """
    keys = set()
    for entry in c.get('emailAddresses') or []:
        address = (entry.get('address') or '').strip().lower()
        if address:
            keys.add(address)
    return keys

# Build lookup tables over the main folder: key -> list of contacts sharing it
main_by_name, main_by_email = {}, {}
for contact in main_contacts:
    name_key = make_key(contact)
    if name_key:
        # Contacts without a usable name are left out of the name index.
        if name_key not in main_by_name:
            main_by_name[name_key] = []
        main_by_name[name_key].append(contact)
    for address in make_email_keys(contact):
        bucket = main_by_email.get(address)
        if bucket is None:
            bucket = main_by_email[address] = []
        bucket.append(contact)

# Sort every Temp contact into exactly one bucket:
#   blank            - neither a name key nor any email key
#   matched_by_name  - name key exists in the main folder (checked first)
#   matched_by_email - otherwise, some email key exists in the main folder
#   unmatched        - everything else
matched_by_name = []
matched_by_email = []
unmatched = []
blank = []

for candidate in temp_contacts:
    name_key = make_key(candidate)
    addresses = make_email_keys(candidate)

    if not (name_key or addresses):
        blank.append(candidate)
    elif name_key and name_key in main_by_name:
        matched_by_name.append((candidate, main_by_name[name_key]))
    else:
        # First address (in set iteration order) that is known in main wins.
        email_match = next(
            (main_by_email[addr] for addr in addresses if addr in main_by_email),
            None,
        )
        if email_match:
            matched_by_email.append((candidate, email_match))
        else:
            unmatched.append(candidate)

# Headline counts for each bucket, plus a total that should equal the Temp count
rule = '=' * 60
by_name_count = len(matched_by_name)
by_email_count = len(matched_by_email)
unmatched_count = len(unmatched)
blank_count = len(blank)
categorized_total = by_name_count + by_email_count + unmatched_count + blank_count

print(f"\n{rule}")
print("MERGE ANALYSIS")
print(rule)
print(f"Main Contacts folder: {len(main_contacts)}")
print(f"Temp folder (iCloud): {len(temp_contacts)}")
print()
print(f"Matched by name:      {by_name_count}")
print(f"Matched by email:     {by_email_count}")
print(f"Unmatched (new):      {unmatched_count}")
print(f"Blank (no name/email):{blank_count}")
print(f"Total categorized:    {categorized_total}")

# Per-field population counts: how many contacts in each folder carry a value
def _is_populated(value):
    """True for a non-blank str, a non-empty list, or a dict with any truthy value."""
    if not value:
        return False
    if isinstance(value, str):
        return bool(value.strip())
    if isinstance(value, list):
        return len(value) > 0
    if isinstance(value, dict):
        return any(value.values())
    # Any other type is not counted.
    return False


print(f"\n{'='*60}")
print(f"FIELD ANALYSIS - What Temp contacts add")
print(f"{'='*60}")

fields_to_check = [
    'personalNotes', 'companyName', 'jobTitle', 'birthday',
    'homeAddress', 'businessAddress', 'homePhones', 'businessPhones',
    'nickName', 'categories',
]

for field in fields_to_check:
    temp_has = len([1 for entry in temp_contacts if _is_populated(entry.get(field))])
    main_has = len([1 for entry in main_contacts if _is_populated(entry.get(field))])
    print(f"  {field:25s}: Temp={temp_has:5d}  Main={main_has:5d}")

# Count matched pairs where the Temp copy has notes and the first main match has none.
# Only the first main match is inspected; the main side is looked at only when
# the Temp side actually has notes.
notes_to_merge = sum(
    1
    for temp_c, main_matches in matched_by_name + matched_by_email
    if (temp_c.get('personalNotes') or '').strip()
    and not (main_matches[0].get('personalNotes') or '').strip()
)

print(f"\n  Matched contacts where Temp has notes but Main doesn't: {notes_to_merge}")

# Sample unmatched: list the first 30 Temp contacts that matched nothing in main
print(f"\n{'='*60}")
print(f"SAMPLE UNMATCHED (first 30 - these would be ADDED to main)")
print(f"{'='*60}")
for c in unmatched[:30]:
    # A key that is present with a None/'' value bypasses .get()'s default,
    # so fall back on falsy values rather than only on missing keys.
    name = (c.get('displayName') or '').strip() or '(no name)'
    # Skip entries without an address so the list has no stray separators
    # and a None address cannot break join().
    emails = ', '.join(e['address'] for e in (c.get('emailAddresses') or [])
                       if e.get('address'))
    company = c.get('companyName') or ''
    # Prefer emails as the identifying detail, then company.
    detail = emails or company
    print(f"  {name}" + (f" - {detail}" if detail else ""))
if len(unmatched) > 30:
    print(f"  ... and {len(unmatched)-30} more")

# Sample blank: for up to 10 contacts with no name or email, list which
# remaining fields actually hold a value
if blank:
    print(f"\n{'='*60}")
    print(f"BLANK CONTACTS ({len(blank)} - no displayName or email)")
    print(f"{'='*60}")
    # Fields left out of the listing.
    hidden_fields = {
        'id', 'parentFolderId', '@odata.etag', 'changeKey',
        'createdDateTime', 'lastModifiedDateTime', 'categories',
        'flag', 'emailAddresses', 'homePhones', 'businessPhones',
        'imAddresses',
    }
    for c in blank[:10]:
        populated = []
        for field_name, field_value in c.items():
            if not field_value:
                continue
            if field_name in hidden_fields or field_name.startswith('@'):
                continue
            populated.append(field_name)
        print(f"  Fields: {populated}")