#!/usr/bin/env python3
"""
Bardach Contact Merge: Merge extra data from Temp contacts into Main contacts,
then delete the Temp copies. Main is authoritative - only ADD missing data.
"""

import json
import os
import re
import subprocess
import sys
import time
from datetime import datetime
from urllib.parse import urlencode

# Force line-buffered output so progress shows up immediately when piped.
# Guarded because a replaced stdout/stderr object may not offer reconfigure().
for _stream in (sys.stdout, sys.stderr):
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(line_buffering=True)

# ============================================================
# Configuration
# ============================================================
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY NOTE(review): a client secret is stored in plain text in this file.
# It should be rotated and supplied only through the environment; the literal
# is kept as a fallback solely so existing invocations keep working.
CLIENT_SECRET = os.environ.get(
    "BARDACH_CLIENT_SECRET", "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
)
SCOPE = "https://graph.microsoft.com/.default"
USER = "barbara@bardach.net"
BASE_URL = f"https://graph.microsoft.com/v1.0/users/{USER}/contacts"

DATA_FILE = "D:/ClaudeTools/temp/bardach_temp_vs_main.json"
LOG_FILE = "D:/ClaudeTools/temp/bardach_merge_results.json"

THROTTLE_DELAY = 0.35  # seconds between API calls
CURL_TIMEOUT = 30  # seconds allowed for a single curl invocation


# ============================================================
# Helpers
# ============================================================
def _run_curl(cmd):
    """Run a curl command line and return ``(stdout, error)``.

    ``error`` is ``None`` when curl exited with status 0, otherwise a short
    description of the transport failure (timeout, curl missing, non-zero
    exit). ``stdout`` is whatever curl printed, or ``""`` if it never ran.
    """
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8: the locale default would mangle
            # non-ASCII characters in the JSON that later gets PATCHed back.
            encoding="utf-8",
            errors="replace",
            timeout=CURL_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        return "", f"curl timed out after {CURL_TIMEOUT}s"
    except OSError as exc:
        return "", f"curl could not be started: {exc}"
    if result.returncode != 0:
        # With -s curl prints nothing to stderr, so fall back to the exit code.
        detail = result.stderr.strip() or f"curl exited with code {result.returncode}"
        return result.stdout, detail
    return result.stdout, None


def get_token():
    """Acquire OAuth2 token via client credentials.

    Returns the access token string. Prints an error and exits the process
    with status 1 when the request fails or the response has no
    ``access_token``.
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    # urlencode() escapes reserved characters ('&', '+', '%', ...) that would
    # otherwise corrupt the form body if they appear in the secret or scope.
    form_body = urlencode({
        "client_id": CLIENT_ID,
        "scope": SCOPE,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    })
    cmd = [
        "curl", "-s", "-X", "POST", url,
        "-H", "Content-Type: application/x-www-form-urlencoded",
        "-d", form_body,
    ]
    stdout, err = _run_curl(cmd)
    data = None
    if err is None:
        try:
            data = json.loads(stdout)
        except json.JSONDecodeError:
            err = f"Non-JSON response: {stdout[:200]}"
    if err is not None:
        print(f"[ERROR] Token acquisition failed: {err}")
        sys.exit(1)
    if not isinstance(data, dict) or "access_token" not in data:
        print(f"[ERROR] Token acquisition failed: {data}")
        sys.exit(1)
    print(f"[OK] Token acquired at {datetime.now().strftime('%H:%M:%S')}")
    return data["access_token"]


def api_get(token, url):
    """GET request to Graph API.

    Returns the parsed JSON response. Transport failures and non-JSON bodies
    are reported as ``{"error": {"code": ..., "message": ...}}`` (a dict with
    a ``message`` key, like the error objects callers already read) instead
    of raising.
    """
    cmd = [
        "curl", "-s", "-X", "GET", url,
        "-H", f"Authorization: Bearer {token}",
        "-H", "Content-Type: application/json",
    ]
    stdout, err = _run_curl(cmd)
    if err is not None:
        return {"error": {"code": "TransportError", "message": err}}
    try:
        return json.loads(stdout)
    except json.JSONDecodeError:
        return {
            "error": {
                "code": "NonJsonResponse",
                "message": f"Non-JSON response: {stdout[:200]}",
            }
        }


def api_patch(token, contact_id, body):
    """PATCH a contact.

    ``body`` is serialized with ``json.dumps`` and sent as the request body.
    Returns the parsed JSON response, or ``{"error": <str>}`` when curl fails
    or the response is not JSON.
    """
    url = f"{BASE_URL}/{contact_id}"
    cmd = [
        "curl", "-s", "-X", "PATCH", url,
        "-H", f"Authorization: Bearer {token}",
        "-H", "Content-Type: application/json",
        "-d", json.dumps(body),
    ]
    stdout, err = _run_curl(cmd)
    if err is not None:
        return {"error": err}
    try:
        return json.loads(stdout)
    except json.JSONDecodeError:
        return {"error": f"Non-JSON response: {stdout[:200]}"}


def api_delete(token, contact_id):
    """DELETE a contact. Returns True on success (204 or 200), False otherwise."""
    url = f"{BASE_URL}/{contact_id}"
    cmd = [
        # os.devnull instead of a literal "/dev/null": that path only exists on
        # POSIX, and the D:/ paths above suggest this may run on Windows.
        "curl", "-s", "-o", os.devnull, "-w", "%{http_code}",
        "-X", "DELETE", url,
        "-H", f"Authorization: Bearer {token}",
    ]
    stdout, _err = _run_curl(cmd)
    # Decide on the HTTP status curl printed; a timeout yields "" -> False.
    return stdout.strip() in ("204", "200")


def is_icloud_junk(notes):
    """Check if personalNotes is iCloud/Outlook read-only junk.

    Empty/None notes count as junk (nothing worth merging).
    """
    if not notes:
        return True
    lower = notes.lower()
    if "read-only" not in lower:
        # Every boilerplate pattern below contains "read-only".
        return False
    # Pattern 1: mentions Outlook alongside "read-only"
    if "outlook" in lower:
        return True
    # Pattern 2: "this contact is read-only" type text
    if "this contact is read-only" in lower:
        return True
    # Pattern 3: "read-only" with "tap", "edit" or "link" (iCloud boilerplate)
    return any(word in lower for word in ("tap", "edit", "link"))


def normalize_phone(phone):
    """Reduce a phone number to digits and '+' for comparison.

    Returns ``""`` for None/empty input instead of raising.
    """
    if not phone:
        return ""
    return re.sub(r'[^0-9+]', '', str(phone))


def is_address_empty(addr):
    """Check if an address dict is empty/null.

    Anything that is not a non-empty dict is treated as empty; a dict is
    empty when every value is falsy or whitespace-only.
    """
    if not addr or not isinstance(addr, dict):
        return True
    return not any(v and str(v).strip() for v in addr.values())


# ============================================================
# STEP 1: Load data and analyze notes
# ============================================================
print("=" * 70)
print("STEP 1: Load data and analyze personalNotes")
print("=" * 70)

# Field groups used when building the merge plan and the PATCH bodies.
LIST_FIELDS = ("emailAddresses", "homePhones", "businessPhones")
TEXT_FIELDS = ("companyName", "jobTitle", "nickName")
ADDRESS_FIELDS = ("homeAddress", "businessAddress", "otherAddress")
SIMPLE_FIELDS = ("personalNotes", "companyName", "jobTitle", "nickName",
                 "birthday", "homeAddress", "businessAddress", "otherAddress")
MAX_EMAILS = 3  # limit applied by the original script ("Graph API max 3 email addresses")
MAX_PHONES = 2  # limit applied by the original script ("Graph API max 2")


def _error_text(err):
    """Return a printable message for a Graph error object or a plain string."""
    if isinstance(err, dict):
        return str(err.get("message", err))
    return str(err)


def _extend_capped(existing, additions, limit):
    """Return ``existing + additions`` capped at ``limit``, or None if nothing new fits.

    Returning None when the capped list is not longer than ``existing`` avoids
    both no-op PATCHes (Main already at the limit) and destructive ones (Main
    already above the limit, where slicing would drop Main's own entries).
    """
    merged = (list(existing) + list(additions))[:limit]
    if len(merged) <= len(existing):
        return None
    return merged


with open(DATA_FILE, "r", encoding="utf-8") as f:
    data = json.load(f)

matches = data["matches_with_extras"]
exact_matches = data.get("exact_matches", [])
print(f"[INFO] Loaded {len(matches)} matches_with_extras")
print(f"[INFO] Loaded {len(exact_matches)} exact_matches (no extras)")

# Analyze notes
notes_junk = 0
notes_real = 0
notes_none = 0
real_notes_samples = []
for m in matches:
    ef = m.get("extra_fields", {})
    if "personalNotes" not in ef:
        notes_none += 1
        continue
    notes = ef["personalNotes"]
    if is_icloud_junk(notes):
        notes_junk += 1
        continue
    notes_real += 1
    if len(real_notes_samples) < 10:
        real_notes_samples.append({
            "displayName": m["displayName"],
            "notes": notes[:200],
        })

print(f"\n personalNotes breakdown:")
print(f" iCloud junk: {notes_junk}")
print(f" Real content: {notes_real}")
print(f" No notes field: {notes_none}")
print(f" Total: {notes_junk + notes_real + notes_none}")
if real_notes_samples:
    print(f"\n Sample real notes ({len(real_notes_samples)}):")
    for i, s in enumerate(real_notes_samples):
        print(f" [{i+1}] {s['displayName']}: {s['notes']}")

# ============================================================
# STEP 2: Build merge plan
# ============================================================
print("\n" + "=" * 70)
print("STEP 2: Build merge plan")
print("=" * 70)

needs_merge = []
nothing_to_merge = []
needs_fetch = []  # contacts where we need to GET current Main data (emails/phones)
field_counts = {
    "personalNotes": 0,
    "emailAddresses": 0,
    "homePhones": 0,
    "businessPhones": 0,
    "companyName": 0,
    "jobTitle": 0,
    "homeAddress": 0,
    "businessAddress": 0,
    "otherAddress": 0,
    "birthday": 0,
    "nickName": 0,
}

for m in matches:
    ef = m.get("extra_fields", {})
    merge_fields = {}
    requires_fetch = False
    for field, value in ef.items():
        if field == "personalNotes":
            keep = not is_icloud_junk(value)
        elif field in LIST_FIELDS:
            keep = bool(value)  # non-empty list
            # List fields are appended to Main's current values, so we must
            # read Main first.
            requires_fetch = requires_fetch or keep
        elif field in TEXT_FIELDS:
            keep = bool(value and str(value).strip())
        elif field in ADDRESS_FIELDS:
            keep = not is_address_empty(value)
        elif field == "birthday":
            keep = bool(value)
        else:
            keep = False  # Skip any unknown fields
        if keep:
            merge_fields[field] = value

    if not merge_fields:
        nothing_to_merge.append(m["displayName"])
        continue

    entry = {
        "temp_id": m["temp_id"],
        "main_id": m["main_id"],
        "displayName": m["displayName"],
        "merge_fields": merge_fields,
        "requires_fetch": requires_fetch,
    }
    needs_merge.append(entry)
    if requires_fetch:
        needs_fetch.append(entry)
    for fk in merge_fields:
        if fk in field_counts:
            field_counts[fk] += 1

print(f"\n Contacts needing merge: {len(needs_merge)}")
print(f" Contacts nothing to merge: {len(nothing_to_merge)}")
print(f" Contacts needing fetch: {len(needs_fetch)} (have emails/phones to append)")
print(f"\n Field merge counts:")
for fk, cnt in sorted(field_counts.items(), key=lambda x: -x[1]):
    if cnt > 0:
        print(f" {fk}: {cnt}")

# ============================================================
# STEP 3: Fetch current Main data for contacts needing email/phone merge
# ============================================================
print("\n" + "=" * 70)
print("STEP 3: Fetch Main contact data for email/phone merges")
print("=" * 70)

# Temp contacts whose extra data could NOT be fully written to Main (fetch or
# PATCH failed). They are excluded from deletion in STEP 5 so the data
# survives and the script can be re-run for them.
unmerged_temp_ids = set()

token = get_token()
fetch_count = 0
fetch_errors = 0
for entry in needs_fetch:
    if fetch_count > 0 and fetch_count % 500 == 0:
        token = get_token()  # periodic token refresh on long runs
    if fetch_count > 0 and fetch_count % 100 == 0:
        print(f" [INFO] Fetched {fetch_count}/{len(needs_fetch)}...")

    url = f"{BASE_URL}/{entry['main_id']}?$select=emailAddresses,homePhones,businessPhones"
    resp = api_get(token, url)
    time.sleep(THROTTLE_DELAY)
    fetch_count += 1

    if "error" in resp:
        print(f" [ERROR] Fetch {entry['displayName']}: {_error_text(resp['error'])}")
        fetch_errors += 1
        entry["current_main"] = None
        unmerged_temp_ids.add(entry["temp_id"])
        continue

    entry["current_main"] = {
        "emailAddresses": resp.get("emailAddresses") or [],
        "homePhones": resp.get("homePhones") or [],
        "businessPhones": resp.get("businessPhones") or [],
    }

print(f"\n [OK] Fetched {fetch_count} contacts ({fetch_errors} errors)")

# ============================================================
# Build PATCH bodies
# ============================================================
print("\n" + "=" * 70)
print("STEP 3b: Build PATCH bodies")
print("=" * 70)

patches = []  # list of (main_id, displayName, patch_body, temp_id)
skipped_no_change = 0

for entry in needs_merge:
    mf = entry["merge_fields"]

    # Simple fields - set directly (these are only in extra_fields if Main lacks them)
    patch = {sf: mf[sf] for sf in SIMPLE_FIELDS if sf in mf}

    # None when the fetch failed (or was never needed): list fields are then
    # skipped because we cannot append without knowing Main's current values.
    current = entry.get("current_main")
    if current is not None:
        # Email addresses - append to existing (Graph API replaces the array)
        if "emailAddresses" in mf:
            existing_emails = list(current.get("emailAddresses") or [])
            seen = {e.get("address", "").lower() for e in existing_emails if e.get("address")}
            additions = []
            for email in mf["emailAddresses"]:
                addr = (email if isinstance(email, str) else email.get("address")) or ""
                key = addr.lower()
                # Skip blanks and duplicates (against Main and within Temp).
                if key and key not in seen:
                    seen.add(key)
                    additions.append({"address": addr, "name": addr})
            merged = _extend_capped(existing_emails, additions, MAX_EMAILS)
            if merged is not None:
                patch["emailAddresses"] = merged

        # Home / business phones - append, comparing on normalized digits
        for pf in ("homePhones", "businessPhones"):
            if pf not in mf:
                continue
            existing_phones = list(current.get(pf) or [])
            seen = {normalize_phone(p) for p in existing_phones if p}
            additions = []
            for p in mf[pf]:
                if not p:
                    continue
                key = normalize_phone(p)
                if key not in seen:
                    seen.add(key)
                    additions.append(p)
            merged = _extend_capped(existing_phones, additions, MAX_PHONES)
            if merged is not None:
                patch[pf] = merged

    if patch:
        patches.append((entry["main_id"], entry["displayName"], patch, entry["temp_id"]))
    else:
        skipped_no_change += 1

print(f" [INFO] Built {len(patches)} PATCH operations")
print(f" [INFO] Skipped {skipped_no_change} (no actual changes after dedup)")

# ============================================================
# STEP 4: Execute PATCHes
# ============================================================
print("\n" + "=" * 70)
print("STEP 4: Execute PATCH operations")
print("=" * 70)

token = get_token()
patch_success = 0
patch_fail = 0
patch_errors_log = []

for i, (main_id, name, body, temp_id) in enumerate(patches):
    if i > 0 and i % 500 == 0:
        token = get_token()
    if i > 0 and i % 100 == 0:
        print(f" [INFO] Patched {i}/{len(patches)} ({patch_success} ok, {patch_fail} fail)")

    resp = api_patch(token, main_id, body)
    time.sleep(THROTTLE_DELAY)

    if "error" in resp:
        patch_fail += 1
        err_msg = _error_text(resp["error"])
        patch_errors_log.append({"name": name, "main_id": main_id, "error": err_msg, "body": body})
        # The extra data never reached Main, so keep the Temp copy.
        unmerged_temp_ids.add(temp_id)
        if patch_fail <= 5:
            print(f" [ERROR] {name}: {err_msg}")
    else:
        patch_success += 1

print(f"\n [OK] PATCH complete: {patch_success} success, {patch_fail} failures")

# ============================================================
# STEP 5: Delete Temp contacts (both exact_matches and matches_with_extras),
# except those whose merge into Main did not complete
# ============================================================
print("\n" + "=" * 70)
print("STEP 5: Delete Temp contacts")
print("=" * 70)

# Collect temp IDs; contacts with a failed fetch/PATCH are preserved.
all_temp_ids = []
preserved_temp = []
for m in matches:
    pair = (m["temp_id"], m["displayName"])
    if m["temp_id"] in unmerged_temp_ids:
        preserved_temp.append(pair)
    else:
        all_temp_ids.append(pair)
extras_to_delete = len(all_temp_ids)
for m in exact_matches:
    all_temp_ids.append((m["temp_id"], m["displayName"]))

print(f" [INFO] Total Temp contacts to delete: {len(all_temp_ids)}")
print(f" From matches_with_extras: {extras_to_delete}")
print(f" From exact_matches: {len(exact_matches)}")
if preserved_temp:
    print(f" [WARN] Keeping {len(preserved_temp)} Temp contacts whose merge did not complete")

token = get_token()
del_success = 0
del_fail = 0
del_errors_log = []

for i, (tid, name) in enumerate(all_temp_ids):
    if i > 0 and i % 500 == 0:
        token = get_token()
    if i > 0 and i % 200 == 0:
        print(f" [INFO] Deleted {i}/{len(all_temp_ids)} ({del_success} ok, {del_fail} fail)")

    ok = api_delete(token, tid)
    time.sleep(THROTTLE_DELAY)

    if ok:
        del_success += 1
    else:
        del_fail += 1
        del_errors_log.append({"name": name, "temp_id": tid})
        if del_fail <= 5:
            print(f" [ERROR] Delete {name}: failed")

print(f"\n [OK] DELETE complete: {del_success} success, {del_fail} failures")

# ============================================================
# STEP 6: Verify
# ============================================================
print("\n" + "=" * 70)
print("STEP 6: Verification")
print("=" * 70)

token = get_token()

# Count Temp folder contacts
# First find the Temp folder ID. Spaces in the $filter expression are
# percent-encoded because the URL is handed to curl verbatim.
folders_url = (
    f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders"
    "?$filter=displayName%20eq%20'Temp'"
)
folders_resp = api_get(token, folders_url)
time.sleep(THROTTLE_DELAY)

temp_count = "unknown"
if "value" in folders_resp and folders_resp["value"]:
    temp_folder_id = folders_resp["value"][0]["id"]
    count_url = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{temp_folder_id}/contacts?$count=true&$top=1"
    count_resp = api_get(token, count_url)
    odata_count = count_resp.get("@odata.count")
    if isinstance(odata_count, int):
        temp_count = odata_count
    elif "error" in count_resp:
        temp_count = f"Error: {count_resp}"
    elif count_resp.get("value"):
        # $top=1 returns at most one item, so without @odata.count the page
        # length is only a lower bound, not the real total.
        temp_count = "at least 1 (exact count unavailable)"
    else:
        temp_count = 0
elif "value" in folders_resp and not folders_resp["value"]:
    temp_count = "Folder not found (may have been deleted)"
else:
    temp_count = f"Error: {folders_resp}"

# Count Main contacts folder
main_url = f"{BASE_URL}?$top=1&$count=true"
main_resp = api_get(token, main_url)
main_count = main_resp.get("@odata.count", "unknown")

print(f" Temp folder contacts remaining: {temp_count}")
print(f" Main contacts count: {main_count}")

# ============================================================
# Save results
# ============================================================
results = {
    "timestamp": datetime.now().isoformat(),
    "step1_notes_analysis": {
        "icloud_junk": notes_junk,
        "real_content": notes_real,
        "no_notes": notes_none,
    },
    "step2_merge_plan": {
        "needs_merge": len(needs_merge),
        "nothing_to_merge": len(nothing_to_merge),
        "needs_fetch": len(needs_fetch),
        "field_counts": field_counts,
    },
    "step3_fetched": {
        "total": fetch_count,
        "errors": fetch_errors,
    },
    "step4_patches": {
        "total": len(patches),
        "success": patch_success,
        "failures": patch_fail,
        "error_samples": patch_errors_log[:20],
    },
    "step5_deletes": {
        "total": len(all_temp_ids),
        "success": del_success,
        "failures": del_fail,
        "error_samples": del_errors_log[:20],
        # Temp contacts kept because their merge did not complete.
        "preserved_unmerged": len(preserved_temp),
        "preserved_samples": [
            {"name": name, "temp_id": tid} for tid, name in preserved_temp[:20]
        ],
    },
    "step6_verification": {
        "temp_remaining": temp_count,
        "main_count": main_count,
    },
}

with open(LOG_FILE, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, default=str)
print(f"\n[OK] Results saved to {LOG_FILE}")

# ============================================================
# Final summary
# ============================================================
print("\n" + "=" * 70)
print("FINAL SUMMARY")
print("=" * 70)
print(f" Notes analyzed: {notes_junk} junk / {notes_real} real / {notes_none} none")
print(f" Merges planned: {len(needs_merge)} contacts")
print(f" PATCHes sent: {len(patches)} ({patch_success} ok, {patch_fail} fail)")
print(f" DELETEs sent: {len(all_temp_ids)} ({del_success} ok, {del_fail} fail)")
print(f" Temp remaining: {temp_count}")
print(f" Main count: {main_count}")
print("=" * 70)