#!/usr/bin/env python3
"""
Bardach Contact Merge: Merge extra data from Temp contacts into Main contacts,
then delete the Temp copies. Main is authoritative - only ADD missing data.
"""

import json
import subprocess
import time
import re
import sys
from datetime import datetime

# Switch stdout/stderr to line buffering so each progress line is flushed as
# soon as its newline is written (useful when output is piped or redirected).
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)

# ============================================================
# Configuration
# ============================================================
# NOTE(review): CLIENT_SECRET below is a credential committed in plain text.
# It should be rotated and loaded from an environment variable or secret
# store instead of living in source control.
# TENANT_ID / CLIENT_ID / CLIENT_SECRET / SCOPE feed the token request built
# in get_token(); USER selects the mailbox whose contacts are modified.
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
CLIENT_SECRET = "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
SCOPE = "https://graph.microsoft.com/.default"
USER = "barbara@bardach.net"
# Base collection URL; individual contacts are addressed as BASE_URL/<id>.
BASE_URL = f"https://graph.microsoft.com/v1.0/users/{USER}/contacts"
# Input: comparison file read in step 1 ("matches_with_extras" is required,
# "exact_matches" is optional).
DATA_FILE = "D:/ClaudeTools/temp/bardach_temp_vs_main.json"
# Output: JSON summary of the run, written after verification.
LOG_FILE = "D:/ClaudeTools/temp/bardach_merge_results.json"
THROTTLE_DELAY = 0.35  # seconds between API calls

# ============================================================
# Helpers
# ============================================================
def get_token():
    """Acquire an OAuth2 access token via the client-credentials grant.

    Posts CLIENT_ID, CLIENT_SECRET and SCOPE to the tenant's v2.0 token
    endpoint through curl and returns the ``access_token`` string from the
    JSON reply.

    The form body is URL-encoded and handed to curl on stdin
    (``--data @-``) so that the client secret does not appear in the
    process argument list.

    Exits the process with status 1 (after printing an ``[ERROR]`` line)
    when curl cannot be started, times out, exits non-zero, returns
    something that is not JSON, or the reply has no ``access_token``.
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    from urllib.parse import urlencode

    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    form_body = urlencode({
        "client_id": CLIENT_ID,
        "scope": SCOPE,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    })
    cmd = [
        "curl", "-s", "-X", "POST", url,
        "-H", "Content-Type: application/x-www-form-urlencoded",
        "--data", "@-",  # read the form body from stdin
    ]
    try:
        result = subprocess.run(
            cmd, input=form_body, capture_output=True, text=True, timeout=30
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        print(f"[ERROR] Token acquisition failed: could not run curl: {exc}")
        sys.exit(1)

    if result.returncode != 0:
        print(
            f"[ERROR] Token acquisition failed: curl exit code "
            f"{result.returncode}: {result.stderr.strip()}"
        )
        sys.exit(1)

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        print(
            f"[ERROR] Token acquisition failed: non-JSON response: "
            f"{result.stdout[:200]}"
        )
        sys.exit(1)

    if not isinstance(data, dict) or "access_token" not in data:
        print(f"[ERROR] Token acquisition failed: {data}")
        sys.exit(1)

    print(f"[OK] Token acquired at {datetime.now().strftime('%H:%M:%S')}")
    return data["access_token"]


def api_get(token, url):
    """GET *url* from the Graph API and return the decoded JSON body.

    Args:
        token: Bearer token placed in the Authorization header.
        url: Full request URL.

    Returns:
        The parsed JSON response. Transport problems do not raise; they are
        reported in the same shape Graph uses for errors, i.e.
        ``{"error": {"code": ..., "message": ...}}``, when curl cannot be
        started, times out, exits non-zero, or returns a non-JSON body.
    """
    cmd = [
        "curl", "-s", "-X", "GET", url,
        "-H", f"Authorization: Bearer {token}",
        "-H", "Content-Type: application/json"
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except (OSError, subprocess.TimeoutExpired) as exc:
        return {"error": {"code": "curlFailed",
                          "message": f"curl could not complete: {exc}"}}

    if result.returncode != 0:
        # With -s curl usually prints nothing, so always include the exit code.
        detail = result.stderr.strip() or "no diagnostic output"
        return {"error": {"code": "curlFailed",
                          "message": f"curl exited with code {result.returncode}: {detail}"}}

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        return {"error": {"code": "invalidResponse",
                          "message": f"Non-JSON response: {result.stdout[:200]}"}}


def api_patch(token, contact_id, body):
    """Send a PATCH for one contact and return the decoded response.

    *body* is serialised to JSON and sent to BASE_URL/<contact_id>.
    Returns the parsed JSON reply, or ``{"error": <text>}`` when curl exits
    non-zero (curl's stderr) or the reply is not valid JSON (first 200
    characters of the reply).
    """
    target = f"{BASE_URL}/{contact_id}"
    payload = json.dumps(body)
    completed = subprocess.run(
        [
            "curl", "-s", "-X", "PATCH", target,
            "-H", f"Authorization: Bearer {token}",
            "-H", "Content-Type: application/json",
            "-d", payload,
        ],
        capture_output=True,
        text=True,
        timeout=30,
    )

    # curl itself failed: hand back whatever it wrote to stderr.
    if completed.returncode != 0:
        return {"error": completed.stderr}

    try:
        return json.loads(completed.stdout)
    except json.JSONDecodeError:
        return {"error": f"Non-JSON response: {completed.stdout[:200]}"}


def api_delete(token, contact_id):
    """DELETE a contact.

    Returns True when the HTTP status is 204 (or 200), False for any other
    status or when curl cannot be started or times out.

    The status code is appended to curl's output via ``-w %{http_code}`` and
    read from the last three characters of stdout. The response body is
    captured rather than redirected to ``/dev/null``, which does not exist
    on Windows.
    """
    url = f"{BASE_URL}/{contact_id}"
    cmd = [
        "curl", "-s", "-w", "%{http_code}",
        "-X", "DELETE", url,
        "-H", f"Authorization: Bearer {token}"
    ]
    try:
        # errors="replace": the body is discarded anyway, so never let an
        # undecodable byte in it raise.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace", timeout=30
        )
    except (OSError, subprocess.TimeoutExpired):
        return False

    # stdout is "<body><3-digit status>"; curl writes "000" when no response
    # was received.
    code = result.stdout.strip()[-3:]
    return code in ("204", "200")


def is_icloud_junk(notes):
    """Return True when personalNotes is empty or iCloud/Outlook boilerplate.

    Matching is case-insensitive. A note counts as junk when it is falsy, or
    when it mentions "read-only" together with any of: "outlook", the phrase
    "this contact is read-only", "tap", "edit" or "link".
    """
    if not notes:
        return True

    text = notes.lower()

    # Every junk pattern involves "read-only" (the full phrase contains it),
    # so its absence settles the question immediately.
    if "read-only" not in text:
        return False

    if "this contact is read-only" in text:
        return True

    companions = ("outlook", "tap", "edit", "link")
    return any(word in text for word in companions)


def normalize_phone(phone):
    """Reduce a phone string to its ASCII digits and '+' signs for comparison."""
    allowed = "0123456789+"
    return "".join(ch for ch in phone if ch in allowed)


def is_address_empty(addr):
    """Return True when *addr* carries no usable address data.

    Anything falsy or not a dict is treated as empty; a dict is empty when
    every value is falsy or only whitespace once converted to a string.
    """
    if not addr or not isinstance(addr, dict):
        return True
    has_content = any(part and str(part).strip() for part in addr.values())
    return not has_content


# ============================================================
# STEP 1: Load data and analyze notes
# ============================================================
print("=" * 70)
print("STEP 1: Load data and analyze personalNotes")
print("=" * 70)

# The comparison file must contain "matches_with_extras"; "exact_matches"
# is optional and defaults to an empty list.
with open(DATA_FILE, "r", encoding="utf-8") as source_file:
    data = json.load(source_file)

matches = data["matches_with_extras"]
exact_matches = data.get("exact_matches", [])
print(f"[INFO] Loaded {len(matches)} matches_with_extras")
print(f"[INFO] Loaded {len(exact_matches)} exact_matches (no extras)")

# Classify each match by what its personalNotes extra looks like.
notes_junk = 0
notes_real = 0
notes_none = 0
real_notes_samples = []  # up to 10 examples of genuine notes, for eyeballing

for match in matches:
    extras = match.get("extra_fields", {})
    if "personalNotes" in extras:
        note_text = extras["personalNotes"]
        if is_icloud_junk(note_text):
            notes_junk += 1
            continue
        notes_real += 1
        if len(real_notes_samples) < 10:
            sample = {
                "displayName": match["displayName"],
                "notes": note_text[:200],
            }
            real_notes_samples.append(sample)
    else:
        notes_none += 1

print("\n  personalNotes breakdown:")
print(f"    iCloud junk:    {notes_junk}")
print(f"    Real content:   {notes_real}")
print(f"    No notes field: {notes_none}")
print(f"    Total:          {notes_junk + notes_real + notes_none}")

if real_notes_samples:
    print(f"\n  Sample real notes ({len(real_notes_samples)}):")
    for position, sample in enumerate(real_notes_samples, start=1):
        print(f"    [{position}] {sample['displayName']}: {sample['notes']}")

# ============================================================
# STEP 2: Build merge plan
# ============================================================
print("\n" + "=" * 70)
print("STEP 2: Build merge plan")
print("=" * 70)

needs_merge = []
nothing_to_merge = []
needs_fetch = []  # contacts where we need to GET current Main data (emails/phones)

# One counter per mergeable field, all starting at zero.
field_counts = dict.fromkeys(
    (
        "personalNotes",
        "emailAddresses",
        "homePhones",
        "businessPhones",
        "companyName",
        "jobTitle",
        "homeAddress",
        "businessAddress",
        "otherAddress",
        "birthday",
        "nickName",
    ),
    0,
)

# Field families, grouped by the rule that decides whether a value is kept.
_list_fields = ("emailAddresses", "homePhones", "businessPhones")
_text_fields = ("companyName", "jobTitle", "nickName")
_address_fields = ("homeAddress", "businessAddress", "otherAddress")

for match in matches:
    extras = match.get("extra_fields", {})
    merge_fields = {}
    requires_fetch = False

    for field, value in extras.items():
        if field == "personalNotes":
            keep = not is_icloud_junk(value)
        elif field in _list_fields:
            # List fields are appended to Main's list, so Main's current
            # values have to be fetched first.
            keep = bool(value)
            if keep:
                requires_fetch = True
        elif field in _text_fields:
            keep = bool(value and str(value).strip())
        elif field in _address_fields:
            keep = not is_address_empty(value)
        elif field == "birthday":
            keep = bool(value)
        else:
            keep = False  # unknown fields are ignored

        if keep:
            merge_fields[field] = value

    if not merge_fields:
        nothing_to_merge.append(match["displayName"])
        continue

    entry = {
        "temp_id": match["temp_id"],
        "main_id": match["main_id"],
        "displayName": match["displayName"],
        "merge_fields": merge_fields,
        "requires_fetch": requires_fetch,
    }
    needs_merge.append(entry)
    if requires_fetch:
        needs_fetch.append(entry)
    for merged_name in merge_fields:
        if merged_name in field_counts:
            field_counts[merged_name] += 1

print(f"\n  Contacts needing merge:    {len(needs_merge)}")
print(f"  Contacts nothing to merge: {len(nothing_to_merge)}")
print(f"  Contacts needing fetch:    {len(needs_fetch)} (have emails/phones to append)")
print("\n  Field merge counts:")
# Highest count first; the sort is stable, so ties keep declaration order.
ranked_counts = sorted(field_counts.items(), key=lambda pair: pair[1], reverse=True)
for field_name, total in ranked_counts:
    if total > 0:
        print(f"    {field_name}: {total}")

# ============================================================
# STEP 3: Fetch current Main data for contacts needing email/phone merge
# ============================================================
print("\n" + "=" * 70)
print("STEP 3: Fetch Main contact data for email/phone merges")
print("=" * 70)

token = get_token()
fetch_count = 0
fetch_errors = 0

for entry in needs_fetch:
    # Refresh the token every 500 requests; report progress every 100.
    if fetch_count > 0 and fetch_count % 500 == 0:
        token = get_token()
    if fetch_count > 0 and fetch_count % 100 == 0:
        print(f"  [INFO] Fetched {fetch_count}/{len(needs_fetch)}...")

    url = f"{BASE_URL}/{entry['main_id']}?$select=emailAddresses,homePhones,businessPhones"
    try:
        resp = api_get(token, url)
    except (OSError, subprocess.TimeoutExpired, json.JSONDecodeError) as exc:
        # One bad response must not abort the whole run; record it as a
        # failed fetch for this contact instead.
        resp = {"error": str(exc)}
    time.sleep(THROTTLE_DELAY)
    fetch_count += 1

    if not isinstance(resp, dict) or "error" in resp:
        # The error may be a Graph error object (dict) or a plain string;
        # handle both the same way the PATCH step does.
        err = resp["error"] if isinstance(resp, dict) else resp
        err_msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
        print(f"  [ERROR] Fetch {entry['displayName']}: {err_msg}")
        fetch_errors += 1
        # None marks "fetch failed" so later steps skip list merges for it.
        entry["current_main"] = None
        continue

    # "or []" also covers a field that is present but null.
    entry["current_main"] = {
        "emailAddresses": resp.get("emailAddresses") or [],
        "homePhones": resp.get("homePhones") or [],
        "businessPhones": resp.get("businessPhones") or [],
    }

print(f"\n  [OK] Fetched {fetch_count} contacts ({fetch_errors} errors)")

# ============================================================
# Build PATCH bodies
# ============================================================
print("\n" + "=" * 70)
print("STEP 3b: Build PATCH bodies")
print("=" * 70)

# Caps applied when appending to Main's list fields.
# NOTE(review): 3 emails / 2 phones are the limits the original script
# attributed to the Graph API -- confirm against the contact resource docs.
MAX_EMAILS = 3
MAX_PHONES = 2


def _append_within_limit(existing, additions, limit):
    """Append as many *additions* to *existing* as *limit* allows.

    Returns ``(merged, dropped)``. ``merged`` is the full replacement list
    (Graph replaces the whole array on PATCH), or None when nothing can be
    added. Existing items are never removed, even if Main already holds
    more than *limit* entries. ``dropped`` counts additions that did not fit.
    """
    room = max(0, limit - len(existing))
    accepted = additions[:room]
    dropped = len(additions) - len(accepted)
    merged = list(existing) + accepted if accepted else None
    return merged, dropped


patches = []  # list of (main_id, displayName, patch_body, temp_id)
skipped_no_change = 0
dropped_over_limit = 0  # new emails/phones that did not fit under the caps

for entry in needs_merge:
    mf = entry["merge_fields"]
    patch = {}

    # Simple fields - set directly (these are only in extra_fields if Main lacks them)
    for sf in ("personalNotes", "companyName", "jobTitle", "nickName", "birthday",
               "homeAddress", "businessAddress", "otherAddress"):
        if sf in mf:
            patch[sf] = mf[sf]

    # List fields are appended to what Main already has. current_main is
    # None when the fetch failed; list merges are then skipped, because the
    # PATCH would otherwise overwrite values we could not read.
    current = entry.get("current_main", {})
    if current is not None:
        if "emailAddresses" in mf:
            existing_emails = current.get("emailAddresses") or []
            seen_addresses = {
                e.get("address").lower() for e in existing_emails if e.get("address")
            }
            new_emails = []
            for email in mf["emailAddresses"]:
                if isinstance(email, str):
                    addr = email
                elif isinstance(email, dict):
                    addr = email.get("address") or ""
                else:
                    continue
                addr = addr.strip()
                key = addr.lower()
                # Skip blanks, addresses Main already has, and repeats
                # within the Temp list itself.
                if not addr or key in seen_addresses:
                    continue
                seen_addresses.add(key)
                new_emails.append({"address": addr, "name": addr})
            merged, dropped = _append_within_limit(existing_emails, new_emails, MAX_EMAILS)
            dropped_over_limit += dropped
            if merged is not None:
                patch["emailAddresses"] = merged

        for phone_field in ("homePhones", "businessPhones"):
            if phone_field not in mf:
                continue
            existing_phones = current.get(phone_field) or []
            seen_numbers = {normalize_phone(p) for p in existing_phones if p}
            new_phones = []
            for p in mf[phone_field]:
                if not p:
                    continue
                norm = normalize_phone(p)
                if norm in seen_numbers:
                    continue
                seen_numbers.add(norm)
                new_phones.append(p)
            merged, dropped = _append_within_limit(existing_phones, new_phones, MAX_PHONES)
            dropped_over_limit += dropped
            if merged is not None:
                patch[phone_field] = merged

    if patch:
        patches.append((entry["main_id"], entry["displayName"], patch, entry["temp_id"]))
    else:
        skipped_no_change += 1

print(f"  [INFO] Built {len(patches)} PATCH operations")
print(f"  [INFO] Skipped {skipped_no_change} (no actual changes after dedup)")
if dropped_over_limit:
    print(f"  [WARN] {dropped_over_limit} new email/phone values did not fit under the per-field limits")

# ============================================================
# STEP 4: Execute PATCHes
# ============================================================
print("\n" + "=" * 70)
print("STEP 4: Execute PATCH operations")
print("=" * 70)

token = get_token()
patch_success = 0
patch_fail = 0
patch_errors_log = []

for idx, (target_id, contact_name, patch_body, _temp_id) in enumerate(patches):
    if idx > 0:
        # New token every 500 requests, progress line every 100.
        if idx % 500 == 0:
            token = get_token()
        if idx % 100 == 0:
            print(f"  [INFO] Patched {idx}/{len(patches)} ({patch_success} ok, {patch_fail} fail)")

    resp = api_patch(token, target_id, patch_body)
    time.sleep(THROTTLE_DELAY)

    if "error" not in resp:
        patch_success += 1
        continue

    patch_fail += 1
    failure = resp["error"]
    # The error is either a Graph error object (dict) or a plain string
    # produced by api_patch.
    if isinstance(failure, dict):
        err_msg = failure.get("message", str(failure))
    else:
        err_msg = str(failure)
    patch_errors_log.append(
        {"name": contact_name, "main_id": target_id, "error": err_msg, "body": patch_body}
    )
    # Only the first five failures are echoed; all of them are logged.
    if patch_fail <= 5:
        print(f"  [ERROR] {contact_name}: {err_msg}")

print(f"\n  [OK] PATCH complete: {patch_success} success, {patch_fail} failures")

# ============================================================
# STEP 5: Delete Temp contacts (exact_matches and matches_with_extras),
#         except those whose merge into Main did not complete
# ============================================================
print("\n" + "=" * 70)
print("STEP 5: Delete Temp contacts")
print("=" * 70)

# A Temp contact is the only copy of its extra data until that data has
# reached Main. Keep it when its PATCH failed or when the fetch of Main's
# current emails/phones failed (so its list fields were never merged).
failed_main_ids = {err["main_id"] for err in patch_errors_log}
unmerged_temp_ids = {p[3] for p in patches if p[0] in failed_main_ids}
unmerged_temp_ids.update(
    e["temp_id"] for e in needs_fetch if e.get("current_main") is None
)

# Collect the temp IDs that are safe to delete.
all_temp_ids = []
skipped_unmerged = []
deletable_from_matches = 0
for m in matches:
    pair = (m["temp_id"], m["displayName"])
    if m["temp_id"] in unmerged_temp_ids:
        skipped_unmerged.append(pair)
    else:
        all_temp_ids.append(pair)
        deletable_from_matches += 1
for m in exact_matches:
    all_temp_ids.append((m["temp_id"], m["displayName"]))

print(f"  [INFO] Total Temp contacts to delete: {len(all_temp_ids)}")
print(f"    From matches_with_extras: {deletable_from_matches}")
print(f"    From exact_matches: {len(exact_matches)}")
if skipped_unmerged:
    print(f"  [WARN] Keeping {len(skipped_unmerged)} Temp contacts whose merge did not complete")
    for kept_id, kept_name in skipped_unmerged[:5]:
        print(f"    kept: {kept_name} ({kept_id})")

token = get_token()
del_success = 0
del_fail = 0
del_errors_log = []

for i, (tid, name) in enumerate(all_temp_ids):
    # Refresh the token every 500 requests; report progress every 200.
    if i > 0 and i % 500 == 0:
        token = get_token()
    if i > 0 and i % 200 == 0:
        print(f"  [INFO] Deleted {i}/{len(all_temp_ids)} ({del_success} ok, {del_fail} fail)")

    try:
        ok = api_delete(token, tid)
    except (OSError, subprocess.TimeoutExpired):
        # Treat a curl launch failure or timeout as a failed delete instead
        # of aborting the remaining deletions.
        ok = False
    time.sleep(THROTTLE_DELAY)

    if ok:
        del_success += 1
    else:
        del_fail += 1
        del_errors_log.append({"name": name, "temp_id": tid})
        if del_fail <= 5:
            print(f"  [ERROR] Delete {name}: failed")

print(f"\n  [OK] DELETE complete: {del_success} success, {del_fail} failures")

# ============================================================
# STEP 6: Verify
# ============================================================
print("\n" + "=" * 70)
print("STEP 6: Verification")
print("=" * 70)

token = get_token()


def _verify_get(url):
    """Call api_get for verification; always return a dict, never raise."""
    try:
        resp = api_get(token, url)
    except (OSError, subprocess.TimeoutExpired, json.JSONDecodeError) as exc:
        return {"error": str(exc)}
    if not isinstance(resp, dict):
        return {"error": f"Unexpected response: {resp!r}"}
    return resp


# Count Temp folder contacts
# First find the Temp folder ID. Spaces in the $filter expression are
# percent-encoded because curl does not encode the URL it is given.
folders_url = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders?$filter=displayName%20eq%20'Temp'"
folders_resp = _verify_get(folders_url)
time.sleep(THROTTLE_DELAY)

temp_count = "unknown"
folder_list = folders_resp.get("value")
if folder_list:
    temp_folder_id = folder_list[0]["id"]
    count_url = f"https://graph.microsoft.com/v1.0/users/{USER}/contactFolders/{temp_folder_id}/contacts?$count=true&$top=1"
    count_resp = _verify_get(count_url)
    time.sleep(THROTTLE_DELAY)
    if isinstance(count_resp.get("@odata.count"), int):
        temp_count = count_resp["@odata.count"]
    elif "error" in count_resp:
        temp_count = f"Error: {count_resp['error']}"
    elif not count_resp.get("value"):
        # No count returned, but the first page is empty: nothing is left.
        temp_count = 0
    else:
        # Only one item was requested ($top=1), so the page length is not
        # a total; report it honestly instead of printing "1".
        temp_count = "at least 1 (@odata.count not returned)"
elif "value" in folders_resp:
    temp_count = "Folder not found (may have been deleted)"
else:
    temp_count = f"Error: {folders_resp}"

# Count Main contacts folder
main_url = f"{BASE_URL}?$top=1&$count=true"
main_resp = _verify_get(main_url)
main_count = main_resp.get("@odata.count", "unknown")

print(f"  Temp folder contacts remaining: {temp_count}")
print(f"  Main contacts count: {main_count}")

# ============================================================
# Save results
# ============================================================
# Assemble the run report section by section; keys are added in the order
# they should appear in the JSON file.
results = {"timestamp": datetime.now().isoformat()}

results["step1_notes_analysis"] = {
    "icloud_junk": notes_junk,
    "real_content": notes_real,
    "no_notes": notes_none,
}
results["step2_merge_plan"] = {
    "needs_merge": len(needs_merge),
    "nothing_to_merge": len(nothing_to_merge),
    "needs_fetch": len(needs_fetch),
    "field_counts": field_counts,
}
results["step3_fetched"] = {
    "total": fetch_count,
    "errors": fetch_errors,
}
# Error lists are capped at 20 entries to keep the report small.
results["step4_patches"] = {
    "total": len(patches),
    "success": patch_success,
    "failures": patch_fail,
    "error_samples": patch_errors_log[:20],
}
results["step5_deletes"] = {
    "total": len(all_temp_ids),
    "success": del_success,
    "failures": del_fail,
    "error_samples": del_errors_log[:20],
}
results["step6_verification"] = {
    "temp_remaining": temp_count,
    "main_count": main_count,
}

# default=str stringifies any value json cannot serialise natively.
with open(LOG_FILE, "w", encoding="utf-8") as report_file:
    json.dump(results, report_file, indent=2, default=str)

print(f"\n[OK] Results saved to {LOG_FILE}")

# ============================================================
# Final summary
# ============================================================
# Console recap of the counters gathered by the earlier steps.
rule = "=" * 70
summary_lines = [
    f"  Notes analyzed:  {notes_junk} junk / {notes_real} real / {notes_none} none",
    f"  Merges planned:  {len(needs_merge)} contacts",
    f"  PATCHes sent:    {len(patches)} ({patch_success} ok, {patch_fail} fail)",
    f"  DELETEs sent:    {len(all_temp_ids)} ({del_success} ok, {del_fail} fail)",
    f"  Temp remaining:  {temp_count}",
    f"  Main count:      {main_count}",
]

print("\n" + rule)
print("FINAL SUMMARY")
print(rule)
for summary_line in summary_lines:
    print(summary_line)
print(rule)