sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
288
temp/bardach_main_dupes.py
Normal file
288
temp/bardach_main_dupes.py
Normal file
@@ -0,0 +1,288 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Find and analyze duplicate contacts in Barbara Bardach's Main Contacts folder."""
|
||||
|
||||
import json
import os
import subprocess
import sys
from collections import defaultdict
from urllib.parse import urlencode
|
||||
|
||||
# App registration used for the OAuth2 client-credentials flow in get_token().
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: the client secret used to be hard-coded on this line and was
# committed to the repository. It is now supplied through the
# GRAPH_CLIENT_SECRET environment variable; the previously committed value
# should be treated as leaked and rotated.
CLIENT_SECRET = os.environ.get("GRAPH_CLIENT_SECRET", "")
# Mailbox whose contacts are analysed.
USER = "barbara@bardach.net"

# Contact properties requested from Microsoft Graph through $select.
SELECT_FIELDS = (
    "id,displayName,givenName,surname,emailAddresses,homePhones,"
    "businessPhones,companyName,jobTitle,personalNotes,homeAddress,"
    "businessAddress,birthday,lastModifiedDateTime"
)
|
||||
|
||||
|
||||
def curl_json(args):
    """Run ``curl`` with *args* and return its standard output parsed as JSON.

    Args:
        args: Command-line arguments appended after ``curl -s -S`` (method,
            URL, headers, request body, ...).

    Returns:
        The decoded JSON document.

    Every failure is reported as an ``[ERROR]`` line on stderr followed by
    ``sys.exit(1)``: curl cannot be started, curl does not finish within
    60 seconds, curl returns a non-zero exit status, or its output is not
    valid JSON.
    """
    try:
        result = subprocess.run(
            ["curl", "-s", "-S"] + list(args),
            capture_output=True,
            text=True,
            # Decode explicitly as UTF-8 instead of the locale codec so that
            # non-ASCII characters in the JSON payload cannot break decoding.
            encoding="utf-8",
            errors="replace",
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        print("[ERROR] curl timed out after 60 seconds", file=sys.stderr)
        sys.exit(1)
    except OSError as exc:
        # Raised when the curl executable is missing or cannot be launched.
        print(f"[ERROR] could not run curl: {exc}", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(f"[ERROR] curl failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        print(f"[ERROR] Invalid JSON response: {result.stdout[:500]}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def get_token():
    """Request a Microsoft Graph access token with the client-credentials flow.

    Posts ``TENANT_ID``, ``CLIENT_ID`` and ``CLIENT_SECRET`` to the tenant's
    OAuth2 v2.0 token endpoint through :func:`curl_json`.

    Returns:
        The ``access_token`` string from the token response.

    Prints an ``[ERROR]`` line to stderr and exits with status 1 when the
    client secret is empty or the response carries no ``access_token``.
    """
    if not CLIENT_SECRET:
        # Fail before making a request that cannot succeed.
        print("[ERROR] CLIENT_SECRET is empty; cannot request an access token", file=sys.stderr)
        sys.exit(1)

    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    # urlencode() percent-encodes every value, so a secret containing
    # characters such as '&', '+' or '=' cannot corrupt the form body.
    data = urlencode({
        "grant_type": "client_credentials",
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": "https://graph.microsoft.com/.default",
    })
    resp = curl_json([
        "-X", "POST", url,
        "-H", "Content-Type: application/x-www-form-urlencoded",
        "-d", data,
    ])
    if "access_token" not in resp:
        print(f"[ERROR] Token request failed: {json.dumps(resp, indent=2)}", file=sys.stderr)
        sys.exit(1)
    print("[OK] Got access token")
    return resp["access_token"]
|
||||
|
||||
|
||||
def get_all_contacts(token):
    """Download every contact from the user's default contacts folder.

    Args:
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        A list with the ``value`` entries of every page, in the order received.

    Pages are followed through ``@odata.nextLink`` until the response no
    longer provides one. A response containing an ``error`` member is printed
    to stderr and terminates the process with status 1.
    """
    next_url = (
        f"https://graph.microsoft.com/v1.0/users/{USER}/contacts"
        f"?$select={SELECT_FIELDS}&$top=250"
    )
    request_headers = [
        "-H", f"Authorization: Bearer {token}",
        "-H", "Content-Type: application/json",
    ]

    collected = []
    page_number = 0
    while next_url:
        page_number += 1
        print(f" Fetching page {page_number}...")
        payload = curl_json(request_headers + [next_url])
        if "error" in payload:
            print(f"[ERROR] Graph API error: {json.dumps(payload['error'], indent=2)}", file=sys.stderr)
            sys.exit(1)
        page_items = payload.get("value", [])
        collected.extend(page_items)
        print(f" Got {len(page_items)} contacts (total: {len(collected)})")
        # A missing nextLink yields None, which ends the loop.
        next_url = payload.get("@odata.nextLink")
    return collected
|
||||
|
||||
|
||||
def count_filled_fields(contact):
    """Return a completeness score for one contact dict.

    Scoring:
        * 1 point for each non-empty scalar field (given name, surname,
          company, job title, birthday);
        * 2 points when ``personalNotes`` holds non-whitespace text;
        * 1 point per entry in each of the e-mail / home-phone /
          business-phone lists;
        * 1 point per address that has a street, city, state or postal code.
    """
    scalar_points = sum(
        1
        for name in ("givenName", "surname", "companyName", "jobTitle", "birthday")
        if contact.get(name)
    )

    notes = contact.get("personalNotes")
    # Notes are weighted double because they are considered valuable.
    note_points = 2 if notes and notes.strip() else 0

    list_points = sum(
        len(contact.get(name) or ())
        for name in ("emailAddresses", "homePhones", "businessPhones")
    )

    address_points = 0
    for name in ("homeAddress", "businessAddress"):
        address = contact.get(name)
        if not address:
            continue
        if any(address.get(part) for part in ("street", "city", "state", "postalCode")):
            address_points += 1

    return scalar_points + note_points + list_points + address_points
|
||||
|
||||
|
||||
def summarize_differences(contacts):
    """Describe which fields differ between contacts in one duplicate group.

    Args:
        contacts: Contact dicts that share a display name.

    Returns:
        A ``"; "``-joined description of every differing field, or
        ``"No differences found (exact duplicates)"`` when nothing differs.

    Scalar fields are compared after stripping whitespace. A contact that
    lacks a value counts as a distinct ``(empty)`` value, so "present on one
    contact, missing on another" is reported as a difference, matching how
    the list and address comparisons already treat missing data. Differing
    scalar values are listed in sorted order so the text is reproducible
    from run to run.
    """
    scalar_fields = (
        "givenName", "surname", "companyName", "jobTitle", "birthday",
        "personalNotes",
    )
    list_fields = ("emailAddresses", "homePhones", "businessPhones")
    addr_fields = ("homeAddress", "businessAddress")
    diffs = []

    for field in scalar_fields:
        distinct = set()
        for contact in contacts:
            raw = contact.get(field)
            # Missing / empty values are normalised to "" so they take part
            # in the comparison instead of being skipped.
            distinct.add(str(raw).strip() if raw else "")
        if len(distinct) > 1:
            rendered = ", ".join(repr(value or "(empty)") for value in sorted(distinct))
            diffs.append(f"{field}: {{{rendered}}}")

    for field in list_fields:
        per_contact = []
        for contact in contacts:
            entries = contact.get(field, []) or []
            if field == "emailAddresses":
                # E-mail entries are dicts; compare only their addresses.
                items = sorted(e.get("address", "") for e in entries if e.get("address"))
            else:
                items = sorted(entries)
            per_contact.append(tuple(items))
        if len(set(per_contact)) > 1:
            diffs.append(f"{field} differ: {[list(x) for x in per_contact]}")

    for field in addr_fields:
        addresses = set()
        for contact in contacts:
            address = contact.get(field) or {}
            parts = (
                address.get("street", ""),
                address.get("city", ""),
                address.get("state", ""),
                address.get("postalCode", ""),
            )
            addresses.add(tuple(part.strip() if part else "" for part in parts))
        if len(addresses) > 1:
            diffs.append(f"{field} differ")

    dates = [contact.get("lastModifiedDateTime", "unknown") for contact in contacts]
    if len(set(dates)) > 1:
        diffs.append(f"lastModified: {dates}")

    return "; ".join(diffs) if diffs else "No differences found (exact duplicates)"
|
||||
|
||||
|
||||
def analyze_duplicates(contacts):
    """Group contacts by display name and pick a keeper for each duplicate set.

    Contacts are grouped on their stripped, lower-cased ``displayName``;
    contacts without a display name are ignored. For every name that occurs
    at least twice the contacts are ranked by completeness score, ties broken
    by the most recent ``lastModifiedDateTime``. The top-ranked contact is
    the keeper and the remaining ones are listed for deletion.

    Returns:
        A list of dicts, ordered by normalised name, with the keys ``name``,
        ``count``, ``contacts``, ``keeper_id``, ``delete_ids``,
        ``differences`` and the internal ``_scores`` list of
        ``(score, first 8 characters of the id)`` pairs in ranked order.
    """
    by_name = defaultdict(list)
    for contact in contacts:
        key = (contact.get("displayName") or "").strip().lower()
        if not key:
            continue
        by_name[key].append(contact)

    results = []
    for key in sorted(by_name):
        members = by_name[key]
        if len(members) < 2:
            continue

        # Highest score first; among equal scores the newest timestamp wins.
        ranked = sorted(
            (
                (count_filled_fields(member), member.get("lastModifiedDateTime", ""), member)
                for member in members
            ),
            key=lambda entry: (entry[0], entry[1]),
            reverse=True,
        )
        keeper = ranked[0][2]
        losers = [entry[2] for entry in ranked[1:]]

        results.append({
            "name": members[0].get("displayName", key),
            "count": len(members),
            "contacts": members,
            "keeper_id": keeper["id"],
            "delete_ids": [loser["id"] for loser in losers],
            "differences": summarize_differences(members),
            "_scores": [(entry[0], entry[2]["id"][:8]) for entry in ranked],
        })

    return results
|
||||
|
||||
|
||||
def print_report(contacts, dup_groups):
    """Print a human-readable report of every duplicate group.

    Args:
        contacts: All contacts that were analysed (only the count is used).
        dup_groups: Group dicts as produced by ``analyze_duplicates``.

    Returns:
        The total number of contacts marked for deletion across all groups.

    The score shown next to each contact is computed directly with
    ``count_filled_fields``. Looking it up in ``_scores`` by the first eight
    characters of the id is unreliable: that prefix is not unique, so several
    contacts in a group could be shown the score of the first match.
    """
    total_removable = sum(len(group["delete_ids"]) for group in dup_groups)
    rule = "=" * 80

    print("\n" + rule)
    print("DUPLICATE CONTACTS ANALYSIS - Barbara Bardach")
    print(rule)
    print(f"Total contacts in Main Contacts: {len(contacts)}")
    print(f"Duplicate groups found: {len(dup_groups)}")
    print(f"Total removable contacts: {total_removable}")
    print(rule)

    for index, group in enumerate(dup_groups, 1):
        print(f"\n--- Group {index}: {group['name']} ({group['count']} contacts) ---")
        for contact in group["contacts"]:
            marker = "[KEEP]" if contact["id"] == group["keeper_id"] else "[DELETE]"
            score = count_filled_fields(contact)

            print(f" {marker} (score={score}) id={contact['id'][:12]}...")
            print(f" displayName: {contact.get('displayName')}")
            print(f" givenName: {contact.get('givenName')} surname: {contact.get('surname')}")

            emails = contact.get("emailAddresses") or []
            if emails:
                print(f" emails: {[e.get('address') for e in emails]}")

            home_phones = contact.get("homePhones") or []
            if home_phones:
                print(f" homePhones: {home_phones}")

            business_phones = contact.get("businessPhones") or []
            if business_phones:
                print(f" businessPhones: {business_phones}")

            if contact.get("companyName"):
                print(f" company: {contact['companyName']}")
            if contact.get("jobTitle"):
                print(f" jobTitle: {contact['jobTitle']}")
            if contact.get("birthday"):
                print(f" birthday: {contact['birthday']}")

            for addr_field in ("homeAddress", "businessAddress"):
                addr = contact.get(addr_field) or {}
                parts = [addr.get(key, "") for key in ("street", "city", "state", "postalCode")]
                filled = [part for part in parts if part]
                if filled:
                    print(f" {addr_field}: {', '.join(filled)}")

            notes = contact.get("personalNotes", "")
            if notes and notes.strip():
                stripped = notes.strip()
                # Show at most 80 characters on one line.
                preview = stripped[:80].replace("\n", " ")
                suffix = "..." if len(stripped) > 80 else ""
                print(f" notes: {preview}{suffix}")

            print(f" lastModified: {contact.get('lastModifiedDateTime')}")

        print(f" Differences: {group['differences']}")

    return total_removable
|
||||
|
||||
|
||||
def main(output_path=r"D:\ClaudeTools\temp\bardach_main_dupes_analysis.json"):
    """Run the duplicate analysis end to end and save the result as JSON.

    Args:
        output_path: File that receives the analysis. Defaults to the
            location the script has always written to.

    Steps: obtain a token, download all contacts, group duplicates, print
    the report, then write the analysis (without the internal ``_scores``
    entries) to *output_path*.

    Exits with status 0 when the mailbox has no contacts and with status 1
    when the output file cannot be written.
    """
    print("[INFO] Starting duplicate contact analysis for Barbara Bardach")

    token = get_token()

    print("[INFO] Fetching all contacts from Main Contacts folder...")
    contacts = get_all_contacts(token)
    print(f"[OK] Retrieved {len(contacts)} total contacts")

    if not contacts:
        print("[WARNING] No contacts found!")
        sys.exit(0)

    print("[INFO] Analyzing duplicates...")
    dup_groups = analyze_duplicates(contacts)

    total_removable = print_report(contacts, dup_groups)

    # _scores is an internal ranking aid and is kept out of the saved file.
    output_groups = [
        {key: value for key, value in group.items() if key != "_scores"}
        for group in dup_groups
    ]

    analysis = {
        "total_contacts": len(contacts),
        "duplicate_groups": len(dup_groups),
        "total_removable": total_removable,
        "groups": output_groups,
    }

    try:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(analysis, f, indent=2, default=str)
    except OSError as exc:
        # Report an unwritable path in the script's [ERROR] style instead of
        # ending with a traceback.
        print(f"[ERROR] Could not write {output_path}: {exc}", file=sys.stderr)
        sys.exit(1)
    print(f"\n[OK] Analysis saved to {output_path}")
|
||||
|
||||
|
||||
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user