Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
415 lines
14 KiB
Python
415 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""Find real two-way correspondents missing from Barbara's contacts and extract phone numbers from signatures."""
|
|
|
|
import html
import json
import os
import re
import subprocess
import time
import urllib.parse
from datetime import datetime
|
|
|
|
# ── Config ──
|
|
# Every setting below can be overridden through an environment variable; the
# literal is only the fallback, so running the script with no environment set
# behaves exactly as before.

# Candidate list produced by an earlier step (input) and the filtered report
# written by this script (output).
INPUT_FILE = os.environ.get(
    "BARDACH_INPUT_FILE",
    r"D:\ClaudeTools\temp\bardach_missing_contacts.json",
)
OUTPUT_FILE = os.environ.get(
    "BARDACH_OUTPUT_FILE",
    r"D:\ClaudeTools\temp\bardach_missing_real_contacts.json",
)

# Azure AD app registration used for the OAuth2 client-credentials grant.
# NOTE(review): a client secret committed to source control has to be treated
# as leaked. Rotate it, supply the new value via BARDACH_CLIENT_SECRET, and
# then delete the literal fallback below.
TENANT = os.environ.get(
    "BARDACH_TENANT_ID", "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
)
CLIENT_ID = os.environ.get(
    "BARDACH_CLIENT_ID", "fabb3421-8b34-484b-bc17-e46de9703418"
)
CLIENT_SECRET = os.environ.get(
    "BARDACH_CLIENT_SECRET", "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
)

# Mailbox whose messages are searched.
USER_EMAIL = os.environ.get("BARDACH_USER_EMAIL", "barbara@bardach.net")

# Endpoints derived from the settings above.
TOKEN_URL = f"https://login.microsoftonline.com/{TENANT}/oauth2/v2.0/token"
GRAPH_BASE = f"https://graph.microsoft.com/v1.0/users/{USER_EMAIL}"
|
|
|
|
# ── Junk filters ──
|
|
# Substrings that mark an address as automated or role-based. An address is
# discarded when any of these occurs anywhere in its lower-cased form.
JUNK_KEYWORDS = [
    # automated senders
    "noreply",
    "no-reply",
    "donotreply",
    "notification",
    "alert",
    # mail-system addresses
    "mailer-daemon",
    "postmaster",
    "unsubscribe",
    "bounce",
    # role mailboxes
    "support@",
    "info@",
    "help@",
    "service@",
    "billing@",
    # bulk mail
    "news@",
    "newsletter",
    "marketing",
    "promo",
]

# Domains treated as commercial senders; the domain itself and any of its
# subdomains are filtered out.
COMMERCIAL_DOMAINS = [
    "amazon.com",
    "google.com",
    "facebook.com",
    "apple.com",
    "microsoft.com",
    "paypal.com",
    "ebay.com",
    "nextdoor.com",
    "linkedin.com",
    "twitter.com",
    "instagram.com",
    "fidelity.com",
    "schwab.com",
    "vanguard.com",
    "intuit.com",
    "turbotax.com",
]
|
|
|
|
# ── Token management ──
|
|
# Cached bearer token shared by the Graph helpers; None until one is fetched.
_token = None
# Graph requests issued since the cached token was last replaced.
_api_call_count = 0
|
|
|
|
def get_token():
    """Request a new app-only access token for Microsoft Graph.

    Shells out to ``curl`` and posts an OAuth2 client-credentials request to
    ``TOKEN_URL`` using ``CLIENT_ID`` and ``CLIENT_SECRET``.

    Returns:
        The ``access_token`` string from the token endpoint's JSON reply.

    Raises:
        RuntimeError: If curl returned nothing that parses as JSON, or the
            reply does not contain an ``access_token``.
    """
    result = subprocess.run(
        [
            # -S keeps curl's own error text on stderr even though -s hides
            # the progress meter, so a transport failure can be reported.
            "curl", "-sS", "-X", "POST", TOKEN_URL,
            # --data-urlencode instead of -d: plain -d sends the value as-is,
            # so a secret containing '+', '&' or '=' would be mangled when
            # the server decodes the form body.
            "--data-urlencode", f"client_id={CLIENT_ID}",
            "--data-urlencode", f"client_secret={CLIENT_SECRET}",
            "--data-urlencode", "scope=https://graph.microsoft.com/.default",
            "--data-urlencode", "grant_type=client_credentials",
        ],
        capture_output=True, text=True
    )

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as err:
        # Empty or non-JSON output means curl itself failed (DNS, TLS,
        # missing binary output, ...). Surface it as the same RuntimeError
        # used for a rejected request instead of a raw decode error.
        reason = result.stderr.strip() or "no response from token endpoint"
        print(f"[ERROR] Token request failed: {reason}")
        raise RuntimeError("Failed to get token") from err

    if not isinstance(data, dict) or "access_token" not in data:
        print(f"[ERROR] Token request failed: {data}")
        raise RuntimeError("Failed to get token")
    return data["access_token"]
|
|
|
|
def refresh_token_if_needed():
    """Return the cached token, fetching a new one first when required.

    A new token is fetched when none is cached yet or when the request
    counter has reached 30; fetching resets the counter to zero.
    """
    global _token, _api_call_count

    token_is_usable = _token is not None and _api_call_count < 30
    if token_is_usable:
        return _token

    _token = get_token()
    _api_call_count = 0
    print(" [Token refreshed]")
    return _token
|
|
|
|
def graph_get(url, retries=3):
    """GET a Microsoft Graph URL through curl and return the decoded JSON.

    The URL is handed to curl unchanged, so any query string must already be
    encoded by the caller.

    Args:
        url: Absolute Graph URL to request.
        retries: Maximum number of attempts.

    Returns:
        The parsed JSON reply, or None when every attempt produced empty or
        unparseable output, when Graph returned a non-throttling error, or
        when throttling persisted through the last attempt.
    """
    global _token, _api_call_count
    token = refresh_token_if_needed()
    _api_call_count += 1

    for attempt in range(retries):
        result = subprocess.run(
            ["curl", "-s", "--url", url,
             "-H", f"Authorization: Bearer {token}",
             "-H", "Content-Type: application/json",
             "-H", "ConsistencyLevel: eventual"],
            capture_output=True, text=True
        )

        # Empty output: pause and retry unless this was the last attempt.
        if not result.stdout:
            if attempt < retries - 1:
                time.sleep(2)
                continue
            return None

        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError:
            if attempt < retries - 1:
                time.sleep(2)
                continue
            return None

        if "error" in data:
            code = data["error"].get("code", "")
            if code in ("TooManyRequests", "ServiceUnavailable", "GatewayTimeout") or "429" in str(code):
                # Linear back-off: 5s, 10s, 15s, ...
                wait = 5 * (attempt + 1)
                print(f" [Throttled, waiting {wait}s...]")
                time.sleep(wait)
                # Store the new token in the module cache as well. Resetting
                # the counter while leaving the old token cached would make
                # the next 30 calls reuse the token that was just replaced.
                _token = token = get_token()
                _api_call_count = 0
                continue
            # Any other Graph error is not retried.
            return None

        return data

    return None
|
|
|
|
def graph_search(email, top=3):
    """Search the mailbox for messages from one address using ``$search``.

    Queries ``{GRAPH_BASE}/messages`` with ``$search="from:<email>"`` and
    selects only subject, from and body. curl's ``-G`` with
    ``--data-urlencode`` builds and encodes the query string.

    Args:
        email: Sender address to search for.
        top: Value sent as ``$top`` (number of messages requested).

    Returns:
        The parsed JSON reply, or None when all three attempts produced empty
        or unparseable output, when Graph returned a non-throttling error, or
        when throttling persisted through the last attempt.
    """
    global _token, _api_call_count
    token = refresh_token_if_needed()
    _api_call_count += 1

    base_url = f"{GRAPH_BASE}/messages"

    for attempt in range(3):
        result = subprocess.run(
            ["curl", "-s", "-G", base_url,
             "--data-urlencode", f"$search=\"from:{email}\"",
             "--data-urlencode", "$select=subject,from,body",
             "--data-urlencode", f"$top={top}",
             "-H", f"Authorization: Bearer {token}",
             "-H", "Content-Type: application/json",
             "-H", "ConsistencyLevel: eventual"],
            capture_output=True, text=True
        )

        # Empty output: pause and retry unless this was the last attempt.
        if not result.stdout:
            if attempt < 2:
                time.sleep(2)
                continue
            return None

        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError:
            if attempt < 2:
                time.sleep(2)
                continue
            return None

        if "error" in data:
            code = data["error"].get("code", "")
            if code in ("TooManyRequests", "ServiceUnavailable", "GatewayTimeout") or "429" in str(code):
                # Linear back-off: 5s, 10s, 15s.
                wait = 5 * (attempt + 1)
                print(f" [Throttled, waiting {wait}s...]")
                time.sleep(wait)
                # Store the new token in the module cache as well. Resetting
                # the counter while leaving the old token cached would make
                # the next 30 calls reuse the token that was just replaced.
                _token = token = get_token()
                _api_call_count = 0
                continue
            # Any other Graph error is not retried.
            return None

        return data

    return None
|
|
|
|
# ── Phone extraction ──
|
|
# A bare 10-digit number with optional parentheses and space/dot/dash
# separators.
PHONE_RE = re.compile(r'[\(]?\d{3}[\)\s.\-]?\s?\d{3}[\s.\-]?\d{4}')

# The same kind of number immediately preceded by a label such as "Cell:".
LABELED_PHONE_RE = re.compile(
    r'(?:Tel|Phone|Cell|Mobile|Office|Direct|Fax)[:\s]*\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4}',
    re.I,
)

# Captures just the label word out of a labeled match.
LABEL_RE = re.compile(r'(Tel|Phone|Cell|Mobile|Office|Direct|Fax)', re.I)

# Text fragments that usually introduce a signature block.
SIGNATURE_MARKERS = [
    '--',
    '---',
    '____',
    '====',
    'Best regards',
    'Kind regards',
    'Regards',
    'Sincerely',
    'Thank you',
    'Thanks',
    'Sent from',
    'Get Outlook',
    'Best,',
    'Cheers',
    'Warm regards',
    'All the best',
]

# Markers that indicate the start of a quoted/forwarded reply (stop searching past these)
REPLY_MARKERS = [
    'From:',
    'Sent:',
    '-----Original Message',
    '________________________________',
    'On ',
    '> On ',
    'Begin forwarded message',
    'wrote:',
]
|
|
|
|
def strip_html(text):
    """Turn an HTML fragment into plain text.

    ``<br>`` and the listed block-level tags become newlines, all remaining
    tags are dropped, HTML entities are decoded, and any run of three or
    more newlines is reduced to two.
    """
    # Applied in order: line breaks, block tags, then whatever tags are left.
    tag_rules = (
        (r'<br\s*/?>', '\n', re.IGNORECASE),
        (r'</?(?:p|div|tr|td|li|blockquote|table|tbody|thead|th|hr)[^>]*>', '\n', re.IGNORECASE),
        (r'<[^>]+>', '', 0),
    )
    for pattern, replacement, flags in tag_rules:
        text = re.sub(pattern, replacement, text, flags=flags)

    decoded = html.unescape(text)
    # Collapse multiple blank lines
    return re.sub(r'\n{3,}', '\n\n', decoded)
|
|
|
|
def extract_first_message_body(body_html):
    """Return the plain text of the newest message, without quoted replies.

    The HTML is converted with ``strip_html`` and split into lines. From
    line index 5 onward the first line that looks like the start of a quoted
    or forwarded message ends the text; when none is found the whole text is
    returned.
    """
    text_lines = strip_html(body_html).split('\n')

    def begins_quoted_part(index, stripped):
        """Report whether this line opens a quoted or forwarded message."""
        # A "From: Name <email>" header only counts past line index 10.
        if index > 10 and re.match(r'^From:\s+.+', stripped):
            return True
        # "On <date>, <name> wrote:"
        if re.match(r'^On .+wrote:\s*$', stripped):
            return True
        if '-----Original Message' in stripped:
            return True
        return stripped.startswith('________________________________')

    # The first five lines are never inspected.
    cutoff = next(
        (
            index
            for index in range(5, len(text_lines))
            if begins_quoted_part(index, text_lines[index].strip())
        ),
        len(text_lines),
    )
    return '\n'.join(text_lines[:cutoff])
|
|
|
|
def extract_phone_from_body(body_html, sender_email):
    """Find a phone number in the newest message of an email body.

    Only the newest message (quoted replies removed) is examined, so numbers
    belonging to other people in the thread are not picked up. The signature
    area is searched first, then the whole message; in each, a labeled
    number ("Cell: ...") is preferred over a bare one.

    Args:
        body_html: HTML body of the message; may be empty or None.
        sender_email: Accepted for the caller's convenience; not used here.

    Returns:
        ``(phone, label)`` where ``phone`` has been passed through
        ``normalize_phone`` and ``label`` is the capitalized label word or
        None; ``(None, None)`` when nothing is found.
    """
    if not body_html:
        return None, None

    newest = extract_first_message_body(body_html)
    rows = newest.split('\n')
    row_count = len(rows)

    # Walk upward from the last line (at most 39 lines) and stop at the first
    # line that contains any signature marker, compared case-insensitively.
    markers = [marker.lower() for marker in SIGNATURE_MARKERS]
    sig_start = next(
        (
            index
            for index in range(row_count - 1, max(row_count - 40, -1), -1)
            if any(marker in rows[index].strip().lower() for marker in markers)
        ),
        None,
    )
    # If no signature marker found, use last 25 lines of first message
    if sig_start is None:
        sig_start = max(0, row_count - 25)
    signature = '\n'.join(rows[sig_start:])

    # Signature first, whole message as the fallback; within each, a labeled
    # number wins over a bare one.
    for haystack in (signature, newest):
        labeled_hit = LABELED_PHONE_RE.search(haystack)
        if labeled_hit:
            snippet = labeled_hit.group(0)
            label_hit = LABEL_RE.search(snippet)
            number_hit = PHONE_RE.search(snippet)
            if number_hit:
                label = label_hit.group(1).capitalize() if label_hit else None
                return normalize_phone(number_hit.group(0)), label

        bare_hit = PHONE_RE.search(haystack)
        if bare_hit:
            return normalize_phone(bare_hit.group(0)), None

    return None, None
|
|
|
|
def normalize_phone(raw):
    """Format a phone number as ``(xxx) xxx-xxxx``.

    All non-digits are discarded and a leading ``1`` is dropped from an
    11-digit number. When the result is not exactly ten digits, the input is
    returned with only surrounding whitespace removed.
    """
    digits = ''.join(re.findall(r'\d', raw))
    if len(digits) == 11 and digits.startswith('1'):
        digits = digits[1:]

    if len(digits) != 10:
        return raw.strip()

    area, exchange, subscriber = digits[:3], digits[3:6], digits[6:]
    return f"({area}) {exchange}-{subscriber}"
|
|
|
|
# ── Main ──
|
|
def main():
    """Report real two-way correspondents that are missing from contacts.

    Loads the candidate list from INPUT_FILE, keeps entries with at least one
    sent message whose address does not match the junk filters, sorts them by
    total exchanges, looks up a phone number for the top 60 through
    ``graph_search``, writes everything to OUTPUT_FILE and prints a table and
    a summary.
    """
    banner = "=" * 80
    print(banner)
    print(" Bardach Missing Real Contacts - Phone Number Finder")
    print(banner)

    # 1. Load input
    with open(INPUT_FILE, encoding='utf-8') as handle:
        loaded = json.load(handle)
    missing = loaded["missing"]
    print(f"\n[INFO] Total missing contacts loaded: {len(missing)}")

    # 2. Keep only people the mailbox owner has written to
    two_way = [entry for entry in missing if entry["sent_count"] > 0]
    print(f"[INFO] Two-way correspondents (sent_count > 0): {len(two_way)}")

    # 3. Drop automated and commercial senders
    def looks_like_junk(address):
        lowered = address.lower()
        if any(keyword in lowered for keyword in JUNK_KEYWORDS):
            return True
        domain = lowered.split('@')[-1] if '@' in lowered else ''
        return any(
            domain == known or domain.endswith('.' + known)
            for known in COMMERCIAL_DOMAINS
        )

    real = [entry for entry in two_way if not looks_like_junk(entry["email"])]
    print(f"[INFO] After junk filter: {len(real)}")

    # 4. Most active correspondents first
    real.sort(key=lambda entry: entry["total"], reverse=True)

    print(f"\n[SUCCESS] {len(real)} real two-way correspondents are missing from contacts\n")

    # 5. Phone lookup for top 60
    top_n = min(60, len(real))
    print(f"[INFO] Searching for phone numbers in top {top_n} contacts...")
    print("-" * 80)

    def as_record(entry, phone, phone_label):
        """Build one output row; key order matches the saved JSON."""
        return {
            "email": entry["email"],
            "display_name": entry["display_name"],
            "sent_count": entry["sent_count"],
            "received_count": entry["received_count"],
            "total": entry["total"],
            "phone": phone,
            "phone_label": phone_label,
        }

    def find_phone(address):
        """Search the 3 messages returned for this address for a number."""
        number = None
        tag = None
        reply = graph_search(address, top=3)
        if reply and "value" in reply:
            wanted = address.lower()
            for message in reply["value"]:
                # Verify this message is actually FROM the target email
                sender = message.get("from", {}).get("emailAddress", {}).get("address", "").lower()
                if sender != wanted:
                    continue
                content = message.get("body", {}).get("content", "")
                number, tag = extract_phone_from_body(content, address)
                if number:
                    break
        return number, tag

    results = []
    phones_found = 0

    for position, contact in enumerate(real[:top_n], start=1):
        email = contact["email"]
        name = contact["display_name"] or email.split('@')[0]
        # Progress line is printed before the lookup and completed after it.
        print(f" [{position:2d}/{top_n}] {name[:35]:35s} <{email[:40]}>", end="", flush=True)

        phone, phone_label = find_phone(email)

        if phone:
            phones_found += 1
            label_str = f" ({phone_label})" if phone_label else ""
            print(f" -> {phone}{label_str}")
        else:
            print(" -> --")

        results.append(as_record(contact, phone, phone_label))

    # 6. Add remaining contacts (beyond top 60) without phone lookup
    results.extend(as_record(contact, None, None) for contact in real[top_n:])

    # 7. Save output
    output = {
        "generated": datetime.now().isoformat(),
        "total_two_way": len(real),
        "with_phone": phones_found,
        "without_phone": len(real) - phones_found,
        "contacts": results,
    }
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as handle:
        json.dump(output, handle, indent=2, ensure_ascii=False)

    print(f"\n[SUCCESS] Saved to {OUTPUT_FILE}")

    # 8. Print table
    rule = '=' * 110
    print(f"\n{rule}")
    print(f" MISSING REAL CONTACTS - TOP {top_n} (sorted by total exchanges)")
    print(rule)
    print(f" {'#':>3} {'Name':<30} {'Email':<40} {'Total':>6} {'Phone':<25}")
    print(f" {'-'*3} {'-'*30} {'-'*40} {'-'*6} {'-'*25}")

    for row_number, row in enumerate(results[:top_n], start=1):
        name = (row["display_name"] or row["email"].split('@')[0])[:30]
        email_short = row["email"][:40]
        if row["phone_label"]:
            phone_str = f"{row['phone']} ({row['phone_label']})"
        else:
            phone_str = row["phone"] or "--"
        print(f" {row_number:3d} {name:<30} {email_short:<40} {row['total']:6d} {phone_str}")

    print(f"\n{rule}")
    print(" SUMMARY")
    print(rule)
    print(f" Total two-way correspondents missing: {len(real)}")
    print(f" Phone numbers found (top {top_n}): {phones_found}")
    print(f" Without phone (top {top_n}): {top_n - phones_found}")
    print(rule)
|
|
|
|
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
|