sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
414
temp/bardach_missing_real_contacts.py
Normal file
414
temp/bardach_missing_real_contacts.py
Normal file
@@ -0,0 +1,414 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Find real two-way correspondents missing from Barbara's contacts and extract phone numbers from signatures."""
|
||||
|
||||
import html
import json
import os
import re
import subprocess
import time
import urllib.parse
from datetime import datetime
|
||||
|
||||
# ── Config ──
|
||||
# Input produced by an earlier step; output written by main().
INPUT_FILE = r"D:\ClaudeTools\temp\bardach_missing_contacts.json"
OUTPUT_FILE = r"D:\ClaudeTools\temp\bardach_missing_real_contacts.json"

# Azure AD app registration used for the client-credentials grant.
TENANT = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: the client secret must never live in source control. It is read
# from the BARDACH_CLIENT_SECRET environment variable instead; the value that
# was previously committed here should be treated as leaked and rotated.
# An unset variable yields "" and the token request will then fail.
CLIENT_SECRET = os.environ.get("BARDACH_CLIENT_SECRET", "")
# Mailbox whose messages are searched through Microsoft Graph.
USER_EMAIL = "barbara@bardach.net"

TOKEN_URL = f"https://login.microsoftonline.com/{TENANT}/oauth2/v2.0/token"
GRAPH_BASE = f"https://graph.microsoft.com/v1.0/users/{USER_EMAIL}"
|
||||
|
||||
# ── Junk filters ──
|
||||
# Substrings that mark an address as automated or bulk mail. They are matched
# against the whole lower-cased address, so "alert" also hits "alerts@...".
JUNK_KEYWORDS = [
    # automated senders
    "noreply",
    "no-reply",
    "donotreply",
    "notification",
    "alert",
    # mail-system plumbing
    "mailer-daemon",
    "postmaster",
    "unsubscribe",
    "bounce",
    # role mailboxes
    "support@",
    "info@",
    "help@",
    "service@",
    "billing@",
    # bulk mail
    "news@",
    "newsletter",
    "marketing",
    "promo",
]

# Domains (and any of their subdomains) treated as commercial senders.
COMMERCIAL_DOMAINS = [
    "amazon.com",
    "google.com",
    "facebook.com",
    "apple.com",
    "microsoft.com",
    "paypal.com",
    "ebay.com",
    "nextdoor.com",
    "linkedin.com",
    "twitter.com",
    "instagram.com",
    "fidelity.com",
    "schwab.com",
    "vanguard.com",
    "intuit.com",
    "turbotax.com",
]
|
||||
|
||||
# ── Token management ──
|
||||
# Cached bearer token shared by the Graph helpers; None until first fetched.
_token = None
# Number of Graph calls made since the cached token was last fetched.
_api_call_count = 0
|
||||
|
||||
def get_token():
    """Request a fresh OAuth2 access token via the client-credentials grant.

    The form body is URL-encoded here and piped to curl on stdin, so the
    client secret does not appear in curl's argument list and is not
    corrupted if it contains characters such as ``+``, ``&`` or ``=``.

    Returns:
        The ``access_token`` string from the token endpoint's JSON response.

    Raises:
        RuntimeError: if curl produced no or non-JSON output, or the response
            does not contain an ``access_token``.
    """
    form = urllib.parse.urlencode({
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "scope": "https://graph.microsoft.com/.default",
        "grant_type": "client_credentials",
    })
    result = subprocess.run(
        # "@-" makes curl read the POST body from stdin.
        ["curl", "-s", "-X", "POST", TOKEN_URL, "--data-binary", "@-"],
        input=form, capture_output=True, text=True
    )
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as err:
        # Empty stdout (curl could not connect) or an HTML error page.
        print("[ERROR] Token request failed: empty or non-JSON response")
        raise RuntimeError("Failed to get token") from err

    if not isinstance(data, dict) or "access_token" not in data:
        print(f"[ERROR] Token request failed: {data}")
        raise RuntimeError("Failed to get token")
    return data["access_token"]
|
||||
|
||||
def refresh_token_if_needed():
    """Return the cached token, fetching a new one when it is missing or stale.

    The token counts as stale once 30 API calls have been recorded since the
    last fetch. Fetching resets the call counter and prints a notice.
    """
    global _token, _api_call_count
    still_usable = _token is not None and _api_call_count < 30
    if still_usable:
        return _token

    _token = get_token()
    _api_call_count = 0
    print(" [Token refreshed]")
    return _token
|
||||
|
||||
def graph_get(url, retries=3):
    """GET *url* from Microsoft Graph via curl and return the decoded JSON.

    The URL is passed to curl as-is, so the caller must already have encoded
    any query parameters.

    Args:
        url: Fully-formed Graph request URL.
        retries: Maximum number of attempts.

    Returns:
        The parsed JSON payload, or None when every attempt failed or Graph
        returned a non-retryable error.
    """
    global _token, _api_call_count
    token = refresh_token_if_needed()
    _api_call_count += 1

    retryable_codes = ("TooManyRequests", "ServiceUnavailable", "GatewayTimeout")

    for attempt in range(retries):
        is_last_attempt = attempt >= retries - 1
        result = subprocess.run(
            ["curl", "-s", "--url", url,
             "-H", f"Authorization: Bearer {token}",
             "-H", "Content-Type: application/json",
             "-H", "ConsistencyLevel: eventual"],
            capture_output=True, text=True
        )

        # Empty or undecodable output: brief pause, then retry.
        try:
            data = json.loads(result.stdout) if result.stdout else None
        except json.JSONDecodeError:
            data = None
        if data is None:
            if is_last_attempt:
                return None
            time.sleep(2)
            continue

        if not isinstance(data, dict) or "error" not in data:
            return data

        error = data["error"]
        code = error.get("code", "") if isinstance(error, dict) else ""
        if code in retryable_codes or "429" in str(code):
            wait = 5 * (attempt + 1)
            print(f" [Throttled, waiting {wait}s...]")
            time.sleep(wait)
            # Store the new token in the module cache as well; resetting the
            # counter alone would leave later calls on the old token.
            _token = token = get_token()
            _api_call_count = 0
            continue
        return None

    return None
|
||||
|
||||
def graph_search(email, top=3, retries=3):
    """Search the mailbox for messages from *email* using Graph ``$search``.

    Query parameters are sent with ``curl -G --data-urlencode`` so the
    ``$search`` expression is percent-encoded by curl.

    Args:
        email: Sender address placed in the ``from:`` search term.
        top: Value sent as ``$top`` (number of messages requested).
        retries: Maximum number of attempts.

    Returns:
        The parsed JSON payload (subject, from and body are selected), or
        None when every attempt failed or Graph returned a non-retryable
        error.
    """
    global _token, _api_call_count
    token = refresh_token_if_needed()
    _api_call_count += 1

    base_url = f"{GRAPH_BASE}/messages"
    retryable_codes = ("TooManyRequests", "ServiceUnavailable", "GatewayTimeout")

    for attempt in range(retries):
        is_last_attempt = attempt >= retries - 1
        result = subprocess.run(
            ["curl", "-s", "-G", base_url,
             "--data-urlencode", f"$search=\"from:{email}\"",
             "--data-urlencode", "$select=subject,from,body",
             "--data-urlencode", f"$top={top}",
             "-H", f"Authorization: Bearer {token}",
             "-H", "Content-Type: application/json",
             "-H", "ConsistencyLevel: eventual"],
            capture_output=True, text=True
        )

        # Empty or undecodable output: brief pause, then retry.
        try:
            data = json.loads(result.stdout) if result.stdout else None
        except json.JSONDecodeError:
            data = None
        if data is None:
            if is_last_attempt:
                return None
            time.sleep(2)
            continue

        if not isinstance(data, dict) or "error" not in data:
            return data

        error = data["error"]
        code = error.get("code", "") if isinstance(error, dict) else ""
        if code in retryable_codes or "429" in str(code):
            wait = 5 * (attempt + 1)
            print(f" [Throttled, waiting {wait}s...]")
            time.sleep(wait)
            # Store the new token in the module cache as well; resetting the
            # counter alone would leave later calls on the old token.
            _token = token = get_token()
            _api_call_count = 0
            continue
        return None

    return None
|
||||
|
||||
# ── Phone extraction ──
|
||||
# North-American phone number: optional "+1"/"1" prefix, optional parentheses
# around the area code, up to two separators after it, one before the last
# four digits. The digit guards on both ends stop it from matching a slice of
# a longer digit run such as an order or tracking number.
PHONE_RE = re.compile(
    r'(?<!\d)(?:\+?1[\s.\-]?)?\(?\d{3}\)?[\s.\-]{0,2}\d{3}[\s.\-]?\d{4}(?!\d)'
)
# Same number shape, preceded by a label such as "Tel:" or "Cell".
LABELED_PHONE_RE = re.compile(
    r'(?:Tel|Phone|Cell|Mobile|Office|Direct|Fax)[.:\s]*'
    r'(?:\+?1[\s.\-]?)?\(?\d{3}\)?[\s.\-]{0,2}\d{3}[\s.\-]?\d{4}(?!\d)',
    re.IGNORECASE
)
# Captures just the label word from a labelled match.
LABEL_RE = re.compile(r'(Tel|Phone|Cell|Mobile|Office|Direct|Fax)', re.IGNORECASE)
|
||||
# Text fragments that mark the start of an email signature.
SIGNATURE_MARKERS = [
    # rule lines
    '--',
    '---',
    '____',
    '====',
    # sign-offs
    'Best regards',
    'Kind regards',
    'Regards',
    'Sincerely',
    'Thank you',
    'Thanks',
    # mail-client footers
    'Sent from',
    'Get Outlook',
    # more sign-offs
    'Best,',
    'Cheers',
    'Warm regards',
    'All the best',
]

# Markers that indicate the start of a quoted/forwarded reply (stop searching past these)
REPLY_MARKERS = [
    'From:',
    'Sent:',
    '-----Original Message',
    '________________________________',
    'On ',
    '> On ',
    'Begin forwarded message',
    'wrote:',
]
|
||||
|
||||
def strip_html(text):
    """Convert an HTML fragment to plain text.

    HTML comments and the contents of ``<script>``/``<style>`` elements are
    dropped entirely, so CSS and Outlook conditional markup do not end up in
    the text. ``<br>`` and block-level tags become newlines, remaining tags
    are removed, entities are decoded, and runs of three or more newlines
    collapse to a single blank line.

    Args:
        text: HTML source; None or "" is treated as empty.

    Returns:
        The plain-text rendering ("" for empty input).
    """
    if not text:
        return ""

    # Normalise line endings so the blank-line collapse below sees "\n" only.
    text = text.replace('\r\n', '\n').replace('\r', '\n')
    # Remove non-content markup before the generic tag stripping.
    text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)
    text = re.sub(
        r'<(?:script|style)\b[^>]*>.*?</(?:script|style)\s*>', '', text,
        flags=re.IGNORECASE | re.DOTALL
    )
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'</?(?:p|div|tr|td|li|blockquote|table|tbody|thead|th|hr)[^>]*>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    text = html.unescape(text)
    # Collapse multiple blank lines
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text
|
||||
|
||||
def extract_first_message_body(body_html):
    """Return the plain text of the newest message in a thread.

    The HTML is converted to text and truncated at the first line, from the
    sixth line onwards, that looks like the start of a quoted reply.
    """
    lines = strip_html(body_html).split('\n')

    def starts_quoted_reply(index, stripped):
        # "From: Name <email>" only counts once past the eleventh line.
        if index > 10 and re.match(r'^From:\s+.+', stripped):
            return True
        # "On <date>, <name> wrote:"
        if re.match(r'^On .+wrote:\s*$', stripped):
            return True
        if '-----Original Message' in stripped:
            return True
        return stripped.startswith('________________________________')

    # The first five lines are never treated as a reply boundary.
    cutoff = next(
        (
            index
            for index, raw_line in enumerate(lines)
            if index >= 5 and starts_quoted_reply(index, raw_line.strip())
        ),
        len(lines),
    )
    return '\n'.join(lines[:cutoff])
|
||||
|
||||
def _find_phone_in_text(text):
    """Return (normalized_phone, label) for the best phone in *text*, else (None, None)."""
    first_fax = None
    for match in LABELED_PHONE_RE.finditer(text):
        match_text = match.group(0)
        label_match = LABEL_RE.search(match_text)
        label = label_match.group(1).capitalize() if label_match else None
        # Normalise the part after the label directly, so a number the
        # labelled pattern accepted is never lost to a stricter re-parse.
        number_part = match_text[label_match.end():] if label_match else match_text
        candidate = (normalize_phone(number_part.lstrip(" \t\r\n.:")), label)
        if label != "Fax":
            return candidate
        # Keep the first fax number only as a fallback to other labelled numbers.
        if first_fax is None:
            first_fax = candidate
    if first_fax is not None:
        return first_fax

    unlabeled = PHONE_RE.search(text)
    if unlabeled:
        return normalize_phone(unlabeled.group(0)), None
    return None, None


def extract_phone_from_body(body_html, sender_email):
    """Extract a phone number from the signature area of the FIRST message only.

    The signature block is searched first, then the whole first message.
    Within each, labelled numbers ("Tel:", "Cell:", ...) win over bare ones,
    and a fax number is returned only when no other labelled number exists.

    Args:
        body_html: HTML body of the message; falsy values yield (None, None).
        sender_email: Address of the sender (currently unused; kept for callers).

    Returns:
        A ``(phone, label)`` tuple. ``phone`` is the normalized number or
        None; ``label`` is the capitalized label word or None.
    """
    if not body_html:
        return None, None

    # Get just the first message (not quoted replies) to avoid picking up OTHER people's numbers
    first_msg = extract_first_message_body(body_html)
    lines = first_msg.split('\n')

    # Find signature start - search from bottom up for signature markers
    markers = [marker.lower() for marker in SIGNATURE_MARKERS]
    sig_start = None
    for i in range(len(lines) - 1, max(len(lines) - 40, -1), -1):
        lowered = lines[i].strip().lower()
        if any(marker in lowered for marker in markers):
            sig_start = i
            break

    # If no signature marker found, use last 25 lines of first message
    if sig_start is None:
        sig_start = max(0, len(lines) - 25)

    sig_text = '\n'.join(lines[sig_start:])

    # Signature first, then fall back to the entire first message.
    for candidate_text in (sig_text, first_msg):
        phone, label = _find_phone_in_text(candidate_text)
        if phone:
            return phone, label

    return None, None
|
||||
|
||||
def normalize_phone(raw):
    """Format a phone string as ``(xxx) xxx-xxxx``.

    A leading country code ``1`` on an 11-digit number is dropped. Anything
    that does not reduce to exactly ten digits is returned unchanged apart
    from surrounding whitespace.
    """
    digit_chars = ''.join(re.findall(r'\d', raw))
    has_country_code = len(digit_chars) == 11 and digit_chars[0] == '1'
    national = digit_chars[1:] if has_country_code else digit_chars

    if len(national) != 10:
        return raw.strip()

    area, exchange, subscriber = national[:3], national[3:6], national[6:]
    return f"({area}) {exchange}-{subscriber}"
|
||||
|
||||
# ── Main ──
|
||||
def _is_junk(email):
    """Return True when *email* contains a junk keyword or is on a commercial domain."""
    email_lower = email.lower()
    if any(kw in email_lower for kw in JUNK_KEYWORDS):
        return True
    domain = email_lower.split('@')[-1] if '@' in email_lower else ''
    return any(domain == cd or domain.endswith('.' + cd) for cd in COMMERCIAL_DOMAINS)


def _contact_record(contact, phone=None, phone_label=None):
    """Build the output record for *contact*, with optional phone details."""
    return {
        "email": contact["email"],
        "display_name": contact["display_name"],
        "sent_count": contact["sent_count"],
        "received_count": contact["received_count"],
        "total": contact["total"],
        "phone": phone,
        "phone_label": phone_label
    }


def _lookup_phone(email):
    """Search messages from *email* and return the first (phone, label) found."""
    resp = graph_search(email, top=3)
    if not resp or "value" not in resp:
        return None, None

    target = email.lower()
    for msg in resp["value"]:
        # Graph may send JSON null for "from"/"body"; `or` guards against
        # calling .get() on None.
        sender = ((msg.get("from") or {}).get("emailAddress") or {}).get("address") or ""
        # Verify this message is actually FROM the target email
        if sender.lower() != target:
            continue
        body_content = (msg.get("body") or {}).get("content") or ""
        phone, phone_label = extract_phone_from_body(body_content, email)
        if phone:
            return phone, phone_label
    return None, None


def main(max_lookups=60):
    """Find real two-way correspondents missing from contacts and look up phones.

    Loads the missing-contact list from INPUT_FILE, keeps entries with
    ``sent_count > 0`` that pass the junk filter, sorts them by ``total``
    descending, searches the mailbox for a phone number for the first
    *max_lookups* of them, writes everything to OUTPUT_FILE as JSON and
    prints a summary table.

    Args:
        max_lookups: How many of the top contacts get a phone lookup.
    """
    print("=" * 80)
    print(" Bardach Missing Real Contacts - Phone Number Finder")
    print("=" * 80)

    # 1. Load input
    with open(INPUT_FILE, encoding='utf-8') as f:
        data = json.load(f)

    missing = data["missing"]
    print(f"\n[INFO] Total missing contacts loaded: {len(missing)}")

    # 2. Filter sent_count > 0
    two_way = [c for c in missing if c["sent_count"] > 0]
    print(f"[INFO] Two-way correspondents (sent_count > 0): {len(two_way)}")

    # 3. Filter junk
    real = [c for c in two_way if not _is_junk(c["email"])]
    print(f"[INFO] After junk filter: {len(real)}")

    # 4. Sort by total descending
    real.sort(key=lambda c: c["total"], reverse=True)

    print(f"\n[SUCCESS] {len(real)} real two-way correspondents are missing from contacts\n")

    # 5. Phone lookup for the top contacts
    top_n = min(max_lookups, len(real))
    print(f"[INFO] Searching for phone numbers in top {top_n} contacts...")
    print("-" * 80)

    results = []
    phones_found = 0

    for idx, contact in enumerate(real[:top_n]):
        email = contact["email"]
        name = contact["display_name"] or email.split('@')[0]
        print(f" [{idx+1:2d}/{top_n}] {name[:35]:35s} <{email[:40]}>", end="", flush=True)

        # Search the most recent emails FROM this address using $search
        phone, phone_label = _lookup_phone(email)

        if phone:
            phones_found += 1
            label_str = f" ({phone_label})" if phone_label else ""
            print(f" -> {phone}{label_str}")
        else:
            print(" -> --")

        results.append(_contact_record(contact, phone, phone_label))

    # 6. Add remaining contacts (beyond the lookup limit) without phone lookup
    results.extend(_contact_record(contact) for contact in real[top_n:])

    # 7. Save output
    output = {
        "generated": datetime.now().isoformat(),
        "total_two_way": len(real),
        "with_phone": phones_found,
        "without_phone": len(real) - phones_found,
        "contacts": results
    }

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print(f"\n[SUCCESS] Saved to {OUTPUT_FILE}")

    # 8. Print table
    print(f"\n{'='*110}")
    print(f" MISSING REAL CONTACTS - TOP {top_n} (sorted by total exchanges)")
    print(f"{'='*110}")
    print(f" {'#':>3} {'Name':<30} {'Email':<40} {'Total':>6} {'Phone':<25}")
    print(f" {'-'*3} {'-'*30} {'-'*40} {'-'*6} {'-'*25}")

    for i, c in enumerate(results[:top_n]):
        name = (c["display_name"] or c["email"].split('@')[0])[:30]
        email_short = c["email"][:40]
        phone_str = c["phone"] or "--"
        if c["phone_label"]:
            phone_str = f"{c['phone']} ({c['phone_label']})"
        print(f" {i+1:3d} {name:<30} {email_short:<40} {c['total']:6d} {phone_str}")

    print(f"\n{'='*110}")
    print(" SUMMARY")
    print(f"{'='*110}")
    print(f" Total two-way correspondents missing: {len(real)}")
    print(f" Phone numbers found (top {top_n}): {phones_found}")
    print(f" Without phone (top {top_n}): {top_n - phones_found}")
    print(f"{'='*110}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user