sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00

Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 19:59:08 -07:00
parent a1a19f8c00
commit fa15b03180
169 changed files with 879909 additions and 1243 deletions
--- a/temp/bardach_missing_real_contacts.py
+++ b/temp/bardach_missing_real_contacts.py
@@ -0,0 +1,414 @@
+#!/usr/bin/env python3
+"""Find real two-way correspondents missing from Barbara's contacts and extract phone numbers from signatures."""
+
+import html
+import json
+import os
+import re
+import subprocess
+import time
+import urllib.parse
+from datetime import datetime
+
+# ── Config ──
+# Input: JSON with a "missing" list of contact records; output: filtered list with phones.
+INPUT_FILE = r"D:\ClaudeTools\temp\bardach_missing_contacts.json"
+OUTPUT_FILE = r"D:\ClaudeTools\temp\bardach_missing_real_contacts.json"
+
+# App registration used for the OAuth2 client-credentials grant.
+TENANT = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
+CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
+# SECURITY: the client secret must not live in source control. It is read from
+# the BARDACH_GRAPH_CLIENT_SECRET environment variable instead. The value that
+# was previously hard-coded here is exposed in repository history and must be
+# rotated. An empty value makes the token request fail with a RuntimeError.
+CLIENT_SECRET = os.environ.get("BARDACH_GRAPH_CLIENT_SECRET", "")
+USER_EMAIL = "barbara@bardach.net"
+
+TOKEN_URL = f"https://login.microsoftonline.com/{TENANT}/oauth2/v2.0/token"
+GRAPH_BASE = f"https://graph.microsoft.com/v1.0/users/{USER_EMAIL}"
+
+# ── Junk filters ──
+# Substrings matched anywhere in the lower-cased address (local part or domain).
+JUNK_KEYWORDS = [
+    "noreply", "no-reply", "donotreply", "notification", "alert",
+    "mailer-daemon", "postmaster", "unsubscribe", "bounce",
+    "support@", "info@", "help@", "service@", "billing@",
+    "news@", "newsletter", "marketing", "promo"
+]
+
+# Domains (and their subdomains) treated as commercial senders, not real contacts.
+COMMERCIAL_DOMAINS = [
+    "amazon.com", "google.com", "facebook.com", "apple.com", "microsoft.com",
+    "paypal.com", "ebay.com", "nextdoor.com", "linkedin.com", "twitter.com",
+    "instagram.com", "fidelity.com", "schwab.com", "vanguard.com",
+    "intuit.com", "turbotax.com"
+]
+
+# ── Token management ──
+# Cached bearer token and the number of API calls made since it was fetched.
+_token = None
+_api_call_count = 0
+
+def get_token():
+    """Request a fresh OAuth2 access token with the client-credentials grant.
+
+    The form body is URL-encoded and piped to curl on stdin (``--data @-``)
+    so that the client secret does not appear in the process argument list
+    and reserved characters in any value cannot corrupt the form.
+
+    Returns:
+        The ``access_token`` string from the token endpoint response.
+
+    Raises:
+        RuntimeError: If curl exits non-zero, the response is not valid JSON,
+            or the response does not contain ``access_token``.
+    """
+    form = urllib.parse.urlencode({
+        "client_id": CLIENT_ID,
+        "client_secret": CLIENT_SECRET,
+        "scope": "https://graph.microsoft.com/.default",
+        "grant_type": "client_credentials",
+    })
+    result = subprocess.run(
+        ["curl", "-s", "-X", "POST", TOKEN_URL, "--data", "@-"],
+        input=form, capture_output=True, text=True
+    )
+    if result.returncode != 0:
+        print(f"[ERROR] Token request failed: curl exited with code {result.returncode}")
+        raise RuntimeError("Failed to get token")
+
+    # An empty or non-JSON body (network failure, proxy error page) is reported
+    # as the same RuntimeError instead of leaking a JSONDecodeError.
+    try:
+        data = json.loads(result.stdout)
+    except json.JSONDecodeError as err:
+        print("[ERROR] Token request failed: response was not valid JSON")
+        raise RuntimeError("Failed to get token") from err
+
+    if "access_token" not in data:
+        print(f"[ERROR] Token request failed: {data}")
+        raise RuntimeError("Failed to get token")
+    return data["access_token"]
+
+def refresh_token_if_needed():
+    """Return the cached token, fetching a new one when none exists or 30 calls have been counted."""
+    global _token, _api_call_count
+    still_usable = _token is not None and _api_call_count < 30
+    if still_usable:
+        return _token
+
+    # No token yet, or the call budget is used up: fetch and reset the counter.
+    _token = get_token()
+    _api_call_count = 0
+    print("  [Token refreshed]")
+    return _token
+
+def graph_get(url, retries=3):
+    """GET a Graph API URL with curl and return the decoded JSON response.
+
+    The URL is handed to curl unchanged (``--url``), so any query parameters
+    must already be encoded by the caller.
+
+    Args:
+        url: Full request URL.
+        retries: Maximum number of attempts.
+
+    Returns:
+        The parsed JSON response, or None when every attempt produced an
+        empty body, invalid JSON, or an error payload.
+    """
+    global _token, _api_call_count
+    token = refresh_token_if_needed()
+    _api_call_count += 1
+
+    for attempt in range(retries):
+        last_attempt = attempt >= retries - 1
+        result = subprocess.run(
+            ["curl", "-s", "--url", url,
+             "-H", f"Authorization: Bearer {token}",
+             "-H", "Content-Type: application/json",
+             "-H", "ConsistencyLevel: eventual"],
+            capture_output=True, text=True
+        )
+        if not result.stdout:
+            if last_attempt:
+                return None
+            time.sleep(2)
+            continue
+
+        try:
+            data = json.loads(result.stdout)
+        except json.JSONDecodeError:
+            if last_attempt:
+                return None
+            time.sleep(2)
+            continue
+
+        if "error" in data:
+            code = data["error"].get("code", "")
+            if code in ("TooManyRequests", "ServiceUnavailable", "GatewayTimeout") or "429" in str(code):
+                # Nothing left to retry: skip the pointless sleep and token fetch.
+                if last_attempt:
+                    return None
+                wait = 5 * (attempt + 1)
+                print(f"    [Throttled, waiting {wait}s...]")
+                time.sleep(wait)
+                # Store the new token in the module cache as well; otherwise the
+                # counter reset below would leave later calls on the old token.
+                _token = token = get_token()
+                _api_call_count = 0
+                continue
+            return None
+        return data
+
+    return None
+
+def graph_search(email, top=3, retries=3):
+    """Search the mailbox for messages from an address using ``$search``.
+
+    Query parameters are sent with ``curl -G --data-urlencode`` so that the
+    quoted search expression is encoded correctly.
+
+    Args:
+        email: Sender address placed in the ``from:`` search term.
+        top: Maximum number of messages requested (``$top``).
+        retries: Maximum number of attempts.
+
+    Returns:
+        The parsed JSON response (subject, from and body are selected), or
+        None when every attempt produced an empty body, invalid JSON, or an
+        error payload.
+    """
+    global _token, _api_call_count
+    token = refresh_token_if_needed()
+    _api_call_count += 1
+
+    base_url = f"{GRAPH_BASE}/messages"
+
+    for attempt in range(retries):
+        last_attempt = attempt >= retries - 1
+        result = subprocess.run(
+            ["curl", "-s", "-G", base_url,
+             "--data-urlencode", f"$search=\"from:{email}\"",
+             "--data-urlencode", "$select=subject,from,body",
+             "--data-urlencode", f"$top={top}",
+             "-H", f"Authorization: Bearer {token}",
+             "-H", "Content-Type: application/json",
+             "-H", "ConsistencyLevel: eventual"],
+            capture_output=True, text=True
+        )
+
+        if not result.stdout:
+            if last_attempt:
+                return None
+            time.sleep(2)
+            continue
+
+        try:
+            data = json.loads(result.stdout)
+        except json.JSONDecodeError:
+            if last_attempt:
+                return None
+            time.sleep(2)
+            continue
+
+        if "error" in data:
+            code = data["error"].get("code", "")
+            if code in ("TooManyRequests", "ServiceUnavailable", "GatewayTimeout") or "429" in str(code):
+                # Nothing left to retry: skip the pointless sleep and token fetch.
+                if last_attempt:
+                    return None
+                wait = 5 * (attempt + 1)
+                print(f"    [Throttled, waiting {wait}s...]")
+                time.sleep(wait)
+                # Store the new token in the module cache as well; otherwise the
+                # counter reset below would leave later calls on the old token.
+                _token = token = get_token()
+                _api_call_count = 0
+                continue
+            return None
+        return data
+
+    return None
+
+# ── Phone extraction ──
+# Ten-digit phone number with an optional leading "+1"/"1" country code.
+# The digit guards on both ends stop the pattern from matching a slice of a
+# longer digit run (order numbers, IDs), which would yield a bogus phone.
+PHONE_RE = re.compile(
+    r'(?<!\d)(?:\+?1[\s.\-]?)?\(?\d{3}[\)\s.\-]?\s?\d{3}[\s.\-]?\d{4}(?!\d)'
+)
+# Same number shape, preceded by a label such as "Tel:" or "Cell".
+LABELED_PHONE_RE = re.compile(
+    r'(?:Tel|Phone|Cell|Mobile|Office|Direct|Fax)[:\s]*'
+    r'(?:\+?1[\s.\-]?)?\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4}(?!\d)',
+    re.IGNORECASE
+)
+# Captures just the label word out of a labeled match.
+LABEL_RE = re.compile(r'(Tel|Phone|Cell|Mobile|Office|Direct|Fax)', re.IGNORECASE)
+# Case-insensitive substrings that mark the start of a signature block.
+SIGNATURE_MARKERS = [
+    '--', '---', '____', '====', 'Best regards', 'Kind regards', 'Regards',
+    'Sincerely', 'Thank you', 'Thanks', 'Sent from', 'Get Outlook',
+    'Best,', 'Cheers', 'Warm regards', 'All the best'
+]
+
+# Markers that indicate the start of a quoted/forwarded reply (stop searching past these)
+# NOTE(review): not referenced by the code visible in this file — confirm whether it is still needed.
+REPLY_MARKERS = [
+    'From:', 'Sent:', '-----Original Message', '________________________________',
+    'On ', '> On ', 'Begin forwarded message', 'wrote:'
+]
+
+def strip_html(text):
+    """Convert an HTML fragment to plain text.
+
+    ``<style>``/``<script>`` blocks and HTML comments are dropped together
+    with their contents, so CSS or script source never ends up in the text
+    that is later scanned for signatures and phone numbers. ``<br>`` and
+    common block-level tags become newlines, remaining tags are removed,
+    entities are decoded, and runs of three or more newlines are collapsed
+    to a single blank line.
+
+    Args:
+        text: HTML source string.
+
+    Returns:
+        The plain-text rendering.
+    """
+    # Remove non-content blocks first; stripping only their tags would leave
+    # the CSS/JS body behind as if it were message text.
+    text = re.sub(r'<(style|script)\b[^>]*>.*?</\1\s*>', '', text,
+                  flags=re.IGNORECASE | re.DOTALL)
+    text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)
+    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
+    text = re.sub(r'</?(?:p|div|tr|td|li|blockquote|table|tbody|thead|th|hr)[^>]*>', '\n', text, flags=re.IGNORECASE)
+    text = re.sub(r'<[^>]+>', '', text)
+    text = html.unescape(text)
+    # Collapse multiple blank lines
+    text = re.sub(r'\n{3,}', '\n\n', text)
+    return text
+
+def extract_first_message_body(body_html):
+    """Return the plain text of the newest message in a thread, without quoted replies.
+
+    The HTML is converted to text and scanned from the sixth line onward; the
+    text is cut at the first line that looks like the start of quoted content.
+    """
+    rows = strip_html(body_html).split('\n')
+
+    def opens_quoted_section(pos, stripped):
+        # A "From: ..." header only counts once we are past line 10.
+        if pos > 10 and re.match(r'^From:\s+.+', stripped):
+            return True
+        # "On <date>, <name> wrote:" attribution line.
+        if re.match(r'^On .+wrote:\s*$', stripped):
+            return True
+        # Outlook-style separators.
+        return ('-----Original Message' in stripped
+                or stripped.startswith('________________________________'))
+
+    # The first five lines are never treated as a quote boundary.
+    end = next(
+        (pos for pos in range(5, len(rows))
+         if opens_quoted_section(pos, rows[pos].strip())),
+        len(rows),
+    )
+    return '\n'.join(rows[:end])
+
+def extract_phone_from_body(body_html, sender_email):
+    """Find a phone number in the newest message of a thread, preferring its signature.
+
+    Search order: labeled number in the signature, any number in the
+    signature, labeled number anywhere in the newest message, any number in
+    the newest message. Quoted replies are excluded so that other people's
+    numbers are not picked up. ``sender_email`` is accepted but not used.
+
+    Returns:
+        A ``(phone, label)`` tuple; ``label`` is None for unlabeled numbers,
+        and both are None when nothing is found or the body is empty.
+    """
+    if not body_html:
+        return None, None
+
+    message = extract_first_message_body(body_html)
+    rows = message.split('\n')
+
+    def labeled_hit(text):
+        # Labeled number ("Tel: ..."); returns None when there is no usable match.
+        found = LABELED_PHONE_RE.search(text)
+        if not found:
+            return None
+        snippet = found.group(0)
+        tag = LABEL_RE.search(snippet)
+        label = tag.group(1).capitalize() if tag else None
+        number = PHONE_RE.search(snippet)
+        if not number:
+            return None
+        return normalize_phone(number.group(0)), label
+
+    def bare_hit(text):
+        # Any phone-shaped number, without a label.
+        number = PHONE_RE.search(text)
+        if not number:
+            return None
+        return normalize_phone(number.group(0)), None
+
+    # Walk upward through at most the last 40 lines looking for a signature marker.
+    lowest = max(len(rows) - 40, -1)
+    sig_start = next(
+        (i for i in range(len(rows) - 1, lowest, -1)
+         if any(marker.lower() in rows[i].strip().lower() for marker in SIGNATURE_MARKERS)),
+        None,
+    )
+    # Without a marker, treat the last 25 lines as the signature area.
+    if sig_start is None:
+        sig_start = max(0, len(rows) - 25)
+
+    signature = '\n'.join(rows[sig_start:])
+
+    attempts = (
+        (labeled_hit, signature),
+        (bare_hit, signature),
+        (labeled_hit, message),
+        (bare_hit, message),
+    )
+    for finder, text in attempts:
+        hit = finder(text)
+        if hit is not None:
+            return hit
+
+    return None, None
+
+def normalize_phone(raw):
+    """Format a phone string as ``(xxx) xxx-xxxx``.
+
+    A leading ``1`` is dropped from 11-digit numbers. Input that does not
+    reduce to exactly ten digits is returned stripped of surrounding
+    whitespace but otherwise unchanged.
+    """
+    digits = ''.join(re.findall(r'\d', raw))
+    if len(digits) == 11 and digits.startswith('1'):
+        digits = digits[1:]
+    if len(digits) != 10:
+        return raw.strip()
+    area, exchange, subscriber = digits[:3], digits[3:6], digits[6:]
+    return f"({area}) {exchange}-{subscriber}"
+
+# ── Main ──
+def main():
+    """Run the pipeline: load, filter, look up phone numbers, save, and report.
+
+    Reads the "missing" list from INPUT_FILE, keeps contacts with
+    ``sent_count > 0`` that pass the junk filter, sorts them by ``total``
+    descending, searches the mailbox for a phone number for the top 60,
+    writes all filtered contacts to OUTPUT_FILE, and prints a summary table.
+    """
+    banner = "=" * 80
+    print(banner)
+    print("  Bardach Missing Real Contacts - Phone Number Finder")
+    print(banner)
+
+    # 1. Load input
+    with open(INPUT_FILE, encoding='utf-8') as src:
+        payload = json.load(src)
+
+    missing = payload["missing"]
+    print(f"\n[INFO] Total missing contacts loaded: {len(missing)}")
+
+    # 2. Keep only addresses the mailbox owner has also written to
+    two_way = [entry for entry in missing if entry["sent_count"] > 0]
+    print(f"[INFO] Two-way correspondents (sent_count > 0): {len(two_way)}")
+
+    # 3. Drop automated senders and commercial domains
+    def is_junk(email):
+        lowered = email.lower()
+        if any(keyword in lowered for keyword in JUNK_KEYWORDS):
+            return True
+        domain = lowered.split('@')[-1] if '@' in lowered else ''
+        return any(
+            domain == blocked or domain.endswith('.' + blocked)
+            for blocked in COMMERCIAL_DOMAINS
+        )
+
+    real = [entry for entry in two_way if not is_junk(entry["email"])]
+    print(f"[INFO] After junk filter: {len(real)}")
+
+    # 4. Most active correspondents first
+    real.sort(key=lambda entry: entry["total"], reverse=True)
+
+    print(f"\n[SUCCESS] {len(real)} real two-way correspondents are missing from contacts\n")
+
+    # 5. Phone lookup for top 60
+    top_n = min(60, len(real))
+    print(f"[INFO] Searching for phone numbers in top {top_n} contacts...")
+    print("-" * 80)
+
+    def as_record(entry, phone=None, phone_label=None):
+        # Output row; key order determines the order in the saved JSON.
+        return {
+            "email": entry["email"],
+            "display_name": entry["display_name"],
+            "sent_count": entry["sent_count"],
+            "received_count": entry["received_count"],
+            "total": entry["total"],
+            "phone": phone,
+            "phone_label": phone_label
+        }
+
+    results = []
+    phones_found = 0
+
+    for position, entry in enumerate(real[:top_n], start=1):
+        email = entry["email"]
+        name = entry["display_name"] or email.split('@')[0]
+        print(f"  [{position:2d}/{top_n}] {name[:35]:35s} <{email[:40]}>", end="", flush=True)
+
+        # Look at up to 3 messages FROM this address
+        phone = None
+        phone_label = None
+        resp = graph_search(email, top=3)
+
+        if resp and "value" in resp:
+            wanted = email.lower()
+            for msg in resp["value"]:
+                # $search is fuzzy: only accept messages really sent by this address
+                sender = msg.get("from", {}).get("emailAddress", {}).get("address", "").lower()
+                if sender != wanted:
+                    continue
+                content = msg.get("body", {}).get("content", "")
+                phone, phone_label = extract_phone_from_body(content, email)
+                if phone:
+                    break
+
+        if phone:
+            phones_found += 1
+            label_str = f" ({phone_label})" if phone_label else ""
+            print(f"  -> {phone}{label_str}")
+        else:
+            print("  -> --")
+
+        results.append(as_record(entry, phone, phone_label))
+
+    # 6. Contacts beyond the top 60 are kept without a phone lookup
+    results.extend(as_record(entry) for entry in real[top_n:])
+
+    # 7. Save output
+    output = {
+        "generated": datetime.now().isoformat(),
+        "total_two_way": len(real),
+        "with_phone": phones_found,
+        "without_phone": len(real) - phones_found,
+        "contacts": results
+    }
+
+    with open(OUTPUT_FILE, 'w', encoding='utf-8') as dst:
+        json.dump(output, dst, indent=2, ensure_ascii=False)
+
+    print(f"\n[SUCCESS] Saved to {OUTPUT_FILE}")
+
+    # 8. Print table
+    rule = '=' * 110
+    print(f"\n{rule}")
+    print(f"  MISSING REAL CONTACTS - TOP {top_n} (sorted by total exchanges)")
+    print(f"{rule}")
+    print(f"  {'#':>3}  {'Name':<30} {'Email':<40} {'Total':>6}  {'Phone':<25}")
+    print(f"  {'-'*3}  {'-'*30} {'-'*40} {'-'*6}  {'-'*25}")
+
+    for rank, row in enumerate(results[:top_n], start=1):
+        name = (row["display_name"] or row["email"].split('@')[0])[:30]
+        email_short = row["email"][:40]
+        if row["phone_label"]:
+            phone_str = f"{row['phone']} ({row['phone_label']})"
+        else:
+            phone_str = row["phone"] or "--"
+        print(f"  {rank:3d}  {name:<30} {email_short:<40} {row['total']:6d}  {phone_str}")
+
+    print(f"\n{rule}")
+    print("  SUMMARY")
+    print(f"{rule}")
+    print(f"  Total two-way correspondents missing: {len(real)}")
+    print(f"  Phone numbers found (top {top_n}):      {phones_found}")
+    print(f"  Without phone (top {top_n}):             {top_n - phones_found}")
+    print(f"{rule}")
+
+if __name__ == "__main__":
+    main()