sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00

Synced files:
- Quote wizard frontend (all components, hooks, types, config)
- API updates (config, models, routers, schemas, services)
- Client work (bg-builders, gurushow)
- Scripts (BGB Lesley termination, CIPP, Datto, migration)
- Temp files (Bardach contacts, VWP investigation, misc)
- Credentials and session logs
- Email service, PHP API, session logs

Machine: ACG-M-L5090
Timestamp: 2026-03-10 19:11:00

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 19:59:08 -07:00
parent a1a19f8c00
commit fa15b03180
169 changed files with 879909 additions and 1243 deletions
--- a/temp/bardach_url_analysis.py
+++ b/temp/bardach_url_analysis.py
@@ -0,0 +1,349 @@
+#!/usr/bin/env python3
+"""Analyze website/URL fields for all Bardach contacts in Microsoft 365."""
+
+import json
+import os
+import subprocess
+import sys
+import time
+from collections import defaultdict
+from urllib.parse import urlparse
+
+# Azure AD app registration used for the client-credentials token request.
+# The tenant and client IDs are identifiers, not secrets.
+TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
+CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
+# SECURITY: the client secret must never be stored in source control. It is
+# read from the environment instead; when the variable is unset the value is
+# an empty string and the token request will be rejected by the endpoint.
+# NOTE(review): the secret that was previously committed on this line is in
+# the repository history and must be rotated in the app registration.
+CLIENT_SECRET = os.environ.get("BARDACH_CLIENT_SECRET", "")
+SCOPE = "https://graph.microsoft.com/.default"
+# Mailbox whose contacts folder is analyzed.
+USER = "barbara@bardach.net"
+TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
+
+# Substrings that mark a businessHomePage value as junk.
+# The list is scanned in order, so the sequence below is significant:
+# the first entry found in a URL is the one reported for it.
+JUNK_PATTERNS = [
+    # Outlook deep links and LinkedIn profile/company pages
+    "ms-outlook://", "linkedin.com/in/", "linkedin.com/company/",
+    # Microsoft consumer services
+    "outlook.live.com", "profile.live.com", "people.live.com",
+    "social.microsoft.com", "contact.skype.com",
+    "d.docs.live.net", "storage.live.com", "onedrive.live.com", "1drv.ms",
+    # Social networks and profile sites
+    "facebook.com", "twitter.com", "x.com", "plus.google.com",
+    "instagram.com", "myspace.com", "flickr.com", "foursquare.com",
+    "about.me", "gravatar.com",
+    # Remaining Microsoft / Skype markers
+    "apis.live.net", "cid-", "skype:",
+]
+
+# Social media domains that M365 auto-links.
+# Sites listed here are matched both bare and with a "www." prefix.
+_SOCIAL_WITH_WWW = (
+    "linkedin.com", "facebook.com", "twitter.com", "x.com", "instagram.com",
+    "myspace.com", "flickr.com", "foursquare.com", "gravatar.com",
+)
+SOCIAL_DOMAINS = (
+    set(_SOCIAL_WITH_WWW)
+    | {f"www.{site}" for site in _SOCIAL_WITH_WWW}
+    # Hosts that have no "www." twin in the list.
+    | {"m.facebook.com", "plus.google.com", "about.me"}
+)
+
+# Microsoft internal domains (exact host matches).
+MS_DOMAINS = {
+    # live.com / live.net services
+    "outlook.live.com",
+    "profile.live.com",
+    "people.live.com",
+    "storage.live.com",
+    "onedrive.live.com",
+    "d.docs.live.net",
+    "apis.live.net",
+    # other Microsoft-owned hosts
+    "social.microsoft.com",
+    "contact.skype.com",
+    "1drv.ms",
+}
+
+
+def get_token():
+    """Request a client-credentials access token and return it.
+
+    The form body is URL-encoded and handed to curl on stdin, so the client
+    secret does not appear in the process argument list and special
+    characters in any field cannot corrupt the request.
+
+    Returns:
+        The ``access_token`` string from the token endpoint's JSON reply.
+
+    Exits the process with status 1 (after printing an ``[ERROR]`` line)
+    when the secret is not configured, curl cannot be run or fails, the
+    reply is not JSON, or the reply carries no ``access_token``.
+    """
+    from urllib.parse import urlencode
+
+    if not CLIENT_SECRET:
+        print("[ERROR] CLIENT_SECRET is not configured")
+        sys.exit(1)
+
+    form_body = urlencode({
+        "client_id": CLIENT_ID,
+        "scope": SCOPE,
+        "client_secret": CLIENT_SECRET,
+        "grant_type": "client_credentials",
+    })
+
+    try:
+        result = subprocess.run(
+            # -S keeps curl's own error text despite -s; "@-" reads the body
+            # from stdin instead of the command line.
+            ["curl", "-sS", "-X", "POST", TOKEN_URL,
+             "-H", "Content-Type: application/x-www-form-urlencoded",
+             "--data-binary", "@-"],
+            input=form_body, capture_output=True, text=True
+        )
+    except OSError as exc:
+        print(f"[ERROR] Could not run curl: {exc}")
+        sys.exit(1)
+
+    if result.returncode != 0:
+        print(f"[ERROR] Token request failed (curl exit {result.returncode}): "
+              f"{result.stderr.strip()}")
+        sys.exit(1)
+
+    try:
+        data = json.loads(result.stdout)
+    except json.JSONDecodeError:
+        print(f"[ERROR] Token endpoint returned non-JSON data: {result.stdout[:300]}")
+        sys.exit(1)
+
+    if "access_token" not in data:
+        print(f"[ERROR] Token acquisition failed: {data}")
+        sys.exit(1)
+    return data["access_token"]
+
+
+def fetch_all_contacts(token):
+    """Fetch all contacts with pagination.
+
+    Follows ``@odata.nextLink`` until the service stops returning one. The
+    bearer token is re-acquired via ``get_token()`` after every 500 requests.
+
+    Args:
+        token: Bearer token used for the first requests.
+
+    Returns:
+        A ``(contacts, token)`` tuple: the list of contact dicts collected so
+        far and the token that was in use when fetching stopped.
+
+    A failed curl invocation, a non-JSON page, or a page without a
+    ``value`` key prints an ``[ERROR]`` line and stops paging; contacts that
+    were already fetched are still returned rather than lost to a traceback.
+    """
+    contacts = []
+    select = "id,displayName,businessHomePage,emailAddresses,personalNotes,companyName"
+    url = f"https://graph.microsoft.com/v1.0/users/{USER}/contacts?$select={select}&$top=100"
+    call_count = 0
+
+    while url:
+        call_count += 1
+        # Re-acquire token every 500 calls
+        if call_count > 1 and (call_count - 1) % 500 == 0:
+            print(f"[INFO] Re-acquiring token at call {call_count}...")
+            token = get_token()
+            print("[OK] Token re-acquired")
+
+        result = subprocess.run(
+            # -S keeps curl's own error text on stderr despite -s.
+            ["curl", "-sS", "-X", "GET", url,
+             "-H", f"Authorization: Bearer {token}",
+             "-H", "Content-Type: application/json"],
+            capture_output=True, text=True
+        )
+        if result.returncode != 0:
+            print(f"[ERROR] Request failed (curl exit {result.returncode}): "
+                  f"{result.stderr.strip()}")
+            break
+
+        try:
+            data = json.loads(result.stdout)
+        except json.JSONDecodeError:
+            print(f"[ERROR] Non-JSON response: {result.stdout[:300]}")
+            break
+
+        if not isinstance(data, dict) or "value" not in data:
+            print(f"[ERROR] Unexpected response: {json.dumps(data)[:300]}")
+            break
+
+        batch = data["value"]
+        before = len(contacts)
+        contacts.extend(batch)
+
+        # Report progress whenever this page reached or crossed a multiple of 500.
+        if len(contacts) // 500 > before // 500:
+            print(f"[INFO] Fetched {len(contacts)} contacts so far...")
+
+        url = data.get("@odata.nextLink")
+        if url:
+            # Brief pause between pages.
+            time.sleep(0.05)
+
+    return contacts, token
+
+
+def _junk_pattern_in_url(url_lower, pattern):
+    """Return True when ``pattern`` occurs in ``url_lower`` as a junk marker.
+
+    Patterns that look like a domain (contain a dot and no ``://``) must sit
+    on host-name boundaries, so ``x.com`` matches ``www.x.com/foo`` but not
+    ``dropbox.com`` or ``x.com.example.org``. All other patterns (scheme
+    prefixes, ``cid-``) keep plain substring matching.
+    """
+    if "." not in pattern or "://" in pattern:
+        return pattern in url_lower
+
+    start = url_lower.find(pattern)
+    while start != -1:
+        end = start + len(pattern)
+        if start == 0:
+            left_ok = True
+        else:
+            prev = url_lower[start - 1]
+            # A preceding "." (subdomain) or "/" is a boundary; a letter,
+            # digit or hyphen means the match is the tail of a longer label.
+            left_ok = not (prev.isalnum() or prev == "-")
+        if pattern.endswith("/") or end == len(url_lower):
+            right_ok = True
+        else:
+            nxt = url_lower[end]
+            right_ok = not (nxt.isalnum() or nxt in "-.")
+        if left_ok and right_ok:
+            return True
+        start = url_lower.find(pattern, start + 1)
+    return False
+
+
+def categorize_url(url_str):
+    """Categorize a URL as junk, legitimate, or suspicious.
+
+    Args:
+        url_str: Raw URL text; may be ``None`` or blank.
+
+    Returns:
+        A ``(category, detail)`` tuple. ``category`` is ``"junk"``,
+        ``"legitimate"`` or ``"suspicious"``. ``detail`` is the matching
+        junk pattern or host for junk, the host for legitimate URLs, and a
+        reason code (``"empty"``, ``"too_short"``, ``"no_valid_domain"``,
+        ``"parse_error"``) for suspicious ones.
+    """
+    if not url_str or not url_str.strip():
+        return "suspicious", "empty"
+
+    url_lower = url_str.lower().strip()
+
+    # Check for obviously malformed
+    if len(url_lower) < 4:
+        return "suspicious", "too_short"
+
+    # Check junk patterns; the first pattern that matches is reported.
+    for pattern in JUNK_PATTERNS:
+        if _junk_pattern_in_url(url_lower, pattern):
+            return "junk", pattern
+
+    # Add a scheme only when none is present. Testing for a leading "http"
+    # would treat scheme-less hosts such as "httpbin.org" as already having one.
+    parse_url = url_lower if "://" in url_lower else "https://" + url_lower
+    try:
+        # hostname drops any port and userinfo so the set lookups below
+        # compare bare host names.
+        domain = urlparse(parse_url).hostname or ""
+    except ValueError:
+        return "suspicious", "parse_error"
+
+    # Check social/MS domains
+    if domain in SOCIAL_DOMAINS or domain in MS_DOMAINS:
+        return "junk", domain
+
+    # Check for no real domain
+    if not domain or "." not in domain:
+        return "suspicious", "no_valid_domain"
+
+    return "legitimate", domain
+
+
+def extract_domain(url_str):
+    """Extract the host name from a URL.
+
+    Args:
+        url_str: Raw URL text; may be ``None`` or empty.
+
+    Returns:
+        The lower-cased host without port or userinfo, or ``"unknown"`` when
+        the input is empty, has no host, or cannot be parsed.
+    """
+    if not url_str:
+        return "unknown"
+    candidate = url_str.lower().strip()
+    # Add a scheme only when none is present. Testing for a leading "http"
+    # would treat scheme-less hosts such as "httpbin.org" as already having one.
+    if "://" not in candidate:
+        candidate = "https://" + candidate
+    try:
+        host = urlparse(candidate).hostname
+    except ValueError:
+        return "unknown"
+    return host or "unknown"
+
+
+# Default report location; pass a different path as the first CLI argument.
+_DEFAULT_OUT_PATH = "D:/ClaudeTools/temp/bardach_url_analysis.json"
+
+
+def _print_banner(title):
+    """Print a section title framed by two 70-character rules."""
+    print("=" * 70)
+    print(title)
+    print("=" * 70)
+
+
+def _classify_contacts(contacts_with_url):
+    """Categorize each contact's businessHomePage and bucket the results.
+
+    Returns ``(buckets, domain_counts, junk_by_pattern)`` where ``buckets``
+    maps "junk", "legitimate", "suspicious", "linkedin" and "facebook" to
+    lists of entry dicts.
+    """
+    buckets = {
+        name: []
+        for name in ("junk", "legitimate", "suspicious", "linkedin", "facebook")
+    }
+    domain_counts = defaultdict(int)
+    junk_by_pattern = defaultdict(list)
+
+    for c in contacts_with_url:
+        url = c.get("businessHomePage", "").strip()
+        category, detail = categorize_url(url)
+        domain = extract_domain(url)
+        domain_counts[domain] += 1
+
+        entry = {
+            "id": c["id"],
+            "displayName": c.get("displayName", ""),
+            "url": url,
+            "companyName": c.get("companyName", ""),
+            "category": category,
+            "detail": detail,
+            "domain": domain,
+        }
+
+        if category == "junk":
+            buckets["junk"].append(entry)
+            junk_by_pattern[detail].append(entry)
+
+            # Cross-reference LinkedIn / Facebook (LinkedIn takes precedence).
+            url_lower = url.lower()
+            if "linkedin.com" in url_lower:
+                buckets["linkedin"].append(entry)
+            elif "facebook.com" in url_lower:
+                buckets["facebook"].append(entry)
+        elif category == "legitimate":
+            buckets["legitimate"].append(entry)
+        else:
+            buckets["suspicious"].append(entry)
+
+    return buckets, domain_counts, junk_by_pattern
+
+
+def _print_report(total, with_url_count, buckets, domain_counts, junk_by_pattern):
+    """Print the summary and every detail section of the analysis."""
+    junk = buckets["junk"]
+    legitimate = buckets["legitimate"]
+    suspicious = buckets["suspicious"]
+    linkedin = buckets["linkedin"]
+    facebook = buckets["facebook"]
+
+    _print_banner("RESULTS SUMMARY")
+    print(f"Total contacts:                    {total}")
+    print(f"Contacts with businessHomePage:    {with_url_count}")
+    print(f"  - Junk (auto-inserted):          {len(junk)}")
+    print(f"  - Legitimate websites:           {len(legitimate)}")
+    print(f"  - Suspicious/broken:             {len(suspicious)}")
+    print()
+
+    # Junk URLs grouped by pattern, most frequent first, five samples each.
+    _print_banner("JUNK URLs BY PATTERN")
+    for pattern, entries in sorted(junk_by_pattern.items(), key=lambda x: -len(x[1])):
+        print(f"\n  Pattern: {pattern} ({len(entries)} contacts)")
+        for e in entries[:5]:
+            print(f"    - {e['displayName']}: {e['url']}")
+        if len(entries) > 5:
+            print(f"    ... and {len(entries) - 5} more")
+
+    print()
+    _print_banner("SUSPICIOUS/BROKEN URLs")
+    if suspicious:
+        for e in suspicious:
+            print(f"  - {e['displayName']}: \"{e['url']}\" (reason: {e['detail']})")
+    else:
+        print("  None found")
+
+    print()
+    _print_banner("LEGITIMATE URLs (first 30)")
+    for e in legitimate[:30]:
+        company = f" [{e['companyName']}]" if e['companyName'] else ""
+        print(f"  - {e['displayName']}{company}: {e['url']}")
+    if len(legitimate) > 30:
+        print(f"  ... and {len(legitimate) - 30} more")
+
+    print()
+    _print_banner("DOMAIN DISTRIBUTION")
+    for domain, count in sorted(domain_counts.items(), key=lambda x: -x[1]):
+        print(f"  {domain}: {count}")
+
+    print()
+    _print_banner(f"LINKEDIN PROFILES ({len(linkedin)})")
+    for e in linkedin:
+        print(f"  - {e['displayName']}: {e['url']}")
+
+    print()
+    _print_banner(f"FACEBOOK PROFILES ({len(facebook)})")
+    for e in facebook:
+        print(f"  - {e['displayName']}: {e['url']}")
+
+
+def _save_results(results, out_path):
+    """Write ``results`` as indented UTF-8 JSON, creating the parent directory."""
+    import os
+
+    parent = os.path.dirname(out_path)
+    if parent:
+        # Avoid losing the whole analysis because the target folder is missing.
+        os.makedirs(parent, exist_ok=True)
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(results, f, indent=2, ensure_ascii=False)
+
+
+def main():
+    """Fetch the contacts, analyze their businessHomePage values, and report.
+
+    Prints a console report and writes the full results as JSON. The output
+    path defaults to ``_DEFAULT_OUT_PATH`` and can be overridden by passing a
+    path as the first command-line argument.
+    """
+    _print_banner("BARDACH CONTACTS - WEBSITE/URL FIELD ANALYSIS")
+    print()
+
+    token = get_token()
+    print("[OK] Token acquired")
+    print("[INFO] Fetching all contacts...")
+
+    contacts, token = fetch_all_contacts(token)
+    total = len(contacts)
+    print(f"[OK] Fetched {total} total contacts")
+    print()
+
+    # Split on whether businessHomePage holds any non-blank text.
+    contacts_with_url = []
+    contacts_without_url = []
+    for c in contacts:
+        bhp = c.get("businessHomePage")
+        if bhp and bhp.strip():
+            contacts_with_url.append(c)
+        else:
+            contacts_without_url.append(c)
+
+    print(f"[INFO] Contacts with businessHomePage: {len(contacts_with_url)}")
+    print(f"[INFO] Contacts without businessHomePage: {len(contacts_without_url)}")
+    print()
+
+    buckets, domain_counts, junk_by_pattern = _classify_contacts(contacts_with_url)
+    _print_report(total, len(contacts_with_url), buckets, domain_counts, junk_by_pattern)
+
+    # Save results
+    results = {
+        "summary": {
+            "total_contacts": total,
+            "contacts_with_url": len(contacts_with_url),
+            "contacts_without_url": len(contacts_without_url),
+            "junk_count": len(buckets["junk"]),
+            "legitimate_count": len(buckets["legitimate"]),
+            "suspicious_count": len(buckets["suspicious"]),
+            "linkedin_count": len(buckets["linkedin"]),
+            "facebook_count": len(buckets["facebook"]),
+        },
+        "junk_contacts": buckets["junk"],
+        "legitimate_contacts": buckets["legitimate"],
+        "suspicious_contacts": buckets["suspicious"],
+        "linkedin_profiles": buckets["linkedin"],
+        "facebook_profiles": buckets["facebook"],
+        "domain_distribution": dict(sorted(domain_counts.items(), key=lambda x: -x[1])),
+        "junk_by_pattern": {k: [{"displayName": e["displayName"], "url": e["url"]} for e in v]
+                           for k, v in sorted(junk_by_pattern.items(), key=lambda x: -len(x[1]))},
+    }
+
+    out_path = sys.argv[1] if len(sys.argv) > 1 else _DEFAULT_OUT_PATH
+    _save_results(results, out_path)
+    print(f"\n[OK] Results saved to {out_path}")
+
+
+# Run the analysis only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()