Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
350 lines
10 KiB
Python
350 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""Analyze website/URL fields for all Bardach contacts in Microsoft 365."""
|
|
|
|
import json
import os
import subprocess
import sys
import time
from collections import defaultdict
from urllib.parse import urlencode, urlparse
|
|
|
|
# App-registration identifiers used for the OAuth2 client-credentials token
# request in get_token(). These are identifiers, not secrets.
TENANT_ID = "dd4a82e8-85a3-44ac-8800-07945ab4d95f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"

# SECURITY: the client secret must never live in source control. It is read
# from the GRAPH_CLIENT_SECRET environment variable; when unset it is the empty
# string and token acquisition fails with an error instead of leaking a
# credential. NOTE(review): the secret that was previously committed here
# should be treated as compromised and rotated.
CLIENT_SECRET = os.environ.get("GRAPH_CLIENT_SECRET", "")

# Scope requested for the token (Microsoft Graph ".default" scope).
SCOPE = "https://graph.microsoft.com/.default"

# Mailbox whose contacts are analysed.
USER = "barbara@bardach.net"

# Token endpoint for the tenant above.
TOKEN_URL = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
|
|
|
|
# Lower-case fragments that mark a businessHomePage value as junk.
# categorize_url() walks this list in order and reports the first fragment
# that matches, so the ordering is significant.
JUNK_PATTERNS = [
    "ms-outlook://",
    "linkedin.com/in/", "linkedin.com/company/",
    "outlook.live.com", "profile.live.com", "people.live.com",
    "social.microsoft.com", "contact.skype.com",
    "d.docs.live.net", "storage.live.com", "onedrive.live.com", "1drv.ms",
    "facebook.com", "twitter.com", "x.com", "plus.google.com",
    "instagram.com", "myspace.com", "flickr.com", "foursquare.com",
    "about.me", "gravatar.com",
    "apis.live.net",
    "cid-",
    "skype:",
]
|
|
|
|
# Social media domains that M365 auto-links.
# Exact host names (bare and common sub-domain forms) compared against the
# parsed host in categorize_url().
SOCIAL_DOMAINS = {
    "about.me",
    "facebook.com",
    "flickr.com",
    "foursquare.com",
    "gravatar.com",
    "instagram.com",
    "linkedin.com",
    "m.facebook.com",
    "myspace.com",
    "plus.google.com",
    "twitter.com",
    "www.facebook.com",
    "www.flickr.com",
    "www.foursquare.com",
    "www.gravatar.com",
    "www.instagram.com",
    "www.linkedin.com",
    "www.myspace.com",
    "www.twitter.com",
    "www.x.com",
    "x.com",
}
|
|
|
|
# Microsoft internal domains.
# Exact host names compared against the parsed host in categorize_url().
MS_DOMAINS = {
    "1drv.ms",
    "apis.live.net",
    "contact.skype.com",
    "d.docs.live.net",
    "onedrive.live.com",
    "outlook.live.com",
    "people.live.com",
    "profile.live.com",
    "social.microsoft.com",
    "storage.live.com",
}
|
|
|
|
|
|
def get_token():
    """Request an access token with the OAuth2 client-credentials grant.

    Posts CLIENT_ID, SCOPE and CLIENT_SECRET to TOKEN_URL through ``curl`` and
    returns the ``access_token`` field of the JSON reply.

    The form body is URL-encoded and handed to curl on stdin
    (``--data-binary @-``) so the client secret never appears in the process
    argument list.

    Returns:
        The access token string.

    Exits:
        Calls ``sys.exit(1)`` after printing an ``[ERROR]`` line when the
        secret is missing, curl fails, the reply is not JSON, or the reply
        carries no ``access_token``.
    """
    if not CLIENT_SECRET:
        print("[ERROR] Token acquisition failed: CLIENT_SECRET is empty")
        sys.exit(1)

    form_body = urlencode({
        "client_id": CLIENT_ID,
        "scope": SCOPE,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    })

    result = subprocess.run(
        ["curl", "-sS", "-X", "POST", TOKEN_URL,
         "-H", "Content-Type: application/x-www-form-urlencoded",
         "--data-binary", "@-"],
        input=form_body, capture_output=True, text=True,
    )

    # -S keeps curl's own error text on stderr even though -s hides progress.
    if result.returncode != 0:
        print(f"[ERROR] Token acquisition failed: curl exited with code "
              f"{result.returncode}: {result.stderr.strip()}")
        sys.exit(1)

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        print(f"[ERROR] Token acquisition failed: non-JSON response: {result.stdout[:300]!r}")
        sys.exit(1)

    if "access_token" not in data:
        print(f"[ERROR] Token acquisition failed: {data}")
        sys.exit(1)
    return data["access_token"]
|
|
|
|
|
|
def fetch_all_contacts(token):
    """Fetch all contacts of USER from Microsoft Graph, following pagination.

    Requests pages of 100 contacts from ``/users/{USER}/contacts`` through
    ``curl`` and follows ``@odata.nextLink`` until no further page is
    advertised. The bearer token is re-acquired via get_token() every 500
    requests.

    Args:
        token: Bearer token used for the first requests.

    Returns:
        A ``(contacts, token)`` tuple: the list of contact dicts collected so
        far and the token that was current when the loop ended (it may differ
        from the argument if it was re-acquired).

    A failed request (curl error, non-JSON reply, or a reply without a
    ``value`` field) stops the loop; whatever was collected up to that point is
    still returned, and a warning makes the truncation visible.
    """
    contacts = []
    select = "id,displayName,businessHomePage,emailAddresses,personalNotes,companyName"
    url = f"https://graph.microsoft.com/v1.0/users/{USER}/contacts?$select={select}&$top=100"
    call_count = 0
    aborted = False

    while url:
        call_count += 1
        # Re-acquire token every 500 calls
        if call_count > 1 and (call_count - 1) % 500 == 0:
            print(f"[INFO] Re-acquiring token at call {call_count}...")
            token = get_token()
            print("[OK] Token re-acquired")

        result = subprocess.run(
            ["curl", "-sS", "-X", "GET", url,
             "-H", f"Authorization: Bearer {token}",
             "-H", "Content-Type: application/json"],
            capture_output=True, text=True,
        )

        if result.returncode != 0:
            print(f"[ERROR] curl exited with code {result.returncode}: {result.stderr.strip()}")
            aborted = True
            break

        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError:
            # Empty or non-JSON output must not discard pages already fetched.
            print(f"[ERROR] Unexpected response: {result.stdout[:300]}")
            aborted = True
            break

        if not isinstance(data, dict) or "value" not in data:
            print(f"[ERROR] Unexpected response: {json.dumps(data)[:300]}")
            aborted = True
            break

        batch = data["value"]
        fetched_before = len(contacts)
        contacts.extend(batch)

        # Report progress each time the running total passes a multiple of 500.
        if len(contacts) // 500 > fetched_before // 500:
            print(f"[INFO] Fetched {len(contacts)} contacts so far...")

        url = data.get("@odata.nextLink")
        if url:
            # Short pause between pages.
            time.sleep(0.05)

    if aborted:
        print(f"[WARNING] Fetch stopped early; returning {len(contacts)} contacts (result may be incomplete)")

    return contacts, token
|
|
|
|
|
|
def categorize_url(url_str):
    """Categorize a URL as junk, legitimate, or suspicious.

    Args:
        url_str: Raw URL text (may be ``None``, empty, or lack a scheme).

    Returns:
        A ``(category, detail)`` tuple where ``category`` is one of
        ``"junk"``, ``"legitimate"`` or ``"suspicious"`` and ``detail`` is:

        * the matching JUNK_PATTERNS entry or SOCIAL/MS host for junk,
        * the host name for legitimate URLs,
        * a reason code (``"empty"``, ``"too_short"``, ``"no_valid_domain"``,
          ``"parse_error"``) for suspicious ones.

    Domain-style junk patterns (e.g. ``"x.com"``, ``"linkedin.com/in/"``) are
    matched against the parsed host on a label boundary, so ``netflix.com`` or
    ``dropbox.com`` are no longer mistaken for ``x.com``. Patterns that are not
    host names (``"ms-outlook://"``, ``"skype:"``, ``"cid-"``) are still matched
    as plain substrings of the whole URL.
    """
    if not url_str or not url_str.strip():
        return "suspicious", "empty"

    url_lower = url_str.lower().strip()

    # Check for obviously malformed
    if len(url_lower) < 4:
        return "suspicious", "too_short"

    # Add a scheme only when the text does not already start with "<scheme>://".
    # A bare startswith("http") would treat hosts such as "httpbin.org" as
    # already having a scheme and would double-prefix "ftp://..." URLs.
    head, sep, _ = url_lower.partition("://")
    has_scheme = bool(sep) and head.replace("+", "").replace("-", "").replace(".", "").isalnum()
    parse_url = url_lower if has_scheme else "https://" + url_lower

    try:
        parsed = urlparse(parse_url)
        # hostname drops any port or userinfo that netloc would include.
        domain = parsed.hostname or ""
        path = parsed.path
    except ValueError:
        parsed = None
        domain = ""
        path = ""

    # Check junk patterns in list order so the reported pattern is stable.
    for pattern in JUNK_PATTERNS:
        host_part, slash, path_part = pattern.partition("/")
        is_host_pattern = "." in host_part and ":" not in pattern

        if not is_host_pattern:
            if pattern in url_lower:
                return "junk", pattern
            continue

        if domain != host_part and not domain.endswith("." + host_part):
            continue
        if not slash or path.startswith("/" + path_part):
            return "junk", pattern

    if parsed is None:
        return "suspicious", "parse_error"

    # Check social/MS domains
    if domain in SOCIAL_DOMAINS:
        return "junk", domain
    if domain in MS_DOMAINS:
        return "junk", domain

    # Check for no real domain
    if not domain or "." not in domain:
        return "suspicious", "no_valid_domain"

    return "legitimate", domain
|
|
|
|
|
|
def extract_domain(url_str):
    """Extract the lower-cased host name from a URL.

    Args:
        url_str: Raw URL text; may be ``None``, empty, or lack a scheme.

    Returns:
        The host name without port or userinfo, or ``"unknown"`` when the
        input is empty, has no host, or cannot be parsed.
    """
    if not url_str:
        return "unknown"

    candidate = url_str.lower().strip()

    # Add a scheme only when the text does not already start with "<scheme>://".
    # A bare startswith("http") would leave "httpbin.org" without a scheme and
    # would double-prefix "ftp://..." URLs.
    head, sep, _ = candidate.partition("://")
    has_scheme = bool(sep) and head.replace("+", "").replace("-", "").replace(".", "").isalnum()
    if not has_scheme:
        candidate = "https://" + candidate

    try:
        return urlparse(candidate).hostname or "unknown"
    except ValueError:
        # urlparse rejects malformed bracketed (IPv6-style) hosts.
        return "unknown"
|
|
|
|
|
|
def _print_banner(title):
    """Print *title* framed above and below by a 70-character '=' rule."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)


def main():
    """Fetch all contacts, classify their businessHomePage URLs, and report.

    Steps:
        1. Acquire a token and download every contact.
        2. Split contacts by whether ``businessHomePage`` is non-blank.
        3. Classify each URL with categorize_url() and tally hosts with
           extract_domain(); junk LinkedIn/Facebook URLs are also collected
           separately.
        4. Print a sectioned text report to stdout.
        5. Write the full result set as JSON to the hard-coded output path,
           creating the target directory if it does not exist.
    """
    _print_banner("BARDACH CONTACTS - WEBSITE/URL FIELD ANALYSIS")
    print()

    token = get_token()
    print("[OK] Token acquired")
    print("[INFO] Fetching all contacts...")

    contacts, token = fetch_all_contacts(token)
    total = len(contacts)
    print(f"[OK] Fetched {total} total contacts")
    print()

    # Analyze businessHomePage
    contacts_with_url = []
    contacts_without_url = []
    for c in contacts:
        bhp = c.get("businessHomePage")
        if bhp and bhp.strip():
            contacts_with_url.append(c)
        else:
            contacts_without_url.append(c)

    print(f"[INFO] Contacts with businessHomePage: {len(contacts_with_url)}")
    print(f"[INFO] Contacts without businessHomePage: {len(contacts_without_url)}")
    print()

    # Categorize
    junk_contacts = []
    legitimate_contacts = []
    suspicious_contacts = []
    linkedin_profiles = []
    facebook_profiles = []

    domain_counts = defaultdict(int)
    junk_by_pattern = defaultdict(list)

    for c in contacts_with_url:
        url = c.get("businessHomePage", "").strip()
        category, detail = categorize_url(url)
        domain = extract_domain(url)
        domain_counts[domain] += 1

        entry = {
            "id": c["id"],
            # The API may return null for unset text fields; show "" instead
            # of the literal text "None" in the report.
            "displayName": c.get("displayName") or "",
            "url": url,
            "companyName": c.get("companyName") or "",
            "category": category,
            "detail": detail,
            "domain": domain,
        }

        if category == "junk":
            junk_contacts.append(entry)
            junk_by_pattern[detail].append(entry)

            # Cross-reference LinkedIn / Facebook (junk URLs only)
            url_lower = url.lower()
            if "linkedin.com" in url_lower:
                linkedin_profiles.append(entry)
            elif "facebook.com" in url_lower:
                facebook_profiles.append(entry)
        elif category == "legitimate":
            legitimate_contacts.append(entry)
        else:
            suspicious_contacts.append(entry)

    # Print report
    _print_banner("RESULTS SUMMARY")
    print(f"Total contacts: {total}")
    print(f"Contacts with businessHomePage: {len(contacts_with_url)}")
    print(f" - Junk (auto-inserted): {len(junk_contacts)}")
    print(f" - Legitimate websites: {len(legitimate_contacts)}")
    print(f" - Suspicious/broken: {len(suspicious_contacts)}")
    print()

    # Junk URLs grouped by pattern, most frequent pattern first
    _print_banner("JUNK URLs BY PATTERN")
    for pattern, entries in sorted(junk_by_pattern.items(), key=lambda x: -len(x[1])):
        print(f"\n Pattern: {pattern} ({len(entries)} contacts)")
        for e in entries[:5]:
            print(f" - {e['displayName']}: {e['url']}")
        if len(entries) > 5:
            print(f" ... and {len(entries) - 5} more")

    # Suspicious URLs
    print()
    _print_banner("SUSPICIOUS/BROKEN URLs")
    if suspicious_contacts:
        for e in suspicious_contacts:
            print(f" - {e['displayName']}: \"{e['url']}\" (reason: {e['detail']})")
    else:
        print(" None found")

    # Legitimate URLs (first 30)
    print()
    _print_banner("LEGITIMATE URLs (first 30)")
    for e in legitimate_contacts[:30]:
        company = f" [{e['companyName']}]" if e['companyName'] else ""
        print(f" - {e['displayName']}{company}: {e['url']}")
    if len(legitimate_contacts) > 30:
        print(f" ... and {len(legitimate_contacts) - 30} more")

    # Domain distribution, most frequent host first
    print()
    _print_banner("DOMAIN DISTRIBUTION")
    for domain, count in sorted(domain_counts.items(), key=lambda x: -x[1]):
        print(f" {domain}: {count}")

    # LinkedIn cross-reference
    print()
    _print_banner(f"LINKEDIN PROFILES ({len(linkedin_profiles)})")
    for e in linkedin_profiles:
        print(f" - {e['displayName']}: {e['url']}")

    # Facebook cross-reference
    print()
    _print_banner(f"FACEBOOK PROFILES ({len(facebook_profiles)})")
    for e in facebook_profiles:
        print(f" - {e['displayName']}: {e['url']}")

    # Save results
    results = {
        "summary": {
            "total_contacts": total,
            "contacts_with_url": len(contacts_with_url),
            "contacts_without_url": len(contacts_without_url),
            "junk_count": len(junk_contacts),
            "legitimate_count": len(legitimate_contacts),
            "suspicious_count": len(suspicious_contacts),
            "linkedin_count": len(linkedin_profiles),
            "facebook_count": len(facebook_profiles),
        },
        "junk_contacts": junk_contacts,
        "legitimate_contacts": legitimate_contacts,
        "suspicious_contacts": suspicious_contacts,
        "linkedin_profiles": linkedin_profiles,
        "facebook_profiles": facebook_profiles,
        "domain_distribution": dict(sorted(domain_counts.items(), key=lambda x: -x[1])),
        "junk_by_pattern": {
            k: [{"displayName": e["displayName"], "url": e["url"]} for e in v]
            for k, v in sorted(junk_by_pattern.items(), key=lambda x: -len(x[1]))
        },
    }

    out_path = "D:/ClaudeTools/temp/bardach_url_analysis.json"
    # Create the target directory first so a missing folder cannot discard the
    # results after the whole fetch has already completed.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"\n[OK] Results saved to {out_path}")
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|