Synced files: - Quote wizard frontend (all components, hooks, types, config) - API updates (config, models, routers, schemas, services) - Client work (bg-builders, gurushow) - Scripts (BGB Lesley termination, CIPP, Datto, migration) - Temp files (Bardach contacts, VWP investigation, misc) - Credentials and session logs - Email service, PHP API, session logs Machine: ACG-M-L5090 Timestamp: 2026-03-10 19:11:00 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
520 lines
20 KiB
Python
520 lines
20 KiB
Python
"""
|
|
Valley Wide Plastering - Resolve victim email addresses from display names.
|
|
|
|
Strategy:
|
|
1. Load victim names from vwp_victim_emails.json
|
|
2. Pull ALL contacts from JR's mailbox via Graph API
|
|
3. Search JR's sent items for Box.com invitation emails
|
|
4. Search JR's inbox for emails from box.com containing "invited"
|
|
5. Match victim names against contacts + email extractions
|
|
6. Output resolved and unresolved lists
|
|
"""
|
|
|
|
import json
import os
import re
import sys
import time
from collections import defaultdict

import requests
|
|
|
|
# --- Configuration ---
# Entra ID (Azure AD) tenant and app registration used for app-only Graph access.
TENANT_ID = "5c53ae9f-7071-4248-b834-8685b646450f"
APP_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: the client secret must not live in source control. It is read from
# the VWP_GRAPH_APP_SECRET environment variable instead. The value that was
# previously hard-coded on this line has been exposed in the repository history
# and should be treated as compromised and rotated.
APP_SECRET = os.environ.get("VWP_GRAPH_APP_SECRET", "")
# Object id of the mailbox owner ("JR") whose contacts and mail are searched.
JR_USER_ID = "0af923d0-48c5-4cc1-8553-c60625802815"

# Input: victim names/emails gathered earlier; output: resolution results.
INPUT_FILE = r"D:\ClaudeTools\temp\vwp_victim_emails.json"
OUTPUT_FILE = r"D:\ClaudeTools\temp\vwp_resolved_victims.json"

GRAPH_BASE = "https://graph.microsoft.com/v1.0"
|
|
|
|
|
|
def get_token():
    """Acquire an app-only Microsoft Graph access token.

    Performs the OAuth2 client-credentials grant against the tenant's v2.0
    token endpoint using the module-level TENANT_ID, APP_ID and APP_SECRET.

    Returns:
        The ``access_token`` string from the token response.

    Raises:
        RuntimeError: if APP_SECRET is empty.
        requests.HTTPError: if the token endpoint answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    if not APP_SECRET:
        # Fail with a clear message instead of an opaque 401 from the endpoint.
        raise RuntimeError("APP_SECRET is empty; cannot request a Graph token")

    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    payload = {
        "client_id": APP_ID,
        "client_secret": APP_SECRET,
        "scope": "https://graph.microsoft.com/.default",
        "grant_type": "client_credentials",
    }
    # requests never times out by default; without this a stalled connection
    # would hang the whole script indefinitely.
    response = requests.post(url, data=payload, timeout=30)
    response.raise_for_status()
    return response.json()["access_token"]
|
|
|
|
|
|
def graph_get_all(token, url, params=None):
    """Page through all results from a Graph API endpoint.

    Follows ``@odata.nextLink`` until the service stops returning one and
    concatenates every page's ``value`` array. A 429 response is retried
    (without limit) after sleeping for the advertised ``Retry-After``.

    Args:
        token: Bearer token sent in the Authorization header.
        url: First page URL. A falsy value yields an empty result.
        params: Query parameters for the first request only.

    Returns:
        A list with the items of all pages, in the order received.

    Raises:
        requests.HTTPError: on any non-429 error status.
        requests.RequestException: on connection failure or timeout.
    """
    headers = {"Authorization": f"Bearer {token}"}
    results = []
    next_url = url
    while next_url:
        # Explicit timeout: requests would otherwise wait forever on a stall.
        r = requests.get(next_url, headers=headers, params=params, timeout=60)
        if r.status_code == 429:
            # Retry-After is normally whole seconds, but tolerate a missing,
            # malformed or negative value rather than crashing mid-pagination.
            try:
                retry = max(0, int(r.headers.get("Retry-After", 5)))
            except (TypeError, ValueError):
                retry = 5
            print(f" [THROTTLED] Waiting {retry}s...")
            time.sleep(retry)
            continue
        r.raise_for_status()
        data = r.json()
        results.extend(data.get("value", []))
        next_url = data.get("@odata.nextLink")
        params = None  # nextLink already has params
    return results
|
|
|
|
|
|
def normalize(name):
    """Return a canonical, lowercase form of a display name for matching.

    Parenthesised fragments such as "(Contractor)" and all digits are dropped,
    and runs of whitespace collapse to single spaces. A falsy input gives "".
    """
    if not name:
        return ""
    # Drop "(...)" fragments together with the whitespace around them.
    without_parens = re.sub(r'\s*\(.*?\)\s*', ' ', name)
    # Drop every run of digits.
    without_digits = re.sub(r'\d+', '', without_parens)
    # Lowercase, then split/join to squeeze and trim whitespace.
    tokens = without_digits.lower().split()
    return ' '.join(tokens)
|
|
|
|
|
|
def name_variants(name):
    """Generate matching variants for a name.

    The normalized name itself is always a variant. For a name containing a
    comma ("Last, First ...") the first two comma-free tokens are emitted in
    both orders. For any other name with two or more tokens, the first and
    last tokens are emitted in both orders.

    Variants are derived from the normalized name, so digits and parenthetical
    suffixes never leak into a variant, and comma-bearing tokens such as
    "smith," are never combined into keys. An empty normalized name yields no
    variants, so blank display names cannot match each other.

    Args:
        name: Display name; may be empty or None.

    Returns:
        A set of lowercase name strings (possibly empty).
    """
    n = normalize(name)
    variants = {n}
    # Tokenise with commas treated as separators so no token carries a comma.
    tokens = n.replace(',', ' ').split()
    if len(tokens) >= 2:
        if ',' in n:
            # "Last, First" -> "first last" (and the plain "last first")
            variants.add(f"{tokens[1]} {tokens[0]}")
            variants.add(f"{tokens[0]} {tokens[1]}")
        else:
            # first last
            variants.add(f"{tokens[0]} {tokens[-1]}")
            # last first
            variants.add(f"{tokens[-1]} {tokens[0]}")
    # A blank key would match every record that has no usable name.
    variants.discard("")
    return variants
|
|
|
|
|
|
def extract_emails_from_text(text):
    """Return the distinct email-like substrings found in *text*.

    The result is a list without duplicates; its order is unspecified.
    A falsy *text* gives an empty list.
    """
    if not text:
        return []
    email_pattern = r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}'
    # Collect whole matches into a set to drop repeats.
    distinct = {match.group(0) for match in re.finditer(email_pattern, text)}
    return list(distinct)
|
|
|
|
|
|
# Substrings that mark an address as "obviously not a victim" (service
# senders and the organisation's own domain).
_EXCLUDED_EMAIL_FRAGMENTS = (
    'box.com', 'noreply', 'valleywideplastering', 'buildingconnected.com',
    'team@', 'no-reply', 'donotreply',
)


def _is_plausible_victim_email(email):
    """Return True when *email* looks like an address and hits no exclusion."""
    return (
        bool(email)
        and '@' in email
        and not any(fragment in email for fragment in _EXCLUDED_EMAIL_FRAGMENTS)
    )


def _is_third_party_address(email_lower):
    """Return True for a lowercased address that is neither Box, noreply nor valleywide."""
    return (
        "box.com" not in email_lower
        and "noreply" not in email_lower
        and "valleywide" not in email_lower
    )


def _get_with_throttle_retry(url, headers, params, timeout=60):
    """GET once; on HTTP 429 wait for Retry-After and retry a single time."""
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    if response.status_code == 429:
        try:
            retry = max(0, int(response.headers.get("Retry-After", 5)))
        except (TypeError, ValueError):
            retry = 5
        print(f" [THROTTLED] Waiting {retry}s...")
        time.sleep(retry)
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    return response


def _run_searches(token, url, queries, select, noun):
    """Run each quoted $search query against *url* and pool all returned messages."""
    pooled = []
    for search_q in queries:
        params = {
            "$search": f'"{search_q}"',
            "$top": "200",
            "$select": select,
        }
        try:
            results = graph_get_all(token, url, params)
        except Exception as e:
            # Best-effort: one failing query must not abort the other strategies.
            print(f" [WARNING] Search for '{search_q}' failed: {e}")
            continue
        pooled.extend(results)
        print(f" Found {len(results)} {noun} matching '{search_q}'")
    return pooled


def _dedupe_by_id(messages):
    """Return *messages* with later duplicates (same "id") removed, order kept."""
    seen_ids = set()
    unique = []
    for m in messages:
        mid = m.get("id", "")
        if mid not in seen_ids:
            seen_ids.add(mid)
            unique.append(m)
    return unique


def _message_body_emails(message):
    """Return the email addresses found in a message's body content."""
    body_content = (message.get("body") or {}).get("content", "")
    return extract_emails_from_text(body_content)


def _index_recipients(message, fields, index):
    """Add every named recipient in *fields* of *message* to *index* (variant -> emails)."""
    for field in fields:
        for recip in message.get(field, []) or []:
            ea = recip.get("emailAddress") or {}
            name = ea.get("name", "")
            addr = ea.get("address", "")
            if name and addr:
                for v in name_variants(name):
                    index[v].add(addr.lower())


def _index_person(index, display_name, given_name, surname, emails):
    """Index *emails* under the display-name variants and under "given surname"."""
    for v in name_variants(display_name):
        index[v].update(emails)
    if given_name and surname:
        index[f"{given_name} {surname}".lower().strip()].update(emails)


def main():
    """Resolve name-only victims to email addresses and write a JSON report.

    Reads INPUT_FILE, authenticates to Microsoft Graph, gathers name -> email
    evidence from JR's contacts, sent items, inbox, Box collaboration mails,
    the tenant directory, the People API and per-name "from:" searches, then
    matches each victim name against that evidence. Results are written to
    OUTPUT_FILE and a summary is printed.

    Every lookup strategy is best-effort: a failure is reported and the
    remaining strategies still run. Failure to read the input, authenticate,
    or write the output propagates to the caller.
    """
    # Load victim data (utf-8-sig also accepts a file saved with a BOM).
    with open(INPUT_FILE, 'r', encoding='utf-8-sig') as f:
        victim_data = json.load(f)

    name_only_victims = victim_data["victims_identified_by_name_only"]
    already_resolved = victim_data["confirmed_victim_emails_from_box_acceptance"]
    print(f"[INFO] {len(name_only_victims)} victims to resolve by name")
    print(f"[INFO] {len(already_resolved)} already resolved")

    # Get token
    print("[INFO] Authenticating...")
    token = get_token()
    print("[OK] Token acquired")
    auth_headers = {"Authorization": f"Bearer {token}"}
    mailbox_url = f"{GRAPH_BASE}/users/{JR_USER_ID}"

    # Third-party addresses seen in message bodies, reported if never matched.
    all_body_emails = set()

    # --- Strategy 1: Pull JR's contacts ---
    print("\n[INFO] Pulling JR's contacts...")
    contacts = []
    try:
        contacts = graph_get_all(
            token,
            f"{mailbox_url}/contacts",
            {"$top": "999", "$select": "displayName,emailAddresses,givenName,surname"},
        )
        print(f"[OK] Got {len(contacts)} contacts")
    except Exception as e:
        print(f"[WARNING] Contacts API failed (likely missing Contacts.Read permission): {e}")
        print("[INFO] Will rely on mail search and GAL lookup instead")

    # Build contact lookup: normalized name -> set of emails
    contact_map = defaultdict(set)
    for c in contacts:
        emails = {
            e["address"].lower()
            for e in c.get("emailAddresses") or []
            if e.get("address")
        }
        if emails:
            _index_person(
                contact_map,
                c.get("displayName", ""),
                c.get("givenName", ""),
                c.get("surname", ""),
                emails,
            )

    # --- Strategy 2: Search JR's sent items for Box invitation emails ---
    print("\n[INFO] Searching JR's sent items for Box.com invitations...")
    sent_emails = _run_searches(
        token,
        f"{mailbox_url}/mailFolders/sentitems/messages",
        ["box.com invitation", "box.com invited", "has been invited to"],
        "subject,body,toRecipients,ccRecipients,bccRecipients,sentDateTime",
        "sent messages",
    )
    unique_sent = _dedupe_by_id(sent_emails)
    print(f"[OK] {len(unique_sent)} unique sent messages found")

    # Extract name->email mappings from sent items
    sent_map = defaultdict(set)
    for m in unique_sent:
        _index_recipients(m, ["toRecipients", "ccRecipients", "bccRecipients"], sent_map)
        # Body addresses carry no name, so they cannot be matched directly;
        # surface them in the "unmatched body emails" report instead of
        # parking them under a sentinel key that nothing ever read.
        for em in _message_body_emails(m):
            em_lower = em.lower()
            if _is_third_party_address(em_lower):
                all_body_emails.add(em_lower)

    # --- Strategy 3: Search JR's inbox for emails FROM box.com ---
    print("\n[INFO] Searching JR's inbox for Box.com notification emails...")
    inbox_emails = _run_searches(
        token,
        f"{mailbox_url}/messages",
        ["from:box.com invited", "from:box.com invitation", "from:noreply@box.com"],
        "subject,body,from,toRecipients,ccRecipients,sentDateTime",
        "inbox messages",
    )
    unique_inbox = _dedupe_by_id(inbox_emails)
    print(f"[OK] {len(unique_inbox)} unique inbox messages found")

    # Extract from inbox - look for victim names and emails in body
    inbox_map = defaultdict(set)
    for m in unique_inbox:
        body_emails = _message_body_emails(m)
        for em in body_emails:
            em_lower = em.lower()
            if _is_third_party_address(em_lower):
                all_body_emails.add(em_lower)

        _index_recipients(m, ["toRecipients", "ccRecipients"], inbox_map)

        # Use each address's local part ("john.smith" -> "john smith") as a
        # name hint so it can be matched against victim name variants.
        for em in body_emails:
            em_lower = em.lower()
            if "box.com" in em_lower or "noreply" in em_lower:
                continue
            local_part = em.split('@')[0]
            local_clean = re.sub(r'[._\-\d]+', ' ', local_part).strip().lower()
            if len(local_clean) > 2:
                inbox_map[local_clean].add(em_lower)

    print(f"[INFO] Extracted {len(all_body_emails)} unique non-Box emails from inbox bodies")

    # --- Strategy 4: Search for Box collaboration/sharing emails specifically ---
    print("\n[INFO] Searching for Box collaboration emails...")
    collab_emails = _run_searches(
        token,
        f"{mailbox_url}/messages",
        ["box.com collaborate", "shared a file with you", "shared a folder with you"],
        "subject,body,from,toRecipients,ccRecipients,sentDateTime",
        "messages",
    )
    for m in collab_emails:
        for em in _message_body_emails(m):
            em_lower = em.lower()
            if _is_third_party_address(em_lower):
                all_body_emails.add(em_lower)

    # --- Strategy 5: Search tenant directory (GAL) for victim names ---
    print("\n[INFO] Searching tenant directory (GAL) for victim names...")
    gal_map = defaultdict(set)
    try:
        all_users = graph_get_all(
            token,
            f"{GRAPH_BASE}/users",
            {"$top": "999", "$select": "displayName,mail,userPrincipalName,givenName,surname"},
        )
        print(f"[OK] Got {len(all_users)} directory users")
        for u in all_users:
            mail = u.get("mail", "") or u.get("userPrincipalName", "")
            if not mail:
                continue
            _index_person(
                gal_map,
                u.get("displayName", ""),
                u.get("givenName", ""),
                u.get("surname", ""),
                {mail.lower()},
            )
    except Exception as e:
        print(f"[WARNING] Directory users lookup failed: {e}")

    # --- Strategy 6: Try People API for broader name resolution ---
    print("\n[INFO] Searching People API for victim names...")
    people_map = defaultdict(set)
    # Only search for names that are specific enough (2+ words, not generic)
    specific_names = [n for n in name_only_victims if len(n.split()) >= 2 and len(n) > 5]
    searched = 0
    people_failures = 0
    people_url = f"{mailbox_url}/people"
    for victim_name in specific_names:
        params = {
            "$search": f'"{victim_name}"',
            "$top": "5",
            "$select": "displayName,scoredEmailAddresses,givenName,surname",
        }
        try:
            r = _get_with_throttle_retry(people_url, auth_headers, params)
            if r.status_code == 403:
                # Permission is missing for every name; stop trying.
                print(" [WARNING] People API returned 403 - skipping")
                break
            if r.status_code == 200:
                for p in r.json().get("value", []):
                    pemails = [
                        e.get("address", "")
                        for e in p.get("scoredEmailAddresses", [])
                        if e.get("address")
                    ]
                    if pemails:
                        for v in name_variants(p.get("displayName", "")):
                            for em in pemails:
                                people_map[v].add(em.lower())
            searched += 1
            if searched % 50 == 0:
                print(f" Searched {searched}/{len(specific_names)} names...")
        except Exception as e:
            # Deliberately best-effort per name, but no longer silent: report
            # the first few failures so systematic problems are visible.
            people_failures += 1
            if people_failures <= 3:
                print(f" [WARNING] People API lookup for '{victim_name}' failed: {e}")

    if people_failures:
        print(f" [WARNING] {people_failures} People API lookups failed")
    print(f"[OK] People API searched for {searched} names, found {len(people_map)} name entries")

    # --- Strategy 7: Search JR's mail for each unresolved name directly ---
    # This catches cases where someone emailed JR and their display name matches
    print("\n[INFO] Searching JR's mailbox for unresolved victim names...")
    mail_search_map = defaultdict(set)
    mail_searched = 0
    mail_failures = 0
    messages_url = f"{mailbox_url}/messages"
    for victim_name in name_only_victims:
        # Skip single-word or very short names - too many false positives
        if len(victim_name.split()) < 2 or len(victim_name) < 5:
            continue
        victim_norm = normalize(victim_name)
        victim_tokens = set(victim_norm.split())
        params = {
            "$search": f'"from:{victim_name}"',
            "$top": "5",
            "$select": "from,subject",
        }
        try:
            r = _get_with_throttle_retry(messages_url, auth_headers, params)
            if r.status_code == 200:
                for msg in r.json().get("value", []):
                    fr = (msg.get("from") or {}).get("emailAddress", {})
                    fname = fr.get("name", "")
                    faddr = fr.get("address", "")
                    if not (fname and faddr):
                        continue
                    # Require a strong match: the same set of name tokens
                    # (covers exact equality and reordered names). An empty
                    # token set must not match another empty one.
                    if victim_tokens and set(normalize(fname).split()) == victim_tokens:
                        mail_search_map[victim_norm].add(faddr.lower())
            mail_searched += 1
            if mail_searched % 50 == 0:
                print(f" Searched {mail_searched} names...")
        except Exception as e:
            # Best-effort per name; report instead of silently swallowing.
            mail_failures += 1
            if mail_failures <= 3:
                print(f" [WARNING] Mail search for '{victim_name}' failed: {e}")

    if mail_failures:
        print(f" [WARNING] {mail_failures} mail searches failed")
    print(f"[OK] Mail search completed for {mail_searched} names, found {len(mail_search_map)} matches")

    # --- Now resolve victims ---
    print("\n[INFO] Resolving victim names to email addresses...")
    resolved = {}
    unresolved = []
    resolution_source = {}
    variant_lookups = [
        (contact_map, "contacts"),
        (sent_map, "sent_items"),
        (inbox_map, "inbox"),
        (gal_map, "directory"),
        (people_map, "people_api"),
    ]

    for victim_name in name_only_victims:
        found_emails = set()
        sources = set()
        victim_variants = name_variants(victim_name)

        for index, label in variant_lookups:
            for v in victim_variants:
                if v in index:
                    # Filter per source so a source is only credited when it
                    # contributed an address that survives the exclusions.
                    usable = {e for e in index[v] if _is_plausible_victim_email(e)}
                    if usable:
                        found_emails.update(usable)
                        sources.add(label)

        # Check direct mail search (keyed by the normalized name only)
        vn = normalize(victim_name)
        if vn in mail_search_map:
            usable = {e for e in mail_search_map[vn] if _is_plausible_victim_email(e)}
            if usable:
                found_emails.update(usable)
                sources.add("mail_from_search")

        if found_emails:
            resolved[victim_name] = sorted(found_emails)
            # Sorted so the report is stable between runs.
            resolution_source[victim_name] = sorted(sources)
        else:
            unresolved.append(victim_name)

    # --- Build output ---
    all_resolved_emails = set()
    for emails in resolved.values():
        all_resolved_emails.update(emails)

    # Combine with already-known emails
    all_victim_emails = {e.lower() for e in already_resolved} | all_resolved_emails
    unmatched_body_emails = all_body_emails - all_victim_emails

    output = {
        "investigation": "Valley Wide Plastering BEC - Victim Email Resolution",
        "run_date": time.strftime("%Y-%m-%d %H:%M:%S"),
        "summary": {
            "previously_resolved": len(already_resolved),
            "newly_resolved_by_name": len(resolved),
            "still_unresolved": len(unresolved),
            "total_unique_victim_emails": len(all_victim_emails),
            "total_victims_identified": len(already_resolved) + len(resolved) + len(unresolved),
        },
        "all_victim_emails_combined": sorted(all_victim_emails),
        "newly_resolved": {
            name: {
                "emails": emails,
                "source": resolution_source.get(name, []),
            }
            for name, emails in sorted(resolved.items())
        },
        "previously_confirmed_emails": sorted(already_resolved, key=str.lower),
        "unresolved_names": sorted(unresolved, key=lambda x: x.lower()),
        "body_emails_found_but_unmatched": sorted(unmatched_body_emails),
    }

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    # --- Print summary ---
    print("\n" + "=" * 60)
    print("RESOLUTION RESULTS")
    print("=" * 60)
    print(f"Previously resolved emails: {len(already_resolved)}")
    print(f"Newly resolved by name: {len(resolved)}")
    print(f"Still unresolved: {len(unresolved)}")
    print(f"Total unique victim emails: {len(all_victim_emails)}")
    print(f"Unmatched body emails found: {len(unmatched_body_emails)}")
    print()

    if resolved:
        print("--- Newly Resolved ---")
        for name, emails in sorted(resolved.items()):
            src = ", ".join(resolution_source.get(name, []))
            print(f" {name}: {', '.join(emails)} [{src}]")
        print()

    if unresolved:
        print(f"--- Unresolved ({len(unresolved)} names) ---")
        for name in sorted(unresolved, key=lambda x: x.lower()):
            print(f" {name}")

    print(f"\n[OK] Results saved to {OUTPUT_FILE}")
|
|
|
|
|
|
# Run the resolution workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|