Files
claudetools/temp/vwp_resolve_victims.py
Mike Swanson fa15b03180 sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files:
- Quote wizard frontend (all components, hooks, types, config)
- API updates (config, models, routers, schemas, services)
- Client work (bg-builders, gurushow)
- Scripts (BGB Lesley termination, CIPP, Datto, migration)
- Temp files (Bardach contacts, VWP investigation, misc)
- Credentials and session logs
- Email service, PHP API, session logs

Machine: ACG-M-L5090
Timestamp: 2026-03-10 19:11:00

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 19:59:08 -07:00

520 lines
20 KiB
Python

"""
Valley Wide Plastering - Resolve victim email addresses from display names.
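Strategy:
1. Load victim names from vwp_victim_emails.json
2. Pull ALL contacts from JR's mailbox via Graph API
3. Search JR's sent items for Box.com invitation emails
4. Search JR's inbox for emails from box.com containing "invited"
5. Search JR's mailbox for Box collaboration/sharing emails
6. Index the tenant directory, then query the People API and JR's mail
   ("from:<name>") for each victim name
7. Match victim names against contacts, directory, and mail-derived addresses
8. Output resolved and unresolved lists to vwp_resolved_victims.json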
"""
import json
import os
import re
import sys
import time
from collections import defaultdict

import requests
# --- Configuration ---
# Azure AD tenant and app registration used for the client-credentials grant.
TENANT_ID = "5c53ae9f-7071-4248-b834-8685b646450f"
APP_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: the client secret must not live in source control. It is read from
# the VWP_APP_SECRET environment variable; it is empty when the variable is
# unset, in which case authentication will fail. The literal that used to be
# committed on this line should be treated as compromised and rotated.
APP_SECRET = os.environ.get("VWP_APP_SECRET", "")
# Graph object id of the user ("JR") whose mailbox and contacts are searched.
JR_USER_ID = "0af923d0-48c5-4cc1-8553-c60625802815"
# Input: victim names/emails gathered earlier. Output: resolution report.
INPUT_FILE = r"D:\ClaudeTools\temp\vwp_victim_emails.json"
OUTPUT_FILE = r"D:\ClaudeTools\temp\vwp_resolved_victims.json"
GRAPH_BASE = "https://graph.microsoft.com/v1.0"
def get_token():
    """Acquire an app-only Microsoft Graph access token.

    Posts an OAuth2 client-credentials grant for APP_ID / APP_SECRET to the
    token endpoint of TENANT_ID, requesting the Graph ``.default`` scope.

    Returns:
        The ``access_token`` string from the token response.

    Raises:
        RuntimeError: if APP_SECRET is empty.
        requests.HTTPError: if the token endpoint answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    if not APP_SECRET:
        # Fail with a clear message instead of an opaque 401 from the endpoint.
        raise RuntimeError("APP_SECRET is not configured")
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    data = {
        "client_id": APP_ID,
        "client_secret": APP_SECRET,
        "scope": "https://graph.microsoft.com/.default",
        "grant_type": "client_credentials",
    }
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the script indefinitely.
    r = requests.post(url, data=data, timeout=30)
    r.raise_for_status()
    return r.json()["access_token"]
def graph_get_all(token, url, params=None, max_retries=5, timeout=30):
    """Page through all results from a Graph API endpoint.

    Follows ``@odata.nextLink`` until the service stops returning one and
    concatenates every page's ``value`` array.

    Args:
        token: Bearer token sent in the Authorization header.
        url: First page URL.
        params: Query parameters for the first request only; the nextLink
            already carries them for later pages.
        max_retries: How many consecutive HTTP 429 responses are retried for
            a single page before giving up.
        timeout: Per-request timeout in seconds.

    Returns:
        A list with the items of all pages, in the order they were returned.

    Raises:
        requests.HTTPError: on any error status, including a 429 that is
            still returned after ``max_retries`` retries.
        requests.RequestException: on connection failure or timeout.
    """
    headers = {"Authorization": f"Bearer {token}"}
    results = []
    next_url = url
    throttled = 0
    while next_url:
        r = requests.get(next_url, headers=headers, params=params, timeout=timeout)
        if r.status_code == 429 and throttled < max_retries:
            throttled += 1
            try:
                retry = int(r.headers.get("Retry-After", 5))
            except ValueError:
                # Retry-After may also be an HTTP date; fall back to the default.
                retry = 5
            print(f" [THROTTLED] Waiting {retry}s...")
            time.sleep(retry)
            continue
        # Also raises for a 429 once the retry budget is used up, so the loop
        # cannot spin forever against a persistently throttling service.
        r.raise_for_status()
        throttled = 0
        data = r.json()
        results.extend(data.get("value", []))
        next_url = data.get("@odata.nextLink")
        params = None  # nextLink already has params
    return results
def normalize(name):
    """Normalize a name for comparison.

    Drops parenthetical suffixes such as "(Contractor)", removes digits,
    treats commas as separators, lower-cases the text and collapses runs of
    whitespace to a single space.

    Args:
        name: Display name, or None / empty string.

    Returns:
        The normalized name, or "" when *name* is falsy.
    """
    if not name:
        return ""
    # Remove parenthetical suffixes like (Contractor)
    name = re.sub(r'\s*\(.*?\)\s*', ' ', name)
    # Remove numbers
    name = re.sub(r'\d+', '', name)
    # A comma left glued to a token ("smith,") would stop "Smith, John" from
    # ever comparing equal, token by token, to "John Smith".
    name = name.replace(',', ' ')
    # Lowercase, strip extra whitespace
    return ' '.join(name.lower().split())
def name_variants(name):
    """Generate matching variants for a name.

    All variants are derived from the normalized name, so digits and
    parenthetical suffixes never leak into them, and commas are treated as
    token separators.

    Args:
        name: Display name, or None / empty string.

    Returns:
        A set of lower-case strings: the normalized name, its comma-free
        form, "first last" / "last first" built from the first and last
        tokens, and, for names containing a comma, both orders of the first
        two tokens. The set is empty when nothing usable remains after
        normalization, so blank names can never match each other.
    """
    n = normalize(name)
    tokens = n.replace(',', ' ').split()
    if not tokens:
        return set()
    variants = {n, ' '.join(tokens)}
    if len(tokens) >= 2:
        if ',' in name:
            # "Last, First ..." -> "first last" (and the literal order too)
            variants.add(f"{tokens[1]} {tokens[0]}")
            variants.add(f"{tokens[0]} {tokens[1]}")
        # first last
        variants.add(f"{tokens[0]} {tokens[-1]}")
        # last first
        variants.add(f"{tokens[-1]} {tokens[0]}")
    return variants
def extract_emails_from_text(text):
    """Extract email addresses from text.

    Args:
        text: Any string (plain text or HTML), or None / empty string.

    Returns:
        A sorted list of the distinct addresses matched in *text*, with their
        original letter case. Sorting keeps the result stable between runs;
        iterating a bare set of strings is not. Returns [] for falsy input.
    """
    if not text:
        return []
    pattern = r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}'
    return sorted(set(re.findall(pattern, text)))
# Seconds to wait on any single Graph request issued directly by the helpers
# below (requests has no default timeout).
_REQUEST_TIMEOUT = 30

# Substrings that mark an address found in a message body as not worth
# reporting: Box's own mail, automated senders, and "valleywide" (presumably
# the organization's own domain).
_INTERNAL_MARKERS = ("box.com", "noreply", "valleywide")

# Substrings that disqualify an address from being reported as a victim email.
_EXCLUDE_PATTERNS = (
    'box.com', 'noreply', 'valleywideplastering', 'buildingconnected.com',
    'team@', 'no-reply', 'donotreply',
)


def _body_text(message):
    """Return a Graph message's body content, or "" if body/content is absent or null."""
    return (message.get("body") or {}).get("content") or ""


def _is_external(address):
    """Return True when *address* contains none of the _INTERNAL_MARKERS."""
    lowered = address.lower()
    return not any(marker in lowered for marker in _INTERNAL_MARKERS)


def _external_body_emails(message):
    """Return the lower-cased external addresses found in a message body."""
    return {
        found.lower()
        for found in extract_emails_from_text(_body_text(message))
        if _is_external(found)
    }


def _search_phrase(text):
    """Strip double quotes so *text* can sit inside a quoted $search phrase."""
    return text.replace('"', '')


def _index_by_name(index, display_name, addresses):
    """Add each address (lower-cased) to *index* under every variant of *display_name*."""
    lowered = [address.lower() for address in addresses]
    if not lowered:
        # Never create empty entries: key presence is what counts as a match.
        return
    for variant in name_variants(display_name):
        index[variant].update(lowered)


def _index_recipients(index, message, fields):
    """Index the named recipients listed in the given recipient fields of a message."""
    for field in fields:
        for recipient in message.get(field) or []:
            info = recipient.get("emailAddress") or {}
            name = info.get("name", "")
            address = info.get("address", "")
            if name and address:
                _index_by_name(index, name, [address])


def _dedupe_by_id(messages):
    """Drop repeated messages (same ``id``), keeping first occurrences in order."""
    seen = set()
    unique = []
    for message in messages:
        message_id = message.get("id")
        if message_id:
            if message_id in seen:
                continue
            seen.add(message_id)
        # Messages without an id cannot be compared, so they are all kept.
        unique.append(message)
    return unique


def _search_messages(token, url, queries, select, noun):
    """Run each $search query against *url* and return the de-duplicated hits.

    A failing query is reported and skipped so the remaining queries still run.
    *noun* only affects the progress message.
    """
    found = []
    for search_q in queries:
        params = {
            "$search": f'"{search_q}"',
            "$top": "200",
            "$select": select,
        }
        try:
            results = graph_get_all(token, url, params)
        except Exception as e:
            print(f" [WARNING] Search for '{search_q}' failed: {e}")
            continue
        found.extend(results)
        print(f" Found {len(results)} {noun} matching '{search_q}'")
    return _dedupe_by_id(found)


def _get_with_one_retry(token, url, params):
    """GET a single page, retrying once after the advertised delay on HTTP 429."""
    headers = {"Authorization": f"Bearer {token}"}
    r = requests.get(url, headers=headers, params=params, timeout=_REQUEST_TIMEOUT)
    if r.status_code == 429:
        try:
            retry = int(r.headers.get("Retry-After", 5))
        except ValueError:
            # Retry-After may also be an HTTP date; fall back to the default.
            retry = 5
        print(f" [THROTTLED] Waiting {retry}s...")
        time.sleep(retry)
        r = requests.get(url, headers=headers, params=params, timeout=_REQUEST_TIMEOUT)
    return r


def _load_contacts(token):
    """Strategy 1: index JR's contacts by name variant -> set of emails."""
    print("\n[INFO] Pulling JR's contacts...")
    contacts = []
    try:
        contacts_url = f"{GRAPH_BASE}/users/{JR_USER_ID}/contacts"
        contacts = graph_get_all(token, contacts_url, {"$top": "999", "$select": "displayName,emailAddresses,givenName,surname"})
        print(f"[OK] Got {len(contacts)} contacts")
    except Exception as e:
        print(f"[WARNING] Contacts API failed (likely missing Contacts.Read permission): {e}")
        print("[INFO] Will rely on mail search and GAL lookup instead")
    contact_map = defaultdict(set)
    for c in contacts:
        emails = [e.get("address", "") for e in c.get("emailAddresses") or [] if e.get("address")]
        if not emails:
            continue
        # Index by displayName variants
        _index_by_name(contact_map, c.get("displayName", ""), emails)
        # Also index by givenName + surname
        gn = c.get("givenName", "")
        sn = c.get("surname", "")
        if gn and sn:
            full = f"{gn} {sn}".lower().strip()
            contact_map[full].update(em.lower() for em in emails)
    return contact_map


def _scan_sent_items(token):
    """Strategy 2: index recipients of JR's sent Box invitations.

    Returns (name -> emails index, external addresses seen in message bodies).
    """
    print("\n[INFO] Searching JR's sent items for Box.com invitations...")
    url = f"{GRAPH_BASE}/users/{JR_USER_ID}/mailFolders/sentitems/messages"
    unique_sent = _search_messages(
        token,
        url,
        ["box.com invitation", "box.com invited", "has been invited to"],
        "subject,body,toRecipients,ccRecipients,bccRecipients,sentDateTime",
        "sent messages",
    )
    print(f"[OK] {len(unique_sent)} unique sent messages found")
    sent_map = defaultdict(set)
    body_emails = set()
    for m in unique_sent:
        _index_recipients(sent_map, m, ("toRecipients", "ccRecipients", "bccRecipients"))
        # Body addresses have no name attached; they are kept for the
        # "found but unmatched" section of the report.
        body_emails |= _external_body_emails(m)
    return sent_map, body_emails


def _scan_inbox(token):
    """Strategy 3: index Box notification mail in JR's mailbox.

    Returns (name -> emails index, external addresses seen in message bodies).
    The index also holds guesses keyed by an address's local part with
    separators and digits turned into spaces (john.smith@x -> "john smith").
    """
    print("\n[INFO] Searching JR's inbox for Box.com notification emails...")
    url = f"{GRAPH_BASE}/users/{JR_USER_ID}/messages"
    unique_inbox = _search_messages(
        token,
        url,
        ["from:box.com invited", "from:box.com invitation", "from:noreply@box.com"],
        "subject,body,from,toRecipients,ccRecipients,sentDateTime",
        "inbox messages",
    )
    print(f"[OK] {len(unique_inbox)} unique inbox messages found")
    inbox_map = defaultdict(set)
    all_body_emails = set()
    for m in unique_inbox:
        all_body_emails |= _external_body_emails(m)
        _index_recipients(inbox_map, m, ("toRecipients", "ccRecipients"))
        for em in extract_emails_from_text(_body_text(m)):
            em_lower = em.lower()
            if "box.com" in em_lower or "noreply" in em_lower:
                continue
            # Use local part as potential name hint
            local_part = em.split('@')[0]
            local_clean = re.sub(r'[._\-\d]+', ' ', local_part).strip().lower()
            if len(local_clean) > 2:
                inbox_map[local_clean].add(em_lower)
    print(f"[INFO] Extracted {len(all_body_emails)} unique non-Box emails from inbox bodies")
    return inbox_map, all_body_emails


def _scan_collaboration(token):
    """Strategy 4: collect external addresses from Box sharing/collaboration mail."""
    print("\n[INFO] Searching for Box collaboration emails...")
    url = f"{GRAPH_BASE}/users/{JR_USER_ID}/messages"
    collab_emails = _search_messages(
        token,
        url,
        ["box.com collaborate", "shared a file with you", "shared a folder with you"],
        "subject,body,from,toRecipients,ccRecipients,sentDateTime",
        "messages",
    )
    body_emails = set()
    for m in collab_emails:
        body_emails |= _external_body_emails(m)
    return body_emails


def _load_directory(token):
    """Strategy 5: index all tenant directory users by name variant -> emails."""
    print("\n[INFO] Searching tenant directory (GAL) for victim names...")
    gal_map = defaultdict(set)
    try:
        users_url = f"{GRAPH_BASE}/users"
        all_users = graph_get_all(token, users_url, {"$top": "999", "$select": "displayName,mail,userPrincipalName,givenName,surname"})
        print(f"[OK] Got {len(all_users)} directory users")
        for u in all_users:
            mail = u.get("mail", "") or u.get("userPrincipalName", "")
            if not mail:
                continue
            _index_by_name(gal_map, u.get("displayName", ""), [mail])
            gn = u.get("givenName", "")
            sn = u.get("surname", "")
            if gn and sn:
                full = f"{gn} {sn}".lower().strip()
                gal_map[full].add(mail.lower())
    except Exception as e:
        print(f"[WARNING] Directory users lookup failed: {e}")
    return gal_map


def _search_people(token, victim_names):
    """Strategy 6: query the People API once per sufficiently specific name."""
    print("\n[INFO] Searching People API for victim names...")
    people_map = defaultdict(set)
    # Only search for names that are specific enough (2+ words, not generic)
    specific_names = [n for n in victim_names if len(n.split()) >= 2 and len(n) > 5]
    url = f"{GRAPH_BASE}/users/{JR_USER_ID}/people"
    searched = 0
    for victim_name in specific_names:
        params = {
            "$search": f'"{_search_phrase(victim_name)}"',
            "$top": "5",
            "$select": "displayName,scoredEmailAddresses,givenName,surname",
        }
        try:
            r = _get_with_one_retry(token, url, params)
            if r.status_code == 403:
                # No permission: every further call would fail the same way.
                print(f" [WARNING] People API returned 403 - skipping")
                break
            if r.status_code == 200:
                for p in r.json().get("value", []):
                    pemails = [e.get("address", "") for e in p.get("scoredEmailAddresses") or [] if e.get("address")]
                    if pemails:
                        _index_by_name(people_map, p.get("displayName", ""), pemails)
            searched += 1
            if searched % 50 == 0:
                print(f" Searched {searched}/{len(specific_names)} names...")
        except Exception as e:
            # Best effort per name: report the failure and keep going.
            print(f" [WARNING] People search for '{victim_name}' failed: {e}")
    print(f"[OK] People API searched for {searched} names, found {len(people_map)} name entries")
    return people_map


def _search_mail_by_sender(token, victim_names):
    """Strategy 7: search JR's mail for messages sent *from* each victim name.

    Returns normalized victim name -> sender addresses whose display name
    matches the victim exactly or has the same set of words.
    """
    print("\n[INFO] Searching JR's mailbox for unresolved victim names...")
    mail_search_map = defaultdict(set)
    url = f"{GRAPH_BASE}/users/{JR_USER_ID}/messages"
    mail_searched = 0
    for victim_name in victim_names:
        # Skip single-word or very short names - too many false positives
        if len(victim_name.split()) < 2 or len(victim_name) < 5:
            continue
        victim_norm = normalize(victim_name)
        victim_words = set(victim_norm.split())
        params = {
            "$search": f'"from:{_search_phrase(victim_name)}"',
            "$top": "5",
            "$select": "from,subject",
        }
        try:
            r = _get_with_one_retry(token, url, params)
            if r.status_code == 200:
                for msg in r.json().get("value", []):
                    fr = (msg.get("from") or {}).get("emailAddress") or {}
                    fname = fr.get("name", "")
                    faddr = fr.get("address", "")
                    if not (fname and faddr):
                        continue
                    # Require strong match between sender name and victim
                    fname_norm = normalize(fname)
                    if fname_norm == victim_norm or set(fname_norm.split()) == victim_words:
                        mail_search_map[victim_norm].add(faddr.lower())
            mail_searched += 1
            if mail_searched % 50 == 0:
                print(f" Searched {mail_searched} names...")
        except Exception as e:
            # Best effort per name: report the failure and keep going.
            print(f" [WARNING] Mail search for '{victim_name}' failed: {e}")
    print(f"[OK] Mail search completed for {mail_searched} names, found {len(mail_search_map)} matches")
    return mail_search_map


def _resolve_victims(victim_names, name_indexes, mail_search_map):
    """Match each victim name against the collected indexes.

    *name_indexes* is an ordered list of (source label, index) pairs looked up
    by name variant; *mail_search_map* is looked up by normalized name.

    Returns (resolved, unresolved, resolution_source): name -> sorted emails,
    list of names with no surviving email, and name -> source labels in
    lookup order.
    """
    resolved = {}
    unresolved = []
    resolution_source = {}
    for victim_name in victim_names:
        found_emails = set()
        source = []
        victim_variants = name_variants(victim_name)
        for label, index in name_indexes:
            hits = [index[v] for v in victim_variants if v in index]
            if hits:
                found_emails.update(*hits)
                source.append(label)
        vn = normalize(victim_name)
        if vn in mail_search_map:
            found_emails.update(mail_search_map[vn])
            source.append("mail_from_search")
        # Filter out obviously wrong emails
        found_emails = {e for e in found_emails if e and '@' in e and not any(p in e for p in _EXCLUDE_PATTERNS)}
        if found_emails:
            resolved[victim_name] = sorted(found_emails)
            resolution_source[victim_name] = source
        else:
            unresolved.append(victim_name)
    return resolved, unresolved, resolution_source


def _print_summary(already_resolved, resolved, unresolved, resolution_source, all_victim_emails, unmatched_body_emails):
    """Print the human-readable resolution summary to stdout."""
    print("\n" + "=" * 60)
    print("RESOLUTION RESULTS")
    print("=" * 60)
    print(f"Previously resolved emails: {len(already_resolved)}")
    print(f"Newly resolved by name: {len(resolved)}")
    print(f"Still unresolved: {len(unresolved)}")
    print(f"Total unique victim emails: {len(all_victim_emails)}")
    print(f"Unmatched body emails found: {len(unmatched_body_emails)}")
    print()
    if resolved:
        print("--- Newly Resolved ---")
        for name, emails in sorted(resolved.items()):
            src = ", ".join(resolution_source.get(name, []))
            print(f" {name}: {', '.join(emails)} [{src}]")
        print()
    if unresolved:
        print(f"--- Unresolved ({len(unresolved)} names) ---")
        for name in sorted(unresolved, key=lambda x: x.lower()):
            print(f" {name}")


def main():
    """Resolve victim display names to email addresses and write a JSON report.

    Reads INPUT_FILE, authenticates against Microsoft Graph, gathers
    name -> email evidence from JR's contacts, sent items, Box notification
    and collaboration mail, the tenant directory, the People API and
    per-name sender searches, then writes the resolved/unresolved breakdown
    to OUTPUT_FILE and prints a summary.

    Raises:
        OSError, json.JSONDecodeError, KeyError: if INPUT_FILE is missing,
            malformed, or lacks the expected keys.
        requests.RequestException: if authentication fails. Individual data
            sources are best-effort and only print a warning on failure.
    """
    # Load victim data. utf-8-sig accepts UTF-8 with or without a BOM, instead
    # of depending on the platform's default code page.
    with open(INPUT_FILE, 'r', encoding='utf-8-sig') as f:
        victim_data = json.load(f)
    name_only_victims = victim_data["victims_identified_by_name_only"]
    already_resolved = victim_data["confirmed_victim_emails_from_box_acceptance"]
    print(f"[INFO] {len(name_only_victims)} victims to resolve by name")
    print(f"[INFO] {len(already_resolved)} already resolved")

    # Get token
    print("[INFO] Authenticating...")
    token = get_token()
    print("[OK] Token acquired")

    # Gather evidence, in the same order the progress messages are expected.
    contact_map = _load_contacts(token)
    sent_map, sent_body_emails = _scan_sent_items(token)
    inbox_map, all_body_emails = _scan_inbox(token)
    all_body_emails |= _scan_collaboration(token)
    # Addresses seen only in sent-item bodies would otherwise be discarded;
    # surface them with the other unmatched body addresses for manual review.
    all_body_emails |= sent_body_emails
    gal_map = _load_directory(token)
    people_map = _search_people(token, name_only_victims)
    mail_search_map = _search_mail_by_sender(token, name_only_victims)

    # --- Now resolve victims ---
    print("\n[INFO] Resolving victim names to email addresses...")
    resolved, unresolved, resolution_source = _resolve_victims(
        name_only_victims,
        [
            ("contacts", contact_map),
            ("sent_items", sent_map),
            ("inbox", inbox_map),
            ("directory", gal_map),
            ("people_api", people_map),
        ],
        mail_search_map,
    )

    # --- Build output ---
    all_resolved_emails = set()
    for emails in resolved.values():
        all_resolved_emails.update(emails)
    # Combine with already-known emails
    all_victim_emails = {e.lower() for e in already_resolved} | all_resolved_emails
    unmatched_body_emails = all_body_emails - all_victim_emails
    output = {
        "investigation": "Valley Wide Plastering BEC - Victim Email Resolution",
        "run_date": time.strftime("%Y-%m-%d %H:%M:%S"),
        "summary": {
            "previously_resolved": len(already_resolved),
            "newly_resolved_by_name": len(resolved),
            "still_unresolved": len(unresolved),
            "total_unique_victim_emails": len(all_victim_emails),
            "total_victims_identified": len(already_resolved) + len(resolved) + len(unresolved),
        },
        "all_victim_emails_combined": sorted(all_victim_emails),
        "newly_resolved": {
            name: {
                "emails": emails,
                "source": resolution_source.get(name, [])
            }
            for name, emails in sorted(resolved.items())
        },
        "previously_confirmed_emails": sorted(already_resolved, key=str.lower),
        "unresolved_names": sorted(unresolved, key=lambda x: x.lower()),
        "body_emails_found_but_unmatched": sorted(unmatched_body_emails),
    }
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    # --- Print summary ---
    _print_summary(already_resolved, resolved, unresolved, resolution_source, all_victim_emails, unmatched_body_emails)
    print(f"\n[OK] Results saved to {OUTPUT_FILE}")
# Run the resolution only when executed as a script, not when imported.
if __name__ == "__main__":
    main()