"""
Valley Wide Plastering - Resolve victim email addresses from display names.

Strategy:
1. Load victim names from vwp_victim_emails.json
2. Pull ALL contacts from JR's mailbox via Graph API
3. Search JR's sent items for Box.com invitation emails
4. Search JR's inbox for emails from box.com containing "invited"
5. Match victim names against contacts + email extractions
6. Output resolved and unresolved lists
"""
import json
import os
import re
import sys
import time
from collections import defaultdict

import requests

# --- Configuration ---
TENANT_ID = "5c53ae9f-7071-4248-b834-8685b646450f"
APP_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# SECURITY: a client secret committed to source should be treated as leaked.
# Rotate it and supply the new value through VWP_APP_SECRET; the literal is
# kept only as a fallback so existing invocations keep working.
APP_SECRET = os.environ.get("VWP_APP_SECRET") or "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
JR_USER_ID = "0af923d0-48c5-4cc1-8553-c60625802815"
INPUT_FILE = r"D:\ClaudeTools\temp\vwp_victim_emails.json"
OUTPUT_FILE = r"D:\ClaudeTools\temp\vwp_resolved_victims.json"
GRAPH_BASE = "https://graph.microsoft.com/v1.0"

# Seconds to wait on any single HTTP call before giving up.
REQUEST_TIMEOUT = 60
# Consecutive 429 responses tolerated per page before the error is raised.
MAX_THROTTLE_RETRIES = 5
# Back-off used when a 429 carries no usable Retry-After header.
DEFAULT_RETRY_AFTER = 5

_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')

# Substrings that mark an address found in a message body as noise.
_BODY_NOISE_MARKERS = ("box.com", "noreply", "valleywide")

# Substrings that disqualify an address from the final resolved list.
_EXCLUDE_PATTERNS = (
    'box.com',
    'noreply',
    'valleywideplastering',
    'buildingconnected.com',
    'team@',
    'no-reply',
    'donotreply',
)


def get_token():
    """Request an app-only Graph access token via the client-credentials flow.

    Returns:
        The ``access_token`` string from the token endpoint response.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    data = {
        "client_id": APP_ID,
        "client_secret": APP_SECRET,
        "scope": "https://graph.microsoft.com/.default",
        "grant_type": "client_credentials",
    }
    r = requests.post(url, data=data, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    return r.json()["access_token"]


def _retry_after_seconds(response):
    """Return the Retry-After delay of *response* as a non-negative int.

    Falls back to DEFAULT_RETRY_AFTER when the header is missing or is not a
    plain integer (the header may also be an HTTP date).
    """
    raw = response.headers.get("Retry-After", DEFAULT_RETRY_AFTER)
    try:
        return max(int(raw), 0)
    except (TypeError, ValueError):
        return DEFAULT_RETRY_AFTER


def graph_get_all(token, url, params=None):
    """Page through all results from a Graph API endpoint.

    Args:
        token: Bearer token used for the Authorization header.
        url: First page URL.
        params: Query parameters for the first page only; follow-up pages use
            the ``@odata.nextLink`` URL, which already embeds them.

    Returns:
        A list with the concatenated ``value`` arrays of every page.

    Raises:
        requests.HTTPError: on any error status, including a 429 that persists
            past MAX_THROTTLE_RETRIES consecutive attempts.
        requests.RequestException: on connection failure or timeout.
    """
    headers = {"Authorization": f"Bearer {token}"}
    results = []
    next_url = url
    throttled = 0
    while next_url:
        r = requests.get(next_url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
        if r.status_code == 429 and throttled < MAX_THROTTLE_RETRIES:
            throttled += 1
            retry = _retry_after_seconds(r)
            print(f" [THROTTLED] Waiting {retry}s...")
            time.sleep(retry)
            continue
        # A 429 that exhausted its retries is raised here like any other error.
        r.raise_for_status()
        throttled = 0
        data = r.json()
        results.extend(data.get("value") or [])
        next_url = data.get("@odata.nextLink")
        params = None  # nextLink already has params
    return results


def _graph_get_once(token, url, params):
    """GET a single Graph page, retrying exactly once after a 429.

    Returns the final ``requests.Response``; the caller inspects the status.
    """
    headers = {"Authorization": f"Bearer {token}"}
    r = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    if r.status_code == 429:
        retry = _retry_after_seconds(r)
        print(f" [THROTTLED] Waiting {retry}s...")
        time.sleep(retry)
        r = requests.get(url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
    return r


def normalize(name):
    """Normalize a name for comparison.

    Drops parenthetical suffixes such as "(Contractor)", digits and commas,
    then lowercases and collapses whitespace. Commas are treated as separators
    so "Smith, John" and "John Smith" yield the same set of tokens.

    Returns:
        The normalized name, or "" for empty/None input.
    """
    if not name:
        return ""
    # Remove parenthetical suffixes like (Contractor)
    name = re.sub(r'\s*\(.*?\)\s*', ' ', name)
    # Remove numbers
    name = re.sub(r'\d+', '', name)
    # Commas only separate name parts ("Last, First"); they carry no identity.
    name = name.replace(',', ' ')
    # Lowercase, strip extra whitespace
    return ' '.join(name.lower().split())


def name_variants(name):
    """Generate matching variants for a name.

    Returns:
        A set holding the normalized name plus, for multi-word names,
        "first last" / "last first" built from the outer words and - when the
        raw name contains a comma - the swap of its first two words.
        Empty set when the name normalizes to nothing, so callers never index
        or look up the empty string.
    """
    n = normalize(name)
    if not n:
        return set()
    variants = {n}
    parts = n.split()
    if len(parts) >= 2:
        # "Last, First" -> "first last"
        if ',' in name:
            variants.add(f"{parts[1]} {parts[0]}")
            variants.add(f"{parts[0]} {parts[1]}")
        # first last
        variants.add(f"{parts[0]} {parts[-1]}")
        # last first
        variants.add(f"{parts[-1]} {parts[0]}")
    return variants


def extract_emails_from_text(text):
    """Extract email addresses from text.

    Returns:
        A sorted list of the distinct addresses found (case preserved), or an
        empty list for empty/None input.
    """
    if not text:
        return []
    return sorted(set(_EMAIL_RE.findall(text)))


def _search_phrase(term):
    """Wrap *term* in double quotes for ``$search``, escaping ``\\`` and ``"``."""
    escaped = term.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _body_text(message):
    """Return the body content of a Graph message, tolerating null fields."""
    body = message.get("body") or {}
    return body.get("content") or ""


def _is_candidate_body_email(email):
    """True when a body-extracted address is not Box, noreply or Valley Wide."""
    lowered = email.lower()
    return not any(marker in lowered for marker in _BODY_NOISE_MARKERS)


def _candidate_body_emails(message):
    """Return the lowercased non-noise addresses found in a message body."""
    return {
        em.lower()
        for em in extract_emails_from_text(_body_text(message))
        if _is_candidate_body_email(em)
    }


def _is_plausible_victim_email(email):
    """True when *email* looks like an address and hits no exclusion pattern."""
    return bool(email) and '@' in email and not any(p in email for p in _EXCLUDE_PATTERNS)


def _search_messages(token, url, queries, select, label):
    """Run each ``$search`` query against *url* and return all hits combined.

    A failing query is reported and skipped so the remaining ones still run.
    *label* only affects the progress message.
    """
    found = []
    for search_q in queries:
        params = {
            "$search": _search_phrase(search_q),
            "$top": "200",
            "$select": select,
        }
        try:
            results = graph_get_all(token, url, params)
        except Exception as e:  # best effort: one bad query must not abort the run
            print(f" [WARNING] Search for '{search_q}' failed: {e}")
        else:
            found.extend(results)
            print(f" Found {len(results)} {label} matching '{search_q}'")
    return found


def _dedupe_by_id(messages):
    """Drop repeated messages by ``id``, keeping first occurrences in order.

    Messages without an id are all kept rather than collapsed into one.
    """
    seen = set()
    unique = []
    for m in messages:
        mid = m.get("id")
        if mid:
            if mid in seen:
                continue
            seen.add(mid)
        unique.append(m)
    return unique


def _index_recipients(message, fields, target):
    """Add name-variant -> address entries for the recipients of *message*."""
    for field in fields:
        for recip in message.get(field) or []:
            ea = recip.get("emailAddress") or {}
            name = ea.get("name", "")
            addr = ea.get("address", "")
            if name and addr:
                for v in name_variants(name):
                    target[v].add(addr.lower())


def _index_person(target, display_name, given, surname, emails):
    """Index *emails* under the display-name variants and "given surname"."""
    lowered = [em.lower() for em in emails]
    keys = set(name_variants(display_name))
    if given and surname:
        keys.add(f"{given} {surname}".lower().strip())
    for key in keys:
        target[key].update(lowered)


def _resolve_victim(victim_name, variant_lookups, mail_search_map):
    """Collect the plausible addresses for one victim across every source.

    Args:
        victim_name: Display name to resolve.
        variant_lookups: Ordered (label, mapping) pairs keyed by name variant.
        mail_search_map: Mapping keyed by the normalized victim name.

    Returns:
        (sorted list of addresses, list of source labels). A label is listed
        only when that source contributed an address that survived the
        exclusion filter; labels keep the order in which sources are checked.
    """
    variants = name_variants(victim_name)
    checks = [(label, lookup, variants) for label, lookup in variant_lookups]
    checks.append(("mail_from_search", mail_search_map, {normalize(victim_name)}))

    found = set()
    sources = []
    for label, lookup, keys in checks:
        hits = set()
        for key in keys:
            # .get() avoids inserting empty entries into the defaultdicts.
            hits.update(lookup.get(key, ()))
        hits = {e for e in hits if _is_plausible_victim_email(e)}
        if hits:
            found |= hits
            sources.append(label)
    return sorted(found), sources


def main():
    """Resolve name-only victims to email addresses and write the report.

    Reads INPUT_FILE, queries Graph (contacts, sent items, inbox, directory,
    People API, per-name mail search), writes OUTPUT_FILE and prints a summary.
    Individual lookups are best effort: a failing source is reported and the
    run continues with the remaining ones.
    """
    # Load victim data (utf-8-sig also accepts files saved with a BOM)
    with open(INPUT_FILE, 'r', encoding='utf-8-sig') as f:
        victim_data = json.load(f)

    name_only_victims = victim_data["victims_identified_by_name_only"]
    already_resolved = victim_data["confirmed_victim_emails_from_box_acceptance"]
    print(f"[INFO] {len(name_only_victims)} victims to resolve by name")
    print(f"[INFO] {len(already_resolved)} already resolved")

    # Get token
    print("[INFO] Authenticating...")
    token = get_token()
    print("[OK] Token acquired")

    # --- Strategy 1: Pull JR's contacts ---
    print("\n[INFO] Pulling JR's contacts...")
    contacts = []
    try:
        contacts_url = f"{GRAPH_BASE}/users/{JR_USER_ID}/contacts"
        contacts = graph_get_all(
            token,
            contacts_url,
            {"$top": "999", "$select": "displayName,emailAddresses,givenName,surname"},
        )
        print(f"[OK] Got {len(contacts)} contacts")
    except Exception as e:
        print(f"[WARNING] Contacts API failed (likely missing Contacts.Read permission): {e}")
        print("[INFO] Will rely on mail search and GAL lookup instead")

    # Build contact lookup: normalized name -> set of emails
    contact_map = defaultdict(set)
    for c in contacts:
        emails = [
            e.get("address", "")
            for e in c.get("emailAddresses") or []
            if e.get("address")
        ]
        if not emails:
            continue
        _index_person(
            contact_map,
            c.get("displayName", ""),
            c.get("givenName", ""),
            c.get("surname", ""),
            emails,
        )

    # --- Strategy 2: Search JR's sent items for Box invitation emails ---
    print("\n[INFO] Searching JR's sent items for Box.com invitations...")
    sent_emails = _search_messages(
        token,
        f"{GRAPH_BASE}/users/{JR_USER_ID}/mailFolders/sentitems/messages",
        ["box.com invitation", "box.com invited", "has been invited to"],
        "subject,body,toRecipients,ccRecipients,bccRecipients,sentDateTime",
        "sent messages",
    )
    unique_sent = _dedupe_by_id(sent_emails)
    print(f"[OK] {len(unique_sent)} unique sent messages found")

    # Extract name->email mappings from sent items
    sent_map = defaultdict(set)
    # Body addresses have no name attached, so they cannot be matched to a
    # victim; they are kept for the "found but unmatched" report.
    sent_body_emails = set()
    for m in unique_sent:
        _index_recipients(m, ("toRecipients", "ccRecipients", "bccRecipients"), sent_map)
        sent_body_emails.update(_candidate_body_emails(m))

    # --- Strategy 3: Search JR's inbox for emails FROM box.com ---
    print("\n[INFO] Searching JR's inbox for Box.com notification emails...")
    inbox_emails = _search_messages(
        token,
        f"{GRAPH_BASE}/users/{JR_USER_ID}/messages",
        ["from:box.com invited", "from:box.com invitation", "from:noreply@box.com"],
        "subject,body,from,toRecipients,ccRecipients,sentDateTime",
        "inbox messages",
    )
    unique_inbox = _dedupe_by_id(inbox_emails)
    print(f"[OK] {len(unique_inbox)} unique inbox messages found")

    # Extract from inbox - look for victim names and emails in body/recipients
    inbox_map = defaultdict(set)
    all_body_emails = set()
    for m in unique_inbox:
        body_emails = extract_emails_from_text(_body_text(m))
        all_body_emails.update(
            em.lower() for em in body_emails if _is_candidate_body_email(em)
        )

        # Check recipients
        _index_recipients(m, ("toRecipients", "ccRecipients"), inbox_map)

        # Use the local part of each body address as a potential name hint
        for em in body_emails:
            em_lower = em.lower()
            if "box.com" in em_lower or "noreply" in em_lower:
                continue
            local_part = em.split('@')[0]
            local_clean = re.sub(r'[._\-\d]+', ' ', local_part).strip().lower()
            if len(local_clean) > 2:
                inbox_map[local_clean].add(em_lower)

    print(f"[INFO] Extracted {len(all_body_emails)} unique non-Box emails from inbox bodies")

    # --- Strategy 4: Search for Box collaboration/sharing emails specifically ---
    print("\n[INFO] Searching for Box collaboration emails...")
    collab_emails = _search_messages(
        token,
        f"{GRAPH_BASE}/users/{JR_USER_ID}/messages",
        ["box.com collaborate", "shared a file with you", "shared a folder with you"],
        "subject,body,from,toRecipients,ccRecipients,sentDateTime",
        "messages",
    )
    # Process collaboration emails (a set absorbs duplicates across queries)
    for m in collab_emails:
        all_body_emails.update(_candidate_body_emails(m))

    # --- Strategy 5: Search tenant directory (GAL) for victim names ---
    print("\n[INFO] Searching tenant directory (GAL) for victim names...")
    gal_map = defaultdict(set)
    # Pull all users from the directory
    try:
        users_url = f"{GRAPH_BASE}/users"
        all_users = graph_get_all(
            token,
            users_url,
            {"$top": "999", "$select": "displayName,mail,userPrincipalName,givenName,surname"},
        )
        print(f"[OK] Got {len(all_users)} directory users")
        for u in all_users:
            mail = u.get("mail", "") or u.get("userPrincipalName", "")
            if not mail:
                continue
            _index_person(
                gal_map,
                u.get("displayName", ""),
                u.get("givenName", ""),
                u.get("surname", ""),
                [mail],
            )
    except Exception as e:
        print(f"[WARNING] Directory users lookup failed: {e}")

    # --- Strategy 6: Try People API for broader name resolution ---
    print("\n[INFO] Searching People API for victim names...")
    people_map = defaultdict(set)
    # Only search for names that are specific enough (2+ words, not generic)
    specific_names = [n for n in name_only_victims if len(n.split()) >= 2 and len(n) > 5]
    searched = 0
    people_url = f"{GRAPH_BASE}/users/{JR_USER_ID}/people"
    for victim_name in specific_names:
        params = {
            "$search": _search_phrase(victim_name),
            "$top": "5",
            "$select": "displayName,scoredEmailAddresses,givenName,surname",
        }
        try:
            r = _graph_get_once(token, people_url, params)
            if r.status_code == 403:
                # Permission is missing for the whole API; stop asking.
                print(" [WARNING] People API returned 403 - skipping")
                break
            if r.status_code == 200:
                for p in r.json().get("value") or []:
                    pemails = [
                        e.get("address", "")
                        for e in p.get("scoredEmailAddresses") or []
                        if e.get("address")
                    ]
                    for v in name_variants(p.get("displayName", "")):
                        for em in pemails:
                            people_map[v].add(em.lower())
            searched += 1
            if searched % 50 == 0:
                print(f" Searched {searched}/{len(specific_names)} names...")
        except Exception as e:  # best effort per name, but never silent
            print(f" [WARNING] People search for '{victim_name}' failed: {e}")

    print(f"[OK] People API searched for {searched} names, found {len(people_map)} name entries")

    # --- Strategy 7: Search JR's mail for each unresolved name directly ---
    # This catches cases where someone emailed JR and their display name matches
    print("\n[INFO] Searching JR's mailbox for unresolved victim names...")
    mail_search_map = defaultdict(set)
    mail_searched = 0
    messages_url = f"{GRAPH_BASE}/users/{JR_USER_ID}/messages"
    for victim_name in name_only_victims:
        # Skip single-word or very short names - too many false positives
        if len(victim_name.split()) < 2 or len(victim_name) < 5:
            continue
        victim_norm = normalize(victim_name)
        victim_tokens = set(victim_norm.split())
        params = {
            "$search": _search_phrase(f"from:{victim_name}"),
            "$top": "5",
            "$select": "from,subject",
        }
        try:
            r = _graph_get_once(token, messages_url, params)
            if r.status_code == 200:
                for msg in r.json().get("value") or []:
                    fr = (msg.get("from") or {}).get("emailAddress") or {}
                    fname = fr.get("name", "")
                    faddr = fr.get("address", "")
                    if not (fname and faddr and victim_norm):
                        continue
                    # Require strong match: same words, in any order
                    fname_norm = normalize(fname)
                    if fname_norm == victim_norm or set(fname_norm.split()) == victim_tokens:
                        mail_search_map[victim_norm].add(faddr.lower())
            mail_searched += 1
            if mail_searched % 50 == 0:
                print(f" Searched {mail_searched} names...")
        except Exception as e:  # best effort per name, but never silent
            print(f" [WARNING] Mail search for '{victim_name}' failed: {e}")

    print(f"[OK] Mail search completed for {mail_searched} names, found {len(mail_search_map)} matches")

    # --- Now resolve victims ---
    print("\n[INFO] Resolving victim names to email addresses...")
    variant_lookups = [
        ("contacts", contact_map),
        ("sent_items", sent_map),
        ("inbox", inbox_map),
        ("directory", gal_map),
        ("people_api", people_map),
    ]
    resolved = {}
    unresolved = []
    resolution_source = {}
    for victim_name in name_only_victims:
        found_emails, source = _resolve_victim(victim_name, variant_lookups, mail_search_map)
        if found_emails:
            resolved[victim_name] = found_emails
            resolution_source[victim_name] = source
        else:
            unresolved.append(victim_name)

    # --- Build output ---
    all_resolved_emails = set()
    for emails in resolved.values():
        all_resolved_emails.update(emails)

    # Combine with already-known emails
    all_victim_emails = set(e.lower() for e in already_resolved) | all_resolved_emails
    unmatched_body_emails = sorted((all_body_emails | sent_body_emails) - all_victim_emails)

    output = {
        "investigation": "Valley Wide Plastering BEC - Victim Email Resolution",
        "run_date": time.strftime("%Y-%m-%d %H:%M:%S"),
        "summary": {
            "previously_resolved": len(already_resolved),
            "newly_resolved_by_name": len(resolved),
            "still_unresolved": len(unresolved),
            "total_unique_victim_emails": len(all_victim_emails),
            "total_victims_identified": len(already_resolved) + len(resolved) + len(unresolved),
        },
        "all_victim_emails_combined": sorted(all_victim_emails),
        "newly_resolved": {
            name: {
                "emails": emails,
                "source": resolution_source.get(name, []),
            }
            for name, emails in sorted(resolved.items())
        },
        "previously_confirmed_emails": sorted(already_resolved, key=str.lower),
        "unresolved_names": sorted(unresolved, key=lambda x: x.lower()),
        "body_emails_found_but_unmatched": unmatched_body_emails,
    }

    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    # --- Print summary ---
    print("\n" + "=" * 60)
    print("RESOLUTION RESULTS")
    print("=" * 60)
    print(f"Previously resolved emails: {len(already_resolved)}")
    print(f"Newly resolved by name: {len(resolved)}")
    print(f"Still unresolved: {len(unresolved)}")
    print(f"Total unique victim emails: {len(all_victim_emails)}")
    print(f"Unmatched body emails found: {len(unmatched_body_emails)}")
    print()

    if resolved:
        print("--- Newly Resolved ---")
        for name, emails in sorted(resolved.items()):
            src = ", ".join(resolution_source.get(name, []))
            print(f" {name}: {', '.join(emails)} [{src}]")
        print()

    if unresolved:
        print(f"--- Unresolved ({len(unresolved)} names) ---")
        for name in sorted(unresolved, key=lambda x: x.lower()):
            print(f" {name}")

    print(f"\n[OK] Results saved to {OUTPUT_FILE}")


if __name__ == "__main__":
    main()