#!/usr/bin/env python3
"""
Extract victim email addresses from Box.com acceptance notifications
in JR's compromised mailbox (Valley Wide Plastering BEC investigation).

Strategy:
1. Search acceptance notifications for email addresses in body/subject
2. Extract display names from subjects where no email found
3. Search JR's Sent Items for the original Box sharing invitations
4. Cross-reference to map names -> emails
"""

import json
import os
import re
import subprocess
import sys
import time
import urllib.parse

TENANT_ID = "5c53ae9f-7071-4248-b834-8685b646450f"
CLIENT_ID = "fabb3421-8b34-484b-bc17-e46de9703418"
# NOTE(review): a client secret is committed in this file. It can now be
# supplied through the VWP_CLIENT_SECRET environment variable; the literal
# fallback is kept only so existing invocations keep working. Rotate the
# secret and delete the fallback.
CLIENT_SECRET = os.environ.get(
    "VWP_CLIENT_SECRET", "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO"
)
USER_ID = "0af923d0-48c5-4cc1-8553-c60625802815"
GRAPH_BASE = "https://graph.microsoft.com/v1.0"

# Addresses that must never be reported as victims.
_JR_EMAIL = "j-r@valleywideplastering.com"
_BOX_INTERNAL = frozenset({"noreply@box.com", "no-reply@box.com"})

_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')
_ACCEPTED_SUBJECT_RE = re.compile(
    r"^(.+?)\s+has accepted the invitation to your"
)

_OUTPUT_PATH = r"D:\ClaudeTools\temp\vwp_victim_emails.json"


def _run_curl_json(args, stdin_data=None):
    """Run ``curl -s`` with *args* and return its stdout parsed as JSON.

    *stdin_data*, when given, is written to curl's standard input.

    Returns the decoded JSON object. When curl cannot be started, prints
    nothing, or prints something that is not a JSON object, a dict with a
    single ``"error"`` key is returned instead of raising, so callers can
    keep using ``"value" in data`` / ``.get(...)`` checks.
    """
    try:
        result = subprocess.run(
            ["curl", "-s", *args],
            input=stdin_data,
            capture_output=True,
            # Decode explicitly as UTF-8 so the result does not depend on
            # the console code page of the machine running the script.
            encoding="utf-8",
            errors="replace",
        )
    except OSError as exc:
        return {"error": f"failed to run curl: {exc}"}

    if not result.stdout.strip():
        return {"error": "empty response"}
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        return {"error": f"invalid JSON response: {exc}"}
    if not isinstance(data, dict):
        return {"error": "unexpected JSON response", "response": data}
    return data


def get_token():
    """Request an app-only access token using the client-credentials grant.

    Returns the ``access_token`` string. Prints the error payload and exits
    the process with status 1 when the response contains no token.
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    form = urllib.parse.urlencode({
        "client_id": CLIENT_ID,
        "scope": "https://graph.microsoft.com/.default",
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    })
    # The form body goes through stdin ("@-") so the client secret does not
    # show up in the process argument list.
    data = _run_curl_json(
        [
            "-X", "POST", url,
            "-H", "Content-Type: application/x-www-form-urlencoded",
            "--data-binary", "@-",
        ],
        stdin_data=form,
    )
    if "access_token" not in data:
        print(f"[ERROR] Failed to get token: {json.dumps(data, indent=2)}")
        sys.exit(1)
    print("[OK] Got access token")
    return data["access_token"]


def graph_get(token, url):
    """GET *url* with the bearer *token* and return the decoded JSON dict.

    Message bodies are requested as plain text via the ``Prefer`` header.
    Returns a dict with an ``"error"`` key when no usable JSON came back.
    """
    # NOTE(review): the bearer token is still passed on curl's command line.
    return _run_curl_json([
        "-X", "GET", url,
        "-H", f"Authorization: Bearer {token}",
        "-H", "Content-Type: application/json",
        "-H", "Prefer: outlook.body-content-type=text",
    ])


def extract_emails_from_text(text):
    """Return every email-looking substring of *text*, in order of appearance.

    ``None`` or an empty string yields an empty list.
    """
    if not text:
        return []
    return _EMAIL_RE.findall(text)


def extract_name_from_subject(subject):
    """Extract the person's name/identifier from acceptance subject.

    Returns ``None`` when *subject* is empty or does not match.
    """
    # Pattern: "NAME has accepted the invitation to your 'Valley Wide..."
    if not subject:
        return None
    m = _ACCEPTED_SUBJECT_RE.match(subject)
    if m:
        return m.group(1).strip()
    return None


def is_external_address(addr):
    """Return True when *addr* may be reported as a victim address.

    JR's own address, the known Box sender addresses, and any address whose
    domain is ``box.com`` or a subdomain of it are rejected. The domain is
    compared exactly, so look-alike domains such as ``inbox.com`` are kept.
    """
    normalized = (addr or "").lower().strip()
    if not normalized or normalized == _JR_EMAIL or normalized in _BOX_INTERNAL:
        return False
    domain = normalized.rpartition("@")[2]
    return domain != "box.com" and not domain.endswith(".box.com")


def _external_addresses(addresses):
    """Yield the lower-cased, stripped form of each reportable address."""
    for addr in addresses:
        if is_external_address(addr):
            yield addr.lower().strip()


def _recipient_addresses(message, fields):
    """Yield the non-empty addresses in the given recipient *fields*."""
    for field in fields:
        for recipient in message.get(field) or []:
            addr = (recipient.get("emailAddress") or {}).get("address") or ""
            if addr:
                yield addr


def _body_text(message):
    """Return the body content of *message*, or "" when it has none."""
    return (message.get("body") or {}).get("content") or ""


def _fetch_message(token, msg_id, select):
    """Fetch one message by id, restricted to the *select* properties."""
    return graph_get(
        token,
        f"{GRAPH_BASE}/users/{USER_ID}/messages/{msg_id}?$select={select}",
    )


def _iter_pages(token, url, max_pages, announce=False):
    """Yield ``(page_number, messages)`` while following ``@odata.nextLink``.

    Stops with a warning when a response has no ``"value"`` key or when
    *max_pages* pages were read and more remain. With *announce* set, a
    line is printed before each request.
    """
    page = 1
    while url:
        if announce:
            print(f"  Fetching page {page}...")
        data = graph_get(token, url)
        if "value" not in data:
            print(f"  [WARNING] Error: {json.dumps(data, indent=2)[:300]}")
            return
        yield page, data["value"]

        url = data.get("@odata.nextLink")
        if url and page >= max_pages:
            print(f"  [WARNING] Stopped after {max_pages} pages; "
                  f"more results were available")
            return
        page += 1
        time.sleep(0.3)


def main():
    """Run every extraction phase, print the findings and save them as JSON."""
    print("=" * 70)
    print("VWP BEC Investigation - Box.com Victim Email Extraction")
    print("=" * 70)

    token = get_token()

    # ================================================================
    # PHASE 1: Get ALL acceptance notification emails
    # ================================================================
    print("\n[INFO] Phase 1: Fetching ALL Box acceptance emails...")

    all_acceptance_emails = []
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/messages"
        f"?$search=%22from%3Anoreply%40box.com%20subject%3Aaccepted%22"
        f"&$top=50"
        f"&$select=id,subject,bodyPreview,from,receivedDateTime"
    )
    for page, messages in _iter_pages(token, url, max_pages=20, announce=True):
        all_acceptance_emails.extend(messages)
        print(f"  Page {page}: {len(messages)} emails "
              f"(total: {len(all_acceptance_emails)})")

    print(f"\n[INFO] Total acceptance emails: {len(all_acceptance_emails)}")

    # ================================================================
    # PHASE 2: Extract emails from body + names from subjects
    # ================================================================
    print("\n[INFO] Phase 2: Extracting emails from message bodies...")

    victim_emails = set()
    names_without_emails = []  # (name, subject) tuples

    for i, email in enumerate(all_acceptance_emails):
        subject = email.get("subject") or ""

        # Get full body
        full = _fetch_message(
            token, email["id"], "id,subject,body,toRecipients,ccRecipients"
        )

        # Every source goes through the same filter so JR's own address and
        # Box's sender addresses are never recorded as victims.
        found_emails = set(
            _external_addresses(extract_emails_from_text(_body_text(full)))
        )

        # Also check toRecipients
        found_emails.update(
            _external_addresses(_recipient_addresses(full, ("toRecipients",)))
        )

        # Check subject for email-as-name pattern
        name = extract_name_from_subject(subject)
        if name:
            found_emails.update(
                _external_addresses(extract_emails_from_text(name))
            )

        if found_emails:
            for e in sorted(found_emails):
                if e not in victim_emails:
                    print(f"    [FOUND] {e}")
                    victim_emails.add(e)
        elif name and name.lower() != _JR_EMAIL:
            # We only got the name, not the email
            names_without_emails.append((name, subject))

        if (i + 1) % 20 == 0:
            print(f"  Processed {i+1}/{len(all_acceptance_emails)} emails... "
                  f"({len(victim_emails)} emails found so far)")

        time.sleep(0.15)

    print(f"\n[INFO] Phase 2 complete:")
    print(f"  Emails found directly: {len(victim_emails)}")
    print(f"  Names without emails: {len(names_without_emails)}")

    # Deduplicate names
    unique_names = sorted({n for n, _subject in names_without_emails})
    if unique_names:
        print(f"\n[INFO] Names without email addresses ({len(unique_names)}):")
        for n in unique_names:
            print(f"    {n}")

    # ================================================================
    # PHASE 3: Search Sent Items for original Box invitations
    # ================================================================
    print("\n[INFO] Phase 3: Searching Sent Items for Box invitation emails...")

    # Box sends invitations FROM the sharer, so check sent items
    # Also search for Box collaboration emails in the inbox
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/mailFolders/sentitems/messages"
        f"?$search=%22Valley%20Wide%20Plastering%22"
        f"&$top=50"
        f"&$select=id,subject,toRecipients,ccRecipients,bccRecipients,bodyPreview,receivedDateTime"
    )
    sent_messages = []
    for _page, messages in _iter_pages(token, url, max_pages=20):
        sent_messages.extend(messages)

    print(f"  Found {len(sent_messages)} sent emails mentioning Valley Wide Plastering")
    recipient_fields = ("toRecipients", "ccRecipients", "bccRecipients")
    for email in sent_messages:
        subject = email.get("subject") or ""
        recipients = _recipient_addresses(email, recipient_fields)
        for addr_lower in _external_addresses(recipients):
            if addr_lower not in victim_emails:
                print(f"    [NEW from sent] {addr_lower} (subject: {subject[:60]})")
                victim_emails.add(addr_lower)

    # Also search for Box invitation emails (sent by Box on behalf of JR)
    print("\n[INFO] Phase 3b: Searching for Box invitation sent notifications...")
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/messages"
        f"?$search=%22from%3Anoreply%40box.com%20subject%3Ainvited%22"
        f"&$top=50"
        f"&$select=id,subject,body,receivedDateTime"
    )
    for page, messages in _iter_pages(token, url, max_pages=10):
        print(f"  Page {page}: {len(messages)} invitation emails")
        for email in messages:
            # Get full body for each
            full = _fetch_message(token, email["id"], "body,subject,toRecipients")
            body_addresses = extract_emails_from_text(_body_text(full))
            for addr_lower in _external_addresses(body_addresses):
                if addr_lower not in victim_emails:
                    print(f"    [NEW from invitation] {addr_lower}")
                    victim_emails.add(addr_lower)
            time.sleep(0.15)

    # ================================================================
    # PHASE 4: Search for Box "shared" notifications
    # ================================================================
    print("\n[INFO] Phase 4: Searching for Box 'shared' notifications...")
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/messages"
        f"?$search=%22from%3Anoreply%40box.com%20subject%3Ashared%22"
        f"&$top=50"
        f"&$select=id,subject,body,receivedDateTime"
    )
    shared_messages = []
    for _page, messages in _iter_pages(token, url, max_pages=20):
        shared_messages.extend(messages)

    print(f"  Found {len(shared_messages)} 'shared' emails")
    for email in shared_messages:
        full = _fetch_message(token, email["id"], "body,subject")
        subject = full.get("subject") or ""
        body_addresses = extract_emails_from_text(_body_text(full))
        for addr_lower in _external_addresses(body_addresses):
            if addr_lower not in victim_emails:
                print(f"    [NEW from shared] {addr_lower} (subject: {subject[:60]})")
                victim_emails.add(addr_lower)
        time.sleep(0.15)

    # ================================================================
    # RESULTS
    # ================================================================
    victim_list = sorted(victim_emails)

    print("\n" + "=" * 70)
    print(f"FINAL RESULTS: {len(victim_list)} unique victim email addresses")
    print("=" * 70)
    for addr in victim_list:
        print(f"  {addr}")

    if unique_names:
        print(f"\n[WARNING] {len(unique_names)} victims identified by NAME only (no email extracted):")
        for n in unique_names:
            print(f"  {n}")

    output = {
        "investigation": "Valley Wide Plastering BEC",
        "source": "Box.com notifications in JR mailbox",
        "total_acceptance_emails": len(all_acceptance_emails),
        "unique_victim_emails": len(victim_list),
        "victim_emails": victim_list,
        "names_without_emails": unique_names,
    }

    output_path = _OUTPUT_PATH
    try:
        # Create the target directory first so a missing folder does not
        # throw away the results of the whole run.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2)
    except OSError as exc:
        print(f"\n[ERROR] Could not save results to {output_path}: {exc}")
        sys.exit(1)
    print(f"\n[OK] Results saved to {output_path}")


if __name__ == "__main__":
    main()