Files
claudetools/temp/vwp_extract_victim_emails.py
Mike Swanson fa15b03180 sync: Auto-sync from ACG-M-L5090 at 2026-03-10 19:11:00
Synced files:
- Quote wizard frontend (all components, hooks, types, config)
- API updates (config, models, routers, schemas, services)
- Client work (bg-builders, gurushow)
- Scripts (BGB Lesley termination, CIPP, Datto, migration)
- Temp files (Bardach contacts, VWP investigation, misc)
- Credentials and session logs
- Email service, PHP API, session logs

Machine: ACG-M-L5090
Timestamp: 2026-03-10 19:11:00

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 19:59:08 -07:00

301 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Extract victim email addresses from Box.com acceptance notifications
in JR's compromised mailbox (Valley Wide Plastering BEC investigation).
Strategy:
1. Search acceptance notifications for email addresses in body/subject
2. Extract display names from subjects where no email found
3. Search JR's Sent Items for the original Box sharing invitations
4. Cross-reference to map names -> emails
"""
import json
import os
import re
import subprocess
import sys
import time
import urllib.parse
# Connection settings for the Microsoft Graph app registration and the
# mailbox under investigation. Each identifier can be overridden through the
# environment; the literals are kept only as backward-compatible defaults.
TENANT_ID = os.environ.get("VWP_TENANT_ID", "5c53ae9f-7071-4248-b834-8685b646450f")
CLIENT_ID = os.environ.get("VWP_CLIENT_ID", "fabb3421-8b34-484b-bc17-e46de9703418")
# SECURITY NOTE(review): this client secret is committed in plain text. It
# should be rotated and supplied exclusively via VWP_CLIENT_SECRET, after
# which the literal default below should be removed.
CLIENT_SECRET = os.environ.get("VWP_CLIENT_SECRET", "~QJ8Q~NyQSs4OcGqHZyPrA2CVnq9KBfKiimntbMO")
# Object id used in /users/{USER_ID}/... requests (the mailbox being searched).
USER_ID = os.environ.get("VWP_USER_ID", "0af923d0-48c5-4cc1-8553-c60625802815")
GRAPH_BASE = "https://graph.microsoft.com/v1.0"
def get_token():
    """Request an app-only Microsoft Graph access token through curl.

    Performs an OAuth2 client-credentials POST against the tenant's v2.0
    token endpoint using the module-level TENANT_ID, CLIENT_ID and
    CLIENT_SECRET.

    Returns:
        The ``access_token`` string from the token response.

    Exits the process with status 1, after printing an ``[ERROR]`` line, when
    the response is not JSON or does not contain ``access_token``.
    """
    url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    # Let urlencode escape every field so a secret containing reserved
    # characters (&, +, =, ...) cannot corrupt the form body.
    form_body = urllib.parse.urlencode({
        "client_id": CLIENT_ID,
        "scope": "https://graph.microsoft.com/.default",
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    })
    result = subprocess.run([
        "curl", "-s", "-X", "POST", url,
        "-H", "Content-Type: application/x-www-form-urlencoded",
        "-d", form_body,
    ], capture_output=True, text=True)
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        # curl -s prints nothing on transport failure, so report its exit code.
        print(
            f"[ERROR] Failed to get token: curl exit code {result.returncode}, "
            f"response was not JSON: {result.stdout[:300]!r}"
        )
        sys.exit(1)
    if not isinstance(data, dict) or "access_token" not in data:
        print(f"[ERROR] Failed to get token: {json.dumps(data, indent=2)}")
        sys.exit(1)
    print("[OK] Got access token")
    return data["access_token"]
def graph_get(token, url):
    """GET *url* from Microsoft Graph through curl and return the JSON object.

    Args:
        token: Bearer access token placed in the Authorization header.
        url: Complete request URL, query string included.

    Returns:
        The decoded JSON object. If curl returns nothing, or the payload is
        not a JSON object, a dict with a single ``"error"`` key is returned
        instead, so callers can keep using ``"value" in data`` / ``.get()``
        without guarding against exceptions.
    """
    result = subprocess.run([
        "curl", "-s", "-X", "GET", url,
        "-H", f"Authorization: Bearer {token}",
        "-H", "Content-Type: application/json",
        # Ask Graph to return message bodies as plain text rather than HTML.
        "-H", "Prefer: outlook.body-content-type=text",
    ], capture_output=True, text=True)
    payload = result.stdout.strip()
    if not payload:
        return {"error": "empty response"}
    try:
        data = json.loads(payload)
    except json.JSONDecodeError:
        # e.g. an HTML error page from a proxy; keep a snippet for diagnosis.
        return {"error": f"invalid JSON response: {payload[:200]}"}
    if not isinstance(data, dict):
        return {"error": f"unexpected JSON payload: {payload[:200]}"}
    return data
# Compiled once; matches local@domain.tld style addresses.
_EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}')


def extract_emails_from_text(text):
    """Return every email-like substring of *text* in order of appearance.

    Matches are returned as found: original case, duplicates included.
    A missing value (``None`` or an empty string) yields an empty list
    instead of raising, since message fields may be absent or null.
    """
    if not text:
        return []
    return _EMAIL_PATTERN.findall(text)
def extract_name_from_subject(subject):
    """Extract the person's name/identifier from an acceptance subject.

    Expected shape: "NAME has accepted the invitation to your '...'".

    Returns:
        The text before "has accepted the invitation to your", stripped of
        surrounding whitespace, or ``None`` when the subject is missing/empty
        or does not follow that pattern.
    """
    if not subject:
        return None
    match = re.match(r"^(.+?)\s+has accepted the invitation to your", subject)
    return match.group(1).strip() if match else None
def _is_reportable_address(address, excluded):
    """Return True when *address* (lower-cased) is neither excluded nor a Box.com address."""
    if not address or address in excluded:
        return False
    domain = address.rpartition("@")[2]
    # Compare the domain itself: a plain substring test for "box.com" would
    # also discard unrelated domains such as toolbox.com or sandbox.com.
    return domain != "box.com" and not domain.endswith(".box.com")


def _recipient_addresses(message, fields):
    """Yield the raw address of every recipient listed under *fields* of *message*."""
    for field in fields:
        for recipient in message.get(field) or []:
            address = (recipient.get("emailAddress") or {}).get("address")
            if address:
                yield address


def _message_body_text(message):
    """Return the body content of *message*, or "" when it is missing or null."""
    return (message.get("body") or {}).get("content") or ""


def _fetch_all_pages(token, url, max_pages, label="emails"):
    """Collect the "value" items of *url* and its @odata.nextLink pages (at most *max_pages*)."""
    items = []
    page = 1
    while url:
        print(f" Fetching page {page}...")
        data = graph_get(token, url)
        if "value" not in data:
            print(f" [WARNING] Error: {json.dumps(data, indent=2)[:300]}")
            break
        items.extend(data["value"])
        print(f" Page {page}: {len(data['value'])} {label} (total: {len(items)})")
        url = data.get("@odata.nextLink")
        page += 1
        if page > max_pages:
            if url:
                # Make truncation visible instead of silently dropping results.
                print(f" [WARNING] Stopped after {max_pages} pages; more results exist")
            break
        time.sleep(0.3)
    return items


def _record_new(addresses, victim_emails, excluded, tag, context=""):
    """Add each reportable address to *victim_emails*, printing first sightings."""
    for raw in addresses:
        address = raw.lower().strip()
        if _is_reportable_address(address, excluded) and address not in victim_emails:
            print(f" [{tag}] {address}{context}")
            victim_emails.add(address)


def _harvest_bodies(token, messages, select, victim_emails, excluded, tag, show_subject):
    """Fetch each message individually and record the addresses found in its body."""
    for message in messages:
        full_url = (
            f"{GRAPH_BASE}/users/{USER_ID}/messages/{message['id']}"
            f"?$select={select}"
        )
        full = graph_get(token, full_url)
        context = ""
        if show_subject:
            subject = full.get("subject") or ""
            context = f" (subject: {subject[:60]})"
        _record_new(
            extract_emails_from_text(_message_body_text(full)),
            victim_emails, excluded, tag, context,
        )
        time.sleep(0.15)


def main(output_path=r"D:\ClaudeTools\temp\vwp_victim_emails.json"):
    """Run the Box.com victim-address extraction and write a JSON report.

    Phases:
        1. List Box "accepted" notification emails in the mailbox.
        2. Fetch each one and pull addresses from its body, To recipients
           and subject; remember subjects that only yield a display name.
        3. Collect recipients of Sent Items mentioning "Valley Wide
           Plastering", and addresses in Box "invited" notifications.
        4. Collect addresses in Box "shared" notifications.

    The mailbox owner's own address and Box.com system addresses are
    excluded everywhere. Results are printed and saved as JSON.

    Args:
        output_path: Destination of the JSON report.

    Exits with status 1 if the report file cannot be written (results have
    already been printed by then).
    """
    print("=" * 70)
    print("VWP BEC Investigation - Box.com Victim Email Extraction")
    print("=" * 70)
    token = get_token()

    jr_email = "j-r@valleywideplastering.com"
    excluded = {"noreply@box.com", "no-reply@box.com", jr_email}
    victim_emails = set()

    # ================================================================
    # PHASE 1: Get ALL acceptance notification emails
    # ================================================================
    print("\n[INFO] Phase 1: Fetching ALL Box acceptance emails...")
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/messages"
        f"?$search=%22from%3Anoreply%40box.com%20subject%3Aaccepted%22"
        f"&$top=50"
        f"&$select=id,subject,bodyPreview,from,receivedDateTime"
    )
    all_acceptance_emails = _fetch_all_pages(token, url, 20, "emails")
    print(f"\n[INFO] Total acceptance emails: {len(all_acceptance_emails)}")

    # ================================================================
    # PHASE 2: Extract emails from body + names from subjects
    # ================================================================
    print("\n[INFO] Phase 2: Extracting emails from message bodies...")
    names_without_emails = []
    total = len(all_acceptance_emails)
    for index, email in enumerate(all_acceptance_emails, start=1):
        # The subject may be absent or null; normalise to a string.
        subject = email.get("subject") or ""
        full_url = (
            f"{GRAPH_BASE}/users/{USER_ID}/messages/{email['id']}"
            f"?$select=id,subject,body,toRecipients,ccRecipients"
        )
        full = graph_get(token, full_url)
        if "error" in full:
            # Without this the message would silently contribute nothing.
            print(f" [WARNING] Could not fetch message: {json.dumps(full['error'])[:200]}")

        # Candidate addresses: body text, To recipients, and the subject's
        # "name" part (Box uses the address itself when no name is known).
        candidates = list(extract_emails_from_text(_message_body_text(full)))
        candidates.extend(_recipient_addresses(full, ["toRecipients"]))
        name = extract_name_from_subject(subject)
        if name:
            candidates.extend(extract_emails_from_text(name))

        normalised = {candidate.lower().strip() for candidate in candidates}
        found = {a for a in normalised if _is_reportable_address(a, excluded)}
        if found:
            _record_new(sorted(found), victim_emails, excluded, "FOUND")
        elif name and name.lower() != jr_email:
            # We only got the name, not the email
            names_without_emails.append(name)

        if index % 20 == 0:
            print(f" Processed {index}/{total} emails... ({len(victim_emails)} emails found so far)")
        time.sleep(0.15)

    print(f"\n[INFO] Phase 2 complete:")
    print(f" Emails found directly: {len(victim_emails)}")
    print(f" Names without emails: {len(names_without_emails)}")

    # Deduplicate names
    unique_names = sorted(set(names_without_emails))
    if unique_names:
        print(f"\n[INFO] Names without email addresses ({len(unique_names)}):")
        for name in unique_names:
            print(f" {name}")

    # ================================================================
    # PHASE 3: Search Sent Items for original Box invitations
    # ================================================================
    print("\n[INFO] Phase 3: Searching Sent Items for Box invitation emails...")
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/mailFolders/sentitems/messages"
        f"?$search=%22Valley%20Wide%20Plastering%22"
        f"&$top=50"
        f"&$select=id,subject,toRecipients,ccRecipients,bccRecipients,bodyPreview,receivedDateTime"
    )
    # Paginated like Phase 1 so matches beyond the first 50 are not missed.
    sent_messages = _fetch_all_pages(token, url, 10, "sent emails")
    if sent_messages:
        print(f" Found {len(sent_messages)} sent emails mentioning Valley Wide Plastering")
    for email in sent_messages:
        sent_subject = email.get("subject") or ""
        _record_new(
            _recipient_addresses(email, ["toRecipients", "ccRecipients", "bccRecipients"]),
            victim_emails, excluded, "NEW from sent",
            f" (subject: {sent_subject[:60]})",
        )

    # Also search for Box invitation emails (sent by Box on behalf of JR)
    print("\n[INFO] Phase 3b: Searching for Box invitation sent notifications...")
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/messages"
        f"?$search=%22from%3Anoreply%40box.com%20subject%3Ainvited%22"
        f"&$top=50"
        f"&$select=id,subject,body,receivedDateTime"
    )
    invitations = _fetch_all_pages(token, url, 10, "invitation emails")
    _harvest_bodies(
        token, invitations, "body,subject,toRecipients",
        victim_emails, excluded, "NEW from invitation", show_subject=False,
    )

    # ================================================================
    # PHASE 4: Search for Box "shared" notifications
    # ================================================================
    print("\n[INFO] Phase 4: Searching for Box 'shared' notifications...")
    url = (
        f"{GRAPH_BASE}/users/{USER_ID}/messages"
        f"?$search=%22from%3Anoreply%40box.com%20subject%3Ashared%22"
        f"&$top=50"
        f"&$select=id,subject,body,receivedDateTime"
    )
    shared_messages = _fetch_all_pages(token, url, 10, "'shared' emails")
    if shared_messages:
        print(f" Found {len(shared_messages)} 'shared' emails")
    _harvest_bodies(
        token, shared_messages, "body,subject",
        victim_emails, excluded, "NEW from shared", show_subject=True,
    )

    # ================================================================
    # RESULTS
    # ================================================================
    victim_list = sorted(victim_emails)
    print("\n" + "=" * 70)
    print(f"FINAL RESULTS: {len(victim_list)} unique victim email addresses")
    print("=" * 70)
    for addr in victim_list:
        print(f" {addr}")
    if unique_names:
        print(f"\n[WARNING] {len(unique_names)} victims identified by NAME only (no email extracted):")
        for name in unique_names:
            print(f" {name}")

    output = {
        "investigation": "Valley Wide Plastering BEC",
        "source": "Box.com notifications in JR mailbox",
        "total_acceptance_emails": len(all_acceptance_emails),
        "unique_victim_emails": len(victim_list),
        "victim_emails": victim_list,
        "names_without_emails": unique_names,
    }
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(output, f, indent=2)
    except OSError as exc:
        # The results were already printed above; report the failure cleanly.
        print(f"\n[ERROR] Could not save results to {output_path}: {exc}")
        sys.exit(1)
    print(f"\n[OK] Results saved to {output_path}")
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()