#!/usr/bin/env python3
"""Refresh profile fields for batch-1 and batch-3 rows that hit profile_err_400.

Refreshed columns: DisplayName, AccountEnabled, LastSignIn,
LastInteractiveSignIn, JobTitle, Department.

Uses the split-query pattern from batch-2: the core profile fields are
requested separately from signInActivity. Every other column listed in
FIELDNAMES (licenses, MFA, admin roles, group count, ...) and the row order
are preserved. The profile_err_400 / profile:err400 tokens are removed from
the Notes column when the refresh succeeds.

Progress and the human-readable summary go to stderr; a JSON summary is
printed to stdout.
"""

import csv
import json
import os
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

TOKEN_FILE = r"C:\claudetools\clients\cascades-tucson\reports\user-detail-batches\.token_refresh"
BASE_DIR = r"C:\claudetools\clients\cascades-tucson\reports\user-detail-batches"
BATCH_FILES = {
    "batch-1": os.path.join(BASE_DIR, "batch-1.csv"),
    "batch-3": os.path.join(BASE_DIR, "batch-3.csv"),
}


def _read_token(path):
    """Return the stripped contents of *path*, or "" if it cannot be opened.

    The token is still read at import time, but a missing/unreadable file no
    longer breaks the import; main() refuses to run with an empty token.
    """
    try:
        with open(path, "r") as f:
            return f.read().strip()
    except OSError:
        return ""


TOKEN = _read_token(TOKEN_FILE)

HEADERS_BASE = {
    "Authorization": f"Bearer {TOKEN}",
    "Accept": "application/json",
}

# Output column order; columns not listed here are not written back.
FIELDNAMES = [
    "UPN",
    "DisplayName",
    "AccountEnabled",
    "MailboxType",
    "Licenses",
    "MFARegistered",
    "MFAMethods",
    "DefaultMFAMethod",
    "LastSignIn",
    "LastInteractiveSignIn",
    "AdminRoles",
    "JobTitle",
    "Department",
    "GroupCount",
    "Notes",
]

PROFILE_ERR_TOKENS = {"profile_err_400", "profile:err400"}

# Sign-ins dated before this calendar year are reported as stale.
STALE_BEFORE_YEAR = 2025


def parse_retry_after(value, default=2):
    """Return a Retry-After header value as whole seconds.

    Falls back to *default* when the header is absent or is not a plain
    integer (for example the HTTP-date form), so a throttled response can
    never abort the retry loop with a ValueError.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def graph_get(url, extra_headers=None, max_retries=4):
    """GET *url* with the bearer token and return (status, parsed_json).

    Retries up to *max_retries* times on HTTP 429 (honouring Retry-After,
    minimum 2 seconds), on HTTP 5xx and on any non-HTTP failure (both with
    exponential backoff). Other HTTP errors are returned immediately as
    (code, body), where body is the parsed JSON error or {"raw": text}.
    If the non-HTTP failure persists, returns (0, {"error": message}).
    """
    headers = dict(HEADERS_BASE)
    if extra_headers:
        headers.update(extra_headers)

    attempt = 0
    while True:
        req = urllib.request.Request(url, headers=headers, method="GET")
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                data = resp.read().decode("utf-8")
                return resp.status, json.loads(data) if data else {}
        except urllib.error.HTTPError as e:
            body = e.read().decode("utf-8", errors="replace")
            try:
                payload = json.loads(body)
            except ValueError:
                payload = {"raw": body}
            if e.code == 429 and attempt < max_retries:
                retry_after = parse_retry_after(e.headers.get("Retry-After"))
                time.sleep(max(retry_after, 2))
                attempt += 1
                continue
            if e.code >= 500 and attempt < max_retries:
                time.sleep(2 ** attempt)
                attempt += 1
                continue
            return e.code, payload
        except Exception as e:
            # Deliberately broad: timeouts, connection resets and malformed
            # bodies are all treated as transient and retried.
            if attempt < max_retries:
                time.sleep(2 ** attempt)
                attempt += 1
                continue
            return 0, {"error": str(e)}


def refresh_profile(upn):
    """Fetch profile + signInActivity using the split-query pattern.

    Returns (fields_dict, extra_notes_list, fail_reason_or_None).
    fail_reason is None when the core profile query succeeded, even if the
    follow-up signInActivity query did not (that is recorded in the notes).
    """
    out = {
        "DisplayName": "",
        "AccountEnabled": "",
        "LastSignIn": "",
        "LastInteractiveSignIn": "",
        "JobTitle": "",
        "Department": "",
    }
    notes = []
    enc = urllib.parse.quote(upn)

    # Step 1: core profile
    url = (
        f"https://graph.microsoft.com/v1.0/users/{enc}"
        "?$select=id,userPrincipalName,displayName,accountEnabled,jobTitle,department"
    )
    status, data = graph_get(url)
    if status == 404:
        return out, ["not_found"], "not_found"
    if status != 200:
        reason = f"profile_http_{status}"
        notes.append(reason)
        return out, notes, reason

    user_id = data.get("id", "")
    out["DisplayName"] = data.get("displayName") or ""
    # A JSON null must become an empty cell, not the string "none".
    enabled = data.get("accountEnabled")
    out["AccountEnabled"] = "" if enabled is None else str(enabled).lower()
    out["JobTitle"] = data.get("jobTitle") or ""
    out["Department"] = data.get("department") or ""

    # Step 2: signInActivity via GUID
    if user_id:
        url2 = (
            f"https://graph.microsoft.com/v1.0/users/{user_id}"
            "?$select=signInActivity"
        )
        status2, data2 = graph_get(url2)
        if status2 == 200:
            sia = data2.get("signInActivity") or {}
            out["LastSignIn"] = sia.get("lastSignInDateTime") or ""
            # NOTE(review): both columns are seeded from lastSignInDateTime and
            # this one falls back to the non-interactive timestamp - confirm
            # that is the intended meaning of LastInteractiveSignIn.
            out["LastInteractiveSignIn"] = (
                sia.get("lastSignInDateTime")
                or sia.get("lastNonInteractiveSignInDateTime")
                or ""
            )
        elif status2 in (401, 403):
            out["LastSignIn"] = "scope_unavailable"
            out["LastInteractiveSignIn"] = "scope_unavailable"
            notes.append("signin:scope_unavailable")
        elif status2 == 400:
            out["LastSignIn"] = "scope_unavailable"
            out["LastInteractiveSignIn"] = "scope_unavailable"
            notes.append("signin:err400")
        else:
            notes.append(f"signin:http_{status2}")

    return out, notes, None


def clean_notes(existing_notes, succeeded, extra_notes):
    """Return the ';'-joined Notes value after a refresh attempt.

    Existing tokens are split on ';' and stripped, dropping empty ones. When
    *succeeded* is true the PROFILE_ERR_TOKENS are removed. Each non-empty
    entry of *extra_notes* is appended unless it is already present.
    """
    tokens = [t.strip() for t in (existing_notes or "").split(";") if t.strip()]
    if succeeded:
        tokens = [t for t in tokens if t not in PROFILE_ERR_TOKENS]
    for extra in extra_notes:
        if extra and extra not in tokens:
            tokens.append(extra)
    return ";".join(tokens)


def process_file(path):
    """Refresh every row of the CSV at *path* in place.

    Returns (updated_count, total_rows, failures) where failures is a list
    of (upn, reason) tuples. Rows without a UPN are left untouched.
    """
    # utf-8-sig strips a leading BOM so the first header is "UPN", not
    # "\ufeffUPN"; without it every row would be skipped and UPN blanked.
    with open(path, "r", newline="", encoding="utf-8-sig") as f:
        rows = list(csv.DictReader(f))

    updated = 0
    failures = []
    total = len(rows)
    for idx, row in enumerate(rows, start=1):
        # DictReader fills short rows with None, hence the "or".
        upn = (row.get("UPN") or "").strip()
        if not upn:
            continue
        print(f"[{os.path.basename(path)}] {idx}/{total} {upn}", file=sys.stderr)
        fields, extra_notes, fail_reason = refresh_profile(upn)
        if fail_reason is None:
            # Profile core succeeded
            row.update(fields)
            row["Notes"] = clean_notes(
                row.get("Notes", ""), succeeded=True, extra_notes=extra_notes
            )
            updated += 1
        else:
            # Profile still failed - leave the existing fields and record why
            row["Notes"] = clean_notes(
                row.get("Notes", ""), succeeded=False, extra_notes=extra_notes
            )
            failures.append((upn, fail_reason))
        time.sleep(0.5)

    # Write to a sibling temp file and swap it in, so a failure part-way
    # through the write cannot truncate the original batch file.
    tmp_path = path + ".tmp"
    try:
        with open(tmp_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(
                f, fieldnames=FIELDNAMES, quoting=csv.QUOTE_MINIMAL
            )
            writer.writeheader()
            for r in rows:
                # Ensure all fields are present in order
                writer.writerow({k: r.get(k, "") for k in FIELDNAMES})
        os.replace(tmp_path, path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return updated, total, failures


def row_stale_findings(label, row, stale_before_year=STALE_BEFORE_YEAR):
    """Return the stale-account findings for one CSV row.

    Reports a disabled account, a LastSignIn timestamp whose year is before
    *stale_before_year*, or an enabled account with an empty LastSignIn.
    An empty LastSignIn is reported as "never signed in" regardless of why
    the cell is empty.
    """
    findings = []
    upn = row.get("UPN") or ""
    enabled = (row.get("AccountEnabled") or "").lower()
    if enabled == "false":
        findings.append(f"{label}: {upn} account DISABLED")

    last = row.get("LastSignIn") or ""
    if last and last != "scope_unavailable" and "T" in last:
        try:
            year = int(last.split("-")[0])
        except ValueError:
            # Not an ISO-style timestamp; nothing to report.
            pass
        else:
            if year < stale_before_year:
                findings.append(f"{label}: {upn} last sign-in {last}")
    elif last == "" and enabled == "true":
        findings.append(f"{label}: {upn} never signed in (no LastSignIn)")
    return findings


def main():
    """Refresh all batch files, then report failures and stale accounts."""
    if not TOKEN:
        # Without a token every request would 401 and every row would be
        # annotated with profile_http_401, so stop before touching any file.
        sys.exit(f"ERROR: no access token could be read from {TOKEN_FILE}")

    summary = {}
    all_failures = []
    stale_findings = []

    for label, path in BATCH_FILES.items():
        updated, total, failures = process_file(path)
        summary[label] = (updated, total)
        all_failures.extend([(label, u, r) for (u, r) in failures])

    # Post-scan for stale findings
    for label, path in BATCH_FILES.items():
        with open(path, "r", newline="", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                stale_findings.extend(row_stale_findings(label, row))

    print("\n=== SUMMARY ===", file=sys.stderr)
    for label, (u, t) in summary.items():
        print(f" {label}: {u}/{t} rows updated", file=sys.stderr)
    if all_failures:
        print(" Failures:", file=sys.stderr)
        for label, upn, reason in all_failures:
            print(f" {label} {upn}: {reason}", file=sys.stderr)
    else:
        print(" No failures.", file=sys.stderr)
    if stale_findings:
        print(" Stale findings:", file=sys.stderr)
        for s in stale_findings:
            print(f" {s}", file=sys.stderr)

    # machine-readable output
    print(json.dumps({
        "summary": {k: {"updated": v[0], "total": v[1]} for k, v in summary.items()},
        "failures": [{"batch": b, "upn": u, "reason": r} for (b, u, r) in all_failures],
        "stale": stale_findings,
    }, indent=2))


if __name__ == "__main__":
    main()