diff --git a/.claude/memory/MEMORY.md b/.claude/memory/MEMORY.md index f5bb251b..e25f2a7d 100644 --- a/.claude/memory/MEMORY.md +++ b/.claude/memory/MEMORY.md @@ -44,6 +44,7 @@ - [AD2 SSH MTU blackhole](ad2-ssh-mtu-blackhole.md) — AD2 SSH "lockouts"/mid-session read-errors over the Dataforth OpenVPN were a PMTU blackhole (tunnel PMTU ~1424 vs adapter MTU 1500), NOT a ban/account-lockout/flaky tunnel. Fix: pin the OpenVPN adapter MTU to 1400 (done on GURU-5070 via its SYSTEM RMM agent); permanent = `mssfix 1360` on the OpenVPN server. Diagnose over RMM, not SSH. - [DSCA33/45 resolved via Hoffman](project_dsca33_45_resolved_via_hoffman.md) — The "lost" DSCA33/45 spec files are recoverable from the Hoffman API (original certs survived the wipe); do NOT ask John. 56/58 models mined into projects/dataforth-dos/dsca33-45-templates.json; only DSCA33-1948 + DSCA45-1746 (24 units) lack an original. AD2 handoff: DSCA33-45-HOFFMAN-RECOVERY-2026-06-18.md. - [AD2 comms via sync only](ad2-comms-via-sync-only.md) — The AD2 Dataforth-box Claude session is coord-API-isolated (Gitea only); coord msg/lock/todo never reach it. Coordinate with AD2 ONLY via git /sync (committed docs + ## Note blocks). +- [reference_syncro_agent_handle_leak](reference_syncro_agent_handle_leak.md) -- RDS "no available computers in the pool" (0x3/0x408) can really be a SyncroLive.Agent.Runner handle leak starving the box. How to spot + fix. ## Users - [Howard Enos](user_howard.md) — Mike's brother, technician, full access. Machines: ACG-TECH03L, Howard-Home (authoritative in users.json). @@ -124,6 +125,7 @@ - [feedback_bitdefender_unattended_install](feedback_bitdefender_unattended_install.md) -- Bitdefender unattended RMM install must use the FULL KIT as SYSTEM (silent, no UAC) — the downloader stub fails headless and triggers UAC - [Broken [[backlinks]] are write-me-later markers — flesh out from session history, don't delete](feedback_broken_backlinks_are_writeme_markers.md) -- A [[name]] link in a memory body whose target file doesn't exist is NOT an error to clean up — it's an intentional marker that that memory is worth writing. When you hit one (or memory-dream lists them), flesh the missing memory out from the session logs / session history, don't strip the link. - [feedback_rmm_longops_fire_and_forget](feedback_rmm_longops_fire_and_forget.md) -- Long-running RMM endpoint ops (software installs, big downloads) must be fire-and-forget, not live-monitored +- [Broken [[backlinks]] are write-me-later markers — flesh out from session history, don't delete](feedback_broken_backlinks_are_writeme_markers.md) -- A [[name]] link in a memory body whose target file doesn't exist is NOT an error to clean up — it's an intentional marker that that memory is worth writing. When you hit one (or memory-dream lists them), flesh the missing memory out from the session logs / session history, don't strip the link. ## Machine - [GURU-5070 Workstation Setup](reference_workstation_setup.md) — Mike's primary (owner confirmed 2026-05-26). Windows 11 Pro. Renamed from OC-5070 → ACG-5070/acg-guru-5070 → GURU-5070; all the same box, all Mike's. diff --git a/.claude/skills/bitdefender/scripts/gz.py b/.claude/skills/bitdefender/scripts/gz.py index b6d3fe2c..657fb754 100644 --- a/.claude/skills/bitdefender/scripts/gz.py +++ b/.claude/skills/bitdefender/scripts/gz.py @@ -38,6 +38,7 @@ import argparse import dataclasses import json import os +import re import subprocess import sys @@ -312,11 +313,15 @@ def cmd_company_delete(client, args): def cmd_endpoints(client, args): + if args.company and not _require_oid(args.company, "company"): + return 2 _emit(client.list_endpoints(args.company, per_page=args.per_page), args.json, _print_endpoint_table) def cmd_endpoint(client, args): + if not _require_oid(args.endpoint_id, "endpoint"): + return 2 _emit(client.get_endpoint_details(args.endpoint_id), args.json, _print_kv) @@ -348,6 +353,8 @@ def cmd_policy(client, args): # getPolicyDetails returns the FULL granular module configuration (verified # live 2026-06-21). Use --json for the complete settings tree; the table # view shows the top-level keys only. + if not _require_oid(args.policy_id, "policy"): + return 2 _emit(client.get_policy_details(args.policy_id), args.json, _print_kv) @@ -649,6 +656,12 @@ def cmd_inventory(client, args): def cmd_create_package(client, args): + if args.company and not _require_oid(args.company, "company"): + return 2 + if not _gated(f"create installer package '{args.name}'" + + (f" in company {args.company}" if args.company else ""), + args.confirm): + return 3 result = client.create_package( package_name=args.name, company_id=args.company, @@ -656,6 +669,7 @@ def cmd_create_package(client, args): language=args.language, ) _emit({"created": args.name, "result": result}, args.json, _print_kv) + return 0 def cmd_install_links(client, args): @@ -664,16 +678,33 @@ def cmd_install_links(client, args): def cmd_scan(client, args): + for t in args.targets: + if not _require_oid(t, "scan target"): + return 2 + if not _gated(f"start a type-{args.type} scan on {len(args.targets)} " + f"target(s): {','.join(args.targets)}", args.confirm): + return 3 result = client.create_scan_task( target_ids=args.targets, scan_type=args.type, name=args.name ) _emit({"scanTask": result}, args.json, _print_kv) + return 0 def cmd_move(client, args): + for e in args.endpoints: + if not _require_oid(e, "endpoint"): + return 2 + if not _require_oid(args.group, "group"): + return 2 + if not _gated(f"move {len(args.endpoints)} endpoint(s) into group " + f"{args.group} (changes inherited policy): " + f"{','.join(args.endpoints)}", args.confirm): + return 3 result = client.move_endpoints(args.endpoints, args.group) _emit({"moved": args.endpoints, "to": args.group, "result": result}, args.json, _print_kv) + return 0 def _print_tags(tags) -> None: @@ -709,8 +740,15 @@ def cmd_reconfigure(client, args): def cmd_make_group(client, args): + if args.parent and not _require_oid(args.parent, "parent group"): + return 2 + if not _gated(f"create custom group '{args.name}'" + + (f" under parent {args.parent}" if args.parent else ""), + args.confirm): + return 3 result = client.create_custom_group(args.name, args.parent) _emit({"createdGroup": args.name, "result": result}, args.json, _print_kv) + return 0 # Substrings that mark a JSON-RPC method as state-destroying. `raw` can reach @@ -724,7 +762,11 @@ DESTRUCTIVE_RAW_PATTERNS = ("delete", "createuninstall", "createremove", "configurenotif", "createcompany", "suspendcompany", "activatecompany", "setendpointlabel", "createreport", "createrestore", "createcustomrule", "changeincident", - "updateincident", "sendtestpush") + "updateincident", "sendtestpush", + # state-changing methods also exposed as gated + # subcommands - keep them gated via `raw` too. + "moveendpoints", "movecustomgroup", "createscan", + "createpackage", "createcustomgroup") def _is_destructive_method(method: str) -> bool: @@ -750,11 +792,31 @@ def cmd_raw(client, args): return 0 +# --- input validation --------------------------------------------------------- +# GravityZone object IDs are 24-char hex (Mongo ObjectId). Validating client-side +# stops a malformed/empty id from hitting the live tenant AND from being mislogged +# as a functional skill error (the API's "Expected format: 24-char hex ID" reply +# matches none of the expected-error markers, so it would otherwise land in +# errorlog.md as noise - the bulk of the 2026-06-21 bitdefender entries). +_OID_RE = re.compile(r"^[0-9a-fA-F]{24}$") + + +def _require_oid(value, label: str) -> bool: + """True if `value` is a valid 24-char hex id; else print [ERROR] and False. + The caller should `return 2` (user-input error) and must NOT log it.""" + if value and _OID_RE.match(str(value)): + return True + print(f"[ERROR] {label} '{value}' is not a valid 24-char hex GravityZone id.", + file=sys.stderr) + return False + + # --- destructive (gated) ------------------------------------------------------ def _gated(action_desc: str, confirm: bool) -> bool: if not confirm: - print("[WARNING] Refusing destructive action without --confirm.") - print(f"[INFO] Would: {action_desc}") + print("[WARNING] Refusing destructive action without --confirm.", + file=sys.stderr) + print(f"[INFO] Would: {action_desc}", file=sys.stderr) return False return True diff --git a/clients/cascades-tucson/session-logs/2026-06/2026-06-25-howard-alma-offboarding-recovery-verify.md b/clients/cascades-tucson/session-logs/2026-06/2026-06-25-howard-alma-offboarding-recovery-verify.md new file mode 100644 index 00000000..b1d0b24f --- /dev/null +++ b/clients/cascades-tucson/session-logs/2026-06/2026-06-25-howard-alma-offboarding-recovery-verify.md @@ -0,0 +1,115 @@ +## User +- **User:** Howard Enos (howard) +- **Machine:** Howard-Home +- **Role:** tech + +## Session Summary + +A prior session working Cascades of Tucson was lost mid-task (Howard accidentally cleared the +working context). This session reconstructed what that lost session had done, confirmed an +outstanding message to Mike had NOT been sent, sent it, then verified the underlying work +end-to-end against live systems. + +The lost session had offboarded **Alma Montt** (terminated; Memory Care Life Enrichment / +MC Reception; no PHI/clinical access). Recovery was possible because the work product survived +in two places even though no session log had been checkpointed: the offboarding record +`clients/cascades-tucson/docs/security/offboarding-2026-06-25-alma-montt.md` and the uncommitted +datto-edr skill changes in the working tree (unrelated to Alma). The coord message log (last 30) +confirmed nothing about "tenant control / remove access" had gone out. + +The one item the lost session had flagged for Mike was a tenant-security decision: resetting +Alma's M365 password required JIT-elevating the `ComputerGuru - Tenant Admin` service principal +to **Privileged Authentication Administrator (PAA)**, and Microsoft Graph blocked the automatic +teardown ("removing self from built-in role is not allowed"). That SP is therefore still holding +a standing PAA role on the Cascades tenant. Because tenant role/access posture is Mike's call +(admin/owner, Global Admin), a coord message was sent to Mike (his most-active session today, +GURU-5070/claude-main) laying out the issue, the exact removal steps, and a recommended posture +(keep JIT, fix the teardown so it stops stranding the role). + +Finally, the Alma offboarding was verified live rather than trusted from the doc. M365 state was +read via the remediation-tool skill (investigator Graph token + investigator-exo Exchange token), +and on-prem AD state via the rmm skill against CS-SERVER. All eight claimed actions confirmed +true against live state. The only loose end is the leftover PAA role assignment, now in Mike's +queue. + +## Key Decisions + +- **Did not guess the message content for Mike.** Reconstructed it from the surviving offboarding + doc rather than fabricating; the "remove some access" item is specifically the leftover PAA role + on the Tenant Admin SP. +- **Sent the coord message to Mike's most-active session (GURU-5070)** rather than broadcasting, + since it is a decision specifically for Mike. Coord messages persist/queue if he is on another box. +- **Verified offboarding live with least-privilege tokens** (investigator + investigator-exo for + read; read-only Get-ADUser on CS-SERVER) rather than relying on the doc's self-reported results. +- **Recommended keeping JIT elevation (no standing PAA)** for the Tenant Admin SP and fixing the + teardown, vs. granting permanent PAA — least-privilege for an auth-admin role. + +## Problems Encountered + +- **Lost session context** — Howard cleared the working context accidentally. Resolved by grepping + surviving artifacts (offboarding doc, uncommitted working-tree changes) + the coord message log + to reconstruct state; confirmed the Mike message was never sent. +- **Bash `$UID` collision** — first Graph query used a variable named `UID`, which is a readonly + bash builtin (expanded to the OS uid 197609), so the query hit the wrong resource + (`Request_ResourceNotFound: '197609'`). Resolved by renaming the variable to `AID`. +- **EXO MailboxPermission AccessRights parse** — initial parse pulled the wrong key and showed + `AccessRights: None`; re-queried the raw permission object to confirm Shelby.Trozzi holds + `FullAccess` (not inherited, not deny). + +## Configuration Changes + +- **Created:** `clients/cascades-tucson/session-logs/2026-06/2026-06-25-howard-alma-offboarding-recovery-verify.md` (this log) +- No code or config changes made this session (verification was read-only). +- Note: uncommitted **datto-edr skill** changes remain in the working tree from the lost session + (`.claude/skills/datto-edr/scripts/edr.py`, `edr_client.py`, new `selftest.py`) — Locations vs + scan-Targets inventory-model refactor. Unrelated to Alma; left as-is for a separate review/commit. + +## Credentials & Secrets + +- Alma Montt offboarding password stored for emergency recovery/audit only at vault + `clients/cascades-tucson/alma-montt` (do NOT re-enable without authorization). No new credentials + created or discovered this session. + +## Infrastructure & Servers + +- **M365 tenant:** cascadestucson.com — Tenant ID `207fa277-e9d8-4eb7-ada1-1064d2221498` +- **Alma Montt M365 object id:** `b2fb546e-687a-4647-b286-9c8edd3d989f` +- **On-prem DC:** CS-SERVER (192.168.2.254), `cascades.local`; GuruRMM agent id (live this session) + `c39f1de7-d5b6-45ae-b132-e06977ab1713` (re-enrolls — resolve live by hostname). +- **Remediation apps used:** ComputerGuru Security Investigator (`bfbc12a4-f0dd-4e12-b06d-997e7271e10c`, + Graph read + EXO read). The PAA-stranded SP is **ComputerGuru - Tenant Admin**. +- **Coord API:** http://172.16.3.30:8001/api/coord — message sent to GURU-5070/claude-main. + +## Commands & Outputs + +- Graph user verify (Security Investigator token): + `GET /v1.0/users/{id}?$select=accountEnabled,assignedLicenses,showInAddressList` -> + `accountEnabled=false`, `assignedLicenses=[]`, `showInAddressList=false`; `memberOf` -> none. +- EXO mailbox verify (Security Investigator EXO token): + `GET adminapi/beta/{tenant}/Mailbox('Alma.Montt@cascadestucson.com')` -> + `RecipientTypeDetails=SharedMailbox`; `.../MailboxPermission` -> `Shelby.Trozzi -> [FullAccess]` + (IsInherited=false, Deny=false). +- AD verify (RMM, CS-SERVER, exit 0): + `Get-ADUser Alma.Montt -Properties Enabled,MemberOf,DistinguishedName` -> + `Enabled=False`, `DN=CN=Alma Montt,OU=Excluded-From-Sync,DC=cascades,DC=local`, `GroupCount=0`. +- Coord message POST -> id `4b2bb6a9-881b-4003-984c-687183b96802` (to GURU-5070/claude-main). + +## Pending / Incomplete Tasks + +- **[MIKE / SECURITY] Remove the standing Privileged Authentication Administrator role from the + `ComputerGuru - Tenant Admin` SP** on the Cascades tenant (Entra portal: Roles & admins -> + Privileged Authentication Administrator -> remove the SP). LEAVE its Conditional Access + Administrator role (intentional). Either Mike does it, or grants Howard GA briefly. Message sent. +- **[POSTURE] Decide the JIT-elevation pattern** so password resets via the Tenant Admin SP stop + stranding PAA (self-removal is blocked by Graph). Recommended: keep JIT, fix teardown. +- **[SEPARATE] datto-edr skill changes** uncommitted in the working tree — review + commit/discard + on their own. +- Reconcile: Alma removed from proposed share rosters + (`docs/migration/share-group-roster-proposed-2026-06-25.md`). + +## Reference Information + +- Offboarding record: `clients/cascades-tucson/docs/security/offboarding-2026-06-25-alma-montt.md` +- Termination runbook: `docs/security/termination-procedures.md` +- Coord message id: `4b2bb6a9-881b-4003-984c-687183b96802` +- Cascades wiki: `wiki/clients/cascades-tucson.md`