From 96fb4110ea433f1c3d2a1027e24a52f3ee87a420 Mon Sep 17 00:00:00 2001 From: Mike Swanson Date: Mon, 1 Jun 2026 14:29:07 -0700 Subject: [PATCH] Add b2 skill: Backblaze B2 management CLI (storage cost, prefix purge) B2 Native API v3 client for the ACG B2 account: status, buckets, keys, files, bucket-size, usage/cost ($0.00695/GB), gated create/delete bucket+key, and gated lifecycle-based delete-prefix/lifecycle-remove for prefix purges. Read-only by default; destructive ops require --confirm. Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/skills/b2/.gitignore | 3 + .claude/skills/b2/SKILL.md | 261 ++++++ .claude/skills/b2/references/api-reference.md | 236 ++++++ .claude/skills/b2/scripts/b2.py | 766 ++++++++++++++++++ .claude/skills/b2/scripts/b2_client.py | 674 +++++++++++++++ .claude/skills/b2/scripts/selftest.py | 200 +++++ .gitignore | 3 + 7 files changed, 2143 insertions(+) create mode 100644 .claude/skills/b2/.gitignore create mode 100644 .claude/skills/b2/SKILL.md create mode 100644 .claude/skills/b2/references/api-reference.md create mode 100644 .claude/skills/b2/scripts/b2.py create mode 100644 .claude/skills/b2/scripts/b2_client.py create mode 100644 .claude/skills/b2/scripts/selftest.py diff --git a/.claude/skills/b2/.gitignore b/.claude/skills/b2/.gitignore new file mode 100644 index 0000000..d2eed78 --- /dev/null +++ b/.claude/skills/b2/.gitignore @@ -0,0 +1,3 @@ +# b2 skill local cache. Holds the live B2 authorization token (a SECRET, valid +# ~24h) plus the per-account apiUrl/accountId. NEVER commit this directory. +.cache/ diff --git a/.claude/skills/b2/SKILL.md b/.claude/skills/b2/SKILL.md new file mode 100644 index 0000000..e4f8c4c --- /dev/null +++ b/.claude/skills/b2/SKILL.md @@ -0,0 +1,261 @@ +--- +name: b2 +description: >- + Manage Arizona Computer Guru's (ACG) Backblaze B2 storage account via the B2 + Native API v3. 
Talks to the LIVE production B2 account (accountId 46f69bc61163, + region us-west-001) that holds the per-client MSP360/CloudBerry backup + destinations. List buckets and application keys, list files / file versions, + compute per-bucket stored size, and produce the headline storage-cost report + (the mspbackups storage-cost calc). Provision buckets and scoped backup keys + and delete buckets/keys (all destructive ops are gated behind --confirm). + Read-only by default. Invoke for: "backblaze", "b2", "b2 storage", "bucket", + "storage cost", "backup storage", "mspbackups storage", "list buckets b2". +--- + +# Backblaze B2 Skill + +Standalone CLI client for the Backblaze B2 Native API v3. Talks to the live ACG +B2 account. Read-only by default; destructive operations are gated behind +`--confirm`. + +## Running the CLI + +This machine's Python launcher is `py` (per identity.json). The scripts also +work with `python`/`python3`. + +```bash +# from the scripts dir, or pass full paths +py "$CLAUDETOOLS_ROOT/.claude/skills/b2/scripts/b2.py" status +py "$CLAUDETOOLS_ROOT/.claude/skills/b2/scripts/b2.py" buckets +py "$CLAUDETOOLS_ROOT/.claude/skills/b2/scripts/b2.py" usage --json +``` + +Transport auto-selects: uses `httpx` if installed, otherwise stdlib `urllib` +(no third-party dependency required). + +## Credentials + +Credentials are NEVER hardcoded. At runtime the client loads them from the SOPS +vault: + +``` +bash "$CLAUDETOOLS_ROOT/.claude/scripts/vault.sh" \ + get-field projects/claudetools/backblaze-b2.sops.yaml key_id +bash "$CLAUDETOOLS_ROOT/.claude/scripts/vault.sh" \ + get-field projects/claudetools/backblaze-b2.sops.yaml credentials.application_key +``` + +`CLAUDETOOLS_ROOT` resolves from the env var, else `claudetools_root` in +`.claude/identity.json`, else the repo root derived from the script's own +location (no hardcoded drive letters). 
For testing you can override with the +`B2_KEY_ID` and `B2_APPLICATION_KEY` env vars (both must be set to use the +override). + +Authorization is HTTP Basic against `b2_authorize_account` (username = key id, +password = application key). The skill uses the "ClaudeTools" application key +(`00146f69bc611630000000009`). + +## Cache model (important — the token is a SECRET) + +After a successful authorize, the CLI caches the auth result at +`.claude/skills/b2/.cache/auth.json`: + +- **Cached:** `authorizationToken` (the bearer token, valid ~24h), the + per-account `apiUrl` / `s3ApiUrl` / `downloadUrl`, `accountId`, key + capabilities, and key scope (`bucketId` / `namePrefix`). +- **TTL:** treated as valid for ~23h. The client re-authorizes when the cache is + stale, or automatically on a `401` with `expired_auth_token` / `bad_auth_token` + (exactly one re-authorize + retry per call). +- **This cache holds a live secret.** It is gitignored twice (root `.gitignore` + plus a local `.gitignore` in the skill dir) and must never be committed. + +The per-account `apiUrl` always comes from the live authorize response — there +is no config file for endpoints (project rule). + +## Safety gating + +Destructive subcommands refuse to run without `--confirm`; without it they print +what they would do and exit non-zero (3): + +- `create-bucket <name> [--type allPrivate|allPublic] --confirm` +- `create-key --name <name> --capabilities <caps> [--bucket <bucket>] [--prefix <prefix>] [--duration-seconds N] --confirm` +- `delete-bucket <name> --confirm` (B2 refuses to delete a non-empty bucket — the error is surfaced verbatim) +- `delete-key <applicationKeyId> --confirm` +- `delete-prefix <bucket> <prefix> [<prefix> ...] --confirm` (schedules an IRREVERSIBLE lifecycle purge — see "Prefix purge / lifecycle" below) +- `lifecycle-remove <bucket> <prefix> [<prefix> ...] --confirm` (removes lifecycle rules) + +`create-key` prints the returned `applicationKey` exactly ONCE with a warning — +B2 never shows it again, so store it in the vault immediately. + +`raw` refuses any method name containing `create` / `delete` / `update` / `hide` +/ `cancel` unless `--confirm` is passed. `raw` prints the upstream response +verbatim, which may carry sensitive data (keys, tokens) — review before pasting +into tickets/logs. Destructive calls are NEVER retried automatically. + +## Common commands + +```bash +B2="py $CLAUDETOOLS_ROOT/.claude/skills/b2/scripts/b2.py" + +# Status / inventory +$B2 status +$B2 buckets +$B2 keys + +# Files +$B2 files ACG-Internal # latest names +$B2 files ACG-Internal --prefix MBS- --limit 50 +$B2 files ACG-Internal --versions # all versions + +# Size + cost +$B2 bucket-size ACG-Internal +$B2 usage # headline cost report, all buckets +$B2 usage --bucket ACG-Dataforth # one bucket +$B2 cost --json # alias for usage + +# Provisioning (gated) +$B2 create-bucket ACG-NewClient --confirm +$B2 create-key --name acg-newclient-backup \ + --capabilities listBuckets,listFiles,readFiles,writeFiles,deleteFiles \ + --bucket ACG-NewClient --confirm +$B2 delete-bucket ACG-OldClient --confirm +$B2 delete-key 00146f69bc611630000000abc --confirm + +# Lifecycle / prefix purge (see next section) +$B2 lifecycle ACG-Internal # read-only: list current rules +$B2 delete-prefix ACG-Internal "MBS-<account>/CBB_<machine>/" --confirm +$B2 lifecycle-remove ACG-Internal "MBS-<account>/CBB_<machine>/" --confirm + +# Power use — any v3 method directly +$B2 raw --method b2_list_buckets --body '{"accountId":"46f69bc61163"}' +``` + +## Prefix purge / lifecycle + +Some backup 
destinations hold **1.2M+ file versions** under a single machine +prefix. Per-file deletion (`b2_delete_file_version` per version) is impractical +at that scale, so this skill purges a prefix via a **B2 bucket lifecycle rule** +instead. B2's own daily lifecycle pass does the deletion server-side. + +### How the purge mechanism works + +A purge rule is: + +```json +{ "fileNamePrefix": "MBS-/CBB_/", + "daysFromUploadingToHiding": 1, + "daysFromHidingToDeleting": 1 } +``` + +With both day-counts at `1`, B2's daily lifecycle pass **hides** any file more +than 1 day old, then **deletes** hidden files more than 1 day old. Every version +under the prefix (all are years old) becomes eligible immediately, so the prefix +is fully purged within **~24-48h** (two daily passes). This is **irreversible**: +there is no recycle bin and no undo once the pass deletes a version. + +`fileNamePrefix: ""` means the WHOLE BUCKET — the skill refuses that and any +other too-broad prefix (see safety below). + +### Lifecycle rules REPLACE, they don't append + +`b2_update_bucket`'s `lifecycleRules` **replaces the entire array** — it is not +additive. The skill therefore always reads the current rules (`b2_list_buckets` +scoped to the bucketId), merges the change, and writes the full set back. This +read-merge-write of the complete array is what preserves pre-existing rules. + +**This account's `b2_update_bucket` does NOT support `ifRevisionMatch`** (it +returns HTTP 400 `bad_request: unknown field ... ifRevisionMatch`), so the skill +sends no optimistic-lock token and writes are **last-write-wins**. There is no +409-conflict retry (no revision guard to violate); the skill makes at most one +defensive retry on a transient 5xx, then fails — it never loops. The bucket +`revision` is still read and displayed for reference, but is never sent back. +B2 allows up to **100** lifecycle rules per bucket; the skill refuses an update +that would exceed that. 
+ +### Commands + +```bash +B2="py $CLAUDETOOLS_ROOT/.claude/skills/b2/scripts/b2.py" + +# 1. READ-ONLY: see the bucket's current lifecycle rules + revision +$B2 lifecycle ACG-Internal +$B2 lifecycle ACG-Internal --json + +# 2. DRY-RUN: what WOULD be scheduled (no --confirm -> exits 3, writes nothing) +$B2 delete-prefix ACG-Internal "MBS-<account>/CBB_<machine>/" + +# 3. COMMIT the purge (gated). Idempotent: an identical rule already present +# is skipped. You can pass several prefixes at once. +$B2 delete-prefix ACG-Internal \ + "MBS-<account>/CBB_OLDPC1/" \ + "MBS-<account>/CBB_OLDPC2/" --confirm + +# 4. After ~24-48h, verify the data is gone (size should have dropped) +$B2 bucket-size ACG-Internal + +# 5. Clean up the now-spent purge rule(s) so they don't sit on the bucket +$B2 lifecycle-remove ACG-Internal \ + "MBS-<account>/CBB_OLDPC1/" \ + "MBS-<account>/CBB_OLDPC2/" --confirm +``` + +### Safety validations (hard-fail even with --confirm) + +`delete-prefix` refuses (exit 2) any prefix that is: + +- empty, `"/"`, `"*"`, or **contains no `/`** — too broad (would risk a + whole-bucket or whole-account-tree purge); +- exactly an **account root** `MBS-<account>/` (one path segment + trailing slash) — + this would purge ALL machines under that account. Override only with the extra + `--allow-account-root` flag (NOT used by default — purge machine-level `CBB_` + prefixes instead). + +A valid machine target looks like `MBS-<account>/CBB_<machine>/`. The command also +**warns** (does not fail) when a prefix lacks a trailing `/`, since a slash-less +prefix can match a sibling whose name merely starts with the same characters. + +Without `--confirm`, `delete-prefix` prints the exact rule(s) it WOULD add and a +prominent `[WARNING]` about irreversible deletion, then exits non-zero (3) — +matching the other gated commands. `lifecycle-remove` is similarly gated +(removing a rule is far less dangerous than adding one, but gated for +consistency); without `--confirm` it lists which existing rules it would remove +and exits 3. 
+ +### Cleanup workflow + +`delete-prefix` -> wait ~24-48h for B2's lifecycle pass -> `bucket-size` to +verify the stored size dropped -> `lifecycle-remove` to strip the spent purge +rule. Leaving the rule in place is harmless (everything under the prefix is +already gone) but tidy buckets are easier to reason about. + +## Storage cost + +ACG's Backblaze B2 cost basis is **$0.00695 per GB** stored, defined as +`RATE_PER_GB_USD` in `scripts/b2_client.py` (recorded in +`.claude/memory/reference_backblaze_storage_rate.md`) and used by the GuruRMM +mspbackups storage-cost calc. Override at the CLI with `--rate`. + +GB is **decimal** (`bytes / 1e9`), matching how storage providers bill — NOT +2^30. Cost = `GB * rate`. + +Size MUST be summed over **all file versions** (`b2_list_file_versions`, +`action == "upload"`), not just the latest names, because B2 bills every stored +version. `bucket-size` and `usage` do this automatically. For very large buckets +this issues many list transactions — `usage` prints a `[WARNING]` to that effect. + +## Account structure (for context) + +- accountId `46f69bc61163`, region `us-west-001`. +- 12 buckets (all `allPrivate`), mostly per-client MSP360/CloudBerry backup + destinations with keys like `MBS-/CBB_/...`: ACG-BST, ACG-Brett, + ACG-Dataforth, ACG-GLAZTECH, ACG-IX, ACG-Internal, ACG-Lens, ACG-PST, + ACG-REDNOUR, Horseshoe, MSPBackups20200311, VWP-Backup. `usage` derives a + client label by stripping the `ACG-` prefix. +- 2 application keys: `cloudberrykey` (the MSP360/CloudBerry key) and + `ClaudeTools` (the key this skill uses). + +## Reference + +Full verified v3 auth flow, every method used with request/response shape, the +file-version billing note, pagination, the error shape, and the cost formula: +`references/api-reference.md`. 
diff --git a/.claude/skills/b2/references/api-reference.md b/.claude/skills/b2/references/api-reference.md new file mode 100644 index 0000000..5a901e3 --- /dev/null +++ b/.claude/skills/b2/references/api-reference.md @@ -0,0 +1,236 @@ +# Backblaze B2 Native API v3 Reference + +Verified spec for the methods used by this skill, against the live ACG B2 +account (accountId `46f69bc61163`, region `us-west-001`). All facts below were +confirmed against the live API. + +--- + +## Authorization (v3) + +- **Authorize URL (fixed, global):** + `https://api.backblazeb2.com/b2api/v3/b2_authorize_account` +- **Auth:** HTTP Basic. Username = key id, password = application key. +- **Method:** GET in the B2 docs; this skill issues it as an HTTPS POST with the + Basic header, which the endpoint accepts. + +### Authorize response (v3 shape) + +```json +{ + "accountId": "46f69bc61163", + "authorizationToken": "", + "apiInfo": { + "storageApi": { + "apiUrl": "https://api001.backblazeb2.com", + "s3ApiUrl": "https://s3.us-west-001.backblazeb2.com", + "downloadUrl": "https://f001.backblazeb2.com", + "recommendedPartSize": 100000000, + "absoluteMinimumPartSize": 5000000, + "capabilities": ["listBuckets", "listFiles", "..."], + "bucketId": null, + "namePrefix": null + } + } +} +``` + +- **v3 nesting:** `apiUrl` / `s3ApiUrl` / `downloadUrl` / `capabilities` / + `bucketId` / `namePrefix` live under `apiInfo.storageApi`. (v2 returned + `apiUrl` / `downloadUrl` at the top level — do NOT parse them there for v3.) +- `bucketId == null` and `namePrefix == null` means an **account-wide** key; + non-null means the key is scoped to one bucket / name prefix. +- The per-account `apiUrl` is dynamic and comes ONLY from this response — never + from a config file. + +### Cache model + +The skill caches `authorizationToken` + `apiUrl` + `accountId` (plus the other +fields) in `.claude/skills/b2/.cache/auth.json` with an `authorized_at` +timestamp. It is treated as valid for ~23h (B2 tokens last 24h). 
A `401` with +code `expired_auth_token` or `bad_auth_token` triggers exactly one re-authorize +and retry. **The cached token is a secret; the cache dir is gitignored.** + +--- + +## Subsequent calls + +- **URL:** `POST <apiUrl>/b2api/v3/<method_name>` +- **Header:** `Authorization: <authorizationToken>` (raw token, not Basic). +- **Body:** a JSON object. +- **Content-Type:** `application/json`. + +### Error shape + +B2 returns HTTP 4xx with a JSON body: + +```json +{ "status": 400, "code": "<code>", "message": "<message>" } +``` + +The skill surfaces `status` / `code` / `message` verbatim. On `401` with +`expired_auth_token` / `bad_auth_token` it re-authorizes once and retries; all +other errors raise. Destructive calls are never retried automatically. + +--- + +## Methods used + +All POST to `<apiUrl>/b2api/v3/<method_name>`. + +### b2_list_buckets + +- **Body:** `{"accountId": "<accountId>", "bucketId": <optional bucketId>}` +- **Returns:** `{"buckets": [{bucketName, bucketId, bucketType, revision, + fileLockConfiguration:{isFileLockEnabled}, lifecycleRules, options}]}` +- Pass `bucketId` to scope the result to a single bucket. +- **`revision`** (int) is the bucket's optimistic-concurrency version. NOTE: + this account's `b2_update_bucket` does NOT accept `ifRevisionMatch`, so this + value is informational only — it is read but never sent back on a write. +- **`lifecycleRules`** (array) is the bucket's current lifecycle rule set (see + the rule shape under `b2_update_bucket`). An empty array means no rules. + +### b2_list_keys + +- **Body:** `{"accountId": "<accountId>", "maxKeyCount": 1000, + "startApplicationKeyId": <optional>}` +- **Returns:** `{"keys": [{keyName, applicationKeyId, capabilities:[...], + bucketId, namePrefix, expirationTimestamp}], "nextApplicationKeyId": <id or null>}` +- Paginate on `nextApplicationKeyId`. + +### b2_list_file_names + +- **Body:** `{"bucketId": "<bucketId>", "maxFileCount": 10000, "prefix": <optional>, + "startFileName": <optional>}` +- **Returns:** `{"files": [{fileName, contentLength, action, uploadTimestamp, + ...}], "nextFileName": <name or null>}` +- Lists the **latest** name for each file. 
Use for quick listings; NOT for + size/cost. + +### b2_list_file_versions + +- **Body:** `{"bucketId": "", "maxFileCount": 10000, "prefix": , + "startFileName": , "startFileId": }` +- **Returns:** `{"files": [{fileName, fileId, contentLength, action, ...}], + "nextFileName": , "nextFileId": ...}` +- **Pagination:** carry BOTH `nextFileName` AND `nextFileId` into the next + request; stop when `nextFileName` is null. + +#### action types + +| action | meaning | billed? | +|---|---|---| +| `upload` | a real stored object | YES | +| `hide` | a hide marker (`contentLength` 0) | no | +| `start` | an unfinished large file | no | +| `folder` | a virtual folder placeholder | no | + +**For size/cost, sum `contentLength` over ALL versions where +`action == "upload"`.** B2 bills every stored version, not just the latest, so +size MUST use `b2_list_file_versions`, never `b2_list_file_names`. + +### b2_create_bucket (destructive — gated) + +- **Body:** `{"accountId": "", "bucketName": "", "bucketType": + "allPrivate"}` +- `bucketType` defaults to `allPrivate`; the CLI allows `allPublic` via `--type`. + +### b2_create_key (destructive — gated) + +- **Body:** `{"accountId": "", "keyName": "", + "capabilities": [...], "bucketId": , "namePrefix": , + "validDurationInSeconds": }` +- **Returns:** `{applicationKeyId, applicationKey, keyName, capabilities, + bucketId, namePrefix}`. +- **`applicationKey` is the SECRET and is shown ONCE on creation** — it cannot be + retrieved later. The CLI prints it with a prominent warning to store it in the + vault immediately. +- `bucketId` scopes the key to one bucket (the CLI resolves a bucket name to its + id via `b2_list_buckets`). + +### b2_delete_bucket (destructive — gated) + +- **Body:** `{"accountId": "", "bucketId": ""}` +- B2 **refuses to delete a non-empty bucket**; that error is surfaced verbatim. 
+ +### b2_delete_key (destructive — gated) + +- **Body:** `{"applicationKeyId": ""}` + +### b2_update_bucket (destructive — gated) + +Used for the prefix-purge feature: it writes the bucket's lifecycle rules. + +- **Body:** `{"accountId": "", "bucketId": "", + "lifecycleRules": [...]}` +- **Returns:** the updated bucket object (same shape as a `b2_list_buckets` + entry, including the NEW `revision` and the written `lifecycleRules`). +- **`lifecycleRules` REPLACES the entire array — it is NOT additive (caveat).** + To add/remove a single rule you MUST first read the current rules via + `b2_list_buckets` (scoped to the bucketId), merge your change into the full + array, then write the complete set back. Writing just the one rule you want + silently drops every other rule. +- **No `ifRevisionMatch` on this account/endpoint.** This B2 account's + `b2_update_bucket` REJECTS `ifRevisionMatch` with **HTTP 400** `bad_request` + ("unknown field ... ifRevisionMatch"), so no optimistic-lock token is sent. + Writes are therefore **last-write-wins** — the read-merge-write of the full + rules array on every change is what keeps pre-existing rules intact. The skill + does not implement a 409-conflict retry (there is no revision guard); it makes + at most one defensive retry on a transient 5xx, never loops. +- B2 allows up to **100** lifecycle rules per bucket. + +#### Lifecycle rule shape + +```json +{ "fileNamePrefix": "MBS-/CBB_/", + "daysFromUploadingToHiding": 1, + "daysFromHidingToDeleting": 1 } +``` + +- `fileNamePrefix` — files whose name starts with this string are governed by the + rule. `""` means the WHOLE BUCKET. +- `daysFromUploadingToHiding` — after this many days, B2's daily lifecycle pass + HIDES the file (creates a hide marker). +- `daysFromHidingToDeleting` — after this many days hidden, B2 DELETES the file + version permanently. 
+- With both `= 1`, every version older than ~1 day is hidden then deleted on the + next daily passes -> full server-side purge of the prefix within ~24-48h. This + is the mechanism the skill uses to purge prefixes with 1.2M+ versions, where + per-file `b2_delete_file_version` would be impractical. **Irreversible — no + recycle bin.** + +### b2_get_file_info / b2_delete_file_version + +- `b2_get_file_info` body: `{"fileId": ""}` (read). +- `b2_delete_file_version` body: `{"fileName": "", "fileId": ""}` + (destructive; exposed only via `raw --confirm` or the client helper). + +--- + +## Cost formula + +``` +GB = bytes / 1_000_000_000 # decimal GB (billing unit, NOT 2^30) +cost = GB * RATE_PER_GB_USD # RATE_PER_GB_USD = 0.00695 (ACG cost basis) +``` + +`RATE_PER_GB_USD` is ACG's cost basis, recorded in +`.claude/memory/reference_backblaze_storage_rate.md`, and used by the GuruRMM +mspbackups storage-cost calc. Override with `--rate`. The `usage` report sums +upload-version bytes per bucket, converts to decimal GB, multiplies by the rate, +sorts by cost desc, and prints a TOTAL row plus grand totals. For `ACG-*` +buckets it derives a client label by stripping the `ACG-` prefix. + +--- + +## Account structure (live) + +- accountId `46f69bc61163`, region `us-west-001`. +- 12 buckets (all `allPrivate`): ACG-BST, ACG-Brett, ACG-Dataforth, ACG-GLAZTECH, + ACG-IX, ACG-Internal, ACG-Lens, ACG-PST, ACG-REDNOUR, Horseshoe, + MSPBackups20200311, VWP-Backup. Most hold MSP360/CloudBerry data + (`MBS-/CBB_/...`). +- 2 application keys: `cloudberrykey` (`00146f69bc611630000000003`, the + MSP360/CloudBerry key) and `ClaudeTools` (`00146f69bc611630000000009`, used by + this skill). diff --git a/.claude/skills/b2/scripts/b2.py b/.claude/skills/b2/scripts/b2.py new file mode 100644 index 0000000..26514b7 --- /dev/null +++ b/.claude/skills/b2/scripts/b2.py @@ -0,0 +1,766 @@ +#!/usr/bin/env python3 +"""CLI for the b2 skill — Backblaze B2 Native API v3 (ACG production account). 
+ +Read-only subcommands run freely. Destructive subcommands (create-bucket, +create-key, delete-bucket, delete-key) refuse to run unless --confirm is passed; +without it they print what they WOULD do and exit non-zero (3). + +Output: --json emits raw JSON; otherwise a readable table/summary. + +Usage examples: + python b2.py status + python b2.py buckets + python b2.py buckets --json + python b2.py keys + python b2.py files ACG-Internal --prefix MBS- --limit 50 + python b2.py files ACG-Internal --versions + python b2.py bucket-size ACG-Internal + python b2.py usage + python b2.py usage --bucket ACG-Dataforth + python b2.py cost --json + python b2.py create-bucket NewBucket --confirm + python b2.py create-key --name client-backup --capabilities listFiles,readFiles \\ + --bucket ACG-Internal --confirm + python b2.py delete-bucket OldBucket --confirm + python b2.py delete-key 00146f69bc611630000000abc --confirm + python b2.py lifecycle ACG-Internal + python b2.py delete-prefix ACG-Internal "MBS-/CBB_/" --confirm + python b2.py lifecycle-remove ACG-Internal "MBS-/CBB_/" --confirm + python b2.py raw --method b2_list_buckets --body '{"accountId":""}' +""" +from __future__ import annotations + +import argparse +import json +import sys +from typing import Optional + +from b2_client import B2Client, B2Error, RATE_PER_GB_USD, BYTES_PER_GB, BYTES_PER_GIB + + +def _emit(obj, as_json: bool, table_fn=None) -> None: + if as_json or table_fn is None: + print(json.dumps(obj, indent=2, default=str)) + else: + table_fn(obj) + + +def _client_label(bucket_name: str) -> str: + """Derive a client label: strip the 'ACG-' prefix; leave others as-is.""" + if bucket_name.startswith("ACG-"): + return bucket_name[len("ACG-"):] + return bucket_name + + +def _fmt_usd(amount: float) -> str: + return f"${amount:,.4f}" + + +# --- table renderers ---------------------------------------------------------- +def _print_status(info: dict) -> None: + caps = info.get("capabilities") or [] + scope = 
"account-wide" if not info.get("bucketId") else f"bucket {info['bucketId']}" + print("[INFO] Backblaze B2 authorization") + print(f" accountId: {info.get('accountId')}") + print(f" apiUrl: {info.get('apiUrl')}") + print(f" s3ApiUrl: {info.get('s3ApiUrl')}") + print(f" downloadUrl: {info.get('downloadUrl')}") + print(f" key scope: {scope}") + if info.get("namePrefix"): + print(f" namePrefix: {info.get('namePrefix')}") + print(f" capabilities: {len(caps)} -> {', '.join(sorted(caps))}") + print(f" authorized_at: {info.get('authorized_at')}") + + +def _print_buckets(buckets: list) -> None: + print(f"Buckets: {len(buckets)}") + print(f" {'NAME':28} {'TYPE':12} {'LOCK':5} BUCKET-ID") + for b in sorted(buckets, key=lambda x: x.get("bucketName", "")): + lock = (b.get("fileLockConfiguration") or {}).get("isFileLockEnabled") + lock_str = "yes" if lock else "no" + print(f" {b.get('bucketName',''):28} {b.get('bucketType',''):12} " + f"{lock_str:5} {b.get('bucketId','')}") + + +def _print_keys(keys: list) -> None: + print(f"Application keys: {len(keys)}") + print(f" {'NAME':22} {'APP-KEY-ID':28} {'SCOPE':18} {'CAPS':4} {'PREFIX':12} EXPIRES") + for k in keys: + scope = k.get("bucketId") or "account-wide" + caps = k.get("capabilities") or [] + prefix = k.get("namePrefix") or "-" + exp = k.get("expirationTimestamp") + exp_str = str(exp) if exp else "never" + print(f" {k.get('keyName',''):22} {k.get('applicationKeyId',''):28} " + f"{scope:18} {len(caps):<4} {prefix:12} {exp_str}") + + +def _print_files(files: list) -> None: + print(f"Files: {len(files)}") + print(f" {'ACTION':8} {'SIZE':>14} {'UPLOADED':>16} NAME") + for f in files: + size = f.get("contentLength", 0) or 0 + ts = f.get("uploadTimestamp", "") + print(f" {str(f.get('action','')):8} {size:>14,} {str(ts):>16} " + f"{f.get('fileName','')}") + + +def _print_bucket_size(data: dict) -> None: + print(f"Bucket: {data['bucketName']}") + print(f" stored bytes: {data['bytes']:,}") + print(f" stored GB (1e9): {data['gb']:.4f}") 
+ print(f" stored GiB: {data['gib']:.4f}") + print(f" distinct files: {data['file_count']:,}") + print(f" upload versions: {data['version_count']:,}") + print(f" versions seen: {data['total_versions_seen']:,} " + "(includes hide/start/folder markers)") + + +def _print_usage(report: dict) -> None: + rows = report["buckets"] + rate = report["rate"] + print(f"[INFO] Storage cost report (rate = ${rate:.5f} / GB, " + "GB = bytes / 1e9, all versions counted)") + print(f"[WARNING] Sized via b2_list_file_versions across all versions; " + "large buckets may issue many list transactions.") + print() + print(f" {'CLIENT/BUCKET':24} {'BYTES':>16} {'GB':>12} {'COST':>14}") + print(f" {'-'*24} {'-'*16} {'-'*12} {'-'*14}") + for r in rows: + print(f" {r['label']:24} {r['bytes']:>16,} {r['gb']:>12.4f} " + f"{_fmt_usd(r['cost']):>14}") + print(f" {'-'*24} {'-'*16} {'-'*12} {'-'*14}") + print(f" {'TOTAL':24} {report['total_bytes']:>16,} " + f"{report['total_gb']:>12.4f} {_fmt_usd(report['total_cost']):>14}") + + +def _print_lifecycle(data: dict) -> None: + rules = data.get("lifecycleRules") or [] + print(f"Bucket: {data.get('bucketName')} (revision {data.get('revision')})") + print(f"Lifecycle rules: {len(rules)}") + if not rules: + print(" (none - files are kept until explicitly deleted)") + return + print(f" {'FILE-NAME-PREFIX':40} {'HIDE@days':>9} {'DELETE@days':>11}") + print(f" {'-'*40} {'-'*9} {'-'*11}") + for r in rules: + prefix = r.get("fileNamePrefix", "") + prefix_disp = "(whole bucket)" if prefix == "" else prefix + hide = r.get("daysFromUploadingToHiding") + delete = r.get("daysFromHidingToDeleting") + hide_disp = "-" if hide is None else str(hide) + delete_disp = "-" if delete is None else str(delete) + print(f" {prefix_disp:40} {hide_disp:>9} {delete_disp:>11}") + + +# --- command handlers --------------------------------------------------------- +def cmd_status(client, args): + _emit(client.auth_info, args.json, _print_status) + return 0 + + +def cmd_buckets(client, 
args): + _emit(client.list_buckets(), args.json, _print_buckets) + return 0 + + +def cmd_keys(client, args): + _emit(client.list_keys(), args.json, _print_keys) + return 0 + + +def cmd_files(client, args): + bucket = client.resolve_bucket(args.bucket_name) + bucket_id = bucket["bucketId"] + if args.versions: + files = client.list_file_versions( + bucket_id, prefix=args.prefix, limit=args.limit + ) + else: + files = client.list_file_names( + bucket_id, prefix=args.prefix, limit=args.limit + ) + _emit(files, args.json, _print_files) + return 0 + + +def cmd_bucket_size(client, args): + bucket = client.resolve_bucket(args.bucket_name) + if not args.json: + print(f"[INFO] Listing all versions in '{args.bucket_name}' " + "(may take a while for large buckets)...", file=sys.stderr) + data = client.bucket_size(bucket["bucketId"]) + data["bucketName"] = args.bucket_name + _emit(data, args.json, _print_bucket_size) + return 0 + + +def cmd_usage(client, args): + rate = args.rate + buckets = client.list_buckets() + if args.bucket: + buckets = [b for b in buckets if b.get("bucketName") == args.bucket] + if not buckets: + print(f"[ERROR] No bucket named '{args.bucket}'.", file=sys.stderr) + return 1 + + if not args.json: + print(f"[INFO] Computing storage cost across {len(buckets)} bucket(s); " + "this lists every version and may issue many list transactions...", + file=sys.stderr) + + rows = [] + total_bytes = 0 + for b in buckets: + size = client.bucket_size(b["bucketId"]) + cost = size["gb"] * rate + total_bytes += size["bytes"] + rows.append({ + "bucket": b.get("bucketName", ""), + "label": _client_label(b.get("bucketName", "")), + "bytes": size["bytes"], + "gb": size["gb"], + "gib": size["gib"], + "version_count": size["version_count"], + "file_count": size["file_count"], + "cost": cost, + }) + + rows.sort(key=lambda r: r["cost"], reverse=True) + total_gb = total_bytes / BYTES_PER_GB + report = { + "rate": rate, + "buckets": rows, + "total_bytes": total_bytes, + "total_gb": 
def _validate_purge_prefix(prefix: str, allow_account_root: bool) -> Optional[str]:
    """Return an error string if `prefix` is unsafe to purge; else None.

    Hard-fail rules (apply even with --confirm):
      * empty / "/" / "*" / no "/" at all -> too broad (whole-bucket /
        whole-account).
      * a leading "/" or an empty path segment ("//") -> refused. Such a prefix
        cannot be classified as account-level or machine-level by counting
        segments, and would otherwise slip past the account-root check below
        (e.g. "/MBS-x/" or "MBS-x//" look "two segments deep").
      * exactly one segment, with or without a trailing "/" -> account-level;
        refused unless `allow_account_root` is true.

    Args:
        prefix: The fileNamePrefix the caller wants to purge.
        allow_account_root: Whether a single-segment (account root) prefix is
            permitted.

    Returns:
        A human-readable refusal message, or None when the prefix is acceptable.
    """
    if prefix in ("", "/", "*"):
        return (f"prefix {prefix!r} is too broad — it would purge the whole "
                "bucket or account. Refusing.")
    if "/" not in prefix:
        return (f"prefix {prefix!r} contains no '/'; a top-level prefix can match "
                "an entire MBS- account tree. Refusing. A valid target looks "
                "like 'MBS-/CBB_/'.")

    # Ignore one optional terminal slash, then look at the real path segments.
    stripped = prefix[:-1] if prefix.endswith("/") else prefix
    segments = stripped.split("/")

    # Fail closed on empty segments: counting "/" alone would treat "/MBS-x/"
    # or "MBS-x//" as machine-level and bypass --allow-account-root.
    if any(not segment for segment in segments):
        return (f"prefix {prefix!r} has a leading '/' or an empty path segment "
                "('//'); its depth cannot be classified as account- or "
                "machine-level. Refusing.")

    # A single segment (e.g. "MBS-/") is an account root.
    if len(segments) == 1 and not allow_account_root:
        return (f"prefix {prefix!r} is an account root (MBS-/); purging "
                "it removes ALL machines under that account. Pass "
                "--allow-account-root to override (NOT recommended — purge "
                "machine-level CBB_ prefixes instead).")
    return None
+ for prefix in args.prefixes: + err = _validate_purge_prefix(prefix, args.allow_account_root) + if err: + print(f"[ERROR] {err}", file=sys.stderr) + return 2 + + # Soft warning: recommend a trailing slash so the prefix can't match a + # sibling whose name merely starts with these characters. + for prefix in args.prefixes: + if not prefix.endswith("/"): + print(f"[WARNING] prefix {prefix!r} does not end with '/'; it will " + "also match any file whose name merely starts with it.", + file=sys.stderr) + + if not args.confirm: + print("[WARNING] Refusing destructive action without --confirm.") + for prefix in args.prefixes: + print(_purge_prefix_warning(prefix)) + print(f"[INFO] Would add purge rule: fileNamePrefix={prefix!r}, " + f"daysFromUploadingToHiding={PURGE_DAYS_FROM_UPLOAD_TO_HIDE}, " + f"daysFromHidingToDeleting={PURGE_DAYS_FROM_HIDE_TO_DELETE}") + return 3 + + result = _apply_lifecycle_change( + client, bucket_id, args.prefixes, add=True, json_out=args.json + ) + return result + + +def cmd_lifecycle_remove(client, args): + """GATED: remove lifecycle rule(s) whose fileNamePrefix matches the prefix(es).""" + bucket = client.resolve_bucket(args.bucket_name) + bucket_id = bucket["bucketId"] + + if not args.confirm: + # Read so we can show exactly which existing rules would be removed. 
def _apply_lifecycle_change(client, bucket_id, prefixes, *, add, json_out):
    """Read the current rules, merge (add) or filter (remove), then write back.

    Read -> merge -> write the COMPLETE rules array. This account's
    b2_update_bucket does not accept an `ifRevisionMatch` optimistic-lock token,
    so writes are last-write-wins; merging onto the freshly-read set is what
    preserves pre-existing rules. The read still returns the bucket `revision`,
    but it is informational only and is not sent back.

    Args:
        client: Object providing `get_bucket_with_revision(bucket_id)` and
            `update_bucket_lifecycle(bucket_id, rules)`.
        bucket_id: Target bucket id.
        prefixes: fileNamePrefix values to add purge rules for, or to remove.
            Repeated values are collapsed (first occurrence wins) so a prefix
            is never reported as added/removed twice.
        add: True to add purge rules, False to remove matching rules.
        json_out: Passed through to `_emit` to select JSON vs. text output.

    Returns:
        A CLI exit code: 0 on success or no-op, 1 if the merged rule set would
        exceed 100 rules.

    Raises:
        B2Error: if the update fails with a non-5xx status, or fails again on
            the single 5xx retry.
    """
    # Collapse duplicates while keeping the caller's order; otherwise the same
    # prefix is appended to `added`/`removed` once per repetition.
    prefixes = list(dict.fromkeys(prefixes))

    full = client.get_bucket_with_revision(bucket_id)
    existing = list(full.get("lifecycleRules") or [])

    if add:
        merged = list(existing)
        added, skipped = [], []
        for prefix in prefixes:
            if any(_is_purge_rule(r, prefix) for r in existing):
                skipped.append(prefix)
                continue
            # Replace any non-purge rule on the same prefix rather than
            # stacking two rules for one prefix.
            merged = [r for r in merged if not _rule_matches_prefix(r, prefix)]
            merged.append(_purge_rule(prefix))
            added.append(prefix)
        if not added:
            _emit({"bucketId": bucket_id, "added": [], "skipped": skipped,
                   "lifecycleRules": existing}, json_out,
                  lambda o: print("[OK] All requested purge rules already "
                                  f"present; nothing to do. (skipped: "
                                  f"{', '.join(skipped) or 'none'})"))
            return 0
        change_summary = {"added": added, "skipped": skipped}
    else:
        to_remove = [p for p in prefixes
                     if any(_rule_matches_prefix(r, p) for r in existing)]
        merged = [r for r in existing
                  if not any(_rule_matches_prefix(r, p) for p in prefixes)]
        if not to_remove:
            _emit({"bucketId": bucket_id, "removed": [],
                   "lifecycleRules": existing}, json_out,
                  lambda o: print("[OK] No matching lifecycle rules to "
                                  "remove; nothing to do."))
            return 0
        change_summary = {"removed": to_remove}

    if len(merged) > 100:
        print(f"[ERROR] Resulting rule count {len(merged)} exceeds B2's "
              "limit of 100 lifecycle rules per bucket.", file=sys.stderr)
        return 1

    try:
        updated = client.update_bucket_lifecycle(bucket_id, merged)
    except B2Error as exc:
        # No optimistic-lock retry path here (no ifRevisionMatch). Make a single
        # defensive retry on a transient/server-side hiccup, then give up — never
        # loop. A 4xx (e.g. bad_request) is a request problem, not transient, so
        # re-raise it immediately.
        if exc.status is not None and 500 <= exc.status < 600:
            print(f"[WARNING] b2_update_bucket transient failure (HTTP "
                  f"{exc.status}); retrying once...", file=sys.stderr)
            updated = client.update_bucket_lifecycle(bucket_id, merged)
        else:
            raise

    out = {
        "bucketId": bucket_id,
        "newRevision": updated.get("revision"),
        "lifecycleRules": updated.get("lifecycleRules") or merged,
    }
    out.update(change_summary)

    def _render(_o):
        if add:
            for p in change_summary["added"]:
                print(f"[OK] Added purge rule for {p!r} "
                      f"({PURGE_DAYS_FROM_UPLOAD_TO_HIDE}/"
                      f"{PURGE_DAYS_FROM_HIDE_TO_DELETE} days).")
            for p in change_summary["skipped"]:
                print(f"[INFO] Purge rule for {p!r} already present; skipped.")
            print(_purge_prefix_warning(
                ", ".join(change_summary["added"])))
        else:
            for p in change_summary["removed"]:
                print(f"[OK] Removed lifecycle rule(s) for {p!r}.")
        print(f"[INFO] Bucket now has "
              f"{len(updated.get('lifecycleRules') or merged)} rule(s); "
              f"revision {updated.get('revision')}.")

    _emit(out, json_out, _render)
    return 0
+ bucket_id = None + if args.bucket: + bucket_id = client.resolve_bucket(args.bucket)["bucketId"] + scope = f"bucket '{args.bucket}'" if args.bucket else "account-wide" + desc = (f"create key '{args.name}' scoped {scope} with capabilities " + f"{','.join(caps)}") + if not _gated(desc, args.confirm): + return 3 + result = client.create_key( + key_name=args.name, + capabilities=caps, + bucket_id=bucket_id, + name_prefix=args.prefix, + valid_duration_seconds=args.duration_seconds, + ) + if args.json: + # Surface the one-time-key warning on STDERR so piping --json to a file + # still alerts the operator — the applicationKey cannot be retrieved again. + print("[WARNING] store this key in the vault now - it cannot be " + "retrieved again", file=sys.stderr) + print(json.dumps(result, indent=2, default=str)) + else: + print(f"[OK] Created application key '{args.name}'.") + print(f" applicationKeyId: {result.get('applicationKeyId')}") + print(f" capabilities: {', '.join(result.get('capabilities', []))}") + print(f" scope: " + f"{result.get('bucketId') or 'account-wide'}") + if result.get("namePrefix"): + print(f" namePrefix: {result.get('namePrefix')}") + print() + print("[WARNING] The applicationKey below is shown ONCE and CANNOT be " + "retrieved later. 
def cmd_delete_key(client, args):
    """Delete one application key, gated behind --confirm.

    Returns 3 when `_gated` refuses (no --confirm); otherwise calls
    `client.delete_key`, emits the result, and returns 0.
    """
    key_id = args.application_key_id
    permitted = _gated(f"delete application key '{key_id}'", args.confirm)
    if not permitted:
        return 3

    outcome = client.delete_key(key_id)

    def _announce(_payload):
        # Text-mode renderer; the payload itself is only used for --json.
        print(f"[OK] Deleted application key "
              f"'{key_id}'.")

    _emit({"deletedKey": key_id, "result": outcome}, args.json, _announce)
    return 0
DESTRUCTIVE_RAW_PATTERNS = (
    "delete",
    "create",
    "update",
    "hide",
    "cancel",
    "copy",
    "finish",
    "upload",
    "start",
    # b2_set_bucket_notification_rules mutates bucket configuration but
    # contains none of the verbs above.
    "set",
)


def _is_destructive_method(method: str) -> bool:
    """Return True if a raw B2 method name looks state-changing.

    The name is lower-cased and split on every non-alphanumeric character; the
    method is treated as state-changing when any resulting word equals an entry
    of DESTRUCTIVE_RAW_PATTERNS. Matching whole words rather than substrings
    keeps read-only calls such as ``b2_list_unfinished_large_files`` (where
    "finish" only occurs inside "unfinished") from being gated.

    Args:
        method: B2 method name, e.g. ``"b2_update_bucket"``.

    Returns:
        True when the caller should require --confirm.
    """
    normalized = "".join(ch if ch.isalnum() else "_" for ch in method.lower())
    words = set(normalized.split("_"))
    return not words.isdisjoint(DESTRUCTIVE_RAW_PATTERNS)
files.") + + sp = sub.add_parser("bucket-size", + help="Sum stored bytes/GB for one bucket (all versions).", + parents=[common]) + sp.add_argument("bucket_name") + + sp = sub.add_parser("usage", + help="Storage cost across all buckets (headline report).", + parents=[common]) + sp.add_argument("--bucket", help="Scope the report to a single bucket name.") + sp.add_argument("--rate", type=float, default=RATE_PER_GB_USD, + help=f"USD per GB (default {RATE_PER_GB_USD}, ACG cost basis).") + + # alias: cost == usage + sp = sub.add_parser("cost", help="Alias for 'usage'.", parents=[common]) + sp.add_argument("--bucket", help="Scope the report to a single bucket name.") + sp.add_argument("--rate", type=float, default=RATE_PER_GB_USD, + help=f"USD per GB (default {RATE_PER_GB_USD}, ACG cost basis).") + + # gated (destructive) + sp = sub.add_parser("create-bucket", help="Create a bucket (gated).", + parents=[common]) + sp.add_argument("name") + sp.add_argument("--type", default="allPrivate", + choices=["allPrivate", "allPublic"], + help="Bucket type (default allPrivate).") + sp.add_argument("--confirm", action="store_true") + + sp = sub.add_parser("create-key", help="Create an application key (gated).", + parents=[common]) + sp.add_argument("--name", required=True, help="Key name (keyName).") + sp.add_argument("--capabilities", required=True, + help="Comma-separated capability list, e.g. 
" + "listFiles,readFiles,writeFiles.") + sp.add_argument("--bucket", help="Scope the key to this bucket name " + "(resolves to bucketId).") + sp.add_argument("--prefix", help="Restrict the key to a name prefix.") + sp.add_argument("--duration-seconds", type=int, + help="Optional key lifetime (validDurationInSeconds).") + sp.add_argument("--confirm", action="store_true") + + sp = sub.add_parser("delete-bucket", help="Delete a bucket (gated).", + parents=[common]) + sp.add_argument("name") + sp.add_argument("--confirm", action="store_true") + + sp = sub.add_parser("delete-key", help="Delete an application key (gated).", + parents=[common]) + sp.add_argument("application_key_id") + sp.add_argument("--confirm", action="store_true") + + # lifecycle (read-only) + sp = sub.add_parser("lifecycle", + help="List a bucket's current lifecycle rules (read-only).", + parents=[common]) + sp.add_argument("bucket_name") + + # delete-prefix (gated, destructive) + sp = sub.add_parser( + "delete-prefix", + help="Schedule a server-side purge of everything under a prefix via a " + "1/1-day lifecycle rule (gated, IRREVERSIBLE).", + parents=[common], + ) + sp.add_argument("bucket_name") + sp.add_argument("prefixes", nargs="+", metavar="prefix", + help="One or more file-name prefixes " + "(e.g. 'MBS-/CBB_/').") + sp.add_argument("--allow-account-root", action="store_true", + help="Permit purging an account-level 'MBS-/' root. 
" + "NOT recommended; off by default.") + sp.add_argument("--confirm", action="store_true") + + # lifecycle-remove (gated) + sp = sub.add_parser( + "lifecycle-remove", + help="Remove lifecycle rule(s) matching a prefix (cleanup after a purge " + "completes; gated).", + parents=[common], + ) + sp.add_argument("bucket_name") + sp.add_argument("prefixes", nargs="+", metavar="prefix", + help="One or more fileNamePrefix values to remove.") + sp.add_argument("--confirm", action="store_true") + + sp = sub.add_parser("raw", help="Call any B2 v3 method directly (power use).", + parents=[common]) + sp.add_argument("--method", required=True, help="e.g. b2_list_buckets.") + sp.add_argument("--body", default="{}", help="JSON object request body.") + sp.add_argument("--confirm", action="store_true", + help="Required for state-changing methods " + "(create/delete/update/hide/cancel). Output may carry " + "sensitive data — review before reuse.") + + return p + + +HANDLERS = { + "status": cmd_status, + "buckets": cmd_buckets, + "keys": cmd_keys, + "files": cmd_files, + "bucket-size": cmd_bucket_size, + "usage": cmd_usage, + "cost": cmd_usage, + "create-bucket": cmd_create_bucket, + "create-key": cmd_create_key, + "delete-bucket": cmd_delete_bucket, + "delete-key": cmd_delete_key, + "lifecycle": cmd_lifecycle, + "delete-prefix": cmd_delete_prefix, + "lifecycle-remove": cmd_lifecycle_remove, + "raw": cmd_raw, +} + + +def main(argv=None) -> int: + args = build_parser().parse_args(argv) + handler = HANDLERS[args.command] + try: + client = B2Client() + rc = handler(client, args) + return rc if isinstance(rc, int) else 0 + except B2Error as exc: + print(f"[ERROR] {exc}", file=sys.stderr) + return 1 + except KeyboardInterrupt: + return 130 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.claude/skills/b2/scripts/b2_client.py b/.claude/skills/b2/scripts/b2_client.py new file mode 100644 index 0000000..251d0e8 --- /dev/null +++ 
b/.claude/skills/b2/scripts/b2_client.py @@ -0,0 +1,674 @@ +#!/usr/bin/env python3 +"""Backblaze B2 Native API v3 client for the b2 skill. + +Standalone. Talks to the live ACG Backblaze B2 account. Read-only helpers run +freely; state-changing helpers return the raw upstream result and the CLI caller +is responsible for gating them behind --confirm. + +Transport: prefers httpx if installed, else falls back to stdlib urllib so the +script has no hard third-party dependency. + +Credentials: never hardcoded. The key id + application key are loaded at runtime +from the SOPS vault, or from the B2_KEY_ID / B2_APPLICATION_KEY env vars (testing +override). Authorization is HTTP Basic against b2_authorize_account; the returned +authorizationToken + apiUrl + accountId are then cached locally (the token is a +secret — the cache file is gitignored and must never be committed). +""" +from __future__ import annotations + +import base64 +import json +import os +import subprocess +import sys +import urllib.error +import urllib.request +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable, Optional + +# --- optional httpx ----------------------------------------------------------- +# urllib (stdlib) is always available as the fallback transport; httpx is used +# when present for connection pooling/timeouts. +try: + import httpx # type: ignore + + _HAS_HTTPX = True +except ImportError: # pragma: no cover - depends on environment + _HAS_HTTPX = False + +# Cap upstream error bodies surfaced in exceptions. The cached auth token can be +# echoed by some endpoints; bound the blast radius rather than echo full bodies. +ERROR_BODY_MAX_CHARS = 600 + +# --- constants ---------------------------------------------------------------- +# The B2 authorize endpoint is fixed and global. The per-account apiUrl is NOT +# hardcoded: it comes from the live authorize response (apiInfo.storageApi.apiUrl) +# per project rule (no config file for endpoints). 
class B2Error(RuntimeError):
    """Error raised for transport, auth, or B2 API failures.

    Attributes set by the constructor:
      * ``status`` - HTTP status when the failure came from a B2 API response;
        None for transport/parse failures.
      * ``code`` - B2 error code string (e.g. "bad_request"), or None.

    Both exist so callers can branch on a specific failure without
    string-matching the message.
    """

    def __init__(
        self,
        message: str,
        *,
        status: Optional[int] = None,
        code: Optional[str] = None,
    ):
        # The structured fields are independent of the base initializer, so
        # they can be recorded before delegating the message to RuntimeError.
        self.status, self.code = status, code
        super().__init__(message)
def load_credentials() -> tuple[str, str]:
    """Load (key_id, application_key).

    Order: B2_KEY_ID / B2_APPLICATION_KEY env overrides (both must be set to a
    non-blank value to use the override), then the SOPS vault wrapper.

    The env values are stripped BEFORE the emptiness check, so a variable that
    is set but contains only whitespace is treated as unset and falls through
    to the vault instead of being returned as an empty credential.

    Returns:
        The (key_id, application_key) pair; neither element is empty.

    Raises:
        B2Error: propagated from `_vault_get_field` when the vault lookup fails.
    """
    env_key_id = (os.environ.get("B2_KEY_ID") or "").strip()
    env_app_key = (os.environ.get("B2_APPLICATION_KEY") or "").strip()
    if env_key_id and env_app_key:
        return env_key_id, env_app_key

    key_id = _vault_get_field(VAULT_FIELD_KEY_ID)
    app_key = _vault_get_field(VAULT_FIELD_APP_KEY)
    return key_id, app_key
HTTP 4xx/5xx are returned (not raised) so the + caller can inspect the B2 error code for the auth-retry path. + """ + hdrs = dict(headers or {}) + hdrs.setdefault("Content-Type", "application/json") + if basic_auth is not None: + token = base64.b64encode( + f"{basic_auth[0]}:{basic_auth[1]}".encode("utf-8") + ).decode("ascii") + hdrs["Authorization"] = f"Basic {token}" + + if _HAS_HTTPX: + try: + timeout = httpx.Timeout(self.timeout, connect=self.connect_timeout) + with httpx.Client(timeout=timeout) as client: + resp = client.post(url, content=body, headers=hdrs) + return resp.status_code, self._parse_json(resp.text, resp.status_code) + except httpx.TimeoutException as exc: + raise B2Error(f"B2 request timed out: {exc}") from exc + except httpx.HTTPError as exc: + raise B2Error(f"B2 request failed: {exc}") from exc + + # stdlib fallback + req = urllib.request.Request(url, data=body, method="POST", headers=hdrs) + try: + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + raw = resp.read().decode("utf-8", errors="replace") + return resp.getcode(), self._parse_json(raw, resp.getcode()) + except urllib.error.HTTPError as exc: + raw = exc.read().decode("utf-8", errors="replace") + return exc.code, self._parse_json(raw, exc.code) + except urllib.error.URLError as exc: + raise B2Error(f"B2 request failed: {exc}") from exc + + @staticmethod + def _parse_json(text: str, status: int) -> dict: + if not text: + return {} + try: + parsed = json.loads(text) + except json.JSONDecodeError as exc: + snippet = text[:ERROR_BODY_MAX_CHARS] + raise B2Error( + f"B2 returned non-JSON body (HTTP {status}): {snippet}" + ) from exc + if not isinstance(parsed, dict): + raise B2Error(f"B2 returned a non-object JSON body (HTTP {status}).") + return parsed + + # -- authorization + cache ------------------------------------------------- + def _read_auth_cache(self) -> Optional[dict]: + if not AUTH_CACHE_FILE.exists(): + return None + try: + return 
def _write_auth_cache(self, cache: dict) -> None:
    """Persist the authorize result to AUTH_CACHE_FILE, owner-only.

    The cache holds a live bearer token (a secret). The file is opened with
    mode 0o600 at creation time, so on POSIX it is never readable by other
    users, not even for the moment between "create" and a later chmod.

    Args:
        cache: JSON-serializable auth state to store.

    Raises:
        OSError: if the cache directory or file cannot be created or written.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(cache, indent=2, sort_keys=True)

    # os.open applies the permission bits atomically with creation (subject to
    # the umask, which can only remove bits).
    fd = os.open(AUTH_CACHE_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        handle.write(payload)

    # The creation mode is ignored when the file already existed, so still
    # tighten a pre-existing cache written with looser permissions.
    # No-op / best-effort on platforms or filesystems that don't honor
    # POSIX mode bits (e.g. some Windows filesystems) — never fatal.
    try:
        os.chmod(AUTH_CACHE_FILE, 0o600)
    except OSError:
        pass
def _ensure_auth(self, force: bool = False) -> dict:
    """Make sure auth state is loaded, and return the auth dict in use.

    Unless ``force`` is set, a cached entry that `_auth_cache_fresh` accepts is
    reused; in every other case `_authorize` is called. The chosen dict is
    applied via `_apply_auth` before being returned.
    """
    cached = None if force else self._read_auth_cache()
    if cached and self._auth_cache_fresh(cached):
        auth = cached
    else:
        auth = self._authorize()
    self._apply_auth(auth)
    return auth
def call(self, method: str, params: Optional[dict] = None) -> dict:
    """Invoke one B2 method with the current auth token and return its body.

    A 401 whose B2 code is ``expired_auth_token`` or ``bad_auth_token``
    triggers exactly one forced re-authorization followed by one retry.

    Args:
        method: B2 method name, e.g. ``"b2_list_buckets"``.
        params: Request parameters; ``None`` or an empty dict sends ``{}``.

    Returns:
        The parsed response body of a 200 response.

    Raises:
        B2Error: for any final non-200 response, carrying the HTTP status and
            the B2 error code.
    """
    self._ensure_auth()
    payload = params or {}
    status, body = self._call_once(method, payload)

    token_rejected = (
        status == 401
        and isinstance(body, dict)
        and body.get("code") in ("expired_auth_token", "bad_auth_token")
    )
    if token_rejected:
        # Token rotated or expired: re-authorize exactly once, then retry.
        self._ensure_auth(force=True)
        status, body = self._call_once(method, payload)

    if status == 200:
        return body

    error_code = body.get("code") if isinstance(body, dict) else None
    raise B2Error(
        f"B2 {method} failed (HTTP {status}): {self._format_api_error(body)}",
        status=status,
        code=error_code,
    )
def list_keys(self, max_key_count: int = 1000) -> list[dict]:
    """Collect every application key via b2_list_keys.

    Requests are repeated, passing the previous reply's
    `nextApplicationKeyId` as `startApplicationKeyId`, until a reply carries
    no continuation id. `max_key_count` is sent as `maxKeyCount` on each
    request.
    """
    collected: list[dict] = []
    cursor: Optional[str] = None
    more = True
    while more:
        request: dict = {
            "accountId": self.account_id,
            "maxKeyCount": max_key_count,
        }
        if cursor:
            request["startApplicationKeyId"] = cursor
        page = self.call("b2_list_keys", request)
        collected += page.get("keys", []) or []
        cursor = page.get("nextApplicationKeyId")
        more = bool(cursor)
    return collected
def list_file_versions(
    self,
    bucket_id: str,
    prefix: Optional[str] = None,
    limit: Optional[int] = None,
    max_file_count: int = 10000,
    on_page: Optional[Callable[[int], None]] = None,
) -> list[dict]:
    """List ALL file versions in a bucket (b2_list_file_versions), paginated.

    Pagination passes BOTH nextFileName AND nextFileId back as
    startFileName / startFileId until nextFileName is empty. Every stored
    version counts toward billed storage, so size/cost must be summed over
    these entries (action == "upload"), not over latest names alone.

    Args:
        bucket_id: bucketId to list.
        prefix: optional fileName prefix filter; omitted from the request
            when empty.
        limit: optional cap on the number of entries returned. When it is a
            positive number, each request asks only for the entries still
            needed (never more than `max_file_count`), so a small limit does
            not fetch a full-size page that would be discarded. B2 counts
            large listing pages as multiple transactions; see the
            b2_list_file_versions reference.
        max_file_count: upper bound sent as `maxFileCount` per request.
        on_page: optional callback invoked after each page with the running
            number of entries collected so far (lets the CLI warn/progress on
            very large buckets).

    Returns:
        The collected version dicts, truncated to `limit` when one is given.
    """
    versions: list[dict] = []
    start_name: Optional[str] = None
    start_id: Optional[str] = None
    while True:
        page_size = max_file_count
        if limit is not None and limit > 0:
            # At the top of the loop fewer than `limit` entries are held
            # (otherwise we would already have returned), so this is >= 1.
            page_size = min(max_file_count, limit - len(versions))
        params: dict = {"bucketId": bucket_id, "maxFileCount": page_size}
        if prefix:
            params["prefix"] = prefix
        if start_name is not None:
            params["startFileName"] = start_name
        if start_id is not None:
            params["startFileId"] = start_id
        body = self.call("b2_list_file_versions", params)
        versions.extend(body.get("files", []) or [])
        if on_page is not None:
            on_page(len(versions))
        if limit is not None and len(versions) >= limit:
            return versions[:limit]
        start_name = body.get("nextFileName")
        start_id = body.get("nextFileId")
        if not start_name:
            break
    return versions
def bucket_size(
    self,
    bucket_id: str,
    on_page: Optional[Callable[[int], None]] = None,
) -> dict:
    """Summarize the stored size of one bucket.

    All versions are fetched with `list_file_versions` (forwarding
    `on_page`); only entries whose action is "upload" contribute to the
    totals.

    Returns a dict with:
        bytes: summed contentLength of the upload entries.
        gb / gib: that total divided by BYTES_PER_GB / BYTES_PER_GIB.
        version_count: number of upload entries.
        file_count: number of distinct fileName values among them.
        total_versions_seen: number of entries listed, whatever their action.
    """
    listed = self.list_file_versions(bucket_id, on_page=on_page)
    uploads = [entry for entry in listed if entry.get("action") == "upload"]

    size_bytes = 0
    seen_names = set()
    for entry in uploads:
        # A missing or null contentLength counts as zero bytes.
        size_bytes += int(entry.get("contentLength", 0) or 0)
        seen_names.add(entry.get("fileName"))

    return {
        "bytes": size_bytes,
        "gb": size_bytes / BYTES_PER_GB,
        "gib": size_bytes / BYTES_PER_GIB,
        "version_count": len(uploads),
        "file_count": len(seen_names),
        "total_versions_seen": len(listed),
    }
def update_bucket_lifecycle(
    self,
    bucket_id: str,
    lifecycle_rules: list[dict],
) -> dict:
    """Send a complete lifecycle-rules array to b2_update_bucket.

    IMPORTANT: `lifecycleRules` REPLACES the bucket's entire array; it is not
    additive. Callers are expected to read the current rules
    (get_bucket_with_revision), merge their change, and pass the full set.

    Only accountId, bucketId and lifecycleRules are sent. No
    `ifRevisionMatch` is included, because this account's b2_update_bucket
    rejects that field (HTTP 400 bad_request), so writes are last-write-wins.

    Returns the body of the b2_update_bucket reply (the updated bucket).
    """
    request = {
        "accountId": self.account_id,
        "bucketId": bucket_id,
        "lifecycleRules": lifecycle_rules,
    }
    return self.call("b2_update_bucket", request)
# --- auth / status ---
rc, out, err = check("status table", ["status"], want_rc=0, out_has="accountId")
if rc != 0:
    # The status command itself failed, so the accountId was never compared.
    # Record that as a failure (as the keys section does for a failed call)
    # rather than reporting an unverified PASS.
    record("status accountId match", False, "status call failed")
elif EXPECTED_ACCOUNT_ID not in out:
    record("status accountId match", False,
           f"expected accountId {EXPECTED_ACCOUNT_ID} not in status output")
else:
    record("status accountId match", True, "")
check("status json", ["status", "--json"], want_rc=0, out_json_ok=True)
# Bucket to size: the known-small one when it is listed, otherwise the first
# bucket returned. Nothing is chosen (and nothing recorded) without buckets.
KNOWN_SMALL_BUCKET = "ACG-IX"
smallest = None
if buckets:
    names = {entry.get("bucketName") for entry in buckets}
    smallest = (
        KNOWN_SMALL_BUCKET
        if KNOWN_SMALL_BUCKET in names
        else buckets[0].get("bucketName")
    )
    found = smallest is not None
    record("found small bucket to size", found,
           "" if smallest else "no buckets to size")
# --- report ---
# One line per recorded check, then the totals; the exit status is 0 only
# when every check passed.
print("\n==== b2 skill self-test ====")
total = len(results)
npass = len([r for r in results if r[0] == "PASS"])
for status, name, prob, _sample in results:
    suffix = f" -> {prob}" if prob else ""
    print(f"[{status}] {name}{suffix}")
print(f"\n{npass}/{total} passed, {total - npass} failed")
sys.exit(0 if npass == total else 1)