Files
claudetools/.claude/skills/b2/scripts/b2_client.py
Mike Swanson 96fb4110ea Add b2 skill: Backblaze B2 management CLI (storage cost, prefix purge)
B2 Native API v3 client for the ACG B2 account: status, buckets, keys,
files, bucket-size, usage/cost ($0.00695/GB), gated create/delete bucket+key,
and gated lifecycle-based delete-prefix/lifecycle-remove for prefix purges.
Read-only by default; destructive ops require --confirm.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-01 14:31:09 -07:00

675 lines
26 KiB
Python

#!/usr/bin/env python3
"""Backblaze B2 Native API v3 client for the b2 skill.
Standalone. Talks to the live ACG Backblaze B2 account. Read-only helpers run
freely; state-changing helpers return the raw upstream result and the CLI caller
is responsible for gating them behind --confirm.
Transport: prefers httpx if installed, else falls back to stdlib urllib so the
script has no hard third-party dependency.
Credentials: never hardcoded. The key id + application key are loaded at runtime
from the SOPS vault, or from the B2_KEY_ID / B2_APPLICATION_KEY env vars (testing
override). Authorization is HTTP Basic against b2_authorize_account; the returned
authorizationToken + apiUrl + accountId are then cached locally (the token is a
secret — the cache file is gitignored and must never be committed).
"""
from __future__ import annotations
import base64
import json
import os
import subprocess
import sys
import urllib.error
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Optional
# --- optional httpx -----------------------------------------------------------
# urllib (stdlib) is always available as the fallback transport; httpx is used
# when present for connection pooling/timeouts.
try:
    import httpx  # type: ignore
except ImportError:  # pragma: no cover - depends on environment
    # Keep the name bound so references to `httpx` never raise NameError;
    # callers must still branch on _HAS_HTTPX before using it.
    httpx = None  # type: ignore
    _HAS_HTTPX = False
else:
    _HAS_HTTPX = True
# Cap upstream error bodies surfaced in exceptions. The cached auth token can be
# echoed by some endpoints; bound the blast radius rather than echo full bodies.
# Applied both to non-JSON body snippets and to formatted B2 API error strings.
ERROR_BODY_MAX_CHARS = 600
# --- constants ----------------------------------------------------------------
# The B2 authorize endpoint defaults to the global Backblaze URL below; the
# B2_AUTHORIZE_URL environment variable, when set, replaces it. The per-account
# apiUrl is NOT hardcoded: it comes from the live authorize response
# (apiInfo.storageApi.apiUrl) per project rule (no config file for endpoints).
B2_AUTHORIZE_URL = os.environ.get(
"B2_AUTHORIZE_URL",
"https://api.backblazeb2.com/b2api/v3/b2_authorize_account",
)
# Default overall request timeout (seconds) for B2Client; used by both the
# httpx and the urllib transports.
B2_TIMEOUT_SECONDS = 120.0
# Default connect timeout (seconds) for B2Client; only the httpx transport
# applies it (the urllib fallback takes a single timeout value).
B2_CONNECT_TIMEOUT_SECONDS = 15.0
# Vault entry and field names passed to the vault wrapper's `get-field` command
# when the B2_KEY_ID / B2_APPLICATION_KEY env overrides are not both set.
VAULT_ENTRY = "projects/claudetools/backblaze-b2.sops.yaml"
VAULT_FIELD_KEY_ID = "key_id"
VAULT_FIELD_APP_KEY = "credentials.application_key"
# ACG's Backblaze B2 cost basis, USD per GB stored. Recorded in
# .claude/memory/reference_backblaze_storage_rate.md and used by the GuruRMM
# mspbackups storage-cost calc. GB here is decimal (bytes / 1e9), matching how
# storage providers bill (NOT 2^30). Override at the CLI with --rate.
RATE_PER_GB_USD = 0.00695
# Treat a cached auth token as valid for ~23h (B2 tokens last 24h); re-authorize
# when stale or on a 401 expired/bad-token error.
AUTH_TTL_SECONDS = 23 * 3600
# Decimal vs binary divisors for size reporting.
BYTES_PER_GB = 1_000_000_000 # 1e9, decimal GB (billing unit)
BYTES_PER_GIB = 1024 ** 3 # binary GiB (human reference)
# Skill directory = two levels above this file's resolved location
# (<skill>/scripts/<this file> -> <skill>). The auth cache lives in a ".cache"
# folder inside it.
SKILL_DIR = Path(__file__).resolve().parent.parent
CACHE_DIR = SKILL_DIR / ".cache"
# JSON file holding the cached authorize result (includes the bearer token).
AUTH_CACHE_FILE = CACHE_DIR / "auth.json"
class B2Error(RuntimeError):
    """Error type for transport, authorization, and B2 API failures.

    Attributes:
        status: HTTP status of the B2 API response that produced the error,
            or None when the failure was a transport/parse problem.
        code: B2 error code string (e.g. "bad_request"), or None when absent.

    Carrying both lets callers branch on a specific failure without having to
    string-match the message.
    """

    def __init__(
        self,
        message: str,
        *,
        status: Optional[int] = None,
        code: Optional[str] = None,
    ):
        # The message alone becomes the exception's args / str() value.
        super().__init__(message)
        self.code = code
        self.status = status
# --- credential loading -------------------------------------------------------
def _resolve_claudetools_root() -> Path:
    """Resolve the ClaudeTools repo root: env var, then identity.json, then derived.

    Resolution order:
      1. The CLAUDETOOLS_ROOT environment variable, when set and non-empty.
      2. The "claudetools_root" string in <derived root>/.claude/identity.json.
      3. The root derived from this file's location
         (.claude/skills/b2/scripts -> repo root), so it works on the Mac/Linux
         fleet, not only the Windows default. No hardcoded drive letters.

    An identity.json that is missing, unreadable, not valid UTF-8 JSON, not a
    JSON object, or whose "claudetools_root" is not a non-empty string is
    ignored and the derived root is returned.
    """
    env_root = os.environ.get("CLAUDETOOLS_ROOT")
    if env_root:
        return Path(env_root)

    # SKILL_DIR = .../.claude/skills/b2 ; root is three levels up.
    derived_root = SKILL_DIR.parent.parent.parent
    identity_path = derived_root / ".claude" / "identity.json"
    try:
        data = json.loads(identity_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return derived_root

    # Only trust a JSON object carrying a non-empty string; anything else
    # (list, number, null, ...) falls back to the derived root.
    root = data.get("claudetools_root") if isinstance(data, dict) else None
    if isinstance(root, str) and root:
        return Path(root)
    return derived_root
def _vault_get_field(field: str) -> str:
    """Read one field of the B2 vault entry through the ClaudeTools vault wrapper.

    Runs `bash <root>/.claude/scripts/vault.sh get-field <VAULT_ENTRY> <field>`
    with a 60 second timeout and returns its stripped stdout.

    Raises:
        B2Error: the wrapper script is missing, `bash` is not on PATH, the call
            times out, the wrapper exits non-zero, or it prints an empty value.
    """
    wrapper = _resolve_claudetools_root() / ".claude" / "scripts" / "vault.sh"
    if not wrapper.exists():
        raise B2Error(
            f"Cannot load B2 credential: vault wrapper not found at {wrapper} "
            "and the B2_KEY_ID / B2_APPLICATION_KEY env vars are not set."
        )

    # Argument list (no shell string), so the field name is never shell-parsed.
    command = ["bash", str(wrapper), "get-field", VAULT_ENTRY, field]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=60)
    except FileNotFoundError as exc:
        raise B2Error(
            "Cannot load B2 credential: 'bash' not found on PATH. Install Git Bash "
            "or set B2_KEY_ID / B2_APPLICATION_KEY."
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise B2Error("Cannot load B2 credential: vault call timed out.") from exc

    if proc.returncode != 0:
        stderr_text = proc.stderr.strip()
        raise B2Error(
            f"Cannot load B2 credential '{field}' from vault "
            f"(exit {proc.returncode}): {stderr_text}"
        )

    secret = proc.stdout.strip()
    if secret:
        return secret
    raise B2Error(f"Vault returned an empty value for '{field}'.")
def load_credentials() -> tuple[str, str]:
    """Load (key_id, application_key).

    Order: B2_KEY_ID / B2_APPLICATION_KEY env overrides (both must be set to a
    non-blank value to use the override), then the SOPS vault wrapper. Never
    returns empty values.

    Raises:
        B2Error: propagated from the vault lookup when the override is not used
            and a field cannot be loaded.
    """
    # Strip BEFORE the truthiness check: a whitespace-only env var must not
    # count as "set", otherwise an empty credential would be returned.
    env_key_id = (os.environ.get("B2_KEY_ID") or "").strip()
    env_app_key = (os.environ.get("B2_APPLICATION_KEY") or "").strip()
    if env_key_id and env_app_key:
        return env_key_id, env_app_key

    key_id = _vault_get_field(VAULT_FIELD_KEY_ID)
    app_key = _vault_get_field(VAULT_FIELD_APP_KEY)
    return key_id, app_key
# --- client -------------------------------------------------------------------
class B2Client:
    """Thin client over the B2 Native API v3.

    Authorization is lazy and cached. The first call that needs the account
    authorizes (or loads a fresh cached token); a 401 with an expired/bad token
    code triggers exactly one re-authorize + retry.

    NOTE(review): the on-disk auth cache does not record which key id produced
    it, so a fresh cached token is reused even when different credentials are
    supplied — confirm that is acceptable for callers that switch keys.
    """

    # Fields a cached authorize result must carry before it is trusted.
    _REQUIRED_AUTH_FIELDS = ("authorizationToken", "apiUrl", "accountId")

    def __init__(
        self,
        key_id: Optional[str] = None,
        application_key: Optional[str] = None,
        timeout: float = B2_TIMEOUT_SECONDS,
        connect_timeout: float = B2_CONNECT_TIMEOUT_SECONDS,
    ):
        """Store credentials/timeouts; performs no I/O.

        Args:
            key_id: B2 application key id; loaded lazily via load_credentials()
                when omitted.
            application_key: B2 application key; loaded lazily when omitted.
            timeout: overall request timeout in seconds.
            connect_timeout: connect timeout in seconds (httpx transport only).
        """
        self._key_id = key_id
        self._application_key = application_key
        self.timeout = timeout
        self.connect_timeout = connect_timeout
        # Populated by _ensure_auth().
        self._auth_token: Optional[str] = None
        self._api_url: Optional[str] = None
        self._account_id: Optional[str] = None
        self._auth_info: Optional[dict] = None

    # -- credentials -----------------------------------------------------------
    def _load_creds(self) -> tuple[str, str]:
        """Return (key_id, application_key), loading both if either is missing."""
        if not self._key_id or not self._application_key:
            self._key_id, self._application_key = load_credentials()
        return self._key_id, self._application_key

    # -- low-level HTTP --------------------------------------------------------
    def _http_post(
        self,
        url: str,
        *,
        body: Optional[bytes] = None,
        headers: Optional[dict] = None,
        basic_auth: Optional[tuple[str, str]] = None,
    ) -> tuple[int, dict]:
        """POST and return (status_code, parsed_json).

        Raises B2Error on transport failure or unparseable body. HTTP 4xx/5xx
        are returned (not raised) so the caller can inspect the B2 error code
        for the auth-retry path.

        Args:
            url: absolute URL to POST to.
            body: raw request body bytes, or None.
            headers: extra headers; Content-Type defaults to application/json.
            basic_auth: (user, password) pair; when given, it sets the
                Authorization header to HTTP Basic (overriding any supplied).
        """
        hdrs = dict(headers or {})
        hdrs.setdefault("Content-Type", "application/json")
        if basic_auth is not None:
            token = base64.b64encode(
                f"{basic_auth[0]}:{basic_auth[1]}".encode("utf-8")
            ).decode("ascii")
            hdrs["Authorization"] = f"Basic {token}"

        if _HAS_HTTPX:
            try:
                timeout = httpx.Timeout(self.timeout, connect=self.connect_timeout)
                with httpx.Client(timeout=timeout) as client:
                    resp = client.post(url, content=body, headers=hdrs)
                    return resp.status_code, self._parse_json(
                        resp.text, resp.status_code
                    )
            except httpx.TimeoutException as exc:
                raise B2Error(f"B2 request timed out: {exc}") from exc
            except httpx.HTTPError as exc:
                raise B2Error(f"B2 request failed: {exc}") from exc

        # stdlib fallback
        req = urllib.request.Request(url, data=body, method="POST", headers=hdrs)
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
                status = resp.getcode()
                raw = resp.read().decode("utf-8", errors="replace")
        except urllib.error.HTTPError as exc:
            # 4xx/5xx: the exception doubles as the response object.
            status = exc.code
            try:
                raw = exc.read().decode("utf-8", errors="replace")
            except OSError as read_exc:
                raise B2Error(f"B2 request failed: {read_exc}") from read_exc
        except OSError as exc:
            # URLError is an OSError subclass; plain OSError additionally
            # covers socket timeouts / resets raised while reading the body,
            # which urllib does not wrap in URLError.
            raise B2Error(f"B2 request failed: {exc}") from exc
        return status, self._parse_json(raw, status)

    @staticmethod
    def _parse_json(text: str, status: int) -> dict:
        """Parse a response body into a dict; an empty body yields {}.

        Raises:
            B2Error: the body is not JSON (a bounded snippet is included) or is
                JSON but not an object.
        """
        if not text:
            return {}
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as exc:
            snippet = text[:ERROR_BODY_MAX_CHARS]
            raise B2Error(
                f"B2 returned non-JSON body (HTTP {status}): {snippet}"
            ) from exc
        if not isinstance(parsed, dict):
            raise B2Error(f"B2 returned a non-object JSON body (HTTP {status}).")
        return parsed

    # -- authorization + cache -------------------------------------------------
    def _read_auth_cache(self) -> Optional[dict]:
        """Return the cached authorize result, or None if absent/unusable.

        A missing, unreadable, undecodable, or non-object cache file is treated
        as "no cache" rather than an error.
        """
        try:
            cache = json.loads(AUTH_CACHE_FILE.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return None
        return cache if isinstance(cache, dict) else None

    def _write_auth_cache(self, cache: dict) -> None:
        """Persist the authorize result to AUTH_CACHE_FILE, owner-readable only.

        The cache holds a live bearer token (a secret). It is written to a
        temp file that is created with mode 0600 and then atomically moved into
        place, so the token is never on disk with wider permissions and readers
        never observe a partially written file.
        """
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(cache, indent=2, sort_keys=True)
        # Per-process temp name so concurrent writers do not share a file.
        tmp_path = AUTH_CACHE_FILE.with_name(
            f"{AUTH_CACHE_FILE.name}.{os.getpid()}.tmp"
        )
        fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            # The creation mode is ignored when a stale temp file already
            # exists, so tighten it explicitly BEFORE writing the secret.
            # No-op / best-effort on platforms or filesystems that don't honor
            # POSIX mode bits (e.g. some Windows filesystems) — never fatal.
            try:
                os.chmod(tmp_path, 0o600)
            except OSError:
                pass
            handle.write(payload)
        os.replace(tmp_path, AUTH_CACHE_FILE)

    @staticmethod
    def _auth_cache_fresh(cache: dict) -> bool:
        """Return True when cache["authorized_at"] is younger than AUTH_TTL_SECONDS.

        A missing, non-string, or unparseable timestamp counts as stale. A
        timestamp without tzinfo is interpreted as UTC.
        """
        if not isinstance(cache, dict):
            return False
        ts = cache.get("authorized_at")
        if not ts:
            return False
        try:
            when = datetime.fromisoformat(ts)
        except (TypeError, ValueError):
            # TypeError: the stored value is not a string (corrupt cache).
            return False
        if when.tzinfo is None:
            when = when.replace(tzinfo=timezone.utc)
        age = (datetime.now(timezone.utc) - when).total_seconds()
        return age < AUTH_TTL_SECONDS

    def _authorize(self) -> dict:
        """Call b2_authorize_account with HTTP Basic and cache the v3 result.

        Returns:
            The cache dict that was written to disk.

        Raises:
            B2Error: non-200 response, or a response lacking
                apiUrl/accountId/authorizationToken.
        """
        key_id, app_key = self._load_creds()
        # b2_authorize_account rejects an empty request body ("object should
        # start with brace") — send an explicit empty JSON object.
        status, body = self._http_post(
            B2_AUTHORIZE_URL,
            body=b"{}",
            basic_auth=(key_id, app_key),
        )
        if status != 200:
            detail = self._format_api_error(body)
            raise B2Error(f"b2_authorize_account failed (HTTP {status}): {detail}")
        # v3 nests apiUrl/downloadUrl under apiInfo.storageApi (v2 had them at top).
        storage = (body.get("apiInfo") or {}).get("storageApi") or {}
        api_url = storage.get("apiUrl")
        account_id = body.get("accountId")
        token = body.get("authorizationToken")
        if not api_url or not account_id or not token:
            raise B2Error(
                "b2_authorize_account response missing apiUrl/accountId/token "
                "(unexpected v3 shape)."
            )
        cache = {
            "authorized_at": datetime.now(timezone.utc).isoformat(),
            "accountId": account_id,
            "authorizationToken": token,
            "apiUrl": api_url,
            "s3ApiUrl": storage.get("s3ApiUrl"),
            "downloadUrl": storage.get("downloadUrl"),
            "recommendedPartSize": storage.get("recommendedPartSize"),
            "absoluteMinimumPartSize": storage.get("absoluteMinimumPartSize"),
            "capabilities": storage.get("capabilities") or [],
            "bucketId": storage.get("bucketId"),
            "namePrefix": storage.get("namePrefix"),
        }
        self._write_auth_cache(cache)
        return cache

    def _ensure_auth(self, force: bool = False) -> dict:
        """Populate auth state from a fresh cache or a new authorize call.

        Args:
            force: skip the cache and always re-authorize.

        A cached entry is only used when it is fresh AND carries every field in
        _REQUIRED_AUTH_FIELDS; an incomplete cache is treated like a stale one.
        """
        if not force:
            cache = self._read_auth_cache()
            if (
                cache
                and self._auth_cache_fresh(cache)
                and all(cache.get(name) for name in self._REQUIRED_AUTH_FIELDS)
            ):
                self._apply_auth(cache)
                return cache
        cache = self._authorize()
        self._apply_auth(cache)
        return cache

    def _apply_auth(self, cache: dict) -> None:
        """Copy token/apiUrl/accountId from an authorize result onto the client."""
        self._auth_token = cache.get("authorizationToken")
        self._api_url = cache.get("apiUrl")
        self._account_id = cache.get("accountId")
        self._auth_info = cache

    @property
    def account_id(self) -> str:
        """Account id from the authorize result (authorizes on first access)."""
        if not self._account_id:
            self._ensure_auth()
        if not self._account_id:
            # Explicit raise instead of `assert`, which is stripped under -O.
            raise B2Error("B2 authorization did not yield an accountId.")
        return self._account_id

    @property
    def auth_info(self) -> dict:
        """Full cached authorize result (authorizes on first access)."""
        if self._auth_info is None:
            self._ensure_auth()
        if self._auth_info is None:
            raise B2Error("B2 authorization did not yield any auth info.")
        return self._auth_info

    # -- API call --------------------------------------------------------------
    @staticmethod
    def _format_api_error(body: dict) -> str:
        """B2 errors are {status, code, message}. Surface them verbatim.

        The result is truncated to ERROR_BODY_MAX_CHARS.
        """
        if not isinstance(body, dict):
            return str(body)[:ERROR_BODY_MAX_CHARS]
        code = body.get("code", "?")
        message = body.get("message", "")
        status = body.get("status", "?")
        return f"status={status} code={code} message={message}"[:ERROR_BODY_MAX_CHARS]

    def call(self, method: str, params: Optional[dict] = None) -> dict:
        """POST to <apiUrl>/b2api/v3/<method> with the auth token.

        On a 401 with an expired/bad token code, re-authorize once and retry.
        Other non-200 responses raise with the verbatim B2 error.

        Args:
            method: B2 API method name, e.g. "b2_list_buckets".
            params: JSON-serializable request parameters (defaults to {}).

        Returns:
            The parsed JSON response object.

        Raises:
            B2Error: transport failure, or a non-200 response (with `status`
                and `code` populated).
        """
        self._ensure_auth()
        body_dict = params or {}
        status, body = self._call_once(method, body_dict)
        if status == 401 and isinstance(body, dict) and body.get("code") in (
            "expired_auth_token",
            "bad_auth_token",
        ):
            # Token rotated or expired: re-authorize exactly once, then retry.
            self._ensure_auth(force=True)
            status, body = self._call_once(method, body_dict)
        if status != 200:
            code = body.get("code") if isinstance(body, dict) else None
            raise B2Error(
                f"B2 {method} failed (HTTP {status}): {self._format_api_error(body)}",
                status=status,
                code=code,
            )
        return body

    def _call_once(self, method: str, params: dict) -> tuple[int, dict]:
        """Issue a single authorized POST for `method`; no retry logic."""
        if not self._api_url or not self._auth_token:
            self._ensure_auth()
        if not self._api_url or not self._auth_token:
            # Explicit raise instead of `assert`, which is stripped under -O.
            raise B2Error("B2 authorization state is incomplete (no apiUrl/token).")
        url = f"{self._api_url}/b2api/v3/{method}"
        data = json.dumps(params).encode("utf-8")
        return self._http_post(
            url, body=data, headers={"Authorization": self._auth_token}
        )

    @staticmethod
    def _page_size(max_file_count: int, limit: Optional[int], have: int) -> int:
        """Return the page size to request given what has been collected.

        With a positive `limit`, never ask for more rows than are still needed;
        otherwise request `max_file_count` unchanged.
        """
        if limit is None or limit <= 0:
            return max_file_count
        return min(max_file_count, limit - have)

    # ======================================================================
    # READ METHODS (safe)
    # ======================================================================
    def list_buckets(self) -> list[dict]:
        """Return every bucket of the account (b2_list_buckets)."""
        body = self.call("b2_list_buckets", {"accountId": self.account_id})
        return body.get("buckets", []) or []

    def get_bucket_with_revision(self, bucket_id: str) -> dict:
        """Return the full bucket object for one bucketId (b2_list_buckets scoped).

        The returned dict carries `.lifecycleRules` (the current array) and
        `.revision` (an int). The current rules are read so a write can merge
        the desired change into the complete set (lifecycleRules REPLACES the
        whole array). NOTE: this account's b2_update_bucket does NOT accept
        `ifRevisionMatch`, so the `revision` value is informational only and is
        never sent back — updates are last-write-wins.

        Raises:
            B2Error: the API returned no bucket for `bucket_id`.
        """
        body = self.call(
            "b2_list_buckets",
            {"accountId": self.account_id, "bucketId": bucket_id},
        )
        buckets = body.get("buckets", []) or []
        if not buckets:
            raise B2Error(
                f"b2_list_buckets returned no bucket for bucketId '{bucket_id}'."
            )
        return buckets[0]

    def list_keys(self, max_key_count: int = 1000) -> list[dict]:
        """List all application keys, paginating on nextApplicationKeyId."""
        keys: list[dict] = []
        start: Optional[str] = None
        while True:
            params: dict = {
                "accountId": self.account_id,
                "maxKeyCount": max_key_count,
            }
            if start:
                params["startApplicationKeyId"] = start
            body = self.call("b2_list_keys", params)
            keys.extend(body.get("keys", []) or [])
            start = body.get("nextApplicationKeyId")
            if not start:
                break
        return keys

    def resolve_bucket(self, bucket_name: str) -> dict:
        """Return the bucket dict for a bucket name, or raise B2Error."""
        for b in self.list_buckets():
            if b.get("bucketName") == bucket_name:
                return b
        raise B2Error(f"No bucket named '{bucket_name}' in this account.")

    def list_file_names(
        self,
        bucket_id: str,
        prefix: Optional[str] = None,
        limit: Optional[int] = None,
        max_file_count: int = 10000,
    ) -> list[dict]:
        """List latest file names in a bucket (b2_list_file_names), paginated.

        Use for a quick file listing; for SIZE/COST use list_file_versions, since
        B2 bills every stored version, not just the latest.

        Args:
            bucket_id: bucket to list.
            prefix: only names starting with this prefix, when given.
            limit: stop after this many entries (None = list everything).
            max_file_count: upper bound on the page size per API call; a
                positive `limit` lowers the requested page size to what is
                still needed.
        """
        files: list[dict] = []
        start_name: Optional[str] = None
        while True:
            params: dict = {
                "bucketId": bucket_id,
                "maxFileCount": self._page_size(max_file_count, limit, len(files)),
            }
            if prefix:
                params["prefix"] = prefix
            if start_name is not None:
                params["startFileName"] = start_name
            body = self.call("b2_list_file_names", params)
            files.extend(body.get("files", []) or [])
            if limit is not None and len(files) >= limit:
                return files[:limit]
            start_name = body.get("nextFileName")
            if not start_name:
                break
        return files

    def list_file_versions(
        self,
        bucket_id: str,
        prefix: Optional[str] = None,
        limit: Optional[int] = None,
        max_file_count: int = 10000,
        on_page: Optional[Callable[[int], None]] = None,
    ) -> list[dict]:
        """List ALL file versions in a bucket (b2_list_file_versions), paginated.

        Pagination uses BOTH nextFileName AND nextFileId until nextFileName is
        null. Every stored version counts toward billed storage, so size/cost must
        be summed over these (action == "upload"), not over latest names alone.

        on_page(count) is called after each page with the running version count
        (lets the CLI warn/progress on very large buckets).

        `limit` and `max_file_count` behave as in list_file_names.
        """
        versions: list[dict] = []
        start_name: Optional[str] = None
        start_id: Optional[str] = None
        while True:
            params: dict = {
                "bucketId": bucket_id,
                "maxFileCount": self._page_size(
                    max_file_count, limit, len(versions)
                ),
            }
            if prefix:
                params["prefix"] = prefix
            if start_name is not None:
                params["startFileName"] = start_name
            if start_id is not None:
                params["startFileId"] = start_id
            body = self.call("b2_list_file_versions", params)
            versions.extend(body.get("files", []) or [])
            if on_page is not None:
                on_page(len(versions))
            if limit is not None and len(versions) >= limit:
                return versions[:limit]
            start_name = body.get("nextFileName")
            start_id = body.get("nextFileId")
            if not start_name:
                break
        return versions

    def get_file_info(self, file_id: str) -> dict:
        """Return the b2_get_file_info response for one fileId."""
        return self.call("b2_get_file_info", {"fileId": file_id})

    # ======================================================================
    # SIZE / COST
    # ======================================================================
    @staticmethod
    def stored_bytes(versions: list[dict]) -> int:
        """Sum contentLength over versions with action == 'upload' (billed objects).

        'hide' / 'start' / 'folder' actions are not billed stored objects, so they
        are excluded. A missing or null contentLength counts as 0.
        """
        return sum(
            int(v.get("contentLength", 0) or 0)
            for v in versions
            if v.get("action") == "upload"
        )

    def bucket_size(
        self,
        bucket_id: str,
        on_page: Optional[Callable[[int], None]] = None,
    ) -> dict:
        """Compute total stored bytes, GB/GiB, and version/file counts for a bucket.

        Returns a dict with keys: bytes, gb (decimal), gib (binary),
        version_count (upload versions), file_count (distinct upload file
        names), total_versions_seen (all listed entries, any action).
        """
        versions = self.list_file_versions(bucket_id, on_page=on_page)
        upload_versions = [v for v in versions if v.get("action") == "upload"]
        total_bytes = self.stored_bytes(upload_versions)
        distinct_names = {v.get("fileName") for v in upload_versions}
        return {
            "bytes": total_bytes,
            "gb": total_bytes / BYTES_PER_GB,
            "gib": total_bytes / BYTES_PER_GIB,
            "version_count": len(upload_versions),
            "file_count": len(distinct_names),
            "total_versions_seen": len(versions),
        }

    # ======================================================================
    # STATE-CHANGING METHODS (caller MUST gate behind --confirm)
    # ======================================================================
    def create_bucket(self, bucket_name: str, bucket_type: str = "allPrivate") -> dict:
        """Create a bucket (b2_create_bucket) and return the API response."""
        return self.call(
            "b2_create_bucket",
            {
                "accountId": self.account_id,
                "bucketName": bucket_name,
                "bucketType": bucket_type,
            },
        )

    def create_key(
        self,
        key_name: str,
        capabilities: list[str],
        bucket_id: Optional[str] = None,
        name_prefix: Optional[str] = None,
        valid_duration_seconds: Optional[int] = None,
    ) -> dict:
        """Create an application key (b2_create_key) and return the API response.

        bucketId, namePrefix and validDurationInSeconds are only sent when the
        corresponding argument is provided.
        """
        params: dict = {
            "accountId": self.account_id,
            "keyName": key_name,
            "capabilities": capabilities,
        }
        if bucket_id:
            params["bucketId"] = bucket_id
        if name_prefix:
            params["namePrefix"] = name_prefix
        if valid_duration_seconds is not None:
            params["validDurationInSeconds"] = valid_duration_seconds
        return self.call("b2_create_key", params)

    def delete_bucket(self, bucket_id: str) -> dict:
        """Delete a bucket (b2_delete_bucket) and return the API response."""
        return self.call(
            "b2_delete_bucket",
            {"accountId": self.account_id, "bucketId": bucket_id},
        )

    def delete_key(self, application_key_id: str) -> dict:
        """Delete an application key (b2_delete_key) and return the API response."""
        return self.call(
            "b2_delete_key", {"applicationKeyId": application_key_id}
        )

    def delete_file_version(self, file_name: str, file_id: str) -> dict:
        """Delete one file version (b2_delete_file_version)."""
        return self.call(
            "b2_delete_file_version",
            {"fileName": file_name, "fileId": file_id},
        )

    def update_bucket_lifecycle(
        self,
        bucket_id: str,
        lifecycle_rules: list[dict],
    ) -> dict:
        """Write the FULL lifecycle-rules array onto a bucket (b2_update_bucket).

        IMPORTANT: lifecycleRules REPLACES the entire array — it is NOT additive.
        Callers must read the current rules (get_bucket_with_revision), merge the
        desired change, and pass the complete set back here.

        This account's b2_update_bucket rejects `ifRevisionMatch` (HTTP 400
        bad_request: "unknown field ... ifRevisionMatch"), so no optimistic-lock
        token is sent. Writes are therefore last-write-wins; the read-merge-write
        of the complete rules array is what keeps pre-existing rules intact.

        Returns the updated bucket object.
        """
        return self.call(
            "b2_update_bucket",
            {
                "accountId": self.account_id,
                "bucketId": bucket_id,
                "lifecycleRules": lifecycle_rules,
            },
        )
def main() -> int:
    """Minimal self-check: authorize and report transport + account id.

    Returns 0 after printing the transport and account details, or 1 after
    printing the B2Error message to stderr.
    """
    try:
        info = B2Client().auth_info
    except B2Error as exc:
        print(f"[ERROR] {exc}", file=sys.stderr)
        return 1

    transport = "httpx" if _HAS_HTTPX else "urllib"
    print("[OK] authorized; transport =", transport)
    print(f"[INFO] accountId={info.get('accountId')} apiUrl={info.get('apiUrl')}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())