claudetools/.claude/skills/b2/scripts/selftest.py

#!/usr/bin/env python3
"""Read-only self-test harness for the b2 skill.

Runs each CLI command as an isolated subprocess and checks exit code + output
markers. Makes ZERO write calls: create/delete are only exercised in their
--confirm-absent refusal path (rc 3), and `raw` write-method gating is checked
without confirm. Sizes only one known-small bucket to stay cheap.

Asserts the known accountId is present and both known application keys
("cloudberrykey", "ClaudeTools") are listed. Prints a PASS/FAIL report.
"""
from __future__ import annotations

import json
import os
import subprocess
import sys

# Directory that contains this script; b2.py is resolved relative to it.
HERE = os.path.dirname(os.path.abspath(__file__))
# Path of the CLI script that run() executes for every check.
B2 = os.path.join(HERE, "b2.py")

# accountId that must appear in the `status` output (also sent as the body of
# the read-only `raw` b2_list_buckets call).
EXPECTED_ACCOUNT_ID = "46f69bc61163"
# Application key names that must all be present in the `keys --json` listing.
EXPECTED_KEY_NAMES = {"cloudberrykey", "ClaudeTools"}

# Accumulated outcomes: one (status, name, detail, sample) tuple per recorded
# check, appended by record() and summarized by the final report.
results = []


def _captured_text(data):
    """Return captured subprocess output as ``str`` ("" when nothing was captured)."""
    if data is None:
        return ""
    if isinstance(data, bytes):
        # TimeoutExpired may carry raw bytes even when text decoding was
        # requested, so decode here with the same settings run() uses.
        return data.decode("utf-8", errors="replace")
    return data


def run(args, timeout=300):
    """Run ``b2.py`` with *args* as an isolated subprocess and capture its output.

    The child is told to emit UTF-8 (``PYTHONIOENCODING``) and the captured
    streams are decoded as UTF-8 as well, so the result does not depend on the
    parent's locale encoding. Undecodable bytes are replaced rather than
    raising.

    Args:
        args: CLI arguments placed after the script path.
        timeout: Seconds to wait before the child is killed (default 300).

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. If the child exceeds
        *timeout*, the return code is 124 (a value no check in this harness
        expects), and a ``[TIMEOUT]`` line is appended to whatever stderr was
        captured, so the caller records a failure instead of the whole
        harness aborting without a report.
    """
    env = dict(os.environ)
    env["PYTHONIOENCODING"] = "utf-8"
    try:
        p = subprocess.run([sys.executable, B2] + list(args),
                           capture_output=True, encoding="utf-8",
                           errors="replace", env=env, timeout=timeout)
    except subprocess.TimeoutExpired as exc:
        out = _captured_text(exc.stdout)
        err = _captured_text(exc.stderr)
        err += f"\n[TIMEOUT] command exceeded {timeout}s"
        return 124, out, err
    return p.returncode, p.stdout, p.stderr


def record(name, ok, detail, sample=""):
    """Append one check outcome to the module-level ``results`` list.

    Args:
        name: Label of the check.
        ok: Truthy for PASS, falsy for FAIL.
        detail: Description of the problem(s); empty when there are none.
        sample: Output excerpt; newlines become spaces and the text is cut to
            120 characters before being stored.
    """
    one_line = " ".join(sample.split("\n"))
    verdict = "PASS" if ok else "FAIL"
    results.append((verdict, name, detail, one_line[:120]))


def check(name, args, *, want_rc=None, out_has=None, err_has=None,
          out_json_ok=False):
    """Run one CLI invocation, compare it to expectations, and record the result.

    Args:
        name: Label under which the outcome is recorded.
        args: CLI arguments handed to ``run``.
        want_rc: Expected exit code; ``None`` skips the exit-code comparison.
        out_has: Substring that must occur in stdout (skipped when falsy).
        err_has: Substring that must occur in stderr (skipped when falsy).
        out_json_ok: When true, stdout must parse as JSON.

    Returns:
        The ``(rc, out, err)`` tuple from ``run`` so callers can inspect it.
    """
    rc, out, err = run(args)
    failures = []

    if not (want_rc is None or rc == want_rc):
        failures.append(f"rc={rc} want {want_rc}")

    # Same substring test for both streams; only the label and text differ.
    for stream, needle, text in (("stdout", out_has, out),
                                 ("stderr", err_has, err)):
        if needle and needle not in text:
            failures.append(f"{stream} missing {needle!r}")

    if out_json_ok:
        try:
            json.loads(out)
        except Exception as exc:
            failures.append(f"stdout not valid JSON: {exc}")

    # The first 120 characters of stdout are kept as the recorded sample.
    record(name, len(failures) == 0, "; ".join(failures), out[:120])
    return rc, out, err


# --- auth / status ---
# The table output must mention accountId and contain the expected account id.
rc, out, err = check("status table", ["status"], want_rc=0, out_has="accountId")
if rc != 0:
    # Nothing was verified when the call itself failed, so this must not be
    # reported as a PASS.
    record("status accountId match", False, "status call failed")
elif EXPECTED_ACCOUNT_ID not in out:
    record("status accountId match", False,
           f"expected accountId {EXPECTED_ACCOUNT_ID} not in status output")
else:
    record("status accountId match", True, "")
check("status json", ["status", "--json"], want_rc=0, out_json_ok=True)

# --- buckets ---
# Exercise both output formats; the parsed JSON listing is kept in `buckets`
# for the bucket-scoped checks further down.
check("buckets table", ["buckets"], want_rc=0, out_has="Buckets:")
rc, out, err = check("buckets json", ["buckets", "--json"],
                     want_rc=0, out_json_ok=True)
try:
    buckets = json.loads(out) if rc == 0 else []
except Exception:
    # Unparseable output counts the same as an empty listing.
    buckets = []
if buckets:
    record("buckets non-empty", True, "")
else:
    record("buckets non-empty", False, "no buckets returned")

# --- keys: assert both known keys present ---
rc, out, err = check("keys json", ["keys", "--json"], want_rc=0, out_json_ok=True)
keys_label = "keys include known names"
if rc != 0:
    record(keys_label, False, "keys json call failed")
else:
    try:
        # Gather every keyName in the listing and compare with the expected set.
        listed = {entry.get("keyName") for entry in json.loads(out)}
        absent = EXPECTED_KEY_NAMES - listed
        record(keys_label, not absent, f"missing {absent}" if absent else "")
    except Exception as exc:
        record(keys_label, False, f"parse error: {exc}")

# --- bucket-size on a known-small bucket only (cheap) ---
# Discovering the smallest bucket by probing all 12 buckets with
# `files --limit 1000` costs one (paginating) list pass per bucket. What needs
# smoke-testing is the size/cost path itself, so size the known-small ACG-IX
# bucket directly: roughly 1 list pass instead of ~12, with the bucket-size /
# usage / cost assertions staying just as meaningful. If ACG-IX is ever
# removed, the first listed bucket is used instead.
KNOWN_SMALL_BUCKET = "ACG-IX"
smallest = None
if buckets:
    bucket_names = {entry.get("bucketName") for entry in buckets}
    smallest = (KNOWN_SMALL_BUCKET if KNOWN_SMALL_BUCKET in bucket_names
                else buckets[0].get("bucketName"))
    record("found small bucket to size", smallest is not None,
           "" if smallest else "no buckets to size")

if smallest:
    size_cmd = ["bucket-size", smallest]
    rc, out, err = check(f"bucket-size {smallest}", size_cmd,
                         want_rc=0, out_has="stored bytes:")
    check(f"bucket-size json {smallest}", size_cmd + ["--json"],
          want_rc=0, out_json_ok=True)

# --- usage scoped to the smallest bucket (cheap headline-feature smoke test) ---
if smallest:
    scope = ["--bucket", smallest]
    check("usage scoped json", ["usage"] + scope + ["--json"],
          want_rc=0, out_json_ok=True)
    check("usage scoped table", ["usage"] + scope,
          want_rc=0, out_has="TOTAL")
    check("cost alias scoped", ["cost"] + scope + ["--json"],
          want_rc=0, out_json_ok=True)

# --- error handling ---
# An unknown bucket is expected to exit 1 with "[ERROR]" on stderr.
for case_label, case_args in (
    ("files bogus bucket -> rc1", ["files", "no-such-bucket-xyz"]),
    ("usage bogus bucket -> rc1", ["usage", "--bucket", "no-such-bucket-xyz"]),
):
    check(case_label, case_args, want_rc=1, err_has="[ERROR]")

# --- argparse: missing required arg -> rc2 ---
for case_label, case_args in (
    ("files missing positional -> rc2", ["files"]),
    ("create-key missing --name -> rc2",
     ["create-key", "--capabilities", "listFiles"]),
):
    check(case_label, case_args, want_rc=2)

# --- gating: destructive without --confirm -> rc3, NO write call ---
# The third column is the stdout marker to require (None = exit code only).
for case_label, case_args, marker in (
    ("create-bucket no confirm -> rc3", ["create-bucket", "X"], "Would"),
    ("create-key no confirm -> rc3",
     ["create-key", "--name", "X", "--capabilities", "listFiles"], "Would"),
    ("delete-key no confirm -> rc3", ["delete-key", "000bogus"], None),
):
    check(case_label, case_args, want_rc=3, out_has=marker)

# --- lifecycle listing, then delete-prefix / lifecycle-remove REFUSAL paths ---
# Everything here needs a bucket name. None of the commands is given --confirm,
# so none of them writes.
if smallest:
    # Read-only listing on the small bucket, in both output formats.
    check(f"lifecycle {smallest} table", ["lifecycle", smallest],
          want_rc=0, out_has="Lifecycle rules:")
    check(f"lifecycle {smallest} json", ["lifecycle", smallest, "--json"],
          want_rc=0, out_json_ok=True)

    # delete-prefix with a valid-looking machine prefix and no --confirm:
    # rc3, printing the WOULD-add line and the irreversible-deletion warning.
    # No rule is added.
    check("delete-prefix no confirm -> rc3 (would add + warning)",
          ["delete-prefix", smallest, "MBS-00000000/CBB_SELFTEST/"],
          want_rc=3, out_has="Would add purge rule")

    # Too-broad prefixes HARD-FAIL with rc2 even without --confirm, because
    # validation runs first. Nothing is written by any of these.
    for case_label, prefix, needle in (
        ("delete-prefix empty -> rc2 (too broad)", "", "too broad"),
        ("delete-prefix no-slash -> rc2 (too broad)", "MBS-noslash", "no '/'"),
        ("delete-prefix account-root -> rc2 (needs --allow-account-root)",
         "MBS-00000000/", "account root"),
    ):
        check(case_label, ["delete-prefix", smallest, prefix],
              want_rc=2, err_has=needle)

    # lifecycle-remove: refusal path only (no --confirm, never writes).
    check("lifecycle-remove no confirm -> rc3",
          ["lifecycle-remove", smallest, "MBS-00000000/CBB_SELFTEST/"],
          want_rc=3, out_has="Refusing")

# --- raw gating ---
# Write-type methods without confirm are expected to be refused with rc3.
for case_label, gated_method in (
    ("raw write method no confirm -> rc3", "b2_delete_bucket"),
    ("raw update_bucket gated -> rc3", "b2_update_bucket"),
):
    check(case_label, ["raw", "--method", gated_method, "--body", "{}"],
          want_rc=3)
# A malformed JSON body is expected to be rejected with rc2.
check("raw bad json body -> rc2",
      ["raw", "--method", "b2_list_buckets", "--body", "{bad"], want_rc=2)
# A read-only method with a well-formed body must succeed and print JSON.
read_body = json.dumps({"accountId": EXPECTED_ACCOUNT_ID})
check("raw read ok",
      ["raw", "--method", "b2_list_buckets", "--body", read_body],
      want_rc=0, out_json_ok=True)

# --- report ---
# One line per recorded check, then a summary. The process exits 0 only when
# every check passed, otherwise 1.
print("\n==== b2 skill self-test ====")
total = len(results)
passed = 0
for verdict, label, detail, _sample in results:
    if verdict == "PASS":
        passed += 1
    suffix = f"  -> {detail}" if detail else ""
    print(f"[{verdict}] {label}{suffix}")
failed = total - passed
print(f"\n{passed}/{total} passed, {failed} failed")
sys.exit(1 if failed else 0)