fix(bitdefender): third-pass - companies pagination, connect-retry, Retry-After ceiling, ctx mgr
Remaining LOW/NIT items from the second review pass: - list_all_companies() paginates the company list; sweep-all + refresh_inventory no longer truncate a >100-company tenant. - Pre-send connection failures (httpx ConnectError/ConnectTimeout; urllib URLError not wrapping a timeout) are now retried as 'connect' - always safe (no side effect) even for non-idempotent writes; ambiguous read-timeouts stay idempotent-gated. - Explicit Retry-After honored up to RETRY_AFTER_MAX_SECONDS (120s) instead of the 30s exponential cap, so a server-mandated cooldown isn't cut short. - GravityZoneClient is now a context manager (__enter__/__exit__ -> close()). - incident-status/note reject an empty --set-json (rc2), matching account-update/notif. - selftest: +connect/Retry-After/ctx-mgr unit coverage, incident empty-json assertion. 79/79. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -619,6 +619,10 @@ def cmd_incident_status(client, args):
|
||||
fields, rc = _load_json_arg(args.set_json, "set-json")
|
||||
if rc:
|
||||
return rc
|
||||
if not fields:
|
||||
print("[ERROR] --set-json (object of fields to set) is required.",
|
||||
file=sys.stderr)
|
||||
return 2
|
||||
if not _gated(f"change incident status (type={args.type})", args.confirm):
|
||||
return 3
|
||||
result = client.change_incident_status(args.type, fields)
|
||||
@@ -630,6 +634,10 @@ def cmd_incident_note(client, args):
|
||||
fields, rc = _load_json_arg(args.set_json, "set-json")
|
||||
if rc:
|
||||
return rc
|
||||
if not fields:
|
||||
print("[ERROR] --set-json (object of fields to set) is required.",
|
||||
file=sys.stderr)
|
||||
return 2
|
||||
if not _gated(f"update incident note (type={args.type})", args.confirm):
|
||||
return 3
|
||||
result = client.update_incident_note(args.type, fields)
|
||||
|
||||
@@ -58,12 +58,14 @@ ERROR_BODY_MAX_CHARS = 500
|
||||
RETRY_STATUSES = frozenset({429, 500, 502, 503, 504})
|
||||
RETRY_MAX_ATTEMPTS = 4 # total tries = 1 initial + up to (MAX-1) retries
|
||||
RETRY_BASE_DELAY_SECONDS = 1.0
|
||||
RETRY_MAX_DELAY_SECONDS = 30.0
|
||||
RETRY_MAX_DELAY_SECONDS = 30.0 # cap on the exponential backoff
|
||||
RETRY_AFTER_MAX_SECONDS = 120.0 # higher cap for a server-mandated Retry-After
|
||||
|
||||
|
||||
class _RetryableHTTP(Exception):
|
||||
"""Internal signal that a request failed transiently and may be retried.
|
||||
`code` is the HTTP status (int) or the string 'timeout'."""
|
||||
`code` is the HTTP status (int) or a string: 'timeout' (ambiguous - may have
|
||||
reached the server) or 'connect' (pre-send - no side effect, always safe)."""
|
||||
|
||||
def __init__(self, code, headers=None, detail=""):
|
||||
self.code = code
|
||||
@@ -81,8 +83,10 @@ def _retry_delay(headers, attempt: int) -> float:
|
||||
except AttributeError:
|
||||
ra = None
|
||||
if ra:
|
||||
# An explicit server-mandated Retry-After is honored up to a HIGHER cap
|
||||
# than the exponential backoff (don't retry early into another 429).
|
||||
try:
|
||||
return min(float(ra), RETRY_MAX_DELAY_SECONDS)
|
||||
return min(float(ra), RETRY_AFTER_MAX_SECONDS)
|
||||
except (TypeError, ValueError):
|
||||
try:
|
||||
dt = parsedate_to_datetime(ra)
|
||||
@@ -91,7 +95,7 @@ def _retry_delay(headers, attempt: int) -> float:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
delta = (dt - datetime.now(timezone.utc)).total_seconds()
|
||||
if delta > 0:
|
||||
return min(delta, RETRY_MAX_DELAY_SECONDS)
|
||||
return min(delta, RETRY_AFTER_MAX_SECONDS)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
backoff = min(RETRY_BASE_DELAY_SECONDS * (2 ** attempt), RETRY_MAX_DELAY_SECONDS)
|
||||
@@ -241,6 +245,12 @@ class GravityZoneClient:
|
||||
finally:
|
||||
self._httpx_client = None
|
||||
|
||||
def __enter__(self) -> "GravityZoneClient":
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc) -> None:
|
||||
self.close()
|
||||
|
||||
@property
|
||||
def _client(self):
|
||||
"""Lazily create and reuse a single httpx.Client so a multi-call sweep
|
||||
@@ -281,16 +291,17 @@ class GravityZoneClient:
|
||||
return body
|
||||
|
||||
def _post(self, url: str, payload: dict, idempotent: bool = False) -> Any:
|
||||
"""POST with bounded retry on transient failures. A 429 (pre-processing
|
||||
rate-limit reject, no side effect) is always retried; a timeout/5xx is
|
||||
retried ONLY for idempotent (read) calls, so a non-idempotent write is
|
||||
never silently re-executed after a server-side commit + timeout."""
|
||||
"""POST with bounded retry on transient failures. Failures with NO
|
||||
possible side effect (429 pre-processing reject, 'connect' pre-send
|
||||
failure) are always retried; an ambiguous timeout/5xx (may have committed
|
||||
server-side) is retried ONLY for idempotent (read) calls, so a
|
||||
non-idempotent write is never silently re-executed."""
|
||||
data = json.dumps(payload).encode("utf-8")
|
||||
for attempt in range(RETRY_MAX_ATTEMPTS):
|
||||
try:
|
||||
return self._post_once(url, data)
|
||||
except _RetryableHTTP as exc:
|
||||
retry_safe = (exc.code == 429) or idempotent
|
||||
retry_safe = (exc.code in (429, "connect")) or idempotent
|
||||
if not retry_safe or attempt >= RETRY_MAX_ATTEMPTS - 1:
|
||||
note = "" if retry_safe else " (non-idempotent; not retried)"
|
||||
raise GravityZoneError(
|
||||
@@ -315,6 +326,11 @@ class GravityZoneClient:
|
||||
url, content=data, auth=(self.api_key, ""),
|
||||
headers={"Content-Type": "application/json"})
|
||||
resp.raise_for_status()
|
||||
except (httpx.ConnectError, httpx.ConnectTimeout) as exc:
|
||||
# Pre-send: the connection never established -> no side effect,
|
||||
# always safe to retry (must precede TimeoutException since
|
||||
# ConnectTimeout subclasses it).
|
||||
raise _RetryableHTTP("connect", detail=str(exc)) from exc
|
||||
except httpx.TimeoutException as exc:
|
||||
raise _RetryableHTTP("timeout", detail=str(exc)) from exc
|
||||
except httpx.HTTPStatusError as exc:
|
||||
@@ -357,7 +373,12 @@ class GravityZoneClient:
|
||||
except TimeoutError as exc:
|
||||
raise _RetryableHTTP("timeout", detail=str(exc)) from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise GravityZoneError(f"GravityZone request failed: {exc}") from exc
|
||||
# A bare URLError (not HTTPError) is a connection-level failure. If it
|
||||
# wraps a read timeout it is ambiguous ('timeout'); otherwise it is a
|
||||
# pre-send connect failure ('connect', always safe to retry).
|
||||
reason = getattr(exc, "reason", None)
|
||||
code = "timeout" if isinstance(reason, TimeoutError) else "connect"
|
||||
raise _RetryableHTTP(code, detail=str(exc)) from exc
|
||||
try:
|
||||
return json.loads(raw.decode("utf-8"))
|
||||
except ValueError as exc:
|
||||
@@ -451,6 +472,31 @@ class GravityZoneClient:
|
||||
items = [i for i in result.get("items", []) if i.get("type") == 1]
|
||||
return {"total": len(items), "items": items}
|
||||
|
||||
def list_all_companies(self) -> list[dict]:
|
||||
"""Every company under the ACG container, paginated (type==1 only).
|
||||
list_companies() returns only one page; callers that must see the WHOLE
|
||||
fleet (sweep-all, inventory refresh) use this so a >100-company tenant is
|
||||
not silently truncated."""
|
||||
companies: list[dict] = []
|
||||
page = 1
|
||||
per_page = 100
|
||||
while True:
|
||||
result = self._jsonrpc_request(
|
||||
"network", "getNetworkInventoryItems",
|
||||
{"parentId": ACG_COMPANIES_CONTAINER_ID,
|
||||
"page": page, "perPage": per_page},
|
||||
) or {}
|
||||
raw = result.get("items", [])
|
||||
companies.extend(i for i in raw if i.get("type") == 1)
|
||||
if len(raw) < per_page: # short raw page = last page
|
||||
break
|
||||
page += 1
|
||||
if page > MAX_PAGINATION_PAGES:
|
||||
print(f"[WARNING] list_all_companies hit the {MAX_PAGINATION_PAGES}-"
|
||||
"page ceiling; companies may be truncated.", file=sys.stderr)
|
||||
break
|
||||
return companies
|
||||
|
||||
def list_endpoints(
|
||||
self, parent_id: Optional[str] = None, page: int = 1, per_page: int = 100
|
||||
) -> dict:
|
||||
@@ -553,7 +599,7 @@ class GravityZoneClient:
|
||||
"""Sweep every client company. The companies container is NOT a valid
|
||||
endpoint parent, so iterate each company and sweep it individually."""
|
||||
summaries: list[GZEndpointSummary] = []
|
||||
companies = self.list_companies(per_page=100).get("items", [])
|
||||
companies = self.list_all_companies()
|
||||
for company in companies:
|
||||
cid = company.get("id")
|
||||
if not cid:
|
||||
@@ -1249,7 +1295,7 @@ class GravityZoneClient:
|
||||
endpoints_map: dict[str, dict] = {}
|
||||
policies_map: dict[str, str] = {}
|
||||
|
||||
companies = self.list_companies(per_page=100).get("items", [])
|
||||
companies = self.list_all_companies()
|
||||
for c in companies:
|
||||
cid = c.get("id")
|
||||
if cid:
|
||||
|
||||
@@ -132,8 +132,9 @@ check("quarantine-remove no confirm -> rc3", ["quarantine-remove", "--items", "x
|
||||
check("quarantine-restore no confirm -> rc3", ["quarantine-restore", "--items", "x"], want_rc=3)
|
||||
check("custom-rule-create no confirm -> rc3", ["custom-rule-create", "--name", "R"], want_rc=3)
|
||||
check("custom-rule-delete no confirm -> rc3", ["custom-rule-delete", "--id", "x"], want_rc=3)
|
||||
check("incident-status no confirm -> rc3", ["incident-status", "--type", "t", "--set-json", "{}"], want_rc=3)
|
||||
check("incident-note no confirm -> rc3", ["incident-note", "--type", "t", "--set-json", "{}"], want_rc=3)
|
||||
check("incident-status no confirm -> rc3", ["incident-status", "--type", "t", "--set-json", "{\"status\":1}"], want_rc=3)
|
||||
check("incident-status empty set-json -> rc2", ["incident-status", "--type", "t", "--set-json", "{}", "--confirm"], want_rc=2)
|
||||
check("incident-note no confirm -> rc3", ["incident-note", "--type", "t", "--set-json", "{\"text\":\"x\"}"], want_rc=3)
|
||||
check("raw createReport no confirm -> rc3", ["raw", "--module", "reports", "--method", "createReport", "--params", "{}"], want_rc=3)
|
||||
check("raw createCustomRule no confirm -> rc3", ["raw", "--module", "incidents", "--method", "createCustomRule", "--params", "{}"], want_rc=3)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user