fix(bitdefender): third-pass - companies pagination, connect-retry, Retry-After ceiling, ctx mgr
Remaining LOW/NIT items from the second review pass: - list_all_companies() paginates the company list; sweep-all + refresh_inventory no longer truncate a >100-company tenant. - Pre-send connection failures (httpx ConnectError/ConnectTimeout; urllib URLError not wrapping a timeout) are now retried as 'connect' - always safe (no side effect) even for non-idempotent writes; ambiguous read-timeouts stay idempotent-gated. - Explicit Retry-After honored up to RETRY_AFTER_MAX_SECONDS (120s) instead of the 30s exponential cap, so a server-mandated cooldown isn't cut short. - GravityZoneClient is now a context manager (__enter__/__exit__ -> close()). - incident-status/note reject an empty --set-json (rc2), matching account-update/notif. - selftest: +connect/Retry-After/ctx-mgr unit coverage, incident empty-json assertion. 79/79. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -619,6 +619,10 @@ def cmd_incident_status(client, args):
|
|||||||
fields, rc = _load_json_arg(args.set_json, "set-json")
|
fields, rc = _load_json_arg(args.set_json, "set-json")
|
||||||
if rc:
|
if rc:
|
||||||
return rc
|
return rc
|
||||||
|
if not fields:
|
||||||
|
print("[ERROR] --set-json (object of fields to set) is required.",
|
||||||
|
file=sys.stderr)
|
||||||
|
return 2
|
||||||
if not _gated(f"change incident status (type={args.type})", args.confirm):
|
if not _gated(f"change incident status (type={args.type})", args.confirm):
|
||||||
return 3
|
return 3
|
||||||
result = client.change_incident_status(args.type, fields)
|
result = client.change_incident_status(args.type, fields)
|
||||||
@@ -630,6 +634,10 @@ def cmd_incident_note(client, args):
|
|||||||
fields, rc = _load_json_arg(args.set_json, "set-json")
|
fields, rc = _load_json_arg(args.set_json, "set-json")
|
||||||
if rc:
|
if rc:
|
||||||
return rc
|
return rc
|
||||||
|
if not fields:
|
||||||
|
print("[ERROR] --set-json (object of fields to set) is required.",
|
||||||
|
file=sys.stderr)
|
||||||
|
return 2
|
||||||
if not _gated(f"update incident note (type={args.type})", args.confirm):
|
if not _gated(f"update incident note (type={args.type})", args.confirm):
|
||||||
return 3
|
return 3
|
||||||
result = client.update_incident_note(args.type, fields)
|
result = client.update_incident_note(args.type, fields)
|
||||||
|
|||||||
@@ -58,12 +58,14 @@ ERROR_BODY_MAX_CHARS = 500
|
|||||||
RETRY_STATUSES = frozenset({429, 500, 502, 503, 504})
|
RETRY_STATUSES = frozenset({429, 500, 502, 503, 504})
|
||||||
RETRY_MAX_ATTEMPTS = 4 # total tries = 1 initial + up to (MAX-1) retries
|
RETRY_MAX_ATTEMPTS = 4 # total tries = 1 initial + up to (MAX-1) retries
|
||||||
RETRY_BASE_DELAY_SECONDS = 1.0
|
RETRY_BASE_DELAY_SECONDS = 1.0
|
||||||
RETRY_MAX_DELAY_SECONDS = 30.0
|
RETRY_MAX_DELAY_SECONDS = 30.0 # cap on the exponential backoff
|
||||||
|
RETRY_AFTER_MAX_SECONDS = 120.0 # higher cap for a server-mandated Retry-After
|
||||||
|
|
||||||
|
|
||||||
class _RetryableHTTP(Exception):
|
class _RetryableHTTP(Exception):
|
||||||
"""Internal signal that a request failed transiently and may be retried.
|
"""Internal signal that a request failed transiently and may be retried.
|
||||||
`code` is the HTTP status (int) or the string 'timeout'."""
|
`code` is the HTTP status (int) or a string: 'timeout' (ambiguous - may have
|
||||||
|
reached the server) or 'connect' (pre-send - no side effect, always safe)."""
|
||||||
|
|
||||||
def __init__(self, code, headers=None, detail=""):
|
def __init__(self, code, headers=None, detail=""):
|
||||||
self.code = code
|
self.code = code
|
||||||
@@ -81,8 +83,10 @@ def _retry_delay(headers, attempt: int) -> float:
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
ra = None
|
ra = None
|
||||||
if ra:
|
if ra:
|
||||||
|
# An explicit server-mandated Retry-After is honored up to a HIGHER cap
|
||||||
|
# than the exponential backoff (don't retry early into another 429).
|
||||||
try:
|
try:
|
||||||
return min(float(ra), RETRY_MAX_DELAY_SECONDS)
|
return min(float(ra), RETRY_AFTER_MAX_SECONDS)
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
try:
|
try:
|
||||||
dt = parsedate_to_datetime(ra)
|
dt = parsedate_to_datetime(ra)
|
||||||
@@ -91,7 +95,7 @@ def _retry_delay(headers, attempt: int) -> float:
|
|||||||
dt = dt.replace(tzinfo=timezone.utc)
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
delta = (dt - datetime.now(timezone.utc)).total_seconds()
|
delta = (dt - datetime.now(timezone.utc)).total_seconds()
|
||||||
if delta > 0:
|
if delta > 0:
|
||||||
return min(delta, RETRY_MAX_DELAY_SECONDS)
|
return min(delta, RETRY_AFTER_MAX_SECONDS)
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
pass
|
pass
|
||||||
backoff = min(RETRY_BASE_DELAY_SECONDS * (2 ** attempt), RETRY_MAX_DELAY_SECONDS)
|
backoff = min(RETRY_BASE_DELAY_SECONDS * (2 ** attempt), RETRY_MAX_DELAY_SECONDS)
|
||||||
@@ -241,6 +245,12 @@ class GravityZoneClient:
|
|||||||
finally:
|
finally:
|
||||||
self._httpx_client = None
|
self._httpx_client = None
|
||||||
|
|
||||||
|
def __enter__(self) -> "GravityZoneClient":
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *exc) -> None:
|
||||||
|
self.close()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _client(self):
|
def _client(self):
|
||||||
"""Lazily create and reuse a single httpx.Client so a multi-call sweep
|
"""Lazily create and reuse a single httpx.Client so a multi-call sweep
|
||||||
@@ -281,16 +291,17 @@ class GravityZoneClient:
|
|||||||
return body
|
return body
|
||||||
|
|
||||||
def _post(self, url: str, payload: dict, idempotent: bool = False) -> Any:
|
def _post(self, url: str, payload: dict, idempotent: bool = False) -> Any:
|
||||||
"""POST with bounded retry on transient failures. A 429 (pre-processing
|
"""POST with bounded retry on transient failures. Failures with NO
|
||||||
rate-limit reject, no side effect) is always retried; a timeout/5xx is
|
possible side effect (429 pre-processing reject, 'connect' pre-send
|
||||||
retried ONLY for idempotent (read) calls, so a non-idempotent write is
|
failure) are always retried; an ambiguous timeout/5xx (may have committed
|
||||||
never silently re-executed after a server-side commit + timeout."""
|
server-side) is retried ONLY for idempotent (read) calls, so a
|
||||||
|
non-idempotent write is never silently re-executed."""
|
||||||
data = json.dumps(payload).encode("utf-8")
|
data = json.dumps(payload).encode("utf-8")
|
||||||
for attempt in range(RETRY_MAX_ATTEMPTS):
|
for attempt in range(RETRY_MAX_ATTEMPTS):
|
||||||
try:
|
try:
|
||||||
return self._post_once(url, data)
|
return self._post_once(url, data)
|
||||||
except _RetryableHTTP as exc:
|
except _RetryableHTTP as exc:
|
||||||
retry_safe = (exc.code == 429) or idempotent
|
retry_safe = (exc.code in (429, "connect")) or idempotent
|
||||||
if not retry_safe or attempt >= RETRY_MAX_ATTEMPTS - 1:
|
if not retry_safe or attempt >= RETRY_MAX_ATTEMPTS - 1:
|
||||||
note = "" if retry_safe else " (non-idempotent; not retried)"
|
note = "" if retry_safe else " (non-idempotent; not retried)"
|
||||||
raise GravityZoneError(
|
raise GravityZoneError(
|
||||||
@@ -315,6 +326,11 @@ class GravityZoneClient:
|
|||||||
url, content=data, auth=(self.api_key, ""),
|
url, content=data, auth=(self.api_key, ""),
|
||||||
headers={"Content-Type": "application/json"})
|
headers={"Content-Type": "application/json"})
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
except (httpx.ConnectError, httpx.ConnectTimeout) as exc:
|
||||||
|
# Pre-send: the connection never established -> no side effect,
|
||||||
|
# always safe to retry (must precede TimeoutException since
|
||||||
|
# ConnectTimeout subclasses it).
|
||||||
|
raise _RetryableHTTP("connect", detail=str(exc)) from exc
|
||||||
except httpx.TimeoutException as exc:
|
except httpx.TimeoutException as exc:
|
||||||
raise _RetryableHTTP("timeout", detail=str(exc)) from exc
|
raise _RetryableHTTP("timeout", detail=str(exc)) from exc
|
||||||
except httpx.HTTPStatusError as exc:
|
except httpx.HTTPStatusError as exc:
|
||||||
@@ -357,7 +373,12 @@ class GravityZoneClient:
|
|||||||
except TimeoutError as exc:
|
except TimeoutError as exc:
|
||||||
raise _RetryableHTTP("timeout", detail=str(exc)) from exc
|
raise _RetryableHTTP("timeout", detail=str(exc)) from exc
|
||||||
except urllib.error.URLError as exc:
|
except urllib.error.URLError as exc:
|
||||||
raise GravityZoneError(f"GravityZone request failed: {exc}") from exc
|
# A bare URLError (not HTTPError) is a connection-level failure. If it
|
||||||
|
# wraps a read timeout it is ambiguous ('timeout'); otherwise it is a
|
||||||
|
# pre-send connect failure ('connect', always safe to retry).
|
||||||
|
reason = getattr(exc, "reason", None)
|
||||||
|
code = "timeout" if isinstance(reason, TimeoutError) else "connect"
|
||||||
|
raise _RetryableHTTP(code, detail=str(exc)) from exc
|
||||||
try:
|
try:
|
||||||
return json.loads(raw.decode("utf-8"))
|
return json.loads(raw.decode("utf-8"))
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
@@ -451,6 +472,31 @@ class GravityZoneClient:
|
|||||||
items = [i for i in result.get("items", []) if i.get("type") == 1]
|
items = [i for i in result.get("items", []) if i.get("type") == 1]
|
||||||
return {"total": len(items), "items": items}
|
return {"total": len(items), "items": items}
|
||||||
|
|
||||||
|
def list_all_companies(self) -> list[dict]:
|
||||||
|
"""Every company under the ACG container, paginated (type==1 only).
|
||||||
|
list_companies() returns only one page; callers that must see the WHOLE
|
||||||
|
fleet (sweep-all, inventory refresh) use this so a >100-company tenant is
|
||||||
|
not silently truncated."""
|
||||||
|
companies: list[dict] = []
|
||||||
|
page = 1
|
||||||
|
per_page = 100
|
||||||
|
while True:
|
||||||
|
result = self._jsonrpc_request(
|
||||||
|
"network", "getNetworkInventoryItems",
|
||||||
|
{"parentId": ACG_COMPANIES_CONTAINER_ID,
|
||||||
|
"page": page, "perPage": per_page},
|
||||||
|
) or {}
|
||||||
|
raw = result.get("items", [])
|
||||||
|
companies.extend(i for i in raw if i.get("type") == 1)
|
||||||
|
if len(raw) < per_page: # short raw page = last page
|
||||||
|
break
|
||||||
|
page += 1
|
||||||
|
if page > MAX_PAGINATION_PAGES:
|
||||||
|
print(f"[WARNING] list_all_companies hit the {MAX_PAGINATION_PAGES}-"
|
||||||
|
"page ceiling; companies may be truncated.", file=sys.stderr)
|
||||||
|
break
|
||||||
|
return companies
|
||||||
|
|
||||||
def list_endpoints(
|
def list_endpoints(
|
||||||
self, parent_id: Optional[str] = None, page: int = 1, per_page: int = 100
|
self, parent_id: Optional[str] = None, page: int = 1, per_page: int = 100
|
||||||
) -> dict:
|
) -> dict:
|
||||||
@@ -553,7 +599,7 @@ class GravityZoneClient:
|
|||||||
"""Sweep every client company. The companies container is NOT a valid
|
"""Sweep every client company. The companies container is NOT a valid
|
||||||
endpoint parent, so iterate each company and sweep it individually."""
|
endpoint parent, so iterate each company and sweep it individually."""
|
||||||
summaries: list[GZEndpointSummary] = []
|
summaries: list[GZEndpointSummary] = []
|
||||||
companies = self.list_companies(per_page=100).get("items", [])
|
companies = self.list_all_companies()
|
||||||
for company in companies:
|
for company in companies:
|
||||||
cid = company.get("id")
|
cid = company.get("id")
|
||||||
if not cid:
|
if not cid:
|
||||||
@@ -1249,7 +1295,7 @@ class GravityZoneClient:
|
|||||||
endpoints_map: dict[str, dict] = {}
|
endpoints_map: dict[str, dict] = {}
|
||||||
policies_map: dict[str, str] = {}
|
policies_map: dict[str, str] = {}
|
||||||
|
|
||||||
companies = self.list_companies(per_page=100).get("items", [])
|
companies = self.list_all_companies()
|
||||||
for c in companies:
|
for c in companies:
|
||||||
cid = c.get("id")
|
cid = c.get("id")
|
||||||
if cid:
|
if cid:
|
||||||
|
|||||||
@@ -132,8 +132,9 @@ check("quarantine-remove no confirm -> rc3", ["quarantine-remove", "--items", "x
|
|||||||
check("quarantine-restore no confirm -> rc3", ["quarantine-restore", "--items", "x"], want_rc=3)
|
check("quarantine-restore no confirm -> rc3", ["quarantine-restore", "--items", "x"], want_rc=3)
|
||||||
check("custom-rule-create no confirm -> rc3", ["custom-rule-create", "--name", "R"], want_rc=3)
|
check("custom-rule-create no confirm -> rc3", ["custom-rule-create", "--name", "R"], want_rc=3)
|
||||||
check("custom-rule-delete no confirm -> rc3", ["custom-rule-delete", "--id", "x"], want_rc=3)
|
check("custom-rule-delete no confirm -> rc3", ["custom-rule-delete", "--id", "x"], want_rc=3)
|
||||||
check("incident-status no confirm -> rc3", ["incident-status", "--type", "t", "--set-json", "{}"], want_rc=3)
|
check("incident-status no confirm -> rc3", ["incident-status", "--type", "t", "--set-json", "{\"status\":1}"], want_rc=3)
|
||||||
check("incident-note no confirm -> rc3", ["incident-note", "--type", "t", "--set-json", "{}"], want_rc=3)
|
check("incident-status empty set-json -> rc2", ["incident-status", "--type", "t", "--set-json", "{}", "--confirm"], want_rc=2)
|
||||||
|
check("incident-note no confirm -> rc3", ["incident-note", "--type", "t", "--set-json", "{\"text\":\"x\"}"], want_rc=3)
|
||||||
check("raw createReport no confirm -> rc3", ["raw", "--module", "reports", "--method", "createReport", "--params", "{}"], want_rc=3)
|
check("raw createReport no confirm -> rc3", ["raw", "--module", "reports", "--method", "createReport", "--params", "{}"], want_rc=3)
|
||||||
check("raw createCustomRule no confirm -> rc3", ["raw", "--module", "incidents", "--method", "createCustomRule", "--params", "{}"], want_rc=3)
|
check("raw createCustomRule no confirm -> rc3", ["raw", "--module", "incidents", "--method", "createCustomRule", "--params", "{}"], want_rc=3)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user