diff --git a/.claude/skills/bitdefender/scripts/gz.py b/.claude/skills/bitdefender/scripts/gz.py index f14699b1..183dd635 100644 --- a/.claude/skills/bitdefender/scripts/gz.py +++ b/.claude/skills/bitdefender/scripts/gz.py @@ -1310,6 +1310,7 @@ HANDLERS = { def main(argv=None) -> int: args = build_parser().parse_args(argv) handler = HANDLERS[args.command] + client = None try: client = GravityZoneClient() rc = handler(client, args) @@ -1322,6 +1323,9 @@ def main(argv=None) -> int: return 1 except KeyboardInterrupt: return 130 + finally: + if client is not None: + client.close() if __name__ == "__main__": diff --git a/.claude/skills/bitdefender/scripts/gz_client.py b/.claude/skills/bitdefender/scripts/gz_client.py index 6bc25621..8ad2917e 100644 --- a/.claude/skills/bitdefender/scripts/gz_client.py +++ b/.claude/skills/bitdefender/scripts/gz_client.py @@ -20,12 +20,15 @@ from __future__ import annotations import base64 import json import os +import random import subprocess import sys +import time import urllib.error import urllib.request from dataclasses import dataclass, field from datetime import datetime, timezone +from email.utils import parsedate_to_datetime from pathlib import Path from typing import Any, Optional @@ -44,6 +47,52 @@ except ImportError: # pragma: no cover - depends on environment # other data - bound the blast radius rather than echo full bodies into logs. ERROR_BODY_MAX_CHARS = 500 +# --- transient-failure retry policy ------------------------------------------- +# The live tenant is rate-limited (real HTTP 429s observed during sweeps, which +# fan out one getManagedEndpointDetails per endpoint across every company). Retry +# 429/5xx/timeout with bounded exponential backoff, honoring Retry-After. +RETRY_STATUSES = frozenset({429, 500, 502, 503, 504}) +RETRY_MAX_ATTEMPTS = 4 # total tries = 1 initial + up to (MAX-1) retries +RETRY_BASE_DELAY_SECONDS = 1.0 +RETRY_MAX_DELAY_SECONDS = 30.0 + + +class _RetryableHTTP(Exception): + """Internal signal that a request failed transiently and may be retried. + `code` is the HTTP status (int) or the string 'timeout'.""" + + def __init__(self, code, headers=None, detail=""): + self.code = code + self.headers = headers or {} + self.detail = detail + super().__init__(f"transient {code}") + + +def _retry_delay(headers, attempt: int) -> float: + """Seconds to wait before the next retry: honor a Retry-After header when + present (numeric seconds or an HTTP-date), else exponential backoff + jitter.""" + ra = None + try: + ra = headers.get("Retry-After") or headers.get("retry-after") + except AttributeError: + ra = None + if ra: + try: + return min(float(ra), RETRY_MAX_DELAY_SECONDS) + except (TypeError, ValueError): + try: + dt = parsedate_to_datetime(ra) + if dt is not None: + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + delta = (dt - datetime.now(timezone.utc)).total_seconds() + if delta > 0: + return min(delta, RETRY_MAX_DELAY_SECONDS) + except (TypeError, ValueError): + pass + backoff = min(RETRY_BASE_DELAY_SECONDS * (2 ** attempt), RETRY_MAX_DELAY_SECONDS) + return backoff + random.uniform(0.0, backoff * 0.25) + # --- constants ---------------------------------------------------------------- GRAVITYZONE_API_BASE_URL = os.environ.get( "GRAVITYZONE_API_BASE_URL", @@ -169,6 +218,24 @@ class GravityZoneClient: self._api_key = api_key # lazily loaded if None self.timeout = timeout self.connect_timeout = connect_timeout + self._httpx_client = None # reused across calls (pooling) when httpx present + + def close(self) -> None: + """Close the pooled httpx client, if one was opened.""" + if self._httpx_client is not None: + try: + self._httpx_client.close() + finally: + self._httpx_client = None + + @property + def _client(self): + """Lazily create and reuse a single httpx.Client so a multi-call sweep + shares one connection pool instead of a TLS handshake per request.""" + if self._httpx_client is None: + timeout = httpx.Timeout(self.timeout, connect=self.connect_timeout) + self._httpx_client = httpx.Client(timeout=timeout) + return self._httpx_client @property def api_key(self) -> str: @@ -196,24 +263,49 @@ class GravityZoneClient: return body def _post(self, url: str, payload: dict) -> Any: + """POST with bounded retry on transient failures (429/5xx/timeout).""" data = json.dumps(payload).encode("utf-8") + for attempt in range(RETRY_MAX_ATTEMPTS): + try: + return self._post_once(url, data) + except _RetryableHTTP as exc: + if attempt >= RETRY_MAX_ATTEMPTS - 1: + raise GravityZoneError( + f"GravityZone HTTP {exc.code} after {RETRY_MAX_ATTEMPTS} " + f"attempts: {exc.detail}".rstrip(": ") + ) from exc + delay = _retry_delay(exc.headers, attempt) + print( + f"[WARNING] GravityZone {exc.code} - retry " + f"{attempt + 1}/{RETRY_MAX_ATTEMPTS - 1} in {delay:.1f}s", + file=sys.stderr, + ) + time.sleep(delay) + # unreachable: the loop either returns or raises on the final attempt + raise GravityZoneError("GravityZone request failed: retries exhausted") + + def _post_once(self, url: str, data: bytes) -> Any: + """One POST. Returns parsed JSON, raises _RetryableHTTP on a transient + failure, or GravityZoneError on a terminal one.""" if _HAS_HTTPX: try: - timeout = httpx.Timeout(self.timeout, connect=self.connect_timeout) - with httpx.Client(timeout=timeout) as client: - resp = client.post(url, content=data, auth=(self.api_key, ""), - headers={"Content-Type": "application/json"}) + resp = self._client.post( + url, content=data, auth=(self.api_key, ""), + headers={"Content-Type": "application/json"}) resp.raise_for_status() return resp.json() except httpx.TimeoutException as exc: - raise GravityZoneError(f"GravityZone request timed out: {exc}") from exc + raise _RetryableHTTP("timeout", detail=str(exc)) from exc except httpx.HTTPStatusError as exc: + code = exc.response.status_code detail = (exc.response.text or "")[:ERROR_BODY_MAX_CHARS] + if code in RETRY_STATUSES: + raise _RetryableHTTP(code, exc.response.headers, detail) from exc raise GravityZoneError( - f"GravityZone HTTP {exc.response.status_code}: {detail}" - ) from exc + f"GravityZone HTTP {code}: {detail}") from exc except httpx.HTTPError as exc: - raise GravityZoneError(f"GravityZone request failed: {exc}") from exc + raise GravityZoneError( + f"GravityZone request failed: {exc}") from exc # stdlib fallback token = base64.b64encode(f"{self.api_key}:".encode("utf-8")).decode("ascii") @@ -232,7 +324,12 @@ class GravityZoneClient: return json.loads(raw.decode("utf-8")) except urllib.error.HTTPError as exc: detail = exc.read().decode("utf-8", errors="replace")[:ERROR_BODY_MAX_CHARS] + if exc.code in RETRY_STATUSES: + raise _RetryableHTTP(exc.code, getattr(exc, "headers", None), + detail) from exc raise GravityZoneError(f"GravityZone HTTP {exc.code}: {detail}") from exc + except TimeoutError as exc: + raise _RetryableHTTP("timeout", detail=str(exc)) from exc except urllib.error.URLError as exc: raise GravityZoneError(f"GravityZone request failed: {exc}") from exc