Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 50 additions & 33 deletions portkey_ai/_vendor/openai/_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,39 +709,56 @@ def _calculate_retry_timeout(
return timeout if timeout >= 0 else 0

def _should_retry(self, response: httpx.Response) -> bool:
# Note: this is not a standard header
should_retry_header = response.headers.get("x-should-retry")

# If the server explicitly says whether or not to retry, obey.
if should_retry_header == "true":
log.debug("Retrying as header `x-should-retry` is set to `true`")
return True
if should_retry_header == "false":
log.debug("Not retrying as header `x-should-retry` is set to `false`")
# Custom Retry Conditions
retry_status_code = response.status_code
retry_trace_id = response.headers.get("x-portkey-trace-id")
retry_request_id = response.headers.get("x-portkey-request-id")
retry_gateway_exception = response.headers.get("x-portkey-gateway-exception")

if (
retry_status_code < 500
or retry_trace_id
or retry_request_id
or retry_gateway_exception
):
return False

# Retry on request timeouts.
if response.status_code == 408:
log.debug("Retrying due to status code %i", response.status_code)
return True

# Retry on lock timeouts.
if response.status_code == 409:
log.debug("Retrying due to status code %i", response.status_code)
return True

# Retry on rate limits.
if response.status_code == 429:
log.debug("Retrying due to status code %i", response.status_code)
return True

# Retry internal errors.
if response.status_code >= 500:
log.debug("Retrying due to status code %i", response.status_code)
return True

log.debug("Not retrying")
return False
return True

# # Note: CSG: Commenting this logic for reference to the original code
# # Note: this is not a standard header
# should_retry_header = response.headers.get("x-should-retry")

# # If the server explicitly says whether or not to retry, obey.
# if should_retry_header == "true":
# log.debug("Retrying as header `x-should-retry` is set to `true`")
# return True
# if should_retry_header == "false":
# log.debug("Not retrying as header `x-should-retry` is set to `false`")
# return False

# # Retry on request timeouts.
# if response.status_code == 408:
# log.debug("Retrying due to status code %i", response.status_code)
# return True

# # Retry on lock timeouts.
# if response.status_code == 409:
# log.debug("Retrying due to status code %i", response.status_code)
# return True

# # Retry on rate limits.
# if response.status_code == 429:
# log.debug("Retrying due to status code %i", response.status_code)
# return True

# # Retry internal errors.
# if response.status_code >= 500:
# log.debug("Retrying due to status code %i", response.status_code)
# return True

# log.debug("Not retrying")
# return False

def _idempotency_key(self) -> str:
return f"stainless-python-retry-{uuid.uuid4()}"
Expand Down Expand Up @@ -998,7 +1015,7 @@ def _request(
except httpx.TimeoutException as err:
log.debug("Encountered httpx.TimeoutException", exc_info=True)

if remaining_retries > 0:
if remaining_retries > 0 and self._should_retry(err.response):
return self._retry_request(
input_options,
cast_to,
Expand Down Expand Up @@ -1581,7 +1598,7 @@ async def _request(
except httpx.TimeoutException as err:
log.debug("Encountered httpx.TimeoutException", exc_info=True)

if remaining_retries > 0:
if remaining_retries > 0 and self._should_retry(err.response):
return await self._retry_request(
input_options,
cast_to,
Expand Down
2 changes: 1 addition & 1 deletion portkey_ai/_vendor/openai/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

# default timeout is 10 minutes
DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0)
DEFAULT_MAX_RETRIES = 2
DEFAULT_MAX_RETRIES = 1
DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100)

INITIAL_RETRY_DELAY = 0.5
Expand Down
77 changes: 77 additions & 0 deletions portkey_ai/api_resources/base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self) -> None:
class APIClient:
_client: httpx.Client
_default_stream_cls: Union[type[Stream[Any]], None] = None
max_retries: int = 1

def __init__(
self,
Expand Down Expand Up @@ -577,11 +578,29 @@ def _build_request(self, options: Options) -> httpx.Request:
)
return request

def _should_retry(self, response: httpx.Response) -> bool:
# Retry Conditions
retry_status_code = response.status_code
retry_trace_id = response.headers.get("x-portkey-trace-id")
retry_request_id = response.headers.get("x-portkey-request-id")
retry_gateway_exception = response.headers.get("x-portkey-gateway-exception")

if (
retry_status_code < 500
or retry_trace_id
or retry_request_id
or retry_gateway_exception
):
return False

return True

@overload
def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: Literal[False],
cast_to: Type[ResponseT],
stream_cls: Type[StreamT],
Expand All @@ -593,6 +612,7 @@ def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: Literal[True],
cast_to: Type[ResponseT],
stream_cls: Type[StreamT],
Expand All @@ -604,6 +624,7 @@ def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: bool,
cast_to: Type[ResponseT],
stream_cls: Type[StreamT],
Expand All @@ -614,6 +635,7 @@ def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: bool,
cast_to: Type[ResponseT],
stream_cls: Type[StreamT],
Expand All @@ -625,11 +647,28 @@ def _request(
except httpx.HTTPStatusError as err: # 4xx and 5xx errors
# If the response is streamed then we need to explicitly read the response
# to completion before attempting to access the response text.

if retry_count < self.max_retries and self._should_retry(err.response):
self._request(
options=options,
stream=stream,
cast_to=cast_to,
stream_cls=stream_cls,
retry_count=retry_count + 1,
)
err.response.read()
raise self._make_status_error_from_response(request, err.response) from None
except httpx.TimeoutException as err:
raise APITimeoutError(request=request) from err
except Exception as err:
if retry_count < self.max_retries:
self._request(
options=options,
stream=stream,
cast_to=cast_to,
stream_cls=stream_cls,
retry_count=retry_count + 1,
)
raise APIConnectionError(request=request) from err

self.response_headers = res.headers
Expand Down Expand Up @@ -692,6 +731,7 @@ def __del__(self) -> None:
class AsyncAPIClient:
_client: httpx.AsyncClient
_default_stream_cls: Union[type[AsyncStream[Any]], None] = None
max_retries: int = 1

def __init__(
self,
Expand Down Expand Up @@ -1222,11 +1262,29 @@ async def _build_request(self, options: Options) -> httpx.Request:
)
return request

def _should_retry(self, response: httpx.Response) -> bool:
# Retry Conditions
retry_status_code = response.status_code
retry_trace_id = response.headers.get("x-portkey-trace-id")
retry_request_id = response.headers.get("x-portkey-request-id")
retry_gateway_exception = response.headers.get("x-portkey-gateway-exception")

if (
retry_status_code < 500
or retry_trace_id
or retry_request_id
or retry_gateway_exception
):
return False

return True

@overload
async def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: Literal[False],
cast_to: Type[ResponseT],
stream_cls: Type[AsyncStreamT],
Expand All @@ -1238,6 +1296,7 @@ async def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: Literal[True],
cast_to: Type[ResponseT],
stream_cls: Type[AsyncStreamT],
Expand All @@ -1249,6 +1308,7 @@ async def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: bool,
cast_to: Type[ResponseT],
stream_cls: Type[AsyncStreamT],
Expand All @@ -1259,6 +1319,7 @@ async def _request(
self,
*,
options: Options,
retry_count: int = 0,
stream: bool,
cast_to: Type[ResponseT],
stream_cls: Type[AsyncStreamT],
Expand All @@ -1270,11 +1331,27 @@ async def _request(
except httpx.HTTPStatusError as err: # 4xx and 5xx errors
# If the response is streamed then we need to explicitly read the response
# to completion before attempting to access the response text.
if retry_count < self.max_retries and self._should_retry(err.response):
await self._request(
options=options,
stream=stream,
cast_to=cast_to,
stream_cls=stream_cls,
retry_count=retry_count + 1,
)
await err.response.aread()
raise self._make_status_error_from_response(request, err.response) from None
except httpx.TimeoutException as err:
raise APITimeoutError(request=request) from err
except Exception as err:
if retry_count < self.max_retries:
await self._request(
options=options,
stream=stream,
cast_to=cast_to,
stream_cls=stream_cls,
retry_count=retry_count + 1,
)
raise APIConnectionError(request=request) from err

self.response_headers = res.headers
Expand Down
4 changes: 2 additions & 2 deletions portkey_ai/api_resources/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def __init__(
base_url=self.base_url,
default_headers=self.allHeaders,
http_client=http_client,
max_retries=0,
max_retries=1,
websocket_base_url=self.websocket_base_url,
)

Expand Down Expand Up @@ -351,7 +351,7 @@ def __init__(
base_url=self.base_url,
default_headers=self.allHeaders,
http_client=http_client,
max_retries=0,
max_retries=1,
websocket_base_url=self.websocket_base_url,
)

Expand Down
Loading