make retry_time configurable, add doc #53

Merged · 7 commits · Sep 30, 2023
41 changes: 22 additions & 19 deletions — autogen/oai/completion.py

```diff
@@ -105,9 +105,9 @@ class Completion(openai_Completion):
     seed = 41
     cache_path = f".cache/{seed}"
     # retry after this many seconds
-    retry_time = 10
+    retry_wait_time = 10
     # fail a request after hitting RateLimitError for this many seconds
-    retry_timeout = 120
+    max_retry_period = 120
     # time out for request to openai server
     request_timeout = 60
@@ -181,7 +181,7 @@ def _book_keeping(cls, config: Dict, response):
     def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
         """Get the response from the openai api call.

-        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
+        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
         """
         config = config.copy()
         openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
@@ -199,7 +199,8 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
         )
         start_time = time.time()
         request_timeout = cls.request_timeout
-        retry_timeout = config.pop("retry_timeout", cls.retry_timeout)
+        max_retry_period = config.pop("max_retry_period", cls.max_retry_period)
+        retry_wait_time = config.pop("retry_wait_time", cls.retry_wait_time)
         while True:
             try:
                 if "request_timeout" in config:
@@ -211,18 +212,18 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
                 APIConnectionError,
             ):
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except APIError as err:
                 error_code = err and err.json_body and isinstance(err.json_body, dict) and err.json_body.get("error")
                 error_code = error_code and error_code.get("code")
                 if error_code == "content_filter":
                     raise
                 # transient error
-                logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                sleep(cls.retry_time)
+                logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                sleep(retry_wait_time)
             except (RateLimitError, Timeout) as err:
-                time_left = retry_timeout - (time.time() - start_time + cls.retry_time)
+                time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
                 if (
                     time_left > 0
                     and isinstance(err, RateLimitError)
@@ -233,16 +234,16 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
                     if isinstance(err, Timeout):
                         request_timeout <<= 1
                     request_timeout = min(request_timeout, time_left)
-                    logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
-                    sleep(cls.retry_time)
+                    logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
+                    sleep(retry_wait_time)
                 elif raise_on_ratelimit_or_timeout:
                     raise
                 else:
                     response = -1
                     if use_cache and isinstance(err, Timeout):
                         cls._cache.set(key, response)
                     logger.warning(
-                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {retry_timeout} seconds."
+                        f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds."
                     )
                     return response
             except InvalidRequestError:
@@ -743,9 +744,11 @@ def yes_or_no_filter(context, config, response):
                 When set to False, -1 will be returned when all configs fail.
             allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
             **config: Configuration for the openai API call. This is used as parameters for calling openai API.
-                Besides the parameters for the openai API call, it can also contain a seed (int) for the cache.
-                This is useful when implementing "controlled randomness" for the completion.
-                Also, the "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
+                Besides the parameters for the openai API call, it can also contain:
+                    - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+                    - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+                    - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.

         Returns:
             Responses from OpenAI API, with additional fields.
@@ -763,9 +766,9 @@ def yes_or_no_filter(context, config, response):
                 base_config = config.copy()
                 base_config["allow_format_str_template"] = allow_format_str_template
                 base_config.update(each_config)
-                if i < last and filter_func is None and "retry_timeout" not in base_config:
-                    # retry_timeout = 0 to avoid retrying when no filter is given
-                    base_config["retry_timeout"] = 0
+                if i < last and filter_func is None and "max_retry_period" not in base_config:
+                    # max_retry_period = 0 to avoid retrying when no filter is given
+                    base_config["max_retry_period"] = 0
                 try:
                     response = cls.create(
                         context,
@@ -1103,7 +1106,7 @@ def stop_logging(cls):


 class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion."""
+    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""

     default_search_space = Completion.default_search_space.copy()
     default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
```
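These class-level defaults stay overridable, just as `autogen.Completion.retry_time` was before this PR. A minimal sketch under the renamed attributes (the values are hypothetical, not recommendations):

```python
import autogen

# Hypothetical values: tighten retry behavior globally by overriding the
# class-level defaults renamed in this PR.
autogen.Completion.retry_wait_time = 5    # wait 5s between retries (default: 10)
autogen.Completion.max_retry_period = 60  # stop retrying 60s after the first failure (default: 120)
autogen.Completion.request_timeout = 30   # per-request timeout in seconds (default: 60)
```

Per-call values still take precedence, since `_get_response` pops `max_retry_period` and `retry_wait_time` from the request config with the class attributes only as fallbacks.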
2 changes: 1 addition & 1 deletion — autogen/version.py

```diff
@@ -1 +1 @@
-__version__ = "0.1.3"
+__version__ = "0.1.4"
```
2 changes: 1 addition & 1 deletion — test/oai/test_completion.py

```diff
@@ -227,7 +227,7 @@ def test_humaneval(num_samples=1):
         config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]),
         prompt="",
         max_tokens=1,
-        retry_timeout=0,
+        max_retry_period=0,
         raise_on_ratelimit_or_timeout=False,
     )
     # assert response == -1
```
2 changes: 1 addition & 1 deletion — test/twoagent.py

```diff
@@ -2,7 +2,7 @@

 # Load LLM inference endpoints from an env variable or a file
 # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
-# and OAI_CONFIG_LIST_sample.json
+# and OAI_CONFIG_LIST_sample
 config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
 assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
 user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"})
```
10 changes: 10 additions & 0 deletions — website/docs/FAQ.md

````diff
@@ -99,3 +99,13 @@ You can also explicitly specify that by:
 ```python
 assistant = autogen.AssistantAgent(name="assistant", llm_config={"api_key": ...})
 ```
+
+## Handle Rate Limit Error and Timeout Error
+
+You can set `retry_wait_time` and `max_retry_period` to handle rate limit errors, and `request_timeout` to handle timeout errors. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
+
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
+- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+
+Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.
````
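The new FAQ entry says these settings can ride along in an agent's `llm_config`. A hedged sketch of what that might look like (the numeric values are hypothetical; `config_list` is loaded as in test/twoagent.py):

```python
import autogen
from autogen import config_list_from_json

# Load endpoints as in test/twoagent.py.
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")

# Hypothetical values: the retry settings are forwarded to the underlying create() call.
llm_config = {
    "config_list": config_list,
    "retry_wait_time": 5,    # wait 5 seconds between retries
    "max_retry_period": 60,  # stop retrying 60 seconds after the first failure
    "request_timeout": 30,   # per-request timeout in seconds
}
assistant = autogen.AssistantAgent(name="assistant", llm_config=llm_config)
```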
6 changes: 5 additions & 1 deletion — website/docs/Use-Cases/enhanced_inference.md

```diff
@@ -123,7 +123,11 @@ API call results are cached locally and reused when the same request is issued.

 ### Runtime error

-It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Initial request timeout, retry timeout and retry time interval can be configured via `request_timeout`, `retry_timeout` and `autogen.Completion.retry_time`.
+It is easy to hit errors when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. The request timeout, max retry period, and retry wait time can be configured via `request_timeout`, `max_retry_period`, and `retry_wait_time`.
+
+- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
+- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.

 Moreover, one can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
```
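For a per-call variant, the same parameters can be passed directly to `autogen.Completion.create`, as the updated test does with `max_retry_period=0`. A sketch with hypothetical prompt and values (`config_list` assumed to be defined as above):

```python
import autogen

# Hypothetical values: per-call retry configuration overrides the class defaults.
response = autogen.Completion.create(
    config_list=config_list,
    prompt="What is the capital of France?",
    request_timeout=30,   # single-request timeout (seconds)
    max_retry_period=60,  # total retry budget (seconds)
    retry_wait_time=5,    # pause between retries (seconds)
)
```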