From dc55b294aef545c3b5bb87665a93f21e1e1bb3de Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Sat, 30 Sep 2023 09:21:07 -0700 Subject: [PATCH] make retry_time configurable, add doc (#53) * make retry_time configurable, add doc * in seconds * retry_wait_time * bump version to 0.1.4 * remove .json * rename * time --- autogen/oai/completion.py | 41 +++++++++++--------- autogen/version.py | 2 +- test/oai/test_completion.py | 2 +- test/twoagent.py | 2 +- website/docs/FAQ.md | 10 +++++ website/docs/Use-Cases/enhanced_inference.md | 6 ++- 6 files changed, 40 insertions(+), 23 deletions(-) diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index 229820efbb3a..af2088ad9747 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -105,9 +105,9 @@ class Completion(openai_Completion): seed = 41 cache_path = f".cache/{seed}" # retry after this many seconds - retry_time = 10 + retry_wait_time = 10 # fail a request after hitting RateLimitError for this many seconds - retry_timeout = 120 + max_retry_period = 120 # time out for request to openai server request_timeout = 60 @@ -181,7 +181,7 @@ def _book_keeping(cls, config: Dict, response): def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True): """Get the response from the openai api call. - Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time. + Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time. 
""" config = config.copy() openai.api_key_path = config.pop("api_key_path", openai.api_key_path) @@ -199,7 +199,8 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca ) start_time = time.time() request_timeout = cls.request_timeout - retry_timeout = config.pop("retry_timeout", cls.retry_timeout) + max_retry_period = config.pop("max_retry_period", cls.max_retry_period) + retry_wait_time = config.pop("retry_wait_time", cls.retry_wait_time) while True: try: if "request_timeout" in config: @@ -211,18 +212,18 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca APIConnectionError, ): # transient error - logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1) - sleep(cls.retry_time) + logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1) + sleep(retry_wait_time) except APIError as err: error_code = err and err.json_body and isinstance(err.json_body, dict) and err.json_body.get("error") error_code = error_code and error_code.get("code") if error_code == "content_filter": raise # transient error - logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1) - sleep(cls.retry_time) + logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1) + sleep(retry_wait_time) except (RateLimitError, Timeout) as err: - time_left = retry_timeout - (time.time() - start_time + cls.retry_time) + time_left = max_retry_period - (time.time() - start_time + retry_wait_time) if ( time_left > 0 and isinstance(err, RateLimitError) @@ -233,8 +234,8 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca if isinstance(err, Timeout): request_timeout <<= 1 request_timeout = min(request_timeout, time_left) - logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1) - sleep(cls.retry_time) + logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1) + sleep(retry_wait_time) elif raise_on_ratelimit_or_timeout: raise else: @@ -242,7 +243,7 @@ def 
_get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca if use_cache and isinstance(err, Timeout): cls._cache.set(key, response) logger.warning( - f"Failed to get response from openai api due to getting RateLimitError or Timeout for {retry_timeout} seconds." + f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds." ) return response except InvalidRequestError: @@ -743,9 +744,11 @@ def yes_or_no_filter(context, config, response): When set to False, -1 will be returned when all configs fail. allow_format_str_template (bool, Optional): Whether to allow format string template in the config. **config: Configuration for the openai API call. This is used as parameters for calling openai API. - Besides the parameters for the openai API call, it can also contain a seed (int) for the cache. - This is useful when implementing "controlled randomness" for the completion. - Also, the "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context. + The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context. + Besides the parameters for the openai API call, it can also contain: + - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests. + - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request. + - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion. Returns: Responses from OpenAI API, with additional fields. 
@@ -763,9 +766,9 @@ def yes_or_no_filter(context, config, response): base_config = config.copy() base_config["allow_format_str_template"] = allow_format_str_template base_config.update(each_config) - if i < last and filter_func is None and "retry_timeout" not in base_config: - # retry_timeout = 0 to avoid retrying when no filter is given - base_config["retry_timeout"] = 0 + if i < last and filter_func is None and "max_retry_period" not in base_config: + # max_retry_period = 0 to avoid retrying when no filter is given + base_config["max_retry_period"] = 0 try: response = cls.create( context, @@ -1103,7 +1106,7 @@ def stop_logging(cls): class ChatCompletion(Completion): - """A class for OpenAI API ChatCompletion.""" + """A class for OpenAI API ChatCompletion. Share the same API as Completion.""" default_search_space = Completion.default_search_space.copy() default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"]) diff --git a/autogen/version.py b/autogen/version.py index ae7362549b3c..bbab0242f6aa 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.1.3" +__version__ = "0.1.4" diff --git a/test/oai/test_completion.py b/test/oai/test_completion.py index 9ae952a0249d..934dbadc9016 100644 --- a/test/oai/test_completion.py +++ b/test/oai/test_completion.py @@ -227,7 +227,7 @@ def test_humaneval(num_samples=1): config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]), prompt="", max_tokens=1, - retry_timeout=0, + max_retry_period=0, raise_on_ratelimit_or_timeout=False, ) # assert response == -1 diff --git a/test/twoagent.py b/test/twoagent.py index cc5c435d4858..e2e1818e8ade 100644 --- a/test/twoagent.py +++ b/test/twoagent.py @@ -2,7 +2,7 @@ # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints -# and OAI_CONFIG_LIST_sample.json +# and OAI_CONFIG_LIST_sample config_list = 
config_list_from_json(env_or_file="OAI_CONFIG_LIST") assistant = AssistantAgent("assistant", llm_config={"config_list": config_list}) user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"}) diff --git a/website/docs/FAQ.md b/website/docs/FAQ.md index 0e77ad275f3f..3cd0ae048feb 100644 --- a/website/docs/FAQ.md +++ b/website/docs/FAQ.md @@ -99,3 +99,13 @@ You can also explicitly specify that by: ```python assistant = autogen.AssistantAgent(name="assistant", llm_config={"api_key": ...}) ``` + +## Handle Rate Limit Error and Timeout Error + +You can set `retry_wait_time` and `max_retry_period` to handle rate limit errors. And you can set `request_timeout` to handle timeout errors. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference. + +- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request. +- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests. +- `request_timeout` (int): the timeout (in seconds) sent with a single request. + +Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info. diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index d50d67b81c5b..0d33fa181377 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -123,7 +123,11 @@ API call results are cached locally and reused when the same request is issued. ### Runtime error -It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Initial request timeout, retry timeout and retry time interval can be configured via `request_timeout`, `retry_timeout` and `autogen.Completion.retry_time`. 
+It is easy to hit errors when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`. + +- `request_timeout` (int): the timeout (in seconds) sent with a single request. +- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests. +- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request. Moreover, one can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,