diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 3ae37257b21e..8f6e3f185b6a 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -279,7 +279,12 @@ def create(self, params: Dict[str, Any]) -> ChatCompletion: # Prepare the final ChatCompletion object based on the accumulated data model = chunk.model.replace("gpt-35", "gpt-3.5") # hack for Azure API - prompt_tokens = count_token(params["messages"], model) + try: + prompt_tokens = count_token(params["messages"], model) + except NotImplementedError as e: + # Catch token calculation error if streaming with customized models. + logger.warning(str(e)) + prompt_tokens = 0 response = ChatCompletion( id=chunk.id, model=chunk.model,