diff --git a/mteb/models/openai_models.py b/mteb/models/openai_models.py
index c187bfa317..619a4a747f 100644
--- a/mteb/models/openai_models.py
+++ b/mteb/models/openai_models.py
@@ -25,7 +25,7 @@ def __init__(
         **kwargs,
     ) -> None:
         """Wrapper for OpenAIs embedding API.
-        To handle documents larger than 8192 tokens, we truncate the document to the specified sequence length.
+        To handle documents larger than 8191 tokens, we truncate the document to the specified sequence length.
         """
         requires_package(self, "openai", "Openai text embedding")
         from openai import OpenAI
@@ -124,7 +124,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
         OpenAIWrapper,
         model_name="text-embedding-3-small",
         tokenizer_name="cl100k_base",
-        max_tokens=8192,
+        max_tokens=8191,
     ),
     max_tokens=8191,
     embed_dim=1536,
@@ -149,7 +149,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
         OpenAIWrapper,
         model_name="text-embedding-3-large",
         tokenizer_name="cl100k_base",
-        max_tokens=8192,
+        max_tokens=8191,
     ),
     max_tokens=8191,
     embed_dim=3072,
@@ -172,7 +172,7 @@ def _to_numpy(self, embedding_response) -> np.ndarray:
         OpenAIWrapper,
         model_name="text-embedding-ada-002",
         tokenizer_name="cl100k_base",
-        max_tokens=8192,
+        max_tokens=8191,
     ),
     reference="https://openai.com/index/new-and-improved-embedding-model/",
     max_tokens=8191,
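
For context: 8191 is the actual input limit of OpenAI's cl100k_base-based embedding endpoints, so truncating at 8192 left one token too many. Below is a minimal sketch of the truncation the docstring describes, assuming tiktoken is installed; `truncate_to_max_tokens` is an illustrative helper, not a function from the mteb codebase.

```python
# A minimal sketch of the truncation described in the docstring above.
# Assumes tiktoken is installed; truncate_to_max_tokens is an illustrative
# helper, not part of mteb's OpenAIWrapper.
import tiktoken


def truncate_to_max_tokens(text: str, max_tokens: int = 8191) -> str:
    """Truncate `text` so it encodes to at most `max_tokens` tokens."""
    # cl100k_base is the tokenizer named in the ModelMeta entries above.
    encoding = tiktoken.get_encoding("cl100k_base")
    tokens = encoding.encode(text)
    # The embedding endpoint rejects inputs longer than 8191 tokens,
    # which is why this diff lowers max_tokens from 8192 to 8191.
    return encoding.decode(tokens[:max_tokens])
```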