vllm-project · vllm-bot · Apr 24, 2025 · Apr 22, 2025 · Apr 22, 2025 · Apr 23, 2025
diff --git a/docs/source/models/pooling_models.md b/docs/source/models/pooling_models.md
@@ -159,14 +159,14 @@ For example, setting `dimensions` parameter while using the `BAAI/bge-m3` model
 
 ### Manually enable Matryoshka Embeddings
 
-There is currently no official interface for specifying support for Matryoshka Embeddings. In vLLM, we simply check the existence of the fields `is_matryoshka` or `matryoshka_dimensions` inside `config.json`.
+There is currently no official interface for specifying support for Matryoshka Embeddings. In vLLM, if `is_matryoshka` is `True` in `config.json`, the output can be changed to arbitrary dimensions. `matryoshka_dimensions` can be used to control the allowed output dimensions.
 
-For models that support Matryoshka Embeddings but not recognized by vLLM, please manually override the config using `hf_overrides={"is_matryoshka": True}` (offline) or `--hf_overrides '{"is_matryoshka": true}'` (online).
+For models that support Matryoshka Embeddings but are not recognized by vLLM, please manually override the config using `hf_overrides={"is_matryoshka": True}` or `hf_overrides={"matryoshka_dimensions": [allowed output dimensions]}` (offline), or `--hf_overrides '{"is_matryoshka": true}'` or `--hf_overrides '{"matryoshka_dimensions": [allowed output dimensions]}'` (online).
 
 Here is an example to serve a model with Matryoshka Embeddings enabled.
 
 ```text
-vllm serve Snowflake/snowflake-arctic-embed-m-v1.5 --hf_overrides '{"is_matryoshka":true}'
+vllm serve Snowflake/snowflake-arctic-embed-m-v1.5 --hf_overrides '{"matryoshka_dimensions":[256]}'
 ```
 
 ### Offline Inference
@@ -204,14 +204,14 @@ curl http://127.0.0.1:8000/v1/embeddings \
     "input": "Follow the white rabbit.",
     "model": "jinaai/jina-embeddings-v3",
     "encoding_format": "float",
-    "dimensions": 1
+    "dimensions": 32
   }'
 ```
 
 Expected output:
 
 ```json
-{"id":"embd-0aab28c384d348c3b8f0eb783109dc5f","object":"list","created":1744195454,"model":"jinaai/jina-embeddings-v3","data":[{"index":0,"object":"embedding","embedding":[-1.0]}],"usage":{"prompt_tokens":10,"total_tokens":10,"completion_tokens":0,"prompt_tokens_details":null}}
+{"id":"embd-0aab28c384d348c3b8f0eb783109dc5f","object":"list","created":1744195454,"model":"jinaai/jina-embeddings-v3","data":[{"index":0,"object":"embedding","embedding":["32 floating-point numbers"]}],"usage":{"prompt_tokens":10,"total_tokens":10,"completion_tokens":0,"prompt_tokens_details":null}}
 ```
 
 A openai client example can be found here: <gh-file:examples/online_serving/openai_embedding_matryoshka_fy.py>
diff --git a/examples/online_serving/openai_embedding_matryoshka_fy.py b/examples/online_serving/openai_embedding_matryoshka_fy.py
@@ -25,11 +25,11 @@ def main():
     responses = client.embeddings.create(
         input=["Follow the white rabbit."],
         model=model,
-        dimensions=1,
+        dimensions=32,
     )
 
     for data in responses.data:
-        print(data.embedding)  # List of float of len 1
+        print(data.embedding)  # List of float of len 32
 
 
 if __name__ == "__main__":