@@ -18,7 +18,7 @@
 # - Different model results in differences more than 1e-3
 # 1e-4 is a good tolerance threshold
 MTEB_EMBED_TASKS = ["STS12"]
-MTEB_EMBED_TOL = 1e-4
+MTEB_EMBED_TOL = 0.02
 
 # See #19344
 MTEB_RERANK_TASKS = ["NFCorpus"]
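Note (a sketch, not part of this diff): the loosened MTEB_EMBED_TOL is presumably consumed where the vLLM and SentenceTransformers main scores are compared; assuming a pytest-style assertion, that check would look roughly like this:

import pytest

MTEB_EMBED_TOL = 0.02

def assert_scores_close(st_main_score: float, vllm_main_score: float) -> None:
    # The two MTEB main scores must agree to within +/- MTEB_EMBED_TOL.
    assert st_main_score == pytest.approx(vllm_main_score, abs=MTEB_EMBED_TOL)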
@@ -175,6 +175,7 @@ def mteb_test_embed_models(hf_runner,
     with vllm_runner(model_info.name,
                      runner="pooling",
                      max_model_len=None,
+                     enforce_eager=True,
                      **vllm_extra_kwargs) as vllm_model:
 
         model_config = vllm_model.llm.llm_engine.model_config
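For context (a minimal sketch under the assumption that the vllm_runner fixture forwards these keyword arguments to vllm.LLM): enforce_eager=True makes vLLM skip CUDA graph capture and run the model in eager PyTorch mode, so an equivalent standalone construction would look roughly like this; the model name is a placeholder, not taken from this diff.

from vllm import LLM

llm = LLM(
    model="intfloat/e5-small",  # placeholder embedding model, not from this diff
    runner="pooling",
    enforce_eager=True,  # run eagerly instead of capturing CUDA graphs
)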
@@ -198,6 +199,7 @@ def mteb_test_embed_models(hf_runner,
         st_main_score = run_mteb_embed_task(hf_model, MTEB_EMBED_TASKS)
         st_dtype = next(hf_model.model.parameters()).dtype
 
+    print("Model:", model_info.name)
     print("VLLM:", vllm_dtype, vllm_main_score)
     print("SentenceTransformers:", st_dtype, st_main_score)
     print("Difference:", st_main_score - vllm_main_score)
@@ -286,6 +288,7 @@ def mteb_test_rerank_models(hf_runner,
                      runner="pooling",
                      max_model_len=None,
                      max_num_seqs=8,
+                     enforce_eager=True,
                      **vllm_extra_kwargs) as vllm_model:
 
         model_config = vllm_model.llm.llm_engine.model_config
@@ -304,6 +307,7 @@ def mteb_test_rerank_models(hf_runner,
     st_main_score, st_dtype = mteb_test_rerank_models_hf(
         hf_runner, model_info.name, hf_model_callback)
 
+    print("Model:", model_info.name)
     print("VLLM:", vllm_dtype, vllm_main_score)
     print("SentenceTransformers:", st_dtype, st_main_score)
     print("Difference:", st_main_score - vllm_main_score)