diff --git a/mteb/models/model2vec_models.py b/mteb/models/model2vec_models.py
index ee79f1cafa..2b0a2ed02c 100644
--- a/mteb/models/model2vec_models.py
+++ b/mteb/models/model2vec_models.py
@@ -235,3 +235,122 @@ def encode(
     public_training_code="https://github.com/MinishLab/model2vec",
     public_training_data=None,
 )
+
+pubmed_bert_100k = ModelMeta(  # metadata entry for NeuML/pubmedbert-base-embeddings-100K
+    loader=partial(  # binds model_name only; assumes `partial` is functools.partial - confirm
+        Model2VecWrapper, model_name="NeuML/pubmedbert-base-embeddings-100K"
+    ),
+    name="NeuML/pubmedbert-base-embeddings-100K",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="bac5e3b12fb8c650e92a19c41b436732c4f16e9e",  # pinned revision; presumably a Hub commit hash - confirm
+    release_date="2025-01-03",
+    n_parameters=1 * 1e5,  # evaluates to the float 100000.0
+    memory_usage_mb=0,  # NOTE(review): 0 MB - presumably a sub-megabyte size rounded down; confirm
+    max_tokens=np.inf,  # infinite: no finite token limit is declared
+    embed_dim=64,
+    license="apache-2.0",
+    similarity_fn_name="cosine",
+    framework=["NumPy"],
+    reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-100K",
+    use_instructions=False,
+    adapted_from="NeuML/pubmedbert-base-embeddings",  # all five pubmedbert entries name this same base
+    superseded_by=None,
+    training_datasets={},  # empty dict: no training datasets are listed
+    public_training_code="https://huggingface.co/NeuML/pubmedbert-base-embeddings-100K#training",
+    public_training_data="https://pubmed.ncbi.nlm.nih.gov/download/",
+)
+
+pubmed_bert_500k = ModelMeta(  # metadata entry for NeuML/pubmedbert-base-embeddings-500K
+    loader=partial(
+        Model2VecWrapper, model_name="NeuML/pubmedbert-base-embeddings-500K"
+    ),
+    name="NeuML/pubmedbert-base-embeddings-500K",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="34ba71e35c393fdad7ed695113f653feb407b16b",
+    release_date="2025-01-03",
+    n_parameters=5 * 1e5,  # evaluates to the float 500000.0
+    memory_usage_mb=2,
+    max_tokens=np.inf,  # infinite: no finite token limit is declared
+    embed_dim=64,
+    license="apache-2.0",
+    similarity_fn_name="cosine",
+    framework=["NumPy"],
+    reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-500K",
+    use_instructions=False,
+    adapted_from="NeuML/pubmedbert-base-embeddings",
+    superseded_by=None,
+    training_datasets={},  # empty dict: no training datasets are listed
+    public_training_code="https://huggingface.co/NeuML/pubmedbert-base-embeddings-500K#training",
+    public_training_data="https://pubmed.ncbi.nlm.nih.gov/download/",
+)
+
+pubmed_bert_1m = ModelMeta(  # metadata entry for NeuML/pubmedbert-base-embeddings-1M
+    loader=partial(Model2VecWrapper, model_name="NeuML/pubmedbert-base-embeddings-1M"),
+    name="NeuML/pubmedbert-base-embeddings-1M",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="2b7fed222594708da6d88bcda92ae9b434b7ddd1",
+    release_date="2025-01-03",
+    n_parameters=1 * 1e6,  # evaluates to the float 1000000.0
+    memory_usage_mb=2,  # NOTE(review): same value as the 500K entry despite twice the n_parameters - confirm
+    max_tokens=np.inf,  # infinite: no finite token limit is declared
+    embed_dim=64,
+    license="apache-2.0",
+    similarity_fn_name="cosine",
+    framework=["NumPy"],
+    reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-1M",
+    use_instructions=False,
+    adapted_from="NeuML/pubmedbert-base-embeddings",
+    superseded_by=None,
+    training_datasets={},  # empty dict: no training datasets are listed
+    public_training_code="https://huggingface.co/NeuML/pubmedbert-base-embeddings-1M#training",
+    public_training_data="https://pubmed.ncbi.nlm.nih.gov/download/",
+)
+
+pubmed_bert_2m = ModelMeta(  # metadata entry for NeuML/pubmedbert-base-embeddings-2M
+    loader=partial(Model2VecWrapper, model_name="NeuML/pubmedbert-base-embeddings-2M"),
+    name="NeuML/pubmedbert-base-embeddings-2M",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="1d7bbe04d6713e425161146bfdc71473cbed498a",
+    release_date="2025-01-03",
+    n_parameters=1.95 * 1e6,  # float product, roughly 1.95 million
+    memory_usage_mb=7,
+    max_tokens=np.inf,  # infinite: no finite token limit is declared
+    embed_dim=64,
+    license="apache-2.0",
+    similarity_fn_name="cosine",
+    framework=["NumPy"],
+    reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-2M",
+    use_instructions=False,
+    adapted_from="NeuML/pubmedbert-base-embeddings",
+    superseded_by=None,
+    training_datasets={},  # empty dict: no training datasets are listed
+    public_training_code="https://huggingface.co/NeuML/pubmedbert-base-embeddings-2M#training",
+    public_training_data="https://pubmed.ncbi.nlm.nih.gov/download/",
+)
+
+pubmed_bert_8m = ModelMeta(  # metadata entry for NeuML/pubmedbert-base-embeddings-8M
+    loader=partial(Model2VecWrapper, model_name="NeuML/pubmedbert-base-embeddings-8M"),
+    name="NeuML/pubmedbert-base-embeddings-8M",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="387d350015e963744f4fafe56a574b7cd48646c9",
+    release_date="2025-01-03",
+    n_parameters=7.81 * 1e6,  # float product, roughly 7.81 million
+    memory_usage_mb=30,
+    max_tokens=np.inf,  # infinite: no finite token limit is declared
+    embed_dim=256,  # 256 here; the four smaller pubmedbert entries use 64
+    license="apache-2.0",
+    similarity_fn_name="cosine",
+    framework=["NumPy"],
+    reference="https://huggingface.co/NeuML/pubmedbert-base-embeddings-8M",
+    use_instructions=False,
+    adapted_from="NeuML/pubmedbert-base-embeddings",
+    superseded_by=None,
+    training_datasets={},  # empty dict: no training datasets are listed
+    public_training_code="https://huggingface.co/NeuML/pubmedbert-base-embeddings-8M#training",
+    public_training_data="https://pubmed.ncbi.nlm.nih.gov/download/",
+)