From 76d2e962a303cac27f50990a7adb54f80adc8cdd Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Fri, 3 Jan 2025 08:54:11 +0000 Subject: [PATCH 01/33] add model loading test for models below 2B params --- tests/test_models/test_model_loading.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tests/test_models/test_model_loading.py diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py new file mode 100644 index 0000000000..fa4e5c6436 --- /dev/null +++ b/tests/test_models/test_model_loading.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import logging + +import pytest + +from mteb import get_model +from mteb.models.overview import MODEL_REGISTRY + +logging.basicConfig(level=logging.INFO) + + +@pytest.mark.parametrize("model_name", MODEL_REGISTRY.keys()) +def test_get_all_models_below_n_param_threshold(model_name: str): +    """Test that we can get all models with a number of parameters below a threshold.""" +    model_meta = MODEL_REGISTRY.get_model_meta(model_name=model_name) +    assert model_meta is not None +    if model_meta.n_parameters is not None and model_meta.n_parameters < 2e9: +        m = get_model(model_name) +        assert m is not None From a9d0c442c4f381542d29fa6bd8711b7e8964f777 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Fri, 3 Jan 2025 08:54:59 +0000 Subject: [PATCH 02/33] add failure message to include model name --- tests/test_models/test_model_loading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index fa4e5c6436..d72101f6fe 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -17,4 +17,4 @@ def test_get_all_models_below_n_param_threshold(model_name: str): assert model_meta is not None if model_meta.n_parameters is not None and model_meta.n_parameters < 2e9: m = get_model(model_name) - assert m is not None + assert m is not None, 
f"Failed to load model {model_name}" From 766aad28a073bcc121d0954e17977bd0344c964e Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Fri, 3 Jan 2025 09:54:54 +0000 Subject: [PATCH 03/33] use the real get_model_meta --- tests/test_models/test_model_loading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index d72101f6fe..aa367168aa 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -4,7 +4,7 @@ import pytest -from mteb import get_model +from mteb import get_model, get_model_meta from mteb.models.overview import MODEL_REGISTRY logging.basicConfig(level=logging.INFO) @@ -13,7 +13,7 @@ @pytest.mark.parametrize("model_name", MODEL_REGISTRY.keys()) def test_get_all_models_below_n_param_threshold(model_name: str): """Test that we can get all models with a number of parameters below a threshold.""" - model_meta = MODEL_REGISTRY.get_model_meta(model_name=model_name) + model_meta = get_model_meta(model_name=model_name) assert model_meta is not None if model_meta.n_parameters is not None and model_meta.n_parameters < 2e9: m = get_model(model_name) From 230d4f2ecd5e7b78af1e3b3d10e16d1c7e790083 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Fri, 3 Jan 2025 10:30:56 +0000 Subject: [PATCH 04/33] use cache folder --- tests/test_models/test_model_loading.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index aa367168aa..2d87a34603 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +from pathlib import Path import pytest @@ -10,11 +11,28 @@ logging.basicConfig(level=logging.INFO) +CACHE_FOLDER = Path(__file__).parent / ".cache" + + +@pytest.fixture(scope="module", autouse=True) +def 
setup_and_teardown(): + # Setup code: create cache folder if it doesn't exist + CACHE_FOLDER.mkdir(parents=True, exist_ok=True) + yield + # Teardown code: remove cache folder and its contents + for item in CACHE_FOLDER.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + item.rmdir() + CACHE_FOLDER.rmdir() + + @pytest.mark.parametrize("model_name", MODEL_REGISTRY.keys()) def test_get_all_models_below_n_param_threshold(model_name: str): """Test that we can get all models with a number of parameters below a threshold.""" model_meta = get_model_meta(model_name=model_name) assert model_meta is not None if model_meta.n_parameters is not None and model_meta.n_parameters < 2e9: - m = get_model(model_name) + m = get_model(model_name, cache_folder=CACHE_FOLDER) assert m is not None, f"Failed to load model {model_name}" From 99abdb5c13d56d9f4f2b5811883dbd2637f986c5 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Fri, 3 Jan 2025 11:00:51 +0000 Subject: [PATCH 05/33] teardown per function --- tests/test_models/test_model_loading.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index 2d87a34603..1e0f9078c3 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -14,18 +14,14 @@ CACHE_FOLDER = Path(__file__).parent / ".cache" -@pytest.fixture(scope="module", autouse=True) -def setup_and_teardown(): - # Setup code: create cache folder if it doesn't exist - CACHE_FOLDER.mkdir(parents=True, exist_ok=True) - yield +@pytest.fixture(scope="function", autouse=True) +def teardown(): # Teardown code: remove cache folder and its contents for item in CACHE_FOLDER.iterdir(): if item.is_file(): item.unlink() elif item.is_dir(): item.rmdir() - CACHE_FOLDER.rmdir() @pytest.mark.parametrize("model_name", MODEL_REGISTRY.keys()) From 0cbdaa01a20ca94de5da4d660f4451470ea51153 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: 
Fri, 3 Jan 2025 11:34:40 +0000 Subject: [PATCH 06/33] fix directory removal --- tests/test_models/test_model_loading.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index 1e0f9078c3..57e8a4d6e7 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import shutil from pathlib import Path import pytest @@ -14,14 +15,13 @@ CACHE_FOLDER = Path(__file__).parent / ".cache" -@pytest.fixture(scope="function", autouse=True) -def teardown(): - # Teardown code: remove cache folder and its contents +def teardown_function(): + """Remove cache folder and its contents""" for item in CACHE_FOLDER.iterdir(): if item.is_file(): item.unlink() elif item.is_dir(): - item.rmdir() + shutil.rmtree(item) @pytest.mark.parametrize("model_name", MODEL_REGISTRY.keys()) From 59cc65bc662259af3bf8135e92e547896523129c Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sat, 4 Jan 2025 13:36:18 +0000 Subject: [PATCH 07/33] write to file --- scripts/test_model_loading.py | 70 +++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 scripts/test_model_loading.py diff --git a/scripts/test_model_loading.py b/scripts/test_model_loading.py new file mode 100644 index 0000000000..9c6b8d7f83 --- /dev/null +++ b/scripts/test_model_loading.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import json +import logging +import shutil +from pathlib import Path + +from mteb import get_model, get_model_meta +from mteb.models.overview import MODEL_REGISTRY + +logging.basicConfig(level=logging.INFO) + + +CACHE_FOLDER = Path(__file__).parent / ".cache" + + +def teardown_function(): + """Remove cache folder and its contents""" + for item in CACHE_FOLDER.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + shutil.rmtree(item) + + +def 
get_model_below_n_param_threshold(model_name: str) -> str: + """Test that we can get all models with a number of parameters below a threshold.""" + model_meta = get_model_meta(model_name=model_name) + assert model_meta is not None + if model_meta.n_parameters is not None: + if model_meta.n_parameters >= 2e9: + return "Over threshold. Not tested." + elif "API" in model_meta.framework: + try: + m = get_model(model_name) + if m is not None: + return "None" + except Exception as e: + logging.warning(f"Failed to load model {model_name} with error {e}") + return e.__str__() + try: + m = get_model(model_name, cache_folder=CACHE_FOLDER) + if m is not None: + return "None" + except Exception as e: + logging.warning(f"Failed to load model {model_name} with error {e}") + return e.__str__() + finally: + teardown_function() + + +if __name__ == "__main__": + output_file = Path(__file__).parent / "failures.json" + + # Load existing results if the file exists + results = {} + if output_file.exists(): + with output_file.open("r") as f: + results = json.load(f) + + all_model_names = list(MODEL_REGISTRY.keys()) + for model_name in all_model_names: + error_msg = get_model_below_n_param_threshold(model_name) + results[model_name] = error_msg + + results = dict(sorted(results.items())) + + # Write the results to the file after each iteration + with output_file.open("w") as f: + json.dump(results, f, indent=4) From ea1d21fa614ae43162fb7991ecc8915919d0cd10 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sat, 4 Jan 2025 13:43:13 +0000 Subject: [PATCH 08/33] wip loading from before --- scripts/failures.json | 23 +++++++++++++++++++++++ scripts/test_model_loading.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 scripts/failures.json diff --git a/scripts/failures.json b/scripts/failures.json new file mode 100644 index 0000000000..8a43643e51 --- /dev/null +++ b/scripts/failures.json @@ -0,0 +1,23 @@ +{ + "BAAI/bge-base-en-v1.5": "None", + "BAAI/bge-large-en-v1.5": 
"None", + "BAAI/bge-small-en-v1.5": "None", + "Cohere/Cohere-embed-english-light-v3.0": null, + "Cohere/Cohere-embed-english-v3.0": null, + "Cohere/Cohere-embed-multilingual-light-v3.0": null, + "Cohere/Cohere-embed-multilingual-v3.0": null, + "Snowflake/snowflake-arctic-embed-l": "None", + "Snowflake/snowflake-arctic-embed-l-v2.0": "None", + "Snowflake/snowflake-arctic-embed-m": "None", + "Snowflake/snowflake-arctic-embed-m-long": "None", + "Snowflake/snowflake-arctic-embed-m-v1.5": "None", + "Snowflake/snowflake-arctic-embed-m-v2.0": "None", + "Snowflake/snowflake-arctic-embed-s": "None", + "Snowflake/snowflake-arctic-embed-xs": "None", + "bm25s": null, + "colbert-ir/colbertv2.0": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", + "intfloat/e5-mistral-7b-instruct": "Over threshold. Not tested.", + "intfloat/multilingual-e5-large-instruct": "XLMRobertaModel.__init__() got an unexpected keyword argument 'cache_folder'", + "intfloat/multilingual-e5-small": "None", + "jinaai/jina-colbert-v2": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`." 
+} \ No newline at end of file diff --git a/scripts/test_model_loading.py b/scripts/test_model_loading.py index 9c6b8d7f83..f76bf1e2b9 100644 --- a/scripts/test_model_loading.py +++ b/scripts/test_model_loading.py @@ -58,7 +58,7 @@ def get_model_below_n_param_threshold(model_name: str) -> str: with output_file.open("r") as f: results = json.load(f) - all_model_names = list(MODEL_REGISTRY.keys()) + all_model_names = list(MODEL_REGISTRY.keys()) - list(results.keys()) for model_name in all_model_names: error_msg = get_model_below_n_param_threshold(model_name) results[model_name] = error_msg From 129e8ccf6ff201b9a6e2b43b191d018af1a37a4c Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sat, 4 Jan 2025 14:05:16 +0000 Subject: [PATCH 09/33] wip --- scripts/failures.json | 10 +++++++++- ...{test_model_loading.py => model_loading_testing.py} | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) rename scripts/{test_model_loading.py => model_loading_testing.py} (96%) diff --git a/scripts/failures.json b/scripts/failures.json index 8a43643e51..6f7e5123e4 100644 --- a/scripts/failures.json +++ b/scripts/failures.json @@ -6,6 +6,8 @@ "Cohere/Cohere-embed-english-v3.0": null, "Cohere/Cohere-embed-multilingual-light-v3.0": null, "Cohere/Cohere-embed-multilingual-v3.0": null, + "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": "None", + "Salesforce/SFR-Embedding-2_R": "Over threshold. Not tested.", "Snowflake/snowflake-arctic-embed-l": "None", "Snowflake/snowflake-arctic-embed-l-v2.0": "None", "Snowflake/snowflake-arctic-embed-m": "None", @@ -14,10 +16,16 @@ "Snowflake/snowflake-arctic-embed-m-v2.0": "None", "Snowflake/snowflake-arctic-embed-s": "None", "Snowflake/snowflake-arctic-embed-xs": "None", + "ai-forever/sbert_large_mt_nlu_ru": "None", "bm25s": null, "colbert-ir/colbertv2.0": "To use the ColBERT models `pylate` is required. 
Please install it with `pip install mteb[pylate]`.", + "google/flan-t5-large": null, "intfloat/e5-mistral-7b-instruct": "Over threshold. Not tested.", "intfloat/multilingual-e5-large-instruct": "XLMRobertaModel.__init__() got an unexpected keyword argument 'cache_folder'", "intfloat/multilingual-e5-small": "None", - "jinaai/jina-colbert-v2": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`." + "jinaai/jina-colbert-v2": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", + "nvidia/NV-Embed-v1": "Over threshold. Not tested.", + "samaya-ai/promptriever-llama3.1-8b-instruct-v1": "To use the RepLLaMA based models `peft` is required. Please install it with `pip install 'mteb[peft]'`.", + "thenlper/gte-base": "None", + "unicamp-dl/mt5-13b-mmarco-100k": null } \ No newline at end of file diff --git a/scripts/test_model_loading.py b/scripts/model_loading_testing.py similarity index 96% rename from scripts/test_model_loading.py rename to scripts/model_loading_testing.py index f76bf1e2b9..ef665377f4 100644 --- a/scripts/test_model_loading.py +++ b/scripts/model_loading_testing.py @@ -58,7 +58,7 @@ def get_model_below_n_param_threshold(model_name: str) -> str: with output_file.open("r") as f: results = json.load(f) - all_model_names = list(MODEL_REGISTRY.keys()) - list(results.keys()) + all_model_names = list(set(MODEL_REGISTRY.keys()) - set(results.keys())) for model_name in all_model_names: error_msg = get_model_below_n_param_threshold(model_name) results[model_name] = error_msg From 8fbb48faa2434ea383f00c4ffdf635f574d0368c Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sat, 4 Jan 2025 16:30:29 +0200 Subject: [PATCH 10/33] Rename model_loading_testing.py to model_loading.py --- scripts/{model_loading_testing.py => model_loading.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{model_loading_testing.py => model_loading.py} (100%) diff --git 
a/scripts/model_loading_testing.py b/scripts/model_loading.py similarity index 100% rename from scripts/model_loading_testing.py rename to scripts/model_loading.py From fb95ee7e441d4bae967f8545a8917eee7d0e9a87 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sat, 4 Jan 2025 16:40:32 +0200 Subject: [PATCH 11/33] Delete tests/test_models/test_model_loading.py --- tests/test_models/test_model_loading.py | 34 ------------------------- 1 file changed, 34 deletions(-) delete mode 100644 tests/test_models/test_model_loading.py diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py deleted file mode 100644 index 57e8a4d6e7..0000000000 --- a/tests/test_models/test_model_loading.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import annotations - -import logging -import shutil -from pathlib import Path - -import pytest - -from mteb import get_model, get_model_meta -from mteb.models.overview import MODEL_REGISTRY - -logging.basicConfig(level=logging.INFO) - - -CACHE_FOLDER = Path(__file__).parent / ".cache" - - -def teardown_function(): - """Remove cache folder and its contents""" - for item in CACHE_FOLDER.iterdir(): - if item.is_file(): - item.unlink() - elif item.is_dir(): - shutil.rmtree(item) - - -@pytest.mark.parametrize("model_name", MODEL_REGISTRY.keys()) -def test_get_all_models_below_n_param_threshold(model_name: str): - """Test that we can get all models with a number of parameters below a threshold.""" - model_meta = get_model_meta(model_name=model_name) - assert model_meta is not None - if model_meta.n_parameters is not None and model_meta.n_parameters < 2e9: - m = get_model(model_name, cache_folder=CACHE_FOLDER) - assert m is not None, f"Failed to load model {model_name}" From 41c4b5c40d309a89a5b8a6f4467a99fe591d62a4 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sun, 5 Jan 2025 09:36:07 +0000 Subject: [PATCH 12/33] checks for models below 2B --- scripts/failures.json | 225 ++++++++++++++++++++++++++++++++++++------ 1 
file changed, 194 insertions(+), 31 deletions(-) diff --git a/scripts/failures.json b/scripts/failures.json index 6f7e5123e4..18af55ae0e 100644 --- a/scripts/failures.json +++ b/scripts/failures.json @@ -1,31 +1,194 @@ -{ - "BAAI/bge-base-en-v1.5": "None", - "BAAI/bge-large-en-v1.5": "None", - "BAAI/bge-small-en-v1.5": "None", - "Cohere/Cohere-embed-english-light-v3.0": null, - "Cohere/Cohere-embed-english-v3.0": null, - "Cohere/Cohere-embed-multilingual-light-v3.0": null, - "Cohere/Cohere-embed-multilingual-v3.0": null, - "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": "None", - "Salesforce/SFR-Embedding-2_R": "Over threshold. Not tested.", - "Snowflake/snowflake-arctic-embed-l": "None", - "Snowflake/snowflake-arctic-embed-l-v2.0": "None", - "Snowflake/snowflake-arctic-embed-m": "None", - "Snowflake/snowflake-arctic-embed-m-long": "None", - "Snowflake/snowflake-arctic-embed-m-v1.5": "None", - "Snowflake/snowflake-arctic-embed-m-v2.0": "None", - "Snowflake/snowflake-arctic-embed-s": "None", - "Snowflake/snowflake-arctic-embed-xs": "None", - "ai-forever/sbert_large_mt_nlu_ru": "None", - "bm25s": null, - "colbert-ir/colbertv2.0": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", - "google/flan-t5-large": null, - "intfloat/e5-mistral-7b-instruct": "Over threshold. Not tested.", - "intfloat/multilingual-e5-large-instruct": "XLMRobertaModel.__init__() got an unexpected keyword argument 'cache_folder'", - "intfloat/multilingual-e5-small": "None", - "jinaai/jina-colbert-v2": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", - "nvidia/NV-Embed-v1": "Over threshold. Not tested.", - "samaya-ai/promptriever-llama3.1-8b-instruct-v1": "To use the RepLLaMA based models `peft` is required. 
Please install it with `pip install 'mteb[peft]'`.", - "thenlper/gte-base": "None", - "unicamp-dl/mt5-13b-mmarco-100k": null -} \ No newline at end of file +{"Alibaba-NLP/gte-Qwen1.5-7B-instruct": "Over threshold. Not tested.", + "Alibaba-NLP/gte-Qwen2-1.5B-instruct": "Over threshold. Not tested.", + "Alibaba-NLP/gte-Qwen2-7B-instruct": "Over threshold. Not tested.", + "BAAI/bge-base-en-v1.5": "None", + "BAAI/bge-large-en-v1.5": "Over threshold. Not tested.", + "BAAI/bge-reranker-v2-m3": "None", + "BAAI/bge-small-en-v1.5": "None", + "BeastyZ/e5-R-mistral-7b": "Over threshold. Not tested.", + "Cohere/Cohere-embed-english-light-v3.0": "None", + "Cohere/Cohere-embed-english-v3.0": "None", + "Cohere/Cohere-embed-multilingual-light-v3.0": "None", + "Cohere/Cohere-embed-multilingual-v3.0": "None", + "DeepPavlov/distilrubert-small-cased-conversational": "None", + "DeepPavlov/rubert-base-cased": "Over threshold. Not tested.", + "DeepPavlov/rubert-base-cased-sentence": "None", + "Gameselo/STS-multilingual-mpnet-base-v2": "None", + "GritLM/GritLM-7B": "Over threshold. Not tested.", + "GritLM/GritLM-8x7B": "Over threshold. Not tested.", + "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": "None", + "HIT-TMG/KaLM-embedding-multilingual-mini-v1": "None", + "Haon-Chen/speed-embedding-7b-instruct": "Over threshold. Not tested.", + "Hum-Works/lodestone-base-4096-v1": "None", + "Jaume/gemma-2b-embeddings": "Over threshold. Not tested.", + "Lajavaness/bilingual-embedding-base": "None", + "Lajavaness/bilingual-embedding-large": "None", + "Lajavaness/bilingual-embedding-small": "None", + "Linq-AI-Research/Linq-Embed-Mistral": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised": "Over threshold. 
Not tested.", + "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse": "Over threshold. Not tested.", + "Mihaiii/Bulbasaur": "None", + "Mihaiii/Ivysaur": "None", + "Mihaiii/Squirtle": "None", + "Mihaiii/Venusaur": "None", + "Mihaiii/Wartortle": "None", + "Mihaiii/gte-micro": "None", + "Mihaiii/gte-micro-v4": "None", + "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": "None", + "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": "None", + "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": "None", + "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": "None", + "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": "None", + "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": "None", + "OrdalieTech/Solon-embeddings-large-0.1": "None", + "OrlikB/KartonBERT-USE-base-v1": "None", + "OrlikB/st-polish-kartonberta-base-alpha-v1": "None", + "Salesforce/SFR-Embedding-2_R": "Over threshold. Not tested.", + "Salesforce/SFR-Embedding-Mistral": "Over threshold. 
Not tested.", + "Snowflake/snowflake-arctic-embed-l": "None", + "Snowflake/snowflake-arctic-embed-l-v2.0": "None", + "Snowflake/snowflake-arctic-embed-m": "None", + "Snowflake/snowflake-arctic-embed-m-long": "None", + "Snowflake/snowflake-arctic-embed-m-v1.5": "None", + "Snowflake/snowflake-arctic-embed-m-v2.0": "please install xformers", + "Snowflake/snowflake-arctic-embed-s": "None", + "Snowflake/snowflake-arctic-embed-xs": "None", + "WhereIsAI/UAE-Large-V1": "None", + "aari1995/German_Semantic_STS_V2": "None", + "abhinand/MedEmbed-small-v0.1": "None", + "ai-forever/ru-en-RoSBERTa": "None", + "ai-forever/sbert_large_mt_nlu_ru": "None", + "ai-forever/sbert_large_nlu_ru": "None", + "avsolatorio/GIST-Embedding-v0": "None", + "avsolatorio/GIST-all-MiniLM-L6-v2": "None", + "avsolatorio/GIST-large-Embedding-v0": "None", + "avsolatorio/GIST-small-Embedding-v0": "None", + "avsolatorio/NoInstruct-small-Embedding-v0": "BertModel.__init__() got an unexpected keyword argument 'cache_folder'", + "bigscience/sgpt-bloom-7b1-msmarco": "None", + "bm25s": "None", + "brahmairesearch/slx-v0.1": "None", + "castorini/monobert-large-msmarco": "None", + "castorini/monot5-3b-msmarco-10k": "None", + "castorini/monot5-base-msmarco-10k": "None", + "castorini/monot5-large-msmarco-10k": "None", + "castorini/monot5-small-msmarco-10k": "None", + "castorini/repllama-v1-7b-lora-passage": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-67794457-7e56cbf325381c760c430207;a79cc472-a4fc-49dc-80f0-9d4b8cb5ef42)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. 
Please log in.", + "cointegrated/LaBSE-en-ru": "None", + "cointegrated/rubert-tiny": "None", + "cointegrated/rubert-tiny2": "None", + "colbert-ir/colbertv2.0": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", + "consciousAI/cai-lunaris-text-embeddings": "None", + "consciousAI/cai-stellaris-text-embeddings": "None", + "deepfile/embedder-100p": "None", + "deepvk/USER-base": "None", + "deepvk/USER-bge-m3": "None", + "deepvk/deberta-v1-base": "None", + "dunzhang/stella_en_1.5B_v5": "Please install `pip install gritlm` to use dunzhang/stella_en_1.5B_v5.", + "dunzhang/stella_en_400M_v5": "Please install `pip install gritlm` to use dunzhang/stella_en_400M_v5.", + "dwzhu/e5-base-4k": "None", + "google/flan-t5-base": "None", + "google/flan-t5-large": "None", + "google/flan-t5-xl": "None", + "google/flan-t5-xxl": "None", + "google/text-embedding-004": "None", + "google/text-embedding-005": "None", + "google/text-multilingual-embedding-002": "None", + "ibm-granite/granite-embedding-107m-multilingual": "None", + "ibm-granite/granite-embedding-125m-english": "None", + "ibm-granite/granite-embedding-278m-multilingual": "None", + "ibm-granite/granite-embedding-30m-english": "None", + "infgrad/jasper_en_vision_language_v1": "Over threshold. Not tested.", + "infgrad/stella-base-en-v2": "None", + "intfloat/e5-base": "None", + "intfloat/e5-base-v2": "None", + "intfloat/e5-large": "None", + "intfloat/e5-large-v2": "None", + "intfloat/e5-mistral-7b-instruct": "Over threshold. 
Not tested.", + "intfloat/e5-small": "None", + "intfloat/e5-small-v2": "None", + "intfloat/multilingual-e5-base": "None", + "intfloat/multilingual-e5-large": "None", + "intfloat/multilingual-e5-large-instruct": "Please install `pip install gritlm` to use intfloat/multilingual-e5-large-instruct.", + "intfloat/multilingual-e5-small": "None", + "izhx/udever-bloom-1b1": "None", + "izhx/udever-bloom-3b": "None", + "izhx/udever-bloom-560m": "None", + "izhx/udever-bloom-7b1": "None", + "jhu-clsp/FollowIR-7B": "None", + "jinaai/jina-colbert-v2": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", + "jinaai/jina-embedding-b-en-v1": "None", + "jinaai/jina-embedding-s-en-v1": "None", + "jinaai/jina-embeddings-v2-base-en": "None", + "jinaai/jina-embeddings-v2-small-en": "None", + "jinaai/jina-embeddings-v3": "None", + "jinaai/jina-reranker-v2-base-multilingual": "None", + "keeeeenw/MicroLlama-text-embedding": "None", + "malenia1/ternary-weight-embedding": "None", + "manu/bge-m3-custom-fr": "None", + "manu/sentence_croissant_alpha_v0.2": "Over threshold. Not tested.", + "manu/sentence_croissant_alpha_v0.3": "Over threshold. Not tested.", + "manu/sentence_croissant_alpha_v0.4": "Over threshold. Not tested.", + "meta-llama/Llama-2-7b-chat-hf": "None", + "meta-llama/Llama-2-7b-hf": "None", + "minishlab/M2V_base_glove": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "minishlab/M2V_base_glove_subword": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "minishlab/M2V_base_output": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "minishlab/M2V_multilingual_output": "To use the Model2Vec models `model2vec` is required. 
Please install it with `pip install mteb[model2vec]`.", + "minishlab/potion-base-2M": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "minishlab/potion-base-4M": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "minishlab/potion-base-8M": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "mistralai/Mistral-7B-Instruct-v0.2": "None", + "mixedbread-ai/mxbai-embed-large-v1": "None", + "nomic-ai/nomic-embed-text-v1": "None", + "nomic-ai/nomic-embed-text-v1-ablated": "None", + "nomic-ai/nomic-embed-text-v1-unsupervised": "None", + "nomic-ai/nomic-embed-text-v1.5": "None", + "nvidia/NV-Embed-v1": "Over threshold. Not tested.", + "nvidia/NV-Embed-v2": "Over threshold. Not tested.", + "omarelshehy/arabic-english-sts-matryoshka": "None", + "openai/text-embedding-3-large": "None", + "openai/text-embedding-3-small": "None", + "openai/text-embedding-ada-002": "None", + "openbmb/MiniCPM-Embedding": "Over threshold. Not tested.", + "samaya-ai/RepLLaMA-reproduced": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-6779403c-1bd84d333e938afa4e7cf86b;b873eea6-3c10-4659-b6da-2288d83e721b)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-llama2-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. 
(Request ID: Root=1-677940f7-6c2bfcaa7985abb1165185ff;efdd2ef8-60a0-45c3-a92b-b24784b30b43)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-llama3.1-8b-instruct-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct.\n401 Client Error. (Request ID: Root=1-6779430b-3277d7961f3c88ab56ecf91f;a476a013-b28f-47c6-bd95-e3d6fe823468)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-llama3.1-8b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B.\n401 Client Error. (Request ID: Root=1-67794236-778bc0f015eceb3f30368362;74618ba7-71f1-404f-9a5a-e52e590a9700)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-mistral-v0.1-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.\n401 Client Error. (Request ID: Root=1-67794457-688a6d9c24a9e8f15cf70d28;da3a233f-7c7c-4919-9cee-72a1d66acdb6)\n\nCannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.\nAccess to model mistralai/Mistral-7B-v0.1 is restricted. You must have access to it and be authenticated to access it. 
Please log in.", + "sdadas/mmlw-e5-base": "None", + "sdadas/mmlw-e5-large": "None", + "sdadas/mmlw-e5-small": "None", + "sdadas/mmlw-roberta-base": "None", + "sdadas/mmlw-roberta-large": "None", + "sentence-transformer/multi-qa-MiniLM-L6-cos-v1": "sentence-transformer/multi-qa-MiniLM-L6-cos-v1 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`", + "sentence-transformers/LaBSE": "None", + "sentence-transformers/all-MiniLM-L12-v2": "None", + "sentence-transformers/all-MiniLM-L6-v2": "None", + "sentence-transformers/all-mpnet-base-v2": "None", + "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": "None", + "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": "None", + "sergeyzh/LaBSE-ru-turbo": "None", + "sergeyzh/rubert-tiny-turbo": "None", + "shibing624/text2vec-base-multilingual": "None", + "silma-ai/silma-embeddding-matryoshka-v0.1": "None", + "thenlper/gte-base": "None", + "thenlper/gte-large": "None", + "thenlper/gte-small": "None", + "unicamp-dl/mt5-13b-mmarco-100k": "None", + "unicamp-dl/mt5-base-mmarco-v2": "None", + "voyage-large-2": "None", + "voyageai/voyage-2": "None", + "voyageai/voyage-3": "None", + "voyageai/voyage-3-lite": "None", + "voyageai/voyage-code-2": "None", + "voyageai/voyage-finance-2": "None", + "voyageai/voyage-large-2-instruct": "None", + "voyageai/voyage-law-2": "None", + "voyageai/voyage-multilingual-2": "None", + "zeta-alpha-ai/Zeta-Alpha-E5-Mistral": "Over threshold. 
Not tested."} \ No newline at end of file From 9af61d0be76783ef22c512c6bf75d3cc981823b0 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Sun, 5 Jan 2025 23:15:42 +0000 Subject: [PATCH 13/33] try not using cache folder --- scripts/failures.json | 28 ++++++++++++++-------------- scripts/model_loading.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/scripts/failures.json b/scripts/failures.json index 18af55ae0e..989915a191 100644 --- a/scripts/failures.json +++ b/scripts/failures.json @@ -56,7 +56,7 @@ "Snowflake/snowflake-arctic-embed-m": "None", "Snowflake/snowflake-arctic-embed-m-long": "None", "Snowflake/snowflake-arctic-embed-m-v1.5": "None", - "Snowflake/snowflake-arctic-embed-m-v2.0": "please install xformers", + "Snowflake/snowflake-arctic-embed-m-v2.0": "None", "Snowflake/snowflake-arctic-embed-s": "None", "Snowflake/snowflake-arctic-embed-xs": "None", "WhereIsAI/UAE-Large-V1": "None", @@ -69,7 +69,7 @@ "avsolatorio/GIST-all-MiniLM-L6-v2": "None", "avsolatorio/GIST-large-Embedding-v0": "None", "avsolatorio/GIST-small-Embedding-v0": "None", - "avsolatorio/NoInstruct-small-Embedding-v0": "BertModel.__init__() got an unexpected keyword argument 'cache_folder'", + "avsolatorio/NoInstruct-small-Embedding-v0": "None", "bigscience/sgpt-bloom-7b1-msmarco": "None", "bm25s": "None", "brahmairesearch/slx-v0.1": "None", @@ -82,15 +82,15 @@ "cointegrated/LaBSE-en-ru": "None", "cointegrated/rubert-tiny": "None", "cointegrated/rubert-tiny2": "None", - "colbert-ir/colbertv2.0": "To use the ColBERT models `pylate` is required. 
Please install it with `pip install mteb[pylate]`.", + "colbert-ir/colbertv2.0": "None", "consciousAI/cai-lunaris-text-embeddings": "None", "consciousAI/cai-stellaris-text-embeddings": "None", "deepfile/embedder-100p": "None", "deepvk/USER-base": "None", "deepvk/USER-bge-m3": "None", "deepvk/deberta-v1-base": "None", - "dunzhang/stella_en_1.5B_v5": "Please install `pip install gritlm` to use dunzhang/stella_en_1.5B_v5.", - "dunzhang/stella_en_400M_v5": "Please install `pip install gritlm` to use dunzhang/stella_en_400M_v5.", + "dunzhang/stella_en_1.5B_v5": "None", + "dunzhang/stella_en_400M_v5": "None", "dwzhu/e5-base-4k": "None", "google/flan-t5-base": "None", "google/flan-t5-large": "None", @@ -114,14 +114,14 @@ "intfloat/e5-small-v2": "None", "intfloat/multilingual-e5-base": "None", "intfloat/multilingual-e5-large": "None", - "intfloat/multilingual-e5-large-instruct": "Please install `pip install gritlm` to use intfloat/multilingual-e5-large-instruct.", + "intfloat/multilingual-e5-large-instruct": "None", "intfloat/multilingual-e5-small": "None", "izhx/udever-bloom-1b1": "None", "izhx/udever-bloom-3b": "None", "izhx/udever-bloom-560m": "None", "izhx/udever-bloom-7b1": "None", "jhu-clsp/FollowIR-7B": "None", - "jinaai/jina-colbert-v2": "To use the ColBERT models `pylate` is required. Please install it with `pip install mteb[pylate]`.", + "jinaai/jina-colbert-v2": "None", "jinaai/jina-embedding-b-en-v1": "None", "jinaai/jina-embedding-s-en-v1": "None", "jinaai/jina-embeddings-v2-base-en": "None", @@ -136,13 +136,13 @@ "manu/sentence_croissant_alpha_v0.4": "Over threshold. Not tested.", "meta-llama/Llama-2-7b-chat-hf": "None", "meta-llama/Llama-2-7b-hf": "None", - "minishlab/M2V_base_glove": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", - "minishlab/M2V_base_glove_subword": "To use the Model2Vec models `model2vec` is required. 
Please install it with `pip install mteb[model2vec]`.", - "minishlab/M2V_base_output": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", - "minishlab/M2V_multilingual_output": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", - "minishlab/potion-base-2M": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", - "minishlab/potion-base-4M": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", - "minishlab/potion-base-8M": "To use the Model2Vec models `model2vec` is required. Please install it with `pip install mteb[model2vec]`.", + "minishlab/M2V_base_glove": "None", + "minishlab/M2V_base_glove_subword": "None", + "minishlab/M2V_base_output": "None", + "minishlab/M2V_multilingual_output": "None", + "minishlab/potion-base-2M": "None", + "minishlab/potion-base-4M": "None", + "minishlab/potion-base-8M": "None", "mistralai/Mistral-7B-Instruct-v0.2": "None", "mixedbread-ai/mxbai-embed-large-v1": "None", "nomic-ai/nomic-embed-text-v1": "None", diff --git a/scripts/model_loading.py b/scripts/model_loading.py index ef665377f4..7fe478ba11 100644 --- a/scripts/model_loading.py +++ b/scripts/model_loading.py @@ -42,6 +42,16 @@ def get_model_below_n_param_threshold(model_name: str) -> str: m = get_model(model_name, cache_folder=CACHE_FOLDER) if m is not None: return "None" + except TypeError: # when cache_folder is not supported. 
+ try: + m = get_model(model_name) + if m is not None: + return "None" + except Exception as e: + logging.warning(f"Failed to load model {model_name} with error {e}") + return e.__str__() + finally: + teardown_function() except Exception as e: logging.warning(f"Failed to load model {model_name} with error {e}") return e.__str__() From b8777d19fc0074f93065ed2a475bbbf3e8b3d121 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Mon, 6 Jan 2025 21:22:33 +0000 Subject: [PATCH 14/33] update script with scan_cache_dir and add args --- scripts/failures.json | 194 ------------------------------ scripts/model_load_failures.json | 196 +++++++++++++++++++++++++++++++ scripts/model_loading.py | 69 +++++++---- 3 files changed, 241 insertions(+), 218 deletions(-) delete mode 100644 scripts/failures.json create mode 100644 scripts/model_load_failures.json diff --git a/scripts/failures.json b/scripts/failures.json deleted file mode 100644 index 989915a191..0000000000 --- a/scripts/failures.json +++ /dev/null @@ -1,194 +0,0 @@ -{"Alibaba-NLP/gte-Qwen1.5-7B-instruct": "Over threshold. Not tested.", - "Alibaba-NLP/gte-Qwen2-1.5B-instruct": "Over threshold. Not tested.", - "Alibaba-NLP/gte-Qwen2-7B-instruct": "Over threshold. Not tested.", - "BAAI/bge-base-en-v1.5": "None", - "BAAI/bge-large-en-v1.5": "Over threshold. Not tested.", - "BAAI/bge-reranker-v2-m3": "None", - "BAAI/bge-small-en-v1.5": "None", - "BeastyZ/e5-R-mistral-7b": "Over threshold. Not tested.", - "Cohere/Cohere-embed-english-light-v3.0": "None", - "Cohere/Cohere-embed-english-v3.0": "None", - "Cohere/Cohere-embed-multilingual-light-v3.0": "None", - "Cohere/Cohere-embed-multilingual-v3.0": "None", - "DeepPavlov/distilrubert-small-cased-conversational": "None", - "DeepPavlov/rubert-base-cased": "Over threshold. Not tested.", - "DeepPavlov/rubert-base-cased-sentence": "None", - "Gameselo/STS-multilingual-mpnet-base-v2": "None", - "GritLM/GritLM-7B": "Over threshold. Not tested.", - "GritLM/GritLM-8x7B": "Over threshold. 
Not tested.", - "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": "None", - "HIT-TMG/KaLM-embedding-multilingual-mini-v1": "None", - "Haon-Chen/speed-embedding-7b-instruct": "Over threshold. Not tested.", - "Hum-Works/lodestone-base-4096-v1": "None", - "Jaume/gemma-2b-embeddings": "Over threshold. Not tested.", - "Lajavaness/bilingual-embedding-base": "None", - "Lajavaness/bilingual-embedding-large": "None", - "Lajavaness/bilingual-embedding-small": "None", - "Linq-AI-Research/Linq-Embed-Mistral": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised": "Over threshold. Not tested.", - "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse": "Over threshold. 
Not tested.", - "Mihaiii/Bulbasaur": "None", - "Mihaiii/Ivysaur": "None", - "Mihaiii/Squirtle": "None", - "Mihaiii/Venusaur": "None", - "Mihaiii/Wartortle": "None", - "Mihaiii/gte-micro": "None", - "Mihaiii/gte-micro-v4": "None", - "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": "None", - "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": "None", - "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": "None", - "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": "None", - "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": "None", - "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": "None", - "OrdalieTech/Solon-embeddings-large-0.1": "None", - "OrlikB/KartonBERT-USE-base-v1": "None", - "OrlikB/st-polish-kartonberta-base-alpha-v1": "None", - "Salesforce/SFR-Embedding-2_R": "Over threshold. Not tested.", - "Salesforce/SFR-Embedding-Mistral": "Over threshold. Not tested.", - "Snowflake/snowflake-arctic-embed-l": "None", - "Snowflake/snowflake-arctic-embed-l-v2.0": "None", - "Snowflake/snowflake-arctic-embed-m": "None", - "Snowflake/snowflake-arctic-embed-m-long": "None", - "Snowflake/snowflake-arctic-embed-m-v1.5": "None", - "Snowflake/snowflake-arctic-embed-m-v2.0": "None", - "Snowflake/snowflake-arctic-embed-s": "None", - "Snowflake/snowflake-arctic-embed-xs": "None", - "WhereIsAI/UAE-Large-V1": "None", - "aari1995/German_Semantic_STS_V2": "None", - "abhinand/MedEmbed-small-v0.1": "None", - "ai-forever/ru-en-RoSBERTa": "None", - "ai-forever/sbert_large_mt_nlu_ru": "None", - "ai-forever/sbert_large_nlu_ru": "None", - "avsolatorio/GIST-Embedding-v0": "None", - "avsolatorio/GIST-all-MiniLM-L6-v2": "None", - "avsolatorio/GIST-large-Embedding-v0": "None", - "avsolatorio/GIST-small-Embedding-v0": "None", - "avsolatorio/NoInstruct-small-Embedding-v0": "None", - "bigscience/sgpt-bloom-7b1-msmarco": "None", - "bm25s": "None", - "brahmairesearch/slx-v0.1": "None", - 
"castorini/monobert-large-msmarco": "None", - "castorini/monot5-3b-msmarco-10k": "None", - "castorini/monot5-base-msmarco-10k": "None", - "castorini/monot5-large-msmarco-10k": "None", - "castorini/monot5-small-msmarco-10k": "None", - "castorini/repllama-v1-7b-lora-passage": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-67794457-7e56cbf325381c760c430207;a79cc472-a4fc-49dc-80f0-9d4b8cb5ef42)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", - "cointegrated/LaBSE-en-ru": "None", - "cointegrated/rubert-tiny": "None", - "cointegrated/rubert-tiny2": "None", - "colbert-ir/colbertv2.0": "None", - "consciousAI/cai-lunaris-text-embeddings": "None", - "consciousAI/cai-stellaris-text-embeddings": "None", - "deepfile/embedder-100p": "None", - "deepvk/USER-base": "None", - "deepvk/USER-bge-m3": "None", - "deepvk/deberta-v1-base": "None", - "dunzhang/stella_en_1.5B_v5": "None", - "dunzhang/stella_en_400M_v5": "None", - "dwzhu/e5-base-4k": "None", - "google/flan-t5-base": "None", - "google/flan-t5-large": "None", - "google/flan-t5-xl": "None", - "google/flan-t5-xxl": "None", - "google/text-embedding-004": "None", - "google/text-embedding-005": "None", - "google/text-multilingual-embedding-002": "None", - "ibm-granite/granite-embedding-107m-multilingual": "None", - "ibm-granite/granite-embedding-125m-english": "None", - "ibm-granite/granite-embedding-278m-multilingual": "None", - "ibm-granite/granite-embedding-30m-english": "None", - "infgrad/jasper_en_vision_language_v1": "Over threshold. 
Not tested.", - "infgrad/stella-base-en-v2": "None", - "intfloat/e5-base": "None", - "intfloat/e5-base-v2": "None", - "intfloat/e5-large": "None", - "intfloat/e5-large-v2": "None", - "intfloat/e5-mistral-7b-instruct": "Over threshold. Not tested.", - "intfloat/e5-small": "None", - "intfloat/e5-small-v2": "None", - "intfloat/multilingual-e5-base": "None", - "intfloat/multilingual-e5-large": "None", - "intfloat/multilingual-e5-large-instruct": "None", - "intfloat/multilingual-e5-small": "None", - "izhx/udever-bloom-1b1": "None", - "izhx/udever-bloom-3b": "None", - "izhx/udever-bloom-560m": "None", - "izhx/udever-bloom-7b1": "None", - "jhu-clsp/FollowIR-7B": "None", - "jinaai/jina-colbert-v2": "None", - "jinaai/jina-embedding-b-en-v1": "None", - "jinaai/jina-embedding-s-en-v1": "None", - "jinaai/jina-embeddings-v2-base-en": "None", - "jinaai/jina-embeddings-v2-small-en": "None", - "jinaai/jina-embeddings-v3": "None", - "jinaai/jina-reranker-v2-base-multilingual": "None", - "keeeeenw/MicroLlama-text-embedding": "None", - "malenia1/ternary-weight-embedding": "None", - "manu/bge-m3-custom-fr": "None", - "manu/sentence_croissant_alpha_v0.2": "Over threshold. Not tested.", - "manu/sentence_croissant_alpha_v0.3": "Over threshold. Not tested.", - "manu/sentence_croissant_alpha_v0.4": "Over threshold. 
Not tested.", - "meta-llama/Llama-2-7b-chat-hf": "None", - "meta-llama/Llama-2-7b-hf": "None", - "minishlab/M2V_base_glove": "None", - "minishlab/M2V_base_glove_subword": "None", - "minishlab/M2V_base_output": "None", - "minishlab/M2V_multilingual_output": "None", - "minishlab/potion-base-2M": "None", - "minishlab/potion-base-4M": "None", - "minishlab/potion-base-8M": "None", - "mistralai/Mistral-7B-Instruct-v0.2": "None", - "mixedbread-ai/mxbai-embed-large-v1": "None", - "nomic-ai/nomic-embed-text-v1": "None", - "nomic-ai/nomic-embed-text-v1-ablated": "None", - "nomic-ai/nomic-embed-text-v1-unsupervised": "None", - "nomic-ai/nomic-embed-text-v1.5": "None", - "nvidia/NV-Embed-v1": "Over threshold. Not tested.", - "nvidia/NV-Embed-v2": "Over threshold. Not tested.", - "omarelshehy/arabic-english-sts-matryoshka": "None", - "openai/text-embedding-3-large": "None", - "openai/text-embedding-3-small": "None", - "openai/text-embedding-ada-002": "None", - "openbmb/MiniCPM-Embedding": "Over threshold. Not tested.", - "samaya-ai/RepLLaMA-reproduced": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-6779403c-1bd84d333e938afa4e7cf86b;b873eea6-3c10-4659-b6da-2288d83e721b)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", - "samaya-ai/promptriever-llama2-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-677940f7-6c2bfcaa7985abb1165185ff;efdd2ef8-60a0-45c3-a92b-b24784b30b43)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. 
You must have access to it and be authenticated to access it. Please log in.", - "samaya-ai/promptriever-llama3.1-8b-instruct-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct.\n401 Client Error. (Request ID: Root=1-6779430b-3277d7961f3c88ab56ecf91f;a476a013-b28f-47c6-bd95-e3d6fe823468)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in.", - "samaya-ai/promptriever-llama3.1-8b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B.\n401 Client Error. (Request ID: Root=1-67794236-778bc0f015eceb3f30368362;74618ba7-71f1-404f-9a5a-e52e590a9700)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B is restricted. You must have access to it and be authenticated to access it. Please log in.", - "samaya-ai/promptriever-mistral-v0.1-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.\n401 Client Error. (Request ID: Root=1-67794457-688a6d9c24a9e8f15cf70d28;da3a233f-7c7c-4919-9cee-72a1d66acdb6)\n\nCannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.\nAccess to model mistralai/Mistral-7B-v0.1 is restricted. You must have access to it and be authenticated to access it. 
Please log in.", - "sdadas/mmlw-e5-base": "None", - "sdadas/mmlw-e5-large": "None", - "sdadas/mmlw-e5-small": "None", - "sdadas/mmlw-roberta-base": "None", - "sdadas/mmlw-roberta-large": "None", - "sentence-transformer/multi-qa-MiniLM-L6-cos-v1": "sentence-transformer/multi-qa-MiniLM-L6-cos-v1 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`", - "sentence-transformers/LaBSE": "None", - "sentence-transformers/all-MiniLM-L12-v2": "None", - "sentence-transformers/all-MiniLM-L6-v2": "None", - "sentence-transformers/all-mpnet-base-v2": "None", - "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": "None", - "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": "None", - "sergeyzh/LaBSE-ru-turbo": "None", - "sergeyzh/rubert-tiny-turbo": "None", - "shibing624/text2vec-base-multilingual": "None", - "silma-ai/silma-embeddding-matryoshka-v0.1": "None", - "thenlper/gte-base": "None", - "thenlper/gte-large": "None", - "thenlper/gte-small": "None", - "unicamp-dl/mt5-13b-mmarco-100k": "None", - "unicamp-dl/mt5-base-mmarco-v2": "None", - "voyage-large-2": "None", - "voyageai/voyage-2": "None", - "voyageai/voyage-3": "None", - "voyageai/voyage-3-lite": "None", - "voyageai/voyage-code-2": "None", - "voyageai/voyage-finance-2": "None", - "voyageai/voyage-large-2-instruct": "None", - "voyageai/voyage-law-2": "None", - "voyageai/voyage-multilingual-2": "None", - "zeta-alpha-ai/Zeta-Alpha-E5-Mistral": "Over threshold. Not tested."} \ No newline at end of file diff --git a/scripts/model_load_failures.json b/scripts/model_load_failures.json new file mode 100644 index 0000000000..efc12f4fda --- /dev/null +++ b/scripts/model_load_failures.json @@ -0,0 +1,196 @@ +{ + "Alibaba-NLP/gte-Qwen1.5-7B-instruct": "Over threshold. 
Not tested.", + "Alibaba-NLP/gte-Qwen2-1.5B-instruct": "None", + "Alibaba-NLP/gte-Qwen2-7B-instruct": "Over threshold. Not tested.", + "BAAI/bge-base-en-v1.5": "None", + "BAAI/bge-large-en-v1.5": "Over threshold. Not tested.", + "BAAI/bge-reranker-v2-m3": "None", + "BAAI/bge-small-en-v1.5": "None", + "BeastyZ/e5-R-mistral-7b": "Over threshold. Not tested.", + "Cohere/Cohere-embed-english-light-v3.0": "None", + "Cohere/Cohere-embed-english-v3.0": "None", + "Cohere/Cohere-embed-multilingual-light-v3.0": "None", + "Cohere/Cohere-embed-multilingual-v3.0": "None", + "DeepPavlov/distilrubert-small-cased-conversational": "None", + "DeepPavlov/rubert-base-cased": "None", + "DeepPavlov/rubert-base-cased-sentence": "None", + "Gameselo/STS-multilingual-mpnet-base-v2": "None", + "GritLM/GritLM-7B": "Over threshold. Not tested.", + "GritLM/GritLM-8x7B": "Over threshold. Not tested.", + "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": "None", + "HIT-TMG/KaLM-embedding-multilingual-mini-v1": "None", + "Haon-Chen/speed-embedding-7b-instruct": "Over threshold. Not tested.", + "Hum-Works/lodestone-base-4096-v1": "None", + "Jaume/gemma-2b-embeddings": "Over threshold. Not tested.", + "Lajavaness/bilingual-embedding-base": "None", + "Lajavaness/bilingual-embedding-large": "None", + "Lajavaness/bilingual-embedding-small": "None", + "Linq-AI-Research/Linq-Embed-Mistral": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": "Over threshold. 
Not tested.", + "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised": "Over threshold. Not tested.", + "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse": "Over threshold. Not tested.", + "Mihaiii/Bulbasaur": "None", + "Mihaiii/Ivysaur": "None", + "Mihaiii/Squirtle": "None", + "Mihaiii/Venusaur": "None", + "Mihaiii/Wartortle": "None", + "Mihaiii/gte-micro": "None", + "Mihaiii/gte-micro-v4": "None", + "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": "None", + "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": "None", + "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": "None", + "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": "None", + "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": "None", + "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": "None", + "OrdalieTech/Solon-embeddings-large-0.1": "None", + "OrlikB/KartonBERT-USE-base-v1": "None", + "OrlikB/st-polish-kartonberta-base-alpha-v1": "None", + "Salesforce/SFR-Embedding-2_R": "Over threshold. Not tested.", + "Salesforce/SFR-Embedding-Mistral": "Over threshold. 
Not tested.", + "Snowflake/snowflake-arctic-embed-l": "None", + "Snowflake/snowflake-arctic-embed-l-v2.0": "None", + "Snowflake/snowflake-arctic-embed-m": "None", + "Snowflake/snowflake-arctic-embed-m-long": "None", + "Snowflake/snowflake-arctic-embed-m-v1.5": "None", + "Snowflake/snowflake-arctic-embed-m-v2.0": "None", + "Snowflake/snowflake-arctic-embed-s": "None", + "Snowflake/snowflake-arctic-embed-xs": "None", + "WhereIsAI/UAE-Large-V1": "None", + "aari1995/German_Semantic_STS_V2": "None", + "abhinand/MedEmbed-small-v0.1": "None", + "ai-forever/ru-en-RoSBERTa": "None", + "ai-forever/sbert_large_mt_nlu_ru": "None", + "ai-forever/sbert_large_nlu_ru": "None", + "avsolatorio/GIST-Embedding-v0": "None", + "avsolatorio/GIST-all-MiniLM-L6-v2": "None", + "avsolatorio/GIST-large-Embedding-v0": "None", + "avsolatorio/GIST-small-Embedding-v0": "None", + "avsolatorio/NoInstruct-small-Embedding-v0": "None", + "bigscience/sgpt-bloom-7b1-msmarco": "None", + "bm25s": "None", + "brahmairesearch/slx-v0.1": "None", + "castorini/monobert-large-msmarco": "None", + "castorini/monot5-3b-msmarco-10k": "None", + "castorini/monot5-base-msmarco-10k": "None", + "castorini/monot5-large-msmarco-10k": "None", + "castorini/monot5-small-msmarco-10k": "None", + "castorini/repllama-v1-7b-lora-passage": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-67794457-7e56cbf325381c760c430207;a79cc472-a4fc-49dc-80f0-9d4b8cb5ef42)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. 
Please log in.", + "cointegrated/LaBSE-en-ru": "None", + "cointegrated/rubert-tiny": "None", + "cointegrated/rubert-tiny2": "None", + "colbert-ir/colbertv2.0": "None", + "consciousAI/cai-lunaris-text-embeddings": "None", + "consciousAI/cai-stellaris-text-embeddings": "None", + "deepfile/embedder-100p": "None", + "deepvk/USER-base": "None", + "deepvk/USER-bge-m3": "None", + "deepvk/deberta-v1-base": "None", + "dunzhang/stella_en_1.5B_v5": "None", + "dunzhang/stella_en_400M_v5": "None", + "dwzhu/e5-base-4k": "None", + "google/flan-t5-base": "None", + "google/flan-t5-large": "None", + "google/flan-t5-xl": "None", + "google/flan-t5-xxl": "None", + "google/text-embedding-004": "None", + "google/text-embedding-005": "None", + "google/text-multilingual-embedding-002": "None", + "ibm-granite/granite-embedding-107m-multilingual": "None", + "ibm-granite/granite-embedding-125m-english": "None", + "ibm-granite/granite-embedding-278m-multilingual": "None", + "ibm-granite/granite-embedding-30m-english": "None", + "infgrad/jasper_en_vision_language_v1": "Over threshold. Not tested.", + "infgrad/stella-base-en-v2": "None", + "intfloat/e5-base": "None", + "intfloat/e5-base-v2": "None", + "intfloat/e5-large": "None", + "intfloat/e5-large-v2": "None", + "intfloat/e5-mistral-7b-instruct": "Over threshold. 
Not tested.", + "intfloat/e5-small": "None", + "intfloat/e5-small-v2": "None", + "intfloat/multilingual-e5-base": "None", + "intfloat/multilingual-e5-large": "None", + "intfloat/multilingual-e5-large-instruct": "None", + "intfloat/multilingual-e5-small": "None", + "izhx/udever-bloom-1b1": "None", + "izhx/udever-bloom-3b": "None", + "izhx/udever-bloom-560m": "None", + "izhx/udever-bloom-7b1": "None", + "jhu-clsp/FollowIR-7B": "None", + "jinaai/jina-colbert-v2": "None", + "jinaai/jina-embedding-b-en-v1": "None", + "jinaai/jina-embedding-s-en-v1": "None", + "jinaai/jina-embeddings-v2-base-en": "None", + "jinaai/jina-embeddings-v2-small-en": "None", + "jinaai/jina-embeddings-v3": "None", + "jinaai/jina-reranker-v2-base-multilingual": "None", + "keeeeenw/MicroLlama-text-embedding": "None", + "malenia1/ternary-weight-embedding": "None", + "manu/bge-m3-custom-fr": "None", + "manu/sentence_croissant_alpha_v0.2": "None", + "manu/sentence_croissant_alpha_v0.3": "Over threshold. Not tested.", + "manu/sentence_croissant_alpha_v0.4": "Over threshold. Not tested.", + "meta-llama/Llama-2-7b-chat-hf": "None", + "meta-llama/Llama-2-7b-hf": "None", + "minishlab/M2V_base_glove": "None", + "minishlab/M2V_base_glove_subword": "None", + "minishlab/M2V_base_output": "None", + "minishlab/M2V_multilingual_output": "None", + "minishlab/potion-base-2M": "None", + "minishlab/potion-base-4M": "None", + "minishlab/potion-base-8M": "None", + "mistralai/Mistral-7B-Instruct-v0.2": "None", + "mixedbread-ai/mxbai-embed-large-v1": "None", + "nomic-ai/nomic-embed-text-v1": "None", + "nomic-ai/nomic-embed-text-v1-ablated": "None", + "nomic-ai/nomic-embed-text-v1-unsupervised": "None", + "nomic-ai/nomic-embed-text-v1.5": "None", + "nvidia/NV-Embed-v1": "Over threshold. Not tested.", + "nvidia/NV-Embed-v2": "Over threshold. 
Not tested.", + "omarelshehy/arabic-english-sts-matryoshka": "None", + "openai/text-embedding-3-large": "None", + "openai/text-embedding-3-small": "None", + "openai/text-embedding-ada-002": "None", + "openbmb/MiniCPM-Embedding": "Over threshold. Not tested.", + "samaya-ai/RepLLaMA-reproduced": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-6779403c-1bd84d333e938afa4e7cf86b;b873eea6-3c10-4659-b6da-2288d83e721b)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-llama2-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.\n401 Client Error. (Request ID: Root=1-677940f7-6c2bfcaa7985abb1165185ff;efdd2ef8-60a0-45c3-a92b-b24784b30b43)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.\nAccess to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-llama3.1-8b-instruct-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct.\n401 Client Error. (Request ID: Root=1-6779430b-3277d7961f3c88ab56ecf91f;a476a013-b28f-47c6-bd95-e3d6fe823468)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B-Instruct is restricted. You must have access to it and be authenticated to access it. 
Please log in.", + "samaya-ai/promptriever-llama3.1-8b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B.\n401 Client Error. (Request ID: Root=1-677bba8f-608cf825273d8d2b0670b5ad;066bb2fa-3bef-4fb9-b3cb-4c5ffee41047)\n\nCannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/resolve/main/config.json.\nAccess to model meta-llama/Llama-3.1-8B is restricted. You must have access to it and be authenticated to access it. Please log in.", + "samaya-ai/promptriever-mistral-v0.1-7b-v1": "You are trying to access a gated repo.\nMake sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.\n401 Client Error. (Request ID: Root=1-67794457-688a6d9c24a9e8f15cf70d28;da3a233f-7c7c-4919-9cee-72a1d66acdb6)\n\nCannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.\nAccess to model mistralai/Mistral-7B-v0.1 is restricted. You must have access to it and be authenticated to access it. 
Please log in.", + "sdadas/mmlw-e5-base": "None", + "sdadas/mmlw-e5-large": "None", + "sdadas/mmlw-e5-small": "None", + "sdadas/mmlw-roberta-base": "None", + "sdadas/mmlw-roberta-large": "None", + "sentence-transformer/multi-qa-MiniLM-L6-cos-v1": "sentence-transformer/multi-qa-MiniLM-L6-cos-v1 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`", + "sentence-transformers/LaBSE": "None", + "sentence-transformers/all-MiniLM-L12-v2": "None", + "sentence-transformers/all-MiniLM-L6-v2": "None", + "sentence-transformers/all-mpnet-base-v2": "None", + "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": "None", + "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": "None", + "sergeyzh/LaBSE-ru-turbo": "None", + "sergeyzh/rubert-tiny-turbo": "None", + "shibing624/text2vec-base-multilingual": "None", + "silma-ai/silma-embeddding-matryoshka-v0.1": "None", + "thenlper/gte-base": "None", + "thenlper/gte-large": "None", + "thenlper/gte-small": "None", + "unicamp-dl/mt5-13b-mmarco-100k": "None", + "unicamp-dl/mt5-base-mmarco-v2": "None", + "voyage-large-2": "None", + "voyageai/voyage-2": "None", + "voyageai/voyage-3": "None", + "voyageai/voyage-3-lite": "None", + "voyageai/voyage-code-2": "None", + "voyageai/voyage-finance-2": "None", + "voyageai/voyage-large-2-instruct": "None", + "voyageai/voyage-law-2": "None", + "voyageai/voyage-multilingual-2": "None", + "zeta-alpha-ai/Zeta-Alpha-E5-Mistral": "Over threshold. Not tested." 
+} \ No newline at end of file diff --git a/scripts/model_loading.py b/scripts/model_loading.py index 7fe478ba11..14f7aaee04 100644 --- a/scripts/model_loading.py +++ b/scripts/model_loading.py @@ -1,26 +1,28 @@ from __future__ import annotations +import argparse import json import logging -import shutil from pathlib import Path +from huggingface_hub import scan_cache_dir + from mteb import get_model, get_model_meta from mteb.models.overview import MODEL_REGISTRY logging.basicConfig(level=logging.INFO) -CACHE_FOLDER = Path(__file__).parent / ".cache" - - def teardown_function(): - """Remove cache folder and its contents""" - for item in CACHE_FOLDER.iterdir(): - if item.is_file(): - item.unlink() - elif item.is_dir(): - shutil.rmtree(item) + hf_cache_info = scan_cache_dir() + all_revisions = [] + for repo in list(hf_cache_info.repos): + for revision in list(repo.revisions): + all_revisions.append(revision.commit_hash) + + delete_strategy = scan_cache_dir().delete_revisions(*all_revisions) + print("Will free " + delete_strategy.expected_freed_size_str) + delete_strategy.execute() def get_model_below_n_param_threshold(model_name: str) -> str: @@ -28,30 +30,22 @@ def get_model_below_n_param_threshold(model_name: str) -> str: model_meta = get_model_meta(model_name=model_name) assert model_meta is not None if model_meta.n_parameters is not None: - if model_meta.n_parameters >= 2e9: + if model_meta.n_parameters >= 7.1e9: return "Over threshold. Not tested." elif "API" in model_meta.framework: try: m = get_model(model_name) if m is not None: + del m return "None" except Exception as e: logging.warning(f"Failed to load model {model_name} with error {e}") return e.__str__() try: - m = get_model(model_name, cache_folder=CACHE_FOLDER) + m = get_model(model_name) if m is not None: + del m return "None" - except TypeError: # when cache_folder is not supported. 
- try: - m = get_model(model_name) - if m is not None: - return "None" - except Exception as e: - logging.warning(f"Failed to load model {model_name} with error {e}") - return e.__str__() - finally: - teardown_function() except Exception as e: logging.warning(f"Failed to load model {model_name} with error {e}") return e.__str__() @@ -59,8 +53,28 @@ def get_model_below_n_param_threshold(model_name: str) -> str: teardown_function() +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--omit_previous_success", + action="store_true", + default=False, + help="Omit models that have been successfully loaded in the past", + ) + parser.add_argument( + "--run_missing", + action="store_true", + default=True, + help="Run the missing models in the registry that are missing from existing results.", + ) + + return parser.parse_args() + + if __name__ == "__main__": - output_file = Path(__file__).parent / "failures.json" + output_file = Path(__file__).parent / "model_load_failures.json" + + args = parse_args() # Load existing results if the file exists results = {} @@ -68,7 +82,14 @@ def get_model_below_n_param_threshold(model_name: str) -> str: with output_file.open("r") as f: results = json.load(f) - all_model_names = list(set(MODEL_REGISTRY.keys()) - set(results.keys())) + omit_keys = [] + if args.run_missing: + omit_keys = list(results.keys()) + + if args.omit_previous_success: + omit_keys = [k for k, v in results.items() if v == "None"] + + all_model_names = list(set(MODEL_REGISTRY.keys()) - set(omit_keys)) for model_name in all_model_names: error_msg = get_model_below_n_param_threshold(model_name) results[model_name] = error_msg From bd56f86f6ab973016ca80a34a9fc62903cafc92d Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Mon, 6 Jan 2025 21:45:39 +0000 Subject: [PATCH 15/33] add github CI: detect changed model files and run model loading test --- .github/workflows/model_loading.yml | 52 +++++++++++++++++++++++++++++ scripts/model_loading.py | 26 
++++++++++----- 2 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/model_loading.yml diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml new file mode 100644 index 0000000000..3583160eae --- /dev/null +++ b/.github/workflows/model_loading.yml @@ -0,0 +1,52 @@ +name: Model Loading + +on: + pull_request: + paths: + - 'mteb/models/**.py' + +jobs: + extract-and-run: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Extract model names from changed files + id: extract_models + run: | + changed_files=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep 'mteb/models/.*.py') + model_names=() + for file in $changed_files; do + python -c " + import ast + from mteb.model_meta import ModelMeta + + with open('$file', 'r') as f: + tree = ast.parse(f.read()) + for node in ast.walk(tree): + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and isinstance(node.value, ast.Call) and node.value.func.id == 'ModelMeta': + model_name = [kw.value.s for kw in node.value.keywords if kw.arg == 'name'][0] + print(model_name) + " >> mteb/scripts/model_names.txt + done + model_names=$(cat mteb/scripts/model_names.txt | tr '\n' ' ') + echo "::set-output name=model_names::$model_names" + + - name: Run model_loading.py + if: steps.extract_models.outputs.model_names != '' + run: | + python mteb/scripts/model_loading.py --model_name "${{ steps.extract_models.outputs.model_names }}" \ No newline at end of file diff --git a/scripts/model_loading.py b/scripts/model_loading.py index 14f7aaee04..068f361090 100644 --- a/scripts/model_loading.py +++ b/scripts/model_loading.py @@ -30,7 +30,7 @@ def 
get_model_below_n_param_threshold(model_name: str) -> str: model_meta = get_model_meta(model_name=model_name) assert model_meta is not None if model_meta.n_parameters is not None: - if model_meta.n_parameters >= 7.1e9: + if model_meta.n_parameters >= 2e9: return "Over threshold. Not tested." elif "API" in model_meta.framework: try: @@ -64,9 +64,16 @@ def parse_args(): parser.add_argument( "--run_missing", action="store_true", - default=True, + default=False, help="Run the missing models in the registry that are missing from existing results.", ) + parser.add_argument( + "--model_name", + type=str, + nargs="+", + default=None, + help="Run the script for specific model names, e.g. model_1, model_2", + ) return parser.parse_args() @@ -82,14 +89,17 @@ def parse_args(): with output_file.open("r") as f: results = json.load(f) - omit_keys = [] - if args.run_missing: - omit_keys = list(results.keys()) + if args.model_name: + all_model_names = args.model_name + else: + omit_keys = [] + if args.run_missing: + omit_keys = list(results.keys()) + elif args.omit_previous_success: + omit_keys = [k for k, v in results.items() if v == "None"] - if args.omit_previous_success: - omit_keys = [k for k, v in results.items() if v == "None"] + all_model_names = list(set(MODEL_REGISTRY.keys()) - set(omit_keys)) - all_model_names = list(set(MODEL_REGISTRY.keys()) - set(omit_keys)) for model_name in all_model_names: error_msg = get_model_below_n_param_threshold(model_name) results[model_name] = error_msg From dcdd80a096a90746f2de47c231da7cfb1bc86e23 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Mon, 6 Jan 2025 21:49:51 +0000 Subject: [PATCH 16/33] install all model dependencies --- .github/workflows/model_loading.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 3583160eae..79ba7bf211 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -22,6 +22,7 @@ jobs: run: | 
python -m pip install --upgrade pip pip install -r requirements.txt + pip install mteb[pylate] gritlm mteb[model2vec] xformers - name: Extract model names from changed files id: extract_models From 64d9c838daf131fb8fb1040ab6a63bdbd8364579 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 09:54:30 +0000 Subject: [PATCH 17/33] dependecy installations and move file location --- .github/workflows/model_loading.yml | 11 +++++------ scripts/model_loading.py | 7 ++++++- .../test_models}/model_load_failures.json | 0 3 files changed, 11 insertions(+), 7 deletions(-) rename {scripts => tests/test_models}/model_load_failures.json (100%) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 79ba7bf211..0e888f465d 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -21,8 +21,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install mteb[pylate] gritlm mteb[model2vec] xformers + make install + pip install .[pylate] gritlm .[model2vec] xformers - name: Extract model names from changed files id: extract_models @@ -33,7 +33,6 @@ jobs: python -c " import ast from mteb.model_meta import ModelMeta - with open('$file', 'r') as f: tree = ast.parse(f.read()) for node in ast.walk(tree): @@ -42,12 +41,12 @@ jobs: if isinstance(target, ast.Name) and isinstance(node.value, ast.Call) and node.value.func.id == 'ModelMeta': model_name = [kw.value.s for kw in node.value.keywords if kw.arg == 'name'][0] print(model_name) - " >> mteb/scripts/model_names.txt + " >> scripts/model_names.txt done - model_names=$(cat mteb/scripts/model_names.txt | tr '\n' ' ') + model_names=$(cat scripts/model_names.txt | tr '\n' ' ') echo "::set-output name=model_names::$model_names" - name: Run model_loading.py if: steps.extract_models.outputs.model_names != '' run: | - python mteb/scripts/model_loading.py --model_name "${{ 
steps.extract_models.outputs.model_names }}" \ No newline at end of file + python scripts/model_loading.py --model_name "${{ steps.extract_models.outputs.model_names }}" \ No newline at end of file diff --git a/scripts/model_loading.py b/scripts/model_loading.py index 068f361090..8be21a2a32 100644 --- a/scripts/model_loading.py +++ b/scripts/model_loading.py @@ -79,7 +79,12 @@ def parse_args(): if __name__ == "__main__": - output_file = Path(__file__).parent / "model_load_failures.json" + output_file = ( + Path(__file__).parent.parent + / "tests" + / "test_models" + / "model_load_failures.json" + ) args = parse_args() diff --git a/scripts/model_load_failures.json b/tests/test_models/model_load_failures.json similarity index 100% rename from scripts/model_load_failures.json rename to tests/test_models/model_load_failures.json From 0eef8737c84d29d9bd59793c85b3780764770054 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 10:00:32 +0000 Subject: [PATCH 18/33] should trigger a model load test in CI --- mteb/models/bge_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index cc183374c6..c14344aca5 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -4,6 +4,7 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader +# testing changes to file. 
model_prompts = {"query": "Represent this sentence for searching relevant passages: "} bge_small_en_v1_5 = ModelMeta( From 86ad3480d5ccabf369592298501f0dd85e9bfd88 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 10:14:53 +0000 Subject: [PATCH 19/33] find correct commit for diff --- .github/workflows/model_loading.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 0e888f465d..5c80901307 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -27,7 +27,8 @@ jobs: - name: Extract model names from changed files id: extract_models run: | - changed_files=$(git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep 'mteb/models/.*.py') + base_commit=$(git merge-base origin/main HEAD) + changed_files=$(git diff --name-only $base_commit HEAD | grep 'mteb/models/.*.py') model_names=() for file in $changed_files; do python -c " From 9cf1280ff98fc8863b1980a721da0bcc9d4f8307 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 10:26:21 +0000 Subject: [PATCH 20/33] explicity fetch base branch --- .github/workflows/model_loading.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 5c80901307..5b48611c89 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -27,7 +27,9 @@ jobs: - name: Extract model names from changed files id: extract_models run: | - base_commit=$(git merge-base origin/main HEAD) + git fetch origin ${{ github.event.pull_request.base.ref }}:refs/remotes/origin/${{ github.event.pull_request.base.ref }} + base_branch=${{ github.event.pull_request.base.ref }} + base_commit=$(git merge-base origin/$base_branch HEAD) changed_files=$(git diff --name-only $base_commit HEAD | grep 'mteb/models/.*.py') model_names=() for file in $changed_files; do From 
39823116f7ead93c9ab238ed92a54d26967a2629 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 10:48:42 +0000 Subject: [PATCH 21/33] add make command --- Makefile | 9 ++++++++- mteb/models/instruct_wrapper.py | 2 +- pyproject.toml | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c1404270d9..5fc95b297c 100644 --- a/Makefile +++ b/Makefile @@ -35,4 +35,11 @@ pr: build-docs: @echo "--- 📚 Building documentation ---" # since we do not have a documentation site, this just build tables for the .md files - python docs/create_tasks_table.py \ No newline at end of file + python docs/create_tasks_table.py + + +model-load-test: + @echo "--- 🚀 Running model load test ---" + make install-for-tests + pip install ".[pylate,gritlm,xformers,model2vec]" + python tests/test_models/test_model_loading.py \ No newline at end of file diff --git a/mteb/models/instruct_wrapper.py b/mteb/models/instruct_wrapper.py index 303a386836..2ee3a09b56 100644 --- a/mteb/models/instruct_wrapper.py +++ b/mteb/models/instruct_wrapper.py @@ -24,7 +24,7 @@ def instruct_wrapper( from gritlm import GritLM except ImportError: raise ImportError( - f"Please install `pip install gritlm` to use {model_name_or_path}." + f"Please install `pip install mteb[gritlm]` to use {model_name_or_path}." 
) class InstructWrapper(GritLM, Wrapper): diff --git a/pyproject.toml b/pyproject.toml index 7b2ea365f3..9e84dce554 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,8 @@ openai = ["openai>=1.41.0", "tiktoken>=0.8.0"] model2vec = ["model2vec>=0.3.0"] pylate = ["pylate>=1.1.4"] bm25s = ["bm25s>=0.2.6", "PyStemmer>=2.2.0.3"] +gritlm = ["gritlm>=1.0.2"] +xformers = ["xformers>=0.0.29"] [tool.coverage.report] From 6fbaf0fa111ec4c1d212a6325f9a4b57631947be Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 10:49:10 +0000 Subject: [PATCH 22/33] try to run in python instead and add pytest --- .github/workflows/model_loading.yml | 36 +-------------- .gitignore | 3 ++ scripts/extract_model_names.py | 53 ++++++++++++++++++++++ tests/test_models/model_load_failures.json | 1 + tests/test_models/test_model_loading.py | 39 ++++++++++++++++ 5 files changed, 98 insertions(+), 34 deletions(-) create mode 100644 scripts/extract_model_names.py create mode 100644 tests/test_models/test_model_loading.py diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 5b48611c89..dd50cc5ed0 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -18,38 +18,6 @@ jobs: with: python-version: '3.10' - - name: Install dependencies + - name: Install dependencies and run tests run: | - python -m pip install --upgrade pip - make install - pip install .[pylate] gritlm .[model2vec] xformers - - - name: Extract model names from changed files - id: extract_models - run: | - git fetch origin ${{ github.event.pull_request.base.ref }}:refs/remotes/origin/${{ github.event.pull_request.base.ref }} - base_branch=${{ github.event.pull_request.base.ref }} - base_commit=$(git merge-base origin/$base_branch HEAD) - changed_files=$(git diff --name-only $base_commit HEAD | grep 'mteb/models/.*.py') - model_names=() - for file in $changed_files; do - python -c " - import ast - from mteb.model_meta import ModelMeta - with 
open('$file', 'r') as f: - tree = ast.parse(f.read()) - for node in ast.walk(tree): - if isinstance(node, ast.Assign): - for target in node.targets: - if isinstance(target, ast.Name) and isinstance(node.value, ast.Call) and node.value.func.id == 'ModelMeta': - model_name = [kw.value.s for kw in node.value.keywords if kw.arg == 'name'][0] - print(model_name) - " >> scripts/model_names.txt - done - model_names=$(cat scripts/model_names.txt | tr '\n' ' ') - echo "::set-output name=model_names::$model_names" - - - name: Run model_loading.py - if: steps.extract_models.outputs.model_names != '' - run: | - python scripts/model_loading.py --model_name "${{ steps.extract_models.outputs.model_names }}" \ No newline at end of file + make model-load-test diff --git a/.gitignore b/.gitignore index 868f0f1745..977fe8dc1a 100644 --- a/.gitignore +++ b/.gitignore @@ -145,3 +145,6 @@ tests/create_meta/model_card.md # removed results from mteb repo they are now available at: https://github.com/embeddings-benchmark/results results/ uv.lock + +# model loading tests +model_names.txt \ No newline at end of file diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py new file mode 100644 index 0000000000..9f1c7a3cbc --- /dev/null +++ b/scripts/extract_model_names.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import ast +import sys +from pathlib import Path + +from git import Repo + + +def get_changed_files(base_branch="main"): + repo = Repo(".") + repo.remotes.origin.fetch(base_branch) + base_commit = repo.merge_base(f"origin/{base_branch}", "HEAD")[0] + diff = repo.git.diff("--name-only", base_commit, "HEAD") + changed_files = diff.splitlines() + return [ + f for f in changed_files if f.startswith("mteb/models/") and f.endswith(".py") + ] + + +def extract_model_names(files: list[str]) -> list[str]: + model_names = [] + for file in files: + with open(file) as f: + tree = ast.parse(f.read()) + for node in ast.walk(tree): + if isinstance(node, ast.Assign): 
+ for target in node.targets: + if ( + isinstance(target, ast.Name) + and isinstance(node.value, ast.Call) + and node.value.func.id == "ModelMeta" + ): + model_name = [ + kw.value.s + for kw in node.value.keywords + if kw.arg == "name" + ][0] + model_names.append(model_name) + return model_names + + +if __name__ == "__main__": + """ + Can pass in base brnach as an argument. Defaults to 'main'. + e.g. python extract_model_names.py mieb + """ + base_branch = sys.argv[1] if len(sys.argv) > 1 else "main" + changed_files = get_changed_files(base_branch) + model_names = extract_model_names(changed_files) + output_file = Path(__file__).parent / "model_names.txt" + with output_file.open("w") as f: + f.write(" ".join(model_names)) diff --git a/tests/test_models/model_load_failures.json b/tests/test_models/model_load_failures.json index efc12f4fda..f1be1c940b 100644 --- a/tests/test_models/model_load_failures.json +++ b/tests/test_models/model_load_failures.json @@ -6,6 +6,7 @@ "BAAI/bge-large-en-v1.5": "Over threshold. Not tested.", "BAAI/bge-reranker-v2-m3": "None", "BAAI/bge-small-en-v1.5": "None", + "BAAI/bge-small-en-v1.5 BAAI/bge-base-en-v1.5 BAAI/bge-large-en-v1.5": null, "BeastyZ/e5-R-mistral-7b": "Over threshold. 
Not tested.", "Cohere/Cohere-embed-english-light-v3.0": "None", "Cohere/Cohere-embed-english-v3.0": "None", diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py new file mode 100644 index 0000000000..4239e1e95f --- /dev/null +++ b/tests/test_models/test_model_loading.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import subprocess +from pathlib import Path + +import pytest + + +def get_model_names(): + model_names_file_path = ( + Path(__file__).parent.parent.parent / "scripts" / "model_names.txt" + ) + extract_model_names_file_path = ( + Path(__file__).parent.parent.parent / "scripts" / "extract_model_names.py" + ) + _ = subprocess.run( + ["python", extract_model_names_file_path], + capture_output=True, + text=True, + ) + with model_names_file_path.open() as f: + model_names = f.read().strip() + return model_names + + +model_names = get_model_names() + + +@pytest.mark.skipif(not model_names, reason="No updates to models.") +def test_model_loading(): + model_loading_file_path = ( + Path(__file__).parent.parent.parent / "scripts" / "model_loading.py" + ) + result = subprocess.run( + ["python", model_loading_file_path, "--model_name", model_names], + capture_output=True, + text=True, + ) + assert result.returncode == 0, result.stderr From 8830034ea06a5dc8a4c50306ec096cffa41b9405 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 11:11:11 +0000 Subject: [PATCH 23/33] fix attribute error and add read mode --- scripts/extract_model_names.py | 17 +++++++++++------ tests/test_models/test_model_loading.py | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py index 9f1c7a3cbc..1ca32a0ee3 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -29,14 +29,19 @@ def extract_model_names(files: list[str]) -> list[str]: if ( isinstance(target, ast.Name) and isinstance(node.value, ast.Call) + and 
isinstance(node.value.func, ast.Name) and node.value.func.id == "ModelMeta" ): - model_name = [ - kw.value.s - for kw in node.value.keywords - if kw.arg == "name" - ][0] - model_names.append(model_name) + model_name = next( + ( + kw.value.value + for kw in node.value.keywords + if kw.arg == "name" + ), + None, + ) + if model_name: + model_names.append(model_name) return model_names diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index 4239e1e95f..bee7a933aa 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -18,7 +18,7 @@ def get_model_names(): capture_output=True, text=True, ) - with model_names_file_path.open() as f: + with model_names_file_path.open("r") as f: model_names = f.read().strip() return model_names From b1c2021ab92d5d2573e5788fb19af805b2b707ec Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 11:33:11 +0000 Subject: [PATCH 24/33] separate script calling --- Makefile | 1 + tests/test_models/test_model_loading.py | 8 -------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 5fc95b297c..bc2aecaa9d 100644 --- a/Makefile +++ b/Makefile @@ -42,4 +42,5 @@ model-load-test: @echo "--- 🚀 Running model load test ---" make install-for-tests pip install ".[pylate,gritlm,xformers,model2vec]" + python scripts/extract_model_names.py python tests/test_models/test_model_loading.py \ No newline at end of file diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index bee7a933aa..140b8392c6 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -10,14 +10,6 @@ def get_model_names(): model_names_file_path = ( Path(__file__).parent.parent.parent / "scripts" / "model_names.txt" ) - extract_model_names_file_path = ( - Path(__file__).parent.parent.parent / "scripts" / "extract_model_names.py" - ) - _ = subprocess.run( - ["python", 
extract_model_names_file_path], - capture_output=True, - text=True, - ) with model_names_file_path.open("r") as f: model_names = f.read().strip() return model_names From d8431381b90d5b251e074f99127a4f3b5cc6297c Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 15:50:35 +0000 Subject: [PATCH 25/33] let pip install be cached and specify repo path --- .github/workflows/model_loading.yml | 9 ++++++++- scripts/extract_model_names.py | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index dd50cc5ed0..5eb7dfb48d 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -20,4 +20,11 @@ jobs: - name: Install dependencies and run tests run: | - make model-load-test + make install-for-tests + pip install ".[pylate,gritlm,xformers,model2vec]" + - name: Extract model names + run: | + python scripts/extract_model_names.py + - name: Run tests + run: | + python tests/test_models/test_model_loading.py diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py index 1ca32a0ee3..d533b0e4f4 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -8,7 +8,8 @@ def get_changed_files(base_branch="main"): - repo = Repo(".") + repo_path = Path(__file__).parent.parent + repo = Repo(repo_path) repo.remotes.origin.fetch(base_branch) base_commit = repo.merge_base(f"origin/{base_branch}", "HEAD")[0] diff = repo.git.diff("--name-only", base_commit, "HEAD") From f994ab18c0d2c500f4efc85bc445ee0669ea758e Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 16:26:34 +0000 Subject: [PATCH 26/33] check ancestry --- scripts/extract_model_names.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py index d533b0e4f4..6dc5d9beb6 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -11,6 +11,11 @@ def 
get_changed_files(base_branch="main"): repo_path = Path(__file__).parent.parent repo = Repo(repo_path) repo.remotes.origin.fetch(base_branch) + check = repo.is_ancestor(repo.commit(f"origin/{base_branch}"), repo.commit("HEAD")) + if not check: + raise ValueError( + f"HEAD is not a descendant of origin/{base_branch}. Please rebase your branch." + ) base_commit = repo.merge_base(f"origin/{base_branch}", "HEAD")[0] diff = repo.git.diff("--name-only", base_commit, "HEAD") changed_files = diff.splitlines() From 95d804dcb675b6c9647dc25f6e0ecad61d963cae Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 16:38:18 +0000 Subject: [PATCH 27/33] add cache and rebase --- .github/workflows/model_loading.yml | 1 + scripts/extract_model_names.py | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 5eb7dfb48d..0691a020b6 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -17,6 +17,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: '3.10' + cache: 'pip' - name: Install dependencies and run tests run: | diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py index 6dc5d9beb6..54661f9b09 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -11,6 +11,7 @@ def get_changed_files(base_branch="main"): repo_path = Path(__file__).parent.parent repo = Repo(repo_path) repo.remotes.origin.fetch(base_branch) + repo.git.rebase(f"origin/{base_branch}") check = repo.is_ancestor(repo.commit(f"origin/{base_branch}"), repo.commit("HEAD")) if not check: raise ValueError( From a85a2cd95953591277f461b84996d6c30f76d916 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Tue, 7 Jan 2025 16:45:27 +0000 Subject: [PATCH 28/33] try to merge instead of rebase --- scripts/extract_model_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/extract_model_names.py 
b/scripts/extract_model_names.py index 54661f9b09..5ec7b9d7dd 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -11,7 +11,7 @@ def get_changed_files(base_branch="main"): repo_path = Path(__file__).parent.parent repo = Repo(repo_path) repo.remotes.origin.fetch(base_branch) - repo.git.rebase(f"origin/{base_branch}") + repo.git.merge(f"origin/{base_branch}") check = repo.is_ancestor(repo.commit(f"origin/{base_branch}"), repo.commit("HEAD")) if not check: raise ValueError( From 609c8831d50e490800e5118526a3057dd2bfa12c Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Wed, 8 Jan 2025 13:52:25 +0000 Subject: [PATCH 29/33] try without merge base --- scripts/extract_model_names.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py index 5ec7b9d7dd..bc2c68077e 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -11,14 +11,12 @@ def get_changed_files(base_branch="main"): repo_path = Path(__file__).parent.parent repo = Repo(repo_path) repo.remotes.origin.fetch(base_branch) - repo.git.merge(f"origin/{base_branch}") - check = repo.is_ancestor(repo.commit(f"origin/{base_branch}"), repo.commit("HEAD")) - if not check: - raise ValueError( - f"HEAD is not a descendant of origin/{base_branch}. Please rebase your branch." 
- ) - base_commit = repo.merge_base(f"origin/{base_branch}", "HEAD")[0] - diff = repo.git.diff("--name-only", base_commit, "HEAD") + + base_commit = repo.commit(f"origin/{base_branch}") + head_commit = repo.commit("HEAD") + + diff = repo.git.diff("--name-only", base_commit, head_commit) + changed_files = diff.splitlines() return [ f for f in changed_files if f.startswith("mteb/models/") and f.endswith(".py") From 44ccf08114cd8c2eb7fdbf5d181b08e5d0157d75 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Wed, 8 Jan 2025 14:07:43 +0000 Subject: [PATCH 30/33] check if file exists first --- tests/test_models/test_model_loading.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py index 140b8392c6..bf056b19c1 100644 --- a/tests/test_models/test_model_loading.py +++ b/tests/test_models/test_model_loading.py @@ -6,12 +6,14 @@ import pytest -def get_model_names(): +def get_model_names() -> list[str]: + model_names = [] model_names_file_path = ( Path(__file__).parent.parent.parent / "scripts" / "model_names.txt" ) - with model_names_file_path.open("r") as f: - model_names = f.read().strip() + if model_names_file_path.exists(): + with model_names_file_path.open("r") as f: + model_names = f.read().strip() return model_names From d479c5f09fdae8c0e959b6de5c62cff0b1e0a035 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Wed, 8 Jan 2025 17:28:48 +0200 Subject: [PATCH 31/33] Apply suggestions from code review Co-authored-by: Kenneth Enevoldsen --- Makefile | 3 +-- mteb/models/bge_models.py | 1 - scripts/extract_model_names.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index bc2aecaa9d..fe2f8c558f 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,6 @@ build-docs: model-load-test: @echo "--- 🚀 Running model load test ---" - make install-for-tests - pip install ".[pylate,gritlm,xformers,model2vec]" + pip install ".[dev, speedtask, 
pylate,gritlm,xformers,model2vec]" python scripts/extract_model_names.py python tests/test_models/test_model_loading.py \ No newline at end of file diff --git a/mteb/models/bge_models.py b/mteb/models/bge_models.py index c14344aca5..cc183374c6 100644 --- a/mteb/models/bge_models.py +++ b/mteb/models/bge_models.py @@ -4,7 +4,6 @@ from mteb.model_meta import ModelMeta, sentence_transformers_loader -# testing changes to file. model_prompts = {"query": "Represent this sentence for searching relevant passages: "} bge_small_en_v1_5 = ModelMeta( diff --git a/scripts/extract_model_names.py b/scripts/extract_model_names.py index bc2c68077e..dbe99a990e 100644 --- a/scripts/extract_model_names.py +++ b/scripts/extract_model_names.py @@ -52,7 +52,7 @@ def extract_model_names(files: list[str]) -> list[str]: if __name__ == "__main__": """ - Can pass in base brnach as an argument. Defaults to 'main'. + Can pass in base branch as an argument. Defaults to 'main'. e.g. python extract_model_names.py mieb """ base_branch = sys.argv[1] if len(sys.argv) > 1 else "main" From fb26eab27f4022b6350e9ffeaf146e87cc222ca1 Mon Sep 17 00:00:00 2001 From: Isaac Chung Date: Wed, 8 Jan 2025 17:39:51 +0200 Subject: [PATCH 32/33] Update .github/workflows/model_loading.yml Co-authored-by: Kenneth Enevoldsen --- .github/workflows/model_loading.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/model_loading.yml b/.github/workflows/model_loading.yml index 0691a020b6..8707a9c1d6 100644 --- a/.github/workflows/model_loading.yml +++ b/.github/workflows/model_loading.yml @@ -21,11 +21,4 @@ jobs: - name: Install dependencies and run tests run: | - make install-for-tests - pip install ".[pylate,gritlm,xformers,model2vec]" - - name: Extract model names - run: | - python scripts/extract_model_names.py - - name: Run tests - run: | - python tests/test_models/test_model_loading.py + make model-load-test From a9ffc880be97913459a6d38a0961f4cdc28667d5 Mon Sep 17 00:00:00 
2001 From: Isaac Chung Date: Thu, 9 Jan 2025 15:23:58 +0000 Subject: [PATCH 33/33] address review comments to run test once from CI and not pytest --- Makefile | 2 +- .../test_models}/model_loading.py | 23 +++++++++---- tests/test_models/test_model_loading.py | 33 ------------------- 3 files changed, 18 insertions(+), 40 deletions(-) rename {scripts => tests/test_models}/model_loading.py (84%) delete mode 100644 tests/test_models/test_model_loading.py diff --git a/Makefile b/Makefile index fe2f8c558f..6e8647a2ce 100644 --- a/Makefile +++ b/Makefile @@ -42,4 +42,4 @@ model-load-test: @echo "--- 🚀 Running model load test ---" pip install ".[dev, speedtask, pylate,gritlm,xformers,model2vec]" python scripts/extract_model_names.py - python tests/test_models/test_model_loading.py \ No newline at end of file + python tests/test_models/model_loading.py --model_name_file scripts/model_names.txt \ No newline at end of file diff --git a/scripts/model_loading.py b/tests/test_models/model_loading.py similarity index 84% rename from scripts/model_loading.py rename to tests/test_models/model_loading.py index 8be21a2a32..3f22db733f 100644 --- a/scripts/model_loading.py +++ b/tests/test_models/model_loading.py @@ -74,17 +74,18 @@ def parse_args(): default=None, help="Run the script for specific model names, e.g. 
model_1, model_2", ) + parser.add_argument( + "--model_name_file", + type=str, + default=None, + help="Filename containing space-separated model names to test.", + ) return parser.parse_args() if __name__ == "__main__": - output_file = ( - Path(__file__).parent.parent - / "tests" - / "test_models" - / "model_load_failures.json" - ) + output_file = Path(__file__).parent / "model_load_failures.json" args = parse_args() @@ -96,6 +97,16 @@ def parse_args(): if args.model_name: all_model_names = args.model_name + elif args.model_name_file: + all_model_names = [] + if Path(args.model_name_file).exists(): + with open(args.model_name_file) as f: + all_model_names = f.read().strip().split() + else: + logging.warning( + f"Model name file {args.model_name_file} does not exist. Exiting." + ) + exit(1) else: omit_keys = [] if args.run_missing: diff --git a/tests/test_models/test_model_loading.py b/tests/test_models/test_model_loading.py deleted file mode 100644 index bf056b19c1..0000000000 --- a/tests/test_models/test_model_loading.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -import subprocess -from pathlib import Path - -import pytest - - -def get_model_names() -> list[str]: - model_names = [] - model_names_file_path = ( - Path(__file__).parent.parent.parent / "scripts" / "model_names.txt" - ) - if model_names_file_path.exists(): - with model_names_file_path.open("r") as f: - model_names = f.read().strip() - return model_names - - -model_names = get_model_names() - - -@pytest.mark.skipif(not model_names, reason="No updates to models.") -def test_model_loading(): - model_loading_file_path = ( - Path(__file__).parent.parent.parent / "scripts" / "model_loading.py" - ) - result = subprocess.run( - ["python", model_loading_file_path, "--model_name", model_names], - capture_output=True, - text=True, - ) - assert result.returncode == 0, result.stderr