embeddings-benchmark · Samoed · Oct 22, 2025 · Jul 11, 2025 · Jul 15, 2025 · Jul 15, 2025
diff --git a/.github/ISSUE_TEMPLATE/enhancement.yaml b/.github/ISSUE_TEMPLATE/enhancement.yaml
@@ -8,4 +8,3 @@ body:
       description: Please provide a clear and concise description of the feature you would like to see added.
     validations:
       required: true
-
diff --git a/.github/ISSUE_TEMPLATE/eval_request.yaml b/.github/ISSUE_TEMPLATE/eval_request.yaml
@@ -0,0 +1,36 @@
+# GitHub issue form for requesting that a model be evaluated in MTEB.
+name: 📊 Evaluation Request
+description: Create a request for a model to be evaluated in MTEB
+title: "Evaluate model: {model_id}"
+labels: ["evaluation request"]
+body:
+  - type: input
+    attributes:
+      label: Model link on Hugging Face
+      description: Please provide a link to the model on Hugging Face. If the model is closed-source, please provide a link to the model provider or documentation.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: What do you want it to be evaluated on?
+      description: Please specify the tasks or benchmarks you would like this model to be evaluated on.
+    validations:
+      required: true
+  - type: dropdown
+    id: contribute
+    attributes:
+      label: Are you interested in contributing to the evaluation of this model?
+      description: By default MTEB maintainers will only handle evaluation on private subsets due to resource constraints. If you are interested in contributing to the evaluation, please let us know.
+      # Quoted so the options stay strings instead of being read as booleans.
+      options:
+        - "Yes"
+        - "No"
+  - type: dropdown
+    id: exists
+    attributes:
+      label: Does this model already exist in MTEB?
+      description: If you are unsure, please check using mteb model registry (e.g. using `mteb.get_model_meta("model_id")`).
+      # Quoted so the options stay strings instead of being read as booleans.
+      options:
+        - "Yes"
+        - "No"
diff --git a/.github/workflows/dataset_loading_pr.yml b/.github/workflows/dataset_loading_pr.yml
@@ -0,0 +1,35 @@
+name: Datasets available on HuggingFace - PR
+
+# Runs only for pull requests that change Python files under mteb/tasks.
+on:
+  pull_request:
+    paths:
+      - "mteb/tasks/**.py"
+
+jobs:
+  run-pr-datasets-loading-check:
+    runs-on: ubuntu-latest
+
+    steps:
+    # NOTE(review): if the dataset extraction compares against the base branch,
+    # this checkout may need `fetch-depth: 0` — confirm.
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.11'
+        cache: 'pip'
+
+    - name: Install dependencies
+      run: |
+        make install-for-tests
+
+    - name: Run dataset loading tests
+      env:
+        # Pass the branch name through the environment rather than
+        # interpolating the expression directly into the script text.
+        BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
+      run: |
+        make dataset-load-test-pr BASE_BRANCH="$BASE_BRANCH"
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -1,6 +1,5 @@
 # GitHub action for the task table generation.
-
-name: documentation
+name: tables
 
 on:
   push:
@@ -24,11 +23,12 @@ jobs:
 
       - name: Install dependencies
         run: |
-          make install
+          python -m pip install --upgrade pip
+          pip install -e . --group docs
 
       - name: Create table
         run: |
-          make build-docs
+          make build-tables
 
   create-table-and-push:
     if: github.ref == 'refs/heads/main'
@@ -43,11 +43,12 @@ jobs:
 
       - name: Install dependencies
         run: |
-          make install
+          python -m pip install --upgrade pip
+          pip install -e . --group docs
 
       - name: Create table
         run: |
-          make build-docs
+          make build-tables
 
       - name: Push table
         env:

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -0,0 +1,36 @@
+# Builds the documentation on pull requests and deploys it on pushes to main.
+name: Documentation
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+# NOTE(review): write access is presumably needed by `mkdocs gh-deploy` to push
+# the built site; it is also granted to pull_request runs here — confirm.
+permissions:
+  contents: write
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e . --group docs
+
+      # NOTE(review): this step does not run `make build-docs` first — confirm
+      # that the deployed site does not depend on its output.
+      - name: Build and Deploy
+        if: github.event_name == 'push'
+        run: mkdocs gh-deploy --force
+
+      - name: Build
+        if: github.event_name == 'pull_request'
+        run: make build-docs
diff --git a/.github/workflows/leaderboard_build.yml b/.github/workflows/leaderboard_build.yml
@@ -4,7 +4,6 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches: [main]
 
 jobs:
   leaderboard:
@@ -26,4 +25,4 @@ jobs:
 
       - name: Run leaderboard build test
         run: |
-          make leaderboard-build-test
+          make leaderboard-build-test
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -15,11 +15,11 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest] #, macos-latest, windows-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
         include:
-          # Add Windows with Python 3.8 only to avoid tests taking too long
+          # Add Windows with Python 3.10 only to avoid tests taking too long
           - os: windows-latest
-            python-version: "3.9"
+            python-version: "3.10"
 
     steps:
       - uses: actions/checkout@v3

diff --git a/.gitignore b/.gitignore
@@ -143,7 +143,6 @@ sb.ipynb
 tests/create_meta/model_card.md
 
 # removed results from mteb repo they are now available at: https://github.com/embeddings-benchmark/results
-results/
 uv.lock
 
 # model loading tests
@@ -152,3 +151,10 @@ mteb/leaderboard/__cached_results.json
 
 # gradio
 .gradio/
+
+# codecarbon
+powermetrics_log.txt
+
+# vscode
+.vscode/launch.json
+
diff --git a/Makefile b/Makefile
@@ -6,7 +6,7 @@ install:
 install-for-tests:
 	@echo "--- 🚀 Installing project dependencies for test ---"
 	@echo "This ensures that the project is not installed in editable mode"
-	pip install ".[image]" --group dev
+	pip install ".[bm25s,pylate,image,audio,codecarbon,faiss-cpu]" --group dev
 
 lint:
 	@echo "--- 🧹 Running linters ---"
@@ -34,12 +34,22 @@ pr:
 	make test
 
 
-build-docs:
-	@echo "--- 📚 Building documentation ---"
-	# since we do not have a documentation site, this just build tables for the .md files
+build-tables:
+	@echo "--- 📚 Building tables ---"
+	# This just builds tables for the .md files
 	python docs/create_tasks_table.py
 	python docs/create_benchmarks_table.py
 
+build-docs:
+	@echo "--- 📚 Building documentation ---"
+	python docs/overview/create_available_tasks.py
+	python docs/overview/create_available_models.py
+	python docs/overview/create_available_benchmarks.py
+
+serve-docs:
+	@echo "--- 📚 Serving documentation ---"
+	python -m mkdocs serve
+
 
 model-load-test:
 	@echo "--- 🚀 Running model load test ---"
@@ -52,6 +62,10 @@ dataset-load-test:
 	@echo "--- 🚀 Running dataset load test ---"
 	pytest -m test_datasets
 
+dataset-load-test-pr:
+	@echo "--- 🚀 Running dataset load test for PR ---"
+	eval "$$(python -m scripts.extract_datasets $(BASE_BRANCH))" && pytest -m test_datasets
+
 leaderboard-build-test:
 	@echo "--- 🚀 Running leaderboard build test ---"
 	pytest -n auto -m leaderboard_stability
@@ -70,3 +84,8 @@ format-citations:
 check: ## Run code quality tools.
 	@echo "--- 🧹 Running code quality tools ---"
 	@pre-commit run -a
+
+.PHONY: typecheck
+typecheck:
+	@echo "--- 🔍 Running type checks ---"
+	mypy mteb
Original file line number	Diff line number	Diff line change
Expand Up		@@ -8,4 +8,3 @@ body:
		description: Please provide a clear and concise description of the feature you would like to see added.
		validations:
		required: true