docs/mmteb/validate_points.py (4 changes: 3 additions & 1 deletion)
@@ -4,6 +4,8 @@
 from jsonlines import Reader
 from pydantic import BaseModel, ConfigDict, Field, ValidationError, conint, constr
 
+logger = logging.getLogger(__name__)
+
 commit_exceptions = {
     "scores_from_old_system",
     # <100 points: from before max points were enforced
@@ -54,7 +56,7 @@ def validate_jsonl_files(folder_path):
             try:
                 # Validate JSON object against schema
                 x = JsonObject(**line)
-                logging.debug(x)
+                logger.debug(x)
                 check_max_points(x, commit_n)
 
             except ValidationError as e:
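
The change applied throughout this PR is the standard library's per-module logger idiom: create one logger at import time with logging.getLogger(__name__) and send every message through it, instead of calling the root-level logging.* functions. Records then carry the module's dotted name, so handlers and filters can target a single module. A minimal sketch of the idiom (the function and variable names here are illustrative, not taken from the repository):

import logging

logger = logging.getLogger(__name__)


def validate(record: dict) -> None:
    # The message is attributed to this module's dotted name, so callers
    # can raise, lower, or silence it without touching the root logger.
    logger.debug("Validating record: %s", record)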

mteb/_evaluators/retrieval_metrics.py (2 changes: 1 addition & 1 deletion)
@@ -140,7 +140,7 @@ def calculate_pmrr(original_run, new_run, changed_qrels):
     changes = []
     for qid in changed_qrels.keys():
         if qid + "-og" not in original_run or qid + "-changed" not in new_run:
-            logging.warning(f"Query {qid} not found in the runs for calculating p-MRR")
+            logger.warning(f"Query {qid} not found in the runs for calculating p-MRR")
             continue
         original_qid_run = original_run[qid + "-og"]
         new_qid_run = new_run[qid + "-changed"]

mteb/abstasks/retrieval_dataset_loaders.py (4 changes: 2 additions & 2 deletions)
@@ -136,7 +136,7 @@ def _load_corpus(self) -> CorpusDatasetType:
"_id", "id"
)
logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper())
logger.info("Doc Example: %s", corpus_ds[0])
logger.debug("Doc Example: %s", corpus_ds[0])
return corpus_ds

def _load_queries(self) -> QueryDatasetType:
@@ -152,7 +152,7 @@ def _load_queries(self) -> QueryDatasetType:
         )
 
         logger.info("Loaded %d %s queries.", len(queries_ds), self.split.upper())
-        logger.info("Query Example: %s", queries_ds[0])
+        logger.debug("Query Example: %s", queries_ds[0])
 
         return queries_ds
 
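
Because the per-document and per-query examples now log at DEBUG instead of INFO, they no longer show up under a default INFO configuration. A caller who still wants them can raise just this module's logger; a small sketch, assuming the dotted module path mirrors the file location (mteb.abstasks.retrieval_dataset_loaders):

import logging

logging.basicConfig(level=logging.INFO)
# Opt back into the "Doc Example"/"Query Example" messages for this loader
# module only; everything else keeps logging at INFO.
logging.getLogger("mteb.abstasks.retrieval_dataset_loaders").setLevel(logging.DEBUG)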

mteb/abstasks/text/reranking.py (2 changes: 1 addition & 1 deletion)
@@ -100,7 +100,7 @@ def transform_old_dataset_format(self, given_dataset: Dataset | None = None):
         if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
             return
 
-        logging.info(
+        logger.info(
             f"Transforming old format to standard format for {self.metadata.name}"
         )
 

mteb/models/model_implementations/voyage_v.py (8 changes: 5 additions & 3 deletions)
@@ -16,6 +16,8 @@
 if TYPE_CHECKING:
     from PIL import Image
 
+logger = logging.getLogger(__name__)
+
 
 def _downsample_image(
     image: Image.Image, max_pixels: int = 16000000, target_longest_side: int = 4000
@@ -37,17 +39,17 @@ def _downsample_image(
             new_width = int(width * (target_longest_side / height))
 
         new_size = (new_width, new_height)
-        logging.info(
+        logger.info(
             f"Downsampling image from {width}x{height} to {new_width}x{new_height}"
         )
         return image.resize(new_size, Image.LANCZOS)
     if width > height:
         if width > 10000:
-            logging.error("Processing extremely wide images.")
+            logger.error("Processing extremely wide images.")
             return image.resize((10000, height), Image.LANCZOS)
     else:
         if height > 10000:
-            logging.error("Processing extremely high images.")
+            logger.error("Processing extremely high images.")
             return image.resize((width, 10000), Image.LANCZOS)
     return image
 

pyproject.toml (1 change: 1 addition & 0 deletions)
@@ -233,6 +233,7 @@ select = [
"TID", # tidy-imports
"D", # pydocstyle
"PGH", # pygrep-hooks Use specific rule codes when ignoring type issues
"LOG", # logging
]

ignore = [
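
The new "LOG" entry selects ruff's logging-related lint rules (the flake8-logging family; the exact rules covered depend on the ruff version, so treat that as an assumption). Their intent lines up with the rest of this PR: flag logging antipatterns such as sending messages through the root logging module rather than a named module logger. Roughly the distinction being enforced:

import logging

# Style the PR migrates away from: the message goes through the root logger.
logging.warning("failed to load model")

# Style the codebase standardizes on: a named, per-module logger.
logger = logging.getLogger(__name__)
logger.warning("failed to load model")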

tests/test_models/model_loading.py (8 changes: 5 additions & 3 deletions)
@@ -10,6 +10,8 @@

 logging.basicConfig(level=logging.INFO)
 
+logger = logging.getLogger(__name__)
+
 
 def teardown_function(revision: str):
     """Teardown function to delete the model revision from the cache."""
@@ -32,7 +34,7 @@ def get_model_below_n_param_threshold(model_name: str, threshold: float = 2e9) -
         del m
         return "None"
     except Exception as e:
-        logging.warning(f"Failed to load model {model_name} with error {e}")
+        logger.warning(f"Failed to load model {model_name} with error {e}")
         return e.__str__()
     try:
         m = get_model(model_name)
@@ -42,7 +44,7 @@ def get_model_below_n_param_threshold(model_name: str, threshold: float = 2e9) -
         del m
         return "None"
     except Exception as e:
-        logging.warning(f"Failed to load model {model_name} with error {e}")
+        logger.warning(f"Failed to load model {model_name} with error {e}")
         return e.__str__()
 
 
@@ -97,7 +99,7 @@ def parse_args():
         with model_name_file.open() as f:
             all_model_names = f.read().strip().split()
     else:
-        logging.warning(
+        logger.warning(
             f"Model name file {args.model_name_file} does not exist. Exiting."
         )
         exit(1)
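
One related detail, offered only as a hedged sketch and not something this diff does: the warnings in these except blocks interpolate the exception text, which drops the traceback. The logging API can keep the traceback at the same WARNING level by passing exc_info=True (logger.exception(...) is the ERROR-level equivalent):

import logging

logger = logging.getLogger(__name__)

try:
    raise RuntimeError("model failed to load")  # stand-in for a failing get_model(...)
except Exception:
    # Same WARNING level as above, but the full traceback is attached to the record.
    logger.warning("Failed to load model", exc_info=True)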