diff --git a/mteb/abstasks/clustering.py b/mteb/abstasks/clustering.py index e21ef296fc..ea86864d7a 100644 --- a/mteb/abstasks/clustering.py +++ b/mteb/abstasks/clustering.py @@ -1,5 +1,6 @@ import itertools import logging +import os import random from collections import defaultdict from pathlib import Path @@ -31,6 +32,7 @@ MultilingualDataset = dict[HFSubset, DatasetDict] +OMP_NUM_THREADS = 4 def _evaluate_clustering_bootstrapped( @@ -53,6 +55,17 @@ def _evaluate_clustering_bootstrapped( - A dictionary where keys are level names (e.g., "Level 0", "Level 1", etc.) and values are lists of V-measure scores for each clustering experiment at that level. - A dictionary where keys are level names and values are lists of cluster assignments for each clustering experiment at that level. """ + # set OMP_NUM_THREADS for reproducibility + if "OMP_NUM_THREADS" not in os.environ: + logger.info( + f"Setting OMP_NUM_THREADS to {OMP_NUM_THREADS} for clustering to ensure reproducibility." + ) + os.environ["OMP_NUM_THREADS"] = str(OMP_NUM_THREADS) + else: + logger.info( + f"Using existing OMP_NUM_THREADS={os.environ['OMP_NUM_THREADS']} for clustering, this may lead to non-reproducible results. Set it to {OMP_NUM_THREADS} to ensure reproducibility." + ) + v_measures = defaultdict(list) cluster_assignments = defaultdict(list) if max_depth is not None: