diff --git a/docs/mmteb/validate_points.py b/docs/mmteb/validate_points.py
index 13bee8c047..2ee0720e5e 100644
--- a/docs/mmteb/validate_points.py
+++ b/docs/mmteb/validate_points.py
@@ -4,6 +4,8 @@
 from jsonlines import Reader
 from pydantic import BaseModel, ConfigDict, Field, ValidationError, conint, constr
 
+logger = logging.getLogger(__name__)
+
 commit_exceptions = {
     "scores_from_old_system",  # <100 points: from before max points were enforced
@@ -54,7 +56,7 @@ def validate_jsonl_files(folder_path):
             try:
                 # Validate JSON object against schema
                 x = JsonObject(**line)
-                logging.debug(x)
+                logger.debug(x)
 
                 check_max_points(x, commit_n)
             except ValidationError as e:
diff --git a/mteb/_evaluators/retrieval_metrics.py b/mteb/_evaluators/retrieval_metrics.py
index 2e8ac2110b..1d1f2b51bb 100644
--- a/mteb/_evaluators/retrieval_metrics.py
+++ b/mteb/_evaluators/retrieval_metrics.py
@@ -140,7 +140,7 @@ def calculate_pmrr(original_run, new_run, changed_qrels):
     changes = []
     for qid in changed_qrels.keys():
         if qid + "-og" not in original_run or qid + "-changed" not in new_run:
-            logging.warning(f"Query {qid} not found in the runs for calculating p-MRR")
+            logger.warning(f"Query {qid} not found in the runs for calculating p-MRR")
             continue
         original_qid_run = original_run[qid + "-og"]
         new_qid_run = new_run[qid + "-changed"]
diff --git a/mteb/abstasks/retrieval_dataset_loaders.py b/mteb/abstasks/retrieval_dataset_loaders.py
index 4ecb4be359..6c25dae129 100644
--- a/mteb/abstasks/retrieval_dataset_loaders.py
+++ b/mteb/abstasks/retrieval_dataset_loaders.py
@@ -136,7 +136,7 @@ def _load_corpus(self) -> CorpusDatasetType:
             "_id", "id"
         )
         logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper())
-        logger.info("Doc Example: %s", corpus_ds[0])
+        logger.debug("Doc Example: %s", corpus_ds[0])
         return corpus_ds
 
     def _load_queries(self) -> QueryDatasetType:
@@ -152,7 +152,7 @@ def _load_queries(self) -> QueryDatasetType:
         )
         logger.info(
             "Loaded %d %s queries.", len(queries_ds), self.split.upper()
         )
-        logger.info("Query Example: %s", queries_ds[0])
+        logger.debug("Query Example: %s", queries_ds[0])
         return queries_ds
diff --git a/mteb/abstasks/text/reranking.py b/mteb/abstasks/text/reranking.py
index f142b8a63e..425af197e6 100644
--- a/mteb/abstasks/text/reranking.py
+++ b/mteb/abstasks/text/reranking.py
@@ -100,7 +100,7 @@ def transform_old_dataset_format(self, given_dataset: Dataset | None = None):
         if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
             return
 
-        logging.info(
+        logger.info(
             f"Transforming old format to standard format for {self.metadata.name}"
         )
diff --git a/mteb/models/model_implementations/voyage_v.py b/mteb/models/model_implementations/voyage_v.py
index b84b3040f2..21037dd428 100644
--- a/mteb/models/model_implementations/voyage_v.py
+++ b/mteb/models/model_implementations/voyage_v.py
@@ -16,6 +16,8 @@
 if TYPE_CHECKING:
     from PIL import Image
 
+logger = logging.getLogger(__name__)
+
 
 def _downsample_image(
     image: Image.Image, max_pixels: int = 16000000, target_longest_side: int = 4000
@@ -37,17 +39,17 @@ def _downsample_image(
             new_width = int(width * (target_longest_side / height))
         new_size = (new_width, new_height)
-        logging.info(
+        logger.info(
             f"Downsampling image from {width}x{height} to {new_width}x{new_height}"
         )
         return image.resize(new_size, Image.LANCZOS)
     if width > height:
         if width > 10000:
-            logging.error("Processing extremely wide images.")
+            logger.error("Processing extremely wide images.")
             return image.resize((10000, height), Image.LANCZOS)
     else:
         if height > 10000:
-            logging.error("Processing extremely high images.")
+            logger.error("Processing extremely high images.")
             return image.resize((width, 10000), Image.LANCZOS)
     return image
diff --git a/pyproject.toml b/pyproject.toml
index 6fe28affd7..0019df0008 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -233,6 +233,7 @@ select = [
     "TID", # tidy-imports
     "D", # pydocstyle
     "PGH", # pygrep-hooks Use specific rule codes when ignoring type issues
+    "LOG", # logging
 ]
 
 ignore = [
diff --git a/tests/test_models/model_loading.py b/tests/test_models/model_loading.py
index 410c4733c1..ce0fff0c74 100644
--- a/tests/test_models/model_loading.py
+++ b/tests/test_models/model_loading.py
@@ -10,6 +10,8 @@
 logging.basicConfig(level=logging.INFO)
 
+logger = logging.getLogger(__name__)
+
 
 def teardown_function(revision: str):
     """Teardown function to delete the model revision from the cache."""
@@ -32,7 +34,7 @@ def get_model_below_n_param_threshold(model_name: str, threshold: float = 2e9) -
         del m
         return "None"
     except Exception as e:
-        logging.warning(f"Failed to load model {model_name} with error {e}")
+        logger.warning(f"Failed to load model {model_name} with error {e}")
         return e.__str__()
     try:
         m = get_model(model_name)
@@ -42,7 +44,7 @@ def get_model_below_n_param_threshold(model_name: str, threshold: float = 2e9) -
         del m
         return "None"
     except Exception as e:
-        logging.warning(f"Failed to load model {model_name} with error {e}")
+        logger.warning(f"Failed to load model {model_name} with error {e}")
         return e.__str__()
@@ -97,7 +99,7 @@ def parse_args():
         with model_name_file.open() as f:
             all_model_names = f.read().strip().split()
     else:
-        logging.warning(
+        logger.warning(
             f"Model name file {args.model_name_file} does not exist. Exiting."
         )
         exit(1)