docs/mmteb/validate_points.py (4 changes: 3 additions & 1 deletion)
@@ -4,6 +4,8 @@
 from jsonlines import Reader
 from pydantic import BaseModel, ConfigDict, Field, ValidationError, conint, constr
 
+logger = logging.getLogger(__name__)
+
 commit_exceptions = {
     "scores_from_old_system",
     # <100 points: from before max points were enforced
@@ -54,7 +56,7 @@ def validate_jsonl_files(folder_path):
             try:
                 # Validate JSON object against schema
                 x = JsonObject(**line)
-                logging.debug(x)
+                logger.debug(x)
                 check_max_points(x, commit_n)
 
             except ValidationError as e:
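
The change applied throughout this PR is the standard library's per-module logger idiom: create one logger at import time with logging.getLogger(__name__) and send every message through it, instead of calling the root-level logging.* functions. Records then carry the module's dotted name, so handlers and filters can target a single module. A minimal sketch of the idiom (the function and variable names here are illustrative, not taken from the repository):

import logging

logger = logging.getLogger(__name__)


def validate(record: dict) -> None:
    # The message is attributed to this module's dotted name, so callers
    # can raise, lower, or silence it without touching the root logger.
    logger.debug("Validating record: %s", record)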

mteb/_evaluators/retrieval_metrics.py (2 changes: 1 addition & 1 deletion)
@@ -140,7 +140,7 @@ def calculate_pmrr(original_run, new_run, changed_qrels):
     changes = []
     for qid in changed_qrels.keys():
         if qid + "-og" not in original_run or qid + "-changed" not in new_run:
-            logging.warning(f"Query {qid} not found in the runs for calculating p-MRR")
+            logger.warning(f"Query {qid} not found in the runs for calculating p-MRR")
             continue
         original_qid_run = original_run[qid + "-og"]
         new_qid_run = new_run[qid + "-changed"]

mteb/abstasks/retrieval_dataset_loaders.py (4 changes: 2 additions & 2 deletions)
@@ -136,7 +136,7 @@ def _load_corpus(self) -> CorpusDatasetType:
"_id", "id"
)
logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper())
logger.info("Doc Example: %s", corpus_ds[0])
logger.debug("Doc Example: %s", corpus_ds[0])
return corpus_ds

def _load_queries(self) -> QueryDatasetType:
@@ -152,7 +152,7 @@ def _load_queries(self) -> QueryDatasetType:
         )
 
         logger.info("Loaded %d %s queries.", len(queries_ds), self.split.upper())
-        logger.info("Query Example: %s", queries_ds[0])
+        logger.debug("Query Example: %s", queries_ds[0])
 
         return queries_ds
 
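
Because the per-document and per-query examples now log at DEBUG instead of INFO, they no longer show up under a default INFO configuration. A caller who still wants them can raise just this module's logger; a small sketch, assuming the dotted module path mirrors the file location (mteb.abstasks.retrieval_dataset_loaders):

import logging

logging.basicConfig(level=logging.INFO)
# Opt back into the "Doc Example"/"Query Example" messages for this loader
# module only; everything else keeps logging at INFO.
logging.getLogger("mteb.abstasks.retrieval_dataset_loaders").setLevel(logging.DEBUG)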

mteb/abstasks/text/reranking.py (2 changes: 1 addition & 1 deletion)
@@ -100,7 +100,7 @@ def transform_old_dataset_format(self, given_dataset: Dataset | None = None):
         if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
             return
 
-        logging.info(
+        logger.info(
             f"Transforming old format to standard format for {self.metadata.name}"
         )
 

mteb/models/model_implementations/voyage_v.py (8 changes: 5 additions & 3 deletions)
@@ -16,6 +16,8 @@
 if TYPE_CHECKING:
     from PIL import Image
 
+logger = logging.getLogger(__name__)
+
 
 def _downsample_image(
     image: Image.Image, max_pixels: int = 16000000, target_longest_side: int = 4000
@@ -37,17 +39,17 @@ def _downsample_image(
             new_width = int(width * (target_longest_side / height))
 
         new_size = (new_width, new_height)
-        logging.info(
+        logger.info(
             f"Downsampling image from {width}x{height} to {new_width}x{new_height}"
         )
         return image.resize(new_size, Image.LANCZOS)
     if width > height:
         if width > 10000:
-            logging.error("Processing extremely wide images.")
+            logger.error("Processing extremely wide images.")
             return image.resize((10000, height), Image.LANCZOS)
     else:
         if height > 10000:
-            logging.error("Processing extremely high images.")
+            logger.error("Processing extremely high images.")
             return image.resize((width, 10000), Image.LANCZOS)
     return image
 

pyproject.toml (1 change: 1 addition & 0 deletions)
@@ -233,6 +233,7 @@ select = [
"TID", # tidy-imports
"D", # pydocstyle
"PGH", # pygrep-hooks Use specific rule codes when ignoring type issues
"LOG", # logging
]

ignore = [
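
The new "LOG" entry selects ruff's logging-related lint rules (the flake8-logging family; the exact rules covered depend on the ruff version, so treat that as an assumption). Their intent lines up with the rest of this PR: flag logging antipatterns such as sending messages through the root logging module rather than a named module logger. Roughly the distinction being enforced:

import logging

# Style the PR migrates away from: the message goes through the root logger.
logging.warning("failed to load model")

# Style the codebase standardizes on: a named, per-module logger.
logger = logging.getLogger(__name__)
logger.warning("failed to load model")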

tests/test_models/model_loading.py (8 changes: 5 additions & 3 deletions)
@@ -10,6 +10,8 @@

 logging.basicConfig(level=logging.INFO)
 
+logger = logging.getLogger(__name__)
+
 
 def teardown_function(revision: str):
     """Teardown function to delete the model revision from the cache."""
@@ -32,7 +34,7 @@ def get_model_below_n_param_threshold(model_name: str, threshold: float = 2e9) -
         del m
         return "None"
     except Exception as e:
-        logging.warning(f"Failed to load model {model_name} with error {e}")
+        logger.warning(f"Failed to load model {model_name} with error {e}")
         return e.__str__()
     try:
         m = get_model(model_name)
@@ -42,7 +44,7 @@ def get_model_below_n_param_threshold(model_name: str, threshold: float = 2e9) -
         del m
         return "None"
     except Exception as e:
-        logging.warning(f"Failed to load model {model_name} with error {e}")
+        logger.warning(f"Failed to load model {model_name} with error {e}")
         return e.__str__()
 
 
@@ -97,7 +99,7 @@ def parse_args():
         with model_name_file.open() as f:
             all_model_names = f.read().strip().split()
     else:
-        logging.warning(
+        logger.warning(
             f"Model name file {args.model_name_file} does not exist. Exiting."
         )
         exit(1)
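
One related detail, offered only as a hedged sketch and not something this diff does: the warnings in these except blocks interpolate the exception text, which drops the traceback. The logging API can keep the traceback at the same WARNING level by passing exc_info=True (logger.exception(...) is the ERROR-level equivalent):

import logging

logger = logging.getLogger(__name__)

try:
    raise RuntimeError("model failed to load")  # stand-in for a failing get_model(...)
except Exception:
    # Same WARNING level as above, but the full traceback is attached to the record.
    logger.warning("Failed to load model", exc_info=True)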