Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mteb/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ def _evaluate_task(

task.check_if_dataset_is_superseded()

data_loaded = task.data_loaded
if not data_loaded:
data_preloaded = task.data_loaded
if not data_preloaded:
try:
task.load_data()
except DatasetNotFoundError as e:
Expand Down Expand Up @@ -176,7 +176,7 @@ def _evaluate_task(
kg_co2_emissions=None,
)

if data_loaded: # only unload if we loaded the data
if not data_preloaded: # only unload if we loaded the data
task.unload_data()

return result
Expand Down
25 changes: 25 additions & 0 deletions tests/test_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,3 +271,28 @@ def load_error():
results = mteb.evaluate(model, [error_task, task], cache=None, raise_error=False)
assert len(results.task_results) == 1
assert len(results.exceptions) == 1


def test_evaluate_unloads_data_when_not_preloaded():
    """Check that evaluate() releases a dataset it had to load itself.

    The task starts out with no data loaded; once evaluate() returns, the
    task must again report that no data is loaded.
    """
    encoder = MockSentenceTransformer()
    classification_task = MockClassificationTask()

    # Precondition: nothing has been loaded before evaluation starts.
    assert classification_task.data_loaded is False

    mteb.evaluate(
        encoder,
        classification_task,
        cache=None,
        co2_tracker=False,
    )

    # Postcondition: the data evaluate() loaded has been dropped again.
    assert (
        classification_task.data_loaded is False
    ), "evaluate() should unload data it loaded"


def test_evaluate_preserves_preloaded_data_across_multiple_calls():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you create a test that would cause `TypeError: 'NoneType' object is not subscriptable` on main? I tried this and similar tests multiple times and couldn't reproduce the error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the test - it fails on main with exactly the expected error:

  _ = task.dataset["test"]  # Verify dataset wasn't unloaded                                                  

E TypeError: 'NoneType' object is not subscriptable

"""Test that preloaded data persists across multiple evaluate() calls."""
model = MockSentenceTransformer()
task = MockClassificationTask()

task.load_data()
assert task.data_loaded is True

mteb.evaluate(model, task, cache=None, co2_tracker=False)
_ = task.dataset["test"] # Verify dataset wasn't unloaded

mteb.evaluate(model, task, cache=None, co2_tracker=False)
_ = task.dataset["test"] # Verify dataset persists across multiple calls