Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions docs/usage/loading_results.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@ For instance, if you are selecting the best model for semantic text similarity (

```python
import mteb
from mteb.cache import ResultCache

tasks = mteb.get_tasks(tasks=["STS12"])
model_names = ["intfloat/multilingual-e5-large"]

cache = ResultCache("~/.cache/mteb")
cache = mteb.ResultCache("~/.cache/mteb")
results = cache.load_results(models=model_names, tasks=tasks)
```

Expand All @@ -36,16 +35,16 @@ All previously submitted results are available in the results [repository](https://gith
You can download this using:

```python
from mteb.cache import ResultCache
import mteb

cache = ResultCache()
cache = mteb.ResultCache()
cache.download_from_remote() # download results from the remote repository
```

From here, you can work with the cache as usual. For instance, if you are selecting the best model for your French and English retrieval task on legal documents, you could fetch the relevant tasks and create a dataframe of the results using the following code:

```python
from mteb.cache import ResultCache
import mteb

# select your tasks
tasks = mteb.get_tasks(task_types=["Retrieval"], languages=["eng", "fra"], domains=["Legal"])
Expand All @@ -56,7 +55,7 @@ model_names = [
]


cache = ResultCache()
cache = mteb.ResultCache()
cache.download_from_remote() # download results from the remote repository. Might take a while the first time.

results = cache.load_results(
Expand Down Expand Up @@ -88,11 +87,10 @@ If you loaded results for a specific benchmark, you can get the aggregated bench

```python
import mteb
from mteb.cache import ResultCache

# Load results for a specific benchmark
benchmark = mteb.get_benchmark("MTEB(eng, v2)")
cache = ResultCache()
cache = mteb.ResultCache()
cache.download_from_remote() # download results from the remote repository
results = cache.load_results(
models=["intfloat/e5-small", "intfloat/multilingual-e5-small"],
Expand Down
8 changes: 4 additions & 4 deletions docs/whats_new.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ results = mteb.evaluate(model, tasks)
### Better local and online caching
The new [`mteb.ResultCache`][mteb.cache.ResultCache] makes managing the cache notably easier:
```py
from mteb.cache import ResultCache
import mteb

model = ...
tasks = ...

cache = ResultCache(cache_path="~/.cache/mteb") # default
cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default

# simple evaluate with cache
results = mteb.evaluate(model, tasks, cache=cache) # only runs if results not in cache
Expand Down Expand Up @@ -169,9 +169,9 @@ We've added a lot of new documentation to make it easier to get started with MTE
The new `ResultCache` also makes it easier to load, inspect and compare both local and online results:

```py
from mteb.cache import ResultCache
import mteb

cache = ResultCache(cache_path="~/.cache/mteb") # default
cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
cache.download_from_remote() # download the latest results from the remote repository

# load both local and online results
Expand Down
2 changes: 2 additions & 0 deletions mteb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from mteb import types
from mteb.abstasks import AbsTask
from mteb.abstasks.task_metadata import TaskMetadata
from mteb.cache import ResultCache
from mteb.deprecated_evaluator import MTEB
from mteb.evaluate import evaluate
from mteb.filter_tasks import filter_tasks
Expand Down Expand Up @@ -33,6 +34,7 @@
"CrossEncoderProtocol",
"EncoderProtocol",
"IndexEncoderSearchProtocol",
"ResultCache",
"SearchProtocol",
"SentenceTransformerEncoderWrapper",
"TaskMetadata",
Expand Down
16 changes: 8 additions & 8 deletions mteb/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ class ResultCache:
"""Class to handle the local cache of MTEB results.

Examples:
>>> from mteb.cache import ResultCache
>>> cache = ResultCache(cache_path="~/.cache/mteb") # default
>>> import mteb
>>> cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
>>> cache.download_from_remote() # download the latest results from the remote repository
>>> result = cache.load_results("task_name", "model_name")
"""
Expand Down Expand Up @@ -320,8 +320,8 @@ def _download_cached_results_from_branch(
OSError: On other file system errors

Examples:
>>> from mteb.cache import ResultCache
>>> cache = ResultCache()
>>> import mteb
>>> cache = mteb.ResultCache()
>>> # Download optimized cached results
>>> cache_file = cache._download_cached_results_from_branch()
>>> # Use custom output path
Expand Down Expand Up @@ -460,8 +460,8 @@ def get_cache_paths(
A list of paths in the cache directory.

Examples:
>>> from mteb.cache import ResultCache
>>> cache = ResultCache()
>>> import mteb
>>> cache = mteb.ResultCache()
>>>
>>> # Get all cache paths
>>> paths = cache.get_cache_paths()
Expand Down Expand Up @@ -642,8 +642,8 @@ def load_results(
A BenchmarkResults object containing the results for the specified models and tasks.

Examples:
>>> from mteb.cache import ResultCache
>>> cache = ResultCache()
>>> import mteb
>>> cache = mteb.ResultCache()
>>>
>>> # Load results for specific models and tasks
>>> results = cache.load_results(
Expand Down