Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/dataset_loading.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3

- name: Cache Hugging Face
id: cache-hf
uses: actions/cache@v4
with:
key: cache-dataset-loading
path: ${{ github.workspace }}/.cache/dataset_check_cache.json

- name: Set up Python
uses: actions/setup-python@v4
with:
Expand Down
9 changes: 4 additions & 5 deletions mteb/models/ops_moa_models.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from __future__ import annotations

from mteb.model_meta import ModelMeta
from mteb.models.wrapper import Wrapper
from functools import partial

from sentence_transformers import SentenceTransformer
import torch
import torch.nn as nn
from huggingface_hub import snapshot_download

from mteb.model_meta import ModelMeta
from mteb.models.wrapper import Wrapper


class CustomWrapper(Wrapper):
Expand Down
20 changes: 20 additions & 0 deletions tests/test_tasks/test_all_abstasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import json
import logging
from datetime import datetime
from pathlib import Path
from unittest.mock import Mock, patch

import huggingface_hub
Expand Down Expand Up @@ -75,9 +78,26 @@ def test_load_data(
)
@pytest.mark.parametrize("dataset_revision", dataset_revisions)
def test_dataset_on_hf(dataset_revision: tuple[str, str]):
CACHE_FILE = Path("./.cache/dataset_check_cache.json")
repo_id, revision = dataset_revision
today = datetime.now().strftime("%Y-%m-%d")
repo_key = repo_id + "-" + revision

if CACHE_FILE.exists():
with CACHE_FILE.open("r") as f:
cache = json.load(f)
else:
cache = {}

if cache.get(repo_key) == {"repo_id": repo_id, "revision": revision, "date": today}:
pytest.skip(f"Dataset {repo_id} - {revision} already checked today")

try:
huggingface_hub.dataset_info(repo_id, revision=revision)

cache[repo_key] = {"repo_id": repo_id, "revision": revision, "date": today}
with CACHE_FILE.open("w") as f:
json.dump(cache, f)
except (
huggingface_hub.errors.RepositoryNotFoundError,
huggingface_hub.errors.RevisionNotFoundError,
Expand Down
Loading