diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 0d18e331db..46a82f190b 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -46,7 +46,7 @@ from typing import TYPE_CHECKING -__version__ = "0.25.0.dev0" +__version__ = "0.26.0.dev0" # Alphabetical order of definitions is ensured in tests # WARNING: any comment added in this dictionary definition will be lost when @@ -129,7 +129,6 @@ "file_download": [ "HfFileMetadata", "_CACHED_NO_EXIST", - "cached_download", "get_hf_file_metadata", "hf_hub_download", "hf_hub_url", @@ -645,7 +644,6 @@ def __dir__(): from .file_download import ( _CACHED_NO_EXIST, # noqa: F401 HfFileMetadata, # noqa: F401 - cached_download, # noqa: F401 get_hf_file_metadata, # noqa: F401 hf_hub_download, # noqa: F401 hf_hub_url, # noqa: F401 diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index 2d60f9b6c1..daeb473750 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -1,9 +1,7 @@ import contextlib import copy import errno -import fnmatch import inspect -import json import os import re import shutil @@ -22,11 +20,7 @@ __version__, # noqa: F401 # for backward compatibility constants, ) -from ._local_folder import ( - get_local_download_paths, - read_download_metadata, - write_download_metadata, -) +from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata from .constants import ( HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility @@ -65,10 +59,8 @@ tqdm, validate_hf_hub_args, ) -from .utils._deprecation import _deprecate_arguments, _deprecate_method from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility from .utils._typing import HTTP_METHOD_T -from .utils.insecure_hashlib import sha256 from .utils.sha import sha_fileobj @@ -262,85 +254,6 @@ def hf_hub_url( return url -@_deprecate_method(version="0.26", message="Use `hf_hub_download` to benefit from the new cache layout.") -def url_to_filename(url: str, etag: Optional[str] = None) -> str: - """Generate a local filename from a url. - - Convert `url` into a hashed filename in a reproducible way. If `etag` is - specified, append its hash to the url's, delimited by a period. If the url - ends with .h5 (Keras HDF5 weights) adds '.h5' to the name so that TF 2.0 can - identify it as a HDF5 file (see - https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380) - - Args: - url (`str`): - The address to the file. - etag (`str`, *optional*): - The ETag of the file. - - Returns: - The generated filename. - """ - url_bytes = url.encode("utf-8") - filename = sha256(url_bytes).hexdigest() - - if etag: - etag_bytes = etag.encode("utf-8") - filename += "." + sha256(etag_bytes).hexdigest() - - if url.endswith(".h5"): - filename += ".h5" - - return filename - - -@_deprecate_method(version="0.26", message="Use `hf_hub_url` instead.") -def filename_to_url( - filename, - cache_dir: Optional[str] = None, - legacy_cache_layout: bool = False, -) -> Tuple[str, str]: - """ - Return the url and etag (which may be `None`) stored for `filename`. Raise - `EnvironmentError` if `filename` or its stored metadata do not exist. - - Args: - filename (`str`): - The name of the file - cache_dir (`str`, *optional*): - The cache directory to use instead of the default one. 
- legacy_cache_layout (`bool`, *optional*, defaults to `False`): - If `True`, uses the legacy file cache layout i.e. just call `hf_hub_url` - then `cached_download`. This is deprecated as the new cache layout is - more powerful. - """ - if not legacy_cache_layout: - warnings.warn( - "`filename_to_url` uses the legacy way cache file layout", - FutureWarning, - ) - - if cache_dir is None: - cache_dir = constants.HF_HUB_CACHE - if isinstance(cache_dir, Path): - cache_dir = str(cache_dir) - - cache_path = os.path.join(cache_dir, filename) - if not os.path.exists(cache_path): - raise EnvironmentError(f"file {cache_path} not found") - - meta_path = cache_path + ".json" - if not os.path.exists(meta_path): - raise EnvironmentError(f"file {meta_path} not found") - - with open(meta_path, encoding="utf-8") as meta_file: - metadata = json.load(meta_file) - url = metadata["url"] - etag = metadata["etag"] - - return url, etag - - def _request_wrapper( method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params ) -> requests.Response: @@ -574,249 +487,6 @@ def http_get( ) -@validate_hf_hub_args -@_deprecate_method(version="0.26", message="Use `hf_hub_download` instead.") -def cached_download( - url: str, - *, - library_name: Optional[str] = None, - library_version: Optional[str] = None, - cache_dir: Union[str, Path, None] = None, - user_agent: Union[Dict, str, None] = None, - force_download: bool = False, - force_filename: Optional[str] = None, - proxies: Optional[Dict] = None, - etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, - resume_download: Optional[bool] = None, - token: Union[bool, str, None] = None, - local_files_only: bool = False, - legacy_cache_layout: bool = False, -) -> str: - """ - Download from a given URL and cache it if it's not already present in the - local cache. - - Given a URL, this function looks for the corresponding file in the local - cache. If it's not there, download it. Then return the path to the cached - file. - - Will raise errors tailored to the Hugging Face Hub. - - Args: - url (`str`): - The path to the file to be downloaded. - library_name (`str`, *optional*): - The name of the library to which the object corresponds. - library_version (`str`, *optional*): - The version of the library. - cache_dir (`str`, `Path`, *optional*): - Path to the folder where cached files are stored. - user_agent (`dict`, `str`, *optional*): - The user-agent info in the form of a dictionary or a string. - force_download (`bool`, *optional*, defaults to `False`): - Whether the file should be downloaded even if it already exists in - the local cache. - force_filename (`str`, *optional*): - Use this name instead of a generated file name. - proxies (`dict`, *optional*): - Dictionary mapping protocol to the URL of the proxy passed to - `requests.request`. - etag_timeout (`float`, *optional* defaults to `10`): - When fetching ETag, how many seconds to wait for the server to send - data before giving up which is passed to `requests.request`. - token (`bool`, `str`, *optional*): - A token to be used for the download. - - If `True`, the token is read from the HuggingFace config - folder. - - If a string, it's used as the authentication token. - local_files_only (`bool`, *optional*, defaults to `False`): - If `True`, avoid downloading the file and return the path to the - local cached file if it exists. - legacy_cache_layout (`bool`, *optional*, defaults to `False`): - Set this parameter to `True` to mention that you'd like to continue - the old cache layout. 
Putting this to `True` manually will not raise - any warning when using `cached_download`. We recommend using - `hf_hub_download` to take advantage of the new cache. - - Returns: - Local path (string) of file or if networking is off, last version of - file cached on disk. - - - - Raises the following errors: - - - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) - if `token=True` and the token cannot be found. - - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) - if ETag cannot be determined. - - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) - if some parameter value is invalid - - [`~utils.RepositoryNotFoundError`] - If the repository to download from cannot be found. This may be because it doesn't exist, - or because it is set to `private` and you do not have access. - - [`~utils.RevisionNotFoundError`] - If the revision to download from cannot be found. - - [`~utils.EntryNotFoundError`] - If the file to download cannot be found. - - [`~utils.LocalEntryNotFoundError`] - If network is disabled or unavailable and file is not found in cache. - - - """ - if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT: - # Respect environment variable above user value - etag_timeout = constants.HF_HUB_ETAG_TIMEOUT - - if not legacy_cache_layout: - warnings.warn( - "'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to" - " 'hf_hub_download'", - FutureWarning, - ) - if resume_download is not None: - warnings.warn( - "`resume_download` is deprecated and will be removed in version 1.0.0. " - "Downloads always resume when possible. " - "If you want to force a new download, use `force_download=True`.", - FutureWarning, - ) - - if cache_dir is None: - cache_dir = constants.HF_HUB_CACHE - if isinstance(cache_dir, Path): - cache_dir = str(cache_dir) - - os.makedirs(cache_dir, exist_ok=True) - - headers = build_hf_headers( - token=token, - library_name=library_name, - library_version=library_version, - user_agent=user_agent, - ) - - url_to_download = url - etag = None - expected_size = None - if not local_files_only: - try: - # Temporary header: we want the full (decompressed) content size returned to be able to check the - # downloaded file size - headers["Accept-Encoding"] = "identity" - r = _request_wrapper( - method="HEAD", - url=url, - headers=headers, - allow_redirects=False, - follow_relative_redirects=True, - proxies=proxies, - timeout=etag_timeout, - ) - headers.pop("Accept-Encoding", None) - hf_raise_for_status(r) - etag = r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag") - # We favor a custom header indicating the etag of the linked resource, and - # we fallback to the regular etag header. - # If we don't have any of those, raise an error. - if etag is None: - raise FileMetadataError( - "Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility." - ) - # We get the expected size of the file, to check the download went well. - expected_size = _int_or_none(r.headers.get("Content-Length")) - # In case of a redirect, save an extra redirect on the request.get call, - # and ensure we download the exact atomic version even if it changed - # between the HEAD and the GET (unlikely, but hey). - # Useful for lfs blobs that are stored on a CDN. 
- if 300 <= r.status_code <= 399: - url_to_download = r.headers["Location"] - headers.pop("authorization", None) - expected_size = None # redirected -> can't know the expected size - except (requests.exceptions.SSLError, requests.exceptions.ProxyError): - # Actually raise for those subclasses of ConnectionError - raise - except ( - requests.exceptions.ConnectionError, - requests.exceptions.Timeout, - OfflineModeIsEnabled, - ): - # Otherwise, our Internet connection is down. - # etag is None - pass - - filename = force_filename if force_filename is not None else url_to_filename(url, etag) - - # get cache path to put the file - cache_path = os.path.join(cache_dir, filename) - - # etag is None == we don't have a connection or we passed local_files_only. - # try to get the last downloaded one - if etag is None: - if os.path.exists(cache_path) and not force_download: - return cache_path - else: - matching_files = [ - file - for file in fnmatch.filter(os.listdir(cache_dir), filename.split(".")[0] + ".*") - if not file.endswith(".json") and not file.endswith(".lock") - ] - if len(matching_files) > 0 and not force_download and force_filename is None: - return os.path.join(cache_dir, matching_files[-1]) - else: - # If files cannot be found and local_files_only=True, - # the models might've been found if local_files_only=False - # Notify the user about that - if local_files_only: - raise LocalEntryNotFoundError( - "Cannot find the requested files in the cached path and" - " outgoing traffic has been disabled. To enable model look-ups" - " and downloads online, set 'local_files_only' to False." - ) - else: - raise LocalEntryNotFoundError( - "Connection error, and we cannot find the requested files in" - " the cached path. Please try again or make sure your Internet" - " connection is on." - ) - - # From now on, etag is not None. - if os.path.exists(cache_path) and not force_download: - return cache_path - - # Prevent parallel downloads of the same file with a lock. - lock_path = cache_path + ".lock" - - # Some Windows versions do not allow for paths longer than 255 characters. - # In this case, we must specify it is an extended path by using the "\\?\" prefix. - if os.name == "nt" and len(os.path.abspath(lock_path)) > 255: - lock_path = "\\\\?\\" + os.path.abspath(lock_path) - - if os.name == "nt" and len(os.path.abspath(cache_path)) > 255: - cache_path = "\\\\?\\" + os.path.abspath(cache_path) - - with WeakFileLock(lock_path): - _download_to_tmp_and_move( - incomplete_path=Path(cache_path + ".incomplete"), - destination_path=Path(cache_path), - url_to_download=url_to_download, - proxies=proxies, - headers=headers, - expected_size=expected_size, - filename=filename, - force_download=force_download, - ) - - if force_filename is None: - logger.info("creating metadata file for %s", cache_path) - meta = {"url": url, "etag": etag} - meta_path = cache_path + ".json" - with open(meta_path, "w") as meta_file: - json.dump(meta, meta_file) - - return cache_path - - def _normalize_etag(etag: Optional[str]) -> Optional[str]: """Normalize ETag HTTP header, so it can be used to create nice filepaths. @@ -990,14 +660,6 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None: pass -@_deprecate_arguments( - version="0.26.0", - deprecated_args=["legacy_cache_layout"], - custom_message=( - "Legacy cache layout has been deprecated since August 2022 and will soon be removed. " - "See https://huggingface.co/docs/huggingface_hub/guides/manage-cache for more details." 
- ), -) @validate_hf_hub_args def hf_hub_download( repo_id: str, @@ -1018,8 +680,6 @@ def hf_hub_download( local_files_only: bool = False, headers: Optional[Dict[str, str]] = None, endpoint: Optional[str] = None, - # Deprecated args - legacy_cache_layout: bool = False, resume_download: Optional[bool] = None, force_filename: Optional[str] = None, local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", @@ -1101,10 +761,6 @@ def hf_hub_download( local cached file if it exists. headers (`dict`, *optional*): Additional headers to be sent with the request. - legacy_cache_layout (`bool`, *optional*, defaults to `False`): - If `True`, uses the legacy file cache layout i.e. just call [`hf_hub_url`] - then `cached_download`. This is deprecated as the new cache layout is - more powerful. Returns: `str`: Local path of file or if networking is off, last version of file cached on disk. @@ -1137,7 +793,6 @@ def hf_hub_download( "which keeps the filenames as they are on the Hub, is now in place.", FutureWarning, ) - legacy_cache_layout = True if resume_download is not None: warnings.warn( "`resume_download` is deprecated and will be removed in version 1.0.0. " @@ -1146,31 +801,6 @@ def hf_hub_download( FutureWarning, ) - if legacy_cache_layout: - url = hf_hub_url( - repo_id, - filename, - subfolder=subfolder, - repo_type=repo_type, - revision=revision, - endpoint=endpoint, - ) - - return cached_download( - url, - library_name=library_name, - library_version=library_version, - cache_dir=cache_dir, - user_agent=user_agent, - force_download=force_download, - force_filename=force_filename, - proxies=proxies, - etag_timeout=etag_timeout, - token=token, - local_files_only=local_files_only, - legacy_cache_layout=legacy_cache_layout, - ) - if cache_dir is None: cache_dir = constants.HF_HUB_CACHE if revision is None: diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 6cbce977a0..a74bdef0eb 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -5471,7 +5471,6 @@ def hf_hub_download( local_files_only: bool = False, # Deprecated args resume_download: Optional[bool] = None, - legacy_cache_layout: bool = False, force_filename: Optional[str] = None, local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", ) -> str: @@ -5592,7 +5591,6 @@ def hf_hub_download( token=token, headers=self.headers, local_files_only=local_files_only, - legacy_cache_layout=legacy_cache_layout, ) @validate_hf_hub_args diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index eb2e4adf04..63204da6d4 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -37,17 +37,7 @@ import re import time import warnings -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Iterable, - List, - Literal, - Optional, - Union, - overload, -) +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload from requests import HTTPError from requests.structures import CaseInsensitiveDict @@ -101,12 +91,7 @@ ZeroShotClassificationOutputElement, ZeroShotImageClassificationOutputElement, ) -from huggingface_hub.utils import ( - build_hf_headers, - get_session, - hf_raise_for_status, -) -from huggingface_hub.utils._deprecation import _deprecate_positional_args +from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status if TYPE_CHECKING: @@ -158,7 +143,6 @@ class InferenceClient: follow the same pattern as `openai.OpenAI` client. 
Cannot be used if `token` is set. Defaults to None. """ - @_deprecate_positional_args(version="0.26") def __init__( self, model: Optional[str] = None, diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index ad48954342..83e5070b30 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -24,18 +24,7 @@ import re import time import warnings -from typing import ( - TYPE_CHECKING, - Any, - AsyncIterable, - Dict, - List, - Literal, - Optional, - Set, - Union, - overload, -) +from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload from requests.structures import CaseInsensitiveDict @@ -88,10 +77,7 @@ ZeroShotClassificationOutputElement, ZeroShotImageClassificationOutputElement, ) -from huggingface_hub.utils import ( - build_hf_headers, -) -from huggingface_hub.utils._deprecation import _deprecate_positional_args +from huggingface_hub.utils import build_hf_headers from .._common import _async_yield_from, _import_aiohttp @@ -148,7 +134,6 @@ class AsyncInferenceClient: follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None. """ - @_deprecate_positional_args(version="0.26") def __init__( self, model: Optional[str] = None, diff --git a/tests/test_file_download.py b/tests/test_file_download.py index 9e265f4b5d..2445e6ebb0 100644 --- a/tests/test_file_download.py +++ b/tests/test_file_download.py @@ -13,7 +13,6 @@ # limitations under the License. import io import os -import re import shutil import stat import unittest @@ -30,13 +29,7 @@ import huggingface_hub.file_download from huggingface_hub import HfApi, RepoUrl, constants from huggingface_hub._local_folder import write_download_metadata -from huggingface_hub.errors import ( - EntryNotFoundError, - GatedRepoError, - LocalEntryNotFoundError, - RepositoryNotFoundError, - RevisionNotFoundError, -) +from huggingface_hub.errors import EntryNotFoundError, GatedRepoError, LocalEntryNotFoundError from huggingface_hub.file_download import ( _CACHED_NO_EXIST, HfFileMetadata, @@ -45,33 +38,22 @@ _get_pointer_path, _normalize_etag, _request_wrapper, - cached_download, - filename_to_url, get_hf_file_metadata, hf_hub_download, hf_hub_url, http_get, try_to_load_from_cache, ) -from huggingface_hub.utils import ( - SoftTemporaryDirectory, - get_session, - hf_raise_for_status, -) +from huggingface_hub.utils import SoftTemporaryDirectory, get_session, hf_raise_for_status from .testing_constants import ENDPOINT_STAGING, OTHER_TOKEN, TOKEN from .testing_utils import ( DUMMY_MODEL_ID, - DUMMY_MODEL_ID_PINNED_SHA1, - DUMMY_MODEL_ID_PINNED_SHA256, - DUMMY_MODEL_ID_REVISION_INVALID, DUMMY_MODEL_ID_REVISION_ONE_SPECIFIC_COMMIT, DUMMY_RENAMED_NEW_MODEL_ID, DUMMY_RENAMED_OLD_MODEL_ID, SAMPLE_DATASET_IDENTIFIER, - OfflineSimulationMode, expect_deprecation, - offline, repo_name, use_tmp_repo, with_production_testing, @@ -187,41 +169,6 @@ def test_download_regular_file_from_private_renamed_repo(self, repo_url: RepoUrl @with_production_testing class CachedDownloadTests(unittest.TestCase): - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - def test_bogus_url(self): - url = "https://bogus" - with self.assertRaisesRegex(ValueError, "Connection error"): - _ = cached_download(url, legacy_cache_layout=True) - - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - def 
test_no_connection(self): - invalid_url = hf_hub_url( - DUMMY_MODEL_ID, - filename=constants.CONFIG_NAME, - revision=DUMMY_MODEL_ID_REVISION_INVALID, - ) - valid_url = hf_hub_url(DUMMY_MODEL_ID, filename=constants.CONFIG_NAME, revision=REVISION_ID_DEFAULT) - self.assertIsNotNone(cached_download(valid_url, force_download=True, legacy_cache_layout=True)) - for offline_mode in OfflineSimulationMode: - with offline(mode=offline_mode): - with self.assertRaisesRegex(ValueError, "Connection error"): - _ = cached_download(invalid_url, legacy_cache_layout=True) - with self.assertRaisesRegex(ValueError, "Connection error"): - _ = cached_download(valid_url, force_download=True, legacy_cache_layout=True) - self.assertIsNotNone(cached_download(valid_url, legacy_cache_layout=True)) - - @expect_deprecation("cached_download") - def test_file_not_found_on_repo(self): - # Valid revision (None) but missing file on repo. - url = hf_hub_url(DUMMY_MODEL_ID, filename="missing.bin") - with self.assertRaisesRegex( - EntryNotFoundError, - re.compile("404 Client Error(.*)Entry Not Found", flags=re.DOTALL), - ): - _ = cached_download(url, legacy_cache_layout=True) - def test_file_not_found_locally_and_network_disabled(self): # Valid file but missing locally and network is disabled. with SoftTemporaryDirectory() as tmpdir: @@ -245,21 +192,6 @@ def test_file_not_found_locally_and_network_disabled(self): local_files_only=True, ) - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - def test_file_not_found_locally_and_network_disabled_legacy(self): - # Valid file but missing locally and network is disabled. - url = hf_hub_url(DUMMY_MODEL_ID, filename=constants.CONFIG_NAME) - with SoftTemporaryDirectory() as tmpdir: - # Get without network must fail - with pytest.raises(LocalEntryNotFoundError): - cached_download( - url, - legacy_cache_layout=True, - local_files_only=True, - cache_dir=tmpdir, - ) - def test_private_repo_and_file_cached_locally(self): api = HfApi(endpoint=ENDPOINT_STAGING, token=TOKEN) repo_id = api.create_repo(repo_id=repo_name(), private=True).repo_id @@ -293,95 +225,6 @@ def test_file_cached_and_read_only_access(self): # Set permission back for cleanup _recursive_chmod(tmpdir, 0o777) - @expect_deprecation("cached_download") - def test_revision_not_found(self): - # Valid file but missing revision - url = hf_hub_url( - DUMMY_MODEL_ID, - filename=constants.CONFIG_NAME, - revision=DUMMY_MODEL_ID_REVISION_INVALID, - ) - with self.assertRaisesRegex( - RevisionNotFoundError, - re.compile("404 Client Error(.*)Revision Not Found", flags=re.DOTALL), - ): - _ = cached_download(url, legacy_cache_layout=True) - - @expect_deprecation("cached_download") - def test_repo_not_found(self): - # Invalid model file. 
- url = hf_hub_url("bert-base", filename="pytorch_model.bin") - with self.assertRaisesRegex( - RepositoryNotFoundError, - re.compile("401 Client Error(.*)Repository Not Found", flags=re.DOTALL), - ): - _ = cached_download(url, legacy_cache_layout=True) - - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - @expect_deprecation("filename_to_url") - def test_standard_object(self): - url = hf_hub_url(DUMMY_MODEL_ID, filename=constants.CONFIG_NAME, revision=REVISION_ID_DEFAULT) - filepath = cached_download(url, force_download=True, legacy_cache_layout=True) - metadata = filename_to_url(filepath, legacy_cache_layout=True) - self.assertEqual(metadata, (url, f'"{DUMMY_MODEL_ID_PINNED_SHA1}"')) - - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - @expect_deprecation("filename_to_url") - def test_standard_object_rev(self): - # Same object, but different revision - url = hf_hub_url( - DUMMY_MODEL_ID, - filename=constants.CONFIG_NAME, - revision=DUMMY_MODEL_ID_REVISION_ONE_SPECIFIC_COMMIT, - ) - filepath = cached_download(url, force_download=True, legacy_cache_layout=True) - metadata = filename_to_url(filepath, legacy_cache_layout=True) - self.assertNotEqual(metadata[1], f'"{DUMMY_MODEL_ID_PINNED_SHA1}"') - # Caution: check that the etag is *not* equal to the one from `test_standard_object` - - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - @expect_deprecation("filename_to_url") - def test_lfs_object(self): - url = hf_hub_url(DUMMY_MODEL_ID, filename=constants.PYTORCH_WEIGHTS_NAME, revision=REVISION_ID_DEFAULT) - filepath = cached_download(url, force_download=True, legacy_cache_layout=True) - metadata = filename_to_url(filepath, legacy_cache_layout=True) - self.assertEqual(metadata, (url, f'"{DUMMY_MODEL_ID_PINNED_SHA256}"')) - - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - @expect_deprecation("filename_to_url") - def test_dataset_standard_object_rev(self): - url = hf_hub_url( - DATASET_ID, - filename=DATASET_SAMPLE_PY_FILE, - repo_type=constants.REPO_TYPE_DATASET, - revision=DATASET_REVISION_ID_ONE_SPECIFIC_COMMIT, - ) - # now let's download - filepath = cached_download(url, force_download=True, legacy_cache_layout=True) - metadata = filename_to_url(filepath, legacy_cache_layout=True) - self.assertNotEqual(metadata[1], f'"{DUMMY_MODEL_ID_PINNED_SHA1}"') - - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - @expect_deprecation("filename_to_url") - def test_dataset_lfs_object(self): - url = hf_hub_url( - DATASET_ID, - filename="dev-v1.1.json", - repo_type=constants.REPO_TYPE_DATASET, - revision=DATASET_REVISION_ID_ONE_SPECIFIC_COMMIT, - ) - filepath = cached_download(url, force_download=True, legacy_cache_layout=True) - metadata = filename_to_url(filepath, legacy_cache_layout=True) - self.assertEqual( - metadata, - (url, '"95aa6a52d5d6a735563366753ca50492a658031da74f301ac5238b03966972c9"'), - ) - @xfail_on_windows(reason="umask is UNIX-specific") def test_hf_hub_download_custom_cache_permission(self): """Checks `hf_hub_download` respect the cache dir permission. @@ -410,23 +253,6 @@ def test_download_from_a_renamed_repo_with_hf_hub_download(self): filepath = hf_hub_download(DUMMY_RENAMED_OLD_MODEL_ID, "config.json", cache_dir=tmpdir) self.assertTrue(os.path.exists(filepath)) - def test_download_from_a_renamed_repo_with_cached_download(self): - """Checks `cached_download` works also on a renamed repo. - - Regression test for #981. 
- https://github.com/huggingface/huggingface_hub/issues/981 - """ - with pytest.warns(FutureWarning): - with SoftTemporaryDirectory() as tmpdir: - filepath = cached_download( - hf_hub_url( - DUMMY_RENAMED_OLD_MODEL_ID, - filename="config.json", - ), - cache_dir=tmpdir, - ) - self.assertTrue(os.path.exists(filepath)) - def test_hf_hub_download_with_empty_subfolder(self): """ Check subfolder arg is processed correctly when empty string is passed to @@ -544,21 +370,6 @@ def test_hf_hub_url_with_endpoint(self): "https://hf-ci.co/julien-c/dummy-unknown/resolve/main/config.json", ) - @expect_deprecation("hf_hub_download") - @expect_deprecation("cached_download") - @expect_deprecation("filename_to_url") - @expect_deprecation("url_to_filename") - def test_hf_hub_download_legacy(self): - filepath = hf_hub_download( - DUMMY_MODEL_ID, - filename=constants.CONFIG_NAME, - revision=REVISION_ID_DEFAULT, - force_download=True, - legacy_cache_layout=True, - ) - metadata = filename_to_url(filepath, legacy_cache_layout=True) - self.assertEqual(metadata[1], f'"{DUMMY_MODEL_ID_PINNED_SHA1}"') - def test_try_to_load_from_cache_exist(self): # Make sure the file is cached filepath = hf_hub_download(DUMMY_MODEL_ID, filename=constants.CONFIG_NAME) @@ -766,25 +577,6 @@ def test_hf_hub_download_when_tmp_file_is_complete(self): # Download must not fail hf_hub_download(DUMMY_MODEL_ID, filename="pytorch_model.bin", cache_dir=tmpdir) - @expect_deprecation("cached_download") - @expect_deprecation("url_to_filename") - def test_cached_download_from_github(self): - """Regression test for #1449. - - File consistency check was failing due to compression in HTTP request which made the expected size smaller than - the actual one. `cached_download` is deprecated but still heavily used so we need to make sure it works. - - See: - - https://github.com/huggingface/huggingface_hub/issues/1449. - - https://github.com/huggingface/diffusers/issues/3213. - """ - with SoftTemporaryDirectory() as cache_dir: - cached_download( - url="https://raw.githubusercontent.com/huggingface/diffusers/v0.15.1/examples/community/lpw_stable_diffusion.py", - token=None, - cache_dir=cache_dir, - ) - @unittest.skipIf(os.name == "nt", "Lock files are always deleted on Windows.") def test_keep_lock_file(self): """Lock files should not be deleted on Linux.""" diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index a028066847..219780068f 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -3274,7 +3274,6 @@ def test_hf_hub_download_alias(self, mock: Mock) -> None: etag_timeout=10, resume_download=None, local_files_only=False, - legacy_cache_layout=False, headers=None, )
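
For downstream code that still calls the removed `cached_download` helper, the replacement shown throughout this diff is `hf_hub_download`, which takes a `repo_id`/`filename` pair instead of a resolved URL and stores files in the new cache layout. A minimal migration sketch follows; the repo id and filename are placeholders borrowed from the test fixtures in this diff, not a prescribed target:

# Before (removed in this release):
#   from huggingface_hub import cached_download, hf_hub_url
#   path = cached_download(hf_hub_url("julien-c/dummy-unknown", filename="config.json"))
# After:
from huggingface_hub import hf_hub_download

# Downloads (or reuses) the file in the snapshot-based cache layout and
# returns the local path to the cached copy.
path = hf_hub_download(repo_id="julien-c/dummy-unknown", filename="config.json")
print(path)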
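The deleted `filename_to_url` helper read the URL and ETag back from the legacy `<hash>.json` sidecar files, which no longer exist once the legacy layout is gone. The same information can instead be fetched from the Hub with `get_hf_file_metadata`, which this diff keeps exported; a sketch, using the same placeholder repo and filename as above:

from huggingface_hub import get_hf_file_metadata, hf_hub_url

# Query the Hub for the resolved location, commit hash, ETag and size of a file
# instead of reading them from a local ".json" metadata sidecar.
meta = get_hf_file_metadata(hf_hub_url("julien-c/dummy-unknown", filename="config.json"))
print(meta.location, meta.commit_hash, meta.etag, meta.size)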
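Finally, `InferenceClient.__init__` and `AsyncInferenceClient.__init__` lose their `_deprecate_positional_args` shim here. The diff does not show whether the signatures become keyword-only, so passing everything by keyword is the safe calling convention going forward; a sketch with an illustrative model id:

from huggingface_hub import InferenceClient

# Keyword arguments are the forward-compatible way to construct the client
# now that the positional-args deprecation shim has been dropped.
client = InferenceClient(model="julien-c/dummy-unknown", token=None, timeout=30)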