diff --git a/src/huggingface_hub/utils/_http.py b/src/huggingface_hub/utils/_http.py index f1628e1a5d..726dc7b7c6 100644 --- a/src/huggingface_hub/utils/_http.py +++ b/src/huggingface_hub/utils/_http.py @@ -774,26 +774,17 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: str | None = No if error_code == "RevisionNotFound": message = f"{response.status_code} Client Error." + "\n\n" + f"Revision Not Found for url: {response.url}." - revision_err = _format(RevisionNotFoundError, message, response) - revision_err.repo_type = repo_type - revision_err.repo_id = repo_id - raise revision_err from e + raise _format_with_repo_info(RevisionNotFoundError, message, response, repo_type, repo_id) from e elif error_code == "EntryNotFound": message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}." - entry_err = _format(RemoteEntryNotFoundError, message, response) - entry_err.repo_type = repo_type - entry_err.repo_id = repo_id - raise entry_err from e + raise _format_with_repo_info(RemoteEntryNotFoundError, message, response, repo_type, repo_id) from e elif error_code == "GatedRepo": message = ( f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated repo for url {response.url}." ) - gated_err = _format(GatedRepoError, message, response) - gated_err.repo_type = repo_type - gated_err.repo_id = repo_id - raise gated_err from e + raise _format_with_repo_info(GatedRepoError, message, response, repo_type, repo_id) from e elif error_message == "Access to this resource is disabled.": message = ( @@ -817,9 +808,9 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: str | None = No + "\nPlease make sure you specified the correct bucket id (namespace/name)." + "\nIf the bucket is private, make sure you are authenticated and your token has the required permissions." ) - bucket_err = _format(BucketNotFoundError, message, response) - bucket_err.bucket_id = _parse_bucket_id_from_url(request_url) - raise bucket_err from e + raise _format_with_bucket_info( + BucketNotFoundError, message, response, _parse_bucket_id_from_url(request_url) + ) from e elif error_code == "RepoNotFound" or ( response.status_code == 401 @@ -841,10 +832,7 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: str | None = No " make sure you are authenticated and your token has the required permissions." + "\nFor more details, see https://huggingface.co/docs/huggingface_hub/authentication" ) - repo_err = _format(RepositoryNotFoundError, message, response) - repo_err.repo_type = repo_type - repo_err.repo_id = repo_id - raise repo_err from e + raise _format_with_repo_info(RepositoryNotFoundError, message, response, repo_type, repo_id) from e elif response.status_code == 400: message = ( @@ -1012,6 +1000,37 @@ def _format(error_type: type[_HfHubHTTPErrorT], custom_message: str, response: h return error_type(final_error_message.strip(), response=response, server_message=server_message or None) +def _format_with_repo_info( + error_type: type[_HfHubHTTPErrorT], + message: str, + response: httpx.Response, + repo_type: str | None, + repo_id: str | None, +) -> _HfHubHTTPErrorT: + """Like _format but also sets repo_type and repo_id on the error. + + This is a separate function to avoid storing the error in a local variable + in the caller's frame, which would create a reference cycle via exception + __cause__ tracebacks. + """ + err = _format(error_type, message, response) + err.repo_type = repo_type + err.repo_id = repo_id + return err + + +def _format_with_bucket_info( + error_type: type[_HfHubHTTPErrorT], + message: str, + response: httpx.Response, + bucket_id: str | None, +) -> _HfHubHTTPErrorT: + """Like _format but also sets bucket_id on the error.""" + err = _format(error_type, message, response) + err.bucket_id = bucket_id + return err + + def _curlify(request: httpx.Request) -> str: """Convert a `httpx.Request` into a curl command (str). diff --git a/tests/test_utils_http.py b/tests/test_utils_http.py index fc81e2f8d5..a289efb79a 100644 --- a/tests/test_utils_http.py +++ b/tests/test_utils_http.py @@ -12,11 +12,20 @@ from httpx import ConnectTimeout, HTTPError from huggingface_hub.constants import ENDPOINT -from huggingface_hub.errors import BucketNotFoundError, HfHubHTTPError, OfflineModeIsEnabled, RepositoryNotFoundError +from huggingface_hub.errors import ( + BucketNotFoundError, + HfHubHTTPError, + OfflineModeIsEnabled, + RemoteEntryNotFoundError, + RepositoryNotFoundError, + RevisionNotFoundError, +) from huggingface_hub.utils._http import ( _WARNED_TOPICS, RateLimitInfo, _adjust_range_header, + _format_with_bucket_info, + _format_with_repo_info, _parse_bucket_id_from_url, _parse_repo_info_from_url, _warn_on_warning_headers, @@ -702,3 +711,49 @@ def test_non_bucket_url(self): def test_http_url(self): assert _parse_bucket_id_from_url("http://localhost:8080/api/buckets/ns/name") == "ns/name" + + +class TestFormatWithRepoInfo: + def _make_response(self): + response = Mock(spec=httpx.Response) + response.status_code = 404 + response.url = "https://huggingface.co/api/models/ns/repo" + response.headers = httpx.Headers({}) + response.json.return_value = {} + return response + + def test_sets_repo_type_and_repo_id(self): + err = _format_with_repo_info(RepositoryNotFoundError, "not found", self._make_response(), "model", "ns/repo") + assert err.repo_type == "model" + assert err.repo_id == "ns/repo" + + def test_sets_none_values(self): + err = _format_with_repo_info(RevisionNotFoundError, "not found", self._make_response(), None, None) + assert err.repo_type is None + assert err.repo_id is None + + def test_works_with_remote_entry_not_found(self): + err = _format_with_repo_info( + RemoteEntryNotFoundError, "entry not found", self._make_response(), "model", "ns/repo" + ) + assert isinstance(err, RemoteEntryNotFoundError) + assert err.repo_type == "model" + assert err.repo_id == "ns/repo" + + +class TestFormatWithBucketInfo: + def _make_response(self): + response = Mock(spec=httpx.Response) + response.status_code = 404 + response.url = "https://huggingface.co/api/buckets/ns/name" + response.headers = httpx.Headers({}) + response.json.return_value = {} + return response + + def test_sets_bucket_id(self): + err = _format_with_bucket_info(BucketNotFoundError, "not found", self._make_response(), "ns/name") + assert err.bucket_id == "ns/name" + + def test_sets_none_bucket_id(self): + err = _format_with_bucket_info(BucketNotFoundError, "not found", self._make_response(), None) + assert err.bucket_id is None