Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 55 additions & 16 deletions src/huggingface_hub/cli/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,24 +26,63 @@
)


CLI_ERROR_MAPPINGS: dict[type[Exception], Callable[[Exception], str]] = {
BucketNotFoundError: lambda e: (
"Bucket not found. Check the bucket id (namespace/name). If the bucket is private, make sure you are authenticated."
),
RepositoryNotFoundError: lambda e: (
"Repository not found. Check the `repo_id` and `repo_type` parameters. If the repo is private, make sure you are authenticated."
),
RevisionNotFoundError: lambda e: "Revision not found. Check the `revision` parameter.",
GatedRepoError: lambda e: "Access denied. This repository requires approval.",
LocalTokenNotFoundError: lambda e: "Not logged in. Run 'hf auth login' first.",
RemoteEntryNotFoundError: lambda e: "File not found in repository.",
HfHubHTTPError: lambda e: str(e),
ValueError: lambda e: f"Invalid value. {e}",
def _format_repo_not_found(error: RepositoryNotFoundError) -> str:
label = error.repo_type.capitalize() if error.repo_type else "Repository"
if error.repo_id:
msg = f"{label} '{error.repo_id}' not found."
else:
msg = f"{label} not found."
msg += " If the repo is private, make sure you are authenticated."
return msg


def _format_gated_repo(error: GatedRepoError) -> str:
label = error.repo_type if error.repo_type else "repository"
if error.repo_id:
return f"Access denied. {label.capitalize()} '{error.repo_id}' requires approval."
return f"Access denied. This {label} requires approval."


def _format_bucket_not_found(error: BucketNotFoundError) -> str:
if error.bucket_id:
return f"Bucket '{error.bucket_id}' not found. If the bucket is private, make sure you are authenticated."
return "Bucket not found. Check the bucket id (namespace/name). If the bucket is private, make sure you are authenticated."


def _format_entry_not_found(error: RemoteEntryNotFoundError) -> str:
label = error.repo_type if error.repo_type else "repository"
url = str(error.response.url) if error.response else None
if error.repo_id:
msg = f"File not found in {label} '{error.repo_id}'."
else:
msg = f"File not found in {label}."
if url:
msg += f"\nURL: {url}"
return msg


def _format_revision_not_found(error: RevisionNotFoundError) -> str:
label = error.repo_type if error.repo_type else "repository"
if error.repo_id:
return f"Revision not found in {label} '{error.repo_id}'."
return f"Revision not found in {label}. Check the revision parameter."
Comment thread
cursor[bot] marked this conversation as resolved.


# Maps an exception type to the function that renders its CLI message.
# `format_known_exception` walks this dict in insertion order and returns on the first
# `isinstance` match, so ORDER MATTERS: a subclass must be listed before its base class,
# otherwise the base-class formatter shadows the more specific one.
CLI_ERROR_MAPPINGS: dict[type[Exception], Callable[[Exception], str]] = { # type: ignore
# GatedRepoError must come before RepositoryNotFoundError (it's a subclass).
GatedRepoError: _format_gated_repo, # type: ignore[dict-item]
BucketNotFoundError: _format_bucket_not_found, # type: ignore[dict-item]
RepositoryNotFoundError: _format_repo_not_found, # type: ignore[dict-item]
RevisionNotFoundError: _format_revision_not_found, # type: ignore[dict-item]
LocalTokenNotFoundError: lambda _: "Not logged in. Run 'hf auth login' first.",
RemoteEntryNotFoundError: _format_entry_not_found, # type: ignore[dict-item]
# HfHubHTTPError is the base class of the bucket/repo/revision/entry errors listed above,
# so it must stay after them; it is the catch-all for any other Hub HTTP error.
HfHubHTTPError: lambda error: str(error),
ValueError: lambda error: f"Invalid value. {error}",
}


def format_known_exception(error: Exception) -> Optional[str]:
    """Return a user-friendly CLI message for `error`, or `None` if it is not a known error.

    `CLI_ERROR_MAPPINGS` is scanned in insertion order and the formatter of the first
    entry whose type matches `error` (via `isinstance`) is used. Because of that, more
    specific exception types must appear before their base classes in the mapping.
    """
    for exc_type, formatter in CLI_ERROR_MAPPINGS.items():
        if isinstance(error, exc_type):
            return formatter(error)
    return None
33 changes: 33 additions & 0 deletions src/huggingface_hub/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ class BucketNotFoundError(HfHubHTTPError):
"""
Raised when trying to access a bucket that does not exist.

Attributes:
bucket_id (`str` or `None`):
The bucket id (namespace/name) that was not found, if it could be determined from the request URL.

Example:

```py
Expand All @@ -206,6 +210,8 @@ class BucketNotFoundError(HfHubHTTPError):
```
"""

bucket_id: Optional[str] = None


# REPOSITORY ERRORS

Expand All @@ -215,6 +221,12 @@ class RepositoryNotFoundError(HfHubHTTPError):
Raised when trying to access a hf.co URL with an invalid repository name, or
with a private repo name the user does not have access to.

Attributes:
repo_id (`str` or `None`):
The repo id that was not found, if it could be determined from the request URL.
repo_type (`str` or `None`):
The repo type ("model", "dataset", or "space"), if it could be determined from the request URL.

Example:

```py
Expand All @@ -230,6 +242,9 @@ class RepositoryNotFoundError(HfHubHTTPError):
```
"""

repo_id: Optional[str] = None
repo_type: Optional[str] = None


class GatedRepoError(RepositoryNotFoundError):
"""
Expand Down Expand Up @@ -279,6 +294,12 @@ class RevisionNotFoundError(HfHubHTTPError):
Raised when trying to access a hf.co URL with a valid repository but an invalid
revision.

Attributes:
repo_id (`str` or `None`):
The repo id, if it could be determined from the request URL.
repo_type (`str` or `None`):
The repo type ("model", "dataset", or "space"), if it could be determined from the request URL.

Example:

```py
Expand All @@ -291,6 +312,9 @@ class RevisionNotFoundError(HfHubHTTPError):
```
"""

repo_id: Optional[str] = None
repo_type: Optional[str] = None


# ENTRY ERRORS
class EntryNotFoundError(Exception):
Expand All @@ -316,6 +340,12 @@ class RemoteEntryNotFoundError(HfHubHTTPError, EntryNotFoundError):
Raised when trying to access a hf.co URL with a valid repository and revision
but an invalid filename.

Attributes:
repo_id (`str` or `None`):
The repo id, if it could be determined from the request URL.
repo_type (`str` or `None`):
The repo type ("model", "dataset", or "space"), if it could be determined from the request URL.

Example:

```py
Expand All @@ -328,6 +358,9 @@ class RemoteEntryNotFoundError(HfHubHTTPError, EntryNotFoundError):
```
"""

repo_id: Optional[str] = None
repo_type: Optional[str] = None


class LocalEntryNotFoundError(FileNotFoundError, EntryNotFoundError):
"""
Expand Down
88 changes: 75 additions & 13 deletions src/huggingface_hub/utils/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from contextlib import contextmanager
from dataclasses import dataclass
from shlex import quote
from typing import Any, Callable, Generator, Mapping, Optional, Union
from typing import Any, Callable, Generator, Mapping, Optional, TypeVar, Union
from urllib.parse import urlparse

import httpx
Expand Down Expand Up @@ -169,6 +169,47 @@ def parse_ratelimit_headers(headers: Mapping[str, str]) -> Optional[RateLimitInf
flags=re.VERBOSE,
)

# Regex to extract repo_type and repo_id from API URLs.
# Captures: group(1) = repo_type plural (models/datasets/spaces), group(2) = first path segment, group(3) = optional second segment.
_REPO_ID_FROM_URL_REGEX = re.compile(r"^https?://[^/]+/api/(models|datasets|spaces)/([^/]+)(?:/([^/]+))?")

# Regex to extract bucket_id (namespace/name) from bucket API URLs.
_BUCKET_ID_FROM_URL_REGEX = re.compile(r"^https?://[^/]+/api/buckets/([^/]+/[^/]+)")

# Sub-paths that follow a repo_id in API URLs (not part of the repo name).
_REPO_URL_SUBPATHS = {"resolve", "tree", "blob", "raw", "refs", "commit", "discussions", "settings", "revision"}
Comment on lines +174 to +180
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: not 100% bullet-proof but parsing doesn't have to be perfect (it's just a convenience field for better errors)



def _parse_repo_info_from_url(url: str) -> tuple[Optional[str], Optional[str]]:
    """Best-effort extraction of `(repo_type, repo_id)` from a Hub API URL.

    The repo type found in the URL (plural form) is translated through
    `constants.REPO_TYPES_MAPPING`. The repo id is either `"<first>/<second>"` or, when
    the second path segment is missing or is a known API sub-path (see
    `_REPO_URL_SUBPATHS`), just `"<first>"`. Returns `(None, None)` when the URL does
    not look like a repo API URL.

    Examples:
        >>> _parse_repo_info_from_url("https://huggingface.co/api/models/user/repo")
        ('model', 'user/repo')
        >>> _parse_repo_info_from_url("https://huggingface.co/api/datasets/user/repo/resolve/main/data.csv")
        ('dataset', 'user/repo')
        >>> _parse_repo_info_from_url("https://huggingface.co/api/models/bert-base-cased/resolve/main/config.json")
        ('model', 'bert-base-cased')
    """
    parsed = _REPO_ID_FROM_URL_REGEX.search(url)
    if parsed is None:
        return None, None

    plural_type, head, tail = parsed.group(1, 2, 3)
    # A missing second segment, or one that is an API sub-path, means the repo has no namespace.
    if not tail or tail in _REPO_URL_SUBPATHS:
        repo_id = head
    else:
        repo_id = f"{head}/{tail}"
    return constants.REPO_TYPES_MAPPING.get(plural_type), repo_id


def _parse_bucket_id_from_url(url: str) -> Optional[str]:
    """Best-effort extraction of the bucket id (namespace/name) from a bucket API URL.

    Returns `None` when `url` does not match `_BUCKET_ID_FROM_URL_REGEX`.
    """
    found = _BUCKET_ID_FROM_URL_REGEX.search(url)
    if found is None:
        return None
    return found.group(1)


def hf_request_event_hook(request: httpx.Request) -> None:
"""
Expand Down Expand Up @@ -725,19 +766,34 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =
error_code = response.headers.get("X-Error-Code")
error_message = response.headers.get("X-Error-Message")

# Parse repo info from request URL (used to enrich errors below)
request_url = (
str(response.request.url) if response.request is not None and response.request.url is not None else None
)
repo_type, repo_id = _parse_repo_info_from_url(request_url) if request_url else (None, None)

if error_code == "RevisionNotFound":
message = f"{response.status_code} Client Error." + "\n\n" + f"Revision Not Found for url: {response.url}."
raise _format(RevisionNotFoundError, message, response) from e
revision_err = _format(RevisionNotFoundError, message, response)
revision_err.repo_type = repo_type
revision_err.repo_id = repo_id
raise revision_err from e

elif error_code == "EntryNotFound":
message = f"{response.status_code} Client Error." + "\n\n" + f"Entry Not Found for url: {response.url}."
raise _format(RemoteEntryNotFoundError, message, response) from e
entry_err = _format(RemoteEntryNotFoundError, message, response)
entry_err.repo_type = repo_type
entry_err.repo_id = repo_id
raise entry_err from e

elif error_code == "GatedRepo":
message = (
f"{response.status_code} Client Error." + "\n\n" + f"Cannot access gated repo for url {response.url}."
)
raise _format(GatedRepoError, message, response) from e
gated_err = _format(GatedRepoError, message, response)
gated_err.repo_type = repo_type
gated_err.repo_id = repo_id
raise gated_err from e

elif error_message == "Access to this resource is disabled.":
message = (
Expand All @@ -751,9 +807,8 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =

elif (
error_code == "RepoNotFound"
and response.request is not None
and response.request.url is not None
and BUCKET_API_REGEX.search(str(response.request.url)) is not None
and request_url is not None
and BUCKET_API_REGEX.search(request_url) is not None
):
message = (
f"{response.status_code} Client Error."
Expand All @@ -762,14 +817,15 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =
+ "\nPlease make sure you specified the correct bucket id (namespace/name)."
+ "\nIf the bucket is private, make sure you are authenticated."
)
raise _format(BucketNotFoundError, message, response) from e
bucket_err = _format(BucketNotFoundError, message, response)
bucket_err.bucket_id = _parse_bucket_id_from_url(request_url)
raise bucket_err from e

elif error_code == "RepoNotFound" or (
response.status_code == 401
and error_message != "Invalid credentials in Authorization header"
and response.request is not None
and response.request.url is not None
and REPO_API_REGEX.search(str(response.request.url)) is not None
and request_url is not None
and REPO_API_REGEX.search(request_url) is not None
):
# 401 is misleading as it is returned for:
# - private and gated repos if user is not authenticated
Expand All @@ -785,7 +841,10 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =
" make sure you are authenticated. For more details, see"
" https://huggingface.co/docs/huggingface_hub/authentication"
)
raise _format(RepositoryNotFoundError, message, response) from e
repo_err = _format(RepositoryNotFoundError, message, response)
repo_err.repo_type = repo_type
repo_err.repo_id = repo_id
raise repo_err from e

elif response.status_code == 400:
message = (
Expand Down Expand Up @@ -857,7 +916,10 @@ def _warn_on_warning_headers(response: httpx.Response) -> None:
logger.warning(message)


def _format(error_type: type[HfHubHTTPError], custom_message: str, response: httpx.Response) -> HfHubHTTPError:
# Type variable bound to `HfHubHTTPError`: lets `_format` be typed as returning an instance of
# the exact error class it was given, so callers can set subclass-specific attributes
# (e.g. `repo_id`, `repo_type`, `bucket_id`) on the result.
_HfHubHTTPErrorT = TypeVar("_HfHubHTTPErrorT", bound=HfHubHTTPError)


def _format(error_type: type[_HfHubHTTPErrorT], custom_message: str, response: httpx.Response) -> _HfHubHTTPErrorT:
server_errors = []

# Retrieve server error from header
Expand Down
Loading
Loading