Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,13 @@ Several modes of authentication are supported:
or container engine) and fetch the credentials automatically from the
metadata service.

- if ``token=dict(...)`` or ``token=<filepath>``, you may supply a token
generated by the gcloud_ utility. This can be
- if ``token=dict(...)``, ``token=<filepath>`` or ``token=<raw_token_str>``, you may
supply a token generated by the gcloud_ utility. This can be

- a python dictionary

- a raw token string

- the path to a file containing the JSON returned by logging in with the
gcloud CLI tool (e.g.,
``~/.config/gcloud/application_default_credentials.json`` or
Expand All @@ -116,6 +118,15 @@ Several modes of authentication are supported:
directory, and must be manually expanded with a utility like
``os.path.expanduser()``.

Please note that credentials automatically refresh 5 minutes prior to their
actual expiration to prevent edge-case errors. In scenarios where refreshing
is not possible (e.g., when using raw tokens), the system will fail early and
will not retry if the attributes required for refreshing are missing.
By default, the raw token expiration time is retrieved from the backend. You
can disable this by setting the environment variable ``FETCH_RAW_TOKEN_EXPIRY=0``.
When fetching is disabled, the system assumes the token has no expiration date,
effectively disabling the 5-minute preemptive refresh.

- you can also generate tokens via OAuth2 in the browser using ``token='browser'``,
which gcsfs then caches in a special file, ``~/.gcs_tokens``, and which can subsequently be accessed with ``token='cache'``.

Expand Down
63 changes: 60 additions & 3 deletions gcsfs/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

from gcsfs.retry import HttpError
from gcsfs.retry import HttpError, NonRetryableError

logger = logging.getLogger("gcsfs.credentials")

Expand All @@ -38,6 +38,48 @@
}
}

# Timeout, in seconds, for the HTTP request to Google's tokeninfo endpoint.
TOKEN_INFO_TIMEOUT_SECONDS = 10
# Default refresh buffer, in seconds, and the minimum remaining lifetime a raw
# token must have to be accepted.
LOCAL_REFRESH_BUFFER = 300 # Greater than google.auth._helpers.REFRESH_THRESHOLD


def _get_creds_from_raw_token(token):
    """Build ``Credentials`` from a raw access-token string.

    Unless disabled through the ``FETCH_RAW_TOKEN_EXPIRY`` environment
    variable, the token's expiration time is looked up from Google's
    tokeninfo endpoint and attached to the returned credentials. A raw token
    cannot be refreshed (the attributes required for refreshing are missing),
    so a token that is already inside the local refresh buffer is rejected up
    front rather than failing on first use.

    Parameters
    ----------
    token : str
        Raw OAuth2 access token.

    Returns
    -------
    google.oauth2.credentials.Credentials
        Credentials wrapping ``token``. ``expiry`` is ``None`` when the
        expiry lookup is disabled.

    Raises
    ------
    ValueError
        If the tokeninfo endpoint answers with status 400 (invalid or expired
        token), or if the token expires within ``LOCAL_REFRESH_BUFFER``
        seconds.
    requests.HTTPError
        For any other unsuccessful tokeninfo response.
    """
    # Fetching is on by default. Only an explicit 'false', '0', 'off' or 'no'
    # (case-insensitive) turns it off.
    env_val = os.environ.get("FETCH_RAW_TOKEN_EXPIRY", "true").lower()
    if env_val in ("false", "0", "off", "no"):
        # Expiry stays unknown, so no preemptive refresh will be attempted.
        return Credentials(token, expiry=None)

    response = requests.get(
        "https://oauth2.googleapis.com/tokeninfo",
        params={"access_token": token},
        timeout=TOKEN_INFO_TIMEOUT_SECONDS,
    )

    if response.status_code == 400:
        # Token is likely expired or invalid format
        raise ValueError("Provided token is either not valid, or expired.")

    response.raise_for_status()

    # Work with an aware datetime (utcfromtimestamp is deprecated since 3.12).
    expiry = datetime.fromtimestamp(float(response.json()["exp"]), tz=timezone.utc)

    time_remaining = max(0, (expiry - datetime.now(timezone.utc)).total_seconds())
    if time_remaining <= LOCAL_REFRESH_BUFFER:
        raise ValueError(
            f"The provided raw token expires in {time_remaining} seconds, "
            f"which is less than the safety buffer ({LOCAL_REFRESH_BUFFER}). "
            "This may cause immediate authentication failures. "
            "To bypass this check and safety buffer, you can set the environment "
            "variable FETCH_RAW_TOKEN_EXPIRY=false (expiry will be unknown)."
        )

    # google-auth expects ``expiry`` as a naive datetime in UTC, so drop tzinfo.
    return Credentials(token, expiry=expiry.replace(tzinfo=None))


class GoogleCredentials:
def __init__(self, project, access, token, check_credentials=None, on_google=True):
Expand Down Expand Up @@ -161,7 +203,7 @@ def _connect_token(self, token):
with open(token) as data:
token = json.load(data)
else:
token = Credentials(token)
token = _get_creds_from_raw_token(token)
if isinstance(token, dict):
credentials = self._dict_to_credentials(token)
elif isinstance(token, google.auth.credentials.Credentials):
Expand Down Expand Up @@ -190,7 +232,7 @@ def _credentials_valid(self, refresh_buffer):
)
)

def maybe_refresh(self, refresh_buffer=300):
def maybe_refresh(self, refresh_buffer=LOCAL_REFRESH_BUFFER):
"""
Check and refresh credentials if needed
"""
Expand All @@ -210,6 +252,21 @@ def maybe_refresh(self, refresh_buffer=300):
try:
self.credentials.refresh(req)
except gauth.exceptions.RefreshError as error:
# This error can also be raised on the client side, when the credentials
# lack the attributes needed to refresh the token. For instance, see
# https://github.com/googleapis/google-auth-library-python/blob/main/google/oauth2/_credentials_async.py#L51
# Retrying cannot fix missing attributes, so raise a non-retryable error
# instead of letting the request be retried with the backoff strategy.

# Check for client side errors (if any)
if (
"credentials do not contain the necessary fields need to refresh"
in str(error)
):
raise NonRetryableError(
"Got error while refreshing credentials."
) from error

# Re-raise as HttpError with a 401 code and the expected message
raise HttpError(
{"code": 401, "message": "Invalid Credentials"}
Expand Down
8 changes: 8 additions & 0 deletions gcsfs/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ class ChecksumError(Exception):
pass


class NonRetryableError(Exception):
    """Raised when the underlying error cannot be retried or continued further."""


RETRIABLE_EXCEPTIONS = (
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ConnectionError,
Expand All @@ -69,6 +75,8 @@ class ChecksumError(Exception):

def is_retriable(exception):
"""Returns True if this exception is retriable."""
if isinstance(exception, NonRetryableError):
return False

if isinstance(exception, HttpError):
# Add 401 to retriable errors when it's an auth expiration issue
Expand Down
148 changes: 144 additions & 4 deletions gcsfs/tests/test_credentials.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
import datetime
import os
from unittest.mock import Mock, patch

import pytest

from gcsfs import GCSFileSystem
from gcsfs.credentials import GoogleCredentials
from gcsfs.retry import HttpError
from gcsfs.retry import HttpError, NonRetryableError

MOCK_TOKEN_STR = "ya29.valid_raw_token_string"
MOCK_EXP_TIMESTAMP = 1764620492 # 2025-12-01 20:21:32 UTC


def test_googlecredentials_none():
Expand All @@ -13,6 +20,139 @@ def test_googlecredentials_none():

@pytest.mark.parametrize("token", ["", "incorrect.token", "x" * 100])
def test_credentials_from_raw_token(token):
with pytest.raises(HttpError, match="Invalid Credentials"):
fs = GCSFileSystem(project="myproject", token=token)
fs.ls("/")
with patch.dict(os.environ, {"FETCH_RAW_TOKEN_EXPIRY": "false"}):
with pytest.raises(HttpError, match="Invalid Credentials"):
fs = GCSFileSystem(project="myproject", token=token)
fs.ls("/")


@pytest.fixture
def mock_token_info_api_response():
    """Provide a mock mimicking a successful Google Token Info response."""
    token_info = Mock(status_code=200)
    token_info.json.return_value = {"exp": str(MOCK_EXP_TIMESTAMP)}
    return token_info


def test_raw_token_credentials_init_with_raw_token_fetches_expiry(
    mock_token_info_api_response,
):
    """
    Test that initializing GoogleCredentials with a raw string token
    triggers the API lookup and sets the expiry.
    """
    # Expiry 600 seconds ahead, i.e. comfortably beyond the 300 s safety buffer.
    future_time = int(
        (
            datetime.datetime.now(datetime.timezone.utc)
            + datetime.timedelta(seconds=600)
        ).timestamp()
    )
    mock_token_info_api_response.json.return_value = {"exp": str(future_time)}

    with patch(
        "gcsfs.credentials.requests.get", return_value=mock_token_info_api_response
    ) as mock_get:
        creds = GoogleCredentials(
            project="my-project", token=MOCK_TOKEN_STR, access="read_only"
        )
        mock_get.assert_called_once_with(
            "https://oauth2.googleapis.com/tokeninfo",
            params={"access_token": MOCK_TOKEN_STR},
            timeout=10,
        )

    # The expiry is compared as a naive UTC datetime. It is built without the
    # deprecated datetime.utcfromtimestamp().
    expected_expiry = datetime.datetime.fromtimestamp(
        future_time, tz=datetime.timezone.utc
    ).replace(tzinfo=None)

    assert creds.credentials.token == MOCK_TOKEN_STR
    assert creds.credentials.expiry is not None
    assert creds.credentials.expiry == expected_expiry


def test_raw_token_credentials_init_env_var_disables_fetch(
    mock_token_info_api_response,
):
    """Check that FETCH_RAW_TOKEN_EXPIRY=false prevents the token info request."""
    env_patch = patch.dict(os.environ, {"FETCH_RAW_TOKEN_EXPIRY": "false"})
    get_patch = patch(
        "gcsfs.credentials.requests.get", return_value=mock_token_info_api_response
    )

    with env_patch, get_patch as mock_get:
        creds = GoogleCredentials(
            project="my-project", token=MOCK_TOKEN_STR, access="read_only"
        )
        # No network lookup may happen while the env var disables it.
        mock_get.assert_not_called()

    assert creds.credentials.token == MOCK_TOKEN_STR
    assert creds.credentials.expiry is None


def test_raw_token_credentials_init_raises_on_invalid_token(
    mock_token_info_api_response,
):
    """A 400 (Bad Request) from the token info API must abort initialization."""
    bad_response = mock_token_info_api_response
    bad_response.status_code = 400
    bad_response.json.return_value = {"error": "invalid_token"}

    get_patch = patch("gcsfs.credentials.requests.get", return_value=bad_response)
    expect_error = pytest.raises(
        ValueError, match="Provided token is either not valid"
    )

    with get_patch, expect_error:
        GoogleCredentials(
            project="my-project", token="bad_token_string", access="read_only"
        )


def test_raw_token_credentials_refresh_throws_error_after_expiry(
    mock_token_info_api_response,
):
    """Tests that raw token cred refresh throws error after expiry."""
    future_time = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
        seconds=600
    )
    mock_token_info_api_response.json.return_value = {
        "exp": str(int(future_time.timestamp()))
    }

    with patch(
        "gcsfs.credentials.requests.get", return_value=mock_token_info_api_response
    ):
        creds = GoogleCredentials(
            project="my-project", token="my_token", access="read_only"
        )

    # Refresh before expiry: the token is still valid, so no session is opened.
    with patch("gcsfs.credentials.requests.Session") as mock_session:
        creds.maybe_refresh()
        mock_session.assert_not_called()

    # Move the expiry into the past. It is kept as a naive UTC datetime and
    # built without the deprecated datetime.utcnow().
    naive_utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    creds.credentials.expiry = naive_utc_now - datetime.timedelta(minutes=10)

    # Refresh after expiry
    with pytest.raises(
        NonRetryableError, match="Got error while refreshing credentials"
    ):
        creds.maybe_refresh()


def test_raw_token_credentials_init_raises_on_short_lived_token(
    mock_token_info_api_response,
):
    """
    A token expiring sooner than the safety buffer must be rejected with a
    ValueError at construction time, so the user is warned immediately.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    expires_at = now_utc + datetime.timedelta(minutes=2)
    exp_field = str(int(expires_at.timestamp()))
    mock_token_info_api_response.json.return_value = {"exp": exp_field}

    get_patch = patch(
        "gcsfs.credentials.requests.get", return_value=mock_token_info_api_response
    )
    expect_error = pytest.raises(ValueError, match="less than the safety buffer")

    with get_patch, expect_error:
        GoogleCredentials(
            project="my-project", token="short_lived_token", access="read_only"
        )
Loading