Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions google/cloud/storage/_media/_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def __init__(
headers=None,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
super(Download, self).__init__(
media_url, stream=stream, start=start, end=end, headers=headers, retry=retry
Expand All @@ -178,6 +179,7 @@ def __init__(
self.checksum = (
"crc32c" if _helpers._is_crc32c_available_and_fast() else "md5"
)
self.single_shot_download = single_shot_download
self._bytes_downloaded = 0
self._expected_checksum = None
self._checksum_object = None
Expand Down
47 changes: 33 additions & 14 deletions google/cloud/storage/_media/requests/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,24 @@ def _write_to_stream(self, response):
# the stream is indeed compressed, this will delegate the checksum
# object to the decoder and return a _DoNothingHash here.
local_checksum_object = _add_decoder(response.raw, checksum_object)
body_iter = response.iter_content(
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
local_checksum_object.update(chunk)

if self.single_shot_download:
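# Read the entire response body with a single call instead of iterating
# over fixed-size chunks. The whole payload is held in memory at once, so
# this suits smaller files or callers that want the file in one go.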
content = response.raw.read(decode_content=True)
self._stream.write(content)
self._bytes_downloaded += len(content)
local_checksum_object.update(content)
response._content_consumed = True
else:
body_iter = response.iter_content(
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE,
decode_unicode=False,
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
local_checksum_object.update(chunk)

# Don't validate the checksum for partial responses.
if (
Expand Down Expand Up @@ -345,13 +356,21 @@ def _write_to_stream(self, response):
checksum_object = self._checksum_object

with response:
body_iter = response.raw.stream(
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
checksum_object.update(chunk)
if self.single_shot_download:
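# Read the entire response body with a single call instead of iterating
# over fixed-size chunks. The whole payload is held in memory at once, so
# this suits smaller files or callers that want the file in one go.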
content = response.raw.read()
self._stream.write(content)
self._bytes_downloaded += len(content)
checksum_object.update(content)
else:
body_iter = response.raw.stream(
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
checksum_object.update(chunk)
response._content_consumed = True

# Don't validate the checksum for partial responses.
Expand Down
44 changes: 44 additions & 0 deletions google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,7 @@ def _do_download(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Perform a download without any error handling.

Expand Down Expand Up @@ -1047,13 +1048,18 @@ def _do_download(
See the retry.py source code and docstrings in this package
(google.cloud.storage.retry) for information on retry types and how
to configure them.

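:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.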
"""

extra_attributes = {
"url.full": download_url,
"download.chunk_size": f"{self.chunk_size}",
"download.raw_download": raw_download,
"upload.checksum": f"{checksum}",
"download.single_shot_download": single_shot_download,
}
args = {"timeout": timeout}

Expand All @@ -1073,6 +1079,7 @@ def _do_download(
end=end,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)
with create_trace_span(
name=f"Storage.{download_class}/consume",
Expand Down Expand Up @@ -1127,6 +1134,7 @@ def download_to_file(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob into a file-like object.

Expand Down Expand Up @@ -1222,6 +1230,10 @@ def download_to_file(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

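:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.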

:raises: :class:`google.cloud.exceptions.NotFound`
"""
with create_trace_span(name="Storage.Blob.downloadToFile"):
Expand All @@ -1240,6 +1252,7 @@ def download_to_file(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)

def _handle_filename_and_download(self, filename, *args, **kwargs):
Expand Down Expand Up @@ -1285,6 +1298,7 @@ def download_to_filename(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob into a named file.

Expand Down Expand Up @@ -1370,6 +1384,10 @@ def download_to_filename(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

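:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.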

:raises: :class:`google.cloud.exceptions.NotFound`
"""
with create_trace_span(name="Storage.Blob.downloadToFilename"):
Expand All @@ -1388,6 +1406,7 @@ def download_to_filename(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)

def download_as_bytes(
Expand All @@ -1405,6 +1424,7 @@ def download_as_bytes(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob as a bytes object.

Expand Down Expand Up @@ -1484,6 +1504,10 @@ def download_as_bytes(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

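:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.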

:rtype: bytes
:returns: The data stored in this blob.

Expand All @@ -1507,6 +1531,7 @@ def download_as_bytes(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)
return string_buffer.getvalue()

Expand All @@ -1524,6 +1549,7 @@ def download_as_string(
if_metageneration_not_match=None,
timeout=_DEFAULT_TIMEOUT,
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""(Deprecated) Download the contents of this blob as a bytes object.

Expand Down Expand Up @@ -1594,6 +1620,10 @@ def download_as_string(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

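:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.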

:rtype: bytes
:returns: The data stored in this blob.

Expand All @@ -1616,6 +1646,7 @@ def download_as_string(
if_metageneration_not_match=if_metageneration_not_match,
timeout=timeout,
retry=retry,
single_shot_download=single_shot_download,
)

def download_as_text(
Expand All @@ -1633,6 +1664,7 @@ def download_as_text(
if_metageneration_not_match=None,
timeout=_DEFAULT_TIMEOUT,
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob as text (*not* bytes).

Expand Down Expand Up @@ -1705,6 +1737,10 @@ def download_as_text(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

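:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.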

:rtype: text
:returns: The data stored in this blob, decoded to text.
"""
Expand All @@ -1722,6 +1758,7 @@ def download_as_text(
if_metageneration_not_match=if_metageneration_not_match,
timeout=timeout,
retry=retry,
single_shot_download=single_shot_download,
)

if encoding is not None:
Expand Down Expand Up @@ -4019,6 +4056,7 @@ def open(
For downloads only, the following additional arguments are supported:

- ``raw_download``
- ``single_shot_download``

For uploads only, the following additional arguments are supported:

Expand Down Expand Up @@ -4209,6 +4247,7 @@ def _prep_and_do_download(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
command=None,
):
"""Download the contents of a blob object into a file-like object.
Expand Down Expand Up @@ -4294,6 +4333,10 @@ def _prep_and_do_download(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

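:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.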

:type command: str
:param command:
(Optional) Information about which interface for download was used,
Expand Down Expand Up @@ -4349,6 +4392,7 @@ def _prep_and_do_download(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)
except InvalidResponse as exc:
_raise_from_invalid_response(exc)
Expand Down
5 changes: 5 additions & 0 deletions google/cloud/storage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,7 @@ def download_blob_to_file(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of a blob object or blob URI into a file-like object.

Expand Down Expand Up @@ -1216,6 +1217,9 @@ def download_blob_to_file(
See the retry.py source code and docstrings in this package
(google.cloud.storage.retry) for information on retry types and how
to configure them.

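single_shot_download (bool):
(Optional) If true, download the object in a single request.
The whole response body is then read into memory in one call
instead of being streamed in chunks, so memory use grows with
the size of the downloaded data.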
"""
with create_trace_span(name="Storage.Client.downloadBlobToFile"):
if not isinstance(blob_or_uri, Blob):
Expand All @@ -1236,6 +1240,7 @@ def download_blob_to_file(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)

def list_blobs(
Expand Down
8 changes: 6 additions & 2 deletions google/cloud/storage/fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"timeout",
"retry",
"raw_download",
"single_shot_download",
}

# Valid keyword arguments for upload methods.
Expand Down Expand Up @@ -99,8 +100,9 @@ class BlobReader(io.BufferedIOBase):
- ``if_metageneration_not_match``
- ``timeout``
- ``raw_download``
- ``single_shot_download``

Note that download_kwargs (excluding ``raw_download``) are also applied to blob.reload(),
Note that download_kwargs (excluding ``raw_download`` and ``single_shot_download``) are also applied to blob.reload(),
if a reload is needed during seek().
"""

Expand Down Expand Up @@ -177,7 +179,9 @@ def seek(self, pos, whence=0):

if self._blob.size is None:
reload_kwargs = {
k: v for k, v in self._download_kwargs.items() if k != "raw_download"
k: v
for k, v in self._download_kwargs.items()
if (k != "raw_download" and k != "single_shot_download")
}
self._blob.reload(**reload_kwargs)

Expand Down
39 changes: 39 additions & 0 deletions tests/resumable_media/system/requests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,23 @@ def test_download_full(self, add_files, authorized_transport, checksum):
assert self._read_response_content(response) == actual_contents
check_tombstoned(download, authorized_transport)

@pytest.mark.parametrize("checksum", ["auto", "md5", "crc32c", None])
def test_single_shot_download_full(self, add_files, authorized_transport, checksum):
    """Consume every file in ``ALL_FILES`` with ``single_shot_download=True``.

    Runs once per ``checksum`` setting. For each file the response must be
    ``200 OK`` and the content read back from the response must equal the
    expected file contents.
    """
    for file_info in ALL_FILES:
        expected = self._get_contents(file_info)
        name = get_blob_name(file_info)
        url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=name)

        # No ``stream`` argument is passed here; the body is checked through
        # ``_read_response_content`` on the returned response.
        single_shot = self._make_one(
            url, checksum=checksum, single_shot_download=True
        )
        result = single_shot.consume(authorized_transport)

        assert result.status_code == http.client.OK
        assert self._read_response_content(result) == expected
        check_tombstoned(single_shot, authorized_transport)

def test_download_to_stream(self, add_files, authorized_transport):
for info in ALL_FILES:
actual_contents = self._get_contents(info)
Expand All @@ -306,6 +323,28 @@ def test_download_to_stream(self, add_files, authorized_transport):
assert stream.getvalue() == actual_contents
check_tombstoned(download, authorized_transport)

def test_single_shot_download_to_stream(self, add_files, authorized_transport):
    """Consume every file in ``ALL_FILES`` into a ``BytesIO`` in single-shot mode.

    For each file the response must be ``200 OK``, the bytes written to the
    stream must equal the expected contents, and the response itself must be
    marked as consumed with no body available through ``content``.
    """
    for file_info in ALL_FILES:
        expected = self._get_contents(file_info)
        name = get_blob_name(file_info)
        url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=name)

        sink = io.BytesIO()
        single_shot = self._make_one(url, stream=sink, single_shot_download=True)
        result = single_shot.consume(authorized_transport)

        assert result.status_code == http.client.OK

        # Accessing ``content`` after the body was written to the stream is
        # expected to raise with the NO_BODY_ERR message.
        with pytest.raises(RuntimeError) as raised:
            getattr(result, "content")
        assert raised.value.args == (NO_BODY_ERR,)

        assert result._content is False
        assert result._content_consumed is True
        assert sink.getvalue() == expected
        check_tombstoned(single_shot, authorized_transport)

def test_download_gzip_w_stored_content_headers(
self, add_files, authorized_transport
):
Expand Down
Loading