Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion google/cloud/storage/_media/_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class Download(DownloadBase):
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum Optional([str]): The type of checksum to compute to verify
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
Expand All @@ -157,6 +157,9 @@ class Download(DownloadBase):
See the retry.py source code and docstrings in this package
(google.cloud.storage.retry) for information on retry types and how
to configure them.
single_shot_download (Optional[bool]): If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.

"""

Expand All @@ -169,6 +172,7 @@ def __init__(
headers=None,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
super(Download, self).__init__(
media_url, stream=stream, start=start, end=end, headers=headers, retry=retry
Expand All @@ -178,6 +182,7 @@ def __init__(
self.checksum = (
"crc32c" if _helpers._is_crc32c_available_and_fast() else "md5"
)
self.single_shot_download = single_shot_download
self._bytes_downloaded = 0
self._expected_checksum = None
self._checksum_object = None
Expand Down
47 changes: 33 additions & 14 deletions google/cloud/storage/_media/requests/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,24 @@ def _write_to_stream(self, response):
# the stream is indeed compressed, this will delegate the checksum
# object to the decoder and return a _DoNothingHash here.
local_checksum_object = _add_decoder(response.raw, checksum_object)
body_iter = response.iter_content(
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
local_checksum_object.update(chunk)

# This is useful for smaller files, or when the user wants to
# download the entire file in one go.
if self.single_shot_download:
content = response.raw.read(decode_content=True)
self._stream.write(content)
self._bytes_downloaded += len(content)
local_checksum_object.update(content)
response._content_consumed = True
else:
body_iter = response.iter_content(
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE,
decode_unicode=False,
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
local_checksum_object.update(chunk)

# Don't validate the checksum for partial responses.
if (
Expand Down Expand Up @@ -345,13 +356,21 @@ def _write_to_stream(self, response):
checksum_object = self._checksum_object

with response:
body_iter = response.raw.stream(
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
checksum_object.update(chunk)
# This is useful for smaller files, or when the user wants to
# download the entire file in one go.
if self.single_shot_download:
content = response.raw.read()
self._stream.write(content)
self._bytes_downloaded += len(content)
checksum_object.update(content)
else:
body_iter = response.raw.stream(
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
checksum_object.update(chunk)
response._content_consumed = True

# Don't validate the checksum for partial responses.
Expand Down
61 changes: 61 additions & 0 deletions google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,7 @@ def _do_download(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Perform a download without any error handling.

Expand Down Expand Up @@ -1047,13 +1048,20 @@ def _do_download(
See the retry.py source code and docstrings in this package
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.
"""

extra_attributes = {
"url.full": download_url,
"download.chunk_size": f"{self.chunk_size}",
"download.raw_download": raw_download,
"upload.checksum": f"{checksum}",
"download.single_shot_download": single_shot_download,
}
args = {"timeout": timeout}

Expand All @@ -1073,6 +1081,10 @@ def _do_download(
end=end,
checksum=checksum,
retry=retry,
# NOTE: single_shot_download is only supported in Download and RawDownload
# classes, i.e., when chunk_size is set to None (the default value). It is
# not supported for chunked downloads.
single_shot_download=single_shot_download,
)
with create_trace_span(
name=f"Storage.{download_class}/consume",
Expand Down Expand Up @@ -1127,6 +1139,7 @@ def download_to_file(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob into a file-like object.

Expand Down Expand Up @@ -1222,6 +1235,12 @@ def download_to_file(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding the note on memory consumption.

Based on the design doc, I understand we are introducing single-shot download support in multiple phases. Note that if and when we want single_shot_download to default to True, that would be a breaking change and would require a major version bump, so it is something to keep in mind when planning the next version changes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cojenco how is that a breaking change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure @cojenco, will keep these things in mind.


:raises: :class:`google.cloud.exceptions.NotFound`
"""
with create_trace_span(name="Storage.Blob.downloadToFile"):
Expand All @@ -1240,6 +1259,7 @@ def download_to_file(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)

def _handle_filename_and_download(self, filename, *args, **kwargs):
Expand Down Expand Up @@ -1285,6 +1305,7 @@ def download_to_filename(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob into a named file.

Expand Down Expand Up @@ -1370,6 +1391,12 @@ def download_to_filename(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.

:raises: :class:`google.cloud.exceptions.NotFound`
"""
with create_trace_span(name="Storage.Blob.downloadToFilename"):
Expand All @@ -1388,6 +1415,7 @@ def download_to_filename(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)

def download_as_bytes(
Expand All @@ -1405,6 +1433,7 @@ def download_as_bytes(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob as a bytes object.

Expand Down Expand Up @@ -1484,6 +1513,12 @@ def download_as_bytes(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.

:rtype: bytes
:returns: The data stored in this blob.

Expand All @@ -1507,6 +1542,7 @@ def download_as_bytes(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)
return string_buffer.getvalue()

Expand All @@ -1524,6 +1560,7 @@ def download_as_string(
if_metageneration_not_match=None,
timeout=_DEFAULT_TIMEOUT,
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""(Deprecated) Download the contents of this blob as a bytes object.

Expand Down Expand Up @@ -1594,6 +1631,12 @@ def download_as_string(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.

:rtype: bytes
:returns: The data stored in this blob.

Expand All @@ -1616,6 +1659,7 @@ def download_as_string(
if_metageneration_not_match=if_metageneration_not_match,
timeout=timeout,
retry=retry,
single_shot_download=single_shot_download,
)

def download_as_text(
Expand All @@ -1633,6 +1677,7 @@ def download_as_text(
if_metageneration_not_match=None,
timeout=_DEFAULT_TIMEOUT,
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of this blob as text (*not* bytes).

Expand Down Expand Up @@ -1705,6 +1750,12 @@ def download_as_text(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.

:rtype: text
:returns: The data stored in this blob, decoded to text.
"""
Expand All @@ -1722,6 +1773,7 @@ def download_as_text(
if_metageneration_not_match=if_metageneration_not_match,
timeout=timeout,
retry=retry,
single_shot_download=single_shot_download,
)

if encoding is not None:
Expand Down Expand Up @@ -4019,6 +4071,7 @@ def open(
For downloads only, the following additional arguments are supported:

- ``raw_download``
- ``single_shot_download``

For uploads only, the following additional arguments are supported:

Expand Down Expand Up @@ -4209,6 +4262,7 @@ def _prep_and_do_download(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
command=None,
):
"""Download the contents of a blob object into a file-like object.
Expand Down Expand Up @@ -4294,6 +4348,12 @@ def _prep_and_do_download(
(google.cloud.storage.retry) for information on retry types and how
to configure them.

:type single_shot_download: bool
:param single_shot_download:
(Optional) If true, download the object in a single request.
Caution: Enabling this will increase the memory overhead for your application.
Enable this only if it suits your use case.

:type command: str
:param command:
(Optional) Information about which interface for download was used,
Expand Down Expand Up @@ -4349,6 +4409,7 @@ def _prep_and_do_download(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)
except InvalidResponse as exc:
_raise_from_invalid_response(exc)
Expand Down
5 changes: 5 additions & 0 deletions google/cloud/storage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,7 @@ def download_blob_to_file(
timeout=_DEFAULT_TIMEOUT,
checksum="auto",
retry=DEFAULT_RETRY,
single_shot_download=False,
):
"""Download the contents of a blob object or blob URI into a file-like object.

Expand Down Expand Up @@ -1216,6 +1217,9 @@ def download_blob_to_file(
See the retry.py source code and docstrings in this package
(google.cloud.storage.retry) for information on retry types and how
to configure them.

single_shot_download (bool):
(Optional) If true, download the object in a single request.
"""
with create_trace_span(name="Storage.Client.downloadBlobToFile"):
if not isinstance(blob_or_uri, Blob):
Expand All @@ -1236,6 +1240,7 @@ def download_blob_to_file(
timeout=timeout,
checksum=checksum,
retry=retry,
single_shot_download=single_shot_download,
)

def list_blobs(
Expand Down
8 changes: 6 additions & 2 deletions google/cloud/storage/fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
"timeout",
"retry",
"raw_download",
"single_shot_download",
}

# Valid keyword arguments for upload methods.
Expand Down Expand Up @@ -99,8 +100,9 @@ class BlobReader(io.BufferedIOBase):
- ``if_metageneration_not_match``
- ``timeout``
- ``raw_download``
- ``single_shot_download``

Note that download_kwargs (excluding ``raw_download``) are also applied to blob.reload(),
Note that download_kwargs (excluding ``raw_download`` and ``single_shot_download``) are also applied to blob.reload(),
if a reload is needed during seek().
"""

Expand Down Expand Up @@ -177,7 +179,9 @@ def seek(self, pos, whence=0):

if self._blob.size is None:
reload_kwargs = {
k: v for k, v in self._download_kwargs.items() if k != "raw_download"
k: v
for k, v in self._download_kwargs.items()
if (k != "raw_download" and k != "single_shot_download")
}
self._blob.reload(**reload_kwargs)

Expand Down
Loading