Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sdk/storage/azure-storage-blob/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

### Features Added
- Added support for service version 2021-08-06.
- Added a new version of client-side encryption for blobs (version 2.0) which utilizes AES-GCM-256 encryption.
If you are currently using client-side encryption, it is **highly recommended** to switch to a form of server-side
encryption (Customer-Provided Key, Encryption Scope, etc.) or version 2.0 of client-side encryption. The encryption
version can be specified on any client constructor via the `encryption_version` keyword (`encryption_version='2.0'`).

## 12.12.0 (2022-05-09)

Expand Down
2 changes: 2 additions & 0 deletions sdk/storage/azure-storage-blob/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,8 @@ Defaults to `False`.
Use the following keyword arguments when instantiating a client to configure encryption:

* __require_encryption__ (bool): If set to True, will enforce that objects are encrypted and decrypt them.
* __encryption_version__ (str): Specifies the version of encryption to use. Current options are `'2.0'` or `'1.0'` and
the default value is `'1.0'`. Version 1.0 is deprecated, and it is **highly recommended** to use version 2.0.
* __key_encryption_key__ (object): The user-provided key-encryption-key. The instance must implement the following methods:
- `wrap_key(key)`--wraps the specified key using an algorithm of the user's choice.
- `get_key_wrap_algorithm()`--returns the algorithm used to wrap the specified symmetric key.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,14 @@ def _upload_blob_options( # pylint:disable=too-many-statements
raise ValueError("Encryption required but no key was provided.")
encryption_options = {
'required': self.require_encryption,
'version': self.encryption_version,
'key': self.key_encryption_key,
'resolver': self.key_resolver_function,
}
if self.key_encryption_key is not None:
cek, iv, encryption_data = generate_blob_encryption_data(self.key_encryption_key)
cek, iv, encryption_data = generate_blob_encryption_data(
self.key_encryption_key,
self.encryption_version)
encryption_options['cek'] = cek
encryption_options['vector'] = iv
encryption_options['data'] = encryption_data
Expand Down Expand Up @@ -422,6 +425,8 @@ def _upload_blob_options( # pylint:disable=too-many-statements
kwargs['client'] = self._client.block_blob
kwargs['data'] = data
elif blob_type == BlobType.PageBlob:
if self.encryption_version == '2.0' and (self.require_encryption or self.key_encryption_key is not None):
raise ValueError("Encryption version 2.0 does not currently support page blobs.")
kwargs['client'] = self._client.page_blob
elif blob_type == BlobType.AppendBlob:
if self.require_encryption or (self.key_encryption_key is not None):
Expand Down Expand Up @@ -4126,5 +4131,5 @@ def _get_container_client(self): # pylint: disable=client-method-missing-kwargs
"{}://{}".format(self.scheme, self.primary_hostname), container_name=self.container_name,
credential=self._raw_credential, api_version=self.api_version, _configuration=self._config,
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,8 @@ def get_container_client(self, container):
self.url, container_name=container_name,
credential=self.credential, api_version=self.api_version, _configuration=self._config,
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)

def get_blob_client(
self, container, # type: Union[ContainerProperties, str]
Expand Down Expand Up @@ -736,5 +736,5 @@ def get_blob_client(
self.url, container_name=container_name, blob_name=blob_name, snapshot=snapshot,
credential=self.credential, api_version=self.api_version, _configuration=self._config,
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
Original file line number Diff line number Diff line change
Expand Up @@ -323,16 +323,16 @@ def _rename_container(self, new_name, **kwargs):
"""
lease = kwargs.pop('lease', None)
try:
kwargs['source_lease_id'] = lease.id # type: str
kwargs['source_lease_id'] = lease.id
except AttributeError:
kwargs['source_lease_id'] = lease
try:
renamed_container = ContainerClient(
"{}://{}".format(self.scheme, self.primary_hostname), container_name=new_name,
credential=self.credential, api_version=self.api_version, _configuration=self._config,
_pipeline=self._pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
renamed_container._client.container.rename(self.container_name, **kwargs) # pylint: disable = protected-access
return renamed_container
except HttpResponseError as error:
Expand Down Expand Up @@ -619,8 +619,8 @@ def _get_blob_service_client(self): # pylint: disable=client-method-missing-kwa
"{}://{}".format(self.scheme, self.primary_hostname),
credential=self._raw_credential, api_version=self.api_version, _configuration=self._config,
_location_mode=self._location_mode, _hosts=self._hosts, require_encryption=self.require_encryption,
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function,
_pipeline=_pipeline)
encryption_version=self.encryption_version, key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function, _pipeline=_pipeline)

@distributed_trace
def get_container_access_policy(self, **kwargs):
Expand Down Expand Up @@ -1608,5 +1608,5 @@ def get_blob_client(
self.url, container_name=self.container_name, blob_name=blob_name, snapshot=snapshot,
credential=self.credential, api_version=self.api_version, _configuration=self._config,
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
key_resolver_function=self.key_resolver_function)
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
87 changes: 62 additions & 25 deletions sdk/storage/azure-storage-blob/azure/storage/blob/_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,26 @@
from azure.core.exceptions import HttpResponseError, ServiceResponseError

from azure.core.tracing.common import with_current_context
from ._shared.encryption import decrypt_blob
from ._shared.encryption import (
adjust_blob_size_for_encryption,
decrypt_blob,
get_adjusted_download_range_and_offset,
is_encryption_v2,
parse_encryption_data
)
from ._shared.request_handlers import validate_and_format_range_headers
from ._shared.response_handlers import process_storage_error, parse_length_from_content_range
from ._deserialize import get_page_ranges_result
from ._deserialize import deserialize_blob_properties, get_page_ranges_result


def process_range_and_offset(start_range, end_range, length, encryption):
def process_range_and_offset(start_range, end_range, length, encryption_options, encryption_data):
start_offset, end_offset = 0, 0
if encryption.get("key") is not None or encryption.get("resolver") is not None:
if start_range is not None:
# Align the start of the range along a 16 byte block
start_offset = start_range % 16
start_range -= start_offset

# Include an extra 16 bytes for the IV if necessary
# Because of the previous offsetting, start_range will always
# be a multiple of 16.
if start_range > 0:
start_offset += 16
start_range -= 16

if length is not None:
# Align the end of the range along a 16 byte block
end_offset = 15 - (end_range % 16)
end_range += end_offset
if encryption_options.get("key") is not None or encryption_options.get("resolver") is not None:
return get_adjusted_download_range_and_offset(
start_range,
end_range,
length,
encryption_data)

return (start_range, end_range), (start_offset, end_offset)

Expand Down Expand Up @@ -81,6 +76,7 @@ def __init__(
parallel=None,
validate_content=None,
encryption_options=None,
encryption_data=None,
progress_hook=None,
**kwargs
):
Expand Down Expand Up @@ -108,6 +104,7 @@ def __init__(

# Encryption
self.encryption_options = encryption_options
self.encryption_data = encryption_data

# Parameters for each get operation
self.validate_content = validate_content
Expand Down Expand Up @@ -183,7 +180,7 @@ def _do_optimize(self, given_range_start, given_range_end):

def _download_chunk(self, chunk_start, chunk_end):
download_range, offset = process_range_and_offset(
chunk_start, chunk_end, chunk_end, self.encryption_options
chunk_start, chunk_end, chunk_end, self.encryption_options, self.encryption_data
)

# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
Expand Down Expand Up @@ -335,6 +332,10 @@ def __init__(
self._file_size = None
self._non_empty_ranges = None
self._response = None
self._encryption_data = None

if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
self._get_encryption_data_request()

# The service only provides transactional MD5s for chunks under 4MB.
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
Expand All @@ -349,7 +350,11 @@ def __init__(
initial_request_end = initial_request_start + self._first_get_size - 1

self._initial_range, self._initial_offset = process_range_and_offset(
initial_request_start, initial_request_end, self._end_range, self._encryption_options
initial_request_start,
initial_request_end,
self._end_range,
self._encryption_options,
self._encryption_data
)

self._response = self._initial_request()
Expand All @@ -376,6 +381,21 @@ def __init__(
def __len__(self):
return self.size

def _get_encryption_data_request(self):
    """Fetch the blob's properties and cache its parsed encryption metadata.

    Temporarily replaces the ``cls`` entry of ``self._request_options`` with
    ``deserialize_blob_properties`` for the ``get_properties`` call, then puts
    the previously saved download ``cls`` back. The result of
    ``parse_encryption_data(properties.metadata)`` is stored on
    ``self._encryption_data``.
    """
    # Save current request cls
    download_cls = self._request_options.pop('cls', None)
    # Adjust cls for get_properties
    self._request_options['cls'] = deserialize_blob_properties
    try:
        properties = self._clients.blob.get_properties(**self._request_options)
    finally:
        # Restore cls for download. Done in a finally so that a failed
        # get_properties call does not leave the properties deserializer in
        # the options that are later reused for the download requests.
        self._request_options['cls'] = download_cls

    # This will return None if there is no encryption metadata or there are parsing errors.
    # That is acceptable here, the proper error will be caught and surfaced when attempting
    # to decrypt the blob.
    self._encryption_data = parse_encryption_data(properties.metadata)

def _initial_request(self):
range_header, range_validation = validate_and_format_range_headers(
self._initial_range[0],
Expand Down Expand Up @@ -405,6 +425,9 @@ def _initial_request(self):
# Parse the total file size and adjust the download size if ranges
# were specified
self._file_size = parse_length_from_content_range(response.properties.content_range)
# Remove any extra encryption data size from blob size
self._file_size = adjust_blob_size_for_encryption(self._file_size, self._encryption_data)

if self._end_range is not None:
# Use the end range index unless it is over the end of the file
self.size = min(self._file_size, self._end_range - self._start_range + 1)
Expand Down Expand Up @@ -465,7 +488,8 @@ def _initial_request(self):

# If the file is small, the download is complete at this point.
# If file size is large, download the rest of the file in chunks.
if response.properties.size != self.size:
# Use less than here for encryption.
if response.properties.size < self.size:
if self._request_options.get("modified_access_conditions"):
self._request_options["modified_access_conditions"].if_match = response.properties.etag
else:
Expand Down Expand Up @@ -494,18 +518,25 @@ def chunks(self):
if self._end_range is not None:
# Use the end range index unless it is over the end of the file
data_end = min(self._file_size, self._end_range + 1)

data_start = self._initial_range[1] + 1 # Start where the first download ended
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
if is_encryption_v2(self._encryption_data):
data_start = (self._start_range or 0) + len(self._current_content)

iter_downloader = _ChunkDownloader(
client=self._clients.blob,
non_empty_ranges=self._non_empty_ranges,
total_size=self.size,
chunk_size=self._config.max_chunk_get_size,
current_progress=self._first_get_size,
start_range=self._initial_range[1] + 1, # start where the first download ended
start_range=data_start,
end_range=data_end,
stream=None,
parallel=False,
validate_content=self._validate_content,
encryption_options=self._encryption_options,
encryption_data=self._encryption_data,
use_location=self._location_mode,
**self._request_options
)
Expand Down Expand Up @@ -599,18 +630,24 @@ def readinto(self, stream):
# Use the length unless it is over the end of the file
data_end = min(self._file_size, self._end_range + 1)

data_start = self._initial_range[1] + 1 # Start where the first download ended
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
if is_encryption_v2(self._encryption_data):
data_start = (self._start_range or 0) + len(self._current_content)

downloader = _ChunkDownloader(
client=self._clients.blob,
non_empty_ranges=self._non_empty_ranges,
total_size=self.size,
chunk_size=self._config.max_chunk_get_size,
current_progress=self._first_get_size,
start_range=self._initial_range[1] + 1, # Start where the first download ended
start_range=data_start,
end_range=data_end,
stream=stream,
parallel=parallel,
validate_content=self._validate_content,
encryption_options=self._encryption_options,
encryption_data=self._encryption_data,
use_location=self._location_mode,
progress_hook=self._progress_hook,
**self._request_options
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def __init__(
self._hosts = {LocationMode.PRIMARY: primary_hostname, LocationMode.SECONDARY: secondary_hostname}

self.require_encryption = kwargs.get("require_encryption", False)
self.encryption_version = kwargs.get("encryption_version", "1.0")
self.key_encryption_key = kwargs.get("key_encryption_key")
self.key_resolver_function = kwargs.get("key_resolver_function")
self._config, self._pipeline = self._create_pipeline(self.credential, storage_sdk=service, **kwargs)
Expand Down
Loading