2 changes: 2 additions & 0 deletions .circleci/config.yml
@@ -62,6 +62,8 @@ jobs:
name: Run tests - google.cloud.bigtable
command: |
if [[ -n $(grep bigtable ~/target_packages) ]]; then
test_utils/scripts/circleci/prepare_bigtable.sh
export DOWNLOAD_BIGTABLE_TEST_SERVER=0
nox -f bigtable/nox.py
fi
- run:
205 changes: 205 additions & 0 deletions bigtable/google/cloud/bigtable/retry.py
@@ -0,0 +1,205 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provides function wrappers that implement retrying."""

import random
import time
import six
import sys

from google.cloud._helpers import _to_bytes
from google.cloud.bigtable._generated import (
bigtable_pb2 as data_messages_v2_pb2)
from google.gax import config, errors
from grpc import RpcError


_MILLIS_PER_SECOND = 1000


class ReadRowsIterator(object):
    """Iterates over ReadRows responses, transparently recreating the
    stream and resuming the scan when a retryable exception is raised.
    """

def __init__(self, client, name, start_key, end_key, filter_, limit,
end_inclusive, retry_options, **kwargs):
self.client = client
self.retry_options = retry_options
self.name = name
self.start_key = start_key
self.start_key_closed = True
self.end_key = end_key
self.filter_ = filter_
self.limit = limit
self.end_inclusive = end_inclusive
self.delay_mult = retry_options.backoff_settings.retry_delay_multiplier
self.max_delay_millis = \
retry_options.backoff_settings.max_retry_delay_millis
self.timeout_mult = \
retry_options.backoff_settings.rpc_timeout_multiplier
self.max_timeout = \
(retry_options.backoff_settings.max_rpc_timeout_millis /
_MILLIS_PER_SECOND)
self.total_timeout = \
(retry_options.backoff_settings.total_timeout_millis /
_MILLIS_PER_SECOND)
self._responses_for_row = 0
self.set_stream()

    def set_start_key(self, start_key):
        """
        Sets the row key after which this iterator will resume reading.
        The key itself is excluded, since its row has already been
        committed.
        """
        self.start_key = start_key
        self.start_key_closed = False

def set_stream(self):
"""
Resets the read stream by making an RPC on the 'ReadRows' endpoint.
"""
req_pb = _create_row_request(self.name, start_key=self.start_key,
start_key_closed=self.start_key_closed,
end_key=self.end_key,
filter_=self.filter_, limit=self.limit,
end_inclusive=self.end_inclusive)
self.stream = self.client._data_stub.ReadRows(req_pb)

    @property
    def responses_for_row(self):
        """Number of responses received so far for the current row.

        A value of 1 means either that the row is being read for the
        first time, or that the most recent response triggered a retry
        and the row is being read again.

        :rtype: int
        :returns: Number of responses received so far for the current
                  row.
        """
        return self._responses_for_row

    def clear_responses_for_row(self):
        """
        Resets the response counter; called when a new row is started.
        """
        self._responses_for_row = 0

    def next(self, *args, **kwargs):
        """
        Reads and returns the next response from the stream,
        retrying on retryable failures.
        """
        delay = self.retry_options.backoff_settings.initial_retry_delay_millis
        exc = errors.RetryError('Retry total timeout exceeded before any '
                                'response was received')

        now = time.time()
        deadline = now + self.total_timeout
        while now < deadline:
            self._responses_for_row += 1
            try:
                return six.next(self.stream)
            except StopIteration:
                raise
            except RpcError as error:
                code = config.exc_to_code(error)
                if code not in self.retry_options.retry_codes:
                    six.reraise(type(error), error)

# pylint: disable=redefined-variable-type
exc = errors.RetryError(
'Retry total timeout exceeded with exception', error)

                # Sleep for a random duration whose expected value
                # equals the current delay.
                to_sleep = random.uniform(0, delay * 2)
time.sleep(to_sleep / _MILLIS_PER_SECOND)
delay = min(delay * self.delay_mult, self.max_delay_millis)
now = time.time()
self._responses_for_row = 0
self.set_stream()

six.reraise(errors.RetryError, exc, sys.exc_info()[2])

def __next__(self, *args, **kwargs):
return self.next(*args, **kwargs)
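The retry loop above sleeps for exponentially growing, randomized delays between attempts. As a rough standalone sketch of the same schedule (the helper name is invented; its defaults mirror the BACKOFF_SETTINGS added to table.py below):

import random

def backoff_delays(initial_millis=10, multiplier=1.3, max_millis=30000):
    """Yield jittered sleep times in seconds, as in ReadRowsIterator.next."""
    delay = initial_millis
    while True:
        # uniform(0, 2 * delay) has expected value `delay`.
        yield random.uniform(0, delay * 2) / 1000.0
        delay = min(delay * multiplier, max_millis)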


def _create_row_request(table_name, row_key=None, start_key=None,
start_key_closed=True, end_key=None, filter_=None,
limit=None, end_inclusive=False):
"""Creates a request to read rows in a table.

:type table_name: str
:param table_name: The name of the table to read from.

:type row_key: bytes
:param row_key: (Optional) The key of a specific row to read from.

    :type start_key: bytes
    :param start_key: (Optional) The beginning of a range of row keys to
                      read from. If left empty, will be interpreted as the
                      empty string.

    :type start_key_closed: bool
    :param start_key_closed: (Optional) Whether the range includes
                             ``start_key`` itself. Defaults to
                             :data:`True`; the retrying iterator passes
                             :data:`False` to resume just after the last
                             committed row.

:type end_key: bytes
:param end_key: (Optional) The end of a range of row keys to read from.
The range will not include ``end_key``. If left empty,
will be interpreted as an infinite string.

:type filter_: :class:`.RowFilter`
:param filter_: (Optional) The filter to apply to the contents of the
specified row(s). If unset, reads the entire table.

    :type limit: int
    :param limit: (Optional) The read will terminate after committing to N
                  rows' worth of results. The default (zero) is to return
                  all results.

    :type end_inclusive: bool
    :param end_inclusive: (Optional) Whether the ``end_key`` should be
                          considered inclusive. The default is False
                          (exclusive).

:rtype: :class:`data_messages_v2_pb2.ReadRowsRequest`
:returns: The ``ReadRowsRequest`` protobuf corresponding to the inputs.
    :raises: :class:`ValueError <exceptions.ValueError>` if ``row_key`` is
             set together with one of ``start_key`` or ``end_key``
request_kwargs = {'table_name': table_name}
if (row_key is not None and
(start_key is not None or end_key is not None)):
raise ValueError('Row key and row range cannot be '
'set simultaneously')
range_kwargs = {}
if start_key is not None or end_key is not None:
if start_key is not None:
if start_key_closed:
range_kwargs['start_key_closed'] = _to_bytes(start_key)
else:
range_kwargs['start_key_open'] = _to_bytes(start_key)
if end_key is not None:
end_key_key = 'end_key_open'
if end_inclusive:
end_key_key = 'end_key_closed'
range_kwargs[end_key_key] = _to_bytes(end_key)
if filter_ is not None:
request_kwargs['filter'] = filter_.to_pb()
if limit is not None:
request_kwargs['rows_limit'] = limit

message = data_messages_v2_pb2.ReadRowsRequest(**request_kwargs)

if row_key is not None:
message.rows.row_keys.append(_to_bytes(row_key))

if range_kwargs:
message.rows.row_ranges.add(**range_kwargs)

return message
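As an illustration of how resumption is encoded in the request, a hypothetical call (the table path and row key are invented) that restarts a scan strictly after the last committed row:

req = _create_row_request(
    'projects/my-project/instances/my-instance/tables/my-table',
    start_key=b'row-0042', start_key_closed=False)
# The open start key excludes the row that was already committed.
assert req.rows.row_ranges[0].start_key_open == b'row-0042'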
13 changes: 13 additions & 0 deletions bigtable/google/cloud/bigtable/row_data.py
@@ -267,6 +267,10 @@ def consume_next(self):

self._last_scanned_row_key = response.last_scanned_row_key

if hasattr(self._response_iterator, 'responses_for_row'):
            if self._response_iterator.responses_for_row == 1:
                self._clear_accumulated_row()

row = self._row
cell = self._cell

@@ -300,6 +304,10 @@ def consume_next(self):

if chunk.commit_row:
self._save_current_row()
if hasattr(self._response_iterator, 'set_start_key'):
self._response_iterator.set_start_key(chunk.row_key)
if hasattr(self._response_iterator, 'clear_responses_for_row'):
self._response_iterator.clear_responses_for_row()
row = cell = None
continue

@@ -345,6 +353,11 @@ def _validate_chunk_status(chunk):
# No negative value_size (inferred as a general constraint).
_raise_if(chunk.value_size < 0)

    def _clear_accumulated_row(self):
        """Forgets the partially accumulated row, e.g. after a retry."""
        self._row = None
        self._cell = None
        self._previous_cell = None

def _validate_chunk_new_row(self, chunk):
"""Helper for :meth:`_validate_chunk`."""
assert self.state == self.NEW_ROW
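consume_next discovers set_start_key, responses_for_row, and clear_responses_for_row with hasattr, so plain response iterators keep working unchanged. A minimal hypothetical stub that satisfies the same informal protocol:

class _StubReadRowsIterator(object):
    """Bare-bones iterator exposing the hooks consume_next probes for."""

    def __init__(self, responses):
        self._responses = iter(responses)
        self.responses_for_row = 0
        self.start_key = None

    def next(self):
        self.responses_for_row += 1
        return next(self._responses)

    __next__ = next

    def clear_responses_for_row(self):
        self.responses_for_row = 0

    def set_start_key(self, start_key):
        # A real implementation would restart its stream after this key.
        self.start_key = start_key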
107 changes: 31 additions & 76 deletions bigtable/google/cloud/bigtable/table.py
@@ -18,7 +18,6 @@
from google.api_core.exceptions import RetryError
from google.api_core.retry import if_exception_type
from google.api_core.retry import Retry
from google.cloud._helpers import _to_bytes
from google.cloud.bigtable._generated import (
bigtable_pb2 as data_messages_v2_pb2)
from google.cloud.bigtable._generated import (
@@ -31,8 +30,27 @@
from google.cloud.bigtable.row import ConditionalRow
from google.cloud.bigtable.row import DirectRow
from google.cloud.bigtable.row_data import PartialRowsData
from google.gax import RetryOptions, BackoffSettings
from google.cloud.bigtable.retry import ReadRowsIterator, _create_row_request
from grpc import StatusCode

BACKOFF_SETTINGS = BackoffSettings(
initial_retry_delay_millis=10,
retry_delay_multiplier=1.3,
max_retry_delay_millis=30000,
initial_rpc_timeout_millis=25 * 60 * 1000,
rpc_timeout_multiplier=1.0,
max_rpc_timeout_millis=25 * 60 * 1000,
total_timeout_millis=30 * 60 * 1000
)

RETRY_CODES = [
StatusCode.DEADLINE_EXCEEDED,
StatusCode.ABORTED,
StatusCode.INTERNAL,
StatusCode.UNAVAILABLE
]


# Maximum number of mutations in bulk (MutateRowsRequest message):
# (https://cloud.google.com/bigtable/docs/reference/data/rpc/
@@ -277,7 +295,7 @@ def read_row(self, row_key, filter_=None):
return rows_data.rows[row_key]

def read_rows(self, start_key=None, end_key=None, limit=None,
filter_=None, end_inclusive=False):
filter_=None, end_inclusive=False, backoff_settings=None):
"""Read rows from this table.

:type start_key: bytes
@@ -308,13 +326,18 @@ def read_rows(self, start_key=None, end_key=None, limit=None,
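        :type backoff_settings: :class:`google.gax.BackoffSettings`
        :param backoff_settings: (Optional) Backoff parameters for retrying
            the read; if not supplied, the module-level ``BACKOFF_SETTINGS``
            are used.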
:returns: A :class:`.PartialRowsData` convenience wrapper for consuming
the streamed results.
"""
request_pb = _create_row_request(
self.name, start_key=start_key, end_key=end_key, filter_=filter_,
limit=limit, end_inclusive=end_inclusive)

client = self._instance._client
response_iterator = client._data_stub.ReadRows(request_pb)
# We expect an iterator of `data_messages_v2_pb2.ReadRowsResponse`
return PartialRowsData(response_iterator)
if backoff_settings is None:
backoff_settings = BACKOFF_SETTINGS
        retry_options = RetryOptions(
            retry_codes=RETRY_CODES,
            backoff_settings=backoff_settings,
        )

        retrying_iterator = ReadRowsIterator(client, self.name, start_key,
                                             end_key, filter_, limit,
                                             end_inclusive, retry_options)
return PartialRowsData(retrying_iterator)
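Callers can trade retry persistence for latency by supplying their own settings. A hypothetical usage sketch (assumes table is an instance of this class; the field values and row keys are illustrative):

from google.gax import BackoffSettings

custom_backoff = BackoffSettings(
    initial_retry_delay_millis=10,
    retry_delay_multiplier=1.3,
    max_retry_delay_millis=30000,
    initial_rpc_timeout_millis=60 * 1000,
    rpc_timeout_multiplier=1.0,
    max_rpc_timeout_millis=60 * 1000,
    total_timeout_millis=5 * 60 * 1000)  # give up after ~5 minutes

rows = table.read_rows(start_key=b'user-0', end_key=b'user-z',
                       backoff_settings=custom_backoff)
rows.consume_all()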

def mutate_rows(self, rows, retry=DEFAULT_RETRY):
"""Mutates multiple rows in bulk.
@@ -495,74 +518,6 @@ def _do_mutate_retryable_rows(self):
return self.responses_statuses


def _create_row_request(table_name, row_key=None, start_key=None, end_key=None,
filter_=None, limit=None, end_inclusive=False):
"""Creates a request to read rows in a table.

:type table_name: str
:param table_name: The name of the table to read from.

:type row_key: bytes
:param row_key: (Optional) The key of a specific row to read from.

:type start_key: bytes
:param start_key: (Optional) The beginning of a range of row keys to
read from. The range will include ``start_key``. If
left empty, will be interpreted as the empty string.

:type end_key: bytes
:param end_key: (Optional) The end of a range of row keys to read from.
The range will not include ``end_key``. If left empty,
will be interpreted as an infinite string.

:type filter_: :class:`.RowFilter`
:param filter_: (Optional) The filter to apply to the contents of the
specified row(s). If unset, reads the entire table.

:type limit: int
:param limit: (Optional) The read will terminate after committing to N
rows' worth of results. The default (zero) is to return
all results.

:type end_inclusive: bool
:param end_inclusive: (Optional) Whether the ``end_key`` should be
considered inclusive. The default is False (exclusive).

:rtype: :class:`data_messages_v2_pb2.ReadRowsRequest`
:returns: The ``ReadRowsRequest`` protobuf corresponding to the inputs.
:raises: :class:`ValueError <exceptions.ValueError>` if both
``row_key`` and one of ``start_key`` and ``end_key`` are set
"""
request_kwargs = {'table_name': table_name}
if (row_key is not None and
(start_key is not None or end_key is not None)):
raise ValueError('Row key and row range cannot be '
'set simultaneously')
range_kwargs = {}
if start_key is not None or end_key is not None:
if start_key is not None:
range_kwargs['start_key_closed'] = _to_bytes(start_key)
if end_key is not None:
end_key_key = 'end_key_open'
if end_inclusive:
end_key_key = 'end_key_closed'
range_kwargs[end_key_key] = _to_bytes(end_key)
if filter_ is not None:
request_kwargs['filter'] = filter_.to_pb()
if limit is not None:
request_kwargs['rows_limit'] = limit

message = data_messages_v2_pb2.ReadRowsRequest(**request_kwargs)

if row_key is not None:
message.rows.row_keys.append(_to_bytes(row_key))

if range_kwargs:
message.rows.row_ranges.add(**range_kwargs)

return message


def _mutate_rows_request(table_name, rows):
"""Creates a request to mutate rows in a table.
