diff --git a/tensorflow_io/core/kernels/bigtable/bigtable_dataset_kernel.cc b/tensorflow_io/core/kernels/bigtable/bigtable_dataset_kernel.cc index 596da7396..66db5e7d7 100644 --- a/tensorflow_io/core/kernels/bigtable/bigtable_dataset_kernel.cc +++ b/tensorflow_io/core/kernels/bigtable/bigtable_dataset_kernel.cc @@ -225,7 +225,9 @@ class Iterator : public DatasetIterator { filters.push_back(std::move(f)); } - return cbt::Filter::InterleaveFromRange(filters.begin(), filters.end()); + return filters.size() > 1 ? cbt::Filter::InterleaveFromRange( + filters.begin(), filters.end()) + : filters[0]; } static std::pair ColumnToFamilyAndQualifier( diff --git a/tensorflow_io/python/ops/bigtable/bigtable_dataset_ops.py b/tensorflow_io/python/ops/bigtable/bigtable_dataset_ops.py index 9ddbcffb5..4380b0ba1 100644 --- a/tensorflow_io/python/ops/bigtable/bigtable_dataset_ops.py +++ b/tensorflow_io/python/ops/bigtable/bigtable_dataset_ops.py @@ -57,7 +57,7 @@ def read_rows( self, columns: List[str], row_set: bigtable_row_set.RowSet, - filter: filters.BigtableFilter = filters.latest(), + filter: filters.BigtableFilter = None, output_type=tf.string, ): """Retrieves values from Google Bigtable sorted by RowKeys. @@ -69,23 +69,24 @@ def read_rows( Returns: A `tf.data.Dataset` returning the cell contents. """ + + # Python initializes the default arguments once at the start of the + # program. If the fork happens after that (for instance when we run + # tests using xdist) the program deadlocks and hangs. That is why we + # have to make sure, all default arguments are initialized on each + # invocation. + if filter is None: + filter = filters.latest() return _BigtableDataset( - self._client_resource, - self._table_id, - columns, - row_set, - filter, - output_type, + self._client_resource, self._table_id, columns, row_set, filter, output_type ) def parallel_read_rows( self, columns: List[str], num_parallel_calls=tf.data.AUTOTUNE, - row_set: bigtable_row_set.RowSet = bigtable_row_set.from_rows_or_ranges( - bigtable_row_range.infinite() - ), - filter: filters.BigtableFilter = filters.latest(), + row_set: bigtable_row_set.RowSet = None, + filter: filters.BigtableFilter = None, output_type=tf.string, ): """Retrieves values from Google Bigtable in parallel. The ammount of work @@ -101,11 +102,17 @@ def parallel_read_rows( A `tf.data.Dataset` returning the cell contents. """ + # We have to make sure that all the default arguments are initialized + # on each invocation. For more info see read_rows method. + if row_set is None: + row_set = bigtable_row_set.from_rows_or_ranges( + bigtable_row_range.infinite() + ) + if filter is None: + filter = filters.latest() + samples = core_ops.bigtable_split_row_set_evenly( - self._client_resource, - row_set._impl, - self._table_id, - num_parallel_calls, + self._client_resource, row_set._impl, self._table_id, num_parallel_calls ) def map_func(idx): diff --git a/tests/test_bigtable/bigtable_emulator.py b/tests/test_bigtable/bigtable_emulator.py index 02cabe92f..975eb01ac 100644 --- a/tests/test_bigtable/bigtable_emulator.py +++ b/tests/test_bigtable/bigtable_emulator.py @@ -30,12 +30,14 @@ CBT_EMULATOR_SEARCH_PATHS = [ "/usr/lib/google-cloud-sdk/platform/bigtable-emulator/cbtemulator", "/usr/local/google-cloud-sdk/platform/bigtable-emulator/cbtemulator", + "/v/google-cloud-sdk/platform/bigtable-emulator/cbtemulator", "cbtemulator", ] CBT_CLI_SEARCH_PATHS = [ "/usr/local/google-cloud-sdk/bin/cbt", "/usr/bin/cbt", + "/v/google-cloud-sdk/bin/cbt", "cbt", ] diff --git a/tests/test_gcloud/test_pubsub_bigtable.sh b/tests/test_gcloud/test_pubsub_bigtable.sh index ff504b924..71d1adec3 100755 --- a/tests/test_gcloud/test_pubsub_bigtable.sh +++ b/tests/test_gcloud/test_pubsub_bigtable.sh @@ -34,7 +34,7 @@ fi curl -sSOL https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-236.0.0-darwin-x86_64.tar.gz tar -xzf google-cloud-sdk-236.0.0-darwin-x86_64.tar.gz google-cloud-sdk/install.sh -q -google-cloud-sdk/bin/gcloud -q components install beta +google-cloud-sdk/bin/gcloud -q components install beta bigtable cbt google-cloud-sdk/bin/gcloud -q components install pubsub-emulator google-cloud-sdk/bin/gcloud -q components update beta google-cloud-sdk/bin/gcloud -q beta emulators pubsub start &