Skip to content

Commit

Permalink
Update Protobuf and GCP dependencies in Beam Python SDK (#24599)
Browse files Browse the repository at this point in the history
* Update build dependencies and mypy-protobuf

* Update import

* Add os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

* Update gen_protos.py

* Manually generate proto file.

* update proto file

* Remove redundant code

* Add encoding

* Update protobuf

* Remove tensorflow ad google cloud profiler

* update dependencies

* update facets-overview

* Update goolge-cloud-langugage code to support breaking changes from v2.0

* Update video intelligence breaking changes

* Update lower bounds on some gcp deps

* Update deps

* Debug

* Update gen_protos.py to supoort google._upb

* Change checking condition to cpp to upb

* Update google-cloud-datastore ByteSize

* Update Dockerfile

* raise Error

* Update dependencies bound

* Add optional field

* Add license headers

* Add more optional

* Changes types to make them compatible with mypy-protobuf generated stubs

* Add license for grpcio-status

* update mypy to 0.790

* add more optional fields to satisfy mypy type checker

* Identify and solve the mypy type checks

* fix lint

* Revert "add more optional fields to satisfy mypy type checker"

This reverts commit 1ebc815.

* Revert "Add more optional"

This reverts commit 3801c16.

* Revert "Add optional field"

This reverts commit 35631f5.

* add relax_strict_optional_primitives to mypy_out

* Fix up formatting

* Fix up lint

* Fix up formatting

* Fix up lint

* fix lint

* Fix up docs

* Add gh issue to update code and doc strings

* Update lower bound of gcp dependencies

* Add tensorflow rc

* Add tf rc version and comment onnx tests

* Update container dependencies

* comment onnx task

* Change bound on google-cloud-recommendations

* Modify FakeMutation to support mocking proto messages

* Update comment

* Add todo for uncommenting onnx changes

* update comment

* Update dependencies

* revert doc change

* Refactor code/remove helper function from util

* Update comment

* Uncomment changes

* Update tensorflow

* Comment tfx-bsl tests

* Update tox.ini syntax

* refactor

* fix requirements

* Skip tensorflow tests on Python 3.7
  • Loading branch information
AnandInguva authored Mar 15, 2023
1 parent 720762b commit 5cb1711
Show file tree
Hide file tree
Showing 34 changed files with 300 additions and 587 deletions.
319 changes: 27 additions & 292 deletions sdks/python/apache_beam/coders/proto2_coder_test_messages_pb2.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: apache_beam/coders/proto2_coder_test_messages.proto

"""Generated protocol buffer code."""
from google.protobuf.internal import builder as _builder
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

Expand All @@ -30,293 +30,28 @@



DESCRIPTOR = _descriptor.FileDescriptor(
name='apache_beam/coders/proto2_coder_test_messages.proto',
package='proto2_coder_test_messages',
syntax='proto2',
serialized_options=b'\n\'org.apache.beam.sdk.extensions.protobuf',
create_key=_descriptor._internal_create_key,
serialized_pb=b'\n3apache_beam/coders/proto2_coder_test_messages.proto\x12\x1aproto2_coder_test_messages\"P\n\x08MessageA\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\t\x12\x34\n\x06\x66ield2\x18\x02 \x03(\x0b\x32$.proto2_coder_test_messages.MessageB\"\x1a\n\x08MessageB\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x08\"\x10\n\x08MessageC*\x04\x08\x64\x10j\"\xad\x01\n\x0eMessageWithMap\x12\x46\n\x06\x66ield1\x18\x01 \x03(\x0b\x32\x36.proto2_coder_test_messages.MessageWithMap.Field1Entry\x1aS\n\x0b\x46ield1Entry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:\x02\x38\x01\"V\n\x18ReferencesMessageWithMap\x12:\n\x06\x66ield1\x18\x01 \x03(\x0b\x32*.proto2_coder_test_messages.MessageWithMap:Z\n\x06\x66ield1\x12$.proto2_coder_test_messages.MessageC\x18\x65 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:Z\n\x06\x66ield2\x12$.proto2_coder_test_messages.MessageC\x18\x66 \x01(\x0b\x32$.proto2_coder_test_messages.MessageBB)\n\'org.apache.beam.sdk.extensions.protobuf'
)


FIELD1_FIELD_NUMBER = 101
field1 = _descriptor.FieldDescriptor(
name='field1', full_name='proto2_coder_test_messages.field1', index=0,
number=101, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=True, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key)
FIELD2_FIELD_NUMBER = 102
field2 = _descriptor.FieldDescriptor(
name='field2', full_name='proto2_coder_test_messages.field2', index=1,
number=102, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=True, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key)


_MESSAGEA = _descriptor.Descriptor(
name='MessageA',
full_name='proto2_coder_test_messages.MessageA',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='field1', full_name='proto2_coder_test_messages.MessageA.field1', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='field2', full_name='proto2_coder_test_messages.MessageA.field2', index=1,
number=2, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=83,
serialized_end=163,
)


_MESSAGEB = _descriptor.Descriptor(
name='MessageB',
full_name='proto2_coder_test_messages.MessageB',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='field1', full_name='proto2_coder_test_messages.MessageB.field1', index=0,
number=1, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=165,
serialized_end=191,
)


_MESSAGEC = _descriptor.Descriptor(
name='MessageC',
full_name='proto2_coder_test_messages.MessageC',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=True,
syntax='proto2',
extension_ranges=[(100, 106), ],
oneofs=[
],
serialized_start=193,
serialized_end=209,
)


_MESSAGEWITHMAP_FIELD1ENTRY = _descriptor.Descriptor(
name='Field1Entry',
full_name='proto2_coder_test_messages.MessageWithMap.Field1Entry',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='key', full_name='proto2_coder_test_messages.MessageWithMap.Field1Entry.key', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=b"".decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
_descriptor.FieldDescriptor(
name='value', full_name='proto2_coder_test_messages.MessageWithMap.Field1Entry.value', index=1,
number=2, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=b'8\001',
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=302,
serialized_end=385,
)

_MESSAGEWITHMAP = _descriptor.Descriptor(
name='MessageWithMap',
full_name='proto2_coder_test_messages.MessageWithMap',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='field1', full_name='proto2_coder_test_messages.MessageWithMap.field1', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[_MESSAGEWITHMAP_FIELD1ENTRY, ],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=212,
serialized_end=385,
)


_REFERENCESMESSAGEWITHMAP = _descriptor.Descriptor(
name='ReferencesMessageWithMap',
full_name='proto2_coder_test_messages.ReferencesMessageWithMap',
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name='field1', full_name='proto2_coder_test_messages.ReferencesMessageWithMap.field1', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto2',
extension_ranges=[],
oneofs=[
],
serialized_start=387,
serialized_end=473,
)

_MESSAGEA.fields_by_name['field2'].message_type = _MESSAGEB
_MESSAGEWITHMAP_FIELD1ENTRY.fields_by_name['value'].message_type = _MESSAGEA
_MESSAGEWITHMAP_FIELD1ENTRY.containing_type = _MESSAGEWITHMAP
_MESSAGEWITHMAP.fields_by_name['field1'].message_type = _MESSAGEWITHMAP_FIELD1ENTRY
_REFERENCESMESSAGEWITHMAP.fields_by_name['field1'].message_type = _MESSAGEWITHMAP
DESCRIPTOR.message_types_by_name['MessageA'] = _MESSAGEA
DESCRIPTOR.message_types_by_name['MessageB'] = _MESSAGEB
DESCRIPTOR.message_types_by_name['MessageC'] = _MESSAGEC
DESCRIPTOR.message_types_by_name['MessageWithMap'] = _MESSAGEWITHMAP
DESCRIPTOR.message_types_by_name['ReferencesMessageWithMap'] = _REFERENCESMESSAGEWITHMAP
DESCRIPTOR.extensions_by_name['field1'] = field1
DESCRIPTOR.extensions_by_name['field2'] = field2
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

MessageA = _reflection.GeneratedProtocolMessageType('MessageA', (_message.Message,), {
'DESCRIPTOR' : _MESSAGEA,
'__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2'
# @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageA)
})
_sym_db.RegisterMessage(MessageA)

MessageB = _reflection.GeneratedProtocolMessageType('MessageB', (_message.Message,), {
'DESCRIPTOR' : _MESSAGEB,
'__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2'
# @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageB)
})
_sym_db.RegisterMessage(MessageB)

MessageC = _reflection.GeneratedProtocolMessageType('MessageC', (_message.Message,), {
'DESCRIPTOR' : _MESSAGEC,
'__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2'
# @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageC)
})
_sym_db.RegisterMessage(MessageC)

MessageWithMap = _reflection.GeneratedProtocolMessageType('MessageWithMap', (_message.Message,), {

'Field1Entry' : _reflection.GeneratedProtocolMessageType('Field1Entry', (_message.Message,), {
'DESCRIPTOR' : _MESSAGEWITHMAP_FIELD1ENTRY,
'__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2'
# @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageWithMap.Field1Entry)
})
,
'DESCRIPTOR' : _MESSAGEWITHMAP,
'__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2'
# @@protoc_insertion_point(class_scope:proto2_coder_test_messages.MessageWithMap)
})
_sym_db.RegisterMessage(MessageWithMap)
_sym_db.RegisterMessage(MessageWithMap.Field1Entry)

ReferencesMessageWithMap = _reflection.GeneratedProtocolMessageType('ReferencesMessageWithMap', (_message.Message,), {
'DESCRIPTOR' : _REFERENCESMESSAGEWITHMAP,
'__module__' : 'apache_beam.coders.proto2_coder_test_messages_pb2'
# @@protoc_insertion_point(class_scope:proto2_coder_test_messages.ReferencesMessageWithMap)
})
_sym_db.RegisterMessage(ReferencesMessageWithMap)

field1.message_type = _MESSAGEA
MessageC.RegisterExtension(field1)
field2.message_type = _MESSAGEB
MessageC.RegisterExtension(field2)

DESCRIPTOR._options = None
_MESSAGEWITHMAP_FIELD1ENTRY._options = None
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n3apache_beam/coders/proto2_coder_test_messages.proto\x12\x1aproto2_coder_test_messages\"P\n\x08MessageA\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\t\x12\x34\n\x06\x66ield2\x18\x02 \x03(\x0b\x32$.proto2_coder_test_messages.MessageB\"\x1a\n\x08MessageB\x12\x0e\n\x06\x66ield1\x18\x01 \x01(\x08\"\x10\n\x08MessageC*\x04\x08\x64\x10j\"\xad\x01\n\x0eMessageWithMap\x12\x46\n\x06\x66ield1\x18\x01 \x03(\x0b\x32\x36.proto2_coder_test_messages.MessageWithMap.Field1Entry\x1aS\n\x0b\x46ield1Entry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x33\n\x05value\x18\x02 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:\x02\x38\x01\"V\n\x18ReferencesMessageWithMap\x12:\n\x06\x66ield1\x18\x01 \x03(\x0b\x32*.proto2_coder_test_messages.MessageWithMap:Z\n\x06\x66ield1\x12$.proto2_coder_test_messages.MessageC\x18\x65 \x01(\x0b\x32$.proto2_coder_test_messages.MessageA:Z\n\x06\x66ield2\x12$.proto2_coder_test_messages.MessageC\x18\x66 \x01(\x0b\x32$.proto2_coder_test_messages.MessageBB)\n\'org.apache.beam.sdk.extensions.protobuf')

_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'apache_beam.coders.proto2_coder_test_messages_pb2', globals())
if _descriptor._USE_C_DESCRIPTORS == False:
MessageC.RegisterExtension(field1)
MessageC.RegisterExtension(field2)

DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b'\n\'org.apache.beam.sdk.extensions.protobuf'
_MESSAGEWITHMAP_FIELD1ENTRY._options = None
_MESSAGEWITHMAP_FIELD1ENTRY._serialized_options = b'8\001'
_MESSAGEA._serialized_start=83
_MESSAGEA._serialized_end=163
_MESSAGEB._serialized_start=165
_MESSAGEB._serialized_end=191
_MESSAGEC._serialized_start=193
_MESSAGEC._serialized_end=209
_MESSAGEWITHMAP._serialized_start=212
_MESSAGEWITHMAP._serialized_end=385
_MESSAGEWITHMAP_FIELD1ENTRY._serialized_start=302
_MESSAGEWITHMAP_FIELD1ENTRY._serialized_end=385
_REFERENCESMESSAGEWITHMAP._serialized_start=387
_REFERENCESMESSAGEWITHMAP._serialized_end=473
# @@protoc_insertion_point(module_scope)
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def process(self, element):
client_element = self.element_to_client_batch_item(element)
self._batch_elements.append(client_element)
self.add_to_batch(client_element)
self._batch_bytes_size += self._batch.mutations[-1].ByteSize()
self._batch_bytes_size += self._batch.mutations[-1]._pb.ByteSize()

if (len(self._batch.mutations) >= self._target_batch_size or
self._batch_bytes_size > util.WRITE_BATCH_MAX_BYTES_SIZE):
Expand Down
21 changes: 15 additions & 6 deletions sdks/python/apache_beam/io/gcp/datastore/v1new/datastoreio_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@
client = None


# used for internal testing only
class FakeMessage:
def __init__(self, entity, key):
self.entity = entity
self.key = key

def ByteSize(self):
if self.entity is not None:
return helpers.entity_to_protobuf(self.entity)._pb.ByteSize()
else:
return self.key.to_protobuf()._pb.ByteSize()


# used for internal testing only
class FakeMutation(object):
def __init__(self, entity=None, key=None):
"""Fake mutation request object.
Expand All @@ -63,12 +77,7 @@ def __init__(self, entity=None, key=None):
"""
self.entity = entity
self.key = key

def ByteSize(self):
if self.entity is not None:
return helpers.entity_to_protobuf(self.entity).ByteSize()
else:
return self.key.to_protobuf().ByteSize()
self._pb = FakeMessage(entity, key)


class FakeBatch(object):
Expand Down
9 changes: 9 additions & 0 deletions sdks/python/apache_beam/ml/gcp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# TODO(https://github.com/apache/beam/issues/25625)

# update code. For example,
# use class something(beam.PTransform) instead of decorator @beam.ptransform_fn
# on a function.

# update type annotations to follow pep 484 https://peps.python.org/pep-0484/
# and also update doc strings.
Loading

0 comments on commit 5cb1711

Please sign in to comment.